@dreki-gg/pi-code-reviewer 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -54,6 +54,14 @@ multi-stage pipeline modeled on Cursor's Bugbot:
54
54
  candidate and drops false positives. It **fails open** — if the validator
55
55
  errors, candidates are surfaced unvalidated rather than silently lost.
56
56
 
57
+ 4. **Recorded rejections.** The candidates the validator refutes are appended to
58
+ `.code-review/rejections.jsonl`. On later runs, any finding that matches a
59
+ past rejection is **downranked and tagged** `⟲ previously rejected` — never
60
+ hidden, so nothing is silently suppressed, but resurfaced false-positives
61
+ sink below fresh findings. The store dedupes and is capped at 200 records (oldest dropped first). Persisting is
62
+ best-effort: an FS error never breaks a review. Disable with
63
+ `review.recordRejections: false`.
64
+
57
65
  The tool returns finished, validated findings as a Markdown report (vote count,
58
66
  confidence, validator justification) plus structured `details`.
59
67
 
@@ -140,6 +148,8 @@ Run `/review-init` to scaffold these (customized for your project's tools) into
140
148
  | `review.concurrency` | `= passes` | Max passes run concurrently. |
141
149
  | `review.temperature` | `0.4` | Base sampling temperature; each pass adds a small jitter so passes diverge. |
142
150
  | `review.maxFindings` | `50` | Hard cap on findings returned. |
151
+ | `review.recordRejections` | `true` | Persist validator false-positives and downrank+tag matches on later runs. |
152
+ | `rejectionsFile` | `.code-review/rejections.jsonl` | Top-level key (not under `review`): path, relative to cwd, of the recorded-rejections store. |
143
153
  | `review.passModel` | session model | Model for ALL passes: a spec string (`"provider/id"`, bare id, or name) or `{ "model", "reasoning" }`. |
144
154
  | `review.passModels` | — | List of models **rotated round-robin across passes** — a bake-off in one run. Overrides `passModel`. |
145
155
  | `review.validateModel` | session model | Model for the validator stage (string or `{ "model", "reasoning" }`). |
@@ -9,6 +9,12 @@ import { collectDiff, getChangedFiles } from '../diff';
9
9
  import { discoverLenses, getLensContent } from '../lenses';
10
10
  import { resolveModelPlan } from '../model-plan';
11
11
  import { runPipeline } from '../passes';
12
+ import {
13
+ appendRejections,
14
+ applyRejections,
15
+ loadRejections,
16
+ toRejectionRecords,
17
+ } from '../rejections';
12
18
  import {
13
19
  buildLensResult,
14
20
  buildPipelineResult,
@@ -176,6 +182,15 @@ export function registerReviewTool(pi: ExtensionAPI) {
176
182
  const allPassesFailed =
177
183
  config.review.passes > 0 && pipeline.telemetry.failedPasses >= config.review.passes;
178
184
  if (!allPassesFailed) {
185
+ // Recorded rejections: downrank+tag findings the validator refuted on
186
+ // a previous run, then persist this run's false-positives. All FS is
187
+ // best-effort — it must never break a completed review.
188
+ if (config.review.recordRejections) {
189
+ const rejectionsPath = join(cwd, config.rejectionsFile);
190
+ const past = await loadRejections(rejectionsPath);
191
+ pipeline.findings = applyRejections(pipeline.findings, past);
192
+ await appendRejections(rejectionsPath, toRejectionRecords(pipeline.rejected));
193
+ }
179
194
  return buildPipelineResult(
180
195
  {
181
196
  pipeline,
@@ -18,6 +18,7 @@ const CONFIG_FILE = '.code-review.json';
18
18
  const DEFAULT_LENS_DIR = '.code-review/lenses';
19
19
  const DEFAULT_TOOL_TIMEOUT_MS = 60_000;
20
20
  const DEFAULT_TOOL_CONCURRENCY = 4;
21
+ const DEFAULT_REJECTIONS_FILE = '.code-review/rejections.jsonl';
21
22
 
22
23
  const DEFAULT_PIPELINE: ReviewPipelineConfig = {
23
24
  passes: 5,
@@ -26,6 +27,7 @@ const DEFAULT_PIPELINE: ReviewPipelineConfig = {
26
27
  concurrency: 5,
27
28
  temperature: 0.4,
28
29
  maxFindings: 50,
30
+ recordRejections: true,
29
31
  };
30
32
 
31
33
  function defaultConfig(): ReviewConfig {
@@ -35,6 +37,7 @@ function defaultConfig(): ReviewConfig {
35
37
  toolTimeoutMs: DEFAULT_TOOL_TIMEOUT_MS,
36
38
  toolConcurrency: DEFAULT_TOOL_CONCURRENCY,
37
39
  review: { ...DEFAULT_PIPELINE },
40
+ rejectionsFile: DEFAULT_REJECTIONS_FILE,
38
41
  };
39
42
  }
40
43
 
@@ -91,6 +94,10 @@ function parsePipeline(raw: unknown): ReviewPipelineConfig {
91
94
  concurrency: positiveIntOr(review.concurrency, Math.max(1, passes)),
92
95
  temperature: clampNumberOr(review.temperature, DEFAULT_PIPELINE.temperature, 0, 2),
93
96
  maxFindings: positiveIntOr(review.maxFindings, DEFAULT_PIPELINE.maxFindings),
97
+ recordRejections:
98
+ typeof review.recordRejections === 'boolean'
99
+ ? review.recordRejections
100
+ : DEFAULT_PIPELINE.recordRejections,
94
101
  passModel: parseModelStep(review.passModel),
95
102
  passModels: parseModelStepArray(review.passModels),
96
103
  validateModel: parseModelStep(review.validateModel),
@@ -118,6 +125,10 @@ export function loadConfigEffect(cwd: string): Effect.Effect<ReviewConfig, never
118
125
  toolTimeoutMs: positiveIntOr(parsed.toolTimeoutMs, DEFAULT_TOOL_TIMEOUT_MS),
119
126
  toolConcurrency: positiveIntOr(parsed.toolConcurrency, DEFAULT_TOOL_CONCURRENCY),
120
127
  review: parsePipeline((parsed as { review?: unknown }).review),
128
+ rejectionsFile:
129
+ typeof parsed.rejectionsFile === 'string' && parsed.rejectionsFile.trim()
130
+ ? parsed.rejectionsFile.trim()
131
+ : DEFAULT_REJECTIONS_FILE,
121
132
  };
122
133
  } catch {
123
134
  // Malformed config — fall back to defaults.
@@ -17,6 +17,7 @@
17
17
  import { Effect } from 'effect';
18
18
 
19
19
  import { causeMessage } from './errors';
20
+ import { sameBug, tokenize } from './similarity';
20
21
  import { type ModelResolution, Reviewer, makeReviewerService } from './effects/model';
21
22
  import type {
22
23
  CandidateFinding,
@@ -85,95 +86,6 @@ const VALIDATOR_SYSTEM_PROMPT = [
85
86
  '[{ "id": 0, "verdict": "real|false-positive", "confidence": 0.0, "justification": "..." }]',
86
87
  ].join('\n');
87
88
 
88
- const STOPWORDS = new Set([
89
- 'the',
90
- 'and',
91
- 'for',
92
- 'with',
93
- 'that',
94
- 'this',
95
- 'when',
96
- 'from',
97
- 'into',
98
- 'will',
99
- 'would',
100
- 'could',
101
- 'should',
102
- 'have',
103
- 'has',
104
- 'not',
105
- 'but',
106
- 'are',
107
- 'was',
108
- 'were',
109
- 'its',
110
- 'his',
111
- 'her',
112
- 'than',
113
- 'then',
114
- 'which',
115
- 'what',
116
- 'where',
117
- 'while',
118
- 'use',
119
- 'used',
120
- 'using',
121
- 'can',
122
- 'may',
123
- 'might',
124
- 'a',
125
- 'an',
126
- 'is',
127
- 'of',
128
- 'to',
129
- 'in',
130
- 'on',
131
- 'it',
132
- 'be',
133
- 'as',
134
- 'at',
135
- 'or',
136
- 'if',
137
- 'so',
138
- ]);
139
-
140
- /** Tokenize a finding message for similarity comparison. */
141
- function tokenize(message: string): Set<string> {
142
- const tokens = message
143
- .toLowerCase()
144
- .replace(/[^a-z0-9]+/g, ' ')
145
- .split(' ')
146
- .filter((token) => token.length > 2 && !STOPWORDS.has(token));
147
- return new Set(tokens);
148
- }
149
-
150
- function jaccard(left: Set<string>, right: Set<string>): number {
151
- if (left.size === 0 && right.size === 0) return 1;
152
- let intersection = 0;
153
- for (const token of left) if (right.has(token)) intersection += 1;
154
- const union = left.size + right.size - intersection;
155
- return union === 0 ? 0 : intersection / union;
156
- }
157
-
158
- /** Two findings are "the same bug" when they touch the same file and either sit
159
- * within a few lines (a strong co-location signal, so only a MODEST text
160
- * overlap is needed to fuse paraphrases) or — when a line is missing — read
161
- * clearly similar. The lower co-located bar matters: independent passes word
162
- * the same defect very differently, and Bugbot leans on an LLM to merge them;
163
- * co-location is our deterministic stand-in for that judgment. */
164
- function sameBug(
165
- candidate: { file: string; line?: number; tokens: Set<string> },
166
- bucket: { file: string; line?: number; tokens: Set<string> },
167
- ): boolean {
168
- if (candidate.file !== bucket.file) return false;
169
- const similarity = jaccard(candidate.tokens, bucket.tokens);
170
- if (candidate.line !== undefined && bucket.line !== undefined) {
171
- if (Math.abs(candidate.line - bucket.line) > 3) return false;
172
- return similarity >= 0.25;
173
- }
174
- // One side has no line to anchor on — demand a clearer textual match.
175
- return similarity >= 0.5;
176
- }
177
89
 
178
90
  type WorkingBucket = {
179
91
  file: string;
@@ -468,9 +380,13 @@ export function validateCandidatesEffect(
468
380
  candidates: CandidateFinding[],
469
381
  plan: ModelPlan,
470
382
  signal?: AbortSignal,
471
- ): Effect.Effect<{ findings: ValidatedFinding[]; droppedFalsePositives: number }, never, Reviewer> {
383
+ ): Effect.Effect<
384
+ { findings: ValidatedFinding[]; droppedFalsePositives: number; rejected: CandidateFinding[] },
385
+ never,
386
+ Reviewer
387
+ > {
472
388
  return Effect.gen(function* () {
473
- if (candidates.length === 0) return { findings: [], droppedFalsePositives: 0 };
389
+ if (candidates.length === 0) return { findings: [], droppedFalsePositives: 0, rejected: [] };
474
390
  const reviewer = yield* Reviewer;
475
391
 
476
392
  const result = yield* reviewer
@@ -494,17 +410,17 @@ export function validateCandidatesEffect(
494
410
  justification: '(validator unavailable — surfaced unvalidated)',
495
411
  models: contributingModels(candidate.passIndices, plan),
496
412
  }));
497
- return { findings, droppedFalsePositives: 0 };
413
+ return { findings, droppedFalsePositives: 0, rejected: [] };
498
414
  }
499
415
 
500
416
  const verdicts = parseVerdicts(result.right);
501
417
  const findings: ValidatedFinding[] = [];
502
- let droppedFalsePositives = 0;
418
+ const rejected: CandidateFinding[] = [];
503
419
  candidates.forEach((candidate, index) => {
504
420
  const verdict = verdicts.get(index);
505
421
  // A candidate with no verdict returned is kept (fail open), not dropped.
506
422
  if (verdict && verdict.verdict === 'false-positive') {
507
- droppedFalsePositives += 1;
423
+ rejected.push(candidate);
508
424
  return;
509
425
  }
510
426
  findings.push({
@@ -515,7 +431,7 @@ export function validateCandidatesEffect(
515
431
  models: contributingModels(candidate.passIndices, plan),
516
432
  });
517
433
  });
518
- return { findings, droppedFalsePositives };
434
+ return { findings, droppedFalsePositives: rejected.length, rejected };
519
435
  });
520
436
  }
521
437
 
@@ -541,11 +457,13 @@ export function runPipelineEffect(
541
457
 
542
458
  let validated: ValidatedFinding[];
543
459
  let droppedFalsePositives = 0;
460
+ let rejected: CandidateFinding[] = [];
544
461
  if (config.validate) {
545
462
  hooks.onStage?.(`validating ${kept.length} candidates`);
546
463
  const outcome = yield* validateCandidatesEffect(basePrompt, kept, plan, signal);
547
464
  validated = outcome.findings;
548
465
  droppedFalsePositives = outcome.droppedFalsePositives;
466
+ rejected = outcome.rejected;
549
467
  } else {
550
468
  validated = kept.map((candidate) => ({
551
469
  ...candidate,
@@ -571,7 +489,7 @@ export function runPipelineEffect(
571
489
  passModels: plan.passes.map((assignment) => assignment.label),
572
490
  validatorModel: plan.validator.label,
573
491
  };
574
- return { findings: capped, telemetry };
492
+ return { findings: capped, rejected, telemetry };
575
493
  });
576
494
  }
577
495
 
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Recorded-rejection store: persist validator false-positives and, on later
3
+ * runs, downrank+tag findings that match a past rejection (never hide them).
4
+ *
5
+ * Failure-tolerant by design — any FS or parse error degrades to "no
6
+ * rejections" so a review is never broken by a missing/garbled store. Node-only
7
+ * (node:fs/promises), never Bun, since extension source runs on Node via jiti.
8
+ */
9
+
10
+ import { appendFile, mkdir, readFile, writeFile } from 'node:fs/promises';
11
+ import { dirname } from 'node:path';
12
+
13
+ import { sameBug, tokenize } from './similarity';
14
+ import type { CandidateFinding, RejectionRecord, ValidatedFinding } from './types';
15
+
16
+ /** Keep the store bounded; oldest records are dropped past this many. */
17
+ export const DEFAULT_REJECTION_CAP = 200;
18
+
19
/**
 * Read the JSONL rejection store at `path`.
 *
 * Never throws. A missing or unreadable file yields `[]`. A line is skipped
 * (rather than discarding the whole store) when it is blank, is not valid JSON,
 * is not a JSON object, or lacks string `file`, `message` and `severity`
 * fields.
 *
 * The optional `line` field is kept only when it is a finite number. The store
 * is a plain text file that can be hand-edited, and a `"line": null`,
 * `"line": "12"` or `"line": 1e999` would otherwise be cast to
 * `RejectionRecord` and fed into the line-distance arithmetic of the matcher.
 *
 * @param path Location of the JSONL store.
 * @returns The well-formed records, in file order.
 */
export async function loadRejections(path: string): Promise<RejectionRecord[]> {
  let text: string;
  try {
    text = await readFile(path, 'utf8');
  } catch {
    return [];
  }
  const records: RejectionRecord[] = [];
  for (const raw of text.split(/\r?\n/)) {
    if (!raw.trim()) continue;
    let parsed: unknown;
    try {
      parsed = JSON.parse(raw);
    } catch {
      // Skip an unparseable line rather than discard the whole store.
      continue;
    }
    // `null`, numbers, strings and booleans are valid JSON but not records.
    if (typeof parsed !== 'object' || parsed === null) continue;
    const entry = parsed as Record<string, unknown>;
    if (
      typeof entry.file !== 'string' ||
      typeof entry.message !== 'string' ||
      typeof entry.severity !== 'string'
    ) {
      continue;
    }
    // An unusable line number is dropped so the record matches as "no line".
    if (typeof entry.line !== 'number' || !Number.isFinite(entry.line)) delete entry.line;
    // The required fields were checked above; the severity value itself is not
    // validated against the severity union, hence the cast.
    records.push(entry as unknown as RejectionRecord);
  }
  return records;
}
45
+
46
/**
 * Report whether `finding` is "the same bug" (per `sameBug`) as at least one
 * recorded rejection. Messages on both sides are tokenized with `tokenize`
 * before comparison.
 *
 * @param finding The file, optional line and message of the finding to test.
 * @param rejections Previously recorded rejections to compare against.
 * @returns `true` on the first matching record, `false` when none match.
 */
export function matchesRejection(
  finding: { file: string; line?: number; message: string },
  rejections: RejectionRecord[],
): boolean {
  const probe = { file: finding.file, line: finding.line, tokens: tokenize(finding.message) };
  for (const record of rejections) {
    const past = { file: record.file, line: record.line, tokens: tokenize(record.message) };
    if (sameBug(probe, past)) return true;
  }
  return false;
}
59
+
60
/**
 * Tag every finding that matches a past rejection with
 * `previouslyRejected: true` and move the tagged findings after the untagged
 * ones. Relative order inside each group is preserved. Pure: matching findings
 * are copied, not mutated.
 *
 * Each rejection message is tokenized once up front instead of once per
 * finding, so the cost is one tokenization per record plus one per finding.
 *
 * @param findings Findings in their current ranking order.
 * @param rejections Previously recorded rejections.
 * @returns `findings` itself when there are no rejections; otherwise a new
 *   array with untagged findings first and previously-rejected ones last.
 */
export function applyRejections(
  findings: ValidatedFinding[],
  rejections: RejectionRecord[],
): ValidatedFinding[] {
  if (rejections.length === 0) return findings;
  const past = rejections.map((record) => ({
    file: record.file,
    line: record.line,
    tokens: tokenize(record.message),
  }));
  const kept: ValidatedFinding[] = [];
  const downranked: ValidatedFinding[] = [];
  for (const finding of findings) {
    const probe = { file: finding.file, line: finding.line, tokens: tokenize(finding.message) };
    const tagged = past.some((record) => sameBug(probe, record))
      ? { ...finding, previouslyRejected: true }
      : finding;
    // Partition on the flag itself so a finding that arrived already tagged is
    // still downranked even when it matches no record.
    (tagged.previouslyRejected ? downranked : kept).push(tagged);
  }
  return [...kept, ...downranked];
}
74
+
75
/**
 * Build one rejection record per validator-refuted candidate, copying its
 * `file`, `line`, `severity` and `message` and stamping every record with the
 * same `recorded_at` value.
 *
 * @param rejected Candidates the validator refuted this run.
 * @param now Timestamp to record; defaults to the current time as an ISO string.
 * @returns The records, in the same order as `rejected`.
 */
export function toRejectionRecords(
  rejected: CandidateFinding[],
  now: string = new Date().toISOString(),
): RejectionRecord[] {
  const records: RejectionRecord[] = [];
  for (const { file, line, severity, message } of rejected) {
    records.push({ file, line, severity, message, recorded_at: now });
  }
  return records;
}
88
+
89
/**
 * Persist new rejections to the JSONL store at `path`.
 *
 * Entries are skipped when they match a record already in the store or an
 * entry accepted earlier in the same call, so a single batch cannot introduce
 * its own duplicates. When the store would exceed the cap, the oldest records
 * are dropped and the whole file is rewritten; otherwise the new records are
 * simply appended.
 *
 * Never throws: any filesystem error is swallowed on purpose, because
 * persisting rejections must never break a completed review.
 *
 * NOTE(review): the append path writes straight after the file's current last
 * byte, so a store whose final line lacks a trailing newline (e.g. after a
 * hand edit) will have the first appended record glued onto that line.
 *
 * @param path Location of the JSONL store; parent directories are created.
 * @param entries Candidate records to add.
 * @param cap Maximum number of records to keep. A value that is not a finite
 *   number >= 1 falls back to `DEFAULT_REJECTION_CAP`; previously `0` meant
 *   "unbounded" (`slice(-0)` is `slice(0)`) and a negative value dropped the
 *   oldest records instead of bounding the store.
 */
export async function appendRejections(
  path: string,
  entries: RejectionRecord[],
  cap: number = DEFAULT_REJECTION_CAP,
): Promise<void> {
  if (entries.length === 0) return;
  const limit = Number.isFinite(cap) && cap >= 1 ? Math.floor(cap) : DEFAULT_REJECTION_CAP;
  const serialize = (records: RejectionRecord[]): string =>
    records.map((record) => JSON.stringify(record)).join('\n') + '\n';
  try {
    const existing = await loadRejections(path);
    const fresh: RejectionRecord[] = [];
    for (const entry of entries) {
      const duplicate = matchesRejection(entry, existing) || matchesRejection(entry, fresh);
      if (!duplicate) fresh.push(entry);
    }
    if (fresh.length === 0) return;
    await mkdir(dirname(path), { recursive: true });
    if (existing.length + fresh.length <= limit) {
      // Nothing needs trimming — a plain append keeps the file append-only.
      await appendFile(path, serialize(fresh), 'utf8');
    } else {
      // Over the cap — rewrite the store with only the newest `limit` records.
      await writeFile(path, serialize([...existing, ...fresh].slice(-limit)), 'utf8');
    }
  } catch {
    // Persisting rejections must never break a review.
  }
}
@@ -270,6 +270,7 @@ export function renderPipelineReport(result: PipelineResult, diff: DiffSource):
270
270
  `${Math.round(finding.confidence * 100)}% conf`,
271
271
  finding.category,
272
272
  multiModel && finding.models.length > 0 ? `models: ${finding.models.join(', ')}` : undefined,
273
+ finding.previouslyRejected ? '⟲ previously rejected' : undefined,
273
274
  ]
274
275
  .filter(Boolean)
275
276
  .join(', ');
@@ -0,0 +1,81 @@
1
+ /**
2
+ * Deterministic finding-similarity helpers, shared by the pass bucketer and the
3
+ * recorded-rejection matcher. Kept dependency-free and pure so both the
4
+ * Bugbot-style vote pipeline and the rejection store reason about "is this the
5
+ * same bug?" identically.
6
+ */
7
+
8
/**
 * Words that carry no bug-identifying signal and are ignored when comparing
 * finding messages.
 *
 * This is the complete list the pass bucketer used before these helpers were
 * extracted into a shared module. Shortening it would change the similarity
 * scores, and therefore the bucketing, of the existing pipeline. Entries of one
 * or two characters are redundant with the length filter in `tokenize` and are
 * kept only for fidelity with that list.
 */
const STOPWORDS = new Set([
  'the',
  'and',
  'for',
  'with',
  'that',
  'this',
  'when',
  'from',
  'into',
  'will',
  'would',
  'could',
  'should',
  'have',
  'has',
  'not',
  'but',
  'are',
  'was',
  'were',
  'its',
  'his',
  'her',
  'than',
  'then',
  'which',
  'what',
  'where',
  'while',
  'use',
  'used',
  'using',
  'can',
  'may',
  'might',
  'a',
  'an',
  'is',
  'of',
  'to',
  'in',
  'on',
  'it',
  'be',
  'as',
  'at',
  'or',
  'if',
  'so',
]);

/**
 * Tokenize a finding message for similarity comparison.
 *
 * The message is lower-cased, every run of characters outside `[a-z0-9]` is
 * treated as a separator, and tokens of two characters or fewer or listed in
 * `STOPWORDS` are discarded.
 *
 * @param message Free-text finding message.
 * @returns The distinct remaining tokens.
 */
export function tokenize(message: string): Set<string> {
  const tokens = message
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, ' ')
    .split(' ')
    .filter((token) => token.length > 2 && !STOPWORDS.has(token));
  return new Set(tokens);
}
54
+
55
/**
 * Jaccard similarity of two token sets: |left ∩ right| / |left ∪ right|.
 *
 * Two empty sets are defined as identical (1). In every other case the union
 * is non-empty, so the division is always safe and no separate zero-union
 * guard is needed.
 *
 * @returns A value in the range 0..1.
 */
export function jaccard(left: Set<string>, right: Set<string>): number {
  if (left.size === 0 && right.size === 0) return 1;
  let intersection = 0;
  for (const token of left) if (right.has(token)) intersection += 1;
  return intersection / (left.size + right.size - intersection);
}
62
+
63
/**
 * Decide whether two findings describe "the same bug".
 *
 * They must touch the same file. When both carry a line number they must sit
 * within 3 lines of each other, and then only a modest token overlap
 * (Jaccard >= 0.25) is required: co-location is a strong signal, and
 * independent passes word the same defect very differently. When either side
 * has no line to anchor on, a clearer textual match (Jaccard >= 0.5) is
 * demanded instead.
 *
 * The cheap file and line-distance checks run before the token comparison, so
 * findings that are far apart never pay for a Jaccard computation. A line
 * counts as present only when it is a number, so a `null` read back from a
 * stored JSON record is treated as "no line" rather than as line 0.
 *
 * @param candidate The finding being classified.
 * @param bucket The finding (or bucket representative) it is compared with.
 */
export function sameBug(
  candidate: { file: string; line?: number; tokens: Set<string> },
  bucket: { file: string; line?: number; tokens: Set<string> },
): boolean {
  if (candidate.file !== bucket.file) return false;
  if (typeof candidate.line === 'number' && typeof bucket.line === 'number') {
    if (Math.abs(candidate.line - bucket.line) > 3) return false;
    return jaccard(candidate.tokens, bucket.tokens) >= 0.25;
  }
  // One side has no line to anchor on — demand a clearer textual match.
  return jaccard(candidate.tokens, bucket.tokens) >= 0.5;
}
@@ -56,6 +56,9 @@ export type ValidatedFinding = CandidateFinding & {
56
56
  /** Validator confidence in `verdict`, 0..1. */
57
57
  confidence: number;
58
58
  justification?: string;
59
+ /** True when this finding matches a previously-recorded rejection. Downranked
60
+ * and tagged in the report; never hidden. */
61
+ previouslyRejected?: boolean;
59
62
  /** Distinct model keys whose passes contributed to this finding (for the
60
63
  * model bake-off: "which model caught this"). */
61
64
  models: string[];
@@ -106,9 +109,24 @@ export type PipelineTelemetry = {
106
109
 
107
110
  export type PipelineResult = {
108
111
  findings: ValidatedFinding[];
112
+ /** Candidates the validator refuted this run. Surfaced (not just counted) so
113
+ * the command layer can persist them as recorded rejections. */
114
+ rejected: CandidateFinding[];
109
115
  telemetry: PipelineTelemetry;
110
116
  };
111
117
 
118
+ /** A persisted record of a validator-refuted finding, matched against future
119
+ * runs so a refuted finding that resurfaces is downranked and tagged. */
120
+ export type RejectionRecord = {
121
+ file: string;
122
+ line?: number;
123
+ severity: LensSeverity;
124
+ message: string;
125
+ justification?: string;
126
+ /** ISO timestamp the rejection was recorded. */
127
+ recorded_at: string;
128
+ };
129
+
112
130
  /** Tunables for the self-driving pipeline (all overridable in config). */
113
131
  export type ReviewPipelineConfig = {
114
132
  /** Parallel adversarial bug-finding passes. 0 disables the pipeline
@@ -126,6 +144,8 @@ export type ReviewPipelineConfig = {
126
144
  temperature: number;
127
145
  /** Hard cap on findings returned (safety valve against runaway output). */
128
146
  maxFindings: number;
147
+ /** Persist validator false-positives and downrank+tag matches on later runs. */
148
+ recordRejections: boolean;
129
149
  /** Model for ALL passes — a spec string or `{ model, reasoning }`. Omitted →
130
150
  * session model. Overridden per-pass by {@link passModels}. */
131
151
  passModel?: ModelStepConfig;
@@ -153,4 +173,6 @@ export type ReviewConfig = {
153
173
  toolConcurrency: number;
154
174
  /** Self-driving pipeline tunables (see {@link ReviewPipelineConfig}). */
155
175
  review: ReviewPipelineConfig;
176
+ /** Path (relative to cwd) of the recorded-rejections JSONL store. */
177
+ rejectionsFile: string;
156
178
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dreki-gg/pi-code-reviewer",
3
- "version": "0.6.2",
3
+ "version": "0.7.0",
4
4
  "description": "Multi-lens code review extension for pi — configurable review criteria per project",
5
5
  "keywords": [
6
6
  "pi-package"