npm - @dreki-gg/pi-code-reviewer - Versions diffs - 0.6.2 → 0.7.0 - Mend

@dreki-gg/pi-code-reviewer 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/README.md +10 -0
package/extensions/code-reviewer/commands/review-tool.ts +15 -0
package/extensions/code-reviewer/config.ts +11 -0
package/extensions/code-reviewer/passes.ts +14 -96
package/extensions/code-reviewer/rejections.ts +115 -0
package/extensions/code-reviewer/reviewer.ts +1 -0
package/extensions/code-reviewer/similarity.ts +81 -0
package/extensions/code-reviewer/types.ts +22 -0
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -54,6 +54,14 @@ multi-stage pipeline modeled on Cursor's Bugbot:
    candidate and drops false positives. It **fails open** — if the validator
    errors, candidates are surfaced unvalidated rather than silently lost.
+4. **Recorded rejections.** The candidates the validator refutes are appended to
+   `.code-review/rejections.jsonl`. On later runs, any finding that matches a
+   past rejection is **downranked and tagged** `⟲ previously rejected` — never
+   hidden, so nothing is silently suppressed, but resurfaced false-positives
+   sink below fresh findings. The store dedupes and is capped. Persisting is
+   best-effort: an FS error never breaks a review. Disable with
+   `review.recordRejections: false`.
 The tool returns finished, validated findings as a Markdown report (vote count,
 confidence, validator justification) plus structured `details`.
@@ -140,6 +148,8 @@ Run `/review-init` to scaffold these (customized for your project's tools) into
 | `review.concurrency` | `= passes` | Max passes run concurrently. |
 | `review.temperature` | `0.4` | Base sampling temperature; each pass adds a small jitter so passes diverge. |
 | `review.maxFindings` | `50` | Hard cap on findings returned. |
+| `review.recordRejections` | `true` | Persist validator false-positives and downrank+tag matches on later runs. |
+| `rejectionsFile` | `.code-review/rejections.jsonl` | Path (relative to cwd) of the recorded-rejections store. |
 | `review.passModel` | session model | Model for ALL passes: a spec string (`"provider/id"`, bare id, or name) or `{ "model", "reasoning" }`. |
 | `review.passModels` | — | List of models **rotated round-robin across passes** — a bake-off in one run. Overrides `passModel`. |
 | `review.validateModel` | session model | Model for the validator stage (string or `{ "model", "reasoning" }`). |

package/extensions/code-reviewer/commands/review-tool.ts CHANGED Viewed

@@ -9,6 +9,12 @@ import { collectDiff, getChangedFiles } from '../diff';
 import { discoverLenses, getLensContent } from '../lenses';
 import { resolveModelPlan } from '../model-plan';
 import { runPipeline } from '../passes';
+import {
+  appendRejections,
+  applyRejections,
+  loadRejections,
+  toRejectionRecords,
+} from '../rejections';
 import {
   buildLensResult,
   buildPipelineResult,
@@ -176,6 +182,15 @@ export function registerReviewTool(pi: ExtensionAPI) {
           const allPassesFailed =
             config.review.passes > 0 && pipeline.telemetry.failedPasses >= config.review.passes;
           if (!allPassesFailed) {
+            // Recorded rejections: downrank+tag findings the validator refuted on
+            // a previous run, then persist this run's false-positives. All FS is
+            // best-effort — it must never break a completed review.
+            if (config.review.recordRejections) {
+              const rejectionsPath = join(cwd, config.rejectionsFile);
+              const past = await loadRejections(rejectionsPath);
+              pipeline.findings = applyRejections(pipeline.findings, past);
+              await appendRejections(rejectionsPath, toRejectionRecords(pipeline.rejected));
+            }
             return buildPipelineResult(
               {
                 pipeline,

package/extensions/code-reviewer/config.ts CHANGED Viewed

@@ -18,6 +18,7 @@ const CONFIG_FILE = '.code-review.json';
 const DEFAULT_LENS_DIR = '.code-review/lenses';
 const DEFAULT_TOOL_TIMEOUT_MS = 60_000;
 const DEFAULT_TOOL_CONCURRENCY = 4;
+const DEFAULT_REJECTIONS_FILE = '.code-review/rejections.jsonl';
 const DEFAULT_PIPELINE: ReviewPipelineConfig = {
   passes: 5,
@@ -26,6 +27,7 @@ const DEFAULT_PIPELINE: ReviewPipelineConfig = {
   concurrency: 5,
   temperature: 0.4,
   maxFindings: 50,
+  recordRejections: true,
 };
 function defaultConfig(): ReviewConfig {
@@ -35,6 +37,7 @@ function defaultConfig(): ReviewConfig {
     toolTimeoutMs: DEFAULT_TOOL_TIMEOUT_MS,
     toolConcurrency: DEFAULT_TOOL_CONCURRENCY,
     review: { ...DEFAULT_PIPELINE },
+    rejectionsFile: DEFAULT_REJECTIONS_FILE,
   };
 }
@@ -91,6 +94,10 @@ function parsePipeline(raw: unknown): ReviewPipelineConfig {
     concurrency: positiveIntOr(review.concurrency, Math.max(1, passes)),
     temperature: clampNumberOr(review.temperature, DEFAULT_PIPELINE.temperature, 0, 2),
     maxFindings: positiveIntOr(review.maxFindings, DEFAULT_PIPELINE.maxFindings),
+    recordRejections:
+      typeof review.recordRejections === 'boolean'
+        ? review.recordRejections
+        : DEFAULT_PIPELINE.recordRejections,
     passModel: parseModelStep(review.passModel),
     passModels: parseModelStepArray(review.passModels),
     validateModel: parseModelStep(review.validateModel),
@@ -118,6 +125,10 @@ export function loadConfigEffect(cwd: string): Effect.Effect<ReviewConfig, never
         toolTimeoutMs: positiveIntOr(parsed.toolTimeoutMs, DEFAULT_TOOL_TIMEOUT_MS),
         toolConcurrency: positiveIntOr(parsed.toolConcurrency, DEFAULT_TOOL_CONCURRENCY),
         review: parsePipeline((parsed as { review?: unknown }).review),
+        rejectionsFile:
+          typeof parsed.rejectionsFile === 'string' && parsed.rejectionsFile.trim()
+            ? parsed.rejectionsFile.trim()
+            : DEFAULT_REJECTIONS_FILE,
       };
     } catch {
       // Malformed config — fall back to defaults.

package/extensions/code-reviewer/passes.ts CHANGED Viewed

@@ -17,6 +17,7 @@
 import { Effect } from 'effect';
 import { causeMessage } from './errors';
+import { sameBug, tokenize } from './similarity';
 import { type ModelResolution, Reviewer, makeReviewerService } from './effects/model';
 import type {
   CandidateFinding,
@@ -85,95 +86,6 @@ const VALIDATOR_SYSTEM_PROMPT = [
   '[{ "id": 0, "verdict": "real|false-positive", "confidence": 0.0, "justification": "..." }]',
 ].join('\n');
-const STOPWORDS = new Set([
-  'the',
-  'and',
-  'for',
-  'with',
-  'that',
-  'this',
-  'when',
-  'from',
-  'into',
-  'will',
-  'would',
-  'could',
-  'should',
-  'have',
-  'has',
-  'not',
-  'but',
-  'are',
-  'was',
-  'were',
-  'its',
-  'his',
-  'her',
-  'than',
-  'then',
-  'which',
-  'what',
-  'where',
-  'while',
-  'use',
-  'used',
-  'using',
-  'can',
-  'may',
-  'might',
-  'a',
-  'an',
-  'is',
-  'of',
-  'to',
-  'in',
-  'on',
-  'it',
-  'be',
-  'as',
-  'at',
-  'or',
-  'if',
-  'so',
-]);
-/** Tokenize a finding message for similarity comparison. */
-function tokenize(message: string): Set<string> {
-  const tokens = message
-    .toLowerCase()
-    .replace(/[^a-z0-9]+/g, ' ')
-    .split(' ')
-    .filter((token) => token.length > 2 && !STOPWORDS.has(token));
-  return new Set(tokens);
-}
-function jaccard(left: Set<string>, right: Set<string>): number {
-  if (left.size === 0 && right.size === 0) return 1;
-  let intersection = 0;
-  for (const token of left) if (right.has(token)) intersection += 1;
-  const union = left.size + right.size - intersection;
-  return union === 0 ? 0 : intersection / union;
-}
-/** Two findings are "the same bug" when they touch the same file and either sit
- *  within a few lines (a strong co-location signal, so only a MODEST text
- *  overlap is needed to fuse paraphrases) or — when a line is missing — read
- *  clearly similar. The lower co-located bar matters: independent passes word
- *  the same defect very differently, and Bugbot leans on an LLM to merge them;
- *  co-location is our deterministic stand-in for that judgment. */
-function sameBug(
-  candidate: { file: string; line?: number; tokens: Set<string> },
-  bucket: { file: string; line?: number; tokens: Set<string> },
-): boolean {
-  if (candidate.file !== bucket.file) return false;
-  const similarity = jaccard(candidate.tokens, bucket.tokens);
-  if (candidate.line !== undefined && bucket.line !== undefined) {
-    if (Math.abs(candidate.line - bucket.line) > 3) return false;
-    return similarity >= 0.25;
-  }
-  // One side has no line to anchor on — demand a clearer textual match.
-  return similarity >= 0.5;
-}
 type WorkingBucket = {
   file: string;
@@ -468,9 +380,13 @@ export function validateCandidatesEffect(
   candidates: CandidateFinding[],
   plan: ModelPlan,
   signal?: AbortSignal,
-): Effect.Effect<{ findings: ValidatedFinding[]; droppedFalsePositives: number }, never, Reviewer> {
+): Effect.Effect<
+  { findings: ValidatedFinding[]; droppedFalsePositives: number; rejected: CandidateFinding[] },
+  never,
+  Reviewer
+> {
   return Effect.gen(function* () {
-    if (candidates.length === 0) return { findings: [], droppedFalsePositives: 0 };
+    if (candidates.length === 0) return { findings: [], droppedFalsePositives: 0, rejected: [] };
     const reviewer = yield* Reviewer;
     const result = yield* reviewer
@@ -494,17 +410,17 @@ export function validateCandidatesEffect(
         justification: '(validator unavailable — surfaced unvalidated)',
         models: contributingModels(candidate.passIndices, plan),
       }));
-      return { findings, droppedFalsePositives: 0 };
+      return { findings, droppedFalsePositives: 0, rejected: [] };
     }
     const verdicts = parseVerdicts(result.right);
     const findings: ValidatedFinding[] = [];
-    let droppedFalsePositives = 0;
+    const rejected: CandidateFinding[] = [];
     candidates.forEach((candidate, index) => {
       const verdict = verdicts.get(index);
       // A candidate with no verdict returned is kept (fail open), not dropped.
       if (verdict && verdict.verdict === 'false-positive') {
-        droppedFalsePositives += 1;
+        rejected.push(candidate);
         return;
       }
       findings.push({
@@ -515,7 +431,7 @@ export function validateCandidatesEffect(
         models: contributingModels(candidate.passIndices, plan),
       });
     });
-    return { findings, droppedFalsePositives };
+    return { findings, droppedFalsePositives: rejected.length, rejected };
   });
 }
@@ -541,11 +457,13 @@ export function runPipelineEffect(
     let validated: ValidatedFinding[];
     let droppedFalsePositives = 0;
+    let rejected: CandidateFinding[] = [];
     if (config.validate) {
       hooks.onStage?.(`validating ${kept.length} candidates`);
       const outcome = yield* validateCandidatesEffect(basePrompt, kept, plan, signal);
       validated = outcome.findings;
       droppedFalsePositives = outcome.droppedFalsePositives;
+      rejected = outcome.rejected;
     } else {
       validated = kept.map((candidate) => ({
         ...candidate,
@@ -571,7 +489,7 @@ export function runPipelineEffect(
       passModels: plan.passes.map((assignment) => assignment.label),
       validatorModel: plan.validator.label,
     };
-    return { findings: capped, telemetry };
+    return { findings: capped, rejected, telemetry };
   });
 }

package/extensions/code-reviewer/rejections.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * Recorded-rejection store: persist validator false-positives and, on later
+ * runs, downrank+tag findings that match a past rejection (never hide them).
+ *
+ * Failure-tolerant by design — any FS or parse error degrades to "no
+ * rejections" so a review is never broken by a missing/garbled store. Node-only
+ * (node:fs/promises), never Bun, since extension source runs on Node via jiti.
+ */
+import { appendFile, mkdir, readFile, writeFile } from 'node:fs/promises';
+import { dirname } from 'node:path';
+import { sameBug, tokenize } from './similarity';
+import type { CandidateFinding, RejectionRecord, ValidatedFinding } from './types';
+/** Keep the store bounded; oldest records are dropped past this many. */
+export const DEFAULT_REJECTION_CAP = 200;
+/** Read the JSONL store, tolerating a missing file or garbled lines. */
+export async function loadRejections(path: string): Promise<RejectionRecord[]> {
+  let text: string;
+  try {
+    text = await readFile(path, 'utf8');
+  } catch {
+    return [];
+  }
+  const records: RejectionRecord[] = [];
+  for (const raw of text.split(/\r?\n/)) {
+    if (!raw.trim()) continue;
+    try {
+      const parsed = JSON.parse(raw) as Record<string, unknown>;
+      if (
+        typeof parsed.file === 'string' &&
+        typeof parsed.message === 'string' &&
+        typeof parsed.severity === 'string'
+      ) {
+        records.push(parsed as unknown as RejectionRecord);
+      }
+    } catch {
+      // Skip an unparseable line rather than discard the whole store.
+    }
+  }
+  return records;
+}
+/** Does a finding match any recorded rejection (same file + co-located/similar)? */
+export function matchesRejection(
+  finding: { file: string; line?: number; message: string },
+  rejections: RejectionRecord[],
+): boolean {
+  const tokens = tokenize(finding.message);
+  return rejections.some((record) =>
+    sameBug(
+      { file: finding.file, line: finding.line, tokens },
+      { file: record.file, line: record.line, tokens: tokenize(record.message) },
+    ),
+  );
+}
+/** Tag findings matching a past rejection and downrank them to the bottom,
+ *  preserving the existing leverage order within each group. Pure. */
+export function applyRejections(
+  findings: ValidatedFinding[],
+  rejections: RejectionRecord[],
+): ValidatedFinding[] {
+  if (rejections.length === 0) return findings;
+  const tagged = findings.map((finding) =>
+    matchesRejection(finding, rejections) ? { ...finding, previouslyRejected: true } : finding,
+  );
+  const kept = tagged.filter((finding) => !finding.previouslyRejected);
+  const downranked = tagged.filter((finding) => finding.previouslyRejected);
+  return [...kept, ...downranked];
+}
+/** Convert this run's validator-refuted candidates into rejection records. */
+export function toRejectionRecords(
+  rejected: CandidateFinding[],
+  now: string = new Date().toISOString(),
+): RejectionRecord[] {
+  return rejected.map((candidate) => ({
+    file: candidate.file,
+    line: candidate.line,
+    severity: candidate.severity,
+    message: candidate.message,
+    recorded_at: now,
+  }));
+}
+/** Append new rejections, deduping against existing ones and capping the total.
+ *  Never throws — a write failure silently no-ops. */
+export async function appendRejections(
+  path: string,
+  entries: RejectionRecord[],
+  cap: number = DEFAULT_REJECTION_CAP,
+): Promise<void> {
+  if (entries.length === 0) return;
+  try {
+    const existing = await loadRejections(path);
+    const fresh = entries.filter(
+      (entry) => !matchesRejection({ file: entry.file, line: entry.line, message: entry.message }, existing),
+    );
+    if (fresh.length === 0) return;
+    const merged = [...existing, ...fresh].slice(-cap);
+    await mkdir(dirname(path), { recursive: true });
+    if (merged.length === existing.length + fresh.length) {
+      // Nothing was capped out — a plain append keeps the file append-only.
+      await appendFile(path, fresh.map((entry) => JSON.stringify(entry)).join('\n') + '\n', 'utf8');
+    } else {
+      // Cap trimmed older records — rewrite the whole bounded store.
+      await writeFile(path, merged.map((entry) => JSON.stringify(entry)).join('\n') + '\n', 'utf8');
+    }
+  } catch {
+    // Persisting rejections must never break a review.
+  }
+}

package/extensions/code-reviewer/reviewer.ts CHANGED Viewed

@@ -270,6 +270,7 @@ export function renderPipelineReport(result: PipelineResult, diff: DiffSource):
       `${Math.round(finding.confidence * 100)}% conf`,
       finding.category,
       multiModel && finding.models.length > 0 ? `models: ${finding.models.join(', ')}` : undefined,
+      finding.previouslyRejected ? '⟲ previously rejected' : undefined,
     ]
       .filter(Boolean)
       .join(', ');

package/extensions/code-reviewer/similarity.ts ADDED Viewed

@@ -0,0 +1,81 @@
+/**
+ * Deterministic finding-similarity helpers, shared by the pass bucketer and the
+ * recorded-rejection matcher. Kept dependency-free and pure so both the
+ * Bugbot-style vote pipeline and the rejection store reason about "is this the
+ * same bug?" identically.
+ */
+const STOPWORDS = new Set([
+  'the',
+  'and',
+  'for',
+  'with',
+  'that',
+  'this',
+  'from',
+  'into',
+  'when',
+  'where',
+  'which',
+  'while',
+  'will',
+  'would',
+  'could',
+  'should',
+  'using',
+  'can',
+  'may',
+  'might',
+  'a',
+  'an',
+  'is',
+  'of',
+  'to',
+  'in',
+  'on',
+  'it',
+  'be',
+  'as',
+  'at',
+  'or',
+  'if',
+  'so',
+]);
+/** Tokenize a finding message for similarity comparison. */
+export function tokenize(message: string): Set<string> {
+  const tokens = message
+    .toLowerCase()
+    .replace(/[^a-z0-9]+/g, ' ')
+    .split(' ')
+    .filter((token) => token.length > 2 && !STOPWORDS.has(token));
+  return new Set(tokens);
+}
+export function jaccard(left: Set<string>, right: Set<string>): number {
+  if (left.size === 0 && right.size === 0) return 1;
+  let intersection = 0;
+  for (const token of left) if (right.has(token)) intersection += 1;
+  const union = left.size + right.size - intersection;
+  return union === 0 ? 0 : intersection / union;
+}
+/** Two findings are "the same bug" when they touch the same file and either sit
+ *  within a few lines (a strong co-location signal, so only a MODEST text
+ *  overlap is needed to fuse paraphrases) or — when a line is missing — read
+ *  clearly similar. The lower co-located bar matters: independent passes word
+ *  the same defect very differently, and Bugbot leans on an LLM to merge them;
+ *  co-location is our deterministic stand-in for that judgment. */
+export function sameBug(
+  candidate: { file: string; line?: number; tokens: Set<string> },
+  bucket: { file: string; line?: number; tokens: Set<string> },
+): boolean {
+  if (candidate.file !== bucket.file) return false;
+  const similarity = jaccard(candidate.tokens, bucket.tokens);
+  if (candidate.line !== undefined && bucket.line !== undefined) {
+    if (Math.abs(candidate.line - bucket.line) > 3) return false;
+    return similarity >= 0.25;
+  }
+  // One side has no line to anchor on — demand a clearer textual match.
+  return similarity >= 0.5;
+}

package/extensions/code-reviewer/types.ts CHANGED Viewed

@@ -56,6 +56,9 @@ export type ValidatedFinding = CandidateFinding & {
   /** Validator confidence in `verdict`, 0..1. */
   confidence: number;
   justification?: string;
+  /** True when this finding matches a previously-recorded rejection. Downranked
+   *  and tagged in the report; never hidden. */
+  previouslyRejected?: boolean;
   /** Distinct model keys whose passes contributed to this finding (for the
    *  model bake-off: "which model caught this"). */
   models: string[];
@@ -106,9 +109,24 @@ export type PipelineTelemetry = {
 export type PipelineResult = {
   findings: ValidatedFinding[];
+  /** Candidates the validator refuted this run. Surfaced (not just counted) so
+   *  the command layer can persist them as recorded rejections. */
+  rejected: CandidateFinding[];
   telemetry: PipelineTelemetry;
 };
+/** A persisted record of a validator-refuted finding, matched against future
+ *  runs so a refuted finding that resurfaces is downranked and tagged. */
+export type RejectionRecord = {
+  file: string;
+  line?: number;
+  severity: LensSeverity;
+  message: string;
+  justification?: string;
+  /** ISO timestamp the rejection was recorded. */
+  recorded_at: string;
+};
 /** Tunables for the self-driving pipeline (all overridable in config). */
 export type ReviewPipelineConfig = {
   /** Parallel adversarial bug-finding passes. 0 disables the pipeline
@@ -126,6 +144,8 @@ export type ReviewPipelineConfig = {
   temperature: number;
   /** Hard cap on findings returned (safety valve against runaway output). */
   maxFindings: number;
+  /** Persist validator false-positives and downrank+tag matches on later runs. */
+  recordRejections: boolean;
   /** Model for ALL passes — a spec string or `{ model, reasoning }`. Omitted →
    *  session model. Overridden per-pass by {@link passModels}. */
   passModel?: ModelStepConfig;
@@ -153,4 +173,6 @@ export type ReviewConfig = {
   toolConcurrency: number;
   /** Self-driving pipeline tunables (see {@link ReviewPipelineConfig}). */
   review: ReviewPipelineConfig;
+  /** Path (relative to cwd) of the recorded-rejections JSONL store. */
+  rejectionsFile: string;
 };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@dreki-gg/pi-code-reviewer",
-  "version": "0.6.2",
+  "version": "0.7.0",
   "description": "Multi-lens code review extension for pi — configurable review criteria per project",
   "keywords": [
     "pi-package"