npm - @dreki-gg/pi-code-reviewer - Versions diffs - 0.3.0 → 0.5.0 - Mend

@dreki-gg/pi-code-reviewer 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

package/README.md +74 -1
package/extensions/code-reviewer/commands/review-init.ts +13 -3
package/extensions/code-reviewer/commands/review-tool.ts +125 -27
package/extensions/code-reviewer/commands/review.ts +49 -8
package/extensions/code-reviewer/config.ts +90 -2
package/extensions/code-reviewer/diff.ts +11 -5
package/extensions/code-reviewer/effects/model.ts +112 -0
package/extensions/code-reviewer/errors.ts +10 -1
package/extensions/code-reviewer/model-plan.ts +84 -0
package/extensions/code-reviewer/passes.ts +571 -0
package/extensions/code-reviewer/reviewer.ts +164 -81
package/extensions/code-reviewer/types.ts +124 -10
package/package.json +1 -1
package/skills/code-review/lenses/code-quality.md +16 -2
package/extensions/code-reviewer/report.ts +0 -109

package/extensions/code-reviewer/reviewer.ts CHANGED

@@ -4,51 +4,88 @@ import { Effect } from 'effect';
 import type { DiffSource } from './diff';
 import { Executor, makeExecutorService } from './effects/exec';
-import type { LensConfig, LensResult } from './types';
+import type { LensConfig, LensResult, PipelineResult, ValidatedFinding } from './types';
 const isWindows = platform() === 'win32';
-/** Run project tools specified by a lens and collect their output. */
-function runLensToolsEffect(
+export type ToolRunOptions = { timeoutMs: number; concurrency: number };
+/**
+ * Run a set of project tool commands ONCE, deduped and concurrently, and
+ * collect their output keyed by the original command string.
+ *
+ * Tools are deduped across lenses by the caller (and again here defensively),
+ * so a command shared by several lenses runs a single time — not once per
+ * lens. Each command is shelled out with a bounded timeout; a failure or
+ * timeout degrades to a sentinel string instead of failing the whole review.
+ */
+export function runToolsEffect(
   cwd: string,
   tools: string[],
+  options: ToolRunOptions,
   signal?: AbortSignal,
 ): Effect.Effect<Record<string, string>, never, Executor> {
   return Effect.gen(function* () {
-    const executor = yield* Executor;
-    const outputs: Record<string, string> = {};
-    for (const tool of tools) {
-      if (signal?.aborted) break;
+    const unique = [...new Set(tools)];
+    if (unique.length === 0 || signal?.aborted) return {};
-      const [shell, shellArgs] = isWindows ? ['cmd', ['/c', tool]] : ['sh', ['-c', tool]];
-      const result = yield* executor
-        .exec(shell, shellArgs as string[], { cwd, timeout: 60_000, signal })
-        .pipe(Effect.either);
+    const executor = yield* Executor;
-      outputs[tool] =
-        result._tag === 'Right'
-          ? result.right.stdout || result.right.stderr || '(no output)'
-          : `(tool failed or timed out: ${tool})`;
-    }
+    const entries = yield* Effect.forEach(
+      unique,
+      (tool) =>
+        Effect.gen(function* () {
+          if (signal?.aborted) return [tool, '(skipped: review aborted)'] as const;
+          const [shell, shellArgs] = isWindows ? ['cmd', ['/c', tool]] : ['sh', ['-c', tool]];
+          const result = yield* executor
+            .exec(shell, shellArgs as string[], { cwd, timeout: options.timeoutMs, signal })
+            .pipe(Effect.either);
+          const output =
+            result._tag === 'Right'
+              ? result.right.stdout || result.right.stderr || '(no output)'
+              : `(tool failed or timed out: ${tool})`;
+          return [tool, output] as const;
+        }),
+      { concurrency: Math.max(1, options.concurrency) },
+    );
-    return outputs;
+    return Object.fromEntries(entries);
   });
 }
+/** Pick the subset of already-run tool outputs that a given lens declares. */
+export function pickLensToolOutputs(
+  lens: LensConfig,
+  allOutputs: Record<string, string>,
+): Record<string, string> {
+  const picked: Record<string, string> = {};
+  for (const tool of lens.tools) {
+    if (tool in allOutputs) picked[tool] = allOutputs[tool];
+  }
+  return picked;
+}
 /** Build the shared diff section of the review prompt (included once). */
 export function buildDiffSection(diff: DiffSource): string {
   const parts: string[] = [];
   const maxDiffLen = 50_000;
   const diffTruncated = diff.diff.length > maxDiffLen;
+  // Cut at the last newline within budget so we never emit a half-line of
+  // diff (which reads as a corrupt hunk); fall back to a hard slice if a
+  // single line already exceeds the budget.
+  const body = diffTruncated
+    ? diff.diff.slice(0, Math.max(diff.diff.lastIndexOf('\n', maxDiffLen), 0) || maxDiffLen)
+    : diff.diff;
   parts.push(`## Diff (${diff.label})`);
   parts.push('```diff');
-  parts.push(diff.diff.slice(0, maxDiffLen));
+  parts.push(body);
   parts.push('```');
   if (diffTruncated) {
     parts.push(
-      `> ⚠️ Diff truncated (${diff.diff.length} chars → ${maxDiffLen}). Some files may not appear above.`,
+      `> ⚠️ Diff truncated (${diff.diff.length} chars → ~${maxDiffLen}). Some files may not appear above; re-run scoped with \`--base\` or per-area if needed.`,
     );
   }
   parts.push('');
@@ -60,8 +97,94 @@ export function buildDiffSection(diff: DiffSource): string {
   return parts.join('\n');
 }
+/**
+ * Build the shared review body fed to every pipeline pass: the diff (once) plus
+ * each lens definition + its tool outputs, WITHOUT the legacy per-lens output
+ * instructions (the pipeline supplies its own adversarial instructions). The
+ * legacy single-pass fallback appends its instructions separately.
+ */
+export function buildReviewBasePrompt(lensSections: string[], diff: DiffSource): string {
+  return [
+    '## Changes',
+    '```',
+    diff.stat.trim() || '(no diffstat)',
+    '```',
+    '',
+    buildDiffSection(diff),
+    '',
+    '## Review lenses (project invariants to check)',
+    '',
+    ...lensSections,
+  ].join('\n');
+}
+const SEVERITY_EMOJI: Record<ValidatedFinding['severity'], string> = {
+  blocker: '🔴',
+  warning: '🟡',
+  note: '🔵',
+};
+/** A one-line model summary, shown only when a non-default model is in play. */
+function renderModelLine(telemetry: PipelineResult['telemetry']): string[] {
+  const passKeys = new Set(telemetry.passModels);
+  const allDefault =
+    passKeys.size === 1 && passKeys.has('default') && telemetry.validatorModel === 'default';
+  if (allDefault) return [];
+  const passCounts = new Map<string, number>();
+  for (const key of telemetry.passModels) passCounts.set(key, (passCounts.get(key) ?? 0) + 1);
+  const passSummary = [...passCounts.entries()].map(([key, count]) => `${key}×${count}`).join(', ');
+  return [`Models — passes: ${passSummary}; validator: ${telemetry.validatorModel}.`];
+}
+/** Render the validated pipeline findings into a Markdown review report. */
+export function renderPipelineReport(result: PipelineResult, diff: DiffSource): string {
+  const { findings, telemetry } = result;
+  const counts = {
+    blocker: findings.filter((finding) => finding.severity === 'blocker').length,
+    warning: findings.filter((finding) => finding.severity === 'warning').length,
+    note: findings.filter((finding) => finding.severity === 'note').length,
+  };
+  const header = [
+    `# Code Review — ${new Date().toISOString().slice(0, 10)}`,
+    '',
+    `Reviewed ${diff.label} across ${telemetry.passes} adversarial pass(es)` +
+      `${telemetry.failedPasses ? ` (${telemetry.failedPasses} failed)` : ''}.`,
+    '',
+    `**${findings.length} finding(s)** — ${counts.blocker} blocker, ${counts.warning} warning, ${counts.note} note.`,
+    `Pipeline: ${telemetry.buckets} buckets → ${telemetry.candidates} candidates → ${telemetry.validated} validated` +
+      ` (dropped ${telemetry.droppedFalsePositives} false-positive, ${telemetry.droppedLowSignal} low-signal).`,
+    ...renderModelLine(telemetry),
+    '',
+  ];
+  if (findings.length === 0) {
+    return [...header, 'No bugs found that survived validation. ✅'].join('\n');
+  }
+  // Only attribute models per finding when more than one distinct model ran
+  // (a bake-off); with a single model it's noise.
+  const multiModel = new Set(telemetry.passModels).size > 1;
+  const lines = findings.map((finding) => {
+    const where = finding.line ? `\`${finding.file}:${finding.line}\`` : `\`${finding.file}\``;
+    const meta = [
+      `${finding.votes}/${telemetry.passes} votes`,
+      `${Math.round(finding.confidence * 100)}% conf`,
+      finding.category,
+      multiModel && finding.models.length > 0 ? `models: ${finding.models.join(', ')}` : undefined,
+    ]
+      .filter(Boolean)
+      .join(', ');
+    const justification = finding.justification ? `\n  ↳ ${finding.justification}` : '';
+    return `- ${SEVERITY_EMOJI[finding.severity]} **${finding.severity}** ${where} — ${finding.message} _(${meta})_${justification}`;
+  });
+  return [...header, '## Findings', '', ...lines].join('\n');
+}
 /** Build the lens-specific section of the review prompt (no diff duplication). */
-function buildLensSection(
+export function buildLensSection(
   lens: LensConfig,
   lensContent: string,
   toolOutputs: Record<string, string>,
@@ -93,75 +216,35 @@ function buildLensSection(
   return parts.join('\n');
 }
-/** Build the full review prompt for a single lens (includes diff — used by the tool path). */
-function buildReviewPrompt(
+/**
+ * Build the lens result from PRE-COMPUTED tool outputs. Pure — no IO — so tool
+ * execution happens once up front (see {@link runToolsEffect}) and is shared
+ * across every lens that declares the same command.
+ */
+export function buildLensResult(
   lens: LensConfig,
   lensContent: string,
-  diff: DiffSource,
   toolOutputs: Record<string, string>,
-): string {
-  const parts: string[] = [];
-  parts.push(`You are reviewing code changes through the "${lens.name}" lens.`);
-  parts.push('');
-  parts.push(buildDiffSection(diff));
-  parts.push('');
-  parts.push(buildLensSection(lens, lensContent, toolOutputs));
-  parts.push('');
-  parts.push('## Instructions');
-  parts.push('');
-  parts.push('Review the diff above through this lens. For each finding, output a JSON array:');
-  parts.push('');
-  parts.push('```json');
-  parts.push('[');
-  parts.push(
-    '  { "file": "path/to/file.ts", "line": 42, "severity": "warning", "message": "Description" }',
-  );
-  parts.push(']');
-  parts.push('```');
-  parts.push('');
-  parts.push(
-    'After the JSON array, write a 2-3 sentence summary of your review through this lens.',
-  );
-  parts.push('If there are no findings, return an empty array `[]` and note the code looks good.');
-  return parts.join('\n');
+): LensResult {
+  return {
+    lens: lens.name,
+    findings: [],
+    summary: '',
+    toolOutputs,
+    _lensSection: buildLensSection(lens, lensContent, toolOutputs),
+  };
 }
-/** Execute a review for a single lens: run its tools, then build the prompt. */
-export function reviewWithLensEffect(
-  cwd: string,
-  lens: LensConfig,
-  lensContent: string,
-  diff: DiffSource,
-  signal?: AbortSignal,
-): Effect.Effect<LensResult, never, Executor> {
-  return Effect.gen(function* () {
-    const toolOutputs = yield* runLensToolsEffect(cwd, lens.tools, signal);
-    return {
-      lens: lens.name,
-      findings: [],
-      summary: '',
-      toolOutputs,
-      _prompt: buildReviewPrompt(lens, lensContent, diff, toolOutputs),
-      _lensSection: buildLensSection(lens, lensContent, toolOutputs),
-    };
-  });
-}
-/** Promise wrapper building a live Executor from `pi`. */
-export function reviewWithLens(
+/** Promise wrapper: run a deduped tool set once, building a live Executor from `pi`. */
+export function runTools(
   pi: Pick<ExtensionAPI, 'exec'>,
-  _ctx: unknown,
   cwd: string,
-  lens: LensConfig,
-  lensContent: string,
-  diff: DiffSource,
+  tools: string[],
+  options: ToolRunOptions,
   signal?: AbortSignal,
-): Promise<LensResult> {
+): Promise<Record<string, string>> {
   return Effect.runPromise(
-    reviewWithLensEffect(cwd, lens, lensContent, diff, signal).pipe(
+    runToolsEffect(cwd, tools, options, signal).pipe(
       Effect.provideService(Executor, makeExecutorService(pi)),
     ),
   );

package/extensions/code-reviewer/types.ts CHANGED

@@ -20,20 +20,134 @@ export type LensResult = {
   findings: LensFinding[];
   summary: string;
   toolOutputs?: Record<string, string>;
-  /** Review prompt built for this lens, used internally to delegate to the agent. */
-  _prompt?: string;
-  /** Lens-specific section (without diff), used by /review command to avoid diff duplication. */
+  /** Lens-specific prompt section (without the diff), assembled by the command
+   *  layer with a single shared diff to avoid per-lens duplication. */
   _lensSection?: string;
 };
+// ── Self-driving review pipeline (Bugbot-style) ──────────────────────────────
+//
+// The tool can run the review itself by driving the session's model through
+// several parallel adversarial passes, bucketing + majority-voting the
+// findings, then validating each survivor — instead of returning a prompt for
+// a single downstream pass. The types below describe that pipeline's data.
+/** A finding as emitted by one bug-finding pass (before bucketing). */
+export type RawFinding = {
+  file: string;
+  line?: number;
+  severity: LensSeverity;
+  message: string;
+  /** Optional bug taxonomy tag the pass assigned (e.g. "boundary-input"). */
+  category?: string;
+};
+/** A merged bucket of near-duplicate raw findings across passes. */
+export type CandidateFinding = RawFinding & {
+  /** Number of DISTINCT passes that independently surfaced this bucket. */
+  votes: number;
+  /** Indices of the passes that contributed (0-based). */
+  passIndices: number[];
+};
+/** A candidate after the validator stage has confirmed or refuted it. */
+export type ValidatedFinding = CandidateFinding & {
+  verdict: 'real' | 'false-positive';
+  /** Validator confidence in `verdict`, 0..1. */
+  confidence: number;
+  justification?: string;
+  /** Distinct model keys whose passes contributed to this finding (for the
+   *  model bake-off: "which model caught this"). */
+  models: string[];
+};
+/** Reasoning/thinking effort for a step (mirrors pi-ai's `ThinkingLevel`). */
+export type ReasoningLevel = 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
+/** A per-step model choice in config: either a bare spec string
+ *  ("provider/id", id, or name) or that spec plus a reasoning level. */
+export type ModelSpec = { model: string; reasoning?: ReasoningLevel };
+export type ModelStepConfig = string | ModelSpec;
+/** A resolved per-step assignment the pipeline runs against. `key` is either
+ *  {@link DEFAULT_MODEL_KEY} (the session model) or a spec that resolved to a
+ *  real model; `label` is the human display (key + reasoning). */
+export type ModelAssignment = {
+  key: string;
+  label: string;
+  reasoning?: ReasoningLevel;
+};
+export type ModelPlan = {
+  /** Assignment for each pass, length === `passes` (round-robin from config). */
+  passes: ModelAssignment[];
+  /** Assignment for the validator stage. */
+  validator: ModelAssignment;
+};
+/** Counts describing what the pipeline did, for transparency in the report. */
+export type PipelineTelemetry = {
+  passes: number;
+  passFindingCounts: number[];
+  buckets: number;
+  candidates: number;
+  validated: number;
+  droppedFalsePositives: number;
+  droppedLowSignal: number;
+  failedPasses: number;
+  /** Model key used for each pass (parallel to pass index). */
+  passModels: string[];
+  /** Model key used for the validator stage. */
+  validatorModel: string;
+};
+export type PipelineResult = {
+  findings: ValidatedFinding[];
+  telemetry: PipelineTelemetry;
+};
+/** Tunables for the self-driving pipeline (all overridable in config). */
+export type ReviewPipelineConfig = {
+  /** Parallel adversarial bug-finding passes. 0 disables the pipeline
+   *  (falls back to returning a single-pass review prompt). */
+  passes: number;
+  /** Run the validator stage that falsifies each surviving candidate. */
+  validate: boolean;
+  /** Min distinct passes a NOTE-severity bucket needs to survive pre-validation
+   *  (blockers/warnings are never dropped for low votes). */
+  minVotes: number;
+  /** Max passes run concurrently. */
+  concurrency: number;
+  /** Base sampling temperature; each pass adds a small deterministic jitter so
+   *  passes diverge instead of collapsing onto identical reasoning. */
+  temperature: number;
+  /** Hard cap on findings returned (safety valve against runaway output). */
+  maxFindings: number;
+  /** Model for ALL passes — a spec string or `{ model, reasoning }`. Omitted →
+   *  session model. Overridden per-pass by {@link passModels}. */
+  passModel?: ModelStepConfig;
+  /** Models rotated round-robin across passes — run the same diff through
+   *  several models/reasoning levels in one review (a bake-off). Overrides
+   *  `passModel`. */
+  passModels?: ModelStepConfig[];
+  /** Model for the validator stage — a spec string or `{ model, reasoning }`.
+   *  Omitted → session model. */
+  validateModel?: ModelStepConfig;
+};
+// NOTE: findings + summary on LensResult describe what the agent produces in
+// its follow-up message; the tool/command layer emits a review *task*, it does
+// not parse findings back into a rendered report.
 export type ReviewConfig = {
   lensDir: string;
   defaultLenses: string[];
-};
-export type ReviewReport = {
-  diff: string;
-  diffStat: string;
-  lenses: LensResult[];
-  generatedAt: string;
+  /** Per-tool wall-clock timeout in ms. A lens tool that exceeds it is killed
+   *  and reported as timed-out (it must never hang the review). */
+  toolTimeoutMs: number;
+  /** Max lens tools run in parallel. Tools are deduped across lenses first,
+   *  so this bounds the distinct command set, not lens count. */
+  toolConcurrency: number;
+  /** Self-driving pipeline tunables (see {@link ReviewPipelineConfig}). */
+  review: ReviewPipelineConfig;
 };

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@dreki-gg/pi-code-reviewer",
-  "version": "0.3.0",
+  "version": "0.5.0",
   "description": "Multi-lens code review extension for pi — configurable review criteria per project",
   "keywords": [
     "pi-package"

package/skills/code-review/lenses/code-quality.md CHANGED

@@ -10,11 +10,25 @@ Evaluates changes for correctness, dead code introduction, and adherence to proj
 - Are there any obvious bugs or logic errors?
 - Does the code avoid known anti-patterns for the project's framework?
+### Adversarial inputs (enumerate, don't assume)
+For each changed function, construct the edge inputs that break it rather than
+trusting the happy path or the surrounding comment:
+- `null` / `undefined` / `NaN` / `Infinity` / `-0` / `""` / `[]` / `{}` / huge /
+  negative / duplicate / out-of-order / unicode.
+- Numeric-type guards that the wrong value defeats: `typeof NaN === "number"`,
+  `typeof null === "object"`, `0`/`""`/`NaN` as falsy, `JSON.parse` of
+  attacker input. Prefer `Number.isFinite` / explicit checks.
+- **Claim-vs-code audit:** every comment or test that asserts an invariant
+  ("non-numeric falls through", "never empty") — find the input that violates it
+  and confirm the code actually enforces the claim.
+- Off-by-one, boundary indices, wrong id/key space, missing `await`, swallowed
+  errors, unhandled rejection, cancellation/abort paths.
 ## Tools
 - `bun run typecheck`
 - `bun run lint`
 ## Severity
-- blocker: Type errors, unresolved imports, obvious bugs, unhandled error paths
-- warning: New lint violations, unused code, inconsistent naming
+- blocker: Type errors, unresolved imports, obvious bugs, unhandled error paths, an edge input (NaN/empty/boundary) that crashes or corrupts on a path users hit
+- warning: New lint violations, unused code, inconsistent naming, an unguarded edge input on a lower-risk path, a comment/test claim the code does not actually honor
 - note: Style suggestions, minor improvements

package/extensions/code-reviewer/report.ts DELETED

@@ -1,109 +0,0 @@
-import type { LensFinding, LensResult, LensSeverity, ReviewReport } from './types';
-/** Build a markdown report from lens results. */
-export function buildReport(report: ReviewReport): string {
-  const sections = [
-    `# Code Review — ${report.generatedAt}`,
-    '',
-    buildChangesSection(report.diffStat),
-    buildScoreboard(report.lenses),
-    ...report.lenses.map(buildLensSection),
-  ];
-  return sections.join('\n');
-}
-function buildChangesSection(diffStat: string): string {
-  return ['## Changes', '', '```', diffStat, '```', ''].join('\n');
-}
-function buildScoreboard(lenses: LensResult[]): string {
-  const counts = countFindings(lenses);
-  return [
-    '## Scoreboard',
-    '',
-    '| Metric | Count |',
-    '| --- | --- |',
-    `| **Total findings** | **${counts.total}** |`,
-    `| 🔴 Blockers | ${counts.blocker} |`,
-    `| 🟡 Warnings | ${counts.warning} |`,
-    `| 🔵 Notes | ${counts.note} |`,
-    `| Lenses applied | ${lenses.length} |`,
-    '',
-  ].join('\n');
-}
-function countFindings(lenses: LensResult[]): Record<LensSeverity | 'total', number> {
-  const counts = { blocker: 0, warning: 0, note: 0, total: 0 };
-  for (const lens of lenses) {
-    for (const f of lens.findings) {
-      counts[f.severity]++;
-      counts.total++;
-    }
-  }
-  return counts;
-}
-function buildLensSection(lens: LensResult): string {
-  const lines: string[] = [`## ${lens.lens}`, ''];
-  if (lens.findings.length === 0) {
-    lines.push('No findings. ✓', '');
-    if (lens.summary) lines.push(lens.summary, '');
-    return lines.join('\n');
-  }
-  lines.push(buildFindingsByGroup(lens.findings));
-  if (lens.summary) {
-    lines.push(`**Summary:** ${lens.summary}`, '');
-  }
-  if (lens.toolOutputs && Object.keys(lens.toolOutputs).length > 0) {
-    lines.push(buildToolOutputDetails(lens.toolOutputs));
-  }
-  return lines.join('\n');
-}
-const SEVERITY_ICONS: Record<LensSeverity, string> = {
-  blocker: '🔴',
-  warning: '🟡',
-  note: '🔵',
-};
-function buildFindingsByGroup(findings: LensFinding[]): string {
-  const lines: string[] = [];
-  const severities: LensSeverity[] = ['blocker', 'warning', 'note'];
-  for (const severity of severities) {
-    const group = findings.filter((f) => f.severity === severity);
-    if (group.length === 0) continue;
-    const label = severity.charAt(0).toUpperCase() + severity.slice(1);
-    lines.push(`### ${SEVERITY_ICONS[severity]} ${label}s (${group.length})`, '');
-    for (const f of group) {
-      const loc = f.line ? `${f.file}:${f.line}` : f.file;
-      lines.push(`- \`${loc}\` — ${f.message}`);
-    }
-    lines.push('');
-  }
-  return lines.join('\n');
-}
-function buildToolOutputDetails(toolOutputs: Record<string, string>): string {
-  const lines = [
-    '<details>',
-    `<summary>Tool outputs (${Object.keys(toolOutputs).length})</summary>`,
-    '',
-  ];
-  for (const [cmd, output] of Object.entries(toolOutputs)) {
-    lines.push(`**\`${cmd}\`**`, '```', output.slice(0, 5000), '```');
-  }
-  lines.push('</details>', '');
-  return lines.join('\n');
-}