@dreki-gg/pi-code-reviewer 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ import { Effect } from 'effect';
4
4
 
5
5
  import type { DiffSource } from './diff';
6
6
  import { Executor, makeExecutorService } from './effects/exec';
7
- import type { LensConfig, LensResult } from './types';
7
+ import type { LensConfig, LensResult, PipelineResult, ValidatedFinding } from './types';
8
8
 
9
9
  const isWindows = platform() === 'win32';
10
10
 
@@ -97,6 +97,120 @@ export function buildDiffSection(diff: DiffSource): string {
97
97
  return parts.join('\n');
98
98
  }
99
99
 
100
/**
 * Assemble the review body that every pipeline pass shares.
 *
 * The result contains, in order: a `## Changes` heading with the trimmed
 * diffstat in a fenced block (or `(no diffstat)` when the stat is blank), the
 * output of `buildDiffSection(diff)`, and a `## Review lenses` heading followed
 * by the supplied lens sections verbatim. No output instructions are added
 * here: the pipeline supplies its own adversarial instructions, and the legacy
 * single-pass fallback appends its instructions separately.
 *
 * @param lensSections Pre-rendered lens sections, emitted in the given order.
 * @param diff The diff under review.
 * @returns The prompt body, with parts joined by newlines.
 */
export function buildReviewBasePrompt(lensSections: string[], diff: DiffSource): string {
  // Fall back to a placeholder so the fenced block is never empty.
  const diffstat = diff.stat.trim() || '(no diffstat)';

  const preamble: string[] = [
    '## Changes',
    '```',
    diffstat,
    '```',
    '',
    buildDiffSection(diff),
    '',
    '## Review lenses (project invariants to check)',
    '',
  ];

  return preamble.concat(lensSections).join('\n');
}
120
+
121
/**
 * Marker emoji shown in front of each finding in the rendered report, keyed by
 * the finding's severity. The `Record` annotation makes the compiler insist on
 * an entry for every severity value.
 */
const SEVERITY_EMOJI: Record<ValidatedFinding['severity'], string> = {
  blocker: '🔴',
  warning: '🟡',
  note: '🔵',
};
126
+
127
/**
 * Build the optional one-line model summary for the report header.
 *
 * Returns an empty array when every pass and the validator ran on the
 * `'default'` model key, so the header stays quiet unless a non-default model
 * is in play. An empty pass list counts as "all default" too; previously it
 * produced a malformed `passes: ;` line. Otherwise returns a single line
 * listing each distinct pass model with its pass count (in first-seen order)
 * and the validator model.
 *
 * @param telemetry Pipeline telemetry; only `passModels` and `validatorModel` are read.
 * @returns Zero or one Markdown lines, ready to spread into the header.
 */
function renderModelLine(telemetry: PipelineResult['telemetry']): string[] {
  const { passModels, validatorModel } = telemetry;

  // `every` is vacuously true for an empty list, so a run with no recorded
  // passes and a default validator is treated as all-default.
  const allDefault =
    validatorModel === 'default' && passModels.every((key) => key === 'default');
  if (allDefault) return [];

  // Map preserves insertion order, so models are listed in first-seen order.
  const passCounts = new Map<string, number>();
  for (const key of passModels) passCounts.set(key, (passCounts.get(key) ?? 0) + 1);

  // With no passes but a non-default validator, say so explicitly rather than
  // leaving an empty slot in the sentence.
  const passSummary =
    [...passCounts].map(([key, count]) => `${key}×${count}`).join(', ') || '(none)';
  return [`Models — passes: ${passSummary}; validator: ${validatorModel}.`];
}
139
+
140
/**
 * Turn the pipeline's validated findings and telemetry into a Markdown report.
 *
 * The report always starts with a header: a dated title, what was reviewed and
 * over how many passes, per-severity counts, the pipeline funnel numbers, and
 * (when applicable) the model summary line. What follows depends on the run:
 *
 * - no findings and every pass failed: an "Inconclusive" warning;
 * - no findings and some passes failed: a "Partial review" warning;
 * - no findings and no failures: the clean-result line;
 * - otherwise: an optional partial-review warning, then one bullet per finding.
 *
 * @param result Validated findings plus telemetry from the pipeline.
 * @param diff The reviewed diff; only its `label` is read here.
 * @returns The complete Markdown report as a single string.
 */
export function renderPipelineReport(result: PipelineResult, diff: DiffSource): string {
  const { findings, telemetry } = result;
  const { passes, failedPasses } = telemetry;

  // Count findings of one severity.
  const tally = (severity: (typeof findings)[number]['severity']): number =>
    findings.reduce((total, finding) => (finding.severity === severity ? total + 1 : total), 0);

  const today = new Date().toISOString().slice(0, 10);
  const failedNote = failedPasses ? ` (${failedPasses} failed)` : '';
  const header = [
    `# Code Review — ${today}`,
    '',
    `Reviewed ${diff.label} across ${passes} adversarial pass(es)${failedNote}.`,
    '',
    `**${findings.length} finding(s)** — ${tally('blocker')} blocker, ${tally('warning')} warning, ${tally('note')} note.`,
    `Pipeline: ${telemetry.buckets} buckets → ${telemetry.candidates} candidates → ${telemetry.validated} validated` +
      ` (dropped ${telemetry.droppedFalsePositives} false-positive, ${telemetry.droppedLowSignal} low-signal).`,
    ...renderModelLine(telemetry),
    '',
  ];
  const withHeader = (body: string[]): string => [...header, ...body].join('\n');

  // A failed pass contributes zero findings, so failures have to be reported
  // explicitly; otherwise an all-failed run would look like a clean review.
  const someFailed = failedPasses > 0;
  const allFailed = passes > 0 && failedPasses >= passes;
  const errSuffix = telemetry.passErrorSample ? ` — e.g. ${telemetry.passErrorSample}` : '';

  if (findings.length === 0) {
    if (allFailed) {
      return withHeader([
        `> ⚠️ **Inconclusive — all ${passes} review pass(es) failed${errSuffix}.**`,
        '> No analysis actually ran; this is NOT a clean result. Re-run the review',
        '> (check that the review model / pi-ai is available) before trusting it.',
      ]);
    }
    if (someFailed) {
      return withHeader([
        `> ⚠️ **Partial review — ${failedPasses}/${passes} pass(es) failed${errSuffix}.**`,
        `> The ${passes - failedPasses} surviving pass(es) found nothing, but coverage was reduced.`,
      ]);
    }
    return withHeader(['No bugs found that survived validation. ✅']);
  }

  const body: string[] = [];
  if (someFailed) {
    body.push(
      `> ⚠️ **Partial review — ${failedPasses}/${passes} pass(es) failed${errSuffix}; findings below may be incomplete.**`,
      '',
    );
  }
  body.push('## Findings', '');

  // Per-finding model attribution is only shown when more than one distinct
  // model ran; with a single model it would be noise.
  const multiModel = new Set(telemetry.passModels).size > 1;

  for (const finding of findings) {
    const location = finding.line ? `${finding.file}:${finding.line}` : finding.file;
    const where = `\`${location}\``;

    const metaParts: string[] = [
      `${finding.votes}/${passes} votes`,
      `${Math.round(finding.confidence * 100)}% conf`,
    ];
    if (finding.category) metaParts.push(finding.category);
    if (multiModel && finding.models.length > 0) {
      metaParts.push(`models: ${finding.models.join(', ')}`);
    }
    const meta = metaParts.join(', ');

    const justification = finding.justification ? `\n ↳ ${finding.justification}` : '';
    body.push(
      `- ${SEVERITY_EMOJI[finding.severity]} **${finding.severity}** ${where} — ${finding.message} _(${meta})_${justification}`,
    );
  }

  return withHeader(body);
}
213
+
100
214
  /** Build the lens-specific section of the review prompt (no diff duplication). */
101
215
  export function buildLensSection(
102
216
  lens: LensConfig,
@@ -25,6 +25,119 @@ export type LensResult = {
25
25
  _lensSection?: string;
26
26
  };
27
27
 
28
+ // ── Self-driving review pipeline (Bugbot-style) ──────────────────────────────
29
+ //
30
+ // The tool can run the review itself by driving the session's model through
31
+ // several parallel adversarial passes, bucketing + majority-voting the
32
+ // findings, then validating each survivor — instead of returning a prompt for
33
+ // a single downstream pass. The types below describe that pipeline's data.
34
+
35
/**
 * One finding exactly as a single bug-finding pass reported it, before any
 * bucketing across passes.
 */
export type RawFinding = {
  /** File the finding refers to. */
  file: string;
  /** Line within `file`, when the pass supplied one. */
  line?: number;
  severity: LensSeverity;
  message: string;
  /** Bug-taxonomy tag assigned by the pass, if any (e.g. "boundary-input"). */
  category?: string;
};
44
+
45
/**
 * A bucket of near-duplicate raw findings from different passes, merged into
 * one candidate.
 */
export type CandidateFinding = RawFinding & {
  /** How many DISTINCT passes independently surfaced this bucket. */
  votes: number;
  /** 0-based indices of the passes that contributed to the bucket. */
  passIndices: number[];
};
52
+
53
/**
 * A candidate once the validator stage has ruled on it, either confirming it
 * as real or refuting it as a false positive.
 */
export type ValidatedFinding = CandidateFinding & {
  verdict: 'real' | 'false-positive';
  /** How confident the validator is in `verdict`, from 0 to 1. */
  confidence: number;
  justification?: string;
  /**
   * Distinct model keys whose passes contributed to this finding. Used for the
   * model bake-off, i.e. "which model caught this".
   */
  models: string[];
};
63
+
64
/**
 * How much reasoning/thinking effort a step should use. Mirrors pi-ai's
 * `ThinkingLevel`.
 */
export type ReasoningLevel =
  | 'minimal'
  | 'low'
  | 'medium'
  | 'high'
  | 'xhigh';
66
+
67
/**
 * The object form of a per-step model choice: a model spec string
 * ("provider/id", id, or name) plus an optional reasoning level.
 */
export type ModelSpec = {
  model: string;
  reasoning?: ReasoningLevel;
};

/**
 * A per-step model choice as written in config: either a bare spec string or
 * the {@link ModelSpec} object form.
 */
export type ModelStepConfig = string | ModelSpec;
71
+
72
/**
 * A per-step model assignment after resolution: what the pipeline actually
 * runs against.
 */
export type ModelAssignment = {
  /**
   * Either {@link DEFAULT_MODEL_KEY} (the session model) or a spec that
   * resolved to a real model.
   */
  key: string;
  /** Human-readable display form (key + reasoning). */
  label: string;
  reasoning?: ReasoningLevel;
};
80
+
81
/** The resolved model assignments for a whole pipeline run. */
export type ModelPlan = {
  /**
   * One assignment per pass, so the length equals the configured `passes`;
   * assigned round-robin from config.
   */
  passes: ModelAssignment[];
  /** The assignment used by the validator stage. */
  validator: ModelAssignment;
};
87
+
88
/**
 * Counters recording what a pipeline run did. They are surfaced in the report
 * so the reader can see how the result was reached.
 */
export type PipelineTelemetry = {
  /** Number of passes in the run. */
  passes: number;
  /** Presumably the finding count per pass, by pass index (TODO confirm). */
  passFindingCounts: number[];
  buckets: number;
  candidates: number;
  validated: number;
  droppedFalsePositives: number;
  droppedLowSignal: number;
  /** Number of passes that failed. */
  failedPasses: number;
  /**
   * A representative error message taken from the first failed pass. It lets a
   * fully-failed run report WHY it failed instead of a misleading "0 findings".
   */
  passErrorSample?: string;
  /** Model key used by each pass, parallel to the pass index. */
  passModels: string[];
  /** Model key used by the validator stage. */
  validatorModel: string;
};
106
+
107
/** What a pipeline run hands back: the validated findings plus run telemetry. */
export type PipelineResult = {
  findings: ValidatedFinding[];
  telemetry: PipelineTelemetry;
};
111
+
112
/** Settings for the self-driving pipeline; every one can be overridden in config. */
export type ReviewPipelineConfig = {
  /**
   * Number of parallel adversarial bug-finding passes. Setting this to 0
   * disables the pipeline, which then falls back to returning a single-pass
   * review prompt.
   */
  passes: number;
  /** Whether to run the validator stage, which tries to falsify each surviving candidate. */
  validate: boolean;
  /**
   * Minimum number of distinct passes a NOTE-severity bucket needs in order to
   * survive pre-validation. Blockers and warnings are never dropped for low votes.
   */
  minVotes: number;
  /** Upper bound on passes running at the same time. */
  concurrency: number;
  /**
   * Base sampling temperature. Each pass adds a small deterministic jitter so
   * that passes diverge rather than collapsing onto identical reasoning.
   */
  temperature: number;
  /** Hard cap on the number of findings returned; a safety valve against runaway output. */
  maxFindings: number;
  /**
   * Model used for ALL passes, given as a spec string or `{ model, reasoning }`.
   * When omitted, the session model is used. Overridden per-pass by
   * {@link passModels}.
   */
  passModel?: ModelStepConfig;
  /**
   * Models rotated round-robin across passes, so one review runs the same diff
   * through several models/reasoning levels (a bake-off). Overrides `passModel`.
   */
  passModels?: ModelStepConfig[];
  /**
   * Model used for the validator stage, given as a spec string or
   * `{ model, reasoning }`. When omitted, the session model is used.
   */
  validateModel?: ModelStepConfig;
};
140
+
28
141
  // NOTE: findings + summary on LensResult describe what the agent produces in
29
142
  // its follow-up message; the tool/command layer emits a review *task*, it does
30
143
  // not parse findings back into a rendered report.
@@ -38,4 +151,6 @@ export type ReviewConfig = {
38
151
  /** Max lens tools run in parallel. Tools are deduped across lenses first,
39
152
  * so this bounds the distinct command set, not lens count. */
40
153
  toolConcurrency: number;
154
+ /** Self-driving pipeline tunables (see {@link ReviewPipelineConfig}). */
155
+ review: ReviewPipelineConfig;
41
156
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@dreki-gg/pi-code-reviewer",
3
- "version": "0.4.0",
3
+ "version": "0.6.0",
4
4
  "description": "Multi-lens code review extension for pi — configurable review criteria per project",
5
5
  "keywords": [
6
6
  "pi-package"
@@ -10,11 +10,25 @@ Evaluates changes for correctness, dead code introduction, and adherence to proj
10
10
  - Are there any obvious bugs or logic errors?
11
11
  - Does the code avoid known anti-patterns for the project's framework?
12
12
 
13
+ ### Adversarial inputs (enumerate, don't assume)
14
+ For each changed function, construct the edge inputs that break it rather than
15
+ trusting the happy path or the surrounding comment:
16
+ - `null` / `undefined` / `NaN` / `Infinity` / `-0` / `""` / `[]` / `{}` / huge /
17
+ negative / duplicate / out-of-order / unicode.
18
+ - Type and truthiness guards that the wrong value defeats: `typeof NaN === "number"`,
19
+ `typeof null === "object"`, `0`/`""`/`NaN` as falsy, `JSON.parse` of
20
+ attacker input. Prefer `Number.isFinite` / explicit checks.
21
+ - **Claim-vs-code audit:** every comment or test that asserts an invariant
22
+ ("non-numeric falls through", "never empty") — find the input that violates it
23
+ and confirm the code actually enforces the claim.
24
+ - Off-by-one, boundary indices, wrong id/key space, missing `await`, swallowed
25
+ errors, unhandled rejection, cancellation/abort paths.
26
+
13
27
  ## Tools
14
28
  - `bun run typecheck`
15
29
  - `bun run lint`
16
30
 
17
31
  ## Severity
18
- - blocker: Type errors, unresolved imports, obvious bugs, unhandled error paths
19
- - warning: New lint violations, unused code, inconsistent naming
32
+ - blocker: Type errors, unresolved imports, obvious bugs, unhandled error paths, an edge input (NaN/empty/boundary) that crashes or corrupts on a path users hit
33
+ - warning: New lint violations, unused code, inconsistent naming, an unguarded edge input on a lower-risk path, a comment/test claim the code does not actually honor
20
34
  - note: Style suggestions, minor improvements