@dreki-gg/pi-code-reviewer 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +74 -1
- package/extensions/code-reviewer/commands/review-init.ts +13 -3
- package/extensions/code-reviewer/commands/review-tool.ts +125 -27
- package/extensions/code-reviewer/commands/review.ts +49 -8
- package/extensions/code-reviewer/config.ts +90 -2
- package/extensions/code-reviewer/diff.ts +11 -5
- package/extensions/code-reviewer/effects/model.ts +112 -0
- package/extensions/code-reviewer/errors.ts +10 -1
- package/extensions/code-reviewer/model-plan.ts +84 -0
- package/extensions/code-reviewer/passes.ts +571 -0
- package/extensions/code-reviewer/reviewer.ts +164 -81
- package/extensions/code-reviewer/types.ts +124 -10
- package/package.json +1 -1
- package/skills/code-review/lenses/code-quality.md +16 -2
- package/extensions/code-reviewer/report.ts +0 -109
package/README.md
CHANGED
|
@@ -35,6 +35,32 @@ This creates:
|
|
|
35
35
|
|
|
36
36
|
The `code_review` tool is also available for programmatic use by the agent.
|
|
37
37
|
|
|
38
|
+
## How the review runs (Bugbot-style pipeline)
|
|
39
|
+
|
|
40
|
+
When a session model is available, the `code_review` tool **runs the review
|
|
41
|
+
itself** rather than returning a prompt for one downstream pass. It drives a
|
|
42
|
+
multi-stage pipeline modeled on Cursor's Bugbot:
|
|
43
|
+
|
|
44
|
+
1. **Parallel adversarial passes** (default 5) over the diff. Each pass gets a
|
|
45
|
+
different focus — trust boundaries, control flow, async/lifecycle, types,
|
|
46
|
+
state integrity, security, resources, contracts — plus a temperature jitter,
|
|
47
|
+
so passes reason down different paths instead of collapsing onto the same
|
|
48
|
+
findings.
|
|
49
|
+
2. **Bucket + majority vote.** Near-duplicate findings are fused (same file +
|
|
50
|
+
line proximity + message similarity) and tracked by how many distinct passes
|
|
51
|
+
surfaced them. Low-signal single-pass *notes* are dropped; blockers and
|
|
52
|
+
warnings are never dropped for low votes.
|
|
53
|
+
3. **Validator stage.** One batched call tries to *falsify* each surviving
|
|
54
|
+
candidate and drops false positives. It **fails open** — if the validator
|
|
55
|
+
errors, candidates are surfaced unvalidated rather than silently lost.
|
|
56
|
+
|
|
57
|
+
The tool returns finished, validated findings as a Markdown report (vote count,
|
|
58
|
+
confidence, validator justification) plus structured `details`.
|
|
59
|
+
|
|
60
|
+
**Fallback:** if no model is available (e.g. print mode) or `review.passes` is
|
|
61
|
+
`0`, the tool returns the previous single-pass review prompt and the calling
|
|
62
|
+
agent produces findings in its follow-up message.
|
|
63
|
+
|
|
38
64
|
## Lenses
|
|
39
65
|
|
|
40
66
|
A lens is a markdown file that defines review criteria, project tools to run, and severity rules:
|
|
@@ -59,6 +85,13 @@ Evaluates changes for correctness and adherence to project standards.
|
|
|
59
85
|
- note: Style suggestions
|
|
60
86
|
```
|
|
61
87
|
|
|
88
|
+
> **Tools must be fast and exit on their own** (typecheck, lint, unit tests).
|
|
89
|
+
> Do **not** list dev servers, watch mode, e2e suites, or full production
|
|
90
|
+
> builds — they bind ports / run for minutes and belong in CI. Tools are
|
|
91
|
+
> **deduped across lenses and run concurrently**, so a command shared by
|
|
92
|
+
> several lenses runs once, and a slow/hanging command stalls the whole review
|
|
93
|
+
> (bounded by `toolTimeoutMs`).
|
|
94
|
+
|
|
62
95
|
### Bundled lenses
|
|
63
96
|
|
|
64
97
|
The package ships with four example lenses:
|
|
@@ -79,7 +112,19 @@ Run `/review-init` to scaffold these (customized for your project's tools) into
|
|
|
79
112
|
```json
|
|
80
113
|
{
|
|
81
114
|
"lensDir": ".code-review/lenses",
|
|
82
|
-
"defaultLenses": ["code-quality", "maintainability"]
|
|
115
|
+
"defaultLenses": ["code-quality", "maintainability"],
|
|
116
|
+
"toolTimeoutMs": 60000,
|
|
117
|
+
"toolConcurrency": 4,
|
|
118
|
+
"review": {
|
|
119
|
+
"passes": 5,
|
|
120
|
+
"validate": true,
|
|
121
|
+
"minVotes": 2,
|
|
122
|
+
"concurrency": 5,
|
|
123
|
+
"temperature": 0.4,
|
|
124
|
+
"maxFindings": 50,
|
|
125
|
+
"passModels": [{ "model": "anthropic/claude-opus-4-8", "reasoning": "low" }],
|
|
126
|
+
"validateModel": { "model": "anthropic/claude-opus-4-8", "reasoning": "medium" }
|
|
127
|
+
}
|
|
83
128
|
}
|
|
84
129
|
```
|
|
85
130
|
|
|
@@ -87,4 +132,32 @@ Run `/review-init` to scaffold these (customized for your project's tools) into
|
|
|
87
132
|
| --- | --- | --- |
|
|
88
133
|
| `lensDir` | `.code-review/lenses` | Directory containing lens files |
|
|
89
134
|
| `defaultLenses` | `[]` (all) | Lenses to run when none specified |
|
|
135
|
+
| `toolTimeoutMs` | `60000` | Per-tool wall-clock timeout (ms); an exceeding tool is killed and reported as timed-out |
|
|
136
|
+
| `toolConcurrency` | `4` | Max distinct tools run in parallel (tools are deduped across lenses first) |
|
|
137
|
+
| `review.passes` | `5` | Parallel adversarial bug-finding passes. `0` disables the pipeline (single-pass prompt fallback). |
|
|
138
|
+
| `review.validate` | `true` | Run the validator stage that falsifies each surviving candidate. |
|
|
139
|
+
| `review.minVotes` | `2` | Min distinct passes a NOTE bucket needs to survive pre-validation (blockers/warnings exempt). |
|
|
140
|
+
| `review.concurrency` | `= passes` | Max passes run concurrently. |
|
|
141
|
+
| `review.temperature` | `0.4` | Base sampling temperature; each pass adds a small jitter so passes diverge. |
|
|
142
|
+
| `review.maxFindings` | `50` | Hard cap on findings returned. |
|
|
143
|
+
| `review.passModel` | session model | Model for ALL passes: a spec string (`"provider/id"`, bare id, or name) or `{ "model", "reasoning" }`. |
|
|
144
|
+
| `review.passModels` | — | List of models **rotated round-robin across passes** — a bake-off in one run. Overrides `passModel`. |
|
|
145
|
+
| `review.validateModel` | session model | Model for the validator stage (string or `{ "model", "reasoning" }`). |
|
|
146
|
+
|
|
147
|
+
Each step accepts either a plain spec string or `{ "model": "provider/id", "reasoning": "low" }`
|
|
148
|
+
where `reasoning` is one of `minimal` / `low` / `medium` / `high` / `xhigh` (applied as the
|
|
149
|
+
thinking effort for that step; ignored by providers that don't support it).
|
|
150
|
+
|
|
151
|
+
> By default the pipeline reuses the **session's current model** (`ctx.model`) —
|
|
152
|
+
> no separate API key or model config. More passes = deeper coverage but higher
|
|
153
|
+
> token/latency cost; tune `review.passes` to taste (3 = cheap, 8 = Bugbot
|
|
154
|
+
> parity).
|
|
155
|
+
>
|
|
156
|
+
> **Model bake-off.** Set `passModels` to a list to run the same diff through
|
|
157
|
+
> several models in one review and compare. Models are assigned round-robin to
|
|
158
|
+
> passes, each finding is annotated with the model(s) that caught it, and the
|
|
159
|
+
> report shows a per-model breakdown. Use a cheap model for `passModels` and a
|
|
160
|
+
> stronger one for `validateModel` (or vice-versa) to probe the speed/cost/
|
|
161
|
+
> quality frontier. Specs are matched as `provider/id`, a bare `id`, or a
|
|
162
|
+
> display `name`; an unknown spec falls back to the session model with a warning.
|
|
90
163
|
|
|
@@ -12,10 +12,20 @@ export function registerReviewInitCommand(pi: ExtensionAPI) {
|
|
|
12
12
|
`Initialize a code review configuration for this project.`,
|
|
13
13
|
``,
|
|
14
14
|
`1. Read the project's AGENTS.md, package.json, and any CONTEXT.md to understand the stack and conventions.`,
|
|
15
|
-
`2. Create a \`.code-review.json\` config file at the project root
|
|
15
|
+
`2. Create a \`.code-review.json\` config file at the project root. Supported keys:`,
|
|
16
|
+
` - \`lensDir\` (default \`.code-review/lenses\`), \`defaultLenses\` (lenses run when none are specified),`,
|
|
17
|
+
` - \`toolTimeoutMs\` (per-tool timeout, default 60000), \`toolConcurrency\` (parallel tools, default 4),`,
|
|
18
|
+
` - \`review\` (self-driving pipeline): \`passes\` (default 5, 0 disables), \`validate\` (default true),`,
|
|
19
|
+
` \`minVotes\` (default 2), \`concurrency\` (default = passes), \`temperature\` (default 0.4), \`maxFindings\` (default 50),`,
|
|
20
|
+
` and per-step models for a bake-off: \`passModel\`, \`passModels\` (rotated across passes), \`validateModel\``,
|
|
21
|
+
` (each a "provider/id", bare id, or display name; default = the session model).`,
|
|
16
22
|
`3. Create lens files in \`.code-review/lenses/\` — start with: code-quality.md, maintainability.md`,
|
|
17
|
-
`4. Each lens
|
|
18
|
-
`
|
|
23
|
+
`4. Each lens's \`## Tools\` must list ONLY fast, non-side-effecting commands that EXIT on their own`,
|
|
24
|
+
` (e.g. typecheck, lint, unit tests). Do NOT list dev servers, watch mode, e2e suites, or full`,
|
|
25
|
+
` production builds — they bind ports / run for minutes and belong in CI. Tools are deduped across`,
|
|
26
|
+
` lenses and run concurrently, so a slow or hanging command stalls the whole review.`,
|
|
27
|
+
`5. Tailor the criteria to the project's stack and conventions; prefer concrete, pattern-matched checks`,
|
|
28
|
+
` (name the project's real failure modes + the diff "smells" to look for) over generic virtues.`,
|
|
19
29
|
``,
|
|
20
30
|
`Config path: ${configPath}`,
|
|
21
31
|
].join('\n'),
|
|
@@ -4,9 +4,18 @@ import { Type } from 'typebox';
|
|
|
4
4
|
import { loadConfig, getLensDir } from '../config';
|
|
5
5
|
import { collectDiff, getChangedFiles } from '../diff';
|
|
6
6
|
import { discoverLenses, getLensContent } from '../lenses';
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
import
|
|
7
|
+
import { resolveModelPlan } from '../model-plan';
|
|
8
|
+
import { runPipeline } from '../passes';
|
|
9
|
+
import {
|
|
10
|
+
buildDiffSection,
|
|
11
|
+
buildLensResult,
|
|
12
|
+
buildReviewBasePrompt,
|
|
13
|
+
pickLensToolOutputs,
|
|
14
|
+
renderPipelineReport,
|
|
15
|
+
runTools,
|
|
16
|
+
} from '../reviewer';
|
|
17
|
+
import type { DiffSource } from '../diff';
|
|
18
|
+
import type { LensResult, ReviewConfig } from '../types';
|
|
10
19
|
|
|
11
20
|
export function registerReviewTool(pi: ExtensionAPI) {
|
|
12
21
|
pi.registerTool({
|
|
@@ -72,6 +81,23 @@ export function registerReviewTool(pi: ExtensionAPI) {
|
|
|
72
81
|
};
|
|
73
82
|
}
|
|
74
83
|
|
|
84
|
+
const selected = lensNames.map((name) => available.get(name)!);
|
|
85
|
+
|
|
86
|
+
// Run the DISTINCT tool set once (deduped across lenses), concurrently —
|
|
87
|
+
// not once per lens. A command shared by several lenses executes a single
|
|
88
|
+
// time and its output is shared.
|
|
89
|
+
const allTools = [...new Set(selected.flatMap((lens) => lens.tools))];
|
|
90
|
+
if (allTools.length > 0) {
|
|
91
|
+
ctx.ui.setStatus('code-review', `🔍 Running ${allTools.length} tool(s)...`);
|
|
92
|
+
}
|
|
93
|
+
const toolOutputs = await runTools(
|
|
94
|
+
pi,
|
|
95
|
+
cwd,
|
|
96
|
+
allTools,
|
|
97
|
+
{ timeoutMs: config.toolTimeoutMs, concurrency: config.toolConcurrency },
|
|
98
|
+
signal,
|
|
99
|
+
);
|
|
100
|
+
|
|
75
101
|
const results: LensResult[] = [];
|
|
76
102
|
for (let i = 0; i < lensNames.length; i++) {
|
|
77
103
|
if (signal?.aborted) break;
|
|
@@ -84,33 +110,80 @@ export function registerReviewTool(pi: ExtensionAPI) {
|
|
|
84
110
|
details: { currentLens: name, lensIndex: i + 1, totalLenses: lensNames.length },
|
|
85
111
|
});
|
|
86
112
|
|
|
87
|
-
const lens =
|
|
113
|
+
const lens = selected[i];
|
|
88
114
|
const content = (await getLensContent(lensDir, name)) ?? '';
|
|
89
|
-
|
|
90
|
-
results.push(result);
|
|
115
|
+
results.push(buildLensResult(lens, content, pickLensToolOutputs(lens, toolOutputs)));
|
|
91
116
|
}
|
|
92
117
|
|
|
93
|
-
|
|
118
|
+
const changedFiles = await getChangedFiles(pi, cwd, {
|
|
119
|
+
base: params.base,
|
|
120
|
+
staged: params.staged,
|
|
121
|
+
});
|
|
94
122
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
123
|
+
// Self-driving path: when a model is available and passes are enabled,
|
|
124
|
+
// the tool runs the Bugbot-style pipeline itself (parallel adversarial
|
|
125
|
+
// passes → bucket → majority vote → validate) and returns FINISHED,
|
|
126
|
+
// validated findings — not a prompt for a single downstream pass.
|
|
127
|
+
const lensSections = results.map((result) => result._lensSection).filter(Boolean) as string[];
|
|
128
|
+
if (ctx.model && config.review.passes > 0 && lensSections.length > 0 && !signal?.aborted) {
|
|
129
|
+
try {
|
|
130
|
+
const { resolution, plan, warnings } = resolveModelPlan(
|
|
131
|
+
config.review,
|
|
132
|
+
ctx.model,
|
|
133
|
+
ctx.modelRegistry,
|
|
134
|
+
);
|
|
135
|
+
for (const warning of warnings) ctx.ui.notify(warning, 'warning');
|
|
136
|
+
const basePrompt = buildReviewBasePrompt(lensSections, diff);
|
|
137
|
+
const pipeline = await runPipeline(
|
|
138
|
+
resolution,
|
|
139
|
+
plan,
|
|
140
|
+
basePrompt,
|
|
141
|
+
config.review,
|
|
142
|
+
{
|
|
143
|
+
onStage: (stage) => {
|
|
144
|
+
ctx.ui.setStatus('code-review', `🔍 ${stage}...`);
|
|
145
|
+
onUpdate?.({ content: [{ type: 'text', text: stage }], details: { stage } });
|
|
146
|
+
},
|
|
147
|
+
},
|
|
148
|
+
signal,
|
|
149
|
+
);
|
|
150
|
+
ctx.ui.setStatus('code-review', undefined);
|
|
151
|
+
return {
|
|
152
|
+
content: [{ type: 'text', text: renderPipelineReport(pipeline, diff) }],
|
|
153
|
+
details: {
|
|
154
|
+
mode: 'pipeline',
|
|
155
|
+
lensCount: lensNames.length,
|
|
156
|
+
availableLenses: [...available.keys()],
|
|
157
|
+
changedFiles,
|
|
158
|
+
findings: pipeline.findings,
|
|
159
|
+
telemetry: pipeline.telemetry,
|
|
160
|
+
},
|
|
161
|
+
};
|
|
162
|
+
} catch (cause) {
|
|
163
|
+
// Pipeline failed hard (e.g. model/pi-ai unavailable at runtime) —
|
|
164
|
+
// degrade to the single-pass prompt instead of failing the review.
|
|
165
|
+
ctx.ui.setStatus('code-review', undefined);
|
|
166
|
+
onUpdate?.({
|
|
167
|
+
content: [{ type: 'text', text: 'pipeline unavailable — single-pass fallback' }],
|
|
168
|
+
details: { pipelineError: cause instanceof Error ? cause.message : String(cause) },
|
|
169
|
+
});
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
ctx.ui.setStatus('code-review', undefined);
|
|
101
174
|
|
|
102
|
-
|
|
103
|
-
|
|
175
|
+
// Fallback: return the review task for a single downstream pass (the
|
|
176
|
+
// agent produces findings in its follow-up message). Used when no model
|
|
177
|
+
// is available (e.g. print mode) or passes are disabled in config.
|
|
178
|
+
const text = buildToolContext(results, diff);
|
|
104
179
|
|
|
105
180
|
return {
|
|
106
|
-
content: [{ type: 'text', text
|
|
181
|
+
content: [{ type: 'text', text }],
|
|
107
182
|
details: {
|
|
183
|
+
mode: 'single-pass',
|
|
108
184
|
lensCount: lensNames.length,
|
|
109
185
|
availableLenses: [...available.keys()],
|
|
110
|
-
changedFiles
|
|
111
|
-
base: params.base,
|
|
112
|
-
staged: params.staged,
|
|
113
|
-
}),
|
|
186
|
+
changedFiles,
|
|
114
187
|
},
|
|
115
188
|
};
|
|
116
189
|
},
|
|
@@ -131,17 +204,42 @@ function resolveLensNames(
|
|
|
131
204
|
return [...available.keys()];
|
|
132
205
|
}
|
|
133
206
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
207
|
+
/**
|
|
208
|
+
* Build the agent-facing review instructions appended to the report. The diff
|
|
209
|
+
* is embedded ONCE (not per lens) followed by each lens's section — large
|
|
210
|
+
* diffs would otherwise be repeated for every lens, bloating the tool output.
|
|
211
|
+
*/
|
|
212
|
+
function buildToolContext(results: LensResult[], diff: DiffSource): string {
|
|
213
|
+
const sections = results.map((r) => r._lensSection).filter(Boolean) as string[];
|
|
214
|
+
if (sections.length === 0) return '';
|
|
138
215
|
|
|
139
216
|
return [
|
|
217
|
+
`# Code Review — ${new Date().toISOString().slice(0, 10)}`,
|
|
218
|
+
'',
|
|
219
|
+
'## Changes',
|
|
220
|
+
'```',
|
|
221
|
+
diff.stat.trim() || '(no diffstat)',
|
|
222
|
+
'```',
|
|
223
|
+
'',
|
|
224
|
+
'Evaluate the diff through each lens below; the tool outputs are automated analysis.',
|
|
225
|
+
'',
|
|
226
|
+
buildDiffSection(diff),
|
|
227
|
+
'',
|
|
228
|
+
'## Lenses',
|
|
229
|
+
'',
|
|
230
|
+
...sections,
|
|
231
|
+
'',
|
|
232
|
+
'## Instructions',
|
|
140
233
|
'',
|
|
141
|
-
'
|
|
234
|
+
'For each lens above, review the diff against its criteria and output a JSON array of findings:',
|
|
142
235
|
'',
|
|
143
|
-
'
|
|
236
|
+
'```json',
|
|
237
|
+
'[',
|
|
238
|
+
' { "file": "path/to/file.ts", "line": 42, "severity": "warning", "message": "Description" }',
|
|
239
|
+
']',
|
|
240
|
+
'```',
|
|
144
241
|
'',
|
|
145
|
-
|
|
242
|
+
'After each lens JSON array, write a 2-3 sentence summary.',
|
|
243
|
+
'If a lens has no findings, return an empty array `[]` and note the code looks good.',
|
|
146
244
|
].join('\n');
|
|
147
245
|
}
|
|
@@ -3,7 +3,16 @@ import type { ExtensionAPI } from '@earendil-works/pi-coding-agent';
|
|
|
3
3
|
import { loadConfig, getLensDir } from '../config';
|
|
4
4
|
import { collectDiff } from '../diff';
|
|
5
5
|
import { discoverLenses, getLensContent } from '../lenses';
|
|
6
|
-
import {
|
|
6
|
+
import { resolveModelPlan } from '../model-plan';
|
|
7
|
+
import { runPipeline } from '../passes';
|
|
8
|
+
import {
|
|
9
|
+
buildDiffSection,
|
|
10
|
+
buildLensResult,
|
|
11
|
+
buildReviewBasePrompt,
|
|
12
|
+
pickLensToolOutputs,
|
|
13
|
+
renderPipelineReport,
|
|
14
|
+
runTools,
|
|
15
|
+
} from '../reviewer';
|
|
7
16
|
import { parseReviewArgs } from '../parse-args';
|
|
8
17
|
|
|
9
18
|
export function registerReviewCommand(pi: ExtensionAPI) {
|
|
@@ -47,24 +56,56 @@ export function registerReviewCommand(pi: ExtensionAPI) {
|
|
|
47
56
|
}
|
|
48
57
|
|
|
49
58
|
ctx.ui.notify(`Reviewing ${diff.label} through ${lensNames.length} lens(es)...`, 'info');
|
|
50
|
-
|
|
59
|
+
|
|
60
|
+
const selected = lensNames.map((name) => available.get(name)!);
|
|
61
|
+
|
|
62
|
+
// Run the DISTINCT tool set once (deduped across lenses), concurrently.
|
|
63
|
+
const allTools = [...new Set(selected.flatMap((lens) => lens.tools))];
|
|
64
|
+
ctx.ui.setStatus('code-review', `🔍 Running ${allTools.length} tool(s)...`);
|
|
65
|
+
const toolOutputs = await runTools(pi, cwd, allTools, {
|
|
66
|
+
timeoutMs: config.toolTimeoutMs,
|
|
67
|
+
concurrency: config.toolConcurrency,
|
|
68
|
+
});
|
|
51
69
|
|
|
52
70
|
const lensSections: string[] = [];
|
|
53
71
|
for (let i = 0; i < lensNames.length; i++) {
|
|
54
72
|
const name = lensNames[i];
|
|
55
73
|
ctx.ui.setStatus('code-review', `🔍 Lens ${i + 1}/${lensNames.length}: ${name}`);
|
|
56
74
|
|
|
57
|
-
const lens =
|
|
75
|
+
const lens = selected[i];
|
|
58
76
|
const content = (await getLensContent(lensDir, name)) ?? '';
|
|
59
|
-
const result =
|
|
60
|
-
|
|
61
|
-
if (result._lensSection) {
|
|
62
|
-
lensSections.push(result._lensSection);
|
|
63
|
-
}
|
|
77
|
+
const result = buildLensResult(lens, content, pickLensToolOutputs(lens, toolOutputs));
|
|
78
|
+
if (result._lensSection) lensSections.push(result._lensSection);
|
|
64
79
|
}
|
|
65
80
|
|
|
66
81
|
ctx.ui.setStatus('code-review', undefined);
|
|
67
82
|
|
|
83
|
+
// Self-driving path: run the Bugbot-style pipeline in-command and deliver
|
|
84
|
+
// the validated report in-session for discussion. Mirrors the tool.
|
|
85
|
+
if (ctx.model && config.review.passes > 0 && lensSections.length > 0) {
|
|
86
|
+
try {
|
|
87
|
+
const { resolution, plan, warnings } = resolveModelPlan(
|
|
88
|
+
config.review,
|
|
89
|
+
ctx.model,
|
|
90
|
+
ctx.modelRegistry,
|
|
91
|
+
);
|
|
92
|
+
for (const warning of warnings) ctx.ui.notify(warning, 'warning');
|
|
93
|
+
const basePrompt = buildReviewBasePrompt(lensSections, diff);
|
|
94
|
+
const pipeline = await runPipeline(resolution, plan, basePrompt, config.review, {
|
|
95
|
+
onStage: (stage) => ctx.ui.setStatus('code-review', `🔍 ${stage}...`),
|
|
96
|
+
});
|
|
97
|
+
ctx.ui.setStatus('code-review', undefined);
|
|
98
|
+
pi.sendUserMessage(renderPipelineReport(pipeline, diff), { deliverAs: 'followUp' });
|
|
99
|
+
return;
|
|
100
|
+
} catch (cause) {
|
|
101
|
+
ctx.ui.setStatus('code-review', undefined);
|
|
102
|
+
ctx.ui.notify(
|
|
103
|
+
`Pipeline unavailable (${cause instanceof Error ? cause.message : String(cause)}) — single-pass fallback`,
|
|
104
|
+
'warning',
|
|
105
|
+
);
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
68
109
|
const combinedPrompt = [
|
|
69
110
|
`Review the following changes through ${lensNames.length} lens(es): ${lensNames.join(', ')}.`,
|
|
70
111
|
'',
|
|
@@ -10,13 +10,98 @@ import { Effect } from 'effect';
|
|
|
10
10
|
import { resolve } from 'node:path';
|
|
11
11
|
|
|
12
12
|
import { FileSystem, nodeFileSystemService } from './effects/filesystem';
|
|
13
|
-
import type { ReviewConfig } from './types';
|
|
13
|
+
import type { ModelStepConfig, ReasoningLevel, ReviewConfig, ReviewPipelineConfig } from './types';
|
|
14
|
+
|
|
15
|
+
const REASONING_LEVELS = new Set<ReasoningLevel>(['minimal', 'low', 'medium', 'high', 'xhigh']);
|
|
14
16
|
|
|
15
17
|
const CONFIG_FILE = '.code-review.json';
|
|
16
18
|
const DEFAULT_LENS_DIR = '.code-review/lenses';
|
|
19
|
+
const DEFAULT_TOOL_TIMEOUT_MS = 60_000;
|
|
20
|
+
const DEFAULT_TOOL_CONCURRENCY = 4;
|
|
21
|
+
|
|
22
|
+
const DEFAULT_PIPELINE: ReviewPipelineConfig = {
|
|
23
|
+
passes: 5,
|
|
24
|
+
validate: true,
|
|
25
|
+
minVotes: 2,
|
|
26
|
+
concurrency: 5,
|
|
27
|
+
temperature: 0.4,
|
|
28
|
+
maxFindings: 50,
|
|
29
|
+
};
|
|
17
30
|
|
|
18
31
|
/**
 * Build the configuration used when no (or an unreadable) config file exists.
 *
 * The pipeline settings are shallow-copied from `DEFAULT_PIPELINE` on every
 * call so a caller mutating the returned `review` object cannot alter the
 * shared defaults.
 */
function defaultConfig(): ReviewConfig {
  const review: ReviewPipelineConfig = { ...DEFAULT_PIPELINE };
  const config: ReviewConfig = {
    lensDir: DEFAULT_LENS_DIR,
    defaultLenses: [],
    toolTimeoutMs: DEFAULT_TOOL_TIMEOUT_MS,
    toolConcurrency: DEFAULT_TOOL_CONCURRENCY,
    review,
  };
  return config;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Normalize a raw config value into a whole number that is zero or greater.
 *
 * Zero is deliberately accepted (a pass count of 0 disables the pipeline).
 * Fractions are rounded down. Anything that is not a finite, non-negative
 * number yields `fallback`.
 *
 * @param value - Untrusted value read from the config file.
 * @param fallback - Returned when `value` is unusable.
 */
function nonNegativeIntOr(value: unknown, fallback: number): number {
  if (typeof value !== 'number') return fallback;
  if (!Number.isFinite(value) || value < 0) return fallback;
  return Math.floor(value);
}
|
|
47
|
+
|
|
48
|
+
/**
 * Normalize a raw config value into a number bounded by `min` and `max`.
 *
 * Finite numbers are pulled into the range (the lower bound is applied first,
 * then the upper bound); every other input yields `fallback` unchanged.
 *
 * @param value - Untrusted value read from the config file.
 * @param fallback - Returned as-is when `value` is not a finite number.
 * @param min - Lower bound applied to a valid value.
 * @param max - Upper bound applied to a valid value.
 */
function clampNumberOr(value: unknown, fallback: number, min: number, max: number): number {
  if (typeof value !== 'number' || !Number.isFinite(value)) return fallback;
  const raised = Math.max(min, value);
  return Math.min(max, raised);
}
|
|
54
|
+
|
|
55
|
+
/**
 * Normalize a raw config value into a model step.
 *
 * Two shapes are accepted:
 * - a string, returned trimmed (blank strings are rejected);
 * - an object with a non-blank string `model`, optionally carrying a
 *   `reasoning` value that must be one of `REASONING_LEVELS`. An unrecognized
 *   `reasoning` is dropped rather than rejecting the whole step.
 *
 * @returns The normalized step, or `undefined` for any other input.
 */
function parseModelStep(value: unknown): ModelStepConfig | undefined {
  if (typeof value === 'string') {
    const spec = value.trim();
    return spec ? spec : undefined;
  }
  if (typeof value !== 'object' || value === null) return undefined;

  const fields = value as Record<string, unknown>;
  const rawModel = fields.model;
  const model = typeof rawModel === 'string' ? rawModel.trim() : '';
  if (!model) return undefined;

  const rawReasoning = fields.reasoning;
  if (typeof rawReasoning === 'string' && REASONING_LEVELS.has(rawReasoning as ReasoningLevel)) {
    return { model, reasoning: rawReasoning as ReasoningLevel };
  }
  return { model };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Normalize a raw config value into a list of model steps.
 *
 * Each element is run through `parseModelStep`; elements that do not parse are
 * silently skipped. Non-array input, or an array with no usable element,
 * yields `undefined` so callers can treat "absent" and "empty" alike.
 */
function parseModelStepArray(value: unknown): ModelStepConfig[] | undefined {
  if (!Array.isArray(value)) return undefined;

  const steps: ModelStepConfig[] = [];
  for (const entry of value) {
    const step = parseModelStep(entry);
    if (step !== undefined) steps.push(step);
  }
  return steps.length === 0 ? undefined : steps;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Normalize the raw `review` section of the config file into a complete
 * pipeline configuration.
 *
 * A missing or non-object section yields a copy of `DEFAULT_PIPELINE`.
 * Otherwise each field is coerced individually, falling back to its default
 * when absent or invalid. The model fields stay `undefined` when not
 * configured.
 */
function parsePipeline(raw: unknown): ReviewPipelineConfig {
  if (raw === null || typeof raw !== 'object') return { ...DEFAULT_PIPELINE };

  const section = raw as Record<string, unknown>;
  const passes = nonNegativeIntOr(section.passes, DEFAULT_PIPELINE.passes);
  const validate =
    typeof section.validate === 'boolean' ? section.validate : DEFAULT_PIPELINE.validate;
  // When concurrency is not configured it follows the pass count (at least 1)
  // so every pass can start at once.
  const concurrencyFallback = Math.max(1, passes);

  return {
    passes,
    validate,
    minVotes: positiveIntOr(section.minVotes, DEFAULT_PIPELINE.minVotes),
    concurrency: positiveIntOr(section.concurrency, concurrencyFallback),
    temperature: clampNumberOr(section.temperature, DEFAULT_PIPELINE.temperature, 0, 2),
    maxFindings: positiveIntOr(section.maxFindings, DEFAULT_PIPELINE.maxFindings),
    passModel: parseModelStep(section.passModel),
    passModels: parseModelStepArray(section.passModels),
    validateModel: parseModelStep(section.validateModel),
  };
}
|
|
99
|
+
|
|
100
|
+
/**
 * Coerce a config value to a positive integer (>= 1), falling back when the
 * value is absent or invalid.
 *
 * Fractions are rounded down BEFORE the positivity check. Checking `value > 0`
 * first and flooring afterwards would let a value such as `0.5` through and
 * return `0`, which is not a positive integer.
 *
 * @param value - Untrusted value read from the config file.
 * @param fallback - Returned when `value` is not a finite number or rounds
 *   down to less than 1.
 * @returns An integer >= 1, or `fallback`.
 */
function positiveIntOr(value: unknown, fallback: number): number {
  if (typeof value !== 'number' || !Number.isFinite(value)) return fallback;
  const whole = Math.floor(value);
  return whole >= 1 ? whole : fallback;
}
|
|
21
106
|
|
|
22
107
|
export function loadConfigEffect(cwd: string): Effect.Effect<ReviewConfig, never, FileSystem> {
|
|
@@ -30,6 +115,9 @@ export function loadConfigEffect(cwd: string): Effect.Effect<ReviewConfig, never
|
|
|
30
115
|
return {
|
|
31
116
|
lensDir: parsed.lensDir ?? DEFAULT_LENS_DIR,
|
|
32
117
|
defaultLenses: parsed.defaultLenses ?? [],
|
|
118
|
+
toolTimeoutMs: positiveIntOr(parsed.toolTimeoutMs, DEFAULT_TOOL_TIMEOUT_MS),
|
|
119
|
+
toolConcurrency: positiveIntOr(parsed.toolConcurrency, DEFAULT_TOOL_CONCURRENCY),
|
|
120
|
+
review: parsePipeline((parsed as { review?: unknown }).review),
|
|
33
121
|
};
|
|
34
122
|
} catch {
|
|
35
123
|
// Malformed config — fall back to defaults.
|
|
@@ -19,10 +19,14 @@ export type DiffSource = {
|
|
|
19
19
|
|
|
20
20
|
export type DiffOptions = { base?: string; staged?: boolean };
|
|
21
21
|
|
|
22
|
+
/** git diffs are normally instant; cap them so a pathological repo can't hang
|
|
23
|
+
* the whole review. */
|
|
24
|
+
const GIT_TIMEOUT_MS = 30_000;
|
|
25
|
+
|
|
22
26
|
/**
 * Run `git` with the given arguments in `cwd` through the injected `Executor`
 * and yield its standard output.
 *
 * Every invocation is capped at `GIT_TIMEOUT_MS`. Failures surface as the
 * executor's `ExecError`.
 */
function git(args: string[], cwd: string): Effect.Effect<string, ExecError, Executor> {
  return Effect.flatMap(Executor, (executor) =>
    executor.exec('git', args, { cwd, timeout: GIT_TIMEOUT_MS }),
  ).pipe(Effect.map((result) => result.stdout));
}
|
|
@@ -46,17 +50,19 @@ export function collectDiffEffect(
|
|
|
46
50
|
}
|
|
47
51
|
|
|
48
52
|
// Default: working directory changes (unstaged + staged) relative to HEAD.
|
|
49
|
-
|
|
53
|
+
// `git diff HEAD` fails on a repo with no commits (HEAD is unborn), so
|
|
54
|
+
// tolerate that and fall back to the bare working-directory diff.
|
|
55
|
+
const headDiff = yield* git(['diff', 'HEAD'], cwd).pipe(Effect.either);
|
|
50
56
|
|
|
51
|
-
//
|
|
52
|
-
if (!
|
|
57
|
+
// No HEAD (fresh repo) or an empty HEAD diff → fall back to the working dir.
|
|
58
|
+
if (headDiff._tag === 'Left' || !headDiff.right.trim()) {
|
|
53
59
|
const wdDiff = yield* git(['diff'], cwd);
|
|
54
60
|
const wdStat = yield* git(['diff', '--stat'], cwd);
|
|
55
61
|
return { diff: wdDiff, stat: wdStat, label: 'working directory changes' };
|
|
56
62
|
}
|
|
57
63
|
|
|
58
64
|
const stat = yield* git(['diff', 'HEAD', '--stat'], cwd);
|
|
59
|
-
return { diff, stat, label: 'all uncommitted changes' };
|
|
65
|
+
return { diff: headDiff.right, stat, label: 'all uncommitted changes' };
|
|
60
66
|
});
|
|
61
67
|
}
|
|
62
68
|
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reviewer service — wraps the session's current model so a single completion
|
|
3
|
+
* becomes an injectable, typed Effect. The self-driving pipeline (see
|
|
4
|
+
* `passes.ts`) depends on this Tag; the live implementation drives
|
|
5
|
+
* `@earendil-works/pi-ai`'s `completeSimple` over `ctx.model`, while tests
|
|
6
|
+
* provide a deterministic fake instead of calling a real provider.
|
|
7
|
+
*
|
|
8
|
+
* `@earendil-works/pi-ai` is an OPTIONAL peer dependency, so the runtime import
|
|
9
|
+
* is deferred (`import()`), reached only when the harness actually hands us a
|
|
10
|
+
* model. The extension stays loadable in environments without pi-ai.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { Api, AssistantMessage, Model, TextContent } from '@earendil-works/pi-ai';
|
|
14
|
+
import { Context, Effect } from 'effect';
|
|
15
|
+
|
|
16
|
+
import { ModelError } from '../errors';
|
|
17
|
+
import type { ReasoningLevel } from '../types';
|
|
18
|
+
|
|
19
|
+
/** The model key meaning "use the session's current model". */
|
|
20
|
+
export const DEFAULT_MODEL_KEY = 'default';
|
|
21
|
+
|
|
22
|
+
export type CompletionRequest = {
|
|
23
|
+
/** Which model to run this call on — {@link DEFAULT_MODEL_KEY} or a key the
|
|
24
|
+
* resolution map holds. Unknown keys fall back to the default model. */
|
|
25
|
+
modelKey: string;
|
|
26
|
+
system: string;
|
|
27
|
+
user: string;
|
|
28
|
+
/** Sampling temperature; the pipeline jitters this per pass. */
|
|
29
|
+
temperature?: number;
|
|
30
|
+
/** Reasoning/thinking effort for this call (provider-dependent). */
|
|
31
|
+
reasoning?: ReasoningLevel;
|
|
32
|
+
/** Identifies which pipeline stage is calling, for error context. */
|
|
33
|
+
stage: string;
|
|
34
|
+
signal?: AbortSignal;
|
|
35
|
+
};
|
|
36
|
+
|
|
37
|
+
export interface ReviewerService {
|
|
38
|
+
readonly complete: (request: CompletionRequest) => Effect.Effect<string, ModelError>;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export class Reviewer extends Context.Tag('CodeReviewer/Reviewer')<Reviewer, ReviewerService>() {}
|
|
42
|
+
|
|
43
|
+
/** Resolved models the pipeline can run against: a default (session) model plus
|
|
44
|
+
* any config-specified models keyed by their spec string. */
|
|
45
|
+
export type ModelResolution = {
|
|
46
|
+
defaultModel: Model<Api>;
|
|
47
|
+
byKey: Map<string, Model<Api>>;
|
|
48
|
+
};
|
|
49
|
+
|
|
50
|
+
/**
 * Look up a registered model from a config spec string.
 *
 * The spec is trimmed, then matched in this order (first hit wins):
 * 1. `provider/id`, splitting at the FIRST slash;
 * 2. a bare model `id` equal to the whole spec (covers ids that themselves
 *    contain a slash);
 * 3. the composite `${provider}/${id}` equal to the whole spec (covers a
 *    slash on the provider side of the split);
 * 4. the display `name`.
 *
 * @param registry - Source of all registered models.
 * @param spec - Spec string from the config file.
 * @returns The matching model, or `undefined` when the spec is blank or
 *   nothing matches.
 */
export function resolveModelSpec(
  registry: { getAll: () => Model<Api>[] },
  spec: string,
): Model<Api> | undefined {
  const wanted = spec.trim();
  if (wanted === '') return undefined;
  const candidates = registry.getAll();

  const split = wanted.indexOf('/');
  if (split > 0) {
    const provider = wanted.slice(0, split);
    const id = wanted.slice(split + 1);
    for (const candidate of candidates) {
      if (candidate.provider === provider && candidate.id === id) return candidate;
    }
  }

  const matchers: Array<(model: Model<Api>) => boolean> = [
    (model) => model.id === wanted,
    (model) => `${model.provider}/${model.id}` === wanted,
    (model) => model.name === wanted,
  ];
  for (const matches of matchers) {
    const hit = candidates.find(matches);
    if (hit != null) return hit;
  }
  return undefined;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Reduce an assistant message to plain text.
 *
 * Only blocks whose `type` is `'text'` contribute; every other block kind is
 * ignored. The surviving texts are joined with newlines and the final string
 * is trimmed.
 */
export function extractText(message: AssistantMessage): string {
  const textBlocks = message.content.filter(
    (part): part is TextContent => part.type === 'text',
  );
  const pieces: string[] = [];
  for (const part of textBlocks) {
    pieces.push(part.text);
  }
  return pieces.join('\n').trim();
}
|
|
82
|
+
|
|
83
|
+
/**
 * Create the live `ReviewerService` backed by `@earendil-works/pi-ai`.
 *
 * Each `complete` call:
 * - loads pi-ai via dynamic `import()` inside the call, so the package is only
 *   required once a completion is actually requested;
 * - picks the model registered under `request.modelKey`, or the default model
 *   when the key is not in `resolution.byKey`;
 * - sends one user message with the request's system prompt, temperature,
 *   reasoning level and abort signal;
 * - returns the reply's plain text.
 *
 * Any failure — import error, provider rejection, or a reply whose
 * `stopReason` is `'error'` — becomes a `ModelError` tagged with the
 * request's `stage`.
 */
export function makeReviewerService(resolution: ModelResolution): ReviewerService {
  const runCompletion = async (request: CompletionRequest): Promise<string> => {
    const { completeSimple } = await import('@earendil-works/pi-ai');
    const model = resolution.byKey.get(request.modelKey) ?? resolution.defaultModel;
    const { temperature, reasoning, signal } = request;

    const reply = await completeSimple(
      model,
      {
        systemPrompt: request.system,
        messages: [{ role: 'user', content: request.user, timestamp: Date.now() }],
      },
      { temperature, reasoning, signal },
    );

    if (reply.stopReason !== 'error') return extractText(reply);
    throw new Error(reply.errorMessage ?? 'model returned an error stop reason');
  };

  return {
    complete: (request) =>
      Effect.tryPromise({
        try: () => runCompletion(request),
        catch: (cause) => new ModelError({ stage: request.stage, cause }),
      }),
  };
}
|