@dreki-gg/pi-code-reviewer 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -35,6 +35,32 @@ This creates:
35
35
 
36
36
  The `code_review` tool is also available for programmatic use by the agent.
37
37
 
38
+ ## How the review runs (Bugbot-style pipeline)
39
+
40
+ When a session model is available, the `code_review` tool **runs the review
41
+ itself** rather than returning a prompt for one downstream pass. It drives a
42
+ multi-stage pipeline modeled on Cursor's Bugbot:
43
+
44
+ 1. **Parallel adversarial passes** (default 5) over the diff. Each pass gets a
45
+ different focus — trust boundaries, control flow, async/lifecycle, types,
46
+ state integrity, security, resources, contracts — plus a temperature jitter,
47
+ so passes reason down different paths instead of collapsing onto the same
48
+ findings.
49
+ 2. **Bucket + majority vote.** Near-duplicate findings are fused (same file +
50
+ line proximity + message similarity) and tracked by how many distinct passes
51
+ surfaced them. Low-signal single-pass *notes* are dropped; blockers and
52
+ warnings are never dropped for low votes.
53
+ 3. **Validator stage.** One batched call tries to *falsify* each surviving
54
+ candidate and drops false positives. It **fails open** — if the validator
55
+ errors, candidates are surfaced unvalidated rather than silently lost.
56
+
57
+ The tool returns finished, validated findings as a Markdown report (vote count,
58
+ confidence, validator justification) plus structured `details`.
59
+
60
+ **Fallback:** if no model is available (e.g. print mode), `review.passes` is
61
+ `0`, or the pipeline fails at runtime (including every pass failing), the tool
62
+ returns the previous single-pass review prompt and the calling agent produces findings in its follow-up message.
63
+
38
64
  ## Lenses
39
65
 
40
66
  A lens is a markdown file that defines review criteria, project tools to run, and severity rules:
@@ -88,7 +114,17 @@ Run `/review-init` to scaffold these (customized for your project's tools) into
88
114
  "lensDir": ".code-review/lenses",
89
115
  "defaultLenses": ["code-quality", "maintainability"],
90
116
  "toolTimeoutMs": 60000,
91
- "toolConcurrency": 4
117
+ "toolConcurrency": 4,
118
+ "review": {
119
+ "passes": 5,
120
+ "validate": true,
121
+ "minVotes": 2,
122
+ "concurrency": 5,
123
+ "temperature": 0.4,
124
+ "maxFindings": 50,
125
+ "passModels": [{ "model": "anthropic/claude-opus-4-8", "reasoning": "low" }],
126
+ "validateModel": { "model": "anthropic/claude-opus-4-8", "reasoning": "medium" }
127
+ }
92
128
  }
93
129
  ```
94
130
 
@@ -98,4 +134,30 @@ Run `/review-init` to scaffold these (customized for your project's tools) into
98
134
  | `defaultLenses` | `[]` (all) | Lenses to run when none specified |
99
135
  | `toolTimeoutMs` | `60000` | Per-tool wall-clock timeout (ms); an exceeding tool is killed and reported as timed-out |
100
136
  | `toolConcurrency` | `4` | Max distinct tools run in parallel (tools are deduped across lenses first) |
137
+ | `review.passes` | `5` | Parallel adversarial bug-finding passes. `0` disables the pipeline (single-pass prompt fallback). |
138
+ | `review.validate` | `true` | Run the validator stage that falsifies each surviving candidate. |
139
+ | `review.minVotes` | `2` | Min distinct passes a NOTE bucket needs to survive pre-validation (blockers/warnings exempt). |
140
+ | `review.concurrency` | `= passes` | Max passes run concurrently. |
141
+ | `review.temperature` | `0.4` | Base sampling temperature; each pass adds a small jitter so passes diverge. |
142
+ | `review.maxFindings` | `50` | Hard cap on findings returned. |
143
+ | `review.passModel` | session model | Model for ALL passes: a spec string (`"provider/id"`, bare id, or name) or `{ "model", "reasoning" }`. |
144
+ | `review.passModels` | — | List of models **rotated round-robin across passes** — a bake-off in one run. Overrides `passModel`. |
145
+ | `review.validateModel` | session model | Model for the validator stage (string or `{ "model", "reasoning" }`). |
146
+
147
+ Each step accepts either a plain spec string or `{ "model": "provider/id", "reasoning": "low" }`
148
+ where `reasoning` is one of `minimal` / `low` / `medium` / `high` / `xhigh` (applied as the
149
+ thinking effort for that step; ignored by providers that don't support it).
150
+
151
+ > By default the pipeline reuses the **session's current model** (`ctx.model`) —
152
+ > no separate API key or model config. More passes = deeper coverage but higher
153
+ > token/latency cost; tune `review.passes` to taste (3 = cheap, 8 = Bugbot
154
+ > parity).
155
+ >
156
+ > **Model bake-off.** Set `passModels` to a list to run the same diff through
157
+ > several models in one review and compare. Models are assigned round-robin to
158
+ > passes, each finding is annotated with the model(s) that caught it, and the
159
+ > report shows a per-model breakdown. Use a cheap model for `passModels` and a
160
+ > stronger one for `validateModel` (or vice-versa) to probe the speed/cost/
161
+ > quality frontier. Specs are matched as `provider/id`, a bare `id`, or a
162
+ > display `name`; an unknown spec falls back to the session model with a warning.
101
163
 
@@ -14,7 +14,11 @@ export function registerReviewInitCommand(pi: ExtensionAPI) {
14
14
  `1. Read the project's AGENTS.md, package.json, and any CONTEXT.md to understand the stack and conventions.`,
15
15
  `2. Create a \`.code-review.json\` config file at the project root. Supported keys:`,
16
16
  ` - \`lensDir\` (default \`.code-review/lenses\`), \`defaultLenses\` (lenses run when none are specified),`,
17
- ` - \`toolTimeoutMs\` (per-tool timeout, default 60000), \`toolConcurrency\` (parallel tools, default 4).`,
17
+ ` - \`toolTimeoutMs\` (per-tool timeout, default 60000), \`toolConcurrency\` (parallel tools, default 4),`,
18
+ ` - \`review\` (self-driving pipeline): \`passes\` (default 5, 0 disables), \`validate\` (default true),`,
19
+ ` \`minVotes\` (default 2), \`concurrency\` (default = passes), \`temperature\` (default 0.4), \`maxFindings\` (default 50),`,
20
+ ` and per-step models for a bake-off: \`passModel\`, \`passModels\` (rotated across passes), \`validateModel\``,
21
+ ` (each a "provider/id", bare id, or display name; default = the session model).`,
18
22
  `3. Create lens files in \`.code-review/lenses/\` — start with: code-quality.md, maintainability.md`,
19
23
  `4. Each lens's \`## Tools\` must list ONLY fast, non-side-effecting commands that EXIT on their own`,
20
24
  ` (e.g. typecheck, lint, unit tests). Do NOT list dev servers, watch mode, e2e suites, or full`,
@@ -4,7 +4,16 @@ import { Type } from 'typebox';
4
4
  import { loadConfig, getLensDir } from '../config';
5
5
  import { collectDiff, getChangedFiles } from '../diff';
6
6
  import { discoverLenses, getLensContent } from '../lenses';
7
- import { buildDiffSection, buildLensResult, pickLensToolOutputs, runTools } from '../reviewer';
7
+ import { resolveModelPlan } from '../model-plan';
8
+ import { runPipeline } from '../passes';
9
+ import {
10
+ buildDiffSection,
11
+ buildLensResult,
12
+ buildReviewBasePrompt,
13
+ pickLensToolOutputs,
14
+ renderPipelineReport,
15
+ runTools,
16
+ } from '../reviewer';
8
17
  import type { DiffSource } from '../diff';
9
18
  import type { LensResult, ReviewConfig } from '../types';
10
19
 
@@ -106,23 +115,90 @@ export function registerReviewTool(pi: ExtensionAPI) {
106
115
  results.push(buildLensResult(lens, content, pickLensToolOutputs(lens, toolOutputs)));
107
116
  }
108
117
 
118
+ const changedFiles = await getChangedFiles(pi, cwd, {
119
+ base: params.base,
120
+ staged: params.staged,
121
+ });
122
+
123
+ // Self-driving path: when a model is available and passes are enabled,
124
+ // the tool runs the Bugbot-style pipeline itself (parallel adversarial
125
+ // passes → bucket → majority vote → validate) and returns FINISHED,
126
+ // validated findings — not a prompt for a single downstream pass.
127
+ const lensSections = results.map((result) => result._lensSection).filter(Boolean) as string[];
128
+ if (ctx.model && config.review.passes > 0 && lensSections.length > 0 && !signal?.aborted) {
129
+ try {
130
+ const { resolution, plan, warnings } = resolveModelPlan(
131
+ config.review,
132
+ ctx.model,
133
+ ctx.modelRegistry,
134
+ );
135
+ for (const warning of warnings) ctx.ui.notify(warning, 'warning');
136
+ const basePrompt = buildReviewBasePrompt(lensSections, diff);
137
+ const pipeline = await runPipeline(
138
+ resolution,
139
+ plan,
140
+ basePrompt,
141
+ config.review,
142
+ {
143
+ onStage: (stage) => {
144
+ ctx.ui.setStatus('code-review', `🔍 ${stage}...`);
145
+ onUpdate?.({ content: [{ type: 'text', text: stage }], details: { stage } });
146
+ },
147
+ },
148
+ signal,
149
+ );
150
+ ctx.ui.setStatus('code-review', undefined);
151
+ // Every pass failed (e.g. the review model/pi-ai was unavailable for
152
+ // each call). The swallowed failures would render as a misleading
153
+ // "0 findings" report — instead, degrade to the single-pass prompt so
154
+ // the reviewing agent still produces a real review.
155
+ const allPassesFailed =
156
+ config.review.passes > 0 && pipeline.telemetry.failedPasses >= config.review.passes;
157
+ if (!allPassesFailed) {
158
+ return {
159
+ content: [{ type: 'text', text: renderPipelineReport(pipeline, diff) }],
160
+ details: {
161
+ mode: 'pipeline',
162
+ lensCount: lensNames.length,
163
+ availableLenses: [...available.keys()],
164
+ changedFiles,
165
+ findings: pipeline.findings,
166
+ telemetry: pipeline.telemetry,
167
+ },
168
+ };
169
+ }
170
+ onUpdate?.({
171
+ content: [{ type: 'text', text: 'all review passes failed — single-pass fallback' }],
172
+ details: {
173
+ failedPasses: pipeline.telemetry.failedPasses,
174
+ passError: pipeline.telemetry.passErrorSample,
175
+ },
176
+ });
177
+ } catch (cause) {
178
+ // Pipeline failed hard (e.g. model/pi-ai unavailable at runtime) —
179
+ // degrade to the single-pass prompt instead of failing the review.
180
+ ctx.ui.setStatus('code-review', undefined);
181
+ onUpdate?.({
182
+ content: [{ type: 'text', text: 'pipeline unavailable — single-pass fallback' }],
183
+ details: { pipelineError: cause instanceof Error ? cause.message : String(cause) },
184
+ });
185
+ }
186
+ }
187
+
109
188
  ctx.ui.setStatus('code-review', undefined);
110
189
 
111
- // The tool returns a pre-review skeleton + the review task. Findings are
112
- // produced by the agent in its follow-up message (per the instructions
113
- // below), NOT parsed back here so we deliberately do not render a
114
- // findings scoreboard that would always read "0".
190
+ // Fallback: return the review task for a single downstream pass (the
191
+ // agent produces findings in its follow-up message). Used when no model is
192
+ // available (e.g. print mode), passes are disabled, or the pipeline failed.
115
193
  const text = buildToolContext(results, diff);
116
194
 
117
195
  return {
118
196
  content: [{ type: 'text', text }],
119
197
  details: {
198
+ mode: 'single-pass',
120
199
  lensCount: lensNames.length,
121
200
  availableLenses: [...available.keys()],
122
- changedFiles: await getChangedFiles(pi, cwd, {
123
- base: params.base,
124
- staged: params.staged,
125
- }),
201
+ changedFiles,
126
202
  },
127
203
  };
128
204
  },
@@ -3,7 +3,16 @@ import type { ExtensionAPI } from '@earendil-works/pi-coding-agent';
3
3
  import { loadConfig, getLensDir } from '../config';
4
4
  import { collectDiff } from '../diff';
5
5
  import { discoverLenses, getLensContent } from '../lenses';
6
- import { buildDiffSection, buildLensResult, pickLensToolOutputs, runTools } from '../reviewer';
6
+ import { resolveModelPlan } from '../model-plan';
7
+ import { runPipeline } from '../passes';
8
+ import {
9
+ buildDiffSection,
10
+ buildLensResult,
11
+ buildReviewBasePrompt,
12
+ pickLensToolOutputs,
13
+ renderPipelineReport,
14
+ runTools,
15
+ } from '../reviewer';
7
16
  import { parseReviewArgs } from '../parse-args';
8
17
 
9
18
  export function registerReviewCommand(pi: ExtensionAPI) {
@@ -71,6 +80,32 @@ export function registerReviewCommand(pi: ExtensionAPI) {
71
80
 
72
81
  ctx.ui.setStatus('code-review', undefined);
73
82
 
83
+ // Self-driving path: run the Bugbot-style pipeline in-command and deliver
84
+ // the validated report in-session for discussion. Mirrors the tool.
85
+ if (ctx.model && config.review.passes > 0 && lensSections.length > 0) {
86
+ try {
87
+ const { resolution, plan, warnings } = resolveModelPlan(
88
+ config.review,
89
+ ctx.model,
90
+ ctx.modelRegistry,
91
+ );
92
+ for (const warning of warnings) ctx.ui.notify(warning, 'warning');
93
+ const basePrompt = buildReviewBasePrompt(lensSections, diff);
94
+ const pipeline = await runPipeline(resolution, plan, basePrompt, config.review, {
95
+ onStage: (stage) => ctx.ui.setStatus('code-review', `🔍 ${stage}...`),
96
+ });
97
+ ctx.ui.setStatus('code-review', undefined);
98
+ pi.sendUserMessage(renderPipelineReport(pipeline, diff), { deliverAs: 'followUp' });
99
+ return;
100
+ } catch (cause) {
101
+ ctx.ui.setStatus('code-review', undefined);
102
+ ctx.ui.notify(
103
+ `Pipeline unavailable (${cause instanceof Error ? cause.message : String(cause)}) — single-pass fallback`,
104
+ 'warning',
105
+ );
106
+ }
107
+ }
108
+
74
109
  const combinedPrompt = [
75
110
  `Review the following changes through ${lensNames.length} lens(es): ${lensNames.join(', ')}.`,
76
111
  '',
@@ -10,19 +10,90 @@ import { Effect } from 'effect';
10
10
  import { resolve } from 'node:path';
11
11
 
12
12
  import { FileSystem, nodeFileSystemService } from './effects/filesystem';
13
- import type { ReviewConfig } from './types';
13
+ import type { ModelStepConfig, ReasoningLevel, ReviewConfig, ReviewPipelineConfig } from './types';
14
+
15
+ const REASONING_LEVELS = new Set<ReasoningLevel>(['minimal', 'low', 'medium', 'high', 'xhigh']);
14
16
 
15
17
  const CONFIG_FILE = '.code-review.json';
16
18
  const DEFAULT_LENS_DIR = '.code-review/lenses';
17
19
  const DEFAULT_TOOL_TIMEOUT_MS = 60_000;
18
20
  const DEFAULT_TOOL_CONCURRENCY = 4;
19
21
 
22
+ const DEFAULT_PIPELINE: ReviewPipelineConfig = {
23
+ passes: 5,
24
+ validate: true,
25
+ minVotes: 2,
26
+ concurrency: 5,
27
+ temperature: 0.4,
28
+ maxFindings: 50,
29
+ };
30
+
20
31
  function defaultConfig(): ReviewConfig {
21
32
  return {
22
33
  lensDir: DEFAULT_LENS_DIR,
23
34
  defaultLenses: [],
24
35
  toolTimeoutMs: DEFAULT_TOOL_TIMEOUT_MS,
25
36
  toolConcurrency: DEFAULT_TOOL_CONCURRENCY,
37
+ review: { ...DEFAULT_PIPELINE },
38
+ };
39
+ }
40
+
41
+ /** Coerce a config value to a non-negative integer (0 allowed: disables passes). */
42
+ function nonNegativeIntOr(value: unknown, fallback: number): number {
43
+ return typeof value === 'number' && Number.isFinite(value) && value >= 0
44
+ ? Math.floor(value)
45
+ : fallback;
46
+ }
47
+
48
+ /** Coerce a config value to a number within [min, max]. */
49
+ function clampNumberOr(value: unknown, fallback: number, min: number, max: number): number {
50
+ return typeof value === 'number' && Number.isFinite(value)
51
+ ? Math.min(max, Math.max(min, value))
52
+ : fallback;
53
+ }
54
+
55
+ /** Coerce a config value to a model step: a non-empty spec string or
56
+ * `{ model, reasoning }`. Returns undefined for anything else. */
57
+ function parseModelStep(value: unknown): ModelStepConfig | undefined {
58
+ if (typeof value === 'string') return value.trim() ? value.trim() : undefined;
59
+ if (typeof value === 'object' && value !== null) {
60
+ const record = value as Record<string, unknown>;
61
+ const model = typeof record.model === 'string' ? record.model.trim() : '';
62
+ if (!model) return undefined;
63
+ const reasoning =
64
+ typeof record.reasoning === 'string' &&
65
+ REASONING_LEVELS.has(record.reasoning as ReasoningLevel)
66
+ ? (record.reasoning as ReasoningLevel)
67
+ : undefined;
68
+ return reasoning ? { model, reasoning } : { model };
69
+ }
70
+ return undefined;
71
+ }
72
+
73
+ /** Coerce a config value to a non-empty array of model steps, or undefined. */
74
+ function parseModelStepArray(value: unknown): ModelStepConfig[] | undefined {
75
+ if (!Array.isArray(value)) return undefined;
76
+ const steps = value
77
+ .map(parseModelStep)
78
+ .filter((step): step is ModelStepConfig => step !== undefined);
79
+ return steps.length > 0 ? steps : undefined;
80
+ }
81
+
82
+ function parsePipeline(raw: unknown): ReviewPipelineConfig {
83
+ if (typeof raw !== 'object' || raw === null) return { ...DEFAULT_PIPELINE };
84
+ const review = raw as Record<string, unknown>;
85
+ const passes = nonNegativeIntOr(review.passes, DEFAULT_PIPELINE.passes);
86
+ return {
87
+ passes,
88
+ validate: typeof review.validate === 'boolean' ? review.validate : DEFAULT_PIPELINE.validate,
89
+ minVotes: positiveIntOr(review.minVotes, DEFAULT_PIPELINE.minVotes),
90
+ // Default concurrency tracks pass count so all passes fan out at once.
91
+ concurrency: positiveIntOr(review.concurrency, Math.max(1, passes)),
92
+ temperature: clampNumberOr(review.temperature, DEFAULT_PIPELINE.temperature, 0, 2),
93
+ maxFindings: positiveIntOr(review.maxFindings, DEFAULT_PIPELINE.maxFindings),
94
+ passModel: parseModelStep(review.passModel),
95
+ passModels: parseModelStepArray(review.passModels),
96
+ validateModel: parseModelStep(review.validateModel),
26
97
  };
27
98
  }
28
99
 
@@ -46,6 +117,7 @@ export function loadConfigEffect(cwd: string): Effect.Effect<ReviewConfig, never
46
117
  defaultLenses: parsed.defaultLenses ?? [],
47
118
  toolTimeoutMs: positiveIntOr(parsed.toolTimeoutMs, DEFAULT_TOOL_TIMEOUT_MS),
48
119
  toolConcurrency: positiveIntOr(parsed.toolConcurrency, DEFAULT_TOOL_CONCURRENCY),
120
+ review: parsePipeline((parsed as { review?: unknown }).review),
49
121
  };
50
122
  } catch {
51
123
  // Malformed config — fall back to defaults.
@@ -23,6 +23,14 @@ export type DiffOptions = { base?: string; staged?: boolean };
23
23
  * the whole review. */
24
24
  const GIT_TIMEOUT_MS = 30_000;
25
25
 
26
+ /** Cap on untracked files diffed against /dev/null so a repo full of generated
27
+ * junk can't blow up the prompt. The whole diff is truncated downstream too. */
28
+ const MAX_UNTRACKED_FILES = 200;
29
+
30
+ /** The null device path — `git diff --no-index` of an untracked file against
31
+ * it yields a full new-file diff without touching the index. */
32
+ const NULL_DEVICE = '/dev/null';
33
+
26
34
  function git(args: string[], cwd: string): Effect.Effect<string, ExecError, Executor> {
27
35
  return Effect.gen(function* () {
28
36
  const executor = yield* Executor;
@@ -31,6 +39,51 @@ function git(args: string[], cwd: string): Effect.Effect<string, ExecError, Exec
31
39
  });
32
40
  }
33
41
 
42
+ /**
43
+ * Diff every untracked (new, not-yet-`git add`ed) file against /dev/null so
44
+ * brand-new files show up in a working-directory review — `git diff HEAD`
45
+ * omits them entirely, which is exactly the class of change agents introduce.
46
+ *
47
+ * Read-only: it NEVER touches the index (no `git add -N`). `git diff --no-index`
48
+ * exits non-zero when files differ, but pi.exec resolves with the diff on stdout
49
+ * regardless; any per-file failure degrades to an empty string rather than
50
+ * sinking the whole review.
51
+ */
52
+ function collectUntrackedEffect(
53
+ cwd: string,
54
+ ): Effect.Effect<{ diff: string; files: string[] }, never, Executor> {
55
+ return Effect.gen(function* () {
56
+ const listed = yield* git(['ls-files', '--others', '--exclude-standard'], cwd).pipe(
57
+ Effect.orElseSucceed(() => ''),
58
+ );
59
+ const files = listed
60
+ .split('\n')
61
+ .map((f) => f.trim())
62
+ .filter(Boolean);
63
+ if (files.length === 0) return { diff: '', files: [] };
64
+
65
+ const parts = yield* Effect.forEach(
66
+ files.slice(0, MAX_UNTRACKED_FILES),
67
+ (file) =>
68
+ git(['diff', '--no-index', '--', NULL_DEVICE, file], cwd).pipe(
69
+ Effect.orElseSucceed(() => ''),
70
+ ),
71
+ { concurrency: 4 },
72
+ );
73
+ return { diff: parts.filter((part) => part.trim()).join('\n'), files };
74
+ });
75
+ }
76
+
77
+ /** Append a one-line-per-file summary of untracked files to a `--stat` block so
78
+ * the change overview reflects new files that git's own stat never lists. */
79
+ function appendUntrackedStat(stat: string, files: string[]): string {
80
+ if (files.length === 0) return stat;
81
+ const shown = files.slice(0, MAX_UNTRACKED_FILES);
82
+ const lines = shown.map((file) => ` ${file} | (new, untracked)`);
83
+ const note = `${files.length} untracked file(s) included`;
84
+ return [stat.trimEnd(), ...lines, note].filter(Boolean).join('\n');
85
+ }
86
+
34
87
  /** Collect the diff from the working directory or a specific base ref. */
35
88
  export function collectDiffEffect(
36
89
  cwd: string,
@@ -49,20 +102,31 @@ export function collectDiffEffect(
49
102
  return { diff, stat, label: `changes since ${options.base}` };
50
103
  }
51
104
 
52
- // Default: working directory changes (unstaged + staged) relative to HEAD.
53
- // `git diff HEAD` fails on a repo with no commits (HEAD is unborn), so
105
+ // Default: EVERYTHING the agent is working on but hasn't committed —
106
+ // tracked changes (unstaged + staged) relative to HEAD, PLUS untracked
107
+ // (brand-new) files. `git diff HEAD` covers only the former; untracked
108
+ // files are collected separately and merged so new files are reviewed too.
109
+ // `git diff HEAD` also fails on a repo with no commits (HEAD is unborn), so
54
110
  // tolerate that and fall back to the bare working-directory diff.
55
111
  const headDiff = yield* git(['diff', 'HEAD'], cwd).pipe(Effect.either);
112
+ const untracked = yield* collectUntrackedEffect(cwd);
56
113
 
57
- // No HEAD (fresh repo) or an empty HEAD diff → fall back to the working dir.
114
+ let tracked: string;
115
+ let stat: string;
116
+ let label: string;
58
117
  if (headDiff._tag === 'Left' || !headDiff.right.trim()) {
59
- const wdDiff = yield* git(['diff'], cwd);
60
- const wdStat = yield* git(['diff', '--stat'], cwd);
61
- return { diff: wdDiff, stat: wdStat, label: 'working directory changes' };
118
+ // No HEAD (fresh repo) or no tracked changes → use the bare working dir.
119
+ tracked = yield* git(['diff'], cwd);
120
+ stat = yield* git(['diff', '--stat'], cwd);
121
+ label = 'working directory changes';
122
+ } else {
123
+ tracked = headDiff.right;
124
+ stat = yield* git(['diff', 'HEAD', '--stat'], cwd);
125
+ label = 'all uncommitted changes';
62
126
  }
63
127
 
64
- const stat = yield* git(['diff', 'HEAD', '--stat'], cwd);
65
- return { diff: headDiff.right, stat, label: 'all uncommitted changes' };
128
+ const diff = [tracked, untracked.diff].filter((part) => part.trim()).join('\n');
129
+ return { diff, stat: appendUntrackedStat(stat, untracked.files), label };
66
130
  });
67
131
  }
68
132
 
@@ -72,19 +136,31 @@ export function getChangedFilesEffect(
72
136
  options: DiffOptions,
73
137
  ): Effect.Effect<string[], ExecError, Executor> {
74
138
  return Effect.gen(function* () {
75
- const args = ['diff', '--name-only'];
76
- if (options.staged) args.push('--staged');
77
- else if (options.base) args.push(options.base);
78
- else args.push('HEAD');
139
+ if (options.staged || options.base) {
140
+ const args = ['diff', '--name-only', options.staged ? '--staged' : options.base!];
141
+ const stdout = yield* git(args, cwd);
142
+ return splitPaths(stdout);
143
+ }
79
144
 
80
- const stdout = yield* git(args, cwd);
81
- return stdout
82
- .split('\n')
83
- .map((f) => f.trim())
84
- .filter(Boolean);
145
+ // Default: tracked changes vs HEAD (tolerate an unborn HEAD) plus untracked
146
+ // files, deduped, so the changed-file list mirrors the merged default diff.
147
+ const tracked = yield* git(['diff', '--name-only', 'HEAD'], cwd).pipe(
148
+ Effect.orElseSucceed(() => ''),
149
+ );
150
+ const untracked = yield* git(['ls-files', '--others', '--exclude-standard'], cwd).pipe(
151
+ Effect.orElseSucceed(() => ''),
152
+ );
153
+ return [...new Set([...splitPaths(tracked), ...splitPaths(untracked)])];
85
154
  });
86
155
  }
87
156
 
157
+ function splitPaths(stdout: string): string[] {
158
+ return stdout
159
+ .split('\n')
160
+ .map((f) => f.trim())
161
+ .filter(Boolean);
162
+ }
163
+
88
164
  // ── Promise wrappers (live Executor from pi) ──────────────────────────────────
89
165
 
90
166
  export function collectDiff(
@@ -0,0 +1,112 @@
1
+ /**
2
+ * Reviewer service — wraps the session's current model so a single completion
3
+ * becomes an injectable, typed Effect. The self-driving pipeline (see
4
+ * `passes.ts`) depends on this Tag; the live implementation drives
5
+ * `@earendil-works/pi-ai`'s `completeSimple` over `ctx.model`, while tests
6
+ * provide a deterministic fake instead of calling a real provider.
7
+ *
8
+ * `@earendil-works/pi-ai` is an OPTIONAL peer dependency, so the runtime import
9
+ * is deferred (`import()`), reached only when the harness actually hands us a
10
+ * model. The extension stays loadable in environments without pi-ai.
11
+ */
12
+
13
+ import type { Api, AssistantMessage, Model, TextContent } from '@earendil-works/pi-ai';
14
+ import { Context, Effect } from 'effect';
15
+
16
+ import { ModelError } from '../errors';
17
+ import type { ReasoningLevel } from '../types';
18
+
19
+ /** The model key meaning "use the session's current model". */
20
+ export const DEFAULT_MODEL_KEY = 'default';
21
+
22
+ export type CompletionRequest = {
23
+ /** Which model to run this call on — {@link DEFAULT_MODEL_KEY} or a key the
24
+ * resolution map holds. Unknown keys fall back to the default model. */
25
+ modelKey: string;
26
+ system: string;
27
+ user: string;
28
+ /** Sampling temperature; the pipeline jitters this per pass. */
29
+ temperature?: number;
30
+ /** Reasoning/thinking effort for this call (provider-dependent). */
31
+ reasoning?: ReasoningLevel;
32
+ /** Identifies which pipeline stage is calling, for error context. */
33
+ stage: string;
34
+ signal?: AbortSignal;
35
+ };
36
+
37
+ export interface ReviewerService {
38
+ readonly complete: (request: CompletionRequest) => Effect.Effect<string, ModelError>;
39
+ }
40
+
41
+ export class Reviewer extends Context.Tag('CodeReviewer/Reviewer')<Reviewer, ReviewerService>() {}
42
+
43
+ /** Resolved models the pipeline can run against: a default (session) model plus
44
+ * any config-specified models keyed by their spec string. */
45
+ export type ModelResolution = {
46
+ defaultModel: Model<Api>;
47
+ byKey: Map<string, Model<Api>>;
48
+ };
49
+
50
+ /** Resolve a config model spec to a registered model. Accepts "provider/id"
51
+ * (tried first as an exact provider + id pair), a bare model `id`, or a display `name`. */
52
+ export function resolveModelSpec(
53
+ registry: { getAll: () => Model<Api>[] },
54
+ spec: string,
55
+ ): Model<Api> | undefined {
56
+ const trimmed = spec.trim();
57
+ if (!trimmed) return undefined;
58
+ const all = registry.getAll();
59
+
60
+ const slash = trimmed.indexOf('/');
61
+ if (slash > 0) {
62
+ const provider = trimmed.slice(0, slash);
63
+ const id = trimmed.slice(slash + 1);
64
+ const exact = all.find((model) => model.provider === provider && model.id === id);
65
+ if (exact) return exact;
66
+ }
67
+ return (
68
+ all.find((model) => model.id === trimmed) ??
69
+ all.find((model) => `${model.provider}/${model.id}` === trimmed) ??
70
+ all.find((model) => model.name === trimmed)
71
+ );
72
+ }
73
+
74
+ /** Flatten an assistant message to its plain-text content (drop thinking/tool). */
75
+ export function extractText(message: AssistantMessage): string {
76
+ return message.content
77
+ .filter((block): block is TextContent => block.type === 'text')
78
+ .map((block) => block.text)
79
+ .join('\n')
80
+ .trim();
81
+ }
82
+
83
+ /** Build a live Reviewer that routes each call to the model named by its
84
+ * `modelKey` (falling back to the default/session model) via pi-ai. */
85
+ export function makeReviewerService(resolution: ModelResolution): ReviewerService {
86
+ return {
87
+ complete: (request) =>
88
+ Effect.tryPromise({
89
+ try: async () => {
90
+ const { completeSimple } = await import('@earendil-works/pi-ai');
91
+ const model = resolution.byKey.get(request.modelKey) ?? resolution.defaultModel;
92
+ const message = await completeSimple(
93
+ model,
94
+ {
95
+ systemPrompt: request.system,
96
+ messages: [{ role: 'user', content: request.user, timestamp: Date.now() }],
97
+ },
98
+ {
99
+ temperature: request.temperature,
100
+ reasoning: request.reasoning,
101
+ signal: request.signal,
102
+ },
103
+ );
104
+ if (message.stopReason === 'error') {
105
+ throw new Error(message.errorMessage ?? 'model returned an error stop reason');
106
+ }
107
+ return extractText(message);
108
+ },
109
+ catch: (cause) => new ModelError({ stage: request.stage, cause }),
110
+ }),
111
+ };
112
+ }
@@ -28,7 +28,16 @@ export class ExecError extends Data.TaggedError('ExecError')<{
28
28
  }
29
29
  }
30
30
 
31
- export type CodeReviewerError = FileReadError | ExecError;
31
+ export class ModelError extends Data.TaggedError('ModelError')<{
32
+ readonly stage: string;
33
+ readonly cause: unknown;
34
+ }> {
35
+ get message(): string {
36
+ return `Model call failed during ${this.stage}: ${causeMessage(this.cause)}`;
37
+ }
38
+ }
39
+
40
+ export type CodeReviewerError = FileReadError | ExecError | ModelError;
32
41
 
33
42
  // ── Helpers ───────────────────────────────────────────────────────────────
34
43