openairev 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +280 -0
- package/bin/openairev.js +56 -0
- package/package.json +33 -0
- package/prompts/executor-feedback.md +25 -0
- package/prompts/plan-reviewer.md +40 -0
- package/prompts/reviewer.md +50 -0
- package/src/agents/claude-code.js +46 -0
- package/src/agents/codex.js +71 -0
- package/src/agents/detect.js +9 -0
- package/src/agents/exec-helper.js +16 -0
- package/src/agents/registry.js +20 -0
- package/src/cli/format-helpers.js +50 -0
- package/src/cli/history.js +76 -0
- package/src/cli/init.js +211 -0
- package/src/cli/resume.js +135 -0
- package/src/cli/review.js +151 -0
- package/src/cli/status.js +73 -0
- package/src/config/config-loader.js +74 -0
- package/src/config/config-loader.test.js +113 -0
- package/src/config/defaults.js +38 -0
- package/src/config/plan-verdict-schema.json +44 -0
- package/src/config/verdict-schema.json +44 -0
- package/src/mcp/mcp-server.js +261 -0
- package/src/orchestrator/orchestrator.js +344 -0
- package/src/review/input-stager.js +35 -0
- package/src/review/input-stager.test.js +53 -0
- package/src/review/prompt-loader.js +29 -0
- package/src/review/review-runner.js +82 -0
- package/src/review/review-runner.test.js +79 -0
- package/src/session/chain-manager.js +292 -0
- package/src/session/chain-manager.test.js +188 -0
- package/src/session/session-manager.js +66 -0
- package/src/session/session-manager.test.js +72 -0
- package/src/tools/git-tools.js +27 -0
- package/src/tools/tool-runner.js +47 -0
package/README.md
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
# OpenAIRev
|
|
2
|
+
|
|
3
|
+
Cross-model AI code reviewer and workflow orchestrator for AI-assisted coding. The executor is never its own reviewer — independent judgment by default.
|
|
4
|
+
|
|
5
|
+
OpenAIRev orchestrates AI coding agents (Claude Code, Codex CLI, and more) so that one model reviews another's output. You choose which models pair up. The defaults are opinionated but fully configurable — including self-review if that's what you want.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install -g openairev
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Requires at least one AI coding CLI installed:
|
|
14
|
+
- [Claude Code CLI](https://docs.anthropic.com/en/docs/claude-code)
|
|
15
|
+
- [Codex CLI](https://github.com/openai/codex)
|
|
16
|
+
|
|
17
|
+
## Quick Start
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
cd your-project
|
|
21
|
+
|
|
22
|
+
# One-time setup
|
|
23
|
+
openairev init
|
|
24
|
+
|
|
25
|
+
# Quick: implement current task → code review loop
|
|
26
|
+
openairev review --task "Add auth middleware"
|
|
27
|
+
|
|
28
|
+
# Full workflow: analyze → plan → plan review → implement → code review
|
|
29
|
+
openairev review --plan --task "Add auth middleware"
|
|
30
|
+
|
|
31
|
+
# With OpenSpec reference
|
|
32
|
+
openairev review --plan --spec-ref openspec/changes/070_add-dashboard/
|
|
33
|
+
|
|
34
|
+
# Single review of existing changes, no workflow
|
|
35
|
+
openairev review --once
|
|
36
|
+
|
|
37
|
+
# Resume an interrupted or blocked workflow
|
|
38
|
+
openairev resume
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## How It Works
|
|
42
|
+
|
|
43
|
+
OpenAIRev runs a stage-driven workflow:
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
analyze → planning → plan_review → implementation → code_review → done
|
|
47
|
+
↑ ↓ ↓
|
|
48
|
+
awaiting_user plan_fix code_fix
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Each stage:
|
|
52
|
+
|
|
53
|
+
1. **Analyze** — executor examines the codebase and task. If clarification is needed, transitions to `awaiting_user` (blocked until you answer via `openairev resume`).
|
|
54
|
+
2. **Planning** — executor creates a phased implementation plan.
|
|
55
|
+
3. **Plan review** — reviewer checks scope, sequencing, missing requirements, risk. Uses a separate plan-review prompt.
|
|
56
|
+
4. **Implementation** — executor writes code for the current phase.
|
|
57
|
+
5. **Code review** — reviewer checks correctness, edge cases, regressions. If `needs_changes`, executor fixes and re-reviews. If approved and more phases remain, loops back to implementation.
|
|
58
|
+
6. **Done** — all phases approved.
|
|
59
|
+
|
|
60
|
+
Stages are optional. `--quick` skips analyze and goes straight to implement → review. `--once` skips the workflow entirely and does a single review of existing changes.
|
|
61
|
+
|
|
62
|
+
OpenAIRev uses the CLIs' non-interactive modes (`claude -p`, `codex exec`) — no API keys needed. Works with your existing subscriptions.
|
|
63
|
+
|
|
64
|
+
## CLI Reference
|
|
65
|
+
|
|
66
|
+
### `openairev init`
|
|
67
|
+
|
|
68
|
+
Interactive setup wizard. Detects installed CLIs, lets you configure:
|
|
69
|
+
- Which agents to use
|
|
70
|
+
- Who reviews whose code (any combination)
|
|
71
|
+
- Max iterations per direction
|
|
72
|
+
- Tool gate commands (test, lint, typecheck)
|
|
73
|
+
|
|
74
|
+
### `openairev review`
|
|
75
|
+
|
|
76
|
+
Start a review workflow or single review.
|
|
77
|
+
|
|
78
|
+
```
|
|
79
|
+
Options:
|
|
80
|
+
-e, --executor <agent> Who wrote the code (claude_code|codex)
|
|
81
|
+
-r, --reviewer <agent> Who reviews (claude_code|codex)
|
|
82
|
+
--diff <ref>            Git diff ref (default: staged or HEAD)
|
|
83
|
+
--file <path> Review a specific file instead of diff
|
|
84
|
+
--task <description> Task description for requirement checking
|
|
85
|
+
--spec-ref <path> Path to OpenSpec change directory
|
|
86
|
+
--rounds <number> Max review-fix rounds
|
|
87
|
+
--plan Full workflow: analyze → plan → review → implement
|
|
88
|
+
--quick Skip analyze, go straight to implement → review
|
|
89
|
+
--once Single review only, no workflow
|
|
90
|
+
--dry-run Show what would happen without executing
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### `openairev resume`
|
|
94
|
+
|
|
95
|
+
Resume an active or blocked workflow. If blocked on `awaiting_user`, prompts for answers to pending questions.
|
|
96
|
+
|
|
97
|
+
```
|
|
98
|
+
Options:
|
|
99
|
+
--chain <id> Resume a specific chain by ID
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### `openairev status`
|
|
103
|
+
|
|
104
|
+
Show current workflow state: stage, agents, rounds, pending questions.
|
|
105
|
+
|
|
106
|
+
### `openairev history`
|
|
107
|
+
|
|
108
|
+
List past workflows and sessions.
|
|
109
|
+
|
|
110
|
+
```
|
|
111
|
+
Options:
|
|
112
|
+
-n, --limit <number> Number of items to show (default: 10)
|
|
113
|
+
--chains Show workflow chains instead of sessions
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## Workflow Stages
|
|
117
|
+
|
|
118
|
+
### Review iterations
|
|
119
|
+
|
|
120
|
+
Each iteration is one complete executor↔reviewer cycle:
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
Iteration 1: Executor writes code → Reviewer reviews → "needs_changes"
|
|
124
|
+
Iteration 2: Executor fixes issues → Reviewer reviews again → "needs_changes"
|
|
125
|
+
Iteration 3: Executor fixes again → Reviewer reviews → "approved" ✓
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
The reviewer does a thorough single-pass review each iteration, covering surface issues, edge cases, requirements, and false positive reconsideration — all in one shot.
|
|
129
|
+
|
|
130
|
+
### Plan review vs code review
|
|
131
|
+
|
|
132
|
+
- **Plan review** checks scope, sequencing, missing requirements, and risk. Uses `plan-reviewer.md` prompt and a separate verdict schema with `missing_requirements`, `sequencing_issues`, and `risks` fields.
|
|
133
|
+
- **Code review** checks correctness, regressions, edge cases, and test gaps. Uses `reviewer.md` prompt.
|
|
134
|
+
|
|
135
|
+
### Multi-phase implementation
|
|
136
|
+
|
|
137
|
+
The executor's plan can define phases (`PHASE: <name>` / `GOAL: <goal>` in output). Each phase goes through its own implementation → code review loop. When a phase is approved, the next phase begins.
|
|
138
|
+
|
|
139
|
+
### User clarification
|
|
140
|
+
|
|
141
|
+
During analysis, if the executor outputs lines starting with `QUESTION:`, the workflow blocks (`awaiting_user`). Run `openairev resume` to answer the questions and continue.
|
|
142
|
+
|
|
143
|
+
### Why different defaults per direction
|
|
144
|
+
|
|
145
|
+
`max_iterations` controls how many rounds the **executor** gets to align with reviewer feedback:
|
|
146
|
+
|
|
147
|
+
- **Claude Code as executor → 5 iterations** — Claude is less stable at consistently applying reviewer feedback across rounds. More iterations give it room to converge.
|
|
148
|
+
- **Codex as executor → 1 iteration** — Codex applies review feedback more reliably, so a single cycle is usually sufficient.
|
|
149
|
+
|
|
150
|
+
Override per-run with `--rounds`.
|
|
151
|
+
|
|
152
|
+
## Review Verdict
|
|
153
|
+
|
|
154
|
+
Every code review returns a structured JSON verdict:
|
|
155
|
+
|
|
156
|
+
```json
|
|
157
|
+
{
|
|
158
|
+
"status": "approved | needs_changes | reject",
|
|
159
|
+
"critical_issues": [],
|
|
160
|
+
"test_gaps": [],
|
|
161
|
+
"requirement_mismatches": [],
|
|
162
|
+
"rule_violations": [],
|
|
163
|
+
"risk_level": "low | medium | high",
|
|
164
|
+
"confidence": 0.88,
|
|
165
|
+
"repair_instructions": [],
|
|
166
|
+
"false_positives_reconsidered": []
|
|
167
|
+
}
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
Plan reviews return a similar verdict with `missing_requirements`, `sequencing_issues`, and `risks` instead.
|
|
171
|
+
|
|
172
|
+
## Executor Feedback
|
|
173
|
+
|
|
174
|
+
When a reviewer returns `needs_changes`, the feedback is wrapped in a behavioral prompt that tells the executor:
|
|
175
|
+
|
|
176
|
+
- This is a **peer review**, not a user command
|
|
177
|
+
- Use your own judgment — don't blindly apply every suggestion
|
|
178
|
+
- Accept real bugs, ignore style nits, push back on low-confidence items
|
|
179
|
+
|
|
180
|
+
The executor keeps full autonomy over what to fix.
|
|
181
|
+
|
|
182
|
+
## MCP Server
|
|
183
|
+
|
|
184
|
+
OpenAIRev includes an MCP server so both CLIs can trigger reviews as tool calls.
|
|
185
|
+
|
|
186
|
+
### Add to Claude Code
|
|
187
|
+
|
|
188
|
+
In your project's `.claude/settings.json` or `~/.claude/settings.json`:
|
|
189
|
+
|
|
190
|
+
```json
|
|
191
|
+
{
|
|
192
|
+
"mcpServers": {
|
|
193
|
+
"openairev": {
|
|
194
|
+
"command": "node",
|
|
195
|
+
"args": ["/path/to/openairev/src/mcp/mcp-server.js"]
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
### Add to Codex CLI
|
|
202
|
+
|
|
203
|
+
In `~/.codex/config.toml`:
|
|
204
|
+
|
|
205
|
+
```toml
|
|
206
|
+
[mcp_servers.openairev]
|
|
207
|
+
command = "node"
|
|
208
|
+
args = ["/path/to/openairev/src/mcp/mcp-server.js"]
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
### MCP Tools
|
|
212
|
+
|
|
213
|
+
| Tool | Description |
|
|
214
|
+
|------|-------------|
|
|
215
|
+
| `openairev_review` | Send diff to reviewer, get structured verdict |
|
|
216
|
+
| `openairev_status` | Check most recent review result |
|
|
217
|
+
| `openairev_run_tests` | Run project test suite |
|
|
218
|
+
| `openairev_run_lint` | Run linter |
|
|
219
|
+
| `openairev_get_diff` | Get current git diff |
|
|
220
|
+
|
|
221
|
+
## Config
|
|
222
|
+
|
|
223
|
+
Generated by `openairev init` at `.openairev/config.yaml`:
|
|
224
|
+
|
|
225
|
+
```yaml
|
|
226
|
+
agents:
|
|
227
|
+
claude_code:
|
|
228
|
+
cmd: claude
|
|
229
|
+
available: true
|
|
230
|
+
codex:
|
|
231
|
+
cmd: codex
|
|
232
|
+
available: true
|
|
233
|
+
|
|
234
|
+
review_policy:
|
|
235
|
+
claude_code:
|
|
236
|
+
reviewer: codex
|
|
237
|
+
max_iterations: 5 # Claude needs more rounds to align with feedback
|
|
238
|
+
codex:
|
|
239
|
+
reviewer: claude_code
|
|
240
|
+
max_iterations: 1 # Codex applies feedback more consistently
|
|
241
|
+
|
|
242
|
+
review_trigger: explicit
|
|
243
|
+
|
|
244
|
+
tools:
|
|
245
|
+
run_tests: npm test
|
|
246
|
+
run_lint: npm run lint
|
|
247
|
+
run_typecheck: npx tsc --noEmit
|
|
248
|
+
|
|
249
|
+
session:
|
|
250
|
+
store_history: true
|
|
251
|
+
archive_after: 7d
|
|
252
|
+
```
|
|
253
|
+
|
|
254
|
+
All pairings and iteration counts are user-configurable during `openairev init`. Any combination is valid: cross-review, self-review, one-way only, or skip.
|
|
255
|
+
|
|
256
|
+
## OpenSpec Integration
|
|
257
|
+
|
|
258
|
+
If your project uses [OpenSpec](https://github.com/Fission-AI/OpenSpec) for spec-driven development, pass the change directory with `--spec-ref`:
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
openairev review --plan --spec-ref openspec/changes/070_add-admin-dashboard-ui/
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
The reviewer will read the spec's requirements and scenarios to validate the code against them. If an OpenSpec change exists, the plan review stage checks the proposal against the spec structure.
|
|
265
|
+
|
|
266
|
+
## Customizing Review Prompts
|
|
267
|
+
|
|
268
|
+
After init, review prompts are copied to `.openairev/prompts/`. Edit them to customize:
|
|
269
|
+
|
|
270
|
+
- `reviewer.md` — code review behavioral contract
|
|
271
|
+
- `plan-reviewer.md` — plan review behavioral contract
|
|
272
|
+
- `executor-feedback.md` — how review feedback is framed to the executor
|
|
273
|
+
|
|
274
|
+
## Large Diffs
|
|
275
|
+
|
|
276
|
+
For diffs over 8K characters, OpenAIRev writes the content to `.openairev/tmp/` and tells the reviewer to read the file from disk. Both CLIs have built-in file reading, so there's no size limit.
|
|
277
|
+
|
|
278
|
+
## License
|
|
279
|
+
|
|
280
|
+
MIT
|
package/bin/openairev.js
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
#!/usr/bin/env node

// openairev CLI entry point. Each subcommand delegates to its
// implementation module under src/cli/.

import { Command } from 'commander';
import { initCommand } from '../src/cli/init.js';
import { reviewCommand } from '../src/cli/review.js';
import { resumeCommand } from '../src/cli/resume.js';
import { statusCommand } from '../src/cli/status.js';
import { historyCommand } from '../src/cli/history.js';

// Commander calls option-coercion functions as (value, previous), so passing
// bare `parseInt` makes the *previous* value act as the radix. Wrap it so the
// radix is always an explicit 10.
const toInt = (value) => Number.parseInt(value, 10);

const program = new Command();

program
  .name('openairev')
  .description('OpenAIRev — cross-model AI code reviewer')
  .version('0.2.0');

// One-time interactive configuration (writes .openairev/config.yaml).
program
  .command('init')
  .description('Interactive setup wizard')
  .action(initCommand);

// Main entry: run a review workflow, or a single review with --once.
program
  .command('review')
  .description('Start a review workflow or single review')
  .option('-e, --executor <agent>', 'Who wrote the code (claude_code|codex)')
  .option('-r, --reviewer <agent>', 'Who reviews (claude_code|codex)')
  .option('--diff <ref>', 'Git diff ref (default: staged or HEAD)')
  .option('--file <path>', 'Review a specific file instead of diff')
  .option('--task <description>', 'Task description for requirement checking')
  .option('--spec-ref <path>', 'Path to OpenSpec change directory')
  .option('--rounds <number>', 'Max review-fix rounds', toInt)
  .option('--plan', 'Full workflow: analyze → plan → review → implement')
  .option('--quick', 'Skip analyze, go straight to implement → review')
  .option('--once', 'Single review only, no workflow')
  .option('--dry-run', 'Show what would happen without executing')
  .action(reviewCommand);

// Continue an interrupted workflow, or answer pending questions when the
// chain is blocked on awaiting_user.
program
  .command('resume')
  .description('Resume an active or blocked workflow')
  .option('--chain <id>', 'Resume a specific chain by ID')
  .action(resumeCommand);

program
  .command('status')
  .description('Show current workflow state')
  .action(statusCommand);

program
  .command('history')
  .description('List past workflows and sessions')
  .option('-n, --limit <number>', 'Number of items to show', toInt, 10)
  .option('--chains', 'Show chains instead of sessions')
  .action(historyCommand);

program.parse();
|
package/package.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "openairev",
|
|
3
|
+
"version": "0.2.0",
|
|
4
|
+
"description": "Cross-model AI code reviewer — independent review for AI-assisted coding workflows",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"openairev": "./bin/openairev.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"bin/",
|
|
11
|
+
"src/",
|
|
12
|
+
"prompts/"
|
|
13
|
+
],
|
|
14
|
+
"scripts": {
|
|
15
|
+
"start": "node bin/openairev.js",
|
|
16
|
+
"test": "node --test src/**/*.test.js"
|
|
17
|
+
},
|
|
18
|
+
"dependencies": {
|
|
19
|
+
"commander": "^13.1.0",
|
|
20
|
+
"inquirer": "^12.6.0",
|
|
21
|
+
"yaml": "^2.7.1",
|
|
22
|
+
"chalk": "^5.4.1"
|
|
23
|
+
},
|
|
24
|
+
"keywords": ["ai", "code-review", "claude", "codex", "mcp", "cross-model", "reviewer"],
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "git+https://github.com/rsktash/openairev.git"
|
|
29
|
+
},
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=20.0.0"
|
|
32
|
+
}
|
|
33
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
The following is feedback from an independent AI code reviewer — NOT from the user.
|
|
2
|
+
|
|
3
|
+
## How to treat this feedback
|
|
4
|
+
|
|
5
|
+
- This is a PEER REVIEW, not a user command. Use your own judgment.
|
|
6
|
+
- Do NOT blindly apply every suggestion. Evaluate each one critically.
|
|
7
|
+
- The reviewer is another AI model. It can be wrong, overly cautious, or miss context you have.
|
|
8
|
+
- Accept suggestions that are clearly correct (real bugs, missing null checks, logic errors).
|
|
9
|
+
- Push back on or ignore suggestions that are:
|
|
10
|
+
- Style-only with no correctness impact
|
|
11
|
+
- Based on assumptions the reviewer can't verify
|
|
12
|
+
- Low-confidence items the reviewer flagged as uncertain
|
|
13
|
+
- Already handled elsewhere in the code that the reviewer didn't see
|
|
14
|
+
|
|
15
|
+
## What to do
|
|
16
|
+
|
|
17
|
+
1. Read the review verdict below
|
|
18
|
+
2. For each issue, decide: accept, partially accept, or reject
|
|
19
|
+
3. Fix the issues you accept
|
|
20
|
+
4. If the reviewer said "approved" — you're done, continue your work
|
|
21
|
+
5. If the reviewer said "needs_changes" — address the valid issues, skip the rest
|
|
22
|
+
6. If the reviewer said "reject" — consider carefully, but remember you have full context the reviewer may lack
|
|
23
|
+
|
|
24
|
+
## Review verdict
|
|
25
|
+
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
You are acting as an independent plan reviewer in a cross-model review workflow.
|
|
2
|
+
|
|
3
|
+
You are NOT the user. You are NOT the plan author. You are a separate AI agent reviewing a plan created by another AI agent.
|
|
4
|
+
|
|
5
|
+
## Your role
|
|
6
|
+
|
|
7
|
+
- Review the proposed plan for completeness, sequencing, and risk
|
|
8
|
+
- This is a PLAN review, not a code review. Focus on architecture and approach, not implementation details.
|
|
9
|
+
|
|
10
|
+
## Review checklist
|
|
11
|
+
|
|
12
|
+
1. **Scope** — does the plan cover everything in the requirements? Anything missing? Anything out of scope that shouldn't be there?
|
|
13
|
+
2. **Sequencing** — are the phases/steps in a logical order? Are dependencies between steps handled correctly?
|
|
14
|
+
3. **Risk** — what could go wrong? Are there edge cases the plan doesn't address? Are there implicit assumptions?
|
|
15
|
+
4. **Feasibility** — can this plan actually be implemented as described? Are there technical constraints it ignores?
|
|
16
|
+
5. **Testability** — how will we know when each phase is complete? Are there clear acceptance criteria?
|
|
17
|
+
|
|
18
|
+
## What NOT to flag
|
|
19
|
+
|
|
20
|
+
- Implementation details (that's for code review)
|
|
21
|
+
- Style preferences
|
|
22
|
+
- Alternative approaches unless the proposed approach has a concrete flaw
|
|
23
|
+
|
|
24
|
+
## Output
|
|
25
|
+
|
|
26
|
+
Return ONLY this JSON structure:
|
|
27
|
+
|
|
28
|
+
```json
|
|
29
|
+
{
|
|
30
|
+
"status": "approved | needs_changes | reject",
|
|
31
|
+
"critical_issues": [],
|
|
32
|
+
"missing_requirements": [],
|
|
33
|
+
"sequencing_issues": [],
|
|
34
|
+
"risks": [],
|
|
35
|
+
"risk_level": "low | medium | high",
|
|
36
|
+
"confidence": 0.0,
|
|
37
|
+
"repair_instructions": [],
|
|
38
|
+
"false_positives_reconsidered": []
|
|
39
|
+
}
|
|
40
|
+
```
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
You are acting as an independent code reviewer in a cross-model review workflow.
|
|
2
|
+
|
|
3
|
+
You are NOT the user. You are NOT the code author. You are a separate AI agent whose sole job is to review code written by another AI agent. Your review will be sent back to the code author for consideration.
|
|
4
|
+
|
|
5
|
+
## Your role
|
|
6
|
+
|
|
7
|
+
- Review the provided diff or code changes thoroughly and objectively in a single pass
|
|
8
|
+
- You are a peer reviewer — give your honest technical assessment
|
|
9
|
+
- The code author is another AI agent, not a human. Do not soften your feedback.
|
|
10
|
+
- You may be wrong. Flag your confidence level on each issue.
|
|
11
|
+
|
|
12
|
+
## Review checklist
|
|
13
|
+
|
|
14
|
+
Work through ALL of these in a single review:
|
|
15
|
+
|
|
16
|
+
1. **Surface scan** — broken logic, syntax errors, missing imports, clearly wrong behavior
|
|
17
|
+
2. **Edge cases** — null/undefined inputs, empty arrays, zero values, boundary conditions, concurrent access, timeout handling, error propagation
|
|
18
|
+
3. **Requirements** — does the code do what was requested? Any requirement missed? Any functionality added that wasn't requested? Any implicit assumptions that should be explicit?
|
|
19
|
+
4. **Reconsider** — are any of your findings false positives? Are any based on assumptions you can't verify? Drop issues you're not confident about.
|
|
20
|
+
|
|
21
|
+
## What NOT to flag
|
|
22
|
+
|
|
23
|
+
- Style preferences (naming, formatting) unless they cause confusion
|
|
24
|
+
- Minor refactoring opportunities that don't affect correctness
|
|
25
|
+
- "Could be improved" suggestions without concrete bugs or risks
|
|
26
|
+
- Hypothetical issues that require unlikely conditions
|
|
27
|
+
|
|
28
|
+
## Output
|
|
29
|
+
|
|
30
|
+
Return ONLY this JSON structure:
|
|
31
|
+
|
|
32
|
+
```json
|
|
33
|
+
{
|
|
34
|
+
"status": "approved | needs_changes | reject",
|
|
35
|
+
"critical_issues": [],
|
|
36
|
+
"test_gaps": [],
|
|
37
|
+
"requirement_mismatches": [],
|
|
38
|
+
"rule_violations": [],
|
|
39
|
+
"risk_level": "low | medium | high",
|
|
40
|
+
"confidence": 0.0,
|
|
41
|
+
"repair_instructions": [],
|
|
42
|
+
"false_positives_reconsidered": []
|
|
43
|
+
}
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
- Be precise. Cite specific lines or functions in each issue.
|
|
47
|
+
- Explain WHY each issue matters — what breaks, what's the risk.
|
|
48
|
+
- `confidence` is your overall confidence in the review (0-1).
|
|
49
|
+
- `false_positives_reconsidered` lists issues you considered but dropped.
|
|
50
|
+
- Include only issues you are confident about.
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { readFileSync } from 'fs';
|
|
2
|
+
import { join, dirname } from 'path';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import { exec } from './exec-helper.js';
|
|
5
|
+
|
|
6
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
|
|
8
|
+
export class ClaudeCodeAdapter {
  /**
   * Drives the Claude Code CLI in non-interactive mode (`claude -p`)
   * and parses its `--output-format json` response.
   *
   * @param {object} [options]
   * @param {string} [options.cmd] - CLI executable (defaults to `claude`).
   * @param {string} [options.cwd] - Working directory (defaults to process.cwd()).
   */
  constructor(options = {}) {
    this.cmd = options.cmd || 'claude';
    this.cwd = options.cwd || process.cwd();
    // Session identifier reported by (or assigned to) the CLI; used for --resume.
    this.sessionName = null;
  }

  /** Re-attach this adapter to a previously recorded session id. */
  restoreSession(id) {
    this.sessionName = id;
  }

  /**
   * Send a prompt to the CLI and return its parsed JSON response.
   *
   * @param {string} prompt - Prompt text passed via `-p`.
   * @param {object} [opts]
   * @param {boolean} [opts.useSchema=false] - Constrain output with a JSON schema.
   * @param {string} [opts.schemaFile='verdict-schema.json'] - Schema file name under ../config.
   * @param {boolean} [opts.continueSession=false] - Resume the stored session.
   * @param {string} [opts.sessionName=null] - Name to assign to a new session.
   * @returns {Promise<object>} Parsed CLI output, or `{ raw, error }` when
   *   stdout is not valid JSON.
   */
  async run(prompt, { useSchema = false, schemaFile = 'verdict-schema.json', continueSession = false, sessionName = null } = {}) {
    const cliArgs = ['-p', prompt, '--output-format', 'json'];

    if (useSchema) {
      // NOTE(review): the schema CONTENTS are passed inline here, not a path —
      // confirm the CLI's --json-schema flag accepts a literal schema string.
      const schemaText = readFileSync(join(__dirname, '../config', schemaFile), 'utf-8');
      cliArgs.push('--json-schema', schemaText);
    }

    if (continueSession && this.sessionName) {
      cliArgs.push('--resume', this.sessionName);
    } else if (sessionName) {
      cliArgs.push('--name', sessionName);
      this.sessionName = sessionName;
    }

    const result = await exec(this.cmd, cliArgs);

    try {
      const payload = JSON.parse(result.stdout);
      // Remember the session id the CLI reports so follow-up calls can resume it.
      if (payload.session_id && !this.sessionName) {
        this.sessionName = payload.session_id;
      }
      return payload;
    } catch {
      return { raw: result.stdout, error: 'Failed to parse JSON output' };
    }
  }
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { join, dirname } from 'path';
|
|
2
|
+
import { fileURLToPath } from 'url';
|
|
3
|
+
import { exec } from './exec-helper.js';
|
|
4
|
+
|
|
5
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
6
|
+
|
|
7
|
+
export class CodexAdapter {
  /**
   * Drives the Codex CLI in non-interactive mode (`codex exec --json`)
   * and extracts the final agent message from its NDJSON event stream.
   *
   * @param {object} [options]
   * @param {string} [options.cmd] - CLI executable (defaults to `codex`).
   * @param {string} [options.cwd] - Working directory (defaults to process.cwd()).
   */
  constructor(options = {}) {
    this.cmd = options.cmd || 'codex';
    this.cwd = options.cwd || process.cwd();
    // Thread id reported by the CLI; used for `codex exec resume`.
    this.sessionId = null;
  }

  /** Re-attach this adapter to a previously recorded thread id. */
  restoreSession(id) {
    this.sessionId = id;
  }

  /**
   * Send a prompt to the CLI and return the agent's final message.
   *
   * @param {string} prompt - Prompt text for `codex exec`.
   * @param {object} [opts]
   * @param {boolean} [opts.useSchema=false] - Constrain output with a JSON schema file.
   * @param {string} [opts.schemaFile='verdict-schema.json'] - Schema file name under ../config.
   * @param {boolean} [opts.continueSession=false] - Resume the stored thread.
   * @param {string} [opts.sessionName=null] - Accepted for interface parity with
   *   ClaudeCodeAdapter; the Codex CLI assigns its own thread ids, so it is unused.
   * @returns {Promise<object>} `{ result, session_id }` when an agent message was
   *   found (result is parsed JSON when possible, raw text otherwise), or
   *   `{ raw, session_id }` / `{ raw, error }` on failure.
   */
  async run(prompt, { useSchema = false, schemaFile = 'verdict-schema.json', continueSession = false, sessionName = null } = {}) {
    const cliArgs = ['exec'];

    if (continueSession && this.sessionId) {
      cliArgs.push('resume', this.sessionId);
    }

    cliArgs.push(prompt, '--json');

    if (useSchema) {
      // Codex takes a schema file PATH (unlike the Claude adapter, which inlines it).
      cliArgs.push('--output-schema', join(__dirname, '../config', schemaFile));
    }

    const result = await exec(this.cmd, cliArgs);

    try {
      let agentMessage = null;
      let threadId = null;

      // stdout is one JSON event per line; scan for the thread id and the
      // last agent message, ignoring anything that isn't valid JSON.
      for (const line of result.stdout.trim().split('\n')) {
        let event;
        try {
          event = JSON.parse(line);
        } catch {
          continue; // skip non-JSON lines
        }
        if (event.type === 'thread.started' && event.thread_id) {
          threadId = event.thread_id;
        }
        if (event.type === 'item.completed' && event.item?.type === 'agent_message') {
          agentMessage = event.item.text;
        }
      }

      if (threadId && !this.sessionId) {
        this.sessionId = threadId;
      }

      if (agentMessage) {
        // The message may itself be JSON (e.g. a review verdict); fall back
        // to the raw text when it isn't.
        try {
          return { result: JSON.parse(agentMessage), session_id: this.sessionId };
        } catch {
          return { result: agentMessage, session_id: this.sessionId };
        }
      }

      return { raw: result.stdout, session_id: this.sessionId };
    } catch {
      return { raw: result.stdout, error: 'Failed to parse output' };
    }
  }
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { execFile } from 'child_process';
|
|
2
|
+
|
|
3
|
+
/**
 * Run a CLI command and capture its output.
 *
 * Resolves with `{ stdout, stderr }` whenever the process produced any
 * stdout — even on a non-zero exit — because agent CLIs may exit non-zero
 * while still emitting a usable JSON payload. Rejects only when the command
 * failed AND produced no stdout.
 *
 * @param {string} cmd - Executable to run (no shell is involved).
 * @param {string[]} args - Argument vector passed to the executable.
 * @returns {Promise<{stdout: string, stderr: string}>}
 * @throws {Error} when the process fails with empty stdout; the original
 *   execFile error (exit code, signal, ENOENT, ...) is preserved as `cause`.
 */
export function exec(cmd, args) {
  return new Promise((resolve, reject) => {
    execFile(cmd, args, {
      maxBuffer: 10 * 1024 * 1024, // 10 MB — diffs and JSON event streams can be large
      timeout: 300_000, // 5 minutes per agent invocation
    }, (error, stdout, stderr) => {
      if (error && !stdout) {
        // Keep the underlying error available to callers instead of discarding
        // its exit code / signal / errno information.
        reject(new Error(`${cmd} failed: ${stderr || error.message}`, { cause: error }));
      } else {
        resolve({ stdout, stderr });
      }
    });
  });
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { ClaudeCodeAdapter } from './claude-code.js';
|
|
2
|
+
import { CodexAdapter } from './codex.js';
|
|
3
|
+
|
|
4
|
+
// Registry mapping agent identifiers to their adapter classes.
const ADAPTERS = {
  claude_code: ClaudeCodeAdapter,
  codex: CodexAdapter,
};

/**
 * Instantiate the adapter for a named agent.
 *
 * @param {string} agentName - Key into the adapter registry (e.g. 'claude_code').
 * @param {object} config - Loaded project config; `config.agents[agentName].cmd`
 *   overrides the CLI executable when present.
 * @param {object} [options]
 * @param {string} [options.cwd] - Working directory for the adapter.
 * @returns {object} A new adapter instance.
 * @throws {Error} when `agentName` is not in the registry.
 */
export function createAdapter(agentName, config, { cwd } = {}) {
  const AdapterClass = ADAPTERS[agentName];
  if (AdapterClass === undefined) {
    const known = Object.keys(ADAPTERS).join(', ');
    throw new Error(`Unknown agent: ${agentName}. Available: ${known}`);
  }
  const agentConfig = config.agents?.[agentName] || {};
  return new AdapterClass({ cmd: agentConfig.cmd, cwd });
}

/** List the identifiers of all supported agents. */
export function listAgents() {
  return Object.keys(ADAPTERS);
}
|