@simonren/quorum 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +144 -0
- package/commands/multi-consult.md +109 -0
- package/commands/multi-review.md +139 -0
- package/dist/adapters/base.d.ts +120 -0
- package/dist/adapters/base.js +98 -0
- package/dist/adapters/claude.d.ts +25 -0
- package/dist/adapters/claude.js +217 -0
- package/dist/adapters/codex.d.ts +20 -0
- package/dist/adapters/codex.js +227 -0
- package/dist/adapters/gemini.d.ts +20 -0
- package/dist/adapters/gemini.js +197 -0
- package/dist/adapters/index.d.ts +12 -0
- package/dist/adapters/index.js +15 -0
- package/dist/cli/check.d.ts +20 -0
- package/dist/cli/check.js +78 -0
- package/dist/cli/codex.d.ts +11 -0
- package/dist/cli/codex.js +255 -0
- package/dist/cli/gemini.d.ts +12 -0
- package/dist/cli/gemini.js +253 -0
- package/dist/commands.d.ts +28 -0
- package/dist/commands.js +105 -0
- package/dist/config.d.ts +244 -0
- package/dist/config.js +179 -0
- package/dist/consult-prompt.d.ts +10 -0
- package/dist/consult-prompt.js +72 -0
- package/dist/context.d.ts +1538 -0
- package/dist/context.js +383 -0
- package/dist/decoders/claude.d.ts +53 -0
- package/dist/decoders/claude.js +106 -0
- package/dist/decoders/codex.d.ts +71 -0
- package/dist/decoders/codex.js +145 -0
- package/dist/decoders/gemini.d.ts +33 -0
- package/dist/decoders/gemini.js +58 -0
- package/dist/decoders/index.d.ts +6 -0
- package/dist/decoders/index.js +3 -0
- package/dist/errors.d.ts +46 -0
- package/dist/errors.js +192 -0
- package/dist/executor.d.ts +103 -0
- package/dist/executor.js +244 -0
- package/dist/handoff.d.ts +270 -0
- package/dist/handoff.js +599 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +134 -0
- package/dist/pipeline.d.ts +135 -0
- package/dist/pipeline.js +462 -0
- package/dist/prompt-v2.d.ts +38 -0
- package/dist/prompt-v2.js +391 -0
- package/dist/prompt.d.ts +71 -0
- package/dist/prompt.js +309 -0
- package/dist/schema.d.ts +660 -0
- package/dist/schema.js +536 -0
- package/dist/tools/consult.d.ts +104 -0
- package/dist/tools/consult.js +220 -0
- package/dist/tools/feedback.d.ts +91 -0
- package/dist/tools/feedback.js +117 -0
- package/dist/types.d.ts +105 -0
- package/dist/types.js +31 -0
- package/package.json +54 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 SimonRen
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
# Quorum - Multi-Model AI Review & Consultation for Claude Code
|
|
2
|
+
|
|
3
|
+
Convene a quorum of AI models (OpenAI Codex, Google Gemini, Claude) to review Claude Code's work or answer questions — Claude Code synthesizes one verdict.
|
|
4
|
+
|
|
5
|
+
## Quick Install
|
|
6
|
+
|
|
7
|
+
**Step 1: Add the MCP server**
|
|
8
|
+
```bash
|
|
9
|
+
claude mcp add -s user quorum -- npx -y @simonren/quorum
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
**Step 2: Restart Claude Code**
|
|
13
|
+
|
|
14
|
+
The MCP tools and slash commands (`/multi-review`, `/multi-consult`) are automatically installed.
|
|
15
|
+
|
|
16
|
+
**Manual command install** (if needed):
|
|
17
|
+
```bash
|
|
18
|
+
npx -y @simonren/quorum update
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Verify with:
|
|
22
|
+
```bash
|
|
23
|
+
claude mcp list
|
|
24
|
+
# quorum: npx -y @simonren/quorum - ✓ Connected
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Alternative: Manual Install
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
git clone https://github.com/SimonRen/quorum.git
|
|
31
|
+
cd quorum/mcp-server
|
|
32
|
+
npm install && npm run build
|
|
33
|
+
claude mcp add -s user quorum -- node /path/to/quorum/mcp-server/dist/index.js
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Prerequisites
|
|
37
|
+
|
|
38
|
+
Install at least one AI CLI:
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# OpenAI Codex CLI
|
|
42
|
+
npm install -g @openai/codex
|
|
43
|
+
codex login
|
|
44
|
+
|
|
45
|
+
# Google Gemini CLI
|
|
46
|
+
npm install -g @google/gemini-cli
|
|
47
|
+
gemini # follow auth prompts
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
These tools provide **external second-opinion reviews and consultations** from the Codex, Gemini, and Claude CLIs. They are designed to complement Claude Code's native review capabilities, not replace them.
|
|
53
|
+
|
|
54
|
+
**Slash commands:**
|
|
55
|
+
- `/multi-review` - Parallel standard + adversarial reviews from all available CLIs (Codex, Gemini, Claude). For reviewing CC-produced work (plan, findings, code).
|
|
56
|
+
- `/multi-consult` - Ask all CLIs the same question and synthesize their answers. For consultation/Q&A — what's the best approach, how to solve X.
|
|
57
|
+
|
|
58
|
+
**For regular reviews:** Just say "review" and Claude Code will use its native capabilities. These external tools are only invoked when explicitly requested.
|
|
59
|
+
|
|
60
|
+
## Slash Commands
|
|
61
|
+
|
|
62
|
+
These commands are available after restart:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
/multi-review # Parallel standard + adversarial reviews from all CLIs
|
|
66
|
+
/multi-review focus on race conditions # Steer the adversarial focus
|
|
67
|
+
/multi-consult <question> # Ask all CLIs and synthesize
|
|
68
|
+
/multi-consult <question> [flex] # Use Codex flex tier (cheaper/slower)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## How It Works
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
CC does work → User: /multi-review → External CLIs review → CC synthesizes → Final output
|
|
75
|
+
User has a question → User: /multi-consult → External CLIs answer → CC synthesizes → Consolidated answer
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Key Principles:**
|
|
79
|
+
- **CC is primary**: Claude Code does all the work; external models only review
|
|
80
|
+
- **Working directory strategy**: Pass `cwd` + small CC output; external CLIs read files directly
|
|
81
|
+
- **Synthesis, not passthrough**: CC always judges external feedback before incorporating
|
|
82
|
+
|
|
83
|
+
## Focus Areas
|
|
84
|
+
|
|
85
|
+
| Area | Description |
|
|
86
|
+
|------|-------------|
|
|
87
|
+
| `security` | Vulnerabilities, auth, input validation |
|
|
88
|
+
| `performance` | Speed, memory, efficiency |
|
|
89
|
+
| `architecture` | Design patterns, structure, coupling |
|
|
90
|
+
| `correctness` | Logic errors, edge cases, bugs |
|
|
91
|
+
| `maintainability` | Code clarity, documentation, complexity |
|
|
92
|
+
| `scalability` | Load handling, bottlenecks |
|
|
93
|
+
| `testing` | Test coverage, test quality |
|
|
94
|
+
| `documentation` | Comments, docs, API docs |
|
|
95
|
+
|
|
96
|
+
## MCP Tools
|
|
97
|
+
|
|
98
|
+
The plugin exposes two MCP tools:
|
|
99
|
+
|
|
100
|
+
| Tool | Description |
|
|
101
|
+
|------|-------------|
|
|
102
|
+
| `multi_review` | Parallel standard + adversarial review from all available CLIs (Codex, Gemini, Claude). Requires `ccOutput`. |
|
|
103
|
+
| `multi_consult` | Ask all available CLIs the same question and receive a 5-section structured response per model. For consultation/Q&A. |
|
|
104
|
+
|
|
105
|
+
## Output Format
|
|
106
|
+
|
|
107
|
+
**Review tools** return structured feedback from the external CLIs. Claude Code parses this feedback to identify:
|
|
108
|
+
- **Findings**: Issues with severity, confidence, location, and suggestions
|
|
109
|
+
- **Agreements**: Validations of CC's correct assessments
|
|
110
|
+
- **Disagreements**: Challenges to CC's claims with corrections
|
|
111
|
+
- **Alternatives**: Different approaches with tradeoffs
|
|
112
|
+
- **Risk Assessment**: Overall risk level with top concerns
|
|
113
|
+
|
|
114
|
+
## Development
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
cd mcp-server
|
|
118
|
+
npm install
|
|
119
|
+
npm run build # Build once
|
|
120
|
+
npm run dev # Watch mode
|
|
121
|
+
npm test # Run tests
|
|
122
|
+
npm run test:watch # Watch mode tests
|
|
123
|
+
npm start # Run server
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Publishing
|
|
127
|
+
|
|
128
|
+
Release-based publish via npm Trusted Publishing (OIDC, no tokens needed).
|
|
129
|
+
CI triggers on GitHub Release, validates the tag matches `package.json`.
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
# 1. Bump version in package.json
|
|
133
|
+
# 2. Rebuild and test
|
|
134
|
+
npm run build && npm test
|
|
135
|
+
# 3. Commit, tag, push, release
|
|
136
|
+
git add -A && git commit -m "vX.Y.Z"
|
|
137
|
+
git tag vX.Y.Z
|
|
138
|
+
git push && git push --tags
|
|
139
|
+
gh release create vX.Y.Z --title "vX.Y.Z" --generate-notes
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## License
|
|
143
|
+
|
|
144
|
+
MIT
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Multi Consult
|
|
2
|
+
|
|
3
|
+
Ask Codex, Gemini, and Claude (Opus, fresh context) the same question in parallel and synthesize their answers. Use this for **consultation** — finding the best approach, weighing alternatives, getting a panel's take. NOT for reviewing work CC has already done (use `/multi-review` for that).
|
|
4
|
+
|
|
5
|
+
## Arguments
|
|
6
|
+
- `$ARGUMENTS` — the question itself, optional steering, or both
|
|
7
|
+
|
|
8
|
+
## When to Use
|
|
9
|
+
|
|
10
|
+
Use `/multi-consult` when you have a question or problem and want a synthesized panel opinion. The panel responds in a fixed 5-section structure (Recommendation / Reasoning / Tradeoffs / Risks / Open questions). CC reads all three responses and presents one consolidated answer with a "Models said:" provenance footer.
|
|
11
|
+
|
|
12
|
+
## Examples
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
/multi-consult Should we use Postgres or DynamoDB for a write-heavy timeseries workload?
|
|
16
|
+
/multi-consult How should I refactor the auth middleware? Focus on rollback safety.
|
|
17
|
+
/multi-consult What's the cleanest way to memoize this expensive selector? [flex]
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Before Calling - PREPARE THE HANDOFF
|
|
21
|
+
|
|
22
|
+
### 1. Pre-compose the question
|
|
23
|
+
|
|
24
|
+
**`$ARGUMENTS` parsing rule (pinned):**
|
|
25
|
+
|
|
26
|
+
- **If conversation context already contains the question CC has been working on:** compose `question` from that context. `$ARGUMENTS` is treated as pure steering — extract reserved tokens (see below) into schema fields; remainder goes into `customPrompt`.
|
|
27
|
+
- **Otherwise — `$ARGUMENTS` IS the literal question.** Set `customPrompt` to empty. Reserved tokens are extracted *only* when they appear at the *end* of `$ARGUMENTS` inside brackets or parens — e.g., `... [flex]`, `... (high reasoning)`. A bare occurrence of `flex` / `cheap` / `default tier` inside the prose is treated as part of the question, NOT a flag, to avoid corrupting questions like *"Should we offer a flex tier or default tier for customers?"*.
|
|
28
|
+
|
|
29
|
+
### 2. Triage code-grounded questions
|
|
30
|
+
|
|
31
|
+
If the question references the codebase, populate `relevantFiles` with the minimal subset (3-10 files typically) the panel needs. For purely general questions ("Postgres vs Mongo for X workload?"), omit `relevantFiles` — the panel will answer from expertise without trawling the filesystem.
|
|
32
|
+
|
|
33
|
+
### 3. Refuse sensitive working directories
|
|
34
|
+
|
|
35
|
+
If the current working directory is `/etc`, `~`, `~/.ssh`, or any other clearly sensitive system path, **refuse**. Tell the user: "Please invoke `/multi-consult` from a project root — `<cwd>` looks sensitive." Do not call the tool.
|
|
36
|
+
|
|
37
|
+
### 4. Extract criteria; clarify load-bearing assumptions BEFORE calling
|
|
38
|
+
|
|
39
|
+
Pin what the question is being judged against. Once criteria are explicit, the panel's recommendation is anchored to them instead of floating — this is the fix for "ask twice, get a different answer." Stochastic re-runs converge much better against fixed criteria than against an under-specified question.
|
|
40
|
+
|
|
41
|
+
**4a. Append a CRITERIA block to the end of `question`**, priority-ordered, each tagged `[stated]` or `[assumed]`:
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
CRITERIA (priority order):
|
|
45
|
+
1. [stated] cost-per-request under $X / 1M ops
|
|
46
|
+
2. [stated] team writes Go; minimize ops complexity
|
|
47
|
+
3. [assumed] sustained ~10k QPS write rate
|
|
48
|
+
4. [assumed] eventual consistency acceptable for analytics
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
- `[stated]` = explicit in the user's message or earlier conversation.
|
|
52
|
+
- `[assumed]` = you needed to fix it to recommend; the user did NOT say.
|
|
53
|
+
- Cap `[assumed]` criteria at 3. If you would need more than 3 assumptions to make a recommendation, the question is too vague — bounce it back to the user before calling.
|
|
54
|
+
|
|
55
|
+
**4b. Pre-call clarification gate.** Scan your `[assumed]` criteria. If any is **load-bearing** (the recommendation would flip if the assumption is wrong), STOP and ask the user before invoking the tool:
|
|
56
|
+
|
|
57
|
+
> "Before I consult the panel, I need to confirm: <restate assumption>. Is that right, or should I adjust to <plausible alternative>?"
|
|
58
|
+
|
|
59
|
+
A burned panel call on a wrong assumed criterion costs more than the round-trip.
|
|
60
|
+
|
|
61
|
+
**Skip the gate when:**
|
|
62
|
+
- `[stated]` criteria fully pin the answer space (no assumptions needed).
|
|
63
|
+
- The user told you to proceed without clarification.
|
|
64
|
+
- Remaining assumptions are clearly incidental (would not flip the rec).
|
|
65
|
+
|
|
66
|
+
## Tool Invocation
|
|
67
|
+
|
|
68
|
+
Call `multi_consult` with:
|
|
69
|
+
|
|
70
|
+
```json
|
|
71
|
+
{
|
|
72
|
+
"workingDir": "<current directory>",
|
|
73
|
+
"question": "<CC-composed question OR literal $ARGUMENTS minus end-bracket reserved tokens>",
|
|
74
|
+
"relevantFiles": ["<file1>", "<file2>"],
|
|
75
|
+
"customPrompt": "<steering text or empty>"
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### Reserved-token mappings (only when bracketed at end of $ARGUMENTS)
|
|
80
|
+
|
|
81
|
+
- `[flex]` / `[cheap]` / `[budget]` → `serviceTier: "flex"`
|
|
82
|
+
- `[default tier]` / `[standard tier]` → `serviceTier: "default"`
|
|
83
|
+
- `[high reasoning]` → `reasoningEffort: "high"` (overrides default `xhigh`)
|
|
84
|
+
|
|
85
|
+
If the user types one of these mid-question (not in brackets), leave it in the question.
|
|
86
|
+
|
|
87
|
+
## After Receiving the Panel
|
|
88
|
+
|
|
89
|
+
You will receive each model's structured 5-section response. Some may carry a `⚠️ Format drift: missing sections [...]` marker — degrade synthesis confidence accordingly for that model.
|
|
90
|
+
|
|
91
|
+
### Synthesize
|
|
92
|
+
|
|
93
|
+
1. **Cross-compare Recommendations.** Agreement across all three → high confidence. 2-vs-1 split → take a side and *surface the dissent explicitly* in your answer (don't flatten it). All three disagree → present the tradeoff space honestly and pick.
|
|
94
|
+
2. **Mine Tradeoffs and Risks.** Even when models agree on the recommendation, the *reasons* and *risks* often diverge — surface the union, not just the intersection. If a single model raised a Risk the others missed, surface it as "1 model raised: …" — *do not silently drop it.*
|
|
95
|
+
3. **Forward Open questions** to the user only if material — do not dump every "what's your scale?" clarifier.
|
|
96
|
+
4. **Apply your own judgment.** You have full conversation context the panel does not; you may dismiss panel suggestions that miss the user's actual constraint, but say so explicitly when overriding.
|
|
97
|
+
5. **Respond with one consolidated answer**, structured as: **Recommendation** (what to do) → **Why** (synthesis of reasoning) → **Watch out for** (consolidated risks, including any single-model-only risks) → optional **Open question for you** if a real ambiguity blocks the answer.
|
|
98
|
+
6. **Append a "Models said:" provenance footer** — a single line per model with the recommendation in <80 chars. Example:
|
|
99
|
+
|
|
100
|
+
```
|
|
101
|
+
---
|
|
102
|
+
**Models said:** Codex → Postgres + read replicas. Gemini → Postgres + Citus. Claude → DynamoDB w/ caveat on cost at scale.
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
This is **non-negotiable**. The footer is the audit trail; without it, a synthesis-only answer is opaque.
|
|
106
|
+
7. **Do NOT paste full raw model outputs to the user** unless they explicitly ask ("show me what each model said", "raw").
|
|
107
|
+
8. **All-failed special case:** if the header is `❌ All Failed`, surface the failure types and **ASK** the user *"Panel unavailable — want my solo answer instead?"*. **Do NOT silently substitute** your own answer for the panel's.
|
|
108
|
+
|
|
109
|
+
$ARGUMENTS
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Multi Review
|
|
2
|
+
|
|
3
|
+
Get parallel standard AND adversarial reviews from all available models (Codex, Gemini, Claude Opus).
|
|
4
|
+
|
|
5
|
+
Each model runs twice: once as a standard reviewer (finding bugs, issues, improvements) and once as an adversarial challenger (breaking confidence in the change, questioning assumptions, targeting hidden failure paths). Results are presented in two sections.
|
|
6
|
+
|
|
7
|
+
Use `$ARGUMENTS` to steer the adversarial focus (e.g., "focus the challenge on race conditions and rollback safety").
|
|
8
|
+
|
|
9
|
+
## Arguments
|
|
10
|
+
- `$ARGUMENTS` - Optional: focus area, custom instructions, or adversarial steering
|
|
11
|
+
|
|
12
|
+
## When to Use
|
|
13
|
+
|
|
14
|
+
Use `/multi-review` when you want thorough parallel reviews from all available models. Every invocation includes both standard and adversarial passes.
|
|
15
|
+
|
|
16
|
+
## Examples
|
|
17
|
+
|
|
18
|
+
```
|
|
19
|
+
/multi-review
|
|
20
|
+
/multi-review focus the challenge on race conditions and rollback safety
|
|
21
|
+
/multi-review challenge whether this was the right caching and retry design
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Before Calling - PREPARE THE HANDOFF
|
|
25
|
+
|
|
26
|
+
### 1. Summarize What You Did + State the Acceptance Bar
|
|
27
|
+
|
|
28
|
+
Don't just say what you did — also state the bar the work needs to clear. The bar is what lets reviewers calibrate "material" vs "nice to have." Without it, reviewers default to general code-quality vibes, which produces drift across runs.
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
"Implemented caching layer for the product catalog API using Redis with cache invalidation on product updates.
|
|
32
|
+
Bar: safe under concurrent updates (no stale reads on the next request) AND p95 read latency under 50ms."
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### 2. List Your Uncertainties — Tag Load-Bearing vs Incidental
|
|
36
|
+
|
|
37
|
+
Tag each uncertainty:
|
|
38
|
+
- `[load-bearing]` = if your assumption here is wrong, the work is NOT shipping-ready
|
|
39
|
+
- `[incidental]` = nice to verify but won't block ship
|
|
40
|
+
|
|
41
|
+
Reviewers prioritize accordingly, and your synthesis can elevate `[load-bearing]` items above stylistic findings.
|
|
42
|
+
|
|
43
|
+
```
|
|
44
|
+
UNCERTAINTIES:
|
|
45
|
+
- [load-bearing] "Is the cache invalidation race-free under concurrent updates?"
|
|
46
|
+
- [incidental] "Is the TTL value optimal — could it be 60s instead of 30s?"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### 3. Ask Specific Questions
|
|
50
|
+
```
|
|
51
|
+
QUESTIONS:
|
|
52
|
+
- "Should I use write-through or write-behind caching?"
|
|
53
|
+
- "Is there a race condition in the invalidation logic?"
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### 4. Identify Decisions You Made
|
|
57
|
+
|
|
58
|
+
If you chose between alternatives — caching strategy, retry policy, error-handling shape, schema design, etc. — list them with rationale. The handoff schema's `decisions[]` field gives the adversarial reviewer a concrete hook to attack the design choice rather than just hunt for bugs. Skip if the change is a straightforward bug fix with no design choice involved.
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
DECISIONS:
|
|
62
|
+
1. Chose write-through cache over write-behind. Rationale: stronger read-after-write consistency at the cost of slightly slower writes; we prioritize correctness for catalog data.
|
|
63
|
+
2. Chose 30s TTL with explicit invalidation on update. Rationale: TTL bounds staleness if invalidation misses; explicit invalidation catches the common path immediately.
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Tool Invocation
|
|
67
|
+
|
|
68
|
+
Call `multi_review` with:
|
|
69
|
+
|
|
70
|
+
```json
|
|
71
|
+
{
|
|
72
|
+
"workingDir": "<current directory>",
|
|
73
|
+
"ccOutput": "<structured handoff>",
|
|
74
|
+
"outputType": "analysis",
|
|
75
|
+
"focusAreas": ["<from $ARGUMENTS>"],
|
|
76
|
+
"customPrompt": "<steering text from $ARGUMENTS for adversarial focus>"
|
|
77
|
+
}
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Service Tier (from $ARGUMENTS, applies to Codex only)
|
|
81
|
+
- If user says "flex", "cheap", or "budget" → set `serviceTier: "flex"`
|
|
82
|
+
- If user says "default tier" or "standard tier" → set `serviceTier: "default"`
|
|
83
|
+
- Otherwise → omit `serviceTier` (defaults to `"fast"` — priority processing, ~2x cost)
|
|
84
|
+
|
|
85
|
+
### Structure your ccOutput:
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
SUMMARY:
|
|
89
|
+
<what you did, 1-3 sentences>
|
|
90
|
+
Bar: <what counts as shipping-ready — concrete acceptance criteria>
|
|
91
|
+
|
|
92
|
+
UNCERTAINTIES (verify these):
|
|
93
|
+
1. [load-bearing|incidental] <uncertainty>
|
|
94
|
+
2. [load-bearing|incidental] <uncertainty>
|
|
95
|
+
|
|
96
|
+
QUESTIONS:
|
|
97
|
+
1. <question>
|
|
98
|
+
|
|
99
|
+
DECISIONS:
|
|
100
|
+
1. <choice>. Rationale: <why this over alternatives>
|
|
101
|
+
2. <choice>. Rationale: <why this over alternatives>
|
|
102
|
+
|
|
103
|
+
PRIORITY FILES:
|
|
104
|
+
- <file>
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## After Receiving Review
|
|
108
|
+
|
|
109
|
+
You will receive two sections: **Standard Review Findings** and **Challenge Review Findings**.
|
|
110
|
+
|
|
111
|
+
### Synthesize
|
|
112
|
+
|
|
113
|
+
1. **Standard findings** — bugs, issues, improvements from each model
|
|
114
|
+
- Find agreements across models (higher confidence)
|
|
115
|
+
- Identify conflicts (YOU decide who's right)
|
|
116
|
+
|
|
117
|
+
2. **Challenge findings** — adversarial challenges from each model
|
|
118
|
+
- These target assumptions and design decisions, not just bugs
|
|
119
|
+
- Evaluate on merit — some challenges are speculative by design
|
|
120
|
+
- Strong challenges with evidence deserve serious consideration
|
|
121
|
+
|
|
122
|
+
3. **Cross-reference** standard vs challenge findings
|
|
123
|
+
- Standard + challenge agreement = high confidence issue
|
|
124
|
+
- Challenge-only finding = investigate further before acting
|
|
125
|
+
|
|
126
|
+
4. **Verify all findings**
|
|
127
|
+
- Check file/line references exist
|
|
128
|
+
- Read actual code
|
|
129
|
+
- Mark your confidence:
|
|
130
|
+
- ✓✓ Verified
|
|
131
|
+
- ✓ Plausible
|
|
132
|
+
- ? Investigate
|
|
133
|
+
- ✗ Rejected
|
|
134
|
+
|
|
135
|
+
5. **Make YOUR recommendation**
|
|
136
|
+
- Don't just relay findings
|
|
137
|
+
- Apply your judgment
|
|
138
|
+
|
|
139
|
+
$ARGUMENTS
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Adapter Interface for AI Reviewers
|
|
3
|
+
*
|
|
4
|
+
* This provides a generic interface that any AI CLI can implement.
|
|
5
|
+
* Makes it easy to add new models (Ollama, Azure, etc.) without
|
|
6
|
+
* changing the core orchestration logic.
|
|
7
|
+
*/
|
|
8
|
+
import { FocusArea, OutputType, ReasoningEffort, ServiceTier } from '../types.js';
|
|
9
|
+
/**
 * Descriptive metadata a reviewer reports about itself.
 * This is the return type of `ReviewerAdapter.getCapabilities()`.
 */
export interface ReviewerCapabilities {
|
|
10
|
+
/** Display name for this reviewer */
|
|
11
|
+
name: string;
|
|
12
|
+
/** Short description of the reviewer's strengths */
|
|
13
|
+
description: string;
|
|
14
|
+
/** Focus areas this reviewer excels at */
|
|
15
|
+
strengths: FocusArea[];
|
|
16
|
+
/** Focus areas this reviewer is weaker at */
|
|
17
|
+
weaknesses: FocusArea[];
|
|
18
|
+
/** Whether the reviewer can read files from the filesystem */
|
|
19
|
+
hasFilesystemAccess: boolean;
|
|
20
|
+
/** Whether the reviewer supports JSON structured output */
|
|
21
|
+
supportsStructuredOutput: boolean;
|
|
22
|
+
/** Maximum context window size (tokens) */
|
|
23
|
+
maxContextTokens: number;
|
|
24
|
+
/** Supported reasoning effort levels (if applicable) */
|
|
25
|
+
reasoningLevels?: ReasoningEffort[];
|
|
26
|
+
}
|
|
27
|
+
/**
 * Input passed to `ReviewerAdapter.runReview()`.
 * Only `workingDir`, `ccOutput` and `outputType` are required; every other
 * field is optional steering.
 */
export interface ReviewRequest {
|
|
28
|
+
/** Working directory containing the code */
|
|
29
|
+
workingDir: string;
|
|
30
|
+
/** Claude Code's output to review */
|
|
31
|
+
ccOutput: string;
|
|
32
|
+
/** Type of output being reviewed */
|
|
33
|
+
outputType: OutputType;
|
|
34
|
+
/** Specific files that CC analyzed */
|
|
35
|
+
analyzedFiles?: string[];
|
|
36
|
+
/** Areas to focus the review on */
|
|
37
|
+
focusAreas?: FocusArea[];
|
|
38
|
+
/** Custom instructions from the user */
|
|
39
|
+
customPrompt?: string;
|
|
40
|
+
/** Reasoning effort level (for models that support it) */
|
|
41
|
+
reasoningEffort?: ReasoningEffort;
|
|
42
|
+
/** Service tier (Codex). Omit for the review chain's default 'fast' (priority). Pass 'flex' for cheap/slow or 'default' for the Codex API default tier. */
|
|
43
|
+
serviceTier?: ServiceTier;
|
|
44
|
+
/** Review mode: standard finds bugs, adversarial challenges assumptions */
|
|
45
|
+
reviewMode?: 'standard' | 'adversarial';
|
|
46
|
+
}
|
|
47
|
+
/**
 * Input passed to `ReviewerAdapter.runConsult()`.
 * Only `workingDir` and `question` are required; the remaining fields are
 * optional.
 */
export interface ConsultRequest {
|
|
48
|
+
/** Working directory containing the code (always passed) */
|
|
49
|
+
workingDir: string;
|
|
50
|
+
/** CC-composed, self-contained question for the panel */
|
|
51
|
+
question: string;
|
|
52
|
+
/** CC-triaged file subset for code-grounded questions; omitted on general questions */
|
|
53
|
+
relevantFiles?: string[];
|
|
54
|
+
/** Free-form steering from $ARGUMENTS */
|
|
55
|
+
customPrompt?: string;
|
|
56
|
+
/** Reasoning effort (Codex). Default 'xhigh' for consult (deeper questions). */
|
|
57
|
+
reasoningEffort?: ReasoningEffort;
|
|
58
|
+
/** Service tier (Codex). Same defaulting rules as ReviewRequest. */
|
|
59
|
+
serviceTier?: ServiceTier;
|
|
60
|
+
}
|
|
61
|
+
/** Result of `runConsult()`; an alias of ReviewResult, so consultations use the same success/failure union as reviews. */
export type ConsultResult = ReviewResult;
|
|
62
|
+
/**
 * Shape of one predefined expert role: display name, description, system
 * prompt, the focus areas it covers and its evaluation criteria.
 * @deprecated Use handoff.ts roles instead
 */
|
|
63
|
+
export interface ExpertRole {
|
|
64
|
+
name: string;
|
|
65
|
+
description: string;
|
|
66
|
+
systemPrompt: string;
|
|
67
|
+
focusAreas: FocusArea[];
|
|
68
|
+
evaluationCriteria: string[];
|
|
69
|
+
}
|
|
70
|
+
/**
 * Predefined expert roles, keyed by role id.
 * @deprecated Use handoff.ts selectRole() instead
 */
|
|
71
|
+
export declare const EXPERT_ROLES: Record<string, ExpertRole>;
|
|
72
|
+
/**
 * Pick one of the predefined expert roles for the given focus areas.
 * When `focusAreas` is omitted or empty, the general reviewer role is returned.
 * @deprecated Use handoff.ts selectRole() instead
 */
|
|
73
|
+
export declare function selectExpertRole(focusAreas?: FocusArea[]): ExpertRole;
|
|
74
|
+
/**
 * Successful branch of ReviewResult, identified by `success: true`.
 * Carries the reviewer's `output` string and `executionTimeMs`.
 */
export interface ReviewSuccess {
|
|
75
|
+
success: true;
|
|
76
|
+
output: string;
|
|
77
|
+
executionTimeMs: number;
|
|
78
|
+
}
|
|
79
|
+
/**
 * Failed branch of ReviewResult, identified by `success: false`.
 * Carries a structured `error`, plus an optional `suggestion` and optional
 * `rawOutput`, alongside `executionTimeMs`.
 */
export interface ReviewFailure {
|
|
80
|
+
success: false;
|
|
81
|
+
error: ReviewError;
|
|
82
|
+
suggestion?: string;
|
|
83
|
+
rawOutput?: string;
|
|
84
|
+
executionTimeMs: number;
|
|
85
|
+
}
|
|
86
|
+
/** Outcome of a reviewer call; check the `success` flag to tell ReviewSuccess from ReviewFailure. */
export type ReviewResult = ReviewSuccess | ReviewFailure;
|
|
87
|
+
/**
 * Structured error carried by ReviewFailure: `type` is one of a fixed set of
 * failure categories, `message` is the error text, and `details` is an
 * optional free-form record.
 */
export interface ReviewError {
|
|
88
|
+
type: 'cli_not_found' | 'timeout' | 'rate_limit' | 'auth_error' | 'invalid_response' | 'cli_error' | 'parse_error';
|
|
89
|
+
message: string;
|
|
90
|
+
details?: Record<string, unknown>;
|
|
91
|
+
}
|
|
92
|
+
/**
|
|
93
|
+
* Base interface that all reviewer adapters must implement.
|
|
94
|
+
* This allows easy addition of new AI CLIs without changing orchestration logic.
|
|
95
|
+
*/
|
|
96
|
+
export interface ReviewerAdapter {
|
|
97
|
+
/** Unique identifier for this adapter; registerAdapter() uses it as the registry key */
|
|
98
|
+
readonly id: string;
|
|
99
|
+
/** Get capabilities and metadata for this reviewer */
|
|
100
|
+
getCapabilities(): ReviewerCapabilities;
|
|
101
|
+
/** Check if the CLI is available and properly configured; getAvailableAdapters() keeps only adapters for which this resolves to a truthy value */
|
|
102
|
+
isAvailable(): Promise<boolean>;
|
|
103
|
+
/** Run a review; resolves to a ReviewResult (ReviewSuccess or ReviewFailure) */
|
|
104
|
+
runReview(request: ReviewRequest): Promise<ReviewResult>;
|
|
105
|
+
/** Run a consultation (Q&A) — required on every adapter. */
|
|
106
|
+
runConsult(request: ConsultRequest): Promise<ConsultResult>;
|
|
107
|
+
/**
|
|
108
|
+
* Optional: Run peer review of another model's output
|
|
109
|
+
* Future capability - not currently implemented by any adapter
|
|
110
|
+
*/
|
|
111
|
+
runPeerReview?(originalRequest: ReviewRequest, reviewToScore: string): Promise<ReviewResult>;
|
|
112
|
+
}
|
|
113
|
+
/** Add an adapter to the registry under its `id`; registering the same id again replaces the earlier adapter. */
export declare function registerAdapter(adapter: ReviewerAdapter): void;
|
|
114
|
+
/** Look up a registered adapter by id; returns `undefined` when nothing is registered under that id. */
export declare function getAdapter(id: string): ReviewerAdapter | undefined;
|
|
115
|
+
/** Return every registered adapter as a new array. */
export declare function getAllAdapters(): ReviewerAdapter[];
|
|
116
|
+
/** Resolve to the registered adapters whose `isAvailable()` check reported them as available. */
export declare function getAvailableAdapters(): Promise<ReviewerAdapter[]>;
|
|
117
|
+
/**
|
|
118
|
+
* Select the best available adapter for given focus areas.
* Resolves to `null` when no adapter is available.
|
|
119
|
+
*/
|
|
120
|
+
export declare function selectBestAdapter(focusAreas?: FocusArea[]): Promise<ReviewerAdapter | null>;
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Adapter Interface for AI Reviewers
|
|
3
|
+
*
|
|
4
|
+
* This provides a generic interface that any AI CLI can implement.
|
|
5
|
+
* Makes it easy to add new models (Ollama, Azure, etc.) without
|
|
6
|
+
* changing the core orchestration logic.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Predefined expert roles, keyed by role id. Each entry holds a display
 * name, a description, a system prompt, the focus areas it covers and its
 * evaluation criteria. selectExpertRole() returns entries from this object.
 * @deprecated Use handoff.ts selectRole() instead
 */
|
|
9
|
+
export const EXPERT_ROLES = {
|
|
10
|
+
security_auditor: {
|
|
11
|
+
name: 'Security Auditor', description: 'Security vulnerabilities',
|
|
12
|
+
systemPrompt: 'Security auditor. Focus on injection, auth bypass, data exposure, input validation.',
|
|
13
|
+
focusAreas: ['security'], evaluationCriteria: ['Injection', 'Auth', 'Data exposure'],
|
|
14
|
+
},
|
|
15
|
+
performance_engineer: {
|
|
16
|
+
name: 'Performance Engineer', description: 'Performance optimization',
|
|
17
|
+
systemPrompt: 'Performance engineer. Focus on complexity, N+1 queries, memory leaks.',
|
|
18
|
+
focusAreas: ['performance', 'scalability'], evaluationCriteria: ['Complexity', 'Memory', 'I/O'],
|
|
19
|
+
},
|
|
20
|
+
architect: {
|
|
21
|
+
name: 'Software Architect', description: 'Architecture and design',
|
|
22
|
+
systemPrompt: 'Software architect. Focus on SOLID, coupling, abstractions.',
|
|
23
|
+
focusAreas: ['architecture', 'maintainability'], evaluationCriteria: ['SOLID', 'Coupling', 'Patterns'],
|
|
24
|
+
},
|
|
25
|
+
correctness_analyst: {
|
|
26
|
+
name: 'Correctness Analyst', description: 'Logic errors and bugs',
|
|
27
|
+
systemPrompt: 'Correctness analyst. Focus on logic errors, edge cases, race conditions.',
|
|
28
|
+
focusAreas: ['correctness', 'testing'], evaluationCriteria: ['Logic', 'Edge cases', 'Concurrency'],
|
|
29
|
+
},
|
|
30
|
+
// Fallback role: selectExpertRole() returns it when no focus area is given or none matches.
general_reviewer: {
|
|
31
|
+
name: 'General Reviewer', description: 'Balanced review',
|
|
32
|
+
systemPrompt: 'Senior engineer. Review correctness, security, performance, maintainability.',
|
|
33
|
+
focusAreas: ['security', 'performance', 'architecture', 'correctness', 'maintainability'],
|
|
34
|
+
evaluationCriteria: ['Correctness', 'Security', 'Performance', 'Quality'],
|
|
35
|
+
},
|
|
36
|
+
};
|
|
37
|
+
/**
 * Map a list of focus areas to one entry of EXPERT_ROLES.
 *
 * The checks run in a fixed priority order (security, then
 * performance/scalability, then architecture/maintainability, then
 * correctness/testing) and the first match wins, so a list naming several
 * areas yields the highest-priority role only.
 *
 * @param focusAreas Optional list of focus-area names.
 * @returns The matching role, or the general reviewer role when the list is
 *          missing, empty, or matches none of the checks.
 * @deprecated Use handoff.ts selectRole() instead
 */
|
|
38
|
+
export function selectExpertRole(focusAreas) {
|
|
39
|
+
// Missing or empty list: nothing to match on, use the fallback role.
if (!focusAreas || focusAreas.length === 0)
|
|
40
|
+
return EXPERT_ROLES.general_reviewer;
|
|
41
|
+
// From here on the order of the checks is the priority order.
if (focusAreas.includes('security'))
|
|
42
|
+
return EXPERT_ROLES.security_auditor;
|
|
43
|
+
if (focusAreas.includes('performance') || focusAreas.includes('scalability'))
|
|
44
|
+
return EXPERT_ROLES.performance_engineer;
|
|
45
|
+
if (focusAreas.includes('architecture') || focusAreas.includes('maintainability'))
|
|
46
|
+
return EXPERT_ROLES.architect;
|
|
47
|
+
if (focusAreas.includes('correctness') || focusAreas.includes('testing'))
|
|
48
|
+
return EXPERT_ROLES.correctness_analyst;
|
|
49
|
+
// None of the areas above was listed.
return EXPERT_ROLES.general_reviewer;
|
|
50
|
+
}
|
|
51
|
+
// =============================================================================
|
|
52
|
+
// ADAPTER REGISTRY
|
|
53
|
+
// =============================================================================
|
|
54
|
+
/** Module-private store of registered adapters, keyed by adapter id. */
const adapterRegistry = new Map();
|
|
55
|
+
/**
 * Store an adapter in the registry under its `id`.
 *
 * The registry is a Map, so registering a second adapter with the same id
 * replaces the first.
 *
 * @param adapter - Adapter to register; its `id` property is the key.
 */
export function registerAdapter(adapter) {
    const { id } = adapter;
    adapterRegistry.set(id, adapter);
}
|
|
58
|
+
/**
 * Look up a registered adapter by id.
 *
 * @param id - Key the adapter was registered under.
 * @returns The adapter, or `undefined` when nothing is registered for `id`.
 */
export function getAdapter(id) {
    const registered = adapterRegistry.get(id);
    return registered;
}
|
|
61
|
+
/**
 * List every registered adapter.
 *
 * @returns A new array of the registry's values, in the Map's iteration
 *   (insertion) order.
 */
export function getAllAdapters() {
    return [...adapterRegistry.values()];
}
|
|
64
|
+
/**
 * List the registered adapters whose availability probe succeeds.
 *
 * All probes are started concurrently. A probe that throws or rejects is
 * treated as "not available" for that adapter, so a single failing adapter
 * cannot make the whole lookup reject and hide the healthy ones.
 *
 * @returns The adapters whose `isAvailable()` resolved to a truthy value,
 *   in registry order.
 */
export async function getAvailableAdapters() {
    const adapters = getAllAdapters();
    const flags = await Promise.all(adapters.map(async (adapter) => {
        try {
            return Boolean(await adapter.isAvailable());
        }
        catch {
            // A failed probe means this adapter cannot be used right now.
            return false;
        }
    }));
    return adapters.filter((_adapter, index) => flags[index]);
}
|
|
72
|
+
/**
 * Select the best available adapter for the given focus areas.
 *
 * For each focus area an adapter gains 2 points when the area is listed in
 * its capabilities' `strengths`, loses 1 point when it is listed in
 * `weaknesses`, and gains 1 point otherwise. The highest total wins; on a
 * tie the adapter that comes first in the available list is kept.
 *
 * @param focusAreas - Requested focus areas; may be omitted or empty.
 * @returns The chosen adapter, the first available adapter when no focus
 *   areas are given, or `null` when no adapter is available.
 */
export async function selectBestAdapter(focusAreas) {
    const candidates = await getAvailableAdapters();
    if (candidates.length === 0) {
        return null;
    }
    if (!focusAreas || focusAreas.length === 0) {
        return candidates[0]; // Return first available
    }
    // Total match score of one adapter against the requested focus areas.
    const scoreOf = (adapter) => {
        const caps = adapter.getCapabilities();
        let total = 0;
        for (const focus of focusAreas) {
            if (caps.strengths.includes(focus)) {
                total += 2;
            }
            else {
                total += caps.weaknesses.includes(focus) ? -1 : 1;
            }
        }
        return total;
    };
    let best = candidates[0];
    let bestScore = scoreOf(best);
    for (const adapter of candidates.slice(1)) {
        const score = scoreOf(adapter);
        // Strictly greater only, so the earliest adapter wins a tie.
        if (score > bestScore) {
            best = adapter;
            bestScore = score;
        }
    }
    return best;
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Claude CLI Adapter
|
|
3
|
+
*
|
|
4
|
+
* Implements the ReviewerAdapter interface for Anthropic's Claude CLI.
|
|
5
|
+
* Spawns a FRESH Claude Code instance with zero session context.
|
|
6
|
+
* Returns raw text — CC handles interpretation.
|
|
7
|
+
*
|
|
8
|
+
* Read-only enforcement (defense-in-depth):
|
|
9
|
+
* 1. --permission-mode plan (CLI-level read-only)
|
|
10
|
+
* 2. --disallowed-tools (write tools explicitly blocked)
|
|
11
|
+
* 3. Handoff prompt (explicit READ-ONLY instruction)
|
|
12
|
+
*/
|
|
13
|
+
import { ReviewerAdapter, ReviewerCapabilities, ReviewRequest, ReviewResult, ConsultRequest, ConsultResult } from './base.js';
|
|
14
|
+
/**
 * Reviewer adapter for the Claude CLI, declared as implementing
 * ReviewerAdapter.
 */
export declare class ClaudeAdapter implements ReviewerAdapter {
    /** Identifier of this adapter. */
    readonly id = "claude";
    /** Reports this adapter's capabilities. */
    getCapabilities(): ReviewerCapabilities;
    /** Resolves to a boolean availability flag for this adapter. */
    isAvailable(): Promise<boolean>;
    /** Runs a review for `request` and resolves to its result. */
    runReview(request: ReviewRequest): Promise<ReviewResult>;
    /** Runs a consultation for `request` and resolves to its result. */
    runConsult(request: ConsultRequest): Promise<ConsultResult>;
    private runCli;
    private handleException;
    private categorizeError;
    private getSuggestion;
}
|
|
25
|
+
/** Exported value of type ClaudeAdapter (declaration only; no initializer here). */
export declare const claudeAdapter: ClaudeAdapter;
|