@simonren/quorum 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +144 -0
  3. package/commands/multi-consult.md +109 -0
  4. package/commands/multi-review.md +139 -0
  5. package/dist/adapters/base.d.ts +120 -0
  6. package/dist/adapters/base.js +98 -0
  7. package/dist/adapters/claude.d.ts +25 -0
  8. package/dist/adapters/claude.js +217 -0
  9. package/dist/adapters/codex.d.ts +20 -0
  10. package/dist/adapters/codex.js +227 -0
  11. package/dist/adapters/gemini.d.ts +20 -0
  12. package/dist/adapters/gemini.js +197 -0
  13. package/dist/adapters/index.d.ts +12 -0
  14. package/dist/adapters/index.js +15 -0
  15. package/dist/cli/check.d.ts +20 -0
  16. package/dist/cli/check.js +78 -0
  17. package/dist/cli/codex.d.ts +11 -0
  18. package/dist/cli/codex.js +255 -0
  19. package/dist/cli/gemini.d.ts +12 -0
  20. package/dist/cli/gemini.js +253 -0
  21. package/dist/commands.d.ts +28 -0
  22. package/dist/commands.js +105 -0
  23. package/dist/config.d.ts +244 -0
  24. package/dist/config.js +179 -0
  25. package/dist/consult-prompt.d.ts +10 -0
  26. package/dist/consult-prompt.js +72 -0
  27. package/dist/context.d.ts +1538 -0
  28. package/dist/context.js +383 -0
  29. package/dist/decoders/claude.d.ts +53 -0
  30. package/dist/decoders/claude.js +106 -0
  31. package/dist/decoders/codex.d.ts +71 -0
  32. package/dist/decoders/codex.js +145 -0
  33. package/dist/decoders/gemini.d.ts +33 -0
  34. package/dist/decoders/gemini.js +58 -0
  35. package/dist/decoders/index.d.ts +6 -0
  36. package/dist/decoders/index.js +3 -0
  37. package/dist/errors.d.ts +46 -0
  38. package/dist/errors.js +192 -0
  39. package/dist/executor.d.ts +103 -0
  40. package/dist/executor.js +244 -0
  41. package/dist/handoff.d.ts +270 -0
  42. package/dist/handoff.js +599 -0
  43. package/dist/index.d.ts +18 -0
  44. package/dist/index.js +134 -0
  45. package/dist/pipeline.d.ts +135 -0
  46. package/dist/pipeline.js +462 -0
  47. package/dist/prompt-v2.d.ts +38 -0
  48. package/dist/prompt-v2.js +391 -0
  49. package/dist/prompt.d.ts +71 -0
  50. package/dist/prompt.js +309 -0
  51. package/dist/schema.d.ts +660 -0
  52. package/dist/schema.js +536 -0
  53. package/dist/tools/consult.d.ts +104 -0
  54. package/dist/tools/consult.js +220 -0
  55. package/dist/tools/feedback.d.ts +91 -0
  56. package/dist/tools/feedback.js +117 -0
  57. package/dist/types.d.ts +105 -0
  58. package/dist/types.js +31 -0
  59. package/package.json +54 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 SimonRen
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,144 @@
1
+ # Quorum - Multi-Model AI Review & Consultation for Claude Code
2
+
3
+ Convene a quorum of AI models (OpenAI Codex, Google Gemini, Claude) to review Claude Code's work or answer questions — Claude Code synthesizes one verdict.
4
+
5
+ ## Quick Install
6
+
7
+ **Step 1: Add the MCP server**
8
+ ```bash
9
+ claude mcp add -s user quorum -- npx -y @simonren/quorum
10
+ ```
11
+
12
+ **Step 2: Restart Claude Code**
13
+
14
+ The MCP tools and slash commands (`/multi-review`, `/multi-consult`) are automatically installed.
15
+
16
+ **Manual command install** (if needed):
17
+ ```bash
18
+ npx -y @simonren/quorum update
19
+ ```
20
+
21
+ Verify with:
22
+ ```bash
23
+ claude mcp list
24
+ # quorum: npx -y @simonren/quorum - ✓ Connected
25
+ ```
26
+
27
+ ### Alternative: Manual Install
28
+
29
+ ```bash
30
+ git clone https://github.com/SimonRen/quorum.git
31
+ cd quorum/mcp-server
32
+ npm install && npm run build
33
+ claude mcp add -s user quorum -- node /path/to/quorum/mcp-server/dist/index.js
34
+ ```
35
+
36
+ ## Prerequisites
37
+
38
+ Install at least one AI CLI:
39
+
40
+ ```bash
41
+ # OpenAI Codex CLI
42
+ npm install -g @openai/codex
43
+ codex login
44
+
45
+ # Google Gemini CLI
46
+ npm install -g @google/gemini-cli
47
+ gemini # follow auth prompts
48
+ ```
49
+
50
+ ## Usage
51
+
52
+ These tools provide **external second-opinion reviews** from the Codex, Gemini, and Claude CLIs. They are designed to complement Claude Code's native review capabilities, not replace them.
53
+
54
+ **Slash commands:**
55
+ - `/multi-review` - Parallel standard + adversarial reviews from all available CLIs (Codex, Gemini, Claude). For reviewing CC-produced work (plan, findings, code).
56
+ - `/multi-consult` - Ask all CLIs the same question and synthesize their answers. For consultation/Q&A — what's the best approach, how to solve X.
57
+
58
+ **For regular reviews:** Just say "review" and Claude Code will use its native capabilities. These external tools are only invoked when explicitly requested.
59
+
60
+ ## Slash Commands
61
+
62
+ These commands are available after restart:
63
+
64
+ ```bash
65
+ /multi-review # Parallel standard + adversarial reviews from all CLIs
66
+ /multi-review focus on race conditions # Steer the adversarial focus
67
+ /multi-consult <question> # Ask all CLIs and synthesize
68
+ /multi-consult <question> [flex] # Use Codex flex tier (cheaper/slower)
69
+ ```
70
+
71
+ ## How It Works
72
+
73
+ ```
74
+ CC does work → User: /multi-review → External CLIs review → CC synthesizes → Final output
75
+ User has a question → User: /multi-consult → External CLIs answer → CC synthesizes → Consolidated answer
76
+ ```
77
+
78
+ **Key Principles:**
79
+ - **CC is primary**: Claude Code does all the work; external models only review
80
+ - **Working directory strategy**: Pass `cwd` + small CC output; external CLIs read files directly
81
+ - **Synthesis, not passthrough**: CC always judges external feedback before incorporating
82
+
83
+ ## Focus Areas
84
+
85
+ | Area | Description |
86
+ |------|-------------|
87
+ | `security` | Vulnerabilities, auth, input validation |
88
+ | `performance` | Speed, memory, efficiency |
89
+ | `architecture` | Design patterns, structure, coupling |
90
+ | `correctness` | Logic errors, edge cases, bugs |
91
+ | `maintainability` | Code clarity, documentation, complexity |
92
+ | `scalability` | Load handling, bottlenecks |
93
+ | `testing` | Test coverage, test quality |
94
+ | `documentation` | Comments, docs, API docs |
95
+
96
+ ## MCP Tools
97
+
98
+ The plugin exposes two MCP tools:
99
+
100
+ | Tool | Description |
101
+ |------|-------------|
102
+ | `multi_review` | Parallel standard + adversarial review from all available CLIs (Codex, Gemini, Claude). Requires `ccOutput`. |
103
+ | `multi_consult` | Ask all available CLIs the same question and receive a 5-section structured response per model. For consultation/Q&A. |
104
+
105
+ ## Output Format
106
+
107
+ **Review tools** return structured feedback from the external CLIs. Claude Code parses this feedback to identify:
108
+ - **Findings**: Issues with severity, confidence, location, and suggestions
109
+ - **Agreements**: Validations of CC's correct assessments
110
+ - **Disagreements**: Challenges to CC's claims with corrections
111
+ - **Alternatives**: Different approaches with tradeoffs
112
+ - **Risk Assessment**: Overall risk level with top concerns
113
+
114
+ ## Development
115
+
116
+ ```bash
117
+ cd mcp-server
118
+ npm install
119
+ npm run build # Build once
120
+ npm run dev # Watch mode
121
+ npm test # Run tests
122
+ npm run test:watch # Watch mode tests
123
+ npm start # Run server
124
+ ```
125
+
126
+ ## Publishing
127
+
128
+ Release-based publish via npm Trusted Publishing (OIDC, no tokens needed).
129
+ CI triggers on GitHub Release, validates the tag matches `package.json`.
130
+
131
+ ```bash
132
+ # 1. Bump version in package.json
133
+ # 2. Rebuild and test
134
+ npm run build && npm test
135
+ # 3. Commit, tag, push, release
136
+ git add -A && git commit -m "v1.x.x"
137
+ git tag v1.x.x
138
+ git push && git push --tags
139
+ gh release create v1.x.x --title "v1.x.x" --generate-notes
140
+ ```
141
+
142
+ ## License
143
+
144
+ MIT
package/commands/multi-consult.md ADDED
@@ -0,0 +1,109 @@
1
+ # Multi Consult
2
+
3
+ Ask Codex, Gemini, and Claude (Opus, fresh context) the same question in parallel and synthesize their answers. Use this for **consultation** — finding the best approach, weighing alternatives, getting a panel's take. NOT for reviewing work CC has already done (use `/multi-review` for that).
4
+
5
+ ## Arguments
6
+ - `$ARGUMENTS` — the question itself, optional steering, or both
7
+
8
+ ## When to Use
9
+
10
+ Use `/multi-consult` when you have a question or problem and want a synthesized panel opinion. The panel responds in a fixed 5-section structure (Recommendation / Reasoning / Tradeoffs / Risks / Open questions). CC reads all three responses and presents one consolidated answer with a "Models said:" provenance footer.
11
+
12
+ ## Examples
13
+
14
+ ```
15
+ /multi-consult Should we use Postgres or DynamoDB for a write-heavy timeseries workload?
16
+ /multi-consult How should I refactor the auth middleware? Focus on rollback safety.
17
+ /multi-consult What's the cleanest way to memoize this expensive selector? [flex]
18
+ ```
19
+
20
+ ## Before Calling - PREPARE THE HANDOFF
21
+
22
+ ### 1. Pre-compose the question
23
+
24
+ **`$ARGUMENTS` parsing rule (pinned):**
25
+
26
+ - **If conversation context already contains the question CC has been working on:** compose `question` from that context. `$ARGUMENTS` is treated as pure steering — extract reserved tokens (see below) into schema fields; remainder goes into `customPrompt`.
27
+ - **Otherwise — `$ARGUMENTS` IS the literal question.** Set `customPrompt` to empty. Reserved tokens are extracted *only* when they appear at the *end* of `$ARGUMENTS` inside brackets or parens — e.g., `... [flex]`, `... (high reasoning)`. A bare occurrence of `flex` / `cheap` / `default tier` inside the prose is treated as part of the question, NOT a flag, to avoid corrupting questions like *"Should we offer a flex tier or default tier for customers?"*.
28
+
29
+ ### 2. Triage code-grounded questions
30
+
31
+ If the question references the codebase, populate `relevantFiles` with the minimal subset (3-10 files typically) the panel needs. For purely general questions ("Postgres vs Mongo for X workload?"), omit `relevantFiles` — the panel will answer from expertise without trawling the filesystem.
32
+
33
+ ### 3. Refuse sensitive working directories
34
+
35
+ If the current working directory is `/etc`, `~`, `~/.ssh`, or any other clearly sensitive system path, **refuse**. Tell the user: "Please invoke `/multi-consult` from a project root — `<cwd>` looks sensitive." Do not call the tool.
36
+
37
+ ### 4. Extract criteria; clarify load-bearing assumptions BEFORE calling
38
+
39
+ Pin what the question is being judged against. Once criteria are explicit, the panel's recommendation is anchored to them instead of floating — this is the fix for "ask twice, get a different answer." Stochastic re-runs converge much better against fixed criteria than against an under-specified question.
40
+
41
+ **4a. Append a CRITERIA block to the end of `question`**, priority-ordered, each tagged `[stated]` or `[assumed]`:
42
+
43
+ ```
44
+ CRITERIA (priority order):
45
+ 1. [stated] cost-per-request under $X / 1M ops
46
+ 2. [stated] team writes Go; minimize ops complexity
47
+ 3. [assumed] sustained ~10k QPS write rate
48
+ 4. [assumed] eventual consistency acceptable for analytics
49
+ ```
50
+
51
+ - `[stated]` = explicit in the user's message or earlier conversation.
52
+ - `[assumed]` = you needed to fix it to recommend; the user did NOT say.
53
+ - Cap `[assumed]` at 3. If you need more than 3 assumptions to make a recommendation, the question is too vague — bounce back to the user before calling.
54
+
55
+ **4b. Pre-call clarification gate.** Scan your `[assumed]` criteria. If any is **load-bearing** (the recommendation would flip if the assumption is wrong), STOP and ask the user before invoking the tool:
56
+
57
+ > "Before I consult the panel, I need to confirm: <restate assumption>. Is that right, or should I adjust to <plausible alternative>?"
58
+
59
+ A burned panel call on a wrong assumed criterion costs more than the round-trip.
60
+
61
+ **Skip the gate when:**
62
+ - `[stated]` criteria fully pin the answer space (no assumptions needed).
63
+ - The user told you to proceed without clarification.
64
+ - Remaining assumptions are clearly incidental (would not flip the rec).
65
+
66
+ ## Tool Invocation
67
+
68
+ Call `multi_consult` with:
69
+
70
+ ```json
71
+ {
72
+ "workingDir": "<current directory>",
73
+ "question": "<CC-composed question OR literal $ARGUMENTS minus end-bracket reserved tokens>",
74
+ "relevantFiles": ["<file1>", "<file2>"],
75
+ "customPrompt": "<steering text or empty>"
76
+ }
77
+ ```
78
+
79
+ ### Reserved-token mappings (only when bracketed at end of $ARGUMENTS)
80
+
81
+ - `[flex]` / `[cheap]` / `[budget]` → `serviceTier: "flex"`
82
+ - `[default tier]` / `[standard tier]` → `serviceTier: "default"`
83
+ - `[high reasoning]` → `reasoningEffort: "high"` (overrides default `xhigh`)
84
+
85
+ If the user types one of these mid-question (not in brackets), leave it in the question.
86
+
87
+ ## After Receiving the Panel
88
+
89
+ You will receive each model's structured 5-section response. Some may carry a `⚠️ Format drift: missing sections [...]` marker — degrade synthesis confidence accordingly for that model.
90
+
91
+ ### Synthesize
92
+
93
+ 1. **Cross-compare Recommendations.** Agreement across all three → high confidence. 2-vs-1 split → take a side and *surface the dissent explicitly* in your answer (don't flatten it). All three disagree → present the tradeoff space honestly and pick.
94
+ 2. **Mine Tradeoffs and Risks.** Even when models agree on the recommendation, the *reasons* and *risks* often diverge — surface the union, not just the intersection. If a single model raised a Risk the others missed, surface it as "1 model raised: …" — *do not silently drop it.*
95
+ 3. **Forward Open questions** to the user only if material — do not dump every "what's your scale?" clarifier.
96
+ 4. **Apply your own judgment.** You have full conversation context the panel does not; you may dismiss panel suggestions that miss the user's actual constraint, but say so explicitly when overriding.
97
+ 5. **Respond with one consolidated answer**, structured as: **Recommendation** (what to do) → **Why** (synthesis of reasoning) → **Watch out for** (consolidated risks, including any single-model-only risks) → optional **Open question for you** if a real ambiguity blocks the answer.
98
+ 6. **Append a "Models said:" provenance footer** — one short entry per model, each stating that model's recommendation in under 80 characters. Example:
99
+
100
+ ```
101
+ ---
102
+ **Models said:** Codex → Postgres + read replicas. Gemini → Postgres + Citus. Claude → DynamoDB w/ caveat on cost at scale.
103
+ ```
104
+
105
+ This is **non-negotiable**. The footer is the audit trail; without it, a synthesis-only answer is opaque.
106
+ 7. **Do NOT paste full raw model outputs to the user** unless they explicitly ask ("show me what each model said", "raw").
107
+ 8. **All-failed special case:** if the header is `❌ All Failed`, surface the failure types and **ASK** the user *"Panel unavailable — want my solo answer instead?"*. **Do NOT silently substitute** your own answer for the panel's.
108
+
109
+ $ARGUMENTS
package/commands/multi-review.md ADDED
@@ -0,0 +1,139 @@
1
+ # Multi Review
2
+
3
+ Get parallel standard AND adversarial reviews from all available models (Codex, Gemini, Claude Opus).
4
+
5
+ Each model runs twice: once as a standard reviewer (finding bugs, issues, improvements) and once as an adversarial challenger (breaking confidence in the change, questioning assumptions, targeting hidden failure paths). Results are presented in two sections.
6
+
7
+ Use `$ARGUMENTS` to steer the adversarial focus (e.g., "focus the challenge on race conditions and rollback safety").
8
+
9
+ ## Arguments
10
+ - `$ARGUMENTS` - Optional: focus area, custom instructions, or adversarial steering
11
+
12
+ ## When to Use
13
+
14
+ Use `/multi-review` when you want thorough parallel reviews from all available models. Every invocation includes both standard and adversarial passes.
15
+
16
+ ## Examples
17
+
18
+ ```
19
+ /multi-review
20
+ /multi-review focus the challenge on race conditions and rollback safety
21
+ /multi-review challenge whether this was the right caching and retry design
22
+ ```
23
+
24
+ ## Before Calling - PREPARE THE HANDOFF
25
+
26
+ ### 1. Summarize What You Did + State the Acceptance Bar
27
+
28
+ Don't just say what you did — also state the bar the work needs to clear. The bar is what lets reviewers calibrate "material" vs "nice to have." Without it, reviewers default to general code-quality vibes, which produces drift across runs.
29
+
30
+ ```
31
+ "Implemented caching layer for the product catalog API using Redis with cache invalidation on product updates.
32
+ Bar: safe under concurrent updates (no stale reads on the next request) AND p95 read latency under 50ms."
33
+ ```
34
+
35
+ ### 2. List Your Uncertainties — Tag Load-Bearing vs Incidental
36
+
37
+ Tag each uncertainty:
38
+ - `[load-bearing]` = if your assumption here is wrong, the work is NOT shipping-ready
39
+ - `[incidental]` = nice to verify but won't block ship
40
+
41
+ Reviewers prioritize accordingly, and your synthesis can elevate `[load-bearing]` items above stylistic findings.
42
+
43
+ ```
44
+ UNCERTAINTIES:
45
+ - [load-bearing] "Is the cache invalidation race-free under concurrent updates?"
46
+ - [incidental] "Is the TTL value optimal — could it be 60s instead of 30s?"
47
+ ```
48
+
49
+ ### 3. Ask Specific Questions
50
+ ```
51
+ QUESTIONS:
52
+ - "Should I use write-through or write-behind caching?"
53
+ - "Is there a race condition in the invalidation logic?"
54
+ ```
55
+
56
+ ### 4. Identify Decisions You Made
57
+
58
+ If you chose between alternatives — caching strategy, retry policy, error-handling shape, schema design, etc. — list them with rationale. The handoff schema's `decisions[]` field gives the adversarial reviewer a concrete hook to attack the design choice rather than just hunt for bugs. Skip if the change is a straightforward bug fix with no design choice involved.
59
+
60
+ ```
61
+ DECISIONS:
62
+ 1. Chose write-through cache over write-behind. Rationale: stronger read-after-write consistency at the cost of slightly slower writes; we prioritize correctness for catalog data.
63
+ 2. Chose 30s TTL with explicit invalidation on update. Rationale: TTL bounds staleness if invalidation misses; explicit invalidation catches the common path immediately.
64
+ ```
65
+
66
+ ## Tool Invocation
67
+
68
+ Call `multi_review` with:
69
+
70
+ ```json
71
+ {
72
+ "workingDir": "<current directory>",
73
+ "ccOutput": "<structured handoff>",
74
+ "outputType": "analysis",
75
+ "focusAreas": ["<from $ARGUMENTS>"],
76
+ "customPrompt": "<steering text from $ARGUMENTS for adversarial focus>"
77
+ }
78
+ ```
79
+
80
+ ### Service Tier (from $ARGUMENTS, applies to Codex only)
81
+ - If user says "flex", "cheap", or "budget" → set `serviceTier: "flex"`
82
+ - If user says "default tier" or "standard tier" → set `serviceTier: "default"`
83
+ - Otherwise → omit `serviceTier` (defaults to `"fast"` — priority processing, ~2x cost)
84
+
85
+ ### Structure your ccOutput:
86
+
87
+ ```
88
+ SUMMARY:
89
+ <what you did, 1-3 sentences>
90
+ Bar: <what counts as shipping-ready — concrete acceptance criteria>
91
+
92
+ UNCERTAINTIES (verify these):
93
+ 1. [load-bearing|incidental] <uncertainty>
94
+ 2. [load-bearing|incidental] <uncertainty>
95
+
96
+ QUESTIONS:
97
+ 1. <question>
98
+
99
+ DECISIONS:
100
+ 1. <choice>. Rationale: <why this over alternatives>
101
+ 2. <choice>. Rationale: <why this over alternatives>
102
+
103
+ PRIORITY FILES:
104
+ - <file>
105
+ ```
106
+
107
+ ## After Receiving Review
108
+
109
+ You will receive two sections: **Standard Review Findings** and **Challenge Review Findings**.
110
+
111
+ ### Synthesize
112
+
113
+ 1. **Standard findings** — bugs, issues, improvements from each model
114
+ - Find agreements across models (higher confidence)
115
+ - Identify conflicts (YOU decide who's right)
116
+
117
+ 2. **Challenge findings** — adversarial challenges from each model
118
+ - These target assumptions and design decisions, not just bugs
119
+ - Evaluate on merit — some challenges are speculative by design
120
+ - Strong challenges with evidence deserve serious consideration
121
+
122
+ 3. **Cross-reference** standard vs challenge findings
123
+ - Standard + challenge agreement = high confidence issue
124
+ - Challenge-only finding = investigate further before acting
125
+
126
+ 4. **Verify all findings**
127
+ - Check file/line references exist
128
+ - Read actual code
129
+ - Mark your confidence:
130
+ - ✓✓ Verified
131
+ - ✓ Plausible
132
+ - ? Investigate
133
+ - ✗ Rejected
134
+
135
+ 5. **Make YOUR recommendation**
136
+ - Don't just relay findings
137
+ - Apply your judgment
138
+
139
+ $ARGUMENTS
package/dist/adapters/base.d.ts ADDED
@@ -0,0 +1,120 @@
1
+ /**
2
+ * Base Adapter Interface for AI Reviewers
3
+ *
4
+ * This provides a generic interface that any AI CLI can implement.
5
+ * Makes it easy to add new models (Ollama, Azure, etc.) without
6
+ * changing the core orchestration logic.
7
+ */
8
+ import { FocusArea, OutputType, ReasoningEffort, ServiceTier } from '../types.js';
9
+ export interface ReviewerCapabilities {
10
+ /** Display name for this reviewer */
11
+ name: string;
12
+ /** Short description of the reviewer's strengths */
13
+ description: string;
14
+ /** Focus areas this reviewer excels at */
15
+ strengths: FocusArea[];
16
+ /** Focus areas this reviewer is weaker at */
17
+ weaknesses: FocusArea[];
18
+ /** Whether the reviewer can read files from the filesystem */
19
+ hasFilesystemAccess: boolean;
20
+ /** Whether the reviewer supports JSON structured output */
21
+ supportsStructuredOutput: boolean;
22
+ /** Maximum context window size (tokens) */
23
+ maxContextTokens: number;
24
+ /** Supported reasoning effort levels (if applicable) */
25
+ reasoningLevels?: ReasoningEffort[];
26
+ }
27
+ export interface ReviewRequest {
28
+ /** Working directory containing the code */
29
+ workingDir: string;
30
+ /** Claude Code's output to review */
31
+ ccOutput: string;
32
+ /** Type of output being reviewed */
33
+ outputType: OutputType;
34
+ /** Specific files that CC analyzed */
35
+ analyzedFiles?: string[];
36
+ /** Areas to focus the review on */
37
+ focusAreas?: FocusArea[];
38
+ /** Custom instructions from the user */
39
+ customPrompt?: string;
40
+ /** Reasoning effort level (for models that support it) */
41
+ reasoningEffort?: ReasoningEffort;
42
+ /** Service tier (Codex). Omit for the review chain's default 'fast' (priority). Pass 'flex' for cheap/slow or 'default' for the Codex API default tier. */
43
+ serviceTier?: ServiceTier;
44
+ /** Review mode: standard finds bugs, adversarial challenges assumptions */
45
+ reviewMode?: 'standard' | 'adversarial';
46
+ }
47
+ export interface ConsultRequest {
48
+ /** Working directory containing the code (always passed) */
49
+ workingDir: string;
50
+ /** CC-composed, self-contained question for the panel */
51
+ question: string;
52
+ /** CC-triaged file subset for code-grounded questions; omitted on general questions */
53
+ relevantFiles?: string[];
54
+ /** Free-form steering from $ARGUMENTS */
55
+ customPrompt?: string;
56
+ /** Reasoning effort (Codex). Default 'xhigh' for consult (deeper questions). */
57
+ reasoningEffort?: ReasoningEffort;
58
+ /** Service tier (Codex). Same defaulting rules as ReviewRequest. */
59
+ serviceTier?: ServiceTier;
60
+ }
61
+ export type ConsultResult = ReviewResult;
62
+ /** @deprecated Use handoff.ts roles instead */
63
+ export interface ExpertRole {
64
+ name: string;
65
+ description: string;
66
+ systemPrompt: string;
67
+ focusAreas: FocusArea[];
68
+ evaluationCriteria: string[];
69
+ }
70
+ /** @deprecated Use handoff.ts selectRole() instead */
71
+ export declare const EXPERT_ROLES: Record<string, ExpertRole>;
72
+ /** @deprecated Use handoff.ts selectRole() instead */
73
+ export declare function selectExpertRole(focusAreas?: FocusArea[]): ExpertRole;
74
/** Result of a reviewer run that completed. The literal `true` on `success` is the discriminant that separates it from ReviewFailure inside ReviewResult. */
+ export interface ReviewSuccess {
75
+ success: true;
76
// Text returned by the reviewer. NOTE(review): presumably the raw, uninterpreted model output — confirm against the adapters.
+ output: string;
77
// NOTE(review): presumably the elapsed time of the run in milliseconds — confirm where it is measured.
+ executionTimeMs: number;
78
+ }
79
/** Result of a reviewer run that did not succeed. The literal `false` on `success` is the discriminant that separates it from ReviewSuccess inside ReviewResult. */
+ export interface ReviewFailure {
80
+ success: false;
81
// Categorized description of what went wrong (see ReviewError).
+ error: ReviewError;
82
// Optional. NOTE(review): presumably a remediation hint meant for the user — confirm against the adapters.
+ suggestion?: string;
83
// Optional. NOTE(review): presumably whatever the CLI printed before the failure was detected — confirm.
+ rawOutput?: string;
84
// NOTE(review): presumably the elapsed time of the failed attempt in milliseconds — confirm where it is measured.
+ executionTimeMs: number;
85
+ }
86
/** Outcome of a reviewer run: either ReviewSuccess or ReviewFailure. Narrow on the `success` field before reading variant-specific members. */
+ export type ReviewResult = ReviewSuccess | ReviewFailure;
87
/** Error payload carried by ReviewFailure. */
+ export interface ReviewError {
88
// Failure category; one of the string literals in the union below.
+ type: 'cli_not_found' | 'timeout' | 'rate_limit' | 'auth_error' | 'invalid_response' | 'cli_error' | 'parse_error';
89
// Description of the failure.
+ message: string;
90
// Optional free-form key/value data; values are `unknown`, so narrow before use.
+ details?: Record<string, unknown>;
91
+ }
92
+ /**
93
+ * Base interface that all reviewer adapters must implement.
94
+ * This allows easy addition of new AI CLIs without changing orchestration logic.
95
+ */
96
+ export interface ReviewerAdapter {
97
+ /** Unique identifier for this adapter */
98
+ readonly id: string;
99
+ /** Get capabilities and metadata for this reviewer */
100
+ getCapabilities(): ReviewerCapabilities;
101
+ /** Check if the CLI is available and properly configured */
102
+ isAvailable(): Promise<boolean>;
103
+ /** Run a review and return structured output */
104
+ runReview(request: ReviewRequest): Promise<ReviewResult>;
105
+ /** Run a consultation (Q&A) — required on every adapter. */
106
+ runConsult(request: ConsultRequest): Promise<ConsultResult>;
107
+ /**
108
+ * Optional: Run peer review of another model's output
109
+ * Future capability - not currently implemented by any adapter
110
+ */
111
+ runPeerReview?(originalRequest: ReviewRequest, reviewToScore: string): Promise<ReviewResult>;
112
+ }
113
/** Stores `adapter` in the module-level registry under `adapter.id`; registering the same id again replaces the earlier entry. */
+ export declare function registerAdapter(adapter: ReviewerAdapter): void;
114
/** Returns the adapter registered under `id`, or `undefined` when no adapter has that id. */
+ export declare function getAdapter(id: string): ReviewerAdapter | undefined;
115
/** Returns a new array containing every registered adapter, in registration order. */
+ export declare function getAllAdapters(): ReviewerAdapter[];
116
/** Calls `isAvailable()` on every registered adapter concurrently and resolves to the adapters that reported themselves available. */
+ export declare function getAvailableAdapters(): Promise<ReviewerAdapter[]>;
117
+ /**
118
+ * Select the best available adapter for given focus areas
119
+ */
120
+ export declare function selectBestAdapter(focusAreas?: FocusArea[]): Promise<ReviewerAdapter | null>;
package/dist/adapters/base.js ADDED
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Base Adapter Interface for AI Reviewers
3
+ *
4
+ * This provides a generic interface that any AI CLI can implement.
5
+ * Makes it easy to add new models (Ollama, Azure, etc.) without
6
+ * changing the core orchestration logic.
7
+ */
8
/**
 * Legacy catalogue of reviewer personas, keyed by role id. Each entry bundles
 * a display name, a short description, the system prompt handed to the model,
 * the focus areas the persona covers, and its evaluation criteria.
 *
 * @deprecated Use handoff.ts selectRole() instead
 */
export const EXPERT_ROLES = {
    security_auditor: {
        name: 'Security Auditor',
        description: 'Security vulnerabilities',
        systemPrompt: 'Security auditor. Focus on injection, auth bypass, data exposure, input validation.',
        focusAreas: ['security'],
        evaluationCriteria: ['Injection', 'Auth', 'Data exposure'],
    },
    performance_engineer: {
        name: 'Performance Engineer',
        description: 'Performance optimization',
        systemPrompt: 'Performance engineer. Focus on complexity, N+1 queries, memory leaks.',
        focusAreas: ['performance', 'scalability'],
        evaluationCriteria: ['Complexity', 'Memory', 'I/O'],
    },
    architect: {
        name: 'Software Architect',
        description: 'Architecture and design',
        systemPrompt: 'Software architect. Focus on SOLID, coupling, abstractions.',
        focusAreas: ['architecture', 'maintainability'],
        evaluationCriteria: ['SOLID', 'Coupling', 'Patterns'],
    },
    correctness_analyst: {
        name: 'Correctness Analyst',
        description: 'Logic errors and bugs',
        systemPrompt: 'Correctness analyst. Focus on logic errors, edge cases, race conditions.',
        focusAreas: ['correctness', 'testing'],
        evaluationCriteria: ['Logic', 'Edge cases', 'Concurrency'],
    },
    general_reviewer: {
        name: 'General Reviewer',
        description: 'Balanced review',
        systemPrompt: 'Senior engineer. Review correctness, security, performance, maintainability.',
        focusAreas: ['security', 'performance', 'architecture', 'correctness', 'maintainability'],
        evaluationCriteria: ['Correctness', 'Security', 'Performance', 'Quality'],
    },
};
37
/**
 * Picks the legacy expert persona for a set of focus areas.
 *
 * A missing or empty list yields the general reviewer. Otherwise the personas
 * are tried in fixed priority order (security, then performance/scalability,
 * then architecture/maintainability, then correctness/testing) and the first
 * one with a matching trigger wins. Focus areas that match no persona fall
 * back to the general reviewer.
 *
 * @deprecated Use handoff.ts selectRole() instead
 */
export function selectExpertRole(focusAreas) {
    const fallback = EXPERT_ROLES.general_reviewer;
    if (!focusAreas || focusAreas.length === 0) {
        return fallback;
    }
    // Ordered from highest to lowest priority; each row is [trigger areas, persona].
    const rolesByPriority = [
        [['security'], EXPERT_ROLES.security_auditor],
        [['performance', 'scalability'], EXPERT_ROLES.performance_engineer],
        [['architecture', 'maintainability'], EXPERT_ROLES.architect],
        [['correctness', 'testing'], EXPERT_ROLES.correctness_analyst],
    ];
    for (const [triggers, role] of rolesByPriority) {
        if (triggers.some((area) => focusAreas.includes(area))) {
            return role;
        }
    }
    return fallback;
}
51
+ // =============================================================================
52
+ // ADAPTER REGISTRY
53
+ // =============================================================================
54
+ const adapterRegistry = new Map();
55
+ export function registerAdapter(adapter) {
56
+ adapterRegistry.set(adapter.id, adapter);
57
+ }
58
+ export function getAdapter(id) {
59
+ return adapterRegistry.get(id);
60
+ }
61
+ export function getAllAdapters() {
62
+ return Array.from(adapterRegistry.values());
63
+ }
64
+ export async function getAvailableAdapters() {
65
+ const adapters = getAllAdapters();
66
+ const availability = await Promise.all(adapters.map(async (adapter) => ({
67
+ adapter,
68
+ available: await adapter.isAvailable(),
69
+ })));
70
+ return availability.filter((a) => a.available).map((a) => a.adapter);
71
+ }
72
/**
 * Select the best available adapter for given focus areas.
 *
 * Resolves to `null` when no adapter is available, and to the first available
 * adapter when no focus areas are given. Otherwise each adapter is scored per
 * requested focus area: +2 if the area is one of its strengths, -1 if it is a
 * declared weakness, +1 if it is neither. The highest score wins, and ties go
 * to the adapter that comes first in the available list.
 */
export async function selectBestAdapter(focusAreas) {
    const candidates = await getAvailableAdapters();
    if (candidates.length === 0) {
        return null;
    }
    const hasFocus = Boolean(focusAreas) && focusAreas.length !== 0;
    if (!hasFocus) {
        return candidates[0]; // Return first available
    }
    let winner = null;
    let winningScore = 0;
    for (const candidate of candidates) {
        const caps = candidate.getCapabilities();
        let total = 0;
        for (const focus of focusAreas) {
            if (caps.strengths.includes(focus)) {
                total += 2;
            }
            else {
                total += caps.weaknesses.includes(focus) ? -1 : 1;
            }
        }
        // Strictly-greater keeps the earliest adapter on ties, like a stable descending sort.
        if (winner === null || total > winningScore) {
            winner = candidate;
            winningScore = total;
        }
    }
    return winner;
}
package/dist/adapters/claude.d.ts ADDED
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Claude CLI Adapter
3
+ *
4
+ * Implements the ReviewerAdapter interface for Anthropic's Claude CLI.
5
+ * Spawns a FRESH Claude Code instance with zero session context.
6
+ * Returns raw text — CC handles interpretation.
7
+ *
8
+ * Read-only enforcement (defense-in-depth):
9
+ * 1. --permission-mode plan (CLI-level read-only)
10
+ * 2. --disallowed-tools (write tools explicitly blocked)
11
+ * 3. Handoff prompt (explicit READ-ONLY instruction)
12
+ */
13
+ import { ReviewerAdapter, ReviewerCapabilities, ReviewRequest, ReviewResult, ConsultRequest, ConsultResult } from './base.js';
14
/** ReviewerAdapter implementation for the Claude CLI. */
+ export declare class ClaudeAdapter implements ReviewerAdapter {
15
// Adapter identifier; fixed to the literal "claude".
+ readonly id = "claude";
16
// Returns this reviewer's capabilities and metadata.
+ getCapabilities(): ReviewerCapabilities;
17
// Resolves to whether the CLI is available and properly configured.
+ isAvailable(): Promise<boolean>;
18
// Runs a review for the given request and resolves to a ReviewResult.
+ runReview(request: ReviewRequest): Promise<ReviewResult>;
19
// Private helpers below: the declaration file keeps only their names, so their signatures and behavior are not visible here.
+ private runCli;
20
+ private handleException;
21
+ private categorizeError;
22
+ private getSuggestion;
23
// Runs a consultation (Q&A) for the given request and resolves to a ConsultResult.
+ runConsult(request: ConsultRequest): Promise<ConsultResult>;
24
+ }
25
+ export declare const claudeAdapter: ClaudeAdapter;