opencode-multiagent 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/agents/advisor.md +57 -0
- package/agents/auditor.md +45 -0
- package/agents/critic.md +127 -0
- package/agents/deep-worker.md +65 -0
- package/agents/devil.md +36 -0
- package/agents/executor.md +141 -0
- package/agents/heavy-worker.md +68 -0
- package/agents/lead.md +155 -0
- package/agents/librarian.md +62 -0
- package/agents/planner.md +121 -0
- package/agents/qa.md +50 -0
- package/agents/quick.md +65 -0
- package/agents/reviewer.md +55 -0
- package/agents/scout.md +58 -0
- package/agents/scribe.md +78 -0
- package/agents/strategist.md +63 -0
- package/agents/ui-heavy-worker.md +62 -0
- package/agents/ui-worker.md +69 -0
- package/agents/validator.md +47 -0
- package/agents/worker.md +68 -0
- package/commands/execute.md +14 -0
- package/commands/init-deep.md +18 -0
- package/commands/init.md +18 -0
- package/commands/inspect.md +13 -0
- package/commands/plan.md +15 -0
- package/commands/quality.md +14 -0
- package/commands/review.md +14 -0
- package/commands/status.md +15 -0
- package/defaults/agent-settings.json +102 -0
- package/defaults/agent-settings.schema.json +25 -0
- package/defaults/flags.json +35 -0
- package/defaults/flags.schema.json +119 -0
- package/defaults/mcp-defaults.json +47 -0
- package/defaults/mcp-defaults.schema.json +38 -0
- package/defaults/profiles.json +53 -0
- package/defaults/profiles.schema.json +60 -0
- package/defaults/team-profiles.json +83 -0
- package/examples/opencode.json +4 -0
- package/examples/opencode.with-overrides.json +23 -0
- package/package.json +62 -0
- package/skills/advanced-evaluation/SKILL.md +454 -0
- package/skills/advanced-evaluation/manifest.json +20 -0
- package/skills/cek-context-engineering/SKILL.md +1261 -0
- package/skills/cek-context-engineering/manifest.json +17 -0
- package/skills/cek-prompt-engineering/SKILL.md +559 -0
- package/skills/cek-prompt-engineering/manifest.json +17 -0
- package/skills/cek-test-prompt/SKILL.md +714 -0
- package/skills/cek-test-prompt/manifest.json +17 -0
- package/skills/cek-thought-based-reasoning/SKILL.md +658 -0
- package/skills/cek-thought-based-reasoning/manifest.json +17 -0
- package/skills/context-degradation/SKILL.md +231 -0
- package/skills/context-degradation/manifest.json +17 -0
- package/skills/debate/SKILL.md +316 -0
- package/skills/debate/manifest.json +19 -0
- package/skills/design-first/SKILL.md +5 -0
- package/skills/design-first/manifest.json +20 -0
- package/skills/dispatching-parallel-agents/SKILL.md +180 -0
- package/skills/dispatching-parallel-agents/manifest.json +18 -0
- package/skills/drift-analysis/SKILL.md +324 -0
- package/skills/drift-analysis/manifest.json +19 -0
- package/skills/evaluation/SKILL.md +5 -0
- package/skills/evaluation/manifest.json +19 -0
- package/skills/executing-plans/SKILL.md +70 -0
- package/skills/executing-plans/manifest.json +17 -0
- package/skills/handoff-protocols/SKILL.md +5 -0
- package/skills/handoff-protocols/manifest.json +19 -0
- package/skills/parallel-investigation/SKILL.md +206 -0
- package/skills/parallel-investigation/manifest.json +18 -0
- package/skills/reflexion-critique/SKILL.md +477 -0
- package/skills/reflexion-critique/manifest.json +17 -0
- package/skills/reflexion-reflect/SKILL.md +650 -0
- package/skills/reflexion-reflect/manifest.json +17 -0
- package/skills/root-cause-analysis/SKILL.md +5 -0
- package/skills/root-cause-analysis/manifest.json +20 -0
- package/skills/sadd-judge-with-debate/SKILL.md +426 -0
- package/skills/sadd-judge-with-debate/manifest.json +17 -0
- package/skills/structured-code-review/SKILL.md +5 -0
- package/skills/structured-code-review/manifest.json +18 -0
- package/skills/task-decomposition/SKILL.md +5 -0
- package/skills/task-decomposition/manifest.json +20 -0
- package/skills/verification-before-completion/SKILL.md +5 -0
- package/skills/verification-before-completion/manifest.json +22 -0
- package/skills/verification-gates/SKILL.md +281 -0
- package/skills/verification-gates/manifest.json +19 -0
- package/src/control-plane.ts +21 -0
- package/src/index.ts +8 -0
- package/src/opencode-multiagent/compiler.ts +168 -0
- package/src/opencode-multiagent/constants.ts +178 -0
- package/src/opencode-multiagent/file-lock.ts +90 -0
- package/src/opencode-multiagent/hooks.ts +599 -0
- package/src/opencode-multiagent/log.ts +12 -0
- package/src/opencode-multiagent/mailbox.ts +287 -0
- package/src/opencode-multiagent/markdown.ts +99 -0
- package/src/opencode-multiagent/mcp.ts +35 -0
- package/src/opencode-multiagent/policy.ts +67 -0
- package/src/opencode-multiagent/quality.ts +140 -0
- package/src/opencode-multiagent/runtime.ts +55 -0
- package/src/opencode-multiagent/skills.ts +144 -0
- package/src/opencode-multiagent/supervision.ts +156 -0
- package/src/opencode-multiagent/task-manager.ts +148 -0
- package/src/opencode-multiagent/team-manager.ts +219 -0
- package/src/opencode-multiagent/team-tools.ts +359 -0
- package/src/opencode-multiagent/telemetry.ts +124 -0
- package/src/opencode-multiagent/utils.ts +54 -0
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: parallel-investigation
|
|
3
|
+
description: Coordinates parallel investigation threads to simultaneously explore multiple hypotheses or root causes across different system areas. Use when debugging production incidents, slow API performance, multi-system integration failures, or complex bugs where the root cause is unclear and multiple plausible theories exist; when serial troubleshooting is too slow; or when multiple investigators can divide root-cause analysis work. Provides structured phases for problem decomposition, thread assignment, sync points with Continue/Pivot/Converge decisions, and final report synthesis.
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
triggers:
|
|
6
|
+
- investigate in parallel
|
|
7
|
+
- multiple approaches
|
|
8
|
+
- divide investigation
|
|
9
|
+
- complex problem
|
|
10
|
+
- explore options
|
|
11
|
+
tags:
|
|
12
|
+
- collaboration
|
|
13
|
+
- investigation
|
|
14
|
+
- parallel
|
|
15
|
+
- problem-solving
|
|
16
|
+
difficulty: advanced
|
|
17
|
+
estimatedTime: 15
|
|
18
|
+
relatedSkills:
|
|
19
|
+
- debugging/root-cause-analysis
|
|
20
|
+
- collaboration/handoff-protocols
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
# Parallel Investigation
|
|
24
|
+
|
|
25
|
+
Coordinate parallel investigation threads to explore multiple hypotheses simultaneously. Most effective for production incidents, performance regressions, or integration failures where the root cause is unclear.
|
|
26
|
+
|
|
27
|
+
## Core Principle
|
|
28
|
+
|
|
29
|
+
**When uncertain, explore multiple paths in parallel. Converge when evidence points to an answer.**
|
|
30
|
+
|
|
31
|
+
Parallel investigation reduces time-to-solution by eliminating serial bottlenecks.
|
|
32
|
+
|
|
33
|
+
## Investigation Structure
|
|
34
|
+
|
|
35
|
+
### Phase 1: Problem Decomposition
|
|
36
|
+
|
|
37
|
+
Break the problem into independent investigation threads:
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
Problem: API responses are slow
|
|
41
|
+
|
|
42
|
+
Investigation Threads:
|
|
43
|
+
├── Thread A: Database performance
|
|
44
|
+
│ └── Check slow queries, indexes, connection pool
|
|
45
|
+
├── Thread B: Application code
|
|
46
|
+
│ └── Profile endpoint handlers, check for N+1
|
|
47
|
+
├── Thread C: Infrastructure
|
|
48
|
+
│ └── Check CPU, memory, network latency
|
|
49
|
+
└── Thread D: External services
|
|
50
|
+
└── Check third-party API response times
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
Each thread should be independent (no blocking dependencies), focused (clear scope), and time-boxed.
|
|
54
|
+
|
|
55
|
+
### Phase 2: Thread Assignment
|
|
56
|
+
|
|
57
|
+
Assign threads with clear ownership:
|
|
58
|
+
|
|
59
|
+
```markdown
|
|
60
|
+
## Thread A: Database Performance
|
|
61
|
+
**Investigator:** [Name/Agent A]
|
|
62
|
+
**Duration:** 30 minutes
|
|
63
|
+
**Scope:**
|
|
64
|
+
- Query execution times
|
|
65
|
+
- Index utilization
|
|
66
|
+
- Connection pool metrics
|
|
67
|
+
**Report Format:** Summary + evidence
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Phase 3: Parallel Execution
|
|
71
|
+
|
|
72
|
+
Each thread follows this pattern:
|
|
73
|
+
|
|
74
|
+
1. Gather evidence specific to thread scope
|
|
75
|
+
2. Document findings as you go
|
|
76
|
+
3. Identify if thread is a lead or dead end
|
|
77
|
+
4. Prepare summary for sync point
|
|
78
|
+
|
|
79
|
+
**Thread Log Template:**
|
|
80
|
+
```markdown
|
|
81
|
+
## Thread: [Name]
|
|
82
|
+
**Start:** [Time]
|
|
83
|
+
|
|
84
|
+
### Findings
|
|
85
|
+
- [Timestamp] [Finding]
|
|
86
|
+
|
|
87
|
+
### Evidence
|
|
88
|
+
- [Log/Metric/Screenshot]
|
|
89
|
+
|
|
90
|
+
### Preliminary Conclusion
|
|
91
|
+
[What this thread suggests about the problem]
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Phase 4: Sync Points
|
|
95
|
+
|
|
96
|
+
Regular convergence to share findings:
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
Sync Point Agenda:
|
|
100
|
+
1. Each thread report (2 min each)
|
|
101
|
+
2. Discussion & correlation (5 min)
|
|
102
|
+
3. Decision: Continue, Pivot, or Converge (3 min)
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
**Sync Point Decisions:**
|
|
106
|
+
- **Continue**: Threads are progressing, maintain parallel execution
|
|
107
|
+
- **Pivot**: Redirect threads based on new evidence
|
|
108
|
+
- **Converge**: One thread found the answer, others join to validate
|
|
109
|
+
|
|
110
|
+
### Phase 5: Convergence
|
|
111
|
+
|
|
112
|
+
When a thread identifies the likely cause:
|
|
113
|
+
|
|
114
|
+
1. **Validate** — Other threads verify the finding
|
|
115
|
+
2. **Deep dive** — Focused investigation on identified cause
|
|
116
|
+
3. **Document** — Compile findings from all threads
|
|
117
|
+
|
|
118
|
+
## Coordination Patterns
|
|
119
|
+
|
|
120
|
+
**Hub and Spoke**: One coordinator assigns threads, tracks progress, calls sync points, and makes convergence decisions. Best when one person has the most context.
|
|
121
|
+
|
|
122
|
+
**Peer Network**: Equal investigators post findings to a shared channel and self-organize convergence when a pattern emerges. Best when investigators have similar expertise.
|
|
123
|
+
|
|
124
|
+
## Communication Protocol
|
|
125
|
+
|
|
126
|
+
### During Investigation
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
[Thread A] [Status] Starting query analysis
|
|
130
|
+
[Thread B] [Finding] No N+1 patterns in user endpoint
|
|
131
|
+
[Thread A] [Finding] Slow query: SELECT * FROM orders WHERE...
|
|
132
|
+
[Thread C] [Dead End] CPU and memory within normal range
|
|
133
|
+
[Thread A] [Hot Lead] Missing index on orders.user_id
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### At Sync Point
|
|
137
|
+
|
|
138
|
+
```markdown
|
|
139
|
+
## Thread A Summary
|
|
140
|
+
|
|
141
|
+
**Status:** Hot Lead
|
|
142
|
+
**Key Finding:** Missing index on orders.user_id
|
|
143
|
+
**Evidence:** Query taking 3.2s, explain shows full table scan
|
|
144
|
+
**Recommendation:** Likely root cause — suggest converge
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
## Decision Framework
|
|
148
|
+
|
|
149
|
+
| Thread Status | Action |
|
|
150
|
+
|---------------|--------|
|
|
151
|
+
| All exploring | Continue parallel |
|
|
152
|
+
| One hot lead | Validate lead, others support |
|
|
153
|
+
| Multiple leads | Prioritize by evidence strength |
|
|
154
|
+
| All dead ends | Reframe problem, new threads |
|
|
155
|
+
| Confirmed cause | Converge, begin fix |
|
|
156
|
+
|
|
157
|
+
## Time Management
|
|
158
|
+
|
|
159
|
+
A typical two-hour investigation:
|
|
160
|
+
|
|
161
|
+
```
|
|
162
|
+
0:00 Problem decomposition & thread assignment
|
|
163
|
+
0:15 Parallel investigation begins
|
|
164
|
+
0:45 Sync point #1 → Continue/Pivot/Converge decision
|
|
165
|
+
1:30 Sync point #2 (if continuing)
|
|
166
|
+
1:35 Final convergence & documentation
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Adjust sync point cadence based on incident severity — every 20 minutes for critical outages, every 45 minutes for lower-urgency investigations.
|
|
170
|
+
|
|
171
|
+
## Documentation
|
|
172
|
+
|
|
173
|
+
### Final Report Structure
|
|
174
|
+
|
|
175
|
+
```markdown
|
|
176
|
+
# Investigation: [Problem]
|
|
177
|
+
|
|
178
|
+
## Summary
|
|
179
|
+
[Brief description and resolution]
|
|
180
|
+
|
|
181
|
+
## Threads Explored
|
|
182
|
+
|
|
183
|
+
### Thread A: [Area]
|
|
184
|
+
- Investigator: [Name]
|
|
185
|
+
- Findings: [Summary]
|
|
186
|
+
- Outcome: [Lead / Dead End / Root Cause]
|
|
187
|
+
|
|
188
|
+
## Root Cause
|
|
189
|
+
[Detailed explanation of what was found]
|
|
190
|
+
|
|
191
|
+
## Evidence
|
|
192
|
+
- [Evidence 1]
|
|
193
|
+
- [Evidence 2]
|
|
194
|
+
|
|
195
|
+
## Resolution
|
|
196
|
+
[What was done to fix]
|
|
197
|
+
|
|
198
|
+
## Lessons Learned
|
|
199
|
+
- [Learning 1]
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Integration with Other Skills
|
|
203
|
+
|
|
204
|
+
- **debugging/root-cause-analysis**: Each thread follows RCA principles
|
|
205
|
+
- **debugging/hypothesis-testing**: Threads test specific hypotheses
|
|
206
|
+
- **collaboration/handoff-protocols**: When passing a thread to another person
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "parallel-investigation",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Coordinate multiple investigation threads for root-cause analysis and debugging",
|
|
5
|
+
"triggers": [
|
|
6
|
+
"parallel investigation",
|
|
7
|
+
"multiple hypotheses",
|
|
8
|
+
"root cause",
|
|
9
|
+
"incident",
|
|
10
|
+
"simultaneous debug"
|
|
11
|
+
],
|
|
12
|
+
"applicable_agents": [
|
|
13
|
+
"critic",
|
|
14
|
+
"strategist"
|
|
15
|
+
],
|
|
16
|
+
"max_context_tokens": 2200,
|
|
17
|
+
"entry_file": "SKILL.md"
|
|
18
|
+
}
|
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: reflexion-critique
|
|
3
|
+
description: Comprehensive multi-perspective review using specialized judges with debate and consensus building
|
|
4
|
+
argument-hint: Optional file paths, commits, or context to review (defaults to recent changes)
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Work Critique Command
|
|
8
|
+
|
|
9
|
+
<task>
|
|
10
|
+
You are a critique coordinator conducting a comprehensive multi-perspective review of completed work using the Multi-Agent Debate + LLM-as-a-Judge pattern. Your role is to orchestrate multiple specialized judges who will independently review the work, debate their findings, and reach consensus on quality, correctness, and improvement opportunities.
|
|
11
|
+
</task>
|
|
12
|
+
|
|
13
|
+
<context>
|
|
14
|
+
This command implements a sophisticated review pattern combining:
|
|
15
|
+
- **Multi-Agent Debate**: Multiple specialized judges provide independent perspectives
|
|
16
|
+
- **LLM-as-a-Judge**: Structured evaluation framework for consistent assessment
|
|
17
|
+
- **Chain-of-Verification (CoVe)**: Each judge validates their own critique before submission
|
|
18
|
+
- **Consensus Building**: Judges debate findings to reach agreement on recommendations
|
|
19
|
+
|
|
20
|
+
The review is **report-only** - findings are presented for user consideration without automatic fixes.
|
|
21
|
+
</context>
|
|
22
|
+
|
|
23
|
+
## Your Workflow
|
|
24
|
+
|
|
25
|
+
### Phase 1: Context Gathering
|
|
26
|
+
|
|
27
|
+
Before starting the review, understand what was done:
|
|
28
|
+
|
|
29
|
+
1. **Identify the scope of work to review**:
|
|
30
|
+
- If arguments provided: Use them to identify specific files, commits, or conversation context
|
|
31
|
+
- If no arguments: Review the recent conversation history and file changes
|
|
32
|
+
- Ask user if scope is unclear: "What work should I review? (recent changes, specific feature, entire conversation, etc.)"
|
|
33
|
+
|
|
34
|
+
2. **Capture relevant context**:
|
|
35
|
+
- Original requirements or user request
|
|
36
|
+
- Files that were modified or created
|
|
37
|
+
- Decisions made during implementation
|
|
38
|
+
- Any constraints or assumptions
|
|
39
|
+
|
|
40
|
+
3. **Summarize scope for confirmation**:
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
📋 Review Scope:
|
|
44
|
+
- Original request: [summary]
|
|
45
|
+
- Files changed: [list]
|
|
46
|
+
- Approach taken: [brief description]
|
|
47
|
+
|
|
48
|
+
Proceeding with multi-agent review...
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Phase 2: Independent Judge Reviews (Parallel)
|
|
52
|
+
|
|
53
|
+
Use the Task tool to spawn three specialized judge agents in parallel. Each judge operates independently without seeing others' reviews.
|
|
54
|
+
|
|
55
|
+
#### Judge 1: Requirements Validator
|
|
56
|
+
|
|
57
|
+
**Prompt for Agent:**
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
You are a Requirements Validator conducting a thorough review of completed work.
|
|
61
|
+
|
|
62
|
+
## Your Task
|
|
63
|
+
|
|
64
|
+
Review the following work and assess alignment with original requirements:
|
|
65
|
+
|
|
66
|
+
[CONTEXT]
|
|
67
|
+
Original Requirements: {requirements}
|
|
68
|
+
Work Completed: {summary of changes}
|
|
69
|
+
Files Modified: {file list}
|
|
70
|
+
[/CONTEXT]
|
|
71
|
+
|
|
72
|
+
## Your Process (Chain-of-Verification)
|
|
73
|
+
|
|
74
|
+
1. **Initial Analysis**:
|
|
75
|
+
- List all requirements from the original request
|
|
76
|
+
- Check each requirement against the implementation
|
|
77
|
+
- Identify gaps, over-delivery, or misalignments
|
|
78
|
+
|
|
79
|
+
2. **Self-Verification**:
|
|
80
|
+
- Generate 3-5 verification questions about your analysis
|
|
81
|
+
- Example: "Did I check for edge cases mentioned in requirements?"
|
|
82
|
+
- Answer each question honestly
|
|
83
|
+
- Refine your analysis based on answers
|
|
84
|
+
|
|
85
|
+
3. **Final Critique**:
|
|
86
|
+
Provide structured output:
|
|
87
|
+
|
|
88
|
+
### Requirements Alignment Score: X/10
|
|
89
|
+
|
|
90
|
+
### Requirements Coverage:
|
|
91
|
+
✅ [Met requirement 1]
|
|
92
|
+
✅ [Met requirement 2]
|
|
93
|
+
⚠️ [Partially met requirement 3] - [explanation]
|
|
94
|
+
❌ [Missed requirement 4] - [explanation]
|
|
95
|
+
|
|
96
|
+
### Gaps Identified:
|
|
97
|
+
- [gap 1 with severity: Critical/High/Medium/Low]
|
|
98
|
+
- [gap 2 with severity]
|
|
99
|
+
|
|
100
|
+
### Over-Delivery/Scope Creep:
|
|
101
|
+
- [item 1] - [is this good or problematic?]
|
|
102
|
+
|
|
103
|
+
### Verification Questions & Answers:
|
|
104
|
+
Q1: [question]
|
|
105
|
+
A1: [answer that influenced your critique]
|
|
106
|
+
...
|
|
107
|
+
|
|
108
|
+
Be specific, objective, and cite examples from the code.
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
#### Judge 2: Solution Architect
|
|
112
|
+
|
|
113
|
+
**Prompt for Agent:**
|
|
114
|
+
|
|
115
|
+
```
|
|
116
|
+
You are a Solution Architect evaluating the technical approach and design decisions.
|
|
117
|
+
|
|
118
|
+
## Your Task
|
|
119
|
+
|
|
120
|
+
Review the implementation approach and assess if it's optimal:
|
|
121
|
+
|
|
122
|
+
[CONTEXT]
|
|
123
|
+
Problem to Solve: {problem description}
|
|
124
|
+
Solution Implemented: {summary of approach}
|
|
125
|
+
Files Modified: {file list with brief description of changes}
|
|
126
|
+
[/CONTEXT]
|
|
127
|
+
|
|
128
|
+
## Your Process (Chain-of-Verification)
|
|
129
|
+
|
|
130
|
+
1. **Initial Evaluation**:
|
|
131
|
+
- Analyze the chosen approach
|
|
132
|
+
- Consider alternative approaches
|
|
133
|
+
- Evaluate trade-offs and design decisions
|
|
134
|
+
- Check for architectural patterns and best practices
|
|
135
|
+
|
|
136
|
+
2. **Self-Verification**:
|
|
137
|
+
- Generate 3-5 verification questions about your evaluation
|
|
138
|
+
- Example: "Am I being biased toward a particular pattern?"
|
|
139
|
+
- Example: "Did I consider the project's existing architecture?"
|
|
140
|
+
- Answer each question honestly
|
|
141
|
+
- Adjust your evaluation based on answers
|
|
142
|
+
|
|
143
|
+
3. **Final Critique**:
|
|
144
|
+
Provide structured output:
|
|
145
|
+
|
|
146
|
+
### Solution Optimality Score: X/10
|
|
147
|
+
|
|
148
|
+
### Approach Assessment:
|
|
149
|
+
**Chosen Approach**: [brief description]
|
|
150
|
+
**Strengths**:
|
|
151
|
+
- [strength 1 with explanation]
|
|
152
|
+
- [strength 2]
|
|
153
|
+
|
|
154
|
+
**Weaknesses**:
|
|
155
|
+
- [weakness 1 with explanation]
|
|
156
|
+
- [weakness 2]
|
|
157
|
+
|
|
158
|
+
### Alternative Approaches Considered:
|
|
159
|
+
1. **[Alternative 1]**
|
|
160
|
+
- Pros: [list]
|
|
161
|
+
- Cons: [list]
|
|
162
|
+
- Recommendation: [Better/Worse/Equivalent to current approach]
|
|
163
|
+
|
|
164
|
+
2. **[Alternative 2]**
|
|
165
|
+
- Pros: [list]
|
|
166
|
+
- Cons: [list]
|
|
167
|
+
- Recommendation: [Better/Worse/Equivalent]
|
|
168
|
+
|
|
169
|
+
### Design Pattern Assessment:
|
|
170
|
+
- Patterns used correctly: [list]
|
|
171
|
+
- Patterns missing: [list with explanation why they'd help]
|
|
172
|
+
- Anti-patterns detected: [list with severity]
|
|
173
|
+
|
|
174
|
+
### Scalability & Maintainability:
|
|
175
|
+
- [assessment of how solution scales]
|
|
176
|
+
- [assessment of maintainability]
|
|
177
|
+
|
|
178
|
+
### Verification Questions & Answers:
|
|
179
|
+
Q1: [question]
|
|
180
|
+
A1: [answer that influenced your critique]
|
|
181
|
+
...
|
|
182
|
+
|
|
183
|
+
Be objective and consider the context of the project (size, team, constraints).
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
#### Judge 3: Code Quality Reviewer
|
|
187
|
+
|
|
188
|
+
**Prompt for Agent:**
|
|
189
|
+
|
|
190
|
+
```
|
|
191
|
+
You are a Code Quality Reviewer assessing implementation quality and suggesting refactorings.
|
|
192
|
+
|
|
193
|
+
## Your Task
|
|
194
|
+
|
|
195
|
+
Review the code quality and identify refactoring opportunities:
|
|
196
|
+
|
|
197
|
+
[CONTEXT]
|
|
198
|
+
Files Changed: {file list}
|
|
199
|
+
Implementation Details: {code snippets or file contents as needed}
|
|
200
|
+
Project Conventions: {any known conventions from codebase}
|
|
201
|
+
[/CONTEXT]
|
|
202
|
+
|
|
203
|
+
## Your Process (Chain-of-Verification)
|
|
204
|
+
|
|
205
|
+
1. **Initial Review**:
|
|
206
|
+
- Assess code readability and clarity
|
|
207
|
+
- Check for code smells and complexity
|
|
208
|
+
- Evaluate naming, structure, and organization
|
|
209
|
+
- Look for duplication and coupling issues
|
|
210
|
+
- Verify error handling and edge cases
|
|
211
|
+
|
|
212
|
+
2. **Self-Verification**:
|
|
213
|
+
- Generate 3-5 verification questions about your review
|
|
214
|
+
- Example: "Am I applying personal preferences vs. objective quality criteria?"
|
|
215
|
+
- Example: "Did I consider the existing codebase style?"
|
|
216
|
+
- Answer each question honestly
|
|
217
|
+
- Refine your review based on answers
|
|
218
|
+
|
|
219
|
+
3. **Final Critique**:
|
|
220
|
+
Provide structured output:
|
|
221
|
+
|
|
222
|
+
### Code Quality Score: X/10
|
|
223
|
+
|
|
224
|
+
### Quality Assessment:
|
|
225
|
+
**Strengths**:
|
|
226
|
+
- [strength 1 with specific example]
|
|
227
|
+
- [strength 2]
|
|
228
|
+
|
|
229
|
+
**Issues Found**:
|
|
230
|
+
- [issue 1] - Severity: [Critical/High/Medium/Low]
|
|
231
|
+
- Location: [file:line]
|
|
232
|
+
- Example: [code snippet]
|
|
233
|
+
|
|
234
|
+
### Refactoring Opportunities:
|
|
235
|
+
|
|
236
|
+
1. **[Refactoring 1 Name]** - Priority: [High/Medium/Low]
|
|
237
|
+
- Current code:
|
|
238
|
+
```
|
|
239
|
+
[code snippet]
|
|
240
|
+
```
|
|
241
|
+
- Suggested refactoring:
|
|
242
|
+
```
|
|
243
|
+
[improved code]
|
|
244
|
+
```
|
|
245
|
+
- Benefits: [explanation]
|
|
246
|
+
- Effort: [Small/Medium/Large]
|
|
247
|
+
|
|
248
|
+
2. **[Refactoring 2]**
|
|
249
|
+
- [same structure]
|
|
250
|
+
|
|
251
|
+
### Code Smells Detected:
|
|
252
|
+
- [smell 1] at [location] - [explanation and impact]
|
|
253
|
+
- [smell 2]
|
|
254
|
+
|
|
255
|
+
### Complexity Analysis:
|
|
256
|
+
- High complexity areas: [list with locations]
|
|
257
|
+
- Suggested simplifications: [list]
|
|
258
|
+
|
|
259
|
+
### Verification Questions & Answers:
|
|
260
|
+
Q1: [question]
|
|
261
|
+
A1: [answer that influenced your critique]
|
|
262
|
+
...
|
|
263
|
+
|
|
264
|
+
Provide specific, actionable feedback with code examples.
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
**Implementation Note**: Use the Task tool with subagent_type="general-purpose" to spawn these three agents in parallel, each with their respective prompt and context.
|
|
268
|
+
|
|
269
|
+
### Phase 3: Cross-Review & Debate
|
|
270
|
+
|
|
271
|
+
After receiving all three judge reports:
|
|
272
|
+
|
|
273
|
+
1. **Synthesize the findings**:
|
|
274
|
+
- Identify areas of agreement
|
|
275
|
+
- Identify contradictions or disagreements
|
|
276
|
+
- Note gaps in any review
|
|
277
|
+
|
|
278
|
+
2. **Conduct debate session** (if significant disagreements exist):
|
|
279
|
+
- Present conflicting viewpoints to judges
|
|
280
|
+
- Ask each judge to review the other judges' findings
|
|
281
|
+
- Example: "Requirements Validator says approach is overengineered, but Solution Architect says it's appropriate for scale. Please both review this disagreement and provide reasoning."
|
|
282
|
+
- Use Task tool to spawn follow-up agents that have context of previous reviews
|
|
283
|
+
|
|
284
|
+
3. **Reach consensus**:
|
|
285
|
+
- Synthesize the debate outcomes
|
|
286
|
+
- Identify which viewpoints are better supported
|
|
287
|
+
- Document any unresolved disagreements with "reasonable people may disagree" notation
|
|
288
|
+
|
|
289
|
+
### Phase 4: Generate Consensus Report
|
|
290
|
+
|
|
291
|
+
Compile all findings into a comprehensive, actionable report:
|
|
292
|
+
|
|
293
|
+
```markdown
|
|
294
|
+
# 🔍 Work Critique Report
|
|
295
|
+
|
|
296
|
+
## Executive Summary
|
|
297
|
+
[2-3 sentences summarizing overall assessment]
|
|
298
|
+
|
|
299
|
+
**Overall Quality Score**: X/10 (average of three judge scores)
|
|
300
|
+
|
|
301
|
+
---
|
|
302
|
+
|
|
303
|
+
## 📊 Judge Scores
|
|
304
|
+
|
|
305
|
+
| Judge | Score | Key Finding |
|
|
306
|
+
|-------|-------|-------------|
|
|
307
|
+
| Requirements Validator | X/10 | [one-line summary] |
|
|
308
|
+
| Solution Architect | X/10 | [one-line summary] |
|
|
309
|
+
| Code Quality Reviewer | X/10 | [one-line summary] |
|
|
310
|
+
|
|
311
|
+
---
|
|
312
|
+
|
|
313
|
+
## ✅ Strengths
|
|
314
|
+
|
|
315
|
+
[Synthesized list of what was done well, with specific examples]
|
|
316
|
+
|
|
317
|
+
1. **[Strength 1]**
|
|
318
|
+
- Source: [which judge(s) noted this]
|
|
319
|
+
- Evidence: [specific example]
|
|
320
|
+
|
|
321
|
+
---
|
|
322
|
+
|
|
323
|
+
## ⚠️ Issues & Gaps
|
|
324
|
+
|
|
325
|
+
### Critical Issues
|
|
326
|
+
[Issues that need immediate attention]
|
|
327
|
+
|
|
328
|
+
- **[Issue 1]**
|
|
329
|
+
- Identified by: [judge name]
|
|
330
|
+
- Location: [file:line if applicable]
|
|
331
|
+
- Impact: [explanation]
|
|
332
|
+
- Recommendation: [what to do]
|
|
333
|
+
|
|
334
|
+
### High Priority
|
|
335
|
+
[Important but not blocking]
|
|
336
|
+
|
|
337
|
+
### Medium Priority
|
|
338
|
+
[Nice to have improvements]
|
|
339
|
+
|
|
340
|
+
### Low Priority
|
|
341
|
+
[Minor polish items]
|
|
342
|
+
|
|
343
|
+
---
|
|
344
|
+
|
|
345
|
+
## 🎯 Requirements Alignment
|
|
346
|
+
|
|
347
|
+
[Detailed breakdown from Requirements Validator]
|
|
348
|
+
|
|
349
|
+
**Requirements Met**: X/Y
|
|
350
|
+
**Coverage**: Z%
|
|
351
|
+
|
|
352
|
+
[Specific requirements table with status]
|
|
353
|
+
|
|
354
|
+
---
|
|
355
|
+
|
|
356
|
+
## 🏗️ Solution Architecture
|
|
357
|
+
|
|
358
|
+
[Key insights from Solution Architect]
|
|
359
|
+
|
|
360
|
+
**Chosen Approach**: [brief description]
|
|
361
|
+
|
|
362
|
+
**Alternative Approaches Considered**:
|
|
363
|
+
1. [Alternative 1] - [Why chosen approach is better/worse]
|
|
364
|
+
2. [Alternative 2] - [Why chosen approach is better/worse]
|
|
365
|
+
|
|
366
|
+
**Recommendation**: [Stick with current / Consider alternative X because...]
|
|
367
|
+
|
|
368
|
+
---
|
|
369
|
+
|
|
370
|
+
## 🔨 Refactoring Recommendations
|
|
371
|
+
|
|
372
|
+
[Prioritized list from Code Quality Reviewer]
|
|
373
|
+
|
|
374
|
+
### High Priority Refactorings
|
|
375
|
+
|
|
376
|
+
1. **[Refactoring Name]**
|
|
377
|
+
- Benefit: [explanation]
|
|
378
|
+
- Effort: [estimate]
|
|
379
|
+
- Before/After: [code examples]
|
|
380
|
+
|
|
381
|
+
### Medium Priority Refactorings
|
|
382
|
+
[similar structure]
|
|
383
|
+
|
|
384
|
+
---
|
|
385
|
+
|
|
386
|
+
## 🤝 Areas of Consensus
|
|
387
|
+
|
|
388
|
+
[List where all judges agreed]
|
|
389
|
+
|
|
390
|
+
- [Agreement 1]
|
|
391
|
+
- [Agreement 2]
|
|
392
|
+
|
|
393
|
+
---
|
|
394
|
+
|
|
395
|
+
## 💬 Areas of Debate
|
|
396
|
+
|
|
397
|
+
[If applicable - where judges disagreed]
|
|
398
|
+
|
|
399
|
+
**Debate 1: [Topic]**
|
|
400
|
+
- Requirements Validator position: [summary]
|
|
401
|
+
- Solution Architect position: [summary]
|
|
402
|
+
- Resolution: [consensus reached or "reasonable disagreement"]
|
|
403
|
+
|
|
404
|
+
---
|
|
405
|
+
|
|
406
|
+
## 📋 Action Items (Prioritized)
|
|
407
|
+
|
|
408
|
+
Based on the critique, here are recommended next steps:
|
|
409
|
+
|
|
410
|
+
**Must Do**:
|
|
411
|
+
- [ ] [Critical action 1]
|
|
412
|
+
- [ ] [Critical action 2]
|
|
413
|
+
|
|
414
|
+
**Should Do**:
|
|
415
|
+
- [ ] [High priority action 1]
|
|
416
|
+
- [ ] [High priority action 2]
|
|
417
|
+
|
|
418
|
+
**Could Do**:
|
|
419
|
+
- [ ] [Medium priority action 1]
|
|
420
|
+
- [ ] [Nice to have action 2]
|
|
421
|
+
|
|
422
|
+
---
|
|
423
|
+
|
|
424
|
+
## 🎓 Learning Opportunities
|
|
425
|
+
|
|
426
|
+
[Lessons that could improve future work]
|
|
427
|
+
|
|
428
|
+
- [Learning 1]
|
|
429
|
+
- [Learning 2]
|
|
430
|
+
|
|
431
|
+
---
|
|
432
|
+
|
|
433
|
+
## 📝 Conclusion
|
|
434
|
+
|
|
435
|
+
[Final assessment paragraph summarizing whether the work meets quality standards and key takeaways]
|
|
436
|
+
|
|
437
|
+
**Verdict**: ✅ Ready to ship | ⚠️ Needs improvements before shipping | ❌ Requires significant rework
|
|
438
|
+
|
|
439
|
+
---
|
|
440
|
+
|
|
441
|
+
*Generated using Multi-Agent Debate + LLM-as-a-Judge pattern*
|
|
442
|
+
*Review Date: [timestamp]*
|
|
443
|
+
```
|
|
444
|
+
|
|
445
|
+
## Important Guidelines
|
|
446
|
+
|
|
447
|
+
1. **Be Objective**: Base assessments on evidence, not preferences
|
|
448
|
+
2. **Be Specific**: Always cite file locations, line numbers, and code examples
|
|
449
|
+
3. **Be Constructive**: Frame criticism as opportunities for improvement
|
|
450
|
+
4. **Be Balanced**: Acknowledge both strengths and weaknesses
|
|
451
|
+
5. **Be Actionable**: Provide concrete recommendations with examples
|
|
452
|
+
6. **Consider Context**: Account for project constraints, team size, timelines
|
|
453
|
+
7. **Avoid Bias**: Don't favor certain patterns/styles without justification
|
|
454
|
+
|
|
455
|
+
## Usage Examples
|
|
456
|
+
|
|
457
|
+
```bash
|
|
458
|
+
# Review recent work from conversation
|
|
459
|
+
/critique
|
|
460
|
+
|
|
461
|
+
# Review specific files
|
|
462
|
+
/critique src/feature.ts src/feature.test.ts
|
|
463
|
+
|
|
464
|
+
# Review with specific focus
|
|
465
|
+
/critique --focus=security
|
|
466
|
+
|
|
467
|
+
# Review a git commit
|
|
468
|
+
/critique HEAD~1..HEAD
|
|
469
|
+
```
|
|
470
|
+
|
|
471
|
+
## Notes
|
|
472
|
+
|
|
473
|
+
- This is a **report-only** command - it does not make changes
|
|
474
|
+
- The review may take 2-5 minutes due to multi-agent coordination
|
|
475
|
+
- Scores are relative to professional development standards
|
|
476
|
+
- Disagreements between judges are valuable insights, not failures
|
|
477
|
+
- Use findings to inform future development decisions
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "reflexion-critique",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Multi-perspective critique workflow using debate and consensus",
|
|
5
|
+
"triggers": [
|
|
6
|
+
"critique",
|
|
7
|
+
"reflective review",
|
|
8
|
+
"multi perspective",
|
|
9
|
+
"judge with debate",
|
|
10
|
+
"consensus"
|
|
11
|
+
],
|
|
12
|
+
"applicable_agents": [
|
|
13
|
+
"critic"
|
|
14
|
+
],
|
|
15
|
+
"max_context_tokens": 2400,
|
|
16
|
+
"entry_file": "SKILL.md"
|
|
17
|
+
}
|