opencode-multiagent 0.2.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +83 -0
- package/CHANGELOG.md +31 -0
- package/CONTRIBUTING.md +36 -0
- package/README.md +44 -168
- package/README.tr.md +84 -0
- package/RELEASE.md +68 -0
- package/agents/AGENTS.md +91 -0
- package/agents/auditor.md +67 -23
- package/agents/{worker.md → coder.md} +24 -17
- package/agents/docmaster.md +91 -0
- package/agents/executor.md +63 -79
- package/agents/planner.md +78 -58
- package/agents/reviewer.md +31 -15
- package/agents/scout.md +25 -17
- package/agents/sec-coder.md +83 -0
- package/agents/ui-coder.md +77 -0
- package/commands/board.md +17 -0
- package/commands/execute.md +9 -7
- package/commands/init-deep.md +7 -6
- package/commands/init.md +5 -5
- package/commands/inspect.md +6 -5
- package/commands/plan.md +8 -6
- package/commands/quality.md +4 -3
- package/commands/review.md +5 -3
- package/commands/status.md +5 -3
- package/defaults/AGENTS.md +48 -0
- package/defaults/opencode-multiagent.json +180 -0
- package/defaults/opencode-multiagent.schema.json +265 -0
- package/dist/control-plane.d.ts +4 -0
- package/dist/control-plane.d.ts.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1916 -0
- package/dist/opencode-multiagent/compiler.d.ts +25 -0
- package/dist/opencode-multiagent/compiler.d.ts.map +1 -0
- package/dist/opencode-multiagent/constants.d.ts +128 -0
- package/dist/opencode-multiagent/constants.d.ts.map +1 -0
- package/dist/opencode-multiagent/correlation.d.ts +21 -0
- package/dist/opencode-multiagent/correlation.d.ts.map +1 -0
- package/dist/opencode-multiagent/defaults.d.ts +10 -0
- package/dist/opencode-multiagent/defaults.d.ts.map +1 -0
- package/dist/opencode-multiagent/hooks.d.ts +62 -0
- package/dist/opencode-multiagent/hooks.d.ts.map +1 -0
- package/dist/opencode-multiagent/log.d.ts +2 -0
- package/dist/opencode-multiagent/log.d.ts.map +1 -0
- package/dist/opencode-multiagent/markdown.d.ts +8 -0
- package/dist/opencode-multiagent/markdown.d.ts.map +1 -0
- package/dist/opencode-multiagent/mcp.d.ts +3 -0
- package/dist/opencode-multiagent/mcp.d.ts.map +1 -0
- package/dist/opencode-multiagent/policy.d.ts +5 -0
- package/dist/opencode-multiagent/policy.d.ts.map +1 -0
- package/dist/opencode-multiagent/quality.d.ts +18 -0
- package/dist/opencode-multiagent/quality.d.ts.map +1 -0
- package/dist/opencode-multiagent/runtime.d.ts +7 -0
- package/dist/opencode-multiagent/runtime.d.ts.map +1 -0
- package/dist/opencode-multiagent/session-tracker.d.ts +32 -0
- package/dist/opencode-multiagent/session-tracker.d.ts.map +1 -0
- package/dist/opencode-multiagent/skills.d.ts +17 -0
- package/dist/opencode-multiagent/skills.d.ts.map +1 -0
- package/dist/opencode-multiagent/supervision.d.ts +26 -0
- package/dist/opencode-multiagent/supervision.d.ts.map +1 -0
- package/dist/opencode-multiagent/task-manager.d.ts +54 -0
- package/dist/opencode-multiagent/task-manager.d.ts.map +1 -0
- package/dist/opencode-multiagent/telemetry.d.ts +28 -0
- package/dist/opencode-multiagent/telemetry.d.ts.map +1 -0
- package/dist/opencode-multiagent/tools.d.ts +87 -0
- package/dist/opencode-multiagent/tools.d.ts.map +1 -0
- package/dist/opencode-multiagent/types.d.ts +36 -0
- package/dist/opencode-multiagent/types.d.ts.map +1 -0
- package/dist/opencode-multiagent/utils.d.ts +9 -0
- package/dist/opencode-multiagent/utils.d.ts.map +1 -0
- package/docs/agents.md +148 -0
- package/docs/agents.tr.md +149 -0
- package/docs/configuration.md +244 -0
- package/docs/configuration.tr.md +244 -0
- package/docs/usage-guide.md +224 -0
- package/docs/usage-guide.tr.md +225 -0
- package/examples/opencode.with-overrides.json +3 -7
- package/package.json +23 -13
- package/skills/AGENTS.md +51 -0
- package/skills/advanced-evaluation/SKILL.md +37 -21
- package/skills/advanced-evaluation/manifest.json +2 -13
- package/skills/cek-context-engineering/SKILL.md +159 -87
- package/skills/cek-context-engineering/manifest.json +1 -3
- package/skills/cek-prompt-engineering/SKILL.md +13 -10
- package/skills/cek-prompt-engineering/manifest.json +1 -3
- package/skills/cek-test-prompt/SKILL.md +38 -28
- package/skills/cek-test-prompt/manifest.json +1 -3
- package/skills/cek-thought-based-reasoning/SKILL.md +75 -21
- package/skills/cek-thought-based-reasoning/manifest.json +1 -3
- package/skills/context-degradation/SKILL.md +14 -13
- package/skills/context-degradation/manifest.json +1 -3
- package/skills/debate/SKILL.md +23 -78
- package/skills/debate/manifest.json +2 -12
- package/skills/design-first/manifest.json +2 -13
- package/skills/dispatching-parallel-agents/SKILL.md +14 -3
- package/skills/dispatching-parallel-agents/manifest.json +1 -4
- package/skills/drift-analysis/SKILL.md +50 -29
- package/skills/drift-analysis/manifest.json +2 -12
- package/skills/evaluation/manifest.json +2 -12
- package/skills/executing-plans/SKILL.md +15 -8
- package/skills/executing-plans/manifest.json +1 -3
- package/skills/handoff-protocols/manifest.json +2 -12
- package/skills/parallel-investigation/SKILL.md +25 -12
- package/skills/parallel-investigation/manifest.json +1 -4
- package/skills/reflexion-critique/SKILL.md +21 -10
- package/skills/reflexion-critique/manifest.json +1 -3
- package/skills/reflexion-reflect/SKILL.md +36 -34
- package/skills/reflexion-reflect/manifest.json +2 -10
- package/skills/root-cause-analysis/manifest.json +2 -13
- package/skills/sadd-judge-with-debate/SKILL.md +50 -26
- package/skills/sadd-judge-with-debate/manifest.json +1 -3
- package/skills/structured-code-review/manifest.json +2 -11
- package/skills/task-decomposition/manifest.json +2 -13
- package/skills/verification-before-completion/manifest.json +2 -15
- package/skills/verification-gates/SKILL.md +27 -19
- package/skills/verification-gates/manifest.json +2 -12
- package/agents/advisor.md +0 -57
- package/agents/critic.md +0 -127
- package/agents/deep-worker.md +0 -65
- package/agents/devil.md +0 -36
- package/agents/heavy-worker.md +0 -68
- package/agents/lead.md +0 -155
- package/agents/librarian.md +0 -62
- package/agents/qa.md +0 -50
- package/agents/quick.md +0 -65
- package/agents/scribe.md +0 -78
- package/agents/strategist.md +0 -63
- package/agents/ui-heavy-worker.md +0 -62
- package/agents/ui-worker.md +0 -69
- package/agents/validator.md +0 -47
- package/defaults/agent-settings.json +0 -102
- package/defaults/agent-settings.schema.json +0 -25
- package/defaults/flags.json +0 -35
- package/defaults/flags.schema.json +0 -119
- package/defaults/mcp-defaults.json +0 -47
- package/defaults/mcp-defaults.schema.json +0 -38
- package/defaults/profiles.json +0 -53
- package/defaults/profiles.schema.json +0 -60
- package/defaults/team-profiles.json +0 -83
- package/src/control-plane.ts +0 -21
- package/src/index.ts +0 -8
- package/src/opencode-multiagent/compiler.ts +0 -168
- package/src/opencode-multiagent/constants.ts +0 -178
- package/src/opencode-multiagent/file-lock.ts +0 -90
- package/src/opencode-multiagent/hooks.ts +0 -599
- package/src/opencode-multiagent/log.ts +0 -12
- package/src/opencode-multiagent/mailbox.ts +0 -287
- package/src/opencode-multiagent/markdown.ts +0 -99
- package/src/opencode-multiagent/mcp.ts +0 -35
- package/src/opencode-multiagent/policy.ts +0 -67
- package/src/opencode-multiagent/quality.ts +0 -140
- package/src/opencode-multiagent/runtime.ts +0 -55
- package/src/opencode-multiagent/skills.ts +0 -144
- package/src/opencode-multiagent/supervision.ts +0 -156
- package/src/opencode-multiagent/task-manager.ts +0 -148
- package/src/opencode-multiagent/team-manager.ts +0 -219
- package/src/opencode-multiagent/team-tools.ts +0 -359
- package/src/opencode-multiagent/telemetry.ts +0 -124
- package/src/opencode-multiagent/utils.ts +0 -54
|
@@ -20,7 +20,7 @@ This command implements iterative multi-judge debate:
|
|
|
20
20
|
|
|
21
21
|
```
|
|
22
22
|
Phase 0: Setup
|
|
23
|
-
mkdir -p .specs/reports
|
|
23
|
+
mkdir -p .opencode/reports
|
|
24
24
|
│
|
|
25
25
|
Phase 1: Independent Analysis
|
|
26
26
|
┌─ Judge 1 → {name}.1.md ─┐
|
|
@@ -46,12 +46,13 @@ Phase 2: Debate Round (iterative) │
|
|
|
46
46
|
Before starting evaluation, ensure the reports directory exists:
|
|
47
47
|
|
|
48
48
|
```bash
|
|
49
|
-
mkdir -p .specs/reports
|
|
49
|
+
mkdir -p .opencode/reports
|
|
50
50
|
```
|
|
51
51
|
|
|
52
|
-
**Report naming convention:** `.specs/reports/{solution-name}-{YYYY-MM-DD}.[1|2|3].md`
|
|
52
|
+
**Report naming convention:** `.opencode/reports/{solution-name}-{YYYY-MM-DD}.[1|2|3].md`
|
|
53
53
|
|
|
54
54
|
Where:
|
|
55
|
+
|
|
55
56
|
- `{solution-name}` - Derived from solution filename (e.g., `users-api` from `src/api/users.ts`)
|
|
56
57
|
- `{YYYY-MM-DD}` - Current date
|
|
57
58
|
- `[1|2|3]` - Judge number
|
|
@@ -64,7 +65,7 @@ Launch **3 independent judge agents in parallel** (recommended: Opus for rigor):
|
|
|
64
65
|
- Path to solution(s) being evaluated
|
|
65
66
|
- Evaluation criteria with weights
|
|
66
67
|
- Clear rubric for scoring
|
|
67
|
-
2. Each produces **independent assessment** saved to `.specs/reports/{solution-name}-{date}.[1|2|3].md`
|
|
68
|
+
2. Each produces **independent assessment** saved to `.opencode/reports/{solution-name}-{date}.[1|2|3].md`
|
|
68
69
|
3. Reports must include:
|
|
69
70
|
- Per-criterion scores with evidence
|
|
70
71
|
- Specific quotes/examples supporting ratings
|
|
@@ -91,12 +92,13 @@ You are Judge {N} evaluating a solution independently.
|
|
|
91
92
|
</evaluation_criteria>
|
|
92
93
|
|
|
93
94
|
<output_file>
|
|
94
|
-
.
|
|
95
|
+
.opencode/reports/{solution-name}-{date}.{N}.md
|
|
95
96
|
</output_file>
|
|
96
97
|
|
|
97
|
-
|
|
98
|
+
Apply the evaluation methodology criteria below.
|
|
98
99
|
|
|
99
100
|
Instructions:
|
|
101
|
+
|
|
100
102
|
1. Read the solution thoroughly
|
|
101
103
|
2. For each criterion:
|
|
102
104
|
- Find specific evidence (quote exact text)
|
|
@@ -121,8 +123,8 @@ For each debate round (max 3 rounds):
|
|
|
121
123
|
Launch **3 debate agents in parallel**:
|
|
122
124
|
|
|
123
125
|
1. Each judge agent receives:
|
|
124
|
-
- Path to their own previous report (`.
|
|
125
|
-
- Paths to other judges' reports (`.
|
|
126
|
+
- Path to their own previous report (`.opencode/reports/{solution-name}-{date}.[1|2|3].md`)
|
|
127
|
+
- Paths to other judges' reports (`.opencode/reports/{solution-name}-{date}.[1|2|3].md`)
|
|
126
128
|
- The original solution
|
|
127
129
|
2. Each judge:
|
|
128
130
|
- Identifies disagreements with other judges (>1 point score gap on any criterion)
|
|
@@ -141,11 +143,11 @@ Launch **3 debate agents in parallel**:
|
|
|
141
143
|
You are Judge {N} in debate round {R}.
|
|
142
144
|
|
|
143
145
|
<your_previous_report>
|
|
144
|
-
{path to .specs/reports/{solution-name}-{date}.{N}.md}
|
|
146
|
+
{path to .opencode/reports/{solution-name}-{date}.{N}.md}
|
|
145
147
|
</your_previous_report>
|
|
146
148
|
|
|
147
149
|
<other_judges_reports>
|
|
148
|
-
Judge 1: .specs/reports/{solution-name}-{date}.1.md
|
|
150
|
+
Judge 1: .opencode/reports/{solution-name}-{date}.1.md
|
|
149
151
|
...
|
|
150
152
|
</other_judges_reports>
|
|
151
153
|
|
|
@@ -158,12 +160,13 @@ Judge 1: .specs/reports/{solution-name}-{date}.1.md
|
|
|
158
160
|
</solution_path>
|
|
159
161
|
|
|
160
162
|
<output_file>
|
|
161
|
-
.
|
|
163
|
+
.opencode/reports/{solution-name}-{date}.{N}.md (append to existing file)
|
|
162
164
|
</output_file>
|
|
163
165
|
|
|
164
|
-
|
|
166
|
+
Apply the evaluation methodology principles below.
|
|
165
167
|
|
|
166
168
|
Instructions:
|
|
169
|
+
|
|
167
170
|
1. Read your previous assessment from {your_previous_report}
|
|
168
171
|
2. Read all other judges' reports
|
|
169
172
|
3. Identify disagreements (where your scores differ by >1 point)
|
|
@@ -182,6 +185,7 @@ Instructions:
|
|
|
182
185
|
### Disagreements Identified
|
|
183
186
|
|
|
184
187
|
**Disagreement with Judge {X} on Criterion "{Name}"**
|
|
188
|
+
|
|
185
189
|
- My score: {my_score}/5
|
|
186
190
|
- Their score: {their_score}/5
|
|
187
191
|
- My defense: [quote evidence supporting my score]
|
|
@@ -192,6 +196,7 @@ Instructions:
|
|
|
192
196
|
### Revised Assessment
|
|
193
197
|
|
|
194
198
|
After considering other judges' arguments:
|
|
199
|
+
|
|
195
200
|
- **Criterion "{Name}"**: [Maintained {X}/5 | Revised from {X} to {Y}/5]
|
|
196
201
|
- Reason for change: [what convinced me] OR
|
|
197
202
|
- Reason maintained: [why I stand by original score]
|
|
@@ -201,11 +206,13 @@ After considering other judges' arguments:
|
|
|
201
206
|
**New Weighted Score**: {updated_total}/5.0
|
|
202
207
|
|
|
203
208
|
## Evidences
|
|
209
|
+
|
|
204
210
|
[specific quotes]
|
|
205
211
|
|
|
206
|
-
---
|
|
212
|
+
---
|
|
207
213
|
|
|
208
214
|
CRITICAL:
|
|
215
|
+
|
|
209
216
|
- Only revise if you find their evidence compelling
|
|
210
217
|
- Defend your original scores if you still believe them
|
|
211
218
|
- Quote specific evidence from the solution
|
|
@@ -216,11 +223,13 @@ CRITICAL:
|
|
|
216
223
|
After each debate round, check for consensus:
|
|
217
224
|
|
|
218
225
|
**Consensus achieved if:**
|
|
226
|
+
|
|
219
227
|
- All judges' overall scores within 0.5 points of each other
|
|
220
228
|
- No criterion has >1 point disagreement across any two judges
|
|
221
229
|
- All judges explicitly state they accept the consensus
|
|
222
230
|
|
|
223
231
|
**If no consensus after 3 rounds:**
|
|
232
|
+
|
|
224
233
|
- Report persistent disagreements
|
|
225
234
|
- Provide all judge reports for human review
|
|
226
235
|
- Flag that automated evaluation couldn't reach consensus
|
|
@@ -230,7 +239,7 @@ After each debate round, check for consensus:
|
|
|
230
239
|
**Step 1: Run Independent Analysis (Round 1)**
|
|
231
240
|
|
|
232
241
|
1. Launch 3 judge agents in parallel (Judge 1, 2, 3)
|
|
233
|
-
2. Each writes their independent assessment to `.
|
|
242
|
+
2. Each writes their independent assessment to `.opencode/reports/{solution-name}-{date}.[1|2|3].md`
|
|
234
243
|
3. Wait for all 3 agents to complete
|
|
235
244
|
|
|
236
245
|
**Step 2: Check for Consensus**
|
|
@@ -238,10 +247,12 @@ After each debate round, check for consensus:
|
|
|
238
247
|
Let's work through this systematically to ensure accurate consensus detection.
|
|
239
248
|
|
|
240
249
|
Read all three reports and extract:
|
|
250
|
+
|
|
241
251
|
- Each judge's overall weighted score
|
|
242
252
|
- Each judge's score for every criterion
|
|
243
253
|
|
|
244
254
|
Check consensus step by step:
|
|
255
|
+
|
|
245
256
|
1. First, extract all overall scores from each report and list them explicitly
|
|
246
257
|
2. Calculate the difference between the highest and lowest overall scores
|
|
247
258
|
- If difference ≤ 0.5 points → overall consensus achieved
|
|
@@ -288,10 +299,10 @@ Let's synthesize the evaluation results step by step.
|
|
|
288
299
|
- Number of rounds to reach consensus
|
|
289
300
|
- Final recommendation with clear justification
|
|
290
301
|
- If there is no consensus:
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
302
|
+
- All judges' final scores showing disagreements
|
|
303
|
+
- Specific criteria where consensus wasn't reached
|
|
304
|
+
- Analysis of why consensus couldn't be reached
|
|
305
|
+
- Flag for human review
|
|
295
306
|
4. Command complete
|
|
296
307
|
|
|
297
308
|
### Phase 3: Consensus Report
|
|
@@ -308,25 +319,31 @@ Let's compile the final consensus by analyzing each component systematically.
|
|
|
308
319
|
First, let's consolidate all judges' final scores:
|
|
309
320
|
|
|
310
321
|
| Criterion | Judge 1 | Judge 2 | Judge 3 | Final |
|
|
311
|
-
|
|
322
|
+
| --------- | ------- | ------- | ------- | ----- |
|
|
312
323
|
| {Name} | {X}/5 | {X}/5 | {X}/5 | {X}/5 |
|
|
324
|
+
|
|
313
325
|
...
|
|
314
326
|
|
|
315
327
|
**Consensus Overall Score**: {avg}/5.0
|
|
316
328
|
|
|
317
329
|
## Consensus Strengths
|
|
330
|
+
|
|
318
331
|
[Review each judge's identified strengths and extract the common themes that all judges agreed upon]
|
|
319
332
|
|
|
320
333
|
## Consensus Weaknesses
|
|
334
|
+
|
|
321
335
|
[Review each judge's identified weaknesses and extract the common themes that all judges agreed upon]
|
|
322
336
|
|
|
323
337
|
## Debate Summary
|
|
338
|
+
|
|
324
339
|
Let's trace how consensus was reached:
|
|
340
|
+
|
|
325
341
|
- Rounds to consensus: {N}
|
|
326
342
|
- Initial disagreements: {list with specific criteria and score gaps}
|
|
327
343
|
- How resolved: {for each disagreement, explain what evidence or argument led to resolution}
|
|
328
344
|
|
|
329
345
|
## Final Recommendation
|
|
346
|
+
|
|
330
347
|
Based on the consensus scores and the key strengths/weaknesses identified:
|
|
331
348
|
{Pass/Fail/Needs Revision with clear justification tied to the evidence}
|
|
332
349
|
```
|
|
@@ -334,11 +351,11 @@ Based on the consensus scores and the key strengths/weaknesses identified:
|
|
|
334
351
|
<output>
|
|
335
352
|
The command produces:
|
|
336
353
|
|
|
337
|
-
1. **Reports directory**: `.
|
|
338
|
-
2. **Initial reports**: `.
|
|
354
|
+
1. **Reports directory**: `.opencode/reports/` (created if not exists)
|
|
355
|
+
2. **Initial reports**: `.opencode/reports/{solution-name}-{date}.1.md`, `.opencode/reports/{solution-name}-{date}.2.md`, `.opencode/reports/{solution-name}-{date}.3.md`
|
|
339
356
|
3. **Debate updates**: Appended sections in each report file per round
|
|
340
357
|
4. **Final synthesis**: Replied to user (consensus or disagreement summary)
|
|
341
|
-
</output>
|
|
358
|
+
</output>
|
|
342
359
|
|
|
343
360
|
## Best Practices
|
|
344
361
|
|
|
@@ -347,6 +364,7 @@ The command produces:
|
|
|
347
364
|
Choose 3-5 weighted criteria relevant to the solution type:
|
|
348
365
|
|
|
349
366
|
**Code evaluation:**
|
|
367
|
+
|
|
350
368
|
- Correctness (30%) - Does it work? Handles edge cases?
|
|
351
369
|
- Design Quality (25%) - Clean architecture? Maintainable?
|
|
352
370
|
- Efficiency (20%) - Performance considerations?
|
|
@@ -354,6 +372,7 @@ Choose 3-5 weighted criteria relevant to the solution type:
|
|
|
354
372
|
- Testing (10%) - Test coverage? Test quality?
|
|
355
373
|
|
|
356
374
|
**Design/Architecture evaluation:**
|
|
375
|
+
|
|
357
376
|
- Completeness (30%) - All requirements addressed?
|
|
358
377
|
- Feasibility (25%) - Can it actually be built?
|
|
359
378
|
- Scalability (20%) - Handles growth?
|
|
@@ -361,6 +380,7 @@ Choose 3-5 weighted criteria relevant to the solution type:
|
|
|
361
380
|
- Documentation (10%) - Clear and comprehensive?
|
|
362
381
|
|
|
363
382
|
**Documentation evaluation:**
|
|
383
|
+
|
|
364
384
|
- Accuracy (35%) - Technically correct?
|
|
365
385
|
- Completeness (30%) - Covers all necessary topics?
|
|
366
386
|
- Clarity (20%) - Easy to understand?
|
|
@@ -392,28 +412,33 @@ Choose 3-5 weighted criteria relevant to the solution type:
|
|
|
392
412
|
```
|
|
393
413
|
|
|
394
414
|
**Round 1 outputs** (assuming date 2025-01-15):
|
|
395
|
-
|
|
396
|
-
- `.
|
|
397
|
-
- `.
|
|
415
|
+
|
|
416
|
+
- `.opencode/reports/users-api-2025-01-15.1.md` - Judge 1 scores correctness 4/5, security 3/5
|
|
417
|
+
- `.opencode/reports/users-api-2025-01-15.2.md` - Judge 2 scores correctness 4/5, security 5/5
|
|
418
|
+
- `.opencode/reports/users-api-2025-01-15.3.md` - Judge 3 scores correctness 5/5, security 4/5
|
|
398
419
|
|
|
399
420
|
**Disagreement detected:** Security scores range from 3-5
|
|
400
421
|
|
|
401
422
|
**Round 2 debate:**
|
|
423
|
+
|
|
402
424
|
- Judge 1 defends 3/5: "Missing rate limiting, input validation incomplete"
|
|
403
425
|
- Judge 2 challenges: "Rate limiting exists in middleware (line 45)"
|
|
404
426
|
- Judge 1 revises to 4/5: "Missed middleware, but input validation still weak"
|
|
405
427
|
- Judge 3 defends 4/5: "Input validation adequate for requirements"
|
|
406
428
|
|
|
407
429
|
**Round 2 outputs:**
|
|
430
|
+
|
|
408
431
|
- All judges now 4-5/5 on security (within 1 point)
|
|
409
432
|
- Disagreement on input validation remains
|
|
410
433
|
|
|
411
434
|
**Round 3 debate:**
|
|
435
|
+
|
|
412
436
|
- Judges examine specific validation code
|
|
413
437
|
- Judge 2 revises to 4/5: "Upon re-examination, email validation regex is weak"
|
|
414
438
|
- Consensus: Security = 4/5
|
|
415
439
|
|
|
416
440
|
**Final consensus:**
|
|
441
|
+
|
|
417
442
|
```
|
|
418
443
|
Correctness: 4.3/5
|
|
419
444
|
Design: 4.5/5
|
|
@@ -423,4 +448,3 @@ Documentation: 4.0/5
|
|
|
423
448
|
|
|
424
449
|
Overall: 4.3/5 - PASS
|
|
425
450
|
```
|
|
426
|
-
|
|
@@ -2,17 +2,8 @@
|
|
|
2
2
|
"name": "structured-code-review",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Structured checklist for local code review passes",
|
|
5
|
-
"triggers": [
|
|
6
|
-
|
|
7
|
-
"code review",
|
|
8
|
-
"check code",
|
|
9
|
-
"examine"
|
|
10
|
-
],
|
|
11
|
-
"applicable_agents": [
|
|
12
|
-
"reviewer",
|
|
13
|
-
"worker",
|
|
14
|
-
"heavy-worker"
|
|
15
|
-
],
|
|
5
|
+
"triggers": ["review", "code review", "check code", "examine"],
|
|
6
|
+
"applicable_agents": ["planner", "reviewer", "auditor"],
|
|
16
7
|
"max_context_tokens": 2000,
|
|
17
8
|
"entry_file": "SKILL.md"
|
|
18
9
|
}
|
|
@@ -2,19 +2,8 @@
|
|
|
2
2
|
"name": "task-decomposition",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Break larger requests into clear executable subtasks",
|
|
5
|
-
"triggers": [
|
|
6
|
-
|
|
7
|
-
"break down",
|
|
8
|
-
"split",
|
|
9
|
-
"tasks",
|
|
10
|
-
"planning",
|
|
11
|
-
"subtasks"
|
|
12
|
-
],
|
|
13
|
-
"applicable_agents": [
|
|
14
|
-
"planner",
|
|
15
|
-
"executor",
|
|
16
|
-
"worker"
|
|
17
|
-
],
|
|
5
|
+
"triggers": ["decompose", "break down", "split", "tasks", "planning", "subtasks"],
|
|
6
|
+
"applicable_agents": ["planner"],
|
|
18
7
|
"max_context_tokens": 1500,
|
|
19
8
|
"entry_file": "SKILL.md"
|
|
20
9
|
}
|
|
@@ -2,21 +2,8 @@
|
|
|
2
2
|
"name": "verification-before-completion",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Verification checklist before claiming work complete",
|
|
5
|
-
"triggers": [
|
|
6
|
-
|
|
7
|
-
"complete",
|
|
8
|
-
"done",
|
|
9
|
-
"finish",
|
|
10
|
-
"check",
|
|
11
|
-
"validation"
|
|
12
|
-
],
|
|
13
|
-
"applicable_agents": [
|
|
14
|
-
"worker",
|
|
15
|
-
"heavy-worker",
|
|
16
|
-
"deep-worker",
|
|
17
|
-
"ui-worker",
|
|
18
|
-
"ui-heavy-worker"
|
|
19
|
-
],
|
|
5
|
+
"triggers": ["verify", "complete", "done", "finish", "check", "validation"],
|
|
6
|
+
"applicable_agents": ["coder", "ui-coder", "sec-coder"],
|
|
20
7
|
"max_context_tokens": 2000,
|
|
21
8
|
"entry_file": "SKILL.md"
|
|
22
9
|
}
|
|
@@ -16,9 +16,8 @@ tags:
|
|
|
16
16
|
difficulty: beginner
|
|
17
17
|
estimatedTime: 10
|
|
18
18
|
relatedSkills:
|
|
19
|
-
-
|
|
20
|
-
-
|
|
21
|
-
- testing/red-green-refactor
|
|
19
|
+
- design-first
|
|
20
|
+
- task-decomposition
|
|
22
21
|
---
|
|
23
22
|
|
|
24
23
|
# Verification Gates
|
|
@@ -44,6 +43,7 @@ Before starting design:
|
|
|
44
43
|
- [ ] Edge cases are identified
|
|
45
44
|
|
|
46
45
|
**Actions:**
|
|
46
|
+
|
|
47
47
|
1. Review requirements document
|
|
48
48
|
2. Identify any unclear items
|
|
49
49
|
3. Get explicit confirmation on ambiguous points
|
|
@@ -61,6 +61,7 @@ Before starting implementation:
|
|
|
61
61
|
- [ ] Design has been reviewed (self or peer)
|
|
62
62
|
|
|
63
63
|
**Actions:**
|
|
64
|
+
|
|
64
65
|
1. Walk through design against requirements
|
|
65
66
|
2. Review with rubber duck or teammate
|
|
66
67
|
3. Check for missing pieces
|
|
@@ -78,6 +79,7 @@ Before calling task complete:
|
|
|
78
79
|
- [ ] Dependencies are appropriate
|
|
79
80
|
|
|
80
81
|
**Actions:**
|
|
82
|
+
|
|
81
83
|
1. Run full test suite
|
|
82
84
|
2. Self-review the diff
|
|
83
85
|
3. Check for code smells
|
|
@@ -94,6 +96,7 @@ Before merging:
|
|
|
94
96
|
- [ ] Documentation is updated
|
|
95
97
|
|
|
96
98
|
**Actions:**
|
|
99
|
+
|
|
97
100
|
1. Test the full user flow
|
|
98
101
|
2. Run integration test suite
|
|
99
102
|
3. Compare performance metrics
|
|
@@ -110,6 +113,7 @@ Before marking complete:
|
|
|
110
113
|
- [ ] Stakeholders can verify
|
|
111
114
|
|
|
112
115
|
**Actions:**
|
|
116
|
+
|
|
113
117
|
1. Smoke test in environment
|
|
114
118
|
2. Check error logs and metrics
|
|
115
119
|
3. Get stakeholder sign-off
|
|
@@ -150,6 +154,7 @@ Gates requiring human judgment:
|
|
|
150
154
|
## Manual Verification Checklist
|
|
151
155
|
|
|
152
156
|
Before Code Review:
|
|
157
|
+
|
|
153
158
|
- [ ] I've tested my changes locally
|
|
154
159
|
- [ ] I've written/updated tests
|
|
155
160
|
- [ ] I've read my own diff
|
|
@@ -157,6 +162,7 @@ Before Code Review:
|
|
|
157
162
|
- [ ] I've updated documentation
|
|
158
163
|
|
|
159
164
|
Before Deployment:
|
|
165
|
+
|
|
160
166
|
- [ ] Code review approved
|
|
161
167
|
- [ ] QA verified (if applicable)
|
|
162
168
|
- [ ] Stakeholder approved (if required)
|
|
@@ -167,13 +173,13 @@ Before Deployment:
|
|
|
167
173
|
|
|
168
174
|
Gates that apply in specific situations:
|
|
169
175
|
|
|
170
|
-
| Condition
|
|
171
|
-
|
|
172
|
-
| Security-related
|
|
173
|
-
| Public API change
|
|
174
|
-
| Database change
|
|
175
|
-
| Performance-sensitive | Performance test
|
|
176
|
-
| Breaking change
|
|
176
|
+
| Condition | Required Gates |
|
|
177
|
+
| --------------------- | ------------------------------ |
|
|
178
|
+
| Security-related | Security review |
|
|
179
|
+
| Public API change | API review + migration plan |
|
|
180
|
+
| Database change | DBA review + backup plan |
|
|
181
|
+
| Performance-sensitive | Performance test |
|
|
182
|
+
| Breaking change | Deprecation notice + migration |
|
|
177
183
|
|
|
178
184
|
## Implementing Gates
|
|
179
185
|
|
|
@@ -213,14 +219,17 @@ Task Complete
|
|
|
213
219
|
**Purpose:** [What this gate ensures]
|
|
214
220
|
|
|
215
221
|
**Checklist:**
|
|
222
|
+
|
|
216
223
|
- [ ] Item 1
|
|
217
224
|
- [ ] Item 2
|
|
218
225
|
- [ ] Item 3
|
|
219
226
|
|
|
220
227
|
**Verification Method:**
|
|
228
|
+
|
|
221
229
|
- [How to verify each item]
|
|
222
230
|
|
|
223
231
|
**Failure Actions:**
|
|
232
|
+
|
|
224
233
|
- [What to do if gate fails]
|
|
225
234
|
|
|
226
235
|
**Approver:** [Who can approve passage]
|
|
@@ -265,17 +274,16 @@ jobs:
|
|
|
265
274
|
|
|
266
275
|
## Quick Reference
|
|
267
276
|
|
|
268
|
-
| Phase
|
|
269
|
-
|
|
270
|
-
| Design
|
|
271
|
-
| Implementation | Design
|
|
272
|
-
| Review
|
|
273
|
-
| Merge
|
|
274
|
-
| Deploy
|
|
277
|
+
| Phase | Gate Before | Key Checks |
|
|
278
|
+
| -------------- | -------------- | ------------------------------ |
|
|
279
|
+
| Design | Requirements | Clear, complete, approved |
|
|
280
|
+
| Implementation | Design | Reviewed, feasible |
|
|
281
|
+
| Review | Implementation | Tests, conventions, working |
|
|
282
|
+
| Merge | Review | Approved, conflicts resolved |
|
|
283
|
+
| Deploy | Merge | Environment ready, plan exists |
|
|
275
284
|
|
|
276
285
|
## Integration with Other Skills
|
|
277
286
|
|
|
278
287
|
- **design-first**: Gates validate design before implementation
|
|
279
288
|
- **task-decomposition**: Gates between task phases
|
|
280
|
-
- **
|
|
281
|
-
- **collaboration/structured-review**: Review is a gate
|
|
289
|
+
- **structured-code-review**: Review is a gate
|
|
@@ -2,18 +2,8 @@
|
|
|
2
2
|
"name": "verification-gates",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Verification gates and acceptance checkpoints between risky phases",
|
|
5
|
-
"triggers": [
|
|
6
|
-
|
|
7
|
-
"quality gate",
|
|
8
|
-
"milestone check",
|
|
9
|
-
"approval step",
|
|
10
|
-
"go no go"
|
|
11
|
-
],
|
|
12
|
-
"applicable_agents": [
|
|
13
|
-
"planner",
|
|
14
|
-
"auditor",
|
|
15
|
-
"strategist"
|
|
16
|
-
],
|
|
5
|
+
"triggers": ["verification gate", "quality gate", "milestone check", "approval step", "go no go"],
|
|
6
|
+
"applicable_agents": ["planner", "auditor"],
|
|
17
7
|
"max_context_tokens": 1800,
|
|
18
8
|
"entry_file": "SKILL.md"
|
|
19
9
|
}
|
package/agents/advisor.md
DELETED
|
@@ -1,57 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
description: Read-only coding advisor for workers that are blocked, low-confidence, or choosing between implementation paths
|
|
3
|
-
mode: subagent
|
|
4
|
-
model: openai/gpt-5.4
|
|
5
|
-
temperature: 0
|
|
6
|
-
steps: 20
|
|
7
|
-
permission:
|
|
8
|
-
"*": deny
|
|
9
|
-
read:
|
|
10
|
-
"*": allow
|
|
11
|
-
"*.env": deny
|
|
12
|
-
"*.env.*": deny
|
|
13
|
-
"*.env.example": allow
|
|
14
|
-
glob: allow
|
|
15
|
-
grep: allow
|
|
16
|
-
list: allow
|
|
17
|
-
lsp: allow
|
|
18
|
-
todoread: allow
|
|
19
|
-
todowrite: allow
|
|
20
|
-
code_index_set_project_path: allow
|
|
21
|
-
code_index_search_code_advanced: allow
|
|
22
|
-
code_index_find_files: allow
|
|
23
|
-
code_index_get_file_summary: allow
|
|
24
|
-
code_index_get_symbol_body: allow
|
|
25
|
-
repo_git_status: allow
|
|
26
|
-
repo_git_diff: allow
|
|
27
|
-
task: deny
|
|
28
|
-
skill:
|
|
29
|
-
"*": deny
|
|
30
|
-
task-decomposition: allow
|
|
31
|
-
root-cause-analysis: allow
|
|
32
|
-
design-first: allow
|
|
33
|
-
edit: deny
|
|
34
|
-
bash: allow
|
|
35
|
-
webfetch: deny
|
|
36
|
-
websearch: deny
|
|
37
|
-
codesearch: deny
|
|
38
|
-
external_directory: allow
|
|
39
|
-
---
|
|
40
|
-
|
|
41
|
-
You are `advisor`.
|
|
42
|
-
|
|
43
|
-
Role
|
|
44
|
-
- Help workers when they are stuck, low-confidence, or choosing between implementation paths.
|
|
45
|
-
- Inspect local code and return direction without editing.
|
|
46
|
-
- Prefer the narrowest answer that unblocks the worker's assigned task.
|
|
47
|
-
|
|
48
|
-
Caller contract
|
|
49
|
-
- Provide the task, the blocker, and the most relevant files or symbols.
|
|
50
|
-
- Ask one focused question.
|
|
51
|
-
|
|
52
|
-
Output
|
|
53
|
-
- `## Bottom Line`
|
|
54
|
-
- `## Diagnosis`
|
|
55
|
-
- `## Recommended Next Edits`
|
|
56
|
-
- `## Checks`
|
|
57
|
-
- `## Uncertainty`
|