opencode-multiagent 0.2.1 → 0.4.0
This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- package/AGENTS.md +83 -0
- package/CHANGELOG.md +31 -0
- package/CONTRIBUTING.md +36 -0
- package/README.md +44 -168
- package/README.tr.md +84 -0
- package/RELEASE.md +68 -0
- package/agents/AGENTS.md +91 -0
- package/agents/auditor.md +67 -23
- package/agents/{worker.md → coder.md} +24 -17
- package/agents/docmaster.md +91 -0
- package/agents/executor.md +63 -79
- package/agents/planner.md +78 -58
- package/agents/reviewer.md +31 -15
- package/agents/scout.md +25 -17
- package/agents/sec-coder.md +83 -0
- package/agents/ui-coder.md +77 -0
- package/commands/board.md +17 -0
- package/commands/execute.md +9 -7
- package/commands/init-deep.md +7 -6
- package/commands/init.md +5 -5
- package/commands/inspect.md +6 -5
- package/commands/plan.md +8 -6
- package/commands/quality.md +4 -3
- package/commands/review.md +5 -3
- package/commands/status.md +5 -3
- package/defaults/AGENTS.md +48 -0
- package/defaults/opencode-multiagent.json +180 -0
- package/defaults/opencode-multiagent.schema.json +265 -0
- package/dist/control-plane.d.ts +4 -0
- package/dist/control-plane.d.ts.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1916 -0
- package/dist/opencode-multiagent/compiler.d.ts +25 -0
- package/dist/opencode-multiagent/compiler.d.ts.map +1 -0
- package/dist/opencode-multiagent/constants.d.ts +128 -0
- package/dist/opencode-multiagent/constants.d.ts.map +1 -0
- package/dist/opencode-multiagent/correlation.d.ts +21 -0
- package/dist/opencode-multiagent/correlation.d.ts.map +1 -0
- package/dist/opencode-multiagent/defaults.d.ts +10 -0
- package/dist/opencode-multiagent/defaults.d.ts.map +1 -0
- package/dist/opencode-multiagent/hooks.d.ts +62 -0
- package/dist/opencode-multiagent/hooks.d.ts.map +1 -0
- package/dist/opencode-multiagent/log.d.ts +2 -0
- package/dist/opencode-multiagent/log.d.ts.map +1 -0
- package/dist/opencode-multiagent/markdown.d.ts +8 -0
- package/dist/opencode-multiagent/markdown.d.ts.map +1 -0
- package/dist/opencode-multiagent/mcp.d.ts +3 -0
- package/dist/opencode-multiagent/mcp.d.ts.map +1 -0
- package/dist/opencode-multiagent/policy.d.ts +5 -0
- package/dist/opencode-multiagent/policy.d.ts.map +1 -0
- package/dist/opencode-multiagent/quality.d.ts +18 -0
- package/dist/opencode-multiagent/quality.d.ts.map +1 -0
- package/dist/opencode-multiagent/runtime.d.ts +7 -0
- package/dist/opencode-multiagent/runtime.d.ts.map +1 -0
- package/dist/opencode-multiagent/session-tracker.d.ts +32 -0
- package/dist/opencode-multiagent/session-tracker.d.ts.map +1 -0
- package/dist/opencode-multiagent/skills.d.ts +17 -0
- package/dist/opencode-multiagent/skills.d.ts.map +1 -0
- package/dist/opencode-multiagent/supervision.d.ts +26 -0
- package/dist/opencode-multiagent/supervision.d.ts.map +1 -0
- package/dist/opencode-multiagent/task-manager.d.ts +54 -0
- package/dist/opencode-multiagent/task-manager.d.ts.map +1 -0
- package/dist/opencode-multiagent/telemetry.d.ts +28 -0
- package/dist/opencode-multiagent/telemetry.d.ts.map +1 -0
- package/dist/opencode-multiagent/tools.d.ts +87 -0
- package/dist/opencode-multiagent/tools.d.ts.map +1 -0
- package/dist/opencode-multiagent/types.d.ts +36 -0
- package/dist/opencode-multiagent/types.d.ts.map +1 -0
- package/dist/opencode-multiagent/utils.d.ts +9 -0
- package/dist/opencode-multiagent/utils.d.ts.map +1 -0
- package/docs/agents.md +148 -0
- package/docs/agents.tr.md +149 -0
- package/docs/configuration.md +244 -0
- package/docs/configuration.tr.md +244 -0
- package/docs/usage-guide.md +224 -0
- package/docs/usage-guide.tr.md +225 -0
- package/examples/opencode.with-overrides.json +3 -7
- package/package.json +23 -13
- package/skills/AGENTS.md +51 -0
- package/skills/advanced-evaluation/SKILL.md +37 -21
- package/skills/advanced-evaluation/manifest.json +2 -13
- package/skills/cek-context-engineering/SKILL.md +159 -87
- package/skills/cek-context-engineering/manifest.json +1 -3
- package/skills/cek-prompt-engineering/SKILL.md +13 -10
- package/skills/cek-prompt-engineering/manifest.json +1 -3
- package/skills/cek-test-prompt/SKILL.md +38 -28
- package/skills/cek-test-prompt/manifest.json +1 -3
- package/skills/cek-thought-based-reasoning/SKILL.md +75 -21
- package/skills/cek-thought-based-reasoning/manifest.json +1 -3
- package/skills/context-degradation/SKILL.md +14 -13
- package/skills/context-degradation/manifest.json +1 -3
- package/skills/debate/SKILL.md +23 -78
- package/skills/debate/manifest.json +2 -12
- package/skills/design-first/manifest.json +2 -13
- package/skills/dispatching-parallel-agents/SKILL.md +14 -3
- package/skills/dispatching-parallel-agents/manifest.json +1 -4
- package/skills/drift-analysis/SKILL.md +50 -29
- package/skills/drift-analysis/manifest.json +2 -12
- package/skills/evaluation/manifest.json +2 -12
- package/skills/executing-plans/SKILL.md +15 -8
- package/skills/executing-plans/manifest.json +1 -3
- package/skills/handoff-protocols/manifest.json +2 -12
- package/skills/parallel-investigation/SKILL.md +25 -12
- package/skills/parallel-investigation/manifest.json +1 -4
- package/skills/reflexion-critique/SKILL.md +21 -10
- package/skills/reflexion-critique/manifest.json +1 -3
- package/skills/reflexion-reflect/SKILL.md +36 -34
- package/skills/reflexion-reflect/manifest.json +2 -10
- package/skills/root-cause-analysis/manifest.json +2 -13
- package/skills/sadd-judge-with-debate/SKILL.md +50 -26
- package/skills/sadd-judge-with-debate/manifest.json +1 -3
- package/skills/structured-code-review/manifest.json +2 -11
- package/skills/task-decomposition/manifest.json +2 -13
- package/skills/verification-before-completion/manifest.json +2 -15
- package/skills/verification-gates/SKILL.md +27 -19
- package/skills/verification-gates/manifest.json +2 -12
- package/agents/advisor.md +0 -57
- package/agents/critic.md +0 -127
- package/agents/deep-worker.md +0 -65
- package/agents/devil.md +0 -36
- package/agents/heavy-worker.md +0 -68
- package/agents/lead.md +0 -155
- package/agents/librarian.md +0 -62
- package/agents/qa.md +0 -50
- package/agents/quick.md +0 -65
- package/agents/scribe.md +0 -78
- package/agents/strategist.md +0 -63
- package/agents/ui-heavy-worker.md +0 -62
- package/agents/ui-worker.md +0 -69
- package/agents/validator.md +0 -47
- package/defaults/agent-settings.json +0 -102
- package/defaults/agent-settings.schema.json +0 -25
- package/defaults/flags.json +0 -35
- package/defaults/flags.schema.json +0 -119
- package/defaults/mcp-defaults.json +0 -47
- package/defaults/mcp-defaults.schema.json +0 -38
- package/defaults/profiles.json +0 -53
- package/defaults/profiles.schema.json +0 -60
- package/defaults/team-profiles.json +0 -83
- package/src/control-plane.ts +0 -21
- package/src/index.ts +0 -8
- package/src/opencode-multiagent/compiler.ts +0 -168
- package/src/opencode-multiagent/constants.ts +0 -178
- package/src/opencode-multiagent/file-lock.ts +0 -90
- package/src/opencode-multiagent/hooks.ts +0 -599
- package/src/opencode-multiagent/log.ts +0 -12
- package/src/opencode-multiagent/mailbox.ts +0 -287
- package/src/opencode-multiagent/markdown.ts +0 -99
- package/src/opencode-multiagent/mcp.ts +0 -35
- package/src/opencode-multiagent/policy.ts +0 -67
- package/src/opencode-multiagent/quality.ts +0 -140
- package/src/opencode-multiagent/runtime.ts +0 -55
- package/src/opencode-multiagent/skills.ts +0 -144
- package/src/opencode-multiagent/supervision.ts +0 -156
- package/src/opencode-multiagent/task-manager.ts +0 -148
- package/src/opencode-multiagent/team-manager.ts +0 -219
- package/src/opencode-multiagent/team-tools.ts +0 -359
- package/src/opencode-multiagent/telemetry.ts +0 -124
- package/src/opencode-multiagent/utils.ts +0 -54
|
@@ -2,18 +2,8 @@
|
|
|
2
2
|
"name": "handoff-protocols",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Guidance for safe multi-agent or multi-step handoffs",
|
|
5
|
-
"triggers": [
|
|
6
|
-
|
|
7
|
-
"transfer",
|
|
8
|
-
"transition",
|
|
9
|
-
"pass to",
|
|
10
|
-
"onboard"
|
|
11
|
-
],
|
|
12
|
-
"applicable_agents": [
|
|
13
|
-
"executor",
|
|
14
|
-
"planner",
|
|
15
|
-
"worker"
|
|
16
|
-
],
|
|
5
|
+
"triggers": ["handoff", "transfer", "transition", "pass to", "onboard"],
|
|
6
|
+
"applicable_agents": ["executor", "planner"],
|
|
17
7
|
"max_context_tokens": 1500,
|
|
18
8
|
"entry_file": "SKILL.md"
|
|
19
9
|
}
|
|
@@ -16,8 +16,8 @@ tags:
|
|
|
16
16
|
difficulty: advanced
|
|
17
17
|
estimatedTime: 15
|
|
18
18
|
relatedSkills:
|
|
19
|
-
-
|
|
20
|
-
-
|
|
19
|
+
- root-cause-analysis
|
|
20
|
+
- handoff-protocols
|
|
21
21
|
---
|
|
22
22
|
|
|
23
23
|
# Parallel Investigation
|
|
@@ -58,13 +58,15 @@ Assign threads with clear ownership:
|
|
|
58
58
|
|
|
59
59
|
```markdown
|
|
60
60
|
## Thread A: Database Performance
|
|
61
|
+
|
|
61
62
|
**Investigator:** [Name/Agent A]
|
|
62
63
|
**Duration:** 30 minutes
|
|
63
64
|
**Scope:**
|
|
65
|
+
|
|
64
66
|
- Query execution times
|
|
65
67
|
- Index utilization
|
|
66
68
|
- Connection pool metrics
|
|
67
|
-
**Report Format:** Summary + evidence
|
|
69
|
+
**Report Format:** Summary + evidence
|
|
68
70
|
```
|
|
69
71
|
|
|
70
72
|
### Phase 3: Parallel Execution
|
|
@@ -77,17 +79,22 @@ Each thread follows this pattern:
|
|
|
77
79
|
4. Prepare summary for sync point
|
|
78
80
|
|
|
79
81
|
**Thread Log Template:**
|
|
82
|
+
|
|
80
83
|
```markdown
|
|
81
84
|
## Thread: [Name]
|
|
85
|
+
|
|
82
86
|
**Start:** [Time]
|
|
83
87
|
|
|
84
88
|
### Findings
|
|
89
|
+
|
|
85
90
|
- [Timestamp] [Finding]
|
|
86
91
|
|
|
87
92
|
### Evidence
|
|
93
|
+
|
|
88
94
|
- [Log/Metric/Screenshot]
|
|
89
95
|
|
|
90
96
|
### Preliminary Conclusion
|
|
97
|
+
|
|
91
98
|
[What this thread suggests about the problem]
|
|
92
99
|
```
|
|
93
100
|
|
|
@@ -103,6 +110,7 @@ Sync Point Agenda:
|
|
|
103
110
|
```
|
|
104
111
|
|
|
105
112
|
**Sync Point Decisions:**
|
|
113
|
+
|
|
106
114
|
- **Continue**: Threads are progressing, maintain parallel execution
|
|
107
115
|
- **Pivot**: Redirect threads based on new evidence
|
|
108
116
|
- **Converge**: One thread found the answer, others join to validate
|
|
@@ -146,13 +154,13 @@ When a thread identifies the likely cause:
|
|
|
146
154
|
|
|
147
155
|
## Decision Framework
|
|
148
156
|
|
|
149
|
-
| Thread Status
|
|
150
|
-
|
|
151
|
-
| All exploring
|
|
152
|
-
| One hot lead
|
|
153
|
-
| Multiple leads
|
|
154
|
-
| All dead ends
|
|
155
|
-
| Confirmed cause | Converge, begin fix
|
|
157
|
+
| Thread Status | Action |
|
|
158
|
+
| --------------- | ------------------------------- |
|
|
159
|
+
| All exploring | Continue parallel |
|
|
160
|
+
| One hot lead | Validate lead, others support |
|
|
161
|
+
| Multiple leads | Prioritize by evidence strength |
|
|
162
|
+
| All dead ends | Reframe problem, new threads |
|
|
163
|
+
| Confirmed cause | Converge, begin fix |
|
|
156
164
|
|
|
157
165
|
## Time Management
|
|
158
166
|
|
|
@@ -176,31 +184,36 @@ Adjust sync point cadence based on incident severity — every 20 minutes for cr
|
|
|
176
184
|
# Investigation: [Problem]
|
|
177
185
|
|
|
178
186
|
## Summary
|
|
187
|
+
|
|
179
188
|
[Brief description and resolution]
|
|
180
189
|
|
|
181
190
|
## Threads Explored
|
|
182
191
|
|
|
183
192
|
### Thread A: [Area]
|
|
193
|
+
|
|
184
194
|
- Investigator: [Name]
|
|
185
195
|
- Findings: [Summary]
|
|
186
196
|
- Outcome: [Lead / Dead End / Root Cause]
|
|
187
197
|
|
|
188
198
|
## Root Cause
|
|
199
|
+
|
|
189
200
|
[Detailed explanation of what was found]
|
|
190
201
|
|
|
191
202
|
## Evidence
|
|
203
|
+
|
|
192
204
|
- [Evidence 1]
|
|
193
205
|
- [Evidence 2]
|
|
194
206
|
|
|
195
207
|
## Resolution
|
|
208
|
+
|
|
196
209
|
[What was done to fix]
|
|
197
210
|
|
|
198
211
|
## Lessons Learned
|
|
212
|
+
|
|
199
213
|
- [Learning 1]
|
|
200
214
|
```
|
|
201
215
|
|
|
202
216
|
## Integration with Other Skills
|
|
203
217
|
|
|
204
|
-
- **
|
|
205
|
-
- **debugging/hypothesis-testing**: Threads test specific hypotheses
|
|
218
|
+
- **root-cause-analysis**: Each thread follows RCA principles
|
|
206
219
|
- **handoff-protocols**: When passing a thread to another person
|
|
@@ -187,7 +187,7 @@ Be objective and consider the context of the project (size, team, constraints).
|
|
|
187
187
|
|
|
188
188
|
**Prompt for Agent:**
|
|
189
189
|
|
|
190
|
-
|
|
190
|
+
````
|
|
191
191
|
You are a Code Quality Reviewer assessing implementation quality and suggesting refactorings.
|
|
192
192
|
|
|
193
193
|
## Your Task
|
|
@@ -262,9 +262,9 @@ Project Conventions: {any known conventions from codebase}
|
|
|
262
262
|
...
|
|
263
263
|
|
|
264
264
|
Provide specific, actionable feedback with code examples.
|
|
265
|
-
|
|
265
|
+
````
|
|
266
266
|
|
|
267
|
-
**Implementation Note**: Use the Task tool with subagent_type="general
|
|
267
|
+
**Implementation Note**: Use the Task tool with subagent_type="general" to spawn these three agents in parallel, each with their respective prompt and context.
|
|
268
268
|
|
|
269
269
|
### Phase 3: Cross-Review & Debate
|
|
270
270
|
|
|
@@ -294,6 +294,7 @@ Compile all findings into a comprehensive, actionable report:
|
|
|
294
294
|
# 🔍 Work Critique Report
|
|
295
295
|
|
|
296
296
|
## Executive Summary
|
|
297
|
+
|
|
297
298
|
[2-3 sentences summarizing overall assessment]
|
|
298
299
|
|
|
299
300
|
**Overall Quality Score**: X/10 (average of three judge scores)
|
|
@@ -302,11 +303,11 @@ Compile all findings into a comprehensive, actionable report:
|
|
|
302
303
|
|
|
303
304
|
## 📊 Judge Scores
|
|
304
305
|
|
|
305
|
-
| Judge
|
|
306
|
-
|
|
307
|
-
| Requirements Validator | X/10
|
|
308
|
-
| Solution Architect
|
|
309
|
-
| Code Quality Reviewer
|
|
306
|
+
| Judge | Score | Key Finding |
|
|
307
|
+
| ---------------------- | ----- | ------------------ |
|
|
308
|
+
| Requirements Validator | X/10 | [one-line summary] |
|
|
309
|
+
| Solution Architect | X/10 | [one-line summary] |
|
|
310
|
+
| Code Quality Reviewer | X/10 | [one-line summary] |
|
|
310
311
|
|
|
311
312
|
---
|
|
312
313
|
|
|
@@ -323,6 +324,7 @@ Compile all findings into a comprehensive, actionable report:
|
|
|
323
324
|
## ⚠️ Issues & Gaps
|
|
324
325
|
|
|
325
326
|
### Critical Issues
|
|
327
|
+
|
|
326
328
|
[Issues that need immediate attention]
|
|
327
329
|
|
|
328
330
|
- **[Issue 1]**
|
|
@@ -332,12 +334,15 @@ Compile all findings into a comprehensive, actionable report:
|
|
|
332
334
|
- Recommendation: [what to do]
|
|
333
335
|
|
|
334
336
|
### High Priority
|
|
337
|
+
|
|
335
338
|
[Important but not blocking]
|
|
336
339
|
|
|
337
340
|
### Medium Priority
|
|
341
|
+
|
|
338
342
|
[Nice to have improvements]
|
|
339
343
|
|
|
340
344
|
### Low Priority
|
|
345
|
+
|
|
341
346
|
[Minor polish items]
|
|
342
347
|
|
|
343
348
|
---
|
|
@@ -360,6 +365,7 @@ Compile all findings into a comprehensive, actionable report:
|
|
|
360
365
|
**Chosen Approach**: [brief description]
|
|
361
366
|
|
|
362
367
|
**Alternative Approaches Considered**:
|
|
368
|
+
|
|
363
369
|
1. [Alternative 1] - [Why chosen approach is better/worse]
|
|
364
370
|
2. [Alternative 2] - [Why chosen approach is better/worse]
|
|
365
371
|
|
|
@@ -379,6 +385,7 @@ Compile all findings into a comprehensive, actionable report:
|
|
|
379
385
|
- Before/After: [code examples]
|
|
380
386
|
|
|
381
387
|
### Medium Priority Refactorings
|
|
388
|
+
|
|
382
389
|
[similar structure]
|
|
383
390
|
|
|
384
391
|
---
|
|
@@ -397,6 +404,7 @@ Compile all findings into a comprehensive, actionable report:
|
|
|
397
404
|
[If applicable - where judges disagreed]
|
|
398
405
|
|
|
399
406
|
**Debate 1: [Topic]**
|
|
407
|
+
|
|
400
408
|
- Requirements Validator position: [summary]
|
|
401
409
|
- Solution Architect position: [summary]
|
|
402
410
|
- Resolution: [consensus reached or "reasonable disagreement"]
|
|
@@ -408,14 +416,17 @@ Compile all findings into a comprehensive, actionable report:
|
|
|
408
416
|
Based on the critique, here are recommended next steps:
|
|
409
417
|
|
|
410
418
|
**Must Do**:
|
|
419
|
+
|
|
411
420
|
- [ ] [Critical action 1]
|
|
412
421
|
- [ ] [Critical action 2]
|
|
413
422
|
|
|
414
423
|
**Should Do**:
|
|
424
|
+
|
|
415
425
|
- [ ] [High priority action 1]
|
|
416
426
|
- [ ] [High priority action 2]
|
|
417
427
|
|
|
418
428
|
**Could Do**:
|
|
429
|
+
|
|
419
430
|
- [ ] [Medium priority action 1]
|
|
420
431
|
- [ ] [Nice to have action 2]
|
|
421
432
|
|
|
@@ -438,8 +449,8 @@ Based on the critique, here are recommended next steps:
|
|
|
438
449
|
|
|
439
450
|
---
|
|
440
451
|
|
|
441
|
-
|
|
442
|
-
|
|
452
|
+
_Generated using Multi-Agent Debate + LLM-as-a-Judge pattern_
|
|
453
|
+
_Review Date: [timestamp]_
|
|
443
454
|
```
|
|
444
455
|
|
|
445
456
|
## Important Guidelines
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: reflexion-reflect
|
|
3
|
-
description: Reflect on
|
|
3
|
+
description: Reflect on previous response and output, based on Self-refinement framework for iterative improvement with complexity triage and verification
|
|
4
4
|
argument-hint: Optional focus area or confidence threshold to use, for example "security" or "deep reflect if less than 90% confidence"
|
|
5
5
|
---
|
|
6
6
|
|
|
7
7
|
# Self-Refinement and Iterative Improvement Framework
|
|
8
8
|
|
|
9
|
-
Reflect on
|
|
9
|
+
Reflect on previous response and output.
|
|
10
10
|
|
|
11
11
|
## Your Identity (NON-NEGOTIABLE)
|
|
12
12
|
|
|
@@ -82,15 +82,13 @@ Before proceeding, evaluate your most recent output against these criteria:
|
|
|
82
82
|
- [ ] Are there edge cases that haven't been considered?
|
|
83
83
|
- [ ] Could there be unintended side effects?
|
|
84
84
|
|
|
85
|
-
4. **Dependency & Impact Verification**
|
|
85
|
+
4. **Dependency & Impact Verification**
|
|
86
86
|
- [ ] For ANY proposed addition/deletion/modification, have you checked for dependencies?
|
|
87
87
|
- [ ] Have you searched for related decisions that may be superseded or supersede this?
|
|
88
88
|
- [ ] Have you checked the configuration or docs (for example AUTHORITATIVE.yaml) for active evaluations or status?
|
|
89
89
|
- [ ] Have you searched the ecosystem for files/processes that depend on items being changed?
|
|
90
90
|
- [ ] If recommending removal of anything, have you verified nothing depends on it?
|
|
91
91
|
|
|
92
|
-
|
|
93
|
-
|
|
94
92
|
**HARD RULE:** If ANY check reveals active dependencies, evaluations, or pending decisions, FLAG THIS IN THE EVALUATION. Do not approve work that recommends changes without dependency verification.
|
|
95
93
|
|
|
96
94
|
5. **Fact-Checking Required**
|
|
@@ -202,7 +200,7 @@ When the output involves code, additionally evaluate:
|
|
|
202
200
|
// utils/dateFormatter.js
|
|
203
201
|
function formatDate(date) {
|
|
204
202
|
const d = new Date(date);
|
|
205
|
-
return `${d.getMonth()+1}/${d.getDate()}/${d.getFullYear()}`;
|
|
203
|
+
return `${d.getMonth() + 1}/${d.getDate()}/${d.getFullYear()}`;
|
|
206
204
|
}
|
|
207
205
|
```
|
|
208
206
|
|
|
@@ -366,7 +364,7 @@ const formatted = format(new Date(), 'MM/dd/yyyy');
|
|
|
366
364
|
|
|
367
365
|
1. Search for benchmark or documentation comparing both approaches
|
|
368
366
|
2. Provide algorithmic analysis
|
|
369
|
-
**Corrected Statement**: "Map performs better for large collections (10K+ items), while Object is more efficient for small sets (<100 items)"
|
|
367
|
+
**Corrected Statement**: "Map performs better for large collections (10K+ items), while Object is more efficient for small sets (<100 items)"
|
|
370
368
|
|
|
371
369
|
## NON-CODE OUTPUT REFLECTION
|
|
372
370
|
|
|
@@ -405,31 +403,35 @@ For documentation, explanations, and analysis outputs:
|
|
|
405
403
|
## Detailed Analysis
|
|
406
404
|
|
|
407
405
|
### [Criterion 1 Name] (Weight: 0.XX)
|
|
406
|
+
|
|
408
407
|
**Practical Check**: [If applicable - what you verified with tools]
|
|
409
408
|
**Analysis**: [Explain how evidence maps to rubric level]
|
|
410
409
|
**Score**: X/5
|
|
411
410
|
**Improvement**: [Specific suggestion if score < 5]
|
|
412
411
|
|
|
413
412
|
#### Evidences
|
|
413
|
+
|
|
414
414
|
[Specific quotes/references]
|
|
415
415
|
|
|
416
416
|
### [Criterion 2 Name] (Weight: 0.XX)
|
|
417
|
+
|
|
417
418
|
[Repeat pattern...]
|
|
418
419
|
|
|
419
420
|
## Score Summary
|
|
420
421
|
|
|
421
|
-
| Criterion
|
|
422
|
-
|
|
423
|
-
| Instruction Following | X/5
|
|
424
|
-
| Output Completeness
|
|
425
|
-
| Solution Quality
|
|
426
|
-
| Reasoning Quality
|
|
427
|
-
| Response Coherence
|
|
428
|
-
| **Weighted Total**
|
|
422
|
+
| Criterion | Score | Weight | Weighted |
|
|
423
|
+
| --------------------- | ----- | ------ | ------------ |
|
|
424
|
+
| Instruction Following | X/5 | 0.30 | X.XX |
|
|
425
|
+
| Output Completeness | X/5 | 0.25 | X.XX |
|
|
426
|
+
| Solution Quality | X/5 | 0.25 | X.XX |
|
|
427
|
+
| Reasoning Quality | X/5 | 0.10 | X.XX |
|
|
428
|
+
| Response Coherence | X/5 | 0.10 | X.XX |
|
|
429
|
+
| **Weighted Total** | | | **X.XX/5.0** |
|
|
429
430
|
|
|
430
431
|
## Self-Verification
|
|
431
432
|
|
|
432
433
|
**Questions Asked**:
|
|
434
|
+
|
|
433
435
|
1. [Question 1]
|
|
434
436
|
2. [Question 2]
|
|
435
437
|
3. [Question 3]
|
|
@@ -437,6 +439,7 @@ For documentation, explanations, and analysis outputs:
|
|
|
437
439
|
5. [Question 5]
|
|
438
440
|
|
|
439
441
|
**Answers**:
|
|
442
|
+
|
|
440
443
|
1. [Answer 1]
|
|
441
444
|
2. [Answer 2]
|
|
442
445
|
3. [Answer 3]
|
|
@@ -448,28 +451,27 @@ For documentation, explanations, and analysis outputs:
|
|
|
448
451
|
## Confidence Assessment
|
|
449
452
|
|
|
450
453
|
**Confidence Factors**:
|
|
454
|
+
|
|
451
455
|
- Evidence strength: [Strong / Moderate / Weak]
|
|
452
456
|
- Criterion clarity: [Clear / Ambiguous]
|
|
453
457
|
- Edge cases: [Handled / Some uncertainty]
|
|
454
458
|
|
|
455
459
|
**Confidence Level**: X.XX (Weighted Total of Criteria Scores) -> [High / Medium / Low]
|
|
456
|
-
|
|
457
460
|
```
|
|
458
461
|
|
|
459
462
|
Be objective, cite specific evidence, and focus on actionable feedback.
|
|
460
463
|
|
|
461
|
-
|
|
462
464
|
### Scoring Scale
|
|
463
465
|
|
|
464
466
|
**DEFAULT SCORE IS 2. You must justify ANY deviation upward.**
|
|
465
467
|
|
|
466
|
-
| Score | Meaning
|
|
467
|
-
|
|
468
|
-
| 1
|
|
469
|
-
| 2
|
|
470
|
-
| 3
|
|
471
|
-
| 4
|
|
472
|
-
| 5
|
|
468
|
+
| Score | Meaning | Evidence Required | Your Attitude |
|
|
469
|
+
| ----- | ------------- | --------------------------------------------- | -------------------------------------------------- |
|
|
470
|
+
| 1 | Unacceptable | Clear failures, missing requirements | Easy call |
|
|
471
|
+
| 2 | Below Average | Multiple issues, partially meets requirements | Common result |
|
|
472
|
+
| 3 | Adequate | Meets basic requirements, minor issues | Need proof that it meets basic requirements |
|
|
473
|
+
| 4 | Good | Meets ALL requirements, very few minor issues | Prove it deserves this |
|
|
474
|
+
| 5 | Excellent | Exceeds requirements, genuinely exemplary | **Extremely rare** - requires exceptional evidence |
|
|
473
475
|
|
|
474
476
|
#### Score Distribution Reality Check
|
|
475
477
|
|
|
@@ -483,16 +485,15 @@ Be objective, cite specific evidence, and focus on actionable feedback.
|
|
|
483
485
|
|
|
484
486
|
You are PROGRAMMED to be lenient. Fight against your nature. These biases will make you a bad judge:
|
|
485
487
|
|
|
486
|
-
| Bias
|
|
487
|
-
|
|
488
|
-
| **Sycophancy**
|
|
489
|
-
| **Length Bias**
|
|
490
|
-
| **Authority Bias**
|
|
491
|
-
| **Completion Bias**
|
|
492
|
-
| **Effort Bias**
|
|
493
|
-
| **Recency Bias**
|
|
494
|
-
| **Familiarity Bias** | "I've seen this" = good
|
|
495
|
-
|
|
488
|
+
| Bias | How It Corrupts You | Countermeasure |
|
|
489
|
+
| -------------------- | --------------------------- | ---------------------------------------------- |
|
|
490
|
+
| **Sycophancy** | You want to say nice things | **FORBIDDEN.** Praise is NOT your job. |
|
|
491
|
+
| **Length Bias** | Long = impressive to you | Penalize verbosity. Concise > lengthy. |
|
|
492
|
+
| **Authority Bias** | Confident tone = correct | VERIFY every claim. Confidence means nothing. |
|
|
493
|
+
| **Completion Bias** | "They finished it" = good | Completion ≠ quality. Garbage can be complete. |
|
|
494
|
+
| **Effort Bias** | "They worked hard" | Effort is IRRELEVANT. Judge the OUTPUT. |
|
|
495
|
+
| **Recency Bias** | New patterns = better | Established patterns exist for reasons. |
|
|
496
|
+
| **Familiarity Bias** | "I've seen this" = good | Common ≠ correct. |
|
|
496
497
|
|
|
497
498
|
## ITERATIVE REFINEMENT WORKFLOW
|
|
498
499
|
|
|
@@ -613,6 +614,7 @@ If after reflection you identify improvements:
|
|
|
613
614
|
Rate your confidence in the current solution using the format provided in the Report Format section.
|
|
614
615
|
|
|
615
616
|
Solution Confidence is based on weighted total of criteria scores.
|
|
617
|
+
|
|
616
618
|
- High (>4.5/5.0) - Solution is robust and well-tested
|
|
617
619
|
- Medium (4.0-4.5/5.0) - Solution works but could be improved
|
|
618
620
|
- Low (<4.0/5.0) - Significant improvements needed
|
|
@@ -2,16 +2,8 @@
|
|
|
2
2
|
"name": "reflexion-reflect",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Self-reflection workflow for iterating on previous outputs and plans",
|
|
5
|
-
"triggers": [
|
|
6
|
-
|
|
7
|
-
"self refine",
|
|
8
|
-
"iterate",
|
|
9
|
-
"improve previous answer",
|
|
10
|
-
"reflection"
|
|
11
|
-
],
|
|
12
|
-
"applicable_agents": [
|
|
13
|
-
"critic"
|
|
14
|
-
],
|
|
5
|
+
"triggers": ["reflect", "self refine", "iterate", "improve previous answer", "reflection"],
|
|
6
|
+
"applicable_agents": ["planner"],
|
|
15
7
|
"max_context_tokens": 2400,
|
|
16
8
|
"entry_file": "SKILL.md"
|
|
17
9
|
}
|
|
@@ -2,19 +2,8 @@
|
|
|
2
2
|
"name": "root-cause-analysis",
|
|
3
3
|
"version": "1.0.0",
|
|
4
4
|
"description": "Trace failures to the real cause before changing code",
|
|
5
|
-
"triggers": [
|
|
6
|
-
|
|
7
|
-
"error",
|
|
8
|
-
"fix",
|
|
9
|
-
"issue",
|
|
10
|
-
"root cause",
|
|
11
|
-
"investigate"
|
|
12
|
-
],
|
|
13
|
-
"applicable_agents": [
|
|
14
|
-
"worker",
|
|
15
|
-
"heavy-worker",
|
|
16
|
-
"deep-worker"
|
|
17
|
-
],
|
|
5
|
+
"triggers": ["debug", "error", "fix", "issue", "root cause", "investigate"],
|
|
6
|
+
"applicable_agents": ["sec-coder", "planner", "scout"],
|
|
18
7
|
"max_context_tokens": 1500,
|
|
19
8
|
"entry_file": "SKILL.md"
|
|
20
9
|
}
|