xp-gate 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/adapter-common.sh +192 -0
  2. package/adapters/cpp.sh +76 -0
  3. package/adapters/dart.sh +41 -0
  4. package/adapters/flutter.sh +41 -0
  5. package/adapters/go.sh +59 -0
  6. package/adapters/iac.sh +189 -0
  7. package/adapters/java.sh +191 -0
  8. package/adapters/kotlin.sh +77 -0
  9. package/adapters/objectivec.sh +38 -0
  10. package/adapters/powershell.sh +138 -0
  11. package/adapters/python.sh +104 -0
  12. package/adapters/shell.sh +55 -0
  13. package/adapters/swift.sh +44 -0
  14. package/adapters/typescript.sh +61 -0
  15. package/bin/xp-gate.js +157 -0
  16. package/hooks/adapter-common.sh +192 -0
  17. package/hooks/pre-commit +1667 -0
  18. package/hooks/pre-push +395 -0
  19. package/lib/__tests__/detect-deps.test.js +209 -0
  20. package/lib/__tests__/doctor.test.js +448 -0
  21. package/lib/__tests__/download-skill.test.js +281 -0
  22. package/lib/__tests__/init.test.js +327 -0
  23. package/lib/__tests__/install-skill.test.js +326 -0
  24. package/lib/__tests__/migrate.test.js +212 -0
  25. package/lib/__tests__/rollback.test.js +183 -0
  26. package/lib/__tests__/ui-detector.test.ts +200 -0
  27. package/lib/__tests__/uninstall-skill.test.js +189 -0
  28. package/lib/__tests__/uninstall.test.js +589 -0
  29. package/lib/__tests__/update-skill.test.js +276 -0
  30. package/lib/detect-deps.js +157 -0
  31. package/lib/doctor.js +370 -0
  32. package/lib/download-skill.js +96 -0
  33. package/lib/init.js +367 -0
  34. package/lib/install-skill.js +184 -0
  35. package/lib/migrate.js +120 -0
  36. package/lib/rollback.js +78 -0
  37. package/lib/ui-detector.ts +99 -0
  38. package/lib/uninstall-skill.js +69 -0
  39. package/lib/uninstall.js +401 -0
  40. package/lib/update-skill.js +90 -0
  41. package/package.json +39 -0
  42. package/plugins/claude-code/.claude-plugin/plugin.json +21 -0
  43. package/plugins/claude-code/bin/delphi-review-guard.sh +68 -0
  44. package/plugins/claude-code/bin/xp-gate-check +47 -0
  45. package/plugins/claude-code/hooks/hooks.json +37 -0
  46. package/skills/delphi-review/.delphi-config.json.example +45 -0
  47. package/skills/delphi-review/AGENTS.md +54 -0
  48. package/skills/delphi-review/INSTALL.md +152 -0
  49. package/skills/delphi-review/SKILL.md +371 -0
  50. package/skills/delphi-review/evals/evals.json +82 -0
  51. package/skills/delphi-review/opencode.json.delphi.example +56 -0
  52. package/skills/delphi-review/references/code-walkthrough.md +486 -0
  53. package/skills/ralph-loop/SKILL.md +330 -0
  54. package/skills/ralph-loop/evals/evals.json +311 -0
  55. package/skills/ralph-loop/evolution-history.json +59 -0
  56. package/skills/ralph-loop/evolution-log.md +16 -0
  57. package/skills/ralph-loop/references/components/memory.md +55 -0
  58. package/skills/ralph-loop/references/components/middleware.md +54 -0
  59. package/skills/ralph-loop/references/components/skill-invocations.md +39 -0
  60. package/skills/ralph-loop/references/components/system-prompt.md +24 -0
  61. package/skills/ralph-loop/references/components/tool-descriptions.md +32 -0
  62. package/skills/ralph-loop/references/phase-2-build-ralph.md +89 -0
  63. package/skills/ralph-loop/templates/progress-log.md +36 -0
  64. package/skills/sprint-flow/SKILL.md +600 -0
  65. package/skills/sprint-flow/evals/evals.json +78 -0
  66. package/skills/sprint-flow/evolution-history.json +39 -0
  67. package/skills/sprint-flow/evolution-log.md +23 -0
  68. package/skills/sprint-flow/references/components/memory.md +87 -0
  69. package/skills/sprint-flow/references/components/middleware.md +72 -0
  70. package/skills/sprint-flow/references/components/skill-invocations.md +104 -0
  71. package/skills/sprint-flow/references/components/system-prompt.md +27 -0
  72. package/skills/sprint-flow/references/components/tool-descriptions.md +96 -0
  73. package/skills/sprint-flow/references/phase-0-think.md +115 -0
  74. package/skills/sprint-flow/references/phase-1-plan.md +178 -0
  75. package/skills/sprint-flow/references/phase-2-build.md +198 -0
  76. package/skills/sprint-flow/references/phase-3-review.md +213 -0
  77. package/skills/sprint-flow/references/phase-4-uat.md +125 -0
  78. package/skills/sprint-flow/references/phase-5-feedback.md +100 -0
  79. package/skills/sprint-flow/references/phase-6-ship.md +193 -0
  80. package/skills/sprint-flow/references/phase-7-land.md +140 -0
  81. package/skills/sprint-flow/references/phase-8-cleanup.md +192 -0
  82. package/skills/sprint-flow/templates/emergent-issues-template.md +120 -0
  83. package/skills/sprint-flow/templates/pain-document-template.md +115 -0
  84. package/skills/sprint-flow/templates/sprint-summary-template.md +120 -0
  85. package/skills/test-specification-alignment/AGENTS.md +59 -0
  86. package/skills/test-specification-alignment/SKILL.md +605 -0
  87. package/skills/test-specification-alignment/evals/evals.json +75 -0
  88. package/skills/test-specification-alignment/references/alignment-verification-algorithm.md +493 -0
  89. package/skills/test-specification-alignment/references/phase2-constraint-enforcement.md +431 -0
  90. package/skills/test-specification-alignment/references/specification-format.md +348 -0
@@ -0,0 +1,371 @@
1
+ ---
2
+ name: delphi-review
3
+ description: "Delphi consensus review: multi-round anonymous expert review until unanimous APPROVAL. Supports design/code-walkthrough modes. 2-3 experts from different providers. MANDATORY before implementation, design, or architecture decisions. Trigger: 'review this design', '评审这个需求', 'design review', '多专家评审', 'consensus review', 'code walkthrough', 'push review', or any request for multi-expert review of requirements, design docs, architecture, or PRs."
4
+ ---
5
+
6
+ # Delphi Consensus Review
7
+
8
+ ## 核心原则
9
+
10
+ **Delphi 方法只有一个目的:得到所有专家一致认可的可行方案。**
11
+
12
+ ### 四大核心特性(RAND 方法论)
13
+
14
+ 1. **匿名性** — Round 1 专家互不知道对方意见
15
+ 2. **迭代** — 多轮直到共识,不是固定轮数
16
+ 3. **受控反馈** — 从 Round 2 起,每轮看到其他专家意见
17
+ 4. **统计共识** — >=95% 一致才算共识
18
+
19
+ ### 质量优先
20
+
21
+ | 原则 | 说明 |
22
+ |------|------|
23
+ | Token 是投资 | 相比后期修复成本,评审消耗微不足道 |
24
+ | APPROVED 才是终点 | REQUEST_CHANGES 必须修复并重新评审 |
25
+ | 零容忍 | Critical/Major 问题全部必须处理,不可跳过或降级 |
26
+
27
+ 详见:Anti-Patterns 章节。
28
+
29
+ ---
30
+
31
+ ## 评审模式
32
+
33
+ | 模式 | 触发 | 用途 | 输出 |
34
+ |------|------|------|------|
35
+ | `design`(默认) | `/delphi-review` | 需求/设计/架构/PR 评审 | 共识报告 + specification.yaml |
36
+ | `code-walkthrough` | `/delphi-review --mode code-walkthrough` | git push 前代码走查 | `.code-walkthrough-result.json` |
37
+
38
+ **Code Walkthrough 模式**的完整规范已移至 `references/code-walkthrough.md`。当用户使用 `--mode code-walkthrough` 时,读取该文件并执行其中定义的全部流程。
39
+
40
+ ---
41
+
42
+ ## 参数配置
43
+
44
+ ### 专家数量与角色
45
+
46
+ | 配置 | 专家 | 适用场景 |
47
+ |------|------|---------|
48
+ | 2 专家(默认) | A(架构) + B(实现) | 代码变更、小型设计 |
49
+ | 3 专家 | A(架构) + B(实现) + C(可行性仲裁) | 架构决策、需求文档 |
50
+
51
+ > ⚠️ 至少选择 **两家不同 provider** 的模型,避免同源盲点。模型映射见 `INSTALL.md`。
52
+ >
53
+ > ### 模型选择策略(强制)
54
+ >
55
+ > **MUST 使用国产开源模型**,严禁使用国外昂贵模型(Anthropic Claude、OpenAI GPT、Google Gemini 等)。
56
+ >
57
+ > **允许的模型列表**(从以下国产开源模型厂家中选择):
58
+ >
59
+ > | 厂家 | 可选模型 |
60
+ > |------|---------|
61
+ > | 智谱 GLM | `glm-5.1`, `glm-5.0` |
62
+ > | 月之暗面 Kimi | `kimi-k2.6`, `kimi-k2.5` |
63
+ > | MiniMax | `minimax-m2.7`, `minimax-m2.5` |
64
+ > | 阿里 Qwen | `qwen3.6-plus`, `qwen3.5-plus` |
65
+ > | 深度求索 DeepSeek | `deepseek-v4-pro`, `deepseek-v4-lite` |
66
+ >
67
+ > **专家分配建议**:
68
+ >
69
+ > | Expert | 推荐模型 | 备选 |
70
+ > |--------|---------|------|
71
+ > | Expert A (架构) | `deepseek-v4-pro` | `qwen3.6-plus`, `glm-5.1` |
72
+ > | Expert B (实现) | `kimi-k2.6` | `deepseek-v4-pro`, `minimax-m2.7` |
73
+ > | Expert C (可行性) | `qwen3.6-plus` | `kimi-k2.6`, `glm-5.1` |
74
+ >
75
+ > **关键原则**:
76
+ > - ✅ 三个专家必须来自 **至少 2 家不同厂家**(如 DeepSeek + Kimi + Qwen)
77
+ > - ❌ 禁止使用 Anthropic、OpenAI、Google 等国外模型
78
+ > - ❌ 禁止三个专家全部使用同一厂家模型
79
+ > - ⚠️ 成本控制:`deepseek-v4-lite`、`kimi-k2.5`、`qwen3.5-plus` 作为轻量备选
80
+
81
+ ### 共识阈值
82
+
83
+ | 阈值 | 说明 |
84
+ |------|------|
85
+ | **>=95%** | 推荐默认 |
86
+ | 100% | 完全一致(更严格) |
87
+
88
+ ---
89
+
90
+ ## 完整流程
91
+
92
+ ```
93
+ Phase 0: 准备 → Round 1: 匿名独立评审 → 共识检查
94
+
95
+ ├─ 一致 + >=95% + APPROVED → ✅ 完成
96
+
97
+ └─ 不一致 或 <95% 或 REQUEST_CHANGES
98
+
99
+
100
+ Round 2: 交换意见 → 共识检查
101
+
102
+ ├─ 一致 + >=95% + APPROVED → ✅ 完成
103
+
104
+ └─ 仍分歧 或 REQUEST_CHANGES
105
+
106
+
107
+ Round 3: 最终立场 → 共识检查
108
+
109
+ ├─ APPROVED → ✅ 完成
110
+
111
+ └─ REQUEST_CHANGES → 修复方案 → 回到 Round 2 重新评审
112
+ ```
113
+
114
+ ---
115
+
116
+ ## Round 1: 匿名独立评审
117
+
118
+ ### 为什么必须匿名
119
+
120
+ 匿名防止 anchoring bias(锚定偏差)—— 知道其他专家意见后倾向于同意"权威",不敢提出相反观点。
121
+
122
+ ### 执行方式
123
+
124
+ 每位专家独立收到:原始文档 + 评审模板 + "独立评审,不知道其他专家意见"。
125
+
126
+ ### 输出格式
127
+
128
+ ```markdown
129
+ ## 独立评审 - Expert [A/B/C]
130
+ ### 优点
131
+ 1. [具体优点 + 文档位置]
132
+ ### 问题清单
133
+ #### Critical Issues (必须修复才能批准)
134
+ 1. [问题] - 位置: [...] - 修复建议: [...]
135
+ #### Major Concerns (必须处理)
136
+ 1. [...]
137
+ #### Minor Concerns (需要说明)
138
+ 1. [...]
139
+ ### 裁决: [APPROVED / REQUEST_CHANGES / REJECTED]
140
+ ### 置信度: [X/10]
141
+ ### 关键理由
142
+ 1. [...]
143
+ ```
144
+
145
+ ---
146
+
147
+ ## Round 2: 交换意见
148
+
149
+ ### 执行方式
150
+
151
+ 每位专家看到:原始文档 + 其他专家的评审 + "响应其他专家的关切,是否调整立场?"
152
+
153
+ ### 输出格式
154
+
155
+ ```markdown
156
+ ## Round 2 Response - Expert [A/B/C]
157
+ ### 响应其他专家关切
158
+ **Expert [X] 提到: [问题]**
159
+ - 我的立场: [同意/部分同意/不同意] - 理由: [...]
160
+ ### 更新后问题清单 / 裁决 / 置信度 / 立场变化说明
161
+ ```
162
+
163
+ ---
164
+
165
+ ## Round 3: 最终立场(如需要)
166
+
167
+ 触发条件:Round 2 后仍无共识。所有专家提交最终绑定立场。3 专家模式下若仍未达成完全一致,则以 2/3 多数裁决为准,并记录少数派意见。
168
+
169
+ ### 输出格式
170
+
171
+ ```markdown
172
+ ## Round 3 Final Position - Expert [A/B/C]
173
+ ### 最终裁决: [APPROVED / REQUEST_CHANGES / REJECTED]
174
+ ### 最终置信度: [X/10]
175
+ ### 关键理由 + 与其他专家的差异
176
+ ```
177
+
178
+ ---
179
+
180
+ ## 修复与重新评审
181
+
182
+ 如果最终裁决是 REQUEST_CHANGES 或 REJECTED:
183
+ 1. 修复所有 Critical Issues + 处理所有 Major Concerns
184
+ 2. 重新评审(从 Round 2 起步,不是 Round 1)
185
+ 3. 迭代直到 APPROVED
186
+
187
+ 修复报告格式:
188
+ ```markdown
189
+ ## 修复报告
190
+ ### Critical Issues 修复 | ### Major Concerns 处理 | ### Minor Concerns 说明
191
+ ### 请求重新评审
192
+ ```
193
+
194
+ ---
195
+
196
+ ## 终止条件
197
+
198
+ | 参数 | 默认值 | 说明 |
199
+ |------|--------|------|
200
+ | `max_review_rounds` | 5 | 超过后生成"未达成共识报告",交由人工决策 |
201
+ | `timeout` | 60min | 单次评审超时 |
202
+
203
+ ---
204
+
205
+ ## Output Format (MANDATORY)
206
+ Every review round output MUST follow this exact JSON structure:
207
+
208
+ ```json
209
+ {
210
+ "expert_id": "A|B|C",
211
+ "round": 1,
212
+ "mode": "design|code-walkthrough",
213
+ "verdict": "APPROVED|REQUEST_CHANGES|REJECTED",
214
+ "confidence": 9,
215
+ "critical_issues": ["..."],
216
+ "major_concerns": ["..."],
217
+ "minor_concerns": ["..."],
218
+ "consensus_report": {
219
+ "agreed_items": ["..."],
220
+ "disagreed_items": ["..."],
221
+ "final_verdict": "APPROVED|REQUEST_CHANGES"
222
+ }
223
+ }
224
+ ```
225
+
226
+ **Anti-patterns mapping to assertions:**
227
+ - `Round 1 → 生成报告 → "评审完成"` → Output MUST NOT have `verdict: APPROVED` if `critical_issues` exist.
228
+ - `只处理 Critical,忽略 Major` → Output MUST include `major_concerns` array, even if empty.
229
+ - `用户说"时间紧急"就跳过` → Output MUST include `round` field, proving multi-round process.
230
+
231
+ ---
232
+
233
+ ## Terminal State Checklist
234
+
235
+ <MANDATORY-CHECKLIST>
236
+
237
+ ### 你只能在以下条件全部满足后声明"Delphi review complete":
238
+
239
+ **Pre-requisites:**
240
+ - [ ] Phase 0 完成(文档验证 + 专家分配)
241
+ - [ ] Round 1 完成(所有专家匿名独立评审)
242
+ - [ ] Round 2+ 完成(交换意见 / 最终立场)
243
+
244
+ **CRITICAL — 共识验证:**
245
+ - [ ] 问题共识比例 >=95%
246
+ - [ ] 所有 Critical Issues 已解决
247
+ - [ ] 所有 Major Concerns 已处理
248
+
249
+ **CRITICAL — 裁决检查:**
250
+ - [ ] 最终裁决是 **APPROVED** 或 **APPROVED_WITH_MINOR**
251
+ - [ ] 如果 REQUEST_CHANGES → 已修复 → 已重新评审 → APPROVED
252
+
253
+ **Final Requirements:**
254
+ - [ ] 共识报告生成并保存
255
+ - [ ] 用户已确认报告
256
+ - [ ] ⭐ **IF APPROVED (design mode): 提示用户生成 specification.yaml,如用户同意则调用 specification-generator**
257
+
258
+ **IF 裁决是 REQUEST_CHANGES 或 REJECTED → CANNOT claim complete, MUST 修复并重新评审**
259
+ **IF 任何条件未满足 → CANNOT claim complete, MUST BLOCK 并通知用户**
260
+
261
+ ### ⭐ APPROVED 后必做
262
+
263
+ **Design mode**:
264
+
265
+ 1. **生成 specification.yaml(经用户确认后自动执行)** — 从设计文档提取需求到 specification.yaml。这是 test-specification-alignment 的输入,没有它 test-specification-alignment 会进入不推荐的 legacy mode。
266
+
267
+ 2. **必须提示用户** — Delphi review APPROVED 后,agent 必须主动输出以下提示:
268
+
269
+ ```
270
+ ⭐ Delphi review APPROVED 完成!
271
+
272
+ Next Step: 生成 specification.yaml
273
+
274
+ 设计文档已稳定,现在可以生成 specification.yaml 用于后续的 test-specification-alignment 验证。
275
+
276
+ 是否现在生成?
277
+ - 回答 "yes" 或 "generate spec" → 我将调用 specification-generator 自动生成
278
+ - 回答 "no" 或 "later" → 稍后手动调用 /specification-generator
279
+ ```
280
+
281
+ 3. **如果用户同意** → 立即调用 `task(subagent_type="deep", load_skills=["specification-generator"], prompt="基于以下 APPROVED 设计文档生成 specification.yaml: [粘贴设计文档内容]")`
282
+
283
+ 4. **生成完成后** → 将 specification.yaml 包含在提交中,标记为 "auto-generated from Delphi review consensus"
284
+
285
+ **Code-walkthrough mode**: 写入 `.code-walkthrough-result.json`(commit hash 匹配 HEAD,expires = timestamp + 1小时)。详见 `references/code-walkthrough.md`。
286
+
287
+ ### ⭐ 状态文件输出(MANDATORY — 防止跳过门禁)
288
+
289
+ **Design mode APPROVED 后** — 必须立即写入 `.sprint-state/delphi-reviewed.json`:
290
+
291
+ ```json
292
+ {
293
+ "mode": "design",
294
+ "timestamp": "2026-05-30T10:30:00Z",
295
+ "verdict": "APPROVED",
296
+ "consensus_ratio": 1.0,
297
+ "specification_path": ".sprint-state/phase-outputs/specification.yaml"
298
+ }
299
+ ```
300
+
301
+ **Code-walkthrough mode APPROVED 后** — 必须立即写入 `.sprint-state/delphi-reviewed.json`:
302
+
303
+ ```json
304
+ {
305
+ "mode": "code-walkthrough",
306
+ "commit": "abc123def...",
307
+ "timestamp": "2026-05-30T10:30:00Z",
308
+ "verdict": "APPROVED",
309
+ "consensus_ratio": 1.0
310
+ }
311
+ ```
312
+
313
+ **用途**: Phase 2 BUILD 入口检查 (DELPHI-GATE) 读取此文件。`verdict != "APPROVED"` → 禁止编码。
314
+
315
+ </MANDATORY-CHECKLIST>
316
+
317
+ ---
318
+
319
+ ## Anti-Patterns
320
+
321
+ | ❌ 错误 | ✅ 正确 |
322
+ |---------|---------|
323
+ | Round 1 → 生成报告 → "评审完成"(未 APPROVED) | 迭代直到 APPROVED,修复后重新评审 |
324
+ | 只处理 Critical,忽略 Major | 零容忍:Critical/Major 全部必须处理,不可跳过或降级 |
325
+ | 单专家自评 | 至少 2 位不同 provider 的专家 |
326
+ | 用户说"时间紧急"就跳过 | 评审是投资不是开销,跳过后期返工成本更高 |
327
+ | "专家几乎一致"就通过 | "几乎" = 不一致,继续到 >=95% |
328
+ | 使用 Anthropic/GPT/Gemini 等国外昂贵模型 | 必须使用国产开源模型(DeepSeek, Qwen, Kimi, GLM, MiniMax) |
329
+ | 三个专家使用同一厂家模型 | 必须来自至少 2 家不同厂家 |
330
+
331
+ **Code-walkthrough 专属 Anti-Patterns**: 详见 `references/code-walkthrough.md`。
332
+
333
+ ---
334
+
335
+ ## Red Flags
336
+
337
+ | 借口 | 现实 |
338
+ |------|------|
339
+ | "这只是小变更" | 所有变更都需要评审 |
340
+ | "Round 1 就够了" | 不够,必须多轮直到共识 |
341
+ | "生成报告就完成了" | APPROVED 才算完成 |
342
+ | "2/3 同意就是共识" | 还要检查问题共识比例 >=95% |
343
+
344
+ ---
345
+
346
+ ## 成功标准
347
+
348
+ **Delphi 评审完成的唯一标准:**
349
+ 1. ✅ 所有专家裁决 APPROVED
350
+ 2. ✅ 问题共识 >=95%
351
+ 3. ✅ 所有 Critical Issues 已修复验证
352
+ 4. ✅ 所有 Major Concerns 已处理
353
+ 5. ✅ 共识报告已生成
354
+ 6. ✅ 用户已确认
355
+
356
+ **缺少任何一项 = 未完成**
357
+ ## Output Format for Eval Assertions (MANDATORY)
358
+ Every delphi review round MUST output valid JSON:
359
+ ```json
360
+ {
361
+ "skill_name": "delphi-review",
362
+ "mode": "design|code-walkthrough",
363
+ "phase": "Round 1|Round 2|Round 3|Consensus",
364
+ "expert_id": "A|B|C",
365
+ "verdict": "APPROVED|REQUEST_CHANGES|REJECTED",
366
+ "confidence": 8,
367
+ "issues": [{"id": "string", "severity": "critical|major|minor", "description": "string"}],
368
+ "consensus_report": {"status": "pending|consensus|disagreement"}
369
+ }
370
+ ```
371
+ **Eval assertions check for:** `verdict` enum values, `confidence` range, `issues` structure, `consensus_report.status`.
@@ -0,0 +1,82 @@
1
+ {
2
+ "skill_name": "delphi-review",
3
+ "skill_path": "skills/delphi-review/SKILL.md",
4
+ "evals": [
5
+ {
6
+ "id": 1,
7
+ "name": "design-mode-full-review",
8
+ "category": "normal",
9
+ "prompt": "我需要评审一份设计文档:我们要给XP-Gate项目添加一个自动生成CHANGELOG的功能。功能包括:1) 从git log提取commit信息 2) 按照conventional commits分类 3) 生成markdown格式的changelog 4) 支持自定义模板。请用delphi-review评审这个设计。",
10
+ "expected_output": "执行完整的Delphi评审流程:Phase 0验证→Round 1匿名独立评审(至少2位Expert)→Round 2交换意见→共识检查(>=95%)→Terminal State Checklist→如果APPROVED则生成specification.yaml。绝不能只做Round 1就停止。",
11
+ "files": [],
12
+ "assertions": [
13
+ {"name": "round-1-executed", "type": "contains", "value": "Round 1"},
14
+ {"name": "round-2-executed", "type": "contains", "value": "Round 2"},
15
+ {"name": "consensus-check", "type": "contains", "value": "95%"},
16
+ {"name": "terminal-state-checklist", "type": "contains", "value": "Terminal State"},
17
+ {"name": "expert-a-present", "type": "contains", "value": "Expert A"},
18
+ {"name": "expert-b-present", "type": "contains", "value": "Expert B"},
19
+ {"name": "anonymity-mentioned", "type": "contains", "value": "匿名"},
20
+ {"name": "verdict-present", "type": "regex", "value": "(APPROVED|REQUEST_CHANGES|REJECTED)"}
21
+ ]
22
+ },
23
+ {
24
+ "id": 2,
25
+ "name": "design-mode-flawed-document",
26
+ "category": "boundary",
27
+ "prompt": "请评审以下架构决策:我们打算用单个SQLite文件存储所有用户数据、日志、审计记录和缓存。这样部署简单,只需要一个文件。请用delphi-review评审。",
28
+ "expected_output": "Expert应该识别出这个设计的严重问题(单文件SQLite不适合多用途、缺乏扩展性、并发问题等),给出REQUEST_CHANGES而非盲目APPROVED。仍需执行完整多轮流程。",
29
+ "files": [],
30
+ "assertions": [
31
+ {"name": "round-1-executed", "type": "contains", "value": "Round 1"},
32
+ {"name": "critical-issues-found", "type": "contains", "value": "Critical"},
33
+ {"name": "verdict-not-blind-approved", "type": "contains", "value": "REQUEST_CHANGES"}
34
+ ]
35
+ },
36
+ {
37
+ "id": 3,
38
+ "name": "code-walkthrough-mode",
39
+ "category": "normal",
40
+ "prompt": "我修改了几个文件准备push,需要做代码走查。变更包括:1) 新增了boy-scout.ts的calculateDelta函数 2) 修改了baseline.ts的loadBaseline函数 3) 添加了3个新的clean-code规则。请用code-walkthrough模式评审。",
41
+ "expected_output": "执行code-walkthrough模式:运行Principles Checker→Expert A架构评审(含principles_findings)→Expert B实现评审(含principles_findings)→共识检查→写入.code-walkthrough-result.json文件。",
42
+ "files": [],
43
+ "assertions": [
44
+ {"name": "code-walkthrough-mode", "type": "contains", "value": "code-walkthrough"},
45
+ {"name": "principles-findings", "type": "contains", "value": "Principles Findings"},
46
+ {"name": "expert-a-review", "type": "contains", "value": "Expert A"},
47
+ {"name": "expert-b-review", "type": "contains", "value": "Expert B"},
48
+ {"name": "result-file-generated", "type": "contains", "value": ".code-walkthrough-result.json"},
49
+ {"name": "consensus-check", "type": "contains", "value": "共识"}
50
+ ]
51
+ },
52
+ {
53
+ "id": 4,
54
+ "name": "zero-tolerance-enforcement",
55
+ "category": "boundary",
56
+ "prompt": "评审时发现了一个Critical Issue,但我觉得这个问题不大,可以忽略。评审还需要继续吗?",
57
+ "expected_output": "必须坚持零容忍原则:Critical Issues不可忽略,必须修复后重新评审。不能因为用户认为问题不大就跳过。应明确告知零容忍原则并拒绝跳过。",
58
+ "files": [],
59
+ "assertions": [
60
+ {"name": "zero-tolerance-enforced", "type": "contains", "value": "零容忍"},
61
+ {"name": "critical-not-skippable", "type": "not_contains", "value": "可以忽略"},
62
+ {"name": "requires-fix", "type": "contains", "value": "修复"}
63
+ ]
64
+ }
65
+ ],
66
+ "trigger_evals": {
67
+ "should_trigger": [
68
+ "我需要多个专家来评审这个架构设计",
69
+ "评审这份需求文档,确保没有遗漏",
70
+ "设计评审:我们的微服务拆分方案",
71
+ "代码走查,准备push了",
72
+ "delphi review this design doc"
73
+ ],
74
+ "should_not_trigger": [
75
+ "帮我写一个单元测试",
76
+ "这段代码有bug,帮我修一下",
77
+ "创建一个新的React组件",
78
+ "部署到生产环境",
79
+ "解释一下这个函数的作用"
80
+ ]
81
+ }
82
+ }
@@ -0,0 +1,56 @@
1
+ {
2
+ "$comment": [
3
+ "Delphi Review Agent Configuration Template",
4
+ "===========================================",
5
+ "Copy the relevant 'agent' block below into your opencode.json.",
6
+ "Replace YOUR_PROVIDER/YOUR_MODEL with your actual provider and model names.",
7
+ "At least 2 of the 3 agents must use different providers so that the reviewers do not share the same blind spots."
8
+ ],
9
+
10
+ "$example_providers": {
11
+ "OpenAI": "openai/gpt-4o",
12
+ "Anthropic": "anthropic/claude-sonnet-4-20250514",
13
+ "Ali_Bailian": "bailian-coding-plan/qwen3.6-plus",
14
+ "Google_Gemini": "google/gemini-2.5-pro",
15
+ "DeepSeek": "deepseek/deepseek-chat"
16
+ },
17
+
18
+ "agent": {
19
+ "delphi-reviewer-architecture": {
20
+ "description": "Delphi expert reviewer for architecture perspective. Focuses on requirements alignment, system design, and high-level coherence.",
21
+ "mode": "subagent",
22
+ "model": "YOUR_PROVIDER/YOUR_MODEL_A",
23
+ "prompt": "You are a Delphi expert reviewer specializing in architecture and requirements alignment. Review the input for: 1) Alignment with stated goals 2) Architectural coherence 3) System-level implications 4) Trade-offs and alternatives. Output structured review with verdict (APPROVED/REQUEST_CHANGES/REJECTED) and confidence level.",
24
+ "tools": {
25
+ "read": true,
26
+ "bash": true,
27
+ "write": false,
28
+ "edit": false
29
+ }
30
+ },
31
+ "delphi-reviewer-technical": {
32
+ "description": "Delphi expert reviewer for technical depth. Focuses on implementation details, code correctness, and edge cases. Used in delphi-review.",
33
+ "mode": "subagent",
34
+ "model": "YOUR_PROVIDER/YOUR_MODEL_B",
35
+ "prompt": "You are a Delphi expert reviewer specializing in technical implementation. Review the input for: 1) Implementation correctness 2) Code quality and patterns 3) Edge cases and error handling 4) Performance implications. Output structured review with verdict (APPROVED/REQUEST_CHANGES/REJECTED) and confidence level.",
36
+ "tools": {
37
+ "read": true,
38
+ "bash": true,
39
+ "write": false,
40
+ "edit": false
41
+ }
42
+ },
43
+ "delphi-reviewer-feasibility": {
44
+ "description": "Delphi expert reviewer for feasibility analysis. Focuses on practical constraints, risks, and execution viability. Used in delphi-review.",
45
+ "mode": "subagent",
46
+ "model": "YOUR_PROVIDER/YOUR_MODEL_C",
47
+ "prompt": "You are a Delphi expert reviewer specializing in feasibility and risk analysis. Review the input for: 1) Practical constraints (time, resources, dependencies) 2) Risk identification and mitigation 3) Execution complexity 4) Alternative approaches. Output structured review with verdict (APPROVED/REQUEST_CHANGES/REJECTED) and confidence level.",
48
+ "tools": {
49
+ "read": true,
50
+ "bash": true,
51
+ "write": false,
52
+ "edit": false
53
+ }
54
+ }
55
+ }
56
+ }