xp-gate 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/adapter-common.sh +192 -0
- package/adapters/cpp.sh +76 -0
- package/adapters/dart.sh +41 -0
- package/adapters/flutter.sh +41 -0
- package/adapters/go.sh +59 -0
- package/adapters/iac.sh +189 -0
- package/adapters/java.sh +191 -0
- package/adapters/kotlin.sh +77 -0
- package/adapters/objectivec.sh +38 -0
- package/adapters/powershell.sh +138 -0
- package/adapters/python.sh +104 -0
- package/adapters/shell.sh +55 -0
- package/adapters/swift.sh +44 -0
- package/adapters/typescript.sh +61 -0
- package/bin/xp-gate.js +157 -0
- package/hooks/adapter-common.sh +192 -0
- package/hooks/pre-commit +1667 -0
- package/hooks/pre-push +395 -0
- package/lib/__tests__/detect-deps.test.js +209 -0
- package/lib/__tests__/doctor.test.js +448 -0
- package/lib/__tests__/download-skill.test.js +281 -0
- package/lib/__tests__/init.test.js +327 -0
- package/lib/__tests__/install-skill.test.js +326 -0
- package/lib/__tests__/migrate.test.js +212 -0
- package/lib/__tests__/rollback.test.js +183 -0
- package/lib/__tests__/ui-detector.test.ts +200 -0
- package/lib/__tests__/uninstall-skill.test.js +189 -0
- package/lib/__tests__/uninstall.test.js +589 -0
- package/lib/__tests__/update-skill.test.js +276 -0
- package/lib/detect-deps.js +157 -0
- package/lib/doctor.js +370 -0
- package/lib/download-skill.js +96 -0
- package/lib/init.js +367 -0
- package/lib/install-skill.js +184 -0
- package/lib/migrate.js +120 -0
- package/lib/rollback.js +78 -0
- package/lib/ui-detector.ts +99 -0
- package/lib/uninstall-skill.js +69 -0
- package/lib/uninstall.js +401 -0
- package/lib/update-skill.js +90 -0
- package/package.json +39 -0
- package/plugins/claude-code/.claude-plugin/plugin.json +21 -0
- package/plugins/claude-code/bin/delphi-review-guard.sh +68 -0
- package/plugins/claude-code/bin/xp-gate-check +47 -0
- package/plugins/claude-code/hooks/hooks.json +37 -0
- package/skills/delphi-review/.delphi-config.json.example +45 -0
- package/skills/delphi-review/AGENTS.md +54 -0
- package/skills/delphi-review/INSTALL.md +152 -0
- package/skills/delphi-review/SKILL.md +371 -0
- package/skills/delphi-review/evals/evals.json +82 -0
- package/skills/delphi-review/opencode.json.delphi.example +56 -0
- package/skills/delphi-review/references/code-walkthrough.md +486 -0
- package/skills/ralph-loop/SKILL.md +330 -0
- package/skills/ralph-loop/evals/evals.json +311 -0
- package/skills/ralph-loop/evolution-history.json +59 -0
- package/skills/ralph-loop/evolution-log.md +16 -0
- package/skills/ralph-loop/references/components/memory.md +55 -0
- package/skills/ralph-loop/references/components/middleware.md +54 -0
- package/skills/ralph-loop/references/components/skill-invocations.md +39 -0
- package/skills/ralph-loop/references/components/system-prompt.md +24 -0
- package/skills/ralph-loop/references/components/tool-descriptions.md +32 -0
- package/skills/ralph-loop/references/phase-2-build-ralph.md +89 -0
- package/skills/ralph-loop/templates/progress-log.md +36 -0
- package/skills/sprint-flow/SKILL.md +600 -0
- package/skills/sprint-flow/evals/evals.json +78 -0
- package/skills/sprint-flow/evolution-history.json +39 -0
- package/skills/sprint-flow/evolution-log.md +23 -0
- package/skills/sprint-flow/references/components/memory.md +87 -0
- package/skills/sprint-flow/references/components/middleware.md +72 -0
- package/skills/sprint-flow/references/components/skill-invocations.md +104 -0
- package/skills/sprint-flow/references/components/system-prompt.md +27 -0
- package/skills/sprint-flow/references/components/tool-descriptions.md +96 -0
- package/skills/sprint-flow/references/phase-0-think.md +115 -0
- package/skills/sprint-flow/references/phase-1-plan.md +178 -0
- package/skills/sprint-flow/references/phase-2-build.md +198 -0
- package/skills/sprint-flow/references/phase-3-review.md +213 -0
- package/skills/sprint-flow/references/phase-4-uat.md +125 -0
- package/skills/sprint-flow/references/phase-5-feedback.md +100 -0
- package/skills/sprint-flow/references/phase-6-ship.md +193 -0
- package/skills/sprint-flow/references/phase-7-land.md +140 -0
- package/skills/sprint-flow/references/phase-8-cleanup.md +192 -0
- package/skills/sprint-flow/templates/emergent-issues-template.md +120 -0
- package/skills/sprint-flow/templates/pain-document-template.md +115 -0
- package/skills/sprint-flow/templates/sprint-summary-template.md +120 -0
- package/skills/test-specification-alignment/AGENTS.md +59 -0
- package/skills/test-specification-alignment/SKILL.md +605 -0
- package/skills/test-specification-alignment/evals/evals.json +75 -0
- package/skills/test-specification-alignment/references/alignment-verification-algorithm.md +493 -0
- package/skills/test-specification-alignment/references/phase2-constraint-enforcement.md +431 -0
- package/skills/test-specification-alignment/references/specification-format.md +348 -0
|
@@ -0,0 +1,605 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: test-specification-alignment
|
|
3
|
+
description: "测试与 Specification 对齐验证引擎。确保测试准确反映需求和设计。两阶段执行:Phase 1 验证对齐(可修改测试),Phase 2 执行测试(禁止修改测试)。MANDATORY before any release. TRIGGER: 'run tests', 'verify tests', before BUILD (TDD + review) Arbiter, before gstack-ship."
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Test-Specification Alignment Engine
|
|
7
|
+
|
|
8
|
+
## 核心原则
|
|
9
|
+
|
|
10
|
+
**测试是系统的防护网,也是系统的使用手册。测试必须准确反映原始需求和设计方案。**
|
|
11
|
+
|
|
12
|
+
### 五大特性
|
|
13
|
+
|
|
14
|
+
1. **两阶段分离** — Phase 1 可修改测试对齐,Phase 2 禁止修改测试执行
|
|
15
|
+
2. **结构化验证** — YAML specification + AST 解析,确定性验证
|
|
16
|
+
3. **freeze 约束** — Phase 2 调用 freeze skill 锁定测试目录
|
|
17
|
+
4. **失败分类** — 业务代码/测试数据/Specification/环境 四类错误分流
|
|
18
|
+
5. **零容忍** — Phase 2 绝对禁止修改/删除/跳过测试
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Output Format (MANDATORY)
|
|
23
|
+
Alignment report MUST be output as valid JSON:
|
|
24
|
+
```json
|
|
25
|
+
{
|
|
26
|
+
"alignment_status": "PASS|FAIL|BLOCKED",
|
|
27
|
+
"phase": "1|2",
|
|
28
|
+
"score": 85.5,
|
|
29
|
+
"misaligned_tests": [
|
|
30
|
+
{"test_name": "test_checkout", "spec_requirement": "REQ-003", "gap": "Missing @test annotation"}
|
|
31
|
+
],
|
|
32
|
+
"anti_pattern_detected": false,
|
|
33
|
+
"errors": []
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
**Eval assertions check for:** `alignment_status`, `phase`, `score`, `anti_pattern_detected`.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## 触发条件
|
|
41
|
+
|
|
42
|
+
### 自动触发
|
|
43
|
+
|
|
44
|
+
- BUILD (TDD + review) Round 1 后(Driver 输出 tests)
|
|
45
|
+
- Gate 1 验证前
|
|
46
|
+
- gstack-ship 发布前
|
|
47
|
+
|
|
48
|
+
### 手动触发
|
|
49
|
+
|
|
50
|
+
- `/test-specification-alignment` 命令
|
|
51
|
+
- `/verify-tests` 命令
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Specification 定义
|
|
56
|
+
|
|
57
|
+
**Specification = Requirements + Design Decisions + API Contracts**
|
|
58
|
+
|
|
59
|
+
| 组成部分 | 文件 | 格式 |
|
|
60
|
+
|----------|------|------|
|
|
61
|
+
| User Stories | `specification.yaml` → `user_stories[]` | YAML: id, actor, feature, benefit, linked_requirements |
|
|
62
|
+
| Requirements | `specification.yaml` | YAML |
|
|
63
|
+
| Acceptance Criteria | `specification.yaml` | YAML |
|
|
64
|
+
| Design Decisions | `specification.yaml` | YAML |
|
|
65
|
+
| API Contracts | `specification.yaml` | YAML |
|
|
66
|
+
|
|
67
|
+
**User Stories 追溯链**:
|
|
68
|
+
```
|
|
69
|
+
US-001 (actor/feature/benefit)
|
|
70
|
+
→ REQ-XXX-001 (requirement)
|
|
71
|
+
→ AC-XXX-001-01 (acceptance criteria)
|
|
72
|
+
→ test('REQ-XXX-001: ...') (@test annotation)
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## 核心流程
|
|
78
|
+
|
|
79
|
+
```
|
|
80
|
+
┌─────────────────────────────────────────────────────────────┐
|
|
81
|
+
│ Test-Specification Alignment Flow │
|
|
82
|
+
├─────────────────────────────────────────────────────────────┤
|
|
83
|
+
│ │
|
|
84
|
+
│ Phase 0: 准备 │
|
|
85
|
+
│ ├─ 验证 specification.yaml 存在 │
|
|
86
|
+
│ ├─ 验证 tests/ 目录存在 │
|
|
87
|
+
│ └─ ❌ 缺失 → BLOCK + 提示用户 │
|
|
88
|
+
│ └─ 提示: "先完成需求流程: │
|
|
89
|
+
│ brainstorming → delphi-review → specification-generator" │
|
|
90
|
+
│ │
|
|
91
|
+
│ Phase 1: 对齐验证 (可修改测试) │
|
|
92
|
+
│ ├─ 解析 specification.yaml (YAML parser) │
|
|
93
|
+
│ ├─ 解析测试文件 (AST parser) │
|
|
94
|
+
│ ├─ 验证对齐规则 │
|
|
95
|
+
│ ├─ 生成 Alignment Report │
|
|
96
|
+
│ └─ (可选) 修复测试 │
|
|
97
|
+
│ │
|
|
98
|
+
│ Checkpoint: Alignment Score >= 80%? │
|
|
99
|
+
│ ├─ NO → BLOCK │
|
|
100
|
+
│ └─ YES → 继续 │
|
|
101
|
+
│ │
|
|
102
|
+
│ ⭐ Pre-Phase 2: 调用 freeze skill 锁定测试目录 │
|
|
103
|
+
│ │
|
|
104
|
+
│ Phase 2: 执行测试 (禁止修改测试) │
|
|
105
|
+
│ ├─ 运行所有测试 │
|
|
106
|
+
│ ├─ IF Agent 尝试修改测试 → freeze 拦截 │
|
|
107
|
+
│ ├─ 失败分析: 业务代码/测试数据/Specification/环境 │
|
|
108
|
+
│ │ └─ Specification 错误 → ESCALATE_TO_HUMAN │
|
|
109
|
+
│ └─ 全部通过 → 继续 │
|
|
110
|
+
│ │
|
|
111
|
+
│ ⭐ Post-Phase 2: 调用 unfreeze skill 解锁测试目录 │
|
|
112
|
+
│ │
|
|
113
|
+
│ Terminal State: ✅ ALL_TESTS_PASS │
|
|
114
|
+
│ │
|
|
115
|
+
└─────────────────────────────────────────────────────────────┘
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## ⚠️ Phase 0 缺失 Specification 处理
|
|
121
|
+
|
|
122
|
+
### 错误提示格式
|
|
123
|
+
|
|
124
|
+
如果 `specification.yaml` 不存在,必须 BLOCK 并提供清晰的流程指引:
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
❌ BLOCKED: specification.yaml 不存在
|
|
128
|
+
|
|
129
|
+
要生成 specification.yaml,请先完成需求流程:
|
|
130
|
+
|
|
131
|
+
流程步骤:
|
|
132
|
+
1. brainstorming → 需求探索,生成设计文档
|
|
133
|
+
2. delphi-review → 需求评审(多轮直到 APPROVED)
|
|
134
|
+
3. spec 自动生成(从 APPROVED 设计文档提取 specification.yaml)
|
|
135
|
+
|
|
136
|
+
为什么必须这样?
|
|
137
|
+
- 在 delphi-review APPROVED 后生成,避免设计文档修改时 spec 也需重新生成
|
|
138
|
+
- 遵循"问题发现越早修复成本越低"原则
|
|
139
|
+
|
|
140
|
+
请先完成上述流程,然后再运行 test-specification-alignment。
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## 强制对齐规则
|
|
146
|
+
|
|
147
|
+
### 规则 1: 每个 Requirement 必须有测试
|
|
148
|
+
|
|
149
|
+
```yaml
|
|
150
|
+
requirement_to_test:
|
|
151
|
+
rule: "每个 REQ-* 必须对应至少一个 test case"
|
|
152
|
+
format: "test name 包含 REQ ID 或 JSDoc @test 标签"
|
|
153
|
+
|
|
154
|
+
# TypeScript 示例
|
|
155
|
+
example_ts: |
|
|
156
|
+
/**
|
|
157
|
+
* @test REQ-AUTH-001
|
|
158
|
+
* @intent 验证用户使用正确凭据可以成功登录
|
|
159
|
+
* @covers AC-AUTH-001-01, AC-AUTH-001-02
|
|
160
|
+
*/
|
|
161
|
+
test('REQ-AUTH-001: login success', () => { ... });
|
|
162
|
+
|
|
163
|
+
# Python 示例
|
|
164
|
+
example_py: |
|
|
165
|
+
# @test REQ-AUTH-001
|
|
166
|
+
# @intent 验证用户使用正确凭据可以成功登录
|
|
167
|
+
# @covers AC-AUTH-001-01, AC-AUTH-001-02
|
|
168
|
+
def test_req_auth_001_login_success():
|
|
169
|
+
...
|
|
170
|
+
|
|
171
|
+
# Go 示例
|
|
172
|
+
example_go: |
|
|
173
|
+
// @test REQ-AUTH-001
|
|
174
|
+
// @intent 验证用户使用正确凭据可以成功登录
|
|
175
|
+
// @covers AC-AUTH-001-01, AC-AUTH-001-02
|
|
176
|
+
func Test_REQ_AUTH_001_LoginSuccess(t *testing.T) { ... }
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
### 规则 2: 每个 Acceptance Criteria 必须有断言
|
|
180
|
+
|
|
181
|
+
```yaml
|
|
182
|
+
ac_to_assertion:
|
|
183
|
+
rule: "每个 AC-* 必须有对应断言覆盖"
|
|
184
|
+
format: "断言注释标注 AC ID"
|
|
185
|
+
example: |
|
|
186
|
+
// AC-AUTH-001-01: 返回 200 和 token
|
|
187
|
+
expect(response.status).toBe(200);
|
|
188
|
+
expect(response.body.token).toBeDefined();
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### 规则 3: 测试意图必须明确声明
|
|
192
|
+
|
|
193
|
+
```yaml
|
|
194
|
+
test_intent_declaration:
|
|
195
|
+
rule: "每个 test case 必须有意图声明"
|
|
196
|
+
format: "@test, @intent, @covers JSDoc 标签"
|
|
197
|
+
required_tags:
|
|
198
|
+
- "@test REQ-XXX-XXX" # 关联的 requirement ID
|
|
199
|
+
- "@intent 描述测试意图" # 测试目的
|
|
200
|
+
- "@covers AC-XXX-XXX-XX" # 覆盖的 acceptance criteria (可选)
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## 对齐验证维度
|
|
206
|
+
|
|
207
|
+
| 维度 | 权重 | 验证内容 |
|
|
208
|
+
|------|------|----------|
|
|
209
|
+
| Requirement Coverage | 30% | 每个 REQ-* 是否有对应测试 |
|
|
210
|
+
| Acceptance Criteria Coverage | 25% | 每个 AC-* 是否被断言覆盖 |
|
|
211
|
+
| Test Intent Correctness | 20% | 测试意图是否正确声明 |
|
|
212
|
+
| Edge Case Coverage | 15% | 边界条件是否覆盖 |
|
|
213
|
+
| Test Data Validity | 10% | 测试数据是否合理 |
|
|
214
|
+
|
|
215
|
+
**Pass Threshold: 80%**
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Phase 2: freeze 约束机制
|
|
220
|
+
|
|
221
|
+
### Pre-Phase 2: 锁定测试目录
|
|
222
|
+
|
|
223
|
+
```yaml
|
|
224
|
+
pre_phase2:
|
|
225
|
+
action: "INVOKE freeze skill"
|
|
226
|
+
parameters:
|
|
227
|
+
freeze_boundary:
|
|
228
|
+
- "tests/"
|
|
229
|
+
- "test/"
|
|
230
|
+
- "__tests__/"
|
|
231
|
+
- "*.test.ts"
|
|
232
|
+
- "*.test.js"
|
|
233
|
+
- "*.spec.ts"
|
|
234
|
+
- "*.spec.js"
|
|
235
|
+
- "*_test.py"
|
|
236
|
+
- "*_test.go"
|
|
237
|
+
- "cypress/"
|
|
238
|
+
- "playwright/"
|
|
239
|
+
mode: "strict"
|
|
240
|
+
|
|
241
|
+
result:
|
|
242
|
+
- "测试目录被 freeze skill 锁定"
|
|
243
|
+
- "Agent 的 Edit/Write 调用被预拦截"
|
|
244
|
+
- "返回: ✅ 测试文件已冻结"
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
### Phase 2 约束
|
|
248
|
+
|
|
249
|
+
```yaml
|
|
250
|
+
phase2_constraints:
|
|
251
|
+
forbidden_actions:
|
|
252
|
+
- action: "修改测试文件"
|
|
253
|
+
severity: "CRITICAL"
|
|
254
|
+
response: "freeze skill 返回 BLOCKED_ERROR"
|
|
255
|
+
|
|
256
|
+
- action: "删除测试文件"
|
|
257
|
+
severity: "CRITICAL"
|
|
258
|
+
response: "freeze skill 返回 BLOCKED_ERROR"
|
|
259
|
+
|
|
260
|
+
- action: "跳过测试 (test.skip, @skip, xit)"
|
|
261
|
+
severity: "CRITICAL"
|
|
262
|
+
response: "检测并拒绝"
|
|
263
|
+
|
|
264
|
+
- action: "修改断言使其永远通过"
|
|
265
|
+
severity: "CRITICAL"
|
|
266
|
+
response: "检测并拒绝"
|
|
267
|
+
|
|
268
|
+
allowed_actions:
|
|
269
|
+
- "修改业务代码"
|
|
270
|
+
- "修改配置文件"
|
|
271
|
+
- "修改环境变量"
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### Post-Phase 2: 解锁测试目录
|
|
275
|
+
|
|
276
|
+
```yaml
|
|
277
|
+
post_phase2:
|
|
278
|
+
action: "INVOKE unfreeze skill"
|
|
279
|
+
result: "测试目录解除冻结,允许后续修改"
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
---
|
|
283
|
+
|
|
284
|
+
## 失败分析分类
|
|
285
|
+
|
|
286
|
+
| 失败类型 | 判断依据 | 处理方式 |
|
|
287
|
+
|----------|----------|----------|
|
|
288
|
+
| **BUSINESS_CODE_ERROR** | 测试正确,业务代码有 bug | 修改业务代码 |
|
|
289
|
+
| **TEST_DATA_ERROR** | 测试数据不符合业务逻辑 | 回滚到 Phase 1 |
|
|
290
|
+
| **SPECIFICATION_ERROR** | 测试正确,但 specification 有误 | ESCALATE_TO_HUMAN |
|
|
291
|
+
| **ENVIRONMENT_ERROR** | 环境/依赖问题 | 修复环境配置 |
|
|
292
|
+
|
|
293
|
+
### 失败分类强制输出格式
|
|
294
|
+
|
|
295
|
+
任何 Phase 2 测试失败时,**必须**按以下格式输出:
|
|
296
|
+
|
|
297
|
+
```markdown
|
|
298
|
+
### Failure Classification
|
|
299
|
+
- **Type**: [BUSINESS_CODE_ERROR | TEST_DATA_ERROR | SPECIFICATION_ERROR | ENVIRONMENT_ERROR]
|
|
300
|
+
- **Test**: [失败的测试名称]
|
|
301
|
+
- **Root Cause**: [简要原因]
|
|
302
|
+
- **Action**: [对应处理方式]
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
### Specification 错误处理
|
|
306
|
+
|
|
307
|
+
```yaml
|
|
308
|
+
specification_issue_flow:
|
|
309
|
+
state: "ESCALATE_SPECIFICATION_ISSUE"
|
|
310
|
+
|
|
311
|
+
user_options:
|
|
312
|
+
- option: "A"
|
|
313
|
+
action: "修正 Specification → 重新 Phase 1"
|
|
314
|
+
unfreeze_tests: true
|
|
315
|
+
|
|
316
|
+
- option: "B"
|
|
317
|
+
action: "确认 Specification 正确 → 修改业务代码"
|
|
318
|
+
note: "用户明确授权"
|
|
319
|
+
|
|
320
|
+
- option: "C"
|
|
321
|
+
action: "补充 Specification 澄清歧义"
|
|
322
|
+
unfreeze_tests: true
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
---
|
|
326
|
+
|
|
327
|
+
## 状态机
|
|
328
|
+
|
|
329
|
+
| State | 名称 | 说明 |
|
|
330
|
+
|-------|------|------|
|
|
331
|
+
| 0 | IDLE | 初始状态 |
|
|
332
|
+
| 1 | PHASE0_PREPARING | 准备阶段 |
|
|
333
|
+
| 2 | PHASE0_COMPLETE | 准备完成 |
|
|
334
|
+
| 3 | PHASE1_ALIGNING | 对齐验证中 |
|
|
335
|
+
| 4 | PHASE1_ALIGNMENT_ISSUES | 发现对齐问题 |
|
|
336
|
+
| 5 | PHASE1_FIXING_TESTS | 修复测试中 |
|
|
337
|
+
| 6 | PHASE1_COMPLETE | 对齐完成 |
|
|
338
|
+
| 7 | PRE_PHASE2_FREEZE | 冻结测试目录 |
|
|
339
|
+
| 8 | CHECKPOINT_VERIFIED | 检查点通过 |
|
|
340
|
+
| 9 | PHASE2_EXECUTING | 执行测试中 |
|
|
341
|
+
| 10 | PHASE2_TEST_FAILURE | 测试失败 |
|
|
342
|
+
| 11 | PHASE2_FAILURE_ANALYSIS | 失败分析中 |
|
|
343
|
+
| 12 | PHASE2_FIXING_CODE | 修复业务代码中 |
|
|
344
|
+
| 13 | PHASE2_SPECIFICATION_ISSUE | Specification 问题 |
|
|
345
|
+
| 14 | PHASE2_COMPLETE | 测试通过 |
|
|
346
|
+
| 15 | POST_PHASE2_UNFREEZE | 解冻测试目录 |
|
|
347
|
+
| 16 | ALL_TESTS_PASS | 全部通过 |
|
|
348
|
+
|
|
349
|
+
### 阻塞状态
|
|
350
|
+
|
|
351
|
+
| State | 名称 | 说明 |
|
|
352
|
+
|-------|------|------|
|
|
353
|
+
| 90 | BLOCKED_MISSING_SPECIFICATION | 缺少 specification |
|
|
354
|
+
| 91 | BLOCKED_ALIGNMENT_TOO_LOW | 对齐分数过低 |
|
|
355
|
+
| 92 | BLOCKED_TEST_MODIFICATION_VIOLATION | 测试修改违规 |
|
|
356
|
+
| 93 | BLOCKED_SPECIFICATION_ISSUE | Specification 问题需用户决策 |
|
|
357
|
+
| 94 | BLOCKED_MAX_RETRIES_EXCEEDED | 超过最大重试次数 |
|
|
358
|
+
|
|
359
|
+
---
|
|
360
|
+
|
|
361
|
+
## 与现有 Skills 集成
|
|
362
|
+
|
|
363
|
+
### 与 BUILD (TDD + review) 集成
|
|
364
|
+
|
|
365
|
+
```
|
|
366
|
+
BUILD (TDD + review) Round 1: Driver
|
|
367
|
+
├─ 输出: sealed{code, decisions} + public{tests}
|
|
368
|
+
└────────────────────────────────────────────┘
|
|
369
|
+
│
|
|
370
|
+
▼
|
|
371
|
+
test-specification-alignment (本 skill)
|
|
372
|
+
├─ Phase 1: 验证 Driver 生成的 tests 与 requirements 对齐
|
|
373
|
+
├─ Phase 2: 执行测试验证实现
|
|
374
|
+
└────────────────────────────────────────────┘
|
|
375
|
+
│
|
|
376
|
+
▼
|
|
377
|
+
Gate 1 (verification-loop)
|
|
378
|
+
├─ Static Analysis
|
|
379
|
+
└────────────────────────────────────────────┘
|
|
380
|
+
│
|
|
381
|
+
▼
|
|
382
|
+
BUILD (TDD + review) Round 3: Arbiter
|
|
383
|
+
└─ 收到 test-specification-alignment 结果
|
|
384
|
+
```
|
|
385
|
+
|
|
386
|
+
### 与 freeze skill 集成
|
|
387
|
+
|
|
388
|
+
- Phase 2 开始前: 调用 `/freeze` 锁定测试目录
|
|
389
|
+
- Phase 2 执行中: freeze 拦截所有测试文件修改
|
|
390
|
+
- Phase 2 结束后: 调用 `/unfreeze` 解锁
|
|
391
|
+
|
|
392
|
+
---
|
|
393
|
+
|
|
394
|
+
## Agent 配置
|
|
395
|
+
|
|
396
|
+
### Phase 1 Agent
|
|
397
|
+
|
|
398
|
+
```yaml
|
|
399
|
+
phase1_agent:
|
|
400
|
+
type: oracle
|
|
401
|
+
model: Qwen3.5-Plus
|
|
402
|
+
skills:
|
|
403
|
+
- test-driven-development
|
|
404
|
+
- coding-standards
|
|
405
|
+
|
|
406
|
+
constraints:
|
|
407
|
+
can_modify_tests: true
|
|
408
|
+
must_preserve_test_intent: true
|
|
409
|
+
```
|
|
410
|
+
|
|
411
|
+
### Phase 2 Agent
|
|
412
|
+
|
|
413
|
+
```yaml
|
|
414
|
+
phase2_agent:
|
|
415
|
+
type: build
|
|
416
|
+
model: GLM-5
|
|
417
|
+
skills:
|
|
418
|
+
- test-driven-development
|
|
419
|
+
- coding-standards
|
|
420
|
+
|
|
421
|
+
constraints:
|
|
422
|
+
can_modify_tests: false # ❌ 禁止
|
|
423
|
+
can_delete_tests: false # ❌ 禁止
|
|
424
|
+
can_skip_tests: false # ❌ 禁止
|
|
425
|
+
can_modify_business_code: true # ✅ 允许
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
---
|
|
429
|
+
|
|
430
|
+
## 输出报告格式
|
|
431
|
+
|
|
432
|
+
### Alignment Report (Phase 1)
|
|
433
|
+
|
|
434
|
+
```markdown
|
|
435
|
+
## Test-Specification Alignment Report
|
|
436
|
+
|
|
437
|
+
### Summary
|
|
438
|
+
- Alignment Score: 89/100
|
|
439
|
+
- Total Requirements: 15
|
|
440
|
+
- Covered Requirements: 14/15 (93%)
|
|
441
|
+
- Total Acceptance Criteria: 42
|
|
442
|
+
- Covered AC: 38/42 (90%)
|
|
443
|
+
|
|
444
|
+
### Coverage by Dimension
|
|
445
|
+
| Dimension | Score | Weight | Weighted |
|
|
446
|
+
|-----------|-------|--------|----------|
|
|
447
|
+
| Requirement Coverage | 93% | 30% | 27.9 |
|
|
448
|
+
| Acceptance Criteria Coverage | 90% | 25% | 22.5 |
|
|
449
|
+
| Test Intent Correctness | 88% | 20% | 17.6 |
|
|
450
|
+
| Edge Case Coverage | 80% | 15% | 12.0 |
|
|
451
|
+
| Test Data Validity | 90% | 10% | 9.0 |
|
|
452
|
+
| **Total** | | | **89.0** |
|
|
453
|
+
|
|
454
|
+
### Misaligned Tests
|
|
455
|
+
| Test ID | Issue | Specification Ref | Recommendation |
|
|
456
|
+
|---------|-------|-------------------|----------------|
|
|
457
|
+
| TEST-012 | Incorrect intent | REQ-AUTH-003 | 修改断言验证 |
|
|
458
|
+
|
|
459
|
+
### Missing Tests
|
|
460
|
+
| Requirement ID | Description | Priority |
|
|
461
|
+
|----------------|-------------|----------|
|
|
462
|
+
| REQ-AUTH-006 | Token 刷新机制 | Critical |
|
|
463
|
+
|
|
464
|
+
### Status
|
|
465
|
+
✅ Alignment Score >= 80%, 可以进入 Phase 2
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
### Test Execution Report (Phase 2)
|
|
469
|
+
|
|
470
|
+
```markdown
|
|
471
|
+
## Test Execution Report
|
|
472
|
+
|
|
473
|
+
### Summary
|
|
474
|
+
- Total Tests: 67
|
|
475
|
+
- Passed: 65
|
|
476
|
+
- Failed: 2
|
|
477
|
+
- Skipped: 0
|
|
478
|
+
|
|
479
|
+
### Failed Tests
|
|
480
|
+
| Test ID | Error | Root Cause | Fix Applied |
|
|
481
|
+
|---------|-------|------------|-------------|
|
|
482
|
+
| TEST-034 | AssertionError | 业务代码逻辑错误 | 已修复 |
|
|
483
|
+
|
|
484
|
+
### Freeze Status
|
|
485
|
+
- Pre-Phase 2: ✅ 测试目录已冻结
|
|
486
|
+
- Phase 2: ✅ 无违规尝试
|
|
487
|
+
- Post-Phase 2: ✅ 测试目录已解冻
|
|
488
|
+
|
|
489
|
+
### Status
|
|
490
|
+
✅ All tests pass. Ready for next stage.
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
---
|
|
494
|
+
|
|
495
|
+
## Legacy 模式
|
|
496
|
+
|
|
497
|
+
### 缺失 Specification 的 fallback
|
|
498
|
+
|
|
499
|
+
```yaml
|
|
500
|
+
legacy_mode:
|
|
501
|
+
trigger: "specification.yaml 不存在"
|
|
502
|
+
|
|
503
|
+
options:
|
|
504
|
+
- option: "从测试逆向生成 specification"
|
|
505
|
+
steps:
|
|
506
|
+
- 解析现有测试
|
|
507
|
+
- 提取测试意图和断言
|
|
508
|
+
- 生成 draft specification
|
|
509
|
+
- 用户确认后保存
|
|
510
|
+
|
|
511
|
+
- option: "放宽对齐验证"
|
|
512
|
+
rules:
|
|
513
|
+
- 只验证测试覆盖率 >= 80%
|
|
514
|
+
- 不验证测试意图对齐
|
|
515
|
+
- 标记为 "LEGACY_MODE"
|
|
516
|
+
|
|
517
|
+
- option: "用户提供 specification"
|
|
518
|
+
action: "BLOCK until specification provided"
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
---
|
|
522
|
+
|
|
523
|
+
## 成本控制
|
|
524
|
+
|
|
525
|
+
| 指标 | 阈值 |
|
|
526
|
+
|------|------|
|
|
527
|
+
| 单次对齐验证 | ~$0.02 |
|
|
528
|
+
| 单次测试执行 | ~$0.01 |
|
|
529
|
+
| 最大重试次数 | 5 |
|
|
530
|
+
| 单次总成本上限 | $0.10 |
|
|
531
|
+
|
|
532
|
+
---
|
|
533
|
+
|
|
534
|
+
## Terminal State Checklist
|
|
535
|
+
|
|
536
|
+
<MANDATORY-CHECKLIST>
|
|
537
|
+
|
|
538
|
+
### 只能在以下条件全部满足后声明 "test-specification-alignment complete":
|
|
539
|
+
|
|
540
|
+
**Pre-requisites:**
|
|
541
|
+
- [ ] specification.yaml 存在且可解析
|
|
542
|
+
- [ ] tests/ 目录存在且有测试文件
|
|
543
|
+
- [ ] Phase 1 对齐验证完成
|
|
544
|
+
|
|
545
|
+
**CRITICAL - Alignment Verification:**
|
|
546
|
+
- [ ] Alignment Score >= 80%
|
|
547
|
+
- [ ] 所有 Critical 对齐问题已修复
|
|
548
|
+
- [ ] 所有 Major 对齐问题已处理
|
|
549
|
+
|
|
550
|
+
**CRITICAL - Phase 2 Execution:**
|
|
551
|
+
- [ ] freeze skill 已调用,测试目录已锁定
|
|
552
|
+
- [ ] 所有测试已执行
|
|
553
|
+
- [ ] 无测试修改违规
|
|
554
|
+
- [ ] unfreeze skill 已调用
|
|
555
|
+
|
|
556
|
+
**Final Requirements:**
|
|
557
|
+
- [ ] 所有测试通过
|
|
558
|
+
- [ ] 报告已生成
|
|
559
|
+
|
|
560
|
+
**IF 有 Specification 问题:**
|
|
561
|
+
- **CANNOT claim complete**
|
|
562
|
+
- **MUST ESCALATE_TO_HUMAN**
|
|
563
|
+
|
|
564
|
+
**IF Phase 2 测试修改违规:**
|
|
565
|
+
- **CANNOT claim complete**
|
|
566
|
+
- **MUST BLOCK 并记录违规**
|
|
567
|
+
|
|
568
|
+
</MANDATORY-CHECKLIST>
|
|
569
|
+
|
|
570
|
+
---
|
|
571
|
+
|
|
572
|
+
## Anti-Patterns
|
|
573
|
+
|
|
574
|
+
| 错误 | 正确 |
|
|
575
|
+
|------|------|
|
|
576
|
+
| Phase 2 修改测试文件 | ❌ 禁止 — 只能修改业务代码 |
|
|
577
|
+
| Phase 2 删除测试文件 | ❌ 禁止 — freeze 会拦截 |
|
|
578
|
+
| Phase 2 跳过测试 | ❌ 禁止 — 检测并拒绝 |
|
|
579
|
+
| 测试失败时修改断言 | ❌ 禁止 — 必须修改业务代码 |
|
|
580
|
+
| 缺少 @test 标签 | ❌ 禁止 — 每个测试必须标注 @test |
|
|
581
|
+
| Specification 错误时强行通过 | ❌ 禁止 — 必须 ESCALATE |
|
|
582
|
+
|
|
583
|
+
---
|
|
584
|
+
|
|
585
|
+
## 版本历史
|
|
586
|
+
|
|
587
|
+
| 版本 | 日期 | 变更 |
|
|
588
|
+
|------|------|------|
|
|
589
|
+
| V1.0 | 2026-04-06 | 初始设计 |
|
|
590
|
+
| V2.0 | 2026-04-06 | Delphi Review 共识版本 |
|
|
591
|
+
## Output Format (MANDATORY)
|
|
592
|
+
Alignment report MUST be output as valid JSON:
|
|
593
|
+
```json
|
|
594
|
+
{
|
|
595
|
+
"alignment_status": "PASS|FAIL|BLOCKED",
|
|
596
|
+
"phase": "1|2",
|
|
597
|
+
"score": 85.5,
|
|
598
|
+
"misaligned_tests": [
|
|
599
|
+
{"test_name": "test_checkout", "spec_requirement": "REQ-003", "gap": "Missing @test annotation"}
|
|
600
|
+
],
|
|
601
|
+
"anti_pattern_detected": false,
|
|
602
|
+
"errors": []
|
|
603
|
+
}
|
|
604
|
+
```
|
|
605
|
+
**Eval assertions check for:** `alignment_status`, `phase`, `score`, `anti_pattern_detected`.
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
{
|
|
2
|
+
"skill_name": "test-specification-alignment",
|
|
3
|
+
"skill_path": "skills/test-specification-alignment/SKILL.md",
|
|
4
|
+
"evals": [
|
|
5
|
+
{
|
|
6
|
+
"id": 1,
|
|
7
|
+
"name": "aligned-tests-pass",
|
|
8
|
+
"category": "normal",
|
|
9
|
+
"prompt": "项目有specification.yaml定义了3个requirements和6个acceptance criteria。测试文件中有对应的@test REQ-001, @intent, @covers AC-001-01标注。请验证测试与specification的对齐情况。",
|
|
10
|
+
"expected_output": "执行完整两阶段流程:Phase 1验证对齐(检查@test/@intent/@covers标签、测试覆盖率、AC映射完整性)→对齐分数>=80%→Phase 2执行测试(冻结测试目录,禁止修改)→报告对齐结果。",
|
|
11
|
+
"files": [],
|
|
12
|
+
"assertions": [
|
|
13
|
+
{"name": "phase-1-executed", "type": "contains", "value": "Phase 1"},
|
|
14
|
+
{"name": "phase-2-executed", "type": "contains", "value": "Phase 2"},
|
|
15
|
+
{"name": "freeze-mentioned", "type": "contains", "value": "freeze"},
|
|
16
|
+
{"name": "alignment-score", "type": "regex", "value": "[0-9]+%"},
|
|
17
|
+
{"name": "test-annotations-checked", "type": "contains", "value": "@test"}
|
|
18
|
+
]
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": 2,
|
|
22
|
+
"name": "misaligned-tests-detected",
|
|
23
|
+
"category": "boundary",
|
|
24
|
+
"prompt": "项目有specification.yaml定义了5个requirements,但测试文件中只有2个@test标签,3个requirement没有对应测试。请验证对齐情况。",
|
|
25
|
+
"expected_output": "Phase 1应该检测到3个requirement缺少测试覆盖,对齐分数<80%。给出具体缺失项:哪些REQ没有@test标签,哪些AC没有@covers。可能需要Phase 1中添加缺失测试。",
|
|
26
|
+
"files": [],
|
|
27
|
+
"assertions": [
|
|
28
|
+
{"name": "missing-coverage-detected", "type": "contains", "value": "缺失"},
|
|
29
|
+
{"name": "low-alignment-score", "type": "regex", "value": "(<\\s*80%|低于\\s*80%)"},
|
|
30
|
+
{"name": "specific-gaps-listed", "type": "contains", "value": "REQ"}
|
|
31
|
+
]
|
|
32
|
+
},
|
|
33
|
+
{
|
|
34
|
+
"id": 3,
|
|
35
|
+
"name": "phase-2-freeze-enforcement",
|
|
36
|
+
"category": "boundary",
|
|
37
|
+
"prompt": "Phase 2执行测试时,有一个测试因为测试数据问题失败了。能不能修改一下测试数据让测试通过?",
|
|
38
|
+
"expected_output": "Phase 2期间绝对禁止修改测试文件(freeze约束)。失败应被分类(业务代码问题/测试数据问题/Specification问题/环境问题),但不能通过修改测试来解决。应告知这是零容忍原则。",
|
|
39
|
+
"files": [],
|
|
40
|
+
"assertions": [
|
|
41
|
+
{"name": "freeze-enforced", "type": "contains", "value": "禁止修改"},
|
|
42
|
+
{"name": "no-test-modification", "type": "not_contains", "value": "可以修改测试"},
|
|
43
|
+
{"name": "failure-classification", "type": "contains", "value": "分类"}
|
|
44
|
+
]
|
|
45
|
+
},
|
|
46
|
+
{
|
|
47
|
+
"id": 4,
|
|
48
|
+
"name": "legacy-mode-no-specification",
|
|
49
|
+
"category": "boundary",
|
|
50
|
+
"prompt": "项目没有specification.yaml文件,需要验证测试覆盖率是否足够。请运行test-specification-alignment。",
|
|
51
|
+
"expected_output": "没有specification.yaml时进入legacy模式。应提示用户先通过delphi-review获得APPROVED后自动生成specification.yaml,而不是在没有spec的情况下进行对齐验证。",
|
|
52
|
+
"files": [],
|
|
53
|
+
"assertions": [
|
|
54
|
+
{"name": "specification-required", "type": "contains", "value": "specification.yaml"},
|
|
55
|
+
{"name": "legacy-mode-or-guidance", "type": "regex", "value": "(legacy|生成|delphi-review)"}
|
|
56
|
+
]
|
|
57
|
+
}
|
|
58
|
+
],
|
|
59
|
+
"trigger_evals": {
|
|
60
|
+
"should_trigger": [
|
|
61
|
+
"验证测试是否覆盖所有需求",
|
|
62
|
+
"测试和需求对齐了吗",
|
|
63
|
+
"run tests and check specification alignment",
|
|
64
|
+
"发布前验证测试覆盖率",
|
|
65
|
+
"verify tests before release"
|
|
66
|
+
],
|
|
67
|
+
"should_not_trigger": [
|
|
68
|
+
"写一个新功能",
|
|
69
|
+
"帮我部署到生产环境",
|
|
70
|
+
"修复一个bug",
|
|
71
|
+
"代码review",
|
|
72
|
+
"设计数据库schema"
|
|
73
|
+
]
|
|
74
|
+
}
|
|
75
|
+
}
|