cc-devflow 4.5.1 → 4.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/cc-act/CHANGELOG.md +27 -0
- package/.claude/skills/cc-act/PLAYBOOK.md +32 -1
- package/.claude/skills/cc-act/SKILL.md +53 -7
- package/.claude/skills/cc-act/assets/PR_BRIEF_TEMPLATE.md +35 -1
- package/.claude/skills/cc-act/assets/RELEASE_NOTE_TEMPLATE.md +10 -1
- package/.claude/skills/cc-act/references/closure-contract.md +11 -0
- package/.claude/skills/cc-act/scripts/cc-act-common.sh +32 -1
- package/.claude/skills/cc-act/scripts/render-pr-brief.sh +130 -0
- package/.claude/skills/cc-act/scripts/verify-act-gate.sh +23 -1
- package/.claude/skills/cc-check/CHANGELOG.md +26 -0
- package/.claude/skills/cc-check/PLAYBOOK.md +128 -1
- package/.claude/skills/cc-check/SKILL.md +147 -7
- package/.claude/skills/cc-check/assets/REPORT_CARD_TEMPLATE.json +164 -1
- package/.claude/skills/cc-check/references/gate-contract.md +11 -0
- package/.claude/skills/cc-check/references/review-contract.md +104 -0
- package/.claude/skills/cc-check/scripts/render-report-card.js +209 -5
- package/.claude/skills/cc-check/scripts/verify-gate.sh +28 -0
- package/.claude/skills/cc-do/CHANGELOG.md +12 -0
- package/.claude/skills/cc-do/PLAYBOOK.md +14 -9
- package/.claude/skills/cc-do/SKILL.md +24 -13
- package/.claude/skills/cc-do/references/execution-recovery.md +16 -5
- package/.claude/skills/cc-do/scripts/verify-task-gates.sh +19 -6
- package/.claude/skills/cc-do/scripts/write-task-checkpoint.sh +14 -2
- package/.claude/skills/cc-investigate/CHANGELOG.md +31 -0
- package/.claude/skills/cc-investigate/PLAYBOOK.md +124 -8
- package/.claude/skills/cc-investigate/SKILL.md +252 -17
- package/.claude/skills/cc-investigate/assets/ANALYSIS_TEMPLATE.md +112 -3
- package/.claude/skills/cc-investigate/assets/TASKS_TEMPLATE.md +17 -5
- package/.claude/skills/cc-investigate/assets/TASK_MANIFEST_TEMPLATE.json +141 -1
- package/.claude/skills/cc-investigate/references/investigation-contract.md +192 -0
- package/.claude/skills/cc-plan/CHANGELOG.md +26 -0
- package/.claude/skills/cc-plan/PLAYBOOK.md +18 -6
- package/.claude/skills/cc-plan/SKILL.md +72 -34
- package/.claude/skills/cc-plan/assets/DESIGN_TEMPLATE.md +30 -3
- package/.claude/skills/cc-plan/assets/TASKS_TEMPLATE.md +28 -0
- package/.claude/skills/cc-plan/assets/TASK_MANIFEST_TEMPLATE.json +46 -1
- package/.claude/skills/cc-plan/assets/TINY_DESIGN_TEMPLATE.md +24 -0
- package/.claude/skills/cc-plan/references/planning-contract.md +18 -4
- package/.claude/skills/cc-roadmap/CHANGELOG.md +14 -0
- package/.claude/skills/cc-roadmap/PLAYBOOK.md +10 -7
- package/.claude/skills/cc-roadmap/SKILL.md +43 -23
- package/.claude/skills/cc-roadmap/assets/BACKLOG_TEMPLATE.md +10 -0
- package/.claude/skills/cc-roadmap/assets/ROADMAP_TEMPLATE.md +15 -0
- package/.claude/skills/cc-roadmap/assets/TRACKING_TEMPLATE.json +1 -1
- package/.claude/skills/cc-roadmap/references/roadmap-dialogue.md +11 -7
- package/.claude/skills/cc-simplify/CHANGELOG.md +21 -0
- package/.claude/skills/cc-simplify/SKILL.md +264 -35
- package/.claude/skills/cc-spec-init/CHANGELOG.md +6 -0
- package/.claude/skills/cc-spec-init/SKILL.md +14 -1
- package/CHANGELOG.md +37 -0
- package/README.md +10 -2
- package/README.zh-CN.md +10 -2
- package/docs/examples/example-bindings.json +7 -7
- package/docs/examples/full-design-blocked/BACKLOG.md +1 -1
- package/docs/examples/full-design-blocked/README.md +1 -1
- package/docs/examples/full-design-blocked/ROADMAP.md +1 -1
- package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/planning/design.md +1 -1
- package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/planning/tasks.md +1 -1
- package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/review/report-card.json +140 -3
- package/docs/examples/full-design-blocked/roadmap-tracking.json +1 -1
- package/docs/examples/local-handoff/BACKLOG.md +1 -1
- package/docs/examples/local-handoff/README.md +1 -1
- package/docs/examples/local-handoff/ROADMAP.md +1 -1
- package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/planning/design.md +1 -1
- package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/planning/tasks.md +1 -1
- package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/review/report-card.json +92 -0
- package/docs/examples/local-handoff/roadmap-tracking.json +1 -1
- package/docs/examples/pdca-loop/BACKLOG.md +1 -1
- package/docs/examples/pdca-loop/README.md +1 -1
- package/docs/examples/pdca-loop/ROADMAP.md +1 -1
- package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/handoff/pr-brief.md +20 -0
- package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/design.md +1 -1
- package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/task-manifest.json +2 -2
- package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/tasks.md +1 -1
- package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/review/report-card.json +92 -0
- package/docs/examples/pdca-loop/roadmap-tracking.json +1 -1
- package/docs/skill-strategy-audit.md +48 -0
- package/lib/skill-runtime/__tests__/runtime.integration.test.js +19 -1
- package/lib/skill-runtime/review.js +64 -1
- package/lib/skill-runtime/schemas.js +161 -4
- package/package.json +1 -1
|
@@ -40,6 +40,12 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
40
40
|
- runtime gate
|
|
41
41
|
- task review proof
|
|
42
42
|
- requirement diff review
|
|
43
|
+
- claim evidence matrix
|
|
44
|
+
- QA feedback loop and behavior evidence
|
|
45
|
+
- QA regression / test-quality proof
|
|
46
|
+
- QA coverage and browser evidence
|
|
47
|
+
- review freshness and finding confidence
|
|
48
|
+
- failure ownership
|
|
43
49
|
- spec sync readiness
|
|
44
50
|
4. **Freeze Verdict**
|
|
45
51
|
- 只允许 `pass` / `fail` / `blocked`
|
|
@@ -54,13 +60,35 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
54
60
|
3. 读取真实输出和退出码
|
|
55
61
|
4. 把证据写进 `report-card.json`
|
|
56
62
|
5. 把任务级 review 与需求级 diff review 分开写清楚
|
|
63
|
+
6. 把每个成功声明映射到 `claimEvidence[]`
|
|
64
|
+
7. 行为变更必须补 `qa` 证据或例外理由
|
|
65
|
+
8. 失败输出必须写入 `runtime.failureOwnership[]`
|
|
57
66
|
|
|
58
67
|
## Verification Layers
|
|
59
68
|
|
|
60
69
|
1. Runtime reality
|
|
61
70
|
2. Task review proof
|
|
62
71
|
3. Requirement diff truth
|
|
63
|
-
4. Spec alignment and sync readiness
|
|
72
|
+
4. Claim evidence matrix
|
|
73
|
+
5. QA feedback loop and behavior evidence
|
|
74
|
+
6. QA regression and test quality
|
|
75
|
+
7. QA coverage and browser evidence
|
|
76
|
+
8. Review freshness and confidence calibration
|
|
77
|
+
9. Failure ownership
|
|
78
|
+
10. Spec alignment and sync readiness
|
|
79
|
+
|
|
80
|
+
## Claim Evidence Matrix
|
|
81
|
+
|
|
82
|
+
每个“通过”声明都要回答:这条声明由哪条命令或 artifact 证明?
|
|
83
|
+
|
|
84
|
+
- `tests-pass`:本轮 test command、exit 0、0 failures
|
|
85
|
+
- `lint-clean` / `typecheck-clean` / `build-succeeds`:对应 gate 的本轮输出
|
|
86
|
+
- `bug-fixed`:原始症状或回归测试通过
|
|
87
|
+
- `regression-test-works`:red -> green 证据,而不是只绿一次
|
|
88
|
+
- `requirements-met`:逐项 plan / manifest checklist
|
|
89
|
+
- `agent-completed`:VCS diff 或 artifact 证明实际变化
|
|
90
|
+
|
|
91
|
+
缺少必要 claim 的证据时,verdict 至少是 `blocked`。不要把没有证据的 claim 写进 summary。
|
|
64
92
|
|
|
65
93
|
## Requirement Diff Review
|
|
66
94
|
|
|
@@ -71,9 +99,52 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
71
99
|
3. `scope drift`:识别多做、少做、做偏。
|
|
72
100
|
4. `critical pass`:检查数据安全、并发、shell、LLM trust boundary、枚举覆盖、静默失败、文档漂移。
|
|
73
101
|
5. `adversarial synthesis`:合并外部 review / codex / subagent / 人工 finding,去重并标置信度。
|
|
102
|
+
6. `specialist facets`:按风险记录 testing / security / performance / api-contract / data-migration / design 等审查面;没有覆盖必须写 skip reason。
|
|
103
|
+
7. `freshness`:确认 review 对应当前 head;review 后新增 commit 时不能继续拿旧审查支撑 `pass`。
|
|
74
104
|
|
|
75
105
|
这些结论进入 `review.diffReview`,不能只写在口头总结里。
|
|
76
106
|
|
|
107
|
+
每层 review 都要带 `reviewPacket`:`baseSha`、`headSha`、`requirements`、`implemented`、`reviewerContext`。缺少审查范围时,review 不能支撑 `pass`。
|
|
108
|
+
|
|
109
|
+
review 还要带 `freshness`:`status`、`reviewedCommit`、`currentCommit`、`commitsSinceReview`、`staleReason`。`status=stale` 或缺失 freshness 时,`pass` 不成立。
|
|
110
|
+
|
|
111
|
+
每条 finding 都要带 `triageStatus`:
|
|
112
|
+
|
|
113
|
+
- `accepted-fixed`
|
|
114
|
+
- `rejected-with-evidence`
|
|
115
|
+
- `deferred-minor`
|
|
116
|
+
- `clarification-needed`
|
|
117
|
+
|
|
118
|
+
`critical` / `important` finding 未闭环或仍是 `clarification-needed`,不能进入 `cc-act`。
|
|
119
|
+
|
|
120
|
+
每条 finding 还要带 `confidenceScore`、`fingerprint`、`displayTier`、`suppressionReason`。低置信 finding 只能作为 warning 或 gap,不能伪装成 blocking fact。
|
|
121
|
+
|
|
122
|
+
## QA Test Quality
|
|
123
|
+
|
|
124
|
+
行为变化、bugfix、边界条件、用户可见流程必须补 `qa`:
|
|
125
|
+
|
|
126
|
+
- `feedbackLoop`:用什么 loop 证明现实,速度、确定性、信号锋利度、复现率如何
|
|
127
|
+
- `behaviorEvidence`:用户边界、expected / actual、复现步骤、稳定性、领域语言
|
|
128
|
+
- `regressionProof`:red command、red failure reason、green command、是否恢复最终状态
|
|
129
|
+
- `testQuality`:是否经公共接口验证真实行为、mock 是否只停在系统边界、是否存在 test-only production API
|
|
130
|
+
- `architectureFollowUps`:没有正确 test seam 时记录 seam / hidden coupling / shallow module 的后续改造
|
|
131
|
+
- `tddException`:纯配置、生成文件、throwaway prototype 等例外和替代验证
|
|
132
|
+
- `coverageAudit`:覆盖率、codepath / user-flow map、缺口、是否需要 e2e / eval、测试质量星级
|
|
133
|
+
- `browserEvidence`:UI / 用户路径变更的 affected routes、截图、console、health score、issues,或明确 skip reason
|
|
134
|
+
|
|
135
|
+
测试只绿过一次,不能证明 regression test 有效;断言 mock 本身,不能证明真实行为。没有可信反馈环时,`pass` 不成立。
|
|
136
|
+
|
|
137
|
+
## Failure Ownership
|
|
138
|
+
|
|
139
|
+
失败要先归属,再下结论:
|
|
140
|
+
|
|
141
|
+
- `in-branch`:当前分支引入,默认回 `cc-do`
|
|
142
|
+
- `pre-existing`:base branch 也存在,必须有复验证据
|
|
143
|
+
- `environment`:依赖、权限、服务、密钥、平台缺失,通常是 `blocked`
|
|
144
|
+
- `ambiguous`:无法证明归属,默认不能 `pass`
|
|
145
|
+
|
|
146
|
+
不要把环境红灯、基线红灯、本分支红灯混成一句“测试失败”。
|
|
147
|
+
|
|
77
148
|
## Verdict
|
|
78
149
|
|
|
79
150
|
只允许 3 种结论:
|
|
@@ -98,12 +169,67 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
98
169
|
"verdict": "pass",
|
|
99
170
|
"overall": "pass",
|
|
100
171
|
"summary": "one-line reality",
|
|
172
|
+
"claimEvidence": [],
|
|
173
|
+
"runtime": {
|
|
174
|
+
"status": "pass",
|
|
175
|
+
"failureOwnership": []
|
|
176
|
+
},
|
|
177
|
+
"qa": {
|
|
178
|
+
"status": "pass",
|
|
179
|
+
"feedbackLoop": {
|
|
180
|
+
"status": "pass",
|
|
181
|
+
"mode": "targeted-test",
|
|
182
|
+
"commandOrArtifact": "npm test -- src/feature/feature.test.ts",
|
|
183
|
+
"speed": "fast",
|
|
184
|
+
"determinism": "high",
|
|
185
|
+
"signalSharpness": "fails only when the target behavior is absent",
|
|
186
|
+
"reproductionRate": "1/1",
|
|
187
|
+
"attempts": [],
|
|
188
|
+
"blockedReason": ""
|
|
189
|
+
},
|
|
190
|
+
"behaviorEvidence": {
|
|
191
|
+
"status": "pass",
|
|
192
|
+
"userFacingBoundary": "feature action",
|
|
193
|
+
"expectedBehavior": "the user-visible behavior succeeds",
|
|
194
|
+
"actualBehavior": "verified by targeted test",
|
|
195
|
+
"reproductionSteps": [],
|
|
196
|
+
"consistency": "deterministic",
|
|
197
|
+
"domainLanguage": []
|
|
198
|
+
},
|
|
199
|
+
"regressionProof": [],
|
|
200
|
+
"testQuality": [],
|
|
201
|
+
"coverageAudit": {
|
|
202
|
+
"status": "pass",
|
|
203
|
+
"coveragePct": 80,
|
|
204
|
+
"pathMap": [],
|
|
205
|
+
"gaps": [],
|
|
206
|
+
"testsAdded": [],
|
|
207
|
+
"e2eRequired": false,
|
|
208
|
+
"evalRequired": false,
|
|
209
|
+
"qualityStars": "★★"
|
|
210
|
+
},
|
|
211
|
+
"browserEvidence": {
|
|
212
|
+
"status": "skipped",
|
|
213
|
+
"mode": "not-applicable",
|
|
214
|
+
"affectedRoutes": [],
|
|
215
|
+
"screenshots": [],
|
|
216
|
+
"consoleErrors": [],
|
|
217
|
+
"healthScore": null,
|
|
218
|
+
"issues": [],
|
|
219
|
+
"skipReason": "not a UI or user-path change"
|
|
220
|
+
},
|
|
221
|
+
"architectureFollowUps": [],
|
|
222
|
+
"tddException": null
|
|
223
|
+
},
|
|
101
224
|
"quickGates": [],
|
|
102
225
|
"strictGates": [],
|
|
103
226
|
"review": {
|
|
104
227
|
"status": "pass",
|
|
105
228
|
"summary": "",
|
|
106
229
|
"details": "",
|
|
230
|
+
"freshness": { "status": "fresh", "reviewedCommit": "example-head", "currentCommit": "example-head", "commitsSinceReview": 0, "staleReason": "" },
|
|
231
|
+
"qualityScore": 9,
|
|
232
|
+
"specialistReviews": [],
|
|
107
233
|
"taskReviews": { "status": "pass", "required": true, "summary": "", "reviewers": [], "findings": [] },
|
|
108
234
|
"diffReview": { "status": "skipped", "required": false, "summary": "", "reviewers": [], "findings": [] },
|
|
109
235
|
"findings": []
|
|
@@ -151,3 +277,4 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
151
277
|
4. `review.status` 是真实现实,还是我脑补的绿色?
|
|
152
278
|
5. 如果把这份 `report-card.json` 给下一位接手者,他知道接下来去哪吗?
|
|
153
279
|
6. diff review 是否同时覆盖了 plan completion、scope drift、critical pass、doc staleness?
|
|
280
|
+
7. feedback loop 是否真的证明了用户描述的行为,而不是只证明附近代码能跑?
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: cc-check
|
|
3
|
-
version: 1.
|
|
3
|
+
version: 1.10.0
|
|
4
4
|
description: Use when a planned or investigated change needs fresh verification evidence, layered gate proof, review truth, and an honest pass / fail / blocked verdict before entering cc-act.
|
|
5
5
|
triggers:
|
|
6
6
|
- 验收这个需求
|
|
@@ -25,7 +25,7 @@ entry_gate:
|
|
|
25
25
|
- Re-run fresh commands instead of inheriting cc-do narration.
|
|
26
26
|
- If evidence is stale or missing, reset context and rebuild the verdict from canonical artifacts.
|
|
27
27
|
exit_criteria:
|
|
28
|
-
- review/report-card.json records pass, fail, or blocked using fresh evidence, plus spec alignment and sync readiness.
|
|
28
|
+
- review/report-card.json records pass, fail, or blocked using fresh evidence, review freshness, claim evidence, QA coverage and browser evidence, failure ownership, plus spec alignment and sync readiness.
|
|
29
29
|
- Task-level review and requirement-level diff review are separated clearly.
|
|
30
30
|
- 'The next step is unambiguous: cc-act, cc-do, cc-investigate, or cc-plan.'
|
|
31
31
|
reroutes:
|
|
@@ -119,6 +119,9 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
119
119
|
- runtime gate
|
|
120
120
|
- task-level review proof
|
|
121
121
|
- requirement-level diff review
|
|
122
|
+
- claim evidence matrix
|
|
123
|
+
- QA feedback loop and behavior evidence
|
|
124
|
+
- QA regression / test-quality proof
|
|
122
125
|
- spec alignment / sync readiness
|
|
123
126
|
4. **Freeze Verdict**
|
|
124
127
|
- 只允许 `pass` / `fail` / `blocked`
|
|
@@ -131,12 +134,12 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
131
134
|
|
|
132
135
|
- Allowed actions: rerun gates, inspect review proof, record a verdict, and route the requirement honestly.
|
|
133
136
|
- Forbidden actions: continuing development, inheriting old execution claims without fresh proof, or masking blocked work as pass.
|
|
134
|
-
- Required evidence: every passing statement must cite fresh command output, exit status, and
|
|
137
|
+
- Required evidence: every passing statement must cite fresh command output, exit status, key observation, and the claim it proves.
|
|
135
138
|
- Reroute rule: code and review fixes return to `cc-do`; root-cause drift returns to `cc-investigate`; scope or design invalidation returns to `cc-plan`.
|
|
136
139
|
|
|
137
140
|
## Verification Layers
|
|
138
141
|
|
|
139
|
-
`cc-check` 不是只看“测试是不是绿的”,而是至少看 4 层:
|
|
142
|
+
`cc-check` 不是只看“测试是不是绿的”,而是至少看 10 层:
|
|
140
143
|
|
|
141
144
|
1. **Runtime Layer**
|
|
142
145
|
- 测试、lint、typecheck、build、脚本 gate
|
|
@@ -147,9 +150,79 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
147
150
|
- 当前改动是否真的兑现 requirement,而不是只让局部测试通过
|
|
148
151
|
4. **Spec Sync Layer**
|
|
149
152
|
- capability truth、expected spec delta、handoff readiness 是否仍然一致
|
|
153
|
+
5. **Claim Evidence Layer**
|
|
154
|
+
- 测试通过、build 成功、bug 修复、需求完成、agent 完成等声明,是否各自有对应证据
|
|
155
|
+
6. **QA Test Layer**
|
|
156
|
+
- 回归测试是否有 red/green 证据
|
|
157
|
+
- 测试是否验证真实行为,而不是 mock 或 test-only production API
|
|
158
|
+
- 反馈环是否能稳定复现或证明用户描述的行为
|
|
159
|
+
7. **Review Freshness Layer**
|
|
160
|
+
- review 是否绑定当前 `headSha`
|
|
161
|
+
- 从 review 到当前 HEAD 是否还有新增 commit
|
|
162
|
+
- 质量分、置信度、finding 去噪是否可复盘
|
|
163
|
+
8. **QA Coverage / Browser Layer**
|
|
164
|
+
- 行为链路、错误态、边界条件是否被测试映射覆盖
|
|
165
|
+
- UI / 用户路径变更是否有浏览器证据、截图、console 结果或明确 skip 理由
|
|
166
|
+
9. **Failure Ownership Layer**
|
|
167
|
+
- 失败是本分支引入、基线已存在、环境阻塞,还是归属不明
|
|
168
|
+
- 归属不明默认不能支撑 `pass`
|
|
169
|
+
10. **Behavior Contract Layer**
|
|
170
|
+
- expected / actual / reproduction steps 是否用用户和领域语言写清
|
|
171
|
+
- follow-up 是否是行为契约,而不是易腐烂的文件行号 TODO
|
|
150
172
|
|
|
151
173
|
任何一层失真,都不能写 `pass`。
|
|
152
174
|
|
|
175
|
+
## Claim Evidence Matrix
|
|
176
|
+
|
|
177
|
+
不要把所有绿色都写成“测试过了”。`cc-check` 必须把声明拆成证据:
|
|
178
|
+
|
|
179
|
+
| Claim | Required proof | Not enough |
|
|
180
|
+
| --- | --- | --- |
|
|
181
|
+
| Tests pass | 本轮 test command、exit 0、0 failures | 旧输出、局部日志、应该会过 |
|
|
182
|
+
| Lint clean | 本轮 lint command、0 errors | 只跑 formatter、只看 touched file 且声明全仓 clean |
|
|
183
|
+
| Build succeeds | build command exit 0 | test / lint 通过 |
|
|
184
|
+
| Bug fixed | 原始症状或回归测试通过 | 代码改了、推测已修 |
|
|
185
|
+
| Regression test works | red -> green 证据 | 测试只绿过一次 |
|
|
186
|
+
| Agent completed | VCS diff / artifact 证明实际变化 | agent 自报 success |
|
|
187
|
+
| Requirements met | 逐项 plan / manifest checklist | 测试通过 |
|
|
188
|
+
|
|
189
|
+
这些事实写入 `claimEvidence[]`。缺少关键 claim 的证据时,结论至少是 `blocked`。
|
|
190
|
+
|
|
191
|
+
## QA Test Review
|
|
192
|
+
|
|
193
|
+
`cc-check` 必须区分“有测试”和“测试证明了正确行为”:
|
|
194
|
+
|
|
195
|
+
1. 先建立反馈环,再谈修复:failing test、curl / HTTP、CLI fixture、headless browser、trace replay、throwaway harness、bisect / differential loop 都可以,但必须说明速度、确定性、信号锋利度和复现率。
|
|
196
|
+
2. 回归测试必须记录 red/green 证据;red 要因为目标行为缺失而失败,不是语法、fixture 或 mock 写错。
|
|
197
|
+
3. 测试应从公共接口验证真实行为;不准为了方便直接测私有实现。
|
|
198
|
+
4. mock 只允许站在系统边界:外部 API、数据库、时间、随机数、文件系统、网络。mock 自家模块、断言内部调用次数或顺序,默认是 review finding。
|
|
199
|
+
5. 生产代码里新增仅测试使用的 API,默认是坏味道,必须记为 blocking finding,除非有明确生产生命周期理由。
|
|
200
|
+
6. 复杂 mock setup 超过测试主体时,优先要求 integration / contract test 解释。
|
|
201
|
+
7. test fixture 必须诚实表达 contract:partial fixture、generated stub、`as` / `any` / 双重 cast、缺字段 mock payload 都要说明真实字段与填充字段;如果这些技巧让测试绕过公共 seam 或隐藏错误输入,默认是 review finding。
|
|
202
|
+
8. 如果没有正确测试 seam,不要硬造脆弱测试;记录 `qa.architectureFollowUps`,说明缺失 seam / hidden coupling / shallow module,并按严重度决定 reroute 或 follow-up。
|
|
203
|
+
|
|
204
|
+
这些事实写入 `qa.regressionProof` 和 `qa.testQuality`。如果本需求没有行为测试空间,必须记录 `tddException` 或替代验证命令。
|
|
205
|
+
|
|
206
|
+
## QA Behavior Evidence
|
|
207
|
+
|
|
208
|
+
用户可见行为、bugfix、regression、工作流、CLI 行为和 API 行为都必须留下行为证据:
|
|
209
|
+
|
|
210
|
+
1. `qa.feedbackLoop` 记录本轮用什么 loop 证明现实,包含 `status`、`mode`、`commandOrArtifact`、`speed`、`determinism`、`signalSharpness`、`reproductionRate`、`attempts`、`blockedReason`。
|
|
211
|
+
2. `qa.behaviorEvidence` 记录 `userFacingBoundary`、`expectedBehavior`、`actualBehavior`、`reproductionSteps`、`consistency`、`domainLanguage`、`status`。
|
|
212
|
+
3. bugfix 不能只写“代码改了”;必须证明用户描述的原始症状已经被同一条或更可信的反馈环覆盖。
|
|
213
|
+
4. 不能复现时,verdict 默认 `blocked` 或回 `cc-investigate`,并写清尝试过哪些 loop、还缺什么 artifact / 权限 / 输入。
|
|
214
|
+
5. QA issue / follow-up 必须用行为和验收条件表达,不写易失效的文件路径或行号,除非它是当前 review finding 的证据位置。
|
|
215
|
+
|
|
216
|
+
## QA Coverage And Browser Evidence
|
|
217
|
+
|
|
218
|
+
测试不是数量游戏。`cc-check` 必须判断测试覆盖了哪条真实路径:
|
|
219
|
+
|
|
220
|
+
1. `qa.coverageAudit` 记录 `coveragePct`、`pathMap`、`gaps`、`testsAdded`、`e2eRequired`、`evalRequired`、`qualityStars`。
|
|
221
|
+
2. UI、路由、端到端用户路径、可视状态、交互状态变化时,必须记录 `qa.browserEvidence`。
|
|
222
|
+
3. `qa.browserEvidence` 至少说明 `mode`、`affectedRoutes`、`screenshots`、`consoleErrors`、`healthScore`、`issues`、`skipReason`。
|
|
223
|
+
4. 前端变更没有浏览器证据也没有 skip reason,不能写 `pass`。
|
|
224
|
+
5. 非前端或纯内部变更可以把 `browserEvidence.status` 写成 `skipped`,但必须说明为什么不需要浏览器 QA。
|
|
225
|
+
|
|
153
226
|
## Diff Review Pipeline
|
|
154
227
|
|
|
155
228
|
`cc-check` 的 requirement-level review 不能只写“diff 看过了”。至少要形成这些事实:
|
|
@@ -161,9 +234,56 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
161
234
|
5. Outside-diff lookup:新增枚举值、状态、路由、artifact 类型时,必须搜索 sibling references,不能只读 diff 内文件。
|
|
162
235
|
6. Documentation staleness:代码行为、入口、命令、结构变化时,检查 README / CLAUDE / architecture docs 是否漂移。
|
|
163
236
|
7. Adversarial synthesis:如果有 codex review、subagent review、人工 review,多视角 finding 要去重并标出高置信重叠项。
|
|
237
|
+
8. Specialist facets:按实际风险记录 `testing`、`security`、`performance`、`api-contract`、`data-migration`、`design` 等 review facet;没有派发也要写 skip reason,避免 reviewer 误以为已经覆盖。
|
|
238
|
+
9. Confidence calibration:每条 finding 必须有可比较的置信度和指纹,低置信 finding 不准伪装成 blocker。
|
|
164
239
|
|
|
165
240
|
这些事实写入 `review.diffReview.details` 或 `review.findings`。`pass` 只在 scope、completion、critical pass、doc staleness 都没有 blocking finding 时成立。
|
|
166
241
|
|
|
242
|
+
## Review Packet And Triage
|
|
243
|
+
|
|
244
|
+
每次 task-level 或 requirement-level review 都必须能脱离聊天记录复盘:
|
|
245
|
+
|
|
246
|
+
1. `reviewPacket.baseSha`
|
|
247
|
+
2. `reviewPacket.headSha`
|
|
248
|
+
3. `reviewPacket.requirements`
|
|
249
|
+
4. `reviewPacket.implemented`
|
|
250
|
+
5. `reviewPacket.reviewerContext`
|
|
251
|
+
|
|
252
|
+
每次 review 还必须记录 freshness:
|
|
253
|
+
|
|
254
|
+
1. `review.freshness.status`:`fresh` / `stale` / `unknown` / `not-applicable`
|
|
255
|
+
2. `review.freshness.reviewedCommit`
|
|
256
|
+
3. `review.freshness.currentCommit`
|
|
257
|
+
4. `review.freshness.commitsSinceReview`
|
|
258
|
+
5. `review.freshness.staleReason`
|
|
259
|
+
6. `review.qualityScore`:0-10,缺失时不能当成高置信审查
|
|
260
|
+
|
|
261
|
+
每条 finding 必须有 triage:
|
|
262
|
+
|
|
263
|
+
- `accepted-fixed`:已修并有验证
|
|
264
|
+
- `rejected-with-evidence`:经代码 / 测试证明不适用
|
|
265
|
+
- `deferred-minor`:非阻塞,已写入 follow-up
|
|
266
|
+
- `clarification-needed`:不清楚,当前 verdict 不能是 `pass`
|
|
267
|
+
|
|
268
|
+
`critical` / `important` finding 未 triage 或未闭环,不能进入 `cc-act`。
|
|
269
|
+
|
|
270
|
+
每条 finding 还必须带去噪字段:
|
|
271
|
+
|
|
272
|
+
- `confidenceScore`:1-10,低于 7 的 finding 只能作为 warning 或待验证 gap
|
|
273
|
+
- `fingerprint`:稳定去重键,避免多路 review 重复报同一件事
|
|
274
|
+
- `displayTier`:`blocking` / `warning` / `info` / `suppressed`
|
|
275
|
+
- `suppressionReason`:只有 `displayTier=suppressed` 时允许非空
|
|
276
|
+
|
|
277
|
+
## Failure Ownership
|
|
278
|
+
|
|
279
|
+
失败不能只写“测试红了”。`cc-check` 必须把失败归属写入 `runtime.failureOwnership[]`:
|
|
280
|
+
|
|
281
|
+
1. `classification` 只能是 `in-branch`、`pre-existing`、`environment`、`ambiguous`。
|
|
282
|
+
2. `ambiguous` 默认按 `in-branch` 处理,除非有 base branch 复验证据。
|
|
283
|
+
3. `pre-existing` 必须有 base branch 或历史证据,不能靠猜。
|
|
284
|
+
4. `environment` 必须记录缺失依赖、权限、服务、密钥或平台约束。
|
|
285
|
+
5. `pass` 不能带未解释的 `in-branch` 或 `ambiguous` 失败。
|
|
286
|
+
|
|
167
287
|
## Entry Gate
|
|
168
288
|
|
|
169
289
|
1. 先读 `planning/design.md` 或 `planning/analysis.md`,再读 `planning/tasks.md`、`planning/task-manifest.json`。
|
|
@@ -181,6 +301,7 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
181
301
|
- 运行真实命令
|
|
182
302
|
- 记录 exit status
|
|
183
303
|
- 识别 failure 还是 blocked
|
|
304
|
+
- 记录 failure ownership,而不是把所有红灯混成一个失败摘要
|
|
184
305
|
3. **Compare against the contract**
|
|
185
306
|
- 对照 `planning/design.md` 或 `planning/analysis.md`
|
|
186
307
|
- 对照 `planning/tasks.md`、`planning/task-manifest.json`
|
|
@@ -265,9 +386,12 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
265
386
|
|
|
266
387
|
1. severity:`critical` / `important` / `info`
|
|
267
388
|
2. confidence:`high` / `medium` / `low`,低置信不要伪装成 blocker
|
|
389
|
+
- 同时写 `confidenceScore`,用 1-10 数字表达可比较置信度
|
|
268
390
|
3. source:`runtime` / `task-review` / `diff-review` / `adversarial` / `docs`
|
|
269
391
|
4. evidence:文件、命令、退出码、manifest path、或具体观察
|
|
270
392
|
5. action:`fix-now` / `reroute-cc-do` / `reroute-cc-plan` / `reroute-cc-investigate` / `document-follow-up`
|
|
393
|
+
6. fingerprint:稳定去重键
|
|
394
|
+
7. displayTier:`blocking` / `warning` / `info` / `suppressed`
|
|
271
395
|
|
|
272
396
|
不能写“可能有问题”然后让接手者猜。要么证明,要么标成待验证 gap。
|
|
273
397
|
|
|
@@ -281,15 +405,31 @@ NO PASS WITHOUT FRESH EVIDENCE
|
|
|
281
405
|
"verdict": "pass",
|
|
282
406
|
"overall": "pass",
|
|
283
407
|
"summary": "verdict=pass quick=3/3 strict=0/0 review=pass",
|
|
408
|
+
"claimEvidence": [
|
|
409
|
+
{ "claim": "tests-pass", "requiredProof": "fresh test command", "commandOrArtifact": "npm test", "exitStatus": 0, "keyObservation": "0 failures", "status": "pass" },
|
|
410
|
+
{ "claim": "requirements-met", "requiredProof": "plan checklist", "commandOrArtifact": "planning/tasks.md", "exitStatus": null, "keyObservation": "all tasks complete", "status": "pass" }
|
|
411
|
+
],
|
|
412
|
+
"runtime": { "status": "pass", "failureOwnership": [] },
|
|
413
|
+
"qa": {
|
|
414
|
+
"status": "pass",
|
|
415
|
+
"regressionProof": [],
|
|
416
|
+
"testQuality": [],
|
|
417
|
+
"coverageAudit": { "status": "pass", "coveragePct": 80, "pathMap": [], "gaps": [], "testsAdded": [], "e2eRequired": false, "evalRequired": false, "qualityStars": "★★" },
|
|
418
|
+
"browserEvidence": { "status": "skipped", "mode": "not-applicable", "affectedRoutes": [], "screenshots": [], "consoleErrors": [], "healthScore": null, "issues": [], "skipReason": "not a UI or user-path change" },
|
|
419
|
+
"tddException": null
|
|
420
|
+
},
|
|
284
421
|
"quickGates": [],
|
|
285
422
|
"strictGates": [],
|
|
286
423
|
"review": {
|
|
287
424
|
"status": "pass",
|
|
288
425
|
"summary": "Task review and diff review both passed",
|
|
289
426
|
"details": "",
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
427
|
+
"freshness": { "status": "fresh", "reviewedCommit": "example-head", "currentCommit": "example-head", "commitsSinceReview": 0, "staleReason": "" },
|
|
428
|
+
"qualityScore": 9,
|
|
429
|
+
"specialistReviews": [],
|
|
430
|
+
"taskReviews": { "status": "pass", "required": true, "summary": "all completed tasks carry spec/code proof", "reviewPacket": {}, "reviewers": [], "findings": [] },
|
|
431
|
+
"diffReview": { "status": "pass", "required": true, "summary": "plan completion clean, no scope drift, no critical diff findings", "reviewPacket": {}, "reviewers": [], "findings": [] },
|
|
432
|
+
"findings": []
|
|
293
433
|
},
|
|
294
434
|
"blockingFindings": [],
|
|
295
435
|
"reroute": "none",
|
|
@@ -9,6 +9,123 @@
|
|
|
9
9
|
"specAlignment": "blocked",
|
|
10
10
|
"specDeltaVerified": false,
|
|
11
11
|
"specSyncReady": false,
|
|
12
|
+
"runtime": {
|
|
13
|
+
"status": "blocked",
|
|
14
|
+
"failureOwnership": [
|
|
15
|
+
{
|
|
16
|
+
"failure": "missing spec review proof",
|
|
17
|
+
"classification": "in-branch",
|
|
18
|
+
"touchedByDiff": true,
|
|
19
|
+
"evidence": "planning/task-manifest.json tasks[T002].reviews.spec is empty",
|
|
20
|
+
"action": "reroute-cc-do",
|
|
21
|
+
"status": "open"
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
"claimEvidence": [
|
|
26
|
+
{
|
|
27
|
+
"claim": "tests-pass",
|
|
28
|
+
"requiredProof": "fresh test command with exit 0 and 0 failures",
|
|
29
|
+
"commandOrArtifact": "npm test -- src/feature/feature.test.ts",
|
|
30
|
+
"exitStatus": 0,
|
|
31
|
+
"keyObservation": "targeted tests passed in this run",
|
|
32
|
+
"status": "pass"
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
"claim": "requirements-met",
|
|
36
|
+
"requiredProof": "line-by-line planning/tasks.md and task-manifest.json checklist",
|
|
37
|
+
"commandOrArtifact": "planning/tasks.md + planning/task-manifest.json",
|
|
38
|
+
"exitStatus": null,
|
|
39
|
+
"keyObservation": "T002 still lacks spec review proof",
|
|
40
|
+
"status": "blocked"
|
|
41
|
+
}
|
|
42
|
+
],
|
|
43
|
+
"qa": {
|
|
44
|
+
"status": "blocked",
|
|
45
|
+
"feedbackLoop": {
|
|
46
|
+
"status": "blocked",
|
|
47
|
+
"mode": "targeted-test",
|
|
48
|
+
"commandOrArtifact": "npm test -- src/feature/feature.test.ts",
|
|
49
|
+
"speed": "fast",
|
|
50
|
+
"determinism": "high",
|
|
51
|
+
"signalSharpness": "targeted failure would indicate the user-visible behavior regressed",
|
|
52
|
+
"reproductionRate": "not recorded",
|
|
53
|
+
"attempts": [
|
|
54
|
+
"targeted unit test"
|
|
55
|
+
],
|
|
56
|
+
"blockedReason": "red/green reproduction is not recorded yet"
|
|
57
|
+
},
|
|
58
|
+
"behaviorEvidence": {
|
|
59
|
+
"status": "blocked",
|
|
60
|
+
"userFacingBoundary": "feature behavior",
|
|
61
|
+
"expectedBehavior": "target behavior succeeds for the user",
|
|
62
|
+
"actualBehavior": "not proven yet",
|
|
63
|
+
"reproductionSteps": [
|
|
64
|
+
"run the targeted verification command"
|
|
65
|
+
],
|
|
66
|
+
"consistency": "not recorded",
|
|
67
|
+
"domainLanguage": [
|
|
68
|
+
"feature behavior"
|
|
69
|
+
]
|
|
70
|
+
},
|
|
71
|
+
"regressionProof": [
|
|
72
|
+
{
|
|
73
|
+
"behavior": "original symptom",
|
|
74
|
+
"redCommand": "",
|
|
75
|
+
"redFailure": "",
|
|
76
|
+
"greenCommand": "",
|
|
77
|
+
"greenObservation": "",
|
|
78
|
+
"restoredState": false
|
|
79
|
+
}
|
|
80
|
+
],
|
|
81
|
+
"testQuality": [
|
|
82
|
+
{
|
|
83
|
+
"area": "targeted-tests",
|
|
84
|
+
"checksRealBehavior": true,
|
|
85
|
+
"mockBoundary": "none",
|
|
86
|
+
"testOnlyProductionApi": false,
|
|
87
|
+
"status": "pass"
|
|
88
|
+
}
|
|
89
|
+
],
|
|
90
|
+
"coverageAudit": {
|
|
91
|
+
"status": "blocked",
|
|
92
|
+
"coveragePct": null,
|
|
93
|
+
"pathMap": ["planning/tasks.md#T002"],
|
|
94
|
+
"gaps": ["T002 has no spec review proof, so the requirement cannot be marked covered"],
|
|
95
|
+
"testsAdded": [],
|
|
96
|
+
"e2eRequired": false,
|
|
97
|
+
"evalRequired": false,
|
|
98
|
+
"qualityStars": "★"
|
|
99
|
+
},
|
|
100
|
+
"browserEvidence": {
|
|
101
|
+
"status": "skipped",
|
|
102
|
+
"mode": "not-applicable",
|
|
103
|
+
"affectedRoutes": [],
|
|
104
|
+
"screenshots": [],
|
|
105
|
+
"consoleErrors": [],
|
|
106
|
+
"healthScore": null,
|
|
107
|
+
"issues": [],
|
|
108
|
+
"skipReason": "template example is not a UI browser QA scenario"
|
|
109
|
+
},
|
|
110
|
+
"architectureFollowUps": [
|
|
111
|
+
{
|
|
112
|
+
"summary": "Add the missing public test seam before widening coverage",
|
|
113
|
+
"currentBehavior": "review proof depends on task metadata instead of a direct behavior seam",
|
|
114
|
+
"desiredBehavior": "tests verify the behavior through a stable public interface",
|
|
115
|
+
"keyInterfaces": [
|
|
116
|
+
"feature public API"
|
|
117
|
+
],
|
|
118
|
+
"acceptanceCriteria": [
|
|
119
|
+
"target behavior can be reproduced without private implementation hooks"
|
|
120
|
+
],
|
|
121
|
+
"outOfScope": [
|
|
122
|
+
"rewriting unrelated modules"
|
|
123
|
+
],
|
|
124
|
+
"status": "deferred-minor"
|
|
125
|
+
}
|
|
126
|
+
],
|
|
127
|
+
"tddException": null
|
|
128
|
+
},
|
|
12
129
|
"quickGates": [
|
|
13
130
|
{
|
|
14
131
|
"name": "targeted-tests",
|
|
@@ -28,17 +145,63 @@
|
|
|
28
145
|
"status": "blocked",
|
|
29
146
|
"summary": "Task review evidence is incomplete",
|
|
30
147
|
"details": "T002 is implemented, but the requirement still lacks spec review proof required by the gate.",
|
|
148
|
+
"freshness": {
|
|
149
|
+
"status": "unknown",
|
|
150
|
+
"reviewedCommit": "",
|
|
151
|
+
"currentCommit": "",
|
|
152
|
+
"commitsSinceReview": null,
|
|
153
|
+
"staleReason": "review range is not recorded yet"
|
|
154
|
+
},
|
|
155
|
+
"qualityScore": null,
|
|
156
|
+
"specialistReviews": [
|
|
157
|
+
{
|
|
158
|
+
"name": "testing",
|
|
159
|
+
"status": "blocked",
|
|
160
|
+
"required": true,
|
|
161
|
+
"summary": "testing facet cannot pass while task review proof is missing",
|
|
162
|
+
"skipReason": "",
|
|
163
|
+
"findings": []
|
|
164
|
+
}
|
|
165
|
+
],
|
|
31
166
|
"taskReviews": {
|
|
32
167
|
"status": "blocked",
|
|
33
168
|
"required": true,
|
|
34
169
|
"summary": "T002 has no spec review record yet",
|
|
170
|
+
"reviewPacket": {
|
|
171
|
+
"baseSha": "",
|
|
172
|
+
"headSha": "",
|
|
173
|
+
"requirements": "planning/tasks.md#T002",
|
|
174
|
+
"implemented": "implementation report for T002",
|
|
175
|
+
"reviewerContext": "task spec and changed files"
|
|
176
|
+
},
|
|
35
177
|
"reviewers": [],
|
|
36
|
-
"findings": [
|
|
178
|
+
"findings": [
|
|
179
|
+
{
|
|
180
|
+
"severity": "important",
|
|
181
|
+
"confidence": "high",
|
|
182
|
+
"source": "task-review",
|
|
183
|
+
"summary": "T002 spec review proof is missing",
|
|
184
|
+
"evidence": "planning/task-manifest.json tasks[T002].reviews.spec is empty",
|
|
185
|
+
"action": "reroute-cc-do",
|
|
186
|
+
"triageStatus": "clarification-needed",
|
|
187
|
+
"confidenceScore": 9,
|
|
188
|
+
"fingerprint": "task-review:T002:missing-spec-review",
|
|
189
|
+
"displayTier": "blocking",
|
|
190
|
+
"suppressionReason": null
|
|
191
|
+
}
|
|
192
|
+
]
|
|
37
193
|
},
|
|
38
194
|
"diffReview": {
|
|
39
195
|
"status": "skipped",
|
|
40
196
|
"required": false,
|
|
41
197
|
"summary": "",
|
|
198
|
+
"reviewPacket": {
|
|
199
|
+
"baseSha": "",
|
|
200
|
+
"headSha": "",
|
|
201
|
+
"requirements": "planning/design.md",
|
|
202
|
+
"implemented": "",
|
|
203
|
+
"reviewerContext": ""
|
|
204
|
+
},
|
|
42
205
|
"reviewers": [],
|
|
43
206
|
"findings": []
|
|
44
207
|
},
|
|
@@ -16,6 +16,17 @@
|
|
|
16
16
|
|
|
17
17
|
所有通过结论都必须来自本次新鲜证据;旧输出只能当线索,不能直接继承 verdict。
|
|
18
18
|
|
|
19
|
+
## QA Feedback Loop
|
|
20
|
+
|
|
21
|
+
行为变更和 bugfix 的 evidence 还必须说明反馈环:
|
|
22
|
+
|
|
23
|
+
- `mode`:failing test、curl / HTTP、CLI fixture、browser、trace replay、bisect、differential loop 等
|
|
24
|
+
- `determinism`:反馈是否稳定,flaky 时复现率是多少
|
|
25
|
+
- `signalSharpness`:失败是否指向目标行为,而不是语法、fixture 或 mock 问题
|
|
26
|
+
- `blockedReason`:无法建立 loop 时缺少什么 artifact、权限、服务或输入
|
|
27
|
+
|
|
28
|
+
没有可信 loop 的 bugfix 默认不能 `pass`。
|
|
29
|
+
|
|
19
30
|
## Reroute
|
|
20
31
|
|
|
21
32
|
- `none`
|