cc-devflow 4.5.2 → 4.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/.claude/skills/cc-act/CHANGELOG.md +19 -0
  2. package/.claude/skills/cc-act/PLAYBOOK.md +14 -1
  3. package/.claude/skills/cc-act/SKILL.md +46 -6
  4. package/.claude/skills/cc-act/assets/PR_BRIEF_TEMPLATE.md +44 -1
  5. package/.claude/skills/cc-act/assets/RELEASE_NOTE_TEMPLATE.md +18 -1
  6. package/.claude/skills/cc-act/references/closure-contract.md +3 -0
  7. package/.claude/skills/cc-act/scripts/cc-act-common.sh +27 -1
  8. package/.claude/skills/cc-act/scripts/render-pr-brief.sh +31 -0
  9. package/.claude/skills/cc-act/scripts/verify-act-gate.sh +6 -0
  10. package/.claude/skills/cc-check/CHANGELOG.md +18 -0
  11. package/.claude/skills/cc-check/PLAYBOOK.md +38 -7
  12. package/.claude/skills/cc-check/SKILL.md +39 -7
  13. package/.claude/skills/cc-check/assets/REPORT_CARD_TEMPLATE.json +61 -0
  14. package/.claude/skills/cc-check/references/gate-contract.md +11 -0
  15. package/.claude/skills/cc-check/references/review-contract.md +17 -1
  16. package/.claude/skills/cc-check/scripts/render-report-card.js +37 -0
  17. package/.claude/skills/cc-check/scripts/verify-gate.sh +7 -0
  18. package/.claude/skills/cc-do/CHANGELOG.md +18 -0
  19. package/.claude/skills/cc-do/PLAYBOOK.md +20 -13
  20. package/.claude/skills/cc-do/SKILL.md +37 -17
  21. package/.claude/skills/cc-do/references/execution-recovery.md +19 -5
  22. package/.claude/skills/cc-do/references/parallel-dispatch.md +6 -4
  23. package/.claude/skills/cc-do/scripts/detect-file-conflicts.sh +49 -3
  24. package/.claude/skills/cc-do/scripts/verify-task-gates.sh +19 -6
  25. package/.claude/skills/cc-do/scripts/write-task-checkpoint.sh +14 -2
  26. package/.claude/skills/cc-investigate/CHANGELOG.md +24 -0
  27. package/.claude/skills/cc-investigate/PLAYBOOK.md +35 -13
  28. package/.claude/skills/cc-investigate/SKILL.md +87 -20
  29. package/.claude/skills/cc-investigate/assets/ANALYSIS_TEMPLATE.md +68 -3
  30. package/.claude/skills/cc-investigate/assets/TASKS_TEMPLATE.md +9 -4
  31. package/.claude/skills/cc-investigate/assets/TASK_MANIFEST_TEMPLATE.json +41 -2
  32. package/.claude/skills/cc-investigate/references/investigation-contract.md +46 -0
  33. package/.claude/skills/cc-plan/CHANGELOG.md +32 -0
  34. package/.claude/skills/cc-plan/PLAYBOOK.md +26 -8
  35. package/.claude/skills/cc-plan/SKILL.md +79 -34
  36. package/.claude/skills/cc-plan/assets/DESIGN_TEMPLATE.md +71 -3
  37. package/.claude/skills/cc-plan/assets/TASKS_TEMPLATE.md +32 -0
  38. package/.claude/skills/cc-plan/assets/TASK_MANIFEST_TEMPLATE.json +76 -2
  39. package/.claude/skills/cc-plan/assets/TINY_DESIGN_TEMPLATE.md +58 -0
  40. package/.claude/skills/cc-plan/references/planning-contract.md +26 -4
  41. package/.claude/skills/cc-roadmap/CHANGELOG.md +14 -0
  42. package/.claude/skills/cc-roadmap/PLAYBOOK.md +10 -7
  43. package/.claude/skills/cc-roadmap/SKILL.md +43 -23
  44. package/.claude/skills/cc-roadmap/assets/BACKLOG_TEMPLATE.md +10 -0
  45. package/.claude/skills/cc-roadmap/assets/ROADMAP_TEMPLATE.md +15 -0
  46. package/.claude/skills/cc-roadmap/assets/TRACKING_TEMPLATE.json +1 -1
  47. package/.claude/skills/cc-roadmap/references/roadmap-dialogue.md +11 -7
  48. package/.claude/skills/cc-simplify/CHANGELOG.md +6 -0
  49. package/.claude/skills/cc-simplify/SKILL.md +10 -1
  50. package/.claude/skills/cc-spec-init/CHANGELOG.md +6 -0
  51. package/.claude/skills/cc-spec-init/SKILL.md +14 -1
  52. package/CHANGELOG.md +29 -0
  53. package/README.md +10 -2
  54. package/README.zh-CN.md +10 -2
  55. package/bin/cc-devflow-cli.js +93 -2
  56. package/docs/examples/example-bindings.json +7 -7
  57. package/docs/examples/full-design-blocked/BACKLOG.md +1 -1
  58. package/docs/examples/full-design-blocked/README.md +1 -1
  59. package/docs/examples/full-design-blocked/ROADMAP.md +1 -1
  60. package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/planning/design.md +1 -1
  61. package/docs/examples/full-design-blocked/changes/REQ-002-bulk-invite-import/planning/tasks.md +1 -1
  62. package/docs/examples/full-design-blocked/roadmap-tracking.json +1 -1
  63. package/docs/examples/local-handoff/BACKLOG.md +1 -1
  64. package/docs/examples/local-handoff/README.md +1 -1
  65. package/docs/examples/local-handoff/ROADMAP.md +1 -1
  66. package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/planning/design.md +1 -1
  67. package/docs/examples/local-handoff/changes/REQ-003-audit-log-export/planning/tasks.md +1 -1
  68. package/docs/examples/local-handoff/roadmap-tracking.json +1 -1
  69. package/docs/examples/pdca-loop/BACKLOG.md +1 -1
  70. package/docs/examples/pdca-loop/README.md +1 -1
  71. package/docs/examples/pdca-loop/ROADMAP.md +1 -1
  72. package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/design.md +1 -1
  73. package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/task-manifest.json +2 -2
  74. package/docs/examples/pdca-loop/changes/REQ-001-copy-invite-link/planning/tasks.md +1 -1
  75. package/docs/examples/pdca-loop/roadmap-tracking.json +1 -1
  76. package/docs/get-shit-done-strategy-audit.md +518 -0
  77. package/docs/skill-strategy-audit.md +48 -0
  78. package/lib/compiler/__tests__/inventory.test.js +51 -0
  79. package/lib/compiler/inventory.js +78 -0
  80. package/lib/skill-runtime/__tests__/approve.test.js +92 -0
  81. package/lib/skill-runtime/__tests__/autopilot.test.js +4 -0
  82. package/lib/skill-runtime/__tests__/planner.tdd.test.js +20 -0
  83. package/lib/skill-runtime/__tests__/query.test.js +147 -1
  84. package/lib/skill-runtime/__tests__/readiness.test.js +53 -0
  85. package/lib/skill-runtime/__tests__/release.test.js +85 -0
  86. package/lib/skill-runtime/__tests__/runtime.integration.test.js +30 -1
  87. package/lib/skill-runtime/__tests__/schemas.test.js +56 -0
  88. package/lib/skill-runtime/__tests__/worker-run.test.js +29 -0
  89. package/lib/skill-runtime/errors.js +39 -0
  90. package/lib/skill-runtime/index.js +8 -0
  91. package/lib/skill-runtime/operations/approve.js +17 -2
  92. package/lib/skill-runtime/operations/release.js +6 -3
  93. package/lib/skill-runtime/operations/worker-run.js +30 -0
  94. package/lib/skill-runtime/planner.js +10 -2
  95. package/lib/skill-runtime/query-registry.js +101 -0
  96. package/lib/skill-runtime/query.js +159 -91
  97. package/lib/skill-runtime/readiness.js +84 -0
  98. package/lib/skill-runtime/schemas.js +39 -4
  99. package/lib/skill-runtime/trace.js +22 -0
  100. package/package.json +1 -1
@@ -9,7 +9,7 @@ set -euo pipefail
9
9
  usage() {
10
10
  cat <<'EOF'
11
11
  Usage:
12
- write-task-checkpoint.sh --dir path/to/change --task T001 --status pending|running|passed|failed|skipped --summary "..." [--event context_ready] [--attempt 0] [--session session-id] [--next-action "..."]
12
+ write-task-checkpoint.sh --dir path/to/change --task T001 --status pending|running|passed|failed|skipped --summary "..." [--event context_ready] [--attempt 0] [--session session-id] [--next-action "..."] [--tdd-json '{"red":...}']
13
13
  EOF
14
14
  }
15
15
 
@@ -23,6 +23,7 @@ EVENT_TYPE=""
23
23
  ATTEMPT="0"
24
24
  SESSION_ID=""
25
25
  NEXT_ACTION=""
26
+ TDD_JSON=""
26
27
 
27
28
  while [[ $# -gt 0 ]]; do
28
29
  case "$1" in
@@ -34,6 +35,7 @@ while [[ $# -gt 0 ]]; do
34
35
  --attempt) ATTEMPT="$2"; shift 2 ;;
35
36
  --session) SESSION_ID="$2"; shift 2 ;;
36
37
  --next-action) NEXT_ACTION="$2"; shift 2 ;;
38
+ --tdd-json) TDD_JSON="$2"; shift 2 ;;
37
39
  -h|--help) usage; exit 0 ;;
38
40
  *) echo "Unknown arg: $1" >&2; usage; exit 1 ;;
39
41
  esac
@@ -57,6 +59,15 @@ if [[ -z "$SESSION_ID" ]]; then
57
59
  SESSION_ID="${TASK_ID}-$(date -u +%s)"
58
60
  fi
59
61
 
62
+ tdd_payload="null"
63
+ if [[ -n "$TDD_JSON" ]]; then
64
+ if [[ -f "$TDD_JSON" ]]; then
65
+ tdd_payload="$(jq -c . "$TDD_JSON")"
66
+ else
67
+ tdd_payload="$(printf '%s' "$TDD_JSON" | jq -c .)"
68
+ fi
69
+ fi
70
+
60
71
  jq -nc \
61
72
  --arg changeId "$change_id" \
62
73
  --arg taskId "$TASK_ID" \
@@ -66,6 +77,7 @@ jq -nc \
66
77
  --arg summary "$SUMMARY" \
67
78
  --arg timestamp "$timestamp" \
68
79
  --arg attempt "$ATTEMPT" \
80
+ --argjson tdd "$tdd_payload" \
69
81
  '{
70
82
  changeId: $changeId,
71
83
  taskId: $taskId,
@@ -75,7 +87,7 @@ jq -nc \
75
87
  summary: $summary,
76
88
  timestamp: $timestamp,
77
89
  attempt: ($attempt | tonumber)
78
- }' > "$runtime_task_dir/checkpoint.json"
90
+ } + (if $tdd == null then {} else {tdd: $tdd} end)' > "$runtime_task_dir/checkpoint.json"
79
91
 
80
92
  if [[ -n "$EVENT_TYPE" || "$STATUS" == "failed" ]]; then
81
93
  jq -nc \
@@ -1,5 +1,29 @@
1
1
  # CC-Investigate Skill Changelog
2
2
 
3
+ ## v1.2.1 - 2026-04-29
4
+
5
+ - add persistent debug session fields for active hypothesis, probes, cleanup state, and next evidence action
6
+ - add diagnose-only and workflow-forensics modes so root-cause reports do not masquerade as completed repairs
7
+ - update the analysis template with debug session, workflow forensics, and diagnose-only outcome sections
8
+
9
+ ## v1.2.0 - 2026-04-28
10
+
11
+ - treat feedback loops as investigation products that must be made faster, sharper, and more deterministic before root cause freeze
12
+ - require flaky investigations to raise reproduction rate with stress, repetition, timing-window narrowing, or differential loops instead of guessing from weak signals
13
+ - add prevention handoff so confirmed root causes produce either a regression task, architecture finding, or explicit non-recorded reason
14
+
15
+ ## v1.1.6 - 2026-04-28
16
+
17
+ - clarify that investigation domain language and durable decisions come from cc-devflow native sources: `devflow/specs/`, roadmap/backlog handoff, historical design/analysis, and change metadata
18
+ - remove external context/architecture-decision files from the standard investigation contract so they are not implied as generated artifacts
19
+ - route conflicts through capability specs, roadmap decisions, or historical design decisions instead of external decision-document language
20
+
21
+ ## v1.1.5 - 2026-04-28
22
+
23
+ - add a feedback-loop contract so investigations record loop type, command, symptom match, runtime, determinism, failure rate, signal specificity, and sharpening plan before freezing root cause
24
+ - require ranked candidate hypotheses before narrowing to active falsification targets, plus probe tags for cleanup-safe diagnostic instrumentation
25
+ - add performance-regression, native domain/decision context, correct-test-seam, and evidence-request fields across the analysis, task, manifest, playbook, and investigation contract templates
26
+
3
27
  ## v1.1.4 - 2026-04-28
4
28
 
5
29
  - add boundary-probe, backward-trace, reference-comparison, diagnostic-instrumentation, and condition-wait investigation modes for multi-component, deep-stack, similar-path, and flaky failures
@@ -12,12 +12,16 @@
12
12
  ## Core Rules
13
13
 
14
14
  1. 先复现,再猜原因。
15
- 2. 先看最近变化,再决定是不是 regression。
16
- 3. 先证伪假设,再冻结根因。
17
- 4. `planning/analysis.md` 和 `planning/tasks.md` 必须足够让 `cc-do` 脱离当前会话继续工作。
18
- 5. 调查失败三次后先重建入口,不准继续乱补。
19
- 6. 没有 frozen root-cause contract,不准进入 repair task。
20
- 7. 多组件、深层调用、flaky 问题必须先补边界探针、反向追踪或条件等待证据。
15
+ 2. 先把复现做成快、准、可复跑的 feedback loop。
16
+ 3. 先确认 loop 复现的是用户报告的同一个失败。
17
+ 4. 先看最近变化,再决定是不是 regression。
18
+ 5. 先证伪假设,再冻结根因。
19
+ 6. `planning/analysis.md` 和 `planning/tasks.md` 必须足够让 `cc-do` 脱离当前会话继续工作。
20
+ 7. 调查失败三次后先重建入口,不准继续乱补。
21
+ 8. 没有 frozen root-cause contract,不准进入 repair task。
22
+ 9. 多组件、深层调用、flaky 问题必须先补边界探针、反向追踪或条件等待证据。
23
+ 10. diagnose-only 只能输出根因、owner、风险和 next action,不能把未修复状态标成完成。
24
+ 11. workflow forensics 先分类 artifact / git / state / tool / permission / process failure,再决定是否进入修复。
21
25
 
22
26
  ## Iron Law
23
27
 
@@ -36,18 +40,21 @@ root-cause contract 至少包含:稳定复现或缩小后的可验证症状、
36
40
  ## Investigation Standard
37
41
 
38
42
  1. 先收集 symptom、expected、actual、repro。
39
- 2. 先查 prior investigations、TODOS/backlog、report-card finding 和最近变更。
40
- 3. 先沿代码路径定位触点和最近变更。
41
- 4. 先做 pattern analysis,再形成 1-3 个可证伪假设。
42
- 5. 每个假设都要写支持证据、反证、证伪方法、预期观察、实际观察。
43
- 6. 只有被证据钉死的根因才能进入 repair contract。
44
- 7. repair contract 只讲最小修复边界,不顺手发明新范围。
43
+ 2. 先构造 feedback loop:失败测试、HTTP 脚本、CLI fixture、浏览器脚本、trace replay、throwaway harness、fuzz、bisect、differential,最后才是 HITL。
44
+ 3. 记录 loop 的运行时间、确定性、失败率、症状匹配证据和 sharpen 计划。
45
+ 4. 先查 prior investigations、TODOS/backlog、report-card finding 和最近变更。
46
+ 5. 先沿代码路径定位触点和最近变更。
47
+ 6. 先做 pattern analysis,再列 3-5 个候选假设并收敛到 1-3 个 active hypotheses。
48
+ 7. 每个假设都要写支持证据、反证、证伪方法、预期观察、实际观察。
49
+ 8. 只有被证据钉死的根因才能进入 repair contract。
50
+ 9. repair contract 只讲最小修复边界,不顺手发明新范围。
45
51
 
46
52
  ## Investigation Modes
47
53
 
48
54
  | Mode | 什么时候用 | 第一动作 |
49
55
  | --- | --- | --- |
50
56
  | `reproduce-first` | 症状真实但不稳定 | 缩小复现命令 / 手动路径 |
57
+ | `feedback-loop` | 已有复现但信号慢、松、偶然或不确定是否同一 bug | 记录 loop type、命令、runtime、determinism、failure rate 和 symptom match |
51
58
  | `diff-trace` | 昨天可用、今天坏了 | `git log --oneline -20 -- <affected-files>` |
52
59
  | `boundary-probe` | API -> service -> DB、CI -> build -> deploy 这类链路断裂 | 记录每层输入、输出、配置和状态 |
53
60
  | `backward-trace` | 错误出现在深层堆栈或坏值来源不明 | 从 immediate failure site 反追 original trigger |
@@ -56,6 +63,8 @@ root-cause contract 至少包含:稳定复现或缩小后的可验证症状、
56
63
  | `history-trace` | 同一区域反复坏 | 查历史 `analysis.md`、TODO、report-card finding |
57
64
  | `pattern-research` | 陌生框架 / 依赖 / 平台错误 | 脱敏后查通用错误类型 |
58
65
  | `contract-check` | 修复边界可能扩大 | 判定 implementation drift / missing spec truth / roadmap mismatch |
66
+ | `diagnose-only` | 用户只要问题解释或现在不能修 | 冻结 root cause、owner、risk、next action,不生成实现完成态 |
67
+ | `workflow-forensics` | devflow artifact、git、状态、权限或工具链断裂 | 分类 failure owner 和 rescue action,再决定 reroute |
59
68
 
60
69
  ## Pattern Analysis
61
70
 
@@ -68,9 +77,12 @@ root-cause contract 至少包含:稳定复现或缩小后的可验证症状、
68
77
  - configuration drift:本地 / CI / 生产表现不同
69
78
  - stale cache:清缓存后恢复或旧状态复现
70
79
  - resource leak:OOM、句柄增长、生命周期未释放
80
+ - performance regression:变慢、CPU / IO / 查询耗时升高、吞吐下降
71
81
  - trust boundary drift:外部输入、LLM 输出、用户输入被当成可信
72
82
  - timing guess / flaky wait:任意 sleep / timeout / setTimeout 掩盖真实条件
73
83
 
84
+ 性能回归先建 baseline、profiler、query plan 或 bisect,不把普通日志当性能证据。
85
+
74
86
  ## Boundary And Trace Evidence
75
87
 
76
88
  复杂链路必须在 `analysis.md` 写清:
@@ -78,7 +90,12 @@ root-cause contract 至少包含:稳定复现或缩小后的可验证症状、
78
90
  - Boundary Probe Matrix:component boundary、input observed、output observed、config/env observed、state observed、verdict
79
91
  - Backward Trace Chain:immediate failure site、caller chain、bad value origin、original trigger、why symptom-site fix is rejected
80
92
  - Reference Comparison:similar working example、broken path、differences accepted / ruled out
81
- - Diagnostic Instrumentation Plan:probe location、question answered、command、expected signal、cleanup requirement
93
+ - Diagnostic Instrumentation Plan:probe tag、probe location、question answered、command、expected signal、cleanup requirement
94
+ - Feedback Loop Contract:loop type、command、expected / actual signal、symptom match、runtime、determinism、failure rate、sharpening plan
95
+ - Correct Test Seam:test seam、public interface exercised、why it reaches the real trigger chain、why shallow tests are rejected
96
+ - Persistent Debug Session:session id、active hypothesis、completed probes、cleanup state、next evidence action
97
+ - Workflow Forensics:artifact state、git state、runtime state、tool/permission/process failure owner、rescue action
98
+ - Diagnose-Only Outcome:root cause, owner, risk, next action, and explicit no-repair verdict
82
99
 
83
100
  这些字段不是装饰。它们的作用是证明根因位于源头,而不是报错点。
84
101
 
@@ -93,6 +110,10 @@ root-cause contract 至少包含:稳定复现或缩小后的可验证症状、
93
110
 
94
111
  命中历史时,写入 `analysis.md` 的 `Prior Investigations`,说明这次是复发、同类结构味道,还是无关历史。
95
112
 
113
+ ## Domain And Decision Context
114
+
115
+ 优先读取 cc-devflow 原生上下文:`devflow/specs/INDEX.md`、相关 capability specs、roadmap/backlog handoff、历史 `planning/design.md` / `planning/analysis.md`、`change-meta.json`。调查输出里的领域名、假设名、测试名应沿用项目词汇;如果调查结论违反 capability spec、roadmap decision 或历史 design decision,要显式写入 evidence chain,而不是静默覆盖既有设计决策。
116
+
96
117
  ## External Research Hygiene
97
118
 
98
119
  只有在本地证据不足、错误类型陌生、或像依赖 / 框架 / 平台问题时才做外部调研。
@@ -130,6 +151,7 @@ root-cause contract 至少包含:稳定复现或缩小后的可验证症状、
130
151
  - attempted evidence
131
152
  - why current entry is suspect
132
153
  - next option:continue / instrument-and-wait / human-review / reroute-cc-plan
154
+ - evidence request:需要可复现环境、HAR、日志 dump、core dump、带时间戳录屏或临时生产探针权限
133
155
  - recommendation
134
156
 
135
157
  ## Local Kit
@@ -1,6 +1,6 @@
1
1
  ---
2
2
  name: cc-investigate
3
- version: 1.1.4
3
+ version: 1.2.1
4
4
  description: "Use when a bug, regression, broken task, or unexpected behavior needs root-cause investigation, reproducible evidence, and a frozen repair handoff before cc-do resumes coding."
5
5
  triggers:
6
6
  - "帮我查这个 bug"
@@ -33,12 +33,16 @@ writes:
33
33
  entry_gate:
34
34
  - "Read the current bug report, existing requirement artifacts, relevant code, tests, and recent history before forming any hypothesis."
35
35
  - "Use a FIX-<number>-<description> change key for new bug-fix investigations."
36
- - "Reproduce or narrow the symptom first, then freeze the evidence chain before proposing repair tasks."
36
+ - "Build a runnable feedback loop, confirm it matches the reported symptom, then freeze the evidence chain before proposing repair tasks."
37
+ - "Record persistent debug session state: active hypothesis, probes, cleanup status, and next evidence action."
37
38
  - "Search prior investigations, TODO/backlog signals, and recent fixes in the affected area before declaring the bug novel."
38
39
  - "For multi-component, deep-stack, or flaky symptoms, record boundary probes, backward trace, or condition-wait evidence before declaring the root cause."
40
+ - "For performance regressions, collect a baseline or profile signal before treating logs as evidence."
39
41
  - "Do not write production code here; this stage ends with planning/analysis.md plus executable repair tasks for cc-do."
40
42
  exit_criteria:
41
43
  - "planning/analysis.md records symptom, reproduction, evidence chain, boundary probes or backward trace when applicable, pattern analysis, tested hypotheses, confirmed root cause, and repair boundary."
44
+ - "diagnose-only outcomes clearly stop before implementation while preserving root cause, owner, and next action."
45
+ - "workflow forensics classify artifact, git, state, or tool failures before repair tasks are generated."
42
46
  - "planning/tasks.md and planning/task-manifest.json are explicit enough that cc-do can repair the bug without chat memory."
43
47
  - "The honest next step is cc-do, cc-plan, or roadmap."
44
48
  reroutes:
@@ -130,6 +134,7 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
130
134
  | 现实状态 | 先走什么路径 |
131
135
  | --- | --- |
132
136
  | 症状真实,但还没有稳定复现 | `reproduce-first`,先把现象钉死 |
137
+ | 已有复现但信号慢 / 松 / 偶然 | `feedback-loop`,先把 pass/fail loop 做快、准、可复跑 |
133
138
  | 明显是 regression | `diff-trace`,先查最近变化 |
134
139
  | 多组件链路断裂 | `boundary-probe`,先记录每个边界的输入、输出、配置和状态 |
135
140
  | 报错点很深或坏值来源不明 | `backward-trace`,从 symptom site 一直追到 original trigger |
@@ -139,6 +144,8 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
139
144
  | 错误类型陌生,像框架 / 依赖 / 平台问题 | `pattern-research`,先做脱敏外部调研 |
140
145
  | 同一区域反复坏 | `history-trace`,先查 prior investigations 和最近修复 |
141
146
  | 看起来像 bug,实则是未定义行为或新需求 | 直接 reroute 到 `cc-plan` |
147
+ | 用户只要根因报告、不要求修复 | `diagnose-only`,停止在报告与 next action,不生成完成态实现任务 |
148
+ | 失败来自 workflow / artifact / git / state 断裂 | `workflow-forensics`,先分类坏在文件、状态、工具、权限还是流程 |
142
149
 
143
150
  先说“这是什么类问题”,再说“我要怎么修”。没有分类的 debug,最后都会变成乱打补丁。
144
151
 
@@ -172,6 +179,9 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
172
179
 
173
180
  `cc-investigate` 不写生产代码,不在这里偷跑 `cc-do`。
174
181
 
182
+ diagnose-only 仍然写 `planning/analysis.md`,但 `planning/tasks.md` 只能包含证据交接、
183
+ 监控、人工动作或明确的 `reroute`;不能把“已经诊断”伪装成“已经修复”。
184
+
175
185
  ## Entry Gate
176
186
 
177
187
  1. 先确认当前对象仍然属于一个 requirement,而不是整个项目级故障。
@@ -186,35 +196,47 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
186
196
  - 记录用户看见了什么
187
197
  - 记录预期与实际差异
188
198
  - 记录复现命令或手动路径
199
+ - 确认复现的是用户描述的同一个失败,而不是旁边的红灯
189
200
  - 如果上下文缺失,只问一个最关键问题,不一次性抛出问题清单
190
- 2. **Trace reality**
201
+ 2. **Build feedback loop**
202
+ - 优先构造 agent 可运行的 pass/fail 信号:失败测试、curl / HTTP 脚本、CLI fixture、浏览器脚本、trace replay、throwaway harness、property / fuzz loop、bisect harness、differential loop,最后才是 HITL 脚本
203
+ - 记录 loop 类型、命令、运行时间、确定性、失败率、症状匹配证据和下一步 sharpen 计划
204
+ - loop 太慢、太宽、太 flaky 时,先优化 loop 本身;没有可信 loop,不进入 confirmed root cause
205
+ - 如果确实无法建 loop,写明尝试过什么,并请求 HAR、日志 dump、core dump、带时间戳录屏、可复现环境访问或临时生产探针权限
206
+ 3. **Trace reality**
191
207
  - 沿着代码路径找触点
192
208
  - 多组件系统先写 `Boundary Probe Matrix`:每个边界的输入、输出、配置 / 环境、状态和 pass/fail
193
209
  - 深层报错先写 `Backward Trace Chain`:immediate failure site、caller chain、bad value origin、original trigger
194
210
  - 查最近提交和同类改动
195
211
  - 查既有 `devflow/changes/*/planning/analysis.md`、`TODOS.md`、backlog、report-card finding
212
+ - 如果仓库有 `devflow/specs/`、roadmap/backlog handoff、历史 `planning/design.md` / `planning/analysis.md` 或 `change-meta.json`,把领域词汇和已冻结决策当成契约证据
196
213
  - 找现有测试和断点证据
197
214
  - 判定偏离的是 capability boundary、invariant,还是只是实现细节
198
- 3. **Classify pattern**
199
- - 判定是否属于 race condition、null propagation、state corruption、integration failure、configuration drift、stale cache、resource leak、trust boundary drift、timing guess / flaky wait
215
+ 4. **Classify pattern**
216
+ - 判定是否属于 race condition、null propagation、state corruption、integration failure、configuration drift、stale cache、resource leak、performance regression、trust boundary drift、timing guess / flaky wait
200
217
  - 如果有同仓库 working example,先写 `Reference Comparison`,列出 working path、broken path、差异和被接受 / 排除的假设
201
218
  - 如果错误类型陌生,先做脱敏外部调研;只搜索通用错误类型、框架 / 库名和版本,不搜索 raw secret / path / customer data
202
- 4. **Form hypotheses**
203
- - 只保留 1-3 个可被证伪的假设
204
- - 每个假设都写支持证据和反证
219
+ 5. **Form hypotheses**
220
+ - 先列 3-5 个候选假设并排序,避免第一直觉锚定
221
+ - 再收敛到 1-3 个 active hypotheses 进入验证
222
+ - 每个假设都写支持证据、反证和优先级理由
205
223
  - 每个假设都写 `falsification method`、`expected observation`、`actual observation`
206
- 5. **Test hypotheses**
224
+ 6. **Test hypotheses**
207
225
  - 用复现、日志、断言、最小探针验证
208
- - 临时探针必须写 `Diagnostic Instrumentation Plan`:probe location、question answered、command、expected signal、cleanup requirement
226
+ - 临时探针必须写 `Diagnostic Instrumentation Plan`:probe tag、probe location、question answered、command、expected signal、cleanup requirement
227
+ - 每个 probe 只回答一个假设预测;一次只改一个变量
228
+ - debug 日志必须带唯一前缀,例如 `[DEBUG-FIX123-a4f2]`,进入 `cc-do` 前用前缀 grep 清理或转正
209
229
  - 三次假设都失败,就停下进入 escalation decision
210
- 6. **Freeze repair contract**
230
+ 7. **Freeze repair contract**
211
231
  - 根因确认后,写进 `planning/analysis.md`
212
232
  - 只保留最小修复边界
233
+ - 写清正确测试缝隙:测试是否覆盖真实触发链;如果没有正确 seam,这本身就是需要记录的架构事实
213
234
  - 写明 affected module、allowed files、forbidden files、blast radius estimate;超过 5 个文件默认拆分或 reroute
214
235
  - 输出 `planning/tasks.md` + `planning/task-manifest.json` + `change-meta.json`
215
- 7. **Hand off**
236
+ 8. **Hand off**
216
237
  - 下一步明确写成 `cc-do`
217
238
  - 如果 repair contract 越过当前 requirement,就 reroute 到 `cc-plan`
239
+ - 如果是 diagnose-only,下一步写成 human action、monitoring、backlog、`cc-plan` 或 `cc-do`,但不得标记实现完成
218
240
 
219
241
  ## Pattern Analysis
220
242
 
@@ -229,6 +251,7 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
229
251
  | configuration drift | 本地可用、CI/生产失败 | env、feature flag、版本、路径、权限 |
230
252
  | stale cache | 清缓存后恢复、旧状态复现 | browser / CDN / Redis / build cache |
231
253
  | resource leak | OOM、句柄增长、慢性崩溃 | lifecycle、subscription、retention、cleanup |
254
+ | performance regression | 变慢、CPU / IO / 查询耗时升高、吞吐下降 | baseline、profiler、query plan、bisect |
232
255
  | trust boundary drift | LLM / 用户输入 / 外部响应被当成可信 | validation、escaping、policy gate |
233
256
  | timing guess / flaky wait | sleep / setTimeout / timeout 增大后偶尔通过 | 真实完成条件、事件、文件、状态或队列计数 |
234
257
 
@@ -285,6 +308,39 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
285
308
 
286
309
  探针不能变成修复。进入 `cc-do` 前,要么删除,要么明确写入 repair task 的清理 / 转正方式。
287
310
 
311
+ ## Feedback Loop Contract
312
+
313
+ 根因调查首先依赖一个可信 loop:
314
+
315
+ - loop type: failing test / HTTP script / CLI fixture / browser script / trace replay / throwaway harness / property-fuzz / bisect / differential / HITL
316
+ - command or manual driver
317
+ - expected failing signal
318
+ - actual failing signal
319
+ - symptom match: 为什么它复现的是用户报告的同一个问题
320
+ - runtime and determinism
321
+ - failure rate for flaky bugs
322
+ - sharpening plan: 如何让它更快、更准、更稳定
323
+
324
+ 把 loop 当成调查产品来迭代。已有 loop 但信号差时,先优化它:
325
+
326
+ 1. faster:缓存 setup、缩小 test scope、跳过无关启动。
327
+ 2. sharper:断言用户看见的具体症状,不用“没有崩溃”冒充匹配。
328
+ 3. more deterministic:固定时间、随机种子、filesystem、network、locale、feature flag。
329
+
330
+ flaky bug 的目标不是立刻 100% 复现,而是提高复现率直到可调试。可以循环 100 次、并行触发、加压力、缩小时序窗口或做 differential loop;如果失败率仍低到不可证伪,先写 Evidence Request,不要继续猜。
331
+
332
+ 没有 loop 时,不能把代码阅读当成根因。只能写 `Evidence Request`:需要可复现环境、HAR、日志 dump、core dump、带时间戳录屏,或临时生产探针权限。
333
+
334
+ ## Correct Test Seam
335
+
336
+ 进入 repair handoff 前,必须说明回归测试缝隙是否正确:
337
+
338
+ - test seam
339
+ - public interface exercised
340
+ - why this seam reaches the real trigger chain
341
+ - why a shallower test would be false confidence
342
+ - if no correct seam exists, record it as an architecture finding and keep repair verification tied to the original feedback loop
343
+
288
344
  ## Timing And Flaky Bugs
289
345
 
290
346
  遇到 flaky、sleep、timeout、重试后消失:
@@ -345,6 +401,14 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
345
401
  2. 不能拆但仍是根因跨度,写明为什么。
346
402
  3. 如果已经变成设计 / 架构范围,reroute 到 `cc-plan`。
347
403
 
404
+ ## Prevention Handoff
405
+
406
+ 根因冻结后必须写一句后验判断:什么结构、测试 seam、capability invariant、operator guard 或文档会让这个 bug 更早暴露或根本不出现。
407
+
408
+ - 如果答案是小范围 regression test,把它写进当前 repair task。
409
+ - 如果答案是 seam / module / capability 边界问题,把它写成 architecture finding,并明确交给 `cc-plan` 或后续 backlog。
410
+ - 如果答案只是流程提醒或人工记忆,不算预防;要么转成可执行 guard,要么明确不记录。
411
+
348
412
  ## Escalation Decision
349
413
 
350
414
  三次假设失败后,不准继续硬猜。`analysis.md` 必须写:
@@ -353,6 +417,7 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
353
417
  - what was attempted
354
418
  - why current entry is suspect
355
419
  - next option:`continue-with-new-hypothesis` / `instrument-and-wait` / `human-review` / `reroute-cc-plan`
420
+ - evidence request if the loop cannot be built or the environment is missing
356
421
  - recommendation
357
422
 
358
423
  ## Good Output
@@ -362,6 +427,7 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
362
427
  - 假设不是列表装饰,而是带证伪方法和实际观察
363
428
  - 历史调查、最近改动、模式分析没有被跳过
364
429
  - 修复边界清楚到 `cc-do` 不需要二次调查
430
+ - 正确测试缝隙写清楚,不用浅层测试制造假安全
365
431
  - `planning/tasks.md` 只包含修复任务,不夹带新需求
366
432
  - 如果应该回 `cc-plan`,理由写清楚,不假装还能继续 patch
367
433
 
@@ -377,14 +443,15 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
377
443
  ## Working Rules
378
444
 
379
445
  1. 没有复现,不准声称找到了根因。
380
- 2. 没有证据,不准把猜测写成结论。
381
- 3. 先根因,再修复;先调查,再编码。
382
- 4. `planning/tasks.md` 必须足够让 `cc-do` 在脱离当前对话后继续推进。
383
- 5. 如果修复方案已经变成新 feature 设计,停止 debug,回 `cc-plan`。
384
- 6. 三次假设失败后,默认说明你的调查入口错了,不准继续硬猜。
385
- 7. 外部调研必须先脱敏,调研结论必须回到本仓库证据验证。
386
- 8. 修复触点超过 5 个文件时,默认先拆分或 reroute,不把大重构伪装成 bug fix。
387
- 9. 好的调查不是“找了很多可能性”,而是把错误世界缩成一个可信的 repair contract。
446
+ 2. 没有可信 feedback loop,不准把代码阅读包装成 confirmed root cause。
447
+ 3. 没有证据,不准把猜测写成结论。
448
+ 4. 先根因,再修复;先调查,再编码。
449
+ 5. `planning/tasks.md` 必须足够让 `cc-do` 在脱离当前对话后继续推进。
450
+ 6. 如果修复方案已经变成新 feature 设计,停止 debug,回 `cc-plan`。
451
+ 7. 三次假设失败后,默认说明你的调查入口错了,不准继续硬猜。
452
+ 8. 外部调研必须先脱敏,调研结论必须回到本仓库证据验证。
453
+ 9. 修复触点超过 5 个文件时,默认先拆分或 reroute,不把大重构伪装成 bug fix。
454
+ 10. 好的调查不是“找了很多可能性”,而是把错误世界缩成一个可信的 repair contract。
388
455
 
389
456
  ## Exit Criteria
390
457
 
@@ -17,10 +17,36 @@
17
17
  - What the user saw:
18
18
  - Reproduction command / path:
19
19
  - Repro stability: `stable` | `intermittent` | `not-yet-reproduced` | `narrowed-only`
20
+ - Matches reported symptom: `yes` | `no` | `partial` | `unknown`
21
+ - Symptom match evidence:
20
22
  - Expected:
21
23
  - Actual:
22
24
  - Impact / blast radius:
23
25
 
26
+ ## Feedback Loop Contract
27
+
28
+ - Loop type: `failing-test` | `http-script` | `cli-fixture` | `browser-script` | `trace-replay` | `throwaway-harness` | `property-fuzz` | `bisect` | `differential` | `hitl`
29
+ - Command or manual driver:
30
+ - Expected failing signal:
31
+ - Actual failing signal:
32
+ - Runtime:
33
+ - Determinism: `deterministic` | `high-rate-flaky` | `low-rate-flaky` | `unknown`
34
+ - Failure rate:
35
+ - Signal specificity:
36
+ - Sharpening plan:
37
+ - If no loop, evidence request:
38
+
39
+ ## Debug Session
40
+
41
+ - Session ID:
42
+ - Started at:
43
+ - Current mode: `reproduce-first` | `feedback-loop` | `diff-trace` | `boundary-probe` | `backward-trace` | `reference-compare` | `condition-wait` | `history-trace` | `pattern-research` | `contract-check` | `diagnose-only` | `workflow-forensics`
44
+ - Active hypothesis:
45
+ - Completed probes:
46
+ - Open probes:
47
+ - Cleanup status:
48
+ - Next evidence action:
49
+
24
50
  ## Evidence Chain
25
51
 
26
52
  - Logs / stack traces:
@@ -29,6 +55,13 @@
29
55
  - Existing tests:
30
56
  - Prior investigations:
31
57
  - TODO / backlog / report-card signals:
58
+ - Native domain / decision context:
59
+
60
+ ## Workflow Forensics
61
+
62
+ | Failure surface | Observed state | Owner | Rescue action | Evidence |
63
+ | --- | --- | --- | --- | --- |
64
+ | artifact / git / runtime-state / tool / permission / process | | | | |
32
65
 
33
66
  ## Boundary Probe Matrix
34
67
 
@@ -55,9 +88,9 @@
55
88
 
56
89
  ## Diagnostic Instrumentation Plan
57
90
 
58
- | Probe location | Question answered | Command to run | Expected signal | Actual signal | Cleanup requirement |
59
- | --- | --- | --- | --- | --- | --- |
60
- | | | | | | |
91
+ | Probe tag | Probe location | Question answered | Command to run | Expected signal | Actual signal | Cleanup requirement |
92
+ | --- | --- | --- | --- | --- | --- | --- |
93
+ | | | | | | | |
61
94
 
62
95
  ## Pattern Analysis
63
96
 
@@ -70,9 +103,16 @@
70
103
  | configuration drift | | ruled-out | |
71
104
  | stale cache | | ruled-out | |
72
105
  | resource leak | | ruled-out | |
106
+ | performance regression | | ruled-out | |
73
107
  | trust boundary drift | | ruled-out | |
74
108
  | timing guess / flaky wait | | ruled-out | |
75
109
 
110
+ ## Candidate Hypotheses
111
+
112
+ | Rank | Hypothesis | Why plausible | Prediction | Status |
113
+ | --- | --- | --- | --- | --- |
114
+ | 1 | | | | pending |
115
+
76
116
  ## Research Evidence
77
117
 
78
118
  - External research used: `yes` | `no`
@@ -94,6 +134,7 @@
94
134
  - Attempted evidence:
95
135
  - Why current entry is suspect:
96
136
  - Next option: `continue-with-new-hypothesis` | `instrument-and-wait` | `human-review` | `reroute-cc-plan`
137
+ - Evidence request:
97
138
  - Recommendation:
98
139
 
99
140
  ## Root Cause
@@ -108,6 +149,24 @@
108
149
  - Operator handling after fix:
109
150
  - Prior history relationship: `new` | `recurring` | `same-root-cause` | `architectural-smell-candidate`
110
151
 
152
+ ## Diagnose-Only Outcome
153
+
154
+ - Applies: `yes` | `no`
155
+ - Why no repair now:
156
+ - Root cause owner:
157
+ - Risk if left unresolved:
158
+ - Monitoring / follow-up evidence:
159
+ - Next action: `human-action` | `monitor` | `backlog` | `reroute-cc-plan` | `handoff-cc-do`
160
+ - Explicit no-repair verdict:
161
+
162
+ ## Correct Test Seam
163
+
164
+ - Test seam:
165
+ - Public interface exercised:
166
+ - Why this seam reaches the real trigger chain:
167
+ - Why a shallower test would be false confidence:
168
+ - If no correct seam exists:
169
+
111
170
  ## Repair Boundary
112
171
 
113
172
  - Fix strategy:
@@ -125,6 +184,12 @@
125
184
  ## Review Gate
126
185
 
127
186
  - Repro stable:
187
+ - Feedback loop trustworthy:
188
+ - Symptom match confirmed:
128
189
  - Root cause confirmed:
190
+ - Debug session cleanup complete:
191
+ - Workflow forensics classified:
192
+ - Diagnose-only verdict if applicable:
193
+ - Correct test seam identified:
129
194
  - Repair scope still belongs to this requirement:
130
195
  - If not, reroute:
@@ -16,6 +16,8 @@
16
16
  - Execution mode: `single-path` | `parallel-ready`
17
17
  - Confirmed root cause:
18
18
  - Root-cause hypothesis:
19
+ - Feedback loop:
20
+ - Symptom match evidence:
19
21
  - Frozen repair boundary:
20
22
  - Boundary probes:
21
23
  - Backward trace:
@@ -28,16 +30,19 @@
28
30
  - Commands to trust:
29
31
  - Do not re-decide:
30
32
  - Parallel boundaries:
33
+ - Correct test seam:
34
+ - Evidence request if blocked:
31
35
 
32
36
  ## Phase 1: Reproduce And Probe Guard
33
37
 
34
38
  - [ ] T001 [TEST] Capture the failing behavior as a stable reproduction (dependsOn:none) `path/to/test`
35
- Goal: 让 bug 先变成一个可复跑的失败事实。
39
+ Goal: 让 bug 先变成一个快、准、可复跑且匹配用户症状的失败事实。
36
40
  Files: `path/to/test`
37
41
  Read first: `analysis.md`, `tasks.md`
38
42
  Verification: `npm test -- path/to/test`
39
- Evidence: failing output or reproducible log
40
- Ready when: reproduction path 已稳定,analysis 已记录必要的 boundary / trace / comparison evidence
43
+ Evidence: failing output or reproducible log, plus symptom match evidence
44
+ Correct seam: test must exercise the real trigger chain through a public interface
45
+ Ready when: feedback loop 已稳定,analysis 已记录必要的 boundary / trace / comparison evidence
41
46
 
42
47
  ## Phase 2: Repair
43
48
 
@@ -47,7 +52,7 @@
47
52
  Read first: `analysis.md`, `path/to/test`
48
53
  Verification: `npm test -- path/to/test`
49
54
  Evidence: passing output + checkpoint
50
- Ready when: T001 已证明问题存在,analysis 已证明根因源头
55
+ Ready when: T001 已证明同一个用户症状存在,analysis 已证明根因源头
51
56
 
52
57
  ## Phase 3: Verify
53
58
 
@@ -20,7 +20,7 @@
20
20
  ]
21
21
  },
22
22
  "planningMeta": {
23
- "ccInvestigateSkillVersion": "1.1.4",
23
+ "ccInvestigateSkillVersion": "1.1.6",
24
24
  "analysisVersion": "analysis.v1",
25
25
  "approvedAt": "2026-04-17T12:00:00.000Z",
26
26
  "approvedBy": "user",
@@ -29,10 +29,24 @@
29
29
  "investigationMeta": {
30
30
  "symptomStatus": "stable",
31
31
  "reproductionPath": "npm test -- src/feature/feature.test.ts",
32
+ "feedbackLoop": {
33
+ "loopType": "failing-test",
34
+ "commandOrDriver": "npm test -- src/feature/feature.test.ts",
35
+ "expectedFailingSignal": "The test fails with the user-reported behavior",
36
+ "actualFailingSignal": "Observed failure output from the current repo",
37
+ "symptomMatchEvidence": "Failure output matches the reported symptom, not a nearby unrelated failure",
38
+ "runtime": "under 10s",
39
+ "determinism": "deterministic",
40
+ "failureRate": "100%",
41
+ "signalSpecificity": "asserts the exact broken behavior",
42
+ "sharpeningPlan": "Narrow setup or assertions if the loop becomes slow or broad",
43
+ "evidenceRequest": ""
44
+ },
32
45
  "patternAnalysis": {
33
- "selectedPattern": "implementation drift",
46
+ "selectedPattern": "null propagation",
34
47
  "ruledOutPatterns": [
35
48
  "race condition",
49
+ "performance regression",
36
50
  "configuration drift",
37
51
  "timing guess / flaky wait"
38
52
  ],
@@ -73,6 +87,7 @@
73
87
  },
74
88
  "diagnosticInstrumentation": [
75
89
  {
90
+ "probeTag": "[DEBUG-FIXXXX-a4f2]",
76
91
  "probeLocation": "file:line or component boundary",
77
92
  "questionAnswered": "Which boundary first emits the invalid value?",
78
93
  "commandToRun": "npm test -- src/feature/feature.test.ts",
@@ -81,8 +96,23 @@
81
96
  "cleanupRequirement": "Remove temporary probe or convert it into a durable assertion/log"
82
97
  }
83
98
  ],
99
+ "candidateHypotheses": [
100
+ {
101
+ "rank": 1,
102
+ "statement": "Specific, testable root-cause claim",
103
+ "whyPlausible": "Reproduction output points to the affected contract",
104
+ "prediction": "The failing signal disappears when that contract is restored",
105
+ "status": "accepted-for-testing"
106
+ }
107
+ ],
84
108
  "priorInvestigations": [],
85
109
  "researchEvidence": [],
110
+ "domainDecisionContext": {
111
+ "contextFilesRead": [],
112
+ "adrFilesRead": [],
113
+ "vocabularyNotes": [],
114
+ "adrConflicts": []
115
+ },
86
116
  "rootCauseHypothesis": {
87
117
  "statement": "Specific, testable root-cause claim",
88
118
  "falsificationMethod": "Command, log probe, assertion, or code-path check",
@@ -112,6 +142,13 @@
112
142
  "nextOption": "cc-do",
113
143
  "recommendation": "Repair the confirmed root cause"
114
144
  },
145
+ "correctTestSeam": {
146
+ "testSeam": "public interface or end-to-end path that reaches the real trigger chain",
147
+ "publicInterfaceExercised": "CLI/API/UI behavior observed by callers",
148
+ "realTriggerChainCoverage": "The test enters through the same trigger path as the bug",
149
+ "whyShallowTestRejected": "A lower-level unit test would not prove the upstream contract",
150
+ "ifNoCorrectSeam": ""
151
+ },
115
152
  "repairBoundary": {
116
153
  "affectedModule": "src/feature",
117
154
  "allowedFiles": [
@@ -172,6 +209,8 @@
172
209
  ],
173
210
  "acceptance": [
174
211
  "The target bug is reproduced as a stable failure",
212
+ "The failing loop matches the user-reported symptom",
213
+ "The regression test uses the correct seam for the real trigger chain",
175
214
  "The failure output points to the confirmed root-cause path"
176
215
  ],
177
216
  "verification": [