npm - cc-devflow - Version diff - 4.5.2 → 4.5.4 - Mend

cc-devflow 4.5.2 → 4.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (100)

package/.claude/skills/cc-do/scripts/write-task-checkpoint.sh CHANGED Viewed

@@ -9,7 +9,7 @@ set -euo pipefail
 usage() {
   cat <<'EOF'
 Usage:
-  write-task-checkpoint.sh --dir path/to/change --task T001 --status pending|running|passed|failed|skipped --summary "..." [--event context_ready] [--attempt 0] [--session session-id] [--next-action "..."]
+  write-task-checkpoint.sh --dir path/to/change --task T001 --status pending|running|passed|failed|skipped --summary "..." [--event context_ready] [--attempt 0] [--session session-id] [--next-action "..."] [--tdd-json '{"red":...}']
 EOF
 }
@@ -23,6 +23,7 @@ EVENT_TYPE=""
 ATTEMPT="0"
 SESSION_ID=""
 NEXT_ACTION=""
+TDD_JSON=""
 while [[ $# -gt 0 ]]; do
   case "$1" in
@@ -34,6 +35,7 @@ while [[ $# -gt 0 ]]; do
     --attempt) ATTEMPT="$2"; shift 2 ;;
     --session) SESSION_ID="$2"; shift 2 ;;
     --next-action) NEXT_ACTION="$2"; shift 2 ;;
+    --tdd-json) TDD_JSON="$2"; shift 2 ;;
     -h|--help) usage; exit 0 ;;
     *) echo "Unknown arg: $1" >&2; usage; exit 1 ;;
   esac
@@ -57,6 +59,15 @@ if [[ -z "$SESSION_ID" ]]; then
   SESSION_ID="${TASK_ID}-$(date -u +%s)"
 fi
+tdd_payload="null"
+if [[ -n "$TDD_JSON" ]]; then
+  if [[ -f "$TDD_JSON" ]]; then
+    tdd_payload="$(jq -c . "$TDD_JSON")"
+  else
+    tdd_payload="$(printf '%s' "$TDD_JSON" | jq -c .)"
+  fi
+fi
 jq -nc \
   --arg changeId "$change_id" \
   --arg taskId "$TASK_ID" \
@@ -66,6 +77,7 @@ jq -nc \
   --arg summary "$SUMMARY" \
   --arg timestamp "$timestamp" \
   --arg attempt "$ATTEMPT" \
+  --argjson tdd "$tdd_payload" \
   '{
     changeId: $changeId,
     taskId: $taskId,
@@ -75,7 +87,7 @@ jq -nc \
     summary: $summary,
     timestamp: $timestamp,
     attempt: ($attempt | tonumber)
-  }' > "$runtime_task_dir/checkpoint.json"
+  } + (if $tdd == null then {} else {tdd: $tdd} end)' > "$runtime_task_dir/checkpoint.json"
 if [[ -n "$EVENT_TYPE" || "$STATUS" == "failed" ]]; then
   jq -nc \

package/.claude/skills/cc-investigate/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,29 @@
 # CC-Investigate Skill Changelog
+## v1.2.1 - 2026-04-29
+- add persistent debug session fields for active hypothesis, probes, cleanup state, and next evidence action
+- add diagnose-only and workflow-forensics modes so root-cause reports do not masquerade as completed repairs
+- update the analysis template with debug session, workflow forensics, and diagnose-only outcome sections
+## v1.2.0 - 2026-04-28
+- treat feedback loops as investigation products that must be made faster, sharper, and more deterministic before root cause freeze
+- require flaky investigations to raise reproduction rate with stress, repetition, timing-window narrowing, or differential loops instead of guessing from weak signals
+- add prevention handoff so confirmed root causes produce either a regression task, architecture finding, or explicit non-recorded reason
+## v1.1.6 - 2026-04-28
+- clarify that investigation domain language and durable decisions come from cc-devflow native sources: `devflow/specs/`, roadmap/backlog handoff, historical design/analysis, and change metadata
+- remove external context/architecture-decision files from the standard investigation contract so they are not implied as generated artifacts
+- route conflicts through capability specs, roadmap decisions, or historical design decisions instead of external decision-document language
+## v1.1.5 - 2026-04-28
+- add a feedback-loop contract so investigations record loop type, command, symptom match, runtime, determinism, failure rate, signal specificity, and sharpening plan before freezing root cause
+- require ranked candidate hypotheses before narrowing to active falsification targets, plus probe tags for cleanup-safe diagnostic instrumentation
+- add performance-regression, native domain/decision context, correct-test-seam, and evidence-request fields across the analysis, task, manifest, playbook, and investigation contract templates
 ## v1.1.4 - 2026-04-28
 - add boundary-probe, backward-trace, reference-comparison, diagnostic-instrumentation, and condition-wait investigation modes for multi-component, deep-stack, similar-path, and flaky failures

package/.claude/skills/cc-investigate/PLAYBOOK.md CHANGED Viewed

@@ -12,12 +12,16 @@
 ## Core Rules
 1. 先复现，再猜原因。
-2. 先看最近变化，再决定是不是 regression。
-3. 先证伪假设，再冻结根因。
-4. `planning/analysis.md` 和 `planning/tasks.md` 必须足够让 `cc-do` 脱离当前会话继续工作。
-5. 调查失败三次后先重建入口，不准继续乱补。
-6. 没有 frozen root-cause contract，不准进入 repair task。
-7. 多组件、深层调用、flaky 问题必须先补边界探针、反向追踪或条件等待证据。
+2. 先把复现做成快、准、可复跑的 feedback loop。
+3. 先确认 loop 复现的是用户报告的同一个失败。
+4. 先看最近变化，再决定是不是 regression。
+5. 先证伪假设，再冻结根因。
+6. `planning/analysis.md` 和 `planning/tasks.md` 必须足够让 `cc-do` 脱离当前会话继续工作。
+7. 调查失败三次后先重建入口，不准继续乱补。
+8. 没有 frozen root-cause contract，不准进入 repair task。
+9. 多组件、深层调用、flaky 问题必须先补边界探针、反向追踪或条件等待证据。
+10. diagnose-only 只能输出根因、owner、风险和 next action，不能把未修复状态标成完成。
+11. workflow forensics 先分类 artifact / git / state / tool / permission / process failure，再决定是否进入修复。
 ## Iron Law
@@ -36,18 +40,21 @@ root-cause contract 至少包含：稳定复现或缩小后的可验证症状、
 ## Investigation Standard
 1. 先收集 symptom、expected、actual、repro。
-2. 先查 prior investigations、TODOS/backlog、report-card finding 和最近变更。
-3. 先沿代码路径定位触点和最近变更。
-4. 先做 pattern analysis，再形成 1-3 个可证伪假设。
-5. 每个假设都要写支持证据、反证、证伪方法、预期观察、实际观察。
-6. 只有被证据钉死的根因才能进入 repair contract。
-7. repair contract 只讲最小修复边界，不顺手发明新范围。
+2. 先构造 feedback loop：失败测试、HTTP 脚本、CLI fixture、浏览器脚本、trace replay、throwaway harness、fuzz、bisect、differential，最后才是 HITL。
+3. 记录 loop 的运行时间、确定性、失败率、症状匹配证据和 sharpen 计划。
+4. 先查 prior investigations、TODOS/backlog、report-card finding 和最近变更。
+5. 先沿代码路径定位触点和最近变更。
+6. 先做 pattern analysis，再列 3-5 个候选假设并收敛到 1-3 个 active hypotheses。
+7. 每个假设都要写支持证据、反证、证伪方法、预期观察、实际观察。
+8. 只有被证据钉死的根因才能进入 repair contract。
+9. repair contract 只讲最小修复边界，不顺手发明新范围。
 ## Investigation Modes
 | Mode | 什么时候用 | 第一动作 |
 | --- | --- | --- |
 | `reproduce-first` | 症状真实但不稳定 | 缩小复现命令 / 手动路径 |
+| `feedback-loop` | 已有复现但信号慢、松、偶然或不确定是否同一 bug | 记录 loop type、命令、runtime、determinism、failure rate 和 symptom match |
 | `diff-trace` | 昨天可用、今天坏了 | `git log --oneline -20 -- <affected-files>` |
 | `boundary-probe` | API -> service -> DB、CI -> build -> deploy 这类链路断裂 | 记录每层输入、输出、配置和状态 |
 | `backward-trace` | 错误出现在深层堆栈或坏值来源不明 | 从 immediate failure site 反追 original trigger |
@@ -56,6 +63,8 @@ root-cause contract 至少包含：稳定复现或缩小后的可验证症状、
 | `history-trace` | 同一区域反复坏 | 查历史 `analysis.md`、TODO、report-card finding |
 | `pattern-research` | 陌生框架 / 依赖 / 平台错误 | 脱敏后查通用错误类型 |
 | `contract-check` | 修复边界可能扩大 | 判定 implementation drift / missing spec truth / roadmap mismatch |
+| `diagnose-only` | 用户只要问题解释或现在不能修 | 冻结 root cause、owner、risk、next action，不生成实现完成态 |
+| `workflow-forensics` | devflow artifact、git、状态、权限或工具链断裂 | 分类 failure owner 和 rescue action，再决定 reroute |
 ## Pattern Analysis
@@ -68,9 +77,12 @@ root-cause contract 至少包含：稳定复现或缩小后的可验证症状、
 - configuration drift：本地 / CI / 生产表现不同
 - stale cache：清缓存后恢复或旧状态复现
 - resource leak：OOM、句柄增长、生命周期未释放
+- performance regression：变慢、CPU / IO / 查询耗时升高、吞吐下降
 - trust boundary drift：外部输入、LLM 输出、用户输入被当成可信
 - timing guess / flaky wait：任意 sleep / timeout / setTimeout 掩盖真实条件
+性能回归先建 baseline、profiler、query plan 或 bisect，不把普通日志当性能证据。
 ## Boundary And Trace Evidence
 复杂链路必须在 `analysis.md` 写清：
@@ -78,7 +90,12 @@ root-cause contract 至少包含：稳定复现或缩小后的可验证症状、
 - Boundary Probe Matrix：component boundary、input observed、output observed、config/env observed、state observed、verdict
 - Backward Trace Chain：immediate failure site、caller chain、bad value origin、original trigger、why symptom-site fix is rejected
 - Reference Comparison：similar working example、broken path、differences accepted / ruled out
-- Diagnostic Instrumentation Plan：probe location、question answered、command、expected signal、cleanup requirement
+- Diagnostic Instrumentation Plan：probe tag、probe location、question answered、command、expected signal、cleanup requirement
+- Feedback Loop Contract：loop type、command、expected / actual signal、symptom match、runtime、determinism、failure rate、sharpening plan
+- Correct Test Seam：test seam、public interface exercised、why it reaches the real trigger chain、why shallow tests are rejected
+- Persistent Debug Session：session id、active hypothesis、completed probes、cleanup state、next evidence action
+- Workflow Forensics：artifact state、git state、runtime state、tool/permission/process failure owner、rescue action
+- Diagnose-Only Outcome：root cause、owner、risk、next action、explicit no-repair verdict
 这些字段不是装饰。它们的作用是证明根因位于源头，而不是报错点。
@@ -93,6 +110,10 @@ root-cause contract 至少包含：稳定复现或缩小后的可验证症状、
 命中历史时，写入 `analysis.md` 的 `Prior Investigations`，说明这次是复发、同类结构味道，还是无关历史。
+## Domain And Decision Context
+优先读取 cc-devflow 原生上下文：`devflow/specs/INDEX.md`、相关 capability specs、roadmap/backlog handoff、历史 `planning/design.md` / `planning/analysis.md`、`change-meta.json`。调查输出里的领域名、假设名、测试名应沿用项目词汇；如果调查结论违反 capability spec、roadmap decision 或历史 design decision，要显式写入 evidence chain，而不是静默覆盖既有设计决策。
 ## External Research Hygiene
 只有在本地证据不足、错误类型陌生、或像依赖 / 框架 / 平台问题时才做外部调研。
@@ -130,6 +151,7 @@ root-cause contract 至少包含：稳定复现或缩小后的可验证症状、
 - attempted evidence
 - why current entry is suspect
 - next option：continue / instrument-and-wait / human-review / reroute-cc-plan
+- evidence request：需要可复现环境、HAR、日志 dump、core dump、带时间戳录屏或临时生产探针权限
 - recommendation
 ## Local Kit

package/.claude/skills/cc-investigate/SKILL.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: cc-investigate
-version: 1.1.4
+version: 1.2.1
 description: "Use when a bug, regression, broken task, or unexpected behavior needs root-cause investigation, reproducible evidence, and a frozen repair handoff before cc-do resumes coding."
 triggers:
   - "帮我查这个 bug"
@@ -33,12 +33,16 @@ writes:
 entry_gate:
   - "Read the current bug report, existing requirement artifacts, relevant code, tests, and recent history before forming any hypothesis."
   - "Use a FIX-<number>-<description> change key for new bug-fix investigations."
-  - "Reproduce or narrow the symptom first, then freeze the evidence chain before proposing repair tasks."
+  - "Build a runnable feedback loop, confirm it matches the reported symptom, then freeze the evidence chain before proposing repair tasks."
+  - "Record persistent debug session state: active hypothesis, probes, cleanup status, and next evidence action."
   - "Search prior investigations, TODO/backlog signals, and recent fixes in the affected area before declaring the bug novel."
   - "For multi-component, deep-stack, or flaky symptoms, record boundary probes, backward trace, or condition-wait evidence before declaring the root cause."
+  - "For performance regressions, collect a baseline or profile signal before treating logs as evidence."
   - "Do not write production code here; this stage ends with planning/analysis.md plus executable repair tasks for cc-do."
 exit_criteria:
   - "planning/analysis.md records symptom, reproduction, evidence chain, boundary probes or backward trace when applicable, pattern analysis, tested hypotheses, confirmed root cause, and repair boundary."
+  - "diagnose-only outcomes clearly stop before implementation while preserving root cause, owner, and next action."
+  - "workflow forensics classify artifact, git, state, or tool failures before repair tasks are generated."
   - "planning/tasks.md and planning/task-manifest.json are explicit enough that cc-do can repair the bug without chat memory."
   - "The honest next step is cc-do, cc-plan, or roadmap."
 reroutes:
@@ -130,6 +134,7 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 | 现实状态 | 先走什么路径 |
 | --- | --- |
 | 症状真实，但还没有稳定复现 | `reproduce-first`，先把现象钉死 |
+| 已有复现但信号慢 / 松 / 偶然 | `feedback-loop`，先把 pass/fail loop 做快、准、可复跑 |
 | 明显是 regression | `diff-trace`，先查最近变化 |
 | 多组件链路断裂 | `boundary-probe`，先记录每个边界的输入、输出、配置和状态 |
 | 报错点很深或坏值来源不明 | `backward-trace`，从 symptom site 一直追到 original trigger |
@@ -139,6 +144,8 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 | 错误类型陌生，像框架 / 依赖 / 平台问题 | `pattern-research`，先做脱敏外部调研 |
 | 同一区域反复坏 | `history-trace`，先查 prior investigations 和最近修复 |
 | 看起来像 bug，实则是未定义行为或新需求 | 直接 reroute 到 `cc-plan` |
+| 用户只要根因报告、不要求修复 | `diagnose-only`，停止在报告与 next action，不生成完成态实现任务 |
+| 失败来自 workflow / artifact / git / state 断裂 | `workflow-forensics`，先分类坏在文件、状态、工具、权限还是流程 |
 先说“这是什么类问题”，再说“我要怎么修”。没有分类的 debug，最后都会变成乱打补丁。
@@ -172,6 +179,9 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 `cc-investigate` 不写生产代码，不在这里偷跑 `cc-do`。
+diagnose-only 仍然写 `planning/analysis.md`，但 `planning/tasks.md` 只能包含证据交接、
+监控、人工动作或明确的 `reroute`；不能把“已经诊断”伪装成“已经修复”。
 ## Entry Gate
 1. 先确认当前对象仍然属于一个 requirement，而不是整个项目级故障。
@@ -186,35 +196,47 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
    - 记录用户看见了什么
    - 记录预期与实际差异
    - 记录复现命令或手动路径
+   - 确认复现的是用户描述的同一个失败，而不是旁边的红灯
    - 如果上下文缺失，只问一个最关键问题，不一次性抛出问题清单
-2. **Trace reality**
+2. **Build feedback loop**
+   - 优先构造 agent 可运行的 pass/fail 信号：失败测试、curl / HTTP 脚本、CLI fixture、浏览器脚本、trace replay、throwaway harness、property / fuzz loop、bisect harness、differential loop，最后才是 HITL 脚本
+   - 记录 loop 类型、命令、运行时间、确定性、失败率、症状匹配证据和下一步 sharpen 计划
+   - loop 太慢、太宽、太 flaky 时，先优化 loop 本身；没有可信 loop，不进入 confirmed root cause
+   - 如果确实无法建 loop，写明尝试过什么，并请求 HAR、日志 dump、core dump、带时间戳录屏、可复现环境访问或临时生产探针权限
+3. **Trace reality**
    - 沿着代码路径找触点
    - 多组件系统先写 `Boundary Probe Matrix`：每个边界的输入、输出、配置 / 环境、状态和 pass/fail
    - 深层报错先写 `Backward Trace Chain`：immediate failure site、caller chain、bad value origin、original trigger
    - 查最近提交和同类改动
    - 查既有 `devflow/changes/*/planning/analysis.md`、`TODOS.md`、backlog、report-card finding
+   - 如果仓库有 `devflow/specs/`、roadmap/backlog handoff、历史 `planning/design.md` / `planning/analysis.md` 或 `change-meta.json`，把领域词汇和已冻结决策当成契约证据
    - 找现有测试和断点证据
    - 判定偏离的是 capability boundary、invariant，还是只是实现细节
-3. **Classify pattern**
-   - 判定是否属于 race condition、null propagation、state corruption、integration failure、configuration drift、stale cache、resource leak、trust boundary drift、timing guess / flaky wait
+4. **Classify pattern**
+   - 判定是否属于 race condition、null propagation、state corruption、integration failure、configuration drift、stale cache、resource leak、performance regression、trust boundary drift、timing guess / flaky wait
    - 如果有同仓库 working example，先写 `Reference Comparison`，列出 working path、broken path、差异和被接受 / 排除的假设
    - 如果错误类型陌生，先做脱敏外部调研；只搜索通用错误类型、框架 / 库名和版本，不搜索 raw secret / path / customer data
-4. **Form hypotheses**
-   - 只保留 1-3 个可被证伪的假设
-   - 每个假设都写支持证据和反证
+5. **Form hypotheses**
+   - 先列 3-5 个候选假设并排序，避免第一直觉锚定
+   - 再收敛到 1-3 个 active hypotheses 进入验证
+   - 每个假设都写支持证据、反证和优先级理由
    - 每个假设都写 `falsification method`、`expected observation`、`actual observation`
-5. **Test hypotheses**
+6. **Test hypotheses**
    - 用复现、日志、断言、最小探针验证
-   - 临时探针必须写 `Diagnostic Instrumentation Plan`：probe location、question answered、command、expected signal、cleanup requirement
+   - 临时探针必须写 `Diagnostic Instrumentation Plan`：probe tag、probe location、question answered、command、expected signal、cleanup requirement
+   - 每个 probe 只回答一个假设预测；一次只改一个变量
+   - debug 日志必须带唯一前缀，例如 `[DEBUG-FIX123-a4f2]`，进入 `cc-do` 前用前缀 grep 清理或转正
    - 三次假设都失败，就停下进入 escalation decision
-6. **Freeze repair contract**
+7. **Freeze repair contract**
    - 根因确认后，写进 `planning/analysis.md`
    - 只保留最小修复边界
+   - 写清正确测试缝隙：测试是否覆盖真实触发链；如果没有正确 seam，这本身就是需要记录的架构事实
    - 写明 affected module、allowed files、forbidden files、blast radius estimate；超过 5 个文件默认拆分或 reroute
    - 输出 `planning/tasks.md` + `planning/task-manifest.json` + `change-meta.json`
-7. **Hand off**
+8. **Hand off**
    - 下一步明确写成 `cc-do`
    - 如果 repair contract 越过当前 requirement，就 reroute 到 `cc-plan`
+   - 如果是 diagnose-only，下一步写成 human action、monitoring、backlog、`cc-plan` 或 `cc-do`，但不得标记实现完成
 ## Pattern Analysis
@@ -229,6 +251,7 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 | configuration drift | 本地可用、CI/生产失败 | env、feature flag、版本、路径、权限 |
 | stale cache | 清缓存后恢复、旧状态复现 | browser / CDN / Redis / build cache |
 | resource leak | OOM、句柄增长、慢性崩溃 | lifecycle、subscription、retention、cleanup |
+| performance regression | 变慢、CPU / IO / 查询耗时升高、吞吐下降 | baseline、profiler、query plan、bisect |
 | trust boundary drift | LLM / 用户输入 / 外部响应被当成可信 | validation、escaping、policy gate |
 | timing guess / flaky wait | sleep / setTimeout / timeout 增大后偶尔通过 | 真实完成条件、事件、文件、状态或队列计数 |
@@ -285,6 +308,39 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 探针不能变成修复。进入 `cc-do` 前，要么删除，要么明确写入 repair task 的清理 / 转正方式。
+## Feedback Loop Contract
+根因调查首先依赖一个可信 loop：
+- loop type: failing test / HTTP script / CLI fixture / browser script / trace replay / throwaway harness / property-fuzz / bisect / differential / HITL
+- command or manual driver
+- expected failing signal
+- actual failing signal
+- symptom match: 为什么它复现的是用户报告的同一个问题
+- runtime and determinism
+- failure rate for flaky bugs
+- sharpening plan: 如何让它更快、更准、更稳定
+把 loop 当成调查产品来迭代。已有 loop 但信号差时，先优化它：
+1. faster：缓存 setup、缩小 test scope、跳过无关启动。
+2. sharper：断言用户看见的具体症状，不用“没有崩溃”冒充匹配。
+3. more deterministic：固定时间、随机种子、filesystem、network、locale、feature flag。
+flaky bug 的目标不是立刻 100% 复现，而是提高复现率直到可调试。可以循环 100 次、并行触发、加压力、缩小时序窗口或做 differential loop；如果失败率仍低到不可证伪，先写 Evidence Request，不要继续猜。
+没有 loop 时，不能把代码阅读当成根因。只能写 `Evidence Request`：需要可复现环境、HAR、日志 dump、core dump、带时间戳录屏，或临时生产探针权限。
+## Correct Test Seam
+进入 repair handoff 前，必须说明回归测试缝隙是否正确：
+- test seam
+- public interface exercised
+- why this seam reaches the real trigger chain
+- why a shallower test would be false confidence
+- if no correct seam exists, record it as an architecture finding and keep repair verification tied to the original feedback loop
 ## Timing And Flaky Bugs
 遇到 flaky、sleep、timeout、重试后消失：
@@ -345,6 +401,14 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 2. 不能拆但仍是根因跨度，写明为什么。
 3. 如果已经变成设计 / 架构范围，reroute 到 `cc-plan`。
+## Prevention Handoff
+根因冻结后必须写一句后验判断：什么结构、测试 seam、capability invariant、operator guard 或文档会让这个 bug 更早暴露或根本不出现。
+- 如果答案是小范围 regression test，把它写进当前 repair task。
+- 如果答案是 seam / module / capability 边界问题，把它写成 architecture finding，并明确交给 `cc-plan` 或后续 backlog。
+- 如果答案只是流程提醒或人工记忆，不算预防；要么转成可执行 guard，要么明确不记录。
 ## Escalation Decision
 三次假设失败后，不准继续硬猜。`analysis.md` 必须写：
@@ -353,6 +417,7 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 - what was attempted
 - why current entry is suspect
 - next option：`continue-with-new-hypothesis` / `instrument-and-wait` / `human-review` / `reroute-cc-plan`
+- evidence request if the loop cannot be built or the environment is missing
 - recommendation
 ## Good Output
@@ -362,6 +427,7 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 - 假设不是列表装饰，而是带证伪方法和实际观察
 - 历史调查、最近改动、模式分析没有被跳过
 - 修复边界清楚到 `cc-do` 不需要二次调查
+- 正确测试缝隙写清楚，不用浅层测试制造假安全
 - `planning/tasks.md` 只包含修复任务，不夹带新需求
 - 如果应该回 `cc-plan`，理由写清楚，不假装还能继续 patch
@@ -377,14 +443,15 @@ NO REPAIR WITHOUT A FROZEN ROOT-CAUSE CONTRACT
 ## Working Rules
 1. 没有复现，不准声称找到了根因。
-2. 没有证据，不准把猜测写成结论。
-3. 先根因，再修复；先调查，再编码。
-4. `planning/tasks.md` 必须足够让 `cc-do` 在脱离当前对话后继续推进。
-5. 如果修复方案已经变成新 feature 设计，停止 debug，回 `cc-plan`。
-6. 三次假设失败后，默认说明你的调查入口错了，不准继续硬猜。
-7. 外部调研必须先脱敏，调研结论必须回到本仓库证据验证。
-8. 修复触点超过 5 个文件时，默认先拆分或 reroute，不把大重构伪装成 bug fix。
-9. 好的调查不是“找了很多可能性”，而是把错误世界缩成一个可信的 repair contract。
+2. 没有可信 feedback loop，不准把代码阅读包装成 confirmed root cause。
+3. 没有证据，不准把猜测写成结论。
+4. 先根因，再修复；先调查，再编码。
+5. `planning/tasks.md` 必须足够让 `cc-do` 在脱离当前对话后继续推进。
+6. 如果修复方案已经变成新 feature 设计，停止 debug，回 `cc-plan`。
+7. 三次假设失败后，默认说明你的调查入口错了，不准继续硬猜。
+8. 外部调研必须先脱敏，调研结论必须回到本仓库证据验证。
+9. 修复触点超过 5 个文件时，默认先拆分或 reroute，不把大重构伪装成 bug fix。
+10. 好的调查不是“找了很多可能性”，而是把错误世界缩成一个可信的 repair contract。
 ## Exit Criteria

package/.claude/skills/cc-investigate/assets/ANALYSIS_TEMPLATE.md CHANGED Viewed

@@ -17,10 +17,36 @@
 - What the user saw:
 - Reproduction command / path:
 - Repro stability: `stable` | `intermittent` | `not-yet-reproduced` | `narrowed-only`
+- Matches reported symptom: `yes` | `no` | `partial` | `unknown`
+- Symptom match evidence:
 - Expected:
 - Actual:
 - Impact / blast radius:
+## Feedback Loop Contract
+- Loop type: `failing-test` | `http-script` | `cli-fixture` | `browser-script` | `trace-replay` | `throwaway-harness` | `property-fuzz` | `bisect` | `differential` | `hitl`
+- Command or manual driver:
+- Expected failing signal:
+- Actual failing signal:
+- Runtime:
+- Determinism: `deterministic` | `high-rate-flaky` | `low-rate-flaky` | `unknown`
+- Failure rate:
+- Signal specificity:
+- Sharpening plan:
+- If no loop, evidence request:
+## Debug Session
+- Session ID:
+- Started at:
+- Current mode: `reproduce-first` | `feedback-loop` | `diff-trace` | `boundary-probe` | `backward-trace` | `reference-compare` | `condition-wait` | `history-trace` | `pattern-research` | `contract-check` | `diagnose-only` | `workflow-forensics`
+- Active hypothesis:
+- Completed probes:
+- Open probes:
+- Cleanup status:
+- Next evidence action:
 ## Evidence Chain
 - Logs / stack traces:
@@ -29,6 +55,13 @@
 - Existing tests:
 - Prior investigations:
 - TODO / backlog / report-card signals:
+- Native domain / decision context:
+## Workflow Forensics
+| Failure surface | Observed state | Owner | Rescue action | Evidence |
+| --- | --- | --- | --- | --- |
+| artifact / git / runtime-state / tool / permission / process | | | | |
 ## Boundary Probe Matrix
@@ -55,9 +88,9 @@
 ## Diagnostic Instrumentation Plan
-| Probe location | Question answered | Command to run | Expected signal | Actual signal | Cleanup requirement |
-| --- | --- | --- | --- | --- | --- |
-| | | | | | |
+| Probe tag | Probe location | Question answered | Command to run | Expected signal | Actual signal | Cleanup requirement |
+| --- | --- | --- | --- | --- | --- | --- |
+| | | | | | | |
 ## Pattern Analysis
@@ -70,9 +103,16 @@
 | configuration drift | | ruled-out | |
 | stale cache | | ruled-out | |
 | resource leak | | ruled-out | |
+| performance regression | | ruled-out | |
 | trust boundary drift | | ruled-out | |
 | timing guess / flaky wait | | ruled-out | |
+## Candidate Hypotheses
+| Rank | Hypothesis | Why plausible | Prediction | Status |
+| --- | --- | --- | --- | --- |
+| 1 | | | | pending |
 ## Research Evidence
 - External research used: `yes` | `no`
@@ -94,6 +134,7 @@
 - Attempted evidence:
 - Why current entry is suspect:
 - Next option: `continue-with-new-hypothesis` | `instrument-and-wait` | `human-review` | `reroute-cc-plan`
+- Evidence request:
 - Recommendation:
 ## Root Cause
@@ -108,6 +149,24 @@
 - Operator handling after fix:
 - Prior history relationship: `new` | `recurring` | `same-root-cause` | `architectural-smell-candidate`
+## Diagnose-Only Outcome
+- Applies: `yes` | `no`
+- Why no repair now:
+- Root cause owner:
+- Risk if left unresolved:
+- Monitoring / follow-up evidence:
+- Next action: `human-action` | `monitor` | `backlog` | `reroute-cc-plan` | `handoff-cc-do`
+- Explicit no-repair verdict:
+## Correct Test Seam
+- Test seam:
+- Public interface exercised:
+- Why this seam reaches the real trigger chain:
+- Why a shallower test would be false confidence:
+- If no correct seam exists:
 ## Repair Boundary
 - Fix strategy:
@@ -125,6 +184,12 @@
 ## Review Gate
 - Repro stable:
+- Feedback loop trustworthy:
+- Symptom match confirmed:
 - Root cause confirmed:
+- Debug session cleanup complete:
+- Workflow forensics classified:
+- Diagnose-only verdict if applicable:
+- Correct test seam identified:
 - Repair scope still belongs to this requirement:
 - If not, reroute:

package/.claude/skills/cc-investigate/assets/TASKS_TEMPLATE.md CHANGED Viewed

@@ -16,6 +16,8 @@
 - Execution mode: `single-path` | `parallel-ready`
 - Confirmed root cause:
 - Root-cause hypothesis:
+- Feedback loop:
+- Symptom match evidence:
 - Frozen repair boundary:
 - Boundary probes:
 - Backward trace:
@@ -28,16 +30,19 @@
 - Commands to trust:
 - Do not re-decide:
 - Parallel boundaries:
+- Correct test seam:
+- Evidence request if blocked:
 ## Phase 1: Reproduce And Probe Guard
 - [ ] T001 [TEST] Capture the failing behavior as a stable reproduction (dependsOn:none) `path/to/test`
-  Goal: 让 bug 先变成一个可复跑的失败事实。
+  Goal: 让 bug 先变成一个快、准、可复跑且匹配用户症状的失败事实。
   Files: `path/to/test`
   Read first: `analysis.md`, `tasks.md`
   Verification: `npm test -- path/to/test`
-  Evidence: failing output or reproducible log
-  Ready when: reproduction path 已稳定，analysis 已记录必要的 boundary / trace / comparison evidence
+  Evidence: failing output or reproducible log + symptom match evidence
+  Correct seam: test must exercise the real trigger chain through a public interface
+  Ready when: feedback loop 已稳定，analysis 已记录必要的 boundary / trace / comparison evidence
 ## Phase 2: Repair
@@ -47,7 +52,7 @@
   Read first: `analysis.md`, `path/to/test`
   Verification: `npm test -- path/to/test`
   Evidence: passing output + checkpoint
-  Ready when: T001 已证明问题存在，analysis 已证明根因源头
+  Ready when: T001 已证明同一个用户症状存在，analysis 已证明根因源头
 ## Phase 3: Verify

package/.claude/skills/cc-investigate/assets/TASK_MANIFEST_TEMPLATE.json CHANGED Viewed

@@ -20,7 +20,7 @@
     ]
   },
   "planningMeta": {
-    "ccInvestigateSkillVersion": "1.1.4",
+    "ccInvestigateSkillVersion": "1.1.6",
     "analysisVersion": "analysis.v1",
     "approvedAt": "2026-04-17T12:00:00.000Z",
     "approvedBy": "user",
@@ -29,10 +29,24 @@
   "investigationMeta": {
     "symptomStatus": "stable",
     "reproductionPath": "npm test -- src/feature/feature.test.ts",
+    "feedbackLoop": {
+      "loopType": "failing-test",
+      "commandOrDriver": "npm test -- src/feature/feature.test.ts",
+      "expectedFailingSignal": "The test fails with the user-reported behavior",
+      "actualFailingSignal": "Observed failure output from the current repo",
+      "symptomMatchEvidence": "Failure output matches the reported symptom, not a nearby unrelated failure",
+      "runtime": "under 10s",
+      "determinism": "deterministic",
+      "failureRate": "100%",
+      "signalSpecificity": "asserts the exact broken behavior",
+      "sharpeningPlan": "Narrow setup or assertions if the loop becomes slow or broad",
+      "evidenceRequest": ""
+    },
     "patternAnalysis": {
-      "selectedPattern": "implementation drift",
+      "selectedPattern": "null propagation",
       "ruledOutPatterns": [
         "race condition",
+        "performance regression",
         "configuration drift",
         "timing guess / flaky wait"
       ],
@@ -73,6 +87,7 @@
     },
     "diagnosticInstrumentation": [
       {
+        "probeTag": "[DEBUG-FIXXXX-a4f2]",
         "probeLocation": "file:line or component boundary",
         "questionAnswered": "Which boundary first emits the invalid value?",
         "commandToRun": "npm test -- src/feature/feature.test.ts",
@@ -81,8 +96,23 @@
         "cleanupRequirement": "Remove temporary probe or convert it into a durable assertion/log"
       }
     ],
+    "candidateHypotheses": [
+      {
+        "rank": 1,
+        "statement": "Specific, testable root-cause claim",
+        "whyPlausible": "Reproduction output points to the affected contract",
+        "prediction": "The failing signal disappears when that contract is restored",
+        "status": "accepted-for-testing"
+      }
+    ],
     "priorInvestigations": [],
     "researchEvidence": [],
+    "domainDecisionContext": {
+      "contextFilesRead": [],
+      "adrFilesRead": [],
+      "vocabularyNotes": [],
+      "adrConflicts": []
+    },
     "rootCauseHypothesis": {
       "statement": "Specific, testable root-cause claim",
       "falsificationMethod": "Command, log probe, assertion, or code-path check",
@@ -112,6 +142,13 @@
       "nextOption": "cc-do",
       "recommendation": "Repair the confirmed root cause"
     },
+    "correctTestSeam": {
+      "testSeam": "public interface or end-to-end path that reaches the real trigger chain",
+      "publicInterfaceExercised": "CLI/API/UI behavior observed by callers",
+      "realTriggerChainCoverage": "The test enters through the same trigger path as the bug",
+      "whyShallowTestRejected": "A lower-level unit test would not prove the upstream contract",
+      "ifNoCorrectSeam": ""
+    },
     "repairBoundary": {
       "affectedModule": "src/feature",
       "allowedFiles": [
@@ -172,6 +209,8 @@
       ],
       "acceptance": [
         "The target bug is reproduced as a stable failure",
+        "The failing loop matches the user-reported symptom",
+        "The regression test uses the correct seam for the real trigger chain",
         "The failure output points to the confirmed root-cause path"
       ],
       "verification": [