@seanyao/roll 2026.522.2 → 2026.523.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # Changelog
2
2
 
3
+ ## v2026.523.1
4
+
5
+ ### Added
6
+
7
+ - **`roll loop branches`** — 一眼看见本机残留的 loop 分支;每轮入口先 GC 一次,半途中止的 cycle 也会被收掉 `[loop]`
8
+
9
+ ### Changed
10
+
11
+ - **dashboard token 列拆成 input / output / cache 写 / cache 读** — cache 是真花钱的,账单终于解释得清 `[loop]`
12
+
13
+ ### Fixed
14
+
15
+ - **每日 dream / brief 在 macOS 26.4 上从来没真跑过** — 换成 interval 触发,从今天起稳定每天产出 `[loop]`
16
+ - **dashboard 上 tcr 次数、built 列表、ALERT 文案不再显示假零或别故事的旧标签** `[loop]`
17
+ - **选一个故事不再把别的依赖它的故事也标成"在做"** — dashboard 不再骗你说有人在干活 `[loop]`
18
+ - **`roll setup` / `roll update` 不再在隐藏的覆盖提示上无声卡死**
19
+ - **`$roll-notes` 现在写到 `.roll/notes/`** — 和 dream / brief 一致,不再扔到项目根目录 `[loop]`
20
+ - **loop CI 网关不再把"排队中 / 进行中"误判成失败** `[loop]`
21
+
3
22
  ## v2026.522.2
4
23
 
5
24
  ### Changed
@@ -0,0 +1,110 @@
1
#!/usr/bin/env bash
# dream-test-quality-scan — ad-hoc helper for roll-.dream Scan 7.
#
# Walks bats files and flags ❶-class anti-patterns (hardcoded business data
# in assertion bodies). Emits structured REFACTOR-shaped lines so the
# maintainer can sanity-check the rubric against the current suite without
# waiting for the nightly dream cycle.
#
# Usage:
#   dream-test-quality-scan [--category N] [--path PATH] [--max N]
#   dream-test-quality-scan --help
#
# Only category 1 (❶ hardcoded business data) is implemented as a deterministic
# heuristic; categories ❷..❻ stay with the dream skill (AI agent applies the
# rubric). The helper exists so a smoke test and a maintainer dry-run can
# confirm ❶ detection keeps working as the suite evolves.
#
# Exit codes:
#   0  scan finished (with or without findings), or category not implemented
#   2  usage error (unknown flag, missing/invalid flag value, path not found)

set -euo pipefail

CATEGORY=1
TARGET=""
MAX=5

# Print the help text to stdout.
usage() {
  cat <<'EOF'
dream-test-quality-scan — Scan 7 ❶ dry-run helper

Usage:
  dream-test-quality-scan [--category N] [--path PATH] [--max N]
  dream-test-quality-scan --help

Options:
  --category N   Anti-pattern category (only 1 is implemented; default 1)
  --path PATH    File or directory to scan (default: tests/)
  --max N        Maximum entries to emit (default: 5; matches dream skill cap)
  --help         Show this message

Output:
  One line per finding:
    [test-quality:❶] <file>:<line> — <one-line description>
  Exit code is 0 even when nothing is found (dry-run is informational).
EOF
}

# die_usage MESSAGE — report a CLI error on stderr, show usage, exit 2.
die_usage() {
  echo "$1" >&2
  usage >&2
  exit 2
}

# require_value FLAG REMAINING_ARGC — fail with a clear message when FLAG is
# the last argument. Without this guard `shift 2` returns non-zero and
# `set -e` kills the script with no diagnostic at all.
require_value() {
  [[ "$2" -ge 2 ]] || die_usage "missing value for $1"
}

# require_uint FLAG VALUE — VALUE must be a non-negative decimal integer.
# Anything else would blow up later inside the arithmetic comparisons
# (unbound-variable or "value too great for base" errors).
require_uint() {
  [[ "$2" =~ ^[0-9]+$ ]] || die_usage "$1 expects a non-negative integer, got: $2"
}

while [[ $# -gt 0 ]]; do
  case "$1" in
    --category)
      require_value "$1" "$#"
      CATEGORY="${2:-1}"          # empty value keeps the default
      require_uint "$1" "$CATEGORY"
      shift 2
      ;;
    --path)
      require_value "$1" "$#"
      TARGET="${2:-}"             # empty value falls back to the default root
      shift 2
      ;;
    --max)
      require_value "$1" "$#"
      MAX="${2:-5}"               # empty value keeps the default
      require_uint "$1" "$MAX"
      shift 2
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    *)
      echo "unknown flag: $1" >&2
      usage >&2
      exit 2
      ;;
  esac
done

# Force base 10 so inputs such as "08" are not parsed as (invalid) octal.
CATEGORY=$((10#${CATEGORY}))
MAX=$((10#${MAX}))

if [[ "$CATEGORY" -ne 1 ]]; then
  echo "category $CATEGORY not yet implemented — only ❶ is mechanical" >&2
  exit 0
fi

# Default scan root: <repo toplevel>/tests, or ./tests outside a git checkout.
if [[ -z "$TARGET" ]]; then
  repo_root=$(git rev-parse --show-toplevel 2>/dev/null || pwd)
  TARGET="${repo_root}/tests"
fi

if [[ ! -e "$TARGET" ]]; then
  echo "path not found: $TARGET" >&2
  exit 2
fi

# A cap of zero means "emit nothing"; honour it before scanning anything.
if [[ "$MAX" -eq 0 ]]; then
  exit 0
fi

# scan_file FILE
#   ❶ heuristic — reports the first line of FILE that contains `[[` followed,
#   later on the same line, by a double-quoted string holding at least one
#   digit (integer or decimal literal). Single-bracket `[ ... ]` tests and the
#   presence of an equality operator are NOT checked by the pattern.
#   Emits at most one entry per file (not per line) to stay under the rate cap.
#   Returns 0 when a finding was printed, 1 otherwise.
scan_file() {
  local file="$1"
  local first_hit
  # grep exits 1 on "no match"; `|| true` keeps that from tripping set -e.
  first_hit=$(grep -nE -m 1 -- '\[\[.*"[^"]*[0-9]+(\.[0-9]+)?[^"]*"' "$file" 2>/dev/null || true)
  [[ -z "$first_hit" ]] && return 1

  # grep -n output is "<lineno>:<text>"; keep only the line number.
  local lineno="${first_hit%%:*}"
  local rel
  # Prefer a path relative to the current directory; fall back to the raw
  # path when python3 is unavailable or fails.
  rel=$(python3 -c "import os,sys; print(os.path.relpath(sys.argv[1]))" "$file" 2>/dev/null || echo "$file")
  printf '[test-quality:❶] %s:%s — assertion body hardcodes a numeric literal that likely owns its value elsewhere\n' \
    "$rel" "$lineno"
  return 0
}

emitted=0
if [[ -d "$TARGET" ]]; then
  # Iterate over .bats files under TARGET in sorted order; stop after MAX hits.
  # Exclude vendored bats-core helpers — those are framework tests, not ours.
  while IFS= read -r f; do
    case "$f" in
      */tests/helpers/bats-*/*) continue ;;
    esac
    if scan_file "$f"; then
      emitted=$((emitted + 1))
      if [[ "$emitted" -ge "$MAX" ]]; then
        break
      fi
    fi
  done < <(find "$TARGET" -type f -name '*.bats' | sort)
else
  # Single-file mode: a miss is not an error.
  if scan_file "$TARGET"; then
    emitted=1
  fi
fi

# Always succeed — dry-run is informational, the dream cycle decides what to do.
exit 0
package/bin/roll CHANGED
@@ -4,7 +4,7 @@ set -euo pipefail
4
4
  # Roll — AI Agent Convention Manager
5
5
  # Single source of truth for how all AI coding agents behave.
6
6
 
7
- VERSION="2026.522.2"
7
+ VERSION="2026.523.1"
8
8
  ROLL_HOME="${ROLL_HOME:-${HOME}/.roll}"
9
9
  ROLL_CONFIG="${ROLL_HOME}/config.yaml"
10
10
  ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
@@ -322,16 +322,25 @@ safe_copy() {
322
322
  if diff -q "$src" "$dst" &>/dev/null; then
323
323
  return # identical, skip silently
324
324
  fi
325
+ # Non-interactive (stdin is not a terminal): silently overwrite.
326
+ # _run_setup_step / cmd_update redirect stdin to /dev/null and all
327
+ # stdout/stderr is suppressed — prompting here would either hang on a
328
+ # hidden read or silently default to overwrite. Be explicit.
329
+ if [[ ! -t 0 ]]; then
330
+ cp "$src" "$dst"
331
+ ok "Wrote: ${dst/#$HOME/~} 已写入: ${dst/#$HOME/~}"
332
+ return
333
+ fi
325
334
  echo ""
326
335
  warn "File exists and differs: ${dst/#$HOME/~} 文件已存在且内容不同: ${dst/#$HOME/~}"
327
336
  echo -e " ${BOLD}Overwrite?${NC} [Y/n/d(iff)] "
328
- read -r answer
337
+ read -r answer || answer="Y"
329
338
  case "$answer" in
330
339
  d|D|diff)
331
340
  diff --color=auto "$dst" "$src" || true
332
341
  echo ""
333
342
  echo -e " ${BOLD}Overwrite?${NC} [Y/n] "
334
- read -r answer2
343
+ read -r answer2 || answer2="Y"
335
344
  [[ "$answer2" =~ ^[Nn]$ ]] && { info "Skipped: ${dst/#$HOME/\~} 已跳过: ${dst/#$HOME/\~}"; return; }
336
345
  ;;
337
346
  n|N) info "Skipped: ${dst/#$HOME/~} 已跳过: ${dst/#$HOME/~}"; return ;;
@@ -726,7 +735,7 @@ _run_setup_step() {
726
735
  local watch="$1"; shift
727
736
  local before after
728
737
  before=$(_setup_snapshot "$watch")
729
- if "$@" >/dev/null 2>&1; then
738
+ if "$@" </dev/null >/dev/null 2>&1; then
730
739
  after=$(_setup_snapshot "$watch")
731
740
  if [[ "$before" == "$after" ]]; then
732
741
  _ROLL_SETUP_STATE="unchanged"
@@ -883,6 +892,40 @@ HINT
883
892
  # in (or opted out) don't get spammed each upgrade.
884
893
cmd_doctor() {
  # Run every doctor section in order: PR status first, then the scan for
  # launchd plists whose WorkingDirectory has disappeared. The function's
  # exit status is that of the last section executed.
  local section
  for section in _doctor_pr_section _doctor_launchd_stale_section; do
    "$section"
  done
}
897
+
898
# FIX-097: scan ${_LAUNCHD_DIR}/com.roll.*.plist for entries whose
# WorkingDirectory no longer exists on disk. These are the ghost agents left
# behind when a user manually reproduces a bug under /private/tmp/ or
# /var/folders/ — the auto-sandbox redirects plist writes but launchctl
# bootstrap (before this fix) registered them anyway. Print labels +
# cleanup hint; never auto-delete (host launchctl state is user-owned).
#
# Globals:  _LAUNCHD_DIR (read; defaults to ${HOME}/Library/LaunchAgents)
# Outputs:  one warning stanza per stale plist on stdout; nothing when clean
# Returns:  always 0 (informational section; also a no-op off macOS)
_doctor_launchd_stale_section() {
  [[ "$(uname)" == "Darwin" ]] || return 0
  local dir="${_LAUNCHD_DIR:-${HOME}/Library/LaunchAgents}"
  [[ -d "$dir" ]] || return 0

  local found=0 plist label wd uid
  uid=$(id -u)   # hoisted: identical for every plist in the hint below
  for plist in "$dir"/com.roll.*.plist; do
    # Unmatched glob stays literal; skip it.
    [[ -e "$plist" ]] || continue
    # Extract the first <string> value that follows the WorkingDirectory key.
    # The value may share the key's line or appear on a later line; scanning
    # stops at the next <key> so another entry's value is never picked up.
    # `|| wd=""` keeps an unreadable plist from aborting the whole command
    # under `set -e`.
    wd=$(awk '
      !seen && /<key>WorkingDirectory<\/key>/ {
        seen = 1
        sub(/.*<key>WorkingDirectory<\/key>/, "")
      }
      seen {
        s = index($0, "<string>")
        k = index($0, "<key>")
        if (k > 0 && (s == 0 || k < s)) exit
        if (s > 0) {
          rest = substr($0, s + 8)
          e = index(rest, "</string>")
          if (e > 0) rest = substr(rest, 1, e - 1)
          print rest
          exit
        }
      }
    ' "$plist" 2>/dev/null) || wd=""
    [[ -n "$wd" ]] || continue
    [[ -d "$wd" ]] && continue
    # Print the section header once, right before the first finding.
    if [[ "$found" -eq 0 ]]; then
      echo ""
      echo "Stale launchd plists 无效的 launchd 服务"
      echo ""
      found=1
    fi
    label=$(basename "$plist" .plist)
    echo " ⚠ ${label}"
    echo " WorkingDirectory missing: ${wd}"
    echo " 路径已失效,可清理: launchctl bootout gui/${uid}/${label}; rm '${plist}'"
  done
  return 0
}
887
930
 
888
931
  _doctor_pr_section() {
@@ -1904,7 +1947,7 @@ PY
1904
1947
  fi
1905
1948
  if [ "${#plists[@]}" -gt 0 ]; then
1906
1949
  for item in "${plists[@]}"; do
1907
- launchctl unload -w "$HOME/Library/LaunchAgents/$item" 2>/dev/null && echo " unloaded $item"
1950
+ _launchctl_safe unload -w "$HOME/Library/LaunchAgents/$item" 2>/dev/null && echo " unloaded $item"
1908
1951
  rm -f "$HOME/Library/LaunchAgents/$item" 2>/dev/null
1909
1952
  done
1910
1953
  fi
@@ -3892,7 +3935,7 @@ PYEOF
3892
3935
 
3893
3936
  local old_plist=~/Library/LaunchAgents/com.roll.loop.${old_slug}.plist
3894
3937
  if [[ -f "$old_plist" ]]; then
3895
- launchctl unload "$old_plist" 2>/dev/null || true
3938
+ _launchctl_safe unload "$old_plist" 2>/dev/null || true
3896
3939
  rm -f "$old_plist"
3897
3940
  fi
3898
3941
 
@@ -3969,6 +4012,13 @@ if [ -z "${_LAUNCHD_DIR:-}" ]; then
3969
4012
  _LAUNCHD_DIR="${_SHARED_ROOT}/LaunchAgents"
3970
4013
  mkdir -p "$_LAUNCHD_DIR"
3971
4014
  export _LAUNCHD_DIR
4015
+ # FIX-097: same trigger that sandboxed the plist FILE path must also
4016
+ # short-circuit every `launchctl bootstrap/load/unload/enable` against
4017
+ # that path. Otherwise a user who reproduces a bug under /private/tmp/
4018
+ # or /var/folders/ ends up with sandboxed plists registered in their
4019
+ # real gui/<uid> domain — when the tmp dir is cleaned, the agents become
4020
+ # ghosts that fire forever (the historical 23:13 CST Terminal popup).
4021
+ export _LAUNCHD_SKIP_REGISTRY=1
3972
4022
  fi
3973
4023
  unset _roll_in_test_ctx _roll_caller
3974
4024
  ;;
@@ -4095,6 +4145,25 @@ _launchd_label() {
4095
4145
  printf 'com.roll.%s.%s' "$service" "$(_project_slug "$project_path")"
4096
4146
  }
4097
4147
 
4148
# FIX-097: central skip predicate consulted by every launchctl invocation that
# operates on a plist path Roll wrote. Returns 0 (skip) when either:
#   - explicit: _LAUNCHD_SKIP_REGISTRY=1 was exported (tests, future opt-out)
#   - implicit: _LAUNCHD_DIR is a child of _SHARED_ROOT (auto-sandbox active)
# Returns 1 (do not skip) in production.
#
# History: FIX-090 introduced the same logic INSIDE _install_launchd_plists.
# FIX-097 hoists it to a helper because the bootstrap call inside
# _install_launchd_plists was not the only leak: _loop_on / _loop_off /
# _loop_pause / _loop_resume each had bare `launchctl load/unload/enable`
# calls that bypassed the gate.
#
# Globals: _LAUNCHD_SKIP_REGISTRY, _SHARED_ROOT, _LAUNCHD_DIR (all read-only)
_launchd_should_skip_registry() {
  [[ "${_LAUNCHD_SKIP_REGISTRY:-}" == "1" ]] && return 0

  local root="${_SHARED_ROOT:-}" dir="${_LAUNCHD_DIR:-}"
  # Drop trailing slashes from the root: "/tmp/x/" must still be recognised
  # as the parent of "/tmp/x/LaunchAgents". Left as-is, the pattern below
  # would become "/tmp/x//*" and silently never match.
  while [[ "$root" == */ ]]; do
    root="${root%/}"
  done
  # Nothing to compare against (unset/empty/"/" root, or no launchd dir):
  # treat as production.
  [[ -n "$root" && -n "$dir" ]] || return 1

  # Appending "/" to dir makes the match component-wise, so a sibling such as
  # "/tmp/x-other" is not mistaken for a child of "/tmp/x".
  case "${dir}/" in
    "${root}"/*) return 0 ;;
  esac
  return 1
}
4166
+
4098
4167
  _launchd_plist_path() {
4099
4168
  local service="$1" project_path="$2"
4100
4169
  printf '%s/%s.plist' "$_LAUNCHD_DIR" "$(_launchd_label "$service" "$project_path")"
@@ -4123,16 +4192,34 @@ _write_launchd_plist() {
4123
4192
  ;;
4124
4193
  esac
4125
4194
 
4126
- local hour_xml=""
4127
- [[ -n "$hour" ]] && hour_xml=" <key>Hour</key>
4128
- <integer>${hour}</integer>
4129
- "
4130
-
4131
4195
  # FIX-050: bake PATH into the plist so launchd-spawned bash can find tmux,
4132
4196
  # claude, node, etc. The runner script also re-asserts PATH at runtime as
4133
4197
  # a second layer (covers stale plists where brew was installed after setup).
4134
4198
  local path_value; path_value=$(_detect_path_prepend)
4135
4199
 
4200
+ # FIX-105: macOS 26.4 launchd silently refuses to fire StartCalendarInterval
4201
+ # entries that contain BOTH Hour and Minute keys (verified: runs stays 0,
4202
+ # last exit "never exited", no log output, the calendarinterval trigger is
4203
+ # registered but never invoked by UserEventAgent-Aqua). Single-Minute (hourly)
4204
+ # entries still fire fine. Workaround: when an Hour is provided (daily
4205
+ # schedule), emit StartInterval=86400 (24h period) instead. First fire is
4206
+ # bootstrap+24h rather than the exact requested wall-clock time — acceptable
4207
+ # trade since the alternative was "never fires at all" (dream/brief broken
4208
+ # for 4+ days). The Minute/Hour args are still kept in the function signature
4209
+ # for callers that may want to filter at runtime, but they no longer steer
4210
+ # the plist trigger format for daily schedules.
4211
+ local schedule_xml
4212
+ if [[ -n "$hour" ]]; then
4213
+ schedule_xml=" <key>StartInterval</key>
4214
+ <integer>86400</integer>"
4215
+ else
4216
+ schedule_xml=" <key>StartCalendarInterval</key>
4217
+ <dict>
4218
+ <key>Minute</key>
4219
+ <integer>${minute}</integer>
4220
+ </dict>"
4221
+ fi
4222
+
4136
4223
  local content
4137
4224
  content="<?xml version=\"1.0\" encoding=\"UTF-8\"?>
4138
4225
  <!DOCTYPE plist PUBLIC \"-//Apple//DTD PLIST 1.0//EN\" \"http://www.apple.com/DTDs/PropertyList-1.0.dtd\">
@@ -4151,11 +4238,7 @@ _write_launchd_plist() {
4151
4238
  <key>PATH</key>
4152
4239
  <string>${path_value}</string>
4153
4240
  </dict>
4154
- <key>StartCalendarInterval</key>
4155
- <dict>
4156
- <key>Minute</key>
4157
- <integer>${minute}</integer>
4158
- ${hour_xml} </dict>
4241
+ ${schedule_xml}
4159
4242
  <key>WorkingDirectory</key>
4160
4243
  <string>${project_path}</string>
4161
4244
  </dict>
@@ -4329,13 +4412,29 @@ _inner_cleanup() {
4329
4412
  # FIX-091: prefer a real PR so auto-merge lands the work; tag-only is the
4330
4413
  # last-resort because it requires manual cherry-pick. Emit cycle_end "done"
4331
4414
  # (canonical success status the dashboard recognizes) when PR publishes.
4415
+ # FIX-099: compute tcr_count + built[] from the worktree (it's still alive
4416
+ # at EXIT trap time) so runs.jsonl and ALERT carry truthful data.
4417
+ _orphan_tcr=0
4418
+ _orphan_built="[]"
4419
+ if command -v jq >/dev/null 2>&1; then
4420
+ _orphan_tcr=\$(cd "\$WT" && git log --oneline "origin/main..HEAD" 2>/dev/null | grep -c ' tcr:' || echo 0)
4421
+ _orphan_built=\$(cd "\$WT" && git log --oneline "origin/main..HEAD" 2>/dev/null \
4422
+ | grep ' tcr:' \
4423
+ | grep -oE '\b(FIX|US|REFACTOR|CHORE)-[0-9]+\b' \
4424
+ | sort -u \
4425
+ | jq -R -s 'split("\n") | map(select(length>0))' 2>/dev/null || echo "[]")
4426
+ fi
4332
4427
  _slug=""
4333
4428
  if _gh_resolve _slug \\
4334
4429
  && ( cd "\$WT" && _loop_publish_pr "\$BRANCH" "loop cycle \${CYCLE_ID}" ) >/dev/null 2>&1; then
4335
4430
  _loop_event cycle_end "\${CYCLE_ID}" "\${BRANCH:-}" "done" 2>/dev/null || true
4336
4431
  _CYCLE_END_WRITTEN=1
4337
- _runs_append "done" 0 "[]" 2>/dev/null || true
4338
- _worktree_alert "cycle \${CYCLE_ID}: aborted with \${_unpushed} commits; FIX-091 published as PR" 2>/dev/null || true
4432
+ # FIX-099: pass real tcr_count + built[] instead of 0/"[]"
4433
+ _runs_append "done" "\${_orphan_tcr}" "\${_orphan_built}" 2>/dev/null || true
4434
+ # FIX-099: three-field ALERT so callers can distinguish recovered orphan
4435
+ # from a cycle's normally-picked story (was: "FIX-091 published as PR"
4436
+ # which leaked a hardcoded string regardless of what was actually built).
4437
+ _worktree_alert "cycle \${CYCLE_ID}: recovered_from_orphan=yes; tcr_commits=\${_orphan_tcr}; stories=\${_orphan_built}; pr_branch=\${BRANCH:-unknown}" 2>/dev/null || true
4339
4438
  else
4340
4439
  _orphan_tag="loop-orphan-\${CYCLE_ID}"
4341
4440
  if ( cd "\$WT" && git push origin "\$BRANCH" 2>/dev/null \\
@@ -4343,8 +4442,9 @@ _inner_cleanup() {
4343
4442
  && git push origin "\$_orphan_tag" 2>/dev/null ); then
4344
4443
  _loop_event cycle_end "\${CYCLE_ID}" "\${BRANCH:-}" "orphan" 2>/dev/null || true
4345
4444
  _CYCLE_END_WRITTEN=1
4346
- _runs_append "orphan" 0 "[]" 2>/dev/null || true
4347
- _worktree_alert "cycle \${CYCLE_ID}: aborted with \${_unpushed} commits; FIX-086 pushed orphan tag \${_orphan_tag}" 2>/dev/null || true
4445
+ # FIX-099: pass real tcr_count + built[] for the orphan-tag path too
4446
+ _runs_append "orphan" "\${_orphan_tcr}" "\${_orphan_built}" 2>/dev/null || true
4447
+ _worktree_alert "cycle \${CYCLE_ID}: recovered_from_orphan=yes; tcr_commits=\${_orphan_tcr}; stories=\${_orphan_built}; FIX-086 pushed orphan tag \${_orphan_tag}" 2>/dev/null || true
4348
4448
  fi
4349
4449
  fi
4350
4450
  fi
@@ -4395,6 +4495,10 @@ WT="\$(_worktree_path "${slug}" "cycle-\${CYCLE_ID}")"
4395
4495
  BRANCH="loop/cycle-\${CYCLE_ID}"
4396
4496
  _USE_WORKTREE=0
4397
4497
  cd "${project_path}" 2>/dev/null || true
4498
+ # FIX-104: GC stale merged temp branches at cycle entry — before worktree setup
4499
+ # and before any early-exit gate (pre-run abort, CI red precheck). The post-claude
4500
+ # call site doesn't cover those paths, so merged branches accumulated on origin.
4501
+ _loop_cleanup_stale_cycle_branches "${project_path}" || true
4398
4502
  # FIX-040: orphan worktree recovery — scan for worktrees left by previous failed
4399
4503
  # cycles (publish failed or inner script was SIGKILL'd). Attempt to publish each
4400
4504
  # before starting the new cycle. Glob is chronological via timestamp in name.
@@ -4622,9 +4726,6 @@ if [ "\$_USE_WORKTREE" = "1" ]; then
4622
4726
  fi
4623
4727
  fi
4624
4728
 
4625
- # US-AUTO-040: fallback GC — delete remote loop/cycle-* branches already merged to main.
4626
- _loop_cleanup_stale_cycle_branches "${project_path}" || true
4627
-
4628
4729
  # FIX-044 / Step 5: Write loop cycle run summary to runs.jsonl
4629
4730
  # Deterministic — runs in shell regardless of whether agent executes SKILL.md Step 5.
4630
4731
  # US-LOOP-005: now routed through _runs_append so timeout/worktree-setup-fail
@@ -4760,17 +4861,67 @@ SCRIPT
4760
4861
  }
4761
4862
 
4762
4863
_launchd_is_loaded() {
  # FIX-098: ask launchd itself whether the label ($1) is registered in the
  # current user's gui/<uid> domain, using `launchctl print` rather than the
  # `launchctl print-disabled` overrides listing used previously. The
  # overrides listing only records enable/disable decisions, so it is not a
  # reliable indicator of whether an agent is actually registered.
  # Exit status is launchctl's own: 0 when the service target is known,
  # non-zero otherwise. All launchctl output is discarded.
  local service_target="gui/$(id -u)/$1"
  launchctl print "$service_target" &>/dev/null
}
4873
+
4874
# FIX-101 tripwire: refuse to mutate the host's launchd session when
# _LAUNCHD_DIR has been sandboxed (i.e. is not the canonical
# ${HOME}/Library/LaunchAgents). Tests that auto-sandbox _LAUNCHD_DIR for
# isolation (FIX-087) may still forget to set _LAUNCHD_SKIP_REGISTRY=1 or
# stub the launchctl binary; without this defensive layer the production
# label's plist path can get overwritten with a transient sandbox path,
# leading to launchd EX_CONFIG (exit 78) when the tmp dir is later cleaned
# and the next scheduled fire can't find the plist. Read-only ops (print*,
# list, version) are always allowed since they have no side effects.
#
# Arguments: "$@" — forwarded verbatim to launchctl
# Globals:   _LAUNCHD_DIR, HOME (read)
# Returns:   launchctl's exit status when the call is forwarded;
#            0 (silently) when a mutating call is refused
_launchctl_safe() {
  # Read-only ops are always safe (no host launchd state mutation).
  case "${1:-}" in
    print|print-disabled|list|version|dumpstate|examine)
      launchctl "$@"
      return $?
      ;;
  esac

  # If `launchctl` has been replaced by a function stub (typical in bats tests
  # that want to assert against captured calls), pass through to the stub so
  # the literal call lands in the test's captured log.
  if [[ "$(type -t launchctl 2>/dev/null)" == "function" ]]; then
    launchctl "$@"
    return $?
  fi

  # Real launchctl binary: forward only when _LAUNCHD_DIR is the canonical
  # per-user LaunchAgents directory. Trailing slashes are stripped from both
  # sides before comparing, so "~/Library/LaunchAgents/" is still recognised
  # as canonical instead of silently turning every mutating call into a no-op.
  local canonical="${HOME}/Library/LaunchAgents"
  local configured="${_LAUNCHD_DIR:-$canonical}"
  while [[ "$canonical" == */ && "$canonical" != "/" ]]; do
    canonical="${canonical%/}"
  done
  while [[ "$configured" == */ && "$configured" != "/" ]]; do
    configured="${configured%/}"
  done
  if [[ "$configured" != "$canonical" ]]; then
    return 0
  fi
  launchctl "$@"
}
4765
4911
 
4766
4912
  _launchd_svc_state() {
4913
+ # FIX-098: three-state classification:
4914
+ # enabled — plist on disk AND registered in launchd
4915
+ # stale — plist on disk BUT NOT registered in launchd
4916
+ # installed-off — kept for back-compat (maps to stale semantics)
4917
+ # not-installed — no plist
4767
4918
  local svc="$1" project_path="$2"
4768
4919
  local label; label=$(_launchd_label "$svc" "$project_path")
4769
4920
  local plist; plist=$(_launchd_plist_path "$svc" "$project_path")
4770
4921
  if _launchd_is_loaded "$label"; then
4771
4922
  echo "enabled"
4772
4923
  elif [[ -f "$plist" ]]; then
4773
- echo "installed-off"
4924
+ echo "stale"
4774
4925
  else
4775
4926
  echo "not-installed"
4776
4927
  fi
@@ -4833,42 +4984,25 @@ _install_launchd_plists() {
4833
4984
  local after; after=$(cat "$plist")
4834
4985
  if [[ "$before" != "$after" ]]; then
4835
4986
  updated=$((updated + 1))
4836
- # FIX-090: gate launchctl writes so a sandboxed plist never gets
4837
- # registered into the user's REAL gui/<uid> domain. Without this,
4838
- # `launchctl bootstrap gui/<uid> <sandbox-plist>` outlives TEST_TMP
4839
- # cleanup as a zombie that either fails silently (EX_CONFIG) or, when
4840
- # the label collides with the dev's project slug, displaces the real
4841
- # registration and kills the autonomous loop. Two gate paths:
4842
- # - explicit: integration_setup exports _LAUNCHD_SKIP_REGISTRY=1
4843
- # - implicit: if _LAUNCHD_DIR was auto-sandboxed under _SHARED_ROOT
4844
- # (FIX-087 inner-runner.sh re-source path) we infer skip — callers
4845
- # that genuinely want the launchctl flow override _LAUNCHD_DIR to
4846
- # a path outside _SHARED_ROOT (unit tests; production has no
4847
- # _SHARED_ROOT match against ~/Library/LaunchAgents).
4848
- # See helpers.bash and tests/unit/launchd_sandbox.bats.
4849
- local _skip_reg="${_LAUNCHD_SKIP_REGISTRY:-}"
4850
- if [[ -z "$_skip_reg" ]]; then
4851
- case "${_LAUNCHD_DIR:-}/" in
4852
- "${_SHARED_ROOT:-/nonexistent}"/*) _skip_reg=1 ;;
4853
- *) _skip_reg=0 ;;
4854
- esac
4855
- fi
4856
- if [[ "$_skip_reg" != "1" ]]; then
4987
+ # FIX-090/FIX-097: gate launchctl writes via central helper so a
4988
+ # sandboxed plist never gets registered into the user's REAL gui/<uid>
4989
+ # domain. See _launchd_should_skip_registry for the predicate rules.
4990
+ if ! _launchd_should_skip_registry; then
4857
4991
  if _launchd_is_loaded "$label"; then
4858
4992
  # FIX-027: use bootout/bootstrap so we don't disturb the label's
4859
4993
  # enabled flag in the launchd overrides db (which legacy
4860
4994
  # unload/load no-`-w` wipes on macOS Sonoma+, causing
4861
4995
  # `roll loop status` to falsely report off after `roll update`).
4862
4996
  local uid; uid=$(id -u)
4863
- launchctl bootout "gui/${uid}/${label}" 2>/dev/null || true
4864
- launchctl bootstrap "gui/${uid}" "$plist" 2>/dev/null || true
4997
+ _launchctl_safe bootout "gui/${uid}/${label}" 2>/dev/null || true
4998
+ _launchctl_safe bootstrap "gui/${uid}" "$plist" 2>/dev/null || true
4865
4999
  elif [[ -z "$before" ]]; then
4866
5000
  # FIX-059: brand-new plist — macOS FSEvents auto-bootstraps any new
4867
5001
  # file dropped in ~/Library/LaunchAgents/, so projects never enabled
4868
5002
  # via 'roll loop on' would fire every hour. Immediately mark disabled
4869
5003
  # in the overrides db to block that auto-load.
4870
5004
  local uid; uid=$(id -u)
4871
- launchctl disable "gui/${uid}/${label}" 2>/dev/null || true
5005
+ _launchctl_safe disable "gui/${uid}/${label}" 2>/dev/null || true
4872
5006
  fi
4873
5007
  fi
4874
5008
  fi
@@ -4925,7 +5059,8 @@ cmd_loop() {
4925
5059
  notify) _notify "${1:-roll}" "${2:-}" ;;
4926
5060
  enforce-tcr) _loop_enforce_tcr "${1:-}" "${2:-}" ;;
4927
5061
  precheck-ci) _loop_precheck_ci ;;
4928
- *) err "Usage: roll loop <on|off|now|test|status|monitor|runs|events|attach|mute|unmute|pause|resume|reset|notify|enforce-tcr|precheck-ci>"; exit 1 ;;
5062
+ branches) _loop_branches "$(pwd -P)" ;;
5063
+ *) err "Usage: roll loop <on|off|now|test|status|monitor|runs|events|attach|mute|unmute|pause|resume|reset|notify|enforce-tcr|precheck-ci|branches>"; exit 1 ;;
4929
5064
  esac
4930
5065
  }
4931
5066
 
@@ -4945,12 +5080,25 @@ _loop_on() {
4945
5080
  if [[ "$(uname)" == "Darwin" ]]; then
4946
5081
  _install_launchd_plists "$project_path" >/dev/null
4947
5082
 
5083
+ # FIX-098: use launchctl bootstrap/enable instead of load -w.
5084
+ # `load -w` writes to the disabled-overrides DB which causes FIX-027's
5085
+ # re-source to break after `roll update`. bootstrap is idem-potent and
5086
+ # does not disturb the overrides DB.
5087
+ local uid; uid=$(id -u)
4948
5088
  local all_loaded=true
4949
5089
  for svc in loop dream brief; do
4950
5090
  local label; label=$(_launchd_label "$svc" "$project_path")
5091
+ local plist; plist=$(_launchd_plist_path "$svc" "$project_path")
4951
5092
  if ! _launchd_is_loaded "$label"; then
4952
5093
  all_loaded=false
4953
- launchctl load -w "$(_launchd_plist_path "$svc" "$project_path")" 2>/dev/null || true
5094
+ # FIX-097 guard: skip real launchctl when _LAUNCHD_DIR was auto-sandboxed.
5095
+ _launchd_should_skip_registry && continue
5096
+ # FIX-098 semantic: enable+bootstrap pair (better than load -w).
5097
+ # enable clears any disable-override; bootstrap registers with launchd.
5098
+ # FIX-101 wrapper additionally tripwire-gates each call so a sandboxed
5099
+ # _LAUNCHD_DIR can't accidentally touch host launchd state.
5100
+ _launchctl_safe enable "gui/${uid}/${label}" 2>/dev/null || true
5101
+ _launchctl_safe bootstrap "gui/${uid}" "$plist" 2>/dev/null || true
4954
5102
  fi
4955
5103
  done
4956
5104
 
@@ -5002,11 +5150,15 @@ _loop_off() {
5002
5150
 
5003
5151
  if [[ "$(uname)" == "Darwin" ]]; then
5004
5152
  local any_loaded=false
5153
+ local _skip_off; _launchd_should_skip_registry && _skip_off=1 || _skip_off=0
5005
5154
  for svc in loop dream brief; do
5006
5155
  local label; label=$(_launchd_label "$svc" "$project_path")
5007
5156
  if _launchd_is_loaded "$label"; then
5008
5157
  any_loaded=true
5009
- launchctl unload -w "$(_launchd_plist_path "$svc" "$project_path")" 2>/dev/null || true
5158
+ # FIX-097: skip real launchctl in sandbox to avoid touching the user's
5159
+ # real launchd registry.
5160
+ [[ "$_skip_off" == "1" ]] && continue
5161
+ _launchctl_safe unload -w "$(_launchd_plist_path "$svc" "$project_path")" 2>/dev/null || true
5010
5162
  fi
5011
5163
  done
5012
5164
  if ! $any_loaded; then
@@ -5025,7 +5177,9 @@ _loop_off() {
5025
5177
  # disable list, polluting `launchctl print-disabled` forever even after
5026
5178
  # the project dir, plists, and ~/.roll are gone.
5027
5179
  local label; label=$(_launchd_label "$svc" "$project_path")
5028
- launchctl enable "gui/${uid}/${label}" 2>/dev/null || true
5180
+ # FIX-097: same gate — never touch host launchctl from a sandbox.
5181
+ [[ "$_skip_off" == "1" ]] && continue
5182
+ _launchctl_safe enable "gui/${uid}/${label}" 2>/dev/null || true
5029
5183
  done
5030
5184
  ok "Loop disabled 已停用"
5031
5185
  return 0
@@ -5169,7 +5323,7 @@ _legacy_loop_status() {
5169
5323
  else
5170
5324
  case "$state" in
5171
5325
  enabled) echo -e " ${GREEN}${svc} ● enabled${NC}" ;;
5172
- installed-off) echo -e " ${YELLOW}${svc} ⚠ installed/off${NC} run: roll loop on" ;;
5326
+ stale|installed-off) echo -e " ${YELLOW}${svc} ⚠ STALE — plist present but not loaded${NC} run: roll loop on" ;;
5173
5327
  not-installed) echo -e " ${RED}${svc} ○ not installed${NC} run: roll setup" ;;
5174
5328
  esac
5175
5329
  fi
@@ -5205,7 +5359,10 @@ _loop_pause() {
5205
5359
  if ! _launchd_is_loaded "$label"; then
5206
5360
  warn "Loop not enabled — nothing to pause loop 未启用,无需暂停"; return 0
5207
5361
  fi
5208
- launchctl unload -w "$(_launchd_plist_path "loop" "$project_path")" 2>/dev/null || true
5362
+ # FIX-097: never touch host launchctl from a sandboxed plist path.
5363
+ if ! _launchd_should_skip_registry; then
5364
+ _launchctl_safe unload -w "$(_launchd_plist_path "loop" "$project_path")" 2>/dev/null || true
5365
+ fi
5209
5366
  else
5210
5367
  local slug; slug=$(_project_slug "$project_path")
5211
5368
  mkdir -p "${_SHARED_ROOT}/loop"
@@ -5225,8 +5382,9 @@ _loop_resume() {
5225
5382
  if [[ "$(uname)" == "Darwin" ]]; then
5226
5383
  local label; label=$(_launchd_label "loop" "$project_path")
5227
5384
  local plist; plist=$(_launchd_plist_path "loop" "$project_path")
5228
- if [[ -f "$plist" ]]; then
5229
- launchctl load -w "$plist" 2>/dev/null || true
5385
+ if [[ -f "$plist" ]] && ! _launchd_should_skip_registry; then
5386
+ # FIX-097: never touch host launchctl from a sandboxed plist path.
5387
+ _launchctl_safe load -w "$plist" 2>/dev/null || true
5230
5388
  fi
5231
5389
  else
5232
5390
  local slug; slug=$(_project_slug "$project_path")
@@ -5562,15 +5720,28 @@ _loop_precheck_ci() {
5562
5720
 
5563
5721
  local commit; commit=$(git rev-parse HEAD 2>/dev/null) || return 0
5564
5722
 
5723
+ # FIX-103: fetch both `status` and `conclusion`. Pre-run gate must distinguish
5724
+ # a still-running CI (status=in_progress/queued/waiting, conclusion=null) from
5725
+ # a genuinely red CI (conclusion=failure/cancelled/timed_out/...). Treating
5726
+ # in_progress as red kills every cycle started within the first ~30s of a
5727
+ # merge-triggered CI run.
5565
5728
  local runs
5566
- runs=$(gh -R "$slug" run list --commit "$commit" --json conclusion 2>/dev/null) || return 0
5729
+ runs=$(gh -R "$slug" run list --commit "$commit" --json conclusion,status 2>/dev/null) || return 0
5567
5730
  [[ -z "$runs" || "$runs" == "[]" ]] && return 0
5568
5731
 
5569
- local failed
5570
- failed=$(echo "$runs" | jq -r '[.[] | select(.conclusion != null and .conclusion != "success" and .conclusion != "skipped")] | length' 2>/dev/null || echo "0")
5732
+ # Conclusions that block the loop. Anything else (success, skipped, neutral,
5733
+ # or null while still running) is treated as pass/pending.
5734
+ local failed_conclusions
5735
+ failed_conclusions=$(echo "$runs" \
5736
+ | jq -r '[.[] | select(.conclusion=="failure" or .conclusion=="cancelled" or .conclusion=="timed_out" or .conclusion=="action_required" or .conclusion=="startup_failure") | .conclusion] | unique | join(",")' \
5737
+ 2>/dev/null || echo "")
5571
5738
 
5572
- if [[ "$failed" -gt 0 ]]; then
5739
+ if [[ -n "$failed_conclusions" ]]; then
5573
5740
  local short; short=$(git rev-parse --short HEAD 2>/dev/null || echo unknown)
5741
+ local run_states
5742
+ run_states=$(echo "$runs" \
5743
+ | jq -r '[.[] | "\(.status // "?")/\(.conclusion // "null")"] | unique | join(", ")' \
5744
+ 2>/dev/null || echo "?")
5574
5745
  err "Pre-run CI check: HEAD CI is red — refuse to build on broken base (${short}) HEAD CI 红,拒绝在破损的基础上构建"
5575
5746
  mkdir -p "$(dirname "$_LOOP_ALERT")"
5576
5747
  cat > "$_LOOP_ALERT" << EOF
@@ -5579,6 +5750,8 @@ _loop_precheck_ci() {
5579
5750
  **Time**: $(date '+%Y-%m-%d %H:%M')
5580
5751
  **Commit**: ${short}
5581
5752
  **Reason**: HEAD CI is red — loop refused to build on a broken base HEAD CI 红,loop 拒绝在破损的基础上构建
5753
+ **Failing conclusions**: ${failed_conclusions}
5754
+ **Run states**: ${run_states}
5582
5755
 
5583
5756
  **Action required**:
5584
5757
  - Investigate and fix CI: \`gh -R ${slug} run list --commit ${commit}\`
@@ -6082,10 +6255,24 @@ _loop_mark_in_progress() {
6082
6255
  [ -n "$story_id" ] || return 1
6083
6256
  [ -f "$backlog" ] || return 0
6084
6257
  local tmp; tmp=$(mktemp "${backlog}.XXXXXX") || return 1
6258
+ # FIX-106: match the story-id column (col 2) for equality instead of doing
6259
+ # substring match on the whole row. Pre-fix, picking US-X-001 also flipped
6260
+ # any row whose description contained "depends-on:US-X-001" — leaving the
6261
+ # dashboard claiming work on stories no one had picked.
6085
6262
  awk -v sid="$story_id" '
6086
6263
  {
6087
- if (index($0, sid) > 0 && index($0, "📋 Todo") > 0) {
6088
- sub(/📋 Todo/, "🔨 In Progress")
6264
+ if (index($0, "📋 Todo") > 0) {
6265
+ n = split($0, cols, "|")
6266
+ if (n >= 2) {
6267
+ id_cell = cols[2]
6268
+ gsub(/[[:space:]]/, "", id_cell)
6269
+ # Markdown link form "[ID](path)" → keep just "ID"
6270
+ sub(/^\[/, "", id_cell)
6271
+ sub(/\].*$/, "", id_cell)
6272
+ if (id_cell == sid) {
6273
+ sub(/📋 Todo/, "🔨 In Progress")
6274
+ }
6275
+ }
6089
6276
  }
6090
6277
  print
6091
6278
  }
@@ -6101,10 +6288,20 @@ _loop_mark_todo() {
6101
6288
  [ -n "$story_id" ] || return 1
6102
6289
  [ -f "$backlog" ] || return 0
6103
6290
  local tmp; tmp=$(mktemp "${backlog}.XXXXXX") || return 1
6291
+ # FIX-106: same column-2 equality match as _loop_mark_in_progress.
6104
6292
  awk -v sid="$story_id" '
6105
6293
  {
6106
- if (index($0, sid) > 0 && index($0, "🔨 In Progress") > 0) {
6107
- sub(/🔨 In Progress/, "📋 Todo")
6294
+ if (index($0, "🔨 In Progress") > 0) {
6295
+ n = split($0, cols, "|")
6296
+ if (n >= 2) {
6297
+ id_cell = cols[2]
6298
+ gsub(/[[:space:]]/, "", id_cell)
6299
+ sub(/^\[/, "", id_cell)
6300
+ sub(/\].*$/, "", id_cell)
6301
+ if (id_cell == sid) {
6302
+ sub(/🔨 In Progress/, "📋 Todo")
6303
+ }
6304
+ }
6108
6305
  }
6109
6306
  print
6110
6307
  }
@@ -6512,14 +6709,29 @@ _claude_cleanup_stale_worktrees() {
6512
6709
  return 0
6513
6710
  }
6514
6711
 
6712
+ # FIX-104: scan multiple ephemeral prefixes (loop/cycle-, worktree-agent-,
6713
+ # claude/) and delete any already merged to origin/main. Unmerged branches
6714
+ # are preserved — they may be active WIP. Caller can pass a custom prefix
6715
+ # list via $2 (newline-separated `refs/heads/<prefix>*` patterns) but the
6716
+ # default whitelist covers every temp prefix the loop / Claude session /
6717
+ # worktree-agent paths create.
6515
6718
  _loop_cleanup_stale_cycle_branches() {
6516
6719
  local project_path="${1:-.}"
6517
6720
  local url; url=$(git -C "$project_path" remote get-url origin 2>/dev/null) || return 0
6518
6721
  [[ "$url" == *github.com* ]] || return 0
6519
6722
 
6520
- local branches
6521
- branches=$(git -C "$project_path" ls-remote --heads origin 'refs/heads/loop/cycle-*' 2>/dev/null \
6522
- | awk '{print $2}' | sed 's|^refs/heads/||')
6723
+ local prefixes="${2:-refs/heads/loop/cycle-*
6724
+ refs/heads/worktree-agent-*
6725
+ refs/heads/claude/*}"
6726
+
6727
+ local branches=""
6728
+ while IFS= read -r pat; do
6729
+ [ -z "$pat" ] && continue
6730
+ local found
6731
+ found=$(git -C "$project_path" ls-remote --heads origin "$pat" 2>/dev/null \
6732
+ | awk '{print $2}' | sed 's|^refs/heads/||')
6733
+ [ -n "$found" ] && branches+="${found}"$'\n'
6734
+ done <<< "$prefixes"
6523
6735
  [ -z "$branches" ] && return 0
6524
6736
 
6525
6737
  while IFS= read -r branch; do
@@ -6534,6 +6746,41 @@ _loop_cleanup_stale_cycle_branches() {
6534
6746
  return 0
6535
6747
  }
6536
6748
 
6749
+ # FIX-104: residual-visibility command. List origin's ephemeral temp branches
6750
+ # (loop/cycle-*, worktree-agent-*, claude/*) with their merge status so the
6751
+ # user can see what GC will clean up next cycle and what's still active WIP.
6752
+ # Output: TAB-separated `<branch>\t<merged|open>` lines, one per branch.
6753
+ # Silent on non-GitHub remote / empty / unreachable.
6754
+ _loop_branches() {
6755
+ local project_path="${1:-.}"
6756
+ local url; url=$(git -C "$project_path" remote get-url origin 2>/dev/null) || return 0
6757
+ [[ "$url" == *github.com* ]] || return 0
6758
+
6759
+ local prefixes="refs/heads/loop/cycle-*
6760
+ refs/heads/worktree-agent-*
6761
+ refs/heads/claude/*"
6762
+
6763
+ local branches=""
6764
+ while IFS= read -r pat; do
6765
+ [ -z "$pat" ] && continue
6766
+ local found
6767
+ found=$(git -C "$project_path" ls-remote --heads origin "$pat" 2>/dev/null \
6768
+ | awk '{print $2}' | sed 's|^refs/heads/||')
6769
+ [ -n "$found" ] && branches+="${found}"$'\n'
6770
+ done <<< "$prefixes"
6771
+ [ -z "$branches" ] && return 0
6772
+
6773
+ while IFS= read -r branch; do
6774
+ [ -z "$branch" ] && continue
6775
+ local status="open"
6776
+ if git -C "$project_path" merge-base --is-ancestor "$branch" origin/main 2>/dev/null; then
6777
+ status="merged"
6778
+ fi
6779
+ printf "%s\t%s\n" "$branch" "$status"
6780
+ done <<< "$branches"
6781
+ return 0
6782
+ }
6783
+
6537
6784
  # US-AUTO-033: publish a loop cycle branch as a GitHub PR with auto-merge.
6538
6785
  #
6539
6786
  # _loop_publish_pr <branch> [title]
@@ -7114,7 +7361,19 @@ cmd_ci() {
7114
7361
  # will switch to hard-fail. Output format mirrors a linter ("file:line:
7115
7362
  # message") so editors can navigate from it.
7116
7363
  _backlog_lint() {
7117
- local backlog="${1:-.roll/backlog.md}"
7364
+ # FIX-102: --gate flag flips Phase 1 warn-only behavior to hard-fail.
7365
+ # When passed, any violation makes the command exit 1 — used by the
7366
+ # PreToolUse / Stop hook in ~/.claude/settings.json to actually block
7367
+ # the assistant from leaving the backlog dirty.
7368
+ local gate=0
7369
+ local backlog=".roll/backlog.md"
7370
+ while [ $# -gt 0 ]; do
7371
+ case "$1" in
7372
+ --gate) gate=1 ;;
7373
+ *) backlog="$1" ;;
7374
+ esac
7375
+ shift
7376
+ done
7118
7377
  [ -f "$backlog" ] || { err "backlog not found: $backlog"; return 1; }
7119
7378
 
7120
7379
  local violations=0
@@ -7139,6 +7398,18 @@ _backlog_lint() {
7139
7398
  | sed -E 's|^\[[A-Z]+-[0-9]+\]\([^)]*\)[[:space:]]*||' \
7140
7399
  | sed -E 's|^[A-Z]+-[0-9]+[[:space:]]*||')
7141
7400
  local issues=""
7401
+ # FIX-102: length check — backlog rows are an index page; descriptions
7402
+ # must be one human sentence (≤120 chars). Longer = technical detail
7403
+ # that belongs in the linked .roll/features/<epic>/<slug>.md.
7404
+ if [ "${#body}" -gt 120 ]; then
7405
+ issues="${issues:+${issues}, }length>${#body}"
7406
+ fi
7407
+ # FIX-102: code-fence check — backticks (`code`) signal technical jargon
7408
+ # (commands, identifiers, paths). Keep description prose plain text;
7409
+ # any code goes in the feature file.
7410
+ if echo "$body" | grep -qF '`'; then
7411
+ issues="${issues:+${issues}, }code-fence"
7412
+ fi
7142
7413
  # Filenames: bare `something.ext` for common code/config extensions
7143
7414
  if echo "$body" | grep -qE '\b[A-Za-z_][A-Za-z0-9_.-]*\.(sh|bash|yaml|yml|json|js|ts|tsx|py|rb|go|rs|c|cpp|h)\b'; then
7144
7415
  issues="${issues:+${issues}, }filename"
@@ -7165,11 +7436,14 @@ _backlog_lint() {
7165
7436
  echo ""
7166
7437
  if [ "$violations" -gt 0 ]; then
7167
7438
  echo " ${violations} violation(s) — see conventions/global/AGENTS.md §4"
7439
+ if [ "$gate" = 1 ]; then
7440
+ echo " ${violations} 条违规 — --gate enabled, exiting 1"
7441
+ return 1
7442
+ fi
7168
7443
  echo " ${violations} 条违规 — Phase 1: warn-only, not blocking"
7169
7444
  else
7170
7445
  echo " No violations 无违规"
7171
7446
  fi
7172
- # Phase 1: warn-only. Exit 0 regardless.
7173
7447
  return 0
7174
7448
  }
7175
7449
 
@@ -7185,7 +7459,8 @@ cmd_backlog() {
7185
7459
  # ── Status management subcommands ─────────────────────────────────────────
7186
7460
  case "$subcmd" in
7187
7461
  lint)
7188
- _backlog_lint "$backlog"
7462
+ shift
7463
+ _backlog_lint "$@" "$backlog"
7189
7464
  return
7190
7465
  ;;
7191
7466
  block|defer|unblock|promote)
package/lib/loop-fmt.py CHANGED
@@ -353,14 +353,28 @@ class LoopFmt:
353
353
  # Use the cumulative totals accumulated across all assistant turns;
354
354
  # result.usage is per-turn (last only) so it would under-count badly.
355
355
  model = result_ev.get("model") or self._last_model or ""
356
+
357
+ # FIX-099: skip writing the usage event when claude returned no real
358
+ # usage data (model empty AND cost/duration both zero). This prevents
359
+ # stale/placeholder values from leaking into the events stream and
360
+ # showing up as "cost=$1.24 dur=372s" in three consecutive cycles when
361
+ # the real cycle had no token data (the default-value fallback).
362
+ # The dashboard can render "n/a" for missing usage rather than false data.
363
+ has_model = bool(model)
364
+ has_tokens = any(self._usage_totals[k] > 0 for k in self._usage_totals)
365
+ has_cost = bool(cost_usd)
366
+ has_dur = bool(dur_ms)
367
+ if not has_model and not has_tokens and not has_cost and not has_dur:
368
+ return # nothing real to report — skip rather than persist zeros
369
+
356
370
  payload = {
357
- "model": model,
371
+ "model": model if has_model else None,
358
372
  "input_tokens": self._usage_totals["input_tokens"],
359
373
  "output_tokens": self._usage_totals["output_tokens"],
360
374
  "cache_creation_tokens": self._usage_totals["cache_creation_tokens"],
361
375
  "cache_read_tokens": self._usage_totals["cache_read_tokens"],
362
- "cost_reported_usd": float(cost_usd or 0),
363
- "duration_ms": int(dur_ms or 0),
376
+ "cost_reported_usd": float(cost_usd) if has_cost else None,
377
+ "duration_ms": int(dur_ms) if has_dur else None,
364
378
  }
365
379
  evfile = os.path.join(shared, "loop", f"events-{slug}.ndjson")
366
380
  line = json.dumps({
@@ -356,8 +356,10 @@ def backfill_usage_from_claude_sessions(cycles: List[Dict[str, Any]], slug: str)
356
356
  # Path 1: usage event written by loop-fmt at result time.
357
357
  ue = cy.get("usage_event")
358
358
  if isinstance(ue, dict) and (ue.get("input_tokens") or ue.get("output_tokens")):
359
- cy["input_tokens"] = int(ue.get("input_tokens") or 0)
360
- cy["output_tokens"] = int(ue.get("output_tokens") or 0)
359
+ cy["input_tokens"] = int(ue.get("input_tokens") or 0)
360
+ cy["output_tokens"] = int(ue.get("output_tokens") or 0)
361
+ cy["cache_creation_tokens"] = int(ue.get("cache_creation_tokens") or 0)
362
+ cy["cache_read_tokens"] = int(ue.get("cache_read_tokens") or 0)
361
363
  cy["model"] = ue.get("model")
362
364
  # US-VIEW-010: aggregate now sums per-turn usage tokens, so the
363
365
  # totals in `ue` reflect the whole cycle. Always compute cost at
@@ -380,8 +382,10 @@ def backfill_usage_from_claude_sessions(cycles: List[Dict[str, Any]], slug: str)
380
382
  u = load_claude_session_usage(cy.get("label", ""), slug)
381
383
  if not u:
382
384
  continue
383
- cy["input_tokens"] = int(u.get("input_tokens") or 0)
384
- cy["output_tokens"] = int(u.get("output_tokens") or 0)
385
+ cy["input_tokens"] = int(u.get("input_tokens") or 0)
386
+ cy["output_tokens"] = int(u.get("output_tokens") or 0)
387
+ cy["cache_creation_tokens"] = int(u.get("cache_creation_tokens") or 0)
388
+ cy["cache_read_tokens"] = int(u.get("cache_read_tokens") or 0)
385
389
  cy["model"] = u["model"]
386
390
  cy["cost_list"] = mp.compute_list_cost(
387
391
  u["model"],
@@ -557,7 +561,8 @@ def rollup_for_day(day_cycles: List[Dict[str, Any]]) -> Dict[str, Any]:
557
561
  # reads all 4 fields), but they don't represent the model's actual work.
558
562
  r = {"cycles": len(day_cycles), "prs": 0, "failed": 0,
559
563
  "duration_s": 0, "cost": 0.0,
560
- "input_tokens": 0, "output_tokens": 0}
564
+ "input_tokens": 0, "output_tokens": 0,
565
+ "cache_creation_tokens": 0, "cache_read_tokens": 0}
561
566
  for cy in day_cycles:
562
567
  if cy.get("outcome") == "fail":
563
568
  r["failed"] += 1
@@ -567,6 +572,10 @@ def rollup_for_day(day_cycles: List[Dict[str, Any]]) -> Dict[str, Any]:
567
572
  r["input_tokens"] += cy["input_tokens"]
568
573
  if cy.get("output_tokens"):
569
574
  r["output_tokens"] += cy["output_tokens"]
575
+ if cy.get("cache_creation_tokens"):
576
+ r["cache_creation_tokens"] += cy["cache_creation_tokens"]
577
+ if cy.get("cache_read_tokens"):
578
+ r["cache_read_tokens"] += cy["cache_read_tokens"]
570
579
  # US-VIEW-011: rollup only counts cycles whose PR actually merged.
571
580
  # Backward compat: rows where pr_outcome is missing but pr URL exists
572
581
  # (no `pr` event after the writer upgrade ran for that cycle) are
@@ -634,10 +643,13 @@ def render(events, cron, state, backlog, *, days=3, lang="both", now=None,
634
643
  c("dim", "run ") + c("fg", "roll loop on", bold=True) +
635
644
  c("dim", " to enable"))
636
645
  eb_zh = c("dim", " 未安装 · 运行 ") + c("fg", "roll loop on") + c("dim", " 启用")
637
- elif install_state == "disabled":
638
- eb_l = (c("amber", "◌ installed/off", bold=True) + c("muted", " ") +
639
- c("dim", "loop disabled run ") + c("fg", "roll loop on", bold=True))
640
- eb_zh = c("dim", " 未启用 · 运行 ") + c("fg", "roll loop on") + c("dim", " 启用")
646
+ elif install_state in ("stale", "disabled"):
647
+ # FIX-098: 'stale' = plist on disk but agent not registered in launchd.
648
+ # 'disabled' kept for back-compat (old install_state values). Both mean
649
+ # the user needs to run 'roll loop on' to bootstrap the agent.
650
+ eb_l = (c("amber", "◌ STALE — plist present, not loaded", bold=True) + c("muted", " ") +
651
+ c("dim", "run ") + c("fg", "roll loop on", bold=True) + c("dim", " to repair"))
652
+ eb_zh = c("dim", " Plist 存在但未加载 · 运行 ") + c("fg", "roll loop on") + c("dim", " 修复")
641
653
  else:
642
654
  eb_l = (c("blue", "● IDLE", bold=True) + c("muted", " · ") +
643
655
  c("dim", "enabled · next run ") + c("fg", _next_cron_hint(state), bold=True))
@@ -723,11 +735,12 @@ def render(events, cron, state, backlog, *, days=3, lang="both", now=None,
723
735
  yest_color="amber" if yest["failed"] > 0 else "dim",
724
736
  yest_suffix="⚠" if yest["failed"] > 0 else "")
725
737
  metric_dur("duration", today["duration_s"], yest["duration_s"], d2["duration_s"], partial=is_partial)
726
- # US-VIEW-012: input + output as two separate rows. cache_read no longer
727
- # surfaces here true cost is on the "cost" line below (computed from all
728
- # 4 token kinds via list price). This row labels what the model actually
729
- # processed and generated for this cycle.
738
+ # US-VIEW-017: show all 4 token components so the cost is explainable.
739
+ # cache_creation (↑) and cache_read (↓) typically account for 80-90% of
740
+ # cost hiding them makes the cost line incomprehensible.
730
741
  metric_tokens("input tokens", today["input_tokens"], yest["input_tokens"], d2["input_tokens"], partial=is_partial)
742
+ metric_tokens("cache writes", today["cache_creation_tokens"], yest["cache_creation_tokens"], d2["cache_creation_tokens"], partial=is_partial)
743
+ metric_tokens("cache reads", today["cache_read_tokens"], yest["cache_read_tokens"], d2["cache_read_tokens"], partial=is_partial)
731
744
  metric_tokens("output tokens", today["output_tokens"], yest["output_tokens"], d2["output_tokens"], partial=is_partial)
732
745
  metric_dollar("cost", today["cost"], yest["cost"], d2["cost"], partial=is_partial)
733
746
 
@@ -784,15 +797,18 @@ def _read_plist_loop_minute() -> int:
784
797
 
785
798
 
786
799
  def _detect_install_state() -> str:
787
- """FIX-095: classify the launchd install state of the loop service.
800
+ """FIX-095 / FIX-098: classify the launchd install state of the loop service.
788
801
 
789
802
  Returns one of:
790
803
  'not-installed' — no plist for com.roll.loop.<slug> in ~/Library/LaunchAgents/
791
- 'disabled' — plist exists but launchctl print-disabled shows '=> disabled'
792
- 'enabled' — plist exists and no disable override is set
793
-
794
- Pre-FIX-095, the v2 view rendered '● IDLE' for all three states, leaving
795
- users unable to tell whether the loop was actually installed/enabled.
804
+ 'stale' — plist on disk but agent NOT registered in launchd
805
+ (happens after roll loop off + roll update without roll loop on)
806
+ 'enabled' — plist on disk AND registered in launchd
807
+
808
+ FIX-098: switched from `launchctl print-disabled` (disabled-overrides DB) to
809
+ `launchctl print gui/<uid>/<label>` which probes the actual launchd registry.
810
+ The old approach returned false-positive 'enabled' when the disabled-overrides
811
+ DB had no entry for the label (no entry only means "not explicitly disabled"; it does not mean "loaded").
796
812
  """
797
813
  slug = project_slug()
798
814
  label = f"com.roll.loop.{slug}"
@@ -801,17 +817,17 @@ def _detect_install_state() -> str:
801
817
  return "not-installed"
802
818
  try:
803
819
  uid = os.getuid()
804
- out = subprocess.run(
805
- ["launchctl", "print-disabled", f"gui/{uid}"],
806
- capture_output=True, text=True, timeout=2,
807
- ).stdout or ""
808
- for line in out.splitlines():
809
- if f'"{label}"' in line and "=> disabled" in line:
810
- return "disabled"
820
+ result = subprocess.run(
821
+ ["launchctl", "print", f"gui/{uid}/{label}"],
822
+ capture_output=True, timeout=2,
823
+ )
824
+ if result.returncode == 0:
825
+ return "enabled"
826
+ return "stale"
811
827
  except Exception:
812
- # launchctl missing or timed out — best-effort fall through to enabled.
813
- pass
814
- return "enabled"
828
+ # launchctl missing or timed out — assume stale (safe: user sees STALE
829
+ # banner and is told to run 'roll loop on' to repair).
830
+ return "stale"
815
831
 
816
832
 
817
833
  def _next_cron_hint(state: Dict[str, str], zh: bool = False) -> str:
@@ -298,12 +298,19 @@ def cycle_row(cy: Dict[str, Any], backlog: Dict[str, str]) -> None:
298
298
  from datetime import datetime as _dt, timezone as _tz
299
299
  dur_s = int((_dt.now(_tz.utc) - cy["start"]).total_seconds())
300
300
  dur = fmt_dur(dur_s) if dur_s else "—"
301
- # US-VIEW-012: token column shows model's real work as input/output. Cache
302
- # creation / cache read are kept in events.ndjson for cost math but never
303
- # surface in the UI they would inflate the visible number to 10–100× the
304
- # "real" work done by the model on this cycle. fmt_tokens(0) already
305
- # returns "—", so a cycle missing usage_event prints as "—/—".
306
- tok = f"{fmt_tokens(cy.get('input_tokens') or 0)}/{fmt_tokens(cy.get('output_tokens') or 0)}"
301
+ # US-VIEW-017: show all 4 token components when cache data is available.
302
+ # Format: "in/cw↑ cr↓/out" (cache writes ↑, cache reads ↓).
303
+ # Falls back to "in/out" for cycles that predate cache tracking.
304
+ inp = cy.get('input_tokens') or 0
305
+ out_tok = cy.get('output_tokens') or 0
306
+ cw = cy.get('cache_creation_tokens') or 0
307
+ cr = cy.get('cache_read_tokens') or 0
308
+ if cw or cr:
309
+ tok = (f"{fmt_tokens(inp)}"
310
+ f"/{fmt_tokens(cw)}↑ {fmt_tokens(cr)}↓"
311
+ f"/{fmt_tokens(out_tok)}")
312
+ else:
313
+ tok = f"{fmt_tokens(inp)}/{fmt_tokens(out_tok)}"
307
314
  # cost prefers the backfilled list-price; falls back to cron.log when
308
315
  # the claude session log isn't available (only the latest cycle).
309
316
  if cy.get("cost_list") is not None:
@@ -347,7 +354,7 @@ def cycle_row(cy: Dict[str, Any], backlog: Dict[str, str]) -> None:
347
354
  " " + c(glyph_c, glyph, bold=True) + " " +
348
355
  c(time_c, pad(time_str, 5), bold=(outcome == "fail")) + " " +
349
356
  c("muted", pad(dur, 4, "r")) + " " +
350
- c("muted", pad(tok, 11, "r")) + " " +
357
+ c("muted", pad(tok, 26)) + " " +
351
358
  model_seg +
352
359
  c("muted", pad(cost, 7, "r")) + " " +
353
360
  c(sid_c, ids_str, bold=True) + pr_marker
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@seanyao/roll",
3
- "version": "2026.522.2",
3
+ "version": "2026.523.1",
4
4
  "description": "Roll — Roll out features with AI agents",
5
5
  "scripts": {
6
6
  "test": "bash tests/run.sh"
@@ -224,6 +224,65 @@ Add after `## 文档覆盖度` section:
224
224
  {发现内容列表 或 "文档新鲜度良好,无滞后或缺失项。"}
225
225
  ```
226
226
 
227
+ ### Scan 7 — Test Quality (rubric-driven)
228
+
229
+ Apply the test-quality rubric at [guide/en/testing/quality-rubric.md](../../guide/en/testing/quality-rubric.md)
230
+ (Chinese: [quality-rubric.zh.md](../../guide/zh/testing/quality-rubric.md)) against every file under
231
+ `tests/`. The rubric publishes six anti-pattern categories (❶..❻); each has a
232
+ **Signals** subsection that lists the matching heuristics. Scan 7 is purely a
233
+ mechanical apply-the-rubric step — no new logic.
234
+
235
+ **Per-category signals** — read from the rubric, summarized here:
236
+
237
+ | Marker | Anti-pattern | Cheapest signal |
238
+ |--------|--------------|-----------------|
239
+ | ❶ | Hardcoded business data | Bare numeric / version / pricing literal inside `[[ "$output" == *"..."* ]]` that matches a value also defined in `lib/` |
240
+ | ❷ | Over-mocking real boundaries | `function git() {` / `function gh() {` overrides at the top of a unit test |
241
+ | ❸ | Asserting implementation details | `grep '_internal_helper'` against output; assertions on `.roll/internal/*` paths |
242
+ | ❹ | Fixture order coupling | `setup_file` writes shared mutable state without per-test reset |
243
+ | ❺ | Testing private functions | Test sources a `lib/` file and calls a `_underscore_prefixed` helper directly |
244
+ | ❻ | Asserting framework behavior | References to `$BATS_TEST_NUMBER`, `$BATS_SUITE_NAME` in assertions |
245
+
246
+ **Rate cap — 每轮 ≤ 5 条 test-quality REFACTOR entries**. A single dream cycle may
247
+ emit more than 5 findings; the dream scan must rank by severity (❶ > ❷ > ❸ > ❹ > ❺ > ❻
248
+ and within a class, by occurrence count) and only persist the top 5 to BACKLOG.
249
+ Remaining findings go into the dream log under `## 测试质量` but are not made
250
+ into REFACTOR rows — this prevents the backlog from being drowned in test-debt
251
+ on the first scan after rubric publication.
252
+
253
+ **REFACTOR entry format** — same as other scans, but tagged with category:
254
+
255
+ ```markdown
256
+ | REFACTOR-XXX | docs: <one-line description> [test-quality:❶] — flagged by dream YYYY-MM-DD | 📋 Todo |
257
+ ```
258
+
259
+ The `[test-quality:❶]` (through `❻`) tag is **required** so downstream filtering
260
+ (e.g. "show me all ❶ items still open") is mechanical. The marker character must
261
+ match the rubric exactly.
262
+
263
+ **Optional helper** — `bin/dream-test-quality-scan` is a thin shell script
264
+ maintainers can invoke ad-hoc to dry-run the ❶ detector against a single file
265
+ or directory (see `bin/dream-test-quality-scan --help`). The dream skill itself
266
+ does **not** depend on the helper — Scan 7 is the AI agent applying the rubric.
267
+ The helper just exists so a maintainer (or this skill's smoke test) can confirm
268
+ the ❶ heuristic still finds known instances.
269
+
270
+ #### Dream Log Section (Scan 7)
271
+
272
+ Add after `## 文档新鲜度` section:
273
+
274
+ ```markdown
275
+ ## 测试质量
276
+ - 本轮发现 {N} 项(写入 BACKLOG 的前 5 项见下;剩余 {M} 项仅记录于本日志)
277
+ - ❶ 硬编码业务数据:{count}
278
+ - ❷ 过度 mock:{count}
279
+ - ❸ 断言实现细节:{count}
280
+ - ❹ Fixture 顺序耦合:{count}
281
+ - ❺ 测私有函数:{count}
282
+ - ❻ 断言框架行为:{count}
283
+ {命中文件列表 或 "未发现可治理的测试反模式。"}
284
+ ```
285
+
227
286
  ## Output
228
287
 
229
288
  ### REFACTOR Entry (.roll/backlog.md)
@@ -118,9 +118,10 @@ Document structure (two-layer separation):
118
118
  **Important rules:**
119
119
  1. Plan files go in `.roll/features/<feature>-plan.md` (**no longer using** `docs/plans/`)
120
120
  2. US details go in the corresponding `.roll/features/<feature>.md`
121
- 3. .roll/backlog.md only contains index rows (one row per US), **do not write** AC / Files / Notes
122
- 4. Domain model files go in `.roll/domain/` create on first greenfield design, update incrementally
123
- 5. **Do not** write to `~/.kimi/` or any global config directory
121
+ 3. **FIX / IDEA detail files use ID-prefixed filenames**: `.roll/features/<epic>/FIX-097.md`, not `.roll/features/<epic>/some-descriptive-slug.md`. Reason: a single FIX is one card, not a long-lived feature; the ID is the most stable handle, descriptive slugs date quickly and break links. US can keep feature-slug naming (US lives inside a multi-Story feature file). Quick lookup: `ls .roll/features/<epic>/FIX-*.md` finds all bugs in that area without grepping content.
122
+ 4. .roll/backlog.md only contains index rows (one row per US), **do not write** AC / Files / Notes
123
+ 5. Domain model files go in `.roll/domain/` — create on first greenfield design, update incrementally
124
+ 6. **Do not** write to `~/.kimi/` or any global config directory
124
125
 
125
126
  **File path resolution order:**
126
127
  1. Determine Feature ownership (based on the requirement domain: compiler / ingest / qa / ...)
@@ -29,7 +29,7 @@ $roll-notes 今天的 code review 给了很好的反馈
29
29
 
30
30
  ## Behavior
31
31
 
32
- 1. **Determine file path**: `notes/YYYY-MM-DD.md` relative to project root
32
+ 1. **Determine file path**: `.roll/notes/YYYY-MM-DD.md` relative to project root (parallel to `.roll/dream/` and `.roll/briefs/` — notes is project metadata, not source)
33
33
  2. **Get current time**: Use `Asia/Shanghai` timezone (`TZ=Asia/Shanghai date`)
34
34
  3. **Read existing entries for style**: Before writing, read the last 2–3 entries
35
35
  in the same file. Analyze their style: heading format, voice/tone,
@@ -95,6 +95,9 @@ $roll-notes 今天的 code review 给了很好的反馈
95
95
  ## File location
96
96
 
97
97
  ```
98
- notes/
99
- └── YYYY-MM-DD.md
98
+ .roll/
99
+ └── notes/
100
+ └── YYYY-MM-DD.md
100
101
  ```
102
+
103
+ 注:notes 是项目元数据(与 `.roll/dream/` / `.roll/briefs/` 同级),不入 git;由 dream/brief 等下游 skill 跨日聚合。