@seanyao/roll 2.603.1 → 2.604.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/roll CHANGED
@@ -4,7 +4,7 @@ set -euo pipefail
4
4
  # Roll — AI Agent Convention Manager
5
5
  # Single source of truth for how all AI coding agents behave.
6
6
 
7
- VERSION="2.603.1"
7
+ VERSION="2.604.2"
8
8
  ROLL_HOME="${ROLL_HOME:-${HOME}/.roll}"
9
9
  ROLL_CONFIG="${ROLL_HOME}/config.yaml"
10
10
  ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
@@ -1092,8 +1092,6 @@ editor: ${EDITOR:-vim}
1092
1092
  # loop_minute: 5 # omit to auto-derive from project hash
1093
1093
  loop_dream_hour: 3
1094
1094
  # loop_dream_minute: 10 # omit to auto-derive
1095
- loop_brief_hour: 9
1096
- # loop_brief_minute: 15 # omit to auto-derive
1097
1095
  primary_agent: claude
1098
1096
  YAML
1099
1097
  ok "$(msg shared.created_roll_config_yaml)"
@@ -4046,6 +4044,11 @@ _peer_call() {
4046
4044
  _watchdog_pid=$!
4047
4045
  wait "$_peer_pid" 2>/dev/null || _peer_exit=$?
4048
4046
  # Cancel watchdog if agent finished on time.
4047
+ # FIX-181: kill children (sleep) first so they cannot outlive the
4048
+ # watchdog and later hit a reused PID, then kill the watchdog itself.
4049
+ if command -v pkill >/dev/null 2>&1; then
4050
+ pkill -P "$_watchdog_pid" 2>/dev/null || true
4051
+ fi
4049
4052
  kill "$_watchdog_pid" 2>/dev/null || true
4050
4053
  wait "$_watchdog_pid" 2>/dev/null || true
4051
4054
  output="$(cat "$_out" 2>/dev/null || true)"
@@ -5658,7 +5661,7 @@ cmd_changelog() {
5658
5661
  esac
5659
5662
  done
5660
5663
  local raw
5661
- raw=$(python3 "${ROLL_PKG_DIR}/lib/changelog_generate.py" "${pyargs[@]}") || return 1
5664
+ raw=$(python3 "${ROLL_PKG_DIR}/lib/changelog_generate.py" ${pyargs[@]+"${pyargs[@]}"}) || return 1
5662
5665
  if [ "$is_json" = 1 ]; then printf '%s\n' "$raw"; return 0; fi
5663
5666
  local final="$raw"
5664
5667
  if [ "$want_ai" = 1 ]; then
@@ -5697,6 +5700,34 @@ EOF
5697
5700
  esac
5698
5701
  }
5699
5702
 
5703
+ # ─── roll consistency check — unified consistency orchestrator (US-CONSIST-001) ──
5704
+ cmd_consistency() {
5705
+ local subcmd="${1:-check}"
5706
+ shift || true
5707
+ case "$subcmd" in
5708
+ check)
5709
+ python3 "${ROLL_PKG_DIR}/lib/consistency_check.py" "$@"
5710
+ ;;
5711
+ --help|-h|help)
5712
+ cat <<EOF
5713
+ Usage: roll consistency <subcommand>
5714
+
5715
+ check [--json] [--project-dir DIR] 逐维度跑一致性检查
5716
+ Run checks across five dimensions (code, docs, i18n, tests, site)
5717
+ and produce a structured pass/gap report.
5718
+
5719
+ roll consistency check # human-readable report
5720
+ roll consistency check --json # machine-readable JSON
5721
+ EOF
5722
+ ;;
5723
+ *)
5724
+ err "$(msg consistency.unknown_sub "$subcmd")"
5725
+ err "Try: roll consistency check"
5726
+ return 1
5727
+ ;;
5728
+ esac
5729
+ }
5730
+
5700
5731
  # ─── roll config — unified read/list/set for loop schedule keys (US-LOOP-033) ──
5701
5732
  #
5702
5733
  # One interactive entry point so users don't have to remember whether a key
@@ -5719,8 +5750,6 @@ loop_schedule.period_minutes|project|nested:loop_schedule|1|1440|60
5719
5750
  loop_schedule.offset_minute|project|nested:loop_schedule|0|59|0
5720
5751
  loop_dream_hour|global|flat|0|23|3
5721
5752
  loop_dream_minute|global|flat|0|59|-
5722
- loop_brief_hour|global|flat|0|23|9
5723
- loop_brief_minute|global|flat|0|59|-
5724
5753
  EOF
5725
5754
  }
5726
5755
 
@@ -5859,10 +5888,10 @@ Usage: roll config <key> print current value + source
5859
5888
  roll config --list list all loop schedule keys
5860
5889
  roll config <key> <value> [--global|--project] set a value
5861
5890
  统一调度配置
5862
- Read / list / set the loop, dream and brief schedule keys without hand-editing
5891
+ Read / list / set the loop and dream schedule keys without hand-editing
5863
5892
  yaml. Default write scope is --project (.roll/local.yaml); --global writes
5864
5893
  ~/.roll/config.yaml.
5865
- 读 / 列 / 写 loop、dream、brief 调度 key,免去手工编辑 yaml。默认写 --project
5894
+ 读 / 列 / 写 loop、dream 调度 key,免去手工编辑 yaml。默认写 --project
5866
5895
  (.roll/local.yaml);--global 写 ~/.roll/config.yaml。
5867
5896
 
5868
5897
  Supported keys (range):
@@ -5872,14 +5901,11 @@ Supported keys (range):
5872
5901
  loop_schedule.offset_minute 0-59 minute offset within the period
5873
5902
  loop_dream_hour 0-23 dream daily fire hour
5874
5903
  loop_dream_minute 0-59 dream daily fire minute
5875
- loop_brief_hour 0-23 brief daily fire hour
5876
- loop_brief_minute 0-59 brief daily fire minute
5877
5904
 
5878
5905
  Compact facades (write multiple keys at once):
5879
5906
  roll config loop-window 9-18 loop_active_start + loop_active_end
5880
5907
  roll config loop-schedule 30/7 period_minutes + offset_minute
5881
5908
  roll config dream-time 03:20 loop_dream_hour + loop_dream_minute
5882
- roll config brief-time 09:15 loop_brief_hour + loop_brief_minute
5883
5909
 
5884
5910
  Examples:
5885
5911
  roll config loop_dream_hour
@@ -5979,11 +6005,12 @@ _config_loop_schedule() {
5979
6005
  return 0
5980
6006
  }
5981
6007
 
5982
- # US-LOOP-035: `roll config dream-time <HH:MM>` / `brief-time <HH:MM>` — compact
5983
- # facade writing loop_<svc>_hour + loop_<svc>_minute in one shot. With no value,
5984
- # prints the current effective time + source. HH ∈ [0,23], MM ∈ [0,59].
6008
+ # US-LOOP-035: `roll config dream-time <HH:MM>` compact facade writing
6009
+ # loop_<svc>_hour + loop_<svc>_minute in one shot. With no value, prints the
6010
+ # current effective time + source. HH ∈ [0,23], MM ∈ [0,59].
5985
6011
  # These keys are global-scoped, so writes land in ~/.roll/config.yaml.
5986
- # _config_daily_time <svc> <value> svc {dream, brief}
6012
+ # FIX-195: brief retired — svc is {dream} (the helper stays generic).
6013
+ # _config_daily_time <svc> <value>
5987
6014
  _config_daily_time() {
5988
6015
  local svc="$1" value="$2"
5989
6016
  local hour_key="loop_${svc}_hour" min_key="loop_${svc}_minute"
@@ -6082,8 +6109,9 @@ cmd_config() {
6082
6109
  [[ $_rc -eq 0 && -n "$value" ]] && _config_reload_schedule
6083
6110
  return $_rc
6084
6111
  ;;
6085
- dream-time|brief-time)
6086
- # dream/brief schedule keys are global-scoped (~/.roll/config.yaml).
6112
+ dream-time)
6113
+ # FIX-195: brief-time retired with the brief loop; dream-time is the only
6114
+ # daily schedule facade. The key is global-scoped (~/.roll/config.yaml).
6087
6115
  local fscope="$scope"; [[ -z "$fscope" ]] && fscope="global"
6088
6116
  ROLL_CFG_SCOPE="$fscope"
6089
6117
  local _rc
@@ -6138,7 +6166,7 @@ cmd_config() {
6138
6166
  fi
6139
6167
  _config_set "$key" "$value" "$file"
6140
6168
  ok "✓ set $key = $value in $file"
6141
- # US-LOOP-036: every recognized config key is a loop/dream/brief schedule key
6169
+ # US-LOOP-036: every recognized config key is a loop/dream schedule key
6142
6170
  # (display-only keys are out of scope for this command), so a successful write
6143
6171
  # always reloads the launchd plists.
6144
6172
  _config_reload_schedule
@@ -6192,14 +6220,14 @@ cmd_review_pr() {
6192
6220
 
6193
6221
  local slug; slug=$(_gh_repo_slug) || { err "Not a GitHub repo — review-pr requires GitHub remote"; return 1; }
6194
6222
 
6195
- local pr_json
6196
- pr_json=$(gh -R "$slug" pr view "$pr_number" --json title,body,diff 2>&1) \
6223
+ local pr_json diff
6224
+ pr_json=$(gh -R "$slug" pr view "$pr_number" --json title,body 2>&1) \
6197
6225
  || { err "gh pr view failed: ${pr_json}"; return 1; }
6226
+ diff=$(gh -R "$slug" pr diff "$pr_number" 2>/dev/null) || true
6198
6227
 
6199
6228
  local title body diff
6200
6229
  title=$(echo "$pr_json" | jq -r '.title // ""')
6201
6230
  body=$(echo "$pr_json" | jq -r '.body // ""')
6202
- diff=$(echo "$pr_json" | jq -r '.diff // ""')
6203
6231
 
6204
6232
  if echo "$body" | grep -qF '[skip-ai-review]'; then
6205
6233
  gh -R "$slug" pr review "$pr_number" --approve -b "Auto-approved: [skip-ai-review] detected" 2>/dev/null || true
@@ -8301,96 +8329,6 @@ PRRUNNER
8301
8329
  chmod +x "$script_path"
8302
8330
  }
8303
8331
 
8304
- # _write_ci_loop_runner_script <script_path> <project_path> <roll_bin> <log_path>
8305
- # US-AUTO-045 Phase 2: the script the com.roll.ci.<slug> launchd plist runs
8306
- # every 5 min. Mirrors _write_pr_loop_runner_script — lightweight (no agent,
8307
- # no tmux): portable PATH, a single-flight re-entry lock (pid+ts, 15-min
8308
- # staleness so a crashed pass self-heals next tick), then drives the _ci_scan
8309
- # orchestrator via the `roll _ci_scan` dispatch.
8310
- _write_ci_loop_runner_script() {
8311
- local script_path="$1" project_path="$2" roll_bin="$3" log_path="$4"
8312
- mkdir -p "$(dirname "$script_path")"
8313
- local lock="${project_path}/.roll/loop/.ci-loop.lock"
8314
- cat > "$script_path" << CIRUNNER
8315
- #!/bin/bash -l
8316
- set -o pipefail
8317
- # Portable PATH: launchd delivers a bare PATH missing brew/local tools. Idempotent.
8318
- for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
8319
- case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
8320
- done
8321
- export PATH
8322
- # Single-flight re-entry guard: one CI-loop pass at a time. 5-min cadence;
8323
- # 15-min (900s) staleness so a crashed/hung pass self-heals on the next tick.
8324
- LOCK="${lock}"
8325
- mkdir -p "\$(dirname "\$LOCK")"
8326
- if [ -f "\$LOCK" ]; then
8327
- _pp=""; _pt=""
8328
- IFS=: read -r _pp _pt < "\$LOCK" 2>/dev/null || true
8329
- _now=\$(date -u +%s)
8330
- if [ -n "\$_pp" ] && [ -n "\$_pt" ] && kill -0 "\$_pp" 2>/dev/null && [ "\$((_now - _pt))" -lt 900 ]; then
8331
- exit 0
8332
- fi
8333
- rm -f "\$LOCK"
8334
- fi
8335
- printf '%s:%s\n' "\$\$" "\$(date -u +%s)" > "\$LOCK"
8336
- trap 'rm -f "\$LOCK"' EXIT
8337
- cd "${project_path}" || exit 0
8338
- bash "${roll_bin}" _ci_scan >> "${log_path}" 2>&1 || true
8339
- CIRUNNER
8340
- chmod +x "$script_path"
8341
- }
8342
-
8343
- # _write_alert_loop_runner_script <script_path> <project_path> <roll_bin> <log_path>
8344
- # US-AUTO-046 Phase 2: the script the com.roll.alert.<slug> launchd plist runs
8345
- # every 1 min. Mirrors _write_ci_loop_runner_script — lightweight (no agent,
8346
- # no tmux): portable PATH, a single-flight re-entry lock (pid+ts), then drives
8347
- # the Phase-1 _alert_dispatch consumer via the `roll _alert_dispatch` dispatch.
8348
- # _alert_dispatch reads $_LOOP_ALERT, parses + notifies + records to
8349
- # alert-log.jsonl, then rotates the file. Staleness is 180s (3 ticks at the
8350
- # 1-min cadence) so a crashed/hung pass self-heals quickly.
8351
- _write_alert_loop_runner_script() {
8352
- local script_path="$1" project_path="$2" roll_bin="$3" log_path="$4"
8353
- mkdir -p "$(dirname "$script_path")"
8354
- local lock="${project_path}/.roll/loop/.alert-loop.lock"
8355
- local slug; slug=$(_project_slug "${project_path}")
8356
- cat > "$script_path" << ALERTRUNNER
8357
- #!/bin/bash -l
8358
- set -o pipefail
8359
- # Portable PATH: launchd delivers a bare PATH missing brew/local tools. Idempotent.
8360
- for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
8361
- case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
8362
- done
8363
- export PATH
8364
- # Single-flight re-entry guard: one alert-loop pass at a time. 1-min cadence;
8365
- # 180s staleness so a crashed/hung pass self-heals within a few ticks.
8366
- LOCK="${lock}"
8367
- mkdir -p "\$(dirname "\$LOCK")"
8368
- if [ -f "\$LOCK" ]; then
8369
- _pp=""; _pt=""
8370
- IFS=: read -r _pp _pt < "\$LOCK" 2>/dev/null || true
8371
- _now=\$(date -u +%s)
8372
- if [ -n "\$_pp" ] && [ -n "\$_pt" ] && kill -0 "\$_pp" 2>/dev/null && [ "\$((_now - _pt))" -lt 180 ]; then
8373
- exit 0
8374
- fi
8375
- rm -f "\$LOCK"
8376
- fi
8377
- printf '%s:%s\n' "\$\$" "\$(date -u +%s)" > "\$LOCK"
8378
- trap 'rm -f "\$LOCK"' EXIT
8379
- cd "${project_path}" || exit 0
8380
- # FIX-171: bake the project-local runtime dir directly; do not rely on
8381
- # _loop_runtime_dir which may fail to resolve in fresh shells. Set
8382
- # _LOOP_ALERT so the dispatched roll reads the project-local ALERT file,
8383
- # but do not override an externally-supplied value (test sandboxes).
8384
- _LOOP_RT_DIR="${project_path}/.roll/loop"
8385
- if [ -d "\$_LOOP_RT_DIR" ]; then
8386
- : "\${_LOOP_ALERT:=\${_LOOP_RT_DIR}/ALERT-${slug}.md}"
8387
- export _LOOP_ALERT
8388
- fi
8389
- bash "${roll_bin}" _alert_dispatch >> "${log_path}" 2>&1 || true
8390
- ALERTRUNNER
8391
- chmod +x "$script_path"
8392
- }
8393
-
8394
8332
  # Like _write_runner_script but prepends an active window guard.
8395
8333
  # Silently exits when current hour is outside [active_start, active_end).
8396
8334
  # When tmux is available, wraps the inner command in a detached tmux session
@@ -9715,9 +9653,10 @@ _install_launchd_plists() {
9715
9653
  local shared="${_SHARED_ROOT}"
9716
9654
 
9717
9655
  mkdir -p "$_LAUNCHD_DIR"
9718
- mkdir -p "${shared}/loop" "${shared}/dream" "${shared}/brief" "${shared}/pr" "${shared}/ci" "${shared}/alert"
9656
+ # FIX-194/FIX-195: brief/ci/alert loops retired — only loop/dream/pr remain.
9657
+ mkdir -p "${shared}/loop" "${shared}/dream" "${shared}/pr"
9719
9658
 
9720
- local active_start active_end dream_hour dream_minute brief_hour brief_minute loop_period loop_offset
9659
+ local active_start active_end dream_hour dream_minute loop_period loop_offset
9721
9660
  local _aw; _aw=$(_loop_read_active_window "$project_path")
9722
9661
  active_start="${_aw%% *}"; active_end="${_aw##* }"
9723
9662
  # US-LOOP-012: use _loop_schedule_spec instead of raw loop_minute
@@ -9726,22 +9665,16 @@ _install_launchd_plists() {
9726
9665
  loop_offset="${loop_spec##* }"
9727
9666
  dream_hour=$(_config_read_int "loop_dream_hour" "3")
9728
9667
  dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")
9729
- brief_hour=$(_config_read_int "loop_brief_hour" "9")
9730
- brief_minute=$(_config_read_int "loop_brief_minute" "$(_loop_derive_minute "$project_path" 4)")
9731
9668
 
9732
9669
  # FIX-054: terminal preference removed — runner always uses Terminal.app.
9733
9670
 
9734
9671
  # US-AUTO-044: "pr" is the 4th service — a 5-min PR Loop (period=5, empty hour
9735
9672
  # → StartInterval=300). No skill (it drives _loop_pr_inbox, not an agent).
9736
- # US-AUTO-045: "ci" is the 5th service — a 5-min CI Loop (period=5, empty hour
9737
- # → StartInterval=300). No skill (it drives _ci_scan, not an agent).
9738
- # US-AUTO-046: "alert" is the 6th service — a 1-min Alert Loop (period=1, empty
9739
- # hour → StartInterval=60). No skill (it drives _alert_dispatch, not an agent).
9740
- local services=("loop" "dream" "brief" "pr" "ci" "alert")
9741
- local skill_names=("roll-loop" "roll-.dream" "roll-brief" "" "" "")
9742
- local periods=("$loop_period" "60" "60" "5" "5" "1")
9743
- local offsets=("$loop_offset" "$dream_minute" "$brief_minute" "0" "0" "0")
9744
- local hours=("" "$dream_hour" "$brief_hour" "" "" "")
9673
+ local services=("loop" "dream" "pr")
9674
+ local skill_names=("roll-loop" "roll-.dream" "")
9675
+ local periods=("$loop_period" "60" "5")
9676
+ local offsets=("$loop_offset" "$dream_minute" "0")
9677
+ local hours=("" "$dream_hour" "")
9745
9678
 
9746
9679
  local updated=0
9747
9680
  local slug; slug=$(_project_slug "$project_path")
@@ -9774,22 +9707,8 @@ _install_launchd_plists() {
9774
9707
  local pr_log="${project_path}/.roll/loop/pr.log"
9775
9708
  mkdir -p "${project_path}/.roll/loop"
9776
9709
  _write_pr_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$pr_log"
9777
- elif [[ "$svc" == "ci" ]]; then
9778
- # US-AUTO-045 Phase 2: lightweight CI Loop runner — drives _ci_scan every
9779
- # 5 min (no agent, no tmux). Records run timing, auto-reruns transient
9780
- # failures, and surfaces flaky / degradation stories.
9781
- local ci_log="${project_path}/.roll/loop/ci.log"
9782
- mkdir -p "${project_path}/.roll/loop"
9783
- _write_ci_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$ci_log"
9784
- elif [[ "$svc" == "alert" ]]; then
9785
- # US-AUTO-046 Phase 2: lightweight Alert Loop runner — drives _alert_dispatch
9786
- # every 1 min (no agent, no tmux). Consumes _LOOP_ALERT: parse → notify →
9787
- # record to alert-log.jsonl → rotate the file.
9788
- local alert_log="${project_path}/.roll/loop/alert.log"
9789
- mkdir -p "${project_path}/.roll/loop"
9790
- _write_alert_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$alert_log"
9791
9710
  else
9792
- # IDEA-051: dream/brief cron logs are project-local, mirroring loop (FIX-139).
9711
+ # dream cron log is project-local, mirroring loop (FIX-139).
9793
9712
  local log="${project_path}/.roll/${svc}/cron.log"
9794
9713
  mkdir -p "${project_path}/.roll/${svc}"
9795
9714
  _write_runner_script "$runner" "$project_path" "cd \"${project_path}\" && ${cmd}" "$log"
@@ -9915,7 +9834,7 @@ cmd_loop() {
9915
9834
  *) cat <<'HELP'
9916
9835
  Usage: roll loop <on|off|now|test|status|monitor|runs|log|story|events|attach|mute|unmute|pause|resume|reset|gc|branches>
9917
9836
 
9918
- on Install launchd scheduler (loop + dream + brief + pr + ci + alert)
9837
+ on Install launchd scheduler (loop + dream + pr)
9919
9838
  off Remove launchd scheduler
9920
9839
  now Run one cycle immediately
9921
9840
  test Quick smoke test (tmux/popup/stream chain)
@@ -9960,7 +9879,7 @@ _loop_on() {
9960
9879
  local project_path; project_path=$(pwd -P)
9961
9880
  local agent; agent=$(_project_agent)
9962
9881
 
9963
- local active_start active_end loop_minute dream_hour dream_minute brief_hour brief_minute
9882
+ local active_start active_end loop_minute dream_hour dream_minute
9964
9883
  local _aw; _aw=$(_loop_read_active_window "$project_path")
9965
9884
  active_start="${_aw%% *}"; active_end="${_aw##* }"
9966
9885
  # US-LOOP-011: read schedule spec from project or global config
@@ -9975,8 +9894,6 @@ _loop_on() {
9975
9894
  loop_sched_zh=$(_loop_schedule_desc "$loop_period" "$loop_offset" zh)
9976
9895
  dream_hour=$(_config_read_int "loop_dream_hour" "3")
9977
9896
  dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")
9978
- brief_hour=$(_config_read_int "loop_brief_hour" "9")
9979
- brief_minute=$(_config_read_int "loop_brief_minute" "$(_loop_derive_minute "$project_path" 4)")
9980
9897
 
9981
9898
  if [[ "$(uname)" == "Darwin" ]]; then
9982
9899
  _install_launchd_plists "$project_path" >/dev/null
@@ -9987,7 +9904,7 @@ _loop_on() {
9987
9904
  # does not disturb the overrides DB.
9988
9905
  local uid; uid=$(id -u)
9989
9906
  local all_loaded=true
9990
- for svc in loop dream brief pr ci alert; do
9907
+ for svc in loop dream pr; do
9991
9908
  local label; label=$(_launchd_label "$svc" "$project_path")
9992
9909
  local plist; plist=$(_launchd_plist_path "$svc" "$project_path")
9993
9910
  if ! _launchd_is_loaded "$label"; then
@@ -10011,7 +9928,6 @@ _loop_on() {
10011
9928
  msg loop.roll_loop_s_active_02d_00 \
10012
9929
  "$loop_sched_en" "$active_start" "$active_end" "$loop_sched_zh" "$active_start" "$active_end"
10013
9930
  msg loop.roll_dream_daily_at_02d_02d "$dream_hour" "$dream_minute" "$dream_hour" "$dream_minute"
10014
- msg loop.roll_brief_daily_at_02d_02d "$brief_hour" "$brief_minute" "$brief_hour" "$brief_minute"
10015
9931
  echo " • Agent: ${agent} (change: roll agent use <name>)"
10016
9932
  return 0
10017
9933
  fi
@@ -10022,29 +9938,26 @@ _loop_on() {
10022
9938
  warn "$(msg loop.loop_already_enabled_for_this_project_2)"; return 0
10023
9939
  fi
10024
9940
 
10025
- mkdir -p "${_SHARED_ROOT}/loop" "${_SHARED_ROOT}/dream" "${_SHARED_ROOT}/brief"
9941
+ mkdir -p "${_SHARED_ROOT}/loop" "${_SHARED_ROOT}/dream"
10026
9942
 
10027
9943
  # FIX-052: per-project cron logs so concurrent projects don't interleave.
10028
9944
  local slug; slug=$(_project_slug "$project_path")
10029
- local loop_cmd dream_cmd brief_cmd
9945
+ local loop_cmd dream_cmd
10030
9946
  loop_cmd="cd \"${project_path}\" && $(_agent_skill_cmd "${sd}/roll-loop/SKILL.md") >> ${_SHARED_ROOT}/loop/cron-${slug}.log 2>&1"
10031
- # IDEA-051: dream/brief cron logs are project-local, mirroring loop (FIX-139).
10032
- mkdir -p "${project_path}/.roll/dream" "${project_path}/.roll/brief"
9947
+ # IDEA-051: dream cron log is project-local, mirroring loop (FIX-139).
9948
+ mkdir -p "${project_path}/.roll/dream"
10033
9949
  dream_cmd="cd \"${project_path}\" && $(_agent_skill_cmd "${sd}/roll-.dream/SKILL.md") >> ${project_path}/.roll/dream/cron.log 2>&1"
10034
- brief_cmd="cd \"${project_path}\" && $(_agent_skill_cmd "${sd}/roll-brief/SKILL.md") >> ${project_path}/.roll/brief/cron.log 2>&1"
10035
9950
 
10036
9951
  (
10037
9952
  crontab -l 2>/dev/null
10038
9953
  printf "%d * * * * %s %s:%s\n" "$loop_minute" "$loop_cmd" "$_LOOP_TAG" "$project_path"
10039
9954
  printf "%d %d * * * %s %s:%s\n" "$dream_minute" "$dream_hour" "$dream_cmd" "$_LOOP_TAG" "$project_path"
10040
- printf "%d %d * * * %s %s:%s\n" "$brief_minute" "$brief_hour" "$brief_cmd" "$_LOOP_TAG" "$project_path"
10041
9955
  ) | crontab -
10042
9956
 
10043
9957
  ok "$(msg loop.loop_enabled_2)"
10044
9958
  msg loop.roll_loop_s_active_02d_00_2 \
10045
9959
  "$loop_sched_en" "$active_start" "$active_end" "$loop_sched_zh" "$active_start" "$active_end"
10046
9960
  msg loop.roll_dream_daily_at_02d_02d_2 "$dream_hour" "$dream_minute" "$dream_hour" "$dream_minute"
10047
- msg loop.roll_brief_daily_at_02d_02d_2 "$brief_hour" "$brief_minute" "$brief_hour" "$brief_minute"
10048
9961
  echo " • Agent: ${agent} (change: roll agent use <name>)"
10049
9962
  }
10050
9963
 
@@ -10054,7 +9967,7 @@ _loop_off() {
10054
9967
  if [[ "$(uname)" == "Darwin" ]]; then
10055
9968
  local any_loaded=false
10056
9969
  local _skip_off; _launchd_should_skip_registry && _skip_off=1 || _skip_off=0
10057
- for svc in loop dream brief pr ci alert; do
9970
+ for svc in loop dream pr; do
10058
9971
  local label; label=$(_launchd_label "$svc" "$project_path")
10059
9972
  if _launchd_is_loaded "$label"; then
10060
9973
  any_loaded=true
@@ -10069,7 +9982,7 @@ _loop_off() {
10069
9982
  fi
10070
9983
  local slug; slug=$(_project_slug "$project_path")
10071
9984
  local uid; uid=$(id -u)
10072
- for svc in loop dream brief pr ci alert; do
9985
+ for svc in loop dream pr; do
10073
9986
  rm -f "${_SHARED_ROOT}/${svc}/run-${slug}.sh"
10074
9987
  # FIX-081: reverse the FIX-059 auto-bootstrap guard. `_install_launchd_plists`
10075
9988
  # writes `launchctl disable gui/<UID>/<label>` for every brand-new plist
@@ -10405,7 +10318,7 @@ _legacy_loop_status() {
10405
10318
  echo ""
10406
10319
  if [[ "$(uname)" == "Darwin" ]]; then
10407
10320
  echo -e " Services Agent: ${CYAN}${agent}${NC}"
10408
- for svc in loop dream brief pr ci alert; do
10321
+ for svc in loop dream pr; do
10409
10322
  local state; state=$(_launchd_svc_state "$svc" "$project_path")
10410
10323
  if [[ "$svc" == "loop" ]] && $_is_paused; then
10411
10324
  local _paused_at; _paused_at=$(grep '^paused_at:' "$_LOOP_STATE" 2>/dev/null | awk '{print $2}' | tr -d '"')
@@ -10419,7 +10332,7 @@ _legacy_loop_status() {
10419
10332
  echo -e " ${YELLOW}loop ⏸ paused${NC}${_dur} run: roll loop resume"
10420
10333
  else
10421
10334
  local _tick_age=""
10422
- case "$svc" in pr|ci|alert)
10335
+ case "$svc" in pr)
10423
10336
  _tick_age=$(_loop_tick_age "$svc")
10424
10337
  [ -n "$_tick_age" ] && _tick_age=" tick ${_tick_age}"
10425
10338
  esac
@@ -11601,7 +11514,7 @@ _loop_pr_heal_self() {
11601
11514
 
11602
11515
  local agent; agent="$(_project_agent 2>/dev/null)"; agent="${agent:-claude}"
11603
11516
 
11604
- ( echo "$BASHPID" > "$lock"
11517
+ ( echo "${BASHPID:-$$}" > "$lock"
11605
11518
  _loop_pr_do_heal "$num" "$head_ref" "$slug" "$agent" >/dev/null 2>&1
11606
11519
  rm -f "$lock"
11607
11520
  ) &
@@ -11828,54 +11741,25 @@ _loop_is_roll_meta_story() {
11828
11741
 
11829
11742
  # _loop_pr_classify <head_ref> <human_review_state> <ci_state> <mergeable_state>
11830
11743
  # Prints one of:
11831
- # loop_self
11832
- # blocked_human_request_changes
11833
- # blocked_human_approved
11834
- # stale
11835
- # eligible
11836
- # Exit 0 always — callers parse the printed token.
11744
+ # ci_red — CI failed → heal
11745
+ # stale — needs rebase / conflicting / behind
11746
+ # ready — CI green + clean → merge
11747
+ # Human review intentionally irrelevant — CI is the only gate.
11837
11748
  _loop_pr_classify() {
11838
11749
  local head_ref="${1:-}"
11839
11750
  local human_review="${2:-}"
11840
11751
  local ci_state="${3:-}"
11841
11752
  local mergeable="${4:-}"
11842
11753
 
11843
- case "$head_ref" in
11844
- loop/*)
11845
- # US-LOOP-049: loop/* PRs with CI failure get their own classification
11846
- # so _loop_pr_inbox can route them to the PR hot-fix path.
11847
- if [[ "$ci_state" == "failure" ]]; then
11848
- echo "loop_self_ci_red"; return 0
11849
- fi
11850
- echo "loop_self"; return 0
11851
- ;;
11852
- claude/*)
11853
- # Claude-agent-authored PRs are loop-owned for autonomous merge/rebase
11854
- # once green — same treatment as loop/* — so they close within a
11855
- # PR-loop tick instead of waiting on a human or a GHA bot review.
11856
- # CI-red claude/* PRs are deliberately NOT routed to background heal
11857
- # (no agent re-spawn); they fall through to the stale/eligible paths
11858
- # below so a human decides what to do with a failing run.
11859
- if [[ "$ci_state" != "failure" ]]; then
11860
- echo "loop_self"; return 0
11861
- fi
11862
- ;;
11863
- esac
11864
-
11865
- case "$human_review" in
11866
- CHANGES_REQUESTED) echo "blocked_human_request_changes"; return 0 ;;
11867
- APPROVED) echo "blocked_human_approved"; return 0 ;;
11754
+ case "$mergeable" in
11755
+ BEHIND|DIRTY|CONFLICTING) echo "stale"; return 0 ;;
11868
11756
  esac
11869
11757
 
11870
- # CONFLICTING is the GraphQL `mergeable` enum; DIRTY/BEHIND are
11871
- # `mergeStateStatus` values (_loop_pr_inbox feeds the latter). Accept both
11872
- # spellings so a conflicting/out-of-date PR is reliably routed to rebase.
11873
- if [ "$ci_state" = "failure" ] || [ "$mergeable" = "CONFLICTING" ] || [ "$mergeable" = "DIRTY" ] || [ "$mergeable" = "BEHIND" ]; then
11874
- echo "stale"
11875
- return 0
11758
+ if [ "$ci_state" = "failure" ]; then
11759
+ echo "ci_red"; return 0
11876
11760
  fi
11877
11761
 
11878
- echo "eligible"
11762
+ echo "ready"
11879
11763
  }
11880
11764
 
11881
11765
  # _loop_pr_rebase_circuit <pr_number>
@@ -12013,6 +11897,9 @@ _loop_pr_rebase_stale() {
12013
11897
  fi
12014
11898
 
12015
11899
  git fetch origin "$head_ref" 2>/dev/null || return 0
11900
+ # Reset local tracking branch to the freshly-fetched remote state
11901
+ # before rebasing, otherwise force-push destroys commits pushed by others.
11902
+ git checkout -B "$head_ref" "origin/$head_ref" 2>/dev/null || return 0
12016
11903
 
12017
11904
  # FIX-159: save original branch so we can restore it unconditionally
12018
11905
  local _orig
@@ -12135,44 +12022,29 @@ _loop_pr_inbox() {
12135
12022
  verdict=$(_loop_pr_classify "$head_ref" "$human_review" "$ci_state" "$mergeable")
12136
12023
 
12137
12024
  case "$verdict" in
12138
- loop_self)
12139
- # Green self-PR: merge when clean, else rebase onto main first. A
12140
- # loop/* or claude/* PR that fell BEHIND or now CONFLICTS with main can
12141
- # never auto-merge until rebased — eager-merge alone would leave it
12142
- # stuck open forever. Rebase is circuit-gated (≥3 attempts/24h → ALERT)
12143
- # and merges on a later tick once the rebased head is green + clean.
12144
- case "$mergeable" in
12145
- BEHIND|DIRTY|CONFLICTING)
12146
- if _loop_pr_rebase_circuit "$num"; then
12147
- _loop_pr_rebase_stale "$num" "$head_ref" || true
12148
- fi
12149
- ;;
12150
- *)
12151
- _loop_pr_merge_self_eager "$num" "$ci_state" "$mergeable" "$slug"
12152
- ;;
12153
- esac
12154
- ;;
12155
- loop_self_ci_red)
12156
- # US-LOOP-062a: a red loop/* PR (classified by US-LOOP-049) is now
12157
- # background-healed: bounded retries via heal budget + dynamic agent,
12158
- # falling back to the deduped [TYPE:loop-pr-ci-red] ALERT (FIX-158's
12159
- # surfacing) when heal is disabled/exhausted. Re-wires US-LOOP-050.
12025
+ ci_red)
12160
12026
  _loop_pr_heal_self "$num" "$head_ref" "$slug" || true
12161
12027
  ;;
12162
- blocked_human_request_changes)
12163
- : # skip — last human review requested changes; wait for the author
12164
- ;;
12165
- blocked_human_approved)
12166
- # US-LOOP-062b: human approved — merge directly when green + mergeable
12167
- # (don't wait for repo auto-merge, which may be off).
12168
- _loop_pr_merge_approved "$num" "$ci_state" "$mergeable" "$slug" || true
12169
- ;;
12170
12028
  stale)
12171
12029
  _loop_pr_rebase_circuit "$num" || true
12172
- _loop_pr_rebase_stale "$num" "$head_ref" || true
12030
+ if _loop_pr_rebase_stale "$num" "$head_ref" || true; then
12031
+ # Re-fetch PR state after rebase — if now clean, merge immediately.
12032
+ local _re_view
12033
+ _re_view=$(gh -R "$slug" pr view "$num" --json mergeStateStatus,statusCheckRollup 2>/dev/null) || true
12034
+ if [ -n "$_re_view" ]; then
12035
+ local _re_ci _re_mb
12036
+ _re_ci=$(echo "$_re_view" | jq -r '
12037
+ if (.statusCheckRollup | length) == 0 then ""
12038
+ elif any(.statusCheckRollup[]?; .conclusion == "FAILURE") then "failure"
12039
+ elif all(.statusCheckRollup[]?; .conclusion == "SUCCESS" or .conclusion == "SKIPPED") then "success"
12040
+ else "pending" end' 2>/dev/null)
12041
+ _re_mb=$(echo "$_re_view" | jq -r '.mergeStateStatus // ""' 2>/dev/null)
12042
+ _loop_pr_merge_self_eager "$num" "$_re_ci" "$_re_mb" "$slug"
12043
+ fi
12044
+ fi
12173
12045
  ;;
12174
- eligible)
12175
- _loop_pr_review_external "$num" || true
12046
+ ready)
12047
+ _loop_pr_merge_self_eager "$num" "$ci_state" "$mergeable" "$slug"
12176
12048
  ;;
12177
12049
  esac
12178
12050
 
@@ -12370,569 +12242,13 @@ _loop_pr_route() {
12370
12242
  return 0
12371
12243
  }
12372
12244
 
12373
- # US-AUTO-045 Phase 1: dedicated CI Loop helpers (loop-safe pure additions).
12374
- #
12375
- # These six helpers collect CI timing data, classify failures, auto-rerun
12376
- # transient flakes, and surface flaky / degradation signals as backlog
12377
- # entries. They are NOT yet wired into any runner or launchd plist — that is
12378
- # Phase 2 (wired by hand). Each is unit-tested in
12379
- # tests/unit/roll_loop_ci_loop.bats with gh stubbed. Do not delete or inline.
12380
- #
12381
- # State lives under project-local .roll/state/:
12382
- # ci-timing.jsonl append-only NDJSON, one line per recorded CI run
12383
- # ci-rerun-state.yaml minimal YAML: rerun attempt count per run_id
12384
- # _LOOP_ALERT is the existing shared alert file (real failures, rerun limits).
12385
-
12386
- # _ci_state_dir
12387
- # Echo the project-local CI state directory, creating it if needed.
12388
- # Resolves relative to the current working dir's .roll/ (tests cd into a
12389
- # sandbox; the live loop runner cds into the project root).
12390
- _ci_state_dir() {
12245
+ # _alert_log_file — echo path to alert-log.jsonl (used by `roll alert log` CLI).
12246
+ _alert_log_file() {
12391
12247
  local dir=".roll/state"
12392
12248
  mkdir -p "$dir" 2>/dev/null || true
12393
- echo "$dir"
12394
- }
12395
-
12396
- # _ci_record_timing <run_json>
12397
- # Parse one `gh run list --json ...` object and append a flat NDJSON line to
12398
- # ci-timing.jsonl. Idempotent: a run_id already present in the file is
12399
- # skipped. Duration is computed from createdAt → updatedAt (gh exposes no
12400
- # native duration field). Returns 0 always (loop-safe).
12401
- _ci_record_timing() {
12402
- local json="$1"
12403
- [ -n "$json" ] || return 0
12404
-
12405
- local run_id workflow conclusion status created updated
12406
- run_id=$(echo "$json" | jq -r '.databaseId // ""' 2>/dev/null)
12407
- [ -n "$run_id" ] || return 0
12408
-
12409
- local dir; dir=$(_ci_state_dir)
12410
- local file="${dir}/ci-timing.jsonl"
12411
-
12412
- # Idempotency: skip if this run_id is already recorded with a non-empty
12413
- # conclusion. If the existing record has an empty conclusion and the new
12414
- # data has a conclusion, update in-place so in-progress runs are completed.
12415
- if [ -f "$file" ] && grep -q "\"run_id\":${run_id}," "$file" 2>/dev/null; then
12416
- local existing_conclusion new_conclusion
12417
- existing_conclusion=$(grep "\"run_id\":${run_id}," "$file" 2>/dev/null | jq -r '.conclusion // ""' 2>/dev/null)
12418
- new_conclusion=$(echo "$json" | jq -r '.conclusion // ""' 2>/dev/null)
12419
- if [ -n "$existing_conclusion" ] || [ -z "$new_conclusion" ]; then
12420
- return 0
12421
- fi
12422
- # Remove the stale line so the new record can be appended below.
12423
- local tmpfile="${file}.tmp.$$"
12424
- grep -v "\"run_id\":${run_id}," "$file" > "$tmpfile" 2>/dev/null || true
12425
- mv "$tmpfile" "$file"
12426
- fi
12427
-
12428
- workflow=$(echo "$json" | jq -r '.workflowName // .name // ""' 2>/dev/null)
12429
- conclusion=$(echo "$json" | jq -r '.conclusion // ""' 2>/dev/null)
12430
- status=$(echo "$json" | jq -r '.status // ""' 2>/dev/null)
12431
- created=$(echo "$json" | jq -r '.createdAt // ""' 2>/dev/null)
12432
- updated=$(echo "$json" | jq -r '.updatedAt // ""' 2>/dev/null)
12433
-
12434
- # Duration in seconds from ISO-8601 timestamps; 0 if either is missing or
12435
- # unparseable. `date -j` (BSD) and `date -d` (GNU) differ — try both.
12436
- local dur=0 c_epoch u_epoch
12437
- if [ -n "$created" ] && [ -n "$updated" ]; then
12438
- c_epoch=$(_ci_iso_to_epoch "$created")
12439
- u_epoch=$(_ci_iso_to_epoch "$updated")
12440
- if [ -n "$c_epoch" ] && [ -n "$u_epoch" ] && [ "$u_epoch" -ge "$c_epoch" ] 2>/dev/null; then
12441
- dur=$((u_epoch - c_epoch))
12442
- fi
12443
- fi
12444
-
12445
- printf '{"run_id":%s,"workflow":"%s","conclusion":"%s","status":"%s","duration_sec":%s,"recorded_at":"%s"}\n' \
12446
- "$run_id" "$workflow" "$conclusion" "$status" "$dur" \
12447
- "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$file"
12448
- return 0
12449
- }
12450
-
12451
- # _ci_iso_to_epoch <iso8601>
12452
- # Convert an ISO-8601 UTC timestamp (2026-05-30T10:00:00Z) to epoch seconds.
12453
- # Echoes nothing on failure. Handles both BSD (macOS) and GNU date.
12454
- _ci_iso_to_epoch() {
12455
- local iso="$1"
12456
- [ -n "$iso" ] || return 0
12457
- local e
12458
- # GNU date
12459
- e=$(date -u -d "$iso" +%s 2>/dev/null) && { echo "$e"; return 0; }
12460
- # BSD date (strip trailing Z, parse explicit format)
12461
- local trimmed="${iso%Z}"
12462
- e=$(date -u -j -f "%Y-%m-%dT%H:%M:%S" "$trimmed" +%s 2>/dev/null) && { echo "$e"; return 0; }
12463
- return 0
12464
- }
12465
-
12466
- # _ci_classify_failure <run_id>
12467
- # Inspect `gh run view <id> --log-failed` and classify the failure as
12468
- # "transient" (infra flake: network, timeout, runner death) or "real"
12469
- # (genuine test/build failure). Echoes "transient" or "real".
12470
- # Empty / unavailable logs default to "real" (fail safe — don't auto-rerun
12471
- # something we can't read).
12472
- _ci_classify_failure() {
12473
- local run_id="$1"
12474
- [ -n "$run_id" ] || { echo "real"; return 0; }
12475
- local slug; _gh_resolve slug 2>/dev/null || slug=""
12476
-
12477
- local log
12478
- if [ -n "$slug" ]; then
12479
- log=$(gh -R "$slug" run view "$run_id" --log-failed 2>/dev/null)
12480
- else
12481
- log=$(gh run view "$run_id" --log-failed 2>/dev/null)
12482
- fi
12483
-
12484
- # Transient signatures: network/infra failures that a rerun typically clears.
12485
- if echo "$log" | grep -qiE 'ETIMEDOUT|ECONNRESET|ENOTFOUND|EAI_AGAIN|shutdown signal|runner.*(error|lost|terminated)|The runner has received a shutdown|503 Service|connection reset|TLS handshake|i/o timeout|could not resolve host'; then
12486
- echo "transient"
12487
- return 0
12488
- fi
12489
- echo "real"
12490
- return 0
12491
- }
12492
-
12493
- # _ci_rerun_state_file
12494
- # Echo path to ci-rerun-state.yaml (creating the dir).
12495
- _ci_rerun_state_file() {
12496
- local dir; dir=$(_ci_state_dir)
12497
- echo "${dir}/ci-rerun-state.yaml"
12498
- }
12499
-
12500
- # _ci_rerun_attempts <run_id>
12501
- # Echo the recorded rerun attempt count for <run_id> (0 if none).
12502
- _ci_rerun_attempts() {
12503
- local run_id="$1"
12504
- local file; file=$(_ci_rerun_state_file)
12505
- [ -f "$file" ] || { echo 0; return 0; }
12506
- local n
12507
- n=$(awk -v key="\"${run_id}\":" '$1 == key { print $2 }' "$file" 2>/dev/null | head -1)
12508
- case "$n" in
12509
- ''|*[!0-9]*) echo 0 ;;
12510
- *) echo "$n" ;;
12511
- esac
12512
- }
12513
-
12514
- # _ci_rerun_state_write <run_id> <attempts>
12515
- # Set the attempt count for <run_id> in ci-rerun-state.yaml. Minimal YAML
12516
- # writer (we own the schema): one `"<run_id>": <n>` line per run.
12517
- _ci_rerun_state_write() {
12518
- local run_id="$1" attempts="$2"
12519
- local file; file=$(_ci_rerun_state_file)
12520
- [ -f "$file" ] || : > "$file"
12521
- local tmp; tmp=$(mktemp)
12522
- awk -v key="\"${run_id}\":" -v val="$attempts" '
12523
- $1 == key { print key " " val; found=1; next }
12524
- { print }
12525
- END { if (!found) print key " " val }
12526
- ' "$file" > "$tmp" && mv "$tmp" "$file"
12527
- }
12528
-
12529
- # _ci_rerun_transient <run_id>
12530
- # Auto-rerun a transient CI failure, capped at 2 attempts. attempt<2 →
12531
- # `gh run rerun`; attempt>=2 → write an error ALERT. Echoes the action taken
12532
- # ("rerun" / "limit"). Loop-safe (returns 0).
12533
- _ci_rerun_transient() {
12534
- local run_id="$1"
12535
- [ -n "$run_id" ] || return 0
12536
- local slug; _gh_resolve slug 2>/dev/null || slug=""
12537
-
12538
- local attempts; attempts=$(_ci_rerun_attempts "$run_id")
12539
- if [ "$attempts" -ge 2 ]; then
12540
- local alert="$_LOOP_ALERT"
12541
- mkdir -p "$(dirname "$alert")" 2>/dev/null || true
12542
- printf '[%s] [error] [TYPE:ci-rerun-limit] CI rerun reached limit: run #%s (%s attempts)\n' \
12543
- "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$run_id" "$attempts" >> "$alert"
12544
- echo "limit"
12545
- return 0
12546
- fi
12547
-
12548
- if [ -n "$slug" ]; then
12549
- gh -R "$slug" run rerun "$run_id" >/dev/null 2>&1 || true
12550
- else
12551
- gh run rerun "$run_id" >/dev/null 2>&1 || true
12552
- fi
12553
- _ci_rerun_state_write "$run_id" "$((attempts + 1))"
12554
- echo "rerun"
12555
- return 0
12556
- }
12557
-
12558
- # _ci_open_story <type> <title>
12559
- # Append a FIX or US row to .roll/backlog.md's `| ID | Description | Status |`
12560
- # table. Idempotent: if a 📋 Todo row with the same title already exists, skip
12561
- # (echo "skip"). New IDs auto-increment from the max existing <TYPE>-NNN.
12562
- # Echoes the new ID on success, "skip" if already queued.
12563
- _ci_open_story() {
12564
- local type="$1" title="$2"
12565
- [ -n "$type" ] && [ -n "$title" ] || return 0
12566
-
12567
- # Resolve the backlog file (project-local).
12568
- local backlog=".roll/backlog.md"
12569
- [ -f "$backlog" ] || { echo "skip"; return 0; }
12570
-
12571
- # Idempotency: same title already queued as Todo → skip.
12572
- if grep -F "$title" "$backlog" 2>/dev/null | grep -q '📋 Todo'; then
12573
- echo "skip"
12574
- return 0
12575
- fi
12576
-
12577
- # Auto-increment: find the max existing <TYPE>-NNN id.
12578
- local prefix max next
12579
- prefix=$(echo "$type" | tr '[:lower:]' '[:upper:]')
12580
- max=$(grep -oE "${prefix}-[0-9]+" "$backlog" 2>/dev/null \
12581
- | sed "s/${prefix}-//" \
12582
- | sort -n | tail -1)
12583
- case "$max" in ''|*[!0-9]*) max=0 ;; esac
12584
- # 10# prefix forces base-10: a zero-padded id like 008/009 would otherwise be
12585
- # parsed as octal and either misnumber (010→8) or error ("value too great").
12586
- next=$((10#$max + 1))
12587
- local id
12588
- id=$(printf '%s-%03d' "$prefix" "$next")
12589
-
12590
- printf '| %s | %s | 📋 Todo |\n' "$id" "$title" >> "$backlog"
12591
- echo "$id"
12592
- return 0
12593
- }
12594
-
12595
- # _ci_detect_flaky
12596
- # Scan the last 20 ci-timing.jsonl lines, group by workflow, and flag any
12597
- # workflow whose recent runs have a 20%–80% failure rate (2..8 failures of
12598
- # the last 10) as flaky — opening a FIX story. Returns 0 (loop-safe).
12599
- _ci_detect_flaky() {
12600
- local dir; dir=$(_ci_state_dir)
12601
- local file="${dir}/ci-timing.jsonl"
12602
- [ -f "$file" ] || return 0
12603
-
12604
- # Per workflow: count total + failures over the most recent 10 records.
12605
- # awk reads last 20 lines (tail), keeps last 10 per workflow. Output is
12606
- # collected into a variable (not piped to `while`) so an empty result or
12607
- # an intermediate nonzero exit cannot trip a caller's ERR trap.
12608
- local flaky_wfs
12609
- flaky_wfs=$(tail -n 20 "$file" 2>/dev/null | awk '
12610
- {
12611
- # crude field extraction from flat JSON line
12612
- wf=""; concl="";
12613
- if (match($0, /"workflow":"[^"]*"/)) { wf=substr($0,RSTART+12,RLENGTH-13) }
12614
- if (match($0, /"conclusion":"[^"]*"/)) { concl=substr($0,RSTART+14,RLENGTH-15) }
12615
- if (wf=="") next
12616
- order[wf]=order[wf]" "NR
12617
- val[wf"|"NR]=concl
12618
- }
12619
- END {
12620
- for (wf in order) {
12621
- n=split(order[wf], idx, " ")
12622
- # keep most recent 10
12623
- start=1; if (n-10 > 0) start=n-9
12624
- total=0; fail=0
12625
- for (i=start;i<=n;i++) {
12626
- if (idx[i]=="") continue
12627
- total++
12628
- c=val[wf"|"idx[i]]
12629
- if (c=="failure" || c=="timed_out" || c=="cancelled") fail++
12630
- }
12631
- if (total>=4 && fail>=2 && fail<=8 && fail*100 <= total*80 && fail*100 >= total*20) {
12632
- print wf
12633
- }
12634
- }
12635
- }
12636
- ' || true)
12637
-
12638
- local wf
12639
- for wf in $flaky_wfs; do
12640
- [ -n "$wf" ] && _ci_open_story FIX "flaky: ${wf}" >/dev/null || true
12641
- done
12642
- return 0
12643
- }
12644
-
12645
- # _ci_detect_degradation
12646
- # Scan the last 20 ci-timing.jsonl lines, compute mean duration per workflow,
12647
- # and open a US story when a workflow crosses its threshold:
12648
- # unit* > 300s (5 min)
12649
- # integration* > 900s (15 min)
12650
- # Returns 0 (loop-safe).
12651
- _ci_detect_degradation() {
12652
- local dir; dir=$(_ci_state_dir)
12653
- local file="${dir}/ci-timing.jsonl"
12654
- [ -f "$file" ] || return 0
12655
-
12656
- local degraded
12657
- degraded=$(tail -n 20 "$file" 2>/dev/null | awk '
12658
- {
12659
- wf=""; dur=0;
12660
- if (match($0, /"workflow":"[^"]*"/)) { wf=substr($0,RSTART+12,RLENGTH-13) }
12661
- if (match($0, /"duration_sec":[0-9]+/)) { dur=substr($0,RSTART+15,RLENGTH-15)+0 }
12662
- if (wf=="") next
12663
- sum[wf]+=dur; cnt[wf]++
12664
- }
12665
- END {
12666
- for (wf in sum) {
12667
- if (cnt[wf]==0) continue
12668
- avg=sum[wf]/cnt[wf]
12669
- lc=tolower(wf)
12670
- if (index(lc,"unit")>0 && avg>300) { print wf "\t" int(avg) }
12671
- else if (index(lc,"integration")>0 && avg>900) { print wf "\t" int(avg) }
12672
- }
12673
- }
12674
- ' || true)
12675
-
12676
- local line wf avg
12677
- # IFS=newline so each "wf<TAB>avg" record is one iteration; field-split on TAB.
12678
- local _oifs="$IFS"
12679
- IFS='
12680
- '
12681
- for line in $degraded; do
12682
- IFS="$_oifs"
12683
- wf=$(printf '%s' "$line" | cut -f1)
12684
- avg=$(printf '%s' "$line" | cut -f2)
12685
- [ -n "$wf" ] && _ci_open_story US "CI degradation: ${wf} avg ${avg}s exceeds threshold" >/dev/null || true
12686
- IFS='
12687
- '
12688
- done
12689
- IFS="$_oifs"
12690
- return 0
12691
- }
12692
-
12693
- # _ci_scan
12694
- # US-AUTO-045 Phase 2 orchestrator: the entry the CI Loop runner drives every
12695
- # 5 min. Lists recent `main`-branch CI runs, records each run's timing, and on
12696
- # a `failure` conclusion classifies it — auto-rerunning transient infra
12697
- # flakes. After the loop it runs the flaky + degradation detectors over the
12698
- # accumulated history. Lenient on gh unavailability (missing / failed list →
12699
- # return 0) so the service never errors out a tick.
12700
- _ci_scan() {
12701
- local slug; _gh_resolve slug 2>/dev/null || { _loop_write_tick "ci" "idle" "gh_unavailable"; return 0; }
12702
-
12703
- local runs_json
12704
- runs_json=$(gh -R "$slug" run list --branch main \
12705
- --json databaseId,workflowName,name,conclusion,status,createdAt,updatedAt \
12706
- 2>/dev/null) || { _loop_write_tick "ci" "idle" "gh_error"; return 0; }
12707
- [ -n "$runs_json" ] || { _loop_write_tick "ci" "idle" "empty_response"; return 0; }
12708
-
12709
- # An empty list ("[]") still falls through to the detectors below: they run
12710
- # over accumulated history, not just this tick's runs.
12711
- local count; count=$(echo "$runs_json" | jq 'length' 2>/dev/null || echo 0)
12712
- case "$count" in ''|*[!0-9]*) count=0 ;; esac
12713
-
12714
- local i=0
12715
- while [ "$i" -lt "$count" ]; do
12716
- local run_json conclusion run_id
12717
- run_json=$(echo "$runs_json" | jq -c ".[$i]" 2>/dev/null)
12718
- _ci_record_timing "$run_json"
12719
-
12720
- conclusion=$(echo "$run_json" | jq -r '.conclusion // ""' 2>/dev/null)
12721
- if [ "$conclusion" = "failure" ]; then
12722
- run_id=$(echo "$run_json" | jq -r '.databaseId // ""' 2>/dev/null)
12723
- if [ -n "$run_id" ]; then
12724
- local kind; kind=$(_ci_classify_failure "$run_id")
12725
- [ "$kind" = "transient" ] && _ci_rerun_transient "$run_id" >/dev/null
12726
- fi
12727
- fi
12728
- i=$((i + 1))
12729
- done
12730
-
12731
- _ci_detect_flaky
12732
- _ci_detect_degradation
12733
- _loop_write_tick "ci" "acted" "scan_done"
12734
- return 0
12735
- }
12736
-
12737
- # ═══════════════════════════════════════════════════════════════════════════════
12738
- # US-AUTO-046 Phase 1: dedicated Alert Loop helpers (loop-safe, pure bash)
12739
- # ═══════════════════════════════════════════════════════════════════════════════
12740
- # These consume the existing $_LOOP_ALERT file — until now a write-only dumb file
12741
- # that every loop appends to but nobody reads. The Alert Loop turns it into a
12742
- # real consumer: parse → dedup (1h per category) → notify (error always) →
12743
- # log → rotate. They are NOT yet wired into any runner or launchd plist — that
12744
- # is Phase 2 (wired by hand). Each is unit-tested in
12745
- # tests/unit/roll_loop_alert_loop.bats with _notify stubbed. Do not delete or
12746
- # inline.
12747
- #
12748
- # State lives under project-local .roll/state/ (shared with the CI Loop):
12749
- # alert-log.jsonl append-only NDJSON, one line per consumed alert
12750
- # $_LOOP_ALERT.prev is the rotated copy (kept for debugging).
12751
- #
12752
- # Line format ($_LOOP_ALERT) — new tagged format, old format read-compatible:
12753
- # [2026-05-26T10:00:00] [error] [TYPE:ci-real-failure] CI failed: run #123
12754
- # [2026-05-26T10:00:00] some legacy message → level=warn category=legacy
12755
-
12756
- # _alert_parse_file [file]
12757
- # Parse each non-empty line of $_LOOP_ALERT (or <file>) into a TAB-separated
12758
- # record `ts<TAB>level<TAB>category<TAB>message`, one per output line. The
12759
- # leading `[ts]` is extracted when present; optional `[level]` and
12760
- # `[TYPE:category]` tags follow. Untagged (legacy) lines default to
12761
- # level=warn, category=legacy, with the whole remainder as the message.
12762
- # Markdown headers / ack footers (lines starting with `#` or `**`) are skipped.
12763
- # Echoes nothing for a missing/empty file. Loop-safe (returns 0).
12764
- _alert_parse_file() {
12765
- local file="${1:-$_LOOP_ALERT}"
12766
- [ -n "$file" ] && [ -f "$file" ] || return 0
12767
-
12768
- awk '
12769
- {
12770
- line=$0
12771
- # skip blank lines and markdown chrome (headers, ack footers)
12772
- if (line ~ /^[ \t]*$/) next
12773
- if (line ~ /^[ \t]*#/) next
12774
- if (line ~ /^[ \t]*\*\*/) next
12775
-
12776
- ts=""; level=""; category=""
12777
-
12778
- # leading [timestamp]
12779
- if (match(line, /^\[[^]]*\]/)) {
12780
- ts=substr(line, RSTART+1, RLENGTH-2)
12781
- line=substr(line, RSTART+RLENGTH)
12782
- sub(/^[ \t]+/, "", line)
12783
- }
12784
- # optional [level] (error|warn|info)
12785
- if (match(line, /^\[(error|warn|info)\]/)) {
12786
- level=substr(line, RSTART+1, RLENGTH-2)
12787
- line=substr(line, RSTART+RLENGTH)
12788
- sub(/^[ \t]+/, "", line)
12789
- }
12790
- # optional [TYPE:category]
12791
- if (match(line, /^\[TYPE:[^]]*\]/)) {
12792
- category=substr(line, RSTART+6, RLENGTH-7)
12793
- line=substr(line, RSTART+RLENGTH)
12794
- sub(/^[ \t]+/, "", line)
12795
- }
12796
-
12797
- # legacy "ALERT:" prefix on the remaining message — strip the keyword
12798
- sub(/^ALERT:[ \t]*/, "", line)
12799
-
12800
- if (level=="") level="warn"
12801
- if (category=="") category="legacy"
12802
-
12803
- printf "%s\t%s\t%s\t%s\n", ts, level, category, line
12804
- }
12805
- ' "$file"
12806
- return 0
12807
- }
12808
-
12809
- # _alert_log_file
12810
- # Echo path to .roll/state/alert-log.jsonl (creating the dir). Reuses the
12811
- # CI Loop's _ci_state_dir so both loops share one project-local state dir.
12812
- _alert_log_file() {
12813
- local dir; dir=$(_ci_state_dir)
12814
12249
  echo "${dir}/alert-log.jsonl"
12815
12250
  }
12816
12251
 
12817
- # _alert_should_notify <category> <level>
12818
- # Decide whether an alert should fire a notification.
12819
- # error → always true (immediate, never throttled)
12820
- # warn | info → true unless a same-category alert was already notified
12821
- # within the last hour (rate-limit / dedup)
12822
- # The 1h window is read from alert-log.jsonl (notified=1 entries only).
12823
- # Echoes "true" / "false".
12824
- _alert_should_notify() {
12825
- local category="$1" level="$2"
12826
- [ "$level" = "error" ] && { echo "true"; return 0; }
12827
-
12828
- local file; file=$(_alert_log_file)
12829
- [ -f "$file" ] || { echo "true"; return 0; }
12830
-
12831
- local now; now=$(date -u +%s)
12832
- # Most recent notified=1 entry for this category → its recorded_at epoch.
12833
- local last
12834
- last=$(grep -F "\"category\":\"${category}\"" "$file" 2>/dev/null \
12835
- | grep -F '"notified":1' \
12836
- | tail -1 \
12837
- | sed -n 's/.*"recorded_at":"\([^"]*\)".*/\1/p')
12838
- [ -n "$last" ] || { echo "true"; return 0; }
12839
-
12840
- local last_epoch; last_epoch=$(_ci_iso_to_epoch "$last")
12841
- [ -n "$last_epoch" ] || { echo "true"; return 0; }
12842
-
12843
- # Within 1h (3600s) → throttle (false); otherwise allow.
12844
- if [ "$((now - last_epoch))" -lt 3600 ] 2>/dev/null; then
12845
- echo "false"
12846
- else
12847
- echo "true"
12848
- fi
12849
- return 0
12850
- }
12851
-
12852
- # _alert_write_log <ts> <level> <category> <message> <notified>
12853
- # Append one NDJSON record to alert-log.jsonl. <notified> is the literal
12854
- # string "true"/"false" (or 1/0) and is normalized to 1/0. recorded_at is the
12855
- # consumption time (UTC), distinct from the alert's own <ts>. Quotes in the
12856
- # message are escaped so the line stays valid JSON. Loop-safe (returns 0).
12857
- _alert_write_log() {
12858
- local ts="$1" level="$2" category="$3" message="$4" notified="$5"
12859
- local file; file=$(_alert_log_file)
12860
-
12861
- local n=0
12862
- case "$notified" in true|1) n=1 ;; esac
12863
-
12864
- # Escape backslashes then double-quotes for JSON string safety.
12865
- local esc
12866
- esc=$(printf '%s' "$message" | sed 's/\\/\\\\/g; s/"/\\"/g')
12867
-
12868
- printf '{"ts":"%s","level":"%s","category":"%s","message":"%s","notified":%s,"recorded_at":"%s"}\n' \
12869
- "$ts" "$level" "$category" "$esc" "$n" \
12870
- "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$file"
12871
- return 0
12872
- }
12873
-
12874
- # _alert_rotate [file]
12875
- # Snapshot $_LOOP_ALERT (or <file>) to <file>.prev and truncate it in place.
12876
- # Idempotent: a missing source is a no-op (the .prev from a prior run is
12877
- # left untouched). Loop-safe (returns 0).
12878
- #
12879
- # US-AUTO-046 (kimi peer-review Q2): copy+truncate instead of mv. `mv` swaps
12880
- # the inode at the path, so a producer loop (main/pr/ci) that opened its `>>`
12881
- # fd *before* the rotation but writes *after* it would land in `.prev` and be
12882
- # silently lost. Copying keeps the original inode at the path; the subsequent
12883
- # `:>` truncates that same inode, so any concurrent appender's fd still points
12884
- # at the live alert file and its write is read on the next 1-min tick.
12885
- _alert_rotate() {
12886
- local file="${1:-$_LOOP_ALERT}"
12887
- [ -n "$file" ] || return 0
12888
- if [ -f "$file" ]; then
12889
- cat "$file" > "${file}.prev" 2>/dev/null || true
12890
- : > "$file"
12891
- fi
12892
- return 0
12893
- }
12894
-
12895
- # _alert_dispatch [file]
12896
- # Main consumer entry point. Parse $_LOOP_ALERT → for each alert decide
12897
- # notify → fire _notify + record to alert-log.jsonl → rotate the file.
12898
- # A missing/empty alert file is a no-op (no rotate, no log). Loop-safe.
12899
- _alert_dispatch() {
12900
- local file="${1:-$_LOOP_ALERT}"
12901
- [ -n "$file" ] && [ -f "$file" ] || { _loop_write_tick "alert" "idle" "no_file"; return 0; }
12902
- # Empty file → nothing to consume, leave it in place.
12903
- [ -s "$file" ] || { _loop_write_tick "alert" "idle" "empty_file"; return 0; }
12904
-
12905
- local parsed; parsed=$(_alert_parse_file "$file")
12906
- [ -n "$parsed" ] || { _alert_rotate "$file"; _loop_write_tick "alert" "idle" "no_parsed"; return 0; }
12907
-
12908
- local line ts level category message notify
12909
- local _oifs="$IFS"
12910
- IFS='
12911
- '
12912
- for line in $parsed; do
12913
- IFS="$_oifs"
12914
- ts=$(printf '%s' "$line" | cut -f1)
12915
- level=$(printf '%s' "$line" | cut -f2)
12916
- category=$(printf '%s' "$line" | cut -f3)
12917
- message=$(printf '%s' "$line" | cut -f4-)
12918
-
12919
- notify=$(_alert_should_notify "$category" "$level")
12920
- if [ "$notify" = "true" ]; then
12921
- _notify "roll alert: ${level}" "${message}" || true
12922
- _alert_write_log "$ts" "$level" "$category" "$message" "true"
12923
- else
12924
- _alert_write_log "$ts" "$level" "$category" "$message" "false"
12925
- fi
12926
- IFS='
12927
- '
12928
- done
12929
- IFS="$_oifs"
12930
-
12931
- _alert_rotate "$file"
12932
- _loop_write_tick "alert" "acted" "dispatch_done"
12933
- return 0
12934
- }
12935
-
12936
12252
  # FIX-070: flip a story row in the main repo's .roll/backlog.md between
12937
12253
  # 📋 Todo and 🔨 In Progress. The cycle worktree is gitignored at .roll/,
12938
12254
  # so editing the worktree copy + committing leaves no trace in git — and
@@ -14461,7 +13777,7 @@ _loop_monitor() {
14461
13777
  # Services status (three services on macOS, single on Linux)
14462
13778
  echo -e "$(msg loop.services ${BOLD} ${NC} ${CYAN} ${agent})"
14463
13779
  if [[ "$(uname)" == "Darwin" ]]; then
14464
- local active_start active_end dream_hour dream_minute brief_hour brief_minute
13780
+ local active_start active_end dream_hour dream_minute
14465
13781
  local _aw; _aw=$(_loop_read_active_window "$project_path")
14466
13782
  active_start="${_aw%% *}"; active_end="${_aw##* }"
14467
13783
  # US-LOOP-013: use schedule spec for display
@@ -14471,17 +13787,16 @@ _loop_monitor() {
14471
13787
  loop_offset="${loop_spec##* }"
14472
13788
  dream_hour=$(_config_read_int "loop_dream_hour" "3")
14473
13789
  dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")
14474
- brief_hour=$(_config_read_int "loop_brief_hour" "9")
14475
- brief_minute=$(_config_read_int "loop_brief_minute" "$(_loop_derive_minute "$project_path" 4)")
14476
13790
 
14477
- local loop_sched dream_sched brief_sched
13791
+ local loop_sched dream_sched pr_sched
14478
13792
  loop_sched=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
14479
13793
  loop_sched="${loop_sched} active ${active_start}:00–${active_end}:00"
14480
13794
  dream_sched=$(printf "%02d:%02d" "$dream_hour" "$dream_minute")
14481
- brief_sched=$(printf "%02d:%02d" "$brief_hour" "$brief_minute")
13795
+ # FIX-195: pr is a 5-min PR Loop (StartInterval=300); brief was retired.
13796
+ pr_sched="every 5m"
14482
13797
 
14483
- local svcs=("loop" "dream" "brief")
14484
- local scheds=("$loop_sched" "$dream_sched" "$brief_sched")
13798
+ local svcs=("loop" "dream" "pr")
13799
+ local scheds=("$loop_sched" "$dream_sched" "$pr_sched")
14485
13800
  for i in "${!svcs[@]}"; do
14486
13801
  local svc="${svcs[$i]}" schedule="${scheds[$i]}"
14487
13802
  local state; state=$(_launchd_svc_state "$svc" "$project_path")
@@ -15571,7 +14886,7 @@ _legacy_home() {
15571
14886
  else
15572
14887
  crontab -l 2>/dev/null | grep -q "${_LOOP_TAG}:${project_path}" && loop_state="enabled"
15573
14888
  fi
15574
- local active_start active_end dream_hour dream_minute brief_hour brief_minute
14889
+ local active_start active_end dream_hour dream_minute
15575
14890
  local _aw; _aw=$(_loop_read_active_window "$project_path")
15576
14891
  active_start="${_aw%% *}"; active_end="${_aw##* }"
15577
14892
  # US-LOOP-013: use schedule spec for display
@@ -15581,8 +14896,6 @@ _legacy_home() {
15581
14896
  loop_offset="${loop_spec##* }"
15582
14897
  dream_hour=$(_config_read_int "loop_dream_hour" "3")
15583
14898
  dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")
15584
- brief_hour=$(_config_read_int "loop_brief_hour" "9")
15585
- brief_minute=$(_config_read_int "loop_brief_minute" "$(_loop_derive_minute "$project_path" 4)")
15586
14899
 
15587
14900
  local loop_badge loop_sched
15588
14901
  loop_sched=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
@@ -15712,8 +15025,9 @@ _legacy_home() {
15712
15025
  # ── ⑥ Schedules & Last Brief ──────────────────────────────────────────────
15713
15026
  printf " ${BOLD}⏰ Schedules & Last Brief${NC}\n"
15714
15027
  local loop_sched_short; loop_sched_short=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
15715
- printf " %s · dream %02d:%02d · brief %02d:%02d\n" \
15716
- "$loop_sched_short" "$dream_hour" "$dream_minute" "$brief_hour" "$brief_minute"
15028
+ # FIX-195: brief loop retired — schedule line shows loop + dream only.
15029
+ printf " %s · dream %02d:%02d\n" \
15030
+ "$loop_sched_short" "$dream_hour" "$dream_minute"
15717
15031
  local latest_brief; latest_brief=$(ls .roll/briefs/*.md 2>/dev/null | sort | tail -1 || true)
15718
15032
  if [[ -n "$latest_brief" ]]; then
15719
15033
  local mod_time now age summary
@@ -15909,11 +15223,10 @@ main() {
15909
15223
  test) cmd_test "$@" ;;
15910
15224
  prices) cmd_prices "$@" ;;
15911
15225
  changelog) cmd_changelog "$@" ;;
15226
+ consistency) cmd_consistency "$@" ;;
15912
15227
  config) cmd_config "$@" ;;
15913
15228
  _loop_render_exit_summary) _loop_render_exit_summary "$@" ;;
15914
15229
  _loop_pr_inbox) _loop_pr_inbox "$@" ;;
15915
- _ci_scan) _ci_scan "$@" ;;
15916
- _alert_dispatch) _alert_dispatch "$@" ;;
15917
15230
  version|--version|-v) echo "roll v${VERSION}" ;;
15918
15231
  help|--help|-h) _help "$@" ;;
15919
15232
  "") [[ -f ".roll/backlog.md" ]] && _home || { _help; _show_changelog; } ;;