@seanyao/roll 2.603.1 → 2.604.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/CHANGELOG.md +40 -11
  2. package/bin/roll +84 -757
  3. package/lib/changelog_audit.py +149 -0
  4. package/lib/changelog_generate.py +41 -23
  5. package/lib/consistency_check.py +409 -0
  6. package/lib/i18n/consistency.sh +8 -0
  7. package/lib/prices/snapshot-2026-05-22.json +1 -7
  8. package/lib/prices/snapshot-2026-05-23-deepseek.json +0 -2
  9. package/lib/prices/snapshot-2026-06-02-kimi.json +0 -1
  10. package/lib/prices_fetcher.py +1 -20
  11. package/lib/roll-loop-status.py +1 -1
  12. package/package.json +1 -1
  13. package/lib/__pycache__/changelog_generate.cpython-314.pyc +0 -0
  14. package/lib/__pycache__/github_sync.cpython-314.pyc +0 -0
  15. package/lib/__pycache__/loop-fmt.cpython-314.pyc +0 -0
  16. package/lib/__pycache__/loop_result_eval.cpython-314.pyc +0 -0
  17. package/lib/__pycache__/loop_unstick.cpython-314.pyc +0 -0
  18. package/lib/__pycache__/model_prices.cpython-314.pyc +0 -0
  19. package/lib/__pycache__/prices_fetcher.cpython-314.pyc +0 -0
  20. package/lib/__pycache__/roll-home.cpython-314.pyc +0 -0
  21. package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
  22. package/lib/__pycache__/roll_git.cpython-314.pyc +0 -0
  23. package/lib/__pycache__/roll_render.cpython-314.pyc +0 -0
  24. package/lib/__pycache__/slides-render.cpython-314.pyc +0 -0
  25. package/lib/agent_usage/__pycache__/__init__.cpython-314.pyc +0 -0
  26. package/lib/agent_usage/__pycache__/gemini.cpython-314.pyc +0 -0
  27. package/lib/agent_usage/__pycache__/kimi.cpython-314.pyc +0 -0
  28. package/lib/agent_usage/__pycache__/openai.cpython-314.pyc +0 -0
  29. package/lib/agent_usage/__pycache__/pi.cpython-314.pyc +0 -0
  30. package/lib/agent_usage/__pycache__/pi_emit.cpython-314.pyc +0 -0
  31. package/lib/agent_usage/__pycache__/qwen.cpython-314.pyc +0 -0
package/bin/roll CHANGED
@@ -4,7 +4,7 @@ set -euo pipefail
4
4
  # Roll — AI Agent Convention Manager
5
5
  # Single source of truth for how all AI coding agents behave.
6
6
 
7
- VERSION="2.603.1"
7
+ VERSION="2.604.1"
8
8
  ROLL_HOME="${ROLL_HOME:-${HOME}/.roll}"
9
9
  ROLL_CONFIG="${ROLL_HOME}/config.yaml"
10
10
  ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
@@ -4046,6 +4046,11 @@ _peer_call() {
4046
4046
  _watchdog_pid=$!
4047
4047
  wait "$_peer_pid" 2>/dev/null || _peer_exit=$?
4048
4048
  # Cancel watchdog if agent finished on time.
4049
+ # FIX-181: kill children (sleep) first so they cannot outlive the
4050
+ # watchdog and later hit a reused PID, then kill the watchdog itself.
4051
+ if command -v pkill >/dev/null 2>&1; then
4052
+ pkill -P "$_watchdog_pid" 2>/dev/null || true
4053
+ fi
4049
4054
  kill "$_watchdog_pid" 2>/dev/null || true
4050
4055
  wait "$_watchdog_pid" 2>/dev/null || true
4051
4056
  output="$(cat "$_out" 2>/dev/null || true)"
@@ -5658,7 +5663,7 @@ cmd_changelog() {
5658
5663
  esac
5659
5664
  done
5660
5665
  local raw
5661
- raw=$(python3 "${ROLL_PKG_DIR}/lib/changelog_generate.py" "${pyargs[@]}") || return 1
5666
+ raw=$(python3 "${ROLL_PKG_DIR}/lib/changelog_generate.py" ${pyargs[@]+"${pyargs[@]}"}) || return 1
5662
5667
  if [ "$is_json" = 1 ]; then printf '%s\n' "$raw"; return 0; fi
5663
5668
  local final="$raw"
5664
5669
  if [ "$want_ai" = 1 ]; then
@@ -5697,6 +5702,34 @@ EOF
5697
5702
  esac
5698
5703
  }
5699
5704
 
5705
+ # ─── roll consistency check — unified consistency orchestrator (US-CONSIST-001) ──
5706
+ cmd_consistency() {
5707
+ local subcmd="${1:-check}"
5708
+ shift || true
5709
+ case "$subcmd" in
5710
+ check)
5711
+ python3 "${ROLL_PKG_DIR}/lib/consistency_check.py" "$@"
5712
+ ;;
5713
+ --help|-h|help)
5714
+ cat <<EOF
5715
+ Usage: roll consistency <subcommand>
5716
+
5717
+ check [--json] [--project-dir DIR] 逐维度跑一致性检查
5718
+ Run checks across five dimensions (code, docs, i18n, tests, site)
5719
+ and produce a structured pass/gap report.
5720
+
5721
+ roll consistency check # human-readable report
5722
+ roll consistency check --json # machine-readable JSON
5723
+ EOF
5724
+ ;;
5725
+ *)
5726
+ err "$(msg consistency.unknown_sub "$subcmd")"
5727
+ err "Try: roll consistency check"
5728
+ return 1
5729
+ ;;
5730
+ esac
5731
+ }
5732
+
5700
5733
  # ─── roll config — unified read/list/set for loop schedule keys (US-LOOP-033) ──
5701
5734
  #
5702
5735
  # One interactive entry point so users don't have to remember whether a key
@@ -6192,14 +6225,14 @@ cmd_review_pr() {
6192
6225
 
6193
6226
  local slug; slug=$(_gh_repo_slug) || { err "Not a GitHub repo — review-pr requires GitHub remote"; return 1; }
6194
6227
 
6195
- local pr_json
6196
- pr_json=$(gh -R "$slug" pr view "$pr_number" --json title,body,diff 2>&1) \
6228
+ local pr_json diff
6229
+ pr_json=$(gh -R "$slug" pr view "$pr_number" --json title,body 2>&1) \
6197
6230
  || { err "gh pr view failed: ${pr_json}"; return 1; }
6231
+ diff=$(gh -R "$slug" pr diff "$pr_number" 2>/dev/null) || true
6198
6232
 
6199
6233
  local title body diff
6200
6234
  title=$(echo "$pr_json" | jq -r '.title // ""')
6201
6235
  body=$(echo "$pr_json" | jq -r '.body // ""')
6202
- diff=$(echo "$pr_json" | jq -r '.diff // ""')
6203
6236
 
6204
6237
  if echo "$body" | grep -qF '[skip-ai-review]'; then
6205
6238
  gh -R "$slug" pr review "$pr_number" --approve -b "Auto-approved: [skip-ai-review] detected" 2>/dev/null || true
@@ -8301,96 +8334,6 @@ PRRUNNER
8301
8334
  chmod +x "$script_path"
8302
8335
  }
8303
8336
 
8304
- # _write_ci_loop_runner_script <script_path> <project_path> <roll_bin> <log_path>
8305
- # US-AUTO-045 Phase 2: the script the com.roll.ci.<slug> launchd plist runs
8306
- # every 5 min. Mirrors _write_pr_loop_runner_script — lightweight (no agent,
8307
- # no tmux): portable PATH, a single-flight re-entry lock (pid+ts, 15-min
8308
- # staleness so a crashed pass self-heals next tick), then drives the _ci_scan
8309
- # orchestrator via the `roll _ci_scan` dispatch.
8310
- _write_ci_loop_runner_script() {
8311
- local script_path="$1" project_path="$2" roll_bin="$3" log_path="$4"
8312
- mkdir -p "$(dirname "$script_path")"
8313
- local lock="${project_path}/.roll/loop/.ci-loop.lock"
8314
- cat > "$script_path" << CIRUNNER
8315
- #!/bin/bash -l
8316
- set -o pipefail
8317
- # Portable PATH: launchd delivers a bare PATH missing brew/local tools. Idempotent.
8318
- for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
8319
- case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
8320
- done
8321
- export PATH
8322
- # Single-flight re-entry guard: one CI-loop pass at a time. 5-min cadence;
8323
- # 15-min (900s) staleness so a crashed/hung pass self-heals on the next tick.
8324
- LOCK="${lock}"
8325
- mkdir -p "\$(dirname "\$LOCK")"
8326
- if [ -f "\$LOCK" ]; then
8327
- _pp=""; _pt=""
8328
- IFS=: read -r _pp _pt < "\$LOCK" 2>/dev/null || true
8329
- _now=\$(date -u +%s)
8330
- if [ -n "\$_pp" ] && [ -n "\$_pt" ] && kill -0 "\$_pp" 2>/dev/null && [ "\$((_now - _pt))" -lt 900 ]; then
8331
- exit 0
8332
- fi
8333
- rm -f "\$LOCK"
8334
- fi
8335
- printf '%s:%s\n' "\$\$" "\$(date -u +%s)" > "\$LOCK"
8336
- trap 'rm -f "\$LOCK"' EXIT
8337
- cd "${project_path}" || exit 0
8338
- bash "${roll_bin}" _ci_scan >> "${log_path}" 2>&1 || true
8339
- CIRUNNER
8340
- chmod +x "$script_path"
8341
- }
8342
-
8343
- # _write_alert_loop_runner_script <script_path> <project_path> <roll_bin> <log_path>
8344
- # US-AUTO-046 Phase 2: the script the com.roll.alert.<slug> launchd plist runs
8345
- # every 1 min. Mirrors _write_ci_loop_runner_script — lightweight (no agent,
8346
- # no tmux): portable PATH, a single-flight re-entry lock (pid+ts), then drives
8347
- # the Phase-1 _alert_dispatch consumer via the `roll _alert_dispatch` dispatch.
8348
- # _alert_dispatch reads $_LOOP_ALERT, parses + notifies + records to
8349
- # alert-log.jsonl, then rotates the file. Staleness is 180s (3 ticks at the
8350
- # 1-min cadence) so a crashed/hung pass self-heals quickly.
8351
- _write_alert_loop_runner_script() {
8352
- local script_path="$1" project_path="$2" roll_bin="$3" log_path="$4"
8353
- mkdir -p "$(dirname "$script_path")"
8354
- local lock="${project_path}/.roll/loop/.alert-loop.lock"
8355
- local slug; slug=$(_project_slug "${project_path}")
8356
- cat > "$script_path" << ALERTRUNNER
8357
- #!/bin/bash -l
8358
- set -o pipefail
8359
- # Portable PATH: launchd delivers a bare PATH missing brew/local tools. Idempotent.
8360
- for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
8361
- case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
8362
- done
8363
- export PATH
8364
- # Single-flight re-entry guard: one alert-loop pass at a time. 1-min cadence;
8365
- # 180s staleness so a crashed/hung pass self-heals within a few ticks.
8366
- LOCK="${lock}"
8367
- mkdir -p "\$(dirname "\$LOCK")"
8368
- if [ -f "\$LOCK" ]; then
8369
- _pp=""; _pt=""
8370
- IFS=: read -r _pp _pt < "\$LOCK" 2>/dev/null || true
8371
- _now=\$(date -u +%s)
8372
- if [ -n "\$_pp" ] && [ -n "\$_pt" ] && kill -0 "\$_pp" 2>/dev/null && [ "\$((_now - _pt))" -lt 180 ]; then
8373
- exit 0
8374
- fi
8375
- rm -f "\$LOCK"
8376
- fi
8377
- printf '%s:%s\n' "\$\$" "\$(date -u +%s)" > "\$LOCK"
8378
- trap 'rm -f "\$LOCK"' EXIT
8379
- cd "${project_path}" || exit 0
8380
- # FIX-171: bake the project-local runtime dir directly; do not rely on
8381
- # _loop_runtime_dir which may fail to resolve in fresh shells. Set
8382
- # _LOOP_ALERT so the dispatched roll reads the project-local ALERT file,
8383
- # but do not override an externally-supplied value (test sandboxes).
8384
- _LOOP_RT_DIR="${project_path}/.roll/loop"
8385
- if [ -d "\$_LOOP_RT_DIR" ]; then
8386
- : "\${_LOOP_ALERT:=\${_LOOP_RT_DIR}/ALERT-${slug}.md}"
8387
- export _LOOP_ALERT
8388
- fi
8389
- bash "${roll_bin}" _alert_dispatch >> "${log_path}" 2>&1 || true
8390
- ALERTRUNNER
8391
- chmod +x "$script_path"
8392
- }
8393
-
8394
8337
  # Like _write_runner_script but prepends an active window guard.
8395
8338
  # Silently exits when current hour is outside [active_start, active_end).
8396
8339
  # When tmux is available, wraps the inner command in a detached tmux session
@@ -9733,15 +9676,11 @@ _install_launchd_plists() {
9733
9676
 
9734
9677
  # US-AUTO-044: "pr" is the 4th service — a 5-min PR Loop (period=5, empty hour
9735
9678
  # → StartInterval=300). No skill (it drives _loop_pr_inbox, not an agent).
9736
- # US-AUTO-045: "ci" is the 5th service — a 5-min CI Loop (period=5, empty hour
9737
- # → StartInterval=300). No skill (it drives _ci_scan, not an agent).
9738
- # US-AUTO-046: "alert" is the 6th service — a 1-min Alert Loop (period=1, empty
9739
- # hour → StartInterval=60). No skill (it drives _alert_dispatch, not an agent).
9740
- local services=("loop" "dream" "brief" "pr" "ci" "alert")
9741
- local skill_names=("roll-loop" "roll-.dream" "roll-brief" "" "" "")
9742
- local periods=("$loop_period" "60" "60" "5" "5" "1")
9743
- local offsets=("$loop_offset" "$dream_minute" "$brief_minute" "0" "0" "0")
9744
- local hours=("" "$dream_hour" "$brief_hour" "" "" "")
9679
+ local services=("loop" "dream" "pr")
9680
+ local skill_names=("roll-loop" "roll-.dream" "")
9681
+ local periods=("$loop_period" "60" "5")
9682
+ local offsets=("$loop_offset" "$dream_minute" "0")
9683
+ local hours=("" "$dream_hour" "")
9745
9684
 
9746
9685
  local updated=0
9747
9686
  local slug; slug=$(_project_slug "$project_path")
@@ -9774,22 +9713,8 @@ _install_launchd_plists() {
9774
9713
  local pr_log="${project_path}/.roll/loop/pr.log"
9775
9714
  mkdir -p "${project_path}/.roll/loop"
9776
9715
  _write_pr_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$pr_log"
9777
- elif [[ "$svc" == "ci" ]]; then
9778
- # US-AUTO-045 Phase 2: lightweight CI Loop runner — drives _ci_scan every
9779
- # 5 min (no agent, no tmux). Records run timing, auto-reruns transient
9780
- # failures, and surfaces flaky / degradation stories.
9781
- local ci_log="${project_path}/.roll/loop/ci.log"
9782
- mkdir -p "${project_path}/.roll/loop"
9783
- _write_ci_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$ci_log"
9784
- elif [[ "$svc" == "alert" ]]; then
9785
- # US-AUTO-046 Phase 2: lightweight Alert Loop runner — drives _alert_dispatch
9786
- # every 1 min (no agent, no tmux). Consumes _LOOP_ALERT: parse → notify →
9787
- # record to alert-log.jsonl → rotate the file.
9788
- local alert_log="${project_path}/.roll/loop/alert.log"
9789
- mkdir -p "${project_path}/.roll/loop"
9790
- _write_alert_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$alert_log"
9791
9716
  else
9792
- # IDEA-051: dream/brief cron logs are project-local, mirroring loop (FIX-139).
9717
+ # dream cron log is project-local, mirroring loop (FIX-139).
9793
9718
  local log="${project_path}/.roll/${svc}/cron.log"
9794
9719
  mkdir -p "${project_path}/.roll/${svc}"
9795
9720
  _write_runner_script "$runner" "$project_path" "cd \"${project_path}\" && ${cmd}" "$log"
@@ -9987,7 +9912,7 @@ _loop_on() {
9987
9912
  # does not disturb the overrides DB.
9988
9913
  local uid; uid=$(id -u)
9989
9914
  local all_loaded=true
9990
- for svc in loop dream brief pr ci alert; do
9915
+ for svc in loop dream pr; do
9991
9916
  local label; label=$(_launchd_label "$svc" "$project_path")
9992
9917
  local plist; plist=$(_launchd_plist_path "$svc" "$project_path")
9993
9918
  if ! _launchd_is_loaded "$label"; then
@@ -10054,7 +9979,7 @@ _loop_off() {
10054
9979
  if [[ "$(uname)" == "Darwin" ]]; then
10055
9980
  local any_loaded=false
10056
9981
  local _skip_off; _launchd_should_skip_registry && _skip_off=1 || _skip_off=0
10057
- for svc in loop dream brief pr ci alert; do
9982
+ for svc in loop dream pr; do
10058
9983
  local label; label=$(_launchd_label "$svc" "$project_path")
10059
9984
  if _launchd_is_loaded "$label"; then
10060
9985
  any_loaded=true
@@ -10069,7 +9994,7 @@ _loop_off() {
10069
9994
  fi
10070
9995
  local slug; slug=$(_project_slug "$project_path")
10071
9996
  local uid; uid=$(id -u)
10072
- for svc in loop dream brief pr ci alert; do
9997
+ for svc in loop dream pr; do
10073
9998
  rm -f "${_SHARED_ROOT}/${svc}/run-${slug}.sh"
10074
9999
  # FIX-081: reverse the FIX-059 auto-bootstrap guard. `_install_launchd_plists`
10075
10000
  # writes `launchctl disable gui/<UID>/<label>` for every brand-new plist
@@ -10405,7 +10330,7 @@ _legacy_loop_status() {
10405
10330
  echo ""
10406
10331
  if [[ "$(uname)" == "Darwin" ]]; then
10407
10332
  echo -e " Services Agent: ${CYAN}${agent}${NC}"
10408
- for svc in loop dream brief pr ci alert; do
10333
+ for svc in loop dream pr; do
10409
10334
  local state; state=$(_launchd_svc_state "$svc" "$project_path")
10410
10335
  if [[ "$svc" == "loop" ]] && $_is_paused; then
10411
10336
  local _paused_at; _paused_at=$(grep '^paused_at:' "$_LOOP_STATE" 2>/dev/null | awk '{print $2}' | tr -d '"')
@@ -10419,7 +10344,7 @@ _legacy_loop_status() {
10419
10344
  echo -e " ${YELLOW}loop ⏸ paused${NC}${_dur} run: roll loop resume"
10420
10345
  else
10421
10346
  local _tick_age=""
10422
- case "$svc" in pr|ci|alert)
10347
+ case "$svc" in pr)
10423
10348
  _tick_age=$(_loop_tick_age "$svc")
10424
10349
  [ -n "$_tick_age" ] && _tick_age=" tick ${_tick_age}"
10425
10350
  esac
@@ -11601,7 +11526,7 @@ _loop_pr_heal_self() {
11601
11526
 
11602
11527
  local agent; agent="$(_project_agent 2>/dev/null)"; agent="${agent:-claude}"
11603
11528
 
11604
- ( echo "$BASHPID" > "$lock"
11529
+ ( echo "${BASHPID:-$$}" > "$lock"
11605
11530
  _loop_pr_do_heal "$num" "$head_ref" "$slug" "$agent" >/dev/null 2>&1
11606
11531
  rm -f "$lock"
11607
11532
  ) &
@@ -11828,54 +11753,25 @@ _loop_is_roll_meta_story() {
11828
11753
 
11829
11754
  # _loop_pr_classify <head_ref> <human_review_state> <ci_state> <mergeable_state>
11830
11755
  # Prints one of:
11831
- # loop_self
11832
- # blocked_human_request_changes
11833
- # blocked_human_approved
11834
- # stale
11835
- # eligible
11836
- # Exit 0 always — callers parse the printed token.
11756
+ # ci_red — CI failed → heal
11757
+ # stale — needs rebase / conflicting / behind
11758
+ # ready — CI green + clean → merge
11759
+ # Human review intentionally irrelevant — CI is the only gate.
11837
11760
  _loop_pr_classify() {
11838
11761
  local head_ref="${1:-}"
11839
11762
  local human_review="${2:-}"
11840
11763
  local ci_state="${3:-}"
11841
11764
  local mergeable="${4:-}"
11842
11765
 
11843
- case "$head_ref" in
11844
- loop/*)
11845
- # US-LOOP-049: loop/* PRs with CI failure get their own classification
11846
- # so _loop_pr_inbox can route them to the PR hot-fix path.
11847
- if [[ "$ci_state" == "failure" ]]; then
11848
- echo "loop_self_ci_red"; return 0
11849
- fi
11850
- echo "loop_self"; return 0
11851
- ;;
11852
- claude/*)
11853
- # Claude-agent-authored PRs are loop-owned for autonomous merge/rebase
11854
- # once green — same treatment as loop/* — so they close within a
11855
- # PR-loop tick instead of waiting on a human or a GHA bot review.
11856
- # CI-red claude/* PRs are deliberately NOT routed to background heal
11857
- # (no agent re-spawn); they fall through to the stale/eligible paths
11858
- # below so a human decides what to do with a failing run.
11859
- if [[ "$ci_state" != "failure" ]]; then
11860
- echo "loop_self"; return 0
11861
- fi
11862
- ;;
11863
- esac
11864
-
11865
- case "$human_review" in
11866
- CHANGES_REQUESTED) echo "blocked_human_request_changes"; return 0 ;;
11867
- APPROVED) echo "blocked_human_approved"; return 0 ;;
11766
+ case "$mergeable" in
11767
+ BEHIND|DIRTY|CONFLICTING) echo "stale"; return 0 ;;
11868
11768
  esac
11869
11769
 
11870
- # CONFLICTING is the GraphQL `mergeable` enum; DIRTY/BEHIND are
11871
- # `mergeStateStatus` values (_loop_pr_inbox feeds the latter). Accept both
11872
- # spellings so a conflicting/out-of-date PR is reliably routed to rebase.
11873
- if [ "$ci_state" = "failure" ] || [ "$mergeable" = "CONFLICTING" ] || [ "$mergeable" = "DIRTY" ] || [ "$mergeable" = "BEHIND" ]; then
11874
- echo "stale"
11875
- return 0
11770
+ if [ "$ci_state" = "failure" ]; then
11771
+ echo "ci_red"; return 0
11876
11772
  fi
11877
11773
 
11878
- echo "eligible"
11774
+ echo "ready"
11879
11775
  }
11880
11776
 
11881
11777
  # _loop_pr_rebase_circuit <pr_number>
@@ -12013,6 +11909,9 @@ _loop_pr_rebase_stale() {
12013
11909
  fi
12014
11910
 
12015
11911
  git fetch origin "$head_ref" 2>/dev/null || return 0
11912
+ # Reset local tracking branch to the freshly-fetched remote state
11913
+ # before rebasing, otherwise force-push destroys commits pushed by others.
11914
+ git checkout -B "$head_ref" "origin/$head_ref" 2>/dev/null || return 0
12016
11915
 
12017
11916
  # FIX-159: save original branch so we can restore it unconditionally
12018
11917
  local _orig
@@ -12135,44 +12034,29 @@ _loop_pr_inbox() {
12135
12034
  verdict=$(_loop_pr_classify "$head_ref" "$human_review" "$ci_state" "$mergeable")
12136
12035
 
12137
12036
  case "$verdict" in
12138
- loop_self)
12139
- # Green self-PR: merge when clean, else rebase onto main first. A
12140
- # loop/* or claude/* PR that fell BEHIND or now CONFLICTS with main can
12141
- # never auto-merge until rebased — eager-merge alone would leave it
12142
- # stuck open forever. Rebase is circuit-gated (≥3 attempts/24h → ALERT)
12143
- # and merges on a later tick once the rebased head is green + clean.
12144
- case "$mergeable" in
12145
- BEHIND|DIRTY|CONFLICTING)
12146
- if _loop_pr_rebase_circuit "$num"; then
12147
- _loop_pr_rebase_stale "$num" "$head_ref" || true
12148
- fi
12149
- ;;
12150
- *)
12151
- _loop_pr_merge_self_eager "$num" "$ci_state" "$mergeable" "$slug"
12152
- ;;
12153
- esac
12154
- ;;
12155
- loop_self_ci_red)
12156
- # US-LOOP-062a: a red loop/* PR (classified by US-LOOP-049) is now
12157
- # background-healed: bounded retries via heal budget + dynamic agent,
12158
- # falling back to the deduped [TYPE:loop-pr-ci-red] ALERT (FIX-158's
12159
- # surfacing) when heal is disabled/exhausted. Re-wires US-LOOP-050.
12037
+ ci_red)
12160
12038
  _loop_pr_heal_self "$num" "$head_ref" "$slug" || true
12161
12039
  ;;
12162
- blocked_human_request_changes)
12163
- : # skip — last human review requested changes; wait for the author
12164
- ;;
12165
- blocked_human_approved)
12166
- # US-LOOP-062b: human approved — merge directly when green + mergeable
12167
- # (don't wait for repo auto-merge, which may be off).
12168
- _loop_pr_merge_approved "$num" "$ci_state" "$mergeable" "$slug" || true
12169
- ;;
12170
12040
  stale)
12171
12041
  _loop_pr_rebase_circuit "$num" || true
12172
- _loop_pr_rebase_stale "$num" "$head_ref" || true
12042
+ if _loop_pr_rebase_stale "$num" "$head_ref" || true; then
12043
+ # Re-fetch PR state after rebase — if now clean, merge immediately.
12044
+ local _re_view
12045
+ _re_view=$(gh -R "$slug" pr view "$num" --json mergeStateStatus,statusCheckRollup 2>/dev/null) || true
12046
+ if [ -n "$_re_view" ]; then
12047
+ local _re_ci _re_mb
12048
+ _re_ci=$(echo "$_re_view" | jq -r '
12049
+ if (.statusCheckRollup | length) == 0 then ""
12050
+ elif any(.statusCheckRollup[]?; .conclusion == "FAILURE") then "failure"
12051
+ elif all(.statusCheckRollup[]?; .conclusion == "SUCCESS" or .conclusion == "SKIPPED") then "success"
12052
+ else "pending" end' 2>/dev/null)
12053
+ _re_mb=$(echo "$_re_view" | jq -r '.mergeStateStatus // ""' 2>/dev/null)
12054
+ _loop_pr_merge_self_eager "$num" "$_re_ci" "$_re_mb" "$slug"
12055
+ fi
12056
+ fi
12173
12057
  ;;
12174
- eligible)
12175
- _loop_pr_review_external "$num" || true
12058
+ ready)
12059
+ _loop_pr_merge_self_eager "$num" "$ci_state" "$mergeable" "$slug"
12176
12060
  ;;
12177
12061
  esac
12178
12062
 
@@ -12370,569 +12254,13 @@ _loop_pr_route() {
12370
12254
  return 0
12371
12255
  }
12372
12256
 
12373
- # US-AUTO-045 Phase 1: dedicated CI Loop helpers (loop-safe pure additions).
12374
- #
12375
- # These six helpers collect CI timing data, classify failures, auto-rerun
12376
- # transient flakes, and surface flaky / degradation signals as backlog
12377
- # entries. They are NOT yet wired into any runner or launchd plist — that is
12378
- # Phase 2 (wired by hand). Each is unit-tested in
12379
- # tests/unit/roll_loop_ci_loop.bats with gh stubbed. Do not delete or inline.
12380
- #
12381
- # State lives under project-local .roll/state/:
12382
- # ci-timing.jsonl append-only NDJSON, one line per recorded CI run
12383
- # ci-rerun-state.yaml minimal YAML: rerun attempt count per run_id
12384
- # _LOOP_ALERT is the existing shared alert file (real failures, rerun limits).
12385
-
12386
- # _ci_state_dir
12387
- # Echo the project-local CI state directory, creating it if needed.
12388
- # Resolves relative to the current working dir's .roll/ (tests cd into a
12389
- # sandbox; the live loop runner cds into the project root).
12390
- _ci_state_dir() {
12257
+ # _alert_log_file echo path to alert-log.jsonl (used by `roll alert log` CLI).
12258
+ _alert_log_file() {
12391
12259
  local dir=".roll/state"
12392
12260
  mkdir -p "$dir" 2>/dev/null || true
12393
- echo "$dir"
12394
- }
12395
-
12396
- # _ci_record_timing <run_json>
12397
- # Parse one `gh run list --json ...` object and append a flat NDJSON line to
12398
- # ci-timing.jsonl. Idempotent: a run_id already present in the file is
12399
- # skipped. Duration is computed from createdAt → updatedAt (gh exposes no
12400
- # native duration field). Returns 0 always (loop-safe).
12401
- _ci_record_timing() {
12402
- local json="$1"
12403
- [ -n "$json" ] || return 0
12404
-
12405
- local run_id workflow conclusion status created updated
12406
- run_id=$(echo "$json" | jq -r '.databaseId // ""' 2>/dev/null)
12407
- [ -n "$run_id" ] || return 0
12408
-
12409
- local dir; dir=$(_ci_state_dir)
12410
- local file="${dir}/ci-timing.jsonl"
12411
-
12412
- # Idempotency: skip if this run_id is already recorded with a non-empty
12413
- # conclusion. If the existing record has an empty conclusion and the new
12414
- # data has a conclusion, update in-place so in-progress runs are completed.
12415
- if [ -f "$file" ] && grep -q "\"run_id\":${run_id}," "$file" 2>/dev/null; then
12416
- local existing_conclusion new_conclusion
12417
- existing_conclusion=$(grep "\"run_id\":${run_id}," "$file" 2>/dev/null | jq -r '.conclusion // ""' 2>/dev/null)
12418
- new_conclusion=$(echo "$json" | jq -r '.conclusion // ""' 2>/dev/null)
12419
- if [ -n "$existing_conclusion" ] || [ -z "$new_conclusion" ]; then
12420
- return 0
12421
- fi
12422
- # Remove the stale line so the new record can be appended below.
12423
- local tmpfile="${file}.tmp.$$"
12424
- grep -v "\"run_id\":${run_id}," "$file" > "$tmpfile" 2>/dev/null || true
12425
- mv "$tmpfile" "$file"
12426
- fi
12427
-
12428
- workflow=$(echo "$json" | jq -r '.workflowName // .name // ""' 2>/dev/null)
12429
- conclusion=$(echo "$json" | jq -r '.conclusion // ""' 2>/dev/null)
12430
- status=$(echo "$json" | jq -r '.status // ""' 2>/dev/null)
12431
- created=$(echo "$json" | jq -r '.createdAt // ""' 2>/dev/null)
12432
- updated=$(echo "$json" | jq -r '.updatedAt // ""' 2>/dev/null)
12433
-
12434
- # Duration in seconds from ISO-8601 timestamps; 0 if either is missing or
12435
- # unparseable. `date -j` (BSD) and `date -d` (GNU) differ — try both.
12436
- local dur=0 c_epoch u_epoch
12437
- if [ -n "$created" ] && [ -n "$updated" ]; then
12438
- c_epoch=$(_ci_iso_to_epoch "$created")
12439
- u_epoch=$(_ci_iso_to_epoch "$updated")
12440
- if [ -n "$c_epoch" ] && [ -n "$u_epoch" ] && [ "$u_epoch" -ge "$c_epoch" ] 2>/dev/null; then
12441
- dur=$((u_epoch - c_epoch))
12442
- fi
12443
- fi
12444
-
12445
- printf '{"run_id":%s,"workflow":"%s","conclusion":"%s","status":"%s","duration_sec":%s,"recorded_at":"%s"}\n' \
12446
- "$run_id" "$workflow" "$conclusion" "$status" "$dur" \
12447
- "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$file"
12448
- return 0
12449
- }
12450
-
12451
- # _ci_iso_to_epoch <iso8601>
12452
- # Convert an ISO-8601 UTC timestamp (2026-05-30T10:00:00Z) to epoch seconds.
12453
- # Echoes nothing on failure. Handles both BSD (macOS) and GNU date.
12454
- _ci_iso_to_epoch() {
12455
- local iso="$1"
12456
- [ -n "$iso" ] || return 0
12457
- local e
12458
- # GNU date
12459
- e=$(date -u -d "$iso" +%s 2>/dev/null) && { echo "$e"; return 0; }
12460
- # BSD date (strip trailing Z, parse explicit format)
12461
- local trimmed="${iso%Z}"
12462
- e=$(date -u -j -f "%Y-%m-%dT%H:%M:%S" "$trimmed" +%s 2>/dev/null) && { echo "$e"; return 0; }
12463
- return 0
12464
- }
12465
-
12466
- # _ci_classify_failure <run_id>
12467
- # Inspect `gh run view <id> --log-failed` and classify the failure as
12468
- # "transient" (infra flake: network, timeout, runner death) or "real"
12469
- # (genuine test/build failure). Echoes "transient" or "real".
12470
- # Empty / unavailable logs default to "real" (fail safe — don't auto-rerun
12471
- # something we can't read).
12472
- _ci_classify_failure() {
12473
- local run_id="$1"
12474
- [ -n "$run_id" ] || { echo "real"; return 0; }
12475
- local slug; _gh_resolve slug 2>/dev/null || slug=""
12476
-
12477
- local log
12478
- if [ -n "$slug" ]; then
12479
- log=$(gh -R "$slug" run view "$run_id" --log-failed 2>/dev/null)
12480
- else
12481
- log=$(gh run view "$run_id" --log-failed 2>/dev/null)
12482
- fi
12483
-
12484
- # Transient signatures: network/infra failures that a rerun typically clears.
12485
- if echo "$log" | grep -qiE 'ETIMEDOUT|ECONNRESET|ENOTFOUND|EAI_AGAIN|shutdown signal|runner.*(error|lost|terminated)|The runner has received a shutdown|503 Service|connection reset|TLS handshake|i/o timeout|could not resolve host'; then
12486
- echo "transient"
12487
- return 0
12488
- fi
12489
- echo "real"
12490
- return 0
12491
- }
12492
-
12493
- # _ci_rerun_state_file
12494
- # Echo path to ci-rerun-state.yaml (creating the dir).
12495
- _ci_rerun_state_file() {
12496
- local dir; dir=$(_ci_state_dir)
12497
- echo "${dir}/ci-rerun-state.yaml"
12498
- }
12499
-
12500
- # _ci_rerun_attempts <run_id>
12501
- # Echo the recorded rerun attempt count for <run_id> (0 if none).
12502
- _ci_rerun_attempts() {
12503
- local run_id="$1"
12504
- local file; file=$(_ci_rerun_state_file)
12505
- [ -f "$file" ] || { echo 0; return 0; }
12506
- local n
12507
- n=$(awk -v key="\"${run_id}\":" '$1 == key { print $2 }' "$file" 2>/dev/null | head -1)
12508
- case "$n" in
12509
- ''|*[!0-9]*) echo 0 ;;
12510
- *) echo "$n" ;;
12511
- esac
12512
- }
12513
-
12514
- # _ci_rerun_state_write <run_id> <attempts>
12515
- # Set the attempt count for <run_id> in ci-rerun-state.yaml. Minimal YAML
12516
- # writer (we own the schema): one `"<run_id>": <n>` line per run.
12517
- _ci_rerun_state_write() {
12518
- local run_id="$1" attempts="$2"
12519
- local file; file=$(_ci_rerun_state_file)
12520
- [ -f "$file" ] || : > "$file"
12521
- local tmp; tmp=$(mktemp)
12522
- awk -v key="\"${run_id}\":" -v val="$attempts" '
12523
- $1 == key { print key " " val; found=1; next }
12524
- { print }
12525
- END { if (!found) print key " " val }
12526
- ' "$file" > "$tmp" && mv "$tmp" "$file"
12527
- }
12528
-
12529
- # _ci_rerun_transient <run_id>
12530
- # Auto-rerun a transient CI failure, capped at 2 attempts. attempt<2 →
12531
- # `gh run rerun`; attempt>=2 → write an error ALERT. Echoes the action taken
12532
- # ("rerun" / "limit"). Loop-safe (returns 0).
12533
- _ci_rerun_transient() {
12534
- local run_id="$1"
12535
- [ -n "$run_id" ] || return 0
12536
- local slug; _gh_resolve slug 2>/dev/null || slug=""
12537
-
12538
- local attempts; attempts=$(_ci_rerun_attempts "$run_id")
12539
- if [ "$attempts" -ge 2 ]; then
12540
- local alert="$_LOOP_ALERT"
12541
- mkdir -p "$(dirname "$alert")" 2>/dev/null || true
12542
- printf '[%s] [error] [TYPE:ci-rerun-limit] CI rerun reached limit: run #%s (%s attempts)\n' \
12543
- "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$run_id" "$attempts" >> "$alert"
12544
- echo "limit"
12545
- return 0
12546
- fi
12547
-
12548
- if [ -n "$slug" ]; then
12549
- gh -R "$slug" run rerun "$run_id" >/dev/null 2>&1 || true
12550
- else
12551
- gh run rerun "$run_id" >/dev/null 2>&1 || true
12552
- fi
12553
- _ci_rerun_state_write "$run_id" "$((attempts + 1))"
12554
- echo "rerun"
12555
- return 0
12556
- }
12557
-
12558
- # _ci_open_story <type> <title>
12559
- # Append a FIX or US row to .roll/backlog.md's `| ID | Description | Status |`
12560
- # table. Idempotent: if a 📋 Todo row with the same title already exists, skip
12561
- # (echo "skip"). New IDs auto-increment from the max existing <TYPE>-NNN.
12562
- # Echoes the new ID on success, "skip" if already queued.
12563
- _ci_open_story() {
12564
- local type="$1" title="$2"
12565
- [ -n "$type" ] && [ -n "$title" ] || return 0
12566
-
12567
- # Resolve the backlog file (project-local).
12568
- local backlog=".roll/backlog.md"
12569
- [ -f "$backlog" ] || { echo "skip"; return 0; }
12570
-
12571
- # Idempotency: same title already queued as Todo → skip.
12572
- if grep -F "$title" "$backlog" 2>/dev/null | grep -q '📋 Todo'; then
12573
- echo "skip"
12574
- return 0
12575
- fi
12576
-
12577
- # Auto-increment: find the max existing <TYPE>-NNN id.
12578
- local prefix max next
12579
- prefix=$(echo "$type" | tr '[:lower:]' '[:upper:]')
12580
- max=$(grep -oE "${prefix}-[0-9]+" "$backlog" 2>/dev/null \
12581
- | sed "s/${prefix}-//" \
12582
- | sort -n | tail -1)
12583
- case "$max" in ''|*[!0-9]*) max=0 ;; esac
12584
- # 10# prefix forces base-10: a zero-padded id like 008/009 would otherwise be
12585
- # parsed as octal and either misnumber (010→8) or error ("value too great").
12586
- next=$((10#$max + 1))
12587
- local id
12588
- id=$(printf '%s-%03d' "$prefix" "$next")
12589
-
12590
- printf '| %s | %s | 📋 Todo |\n' "$id" "$title" >> "$backlog"
12591
- echo "$id"
12592
- return 0
12593
- }
12594
-
12595
- # _ci_detect_flaky
12596
- # Scan the last 20 ci-timing.jsonl lines, group by workflow, and flag any
12597
- # workflow whose recent runs have a 20%–80% failure rate (2..8 failures of
12598
- # the last 10) as flaky — opening a FIX story. Returns 0 (loop-safe).
12599
- _ci_detect_flaky() {
12600
- local dir; dir=$(_ci_state_dir)
12601
- local file="${dir}/ci-timing.jsonl"
12602
- [ -f "$file" ] || return 0
12603
-
12604
- # Per workflow: count total + failures over the most recent 10 records.
12605
- # awk reads last 20 lines (tail), keeps last 10 per workflow. Output is
12606
- # collected into a variable (not piped to `while`) so an empty result or
12607
- # an intermediate nonzero exit cannot trip a caller's ERR trap.
12608
- local flaky_wfs
12609
- flaky_wfs=$(tail -n 20 "$file" 2>/dev/null | awk '
12610
- {
12611
- # crude field extraction from flat JSON line
12612
- wf=""; concl="";
12613
- if (match($0, /"workflow":"[^"]*"/)) { wf=substr($0,RSTART+12,RLENGTH-13) }
12614
- if (match($0, /"conclusion":"[^"]*"/)) { concl=substr($0,RSTART+14,RLENGTH-15) }
12615
- if (wf=="") next
12616
- order[wf]=order[wf]" "NR
12617
- val[wf"|"NR]=concl
12618
- }
12619
- END {
12620
- for (wf in order) {
12621
- n=split(order[wf], idx, " ")
12622
- # keep most recent 10
12623
- start=1; if (n-10 > 0) start=n-9
12624
- total=0; fail=0
12625
- for (i=start;i<=n;i++) {
12626
- if (idx[i]=="") continue
12627
- total++
12628
- c=val[wf"|"idx[i]]
12629
- if (c=="failure" || c=="timed_out" || c=="cancelled") fail++
12630
- }
12631
- if (total>=4 && fail>=2 && fail<=8 && fail*100 <= total*80 && fail*100 >= total*20) {
12632
- print wf
12633
- }
12634
- }
12635
- }
12636
- ' || true)
12637
-
12638
- local wf
12639
- for wf in $flaky_wfs; do
12640
- [ -n "$wf" ] && _ci_open_story FIX "flaky: ${wf}" >/dev/null || true
12641
- done
12642
- return 0
12643
- }
12644
-
12645
- # _ci_detect_degradation
12646
- # Scan the last 20 ci-timing.jsonl lines, compute mean duration per workflow,
12647
- # and open a US story when a workflow crosses its threshold:
12648
- # unit* > 300s (5 min)
12649
- # integration* > 900s (15 min)
12650
- # Returns 0 (loop-safe).
12651
- _ci_detect_degradation() {
12652
- local dir; dir=$(_ci_state_dir)
12653
- local file="${dir}/ci-timing.jsonl"
12654
- [ -f "$file" ] || return 0
12655
-
12656
- local degraded
12657
- degraded=$(tail -n 20 "$file" 2>/dev/null | awk '
12658
- {
12659
- wf=""; dur=0;
12660
- if (match($0, /"workflow":"[^"]*"/)) { wf=substr($0,RSTART+12,RLENGTH-13) }
12661
- if (match($0, /"duration_sec":[0-9]+/)) { dur=substr($0,RSTART+15,RLENGTH-15)+0 }
12662
- if (wf=="") next
12663
- sum[wf]+=dur; cnt[wf]++
12664
- }
12665
- END {
12666
- for (wf in sum) {
12667
- if (cnt[wf]==0) continue
12668
- avg=sum[wf]/cnt[wf]
12669
- lc=tolower(wf)
12670
- if (index(lc,"unit")>0 && avg>300) { print wf "\t" int(avg) }
12671
- else if (index(lc,"integration")>0 && avg>900) { print wf "\t" int(avg) }
12672
- }
12673
- }
12674
- ' || true)
12675
-
12676
- local line wf avg
12677
- # IFS=newline so each "wf<TAB>avg" record is one iteration; field-split on TAB.
12678
- local _oifs="$IFS"
12679
- IFS='
12680
- '
12681
- for line in $degraded; do
12682
- IFS="$_oifs"
12683
- wf=$(printf '%s' "$line" | cut -f1)
12684
- avg=$(printf '%s' "$line" | cut -f2)
12685
- [ -n "$wf" ] && _ci_open_story US "CI degradation: ${wf} avg ${avg}s exceeds threshold" >/dev/null || true
12686
- IFS='
12687
- '
12688
- done
12689
- IFS="$_oifs"
12690
- return 0
12691
- }
12692
-
12693
- # _ci_scan
12694
- # US-AUTO-045 Phase 2 orchestrator: the entry the CI Loop runner drives every
12695
- # 5 min. Lists recent `main`-branch CI runs, records each run's timing, and on
12696
- # a `failure` conclusion classifies it — auto-rerunning transient infra
12697
- # flakes. After the loop it runs the flaky + degradation detectors over the
12698
- # accumulated history. Lenient on gh unavailability (missing / failed list →
12699
- # return 0) so the service never errors out a tick.
12700
- _ci_scan() {
12701
- local slug; _gh_resolve slug 2>/dev/null || { _loop_write_tick "ci" "idle" "gh_unavailable"; return 0; }
12702
-
12703
- local runs_json
12704
- runs_json=$(gh -R "$slug" run list --branch main \
12705
- --json databaseId,workflowName,name,conclusion,status,createdAt,updatedAt \
12706
- 2>/dev/null) || { _loop_write_tick "ci" "idle" "gh_error"; return 0; }
12707
- [ -n "$runs_json" ] || { _loop_write_tick "ci" "idle" "empty_response"; return 0; }
12708
-
12709
- # An empty list ("[]") still falls through to the detectors below: they run
12710
- # over accumulated history, not just this tick's runs.
12711
- local count; count=$(echo "$runs_json" | jq 'length' 2>/dev/null || echo 0)
12712
- case "$count" in ''|*[!0-9]*) count=0 ;; esac
12713
-
12714
- local i=0
12715
- while [ "$i" -lt "$count" ]; do
12716
- local run_json conclusion run_id
12717
- run_json=$(echo "$runs_json" | jq -c ".[$i]" 2>/dev/null)
12718
- _ci_record_timing "$run_json"
12719
-
12720
- conclusion=$(echo "$run_json" | jq -r '.conclusion // ""' 2>/dev/null)
12721
- if [ "$conclusion" = "failure" ]; then
12722
- run_id=$(echo "$run_json" | jq -r '.databaseId // ""' 2>/dev/null)
12723
- if [ -n "$run_id" ]; then
12724
- local kind; kind=$(_ci_classify_failure "$run_id")
12725
- [ "$kind" = "transient" ] && _ci_rerun_transient "$run_id" >/dev/null
12726
- fi
12727
- fi
12728
- i=$((i + 1))
12729
- done
12730
-
12731
- _ci_detect_flaky
12732
- _ci_detect_degradation
12733
- _loop_write_tick "ci" "acted" "scan_done"
12734
- return 0
12735
- }
12736
-
12737
- # ═══════════════════════════════════════════════════════════════════════════════
12738
- # US-AUTO-046 Phase 1: dedicated Alert Loop helpers (loop-safe, pure bash)
12739
- # ═══════════════════════════════════════════════════════════════════════════════
12740
- # These consume the existing $_LOOP_ALERT file — until now a write-only dumb file
12741
- # that every loop appends to but nobody reads. The Alert Loop turns it into a
12742
- # real consumer: parse → dedup (1h per category) → notify (error always) →
12743
- # log → rotate. They are NOT yet wired into any runner or launchd plist — that
12744
- # is Phase 2 (wired by hand). Each is unit-tested in
12745
- # tests/unit/roll_loop_alert_loop.bats with _notify stubbed. Do not delete or
12746
- # inline.
12747
- #
12748
- # State lives under project-local .roll/state/ (shared with the CI Loop):
12749
- # alert-log.jsonl append-only NDJSON, one line per consumed alert
12750
- # $_LOOP_ALERT.prev is the rotated copy (kept for debugging).
12751
- #
12752
- # Line format ($_LOOP_ALERT) — new tagged format, old format read-compatible:
12753
- # [2026-05-26T10:00:00] [error] [TYPE:ci-real-failure] CI failed: run #123
12754
- # [2026-05-26T10:00:00] some legacy message → level=warn category=legacy
12755
-
12756
- # _alert_parse_file [file]
12757
- # Parse each non-empty line of $_LOOP_ALERT (or <file>) into a TAB-separated
12758
- # record `ts<TAB>level<TAB>category<TAB>message`, one per output line. The
12759
- # leading `[ts]` is extracted when present; optional `[level]` and
12760
- # `[TYPE:category]` tags follow. Untagged (legacy) lines default to
12761
- # level=warn, category=legacy, with the whole remainder as the message.
12762
- # Markdown headers / ack footers (lines starting with `#` or `**`) are skipped.
12763
- # Echoes nothing for a missing/empty file. Loop-safe (returns 0).
12764
- _alert_parse_file() {
12765
- local file="${1:-$_LOOP_ALERT}"
12766
- [ -n "$file" ] && [ -f "$file" ] || return 0
12767
-
12768
- awk '
12769
- {
12770
- line=$0
12771
- # skip blank lines and markdown chrome (headers, ack footers)
12772
- if (line ~ /^[ \t]*$/) next
12773
- if (line ~ /^[ \t]*#/) next
12774
- if (line ~ /^[ \t]*\*\*/) next
12775
-
12776
- ts=""; level=""; category=""
12777
-
12778
- # leading [timestamp]
12779
- if (match(line, /^\[[^]]*\]/)) {
12780
- ts=substr(line, RSTART+1, RLENGTH-2)
12781
- line=substr(line, RSTART+RLENGTH)
12782
- sub(/^[ \t]+/, "", line)
12783
- }
12784
- # optional [level] (error|warn|info)
12785
- if (match(line, /^\[(error|warn|info)\]/)) {
12786
- level=substr(line, RSTART+1, RLENGTH-2)
12787
- line=substr(line, RSTART+RLENGTH)
12788
- sub(/^[ \t]+/, "", line)
12789
- }
12790
- # optional [TYPE:category]
12791
- if (match(line, /^\[TYPE:[^]]*\]/)) {
12792
- category=substr(line, RSTART+6, RLENGTH-7)
12793
- line=substr(line, RSTART+RLENGTH)
12794
- sub(/^[ \t]+/, "", line)
12795
- }
12796
-
12797
- # legacy "ALERT:" prefix on the remaining message — strip the keyword
12798
- sub(/^ALERT:[ \t]*/, "", line)
12799
-
12800
- if (level=="") level="warn"
12801
- if (category=="") category="legacy"
12802
-
12803
- printf "%s\t%s\t%s\t%s\n", ts, level, category, line
12804
- }
12805
- ' "$file"
12806
- return 0
12807
- }
12808
-
12809
- # _alert_log_file
12810
- # Echo path to .roll/state/alert-log.jsonl (creating the dir). Reuses the
12811
- # CI Loop's _ci_state_dir so both loops share one project-local state dir.
12812
- _alert_log_file() {
12813
- local dir; dir=$(_ci_state_dir)
12814
12261
  echo "${dir}/alert-log.jsonl"
12815
12262
  }
12816
12263
 
12817
- # _alert_should_notify <category> <level>
12818
- # Decide whether an alert should fire a notification.
12819
- # error → always true (immediate, never throttled)
12820
- # warn | info → true unless a same-category alert was already notified
12821
- # within the last hour (rate-limit / dedup)
12822
- # The 1h window is read from alert-log.jsonl (notified=1 entries only).
12823
- # Echoes "true" / "false".
12824
- _alert_should_notify() {
12825
- local category="$1" level="$2"
12826
- [ "$level" = "error" ] && { echo "true"; return 0; }
12827
-
12828
- local file; file=$(_alert_log_file)
12829
- [ -f "$file" ] || { echo "true"; return 0; }
12830
-
12831
- local now; now=$(date -u +%s)
12832
- # Most recent notified=1 entry for this category → its recorded_at epoch.
12833
- local last
12834
- last=$(grep -F "\"category\":\"${category}\"" "$file" 2>/dev/null \
12835
- | grep -F '"notified":1' \
12836
- | tail -1 \
12837
- | sed -n 's/.*"recorded_at":"\([^"]*\)".*/\1/p')
12838
- [ -n "$last" ] || { echo "true"; return 0; }
12839
-
12840
- local last_epoch; last_epoch=$(_ci_iso_to_epoch "$last")
12841
- [ -n "$last_epoch" ] || { echo "true"; return 0; }
12842
-
12843
- # Within 1h (3600s) → throttle (false); otherwise allow.
12844
- if [ "$((now - last_epoch))" -lt 3600 ] 2>/dev/null; then
12845
- echo "false"
12846
- else
12847
- echo "true"
12848
- fi
12849
- return 0
12850
- }
12851
-
12852
- # _alert_write_log <ts> <level> <category> <message> <notified>
12853
- # Append one NDJSON record to alert-log.jsonl. <notified> is the literal
12854
- # string "true"/"false" (or 1/0) and is normalized to 1/0. recorded_at is the
12855
- # consumption time (UTC), distinct from the alert's own <ts>. Quotes in the
12856
- # message are escaped so the line stays valid JSON. Loop-safe (returns 0).
12857
- _alert_write_log() {
12858
- local ts="$1" level="$2" category="$3" message="$4" notified="$5"
12859
- local file; file=$(_alert_log_file)
12860
-
12861
- local n=0
12862
- case "$notified" in true|1) n=1 ;; esac
12863
-
12864
- # Escape backslashes then double-quotes for JSON string safety.
12865
- local esc
12866
- esc=$(printf '%s' "$message" | sed 's/\\/\\\\/g; s/"/\\"/g')
12867
-
12868
- printf '{"ts":"%s","level":"%s","category":"%s","message":"%s","notified":%s,"recorded_at":"%s"}\n' \
12869
- "$ts" "$level" "$category" "$esc" "$n" \
12870
- "$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$file"
12871
- return 0
12872
- }
12873
-
12874
- # _alert_rotate [file]
12875
- # Snapshot $_LOOP_ALERT (or <file>) to <file>.prev and truncate it in place.
12876
- # Idempotent: a missing source is a no-op (the .prev from a prior run is
12877
- # left untouched). Loop-safe (returns 0).
12878
- #
12879
- # US-AUTO-046 (kimi peer-review Q2): copy+truncate instead of mv. `mv` swaps
12880
- # the inode at the path, so a producer loop (main/pr/ci) that opened its `>>`
12881
- # fd *before* the rotation but writes *after* it would land in `.prev` and be
12882
- # silently lost. Copying keeps the original inode at the path; the subsequent
12883
- # `:>` truncates that same inode, so any concurrent appender's fd still points
12884
- # at the live alert file and its write is read on the next 1-min tick.
12885
- _alert_rotate() {
12886
- local file="${1:-$_LOOP_ALERT}"
12887
- [ -n "$file" ] || return 0
12888
- if [ -f "$file" ]; then
12889
- cat "$file" > "${file}.prev" 2>/dev/null || true
12890
- : > "$file"
12891
- fi
12892
- return 0
12893
- }
12894
-
12895
- # _alert_dispatch [file]
12896
- # Main consumer entry point. Parse $_LOOP_ALERT → for each alert decide
12897
- # notify → fire _notify + record to alert-log.jsonl → rotate the file.
12898
- # A missing/empty alert file is a no-op (no rotate, no log). Loop-safe.
12899
- _alert_dispatch() {
12900
- local file="${1:-$_LOOP_ALERT}"
12901
- [ -n "$file" ] && [ -f "$file" ] || { _loop_write_tick "alert" "idle" "no_file"; return 0; }
12902
- # Empty file → nothing to consume, leave it in place.
12903
- [ -s "$file" ] || { _loop_write_tick "alert" "idle" "empty_file"; return 0; }
12904
-
12905
- local parsed; parsed=$(_alert_parse_file "$file")
12906
- [ -n "$parsed" ] || { _alert_rotate "$file"; _loop_write_tick "alert" "idle" "no_parsed"; return 0; }
12907
-
12908
- local line ts level category message notify
12909
- local _oifs="$IFS"
12910
- IFS='
12911
- '
12912
- for line in $parsed; do
12913
- IFS="$_oifs"
12914
- ts=$(printf '%s' "$line" | cut -f1)
12915
- level=$(printf '%s' "$line" | cut -f2)
12916
- category=$(printf '%s' "$line" | cut -f3)
12917
- message=$(printf '%s' "$line" | cut -f4-)
12918
-
12919
- notify=$(_alert_should_notify "$category" "$level")
12920
- if [ "$notify" = "true" ]; then
12921
- _notify "roll alert: ${level}" "${message}" || true
12922
- _alert_write_log "$ts" "$level" "$category" "$message" "true"
12923
- else
12924
- _alert_write_log "$ts" "$level" "$category" "$message" "false"
12925
- fi
12926
- IFS='
12927
- '
12928
- done
12929
- IFS="$_oifs"
12930
-
12931
- _alert_rotate "$file"
12932
- _loop_write_tick "alert" "acted" "dispatch_done"
12933
- return 0
12934
- }
12935
-
12936
12264
  # FIX-070: flip a story row in the main repo's .roll/backlog.md between
12937
12265
  # 📋 Todo and 🔨 In Progress. The cycle worktree is gitignored at .roll/,
12938
12266
  # so editing the worktree copy + committing leaves no trace in git — and
@@ -14480,7 +13808,7 @@ _loop_monitor() {
14480
13808
  dream_sched=$(printf "%02d:%02d" "$dream_hour" "$dream_minute")
14481
13809
  brief_sched=$(printf "%02d:%02d" "$brief_hour" "$brief_minute")
14482
13810
 
14483
- local svcs=("loop" "dream" "brief")
13811
+ local svcs=("loop" "dream" "pr")
14484
13812
  local scheds=("$loop_sched" "$dream_sched" "$brief_sched")
14485
13813
  for i in "${!svcs[@]}"; do
14486
13814
  local svc="${svcs[$i]}" schedule="${scheds[$i]}"
@@ -15909,11 +15237,10 @@ main() {
15909
15237
  test) cmd_test "$@" ;;
15910
15238
  prices) cmd_prices "$@" ;;
15911
15239
  changelog) cmd_changelog "$@" ;;
15240
+ consistency) cmd_consistency "$@" ;;
15912
15241
  config) cmd_config "$@" ;;
15913
15242
  _loop_render_exit_summary) _loop_render_exit_summary "$@" ;;
15914
15243
  _loop_pr_inbox) _loop_pr_inbox "$@" ;;
15915
- _ci_scan) _ci_scan "$@" ;;
15916
- _alert_dispatch) _alert_dispatch "$@" ;;
15917
15244
  version|--version|-v) echo "roll v${VERSION}" ;;
15918
15245
  help|--help|-h) _help "$@" ;;
15919
15246
  "") [[ -f ".roll/backlog.md" ]] && _home || { _help; _show_changelog; } ;;