@seanyao/roll 2.603.1 → 2.604.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +50 -11
- package/bin/roll +113 -800
- package/lib/__pycache__/changelog_audit.cpython-314.pyc +0 -0
- package/lib/__pycache__/changelog_generate.cpython-314.pyc +0 -0
- package/lib/__pycache__/loop-fmt.cpython-314.pyc +0 -0
- package/lib/__pycache__/prices_fetcher.cpython-314.pyc +0 -0
- package/lib/__pycache__/roll-loop-status.cpython-314.pyc +0 -0
- package/lib/changelog_audit.py +149 -0
- package/lib/changelog_generate.py +41 -23
- package/lib/consistency_check.py +409 -0
- package/lib/i18n/consistency.sh +8 -0
- package/lib/i18n/loop.sh +0 -4
- package/lib/prices/snapshot-2026-05-22.json +1 -7
- package/lib/prices/snapshot-2026-05-23-deepseek.json +0 -2
- package/lib/prices/snapshot-2026-06-02-kimi.json +0 -1
- package/lib/prices_fetcher.py +1 -20
- package/lib/roll-loop-status.py +15 -7
- package/package.json +1 -1
package/bin/roll
CHANGED
|
@@ -4,7 +4,7 @@ set -euo pipefail
|
|
|
4
4
|
# Roll — AI Agent Convention Manager
|
|
5
5
|
# Single source of truth for how all AI coding agents behave.
|
|
6
6
|
|
|
7
|
-
VERSION="2.603.1"
|
|
7
|
+
VERSION="2.604.2"
|
|
8
8
|
ROLL_HOME="${ROLL_HOME:-${HOME}/.roll}"
|
|
9
9
|
ROLL_CONFIG="${ROLL_HOME}/config.yaml"
|
|
10
10
|
ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
|
|
@@ -1092,8 +1092,6 @@ editor: ${EDITOR:-vim}
|
|
|
1092
1092
|
# loop_minute: 5 # omit to auto-derive from project hash
|
|
1093
1093
|
loop_dream_hour: 3
|
|
1094
1094
|
# loop_dream_minute: 10 # omit to auto-derive
|
|
1095
|
-
loop_brief_hour: 9
|
|
1096
|
-
# loop_brief_minute: 15 # omit to auto-derive
|
|
1097
1095
|
primary_agent: claude
|
|
1098
1096
|
YAML
|
|
1099
1097
|
ok "$(msg shared.created_roll_config_yaml)"
|
|
@@ -4046,6 +4044,11 @@ _peer_call() {
|
|
|
4046
4044
|
_watchdog_pid=$!
|
|
4047
4045
|
wait "$_peer_pid" 2>/dev/null || _peer_exit=$?
|
|
4048
4046
|
# Cancel watchdog if agent finished on time.
|
|
4047
|
+
# FIX-181: kill children (sleep) first so they cannot outlive the
|
|
4048
|
+
# watchdog and later hit a reused PID, then kill the watchdog itself.
|
|
4049
|
+
if command -v pkill >/dev/null 2>&1; then
|
|
4050
|
+
pkill -P "$_watchdog_pid" 2>/dev/null || true
|
|
4051
|
+
fi
|
|
4049
4052
|
kill "$_watchdog_pid" 2>/dev/null || true
|
|
4050
4053
|
wait "$_watchdog_pid" 2>/dev/null || true
|
|
4051
4054
|
output="$(cat "$_out" 2>/dev/null || true)"
|
|
@@ -5658,7 +5661,7 @@ cmd_changelog() {
|
|
|
5658
5661
|
esac
|
|
5659
5662
|
done
|
|
5660
5663
|
local raw
|
|
5661
|
-
raw=$(python3 "${ROLL_PKG_DIR}/lib/changelog_generate.py" "${pyargs[@]}") || return 1
|
|
5664
|
+
raw=$(python3 "${ROLL_PKG_DIR}/lib/changelog_generate.py" ${pyargs[@]+"${pyargs[@]}"}) || return 1
|
|
5662
5665
|
if [ "$is_json" = 1 ]; then printf '%s\n' "$raw"; return 0; fi
|
|
5663
5666
|
local final="$raw"
|
|
5664
5667
|
if [ "$want_ai" = 1 ]; then
|
|
@@ -5697,6 +5700,34 @@ EOF
|
|
|
5697
5700
|
esac
|
|
5698
5701
|
}
|
|
5699
5702
|
|
|
5703
|
+
# ─── roll consistency check — unified consistency orchestrator (US-CONSIST-001) ──
|
|
5704
|
+
cmd_consistency() {
|
|
5705
|
+
local subcmd="${1:-check}"
|
|
5706
|
+
shift || true
|
|
5707
|
+
case "$subcmd" in
|
|
5708
|
+
check)
|
|
5709
|
+
python3 "${ROLL_PKG_DIR}/lib/consistency_check.py" "$@"
|
|
5710
|
+
;;
|
|
5711
|
+
--help|-h|help)
|
|
5712
|
+
cat <<EOF
|
|
5713
|
+
Usage: roll consistency <subcommand>
|
|
5714
|
+
|
|
5715
|
+
check [--json] [--project-dir DIR] 逐维度跑一致性检查
|
|
5716
|
+
Run checks across five dimensions (code, docs, i18n, tests, site)
|
|
5717
|
+
and produce a structured pass/gap report.
|
|
5718
|
+
|
|
5719
|
+
roll consistency check # human-readable report
|
|
5720
|
+
roll consistency check --json # machine-readable JSON
|
|
5721
|
+
EOF
|
|
5722
|
+
;;
|
|
5723
|
+
*)
|
|
5724
|
+
err "$(msg consistency.unknown_sub "$subcmd")"
|
|
5725
|
+
err "Try: roll consistency check"
|
|
5726
|
+
return 1
|
|
5727
|
+
;;
|
|
5728
|
+
esac
|
|
5729
|
+
}
|
|
5730
|
+
|
|
5700
5731
|
# ─── roll config — unified read/list/set for loop schedule keys (US-LOOP-033) ──
|
|
5701
5732
|
#
|
|
5702
5733
|
# One interactive entry point so users don't have to remember whether a key
|
|
@@ -5719,8 +5750,6 @@ loop_schedule.period_minutes|project|nested:loop_schedule|1|1440|60
|
|
|
5719
5750
|
loop_schedule.offset_minute|project|nested:loop_schedule|0|59|0
|
|
5720
5751
|
loop_dream_hour|global|flat|0|23|3
|
|
5721
5752
|
loop_dream_minute|global|flat|0|59|-
|
|
5722
|
-
loop_brief_hour|global|flat|0|23|9
|
|
5723
|
-
loop_brief_minute|global|flat|0|59|-
|
|
5724
5753
|
EOF
|
|
5725
5754
|
}
|
|
5726
5755
|
|
|
@@ -5859,10 +5888,10 @@ Usage: roll config <key> print current value + source
|
|
|
5859
5888
|
roll config --list list all loop schedule keys
|
|
5860
5889
|
roll config <key> <value> [--global|--project] set a value
|
|
5861
5890
|
统一调度配置
|
|
5862
|
-
Read / list / set the loop
|
|
5891
|
+
Read / list / set the loop and dream schedule keys without hand-editing
|
|
5863
5892
|
yaml. Default write scope is --project (.roll/local.yaml); --global writes
|
|
5864
5893
|
~/.roll/config.yaml.
|
|
5865
|
-
读 / 列 / 写 loop、dream
|
|
5894
|
+
读 / 列 / 写 loop、dream 调度 key,免去手工编辑 yaml。默认写 --project
|
|
5866
5895
|
(.roll/local.yaml);--global 写 ~/.roll/config.yaml。
|
|
5867
5896
|
|
|
5868
5897
|
Supported keys (range):
|
|
@@ -5872,14 +5901,11 @@ Supported keys (range):
|
|
|
5872
5901
|
loop_schedule.offset_minute 0-59 minute offset within the period
|
|
5873
5902
|
loop_dream_hour 0-23 dream daily fire hour
|
|
5874
5903
|
loop_dream_minute 0-59 dream daily fire minute
|
|
5875
|
-
loop_brief_hour 0-23 brief daily fire hour
|
|
5876
|
-
loop_brief_minute 0-59 brief daily fire minute
|
|
5877
5904
|
|
|
5878
5905
|
Compact facades (write multiple keys at once):
|
|
5879
5906
|
roll config loop-window 9-18 loop_active_start + loop_active_end
|
|
5880
5907
|
roll config loop-schedule 30/7 period_minutes + offset_minute
|
|
5881
5908
|
roll config dream-time 03:20 loop_dream_hour + loop_dream_minute
|
|
5882
|
-
roll config brief-time 09:15 loop_brief_hour + loop_brief_minute
|
|
5883
5909
|
|
|
5884
5910
|
Examples:
|
|
5885
5911
|
roll config loop_dream_hour
|
|
@@ -5979,11 +6005,12 @@ _config_loop_schedule() {
|
|
|
5979
6005
|
return 0
|
|
5980
6006
|
}
|
|
5981
6007
|
|
|
5982
|
-
# US-LOOP-035: `roll config dream-time <HH:MM>`
|
|
5983
|
-
#
|
|
5984
|
-
#
|
|
6008
|
+
# US-LOOP-035: `roll config dream-time <HH:MM>` — compact facade writing
|
|
6009
|
+
# loop_<svc>_hour + loop_<svc>_minute in one shot. With no value, prints the
|
|
6010
|
+
# current effective time + source. HH ∈ [0,23], MM ∈ [0,59].
|
|
5985
6011
|
# These keys are global-scoped, so writes land in ~/.roll/config.yaml.
|
|
5986
|
-
#
|
|
6012
|
+
# FIX-195: brief retired — svc is {dream} (the helper stays generic).
|
|
6013
|
+
# _config_daily_time <svc> <value>
|
|
5987
6014
|
_config_daily_time() {
|
|
5988
6015
|
local svc="$1" value="$2"
|
|
5989
6016
|
local hour_key="loop_${svc}_hour" min_key="loop_${svc}_minute"
|
|
@@ -6082,8 +6109,9 @@ cmd_config() {
|
|
|
6082
6109
|
[[ $_rc -eq 0 && -n "$value" ]] && _config_reload_schedule
|
|
6083
6110
|
return $_rc
|
|
6084
6111
|
;;
|
|
6085
|
-
dream-time
|
|
6086
|
-
#
|
|
6112
|
+
dream-time)
|
|
6113
|
+
# FIX-195: brief-time retired with the brief loop; dream-time is the only
|
|
6114
|
+
# daily schedule facade. The key is global-scoped (~/.roll/config.yaml).
|
|
6087
6115
|
local fscope="$scope"; [[ -z "$fscope" ]] && fscope="global"
|
|
6088
6116
|
ROLL_CFG_SCOPE="$fscope"
|
|
6089
6117
|
local _rc
|
|
@@ -6138,7 +6166,7 @@ cmd_config() {
|
|
|
6138
6166
|
fi
|
|
6139
6167
|
_config_set "$key" "$value" "$file"
|
|
6140
6168
|
ok "✓ set $key = $value in $file"
|
|
6141
|
-
# US-LOOP-036: every recognized config key is a loop/dream
|
|
6169
|
+
# US-LOOP-036: every recognized config key is a loop/dream schedule key
|
|
6142
6170
|
# (display-only keys are out of scope for this command), so a successful write
|
|
6143
6171
|
# always reloads the launchd plists.
|
|
6144
6172
|
_config_reload_schedule
|
|
@@ -6192,14 +6220,14 @@ cmd_review_pr() {
|
|
|
6192
6220
|
|
|
6193
6221
|
local slug; slug=$(_gh_repo_slug) || { err "Not a GitHub repo — review-pr requires GitHub remote"; return 1; }
|
|
6194
6222
|
|
|
6195
|
-
local pr_json
|
|
6196
|
-
pr_json=$(gh -R "$slug" pr view "$pr_number" --json title,body
|
|
6223
|
+
local pr_json diff
|
|
6224
|
+
pr_json=$(gh -R "$slug" pr view "$pr_number" --json title,body 2>&1) \
|
|
6197
6225
|
|| { err "gh pr view failed: ${pr_json}"; return 1; }
|
|
6226
|
+
diff=$(gh -R "$slug" pr diff "$pr_number" 2>/dev/null) || true
|
|
6198
6227
|
|
|
6199
6228
|
local title body diff
|
|
6200
6229
|
title=$(echo "$pr_json" | jq -r '.title // ""')
|
|
6201
6230
|
body=$(echo "$pr_json" | jq -r '.body // ""')
|
|
6202
|
-
diff=$(echo "$pr_json" | jq -r '.diff // ""')
|
|
6203
6231
|
|
|
6204
6232
|
if echo "$body" | grep -qF '[skip-ai-review]'; then
|
|
6205
6233
|
gh -R "$slug" pr review "$pr_number" --approve -b "Auto-approved: [skip-ai-review] detected" 2>/dev/null || true
|
|
@@ -8301,96 +8329,6 @@ PRRUNNER
|
|
|
8301
8329
|
chmod +x "$script_path"
|
|
8302
8330
|
}
|
|
8303
8331
|
|
|
8304
|
-
# _write_ci_loop_runner_script <script_path> <project_path> <roll_bin> <log_path>
|
|
8305
|
-
# US-AUTO-045 Phase 2: the script the com.roll.ci.<slug> launchd plist runs
|
|
8306
|
-
# every 5 min. Mirrors _write_pr_loop_runner_script — lightweight (no agent,
|
|
8307
|
-
# no tmux): portable PATH, a single-flight re-entry lock (pid+ts, 15-min
|
|
8308
|
-
# staleness so a crashed pass self-heals next tick), then drives the _ci_scan
|
|
8309
|
-
# orchestrator via the `roll _ci_scan` dispatch.
|
|
8310
|
-
_write_ci_loop_runner_script() {
|
|
8311
|
-
local script_path="$1" project_path="$2" roll_bin="$3" log_path="$4"
|
|
8312
|
-
mkdir -p "$(dirname "$script_path")"
|
|
8313
|
-
local lock="${project_path}/.roll/loop/.ci-loop.lock"
|
|
8314
|
-
cat > "$script_path" << CIRUNNER
|
|
8315
|
-
#!/bin/bash -l
|
|
8316
|
-
set -o pipefail
|
|
8317
|
-
# Portable PATH: launchd delivers a bare PATH missing brew/local tools. Idempotent.
|
|
8318
|
-
for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
|
|
8319
|
-
case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
|
|
8320
|
-
done
|
|
8321
|
-
export PATH
|
|
8322
|
-
# Single-flight re-entry guard: one CI-loop pass at a time. 5-min cadence;
|
|
8323
|
-
# 15-min (900s) staleness so a crashed/hung pass self-heals on the next tick.
|
|
8324
|
-
LOCK="${lock}"
|
|
8325
|
-
mkdir -p "\$(dirname "\$LOCK")"
|
|
8326
|
-
if [ -f "\$LOCK" ]; then
|
|
8327
|
-
_pp=""; _pt=""
|
|
8328
|
-
IFS=: read -r _pp _pt < "\$LOCK" 2>/dev/null || true
|
|
8329
|
-
_now=\$(date -u +%s)
|
|
8330
|
-
if [ -n "\$_pp" ] && [ -n "\$_pt" ] && kill -0 "\$_pp" 2>/dev/null && [ "\$((_now - _pt))" -lt 900 ]; then
|
|
8331
|
-
exit 0
|
|
8332
|
-
fi
|
|
8333
|
-
rm -f "\$LOCK"
|
|
8334
|
-
fi
|
|
8335
|
-
printf '%s:%s\n' "\$\$" "\$(date -u +%s)" > "\$LOCK"
|
|
8336
|
-
trap 'rm -f "\$LOCK"' EXIT
|
|
8337
|
-
cd "${project_path}" || exit 0
|
|
8338
|
-
bash "${roll_bin}" _ci_scan >> "${log_path}" 2>&1 || true
|
|
8339
|
-
CIRUNNER
|
|
8340
|
-
chmod +x "$script_path"
|
|
8341
|
-
}
|
|
8342
|
-
|
|
8343
|
-
# _write_alert_loop_runner_script <script_path> <project_path> <roll_bin> <log_path>
|
|
8344
|
-
# US-AUTO-046 Phase 2: the script the com.roll.alert.<slug> launchd plist runs
|
|
8345
|
-
# every 1 min. Mirrors _write_ci_loop_runner_script — lightweight (no agent,
|
|
8346
|
-
# no tmux): portable PATH, a single-flight re-entry lock (pid+ts), then drives
|
|
8347
|
-
# the Phase-1 _alert_dispatch consumer via the `roll _alert_dispatch` dispatch.
|
|
8348
|
-
# _alert_dispatch reads $_LOOP_ALERT, parses + notifies + records to
|
|
8349
|
-
# alert-log.jsonl, then rotates the file. Staleness is 180s (3 ticks at the
|
|
8350
|
-
# 1-min cadence) so a crashed/hung pass self-heals quickly.
|
|
8351
|
-
_write_alert_loop_runner_script() {
|
|
8352
|
-
local script_path="$1" project_path="$2" roll_bin="$3" log_path="$4"
|
|
8353
|
-
mkdir -p "$(dirname "$script_path")"
|
|
8354
|
-
local lock="${project_path}/.roll/loop/.alert-loop.lock"
|
|
8355
|
-
local slug; slug=$(_project_slug "${project_path}")
|
|
8356
|
-
cat > "$script_path" << ALERTRUNNER
|
|
8357
|
-
#!/bin/bash -l
|
|
8358
|
-
set -o pipefail
|
|
8359
|
-
# Portable PATH: launchd delivers a bare PATH missing brew/local tools. Idempotent.
|
|
8360
|
-
for _d in /opt/homebrew/bin /usr/local/bin /opt/local/bin "\$HOME/.local/bin" "\$HOME/.kimi-code/bin"; do
|
|
8361
|
-
case ":\$PATH:" in *":\$_d:"*) ;; *) [ -d "\$_d" ] && PATH="\$_d:\$PATH" ;; esac
|
|
8362
|
-
done
|
|
8363
|
-
export PATH
|
|
8364
|
-
# Single-flight re-entry guard: one alert-loop pass at a time. 1-min cadence;
|
|
8365
|
-
# 180s staleness so a crashed/hung pass self-heals within a few ticks.
|
|
8366
|
-
LOCK="${lock}"
|
|
8367
|
-
mkdir -p "\$(dirname "\$LOCK")"
|
|
8368
|
-
if [ -f "\$LOCK" ]; then
|
|
8369
|
-
_pp=""; _pt=""
|
|
8370
|
-
IFS=: read -r _pp _pt < "\$LOCK" 2>/dev/null || true
|
|
8371
|
-
_now=\$(date -u +%s)
|
|
8372
|
-
if [ -n "\$_pp" ] && [ -n "\$_pt" ] && kill -0 "\$_pp" 2>/dev/null && [ "\$((_now - _pt))" -lt 180 ]; then
|
|
8373
|
-
exit 0
|
|
8374
|
-
fi
|
|
8375
|
-
rm -f "\$LOCK"
|
|
8376
|
-
fi
|
|
8377
|
-
printf '%s:%s\n' "\$\$" "\$(date -u +%s)" > "\$LOCK"
|
|
8378
|
-
trap 'rm -f "\$LOCK"' EXIT
|
|
8379
|
-
cd "${project_path}" || exit 0
|
|
8380
|
-
# FIX-171: bake the project-local runtime dir directly; do not rely on
|
|
8381
|
-
# _loop_runtime_dir which may fail to resolve in fresh shells. Set
|
|
8382
|
-
# _LOOP_ALERT so the dispatched roll reads the project-local ALERT file,
|
|
8383
|
-
# but do not override an externally-supplied value (test sandboxes).
|
|
8384
|
-
_LOOP_RT_DIR="${project_path}/.roll/loop"
|
|
8385
|
-
if [ -d "\$_LOOP_RT_DIR" ]; then
|
|
8386
|
-
: "\${_LOOP_ALERT:=\${_LOOP_RT_DIR}/ALERT-${slug}.md}"
|
|
8387
|
-
export _LOOP_ALERT
|
|
8388
|
-
fi
|
|
8389
|
-
bash "${roll_bin}" _alert_dispatch >> "${log_path}" 2>&1 || true
|
|
8390
|
-
ALERTRUNNER
|
|
8391
|
-
chmod +x "$script_path"
|
|
8392
|
-
}
|
|
8393
|
-
|
|
8394
8332
|
# Like _write_runner_script but prepends an active window guard.
|
|
8395
8333
|
# Silently exits when current hour is outside [active_start, active_end).
|
|
8396
8334
|
# When tmux is available, wraps the inner command in a detached tmux session
|
|
@@ -9715,9 +9653,10 @@ _install_launchd_plists() {
|
|
|
9715
9653
|
local shared="${_SHARED_ROOT}"
|
|
9716
9654
|
|
|
9717
9655
|
mkdir -p "$_LAUNCHD_DIR"
|
|
9718
|
-
|
|
9656
|
+
# FIX-194/FIX-195: brief/ci/alert loops retired — only loop/dream/pr remain.
|
|
9657
|
+
mkdir -p "${shared}/loop" "${shared}/dream" "${shared}/pr"
|
|
9719
9658
|
|
|
9720
|
-
local active_start active_end dream_hour dream_minute
|
|
9659
|
+
local active_start active_end dream_hour dream_minute loop_period loop_offset
|
|
9721
9660
|
local _aw; _aw=$(_loop_read_active_window "$project_path")
|
|
9722
9661
|
active_start="${_aw%% *}"; active_end="${_aw##* }"
|
|
9723
9662
|
# US-LOOP-012: use _loop_schedule_spec instead of raw loop_minute
|
|
@@ -9726,22 +9665,16 @@ _install_launchd_plists() {
|
|
|
9726
9665
|
loop_offset="${loop_spec##* }"
|
|
9727
9666
|
dream_hour=$(_config_read_int "loop_dream_hour" "3")
|
|
9728
9667
|
dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")
|
|
9729
|
-
brief_hour=$(_config_read_int "loop_brief_hour" "9")
|
|
9730
|
-
brief_minute=$(_config_read_int "loop_brief_minute" "$(_loop_derive_minute "$project_path" 4)")
|
|
9731
9668
|
|
|
9732
9669
|
# FIX-054: terminal preference removed — runner always uses Terminal.app.
|
|
9733
9670
|
|
|
9734
9671
|
# US-AUTO-044: "pr" is the 4th service — a 5-min PR Loop (period=5, empty hour
|
|
9735
9672
|
# → StartInterval=300). No skill (it drives _loop_pr_inbox, not an agent).
|
|
9736
|
-
|
|
9737
|
-
|
|
9738
|
-
|
|
9739
|
-
|
|
9740
|
-
local
|
|
9741
|
-
local skill_names=("roll-loop" "roll-.dream" "roll-brief" "" "" "")
|
|
9742
|
-
local periods=("$loop_period" "60" "60" "5" "5" "1")
|
|
9743
|
-
local offsets=("$loop_offset" "$dream_minute" "$brief_minute" "0" "0" "0")
|
|
9744
|
-
local hours=("" "$dream_hour" "$brief_hour" "" "" "")
|
|
9673
|
+
local services=("loop" "dream" "pr")
|
|
9674
|
+
local skill_names=("roll-loop" "roll-.dream" "")
|
|
9675
|
+
local periods=("$loop_period" "60" "5")
|
|
9676
|
+
local offsets=("$loop_offset" "$dream_minute" "0")
|
|
9677
|
+
local hours=("" "$dream_hour" "")
|
|
9745
9678
|
|
|
9746
9679
|
local updated=0
|
|
9747
9680
|
local slug; slug=$(_project_slug "$project_path")
|
|
@@ -9774,22 +9707,8 @@ _install_launchd_plists() {
|
|
|
9774
9707
|
local pr_log="${project_path}/.roll/loop/pr.log"
|
|
9775
9708
|
mkdir -p "${project_path}/.roll/loop"
|
|
9776
9709
|
_write_pr_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$pr_log"
|
|
9777
|
-
elif [[ "$svc" == "ci" ]]; then
|
|
9778
|
-
# US-AUTO-045 Phase 2: lightweight CI Loop runner — drives _ci_scan every
|
|
9779
|
-
# 5 min (no agent, no tmux). Records run timing, auto-reruns transient
|
|
9780
|
-
# failures, and surfaces flaky / degradation stories.
|
|
9781
|
-
local ci_log="${project_path}/.roll/loop/ci.log"
|
|
9782
|
-
mkdir -p "${project_path}/.roll/loop"
|
|
9783
|
-
_write_ci_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$ci_log"
|
|
9784
|
-
elif [[ "$svc" == "alert" ]]; then
|
|
9785
|
-
# US-AUTO-046 Phase 2: lightweight Alert Loop runner — drives _alert_dispatch
|
|
9786
|
-
# every 1 min (no agent, no tmux). Consumes _LOOP_ALERT: parse → notify →
|
|
9787
|
-
# record to alert-log.jsonl → rotate the file.
|
|
9788
|
-
local alert_log="${project_path}/.roll/loop/alert.log"
|
|
9789
|
-
mkdir -p "${project_path}/.roll/loop"
|
|
9790
|
-
_write_alert_loop_runner_script "$runner" "$project_path" "${ROLL_PKG_DIR}/bin/roll" "$alert_log"
|
|
9791
9710
|
else
|
|
9792
|
-
#
|
|
9711
|
+
# dream cron log is project-local, mirroring loop (FIX-139).
|
|
9793
9712
|
local log="${project_path}/.roll/${svc}/cron.log"
|
|
9794
9713
|
mkdir -p "${project_path}/.roll/${svc}"
|
|
9795
9714
|
_write_runner_script "$runner" "$project_path" "cd \"${project_path}\" && ${cmd}" "$log"
|
|
@@ -9915,7 +9834,7 @@ cmd_loop() {
|
|
|
9915
9834
|
*) cat <<'HELP'
|
|
9916
9835
|
Usage: roll loop <on|off|now|test|status|monitor|runs|log|story|events|attach|mute|unmute|pause|resume|reset|gc|branches>
|
|
9917
9836
|
|
|
9918
|
-
on Install launchd scheduler (loop + dream +
|
|
9837
|
+
on Install launchd scheduler (loop + dream + pr)
|
|
9919
9838
|
off Remove launchd scheduler
|
|
9920
9839
|
now Run one cycle immediately
|
|
9921
9840
|
test Quick smoke test (tmux/popup/stream chain)
|
|
@@ -9960,7 +9879,7 @@ _loop_on() {
|
|
|
9960
9879
|
local project_path; project_path=$(pwd -P)
|
|
9961
9880
|
local agent; agent=$(_project_agent)
|
|
9962
9881
|
|
|
9963
|
-
local active_start active_end loop_minute dream_hour dream_minute
|
|
9882
|
+
local active_start active_end loop_minute dream_hour dream_minute
|
|
9964
9883
|
local _aw; _aw=$(_loop_read_active_window "$project_path")
|
|
9965
9884
|
active_start="${_aw%% *}"; active_end="${_aw##* }"
|
|
9966
9885
|
# US-LOOP-011: read schedule spec from project or global config
|
|
@@ -9975,8 +9894,6 @@ _loop_on() {
|
|
|
9975
9894
|
loop_sched_zh=$(_loop_schedule_desc "$loop_period" "$loop_offset" zh)
|
|
9976
9895
|
dream_hour=$(_config_read_int "loop_dream_hour" "3")
|
|
9977
9896
|
dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")
|
|
9978
|
-
brief_hour=$(_config_read_int "loop_brief_hour" "9")
|
|
9979
|
-
brief_minute=$(_config_read_int "loop_brief_minute" "$(_loop_derive_minute "$project_path" 4)")
|
|
9980
9897
|
|
|
9981
9898
|
if [[ "$(uname)" == "Darwin" ]]; then
|
|
9982
9899
|
_install_launchd_plists "$project_path" >/dev/null
|
|
@@ -9987,7 +9904,7 @@ _loop_on() {
|
|
|
9987
9904
|
# does not disturb the overrides DB.
|
|
9988
9905
|
local uid; uid=$(id -u)
|
|
9989
9906
|
local all_loaded=true
|
|
9990
|
-
for svc in loop dream
|
|
9907
|
+
for svc in loop dream pr; do
|
|
9991
9908
|
local label; label=$(_launchd_label "$svc" "$project_path")
|
|
9992
9909
|
local plist; plist=$(_launchd_plist_path "$svc" "$project_path")
|
|
9993
9910
|
if ! _launchd_is_loaded "$label"; then
|
|
@@ -10011,7 +9928,6 @@ _loop_on() {
|
|
|
10011
9928
|
msg loop.roll_loop_s_active_02d_00 \
|
|
10012
9929
|
"$loop_sched_en" "$active_start" "$active_end" "$loop_sched_zh" "$active_start" "$active_end"
|
|
10013
9930
|
msg loop.roll_dream_daily_at_02d_02d "$dream_hour" "$dream_minute" "$dream_hour" "$dream_minute"
|
|
10014
|
-
msg loop.roll_brief_daily_at_02d_02d "$brief_hour" "$brief_minute" "$brief_hour" "$brief_minute"
|
|
10015
9931
|
echo " • Agent: ${agent} (change: roll agent use <name>)"
|
|
10016
9932
|
return 0
|
|
10017
9933
|
fi
|
|
@@ -10022,29 +9938,26 @@ _loop_on() {
|
|
|
10022
9938
|
warn "$(msg loop.loop_already_enabled_for_this_project_2)"; return 0
|
|
10023
9939
|
fi
|
|
10024
9940
|
|
|
10025
|
-
mkdir -p "${_SHARED_ROOT}/loop" "${_SHARED_ROOT}/dream"
|
|
9941
|
+
mkdir -p "${_SHARED_ROOT}/loop" "${_SHARED_ROOT}/dream"
|
|
10026
9942
|
|
|
10027
9943
|
# FIX-052: per-project cron logs so concurrent projects don't interleave.
|
|
10028
9944
|
local slug; slug=$(_project_slug "$project_path")
|
|
10029
|
-
local loop_cmd dream_cmd
|
|
9945
|
+
local loop_cmd dream_cmd
|
|
10030
9946
|
loop_cmd="cd \"${project_path}\" && $(_agent_skill_cmd "${sd}/roll-loop/SKILL.md") >> ${_SHARED_ROOT}/loop/cron-${slug}.log 2>&1"
|
|
10031
|
-
# IDEA-051: dream
|
|
10032
|
-
mkdir -p "${project_path}/.roll/dream"
|
|
9947
|
+
# IDEA-051: dream cron log is project-local, mirroring loop (FIX-139).
|
|
9948
|
+
mkdir -p "${project_path}/.roll/dream"
|
|
10033
9949
|
dream_cmd="cd \"${project_path}\" && $(_agent_skill_cmd "${sd}/roll-.dream/SKILL.md") >> ${project_path}/.roll/dream/cron.log 2>&1"
|
|
10034
|
-
brief_cmd="cd \"${project_path}\" && $(_agent_skill_cmd "${sd}/roll-brief/SKILL.md") >> ${project_path}/.roll/brief/cron.log 2>&1"
|
|
10035
9950
|
|
|
10036
9951
|
(
|
|
10037
9952
|
crontab -l 2>/dev/null
|
|
10038
9953
|
printf "%d * * * * %s %s:%s\n" "$loop_minute" "$loop_cmd" "$_LOOP_TAG" "$project_path"
|
|
10039
9954
|
printf "%d %d * * * %s %s:%s\n" "$dream_minute" "$dream_hour" "$dream_cmd" "$_LOOP_TAG" "$project_path"
|
|
10040
|
-
printf "%d %d * * * %s %s:%s\n" "$brief_minute" "$brief_hour" "$brief_cmd" "$_LOOP_TAG" "$project_path"
|
|
10041
9955
|
) | crontab -
|
|
10042
9956
|
|
|
10043
9957
|
ok "$(msg loop.loop_enabled_2)"
|
|
10044
9958
|
msg loop.roll_loop_s_active_02d_00_2 \
|
|
10045
9959
|
"$loop_sched_en" "$active_start" "$active_end" "$loop_sched_zh" "$active_start" "$active_end"
|
|
10046
9960
|
msg loop.roll_dream_daily_at_02d_02d_2 "$dream_hour" "$dream_minute" "$dream_hour" "$dream_minute"
|
|
10047
|
-
msg loop.roll_brief_daily_at_02d_02d_2 "$brief_hour" "$brief_minute" "$brief_hour" "$brief_minute"
|
|
10048
9961
|
echo " • Agent: ${agent} (change: roll agent use <name>)"
|
|
10049
9962
|
}
|
|
10050
9963
|
|
|
@@ -10054,7 +9967,7 @@ _loop_off() {
|
|
|
10054
9967
|
if [[ "$(uname)" == "Darwin" ]]; then
|
|
10055
9968
|
local any_loaded=false
|
|
10056
9969
|
local _skip_off; _launchd_should_skip_registry && _skip_off=1 || _skip_off=0
|
|
10057
|
-
for svc in loop dream
|
|
9970
|
+
for svc in loop dream pr; do
|
|
10058
9971
|
local label; label=$(_launchd_label "$svc" "$project_path")
|
|
10059
9972
|
if _launchd_is_loaded "$label"; then
|
|
10060
9973
|
any_loaded=true
|
|
@@ -10069,7 +9982,7 @@ _loop_off() {
|
|
|
10069
9982
|
fi
|
|
10070
9983
|
local slug; slug=$(_project_slug "$project_path")
|
|
10071
9984
|
local uid; uid=$(id -u)
|
|
10072
|
-
for svc in loop dream
|
|
9985
|
+
for svc in loop dream pr; do
|
|
10073
9986
|
rm -f "${_SHARED_ROOT}/${svc}/run-${slug}.sh"
|
|
10074
9987
|
# FIX-081: reverse the FIX-059 auto-bootstrap guard. `_install_launchd_plists`
|
|
10075
9988
|
# writes `launchctl disable gui/<UID>/<label>` for every brand-new plist
|
|
@@ -10405,7 +10318,7 @@ _legacy_loop_status() {
|
|
|
10405
10318
|
echo ""
|
|
10406
10319
|
if [[ "$(uname)" == "Darwin" ]]; then
|
|
10407
10320
|
echo -e " Services Agent: ${CYAN}${agent}${NC}"
|
|
10408
|
-
for svc in loop dream
|
|
10321
|
+
for svc in loop dream pr; do
|
|
10409
10322
|
local state; state=$(_launchd_svc_state "$svc" "$project_path")
|
|
10410
10323
|
if [[ "$svc" == "loop" ]] && $_is_paused; then
|
|
10411
10324
|
local _paused_at; _paused_at=$(grep '^paused_at:' "$_LOOP_STATE" 2>/dev/null | awk '{print $2}' | tr -d '"')
|
|
@@ -10419,7 +10332,7 @@ _legacy_loop_status() {
|
|
|
10419
10332
|
echo -e " ${YELLOW}loop ⏸ paused${NC}${_dur} run: roll loop resume"
|
|
10420
10333
|
else
|
|
10421
10334
|
local _tick_age=""
|
|
10422
|
-
case "$svc" in pr
|
|
10335
|
+
case "$svc" in pr)
|
|
10423
10336
|
_tick_age=$(_loop_tick_age "$svc")
|
|
10424
10337
|
[ -n "$_tick_age" ] && _tick_age=" tick ${_tick_age}"
|
|
10425
10338
|
esac
|
|
@@ -11601,7 +11514,7 @@ _loop_pr_heal_self() {
|
|
|
11601
11514
|
|
|
11602
11515
|
local agent; agent="$(_project_agent 2>/dev/null)"; agent="${agent:-claude}"
|
|
11603
11516
|
|
|
11604
|
-
( echo "$BASHPID" > "$lock"
|
|
11517
|
+
( echo "${BASHPID:-$$}" > "$lock"
|
|
11605
11518
|
_loop_pr_do_heal "$num" "$head_ref" "$slug" "$agent" >/dev/null 2>&1
|
|
11606
11519
|
rm -f "$lock"
|
|
11607
11520
|
) &
|
|
@@ -11828,54 +11741,25 @@ _loop_is_roll_meta_story() {
|
|
|
11828
11741
|
|
|
11829
11742
|
# _loop_pr_classify <head_ref> <human_review_state> <ci_state> <mergeable_state>
|
|
11830
11743
|
# Prints one of:
|
|
11831
|
-
#
|
|
11832
|
-
#
|
|
11833
|
-
#
|
|
11834
|
-
#
|
|
11835
|
-
# eligible
|
|
11836
|
-
# Exit 0 always — callers parse the printed token.
|
|
11744
|
+
# ci_red — CI failed → heal
|
|
11745
|
+
# stale — needs rebase / conflicting / behind
|
|
11746
|
+
# ready — CI green + clean → merge
|
|
11747
|
+
# Human review intentionally irrelevant — CI is the only gate.
|
|
11837
11748
|
_loop_pr_classify() {
|
|
11838
11749
|
local head_ref="${1:-}"
|
|
11839
11750
|
local human_review="${2:-}"
|
|
11840
11751
|
local ci_state="${3:-}"
|
|
11841
11752
|
local mergeable="${4:-}"
|
|
11842
11753
|
|
|
11843
|
-
case "$
|
|
11844
|
-
|
|
11845
|
-
# US-LOOP-049: loop/* PRs with CI failure get their own classification
|
|
11846
|
-
# so _loop_pr_inbox can route them to the PR hot-fix path.
|
|
11847
|
-
if [[ "$ci_state" == "failure" ]]; then
|
|
11848
|
-
echo "loop_self_ci_red"; return 0
|
|
11849
|
-
fi
|
|
11850
|
-
echo "loop_self"; return 0
|
|
11851
|
-
;;
|
|
11852
|
-
claude/*)
|
|
11853
|
-
# Claude-agent-authored PRs are loop-owned for autonomous merge/rebase
|
|
11854
|
-
# once green — same treatment as loop/* — so they close within a
|
|
11855
|
-
# PR-loop tick instead of waiting on a human or a GHA bot review.
|
|
11856
|
-
# CI-red claude/* PRs are deliberately NOT routed to background heal
|
|
11857
|
-
# (no agent re-spawn); they fall through to the stale/eligible paths
|
|
11858
|
-
# below so a human decides what to do with a failing run.
|
|
11859
|
-
if [[ "$ci_state" != "failure" ]]; then
|
|
11860
|
-
echo "loop_self"; return 0
|
|
11861
|
-
fi
|
|
11862
|
-
;;
|
|
11863
|
-
esac
|
|
11864
|
-
|
|
11865
|
-
case "$human_review" in
|
|
11866
|
-
CHANGES_REQUESTED) echo "blocked_human_request_changes"; return 0 ;;
|
|
11867
|
-
APPROVED) echo "blocked_human_approved"; return 0 ;;
|
|
11754
|
+
case "$mergeable" in
|
|
11755
|
+
BEHIND|DIRTY|CONFLICTING) echo "stale"; return 0 ;;
|
|
11868
11756
|
esac
|
|
11869
11757
|
|
|
11870
|
-
|
|
11871
|
-
|
|
11872
|
-
# spellings so a conflicting/out-of-date PR is reliably routed to rebase.
|
|
11873
|
-
if [ "$ci_state" = "failure" ] || [ "$mergeable" = "CONFLICTING" ] || [ "$mergeable" = "DIRTY" ] || [ "$mergeable" = "BEHIND" ]; then
|
|
11874
|
-
echo "stale"
|
|
11875
|
-
return 0
|
|
11758
|
+
if [ "$ci_state" = "failure" ]; then
|
|
11759
|
+
echo "ci_red"; return 0
|
|
11876
11760
|
fi
|
|
11877
11761
|
|
|
11878
|
-
echo "
|
|
11762
|
+
echo "ready"
|
|
11879
11763
|
}
|
|
11880
11764
|
|
|
11881
11765
|
# _loop_pr_rebase_circuit <pr_number>
|
|
@@ -12013,6 +11897,9 @@ _loop_pr_rebase_stale() {
|
|
|
12013
11897
|
fi
|
|
12014
11898
|
|
|
12015
11899
|
git fetch origin "$head_ref" 2>/dev/null || return 0
|
|
11900
|
+
# Reset local tracking branch to the freshly-fetched remote state
|
|
11901
|
+
# before rebasing, otherwise force-push destroys commits pushed by others.
|
|
11902
|
+
git checkout -B "$head_ref" "origin/$head_ref" 2>/dev/null || return 0
|
|
12016
11903
|
|
|
12017
11904
|
# FIX-159: save original branch so we can restore it unconditionally
|
|
12018
11905
|
local _orig
|
|
@@ -12135,44 +12022,29 @@ _loop_pr_inbox() {
|
|
|
12135
12022
|
verdict=$(_loop_pr_classify "$head_ref" "$human_review" "$ci_state" "$mergeable")
|
|
12136
12023
|
|
|
12137
12024
|
case "$verdict" in
|
|
12138
|
-
|
|
12139
|
-
# Green self-PR: merge when clean, else rebase onto main first. A
|
|
12140
|
-
# loop/* or claude/* PR that fell BEHIND or now CONFLICTS with main can
|
|
12141
|
-
# never auto-merge until rebased — eager-merge alone would leave it
|
|
12142
|
-
# stuck open forever. Rebase is circuit-gated (≥3 attempts/24h → ALERT)
|
|
12143
|
-
# and merges on a later tick once the rebased head is green + clean.
|
|
12144
|
-
case "$mergeable" in
|
|
12145
|
-
BEHIND|DIRTY|CONFLICTING)
|
|
12146
|
-
if _loop_pr_rebase_circuit "$num"; then
|
|
12147
|
-
_loop_pr_rebase_stale "$num" "$head_ref" || true
|
|
12148
|
-
fi
|
|
12149
|
-
;;
|
|
12150
|
-
*)
|
|
12151
|
-
_loop_pr_merge_self_eager "$num" "$ci_state" "$mergeable" "$slug"
|
|
12152
|
-
;;
|
|
12153
|
-
esac
|
|
12154
|
-
;;
|
|
12155
|
-
loop_self_ci_red)
|
|
12156
|
-
# US-LOOP-062a: a red loop/* PR (classified by US-LOOP-049) is now
|
|
12157
|
-
# background-healed: bounded retries via heal budget + dynamic agent,
|
|
12158
|
-
# falling back to the deduped [TYPE:loop-pr-ci-red] ALERT (FIX-158's
|
|
12159
|
-
# surfacing) when heal is disabled/exhausted. Re-wires US-LOOP-050.
|
|
12025
|
+
ci_red)
|
|
12160
12026
|
_loop_pr_heal_self "$num" "$head_ref" "$slug" || true
|
|
12161
12027
|
;;
|
|
12162
|
-
blocked_human_request_changes)
|
|
12163
|
-
: # skip — last human review requested changes; wait for the author
|
|
12164
|
-
;;
|
|
12165
|
-
blocked_human_approved)
|
|
12166
|
-
# US-LOOP-062b: human approved — merge directly when green + mergeable
|
|
12167
|
-
# (don't wait for repo auto-merge, which may be off).
|
|
12168
|
-
_loop_pr_merge_approved "$num" "$ci_state" "$mergeable" "$slug" || true
|
|
12169
|
-
;;
|
|
12170
12028
|
stale)
|
|
12171
12029
|
_loop_pr_rebase_circuit "$num" || true
|
|
12172
|
-
_loop_pr_rebase_stale "$num" "$head_ref" || true
|
|
12030
|
+
if _loop_pr_rebase_stale "$num" "$head_ref" || true; then
|
|
12031
|
+
# Re-fetch PR state after rebase — if now clean, merge immediately.
|
|
12032
|
+
local _re_view
|
|
12033
|
+
_re_view=$(gh -R "$slug" pr view "$num" --json mergeStateStatus,statusCheckRollup 2>/dev/null) || true
|
|
12034
|
+
if [ -n "$_re_view" ]; then
|
|
12035
|
+
local _re_ci _re_mb
|
|
12036
|
+
_re_ci=$(echo "$_re_view" | jq -r '
|
|
12037
|
+
if (.statusCheckRollup | length) == 0 then ""
|
|
12038
|
+
elif any(.statusCheckRollup[]?; .conclusion == "FAILURE") then "failure"
|
|
12039
|
+
elif all(.statusCheckRollup[]?; .conclusion == "SUCCESS" or .conclusion == "SKIPPED") then "success"
|
|
12040
|
+
else "pending" end' 2>/dev/null)
|
|
12041
|
+
_re_mb=$(echo "$_re_view" | jq -r '.mergeStateStatus // ""' 2>/dev/null)
|
|
12042
|
+
_loop_pr_merge_self_eager "$num" "$_re_ci" "$_re_mb" "$slug"
|
|
12043
|
+
fi
|
|
12044
|
+
fi
|
|
12173
12045
|
;;
|
|
12174
|
-
|
|
12175
|
-
|
|
12046
|
+
ready)
|
|
12047
|
+
_loop_pr_merge_self_eager "$num" "$ci_state" "$mergeable" "$slug"
|
|
12176
12048
|
;;
|
|
12177
12049
|
esac
|
|
12178
12050
|
|
|
@@ -12370,569 +12242,13 @@ _loop_pr_route() {
|
|
|
12370
12242
|
return 0
|
|
12371
12243
|
}
|
|
12372
12244
|
|
|
12373
|
-
#
|
|
12374
|
-
|
|
12375
|
-
# These six helpers collect CI timing data, classify failures, auto-rerun
|
|
12376
|
-
# transient flakes, and surface flaky / degradation signals as backlog
|
|
12377
|
-
# entries. They are NOT yet wired into any runner or launchd plist — that is
|
|
12378
|
-
# Phase 2 (wired by hand). Each is unit-tested in
|
|
12379
|
-
# tests/unit/roll_loop_ci_loop.bats with gh stubbed. Do not delete or inline.
|
|
12380
|
-
#
|
|
12381
|
-
# State lives under project-local .roll/state/:
|
|
12382
|
-
# ci-timing.jsonl append-only NDJSON, one line per recorded CI run
|
|
12383
|
-
# ci-rerun-state.yaml minimal YAML: rerun attempt count per run_id
|
|
12384
|
-
# _LOOP_ALERT is the existing shared alert file (real failures, rerun limits).
|
|
12385
|
-
|
|
12386
|
-
# _ci_state_dir
|
|
12387
|
-
# Echo the project-local CI state directory, creating it if needed.
|
|
12388
|
-
# Resolves relative to the current working dir's .roll/ (tests cd into a
|
|
12389
|
-
# sandbox; the live loop runner cds into the project root).
|
|
12390
|
-
_ci_state_dir() {
|
|
12245
|
+
# _alert_log_file — echo path to alert-log.jsonl (used by `roll alert log` CLI).
|
|
12246
|
+
_alert_log_file() {
|
|
12391
12247
|
local dir=".roll/state"
|
|
12392
12248
|
mkdir -p "$dir" 2>/dev/null || true
|
|
12393
|
-
echo "$dir"
|
|
12394
|
-
}
|
|
12395
|
-
|
|
12396
|
-
# _ci_record_timing <run_json>
|
|
12397
|
-
# Parse one `gh run list --json ...` object and append a flat NDJSON line to
|
|
12398
|
-
# ci-timing.jsonl. Idempotent: a run_id already present in the file is
|
|
12399
|
-
# skipped. Duration is computed from createdAt → updatedAt (gh exposes no
|
|
12400
|
-
# native duration field). Returns 0 always (loop-safe).
|
|
12401
|
-
_ci_record_timing() {
|
|
12402
|
-
local json="$1"
|
|
12403
|
-
[ -n "$json" ] || return 0
|
|
12404
|
-
|
|
12405
|
-
local run_id workflow conclusion status created updated
|
|
12406
|
-
run_id=$(echo "$json" | jq -r '.databaseId // ""' 2>/dev/null)
|
|
12407
|
-
[ -n "$run_id" ] || return 0
|
|
12408
|
-
|
|
12409
|
-
local dir; dir=$(_ci_state_dir)
|
|
12410
|
-
local file="${dir}/ci-timing.jsonl"
|
|
12411
|
-
|
|
12412
|
-
# Idempotency: skip if this run_id is already recorded with a non-empty
|
|
12413
|
-
# conclusion. If the existing record has an empty conclusion and the new
|
|
12414
|
-
# data has a conclusion, update in-place so in-progress runs are completed.
|
|
12415
|
-
if [ -f "$file" ] && grep -q "\"run_id\":${run_id}," "$file" 2>/dev/null; then
|
|
12416
|
-
local existing_conclusion new_conclusion
|
|
12417
|
-
existing_conclusion=$(grep "\"run_id\":${run_id}," "$file" 2>/dev/null | jq -r '.conclusion // ""' 2>/dev/null)
|
|
12418
|
-
new_conclusion=$(echo "$json" | jq -r '.conclusion // ""' 2>/dev/null)
|
|
12419
|
-
if [ -n "$existing_conclusion" ] || [ -z "$new_conclusion" ]; then
|
|
12420
|
-
return 0
|
|
12421
|
-
fi
|
|
12422
|
-
# Remove the stale line so the new record can be appended below.
|
|
12423
|
-
local tmpfile="${file}.tmp.$$"
|
|
12424
|
-
grep -v "\"run_id\":${run_id}," "$file" > "$tmpfile" 2>/dev/null || true
|
|
12425
|
-
mv "$tmpfile" "$file"
|
|
12426
|
-
fi
|
|
12427
|
-
|
|
12428
|
-
workflow=$(echo "$json" | jq -r '.workflowName // .name // ""' 2>/dev/null)
|
|
12429
|
-
conclusion=$(echo "$json" | jq -r '.conclusion // ""' 2>/dev/null)
|
|
12430
|
-
status=$(echo "$json" | jq -r '.status // ""' 2>/dev/null)
|
|
12431
|
-
created=$(echo "$json" | jq -r '.createdAt // ""' 2>/dev/null)
|
|
12432
|
-
updated=$(echo "$json" | jq -r '.updatedAt // ""' 2>/dev/null)
|
|
12433
|
-
|
|
12434
|
-
# Duration in seconds from ISO-8601 timestamps; 0 if either is missing or
|
|
12435
|
-
# unparseable. `date -j` (BSD) and `date -d` (GNU) differ — try both.
|
|
12436
|
-
local dur=0 c_epoch u_epoch
|
|
12437
|
-
if [ -n "$created" ] && [ -n "$updated" ]; then
|
|
12438
|
-
c_epoch=$(_ci_iso_to_epoch "$created")
|
|
12439
|
-
u_epoch=$(_ci_iso_to_epoch "$updated")
|
|
12440
|
-
if [ -n "$c_epoch" ] && [ -n "$u_epoch" ] && [ "$u_epoch" -ge "$c_epoch" ] 2>/dev/null; then
|
|
12441
|
-
dur=$((u_epoch - c_epoch))
|
|
12442
|
-
fi
|
|
12443
|
-
fi
|
|
12444
|
-
|
|
12445
|
-
printf '{"run_id":%s,"workflow":"%s","conclusion":"%s","status":"%s","duration_sec":%s,"recorded_at":"%s"}\n' \
|
|
12446
|
-
"$run_id" "$workflow" "$conclusion" "$status" "$dur" \
|
|
12447
|
-
"$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$file"
|
|
12448
|
-
return 0
|
|
12449
|
-
}
|
|
12450
|
-
|
|
12451
|
-
# _ci_iso_to_epoch <iso8601>
|
|
12452
|
-
# Convert an ISO-8601 UTC timestamp (2026-05-30T10:00:00Z) to epoch seconds.
|
|
12453
|
-
# Echoes nothing on failure. Handles both BSD (macOS) and GNU date.
|
|
12454
|
-
_ci_iso_to_epoch() {
|
|
12455
|
-
local iso="$1"
|
|
12456
|
-
[ -n "$iso" ] || return 0
|
|
12457
|
-
local e
|
|
12458
|
-
# GNU date
|
|
12459
|
-
e=$(date -u -d "$iso" +%s 2>/dev/null) && { echo "$e"; return 0; }
|
|
12460
|
-
# BSD date (strip trailing Z, parse explicit format)
|
|
12461
|
-
local trimmed="${iso%Z}"
|
|
12462
|
-
e=$(date -u -j -f "%Y-%m-%dT%H:%M:%S" "$trimmed" +%s 2>/dev/null) && { echo "$e"; return 0; }
|
|
12463
|
-
return 0
|
|
12464
|
-
}
|
|
12465
|
-
|
|
12466
|
-
# _ci_classify_failure <run_id>
|
|
12467
|
-
# Inspect `gh run view <id> --log-failed` and classify the failure as
|
|
12468
|
-
# "transient" (infra flake: network, timeout, runner death) or "real"
|
|
12469
|
-
# (genuine test/build failure). Echoes "transient" or "real".
|
|
12470
|
-
# Empty / unavailable logs default to "real" (fail safe — don't auto-rerun
|
|
12471
|
-
# something we can't read).
|
|
12472
|
-
_ci_classify_failure() {
|
|
12473
|
-
local run_id="$1"
|
|
12474
|
-
[ -n "$run_id" ] || { echo "real"; return 0; }
|
|
12475
|
-
local slug; _gh_resolve slug 2>/dev/null || slug=""
|
|
12476
|
-
|
|
12477
|
-
local log
|
|
12478
|
-
if [ -n "$slug" ]; then
|
|
12479
|
-
log=$(gh -R "$slug" run view "$run_id" --log-failed 2>/dev/null)
|
|
12480
|
-
else
|
|
12481
|
-
log=$(gh run view "$run_id" --log-failed 2>/dev/null)
|
|
12482
|
-
fi
|
|
12483
|
-
|
|
12484
|
-
# Transient signatures: network/infra failures that a rerun typically clears.
|
|
12485
|
-
if echo "$log" | grep -qiE 'ETIMEDOUT|ECONNRESET|ENOTFOUND|EAI_AGAIN|shutdown signal|runner.*(error|lost|terminated)|The runner has received a shutdown|503 Service|connection reset|TLS handshake|i/o timeout|could not resolve host'; then
|
|
12486
|
-
echo "transient"
|
|
12487
|
-
return 0
|
|
12488
|
-
fi
|
|
12489
|
-
echo "real"
|
|
12490
|
-
return 0
|
|
12491
|
-
}
|
|
12492
|
-
|
|
12493
|
-
# _ci_rerun_state_file
|
|
12494
|
-
# Echo path to ci-rerun-state.yaml (creating the dir).
|
|
12495
|
-
_ci_rerun_state_file() {
|
|
12496
|
-
local dir; dir=$(_ci_state_dir)
|
|
12497
|
-
echo "${dir}/ci-rerun-state.yaml"
|
|
12498
|
-
}
|
|
12499
|
-
|
|
12500
|
-
# _ci_rerun_attempts <run_id>
|
|
12501
|
-
# Echo the recorded rerun attempt count for <run_id> (0 if none).
|
|
12502
|
-
_ci_rerun_attempts() {
|
|
12503
|
-
local run_id="$1"
|
|
12504
|
-
local file; file=$(_ci_rerun_state_file)
|
|
12505
|
-
[ -f "$file" ] || { echo 0; return 0; }
|
|
12506
|
-
local n
|
|
12507
|
-
n=$(awk -v key="\"${run_id}\":" '$1 == key { print $2 }' "$file" 2>/dev/null | head -1)
|
|
12508
|
-
case "$n" in
|
|
12509
|
-
''|*[!0-9]*) echo 0 ;;
|
|
12510
|
-
*) echo "$n" ;;
|
|
12511
|
-
esac
|
|
12512
|
-
}
|
|
12513
|
-
|
|
12514
|
-
# _ci_rerun_state_write <run_id> <attempts>
|
|
12515
|
-
# Set the attempt count for <run_id> in ci-rerun-state.yaml. Minimal YAML
|
|
12516
|
-
# writer (we own the schema): one `"<run_id>": <n>` line per run.
|
|
12517
|
-
_ci_rerun_state_write() {
|
|
12518
|
-
local run_id="$1" attempts="$2"
|
|
12519
|
-
local file; file=$(_ci_rerun_state_file)
|
|
12520
|
-
[ -f "$file" ] || : > "$file"
|
|
12521
|
-
local tmp; tmp=$(mktemp)
|
|
12522
|
-
awk -v key="\"${run_id}\":" -v val="$attempts" '
|
|
12523
|
-
$1 == key { print key " " val; found=1; next }
|
|
12524
|
-
{ print }
|
|
12525
|
-
END { if (!found) print key " " val }
|
|
12526
|
-
' "$file" > "$tmp" && mv "$tmp" "$file"
|
|
12527
|
-
}
|
|
12528
|
-
|
|
12529
|
-
# _ci_rerun_transient <run_id>
|
|
12530
|
-
# Auto-rerun a transient CI failure, capped at 2 attempts. attempt<2 →
|
|
12531
|
-
# `gh run rerun`; attempt>=2 → write an error ALERT. Echoes the action taken
|
|
12532
|
-
# ("rerun" / "limit"). Loop-safe (returns 0).
|
|
12533
|
-
_ci_rerun_transient() {
|
|
12534
|
-
local run_id="$1"
|
|
12535
|
-
[ -n "$run_id" ] || return 0
|
|
12536
|
-
local slug; _gh_resolve slug 2>/dev/null || slug=""
|
|
12537
|
-
|
|
12538
|
-
local attempts; attempts=$(_ci_rerun_attempts "$run_id")
|
|
12539
|
-
if [ "$attempts" -ge 2 ]; then
|
|
12540
|
-
local alert="$_LOOP_ALERT"
|
|
12541
|
-
mkdir -p "$(dirname "$alert")" 2>/dev/null || true
|
|
12542
|
-
printf '[%s] [error] [TYPE:ci-rerun-limit] CI rerun reached limit: run #%s (%s attempts)\n' \
|
|
12543
|
-
"$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$run_id" "$attempts" >> "$alert"
|
|
12544
|
-
echo "limit"
|
|
12545
|
-
return 0
|
|
12546
|
-
fi
|
|
12547
|
-
|
|
12548
|
-
if [ -n "$slug" ]; then
|
|
12549
|
-
gh -R "$slug" run rerun "$run_id" >/dev/null 2>&1 || true
|
|
12550
|
-
else
|
|
12551
|
-
gh run rerun "$run_id" >/dev/null 2>&1 || true
|
|
12552
|
-
fi
|
|
12553
|
-
_ci_rerun_state_write "$run_id" "$((attempts + 1))"
|
|
12554
|
-
echo "rerun"
|
|
12555
|
-
return 0
|
|
12556
|
-
}
|
|
12557
|
-
|
|
12558
|
-
# _ci_open_story <type> <title>
|
|
12559
|
-
# Append a FIX or US row to .roll/backlog.md's `| ID | Description | Status |`
|
|
12560
|
-
# table. Idempotent: if a 📋 Todo row with the same title already exists, skip
|
|
12561
|
-
# (echo "skip"). New IDs auto-increment from the max existing <TYPE>-NNN.
|
|
12562
|
-
# Echoes the new ID on success, "skip" if already queued.
|
|
12563
|
-
_ci_open_story() {
|
|
12564
|
-
local type="$1" title="$2"
|
|
12565
|
-
[ -n "$type" ] && [ -n "$title" ] || return 0
|
|
12566
|
-
|
|
12567
|
-
# Resolve the backlog file (project-local).
|
|
12568
|
-
local backlog=".roll/backlog.md"
|
|
12569
|
-
[ -f "$backlog" ] || { echo "skip"; return 0; }
|
|
12570
|
-
|
|
12571
|
-
# Idempotency: same title already queued as Todo → skip.
|
|
12572
|
-
if grep -F "$title" "$backlog" 2>/dev/null | grep -q '📋 Todo'; then
|
|
12573
|
-
echo "skip"
|
|
12574
|
-
return 0
|
|
12575
|
-
fi
|
|
12576
|
-
|
|
12577
|
-
# Auto-increment: find the max existing <TYPE>-NNN id.
|
|
12578
|
-
local prefix max next
|
|
12579
|
-
prefix=$(echo "$type" | tr '[:lower:]' '[:upper:]')
|
|
12580
|
-
max=$(grep -oE "${prefix}-[0-9]+" "$backlog" 2>/dev/null \
|
|
12581
|
-
| sed "s/${prefix}-//" \
|
|
12582
|
-
| sort -n | tail -1)
|
|
12583
|
-
case "$max" in ''|*[!0-9]*) max=0 ;; esac
|
|
12584
|
-
# 10# prefix forces base-10: a zero-padded id like 008/009 would otherwise be
|
|
12585
|
-
# parsed as octal and either misnumber (010→8) or error ("value too great").
|
|
12586
|
-
next=$((10#$max + 1))
|
|
12587
|
-
local id
|
|
12588
|
-
id=$(printf '%s-%03d' "$prefix" "$next")
|
|
12589
|
-
|
|
12590
|
-
printf '| %s | %s | 📋 Todo |\n' "$id" "$title" >> "$backlog"
|
|
12591
|
-
echo "$id"
|
|
12592
|
-
return 0
|
|
12593
|
-
}
|
|
12594
|
-
|
|
12595
|
-
# _ci_detect_flaky
|
|
12596
|
-
# Scan the last 20 ci-timing.jsonl lines, group by workflow, and flag any
|
|
12597
|
-
# workflow whose recent runs have a 20%–80% failure rate (2..8 failures of
|
|
12598
|
-
# the last 10) as flaky — opening a FIX story. Returns 0 (loop-safe).
|
|
12599
|
-
_ci_detect_flaky() {
|
|
12600
|
-
local dir; dir=$(_ci_state_dir)
|
|
12601
|
-
local file="${dir}/ci-timing.jsonl"
|
|
12602
|
-
[ -f "$file" ] || return 0
|
|
12603
|
-
|
|
12604
|
-
# Per workflow: count total + failures over the most recent 10 records.
|
|
12605
|
-
# awk reads last 20 lines (tail), keeps last 10 per workflow. Output is
|
|
12606
|
-
# collected into a variable (not piped to `while`) so an empty result or
|
|
12607
|
-
# an intermediate nonzero exit cannot trip a caller's ERR trap.
|
|
12608
|
-
local flaky_wfs
|
|
12609
|
-
flaky_wfs=$(tail -n 20 "$file" 2>/dev/null | awk '
|
|
12610
|
-
{
|
|
12611
|
-
# crude field extraction from flat JSON line
|
|
12612
|
-
wf=""; concl="";
|
|
12613
|
-
if (match($0, /"workflow":"[^"]*"/)) { wf=substr($0,RSTART+12,RLENGTH-13) }
|
|
12614
|
-
if (match($0, /"conclusion":"[^"]*"/)) { concl=substr($0,RSTART+14,RLENGTH-15) }
|
|
12615
|
-
if (wf=="") next
|
|
12616
|
-
order[wf]=order[wf]" "NR
|
|
12617
|
-
val[wf"|"NR]=concl
|
|
12618
|
-
}
|
|
12619
|
-
END {
|
|
12620
|
-
for (wf in order) {
|
|
12621
|
-
n=split(order[wf], idx, " ")
|
|
12622
|
-
# keep most recent 10
|
|
12623
|
-
start=1; if (n-10 > 0) start=n-9
|
|
12624
|
-
total=0; fail=0
|
|
12625
|
-
for (i=start;i<=n;i++) {
|
|
12626
|
-
if (idx[i]=="") continue
|
|
12627
|
-
total++
|
|
12628
|
-
c=val[wf"|"idx[i]]
|
|
12629
|
-
if (c=="failure" || c=="timed_out" || c=="cancelled") fail++
|
|
12630
|
-
}
|
|
12631
|
-
if (total>=4 && fail>=2 && fail<=8 && fail*100 <= total*80 && fail*100 >= total*20) {
|
|
12632
|
-
print wf
|
|
12633
|
-
}
|
|
12634
|
-
}
|
|
12635
|
-
}
|
|
12636
|
-
' || true)
|
|
12637
|
-
|
|
12638
|
-
local wf
|
|
12639
|
-
for wf in $flaky_wfs; do
|
|
12640
|
-
[ -n "$wf" ] && _ci_open_story FIX "flaky: ${wf}" >/dev/null || true
|
|
12641
|
-
done
|
|
12642
|
-
return 0
|
|
12643
|
-
}
|
|
12644
|
-
|
|
12645
|
-
# _ci_detect_degradation
|
|
12646
|
-
# Scan the last 20 ci-timing.jsonl lines, compute mean duration per workflow,
|
|
12647
|
-
# and open a US story when a workflow crosses its threshold:
|
|
12648
|
-
# unit* > 300s (5 min)
|
|
12649
|
-
# integration* > 900s (15 min)
|
|
12650
|
-
# Returns 0 (loop-safe).
|
|
12651
|
-
_ci_detect_degradation() {
|
|
12652
|
-
local dir; dir=$(_ci_state_dir)
|
|
12653
|
-
local file="${dir}/ci-timing.jsonl"
|
|
12654
|
-
[ -f "$file" ] || return 0
|
|
12655
|
-
|
|
12656
|
-
local degraded
|
|
12657
|
-
degraded=$(tail -n 20 "$file" 2>/dev/null | awk '
|
|
12658
|
-
{
|
|
12659
|
-
wf=""; dur=0;
|
|
12660
|
-
if (match($0, /"workflow":"[^"]*"/)) { wf=substr($0,RSTART+12,RLENGTH-13) }
|
|
12661
|
-
if (match($0, /"duration_sec":[0-9]+/)) { dur=substr($0,RSTART+15,RLENGTH-15)+0 }
|
|
12662
|
-
if (wf=="") next
|
|
12663
|
-
sum[wf]+=dur; cnt[wf]++
|
|
12664
|
-
}
|
|
12665
|
-
END {
|
|
12666
|
-
for (wf in sum) {
|
|
12667
|
-
if (cnt[wf]==0) continue
|
|
12668
|
-
avg=sum[wf]/cnt[wf]
|
|
12669
|
-
lc=tolower(wf)
|
|
12670
|
-
if (index(lc,"unit")>0 && avg>300) { print wf "\t" int(avg) }
|
|
12671
|
-
else if (index(lc,"integration")>0 && avg>900) { print wf "\t" int(avg) }
|
|
12672
|
-
}
|
|
12673
|
-
}
|
|
12674
|
-
' || true)
|
|
12675
|
-
|
|
12676
|
-
local line wf avg
|
|
12677
|
-
# IFS=newline so each "wf<TAB>avg" record is one iteration; field-split on TAB.
|
|
12678
|
-
local _oifs="$IFS"
|
|
12679
|
-
IFS='
|
|
12680
|
-
'
|
|
12681
|
-
for line in $degraded; do
|
|
12682
|
-
IFS="$_oifs"
|
|
12683
|
-
wf=$(printf '%s' "$line" | cut -f1)
|
|
12684
|
-
avg=$(printf '%s' "$line" | cut -f2)
|
|
12685
|
-
[ -n "$wf" ] && _ci_open_story US "CI degradation: ${wf} avg ${avg}s exceeds threshold" >/dev/null || true
|
|
12686
|
-
IFS='
|
|
12687
|
-
'
|
|
12688
|
-
done
|
|
12689
|
-
IFS="$_oifs"
|
|
12690
|
-
return 0
|
|
12691
|
-
}
|
|
12692
|
-
|
|
12693
|
-
# _ci_scan
|
|
12694
|
-
# US-AUTO-045 Phase 2 orchestrator: the entry the CI Loop runner drives every
|
|
12695
|
-
# 5 min. Lists recent `main`-branch CI runs, records each run's timing, and on
|
|
12696
|
-
# a `failure` conclusion classifies it — auto-rerunning transient infra
|
|
12697
|
-
# flakes. After the loop it runs the flaky + degradation detectors over the
|
|
12698
|
-
# accumulated history. Lenient on gh unavailability (missing / failed list →
|
|
12699
|
-
# return 0) so the service never errors out a tick.
|
|
12700
|
-
_ci_scan() {
|
|
12701
|
-
local slug; _gh_resolve slug 2>/dev/null || { _loop_write_tick "ci" "idle" "gh_unavailable"; return 0; }
|
|
12702
|
-
|
|
12703
|
-
local runs_json
|
|
12704
|
-
runs_json=$(gh -R "$slug" run list --branch main \
|
|
12705
|
-
--json databaseId,workflowName,name,conclusion,status,createdAt,updatedAt \
|
|
12706
|
-
2>/dev/null) || { _loop_write_tick "ci" "idle" "gh_error"; return 0; }
|
|
12707
|
-
[ -n "$runs_json" ] || { _loop_write_tick "ci" "idle" "empty_response"; return 0; }
|
|
12708
|
-
|
|
12709
|
-
# An empty list ("[]") still falls through to the detectors below: they run
|
|
12710
|
-
# over accumulated history, not just this tick's runs.
|
|
12711
|
-
local count; count=$(echo "$runs_json" | jq 'length' 2>/dev/null || echo 0)
|
|
12712
|
-
case "$count" in ''|*[!0-9]*) count=0 ;; esac
|
|
12713
|
-
|
|
12714
|
-
local i=0
|
|
12715
|
-
while [ "$i" -lt "$count" ]; do
|
|
12716
|
-
local run_json conclusion run_id
|
|
12717
|
-
run_json=$(echo "$runs_json" | jq -c ".[$i]" 2>/dev/null)
|
|
12718
|
-
_ci_record_timing "$run_json"
|
|
12719
|
-
|
|
12720
|
-
conclusion=$(echo "$run_json" | jq -r '.conclusion // ""' 2>/dev/null)
|
|
12721
|
-
if [ "$conclusion" = "failure" ]; then
|
|
12722
|
-
run_id=$(echo "$run_json" | jq -r '.databaseId // ""' 2>/dev/null)
|
|
12723
|
-
if [ -n "$run_id" ]; then
|
|
12724
|
-
local kind; kind=$(_ci_classify_failure "$run_id")
|
|
12725
|
-
[ "$kind" = "transient" ] && _ci_rerun_transient "$run_id" >/dev/null
|
|
12726
|
-
fi
|
|
12727
|
-
fi
|
|
12728
|
-
i=$((i + 1))
|
|
12729
|
-
done
|
|
12730
|
-
|
|
12731
|
-
_ci_detect_flaky
|
|
12732
|
-
_ci_detect_degradation
|
|
12733
|
-
_loop_write_tick "ci" "acted" "scan_done"
|
|
12734
|
-
return 0
|
|
12735
|
-
}
|
|
12736
|
-
|
|
12737
|
-
# ═══════════════════════════════════════════════════════════════════════════════
|
|
12738
|
-
# US-AUTO-046 Phase 1: dedicated Alert Loop helpers (loop-safe, pure bash)
|
|
12739
|
-
# ═══════════════════════════════════════════════════════════════════════════════
|
|
12740
|
-
# These consume the existing $_LOOP_ALERT file — until now a write-only dumb file
|
|
12741
|
-
# that every loop appends to but nobody reads. The Alert Loop turns it into a
|
|
12742
|
-
# real consumer: parse → dedup (1h per category) → notify (error always) →
|
|
12743
|
-
# log → rotate. They are NOT yet wired into any runner or launchd plist — that
|
|
12744
|
-
# is Phase 2 (wired by hand). Each is unit-tested in
|
|
12745
|
-
# tests/unit/roll_loop_alert_loop.bats with _notify stubbed. Do not delete or
|
|
12746
|
-
# inline.
|
|
12747
|
-
#
|
|
12748
|
-
# State lives under project-local .roll/state/ (shared with the CI Loop):
|
|
12749
|
-
# alert-log.jsonl append-only NDJSON, one line per consumed alert
|
|
12750
|
-
# $_LOOP_ALERT.prev is the rotated copy (kept for debugging).
|
|
12751
|
-
#
|
|
12752
|
-
# Line format ($_LOOP_ALERT) — new tagged format, old format read-compatible:
|
|
12753
|
-
# [2026-05-26T10:00:00] [error] [TYPE:ci-real-failure] CI failed: run #123
|
|
12754
|
-
# [2026-05-26T10:00:00] some legacy message → level=warn category=legacy
|
|
12755
|
-
|
|
12756
|
-
# _alert_parse_file [file]
|
|
12757
|
-
# Parse each non-empty line of $_LOOP_ALERT (or <file>) into a TAB-separated
|
|
12758
|
-
# record `ts<TAB>level<TAB>category<TAB>message`, one per output line. The
|
|
12759
|
-
# leading `[ts]` is extracted when present; optional `[level]` and
|
|
12760
|
-
# `[TYPE:category]` tags follow. Untagged (legacy) lines default to
|
|
12761
|
-
# level=warn, category=legacy, with the whole remainder as the message.
|
|
12762
|
-
# Markdown headers / ack footers (lines starting with `#` or `**`) are skipped.
|
|
12763
|
-
# Echoes nothing for a missing/empty file. Loop-safe (returns 0).
|
|
12764
|
-
_alert_parse_file() {
|
|
12765
|
-
local file="${1:-$_LOOP_ALERT}"
|
|
12766
|
-
[ -n "$file" ] && [ -f "$file" ] || return 0
|
|
12767
|
-
|
|
12768
|
-
awk '
|
|
12769
|
-
{
|
|
12770
|
-
line=$0
|
|
12771
|
-
# skip blank lines and markdown chrome (headers, ack footers)
|
|
12772
|
-
if (line ~ /^[ \t]*$/) next
|
|
12773
|
-
if (line ~ /^[ \t]*#/) next
|
|
12774
|
-
if (line ~ /^[ \t]*\*\*/) next
|
|
12775
|
-
|
|
12776
|
-
ts=""; level=""; category=""
|
|
12777
|
-
|
|
12778
|
-
# leading [timestamp]
|
|
12779
|
-
if (match(line, /^\[[^]]*\]/)) {
|
|
12780
|
-
ts=substr(line, RSTART+1, RLENGTH-2)
|
|
12781
|
-
line=substr(line, RSTART+RLENGTH)
|
|
12782
|
-
sub(/^[ \t]+/, "", line)
|
|
12783
|
-
}
|
|
12784
|
-
# optional [level] (error|warn|info)
|
|
12785
|
-
if (match(line, /^\[(error|warn|info)\]/)) {
|
|
12786
|
-
level=substr(line, RSTART+1, RLENGTH-2)
|
|
12787
|
-
line=substr(line, RSTART+RLENGTH)
|
|
12788
|
-
sub(/^[ \t]+/, "", line)
|
|
12789
|
-
}
|
|
12790
|
-
# optional [TYPE:category]
|
|
12791
|
-
if (match(line, /^\[TYPE:[^]]*\]/)) {
|
|
12792
|
-
category=substr(line, RSTART+6, RLENGTH-7)
|
|
12793
|
-
line=substr(line, RSTART+RLENGTH)
|
|
12794
|
-
sub(/^[ \t]+/, "", line)
|
|
12795
|
-
}
|
|
12796
|
-
|
|
12797
|
-
# legacy "ALERT:" prefix on the remaining message — strip the keyword
|
|
12798
|
-
sub(/^ALERT:[ \t]*/, "", line)
|
|
12799
|
-
|
|
12800
|
-
if (level=="") level="warn"
|
|
12801
|
-
if (category=="") category="legacy"
|
|
12802
|
-
|
|
12803
|
-
printf "%s\t%s\t%s\t%s\n", ts, level, category, line
|
|
12804
|
-
}
|
|
12805
|
-
' "$file"
|
|
12806
|
-
return 0
|
|
12807
|
-
}
|
|
12808
|
-
|
|
12809
|
-
# _alert_log_file
|
|
12810
|
-
# Echo path to .roll/state/alert-log.jsonl (creating the dir). Reuses the
|
|
12811
|
-
# CI Loop's _ci_state_dir so both loops share one project-local state dir.
|
|
12812
|
-
_alert_log_file() {
|
|
12813
|
-
local dir; dir=$(_ci_state_dir)
|
|
12814
12249
|
echo "${dir}/alert-log.jsonl"
|
|
12815
12250
|
}
|
|
12816
12251
|
|
|
12817
|
-
# _alert_should_notify <category> <level>
|
|
12818
|
-
# Decide whether an alert should fire a notification.
|
|
12819
|
-
# error → always true (immediate, never throttled)
|
|
12820
|
-
# warn | info → true unless a same-category alert was already notified
|
|
12821
|
-
# within the last hour (rate-limit / dedup)
|
|
12822
|
-
# The 1h window is read from alert-log.jsonl (notified=1 entries only).
|
|
12823
|
-
# Echoes "true" / "false".
|
|
12824
|
-
_alert_should_notify() {
|
|
12825
|
-
local category="$1" level="$2"
|
|
12826
|
-
[ "$level" = "error" ] && { echo "true"; return 0; }
|
|
12827
|
-
|
|
12828
|
-
local file; file=$(_alert_log_file)
|
|
12829
|
-
[ -f "$file" ] || { echo "true"; return 0; }
|
|
12830
|
-
|
|
12831
|
-
local now; now=$(date -u +%s)
|
|
12832
|
-
# Most recent notified=1 entry for this category → its recorded_at epoch.
|
|
12833
|
-
local last
|
|
12834
|
-
last=$(grep -F "\"category\":\"${category}\"" "$file" 2>/dev/null \
|
|
12835
|
-
| grep -F '"notified":1' \
|
|
12836
|
-
| tail -1 \
|
|
12837
|
-
| sed -n 's/.*"recorded_at":"\([^"]*\)".*/\1/p')
|
|
12838
|
-
[ -n "$last" ] || { echo "true"; return 0; }
|
|
12839
|
-
|
|
12840
|
-
local last_epoch; last_epoch=$(_ci_iso_to_epoch "$last")
|
|
12841
|
-
[ -n "$last_epoch" ] || { echo "true"; return 0; }
|
|
12842
|
-
|
|
12843
|
-
# Within 1h (3600s) → throttle (false); otherwise allow.
|
|
12844
|
-
if [ "$((now - last_epoch))" -lt 3600 ] 2>/dev/null; then
|
|
12845
|
-
echo "false"
|
|
12846
|
-
else
|
|
12847
|
-
echo "true"
|
|
12848
|
-
fi
|
|
12849
|
-
return 0
|
|
12850
|
-
}
|
|
12851
|
-
|
|
12852
|
-
# _alert_write_log <ts> <level> <category> <message> <notified>
|
|
12853
|
-
# Append one NDJSON record to alert-log.jsonl. <notified> is the literal
|
|
12854
|
-
# string "true"/"false" (or 1/0) and is normalized to 1/0. recorded_at is the
|
|
12855
|
-
# consumption time (UTC), distinct from the alert's own <ts>. Quotes in the
|
|
12856
|
-
# message are escaped so the line stays valid JSON. Loop-safe (returns 0).
|
|
12857
|
-
_alert_write_log() {
|
|
12858
|
-
local ts="$1" level="$2" category="$3" message="$4" notified="$5"
|
|
12859
|
-
local file; file=$(_alert_log_file)
|
|
12860
|
-
|
|
12861
|
-
local n=0
|
|
12862
|
-
case "$notified" in true|1) n=1 ;; esac
|
|
12863
|
-
|
|
12864
|
-
# Escape backslashes then double-quotes for JSON string safety.
|
|
12865
|
-
local esc
|
|
12866
|
-
esc=$(printf '%s' "$message" | sed 's/\\/\\\\/g; s/"/\\"/g')
|
|
12867
|
-
|
|
12868
|
-
printf '{"ts":"%s","level":"%s","category":"%s","message":"%s","notified":%s,"recorded_at":"%s"}\n' \
|
|
12869
|
-
"$ts" "$level" "$category" "$esc" "$n" \
|
|
12870
|
-
"$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$file"
|
|
12871
|
-
return 0
|
|
12872
|
-
}
|
|
12873
|
-
|
|
12874
|
-
# _alert_rotate [file]
|
|
12875
|
-
# Snapshot $_LOOP_ALERT (or <file>) to <file>.prev and truncate it in place.
|
|
12876
|
-
# Idempotent: a missing source is a no-op (the .prev from a prior run is
|
|
12877
|
-
# left untouched). Loop-safe (returns 0).
|
|
12878
|
-
#
|
|
12879
|
-
# US-AUTO-046 (kimi peer-review Q2): copy+truncate instead of mv. `mv` swaps
|
|
12880
|
-
# the inode at the path, so a producer loop (main/pr/ci) that opened its `>>`
|
|
12881
|
-
# fd *before* the rotation but writes *after* it would land in `.prev` and be
|
|
12882
|
-
# silently lost. Copying keeps the original inode at the path; the subsequent
|
|
12883
|
-
# `:>` truncates that same inode, so any concurrent appender's fd still points
|
|
12884
|
-
# at the live alert file and its write is read on the next 1-min tick.
|
|
12885
|
-
_alert_rotate() {
|
|
12886
|
-
local file="${1:-$_LOOP_ALERT}"
|
|
12887
|
-
[ -n "$file" ] || return 0
|
|
12888
|
-
if [ -f "$file" ]; then
|
|
12889
|
-
cat "$file" > "${file}.prev" 2>/dev/null || true
|
|
12890
|
-
: > "$file"
|
|
12891
|
-
fi
|
|
12892
|
-
return 0
|
|
12893
|
-
}
|
|
12894
|
-
|
|
12895
|
-
# _alert_dispatch [file]
|
|
12896
|
-
# Main consumer entry point. Parse $_LOOP_ALERT → for each alert decide
|
|
12897
|
-
# notify → fire _notify + record to alert-log.jsonl → rotate the file.
|
|
12898
|
-
# A missing/empty alert file is a no-op (no rotate, no log). Loop-safe.
|
|
12899
|
-
_alert_dispatch() {
|
|
12900
|
-
local file="${1:-$_LOOP_ALERT}"
|
|
12901
|
-
[ -n "$file" ] && [ -f "$file" ] || { _loop_write_tick "alert" "idle" "no_file"; return 0; }
|
|
12902
|
-
# Empty file → nothing to consume, leave it in place.
|
|
12903
|
-
[ -s "$file" ] || { _loop_write_tick "alert" "idle" "empty_file"; return 0; }
|
|
12904
|
-
|
|
12905
|
-
local parsed; parsed=$(_alert_parse_file "$file")
|
|
12906
|
-
[ -n "$parsed" ] || { _alert_rotate "$file"; _loop_write_tick "alert" "idle" "no_parsed"; return 0; }
|
|
12907
|
-
|
|
12908
|
-
local line ts level category message notify
|
|
12909
|
-
local _oifs="$IFS"
|
|
12910
|
-
IFS='
|
|
12911
|
-
'
|
|
12912
|
-
for line in $parsed; do
|
|
12913
|
-
IFS="$_oifs"
|
|
12914
|
-
ts=$(printf '%s' "$line" | cut -f1)
|
|
12915
|
-
level=$(printf '%s' "$line" | cut -f2)
|
|
12916
|
-
category=$(printf '%s' "$line" | cut -f3)
|
|
12917
|
-
message=$(printf '%s' "$line" | cut -f4-)
|
|
12918
|
-
|
|
12919
|
-
notify=$(_alert_should_notify "$category" "$level")
|
|
12920
|
-
if [ "$notify" = "true" ]; then
|
|
12921
|
-
_notify "roll alert: ${level}" "${message}" || true
|
|
12922
|
-
_alert_write_log "$ts" "$level" "$category" "$message" "true"
|
|
12923
|
-
else
|
|
12924
|
-
_alert_write_log "$ts" "$level" "$category" "$message" "false"
|
|
12925
|
-
fi
|
|
12926
|
-
IFS='
|
|
12927
|
-
'
|
|
12928
|
-
done
|
|
12929
|
-
IFS="$_oifs"
|
|
12930
|
-
|
|
12931
|
-
_alert_rotate "$file"
|
|
12932
|
-
_loop_write_tick "alert" "acted" "dispatch_done"
|
|
12933
|
-
return 0
|
|
12934
|
-
}
|
|
12935
|
-
|
|
12936
12252
|
# FIX-070: flip a story row in the main repo's .roll/backlog.md between
|
|
12937
12253
|
# 📋 Todo and 🔨 In Progress. The cycle worktree is gitignored at .roll/,
|
|
12938
12254
|
# so editing the worktree copy + committing leaves no trace in git — and
|
|
@@ -14461,7 +13777,7 @@ _loop_monitor() {
|
|
|
14461
13777
|
# Services status (three services on macOS, single on Linux)
|
|
14462
13778
|
echo -e "$(msg loop.services ${BOLD} ${NC} ${CYAN} ${agent})"
|
|
14463
13779
|
if [[ "$(uname)" == "Darwin" ]]; then
|
|
14464
|
-
local active_start active_end dream_hour dream_minute
|
|
13780
|
+
local active_start active_end dream_hour dream_minute
|
|
14465
13781
|
local _aw; _aw=$(_loop_read_active_window "$project_path")
|
|
14466
13782
|
active_start="${_aw%% *}"; active_end="${_aw##* }"
|
|
14467
13783
|
# US-LOOP-013: use schedule spec for display
|
|
@@ -14471,17 +13787,16 @@ _loop_monitor() {
|
|
|
14471
13787
|
loop_offset="${loop_spec##* }"
|
|
14472
13788
|
dream_hour=$(_config_read_int "loop_dream_hour" "3")
|
|
14473
13789
|
dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")
|
|
14474
|
-
brief_hour=$(_config_read_int "loop_brief_hour" "9")
|
|
14475
|
-
brief_minute=$(_config_read_int "loop_brief_minute" "$(_loop_derive_minute "$project_path" 4)")
|
|
14476
13790
|
|
|
14477
|
-
local loop_sched dream_sched
|
|
13791
|
+
local loop_sched dream_sched pr_sched
|
|
14478
13792
|
loop_sched=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
|
|
14479
13793
|
loop_sched="${loop_sched} active ${active_start}:00–${active_end}:00"
|
|
14480
13794
|
dream_sched=$(printf "%02d:%02d" "$dream_hour" "$dream_minute")
|
|
14481
|
-
|
|
13795
|
+
# FIX-195: pr is a 5-min PR Loop (StartInterval=300); brief was retired.
|
|
13796
|
+
pr_sched="every 5m"
|
|
14482
13797
|
|
|
14483
|
-
local svcs=("loop" "dream" "
|
|
14484
|
-
local scheds=("$loop_sched" "$dream_sched" "$
|
|
13798
|
+
local svcs=("loop" "dream" "pr")
|
|
13799
|
+
local scheds=("$loop_sched" "$dream_sched" "$pr_sched")
|
|
14485
13800
|
for i in "${!svcs[@]}"; do
|
|
14486
13801
|
local svc="${svcs[$i]}" schedule="${scheds[$i]}"
|
|
14487
13802
|
local state; state=$(_launchd_svc_state "$svc" "$project_path")
|
|
@@ -15571,7 +14886,7 @@ _legacy_home() {
|
|
|
15571
14886
|
else
|
|
15572
14887
|
crontab -l 2>/dev/null | grep -q "${_LOOP_TAG}:${project_path}" && loop_state="enabled"
|
|
15573
14888
|
fi
|
|
15574
|
-
local active_start active_end dream_hour dream_minute
|
|
14889
|
+
local active_start active_end dream_hour dream_minute
|
|
15575
14890
|
local _aw; _aw=$(_loop_read_active_window "$project_path")
|
|
15576
14891
|
active_start="${_aw%% *}"; active_end="${_aw##* }"
|
|
15577
14892
|
# US-LOOP-013: use schedule spec for display
|
|
@@ -15581,8 +14896,6 @@ _legacy_home() {
|
|
|
15581
14896
|
loop_offset="${loop_spec##* }"
|
|
15582
14897
|
dream_hour=$(_config_read_int "loop_dream_hour" "3")
|
|
15583
14898
|
dream_minute=$(_config_read_int "loop_dream_minute" "$(_loop_derive_minute "$project_path" 2)")
|
|
15584
|
-
brief_hour=$(_config_read_int "loop_brief_hour" "9")
|
|
15585
|
-
brief_minute=$(_config_read_int "loop_brief_minute" "$(_loop_derive_minute "$project_path" 4)")
|
|
15586
14899
|
|
|
15587
14900
|
local loop_badge loop_sched
|
|
15588
14901
|
loop_sched=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
|
|
@@ -15712,8 +15025,9 @@ _legacy_home() {
|
|
|
15712
15025
|
# ── ⑥ Schedules & Last Brief ──────────────────────────────────────────────
|
|
15713
15026
|
printf " ${BOLD}⏰ Schedules & Last Brief${NC}\n"
|
|
15714
15027
|
local loop_sched_short; loop_sched_short=$(_loop_schedule_desc "$loop_period" "$loop_offset" en)
|
|
15715
|
-
|
|
15716
|
-
|
|
15028
|
+
# FIX-195: brief loop retired — schedule line shows loop + dream only.
|
|
15029
|
+
printf " %s · dream %02d:%02d\n" \
|
|
15030
|
+
"$loop_sched_short" "$dream_hour" "$dream_minute"
|
|
15717
15031
|
local latest_brief; latest_brief=$(ls .roll/briefs/*.md 2>/dev/null | sort | tail -1 || true)
|
|
15718
15032
|
if [[ -n "$latest_brief" ]]; then
|
|
15719
15033
|
local mod_time now age summary
|
|
@@ -15909,11 +15223,10 @@ main() {
|
|
|
15909
15223
|
test) cmd_test "$@" ;;
|
|
15910
15224
|
prices) cmd_prices "$@" ;;
|
|
15911
15225
|
changelog) cmd_changelog "$@" ;;
|
|
15226
|
+
consistency) cmd_consistency "$@" ;;
|
|
15912
15227
|
config) cmd_config "$@" ;;
|
|
15913
15228
|
_loop_render_exit_summary) _loop_render_exit_summary "$@" ;;
|
|
15914
15229
|
_loop_pr_inbox) _loop_pr_inbox "$@" ;;
|
|
15915
|
-
_ci_scan) _ci_scan "$@" ;;
|
|
15916
|
-
_alert_dispatch) _alert_dispatch "$@" ;;
|
|
15917
15230
|
version|--version|-v) echo "roll v${VERSION}" ;;
|
|
15918
15231
|
help|--help|-h) _help "$@" ;;
|
|
15919
15232
|
"") [[ -f ".roll/backlog.md" ]] && _home || { _help; _show_changelog; } ;;
|