@seanyao/roll 2026.601.3 → 2026.601.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/bin/roll +69 -2
- package/lib/i18n/status.sh +2 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## v2026.601.4
|
|
4
|
+
|
|
5
|
+
### 稳定性
|
|
6
|
+
|
|
7
|
+
- **从 `.roll/` 子目录跑 `roll` 不再误报"老结构"(FIX-156)** — `.roll/` 是嵌套 roll-meta git repo,从那里跑 `roll loop status` 等命令时 `git rev-parse` 会返回 `.roll/` 自己当 root,然后旧的"新结构存在?"检查找 `.roll/.roll` 找不到,又看到 `.roll/` 里满地 Roll 内容,就误报"老结构、要 migrate";现在检查会向上找直到 `.roll/` 的父目录,认出"这就是该 Roll 项目的私有过程仓",不再误报 `[loop]`
|
|
8
|
+
- **peer review 死等终结(FIX-150c)** — 此前 peer 调用挂了就是无限挂,等不到 verdict cycle 就跟着挂;现在 wall-clock 超 `peer_call_timeout`(默认 3min)直接 SIGTERM/SIGKILL 干掉跑挂的 agent,tmux 内的也送 Ctrl-C 中断,设全局标志 `_PEER_LAST_TIMED_OUT` 让上层落 ledger 用,绝不无限等 `[loop]`
|
|
9
|
+
|
|
3
10
|
## v2026.601.3
|
|
4
11
|
|
|
5
12
|
### 可见性
|
package/bin/roll
CHANGED
|
@@ -4,7 +4,7 @@ set -euo pipefail
|
|
|
4
4
|
# Roll — AI Agent Convention Manager
|
|
5
5
|
# Single source of truth for how all AI coding agents behave.
|
|
6
6
|
|
|
7
|
-
VERSION="2026.601.3"
|
|
7
|
+
VERSION="2026.601.4"
|
|
8
8
|
ROLL_HOME="${ROLL_HOME:-${HOME}/.roll}"
|
|
9
9
|
ROLL_CONFIG="${ROLL_HOME}/config.yaml"
|
|
10
10
|
ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
|
|
@@ -3879,7 +3879,18 @@ _peer_dispatch_in_tmux() {
|
|
|
3879
3879
|
sleep 1
|
|
3880
3880
|
elapsed=$((elapsed + 1))
|
|
3881
3881
|
done
|
|
3882
|
+
# FIX-150c: if we hit the wall clock without the done marker, the agent
|
|
3883
|
+
# is still running inside the tmux session. Send Ctrl-C to interrupt it
|
|
3884
|
+
# so the cycle doesn't hang on a peer that's no longer being awaited.
|
|
3885
|
+
# Return 1 (timed out) vs 0 (completed within budget); _peer_call lifts
|
|
3886
|
+
# this into the global _PEER_LAST_TIMED_OUT flag.
|
|
3887
|
+
local _timed_out=0
|
|
3888
|
+
if [[ ! -f "$done_file" ]]; then
|
|
3889
|
+
tmux send-keys -t "${session}:0" C-c 2>/dev/null || true
|
|
3890
|
+
_timed_out=1
|
|
3891
|
+
fi
|
|
3882
3892
|
rm -f "$done_file"
|
|
3893
|
+
return "$_timed_out"
|
|
3883
3894
|
}
|
|
3884
3895
|
|
|
3885
3896
|
_peer_call() {
|
|
@@ -3892,6 +3903,11 @@ _peer_call() {
|
|
|
3892
3903
|
local call_timeout
|
|
3893
3904
|
call_timeout="$(config_get "peer_call_timeout" "180")"
|
|
3894
3905
|
|
|
3906
|
+
# FIX-150c: signal back to caller whether this call hit the wall-clock
|
|
3907
|
+
# limit. Caller (_peer_*_state, ledger writer) records "timeout" verdict.
|
|
3908
|
+
# Reset per call so callers reading the previous result don't mis-attribute.
|
|
3909
|
+
_PEER_LAST_TIMED_OUT=0
|
|
3910
|
+
|
|
3895
3911
|
info "$(msg status.peer_call_timeout_s_peer ${call_timeout})"
|
|
3896
3912
|
|
|
3897
3913
|
if [[ -n "$session" ]] && command -v tmux >/dev/null 2>&1 && tmux has-session -t "$session" 2>/dev/null; then
|
|
@@ -3903,14 +3919,50 @@ _peer_call() {
|
|
|
3903
3919
|
return 1
|
|
3904
3920
|
}
|
|
3905
3921
|
_peer_dispatch_in_tmux "$session" "$cmd_str" "$out_file" "$stderr_log" "$call_timeout"
|
|
3922
|
+
local _dispatch_rc=$?
|
|
3906
3923
|
output="$(cat "$out_file" 2>/dev/null || true)"
|
|
3907
3924
|
rm -f "$out_file"
|
|
3925
|
+
if [[ "$_dispatch_rc" -ne 0 ]]; then
|
|
3926
|
+
_PEER_LAST_TIMED_OUT=1
|
|
3927
|
+
warn "$(msg status.peer_call_timeout_killed "$to" "$call_timeout")"
|
|
3928
|
+
fi
|
|
3908
3929
|
else
|
|
3909
3930
|
_agent_argv "$to" peer "$prompt" || {
|
|
3910
3931
|
err "$(msg status.unsupported_peer_2 $to)"
|
|
3911
3932
|
return 1
|
|
3912
3933
|
}
|
|
3913
|
-
|
|
3934
|
+
# FIX-150c: hard timeout for non-tmux path. macOS has no `timeout`(1),
|
|
3935
|
+
# so use a background watchdog that SIGTERMs (then SIGKILLs after 2 s
|
|
3936
|
+
# grace) the agent process when it overruns. Output captured via tmp
|
|
3937
|
+
# file because we can't keep it in $output while juggling pids.
|
|
3938
|
+
# `wait` returns the agent's exit code; with `set -e` enabled by the
|
|
3939
|
+
# caller (loops, hooks, bats), a non-zero from the killed agent would
|
|
3940
|
+
# short-circuit out before we can read it — `|| _peer_exit=$?` keeps
|
|
3941
|
+
# the value flowing into the timeout check.
|
|
3942
|
+
local _out _peer_exit=0 _peer_pid _watchdog_pid
|
|
3943
|
+
_out=$(mktemp)
|
|
3944
|
+
"${_AGENT_ARGV[@]}" >"$_out" 2>"$stderr_log" &
|
|
3945
|
+
_peer_pid=$!
|
|
3946
|
+
(
|
|
3947
|
+
sleep "$call_timeout"
|
|
3948
|
+
kill -TERM "$_peer_pid" 2>/dev/null && {
|
|
3949
|
+
sleep 2
|
|
3950
|
+
kill -KILL "$_peer_pid" 2>/dev/null
|
|
3951
|
+
}
|
|
3952
|
+
) &
|
|
3953
|
+
_watchdog_pid=$!
|
|
3954
|
+
wait "$_peer_pid" 2>/dev/null || _peer_exit=$?
|
|
3955
|
+
# Cancel watchdog if agent finished on time.
|
|
3956
|
+
kill "$_watchdog_pid" 2>/dev/null || true
|
|
3957
|
+
wait "$_watchdog_pid" 2>/dev/null || true
|
|
3958
|
+
output="$(cat "$_out" 2>/dev/null || true)"
|
|
3959
|
+
rm -f "$_out"
|
|
3960
|
+
# SIGTERM kill → 143, SIGKILL → 137. Either means we tripped the
|
|
3961
|
+
# timeout watchdog (agent itself doesn't normally exit with those).
|
|
3962
|
+
if [[ "$_peer_exit" -eq 143 || "$_peer_exit" -eq 137 ]]; then
|
|
3963
|
+
_PEER_LAST_TIMED_OUT=1
|
|
3964
|
+
warn "$(msg status.peer_call_timeout_killed "$to" "$call_timeout")"
|
|
3965
|
+
fi
|
|
3914
3966
|
fi
|
|
3915
3967
|
|
|
3916
3968
|
printf '%s\n' "$output"
|
|
@@ -15053,6 +15105,21 @@ _check_structure() {
|
|
|
15053
15105
|
# If new structure exists, allow
|
|
15054
15106
|
[[ -d "$root/.roll" ]] && return 0
|
|
15055
15107
|
|
|
15108
|
+
# FIX-156: nested-repo escape — when cwd is inside `.roll/` (a Roll project's
|
|
15109
|
+
# nested private roll-meta worktree), git rev-parse returns `.roll/` itself
|
|
15110
|
+
# instead of the outer Roll project, so the check above misses the outer
|
|
15111
|
+
# `.roll/` and trips the legacy warning on the project's own roll-meta files
|
|
15112
|
+
# (which contain BACKLOG.md / etc by definition). Walk up from $root and
|
|
15113
|
+
  # allow when any ancestor directory contains a `.roll/` directory — that is the
|
|
15114
|
+
# outer Roll project, and the user is operating on it from a sub-checkout.
|
|
15115
|
+
local _probe; _probe="$(dirname "$root")"
|
|
15116
|
+
while [[ "$_probe" != "/" && "$_probe" != "." && -n "$_probe" ]]; do
|
|
15117
|
+
if [[ -d "$_probe/.roll" ]]; then return 0; fi
|
|
15118
|
+
local _parent; _parent="$(dirname "$_probe")"
|
|
15119
|
+
[[ "$_parent" == "$_probe" ]] && break # reached fs root
|
|
15120
|
+
_probe="$_parent"
|
|
15121
|
+
done
|
|
15122
|
+
|
|
15056
15123
|
# US-ONBOARD-019: only treat the directory as a legacy *Roll* project when an
|
|
15057
15124
|
# old-path marker is present AND a Roll-specific content signature confirms
|
|
15058
15125
|
# the project was actually onboarded with Roll. Otherwise we'd block any
|
package/lib/i18n/status.sh
CHANGED
|
@@ -13,6 +13,8 @@ _i18n_set en status.in_sync " %s=%s %s: %s (in sync /"
|
|
|
13
13
|
_i18n_set zh status.in_sync "已同步)"
|
|
14
14
|
_i18n_set en status.peer_call_timeout_s_peer "Peer call timeout: %ss Peer"
|
|
15
15
|
_i18n_set zh status.peer_call_timeout_s_peer "调用超时: %ss"
|
|
16
|
+
_i18n_set en status.peer_call_timeout_killed "Peer %s killed after %ss wall-clock budget (FIX-150c)"
|
|
17
|
+
_i18n_set zh status.peer_call_timeout_killed "Peer %s 超过 %ss 上限,已强制终止 (FIX-150c)"
|
|
16
18
|
_i18n_set en status.unsupported_peer "Unsupported peer: %s"
|
|
17
19
|
_i18n_set zh status.unsupported_peer "不支持的 peer: %s"
|
|
18
20
|
_i18n_set en status.unsupported_peer_2 "Unsupported peer: %s"
|