@seanyao/roll 2026.526.1 → 2026.528.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/roll CHANGED
@@ -4,7 +4,7 @@ set -euo pipefail
4
4
  # Roll — AI Agent Convention Manager
5
5
  # Single source of truth for how all AI coding agents behave.
6
6
 
7
- VERSION="2026.526.1"
7
+ VERSION="2026.528.1"
8
8
  ROLL_HOME="${ROLL_HOME:-${HOME}/.roll}"
9
9
  ROLL_CONFIG="${ROLL_HOME}/config.yaml"
10
10
  ROLL_GLOBAL="${ROLL_HOME}/conventions/global"
@@ -70,6 +70,11 @@ ai_tool_name() {
70
70
  # Antigravity (agy) reuses ~/.gemini/ from the deprecated Gemini CLI for
71
71
  # its config dir, so a literal `gemini` basename now identifies agy.
72
72
  [[ "$bn" == "gemini" ]] && bn="agy"
73
+ # FIX-126: Kimi upstream renamed its CLI to kimi-code and its config dir
74
+ # to ~/.kimi-code/; map both old and new basenames to the canonical
75
+ # "kimi" agent identifier so downstream argv / config / sync paths stay
76
+ # uniform across the upgrade.
77
+ [[ "$bn" == "kimi-code" ]] && bn="kimi"
73
78
  echo "$bn"
74
79
  }
75
80
 
@@ -266,6 +271,7 @@ _ensure_config_entries() {
266
271
  "ai_claude:~/.claude|CLAUDE.md|CLAUDE.md"
267
272
  "ai_agy:~/.gemini|GEMINI.md|GEMINI.md"
268
273
  "ai_kimi:~/.kimi|AGENTS.md|AGENTS.md"
274
+ "ai_kimi_code:~/.kimi-code|AGENTS.md|AGENTS.md"
269
275
  "ai_codex:~/.codex|AGENTS.md|AGENTS.md"
270
276
  "ai_cursor:~/.cursor|.cursor-rules|.cursor-rules"
271
277
  "ai_trae:~/.trae|user_rules.md|project_rules.md"
@@ -273,6 +279,7 @@ _ensure_config_entries() {
273
279
  "ai_openclaw:~/.openclaw/workspace|AGENTS.md|AGENTS.md"
274
280
  "ai_pi:~/.pi/agent|AGENTS.md|AGENTS.md"
275
281
  "ai_deepseek:~/.deepseek|AGENTS.md|AGENTS.md"
282
+ "ai_qwen:~/.qwen|AGENTS.md|AGENTS.md"
276
283
  )
277
284
 
278
285
  local added=0
@@ -489,6 +496,7 @@ _install_local() {
489
496
  ai_claude: ~/.claude|CLAUDE.md|CLAUDE.md
490
497
  ai_gemini: ~/.gemini|GEMINI.md|GEMINI.md
491
498
  ai_kimi: ~/.kimi|AGENTS.md|AGENTS.md
499
+ ai_kimi_code: ~/.kimi-code|AGENTS.md|AGENTS.md
492
500
  ai_codex: ~/.codex|AGENTS.md|AGENTS.md
493
501
  ai_cursor: ~/.cursor|.cursor-rules|.cursor-rules
494
502
  ai_trae: ~/.trae|user_rules.md|project_rules.md
@@ -781,7 +789,7 @@ cmd_setup() {
781
789
  esac
782
790
  }
783
791
 
784
- local _ai_dirs="$HOME/.claude:$HOME/.gemini:$HOME/.kimi:$HOME/.codex:$HOME/.cursor:$HOME/.trae:$HOME/.config/opencode:$HOME/.openclaw:$HOME/.pi:$HOME/.deepseek"
792
+ local _ai_dirs="$HOME/.claude:$HOME/.gemini:$HOME/.kimi:$HOME/.kimi-code:$HOME/.codex:$HOME/.cursor:$HOME/.trae:$HOME/.config/opencode:$HOME/.openclaw:$HOME/.pi:$HOME/.deepseek:$HOME/.qwen"
785
793
 
786
794
  _run_setup_step "$ROLL_HOME" _install_local "$force"
787
795
  _record "$(_state_to_marker "$_ROLL_SETUP_STATE")" "Install templates & conventions to ~/.roll"
@@ -1927,6 +1935,18 @@ PY
1927
1935
  return 0
1928
1936
  fi
1929
1937
 
1938
+ # FIX-125: cycle-context tripwire. Apply phase below runs launchctl unload
1939
+ # and rm against ${HOME}/Library/LaunchAgents/<plist> (bin/roll:1957-1958).
1940
+ # From inside a loop cycle this would mutate the host's launchd domain
1941
+ # using another project's identity. Doc-only offboards (no plists) stay
1942
+ # allowed so cycles can still call offboard for non-launchd cleanup.
1943
+ if [ "${#plists[@]}" -gt 0 ] && _loop_in_cycle; then
1944
+ err "Refusing to unload launchd plists from inside a loop cycle (FIX-125)."
1945
+ echo " Run 'roll offboard --confirm' from a terminal outside the cycle," >&2
1946
+ echo " or pause the loop first: 'roll loop pause'." >&2
1947
+ return 1
1948
+ fi
1949
+
1930
1950
  # Apply. Guard every loop with a count check — `set -u` upstream makes
1931
1951
  # naked `"${arr[@]}"` over an empty array a hard error on bash 5.0.
1932
1952
  echo "$(msg offboard.applying_offboard)"
@@ -3132,9 +3152,20 @@ _agent_argv() {
3132
3152
  *) _AGENT_ARGV=(claude -p "$prompt") ;;
3133
3153
  esac ;;
3134
3154
  kimi)
3155
+ # FIX-126: Kimi upstream renamed binary from kimi-cli → kimi-code.
3156
+ # Prefer the new name when present; fall back through legacy names
3157
+ # so users mid-upgrade keep working until they reinstall.
3158
+ local _kimi_bin
3159
+ if command -v kimi-code >/dev/null 2>&1; then
3160
+ _kimi_bin=kimi-code
3161
+ elif command -v kimi-cli >/dev/null 2>&1; then
3162
+ _kimi_bin=kimi-cli
3163
+ else
3164
+ _kimi_bin=kimi
3165
+ fi
3135
3166
  case "$mode" in
3136
- interactive) _AGENT_ARGV=(kimi "$prompt") ;;
3137
- *) _AGENT_ARGV=(kimi --quiet -p "$prompt") ;;
3167
+ interactive) _AGENT_ARGV=("$_kimi_bin" "$prompt") ;;
3168
+ *) _AGENT_ARGV=("$_kimi_bin" --quiet -p "$prompt") ;;
3138
3169
  esac ;;
3139
3170
  deepseek)
3140
3171
  # deepseek has the same argv shape in both modes (positional prompt).
@@ -3144,7 +3175,7 @@ _agent_argv() {
3144
3175
  interactive) _AGENT_ARGV=(pi "$prompt") ;;
3145
3176
  *) _AGENT_ARGV=(pi -p "$prompt") ;;
3146
3177
  esac ;;
3147
- codex)
3178
+ codex|openai)
3148
3179
  case "$mode" in
3149
3180
  interactive) _AGENT_ARGV=(codex "$prompt") ;;
3150
3181
  peer) _AGENT_ARGV=(codex exec --json --output-last-message "$prompt") ;;
@@ -3155,15 +3186,19 @@ _agent_argv() {
3155
3186
  interactive) _AGENT_ARGV=(opencode "$prompt") ;;
3156
3187
  *) _AGENT_ARGV=(opencode run "$prompt") ;;
3157
3188
  esac ;;
3158
- agy)
3189
+ gemini|agy)
3159
3190
  # Antigravity (agy) replaces the deprecated Google Gemini CLI as of
3160
3191
  # late 2025. agy reuses ~/.gemini/ for config and reads GEMINI.md
3161
3192
  # natively, so the convention sync target is unchanged — only the
3162
- # invoked binary changes. Interactive-only (used by onboard flow).
3193
+ # invoked binary changes.
3163
3194
  case "$mode" in
3164
3195
  interactive) _AGENT_ARGV=(agy -i "$prompt") ;;
3165
- *) return 1 ;;
3196
+ text|peer) _AGENT_ARGV=(agy "$prompt") ;;
3197
+ *) _AGENT_ARGV=(agy "$prompt") ;;
3166
3198
  esac ;;
3199
+ qwen)
3200
+ # qwen has the same argv shape in both modes (positional prompt).
3201
+ _AGENT_ARGV=(qwen "$prompt") ;;
3167
3202
  *) return 1 ;;
3168
3203
  esac
3169
3204
  }
@@ -3201,7 +3236,7 @@ _agent_run_skill() {
3201
3236
  [[ -f "$skill_file" ]] || { err "Skill not found: ${skill}"; return 1; }
3202
3237
  local content; content=$(_skill_content "$skill_file")
3203
3238
  _agent_argv "$agent" text "$content" || {
3204
- err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|codex|opencode>"
3239
+ err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|openai|codex|opencode|qwen|gemini>"
3205
3240
  return 1
3206
3241
  }
3207
3242
  "${_AGENT_ARGV[@]}"
@@ -3960,7 +3995,7 @@ EOF
3960
3995
  )"
3961
3996
 
3962
3997
  _agent_argv "$agent" text "$prompt" || {
3963
- err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|codex|opencode>"
3998
+ err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|openai|codex|opencode|qwen|gemini>"
3964
3999
  trap - INT TERM
3965
4000
  return 1
3966
4001
  }
@@ -4346,8 +4381,14 @@ cmd_agent() {
4346
4381
  case "$subcmd" in
4347
4382
  use)
4348
4383
  local name="${1:-}"
4349
- [[ -z "$name" ]] && { err "Usage: roll agent use <claude|kimi|deepseek|pi|codex|opencode>"; exit 1; }
4350
- command -v "$name" &>/dev/null || warn "$(msg agent.not_found_in_path_setting_anyway "$name")"
4384
+ [[ -z "$name" ]] && { err "Usage: roll agent use <claude|kimi|deepseek|pi|openai|codex|opencode|qwen|gemini>"; exit 1; }
4385
+ if [[ "$name" == openai ]]; then
4386
+ command -v codex &>/dev/null || warn "openai agent requires the Codex CLI. Install: npm install -g @openai/codex"
4387
+ elif [[ "$name" == gemini ]]; then
4388
+ command -v agy &>/dev/null || command -v gemini &>/dev/null || warn "gemini agent requires the Google Gemini CLI. Install: npm install -g @antigravity/agy"
4389
+ else
4390
+ command -v "$name" &>/dev/null || warn "$(msg agent.not_found_in_path_setting_anyway "$name")"
4391
+ fi
4351
4392
  # REFACTOR-040: write to .roll/local.yaml (per-machine state). Migrate
4352
4393
  # from legacy .roll.yaml in the project root on the spot — copy the
4353
4394
  # value over once, then delete the old file so the root stays clean.
@@ -4380,7 +4421,7 @@ cmd_agent() {
4380
4421
  list)
4381
4422
  echo ""; echo " $(msg agent.available_agents)"; echo ""
4382
4423
  local current; current=$(_project_agent)
4383
- for a in claude kimi deepseek opencode codex pi; do
4424
+ for a in claude kimi deepseek opencode codex openai pi qwen gemini; do
4384
4425
  if command -v "$a" &>/dev/null; then
4385
4426
  [[ "$a" == "$current" ]] && echo -e " ${GREEN}✓ ${a}${NC} (current)" \
4386
4427
  || echo -e " ${GREEN}✓ ${a}${NC}"
@@ -4406,6 +4447,397 @@ cmd_agent() {
4406
4447
  esac
4407
4448
  }
4408
4449
 
4450
+ # ═══════════════════════════════════════════════════════════════════════════════
4451
+ # ISOLATION — pluggable adapter for running tests in an isolated environment
4452
+ # (US-ISO-001). Phase 1 supports two providers: `none` (default — direct host
4453
+ # execution) and `tart` (US-ISO-002 — macOS VM). The dispatcher reads
4454
+ # .roll/local.yaml's `test_isolation.type` and routes to
4455
+ # `_isolation_<type>_<method>`. See .roll/features/engineering-infrastructure/
4456
+ # dev-vm-isolation-plan.md for the full interface contract.
4457
+ # ═══════════════════════════════════════════════════════════════════════════════
4458
+
4459
+ _ISOLATION_SUPPORTED_TYPES="none tart"
4460
+
4461
# Print the isolation provider named by `test_isolation.type` in
# .roll/local.yaml (resolved relative to the current directory).
#
# stdout: exactly one line — the provider name, or "none" when the file,
#         the key, python3, or PyYAML is unavailable.
# stderr: a warning when the file declares a test_isolation section but no
#         type could be read, so a configured provider is never downgraded
#         to host execution without leaving a trace.
_isolation_get_type() {
  local val=""
  if [[ -f .roll/local.yaml ]] && command -v python3 >/dev/null 2>&1; then
    # The helper always exits 0 and prints nothing on any problem; the shell
    # side decides what an empty answer means.
    val=$(python3 - <<'PY' 2>/dev/null
import sys
try:
    import yaml
except ImportError:
    sys.exit(0)
try:
    with open(".roll/local.yaml") as fh:
        data = yaml.safe_load(fh) or {}
except Exception:
    sys.exit(0)
# A YAML document whose root is a list or scalar has no .get(); treat it as
# "no test_isolation section" instead of crashing.
section = data.get("test_isolation") if isinstance(data, dict) else None
if isinstance(section, dict):
    t = section.get("type")
    if isinstance(t, str) and t:
        print(t)
PY
    ) || true
  fi
  if [[ -z "$val" ]]; then
    # Distinguish "nothing configured" (quiet) from "configured but
    # unreadable" (warn): the latter would otherwise run tests on the host
    # while the user believes they are isolated.
    if [[ -f .roll/local.yaml ]] && grep -q '^test_isolation:' .roll/local.yaml 2>/dev/null; then
      echo "roll: warning: .roll/local.yaml has a test_isolation section but its type could not be read (needs python3 + PyYAML); using type=none (host)." >&2
    fi
    val="none"
  fi
  printf '%s\n' "$val"
}
4490
+
4491
# Dispatch an isolation-adapter method to the configured provider.
#
# Usage:   _isolation_dispatch <method> [args...]
# Methods: init / provision / exec / status / reset / destroy
#
# Resolves the provider via _isolation_get_type, then calls
# `_isolation_<type>_<method>` with the remaining arguments and returns that
# function's exit status. Returns 1 (with a message on stderr) when the
# method is missing, the provider is not in _ISOLATION_SUPPORTED_TYPES, or
# the provider does not define the requested method.
_isolation_dispatch() {
  # `${1:-}` rather than `$1`: the script runs under `set -u`, so a bare `$1`
  # would abort with "unbound variable" before the usage message below
  # could ever be printed.
  local method="${1:-}"; shift || true
  if [[ -z "$method" ]]; then
    err "isolation: missing method"
    echo " usage: _isolation_dispatch <init|provision|exec|status|reset|destroy> [args...]" >&2
    return 1
  fi

  # Resolve provider; emit a fallback-INFO line only when the config file is
  # missing (so an explicit `type: none` stays quiet). Goes to stderr so the
  # actual dispatch output (e.g. exec stdout) stays clean.
  local type; type=$(_isolation_get_type)
  if [[ "$type" = "none" ]] && [[ ! -f .roll/local.yaml ]]; then
    info "isolation: no test_isolation config, falling back to type=none (host)" >&2
  fi

  # Reject unknown types up front so the error names the provider, not the
  # missing function — this is the difference between "you typed it wrong"
  # and "the adapter is broken".
  local supported_ok=0 t
  for t in $_ISOLATION_SUPPORTED_TYPES; do
    if [[ "$type" = "$t" ]]; then
      supported_ok=1
      break
    fi
  done
  if (( ! supported_ok )); then
    err "isolation: unknown type '$type' in .roll/local.yaml"
    echo " supported types: ${_ISOLATION_SUPPORTED_TYPES// /, }" >&2
    return 1
  fi

  local fn="_isolation_${type}_${method}"
  if ! declare -F "$fn" >/dev/null 2>&1; then
    err "isolation: provider '$type' has no '${method}' implementation"
    return 1
  fi
  "$fn" "$@"
}
4530
+
4531
# ── `none` adapter (default — direct host execution) ──────────────────────
# Per-method contract:
#   init / provision / destroy — nothing to do; always succeed.
#   exec   — runs its arguments as a command in the current host shell and
#            returns that command's status.
#   status — always prints "ready".
#   reset  — prints an explanatory note on stderr and succeeds.

_isolation_none_init() {
  return 0
}

_isolation_none_provision() {
  return 0
}

_isolation_none_exec() {
  "$@"
}

_isolation_none_status() {
  printf '%s\n' "ready"
  return 0
}

_isolation_none_reset() {
  # Not a failure: with type=none there is no environment to rebuild, so the
  # caller gets a note (on stderr, keeping stdout clean) and exit 0.
  info "isolation type 'none' has nothing to reset (host execution is stateless)" >&2
  return 0
}

_isolation_none_destroy() {
  return 0
}
4546
+
4547
# ─── reset lock (US-ISO-004) ──────────────────────────────────────────────
# One lockfile under .roll/ serialises `roll test --reset` runs and lets the
# test-execution path of `roll test` bail out immediately instead of racing
# a half-rebuilt VM. `--where` only reads state and never consults the lock.

# Print the lockfile path (relative to the current directory).
_isolation_reset_lock_path() {
  printf '%s\n' ".roll/.iso-reset.lock"
}

# Succeed (0) when the lockfile exists as a regular file, fail (1) otherwise.
_isolation_reset_lock_held() {
  local lockfile
  lockfile=$(_isolation_reset_lock_path)
  if [[ -f "$lockfile" ]]; then
    return 0
  fi
  return 1
}
4559
+
4560
# Try to take the reset lock for this process.
#
# Returns 0 if the caller now holds the lock (the lockfile is created and
# contains this shell's PID); returns 1 if the lockfile already exists or
# cannot be created.
_isolation_reset_acquire_lock() {
  local lock; lock=$(_isolation_reset_lock_path)
  mkdir -p "$(dirname "$lock")" || return 1
  # Create-if-absent must be a single atomic step: a separate "does it
  # exist?" test followed by a write lets two concurrent resets both pass
  # the test and both believe they own the lock. With noclobber the
  # redirection fails when the file already exists. The subshell keeps the
  # option from leaking into the caller; `$$` is still the parent shell's
  # PID inside it.
  ( set -o noclobber; printf '%s\n' "$$" > "$lock" ) 2>/dev/null
}
4570
+
4571
# Remove the reset lockfile. Succeeds even when the file is already gone.
_isolation_reset_release_lock() {
  local lockfile
  lockfile=$(_isolation_reset_lock_path)
  rm -f "$lockfile"
}
4574
+
4575
# ── `tart` adapter (US-ISO-002 — macOS Apple Silicon VM via Tart) ─────────
# Each getter prints one setting, taking it from an environment override
# when that variable is set and non-empty, else from the built-in default:
#   _TART_VM_NAME    — VM identifier (default: roll-dev-test)
#   _TART_BASE_IMAGE — OCI base image
#                      (default: ghcr.io/cirruslabs/macos-tahoe-base:latest)
#   _TART_SSH_USER   — SSH user inside the VM (default: admin)

_isolation_tart_vm_name() {
  local vm="${_TART_VM_NAME:-roll-dev-test}"
  printf '%s\n' "$vm"
}

_isolation_tart_base_image() {
  local image="${_TART_BASE_IMAGE:-ghcr.io/cirruslabs/macos-tahoe-base:latest}"
  printf '%s\n' "$image"
}

_isolation_tart_ssh_user() {
  local login="${_TART_SSH_USER:-admin}"
  printf '%s\n' "$login"
}
4584
+
4585
# Succeed only when `uname` reports Darwin and `uname -m` reports arm64.
# Otherwise report the requirement (in Chinese and English) via err and
# return 1.
_isolation_tart_check_platform() {
  if [[ "$(uname)" == "Darwin" && "$(uname -m)" == "arm64" ]]; then
    return 0
  fi
  err "Tart 仅支持 Apple Silicon macOS"
  err "Tart only supports Apple Silicon macOS"
  return 1
}
4593
+
4594
# Succeed when a `tart` command is resolvable; otherwise print an install
# hint via err and return 1.
_isolation_tart_check_binary() {
  command -v tart >/dev/null 2>&1 && return 0
  err "tart binary not found"
  err " install via: brew install cirruslabs/cli/tart"
  return 1
}
4602
+
4603
# Returns 0 when the configured VM appears in `tart list`, 1 otherwise.
# Prints nothing either way; the caller decides what to do.
#
# The name is accepted in either of the first two columns.
# NOTE(review): current Tart releases appear to print a leading "Source"
# column (local / OCI) with the VM name second, while a names-only listing
# puts it first — confirm against the Tart version in use. Matching both
# columns works with either layout.
_isolation_tart_vm_present() {
  local name; name=$(_isolation_tart_vm_name)
  # No early `exit` in the match rule: awk must drain the pipe so `tart`
  # never sees SIGPIPE, which `pipefail` would turn into a false negative.
  tart list 2>/dev/null \
    | awk -v n="$name" '$1 == n || $2 == n { found = 1 } END { exit !found }'
}
4609
+
4610
# Print the configured VM's IPv4 address.
# Returns 1, printing nothing, when `tart ip` fails or its output is not a
# dotted-quad address.
_isolation_tart_ip() {
  local vm addr
  vm=$(_isolation_tart_vm_name)
  if ! addr=$(tart ip "$vm" 2>/dev/null); then
    return 1
  fi
  if [[ ! "$addr" =~ ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
    return 1
  fi
  printf '%s\n' "$addr"
}
4618
+
4619
# Status state machine — see dev-vm-isolation-plan.md §4.
# Always returns 0 and prints exactly one of:
#   not-installed — wrong platform, no `tart` command, or VM not listed
#   stopped       — VM listed but no usable IP
#   running       — VM has an IP but the SSH probe failed
#   ready         — VM has an IP and the SSH probe succeeded
_isolation_tart_status() {
  if ! _isolation_tart_check_platform >/dev/null 2>&1 \
     || ! command -v tart >/dev/null 2>&1 \
     || ! _isolation_tart_vm_present; then
    echo "not-installed"
    return 0
  fi

  local addr
  addr=$(_isolation_tart_ip) || { echo "stopped"; return 0; }

  # The VM is up; a trivial non-interactive SSH command with a short
  # connect timeout tells us whether it is actually usable.
  local login state="running"
  login=$(_isolation_tart_ssh_user)
  if ssh -o BatchMode=yes -o ConnectTimeout=3 -o StrictHostKeyChecking=no \
       "${login}@${addr}" "true" >/dev/null 2>&1; then
    state="ready"
  fi
  echo "$state"
  return 0
}
4640
+
4641
# init: make sure the VM slot exists by cloning the base image into it.
# Idempotent — when the VM is already listed, nothing is cloned.
# Returns 1 when the platform or binary check fails; otherwise the status
# of `tart clone` (or 0 when no clone was needed).
_isolation_tart_init() {
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1

  local vm
  vm=$(_isolation_tart_vm_name)
  _isolation_tart_vm_present && return 0

  local image
  image=$(_isolation_tart_base_image)
  tart clone "$image" "$vm"
}
4653
+
4654
# provision: install the runtime dependencies (bats-core, node, bash) inside
# the VM over SSH. Each package is installed only when `brew list` does not
# already report it, so re-running is harmless.
# Precondition: the VM must already have an IP — this function does not
# start it. Returns 1 when a pre-flight check fails or the VM has no IP;
# otherwise the status of the ssh command.
_isolation_tart_provision() {
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1

  local addr
  addr=$(_isolation_tart_ip) || { err "tart provision: VM not running"; return 1; }

  local login
  login=$(_isolation_tart_ssh_user)

  # The steps are joined with `;`, so the remote status is that of the last
  # one.
  ssh -o BatchMode=yes -o StrictHostKeyChecking=no \
    "${login}@${addr}" "brew list bats >/dev/null 2>&1 || brew install bats-core; \
      brew list node >/dev/null 2>&1 || brew install node; \
      brew list bash >/dev/null 2>&1 || brew install bash"
}
4667
+
4668
# exec: run a command inside the VM over SSH, auto-starting the VM first
# when it has no IP.
#
# Usage: _isolation_tart_exec <command> [args...]
#
# When the VM has to be started, the current directory is shared into the
# guest under the name "roll" (Tart exposes shared directories under
# /Volumes/My Shared Files/). A VM that was already running is used as-is.
# NOTE(review): the remote command runs in the SSH login directory, not in
# the shared repo directory — confirm this is what callers expect.
#
# Arguments are forwarded as an argv, matching the `none` adapter's `"$@"`:
# each one is shell-quoted before being handed to ssh, because ssh joins its
# command words with spaces and the remote shell re-parses the result.
# Without quoting, an argument such as "foo bar" arrives as two words.
#
# Returns 1 when a pre-flight check fails or the VM gets no IP within ~30s;
# otherwise the exit status of ssh.
_isolation_tart_exec() {
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1
  local name; name=$(_isolation_tart_vm_name)
  local ip
  if ! ip=$(_isolation_tart_ip); then
    # VM stopped — start it in the background with the repo mounted.
    local repo_root; repo_root="$(pwd -P)"
    tart run --dir="roll:${repo_root}" "$name" >/dev/null 2>&1 &
    # Wait up to ~30s for IP to come up.
    local i=0
    while (( i < 30 )); do
      if ip=$(_isolation_tart_ip); then break; fi
      sleep 1
      i=$((i + 1))
    done
    [[ -n "${ip:-}" ]] || { err "tart exec: VM failed to start in 30s"; return 1; }
  fi
  local user; user=$(_isolation_tart_ssh_user)
  if (( $# == 0 )); then
    # No command given: keep the plain ssh invocation unchanged.
    ssh -o BatchMode=yes -o StrictHostKeyChecking=no "${user}@${ip}"
    return
  fi
  local remote_cmd; remote_cmd=$(printf '%q ' "$@")
  ssh -o BatchMode=yes -o StrictHostKeyChecking=no "${user}@${ip}" "$remote_cmd"
}
4691
+
4692
# reset: stop, delete, re-clone from the base image, boot, then provision.
# Target: ≤90s (caller's perception); actual time depends on clone and boot
# speed.
#
# Clone is called directly (not via init) so the sequence is unconditional.
# A failed clone aborts the reset with status 1. Stop and delete are
# best-effort so a VM that is already stopped or absent does not block the
# rebuild.
#
# A freshly cloned VM is stopped and provisioning needs an IP, so the VM is
# booted first via _isolation_tart_exec, which starts it when it has no IP.
# Calling provision straight after the clone would always fail with
# "VM not running" and the reset would finish with nothing installed.
# Booting and provisioning stay best-effort: once the clone succeeded the
# function returns 0, and the caller can inspect the result via status.
_isolation_tart_reset() {
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1
  local name; name=$(_isolation_tart_vm_name)
  local img; img=$(_isolation_tart_base_image)
  tart stop "$name" 2>/dev/null || true
  tart delete "$name" 2>/dev/null || true
  tart clone "$img" "$name" || return 1
  if _isolation_tart_ip >/dev/null 2>&1 || _isolation_tart_exec true >/dev/null 2>&1; then
    _isolation_tart_provision || true
  else
    err "tart reset: VM was re-cloned but is not reachable yet; provisioning skipped"
  fi
  return 0
}
4707
+
4708
# destroy: stop the VM and delete it, without rebuilding.
# Both steps are best-effort (their failures and stderr are ignored), so
# destroying an absent or already-stopped VM still returns 0. Returns 1
# only when the platform or binary check fails.
_isolation_tart_destroy() {
  _isolation_tart_check_platform || return 1
  _isolation_tart_check_binary || return 1

  local vm step
  vm=$(_isolation_tart_vm_name)
  for step in stop delete; do
    tart "$step" "$vm" 2>/dev/null || true
  done
  return 0
}
4717
+
4718
+ # ─── cmd_test ────────────────────────────────────────────────────────────
4719
+ # US-ISO-003: `roll test` — runs the project's test suite through the
4720
+ # isolation dispatcher. The configured `test_isolation.type` determines
4721
+ # where the tests execute (host shell vs Tart VM). When type=tart and
4722
+ # the VM fails to start, the failure surfaces non-zero — no silent
4723
+ # fallback to host, since that would lie about where the tests ran.
4724
+
4725
# Print where the test suite will execute, as a single machine-readable
# token:
#   host              — provider is `none`
#   tart:<ip>         — provider is `tart`, status is ready/running and an
#                       IP could be read
#   tart:<status>     — provider is `tart` in any other situation
#   unknown:<type>    — any other configured provider
_cmd_test_where() {
  local provider
  provider=$(_isolation_get_type)

  if [[ "$provider" == "none" ]]; then
    echo "host"
    return
  fi
  if [[ "$provider" != "tart" ]]; then
    echo "unknown:${provider}"
    return
  fi

  local state detail
  state=$(_isolation_tart_status)
  detail="$state"
  if [[ "$state" == "ready" || "$state" == "running" ]]; then
    # Prefer the address over the bare state; fall back to the state when
    # the IP lookup fails between the status probe and now.
    local addr
    if addr=$(_isolation_tart_ip 2>/dev/null); then
      detail="$addr"
    fi
  fi
  echo "tart:${detail}"
}
4755
+
4756
# `roll test` entry point.
#
# Usage: cmd_test [--where | --reset | --help | -h] [--] [<extra-args>...]
#
#   --help / -h  anywhere before a `--` prints usage and returns 0, so
#                `roll test --reset --help` is a help lookup, not a reset.
#   --where      prints the routing token from _cmd_test_where; returns 0.
#   --reset      takes the reset lock, dispatches `reset`, releases the lock
#                and returns the dispatch status. Returns 1 without doing
#                anything when the lock is already held.
#   otherwise    dispatches `exec npm test <args...>` and returns its
#                status; a leading `--` is dropped. Returns 1 when a reset
#                is in progress.
cmd_test() {
  # Scan only the arguments in front of `--`; everything after it belongs
  # to the forwarded command and must not be interpreted here.
  local _a
  for _a in "$@"; do
    case "$_a" in
      --) break ;;
      --help|-h) set -- --help; break ;;
    esac
  done
  case "${1:-}" in
    --help|-h)
      cat <<'EOF'
Usage: roll test [--where | --reset] [--] [<extra-args>...]

Runs the project's test suite through the isolation adapter chosen in
.roll/local.yaml:

  test_isolation:
    type: none   (default) Direct host execution — same shell as `npm test`.
    type: tart   Inside the Apple-Silicon `roll-dev-test` Tart VM,
                 so tests can't reach the host's launchd / shared
                 roll state. Tart isn't auto-installed; run
                 `brew install cirruslabs/cli/tart` first.

Flags:
  --where      Print where tests will run, then exit (e.g. `host`,
               `tart:192.168.64.5`, `tart:stopped`).
  --reset      Rebuild the isolation environment to a clean baseline.
               type=tart: stop → delete → clone → provision (~90s).
               type=none: prints a note and exits 0 (host is stateless).
               Holds a lockfile under .roll/.iso-reset.lock; concurrent
               `roll test` invocations fast-fail with a clear error.
  --help, -h   Show this help.

Examples:
  roll test                    Run the suite in whatever the config says.
  roll test -- --tier=fast     Forward arguments to npm test.
  roll test --where            Don't run; just report routing.
  roll test --reset            Rebuild the VM (or host no-op).

When type=tart and the VM can't be reached, the command exits non-zero
rather than silently falling back to host execution.
EOF
      return 0
      ;;
    --where)
      _cmd_test_where
      return 0
      ;;
    --reset)
      # Refuse if another reset is in progress — fast-fail beats blocking
      # on a half-rebuilt VM (US-ISO-004 AC).
      if _isolation_reset_lock_held; then
        err "roll test --reset: another reset is already in progress"
        echo " lock: $(_isolation_reset_lock_path) (delete manually if stale)" >&2
        return 1
      fi
      _isolation_reset_acquire_lock || {
        err "roll test --reset: failed to acquire reset lock"
        return 1
      }
      # Capture the status with `||` instead of relying on a RETURN trap:
      # under `set -e` a failing dispatch would exit the shell on the spot,
      # a RETURN trap does not run on exit, and the lock would be left
      # behind to block every later `roll test`. The `||` keeps errexit from
      # firing, so the release below always runs; it also avoids leaving a
      # RETURN trap installed after this function has returned.
      local rc=0
      _isolation_dispatch reset || rc=$?
      _isolation_reset_release_lock
      return "$rc"
      ;;
    --)
      shift
      ;;
  esac

  # Test-execution path. If a reset is in progress, bail rather than racing
  # into a half-rebuilt VM — user can `roll test --where` to inspect state.
  if _isolation_reset_lock_held; then
    err "roll test: a reset is in progress (lock: $(_isolation_reset_lock_path))"
    echo " re-run once the reset completes, or delete the lockfile if stale" >&2
    return 1
  fi

  # Pass remaining args through to npm test inside the configured adapter.
  _isolation_dispatch exec npm test "$@"
}
4840
+
4409
4841
  # ═══════════════════════════════════════════════════════════════════════════════
4410
4842
  # LOOP — autonomous BACKLOG executor management
4411
4843
  # ═══════════════════════════════════════════════════════════════════════════════
@@ -4756,6 +5188,90 @@ fi
4756
5188
  # _SHARED_ROOT overrides and silently leaked test runs.jsonl writes into prod.
4757
5189
  _LOOP_RUNS="${_SHARED_ROOT}/loop/runs.jsonl"
4758
5190
  : "${_LOOP_MUTE_FILE:=${_SHARED_ROOT}/loop/mute-${_LOOP_PROJ_SLUG}}"
5191
+
5192
+ # ──────────────────────────────────────────────────────────────
5193
+ # US-LOOP-018: _loop_resolve_project_path + _loop_runtime_dir
5194
+ #
5195
+ # Two helpers that form the single source-of-truth for resolving
5196
+ # "where does this project's loop runtime data live?". All callers
5197
+ # (inner/outer runner templates, dashboard, GC, pause/resume, etc.)
5198
+ # must go through _loop_runtime_dir — never inline paths.
5199
+ #
5200
+ # Resolution order for _loop_resolve_project_path:
5201
+ # 1. macOS: launchd plist WorkingDirectory key (already used by
5202
+ # _status_loop_overview — implicit registry, no new state file)
5203
+ # 2. Linux: crontab entry (grep for "cd \"<path>\"") — best-effort
5204
+ # 3. Universal fallback: inner runner script grep for
5205
+ # export ROLL_MAIN_PROJECT="..." (inner scripts carry this;
5206
+ # outer runner scripts do not — pi peer review correction)
5207
+ # ──────────────────────────────────────────────────────────────
5208
+
5209
# _loop_resolve_project_path <slug>
# Resolve a loop slug to its project directory path.
# Prints the path and returns 0 on success; prints nothing and returns 1
# when no source yields a path.
#
# Sources, tried in order:
#   1. macOS only: the <string> following the WorkingDirectory key in
#      ~/Library/LaunchAgents/com.roll.loop.<slug>.plist.
#   2. When `crontab` exists: the first crontab line mentioning
#      run-<slug>.sh, from which the quoted `cd "<path>"` is taken; accepted
#      only if that path is an existing directory.
#   3. The `export ROLL_MAIN_PROJECT="..."` line of
#      ${_SHARED_ROOT:-~/.shared/roll}/loop/run-<slug>-inner.sh.
#
# Every probe is best-effort. The script runs with `set -e` and `pipefail`,
# so a probe that finds nothing (no crontab, no grep match) is neutralised
# with `|| true`; otherwise it would abort the function before the later
# fallbacks are tried.
_loop_resolve_project_path() {
  local slug="$1"
  local proj=""

  # 1. macOS: launchd plist WorkingDirectory (primary source)
  if [[ "$(uname)" = "Darwin" ]]; then
    local plist_dir="${HOME}/Library/LaunchAgents"
    local plist="${plist_dir}/com.roll.loop.${slug}.plist"
    if [[ -f "$plist" ]]; then
      proj=$(awk '/<key>WorkingDirectory<\/key>/{f=1;next} f{gsub(/^[[:space:]]*<string>|<\/string>[[:space:]]*$/,"");print;exit}' "$plist" 2>/dev/null) || true
      if [[ -n "$proj" ]]; then
        echo "$proj"
        return 0
      fi
    fi
  fi

  # 2. Linux: crontab entry (best-effort)
  if command -v crontab >/dev/null 2>&1; then
    # -F: the slug is a literal file-name fragment, not a regex — a `.` in
    # it must not match arbitrary characters.
    local cron_line
    cron_line=$(crontab -l 2>/dev/null | grep -F "run-${slug}.sh" | head -1) || true
    if [[ -n "$cron_line" ]]; then
      # Extract cd path: "cd \"<path>\" && ..."
      proj=$(echo "$cron_line" | sed -n 's/.*cd[[:space:]]*"\([^"]*\)".*/\1/p') || true
      if [[ -n "$proj" && -d "$proj" ]]; then
        echo "$proj"
        return 0
      fi
    fi
  fi

  # 3. Universal fallback: inner runner script ROLL_MAIN_PROJECT export
  local inner_script="${_SHARED_ROOT:-${HOME}/.shared/roll}/loop/run-${slug}-inner.sh"
  if [[ -f "$inner_script" ]]; then
    proj=$(grep '^export ROLL_MAIN_PROJECT=' "$inner_script" 2>/dev/null | head -1 | sed 's/.*="\(.*\)"/\1/') || true
    if [[ -n "$proj" ]]; then
      echo "$proj"
      return 0
    fi
  fi

  return 1
}
5253
+
5254
# _loop_runtime_dir <slug>
# Print the directory holding the project's loop runtime data.
#   - When ROLL_PROJECT_RUNTIME_DIR is set and non-empty, it is printed
#     as-is (test sandbox override) and the slug is not consulted.
#   - Otherwise the slug is resolved via _loop_resolve_project_path and
#     "<project>/.roll/loop" is printed.
# Returns 0 on success; returns 1, printing nothing, when the slug cannot
# be resolved.
_loop_runtime_dir() {
  local slug="$1"

  local override="${ROLL_PROJECT_RUNTIME_DIR:-}"
  if [[ -n "$override" ]]; then
    echo "$override"
    return 0
  fi

  local project_root
  project_root=$(_loop_resolve_project_path "$slug")
  [[ -n "$project_root" ]] || return 1
  echo "${project_root}/.roll/loop"
}
5274
+
4759
5275
  # FIX-087: parallel to FIX-065's _SHARED_ROOT auto-sandbox above. Without this,
4760
5276
  # tests that source bin/roll (directly via BATS or indirectly via a runner-inner
4761
5277
  # fork under /tmp / /var/folders/) wrote plists into the developer's real
@@ -4827,17 +5343,15 @@ _loop_derive_minute() {
4827
5343
  echo $(( (hash_dec + offset) % 55 + 1 ))
4828
5344
  }
4829
5345
 
4830
- # US-LOOP-011: validate a (period, offset) pair against the allowed schedule spec.
4831
- # Allowed periods are the divisors of 60: 60/30/20/15/12/10/6/5.
4832
- # Offset must be within [0, period).
5346
+ # US-LOOP-032: validate a (period, offset) pair. Period 1–1440.
5347
+ # offset_minute is deprecated (US-LOOP-032); still accepted for backward
5348
+ # compat but plist generation ignores it (uses StartInterval = period*60).
4833
5349
_loop_schedule_valid() {
  # Validate a (period, offset) pair read from loop_schedule.
  #   $1 period — minutes between runs; decimal integer in 1..1440.
  #   $2 offset — deprecated offset_minute; decimal integer in 0..59.
  # Returns 0 when both values are acceptable, 1 otherwise.
  local period="$1" offset="$2"
  [[ "$period" =~ ^[0-9]+$ ]] || return 1
  [[ "$offset" =~ ^[0-9]+$ ]] || return 1
  # Force base 10 before comparing. Bash arithmetic reads a leading 0 as
  # octal, so "08" or "099" raises "value too great for base"; inside an
  # `if (( … ))` that error merely makes the range test false, which let
  # out-of-range zero-padded values through as valid.
  local p=$((10#$period)) o=$((10#$offset))
  if (( p < 1 || p > 1440 )); then return 1; fi
  if (( o >= 60 )); then return 1; fi
  return 0
}
4843
5357
 
@@ -4860,6 +5374,11 @@ _loop_schedule_spec() {
4860
5374
  local_offset=$(awk '/^loop_schedule:/{found=1;next} found && /^[[:space:]]+offset_minute:/{print $2; exit}' "$local_file")
4861
5375
  if [[ -n "$local_period" && -n "$local_offset" ]]; then
4862
5376
  if _loop_schedule_valid "$local_period" "$local_offset"; then
5377
+ # US-LOOP-032: offset_minute is deprecated when period doesn't divide 60.
5378
+ # Warn but don't fail.
5379
+ if (( 60 % local_period != 0 )) && [[ "$local_offset" != "0" ]]; then
5380
+ echo "roll: warning: offset_minute is deprecated (period=${local_period}, offset=${local_offset}). Use period_minutes only." >&2
5381
+ fi
4863
5382
  echo "$local_period $local_offset"
4864
5383
  return 0
4865
5384
  fi
@@ -4868,11 +5387,11 @@ _loop_schedule_spec() {
4868
5387
  local alert_file="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/ALERT-${slug}.md"
4869
5388
  mkdir -p "$(dirname "$alert_file")" 2>/dev/null || true
4870
5389
  {
4871
- printf '## ⚠️ US-LOOP-011: Invalid loop_schedule\n\n'
5390
+ printf '## ⚠️ US-LOOP-032: Invalid loop_schedule\n\n'
4872
5391
  printf '**Time**: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"
4873
5392
  printf '**Source**: %s\n\n' "${project_path}/.roll/local.yaml"
4874
5393
  printf '**Values**: period_minutes=%s, offset_minute=%s\n\n' "$local_period" "$local_offset"
4875
- printf '**Action**: period must be one of 60/30/20/15/12/10/6/5; offset must be 0–(period-1). Falling back to default (period=60).\n\n'
5394
+ printf '**Action**: period must be 1–1440; offset must be 0–59. Falling back to default (period=60).\n\n'
4876
5395
  printf '%s\n' '---'
4877
5396
  } >> "$alert_file"
4878
5397
  fi
@@ -4892,30 +5411,39 @@ _loop_schedule_spec() {
4892
5411
  echo "60 $offset"
4893
5412
  }
4894
5413
 
4895
# US-LOOP-032: human-readable schedule description.
# Args: period offset [lang]
#   period  schedule period in minutes
#   offset  minute offset of the first slot
#   lang    en (default) or zh
# Output (stdout, no trailing newline):
#   period == 60        "every hour :MM"
#   period divides 60   clock-aligned slots, e.g. "every 30min (:00 :30)"
#   any other period    just the interval, e.g. "every 45min"
_loop_schedule_desc() {
  local period="$1" offset="$2" lang="${3:-en}"

  # Force base 10 so zero-padded values such as "08" are not rejected as
  # invalid octal by arithmetic expansion and printf %d.
  [[ "$period" =~ ^[0-9]+$ ]] && period=$((10#$period))
  [[ "$offset" =~ ^[0-9]+$ ]] && offset=$((10#$offset))

  if [[ "$period" -eq 60 ]]; then
    if [[ "$lang" == "zh" ]]; then
      # Command substitution strips msg_lang's trailing newline.
      printf '%s' "$(msg_lang "$lang" agent.hourly_at_02d "$offset")"
    else
      printf "every hour :%02d" "$offset"
    fi
    return 0
  fi

  # Only show clock-aligned slots when period divides 60.
  if (( 60 % period == 0 )); then
    local times="" slots=$((60 / period)) i m
    for ((i = 0; i < slots; i++)); do
      # The validator accepts offsets up to 59 regardless of period, so wrap
      # each slot back into the hour instead of printing minutes like ":75".
      m=$(( (offset + i * period) % 60 ))
      times="${times} :$(printf '%02d' "$m")"
    done
    if [[ "$lang" == "zh" ]]; then
      printf '%s' "$(msg_lang "$lang" agent.every_d_min_s "$period" "${times# }")"
    else
      printf "every %dmin (%s)" "$period" "${times# }"
    fi
  else
    if [[ "$lang" == "zh" ]]; then
      printf '每%d分鐘' "$period"
    else
      printf "every %dmin" "$period"
    fi
  fi
}
4921
5449
 
@@ -4928,7 +5456,16 @@ _loop_event() {
4928
5456
  local ts slug evfile json
4929
5457
  ts=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
4930
5458
  slug=$(_project_slug 2>/dev/null || basename "$PWD")
4931
- evfile="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/events-${slug}.ndjson"
5459
+ # US-LOOP-020: prefer project-local .roll/loop/; fallback to shared root
5460
+ # for transient slugs (worktree cleanup, orphan recovery) where
5461
+ # _loop_runtime_dir cannot resolve a project path.
5462
+ local _rt_dir
5463
+ _rt_dir=$(_loop_runtime_dir "$slug" 2>/dev/null || echo "")
5464
+ if [ -n "$_rt_dir" ]; then
5465
+ evfile="${_rt_dir}/events.ndjson"
5466
+ else
5467
+ evfile="${_SHARED_ROOT:-$HOME/.shared/roll}/loop/events-${slug}.ndjson"
5468
+ fi
4932
5469
  # FIX-065 tripwire: in a test context (BATS or temp cwd), refuse to write
4933
5470
  # into production ~/.shared/roll/. Catching this in code is the last line
4934
5471
  # of defense if some unusual path bypassed the auto-sandbox at source-time.
@@ -5095,35 +5632,18 @@ _write_launchd_plist() {
5095
5632
  # FIX-105: macOS 26.4 launchd silently refuses to fire StartCalendarInterval
5096
5633
  # entries that contain BOTH Hour and Minute keys. Daily services use
5097
5634
  # StartInterval=86400 instead.
5098
- # US-LOOP-012: when period < 60 and no hour, generate StartCalendarInterval
5099
- # <array> with one <dict> per trigger minute.
5635
+ # US-LOOP-032: all loop services use StartInterval = period * 60 (was
5636
+ # StartCalendarInterval with slot enumeration). Non-divisor periods are now
5637
+ # supported.
5100
5638
  local schedule_xml
5101
5639
  if [[ -n "$hour" ]]; then
5102
5640
  schedule_xml=" <key>StartInterval</key>
5103
5641
  <integer>86400</integer>"
5104
- elif [[ "$period" == "60" ]]; then
5105
- schedule_xml=" <key>StartCalendarInterval</key>
5106
- <dict>
5107
- <key>Minute</key>
5108
- <integer>${offset}</integer>
5109
- </dict>"
5110
5642
  else
5111
- # US-LOOP-012: period < 60 generate array of dicts
5112
- local entries=$(( 60 / period ))
5113
- local xml_lines=" <key>StartCalendarInterval</key>
5114
- <array>"
5115
- local i m
5116
- for ((i = 0; i < entries; i++)); do
5117
- m=$(( offset + i * period ))
5118
- xml_lines+="
5119
- <dict>
5120
- <key>Minute</key>
5121
- <integer>${m}</integer>
5122
- </dict>"
5123
- done
5124
- xml_lines+="
5125
- </array>"
5126
- schedule_xml="$xml_lines"
5643
+ # US-LOOP-032: StartInterval in seconds = period_minutes * 60
5644
+ local interval=$(( period * 60 ))
5645
+ schedule_xml=" <key>StartInterval</key>
5646
+ <integer>${interval}</integer>"
5127
5647
  fi
5128
5648
 
5129
5649
  local content
@@ -5180,6 +5700,10 @@ _write_loop_runner_script() {
5180
5700
  # Use stream-json + formatter: --verbose alone does nothing in -p mode;
5181
5701
  # stream-json enables realtime streaming; loop-fmt.py humanizes the events.
5182
5702
  local fmt_script="${ROLL_PKG_DIR}/lib/loop-fmt.py"
5703
+ # US-LOOP-026: post-cycle single-shot usage writer for non-claude agents.
5704
+ # pi -p text mode prints no usage, so we recover it from pi's session jsonl
5705
+ # exactly once per cycle (loop-fmt passthrough is display-only).
5706
+ local pi_emit_script="${ROLL_PKG_DIR}/lib/agent_usage/pi_emit.py"
5183
5707
  local roll_bin="${ROLL_PKG_DIR}/bin/roll"
5184
5708
  # FIX-041: loop cycle is autonomous — permission prompts and sandbox path
5185
5709
  # restrictions only cause the cycle to burn turns asking for approvals
@@ -5352,10 +5876,22 @@ _runs_append() {
5352
5876
  local _rid="loop-\${_cid%-*}"
5353
5877
  grep -qF "\"run_id\":\"\$_rid\"" "\$_runs_dst" 2>/dev/null && return 0
5354
5878
  mkdir -p "\$(dirname "\$_runs_dst")"
5879
+ # FIX-123: clean stale .tmp files from dead PIDs on entry.
5880
+ # A .tmp residue means a previous atomic write was interrupted.
5881
+ # Only clean files whose PID (extracted from suffix) is dead.
5882
+ local _tmp_dir; _tmp_dir="\$(dirname "\$_runs_dst")"
5883
+ for _stale in "\$_tmp_dir"/runs.jsonl.tmp.*; do
5884
+ [ -f "\$_stale" ] || continue
5885
+ local _stale_pid="\${_stale##*.tmp.}"
5886
+ kill -0 "\$_stale_pid" 2>/dev/null || rm -f "\$_stale"
5887
+ done
5355
5888
  local _ts_now; _ts_now=\$(date -u +%Y-%m-%dT%H:%M:%SZ)
5356
5889
  local _start="\${CYCLE_START:-\$(date -u +%s)}"
5357
5890
  local _dur=\$(( \$(date -u +%s) - _start ))
5358
5891
  [ "\$_dur" -lt 0 ] && _dur=0
5892
+ # FIX-123: atomic write — write to .tmp.$$ first, then cat >> to append,
5893
+ # then remove. If interrupted between jq and rm, the next call cleans it.
5894
+ local _tmp="\$_runs_dst.tmp.\$\$"
5359
5895
  jq -nc \\
5360
5896
  --arg ts "\$_ts_now" \\
5361
5897
  --arg project "${slug}" \\
@@ -5372,7 +5908,9 @@ _runs_append() {
5372
5908
  cycle_id:\$cycle_id,
5373
5909
  built:\$built, skipped:\$skipped, alerts:\$alerts,
5374
5910
  tcr_count:\$tcr_count, duration_sec:\$duration_sec, phases:\$phases}' \\
5375
- >> "\$_runs_dst" 2>/dev/null || true
5911
+ > "\$_tmp" 2>/dev/null || { rm -f "\$_tmp"; return 0; }
5912
+ cat "\$_tmp" >> "\$_runs_dst" 2>/dev/null || true
5913
+ rm -f "\$_tmp"
5376
5914
  }
5377
5915
  _inner_cleanup() {
5378
5916
  local _rc=\$?
@@ -5661,6 +6199,25 @@ else
5661
6199
  _phase_end agent_invoke ok
5662
6200
  fi
5663
6201
 
6202
+ # US-LOOP-026: non-claude agents (pi/deepseek/kimi) print no usage in -p text
6203
+ # mode. Recover token+cost once per cycle from the agent's session jsonl and
6204
+ # append a single authoritative usage event. Done here (not in loop-fmt's
6205
+ # per-attempt passthrough) so retries can't write N duplicate events that the
6206
+ # dashboard's same-label SUM would inflate. Runs before the timeout-abort exit
6207
+ # so partial cycles still get whatever usage the session recorded. The events
6208
+ # path is resolved exactly like _loop_event (rt_dir first, shared fallback) so
6209
+ # pi_emit appends to the same file the reader consumes.
6210
+ if [ "\$(_project_agent)" != "claude" ] && [ -f "${pi_emit_script}" ]; then
6211
+ _pi_rt=\$(_loop_runtime_dir "${slug}" 2>/dev/null || echo "")
6212
+ if [ -n "\$_pi_rt" ]; then
6213
+ _pi_evfile="\${_pi_rt}/events.ndjson"
6214
+ else
6215
+ _pi_evfile="\${_SHARED_ROOT:-\$HOME/.shared/roll}/loop/events-${slug}.ndjson"
6216
+ fi
6217
+ python3 "${pi_emit_script}" --cwd "\$WT" --cycle "\${CYCLE_ID}" \\
6218
+ --slug "${slug}" --events "\$_pi_evfile" 2>/dev/null || true
6219
+ fi
6220
+
5664
6221
  # FIX-057: timed out — skip publish; EXIT trap writes cycle_end blocked + ALERT.
5665
6222
  if [ "\$_CYCLE_TIMED_OUT" -eq 1 ]; then
5666
6223
  echo "[loop] cycle \${CYCLE_ID}: \${LOOP_CYCLE_TIMEOUT_SEC}s timeout — aborting cycle (worktree preserved at \$WT)"
@@ -5827,7 +6384,7 @@ if [ -z "\$ROLL_LOOP_FORCE" ]; then
5827
6384
  if [ "\$h" -lt ${active_start} ] || [ "\$h" -ge ${active_end} ]; then exit 0; fi
5828
6385
  fi
5829
6386
  # Pause check — 'roll loop pause' creates this marker to suspend scheduling
5830
- PAUSE="\$HOME/.shared/roll/loop/PAUSE-${slug}"
6387
+ PAUSE="\${_SHARED_ROOT:-\${HOME}/.shared/roll}/loop/PAUSE-${slug}"
5831
6388
  if [ -z "\$ROLL_LOOP_FORCE" ] && [ -f "\$PAUSE" ]; then exit 0; fi
5832
6389
  # FIX-037: orphan state detection & self-heal — if state.yaml says running
5833
6390
  # but no LOCK process or tmux session exists, the previous cycle was killed
@@ -5916,8 +6473,8 @@ if command -v tmux >/dev/null 2>&1; then
5916
6473
  # that triggered LaunchServices "where is <app>" prompts when the active
5917
6474
  # process name differed from its .app bundle name (e.g. MSTeams vs
5918
6475
  # Microsoft Teams.app).
5919
- if [ -z "\${ROLL_LOOP_NO_POPUP:-}" ] && [ -z "\${BATS_TEST_NUMBER:-}" ] && [ ! -f "\$HOME/.shared/roll/loop/mute-${slug}" ] && [ "\$(uname)" = "Darwin" ]; then
5920
- _attach_cmd="\$HOME/.shared/roll/loop/attach-\$SESSION.command"
6476
+ if [ -z "\${ROLL_LOOP_NO_POPUP:-}" ] && [ -z "\${BATS_TEST_NUMBER:-}" ] && [ ! -f "\${_SHARED_ROOT:-\$HOME/.shared/roll}/loop/mute-${slug}" ] && [ "\$(uname)" = "Darwin" ]; then
6477
+ _attach_cmd="\${_SHARED_ROOT:-\$HOME/.shared/roll}/loop/attach-\$SESSION.command"
5921
6478
  # Drop \`exec\` so the wrapping shell survives \`tmux attach\` exiting,
5922
6479
  # then \`read\` to hold the Terminal open until the user has had a
5923
6480
  # chance to scroll back through the cycle's output. Without this the
@@ -6115,7 +6672,7 @@ _agent_skill_cmd() {
6115
6672
  local agent; agent=$(_project_agent)
6116
6673
  local strip="awk 'NR==1 && /^---$/{skip=1;next} skip && /^---$/{skip=0;next} !skip{print}' '${skill_path}'"
6117
6674
  _agent_argv "$agent" plain "__PROMPT__" || {
6118
- err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|codex|opencode>"
6675
+ err "Unknown agent '${agent}'. Run: roll agent use <claude|kimi|deepseek|pi|openai|codex|opencode|qwen|gemini>"
6119
6676
  return 1
6120
6677
  }
6121
6678
  # Cron-installed skills (dream / brief / loop) run autonomously and need to
@@ -6152,12 +6709,13 @@ cmd_loop() {
6152
6709
  pause) _loop_pause ;;
6153
6710
  resume) _loop_resume ;;
6154
6711
  reset) _loop_reset ;;
6712
+ gc) shift; _loop_gc "$@" ;;
6155
6713
  notify) _notify "${1:-roll}" "${2:-}" ;;
6156
6714
  enforce-tcr) _loop_enforce_tcr "${1:-}" "${2:-}" ;;
6157
6715
  precheck-ci) _loop_precheck_ci ;;
6158
6716
  branches) _loop_branches "$(pwd -P)" ;;
6159
6717
  *) cat <<'HELP'
6160
- Usage: roll loop <on|off|now|test|status|monitor|runs|log|story|events|attach|mute|unmute|pause|resume|reset|notify|enforce-tcr|precheck-ci|branches>
6718
+ Usage: roll loop <on|off|now|test|status|monitor|runs|log|story|events|attach|mute|unmute|pause|resume|reset|gc|branches>
6161
6719
 
6162
6720
  on Install launchd scheduler (loop + dream + brief)
6163
6721
  off Remove launchd scheduler
@@ -6175,15 +6733,19 @@ Usage: roll loop <on|off|now|test|status|monitor|runs|log|story|events|attach|mu
6175
6733
  pause Pause scheduling (keep plist, skip execution)
6176
6734
  resume Resume scheduling after pause
6177
6735
  reset Clear loop state (start fresh on next fire)
6736
+ gc [--dry-run] [--keep-days N]
6737
+ Garbage-collect orphan slugs, tmp debris, expired backups
6738
+ branches List loop-related branches
6739
+
6740
+ Internal (called by roll-loop SKILL):
6178
6741
  notify Send macOS notification
6179
6742
  enforce-tcr Verify TCR commit count for a completed story
6180
6743
  precheck-ci Check HEAD CI status before scanning BACKLOG
6181
- branches List loop-related branches
6182
6744
 
6183
6745
  Schedule is configured per-project in .roll/local.yaml:
6184
6746
 
6185
6747
  loop_schedule:
6186
- period_minutes: 30 # 60, 30, 20, 15, 12, 10, 6, or 5
6748
+ period_minutes: 30 # 1-1440 (any minute interval)
6187
6749
  offset_minute: 7 # 0 – (period_minutes - 1)
6188
6750
 
6189
6751
  See guide/en/loop.md for full documentation.
@@ -6627,6 +7189,152 @@ _loop_attach() {
6627
7189
  exec tmux attach -t "$session"
6628
7190
  }
6629
7191
 
7192
# FIX-125: report whether the current process is running inside a loop cycle.
# Cycle context is signalled by either of two environment variables being
# non-empty: ROLL_LOOP_AGENT (exported by the cycle runner) or
# ROLL_CYCLE_LOG_RAW (exported by the outer cycle script).
# Returns 0 inside a cycle, 1 otherwise. Callers that touch the canonical
# ${HOME}/Library/LaunchAgents directly (_loop_gc, cmd_offboard) use this to
# refuse host-loop mutations from inside a cycle; read-only ops are unaffected.
_loop_in_cycle() {
  # Concatenating both markers is non-empty exactly when at least one is set.
  local cycle_markers="${ROLL_LOOP_AGENT:-}${ROLL_CYCLE_LOG_RAW:-}"
  [[ -n "$cycle_markers" ]]
}
7201
+
7202
# _loop_gc_mtime <file>
# Print <file>'s modification time as epoch seconds, or 0 when it cannot be
# read. GNU `stat -c %Y` is tried before BSD `stat -f %m`: on GNU coreutils,
# `stat -f %m <file>` is a filesystem query that prints a multi-line report on
# stdout before failing, which would corrupt the captured value.
_loop_gc_mtime() {
  local mt
  mt=$(stat -c %Y "$1" 2>/dev/null) || mt=$(stat -f %m "$1" 2>/dev/null) || mt=0
  [[ "$mt" =~ ^[0-9]+$ ]] || mt=0
  printf '%s\n' "$mt"
}

# US-LOOP-021: garbage-collect orphan slugs, tmp debris, and expired backups.
# Usage: _loop_gc [--dry-run] [--keep-days N]
#   --dry-run      report what would be cleaned without touching anything
#   --keep-days N  retention in days (non-negative integer, default 30)
# Retention order: --keep-days > ROLL_LOOP_GC_RETENTION_DAYS env >
#   .roll/local.yaml loop_gc.retention_days > 30.
# NOTE(review): the retention value is parsed and reported, but no phase below
#   uses it as a cutoff — backups are pruned after 5 days and *.migrated-*
#   files after 7 days. Confirm which phase it is meant to govern.
# Returns 1 when refused inside a loop cycle or when --keep-days has no value;
# 0 otherwise.
_loop_gc() {
  # FIX-125: refuse from inside a loop cycle. Phase 1 below scans/mutates
  # ${HOME}/Library/LaunchAgents directly — running it from a cycle would let
  # one project's tick remove another project's plist under the host's launchd
  # domain. Read-only ops (status, runs) are unaffected; only the GC mutator
  # is gated.
  if _loop_in_cycle; then
    echo "roll loop gc: refusing — cycle-context tripwire (FIX-125)" >&2
    echo " This command scans ~/Library/LaunchAgents directly. Running it" >&2
    echo " from inside a loop cycle is a known host-state corruption path." >&2
    return 1
  fi

  local dry_run=false
  local keep_days=30
  local keep_days_from_cli=false
  # Declared up front so the loop variable never leaks into the caller's scope.
  local f

  # Parse arguments
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --dry-run) dry_run=true; shift ;;
      --keep-days)
        # Without this guard a trailing --keep-days hits an unbound $2 under
        # `set -u` and a failing `shift 2`.
        if [[ $# -lt 2 ]]; then
          echo "roll loop gc: --keep-days requires a value" >&2
          return 1
        fi
        keep_days="$2"; keep_days_from_cli=true; shift 2 ;;
      *) shift ;;
    esac
  done

  # Retention config: an explicit CLI flag wins; otherwise env > local.yaml > default.
  if ! $keep_days_from_cli; then
    if [[ -n "${ROLL_LOOP_GC_RETENTION_DAYS:-}" ]]; then
      keep_days="$ROLL_LOOP_GC_RETENTION_DAYS"
    elif [[ -f .roll/local.yaml ]]; then
      local yaml_val; yaml_val=$(_config_read_int "loop_gc.retention_days" "" 2>/dev/null || true)
      [[ -n "$yaml_val" ]] && keep_days="$yaml_val"
    fi
  fi
  # Stay lenient on a malformed value: warn and fall back rather than abort GC.
  if [[ ! "$keep_days" =~ ^[0-9]+$ ]]; then
    echo "roll loop gc: ignoring invalid retention days '${keep_days}', using 30" >&2
    keep_days=30
  fi

  local loop_dir="${_SHARED_ROOT:-${HOME}/.shared/roll}/loop"
  # Always use canonical LaunchAgents — auto-sandbox only redirects writes.
  local plist_dir="${HOME}/Library/LaunchAgents"
  local now_epoch; now_epoch=$(date +%s)
  local gc_count=0

  # ── Phase 1: orphan slug detection ──
  local slug plist proj
  for plist in "$plist_dir"/com.roll.loop.*.plist; do
    [[ -f "$plist" ]] || continue
    local fname; fname=$(basename "$plist" .plist)
    # Extract slug: com.roll.loop.<slug> → <slug>
    slug="${fname#com.roll.loop.}"
    [[ -z "$slug" ]] && continue

    # Resolve project path from plist WorkingDirectory key
    # (inlined from _loop_resolve_project_path — US-LOOP-018)
    proj=$(awk '/<key>WorkingDirectory<\/key>/{f=1;next} f{gsub(/^[[:space:]]*<string>|<\/string>[[:space:]]*$/,"");print;exit}' "$plist" 2>/dev/null)

    if [[ -z "$proj" || ! -d "$proj" ]]; then
      # Expired slug — project directory doesn't exist
      local ts; ts=$(date +%Y%m%dT%H%M%S)
      local archive_dir="${loop_dir}/archived/${slug}-${ts}"

      if $dry_run; then
        echo "[DRY-RUN] orphan slug: $slug (project not found)"
        gc_count=$((gc_count + 1))
        continue
      fi

      echo "gc: archiving orphan slug $slug"
      mkdir -p "$archive_dir"

      # Move runner scripts
      for f in "${loop_dir}/run-${slug}.sh" \
               "${loop_dir}/run-${slug}-inner.sh" \
               "${loop_dir}/attach-roll-loop-${slug}.command"; do
        [[ -f "$f" ]] && mv "$f" "$archive_dir/"
      done

      # Move plist.
      # NOTE(review): nothing here unloads the job from launchd; this assumes a
      # plist whose project dir is gone is no longer loaded — confirm.
      [[ -f "$plist" ]] && mv "$plist" "$archive_dir/"

      gc_count=$((gc_count + 1))
    fi
  done

  # ── Phase 2: tmp debris cleanup ──

  # runs.jsonl.tmp.<pid> — residue of an interrupted runs.jsonl write. Files
  # whose suffix is the PID of a live process are left alone so a writer that
  # is mid-append right now does not lose its data.
  local tmp_pid
  for f in "$loop_dir"/runs.jsonl.tmp.*; do
    [[ -f "$f" ]] || continue
    tmp_pid="${f##*.tmp.}"
    if [[ "$tmp_pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$tmp_pid" 2>/dev/null; then
      continue
    fi
    if $dry_run; then
      echo "[DRY-RUN] tmp debris: $(basename "$f")"
    else
      rm -f "$f"
    fi
    gc_count=$((gc_count + 1))
  done

  # backup-before-merge-*.tgz older than 5 days
  local mtime
  local cutoff_5d; cutoff_5d=$((now_epoch - 5 * 86400))
  for f in "$loop_dir"/backup-before-merge-*.tgz; do
    [[ -f "$f" ]] || continue
    mtime=$(_loop_gc_mtime "$f")
    if [[ "$mtime" -lt "$cutoff_5d" ]]; then
      if $dry_run; then
        echo "[DRY-RUN] old backup: $(basename "$f")"
      else
        rm -f "$f"
      fi
      gc_count=$((gc_count + 1))
    fi
  done

  # *.migrated-* older than 7 days
  local cutoff_7d; cutoff_7d=$((now_epoch - 7 * 86400))
  for f in "$loop_dir"/*.migrated-*; do
    [[ -f "$f" ]] || continue
    mtime=$(_loop_gc_mtime "$f")
    if [[ "$mtime" -lt "$cutoff_7d" ]]; then
      if $dry_run; then
        echo "[DRY-RUN] old migrated: $(basename "$f")"
      else
        rm -f "$f"
      fi
      gc_count=$((gc_count + 1))
    fi
  done

  if $dry_run; then
    echo "gc: dry-run complete ($gc_count items would be cleaned)"
  else
    echo "gc: $gc_count items cleaned, keep-days=$keep_days"
  fi
}
7337
+
6630
7338
  # Pretty-print a duration in seconds as "Xs" / "Ym" / "Yh Zm".
6631
7339
  # US-VIEW-019: compute slowest phase + % from a JSON line's phases object.
6632
7340
  # Returns "<abbr> <pct>%" (e.g. "claude 97%") or empty when no phases data.
@@ -7487,6 +8195,18 @@ _loop_pr_rebase_stale() {
7487
8195
  return 0
7488
8196
  }
7489
8197
 
8198
# _loop_pr_merge_self <num> <ci_state> <mergeable> <slug>
# Merge a loop_self PR directly when CI is green and the PR is conflict-free.
# Does not rely on repo-level auto-merge (unreliable if not configured).
#   num        PR number
#   ci_state   CI status; only "success" proceeds
#   mergeable  mergeability state; only "MERGEABLE" proceeds
#   slug       repository passed to `gh -R`
# Squash-merges and deletes the branch via gh, then reports the outcome through
# info/warn. Always returns 0 so a failed merge never aborts the caller.
_loop_pr_merge_self() {
  local num="$1" ci_state="$2" mergeable="$3" slug="$4"

  # Nothing to do unless CI is green AND the PR merges cleanly.
  if [ "$ci_state" != "success" ] || [ "$mergeable" != "MERGEABLE" ]; then
    return 0
  fi

  # Explicit if/else instead of `a && b || c`: with the chained form a non-zero
  # status from the success reporter would also fire the failure message.
  if gh -R "$slug" pr merge "$num" --squash --delete-branch >/dev/null 2>&1; then
    info "PR #${num}: loop_self CI green — merged" || true
  else
    warn "PR #${num}: loop_self merge failed — left open" || true
  fi
  return 0
}
8209
+
7490
8210
  # _loop_pr_inbox
7491
8211
  # Walks open PRs and routes each by classification.
7492
8212
  # Lenient on gh unavailability — returns 0 so the loop continues to BACKLOG.
@@ -7551,7 +8271,10 @@ _loop_pr_inbox() {
7551
8271
  verdict=$(_loop_pr_classify "$head_ref" "$human_review" "$ci_state" "$mergeable")
7552
8272
 
7553
8273
  case "$verdict" in
7554
- loop_self|blocked_human_request_changes|blocked_human_approved)
8274
+ loop_self)
8275
+ _loop_pr_merge_self "$num" "$ci_state" "$mergeable" "$slug"
8276
+ ;;
8277
+ blocked_human_request_changes|blocked_human_approved)
7555
8278
  : # skip — explained by verdict; nothing to do this cycle
7556
8279
  ;;
7557
8280
  stale)
@@ -8304,6 +9027,24 @@ _loop_publish_doc_pr() {
8304
9027
  #
8305
9028
  # Lenient: returns 0 when gh is missing, slug is unresolvable, jq is
8306
9029
  # missing, or runs.jsonl does not exist. Atomic rewrite via temp file.
9030
+
9031
# FIX-123: remove orphaned runs.jsonl.tmp.<pid> files left by dead processes.
# Called at runs.jsonl write entry points so orphans never accumulate.
#   $1 (optional)  directory to scan. Defaults to the directory of $_LOOP_RUNS,
#                  or of ${_SHARED_ROOT:-$HOME/.shared/roll}/loop/runs.jsonl
#                  when $_LOOP_RUNS is unset.
# A file is kept only while its <pid> suffix names a live process. Suffixes
# that are not a positive integer (e.g. "0", "-1") are treated as stale:
# `kill -0` on them probes a process group rather than one writer, so it would
# succeed and the file would never be cleaned.
# Always returns 0.
_loop_cleanup_stale_runs_tmp() {
  local _default_runs="${_SHARED_ROOT:-${HOME}/.shared/roll}/loop/runs.jsonl"
  local _dir="${1:-$(dirname "${_LOOP_RUNS:-$_default_runs}")}"
  [ -d "$_dir" ] || return 0
  local _f _pid
  for _f in "$_dir"/runs.jsonl.tmp.*; do
    # An unmatched glob stays literal; -f filters that out along with dirs.
    [ -f "$_f" ] || continue
    _pid="${_f##*.tmp.}"
    [ -n "$_pid" ] || continue
    if [[ "$_pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$_pid" 2>/dev/null; then
      continue  # owner still alive — its write may be in flight
    fi
    rm -f "$_f"
  done
  return 0
}
9047
+
8307
9048
  _loop_backfill_merged() {
8308
9049
  local runs_path="${1:-${HOME}/.shared/roll/loop/runs.jsonl}"
8309
9050
  [ -f "$runs_path" ] || return 0
@@ -8311,7 +9052,9 @@ _loop_backfill_merged() {
8311
9052
  command -v jq >/dev/null 2>&1 || return 0
8312
9053
  local slug; _gh_resolve slug || return 0
8313
9054
 
9055
+ _loop_cleanup_stale_runs_tmp "$(dirname "$runs_path")"
8314
9056
  local tmp="${runs_path}.tmp.$$"
9057
+ trap "rm -f '$tmp'" EXIT
8315
9058
  : > "$tmp"
8316
9059
  local line status cycle_id branch view_json state merged_at merge_commit
8317
9060
  while IFS= read -r line; do
@@ -8342,6 +9085,7 @@ _loop_backfill_merged() {
8342
9085
  >> "$tmp" 2>/dev/null || printf '%s\n' "$line" >> "$tmp"
8343
9086
  done < "$runs_path"
8344
9087
  mv "$tmp" "$runs_path" 2>/dev/null || rm -f "$tmp"
9088
+ trap - EXIT
8345
9089
  return 0
8346
9090
  }
8347
9091
 
@@ -9553,6 +10297,7 @@ main() {
9553
10297
  doctor) cmd_doctor "$@" ;;
9554
10298
  review-pr) cmd_review_pr "$@" ;;
9555
10299
  slides) cmd_slides "$@" ;;
10300
+ test) cmd_test "$@" ;;
9556
10301
  prices) cmd_prices "$@" ;;
9557
10302
  changelog) cmd_changelog "$@" ;;
9558
10303
  version|--version|-v) echo "roll v${VERSION}" ;;