npm - ralph-lisa-loop - Versions diffs - 0.3.11 → 0.3.12 - Mend

ralph-lisa-loop 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/README.md +1 -1
package/dist/cli.js +10 -0
package/dist/commands.d.ts +7 -0
package/dist/commands.js +301 -76
package/dist/policy.js +24 -0
package/package.json +1 -1
package/templates/claude-commands/check-turn.md +1 -1
package/templates/claude-commands/submit-work.md +1 -1
package/templates/codex-skills/check-turn.md +2 -2
package/templates/roles/lisa.md +18 -10
package/templates/roles/ralph.md +34 -12
package/templates/skill.json +1 -1

package/README.md CHANGED Viewed

@@ -40,7 +40,7 @@ Ralph writes → Lisa reviews → Consensus → Next step
 - **Round 1 Mandatory Plan** — Ralph must submit `[PLAN]` first for Lisa to verify understanding
 - **Goal Guardian** — Lisa checks for direction drift before every review
 - **Mid-Session Task Update** — Change direction without restarting
-- **Deadlock Escape** — After 5 rounds: `[OVERRIDE]` or `[HANDOFF]` to human
+- **Deadlock Detection** — After 8 consecutive `[NEEDS_WORK]` rounds (default; configurable via `RL_DEADLOCK_THRESHOLD`), watcher auto-pauses for user intervention
 - **Minimal Init** — Zero-intrusion mode with plugin/global config architecture
 ## Essential Commands

package/dist/cli.js CHANGED Viewed

@@ -92,6 +92,12 @@ switch (cmd) {
     case "stop":
         (0, commands_js_1.cmdStop)(rest);
         break;
+    case "emergency-msg":
+        (0, commands_js_1.cmdEmergencyMsg)(rest);
+        break;
+    case "notify":
+        (0, commands_js_1.cmdNotify)(rest);
+        break;
     case "help":
     case "--help":
     case "-h":
@@ -173,6 +179,10 @@ function showHelp() {
     console.log("  ralph-lisa remote --auth user:pass   Enable basic auth");
     console.log("  ralph-lisa remote --stop             Stop ttyd server");
     console.log("");
+    console.log("Emergency & Notifications:");
+    console.log('  ralph-lisa emergency-msg <agent> "msg"  Send emergency message to agent pane');
+    console.log('  ralph-lisa notify "message"              Send notification via RL_NOTIFY_CMD');
+    console.log("");
     console.log("Diagnostics:");
     console.log("  ralph-lisa state-dir                Show state directory resolution");
     console.log("  ralph-lisa state-dir /path           Set state directory (tmux env)");

package/dist/commands.d.ts CHANGED Viewed

@@ -2,6 +2,11 @@
  * CLI commands for Ralph-Lisa Loop.
  * Direct port of io.sh logic to Node/TS.
  */
+/**
+ * Send a notification to the user via RL_NOTIFY_CMD.
+ * Notification failure must not block main flow.
+ */
+export declare function notifyUser(message: string): void;
 /**
  * Generate a project-specific tmux session name to avoid conflicts
  * when running multiple projects simultaneously.
@@ -51,4 +56,6 @@ export declare function cmdRemote(args: string[]): void;
 export declare function cmdStateDir(args: string[]): void;
 export declare function cmdAddContext(args: string[]): void;
 export declare function cmdDoctor(args: string[]): void;
+export declare function cmdEmergencyMsg(args: string[]): void;
+export declare function cmdNotify(args: string[]): void;
 export {};

package/dist/commands.js CHANGED Viewed

@@ -37,6 +37,7 @@ var __importStar = (this && this.__importStar) || (function () {
     };
 })();
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.notifyUser = notifyUser;
 exports.generateSessionName = generateSessionName;
 exports.runGate = runGate;
 exports.cmdInit = cmdInit;
@@ -66,6 +67,8 @@ exports.cmdRemote = cmdRemote;
 exports.cmdStateDir = cmdStateDir;
 exports.cmdAddContext = cmdAddContext;
 exports.cmdDoctor = cmdDoctor;
+exports.cmdEmergencyMsg = cmdEmergencyMsg;
+exports.cmdNotify = cmdNotify;
 const fs = __importStar(require("node:fs"));
 const path = __importStar(require("node:path"));
 const crypto = __importStar(require("node:crypto"));
@@ -75,6 +78,29 @@ const policy_js_1 = require("./policy.js");
 function line(ch = "=", len = 40) {
     return ch.repeat(len);
 }
+/**
+ * Send a notification to the user via RL_NOTIFY_CMD.
+ * Notification failure must not block main flow.
+ */
+function notifyUser(message) {
+    const cmd = process.env.RL_NOTIFY_CMD;
+    if (!cmd)
+        return;
+    try {
+        const child = (0, node_child_process_1.spawn)("sh", ["-c", cmd], {
+            detached: true,
+            stdio: ["pipe", "ignore", "ignore"],
+        });
+        if (child.stdin) {
+            child.stdin.write(message);
+            child.stdin.end();
+        }
+        child.unref();
+    }
+    catch {
+        // Notification failure must not block main flow
+    }
+}
 /**
  * Generate a project-specific tmux session name to avoid conflicts
  * when running multiple projects simultaneously.
@@ -374,6 +400,17 @@ function cmdSubmitRalph(args) {
     console.log(line());
     console.log("");
     console.log("Now wait for Lisa. Check with: ralph-lisa whose-turn");
+    // Notify on step completion (consensus reached)
+    const latestWork = (0, state_js_1.readFile)(path.join(dir, "work.md"));
+    const latestReview = (0, state_js_1.readFile)(path.join(dir, "review.md"));
+    const wTag = extractLastTag(latestWork);
+    const rTag = extractLastTag(latestReview);
+    if ((wTag === "CONSENSUS" && rTag === "CONSENSUS") ||
+        (wTag === "CONSENSUS" && rTag === "PASS") ||
+        (wTag === "PASS" && rTag === "CONSENSUS")) {
+        const stepName = (0, state_js_1.getStep)();
+        notifyUser(`[RLL] Step "${stepName}" complete — consensus reached.`);
+    }
 }
 // ─── submit-lisa ─────────────────────────────────
 function cmdSubmitLisa(args) {
@@ -448,7 +485,8 @@ function cmdSubmitLisa(args) {
         const currentCount = parseInt((0, state_js_1.readFile)(nwCountPath) || "0", 10);
         const newCount = currentCount + 1;
         (0, state_js_1.writeFile)(nwCountPath, String(newCount));
-        if (newCount >= 5) {
+        const deadlockThreshold = parseInt(process.env.RL_DEADLOCK_THRESHOLD || "8", 10);
+        if (newCount >= deadlockThreshold) {
             // Trigger deadlock — write flag for watcher to detect
             const deadlockPath = path.join(dir, "deadlock.txt");
             (0, state_js_1.writeFile)(deadlockPath, `DEADLOCK at round ${round}: ${newCount} consecutive NEEDS_WORK rounds\nTimestamp: ${ts}\nAction: Watcher will pause. User intervention required.`);
@@ -458,6 +496,7 @@ function cmdSubmitLisa(args) {
             console.log("Watcher will pause for user intervention.");
             console.log("To resolve: ralph-lisa scope-update or ralph-lisa force-turn");
             console.log(line("!", 40));
+            notifyUser(`[RLL] DEADLOCK: ${newCount} consecutive NEEDS_WORK rounds. User intervention needed.`);
         }
     }
     else {
@@ -498,6 +537,17 @@ function cmdSubmitLisa(args) {
     console.log(line());
     console.log("");
     console.log("Now wait for Ralph. Check with: ralph-lisa whose-turn");
+    // Notify on step completion (consensus reached)
+    const latestWork = (0, state_js_1.readFile)(path.join(dir, "work.md"));
+    const latestReview = (0, state_js_1.readFile)(path.join(dir, "review.md"));
+    const wTag = extractLastTag(latestWork);
+    const rTag = extractLastTag(latestReview);
+    if ((wTag === "CONSENSUS" && rTag === "CONSENSUS") ||
+        (wTag === "CONSENSUS" && rTag === "PASS") ||
+        (wTag === "PASS" && rTag === "CONSENSUS")) {
+        const stepName = (0, state_js_1.getStep)();
+        notifyUser(`[RLL] Step "${stepName}" complete — consensus reached.`);
+    }
 }
 // ─── status ──────────────────────────────────────
 function cmdStatus() {
@@ -1382,6 +1432,12 @@ description: Lisa review commands for Ralph-Lisa dual-agent collaboration
 This skill provides Lisa's review commands for the Ralph-Lisa collaboration.
+## Turn Rules
+When it's not your turn, do not submit work. You may use subagents for preparatory tasks.
+If triggered by the user but it's not your turn, suggest checking watcher status:
+\`cat .dual-agent/.watcher_heartbeat\` and \`ralph-lisa status\`.
 ## Available Commands
 ### Check Turn
@@ -1392,9 +1448,7 @@ Check if it's your turn before taking action.
 ### Submit Review
 \`\`\`bash
-ralph-lisa submit-lisa "[TAG] summary
-detailed content..."
+ralph-lisa submit-lisa --file .dual-agent/submit.md
 \`\`\`
 Submit your review. Valid tags: PASS, NEEDS_WORK, CHALLENGE, DISCUSS, QUESTION, CONSENSUS
@@ -1409,6 +1463,13 @@ View current task, turn, and last action.
 ralph-lisa read work.md
 \`\`\`
 Read Ralph's latest submission.
+## Review Requirements
+For [CODE]/[FIX] reviews:
+- Verify Test Results match the test plan from [PLAN] phase
+- Re-run the test command yourself to verify results
+- Check for exit code or pass/fail count (or explicit Skipped: with justification)
 `;
     (0, state_js_1.writeFile)(path.join(codexSkillDir, "SKILL.md"), skillContent);
     // Create .codex/config.toml (with marker for safe uninit)
@@ -1735,8 +1796,9 @@ function cmdAuto(args) {
     // Create watcher script
     const watcherScript = path.join(dir, "watcher.sh");
     let watcherContent = `#!/bin/bash
-# Turn watcher v4 - round-based change detection + persistent state
+# Turn watcher v5 - decoupled delivery + send caps + capture-pane monitoring
 # Architecture: polling main loop + optional event acceleration
+# v5: Fixes message flooding and stall bugs from v4 (step41)
 # v4: Round-based detection fixes double-flip deadlock (step39)
 STATE_DIR=".dual-agent"
@@ -1759,14 +1821,29 @@ DEADLOCK_REMIND_TIME=0
 CLEANUP_DONE=0
 # Per-turn escalation state (step38: anti-flooding + stuck-agent detection)
+# step43: configurable escalation timing via env vars (default: 5m/15m/30m)
 NOTIFY_SENT_AT=0        # epoch when first notification was sent this turn
 REMINDER_LEVEL=0        # 0=initial, 1=REMINDER sent, 2=slash sent, 3=user notified
 CURRENT_TURN_HASH=""    # hash of turn.txt content for change detection
+ESCALATION_L1=\${RL_ESCALATION_L1:-300}   # L1 REMINDER (default 5 min)
+ESCALATION_L2=\${RL_ESCALATION_L2:-900}   # L2 /check-turn (default 15 min)
+ESCALATION_L3=\${RL_ESCALATION_L3:-1800}  # L3 STUCK notify (default 30 min)
+# v5: Per-round send cap (P0-2: prevents message flooding)
+SEND_COUNT_THIS_ROUND=0
+MAX_SENDS_PER_ROUND=2   # initial + 1 retry max
 PANE0_LOG="\${STATE_DIR}/pane0.log"
 PANE1_LOG="\${STATE_DIR}/pane1.log"
 PID_FILE="\${STATE_DIR}/watcher.pid"
+# User notification hook (step47)
+notify_user() {
+  if [[ -n "\$RL_NOTIFY_CMD" ]]; then
+    echo "\$1" | eval "\$RL_NOTIFY_CMD" 2>/dev/null &
+  fi
+}
 # Interactive prompt patterns (do NOT send "go" if matched)
 # Covers: passwords, confirmations, Claude Code permission prompts, Codex approval prompts
 # NOTE: patterns must be specific enough to avoid false positives in normal agent output
@@ -1913,27 +1990,23 @@ check_agent_alive() {
 }
 # Returns 0 if pane output has been stable for at least N seconds
+# v5 (P0-3): Uses capture-pane diff instead of pipe-pane log mtime.
+# The old log-mtime approach failed silently when pipe-pane died,
+# causing false-idle detection and message injection into active agents.
 check_output_stable() {
-  local log_file="\$1"
+  local pane="\$1"
   local stable_seconds="\${2:-5}"
-  if [[ ! -f "\$log_file" ]]; then
-    return 0
-  fi
+  # Capture current pane content hash
+  local hash1 hash2
+  hash1=\$(tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5sum 2>/dev/null || tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5)
+  sleep "\$stable_seconds"
+  hash2=\$(tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5sum 2>/dev/null || tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5)
-  local mtime_epoch now_epoch elapsed
-  if [[ "\$(uname)" == "Darwin" ]]; then
-    mtime_epoch=\$(stat -f %m "\$log_file" 2>/dev/null || echo 0)
-  else
-    mtime_epoch=\$(stat -c %Y "\$log_file" 2>/dev/null || echo 0)
+  if [[ "\$hash1" == "\$hash2" ]]; then
+    return 0  # Stable — pane content unchanged
   fi
-  now_epoch=\$(date +%s)
-  elapsed=\$(( now_epoch - mtime_epoch ))
-  if (( elapsed >= stable_seconds )); then
-    return 0  # Stable
-  fi
-  return 1  # Still producing output
+  return 1  # Still producing output — pane content changed
 }
 # Returns 0 if interactive prompt detected (do NOT send go)
@@ -1991,14 +2064,14 @@ send_go_to_pane() {
   # 3. Wait for agent to be idle (output stable for 5s)
   #    Prevents injecting text while agent is mid-response
+  #    v5 (P0-3): uses capture-pane diff, not pipe-pane log mtime
   local stable_wait=0
   while (( stable_wait < 30 )); do
-    if check_output_stable "\$log_file" 5; then
+    if check_output_stable "\$pane" 5; then
       break
     fi
     echo "[Watcher] Waiting for \$agent_name to finish output..."
-    sleep 3
-    stable_wait=\$((stable_wait + 3))
+    stable_wait=\$((stable_wait + 5))
   done
   if (( stable_wait >= 30 )); then
     echo "[Watcher] \$agent_name still producing output after 30s, sending anyway"
@@ -2037,29 +2110,73 @@ send_go_to_pane() {
     return 1
   fi
-  # 6. Post-send verification: wait up to 20s for agent to start responding
-  #    Record size AFTER send+retry completes (not before), so we only measure
-  #    the agent's actual response, not the injected text appearing in the pane.
-  local post_send_baseline=0
+  # v5 (P0-1): send-keys succeeded + message left input line = delivered.
+  # Post-send response monitoring is now decoupled — handled by monitor_agent_response()
+  # in the escalation path. This eliminates the flooding bug where pipe-pane failure
+  # caused send_go_to_pane to return 1 despite successful delivery.
+  echo "[Watcher] OK: Message delivered to \$agent_name (send-keys confirmed)"
+  SEND_COUNT_THIS_ROUND=\$((SEND_COUNT_THIS_ROUND + 1))
+  return 0
+}
+# v5 (P1-2): Passive post-send monitoring — checks if agent is responding
+# without sending any messages. Uses capture-pane diff + log growth cross-reference.
+# Called from escalation path, NOT from delivery path.
+monitor_agent_response() {
+  local pane="\$1"
+  local agent_name="\$2"
+  local log_file="\$3"
+  # Record log size BEFORE sleep so we can measure real growth
+  local log_size_before=0
   if [[ -f "\$log_file" ]]; then
-    post_send_baseline=\$(wc -c < "\$log_file" 2>/dev/null | tr -d ' ')
+    log_size_before=\$(wc -c < "\$log_file" 2>/dev/null | tr -d ' ')
   fi
-  local verify_wait=0
-  while (( verify_wait < 20 )); do
-    sleep 4
-    verify_wait=\$((verify_wait + 4))
-    if [[ -f "\$log_file" ]]; then
-      local cur_size
-      cur_size=\$(wc -c < "\$log_file" 2>/dev/null | tr -d ' ')
-      if (( cur_size > post_send_baseline + 100 )); then
-        echo "[Watcher] OK: \$agent_name responded (output grew +\$((cur_size - post_send_baseline)) bytes)"
-        return 0
-      fi
+  # Check 1: capture-pane diff (primary signal, works even if pipe-pane is dead)
+  local hash_before hash_after
+  hash_before=\$(tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5sum 2>/dev/null || tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5)
+  sleep 5
+  hash_after=\$(tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5sum 2>/dev/null || tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5)
+  local pane_changed=0
+  local log_grew=0
+  if [[ "\$hash_before" != "\$hash_after" ]]; then
+    pane_changed=1
+  fi
+  # Check 2: log file growth (secondary signal, depends on pipe-pane being alive)
+  # size_before was recorded BEFORE the 5s sleep above
+  if [[ -f "\$log_file" ]]; then
+    local log_size_after
+    log_size_after=\$(wc -c < "\$log_file" 2>/dev/null | tr -d ' ')
+    if (( log_size_after > log_size_before + 50 )); then
+      log_grew=1
     fi
-  done
+  fi
-  echo "[Watcher] WARN: \$agent_name did not produce output after send — may not have received message"
-  return 1
+  # Cross-reference for pipe-pane health (P1-1)
+  if (( pane_changed && !log_grew )); then
+    echo "[Watcher] Pipe-pane appears dead (pane active but log stale), rebuilding for \$pane"
+    tmux pipe-pane -t "\${SESSION}:\${pane}" 2>/dev/null || true
+    tmux pipe-pane -o -t "\${SESSION}:\${pane}" "cat >> \\"\$log_file\\"" 2>/dev/null || true
+  fi
+  if (( pane_changed )); then
+    echo "[Watcher] Monitor: \$agent_name is active (pane output changing)"
+    return 0  # Agent is working
+  fi
+  # Check 3: turn.txt changed (ultimate signal — agent finished and submitted)
+  local current_turn
+  current_turn=\$(cat "\$STATE_DIR/turn.txt" 2>/dev/null || echo "")
+  if [[ "\$current_turn" != "\$SEEN_TURN" ]]; then
+    echo "[Watcher] Monitor: Turn changed to \$current_turn — agent completed work"
+    return 0
+  fi
+  return 1  # No activity detected
 }
 # ─── trigger_agent ───────────────────────────────
@@ -2067,6 +2184,12 @@ send_go_to_pane() {
 trigger_agent() {
   local turn="\$1"
+  # v5 (P0-2): Check send cap before attempting delivery
+  if (( SEND_COUNT_THIS_ROUND >= MAX_SENDS_PER_ROUND )); then
+    echo "[Watcher] SEND_CAP: Max sends (\$MAX_SENDS_PER_ROUND) reached for round \$SEEN_ROUND, passive monitoring only"
+    return 1
+  fi
   # Read task context for trigger messages (last meaningful line = latest direction)
   local task_ctx=""
   if [[ -f "\$STATE_DIR/task.md" ]]; then
@@ -2268,6 +2391,8 @@ check_and_trigger() {
       # Reset per-turn escalation state (step38)
       NOTIFY_SENT_AT=0
       REMINDER_LEVEL=0
+      # v5 (P0-2): Reset send cap for new round
+      SEND_COUNT_THIS_ROUND=0
       # Mark delivery pending (step39: decouple ack from delivery)
       DELIVERY_PENDING=1
@@ -2288,6 +2413,8 @@ check_and_trigger() {
       LAST_ACK_TIME=0
       NOTIFY_SENT_AT=0
       REMINDER_LEVEL=0
+      # v5 (P0-2): Reset send cap for new turn
+      SEND_COUNT_THIS_ROUND=0
       DELIVERY_PENDING=1
       PENDING_TARGET="\$CURRENT_TURN"
       save_watcher_state
@@ -2307,7 +2434,9 @@ check_and_trigger() {
     # Consensus suppression (step38): suppress notifications when consensus reached
     # step39: only suppress if round hasn't changed since consensus was detected
-    if check_consensus_reached; then
+    # step46: only suppress if delivery is NOT pending — if turn points to an agent
+    # that hasn't responded yet, they need to be triggered to confirm consensus
+    if check_consensus_reached && (( !DELIVERY_PENDING )); then
       if [[ "\$CONSENSUS_AT_ROUND" == "" ]]; then
         CONSENSUS_AT_ROUND="\$CURRENT_ROUND"
       fi
@@ -2388,40 +2517,60 @@ check_and_trigger() {
         target_pane="0.1"; target_name="Lisa"; target_log="\$PANE1_LOG"
       fi
-      # Check for context limit in pane output (unrecoverable — notify user immediately)
-      local pane_tail
-      pane_tail=\$(tmux capture-pane -t "\${SESSION}:\${target_pane}" -p 2>/dev/null | tail -10)
-      if echo "\$pane_tail" | grep -qiE "context limit|conversation too long|token limit|context window"; then
-        if (( REMINDER_LEVEL < 3 )); then
-          echo "[Watcher] CONTEXT LIMIT detected for \$target_name. Manual intervention required."
-          echo "[Watcher] Restart the agent session to continue."
-          REMINDER_LEVEL=3
-        fi
-      # Time-based escalation: each level checked independently by elapsed time.
-      # If L1/L2 delivery fails, time still advances, so L3 is always reachable.
+      # v5 (P1-2): Passive monitoring — check if agent is working before escalating
+      # This also handles pipe-pane cross-reference rebuild (P1-1)
+      if monitor_agent_response "\$target_pane" "\$target_name" "\$target_log"; then
+        # Agent is active — reset escalation timer, no action needed
+        NOTIFY_SENT_AT=\$(date +%s)
+        REMINDER_LEVEL=0
+      else
+        # Agent not responding — proceed with escalation
+        # Check for context limit in pane output (unrecoverable — notify user immediately)
+        local pane_tail
+        pane_tail=\$(tmux capture-pane -t "\${SESSION}:\${target_pane}" -p 2>/dev/null | tail -10)
+        if echo "\$pane_tail" | grep -qiE "context limit|conversation too long|token limit|context window"; then
+          if (( REMINDER_LEVEL < 3 )); then
+            echo "[Watcher] CONTEXT LIMIT detected for \$target_name. Manual intervention required."
+            echo "[Watcher] Restart the agent session to continue."
+            REMINDER_LEVEL=3
+            notify_user "[RLL] CONTEXT LIMIT: \$target_name needs restart"
+          fi
-      # Level 3: notify user after 10 minutes — always reachable regardless of L1/L2 success
-      elif (( elapsed >= 600 && REMINDER_LEVEL < 3 )); then
-        echo "[Watcher] STUCK: \$target_name has not responded for \${elapsed}s. Manual intervention needed."
-        REMINDER_LEVEL=3
+        # Time-based escalation: each level checked independently by elapsed time.
+        # If L1/L2 delivery fails, time still advances, so L3 is always reachable.
-      # Level 2: slash command after 5 minutes, with prompt guard
-      elif (( elapsed >= 300 && REMINDER_LEVEL < 2 )); then
-        if ! check_for_interactive_prompt "\$target_pane"; then
-          echo "[Watcher] Escalation L2: Sending /check-turn to \$target_name (no response for \${elapsed}s)"
-          if send_go_to_pane "\$target_pane" "\$target_name" "\$target_log" "/check-turn"; then
-            REMINDER_LEVEL=2
+        # Level 3: notify user (default 30 min) — always reachable regardless of L1/L2 success
+        elif (( elapsed >= ESCALATION_L3 && REMINDER_LEVEL < 3 )); then
+          echo "[Watcher] STUCK: \$target_name has not responded for \${elapsed}s. Manual intervention needed."
+          REMINDER_LEVEL=3
+          notify_user "[RLL] STUCK: \$target_name not responding for \${elapsed}s"
+        # Level 2: slash command (default 15 min), with prompt guard
+        # v5: escalation also respects send cap to prevent flooding
+        elif (( elapsed >= ESCALATION_L2 && REMINDER_LEVEL < 2 )); then
+          if (( SEND_COUNT_THIS_ROUND >= MAX_SENDS_PER_ROUND )); then
+            echo "[Watcher] Escalation L2: Skipped — send cap reached for round \$SEEN_ROUND"
+          elif ! check_for_interactive_prompt "\$target_pane"; then
+            echo "[Watcher] Escalation L2: Sending /check-turn to \$target_name (no response for \${elapsed}s)"
+            if send_go_to_pane "\$target_pane" "\$target_name" "\$target_log" "/check-turn"; then
+              REMINDER_LEVEL=2
+            fi
+          else
+            echo "[Watcher] Escalation L2: Skipped — interactive prompt detected for \$target_name"
           fi
-        else
-          echo "[Watcher] Escalation L2: Skipped — interactive prompt detected for \$target_name"
-        fi
-      # Level 1: REMINDER after 2 minutes
-      elif (( elapsed >= 120 && REMINDER_LEVEL < 1 )); then
-        echo "[Watcher] Escalation L1: Sending REMINDER to \$target_name (no response for \${elapsed}s)"
-        if send_go_to_pane "\$target_pane" "\$target_name" "\$target_log" "REMINDER: It is your turn. Please check turn and continue working."; then
-          REMINDER_LEVEL=1
+        # Level 1: REMINDER (default 5 min)
+        # v5: escalation also respects send cap to prevent flooding
+        elif (( elapsed >= ESCALATION_L1 && REMINDER_LEVEL < 1 )); then
+          if (( SEND_COUNT_THIS_ROUND >= MAX_SENDS_PER_ROUND )); then
+            echo "[Watcher] Escalation L1: Skipped — send cap reached for round \$SEEN_ROUND"
+          else
+            echo "[Watcher] Escalation L1: Sending REMINDER to \$target_name (no response for \${elapsed}s)"
+            if send_go_to_pane "\$target_pane" "\$target_name" "\$target_log" "REMINDER: It is your turn. Please check turn and continue working."; then
+              REMINDER_LEVEL=1
+            fi
+          fi
         fi
       fi
     fi
@@ -2430,7 +2579,7 @@ check_and_trigger() {
 # ─── Main ────────────────────────────────────────
-echo "[Watcher] Starting v4... (Ctrl+C to stop)"
+echo "[Watcher] Starting v5... (Ctrl+C to stop)"
 echo "[Watcher] Monitoring \$STATE_DIR/turn.txt + round.txt"
 echo "[Watcher] Pane logs: \$PANE0_LOG, \$PANE1_LOG"
 if (( CHECKPOINT_ROUNDS > 0 )); then
@@ -2517,7 +2666,7 @@ done
     }
     // Watcher runs in background with session-guarded restart loop
     const watcherLog = path.join(dir, "watcher.log");
-    execSync(`bash -c 'nohup bash -c '"'"'while tmux has-session -t "${sessionName}" 2>/dev/null; do bash "${watcherScript}"; EXIT_CODE=$?; if ! tmux has-session -t "${sessionName}" 2>/dev/null; then echo "[Watcher] Session gone, not restarting." >> "${watcherLog}"; break; fi; echo "[Watcher] Exited ($EXIT_CODE), restarting in 5s..." >> "${watcherLog}"; sleep 5; done'"'"' > "${watcherLog}" 2>&1 & echo $! > "${wrapperPidFile}"'`);
+    execSync(`bash -c 'nohup bash -c '"'"'while tmux has-session -t "${sessionName}" 2>/dev/null; do bash "${watcherScript}"; EXIT_CODE=$?; if ! tmux has-session -t "${sessionName}" 2>/dev/null; then echo "[Watcher] Session gone, not restarting." >> "${watcherLog}"; break; fi; echo "[Watcher] Exited ($EXIT_CODE), restarting in 5s..." >> "${watcherLog}"; if [[ -n "$RL_NOTIFY_CMD" ]]; then echo "[RLL] Watcher crashed (exit $EXIT_CODE), restarting..." | eval "$RL_NOTIFY_CMD" 2>/dev/null & fi; sleep 5; done'"'"' > "${watcherLog}" 2>&1 & echo $! > "${wrapperPidFile}"'`);
     console.log("");
     console.log(line());
     console.log("Auto Mode Started!");
@@ -3119,3 +3268,79 @@ function cmdDoctor(args) {
         process.exit(1);
     }
 }
+// ─── emergency-msg ───────────────────────────────
+function cmdEmergencyMsg(args) {
+    if (args.length < 2) {
+        console.error("Usage: ralph-lisa emergency-msg <ralph|lisa> \"message\"");
+        process.exit(1);
+    }
+    const target = args[0];
+    const message = args.slice(1).join(" ");
+    if (target !== "ralph" && target !== "lisa") {
+        console.error("Error: target must be 'ralph' or 'lisa'");
+        process.exit(1);
+    }
+    // Use project root for session name (not cwd, which may be a subdirectory)
+    const dir = (0, state_js_1.stateDir)();
+    const projectRoot = path.resolve(dir, "..");
+    const sessionName = generateSessionName(projectRoot);
+    // Check tmux session exists
+    try {
+        (0, node_child_process_1.execSync)(`tmux has-session -t "${sessionName}" 2>/dev/null`);
+    }
+    catch {
+        console.error(`Error: tmux session '${sessionName}' not found.`);
+        process.exit(1);
+    }
+    // Check watcher health — only allow emergency-msg when watcher is unhealthy
+    const heartbeatFile = path.join(dir, ".watcher_heartbeat");
+    if (fs.existsSync(heartbeatFile)) {
+        const heartbeat = parseInt((0, state_js_1.readFile)(heartbeatFile).trim(), 10);
+        const now = Math.floor(Date.now() / 1000);
+        if (now - heartbeat < 300) { // 5 minutes
+            console.error("Error: Watcher is healthy (heartbeat < 5min old). Use normal submit flow.");
+            console.error("Emergency messaging is only available when watcher appears stuck.");
+            process.exit(1);
+        }
+    }
+    // Send via tmux — use temp file to avoid shell injection
+    // (user message could contain $(), backticks, etc.)
+    const pane = target === "ralph" ? "0.0" : "0.1";
+    const emergencyMsg = `[EMERGENCY] ${message}`;
+    const tmpMsgFile = path.join(dir, ".emergency_msg_tmp");
+    try {
+        (0, state_js_1.writeFile)(tmpMsgFile, emergencyMsg);
+        (0, node_child_process_1.execSync)(`tmux load-buffer "${tmpMsgFile}" 2>/dev/null && tmux paste-buffer -t "${sessionName}:${pane}" 2>/dev/null`);
+        (0, node_child_process_1.execSync)(`tmux send-keys -t "${sessionName}:${pane}" Enter 2>/dev/null`);
+        try {
+            fs.unlinkSync(tmpMsgFile);
+        }
+        catch { }
+    }
+    catch {
+        console.error(`Error: Failed to send message to ${target}'s pane.`);
+        process.exit(1);
+    }
+    // Log to emergency.log
+    const ts = new Date().toISOString();
+    const logEntry = `[${ts}] To ${target}: ${message}\n`;
+    const logFile = path.join(dir, "emergency.log");
+    fs.appendFileSync(logFile, logEntry);
+    console.log(`Emergency message sent to ${target}: ${message}`);
+    console.log(`Logged to ${logFile}`);
+}
+// ─── notify ──────────────────────────────────────
+function cmdNotify(args) {
+    const message = args.join(" ");
+    if (!message) {
+        console.error("Usage: ralph-lisa notify \"message\"");
+        process.exit(1);
+    }
+    if (!process.env.RL_NOTIFY_CMD) {
+        console.error("Error: RL_NOTIFY_CMD not set. Configure it first:");
+        console.error('  export RL_NOTIFY_CMD="cat >> /tmp/notify.txt"');
+        process.exit(1);
+    }
+    notifyUser(message);
+    console.log(`Notification sent: ${message}`);
+}

package/dist/policy.js CHANGED Viewed

@@ -25,6 +25,15 @@ function getPolicyMode() {
  */
 function checkRalph(tag, content) {
     const violations = [];
+    // [PLAN] must include test plan (step42: mandatory test execution)
+    if (tag === "PLAN") {
+        if (!content.match(/测试计划|[Tt]est [Pp]lan|测试命令|[Tt]est [Cc]ommand/)) {
+            violations.push({
+                rule: "plan-test-plan",
+                message: `[PLAN] submission missing test plan (test command + coverage scope).`,
+            });
+        }
+    }
     // [CODE] or [FIX] must include Test Results and file:line references
     if (tag === "CODE" || tag === "FIX") {
         if (!content.includes("Test Results") &&
@@ -35,6 +44,21 @@ function checkRalph(tag, content) {
                 message: `[${tag}] submission missing "Test Results" section.`,
             });
         }
+        // step42: Test Results must include concrete execution evidence (exit code or pass/fail count)
+        // Exception: explicit "Skipped:" line inside the Test Results section only
+        // Section is bounded: from "Test Results" heading to next heading (## or blank-line-then-heading) or EOF
+        const testResultsMatch = content.match(/[Tt]est [Rr]esults[^\n]*\n([\s\S]*?)(?=\n##\s|\n\n[A-Z]|\n\n\*\*[A-Z]|$)/);
+        if (testResultsMatch) {
+            const testResultsBody = testResultsMatch[1];
+            const hasSkipLine = /^[\s\-*]*[Ss]kip(ped)?\s*:.*\S/m.test(testResultsBody);
+            const hasExecutionEvidence = /[Ee]xit code|退出码|\d+\/\d+\s*(pass|通过|passed)|(\d+)\s*tests?\s*pass/i.test(testResultsBody);
+            if (!hasSkipLine && !hasExecutionEvidence) {
+                violations.push({
+                    rule: "test-results-detail",
+                    message: `[${tag}] Test Results must include exit code or pass/fail count (e.g., "Exit code: 0" or "42/42 passed"), or explicit "Skipped:" with justification.`,
+                });
+            }
+        }
         if (!/\w+\.\w+:\d+/.test(content)) {
             violations.push({
                 rule: "file-line-ref",

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ralph-lisa-loop",
-  "version": "0.3.11",
+  "version": "0.3.12",
   "description": "Turn-based dual-agent collaboration: Ralph codes, Lisa reviews, consensus required.",
   "bin": {
     "ralph-lisa": "dist/cli.js"

package/templates/claude-commands/check-turn.md CHANGED Viewed

@@ -13,6 +13,6 @@ ralph-lisa whose-turn
 ## Rules
 - If output is `ralph`: You can proceed with your work
-- If output is `lisa`: STOP immediately and wait for Lisa's response
+- If output is `lisa`: Wait for Lisa's feedback — do not take further action until your turn
 **NEVER skip this check before working.**

package/templates/claude-commands/submit-work.md CHANGED Viewed

@@ -36,4 +36,4 @@ Detailed content here...
 ## After Submission
-The turn automatically passes to Lisa. You must STOP and wait.
+The turn automatically passes to Lisa. Wait for her feedback — do not take further action until it is your turn again.

package/templates/codex-skills/check-turn.md CHANGED Viewed

@@ -11,6 +11,6 @@ Check whose turn it is before taking any action.
 ## Rules
 - If output is `lisa`: You can proceed with your review
-- If output is `ralph`: STOP immediately and wait for Ralph's submission
+- If output is `ralph`: Wait for Ralph's submission — do not take further action until it is your turn
-**NEVER skip this check before working.**
+**NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**

package/templates/roles/lisa.md CHANGED Viewed

@@ -16,16 +16,16 @@ Then based on result:
   ```bash
   ralph-lisa read work.md
   ```
-- `ralph` → Say "Waiting for Ralph" and STOP
+- `ralph` → Say "Waiting for Ralph's submission" and wait — do not take further action until it is your turn
 **Do NOT wait for user to tell you to check. Check automatically.**
 ## CRITICAL: Turn-Based Rules
-- Output `lisa` → You can review
-- Output `ralph` → STOP immediately, tell user "Waiting for Ralph"
+- Output `lisa` → You can review. If it's your turn but you cannot complete work (missing input, environment error, etc.), tell the user the specific reason and wait — do not retry repeatedly.
+- Output `ralph` → Tell user it's not your turn. You may use subagents for preparatory work, but do not submit until it is your turn.
-**NEVER skip this check. NEVER work when it's not your turn.**
+**NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**
 ## How to Submit
@@ -38,7 +38,7 @@ ralph-lisa submit-lisa --file .dual-agent/submit.md
 Inline mode (`ralph-lisa submit-lisa "[TAG] ..."`) is deprecated — it breaks on special characters. Use `--file` or `--stdin` instead.
-This automatically passes the turn to Ralph. Then you MUST STOP.
+This automatically passes the turn to Ralph. Then wait — do not take further action until it is your turn again.
 ## Tags You Can Use
@@ -59,7 +59,7 @@ This automatically passes the turn to Ralph. Then you MUST STOP.
 3. Review following the behavior spec below
 4. Write review to .dual-agent/submit.md
 5. ralph-lisa submit-lisa --file .dual-agent/submit.md
-6. STOP and wait for Ralph
+6. Wait for Ralph's response
 7. ralph-lisa whose-turn    → Check again
 8. Repeat
 ```
@@ -101,20 +101,21 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
 | Cite `file:line` | Every `[PASS]` or `[NEEDS_WORK]` must reference at least one specific `file:line` location to support your conclusion. |
 | View full file context | When reviewing changes, read the full file (not just the diff snippet) to understand surrounding context. |
 | Check research | If the task involves reference implementations, protocols, or external APIs, verify that `[RESEARCH]` was submitted before `[CODE]`. |
+| Verify test execution | For `[CODE]`/`[FIX]`, verify Test Results contain actual command, exit code, and pass/fail count — OR an explicit `Skipped:` with valid justification (e.g., config-only, no testable logic). If results look suspicious (missing numbers, generic text), return `[NEEDS_WORK]`. |
+| Re-run tests | For `[CODE]`/`[FIX]` with executed tests, run the test command yourself to verify results. For skipped tests, verify the justification is valid. Report your findings in the review. |
+| Verify test plan alignment | For `[CODE]`/`[FIX]`, verify Test Results match the test plan from the `[PLAN]` phase. If tests differ from the plan without explanation, return `[NEEDS_WORK]`. |
 ### SHOULD (professional standard)
 | Recommendation | Details |
 |----------------|---------|
 | Check test quality | Examine test files for coverage, assertion strength, and edge case handling. |
-| Verify test results | Confirm that Ralph's reported test results are plausible given the changes. |
 | Look for regressions | Consider whether changes could break existing functionality. |
 ### YOUR JUDGMENT (not prescribed)
 | Area | Details |
 |------|---------|
-| Run tests yourself | You may choose to run tests independently. This is your professional call. |
 | Write verification tests | When static analysis is insufficient, write ad-hoc tests in `.dual-agent/tests/` and reference the output in your review. These are auto-cleaned on [CONSENSUS]. |
 | Review depth | Decide what to focus on based on risk and complexity. |
 | Accept or reject | Your verdict is your own professional judgment. |
@@ -125,7 +126,8 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
 - [ ] Logic correct
 - [ ] Edge cases handled
 - [ ] Tests adequate
-- [ ] **Test Results included in submission** (required for [CODE]/[FIX])
+- [ ] **Test Results verified** — `[CODE]`/`[FIX]` must have actual command + exit code + pass count, or explicit `Skipped:` with valid justification
+- [ ] **Tests re-run** — You ran the test command yourself and confirmed results match (or verified skip justification)
 - [ ] **Research adequate** (if task involves reference implementations/protocols/external APIs, check that [RESEARCH] was submitted)
 - [ ] **Research verified** — [RESEARCH] submissions must include at least one `Verified:` or `Evidence:` marker. Reject unverified claims.
 - [ ] **Factual claims verified** — For claims that a feature is "missing" or "not implemented", require `file:line` evidence or explicit acknowledgment that source code was not accessible
@@ -150,10 +152,16 @@ Lisa: [NEEDS_WORK] ...
 Ralph: [FIX] Agree, because... / [CHALLENGE] Disagree, because...
 ```
+## Long-Running Tasks
+For time-consuming operations (large-scale code review, batch test re-runs, deep research verification), consider using subagents or background tasks to work in parallel. Summarize subagent results before submitting your review.
+This avoids blocking the main collaboration loop while waiting for slow operations to complete.
 ## Handling Disagreement
 If Ralph uses [CHALLENGE]:
 1. Consider his argument carefully
 2. If convinced → Change your verdict
 3. If not → Explain your reasoning with [CHALLENGE] or [DISCUSS]
-4. After 5 rounds → Accept OVERRIDE or propose HANDOFF
+4. After 8 consecutive `[NEEDS_WORK]` rounds → Deadlock auto-detected, watcher pauses for user intervention

package/templates/roles/ralph.md CHANGED Viewed

@@ -16,16 +16,16 @@ Then based on result:
   ```bash
   ralph-lisa read review.md
   ```
-- `lisa` → Say "Waiting for Lisa" and STOP
+- `lisa` → Say "Waiting for Lisa's feedback" and wait — do not take further action until your turn
 **Do NOT wait for user to tell you to check. Check automatically.**
 ## CRITICAL: Turn-Based Rules
-- Output `ralph` → You can work
-- Output `lisa` → STOP immediately, tell user "Waiting for Lisa"
+- Output `ralph` → You can work. If it's your turn but you cannot complete work (missing input, environment error, etc.), tell the user the specific reason and wait — do not retry repeatedly.
+- Output `lisa` → Tell user it's not your turn. You may use subagents for preparatory work, but do not submit until it is your turn.
-**NEVER skip this check. NEVER work when it's not your turn.**
+**NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**
 ## How to Submit
@@ -38,7 +38,7 @@ ralph-lisa submit-ralph --file .dual-agent/submit.md
 Inline mode (`ralph-lisa submit-ralph "[TAG] ..."`) is deprecated — it breaks on special characters. Use `--file` or `--stdin` instead.
-This automatically passes the turn to Lisa. Then you MUST STOP.
+This automatically passes the turn to Lisa. Then wait — do not take further action until it is your turn again.
 ## Tags You Can Use
@@ -74,10 +74,15 @@ This is required when the task involves reference implementations, protocols, or
 **[CODE] or [FIX] submissions must include:**
-### Test Results
-- Test command: `npm test` / `pytest` / ...
-- Result: Passed / Failed (reason)
-- If skipping tests, must explain why
+### Test Results (must be from actual execution, not fabricated)
+- Test command: the exact command you ran (e.g., `pytest -x`, `npm test`)
+- Exit code: 0 (all passed) or non-zero (failures)
+- Result: X/Y passed (concrete numbers)
+- Failed output: if any failures, include last 30 lines of error output
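+- If skipping tests, add an explicit `Skipped: <reason>` line inside Test Results (the policy check flags a Test Results section that has neither execution evidence nor a `Skipped:` line) — Lisa will judge whether the reason is valid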
+- Tests must follow the test plan established in the `[PLAN]` phase
+- Test Results must reference the planned test command
+- If the test plan changed, explain why in the submission
 ## Round 1: Mandatory [PLAN]
@@ -86,6 +91,13 @@ your understanding of the task before you start coding. Include:
 - Your understanding of the task goal
 - Proposed approach
 - Expected deliverables
+- **Test plan** (mandatory):
+  - Test command (e.g., `pytest -x`, `npm test`, `go test ./...`, `flutter test`)
+  - Expected test coverage scope
+  - If no test framework exists, explain verification approach
+- **Quality gate commands** (recommended): Identify lint/format/type-check commands for the project
+  - Examples: `npm run lint`, `ruff check .`, `go vet ./...`
+  - These can be configured via `RL_RALPH_GATE` + `RL_GATE_COMMANDS` for auto mode
 ## Workflow
@@ -96,7 +108,7 @@ your understanding of the task before you start coding. Include:
    → Submit [RESEARCH] first, wait for Lisa's review
 4. Write content to .dual-agent/submit.md
 5. ralph-lisa submit-ralph --file .dual-agent/submit.md
-6. STOP and wait for Lisa
+6. Wait for Lisa's response
 7. ralph-lisa whose-turn    → Check again
 8. (If ralph) Read Lisa's feedback: ralph-lisa read review.md
 9. Respond or proceed based on feedback
@@ -121,13 +133,17 @@ After context compaction, run `ralph-lisa recap` to recover current state:
 ## Handling Lisa's Feedback
-- `[PASS]` → Submit [CONSENSUS] to close. Lisa's [PASS] already approves — no need to wait for her [CONSENSUS] back (single-round consensus).
+- `[PASS]` → First check PASS quality:
+  - Does Lisa's PASS include substantive review content (specific file checks, test verification, technical analysis)?
+  - If it's a rubber-stamp PASS (no specific reasons, no code references, no test verification), submit `[CHALLENGE]` requesting substantive review — **at most once**
+  - If Lisa resubmits PASS after your challenge (even if still thin), accept and submit `[CONSENSUS]` to avoid infinite loop
+  - If it's a substantive PASS and you agree, submit `[CONSENSUS]`
 - `[NEEDS_WORK]` → You MUST explain your reasoning:
   - If you agree: explain WHY Lisa is right, then submit [FIX]
   - If you disagree: use [CHALLENGE] to provide counter-argument
   - **Never submit a bare [FIX] without explanation. No silent acceptance.**
   - **You CANNOT submit [CODE]/[RESEARCH]/[PLAN] after NEEDS_WORK** — the CLI will reject it. Address the feedback first, or run `ralph-lisa scope-update` if the task scope changed.
-- After 3 consecutive NEEDS_WORK rounds → DEADLOCK auto-detected, watcher pauses for user intervention
+- After 8 consecutive NEEDS_WORK rounds → DEADLOCK auto-detected, watcher pauses for user intervention
 ## Submission Test Requirements
@@ -144,6 +160,12 @@ After context compaction, run `ralph-lisa recap` to recover current state:
 - "New tests: 0" requires justification (valid: pure UI layout, config-only change)
 - Invalid excuse: "requires E2E" for pure functions, data shape validation, or mock-able IPC
+## Long-Running Tasks
+For time-consuming operations (large-scale code search, batch test runs, CI waits, complex refactoring), consider using subagents or background tasks to work in parallel. Summarize subagent results before submitting.
+This avoids blocking the main collaboration loop while waiting for slow operations to complete.
 ## Your Responsibilities
 1. Planning and coding

package/templates/skill.json CHANGED Viewed

@@ -22,6 +22,6 @@
   "rules": {
     "consensus": "Both parties must agree before proceeding",
     "verdict": "PASS/NEEDS_WORK is advisory, not a command",
-    "deadlock": "After 5 rounds, use OVERRIDE or HANDOFF"
+    "deadlock": "After 8 consecutive NEEDS_WORK rounds, watcher pauses for user intervention"
   }
 }