ralph-lisa-loop 0.3.11 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -40,7 +40,7 @@ Ralph writes → Lisa reviews → Consensus → Next step
40
40
  - **Round 1 Mandatory Plan** — Ralph must submit `[PLAN]` first for Lisa to verify understanding
41
41
  - **Goal Guardian** — Lisa checks for direction drift before every review
42
42
  - **Mid-Session Task Update** — Change direction without restarting
43
- - **Deadlock Escape** — After 5 rounds: `[OVERRIDE]` or `[HANDOFF]` to human
43
+ - **Deadlock Detection** — After 8 consecutive `[NEEDS_WORK]` rounds, watcher auto-pauses for user intervention
44
44
  - **Minimal Init** — Zero-intrusion mode with plugin/global config architecture
45
45
 
46
46
  ## Essential Commands
package/dist/cli.js CHANGED
@@ -92,6 +92,12 @@ switch (cmd) {
92
92
  case "stop":
93
93
  (0, commands_js_1.cmdStop)(rest);
94
94
  break;
95
+ case "emergency-msg":
96
+ (0, commands_js_1.cmdEmergencyMsg)(rest);
97
+ break;
98
+ case "notify":
99
+ (0, commands_js_1.cmdNotify)(rest);
100
+ break;
95
101
  case "help":
96
102
  case "--help":
97
103
  case "-h":
@@ -173,6 +179,10 @@ function showHelp() {
173
179
  console.log(" ralph-lisa remote --auth user:pass Enable basic auth");
174
180
  console.log(" ralph-lisa remote --stop Stop ttyd server");
175
181
  console.log("");
182
+ console.log("Emergency & Notifications:");
183
+ console.log(' ralph-lisa emergency-msg <agent> "msg" Send emergency message to agent pane');
184
+ console.log(' ralph-lisa notify "message" Send notification via RL_NOTIFY_CMD');
185
+ console.log("");
176
186
  console.log("Diagnostics:");
177
187
  console.log(" ralph-lisa state-dir Show state directory resolution");
178
188
  console.log(" ralph-lisa state-dir /path Set state directory (tmux env)");
@@ -2,6 +2,11 @@
2
2
  * CLI commands for Ralph-Lisa Loop.
3
3
  * Direct port of io.sh logic to Node/TS.
4
4
  */
5
+ /**
6
+ * Send a notification to the user via RL_NOTIFY_CMD.
7
+ * Notification failure must not block main flow.
8
+ */
9
+ export declare function notifyUser(message: string): void;
5
10
  /**
6
11
  * Generate a project-specific tmux session name to avoid conflicts
7
12
  * when running multiple projects simultaneously.
@@ -51,4 +56,6 @@ export declare function cmdRemote(args: string[]): void;
51
56
  export declare function cmdStateDir(args: string[]): void;
52
57
  export declare function cmdAddContext(args: string[]): void;
53
58
  export declare function cmdDoctor(args: string[]): void;
59
+ export declare function cmdEmergencyMsg(args: string[]): void;
60
+ export declare function cmdNotify(args: string[]): void;
54
61
  export {};
package/dist/commands.js CHANGED
@@ -37,6 +37,7 @@ var __importStar = (this && this.__importStar) || (function () {
37
37
  };
38
38
  })();
39
39
  Object.defineProperty(exports, "__esModule", { value: true });
40
+ exports.notifyUser = notifyUser;
40
41
  exports.generateSessionName = generateSessionName;
41
42
  exports.runGate = runGate;
42
43
  exports.cmdInit = cmdInit;
@@ -66,6 +67,8 @@ exports.cmdRemote = cmdRemote;
66
67
  exports.cmdStateDir = cmdStateDir;
67
68
  exports.cmdAddContext = cmdAddContext;
68
69
  exports.cmdDoctor = cmdDoctor;
70
+ exports.cmdEmergencyMsg = cmdEmergencyMsg;
71
+ exports.cmdNotify = cmdNotify;
69
72
  const fs = __importStar(require("node:fs"));
70
73
  const path = __importStar(require("node:path"));
71
74
  const crypto = __importStar(require("node:crypto"));
@@ -75,6 +78,29 @@ const policy_js_1 = require("./policy.js");
75
78
  function line(ch = "=", len = 40) {
76
79
  return ch.repeat(len);
77
80
  }
81
+ /**
82
+ * Send a notification to the user via RL_NOTIFY_CMD.
83
+ * Notification failure must not block main flow.
84
+ */
85
+ function notifyUser(message) {
86
+ const cmd = process.env.RL_NOTIFY_CMD;
87
+ if (!cmd)
88
+ return;
89
+ try {
90
+ const child = (0, node_child_process_1.spawn)("sh", ["-c", cmd], {
91
+ detached: true,
92
+ stdio: ["pipe", "ignore", "ignore"],
93
+ });
94
+ if (child.stdin) {
95
+ child.stdin.write(message);
96
+ child.stdin.end();
97
+ }
98
+ child.unref();
99
+ }
100
+ catch {
101
+ // Notification failure must not block main flow
102
+ }
103
+ }
78
104
  /**
79
105
  * Generate a project-specific tmux session name to avoid conflicts
80
106
  * when running multiple projects simultaneously.
@@ -374,6 +400,17 @@ function cmdSubmitRalph(args) {
374
400
  console.log(line());
375
401
  console.log("");
376
402
  console.log("Now wait for Lisa. Check with: ralph-lisa whose-turn");
403
+ // Notify on step completion (consensus reached)
404
+ const latestWork = (0, state_js_1.readFile)(path.join(dir, "work.md"));
405
+ const latestReview = (0, state_js_1.readFile)(path.join(dir, "review.md"));
406
+ const wTag = extractLastTag(latestWork);
407
+ const rTag = extractLastTag(latestReview);
408
+ if ((wTag === "CONSENSUS" && rTag === "CONSENSUS") ||
409
+ (wTag === "CONSENSUS" && rTag === "PASS") ||
410
+ (wTag === "PASS" && rTag === "CONSENSUS")) {
411
+ const stepName = (0, state_js_1.getStep)();
412
+ notifyUser(`[RLL] Step "${stepName}" complete — consensus reached.`);
413
+ }
377
414
  }
378
415
  // ─── submit-lisa ─────────────────────────────────
379
416
  function cmdSubmitLisa(args) {
@@ -448,7 +485,8 @@ function cmdSubmitLisa(args) {
448
485
  const currentCount = parseInt((0, state_js_1.readFile)(nwCountPath) || "0", 10);
449
486
  const newCount = currentCount + 1;
450
487
  (0, state_js_1.writeFile)(nwCountPath, String(newCount));
451
- if (newCount >= 5) {
488
+ const deadlockThreshold = parseInt(process.env.RL_DEADLOCK_THRESHOLD || "8", 10);
489
+ if (newCount >= deadlockThreshold) {
452
490
  // Trigger deadlock — write flag for watcher to detect
453
491
  const deadlockPath = path.join(dir, "deadlock.txt");
454
492
  (0, state_js_1.writeFile)(deadlockPath, `DEADLOCK at round ${round}: ${newCount} consecutive NEEDS_WORK rounds\nTimestamp: ${ts}\nAction: Watcher will pause. User intervention required.`);
@@ -458,6 +496,7 @@ function cmdSubmitLisa(args) {
458
496
  console.log("Watcher will pause for user intervention.");
459
497
  console.log("To resolve: ralph-lisa scope-update or ralph-lisa force-turn");
460
498
  console.log(line("!", 40));
499
+ notifyUser(`[RLL] DEADLOCK: ${newCount} consecutive NEEDS_WORK rounds. User intervention needed.`);
461
500
  }
462
501
  }
463
502
  else {
@@ -498,6 +537,17 @@ function cmdSubmitLisa(args) {
498
537
  console.log(line());
499
538
  console.log("");
500
539
  console.log("Now wait for Ralph. Check with: ralph-lisa whose-turn");
540
+ // Notify on step completion (consensus reached)
541
+ const latestWork = (0, state_js_1.readFile)(path.join(dir, "work.md"));
542
+ const latestReview = (0, state_js_1.readFile)(path.join(dir, "review.md"));
543
+ const wTag = extractLastTag(latestWork);
544
+ const rTag = extractLastTag(latestReview);
545
+ if ((wTag === "CONSENSUS" && rTag === "CONSENSUS") ||
546
+ (wTag === "CONSENSUS" && rTag === "PASS") ||
547
+ (wTag === "PASS" && rTag === "CONSENSUS")) {
548
+ const stepName = (0, state_js_1.getStep)();
549
+ notifyUser(`[RLL] Step "${stepName}" complete — consensus reached.`);
550
+ }
501
551
  }
502
552
  // ─── status ──────────────────────────────────────
503
553
  function cmdStatus() {
@@ -1382,6 +1432,12 @@ description: Lisa review commands for Ralph-Lisa dual-agent collaboration
1382
1432
 
1383
1433
  This skill provides Lisa's review commands for the Ralph-Lisa collaboration.
1384
1434
 
1435
+ ## Turn Rules
1436
+
1437
+ When it's not your turn, do not submit work. You may use subagents for preparatory tasks.
1438
+ If triggered by the user but it's not your turn, suggest checking watcher status:
1439
+ \`cat .dual-agent/.watcher_heartbeat\` and \`ralph-lisa status\`.
1440
+
1385
1441
  ## Available Commands
1386
1442
 
1387
1443
  ### Check Turn
@@ -1392,9 +1448,7 @@ Check if it's your turn before taking action.
1392
1448
 
1393
1449
  ### Submit Review
1394
1450
  \`\`\`bash
1395
- ralph-lisa submit-lisa "[TAG] summary
1396
-
1397
- detailed content..."
1451
+ ralph-lisa submit-lisa --file .dual-agent/submit.md
1398
1452
  \`\`\`
1399
1453
  Submit your review. Valid tags: PASS, NEEDS_WORK, CHALLENGE, DISCUSS, QUESTION, CONSENSUS
1400
1454
 
@@ -1409,6 +1463,13 @@ View current task, turn, and last action.
1409
1463
  ralph-lisa read work.md
1410
1464
  \`\`\`
1411
1465
  Read Ralph's latest submission.
1466
+
1467
+ ## Review Requirements
1468
+
1469
+ For [CODE]/[FIX] reviews:
1470
+ - Verify Test Results match the test plan from [PLAN] phase
1471
+ - Re-run the test command yourself to verify results
1472
+ - Check for exit code or pass/fail count (or explicit Skipped: with justification)
1412
1473
  `;
1413
1474
  (0, state_js_1.writeFile)(path.join(codexSkillDir, "SKILL.md"), skillContent);
1414
1475
  // Create .codex/config.toml (with marker for safe uninit)
@@ -1735,8 +1796,9 @@ function cmdAuto(args) {
1735
1796
  // Create watcher script
1736
1797
  const watcherScript = path.join(dir, "watcher.sh");
1737
1798
  let watcherContent = `#!/bin/bash
1738
- # Turn watcher v4 - round-based change detection + persistent state
1799
+ # Turn watcher v5 - decoupled delivery + send caps + capture-pane monitoring
1739
1800
  # Architecture: polling main loop + optional event acceleration
1801
+ # v5: Fixes message flooding and stall bugs from v4 (step41)
1740
1802
  # v4: Round-based detection fixes double-flip deadlock (step39)
1741
1803
 
1742
1804
  STATE_DIR=".dual-agent"
@@ -1759,14 +1821,29 @@ DEADLOCK_REMIND_TIME=0
1759
1821
  CLEANUP_DONE=0
1760
1822
 
1761
1823
  # Per-turn escalation state (step38: anti-flooding + stuck-agent detection)
1824
+ # step43: configurable escalation timing via env vars (default: 5m/15m/30m)
1762
1825
  NOTIFY_SENT_AT=0 # epoch when first notification was sent this turn
1763
1826
  REMINDER_LEVEL=0 # 0=initial, 1=REMINDER sent, 2=slash sent, 3=user notified
1764
1827
  CURRENT_TURN_HASH="" # hash of turn.txt content for change detection
1828
+ ESCALATION_L1=\${RL_ESCALATION_L1:-300} # L1 REMINDER (default 5 min)
1829
+ ESCALATION_L2=\${RL_ESCALATION_L2:-900} # L2 /check-turn (default 15 min)
1830
+ ESCALATION_L3=\${RL_ESCALATION_L3:-1800} # L3 STUCK notify (default 30 min)
1831
+
1832
+ # v5: Per-round send cap (P0-2: prevents message flooding)
1833
+ SEND_COUNT_THIS_ROUND=0
1834
+ MAX_SENDS_PER_ROUND=2 # initial + 1 retry max
1765
1835
 
1766
1836
  PANE0_LOG="\${STATE_DIR}/pane0.log"
1767
1837
  PANE1_LOG="\${STATE_DIR}/pane1.log"
1768
1838
  PID_FILE="\${STATE_DIR}/watcher.pid"
1769
1839
 
1840
+ # User notification hook (step47)
1841
+ notify_user() {
1842
+ if [[ -n "\$RL_NOTIFY_CMD" ]]; then
1843
+ echo "\$1" | eval "\$RL_NOTIFY_CMD" 2>/dev/null &
1844
+ fi
1845
+ }
1846
+
1770
1847
  # Interactive prompt patterns (do NOT send "go" if matched)
1771
1848
  # Covers: passwords, confirmations, Claude Code permission prompts, Codex approval prompts
1772
1849
  # NOTE: patterns must be specific enough to avoid false positives in normal agent output
@@ -1913,27 +1990,23 @@ check_agent_alive() {
1913
1990
  }
1914
1991
 
1915
1992
  # Returns 0 if pane output has been stable for at least N seconds
1993
+ # v5 (P0-3): Uses capture-pane diff instead of pipe-pane log mtime.
1994
+ # The old log-mtime approach failed silently when pipe-pane died,
1995
+ # causing false-idle detection and message injection into active agents.
1916
1996
  check_output_stable() {
1917
- local log_file="\$1"
1997
+ local pane="\$1"
1918
1998
  local stable_seconds="\${2:-5}"
1919
1999
 
1920
- if [[ ! -f "\$log_file" ]]; then
1921
- return 0
1922
- fi
2000
+ # Capture current pane content hash
2001
+ local hash1 hash2
2002
+ hash1=\$(tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5sum 2>/dev/null || tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5)
2003
+ sleep "\$stable_seconds"
2004
+ hash2=\$(tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5sum 2>/dev/null || tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5)
1923
2005
 
1924
- local mtime_epoch now_epoch elapsed
1925
- if [[ "\$(uname)" == "Darwin" ]]; then
1926
- mtime_epoch=\$(stat -f %m "\$log_file" 2>/dev/null || echo 0)
1927
- else
1928
- mtime_epoch=\$(stat -c %Y "\$log_file" 2>/dev/null || echo 0)
2006
+ if [[ "\$hash1" == "\$hash2" ]]; then
2007
+ return 0 # Stable — pane content unchanged
1929
2008
  fi
1930
- now_epoch=\$(date +%s)
1931
- elapsed=\$(( now_epoch - mtime_epoch ))
1932
-
1933
- if (( elapsed >= stable_seconds )); then
1934
- return 0 # Stable
1935
- fi
1936
- return 1 # Still producing output
2009
+ return 1 # Still producing output — pane content changed
1937
2010
  }
1938
2011
 
1939
2012
  # Returns 0 if interactive prompt detected (do NOT send go)
@@ -1991,14 +2064,14 @@ send_go_to_pane() {
1991
2064
 
1992
2065
  # 3. Wait for agent to be idle (output stable for 5s)
1993
2066
  # Prevents injecting text while agent is mid-response
2067
+ # v5 (P0-3): uses capture-pane diff, not pipe-pane log mtime
1994
2068
  local stable_wait=0
1995
2069
  while (( stable_wait < 30 )); do
1996
- if check_output_stable "\$log_file" 5; then
2070
+ if check_output_stable "\$pane" 5; then
1997
2071
  break
1998
2072
  fi
1999
2073
  echo "[Watcher] Waiting for \$agent_name to finish output..."
2000
- sleep 3
2001
- stable_wait=\$((stable_wait + 3))
2074
+ stable_wait=\$((stable_wait + 5))
2002
2075
  done
2003
2076
  if (( stable_wait >= 30 )); then
2004
2077
  echo "[Watcher] \$agent_name still producing output after 30s, sending anyway"
@@ -2037,29 +2110,73 @@ send_go_to_pane() {
2037
2110
  return 1
2038
2111
  fi
2039
2112
 
2040
- # 6. Post-send verification: wait up to 20s for agent to start responding
2041
- # Record size AFTER send+retry completes (not before), so we only measure
2042
- # the agent's actual response, not the injected text appearing in the pane.
2043
- local post_send_baseline=0
2113
+ # v5 (P0-1): send-keys succeeded + message left input line = delivered.
2114
+ # Post-send response monitoring is now decoupled — handled by monitor_agent_response()
2115
+ # in the escalation path. This eliminates the flooding bug where pipe-pane failure
2116
+ # caused send_go_to_pane to return 1 despite successful delivery.
2117
+ echo "[Watcher] OK: Message delivered to \$agent_name (send-keys confirmed)"
2118
+ SEND_COUNT_THIS_ROUND=\$((SEND_COUNT_THIS_ROUND + 1))
2119
+ return 0
2120
+ }
2121
+
2122
+ # v5 (P1-2): Passive post-send monitoring — checks if agent is responding
2123
+ # without sending any messages. Uses capture-pane diff + log growth cross-reference.
2124
+ # Called from escalation path, NOT from delivery path.
2125
+ monitor_agent_response() {
2126
+ local pane="\$1"
2127
+ local agent_name="\$2"
2128
+ local log_file="\$3"
2129
+
2130
+ # Record log size BEFORE sleep so we can measure real growth
2131
+ local log_size_before=0
2044
2132
  if [[ -f "\$log_file" ]]; then
2045
- post_send_baseline=\$(wc -c < "\$log_file" 2>/dev/null | tr -d ' ')
2133
+ log_size_before=\$(wc -c < "\$log_file" 2>/dev/null | tr -d ' ')
2046
2134
  fi
2047
- local verify_wait=0
2048
- while (( verify_wait < 20 )); do
2049
- sleep 4
2050
- verify_wait=\$((verify_wait + 4))
2051
- if [[ -f "\$log_file" ]]; then
2052
- local cur_size
2053
- cur_size=\$(wc -c < "\$log_file" 2>/dev/null | tr -d ' ')
2054
- if (( cur_size > post_send_baseline + 100 )); then
2055
- echo "[Watcher] OK: \$agent_name responded (output grew +\$((cur_size - post_send_baseline)) bytes)"
2056
- return 0
2057
- fi
2135
+
2136
+ # Check 1: capture-pane diff (primary signal, works even if pipe-pane is dead)
2137
+ local hash_before hash_after
2138
+ hash_before=\$(tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5sum 2>/dev/null || tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5)
2139
+ sleep 5
2140
+ hash_after=\$(tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5sum 2>/dev/null || tmux capture-pane -t "\${SESSION}:\${pane}" -p 2>/dev/null | md5)
2141
+
2142
+ local pane_changed=0
2143
+ local log_grew=0
2144
+
2145
+ if [[ "\$hash_before" != "\$hash_after" ]]; then
2146
+ pane_changed=1
2147
+ fi
2148
+
2149
+ # Check 2: log file growth (secondary signal, depends on pipe-pane being alive)
2150
+ # log_size_before was recorded BEFORE the 5s sleep above
2151
+ if [[ -f "\$log_file" ]]; then
2152
+ local log_size_after
2153
+ log_size_after=\$(wc -c < "\$log_file" 2>/dev/null | tr -d ' ')
2154
+ if (( log_size_after > log_size_before + 50 )); then
2155
+ log_grew=1
2058
2156
  fi
2059
- done
2157
+ fi
2060
2158
 
2061
- echo "[Watcher] WARN: \$agent_name did not produce output after send — may not have received message"
2062
- return 1
2159
+ # Cross-reference for pipe-pane health (P1-1)
2160
+ if (( pane_changed && !log_grew )); then
2161
+ echo "[Watcher] Pipe-pane appears dead (pane active but log stale), rebuilding for \$pane"
2162
+ tmux pipe-pane -t "\${SESSION}:\${pane}" 2>/dev/null || true
2163
+ tmux pipe-pane -o -t "\${SESSION}:\${pane}" "cat >> \\"\$log_file\\"" 2>/dev/null || true
2164
+ fi
2165
+
2166
+ if (( pane_changed )); then
2167
+ echo "[Watcher] Monitor: \$agent_name is active (pane output changing)"
2168
+ return 0 # Agent is working
2169
+ fi
2170
+
2171
+ # Check 3: turn.txt changed (ultimate signal — agent finished and submitted)
2172
+ local current_turn
2173
+ current_turn=\$(cat "\$STATE_DIR/turn.txt" 2>/dev/null || echo "")
2174
+ if [[ "\$current_turn" != "\$SEEN_TURN" ]]; then
2175
+ echo "[Watcher] Monitor: Turn changed to \$current_turn — agent completed work"
2176
+ return 0
2177
+ fi
2178
+
2179
+ return 1 # No activity detected
2063
2180
  }
2064
2181
 
2065
2182
  # ─── trigger_agent ───────────────────────────────
@@ -2067,6 +2184,12 @@ send_go_to_pane() {
2067
2184
  trigger_agent() {
2068
2185
  local turn="\$1"
2069
2186
 
2187
+ # v5 (P0-2): Check send cap before attempting delivery
2188
+ if (( SEND_COUNT_THIS_ROUND >= MAX_SENDS_PER_ROUND )); then
2189
+ echo "[Watcher] SEND_CAP: Max sends (\$MAX_SENDS_PER_ROUND) reached for round \$SEEN_ROUND, passive monitoring only"
2190
+ return 1
2191
+ fi
2192
+
2070
2193
  # Read task context for trigger messages (last meaningful line = latest direction)
2071
2194
  local task_ctx=""
2072
2195
  if [[ -f "\$STATE_DIR/task.md" ]]; then
@@ -2268,6 +2391,8 @@ check_and_trigger() {
2268
2391
  # Reset per-turn escalation state (step38)
2269
2392
  NOTIFY_SENT_AT=0
2270
2393
  REMINDER_LEVEL=0
2394
+ # v5 (P0-2): Reset send cap for new round
2395
+ SEND_COUNT_THIS_ROUND=0
2271
2396
 
2272
2397
  # Mark delivery pending (step39: decouple ack from delivery)
2273
2398
  DELIVERY_PENDING=1
@@ -2288,6 +2413,8 @@ check_and_trigger() {
2288
2413
  LAST_ACK_TIME=0
2289
2414
  NOTIFY_SENT_AT=0
2290
2415
  REMINDER_LEVEL=0
2416
+ # v5 (P0-2): Reset send cap for new turn
2417
+ SEND_COUNT_THIS_ROUND=0
2291
2418
  DELIVERY_PENDING=1
2292
2419
  PENDING_TARGET="\$CURRENT_TURN"
2293
2420
  save_watcher_state
@@ -2307,7 +2434,9 @@ check_and_trigger() {
2307
2434
 
2308
2435
  # Consensus suppression (step38): suppress notifications when consensus reached
2309
2436
  # step39: only suppress if round hasn't changed since consensus was detected
2310
- if check_consensus_reached; then
2437
+ # step46: only suppress if delivery is NOT pending — if turn points to an agent
2438
+ # that hasn't responded yet, they need to be triggered to confirm consensus
2439
+ if check_consensus_reached && (( !DELIVERY_PENDING )); then
2311
2440
  if [[ "\$CONSENSUS_AT_ROUND" == "" ]]; then
2312
2441
  CONSENSUS_AT_ROUND="\$CURRENT_ROUND"
2313
2442
  fi
@@ -2388,40 +2517,60 @@ check_and_trigger() {
2388
2517
  target_pane="0.1"; target_name="Lisa"; target_log="\$PANE1_LOG"
2389
2518
  fi
2390
2519
 
2391
- # Check for context limit in pane output (unrecoverable — notify user immediately)
2392
- local pane_tail
2393
- pane_tail=\$(tmux capture-pane -t "\${SESSION}:\${target_pane}" -p 2>/dev/null | tail -10)
2394
- if echo "\$pane_tail" | grep -qiE "context limit|conversation too long|token limit|context window"; then
2395
- if (( REMINDER_LEVEL < 3 )); then
2396
- echo "[Watcher] CONTEXT LIMIT detected for \$target_name. Manual intervention required."
2397
- echo "[Watcher] Restart the agent session to continue."
2398
- REMINDER_LEVEL=3
2399
- fi
2400
-
2401
- # Time-based escalation: each level checked independently by elapsed time.
2402
- # If L1/L2 delivery fails, time still advances, so L3 is always reachable.
2520
+ # v5 (P1-2): Passive monitoring — check if agent is working before escalating
2521
+ # This also handles pipe-pane cross-reference rebuild (P1-1)
2522
+ if monitor_agent_response "\$target_pane" "\$target_name" "\$target_log"; then
2523
+ # Agent is active — reset escalation timer, no action needed
2524
+ NOTIFY_SENT_AT=\$(date +%s)
2525
+ REMINDER_LEVEL=0
2526
+ else
2527
+ # Agent not responding — proceed with escalation
2528
+
2529
+ # Check for context limit in pane output (unrecoverable — notify user immediately)
2530
+ local pane_tail
2531
+ pane_tail=\$(tmux capture-pane -t "\${SESSION}:\${target_pane}" -p 2>/dev/null | tail -10)
2532
+ if echo "\$pane_tail" | grep -qiE "context limit|conversation too long|token limit|context window"; then
2533
+ if (( REMINDER_LEVEL < 3 )); then
2534
+ echo "[Watcher] CONTEXT LIMIT detected for \$target_name. Manual intervention required."
2535
+ echo "[Watcher] Restart the agent session to continue."
2536
+ REMINDER_LEVEL=3
2537
+ notify_user "[RLL] CONTEXT LIMIT: \$target_name needs restart"
2538
+ fi
2403
2539
 
2404
- # Level 3: notify user after 10 minutes — always reachable regardless of L1/L2 success
2405
- elif (( elapsed >= 600 && REMINDER_LEVEL < 3 )); then
2406
- echo "[Watcher] STUCK: \$target_name has not responded for \${elapsed}s. Manual intervention needed."
2407
- REMINDER_LEVEL=3
2540
+ # Time-based escalation: each level checked independently by elapsed time.
2541
+ # If L1/L2 delivery fails, time still advances, so L3 is always reachable.
2408
2542
 
2409
- # Level 2: slash command after 5 minutes, with prompt guard
2410
- elif (( elapsed >= 300 && REMINDER_LEVEL < 2 )); then
2411
- if ! check_for_interactive_prompt "\$target_pane"; then
2412
- echo "[Watcher] Escalation L2: Sending /check-turn to \$target_name (no response for \${elapsed}s)"
2413
- if send_go_to_pane "\$target_pane" "\$target_name" "\$target_log" "/check-turn"; then
2414
- REMINDER_LEVEL=2
2543
+ # Level 3: notify user (default 30 min) — always reachable regardless of L1/L2 success
2544
+ elif (( elapsed >= ESCALATION_L3 && REMINDER_LEVEL < 3 )); then
2545
+ echo "[Watcher] STUCK: \$target_name has not responded for \${elapsed}s. Manual intervention needed."
2546
+ REMINDER_LEVEL=3
2547
+ notify_user "[RLL] STUCK: \$target_name not responding for \${elapsed}s"
2548
+
2549
+ # Level 2: slash command (default 15 min), with prompt guard
2550
+ # v5: escalation also respects send cap to prevent flooding
2551
+ elif (( elapsed >= ESCALATION_L2 && REMINDER_LEVEL < 2 )); then
2552
+ if (( SEND_COUNT_THIS_ROUND >= MAX_SENDS_PER_ROUND )); then
2553
+ echo "[Watcher] Escalation L2: Skipped — send cap reached for round \$SEEN_ROUND"
2554
+ elif ! check_for_interactive_prompt "\$target_pane"; then
2555
+ echo "[Watcher] Escalation L2: Sending /check-turn to \$target_name (no response for \${elapsed}s)"
2556
+ if send_go_to_pane "\$target_pane" "\$target_name" "\$target_log" "/check-turn"; then
2557
+ REMINDER_LEVEL=2
2558
+ fi
2559
+ else
2560
+ echo "[Watcher] Escalation L2: Skipped — interactive prompt detected for \$target_name"
2415
2561
  fi
2416
- else
2417
- echo "[Watcher] Escalation L2: Skipped — interactive prompt detected for \$target_name"
2418
- fi
2419
2562
 
2420
- # Level 1: REMINDER after 2 minutes
2421
- elif (( elapsed >= 120 && REMINDER_LEVEL < 1 )); then
2422
- echo "[Watcher] Escalation L1: Sending REMINDER to \$target_name (no response for \${elapsed}s)"
2423
- if send_go_to_pane "\$target_pane" "\$target_name" "\$target_log" "REMINDER: It is your turn. Please check turn and continue working."; then
2424
- REMINDER_LEVEL=1
2563
+ # Level 1: REMINDER (default 5 min)
2564
+ # v5: escalation also respects send cap to prevent flooding
2565
+ elif (( elapsed >= ESCALATION_L1 && REMINDER_LEVEL < 1 )); then
2566
+ if (( SEND_COUNT_THIS_ROUND >= MAX_SENDS_PER_ROUND )); then
2567
+ echo "[Watcher] Escalation L1: Skipped — send cap reached for round \$SEEN_ROUND"
2568
+ else
2569
+ echo "[Watcher] Escalation L1: Sending REMINDER to \$target_name (no response for \${elapsed}s)"
2570
+ if send_go_to_pane "\$target_pane" "\$target_name" "\$target_log" "REMINDER: It is your turn. Please check turn and continue working."; then
2571
+ REMINDER_LEVEL=1
2572
+ fi
2573
+ fi
2425
2574
  fi
2426
2575
  fi
2427
2576
  fi
@@ -2430,7 +2579,7 @@ check_and_trigger() {
2430
2579
 
2431
2580
  # ─── Main ────────────────────────────────────────
2432
2581
 
2433
- echo "[Watcher] Starting v4... (Ctrl+C to stop)"
2582
+ echo "[Watcher] Starting v5... (Ctrl+C to stop)"
2434
2583
  echo "[Watcher] Monitoring \$STATE_DIR/turn.txt + round.txt"
2435
2584
  echo "[Watcher] Pane logs: \$PANE0_LOG, \$PANE1_LOG"
2436
2585
  if (( CHECKPOINT_ROUNDS > 0 )); then
@@ -2517,7 +2666,7 @@ done
2517
2666
  }
2518
2667
  // Watcher runs in background with session-guarded restart loop
2519
2668
  const watcherLog = path.join(dir, "watcher.log");
2520
- execSync(`bash -c 'nohup bash -c '"'"'while tmux has-session -t "${sessionName}" 2>/dev/null; do bash "${watcherScript}"; EXIT_CODE=$?; if ! tmux has-session -t "${sessionName}" 2>/dev/null; then echo "[Watcher] Session gone, not restarting." >> "${watcherLog}"; break; fi; echo "[Watcher] Exited ($EXIT_CODE), restarting in 5s..." >> "${watcherLog}"; sleep 5; done'"'"' > "${watcherLog}" 2>&1 & echo $! > "${wrapperPidFile}"'`);
2669
+ execSync(`bash -c 'nohup bash -c '"'"'while tmux has-session -t "${sessionName}" 2>/dev/null; do bash "${watcherScript}"; EXIT_CODE=$?; if ! tmux has-session -t "${sessionName}" 2>/dev/null; then echo "[Watcher] Session gone, not restarting." >> "${watcherLog}"; break; fi; echo "[Watcher] Exited ($EXIT_CODE), restarting in 5s..." >> "${watcherLog}"; if [[ -n "$RL_NOTIFY_CMD" ]]; then echo "[RLL] Watcher crashed (exit $EXIT_CODE), restarting..." | eval "$RL_NOTIFY_CMD" 2>/dev/null & fi; sleep 5; done'"'"' > "${watcherLog}" 2>&1 & echo $! > "${wrapperPidFile}"'`);
2521
2670
  console.log("");
2522
2671
  console.log(line());
2523
2672
  console.log("Auto Mode Started!");
@@ -3119,3 +3268,79 @@ function cmdDoctor(args) {
3119
3268
  process.exit(1);
3120
3269
  }
3121
3270
  }
3271
+ // ─── emergency-msg ───────────────────────────────
3272
+ function cmdEmergencyMsg(args) {
3273
+ if (args.length < 2) {
3274
+ console.error("Usage: ralph-lisa emergency-msg <ralph|lisa> \"message\"");
3275
+ process.exit(1);
3276
+ }
3277
+ const target = args[0];
3278
+ const message = args.slice(1).join(" ");
3279
+ if (target !== "ralph" && target !== "lisa") {
3280
+ console.error("Error: target must be 'ralph' or 'lisa'");
3281
+ process.exit(1);
3282
+ }
3283
+ // Use project root for session name (not cwd, which may be a subdirectory)
3284
+ const dir = (0, state_js_1.stateDir)();
3285
+ const projectRoot = path.resolve(dir, "..");
3286
+ const sessionName = generateSessionName(projectRoot);
3287
+ // Check tmux session exists
3288
+ try {
3289
+ (0, node_child_process_1.execSync)(`tmux has-session -t "${sessionName}" 2>/dev/null`);
3290
+ }
3291
+ catch {
3292
+ console.error(`Error: tmux session '${sessionName}' not found.`);
3293
+ process.exit(1);
3294
+ }
3295
+ // Check watcher health — only allow emergency-msg when watcher is unhealthy
3296
+ const heartbeatFile = path.join(dir, ".watcher_heartbeat");
3297
+ if (fs.existsSync(heartbeatFile)) {
3298
+ const heartbeat = parseInt((0, state_js_1.readFile)(heartbeatFile).trim(), 10);
3299
+ const now = Math.floor(Date.now() / 1000);
3300
+ if (now - heartbeat < 300) { // 5 minutes
3301
+ console.error("Error: Watcher is healthy (heartbeat < 5min old). Use normal submit flow.");
3302
+ console.error("Emergency messaging is only available when watcher appears stuck.");
3303
+ process.exit(1);
3304
+ }
3305
+ }
3306
+ // Send via tmux — use temp file to avoid shell injection
3307
+ // (user message could contain $(), backticks, etc.)
3308
+ const pane = target === "ralph" ? "0.0" : "0.1";
3309
+ const emergencyMsg = `[EMERGENCY] ${message}`;
3310
+ const tmpMsgFile = path.join(dir, ".emergency_msg_tmp");
3311
+ try {
3312
+ (0, state_js_1.writeFile)(tmpMsgFile, emergencyMsg);
3313
+ (0, node_child_process_1.execSync)(`tmux load-buffer "${tmpMsgFile}" 2>/dev/null && tmux paste-buffer -t "${sessionName}:${pane}" 2>/dev/null`);
3314
+ (0, node_child_process_1.execSync)(`tmux send-keys -t "${sessionName}:${pane}" Enter 2>/dev/null`);
3315
+ try {
3316
+ fs.unlinkSync(tmpMsgFile);
3317
+ }
3318
+ catch { }
3319
+ }
3320
+ catch {
3321
+ console.error(`Error: Failed to send message to ${target}'s pane.`);
3322
+ process.exit(1);
3323
+ }
3324
+ // Log to emergency.log
3325
+ const ts = new Date().toISOString();
3326
+ const logEntry = `[${ts}] To ${target}: ${message}\n`;
3327
+ const logFile = path.join(dir, "emergency.log");
3328
+ fs.appendFileSync(logFile, logEntry);
3329
+ console.log(`Emergency message sent to ${target}: ${message}`);
3330
+ console.log(`Logged to ${logFile}`);
3331
+ }
3332
+ // ─── notify ──────────────────────────────────────
3333
+ function cmdNotify(args) {
3334
+ const message = args.join(" ");
3335
+ if (!message) {
3336
+ console.error("Usage: ralph-lisa notify \"message\"");
3337
+ process.exit(1);
3338
+ }
3339
+ if (!process.env.RL_NOTIFY_CMD) {
3340
+ console.error("Error: RL_NOTIFY_CMD not set. Configure it first:");
3341
+ console.error(' export RL_NOTIFY_CMD="cat >> /tmp/notify.txt"');
3342
+ process.exit(1);
3343
+ }
3344
+ notifyUser(message);
3345
+ console.log(`Notification sent: ${message}`);
3346
+ }
package/dist/policy.js CHANGED
@@ -25,6 +25,15 @@ function getPolicyMode() {
25
25
  */
26
26
  function checkRalph(tag, content) {
27
27
  const violations = [];
28
+ // [PLAN] must include test plan (step42: mandatory test execution)
29
+ if (tag === "PLAN") {
30
+ if (!content.match(/测试计划|[Tt]est [Pp]lan|测试命令|[Tt]est [Cc]ommand/)) {
31
+ violations.push({
32
+ rule: "plan-test-plan",
33
+ message: `[PLAN] submission missing test plan (test command + coverage scope).`,
34
+ });
35
+ }
36
+ }
28
37
  // [CODE] or [FIX] must include Test Results and file:line references
29
38
  if (tag === "CODE" || tag === "FIX") {
30
39
  if (!content.includes("Test Results") &&
@@ -35,6 +44,21 @@ function checkRalph(tag, content) {
35
44
  message: `[${tag}] submission missing "Test Results" section.`,
36
45
  });
37
46
  }
47
+ // step42: Test Results must include concrete execution evidence (exit code or pass/fail count)
48
+ // Exception: explicit "Skipped:" line inside the Test Results section only
49
+ // Section is bounded: from "Test Results" heading to next heading (## or blank-line-then-heading) or EOF
50
+ const testResultsMatch = content.match(/[Tt]est [Rr]esults[^\n]*\n([\s\S]*?)(?=\n##\s|\n\n[A-Z]|\n\n\*\*[A-Z]|$)/);
51
+ if (testResultsMatch) {
52
+ const testResultsBody = testResultsMatch[1];
53
+ const hasSkipLine = /^[\s\-*]*[Ss]kip(ped)?\s*:.*\S/m.test(testResultsBody);
54
+ const hasExecutionEvidence = /[Ee]xit code|退出码|\d+\/\d+\s*(pass|通过|passed)|(\d+)\s*tests?\s*pass/i.test(testResultsBody);
55
+ if (!hasSkipLine && !hasExecutionEvidence) {
56
+ violations.push({
57
+ rule: "test-results-detail",
58
+ message: `[${tag}] Test Results must include exit code or pass/fail count (e.g., "Exit code: 0" or "42/42 passed"), or explicit "Skipped:" with justification.`,
59
+ });
60
+ }
61
+ }
38
62
  if (!/\w+\.\w+:\d+/.test(content)) {
39
63
  violations.push({
40
64
  rule: "file-line-ref",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ralph-lisa-loop",
3
- "version": "0.3.11",
3
+ "version": "0.3.12",
4
4
  "description": "Turn-based dual-agent collaboration: Ralph codes, Lisa reviews, consensus required.",
5
5
  "bin": {
6
6
  "ralph-lisa": "dist/cli.js"
@@ -13,6 +13,6 @@ ralph-lisa whose-turn
13
13
  ## Rules
14
14
 
15
15
  - If output is `ralph`: You can proceed with your work
16
- - If output is `lisa`: STOP immediately and wait for Lisa's response
16
+ - If output is `lisa`: Wait for Lisa's feedback — do not take further action until your turn
17
17
 
18
18
  **NEVER skip this check before working.**
@@ -36,4 +36,4 @@ Detailed content here...
36
36
 
37
37
  ## After Submission
38
38
 
39
- The turn automatically passes to Lisa. You must STOP and wait.
39
+ The turn automatically passes to Lisa. Wait for her feedback — do not take further action until it is your turn again.
@@ -11,6 +11,6 @@ Check whose turn it is before taking any action.
11
11
  ## Rules
12
12
 
13
13
  - If output is `lisa`: You can proceed with your review
14
- - If output is `ralph`: STOP immediately and wait for Ralph's submission
14
+ - If output is `ralph`: Wait for Ralph's submission — do not take further action until it is your turn
15
15
 
16
- **NEVER skip this check before working.**
16
+ **NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**
@@ -16,16 +16,16 @@ Then based on result:
16
16
  ```bash
17
17
  ralph-lisa read work.md
18
18
  ```
19
- - `ralph` → Say "Waiting for Ralph" and STOP
19
+ - `ralph` → Say "Waiting for Ralph's submission" and wait — do not take further action until it is your turn
20
20
 
21
21
  **Do NOT wait for user to tell you to check. Check automatically.**
22
22
 
23
23
  ## CRITICAL: Turn-Based Rules
24
24
 
25
- - Output `lisa` → You can review
26
- - Output `ralph` → STOP immediately, tell user "Waiting for Ralph"
25
+ - Output `lisa` → You can review. If it's your turn but you cannot complete work (missing input, environment error, etc.), tell the user the specific reason and wait — do not retry repeatedly.
26
+ - Output `ralph` → Tell user it's not your turn. You may use subagents for preparatory work, but do not submit until it is your turn.
27
27
 
28
- **NEVER skip this check. NEVER work when it's not your turn.**
28
+ **NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**
29
29
 
30
30
  ## How to Submit
31
31
 
@@ -38,7 +38,7 @@ ralph-lisa submit-lisa --file .dual-agent/submit.md
38
38
 
39
39
  Inline mode (`ralph-lisa submit-lisa "[TAG] ..."`) is deprecated — it breaks on special characters. Use `--file` or `--stdin` instead.
40
40
 
41
- This automatically passes the turn to Ralph. Then you MUST STOP.
41
+ This automatically passes the turn to Ralph. Then wait — do not take further action until it is your turn again.
42
42
 
43
43
  ## Tags You Can Use
44
44
 
@@ -59,7 +59,7 @@ This automatically passes the turn to Ralph. Then you MUST STOP.
59
59
  3. Review following the behavior spec below
60
60
  4. Write review to .dual-agent/submit.md
61
61
  5. ralph-lisa submit-lisa --file .dual-agent/submit.md
62
- 6. STOP and wait for Ralph
62
+ 6. Wait for Ralph's response
63
63
  7. ralph-lisa whose-turn → Check again
64
64
  8. Repeat
65
65
  ```
@@ -101,20 +101,21 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
101
101
  | Cite `file:line` | Every `[PASS]` or `[NEEDS_WORK]` must reference at least one specific `file:line` location to support your conclusion. |
102
102
  | View full file context | When reviewing changes, read the full file (not just the diff snippet) to understand surrounding context. |
103
103
  | Check research | If the task involves reference implementations, protocols, or external APIs, verify that `[RESEARCH]` was submitted before `[CODE]`. |
104
+ | Verify test execution | For `[CODE]`/`[FIX]`, verify Test Results contain actual command, exit code, and pass/fail count — OR an explicit `Skipped:` with valid justification (e.g., config-only, no testable logic). If results look suspicious (missing numbers, generic text), return `[NEEDS_WORK]`. |
105
+ | Re-run tests | For `[CODE]`/`[FIX]` with executed tests, run the test command yourself to verify results. For skipped tests, verify the justification is valid. Report your findings in the review. |
106
+ | Verify test plan alignment | For `[CODE]`/`[FIX]`, verify Test Results match the test plan from the `[PLAN]` phase. If tests differ from the plan without explanation, return `[NEEDS_WORK]`. |
104
107
 
105
108
  ### SHOULD (professional standard)
106
109
 
107
110
  | Recommendation | Details |
108
111
  |----------------|---------|
109
112
  | Check test quality | Examine test files for coverage, assertion strength, and edge case handling. |
110
- | Verify test results | Confirm that Ralph's reported test results are plausible given the changes. |
111
113
  | Look for regressions | Consider whether changes could break existing functionality. |
112
114
 
113
115
  ### YOUR JUDGMENT (not prescribed)
114
116
 
115
117
  | Area | Details |
116
118
  |------|---------|
117
- | Run tests yourself | You may choose to run tests independently. This is your professional call. |
118
119
  | Write verification tests | When static analysis is insufficient, write ad-hoc tests in `.dual-agent/tests/` and reference the output in your review. These are auto-cleaned on [CONSENSUS]. |
119
120
  | Review depth | Decide what to focus on based on risk and complexity. |
120
121
  | Accept or reject | Your verdict is your own professional judgment. |
@@ -125,7 +126,8 @@ This is your PRIMARY responsibility — catching direction drift early saves mor
125
126
  - [ ] Logic correct
126
127
  - [ ] Edge cases handled
127
128
  - [ ] Tests adequate
128
- - [ ] **Test Results included in submission** (required for [CODE]/[FIX])
129
+ - [ ] **Test Results verified** — `[CODE]`/`[FIX]` must have actual command + exit code + pass count, or explicit `Skipped:` with valid justification
130
+ - [ ] **Tests re-run** — You ran the test command yourself and confirmed results match (or verified skip justification)
129
131
  - [ ] **Research adequate** (if task involves reference implementations/protocols/external APIs, check that [RESEARCH] was submitted)
130
132
  - [ ] **Research verified** — [RESEARCH] submissions must include at least one `Verified:` or `Evidence:` marker. Reject unverified claims.
131
133
  - [ ] **Factual claims verified** — For claims that a feature is "missing" or "not implemented", require `file:line` evidence or explicit acknowledgment that source code was not accessible
@@ -150,10 +152,16 @@ Lisa: [NEEDS_WORK] ...
150
152
  Ralph: [FIX] Agree, because... / [CHALLENGE] Disagree, because...
151
153
  ```
152
154
 
155
+ ## Long-Running Tasks
156
+
157
+ For time-consuming operations (large-scale code review, batch test re-runs, deep research verification), consider using subagents or background tasks to work in parallel. Summarize subagent results before submitting your review.
158
+
159
+ This avoids blocking the main collaboration loop while waiting for slow operations to complete.
160
+
153
161
  ## Handling Disagreement
154
162
 
155
163
  If Ralph uses [CHALLENGE]:
156
164
  1. Consider his argument carefully
157
165
  2. If convinced → Change your verdict
158
166
  3. If not → Explain your reasoning with [CHALLENGE] or [DISCUSS]
159
- 4. After 5 rounds → Accept OVERRIDE or propose HANDOFF
167
+ 4. After 8 consecutive `[NEEDS_WORK]` rounds → Deadlock auto-detected, watcher pauses for user intervention
@@ -16,16 +16,16 @@ Then based on result:
16
16
  ```bash
17
17
  ralph-lisa read review.md
18
18
  ```
19
- - `lisa` → Say "Waiting for Lisa" and STOP
19
+ - `lisa` → Say "Waiting for Lisa's feedback" and wait — do not take further action until your turn
20
20
 
21
21
  **Do NOT wait for user to tell you to check. Check automatically.**
22
22
 
23
23
  ## CRITICAL: Turn-Based Rules
24
24
 
25
- - Output `ralph` → You can work
26
- - Output `lisa` → STOP immediately, tell user "Waiting for Lisa"
25
+ - Output `ralph` → You can work. If it's your turn but you cannot complete work (missing input, environment error, etc.), tell the user the specific reason and wait — do not retry repeatedly.
26
+ - Output `lisa` → Tell user it's not your turn. You may use subagents for preparatory work, but do not submit until it is your turn.
27
27
 
28
- **NEVER skip this check. NEVER work when it's not your turn.**
28
+ **NEVER skip this check. When it's not your turn, do not submit work. You may use subagents for preparatory tasks (research, environment checks). If triggered by the user but it's not your turn, suggest checking watcher status: `cat .dual-agent/.watcher_heartbeat` and `ralph-lisa status`.**
29
29
 
30
30
  ## How to Submit
31
31
 
@@ -38,7 +38,7 @@ ralph-lisa submit-ralph --file .dual-agent/submit.md
38
38
 
39
39
  Inline mode (`ralph-lisa submit-ralph "[TAG] ..."`) is deprecated — it breaks on special characters. Use `--file` or `--stdin` instead.
40
40
 
41
- This automatically passes the turn to Lisa. Then you MUST STOP.
41
+ This automatically passes the turn to Lisa. Then wait — do not take further action until it is your turn again.
42
42
 
43
43
  ## Tags You Can Use
44
44
 
@@ -74,10 +74,15 @@ This is required when the task involves reference implementations, protocols, or
74
74
 
75
75
  **[CODE] or [FIX] submissions must include:**
76
76
 
77
- ### Test Results
78
- - Test command: `npm test` / `pytest` / ...
79
- - Result: Passed / Failed (reason)
80
- - If skipping tests, must explain why
77
+ ### Test Results (must be from actual execution, not fabricated)
78
+ - Test command: the exact command you ran (e.g., `pytest -x`, `npm test`)
79
+ - Exit code: 0 (all passed) or non-zero (failures)
80
+ - Result: X/Y passed (concrete numbers)
81
+ - Failed output: if any failures, include last 30 lines of error output
82
+ - If skipping tests, must explain why — Lisa will judge whether the reason is valid
83
+ - Tests must follow the test plan established in the `[PLAN]` phase
84
+ - Test Results must reference the planned test command
85
+ - If the test plan changed, explain why in the submission
81
86
 
82
87
  ## Round 1: Mandatory [PLAN]
83
88
 
@@ -86,6 +91,13 @@ your understanding of the task before you start coding. Include:
86
91
  - Your understanding of the task goal
87
92
  - Proposed approach
88
93
  - Expected deliverables
94
+ - **Test plan** (mandatory):
95
+ - Test command (e.g., `pytest -x`, `npm test`, `go test ./...`, `flutter test`)
96
+ - Expected test coverage scope
97
+ - If no test framework exists, explain verification approach
98
+ - **Quality gate commands** (recommended): Identify lint/format/type-check commands for the project
99
+ - Examples: `npm run lint`, `ruff check .`, `go vet ./...`
100
+ - These can be configured via `RL_RALPH_GATE` + `RL_GATE_COMMANDS` for auto mode
89
101
 
90
102
  ## Workflow
91
103
 
@@ -96,7 +108,7 @@ your understanding of the task before you start coding. Include:
96
108
  → Submit [RESEARCH] first, wait for Lisa's review
97
109
  4. Write content to .dual-agent/submit.md
98
110
  5. ralph-lisa submit-ralph --file .dual-agent/submit.md
99
- 6. STOP and wait for Lisa
111
+ 6. Wait for Lisa's response
100
112
  7. ralph-lisa whose-turn → Check again
101
113
  8. (If ralph) Read Lisa's feedback: ralph-lisa read review.md
102
114
  9. Respond or proceed based on feedback
@@ -121,13 +133,17 @@ After context compaction, run `ralph-lisa recap` to recover current state:
121
133
 
122
134
  ## Handling Lisa's Feedback
123
135
 
124
- - `[PASS]` → Submit [CONSENSUS] to close. Lisa's [PASS] already approves — no need to wait for her [CONSENSUS] back (single-round consensus).
136
+ - `[PASS]` → First check PASS quality:
137
+ - Does Lisa's PASS include substantive review content (specific file checks, test verification, technical analysis)?
138
+ - If it's a rubber-stamp PASS (no specific reasons, no code references, no test verification), submit `[CHALLENGE]` requesting substantive review — **at most once**
139
+ - If Lisa resubmits PASS after your challenge (even if still thin), accept and submit `[CONSENSUS]` to avoid infinite loop
140
+ - If it's a substantive PASS and you agree, submit `[CONSENSUS]`
125
141
  - `[NEEDS_WORK]` → You MUST explain your reasoning:
126
142
  - If you agree: explain WHY Lisa is right, then submit [FIX]
127
143
  - If you disagree: use [CHALLENGE] to provide counter-argument
128
144
  - **Never submit a bare [FIX] without explanation. No silent acceptance.**
129
145
  - **You CANNOT submit [CODE]/[RESEARCH]/[PLAN] after NEEDS_WORK** — the CLI will reject it. Address the feedback first, or run `ralph-lisa scope-update` if the task scope changed.
130
- - After 3 consecutive NEEDS_WORK rounds → DEADLOCK auto-detected, watcher pauses for user intervention
146
+ - After 8 consecutive NEEDS_WORK rounds → DEADLOCK auto-detected, watcher pauses for user intervention
131
147
 
132
148
  ## Submission Test Requirements
133
149
 
@@ -144,6 +160,12 @@ After context compaction, run `ralph-lisa recap` to recover current state:
144
160
  - "New tests: 0" requires justification (valid: pure UI layout, config-only change)
145
161
  - Invalid excuse: "requires E2E" for pure functions, data shape validation, or mock-able IPC
146
162
 
163
+ ## Long-Running Tasks
164
+
165
+ For time-consuming operations (large-scale code search, batch test runs, CI waits, complex refactoring), consider using subagents or background tasks to work in parallel. Summarize subagent results before submitting.
166
+
167
+ This avoids blocking the main collaboration loop while waiting for slow operations to complete.
168
+
147
169
  ## Your Responsibilities
148
170
 
149
171
  1. Planning and coding
@@ -22,6 +22,6 @@
22
22
  "rules": {
23
23
  "consensus": "Both parties must agree before proceeding",
24
24
  "verdict": "PASS/NEEDS_WORK is advisory, not a command",
25
- "deadlock": "After 5 rounds, use OVERRIDE or HANDOFF"
25
+ "deadlock": "After 8 consecutive NEEDS_WORK rounds, watcher pauses for user intervention"
26
26
  }
27
27
  }