@ai-dev-methodologies/rlp-desk 0.11.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/docs/plans/spicy-booping-galaxy.md +322 -0
  2. package/docs/rlp-desk/artifact-schema.md +99 -0
  3. package/docs/rlp-desk/ci-setup.md +100 -0
  4. package/docs/rlp-desk/e2e-scenarios.md +102 -0
  5. package/docs/rlp-desk/plans/rlp-desk-tmux-flywheel-routing.md +730 -0
  6. package/install.sh +93 -20
  7. package/package.json +9 -3
  8. package/scripts/build-node-manifest.js +52 -0
  9. package/scripts/postinstall.js +162 -8
  10. package/src/commands/rlp-desk.md +73 -50
  11. package/src/governance.md +56 -7
  12. package/src/node/MANIFEST.txt +15 -0
  13. package/src/node/cli/command-builder.mjs +43 -5
  14. package/src/node/constants.mjs +19 -0
  15. package/src/node/init/campaign-initializer.mjs +100 -10
  16. package/src/node/polling/signal-poller.mjs +139 -3
  17. package/src/node/reporting/campaign-reporting.mjs +5 -1
  18. package/src/node/run.mjs +31 -2
  19. package/src/node/runner/campaign-main-loop.mjs +521 -44
  20. package/src/node/runner/leader-registry.mjs +100 -0
  21. package/src/node/runner/prompt-detector.mjs +41 -0
  22. package/src/node/runner/prompt-dismisser.mjs +200 -0
  23. package/src/node/shared/fs.mjs +38 -0
  24. package/src/node/util/debug-log.mjs +56 -0
  25. package/src/node/util/desk-root.mjs +24 -0
  26. package/src/node/util/shell-quote.mjs +12 -0
  27. package/docs/superpowers/plans/2026-04-24-gpt-5-5-default.md +0 -517
  28. package/docs/superpowers/specs/2026-04-24-gpt-5-5-default.md +0 -107
  29. /package/docs/{TODO-verification-next.md → rlp-desk/TODO-verification-next.md} +0 -0
  30. /package/docs/{architecture.md → rlp-desk/architecture.md} +0 -0
  31. /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-flywheel-enhancement.md +0 -0
  32. /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-pivot-step.md +0 -0
  33. /package/docs/{blueprints → rlp-desk/blueprints}/plan-flywheel-enhancement.md +0 -0
  34. /package/docs/{blueprints → rlp-desk/blueprints}/sv-architecture-rethink.md +0 -0
  35. /package/docs/{getting-started.md → rlp-desk/getting-started.md} +0 -0
  36. /package/docs/{internal → rlp-desk/internal}/verification-policy-gap-analysis.md +0 -0
  37. /package/docs/{internal → rlp-desk/internal}/verification-strategy-research.md +0 -0
  38. /package/docs/{multi-mission-orchestration.md → rlp-desk/multi-mission-orchestration.md} +0 -0
  39. /package/docs/{plans → rlp-desk/plans}/cozy-gliding-trinket.md +0 -0
  40. /package/docs/{plans → rlp-desk/plans}/frolicking-churning-honey.md +0 -0
  41. /package/docs/{plans → rlp-desk/plans}/keen-sauteeing-snowflake.md +0 -0
  42. /package/docs/{plans → rlp-desk/plans}/mutable-booping-corbato.md +0 -0
  43. /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11-handoff-7fixes.md +0 -0
  44. /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11.1-tmux-pane-disappearance.md +0 -0
  45. /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert-agent-a8cd695ffca2a3ad8.md +0 -0
  46. /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert.md +0 -0
  47. /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie-agent-a6814625642e956da.md +0 -0
  48. /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie.md +0 -0
  49. /package/docs/{plans → rlp-desk/plans}/validated-snacking-crayon.md +0 -0
  50. /package/docs/{protocol-reference.md → rlp-desk/protocol-reference.md} +0 -0
@@ -135,7 +135,7 @@ After all items are confirmed:
135
135
  Present the score table to the user before proceeding.
136
136
  2. Present the full contract summary.
137
137
  3. **Self-Verification** — Ask: "Enable self-verification? Worker records step-by-step evidence, Verifier cross-validates process. Recommended for MEDIUM+ risk." Default: yes for HIGH/CRITICAL, no for LOW/MEDIUM.
138
- 4. **Re-execution check**: After slug is confirmed, check if `.claude/ralph-desk/plans/prd-<slug>.md` already exists. If a PRD already exists for this slug, ask: "A PRD already exists for this slug. Improve the existing PRD or start fresh (delete and recreate)?"
138
+ 4. **Re-execution check**: After slug is confirmed, check if `.rlp-desk/plans/prd-<slug>.md` already exists. If a PRD already exists for this slug, ask: "A PRD already exists for this slug. Improve the existing PRD or start fresh (delete and recreate)?"
139
139
  - "improve" → pass `--mode improve` to init
140
140
  - "start fresh" → pass `--mode fresh` to init
141
141
  - If no PRD exists: standard first-run (no --mode needed)
@@ -280,40 +280,51 @@ Parse the `--mode` flag. If absent or `agent`, use the Agent() path below. If `t
280
280
 
281
281
  #### Tmux Mode (`--mode tmux`)
282
282
 
283
- When `--mode tmux` is specified:
283
+ When `--mode tmux` is specified (v0.12.0+ — v5.7 §4.1 routes to Node leader for flywheel + SV support):
284
284
 
285
- 1. **Validate scaffold** — same as Agent() mode: check `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
285
+ 1. **Validate scaffold** — same as Agent() mode: check `.rlp-desk/prompts/<slug>.worker.prompt.md` etc.
286
286
  2. **Check sentinels** — same as Agent() mode.
287
- 3. **Check prerequisites** — verify `tmux` and `jq` are installed. If not, report what is missing and stop.
288
- 4. **Locate runner script** — find `run_ralph_desk.zsh` at `~/.claude/ralph-desk/run_ralph_desk.zsh`. If not found, tell the user to reinstall (`npm install` or `install.sh`).
289
- 5. **Launch** — shell out to the runner script with env vars derived from flags:
287
+ 3. **Check prerequisites** — verify `tmux`, `jq`, and `node` (>= 16) are installed. If not, report what is missing and stop.
288
+ 4. **Locate Node leader** — find `~/.claude/ralph-desk/node/run.mjs`. If not found, tell the user to reinstall (`npm install` or `bash install.sh`).
289
+ 5. **Launch** — shell out to the Node leader. **All dynamic args (slug + model values) MUST be passed through shell single-quote escaping** (v5.7 §4.12 G11) so bracketed model ids like `claude-opus-4-7[1m]` survive zsh parsing:
290
+
290
291
  ```bash
291
- LOOP_NAME="<slug>" \
292
- ROOT="$PWD" \
293
- MAX_ITER=<--max-iter value> \
294
- WORKER_MODEL=<--worker-model value> \
295
- LOCK_WORKER_MODEL=<1 if --lock-worker-model, else 0> \
296
- VERIFIER_MODEL=<--verifier-model value, default: sonnet> \
297
- FINAL_VERIFIER_MODEL=<--final-verifier-model value, default: opus> \
298
- VERIFY_MODE=<--verify-mode value, default: per-us> \
299
- CONSENSUS_MODE=<--consensus value, default: off> \
300
- CONSENSUS_MODEL=<--consensus-model value, default: gpt-5.5:medium> \
301
- FINAL_CONSENSUS_MODEL=<--final-consensus-model value, default: gpt-5.5:high> \
302
- CB_THRESHOLD=<--cb-threshold value, default: 6> \
303
- ITER_TIMEOUT=<--iter-timeout value, default: 600> \
304
- DEBUG=<1 if --debug, else 0> \
305
- WITH_SELF_VERIFICATION=<1 if --with-self-verification, else 0> \
306
- zsh ~/.claude/ralph-desk/run_ralph_desk.zsh
292
+ node ~/.claude/ralph-desk/node/run.mjs run '<slug>' \
293
+ --mode tmux \
294
+ --max-iter <N> \
295
+ --worker-model '<value>' \
296
+ [--lock-worker-model] \
297
+ --verifier-model '<value>' \
298
+ --final-verifier-model '<value>' \
299
+ --consensus <off|all|final-only> \
300
+ --consensus-model '<value>' \
301
+ --final-consensus-model '<value>' \
302
+ --verify-mode <per-us|batch> \
303
+ --cb-threshold <N> \
304
+ --iter-timeout <N> \
305
+ [--debug] [--autonomous] \
306
+ [--lane-strict] \
307
+ [--test-density-strict] \
308
+ [--with-self-verification] \
309
+ [--flywheel on-fail --flywheel-model '<value>'] \
310
+ [--flywheel-guard on --flywheel-guard-model '<value>']
307
311
  ```
308
- 6. **If the script exits with error (exit code 1)** — report the error to the user and STOP. Do NOT attempt to work around it. Do NOT create tmux sessions yourself. Do NOT re-launch the script in a different way. Just tell the user what went wrong and suggest using Agent mode instead.
309
- 7. **If successful** tell the user the tmux session has been started. The shell script takes over as the deterministic Leader. No Agent() calls are made in tmux mode.
312
+
313
+ **Quoting contract (v5.7 §4.1)**: every `'<value>'` placeholder above must be replaced with the user's flag value wrapped in single quotes, using the equivalent of `shellQuote(value)` — i.e. `"'" + value.replace(/'/g, "'\\''") + "'"` — for POSIX correctness. The slug, all model values, and any future dynamic flag must follow this rule. A slug or model containing brackets / spaces / single quotes / dollar signs / backticks must NOT break the leader invocation.
314
+
315
+ **Env-var translation (v5.7 §4.1)**: the slash command historically built `LANE_MODE=strict zsh ...` and `TEST_DENSITY_MODE=strict zsh ...` from CLI flags. The Node leader uses CLI flags instead — translate `--lane-strict` and `--test-density-strict` into the corresponding flags. Direct env-var users (running zsh directly) are unaffected.
316
+
317
+ 6. **If the Node leader exits with error** — report the error to the user and STOP. Do NOT attempt to work around it. Do NOT create tmux sessions yourself. Do NOT re-launch in a different way. Tell the user what went wrong and suggest `--mode agent` as an alternative.
318
+ 7. **If successful** — tell the user the tmux session has been started. The Node leader takes over as the deterministic Leader. No Agent() calls are made in tmux mode.
310
319
 
311
320
  **IMPORTANT RULES:**
312
- - Tmux mode requires the user to already be inside a tmux session. If the runner script rejects because $TMUX is not set, do NOT try to create a tmux session yourself. Tell the user: "Start tmux first, then retry."
313
- - MUST launch the runner with `run_in_background: true` so `/rlp-desk` returns control immediately while preserving live tmux visibility.
321
+ - Tmux mode requires the user to already be inside a tmux session. If the leader rejects because $TMUX is not set, do NOT try to create a tmux session yourself. Tell the user: "Start tmux first, then retry."
322
+ - MUST launch with `run_in_background: true` so `/rlp-desk` returns control immediately while preserving live tmux visibility.
314
323
  - Run-in-background is used so the shell can keep the command visible and keep the pane layout stable for status checks and completion flow.
315
324
  - Do NOT kill panes after completion. Panes stay alive for inspection. User cleans up with `/rlp-desk clean <slug> --kill-session`.
316
- - `--with-self-verification` is accepted in tmux mode. After campaign completion, `run_ralph_desk.zsh` spawns `claude CLI` to generate the SV report from campaign artifacts (done-claims, verify-verdicts, campaign-report). SV reports are written to `~/.claude/ralph-desk/analytics/<slug>/`. Requires `claude` CLI available in PATH; if not found, an error is appended to the campaign report.
325
+ - `--with-self-verification` is fully supported in tmux mode (v5.7 §4.7). The Node leader's `generateSVReport()` writes `self-verification-report.md` + `self-verification-data.json` under `<project>/.claude/ralph-desk/analytics/<slug>/` (project-local, v5.7 §4.11.b).
326
+ - `--flywheel on-fail` and `--flywheel-guard on` are fully supported in tmux mode (v5.7 §4.1). The Node leader handles pane creation, sendKeys dispatch, signal polling, and Guard retry semantics identically to agent mode.
327
+ - Legacy `zsh ~/.claude/ralph-desk/run_ralph_desk.zsh` (deprecated in 0.12.0) still runs for non-flywheel/non-SV invocations but emits a deprecation `[notice]`. Calling it with `FLYWHEEL` or `WITH_SELF_VERIFICATION` env vars exits 2 with a migration banner pointing to the Node leader.
317
328
 
318
329
  **tmux UX model (5 items):**
319
330
  - The session returns immediately after launch (`run_in_background: true`) so the command returns control to the parent CLI.
@@ -324,12 +335,24 @@ WITH_SELF_VERIFICATION=<1 if --with-self-verification, else 0> \
324
335
 
325
336
  #### Agent Mode (`--mode agent` or default)
326
337
 
338
+ **Why Agent mode is structurally immune to Bug 4/5 (mid-execution prompt hang
339
+ & A4 premature dispatch):** Worker/Verifier are dispatched as `Agent(...,
340
+ mode="bypassPermissions", ...)`. The subagent runs non-interactively under
341
+ the platform's bypass — it has no tmux pane, no TUI surface, and cannot
342
+ surface a `[y/N]` prompt to the parent Leader. The auto-dismiss /
343
+ prompt-stall / no-progress timeouts in `run_ralph_desk.zsh` (v5.7 §4.13.b /
344
+ §4.16 / §4.17) are therefore tmux-only by design. **Tradeoff**: because
345
+ `Agent()` has no timeout API, agent-mode iterations are not bounded — if
346
+ the platform's `bypassPermissions` ever fails to suppress an interactive
347
+ prompt at the SDK level, the call hangs indefinitely with no rlp-desk-side
348
+ watchdog. Use `--mode tmux` if you need bounded execution time.
349
+
327
350
  ### Preparation
328
- 1. Validate scaffold: `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
351
+ 1. Validate scaffold: `.rlp-desk/prompts/<slug>.worker.prompt.md` etc.
329
352
  2. **Codex CLI pre-validation**: If `--consensus` is not `off` OR `--worker-model` uses codex format (contains `:`) OR `--verifier-model` / `--final-verifier-model` / `--consensus-model` / `--final-consensus-model` uses codex format, check that `codex` CLI exists in PATH. If codex CLI not found → STOP immediately, print install instructions (`npm install -g @openai/codex`), do not start the loop.
330
353
  3. Check sentinels (complete/blocked). Found → tell user `/rlp-desk clean <slug>`.
331
354
  4. Clean previous `done-claim.json`, `verify-verdict.json`.
332
- 5. **Always**: write baseline log entry to `.claude/ralph-desk/logs/<slug>/baseline.log`: `[timestamp] iter=0 phase=start slug=<slug> worker_model=<model> verifier_model=<model>`. Baseline.log captures 1 line per iteration for lightweight post-mortem (always-on, no flag needed).
355
+ 5. **Always**: write baseline log entry to `.rlp-desk/logs/<slug>/baseline.log`: `[timestamp] iter=0 phase=start slug=<slug> worker_model=<model> verifier_model=<model>`. Baseline.log captures 1 line per iteration for lightweight post-mortem (always-on, no flag needed).
333
356
  6. If `--debug`: also create/clear `~/.claude/ralph-desk/analytics/<slug>/debug.log`. Define a helper: to "debug_log" means append a timestamped line to this file via `Bash("echo \"[$(date '+%Y-%m-%d %H:%M:%S')] $msg\" >> ~/.claude/ralph-desk/analytics/<slug>/debug.log")`. When `--debug` is active, debug.log contains all baseline.log fields plus detailed phase logs.
334
357
  - **4-category log system**: all debug_log entries use exactly one of: `[GOV]` (governance checks: IL enforcement, CB triggers, scope lock, verdict evaluation), `[DECIDE]` (leader decisions: model selection, fix contracts, escalation), `[OPTION]` (configuration snapshot at loop start: thresholds, modes, models), `[FLOW]` (execution progress: worker/verifier dispatch, signal reads, phase transitions)
335
358
  - **Re-execution versioning**: If `debug.log` already exists at `--debug` start, rename it to `debug-v{N}.log` (N = next available integer ≥ 1) before creating a fresh `debug.log`.
@@ -355,14 +378,14 @@ For each iteration (1 to max_iter):
355
378
 
356
379
  **① Check sentinels**
357
380
  ```bash
358
- test -f .claude/ralph-desk/memos/<slug>-complete.md # → done
359
- test -f .claude/ralph-desk/memos/<slug>-blocked.md # → stop
381
+ test -f .rlp-desk/memos/<slug>-complete.md # → done
382
+ test -f .rlp-desk/memos/<slug>-blocked.md # → stop
360
383
  ```
361
384
 
362
385
  **①½ Prep-stage cleanup**
363
386
  ```bash
364
- rm -f .claude/ralph-desk/memos/<slug>-done-claim.json
365
- rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
387
+ rm -f .rlp-desk/memos/<slug>-done-claim.json
388
+ rm -f .rlp-desk/memos/<slug>-verify-verdict.json
366
389
  ```
367
390
 
368
391
  **② Read memory.md** → Stop Status, Next Iteration Contract
@@ -378,15 +401,15 @@ rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
378
401
 
379
402
  **④ Build worker prompt (Prompt Assembly Protocol)**
380
403
  1. Capture `WORKING_DIR` once: use `$PWD` from when `/rlp-desk run` was invoked. Store for all prompt construction.
381
- 2. Read `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` — use its content **verbatim**. Do NOT rewrite, paraphrase, or regenerate paths. The prompt file contains correct absolute paths from init.
404
+ 2. Read `.rlp-desk/prompts/<slug>.worker.prompt.md` — use its content **verbatim**. Do NOT rewrite, paraphrase, or regenerate paths. The prompt file contains correct absolute paths from init.
382
405
  2a. **Per-US PRD injection** (when targeting a specific `us_id`, not "ALL"):
383
- - Check if `.claude/ralph-desk/plans/prd-<slug>-{us_id}.md` exists (created by init split)
406
+ - Check if `.rlp-desk/plans/prd-<slug>-{us_id}.md` exists (created by init split)
384
407
  - If yes: in the assembled prompt text, replace the full PRD reference (`prd-<slug>.md`) with the per-US file path (`prd-<slug>-{us_id}.md`) — so Worker reads only the relevant US section
385
408
  - If no per-US file: fall back to full PRD (`prd-<slug>.md`) with no change needed
386
409
  - Note: this absolute-path substitution is permitted — only absolute→relative rewrites are forbidden.
387
410
  3. Prepend meta comment: `## WORKING_DIR: {absolute path}` — Worker must use this as its working directory.
388
411
  4. Append iteration number + memory contract.
389
- 5. Write to `.claude/ralph-desk/logs/<slug>/iter-NNN.worker-prompt.md` (audit trail).
412
+ 5. Write to `.rlp-desk/logs/<slug>/iter-NNN.worker-prompt.md` (audit trail).
390
413
  - Note: Worker ALWAYS records execution_steps in done-claim.json per governance §1f. No flag needed.
391
414
  - **Rewriting paths from absolute to relative WILL break worktree campaigns. Only additions (WORKING_DIR header, iteration context) are allowed.**
392
415
 
@@ -637,7 +660,7 @@ When `--consensus` is not `off`, also track in `status.json`:
637
660
  ---
638
661
 
639
662
  ## `status <slug>`
640
- Read `.claude/ralph-desk/logs/<slug>/runtime/status.json` and display a detailed report:
663
+ Read `.rlp-desk/logs/<slug>/runtime/status.json` and display a detailed report:
641
664
 
642
665
  ```
643
666
  Campaign: <slug>
@@ -660,22 +683,22 @@ Read the last `verify-verdict.json` to show the most recent verdict summary and
660
683
 
661
684
  ## `clean <slug> [--kill-session]`
662
685
  Remove:
663
- - `.claude/ralph-desk/memos/<slug>-complete.md`
664
- - `.claude/ralph-desk/memos/<slug>-blocked.md`
665
- - `.claude/ralph-desk/memos/<slug>-done-claim.json`
666
- - `.claude/ralph-desk/memos/<slug>-verify-verdict.json`
667
- - `.claude/ralph-desk/memos/<slug>-iter-signal.json`
668
- - `.claude/ralph-desk/logs/<slug>/circuit-breaker.json`
669
- - `.claude/ralph-desk/logs/<slug>/runtime/session-config.json`
670
- - `.claude/ralph-desk/logs/<slug>/runtime/worker-heartbeat.json`
671
- - `.claude/ralph-desk/logs/<slug>/runtime/verifier-heartbeat.json`
672
- - `.claude/ralph-desk/memos/<slug>-escalation.md`
686
+ - `.rlp-desk/memos/<slug>-complete.md`
687
+ - `.rlp-desk/memos/<slug>-blocked.md`
688
+ - `.rlp-desk/memos/<slug>-done-claim.json`
689
+ - `.rlp-desk/memos/<slug>-verify-verdict.json`
690
+ - `.rlp-desk/memos/<slug>-iter-signal.json`
691
+ - `.rlp-desk/logs/<slug>/circuit-breaker.json`
692
+ - `.rlp-desk/logs/<slug>/runtime/session-config.json`
693
+ - `.rlp-desk/logs/<slug>/runtime/worker-heartbeat.json`
694
+ - `.rlp-desk/logs/<slug>/runtime/verifier-heartbeat.json`
695
+ - `.rlp-desk/memos/<slug>-escalation.md`
673
696
  Note: `campaign-report.md`, `campaign-report-v{N}.md`, `iter-NNN-done-claim.json`, and `iter-NNN-verify-verdict.json` are intentionally preserved across clean for historical comparison. Analytics files (`debug.log`, `campaign.jsonl`, `self-verification-data.json`, `self-verification-report-NNN.md`) at `~/.claude/ralph-desk/analytics/<slug>/` are NOT affected by project-level clean.
674
697
 
675
698
  If `--kill-session` is passed, clean up Worker/Verifier tmux panes using session-config.json:
676
699
  ```bash
677
700
  # Read pane IDs from session-config.json (safe — targets only Worker/Verifier panes)
678
- SESSION_CONFIG=".claude/ralph-desk/logs/<slug>/runtime/session-config.json"
701
+ SESSION_CONFIG=".rlp-desk/logs/<slug>/runtime/session-config.json"
679
702
  if [ -f "$SESSION_CONFIG" ] && command -v jq &>/dev/null; then
680
703
  WORKER_PANE=$(jq -r '.panes.worker // empty' "$SESSION_CONFIG")
681
704
  VERIFIER_PANE=$(jq -r '.panes.verifier // empty' "$SESSION_CONFIG")
@@ -715,8 +738,8 @@ Data sources:
715
738
 
716
739
  Resume a previously interrupted campaign. Equivalent to `run <slug>` but explicitly restores state:
717
740
 
718
- 1. Read `.claude/ralph-desk/logs/<slug>/runtime/status.json` for `verified_us`, `iteration`, `consecutive_failures`
719
- 2. Read `.claude/ralph-desk/memos/<slug>-memory.md` for completed stories and next iteration contract
741
+ 1. Read `.rlp-desk/logs/<slug>/runtime/status.json` for `verified_us`, `iteration`, `consecutive_failures`
742
+ 2. Read `.rlp-desk/memos/<slug>-memory.md` for completed stories and next iteration contract
720
743
  3. Check for sentinels (`complete.md`, `blocked.md`) — if present, inform user and stop
721
744
  4. If no sentinels, invoke `run <slug>` with the same options from the previous session (stored in status.json fields: `worker_model`, `verifier_model`, `final_verifier_model`, `verify_mode`, `consensus_mode`)
722
745
  5. The runner automatically restores `verified_us` from memory or status.json on startup
package/src/governance.md CHANGED
@@ -297,13 +297,54 @@ BLOCKED writes a JSON sidecar (`<slug>-blocked.json`) alongside the markdown sen
297
297
  - English: `depends on US-`, `blocking US-`, `awaits US-`, `post-iter US-`, `requires US-N`, `cross-US`
298
298
  - Korean: `US-N 산출물`, `신규 US-`, `post-iter`
299
299
 
300
- **Write Order Contract (atomicity invariant)**:
301
- 1. JSON sidecar written FIRST (`fs.writeFile` / `atomic_write`).
302
- 2. markdown sentinel written SECOND.
303
- 3. Invariant: **markdown exists ⇒ JSON exists** (writer enforces order).
304
- 4. Wrappers SHOULD watch markdown sentinel, then read JSON sidecar. If JSON not yet visible (rare), retry up to 5 × 50ms before failing.
300
+ **Write Order Contract (atomicity invariant)** — v5.7 §4.24 reversed:
301
+ 1. **markdown sentinel written FIRST** via `writeSentinelExclusive` (`fs.open(path, 'wx')` O_EXCL first-writer-wins). The md acts as the race lock.
302
+ 2. **JSON sidecar written SECOND**, only by the winning writer.
303
+ 3. Invariant: **markdown exists ⇒ JSON exists** (winner writes both; losers see EEXIST and return without touching JSON, preserving the winner's content).
304
+ 4. Wrappers SHOULD watch the markdown sentinel, then read the JSON sidecar. If the JSON is not yet visible (a rare window of ≤50ms), retry up to 5 × 50ms before failing.
305
305
 
306
- `atomic_write` provides per-file rename atomicity; cross-file ordering is enforced by the explicit two-call sequence.
306
+ `writeSentinelExclusive` (in `src/node/shared/fs.mjs`) provides per-file first-writer-wins; cross-file ordering is enforced by the explicit md-then-JSON sequence inside `writeSentinel`.
307
+
308
+ ## 1g. Sentinel Guarantee Invariant (file-guarantee contract)
309
+
310
+ **Every terminal exit of `runCampaign()` MUST leave exactly one sentinel on disk: `<slug>-blocked.md` XOR `<slug>-complete.md`.**
311
+
312
+ This invariant is the foundation of the fresh-context architecture. If a campaign exits without any sentinel, future iterations cannot determine campaign state — Worker/Verifier are dispatched into a campaign whose history they cannot reconstruct.
313
+
314
+ ### Enforcement (3-layer defense)
315
+
316
+ 1. **Per-poll-site sentinel write** (`_handlePollFailure` helper at `src/node/runner/campaign-main-loop.mjs`). Every `pollForSignal` call site (Worker, VerifierPerUS, VerifierFinal, Flywheel, Guard) is wrapped in `try { … } catch (error) { return _handlePollFailure(error, { role, … }); }`. The helper classifies via `BLOCK_TAGS` typed enum, calls `writeSentinel` (idempotent via O_EXCL), and returns `{status:'blocked', …}` so the caller exits the loop cleanly.
317
+
318
+ 2. **Run-level try/finally backstop** (`_ensureTerminalSentinel`). After the campaign body executes, a `finally` block checks `exists(blockedSentinel) XOR exists(completeSentinel)`. If neither sentinel exists (the paused `continue` state excepted), it writes a synthetic BLOCKED `infra_failure/leader_exited_without_terminal_state` sentinel so that even unhandled exceptions cannot escape silently.
319
+
320
+ 3. **Schema validator at READ boundary** (`validateArtifact`). After every `pollForSignal` returns parsed JSON, validates `(slug, iteration ≥ floor, signal_type matches read context, us_id ∈ usList ∪ {ALL})`. Throws `MalformedArtifactError({field, expected, got})` → caught by same `_handlePollFailure` → BLOCKED `contract_violation/malformed_artifact` (recoverable).
321
+
322
+ ### Per-role failure-category enum
323
+
324
+ `_classifyBlock` (in `campaign-main-loop.mjs`) maps each `BLOCK_TAGS` value to one of the locked taxonomy categories:
325
+
326
+ | Tag | reason_category | recoverable | Example trigger |
327
+ |-----|----------------|-------------|-----------------|
328
+ | `WORKER_EXITED` | `infra_failure` | false | Worker pane returned to shell without writing signal |
329
+ | `VERIFIER_EXITED` | `infra_failure` | false | Per-US Verifier exited without writing verdict |
330
+ | `FINAL_VERIFIER_EXITED` | `infra_failure` | false | Final ALL-verifier exited without writing verdict |
331
+ | `FLYWHEEL_EXITED` | `infra_failure` | false | Flywheel pane crashed |
332
+ | `GUARD_EXITED` | `infra_failure` | false | Guard pane crashed |
333
+ | `PROMPT_BLOCKED` | `infra_failure` | false | Default-No prompt — auto-Enter would CANCEL |
334
+ | `<role>_TIMEOUT` | `infra_failure` | false | pollForSignal timed out without exit detected |
335
+ | `MALFORMED_ARTIFACT` | `contract_violation` | true | Worker/Verifier wrote schema-violating JSON |
336
+ | `LEADER_EXITED_WITHOUT_TERMINAL_STATE` | `infra_failure` | false | Backstop fired (uncaught exception or paths outside controlled scope) |
337
+
338
+ ### Auditing
339
+
340
+ Operators can verify the invariant for any campaign by running:
341
+
342
+ ```sh
343
+ zsh tests/sv-gate-fast.sh # 30s mechanical check (greps + units)
344
+ zsh tests/sv-gate-full.sh # 5min including REAL tmux + REAL campaign E2E
345
+ ```
346
+
347
+ The fast gate fails immediately if any `pollForSignal` call site lacks `_handlePollFailure` wiring or if the `writeSentinelExclusive` primitive is bypassed.
307
348
 
308
349
  ## 2. Roles
309
350
 
@@ -468,7 +509,7 @@ Characteristics:
468
509
 
469
510
  ### Project-local
470
511
  ```
471
- .claude/ralph-desk/
512
+ .rlp-desk/
472
513
  ├── prompts/
473
514
  │ ├── <slug>.worker.prompt.md # Worker base prompt (regenerated on re-execution)
474
515
  │ └── <slug>.verifier.prompt.md # Verifier base prompt (regenerated on re-execution)
@@ -553,6 +594,14 @@ for iteration in 1..max_iter:
553
594
  • fail + retries exhausted → BLOCKED
554
595
  • inconclusive → BLOCKED (escalate to user)
555
596
  - Guard count tracked per-US in status.json
597
+ - **Mode support (v0.12.0+, v5.7 §4.3)**: flywheel runs identically in
598
+ --mode agent and --mode tmux when routed through the Node leader
599
+ (`node ~/.claude/ralph-desk/node/run.mjs run --mode tmux`). The legacy
600
+ `run_ralph_desk.zsh` runner rejects --flywheel/--flywheel-guard with
601
+ exit 2 + migration banner; users must use the Node entry. Same applies
602
+ to --with-self-verification: SV report generation is supported in
603
+ tmux mode via the Node leader's generateSVReport() (no longer
604
+ agent-mode-only).
556
605
 
557
606
  ⑦ Execute Verifier (see §7a for per-US and §7b for consensus details)
558
607
  - Build prompt (scoped to us_id if per-us mode) → log
@@ -0,0 +1,15 @@
1
+ cli/command-builder.mjs
2
+ constants.mjs
3
+ init/campaign-initializer.mjs
4
+ polling/signal-poller.mjs
5
+ prompts/prompt-assembler.mjs
6
+ reporting/campaign-reporting.mjs
7
+ run.mjs
8
+ runner/campaign-main-loop.mjs
9
+ runner/leader-registry.mjs
10
+ runner/prompt-dismisser.mjs
11
+ shared/fs.mjs
12
+ shared/paths.mjs
13
+ tmux/pane-manager.mjs
14
+ util/debug-log.mjs
15
+ util/shell-quote.mjs
@@ -1,7 +1,28 @@
1
+ import { shellQuote } from '../util/shell-quote.mjs';
2
+ import { OPUS_1M_BETA, isOpusModel } from '../constants.mjs';
3
+
1
4
  const CLAUDE_BIN = 'claude';
2
5
  const CODEX_BIN = 'codex';
3
6
  const CLAUDE_MODELS = new Set(['haiku', 'sonnet', 'opus']);
4
7
 
8
+ // v0.13.0: surface engine classification for tmux+claude warning + observability.
9
/**
 * Reports whether a model flag names a Claude-engine model.
 *
 * Only the text before the first ':' is inspected. That head counts as Claude
 * when it is one of the CLAUDE_MODELS aliases or when it begins with 'claude-'.
 *
 * @param {*} modelFlag - Model flag value; non-strings and empty strings yield false.
 * @returns {boolean} true when the head of the flag is recognised as a Claude model.
 */
export function isClaudeEngine(modelFlag) {
  if (typeof modelFlag !== 'string' || modelFlag === '') return false;

  // Keep only the portion ahead of the first ':' separator.
  const colonAt = modelFlag.indexOf(':');
  const head = colonAt === -1 ? modelFlag : modelFlag.slice(0, colonAt);

  // A flag that starts with ':' has an empty head and is never a Claude model.
  return head !== '' && (CLAUDE_MODELS.has(head) || head.startsWith('claude-'));
}
25
+
5
26
  function assertTuiMode(mode, builderName) {
6
27
  if (mode !== 'tui') {
7
28
  throw new Error(`${builderName} unknown mode '${mode}'`);
@@ -11,19 +32,36 @@ function assertTuiMode(mode, builderName) {
11
32
  export function buildClaudeCmd(mode, model, options = {}) {
12
33
  assertTuiMode(mode, 'buildClaudeCmd');
13
34
 
14
- const parts = [
15
- 'DISABLE_OMC=1',
35
+ // v5.7 §4.9: auto-enable 1M-token context for Opus models. Long campaigns
36
+ // no longer silently truncate at 200K. Header is benign for non-Opus calls
37
+ // but we omit it there to keep the cmdline tidy.
38
+ const parts = ['DISABLE_OMC=1'];
39
+ if (isOpusModel(model)) {
40
+ parts.push(`ANTHROPIC_BETA=${shellQuote(OPUS_1M_BETA)}`);
41
+ }
42
+ parts.push(
16
43
  CLAUDE_BIN,
17
44
  '--model',
18
- model,
45
+ shellQuote(model),
19
46
  '--mcp-config',
20
47
  '\'{"mcpServers":{}}\'',
21
48
  '--strict-mcp-config',
22
49
  '--dangerously-skip-permissions',
23
- ];
50
+ );
51
+
52
+ // v5.7 §4.11.a: explicit --add-dir whitelist. With --dangerously-skip-permissions
53
+ // alone, claude CLI still surfaces TUI prompts for cwd-adjacent paths in some
54
+ // versions. Add the home rlp-desk tree (where Leader writes registry.jsonl
55
+ // and reads governance docs) plus the campaign cwd, so Worker has full
56
+ // authorized access without prompts.
57
+ if (options.addDirs && Array.isArray(options.addDirs)) {
58
+ for (const dir of options.addDirs) {
59
+ if (dir) parts.push('--add-dir', shellQuote(dir));
60
+ }
61
+ }
24
62
 
25
63
  if (options.effort !== undefined && options.effort !== '') {
26
- parts.push('--effort', options.effort);
64
+ parts.push('--effort', shellQuote(options.effort));
27
65
  }
28
66
 
29
67
  return parts.join(' ');
@@ -0,0 +1,19 @@
1
+ // Shared runtime constants. Single-source for cross-module values.
2
+
3
+ // Anthropic Claude API beta header that activates the 1M-token context window
4
+ // for Opus models. Auto-prepended to every claude CLI invocation that uses
5
+ // --model opus so long campaigns no longer silently truncate at 200K.
6
+ //
7
+ // Docs: https://docs.anthropic.com/en/docs/build-with-claude/context-windows
8
+ // (search "1M context") — header rotates with each beta phase.
9
+ export const OPUS_1M_BETA = 'context-1m-2025-08-07';
10
+
11
+ // Model id that triggers Opus 1M auto-enable. Plain string match against the
12
+ // --model value (post-shellQuote stripping). Bracketed form
13
+ // 'claude-opus-4-7[1m]' is also Opus and benefits from this; pattern match
14
+ // covers both.
15
/**
 * Reports whether a model id refers to an Opus model.
 *
 * The value is stringified and lower-cased, then matched against the bare
 * alias 'opus' or the 'claude-opus-' prefix (which also covers bracketed ids
 * such as 'claude-opus-4-7[1m]').
 *
 * @param {*} model - Model id; any falsy value yields false.
 * @returns {boolean} true when the id is 'opus' or starts with 'claude-opus-'.
 */
export function isOpusModel(model) {
  // Falsy input collapses to '', which matches neither form below.
  const normalized = model ? String(model).toLowerCase() : '';
  return normalized === 'opus' || /^claude-opus-/.test(normalized);
}
@@ -1,8 +1,79 @@
1
1
  import fs from 'node:fs/promises';
2
+ import fsSync from 'node:fs';
2
3
  import path from 'node:path';
3
4
 
5
+ import { LEGACY_DESK_REL, resolveDeskRoot } from '../util/desk-root.mjs';
6
+
4
7
  const GITIGNORE_MARKER = '# RLP Desk runtime artifacts';
5
- const GITIGNORE_RULE = '.claude/ralph-desk/';
8
+ const GITIGNORE_RULE = '.rlp-desk/';
9
+ const LEGACY_GITIGNORE_RULE = '.claude/ralph-desk/';
10
+ const MIGRATION_LOCK_FILE = '.rlp-desk-migration.lock';
11
+ const STALE_LOCK_MS = 5 * 60 * 1000;
12
+
/**
 * Take the migration lock by exclusively creating `lockPath`.
 *
 * An existing lock whose mtime is older than `STALE_LOCK_MS` is treated as
 * abandoned: it is removed and creation is retried once. A lock that vanishes
 * between the failed create and the follow-up stat/unlink is likewise retried.
 *
 * @param {string} lockPath - Absolute path of the lock file.
 * @returns {number} File descriptor of the freshly created lock file.
 * @throws {Error} When a non-stale lock exists, when another process wins the
 *   retry, or on any other filesystem error.
 */
function acquireMigrationLock(lockPath) {
  // Retry helper: a second EEXIST means a competitor took the lock first, so
  // report it the same way as a live lock instead of leaking a raw EEXIST.
  const retryOpen = () => {
    try {
      return fsSync.openSync(lockPath, 'wx');
    } catch (error) {
      if (error.code === 'EEXIST') {
        throw new Error(
          `Migration already in progress (lock at ${lockPath}, taken by another process)`,
          { cause: error },
        );
      }
      throw error;
    }
  };

  try {
    // 'wx' fails with EEXIST when the file already exists.
    return fsSync.openSync(lockPath, 'wx');
  } catch (error) {
    if (error.code !== 'EEXIST') throw error;
  }

  let age;
  try {
    age = Date.now() - fsSync.statSync(lockPath).mtimeMs;
  } catch (statError) {
    if (statError.code !== 'ENOENT') throw statError;
    // The holder released the lock between our open and stat.
    return retryOpen();
  }

  if (age <= STALE_LOCK_MS) {
    throw new Error(`Migration already in progress (lock at ${lockPath}, age ${Math.round(age / 1000)}s)`);
  }

  try {
    fsSync.unlinkSync(lockPath);
  } catch (unlinkError) {
    // Someone else already cleaned up the stale lock; anything else is fatal.
    if (unlinkError.code !== 'ENOENT') throw unlinkError;
  }
  return retryOpen();
}

/**
 * Move the legacy desk directory (`rootDir` + `LEGACY_DESK_REL`) to the
 * location returned by `resolveDeskRoot(rootDir, env)`, guarded by a lock file
 * in `rootDir`.
 *
 * @param {string} rootDir - Project root containing the legacy directory.
 * @param {object} [env=process.env] - Environment passed through to `resolveDeskRoot`.
 * @returns {{action: 'noop', reason: 'new-only' | 'neither-exists'} |
 *   {action: 'migrated', from: string, to: string}} Outcome of the attempt.
 * @throws {Error} When the lock is held by a live migration, when both the
 *   legacy and the new directory exist, or on filesystem errors.
 */
export function migrateLegacyDesk(rootDir, env = process.env) {
  const legacyPath = path.join(rootDir, LEGACY_DESK_REL);
  const newPath = resolveDeskRoot(rootDir, env);
  const lockPath = path.join(rootDir, MIGRATION_LOCK_FILE);

  // Pre-lock cheap check: skip the lock entirely when there is nothing to do.
  // The same conditions are re-checked inside the lock, because a competing
  // process may move or create files between this check and lock acquisition.
  if (!fsSync.existsSync(legacyPath)) {
    return { action: 'noop', reason: fsSync.existsSync(newPath) ? 'new-only' : 'neither-exists' };
  }

  const lockFd = acquireMigrationLock(lockPath);

  try {
    // Record the owner pid in the lock file.
    fsSync.writeSync(lockFd, String(process.pid));

    // Re-check inside the lock — another process may have already migrated
    // while we were waiting for the lock.
    const legacyExistsLocked = fsSync.existsSync(legacyPath);
    const newExistsLocked = fsSync.existsSync(newPath);

    if (!legacyExistsLocked) {
      return { action: 'noop', reason: newExistsLocked ? 'new-only' : 'neither-exists' };
    }

    if (newExistsLocked) {
      throw new Error(
        `Migration aborted: both directories exist. Remove one before re-run. legacy=${legacyPath}, new=${newPath}`,
      );
    }

    fsSync.mkdirSync(path.dirname(newPath), { recursive: true });
    fsSync.renameSync(legacyPath, newPath);
    return { action: 'migrated', from: legacyPath, to: newPath };
  } finally {
    // Best-effort release: failures here must not mask the migration result.
    try { fsSync.closeSync(lockFd); } catch { /* noop */ }
    try { fsSync.unlinkSync(lockPath); } catch { /* noop */ }
  }
}
6
77
 
7
78
  export async function initCampaign(slug, objective, options = {}) {
8
79
  const normalizedSlug = normalizeSlug(slug);
@@ -10,17 +81,21 @@ export async function initCampaign(slug, objective, options = {}) {
10
81
  const mode = options.mode ?? 'agent';
11
82
  const rootDir = path.resolve(options.rootDir ?? process.cwd());
12
83
  const tmuxEnv = options.tmuxEnv ?? process.env.TMUX ?? '';
13
- const deskRoot = path.join(rootDir, '.claude', 'ralph-desk');
84
+ const env = options.env ?? process.env;
14
85
 
15
86
  if (mode === 'tmux' && !tmuxEnv) {
16
87
  throw new Error('tmux required');
17
88
  }
18
89
 
90
+ migrateLegacyDesk(rootDir, env);
91
+
92
+ const deskRoot = resolveDeskRoot(rootDir, env);
93
+
19
94
  if (mode === 'fresh') {
20
95
  await fs.rm(deskRoot, { recursive: true, force: true });
21
96
  }
22
97
 
23
- const paths = buildPaths(rootDir, normalizedSlug);
98
+ const paths = buildPaths(rootDir, normalizedSlug, env);
24
99
  await ensureDirectories(paths);
25
100
  await ensureGitignore(rootDir);
26
101
 
@@ -55,8 +130,8 @@ function normalizeSlug(value) {
55
130
  return slug;
56
131
  }
57
132
 
58
- function buildPaths(rootDir, slug) {
59
- const deskRoot = path.join(rootDir, '.claude', 'ralph-desk');
133
+ function buildPaths(rootDir, slug, env = process.env) {
134
+ const deskRoot = resolveDeskRoot(rootDir, env);
60
135
  const promptsDir = path.join(deskRoot, 'prompts');
61
136
  const plansDir = path.join(deskRoot, 'plans');
62
137
  const memosDir = path.join(deskRoot, 'memos');
@@ -105,13 +180,28 @@ async function ensureGitignore(rootDir) {
105
180
  }
106
181
  }
107
182
 
108
- if (content.includes(GITIGNORE_MARKER) && content.includes(GITIGNORE_RULE)) {
109
- return;
183
+ let updated = content;
184
+ let changed = false;
185
+
186
+ // v0.13.0: drop the legacy .claude/ralph-desk/ rule if present.
187
+ if (updated.includes(LEGACY_GITIGNORE_RULE)) {
188
+ const legacyLineRegex = new RegExp(
189
+ `^${LEGACY_GITIGNORE_RULE.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\r?\\n`,
190
+ 'gm',
191
+ );
192
+ updated = updated.replace(legacyLineRegex, '');
193
+ changed = true;
110
194
  }
111
195
 
112
- const prefix = content.length > 0 && !content.endsWith('\n') ? '\n' : '';
113
- const block = `${prefix}${GITIGNORE_MARKER}\n${GITIGNORE_RULE}\n`;
114
- await fs.writeFile(gitignorePath, `${content}${block}`, 'utf8');
196
+ if (!(updated.includes(GITIGNORE_MARKER) && updated.includes(GITIGNORE_RULE))) {
197
+ const prefix = updated.length > 0 && !updated.endsWith('\n') ? '\n' : '';
198
+ updated = `${updated}${prefix}${GITIGNORE_MARKER}\n${GITIGNORE_RULE}\n`;
199
+ changed = true;
200
+ }
201
+
202
+ if (changed) {
203
+ await fs.writeFile(gitignorePath, updated, 'utf8');
204
+ }
115
205
  }
116
206
 
117
207
  async function writeIfMissing(targetPath, content) {