npm - @ai-dev-methodologies/rlp-desk - Versions diffs - 0.11.1 → 0.13.0 - Mend

@ai-dev-methodologies/rlp-desk 0.11.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50) hide show

package/src/commands/rlp-desk.md CHANGED Viewed

@@ -135,7 +135,7 @@ After all items are confirmed:
    Present the score table to the user before proceeding.
 2. Present the full contract summary.
 3. **Self-Verification** — Ask: "Enable self-verification? Worker records step-by-step evidence, Verifier cross-validates process. Recommended for MEDIUM+ risk." Default: yes for HIGH/CRITICAL, no for LOW/MEDIUM.
-4. **Re-execution check**: After slug is confirmed, check if `.claude/ralph-desk/plans/prd-<slug>.md` already exists. If a PRD already exists for this slug, ask: "A PRD already exists for this slug. Improve the existing PRD or start fresh (delete and recreate)?"
+4. **Re-execution check**: After slug is confirmed, check if `.rlp-desk/plans/prd-<slug>.md` already exists. If a PRD already exists for this slug, ask: "A PRD already exists for this slug. Improve the existing PRD or start fresh (delete and recreate)?"
    - "improve" → pass `--mode improve` to init
    - "start fresh" → pass `--mode fresh` to init
    - If no PRD exists: standard first-run (no --mode needed)
@@ -280,40 +280,51 @@ Parse the `--mode` flag. If absent or `agent`, use the Agent() path below. If `t
 #### Tmux Mode (`--mode tmux`)
-When `--mode tmux` is specified:
+When `--mode tmux` is specified (v0.12.0+ — v5.7 §4.1 routes to Node leader for flywheel + SV support):
-1. **Validate scaffold** — same as Agent() mode: check `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
+1. **Validate scaffold** — same as Agent() mode: check `.rlp-desk/prompts/<slug>.worker.prompt.md` etc.
 2. **Check sentinels** — same as Agent() mode.
-3. **Check prerequisites** — verify `tmux` and `jq` are installed. If not, report what is missing and stop.
-4. **Locate runner script** — find `run_ralph_desk.zsh` at `~/.claude/ralph-desk/run_ralph_desk.zsh`. If not found, tell the user to reinstall (`npm install` or `install.sh`).
-5. **Launch** — shell out to the runner script with env vars derived from flags:
+3. **Check prerequisites** — verify `tmux`, `jq`, and `node` (>= 16) are installed. If not, report what is missing and stop.
+4. **Locate Node leader** — find `~/.claude/ralph-desk/node/run.mjs`. If not found, tell the user to reinstall (`npm install` or `bash install.sh`).
+5. **Launch** — shell out to the Node leader. **All dynamic args (slug + model values) MUST be passed through shell single-quote escaping** (v5.7 §4.12 G11) so bracketed model ids like `claude-opus-4-7[1m]` survive zsh parsing:
 ```bash
-LOOP_NAME="<slug>" \
-ROOT="$PWD" \
-MAX_ITER=<--max-iter value> \
-WORKER_MODEL=<--worker-model value> \
-LOCK_WORKER_MODEL=<1 if --lock-worker-model, else 0> \
-VERIFIER_MODEL=<--verifier-model value, default: sonnet> \
-FINAL_VERIFIER_MODEL=<--final-verifier-model value, default: opus> \
-VERIFY_MODE=<--verify-mode value, default: per-us> \
-CONSENSUS_MODE=<--consensus value, default: off> \
-CONSENSUS_MODEL=<--consensus-model value, default: gpt-5.5:medium> \
-FINAL_CONSENSUS_MODEL=<--final-consensus-model value, default: gpt-5.5:high> \
-CB_THRESHOLD=<--cb-threshold value, default: 6> \
-ITER_TIMEOUT=<--iter-timeout value, default: 600> \
-DEBUG=<1 if --debug, else 0> \
-WITH_SELF_VERIFICATION=<1 if --with-self-verification, else 0> \
-  zsh ~/.claude/ralph-desk/run_ralph_desk.zsh
+node ~/.claude/ralph-desk/node/run.mjs run '<slug>' \
+  --mode tmux \
+  --max-iter <N> \
+  --worker-model '<value>' \
+  [--lock-worker-model] \
+  --verifier-model '<value>' \
+  --final-verifier-model '<value>' \
+  --consensus <off|all|final-only> \
+  --consensus-model '<value>' \
+  --final-consensus-model '<value>' \
+  --verify-mode <per-us|batch> \
+  --cb-threshold <N> \
+  --iter-timeout <N> \
+  [--debug] [--autonomous] \
+  [--lane-strict] \
+  [--test-density-strict] \
+  [--with-self-verification] \
+  [--flywheel on-fail --flywheel-model '<value>'] \
+  [--flywheel-guard on --flywheel-guard-model '<value>']
+# --lane-strict was env LANE_MODE=strict; --test-density-strict was env TEST_DENSITY_MODE=strict
 ```
-6. **If the script exits with error (exit code 1)** — report the error to the user and STOP. Do NOT attempt to work around it. Do NOT create tmux sessions yourself. Do NOT re-launch the script in a different way. Just tell the user what went wrong and suggest using Agent mode instead.
-7. **If successful** — tell the user the tmux session has been started. The shell script takes over as the deterministic Leader. No Agent() calls are made in tmux mode.
+**Quoting contract (v5.7 §4.1)**: every `'<value>'` placeholder above must be replaced with the user's flag value wrapped in single quotes via the equivalent of `shellQuote(value)` — `"'" + value.replace(/'/g, "'\\''") + "'"` for POSIX correctness. The slug, all model values, and any future dynamic flag must follow this rule. A slug or model containing brackets / spaces / single quotes / dollar signs / backticks must NOT break the leader invocation.
+**Env-var translation (v5.7 §4.1)**: the slash command historically built `LANE_MODE=strict zsh ...` and `TEST_DENSITY_MODE=strict zsh ...` from CLI flags. The Node leader uses CLI flags instead — translate `--lane-strict` and `--test-density-strict` into the corresponding flags. Direct env-var users (running zsh directly) are unaffected.
+6. **If the Node leader exits with error** — report the error to the user and STOP. Do NOT attempt to work around it. Do NOT create tmux sessions yourself. Do NOT re-launch in a different way. Tell the user what went wrong and suggest `--mode agent` as alternative.
+7. **If successful** — tell the user the tmux session has been started. The Node leader takes over as the deterministic Leader. No Agent() calls are made in tmux mode.
 **IMPORTANT RULES:**
-- Tmux mode requires the user to already be inside a tmux session. If the runner script rejects because $TMUX is not set, do NOT try to create a tmux session yourself. Tell the user: "Start tmux first, then retry."
-- MUST launch the runner with `run_in_background: true` so `/rlp-desk` returns control immediately while preserving live tmux visibility.
+- Tmux mode requires the user to already be inside a tmux session. If the leader rejects because $TMUX is not set, do NOT try to create a tmux session yourself. Tell the user: "Start tmux first, then retry."
+- MUST launch with `run_in_background: true` so `/rlp-desk` returns control immediately while preserving live tmux visibility.
 - Run-in-background is used so the shell can keep the command visible and keep the pane layout stable for status checks and completion flow.
 - Do NOT kill panes after completion. Panes stay alive for inspection. User cleans up with `/rlp-desk clean <slug> --kill-session`.
-- `--with-self-verification` is accepted in tmux mode. After campaign completion, `run_ralph_desk.zsh` spawns `claude CLI` to generate the SV report from campaign artifacts (done-claims, verify-verdicts, campaign-report). SV reports are written to `~/.claude/ralph-desk/analytics/<slug>/`. Requires `claude` CLI available in PATH; if not found, an error is appended to the campaign report.
+- `--with-self-verification` is fully supported in tmux mode (v5.7 §4.7). The Node leader's `generateSVReport()` writes `self-verification-report.md` + `self-verification-data.json` under `<project>/.claude/ralph-desk/analytics/<slug>/` (project-local, v5.7 §4.11.b).
+- `--flywheel on-fail` and `--flywheel-guard on` are fully supported in tmux mode (v5.7 §4.1). The Node leader handles pane creation, sendKeys dispatch, signal polling, and Guard retry semantics identically to agent mode.
+- Legacy `zsh ~/.claude/ralph-desk/run_ralph_desk.zsh` (deprecated in 0.12.0) still runs for non-flywheel/non-SV invocations but emits a deprecation `[notice]`. Calling it with `FLYWHEEL` or `WITH_SELF_VERIFICATION` env vars exits 2 with a migration banner pointing to the Node leader.
 **tmux UX model (5 items):**
 - The session returns immediately after launch (`run_in_background: true`) so the command returns control to the parent CLI.
@@ -324,12 +335,24 @@ WITH_SELF_VERIFICATION=<1 if --with-self-verification, else 0> \
 #### Agent Mode (`--mode agent` or default)
+**Why Agent mode is structurally immune to Bug 4/5 (mid-execution prompt hang
+& A4 premature dispatch):** Worker/Verifier are dispatched as `Agent(...,
+mode="bypassPermissions", ...)`. The subagent runs non-interactively under
+the platform's bypass — it has no tmux pane, no TUI surface, and cannot
+surface a `[y/N]` prompt to the parent Leader. The auto-dismiss /
+prompt-stall / no-progress timeouts in `run_ralph_desk.zsh` (v5.7 §4.13.b /
+§4.16 / §4.17) are therefore tmux-only by design. **Tradeoff**: because
+`Agent()` has no timeout API, agent-mode iterations are not bounded — if
+the platform's `bypassPermissions` ever fails to suppress an interactive
+prompt at the SDK level, the call hangs indefinitely with no rlp-desk-side
+watchdog. Use `--mode tmux` if you need bounded execution time.
 ### Preparation
-1. Validate scaffold: `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
+1. Validate scaffold: `.rlp-desk/prompts/<slug>.worker.prompt.md` etc.
 2. **Codex CLI pre-validation**: If `--consensus` is not `off` OR `--worker-model` uses codex format (contains `:`) OR `--verifier-model` / `--final-verifier-model` / `--consensus-model` / `--final-consensus-model` uses codex format, check that `codex` CLI exists in PATH. If codex CLI not found → STOP immediately, print install instructions (`npm install -g @openai/codex`), do not start the loop.
 3. Check sentinels (complete/blocked). Found → tell user `/rlp-desk clean <slug>`.
 4. Clean previous `done-claim.json`, `verify-verdict.json`.
-5. **Always**: write baseline log entry to `.claude/ralph-desk/logs/<slug>/baseline.log`: `[timestamp] iter=0 phase=start slug=<slug> worker_model=<model> verifier_model=<model>`. Baseline.log captures 1 line per iteration for lightweight post-mortem (always-on, no flag needed).
+5. **Always**: write baseline log entry to `.rlp-desk/logs/<slug>/baseline.log`: `[timestamp] iter=0 phase=start slug=<slug> worker_model=<model> verifier_model=<model>`. Baseline.log captures 1 line per iteration for lightweight post-mortem (always-on, no flag needed).
 6. If `--debug`: also create/clear `~/.claude/ralph-desk/analytics/<slug>/debug.log`. Define a helper: to "debug_log" means append a timestamped line to this file via `Bash("echo \"[$(date '+%Y-%m-%d %H:%M:%S')] $msg\" >> ~/.claude/ralph-desk/analytics/<slug>/debug.log")`. When `--debug` is active, debug.log contains all baseline.log fields plus detailed phase logs.
    - **4-category log system**: all debug_log entries use exactly one of: `[GOV]` (governance checks: IL enforcement, CB triggers, scope lock, verdict evaluation), `[DECIDE]` (leader decisions: model selection, fix contracts, escalation), `[OPTION]` (configuration snapshot at loop start: thresholds, modes, models), `[FLOW]` (execution progress: worker/verifier dispatch, signal reads, phase transitions)
    - **Re-execution versioning**: If `debug.log` already exists at `--debug` start, rename it to `debug-v{N}.log` (N = next available integer ≥ 1) before creating a fresh `debug.log`.
@@ -355,14 +378,14 @@ For each iteration (1 to max_iter):
 **① Check sentinels**
 ```bash
-test -f .claude/ralph-desk/memos/<slug>-complete.md  # → done
-test -f .claude/ralph-desk/memos/<slug>-blocked.md   # → stop
+test -f .rlp-desk/memos/<slug>-complete.md  # → done
+test -f .rlp-desk/memos/<slug>-blocked.md   # → stop
 ```
 **①½ Prep-stage cleanup**
 ```bash
-rm -f .claude/ralph-desk/memos/<slug>-done-claim.json
-rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
+rm -f .rlp-desk/memos/<slug>-done-claim.json
+rm -f .rlp-desk/memos/<slug>-verify-verdict.json
 ```
 **② Read memory.md** → Stop Status, Next Iteration Contract
@@ -378,15 +401,15 @@ rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
 **④ Build worker prompt (Prompt Assembly Protocol)**
 1. Capture `WORKING_DIR` once: use `$PWD` from when `/rlp-desk run` was invoked. Store for all prompt construction.
-2. Read `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` — use its content **verbatim**. Do NOT rewrite, paraphrase, or regenerate paths. The prompt file contains correct absolute paths from init.
+2. Read `.rlp-desk/prompts/<slug>.worker.prompt.md` — use its content **verbatim**. Do NOT rewrite, paraphrase, or regenerate paths. The prompt file contains correct absolute paths from init.
 2a. **Per-US PRD injection** (when targeting a specific `us_id`, not "ALL"):
-   - Check if `.claude/ralph-desk/plans/prd-<slug>-{us_id}.md` exists (created by init split)
+   - Check if `.rlp-desk/plans/prd-<slug>-{us_id}.md` exists (created by init split)
    - If yes: in the assembled prompt text, replace the full PRD reference (`prd-<slug>.md`) with the per-US file path (`prd-<slug>-{us_id}.md`) — so Worker reads only the relevant US section
    - If no per-US file: fall back to full PRD (`prd-<slug>.md`) with no change needed
    - Note: this absolute-path substitution is permitted — only absolute→relative rewrites are forbidden.
 3. Prepend meta comment: `## WORKING_DIR: {absolute path}` — Worker must use this as its working directory.
 4. Append iteration number + memory contract.
-5. Write to `.claude/ralph-desk/logs/<slug>/iter-NNN.worker-prompt.md` (audit trail).
+5. Write to `.rlp-desk/logs/<slug>/iter-NNN.worker-prompt.md` (audit trail).
 - Note: Worker ALWAYS records execution_steps in done-claim.json per governance §1f. No flag needed.
 - **Rewriting paths from absolute to relative WILL break worktree campaigns. Only additions (WORKING_DIR header, iteration context) are allowed.**
@@ -637,7 +660,7 @@ When `--consensus` is not `off`, also track in `status.json`:
 ---
 ## `status <slug>`
-Read `.claude/ralph-desk/logs/<slug>/runtime/status.json` and display a detailed report:
+Read `.rlp-desk/logs/<slug>/runtime/status.json` and display a detailed report:
 ```
 Campaign: <slug>
@@ -660,22 +683,22 @@ Read the last `verify-verdict.json` to show the most recent verdict summary and
 ## `clean <slug> [--kill-session]`
 Remove:
-- `.claude/ralph-desk/memos/<slug>-complete.md`
-- `.claude/ralph-desk/memos/<slug>-blocked.md`
-- `.claude/ralph-desk/memos/<slug>-done-claim.json`
-- `.claude/ralph-desk/memos/<slug>-verify-verdict.json`
-- `.claude/ralph-desk/memos/<slug>-iter-signal.json`
-- `.claude/ralph-desk/logs/<slug>/circuit-breaker.json`
-- `.claude/ralph-desk/logs/<slug>/runtime/session-config.json`
-- `.claude/ralph-desk/logs/<slug>/runtime/worker-heartbeat.json`
-- `.claude/ralph-desk/logs/<slug>/runtime/verifier-heartbeat.json`
-- `.claude/ralph-desk/memos/<slug>-escalation.md`
+- `.rlp-desk/memos/<slug>-complete.md`
+- `.rlp-desk/memos/<slug>-blocked.md`
+- `.rlp-desk/memos/<slug>-done-claim.json`
+- `.rlp-desk/memos/<slug>-verify-verdict.json`
+- `.rlp-desk/memos/<slug>-iter-signal.json`
+- `.rlp-desk/logs/<slug>/circuit-breaker.json`
+- `.rlp-desk/logs/<slug>/runtime/session-config.json`
+- `.rlp-desk/logs/<slug>/runtime/worker-heartbeat.json`
+- `.rlp-desk/logs/<slug>/runtime/verifier-heartbeat.json`
+- `.rlp-desk/memos/<slug>-escalation.md`
 Note: `campaign-report.md`, `campaign-report-v{N}.md`, `iter-NNN-done-claim.json`, and `iter-NNN-verify-verdict.json` are intentionally preserved across clean for historical comparison. Analytics files (`debug.log`, `campaign.jsonl`, `self-verification-data.json`, `self-verification-report-NNN.md`) at `~/.claude/ralph-desk/analytics/<slug>/` are NOT affected by project-level clean.
 If `--kill-session` is passed, clean up Worker/Verifier tmux panes using session-config.json:
 ```bash
 # Read pane IDs from session-config.json (safe — targets only Worker/Verifier panes)
-SESSION_CONFIG=".claude/ralph-desk/logs/<slug>/runtime/session-config.json"
+SESSION_CONFIG=".rlp-desk/logs/<slug>/runtime/session-config.json"
 if [ -f "$SESSION_CONFIG" ] && command -v jq &>/dev/null; then
   WORKER_PANE=$(jq -r '.panes.worker // empty' "$SESSION_CONFIG")
   VERIFIER_PANE=$(jq -r '.panes.verifier // empty' "$SESSION_CONFIG")
@@ -715,8 +738,8 @@ Data sources:
 Resume a previously interrupted campaign. Equivalent to `run <slug>` but explicitly restores state:
-1. Read `.claude/ralph-desk/logs/<slug>/runtime/status.json` for `verified_us`, `iteration`, `consecutive_failures`
-2. Read `.claude/ralph-desk/memos/<slug>-memory.md` for completed stories and next iteration contract
+1. Read `.rlp-desk/logs/<slug>/runtime/status.json` for `verified_us`, `iteration`, `consecutive_failures`
+2. Read `.rlp-desk/memos/<slug>-memory.md` for completed stories and next iteration contract
 3. Check for sentinels (`complete.md`, `blocked.md`) — if present, inform user and stop
 4. If no sentinels, invoke `run <slug>` with the same options from the previous session (stored in status.json fields: `worker_model`, `verifier_model`, `final_verifier_model`, `verify_mode`, `consensus_mode`)
 5. The runner automatically restores `verified_us` from memory or status.json on startup

package/src/governance.md CHANGED Viewed

@@ -297,13 +297,54 @@ BLOCKED writes a JSON sidecar (`<slug>-blocked.json`) alongside the markdown sen
 - English: `depends on US-`, `blocking US-`, `awaits US-`, `post-iter US-`, `requires US-N`, `cross-US`
 - Korean: `US-N 산출물`, `신규 US-`, `post-iter`
-**Write Order Contract (atomicity invariant)**:
-1. JSON sidecar written FIRST (`fs.writeFile` / `atomic_write`).
-2. markdown sentinel written SECOND.
-3. Invariant: **markdown exists ⇒ JSON exists** (writer enforces order).
-4. Wrappers SHOULD watch markdown sentinel, then read JSON sidecar. If JSON not yet visible (rare), retry up to 5 × 50ms before failing.
+**Write Order Contract (atomicity invariant)** — v5.7 §4.24 reversed:
+1. **markdown sentinel written FIRST** via `writeSentinelExclusive` (`fs.open(path, 'wx')` — O_EXCL first-writer-wins). The md acts as the race lock.
+2. **JSON sidecar written SECOND**, only by the winning writer.
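+3. Invariant (eventual): **markdown exists ⇒ JSON exists** once the winner's second write has landed (winner writes both; losers see EEXIST and return without touching JSON, preserving the winner's content). Because the markdown is written first, it can briefly exist before its JSON sidecar does.
+4. Wrappers SHOULD watch markdown sentinel, then read JSON sidecar. If the JSON is not yet visible (a rare window, expected to last ≤50ms), retry up to 5 × 50ms before failing.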
-`atomic_write` provides per-file rename atomicity; cross-file ordering is enforced by the explicit two-call sequence.
+`writeSentinelExclusive` (in `src/node/shared/fs.mjs`) provides per-file first-writer-wins; cross-file ordering is enforced by the explicit md-then-JSON sequence inside `writeSentinel`.
+## 1g. Sentinel Guarantee Invariant (file-guarantee contract)
+**Every terminal exit of `runCampaign()` MUST leave exactly one sentinel on disk: `<slug>-blocked.md` XOR `<slug>-complete.md`.**
+This invariant is the foundation of the fresh-context architecture. If a campaign exits without any sentinel, future iterations cannot determine campaign state — Worker/Verifier are dispatched into a campaign whose history they cannot reconstruct.
+### Enforcement (3-layer defense)
+1. **Per-poll-site sentinel write** (`_handlePollFailure` helper at `src/node/runner/campaign-main-loop.mjs`). Every `pollForSignal` call site (Worker, VerifierPerUS, VerifierFinal, Flywheel, Guard) is wrapped in `try { … } catch (error) { return _handlePollFailure(error, { role, … }); }`. The helper classifies via `BLOCK_TAGS` typed enum, calls `writeSentinel` (idempotent via O_EXCL), and returns `{status:'blocked', …}` so the caller exits the loop cleanly.
+2. **Run-level try/finally backstop** (`_ensureTerminalSentinel`). After the campaign body executes, a `finally` block checks `exists(blockedSentinel) XOR exists(completeSentinel)`. If neither (paused state `continue` excepted), writes a synthetic BLOCKED `infra_failure/leader_exited_without_terminal_state` so even unhandled exceptions cannot escape silently.
+3. **Schema validator at READ boundary** (`validateArtifact`). After every `pollForSignal` returns parsed JSON, validates `(slug, iteration ≥ floor, signal_type matches read context, us_id ∈ usList ∪ {ALL})`. Throws `MalformedArtifactError({field, expected, got})` → caught by same `_handlePollFailure` → BLOCKED `contract_violation/malformed_artifact` (recoverable).
+### Per-role failure-category enum
+`_classifyBlock` (in `campaign-main-loop.mjs`) maps each `BLOCK_TAGS` value to one of the locked taxonomy categories:
+| Tag | reason_category | recoverable | Example trigger |
+|-----|----------------|-------------|-----------------|
+| `WORKER_EXITED` | `infra_failure` | false | Worker pane returned to shell without writing signal |
+| `VERIFIER_EXITED` | `infra_failure` | false | Per-US Verifier exited without writing verdict |
+| `FINAL_VERIFIER_EXITED` | `infra_failure` | false | Final ALL-verifier exited without writing verdict |
+| `FLYWHEEL_EXITED` | `infra_failure` | false | Flywheel pane crashed |
+| `GUARD_EXITED` | `infra_failure` | false | Guard pane crashed |
+| `PROMPT_BLOCKED` | `infra_failure` | false | Default-No prompt — auto-Enter would CANCEL |
+| `<role>_TIMEOUT` | `infra_failure` | false | pollForSignal timed out without exit detected |
+| `MALFORMED_ARTIFACT` | `contract_violation` | true | Worker/Verifier wrote schema-violating JSON |
+| `LEADER_EXITED_WITHOUT_TERMINAL_STATE` | `infra_failure` | false | Backstop fired (uncaught exception or paths outside controlled scope) |
+### Auditing
+Operators can verify the invariant for any campaign by running:
+```sh
+zsh tests/sv-gate-fast.sh   # 30s mechanical check (greps + units)
+zsh tests/sv-gate-full.sh   # 5min including REAL tmux + REAL campaign E2E
+```
+The fast gate fails immediately if any pollForSignal call site lacks a `_handlePollFailure` wiring or the writeSentinelExclusive primitive is bypassed.
 ## 2. Roles
@@ -468,7 +509,7 @@ Characteristics:
 ### Project-local
 ```
-.claude/ralph-desk/
+.rlp-desk/
 ├── prompts/
 │   ├── <slug>.worker.prompt.md      # Worker base prompt (regenerated on re-execution)
 │   └── <slug>.verifier.prompt.md    # Verifier base prompt (regenerated on re-execution)
@@ -553,6 +594,14 @@ for iteration in 1..max_iter:
          • fail + retries exhausted → BLOCKED
          • inconclusive → BLOCKED (escalate to user)
        - Guard count tracked per-US in status.json
+     - **Mode support (v0.12.0+, v5.7 §4.3)**: flywheel runs identically in
+       --mode agent and --mode tmux when routed through the Node leader
+       (`node ~/.claude/ralph-desk/node/run.mjs run --mode tmux`). The legacy
+       `run_ralph_desk.zsh` runner rejects --flywheel/--flywheel-guard with
+       exit 2 + migration banner; users must use the Node entry. Same applies
+       to --with-self-verification: SV report generation is supported in
+       tmux mode via the Node leader's generateSVReport() (no longer
+       agent-mode-only).
   ⑦ Execute Verifier (see §7a for per-US and §7b for consensus details)
      - Build prompt (scoped to us_id if per-us mode) → log

package/src/node/MANIFEST.txt ADDED Viewed

@@ -0,0 +1,15 @@
+cli/command-builder.mjs
+constants.mjs
+init/campaign-initializer.mjs
+polling/signal-poller.mjs
+prompts/prompt-assembler.mjs
+reporting/campaign-reporting.mjs
+run.mjs
+runner/campaign-main-loop.mjs
+runner/leader-registry.mjs
+runner/prompt-dismisser.mjs
+shared/fs.mjs
+shared/paths.mjs
+tmux/pane-manager.mjs
+util/debug-log.mjs
+util/shell-quote.mjs

package/src/node/cli/command-builder.mjs CHANGED Viewed

@@ -1,7 +1,28 @@
+import { shellQuote } from '../util/shell-quote.mjs';
+import { OPUS_1M_BETA, isOpusModel } from '../constants.mjs';
 const CLAUDE_BIN = 'claude';
 const CODEX_BIN = 'codex';
 const CLAUDE_MODELS = new Set(['haiku', 'sonnet', 'opus']);
+/**
+ * v0.13.0: surface engine classification for tmux+claude warning + observability.
+ *
+ * Returns true when modelFlag is a non-empty string whose segment before the
+ * first ':' is one of the CLAUDE_MODELS aliases or starts with 'claude-'.
+ * Returns false for every other input, including non-strings and ''.
+ */
+export function isClaudeEngine(modelFlag) {
+  if (typeof modelFlag !== 'string' || modelFlag.length === 0) {
+    return false;
+  }
+  const head = modelFlag.split(':', 1)[0]; // keep only the text before the first ':'
+  if (!head) { // empty head, e.g. a flag that begins with ':'
+    return false;
+  }
+  if (CLAUDE_MODELS.has(head)) {
+    return true;
+  }
+  return head.startsWith('claude-');
+}
 function assertTuiMode(mode, builderName) {
   if (mode !== 'tui') {
     throw new Error(`${builderName} unknown mode '${mode}'`);
@@ -11,19 +32,36 @@ function assertTuiMode(mode, builderName) {
 export function buildClaudeCmd(mode, model, options = {}) {
   assertTuiMode(mode, 'buildClaudeCmd');
-  const parts = [
-    'DISABLE_OMC=1',
+  // v5.7 §4.9: auto-enable 1M-token context for Opus models. Long campaigns
+  // no longer silently truncate at 200K. Header is benign for non-Opus calls
+  // but we omit it there to keep the cmdline tidy.
+  const parts = ['DISABLE_OMC=1'];
+  if (isOpusModel(model)) {
+    parts.push(`ANTHROPIC_BETA=${shellQuote(OPUS_1M_BETA)}`);
+  }
+  parts.push(
     CLAUDE_BIN,
     '--model',
-    model,
+    shellQuote(model),
     '--mcp-config',
     '\'{"mcpServers":{}}\'',
     '--strict-mcp-config',
     '--dangerously-skip-permissions',
-  ];
+  );
+  // v5.7 §4.11.a: explicit --add-dir whitelist. With --dangerously-skip-permissions
+  // alone, claude CLI still surfaces TUI prompts for cwd-adjacent paths in some
+  // versions. Add the home rlp-desk tree (where Leader writes registry.jsonl
+  // and reads governance docs) plus the campaign cwd, so Worker has full
+  // authorized access without prompts.
+  if (options.addDirs && Array.isArray(options.addDirs)) {
+    for (const dir of options.addDirs) {
+      if (dir) parts.push('--add-dir', shellQuote(dir));
+    }
+  }
   if (options.effort !== undefined && options.effort !== '') {
-    parts.push('--effort', options.effort);
+    parts.push('--effort', shellQuote(options.effort));
   }
   return parts.join(' ');

package/src/node/constants.mjs ADDED Viewed

@@ -0,0 +1,19 @@
+// Shared runtime constants. Single-source for cross-module values.
+// Anthropic Claude API beta header that activates the 1M-token context window
+// for Opus models. Auto-prepended to every claude CLI invocation that uses
+// --model opus so long campaigns no longer silently truncate at 200K.
+//
+// Docs: https://docs.anthropic.com/en/docs/build-with-claude/context-windows
+// (search "1M context") — header rotates with each beta phase.
+export const OPUS_1M_BETA = 'context-1m-2025-08-07';
+// Reports whether a model id triggers the Opus 1M auto-enable. The value is
+// lowercased and matched as a plain string: exactly 'opus', or any id that
+// starts with 'claude-opus-'. The bracketed form 'claude-opus-4-7[1m]' starts
+// with that prefix, so it matches too. Falsy input returns false.
+export function isOpusModel(model) {
+  if (!model) return false;
+  const m = String(model).toLowerCase(); // String() lets non-string truthy values be compared too
+  return m === 'opus' || m.startsWith('claude-opus-');
+}

package/src/node/init/campaign-initializer.mjs CHANGED Viewed

@@ -1,8 +1,79 @@
 import fs from 'node:fs/promises';
+import fsSync from 'node:fs';
 import path from 'node:path';
+import { LEGACY_DESK_REL, resolveDeskRoot } from '../util/desk-root.mjs';
 const GITIGNORE_MARKER = '# RLP Desk runtime artifacts';
-const GITIGNORE_RULE = '.claude/ralph-desk/';
+const GITIGNORE_RULE = '.rlp-desk/';
+const LEGACY_GITIGNORE_RULE = '.claude/ralph-desk/';
+const MIGRATION_LOCK_FILE = '.rlp-desk-migration.lock';
+const STALE_LOCK_MS = 5 * 60 * 1000;
+export function migrateLegacyDesk(rootDir, env = process.env) { /*
+   * Moves the legacy desk directory (rootDir joined with LEGACY_DESK_REL) to the
+   * path returned by resolveDeskRoot(rootDir, env), guarded by an exclusive lock
+   * file (MIGRATION_LOCK_FILE) created in rootDir.
+   *
+   * Returns { action: 'noop', reason: 'new-only' | 'neither-exists' } when the
+   * legacy directory does not exist, or { action: 'migrated', from, to } after
+   * the rename.
+   *
+   * Throws an Error when a lock no older than STALE_LOCK_MS is present, or when
+   * both the legacy and the new directory exist. Other fs errors propagate.
+   * Once the lock has been acquired it is always closed and removed on exit.
+   */
+  const legacyPath = path.join(rootDir, LEGACY_DESK_REL);
+  const newPath = resolveDeskRoot(rootDir, env);
+  const lockPath = path.join(rootDir, MIGRATION_LOCK_FILE);
+  // Pre-lock cheap check: skip the lock entirely when there is nothing to do.
+  // Re-check the same conditions inside the lock — a competing process may
+  // have moved or created files between this check and the lock acquisition.
+  if (!fsSync.existsSync(legacyPath)) {
+    return { action: 'noop', reason: fsSync.existsSync(newPath) ? 'new-only' : 'neither-exists' };
+  }
+  let lockFd;
+  try {
+    lockFd = fsSync.openSync(lockPath, 'wx'); // 'wx' creates exclusively; fails with EEXIST if the lock file is already there
+  } catch (error) {
+    if (error.code === 'EEXIST') {
+      try {
+        const stats = fsSync.statSync(lockPath);
+        const age = Date.now() - stats.mtimeMs;
+        if (age > STALE_LOCK_MS) { // lock untouched for longer than STALE_LOCK_MS: treat as abandoned and take it over
+          fsSync.unlinkSync(lockPath);
+          lockFd = fsSync.openSync(lockPath, 'wx');
+        } else {
+          throw new Error(`Migration already in progress (lock at ${lockPath}, age ${Math.round(age / 1000)}s)`);
+        }
+      } catch (statError) { // also receives the "already in progress" Error thrown above; it is rethrown by the else branch
+        if (statError.code === 'ENOENT') { // lock file vanished before stat/unlink could see it; try to create it once more
+          lockFd = fsSync.openSync(lockPath, 'wx');
+        } else {
+          throw statError;
+        }
+      }
+    } else {
+      throw error;
+    }
+  }
+  try {
+    fsSync.writeSync(lockFd, String(process.pid)); // record this process's pid inside the lock file
+    // Re-check inside the lock — another process may have already migrated
+    // between the pre-lock check above and the moment this lock was acquired.
+    const legacyExistsLocked = fsSync.existsSync(legacyPath);
+    const newExistsLocked = fsSync.existsSync(newPath);
+    if (!legacyExistsLocked) {
+      return { action: 'noop', reason: newExistsLocked ? 'new-only' : 'neither-exists' };
+    }
+    if (newExistsLocked) {
+      throw new Error(
+        `Migration aborted: both directories exist. Remove one before re-run. legacy=${legacyPath}, new=${newPath}`,
+      );
+    }
+    fsSync.mkdirSync(path.dirname(newPath), { recursive: true }); // make sure the parent of the target exists before renaming
+    fsSync.renameSync(legacyPath, newPath);
+    return { action: 'migrated', from: legacyPath, to: newPath };
+  } finally {
+    try { fsSync.closeSync(lockFd); } catch (_) { /* noop */ } // best-effort cleanup: close errors are ignored
+    try { fsSync.unlinkSync(lockPath); } catch (_) { /* noop */ } // best-effort cleanup: unlink errors are ignored
+  }
+}
 export async function initCampaign(slug, objective, options = {}) {
   const normalizedSlug = normalizeSlug(slug);
@@ -10,17 +81,21 @@ export async function initCampaign(slug, objective, options = {}) {
   const mode = options.mode ?? 'agent';
   const rootDir = path.resolve(options.rootDir ?? process.cwd());
   const tmuxEnv = options.tmuxEnv ?? process.env.TMUX ?? '';
-  const deskRoot = path.join(rootDir, '.claude', 'ralph-desk');
+  const env = options.env ?? process.env;
   if (mode === 'tmux' && !tmuxEnv) {
     throw new Error('tmux required');
   }
+  migrateLegacyDesk(rootDir, env);
+  const deskRoot = resolveDeskRoot(rootDir, env);
   if (mode === 'fresh') {
     await fs.rm(deskRoot, { recursive: true, force: true });
   }
-  const paths = buildPaths(rootDir, normalizedSlug);
+  const paths = buildPaths(rootDir, normalizedSlug, env);
   await ensureDirectories(paths);
   await ensureGitignore(rootDir);
@@ -55,8 +130,8 @@ function normalizeSlug(value) {
   return slug;
 }
-function buildPaths(rootDir, slug) {
-  const deskRoot = path.join(rootDir, '.claude', 'ralph-desk');
+function buildPaths(rootDir, slug, env = process.env) {
+  const deskRoot = resolveDeskRoot(rootDir, env);
   const promptsDir = path.join(deskRoot, 'prompts');
   const plansDir = path.join(deskRoot, 'plans');
   const memosDir = path.join(deskRoot, 'memos');
@@ -105,13 +180,28 @@ async function ensureGitignore(rootDir) {
     }
   }
-  if (content.includes(GITIGNORE_MARKER) && content.includes(GITIGNORE_RULE)) {
-    return;
+  let updated = content;
+  let changed = false;
+  // v0.13.0: drop the legacy .claude/ralph-desk/ rule if present.
+  if (updated.includes(LEGACY_GITIGNORE_RULE)) {
+    const legacyLineRegex = new RegExp(
+      `^${LEGACY_GITIGNORE_RULE.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\r?\\n`,
+      'gm',
+    );
+    updated = updated.replace(legacyLineRegex, '');
+    changed = true;
   }
-  const prefix = content.length > 0 && !content.endsWith('\n') ? '\n' : '';
-  const block = `${prefix}${GITIGNORE_MARKER}\n${GITIGNORE_RULE}\n`;
-  await fs.writeFile(gitignorePath, `${content}${block}`, 'utf8');
+  if (!(updated.includes(GITIGNORE_MARKER) && updated.includes(GITIGNORE_RULE))) {
+    const prefix = updated.length > 0 && !updated.endsWith('\n') ? '\n' : '';
+    updated = `${updated}${prefix}${GITIGNORE_MARKER}\n${GITIGNORE_RULE}\n`;
+    changed = true;
+  }
+  if (changed) {
+    await fs.writeFile(gitignorePath, updated, 'utf8');
+  }
 }
 async function writeIfMissing(targetPath, content) {