@ai-dev-methodologies/rlp-desk 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/plans/spicy-booping-galaxy.md +322 -0
- package/docs/rlp-desk/artifact-schema.md +99 -0
- package/docs/rlp-desk/ci-setup.md +100 -0
- package/docs/rlp-desk/e2e-scenarios.md +102 -0
- package/docs/rlp-desk/plans/rlp-desk-tmux-flywheel-routing.md +730 -0
- package/install.sh +93 -20
- package/package.json +9 -3
- package/scripts/build-node-manifest.js +52 -0
- package/scripts/postinstall.js +162 -8
- package/src/commands/rlp-desk.md +73 -50
- package/src/governance.md +56 -7
- package/src/node/MANIFEST.txt +15 -0
- package/src/node/cli/command-builder.mjs +43 -5
- package/src/node/constants.mjs +19 -0
- package/src/node/init/campaign-initializer.mjs +100 -10
- package/src/node/polling/signal-poller.mjs +139 -3
- package/src/node/reporting/campaign-reporting.mjs +5 -1
- package/src/node/run.mjs +31 -2
- package/src/node/runner/campaign-main-loop.mjs +521 -44
- package/src/node/runner/leader-registry.mjs +100 -0
- package/src/node/runner/prompt-detector.mjs +41 -0
- package/src/node/runner/prompt-dismisser.mjs +200 -0
- package/src/node/shared/fs.mjs +38 -0
- package/src/node/util/debug-log.mjs +56 -0
- package/src/node/util/desk-root.mjs +24 -0
- package/src/node/util/shell-quote.mjs +12 -0
- package/docs/superpowers/plans/2026-04-24-gpt-5-5-default.md +0 -517
- package/docs/superpowers/specs/2026-04-24-gpt-5-5-default.md +0 -107
- /package/docs/{TODO-verification-next.md → rlp-desk/TODO-verification-next.md} +0 -0
- /package/docs/{architecture.md → rlp-desk/architecture.md} +0 -0
- /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-flywheel-enhancement.md +0 -0
- /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-pivot-step.md +0 -0
- /package/docs/{blueprints → rlp-desk/blueprints}/plan-flywheel-enhancement.md +0 -0
- /package/docs/{blueprints → rlp-desk/blueprints}/sv-architecture-rethink.md +0 -0
- /package/docs/{getting-started.md → rlp-desk/getting-started.md} +0 -0
- /package/docs/{internal → rlp-desk/internal}/verification-policy-gap-analysis.md +0 -0
- /package/docs/{internal → rlp-desk/internal}/verification-strategy-research.md +0 -0
- /package/docs/{multi-mission-orchestration.md → rlp-desk/multi-mission-orchestration.md} +0 -0
- /package/docs/{plans → rlp-desk/plans}/cozy-gliding-trinket.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/frolicking-churning-honey.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/keen-sauteeing-snowflake.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/mutable-booping-corbato.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11-handoff-7fixes.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11.1-tmux-pane-disappearance.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert-agent-a8cd695ffca2a3ad8.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie-agent-a6814625642e956da.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/validated-snacking-crayon.md +0 -0
- /package/docs/{protocol-reference.md → rlp-desk/protocol-reference.md} +0 -0
package/src/commands/rlp-desk.md
CHANGED
|
@@ -135,7 +135,7 @@ After all items are confirmed:
|
|
|
135
135
|
Present the score table to the user before proceeding.
|
|
136
136
|
2. Present the full contract summary.
|
|
137
137
|
3. **Self-Verification** — Ask: "Enable self-verification? Worker records step-by-step evidence, Verifier cross-validates process. Recommended for MEDIUM+ risk." Default: yes for HIGH/CRITICAL, no for LOW/MEDIUM.
|
|
138
|
-
4. **Re-execution check**: After slug is confirmed, check if `.
|
|
138
|
+
4. **Re-execution check**: After slug is confirmed, check if `.rlp-desk/plans/prd-<slug>.md` already exists. If a PRD already exists for this slug, ask: "A PRD already exists for this slug. Improve the existing PRD or start fresh (delete and recreate)?"
|
|
139
139
|
- "improve" → pass `--mode improve` to init
|
|
140
140
|
- "start fresh" → pass `--mode fresh` to init
|
|
141
141
|
- If no PRD exists: standard first-run (no --mode needed)
|
|
@@ -280,40 +280,51 @@ Parse the `--mode` flag. If absent or `agent`, use the Agent() path below. If `t
|
|
|
280
280
|
|
|
281
281
|
#### Tmux Mode (`--mode tmux`)
|
|
282
282
|
|
|
283
|
-
When `--mode tmux` is specified:
|
|
283
|
+
When `--mode tmux` is specified (v0.12.0+ — v5.7 §4.1 routes to Node leader for flywheel + SV support):
|
|
284
284
|
|
|
285
|
-
1. **Validate scaffold** — same as Agent() mode: check `.
|
|
285
|
+
1. **Validate scaffold** — same as Agent() mode: check `.rlp-desk/prompts/<slug>.worker.prompt.md` etc.
|
|
286
286
|
2. **Check sentinels** — same as Agent() mode.
|
|
287
|
-
3. **Check prerequisites** — verify `tmux` and `
|
|
288
|
-
4. **Locate
|
|
289
|
-
5. **Launch** — shell out to the
|
|
287
|
+
3. **Check prerequisites** — verify `tmux`, `jq`, and `node` (>= 16) are installed. If not, report what is missing and stop.
|
|
288
|
+
4. **Locate Node leader** — find `~/.claude/ralph-desk/node/run.mjs`. If not found, tell the user to reinstall (`npm install` or `bash install.sh`).
|
|
289
|
+
5. **Launch** — shell out to the Node leader. **All dynamic args (slug + model values) MUST be passed through shell single-quote escaping** (v5.7 §4.12 G11) so bracketed model ids like `claude-opus-4-7[1m]` survive zsh parsing:
|
|
290
|
+
|
|
290
291
|
```bash
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
292
|
+
node ~/.claude/ralph-desk/node/run.mjs run '<slug>' \
|
|
293
|
+
--mode tmux \
|
|
294
|
+
--max-iter <N> \
|
|
295
|
+
--worker-model '<value>' \
|
|
296
|
+
[--lock-worker-model] \
|
|
297
|
+
--verifier-model '<value>' \
|
|
298
|
+
--final-verifier-model '<value>' \
|
|
299
|
+
--consensus <off|all|final-only> \
|
|
300
|
+
--consensus-model '<value>' \
|
|
301
|
+
--final-consensus-model '<value>' \
|
|
302
|
+
--verify-mode <per-us|batch> \
|
|
303
|
+
--cb-threshold <N> \
|
|
304
|
+
--iter-timeout <N> \
|
|
305
|
+
[--debug] [--autonomous] \
|
|
306
|
+
[--lane-strict] # was env LANE_MODE=strict \
|
|
307
|
+
[--test-density-strict] # was env TEST_DENSITY_MODE=strict \
|
|
308
|
+
[--with-self-verification] \
|
|
309
|
+
[--flywheel on-fail --flywheel-model '<value>'] \
|
|
310
|
+
[--flywheel-guard on --flywheel-guard-model '<value>']
|
|
307
311
|
```
|
|
308
|
-
|
|
309
|
-
7.
|
|
312
|
+
|
|
313
|
+
**Quoting contract (v5.7 §4.1)**: every `'<value>'` placeholder above must be replaced with the user's flag value wrapped in single quotes via the equivalent of `shellQuote(value)` — `"'" + value.replace(/'/g, "'\\''") + "'"` for POSIX correctness. The slug, all model values, and any future dynamic flag must follow this rule. A slug or model containing brackets / spaces / single quotes / dollar signs / backticks must NOT break the leader invocation.
|
|
314
|
+
|
|
315
|
+
**Env-var translation (v5.7 §4.1)**: the slash command historically built `LANE_MODE=strict zsh ...` and `TEST_DENSITY_MODE=strict zsh ...` from CLI flags. The Node leader uses CLI flags instead — translate `--lane-strict` and `--test-density-strict` into the corresponding flags. Direct env-var users (running zsh directly) are unaffected.
|
|
316
|
+
|
|
317
|
+
6. **If the Node leader exits with error** — report the error to the user and STOP. Do NOT attempt to work around it. Do NOT create tmux sessions yourself. Do NOT re-launch in a different way. Tell the user what went wrong and suggest `--mode agent` as alternative.
|
|
318
|
+
7. **If successful** — tell the user the tmux session has been started. The Node leader takes over as the deterministic Leader. No Agent() calls are made in tmux mode.
|
|
310
319
|
|
|
311
320
|
**IMPORTANT RULES:**
|
|
312
|
-
- Tmux mode requires the user to already be inside a tmux session. If the
|
|
313
|
-
- MUST launch
|
|
321
|
+
- Tmux mode requires the user to already be inside a tmux session. If the leader rejects because $TMUX is not set, do NOT try to create a tmux session yourself. Tell the user: "Start tmux first, then retry."
|
|
322
|
+
- MUST launch with `run_in_background: true` so `/rlp-desk` returns control immediately while preserving live tmux visibility.
|
|
314
323
|
- Run-in-background is used so the shell can keep the command visible and keep the pane layout stable for status checks and completion flow.
|
|
315
324
|
- Do NOT kill panes after completion. Panes stay alive for inspection. User cleans up with `/rlp-desk clean <slug> --kill-session`.
|
|
316
|
-
- `--with-self-verification` is
|
|
325
|
+
- `--with-self-verification` is fully supported in tmux mode (v5.7 §4.7). The Node leader's `generateSVReport()` writes `self-verification-report.md` + `self-verification-data.json` under `<project>/.claude/ralph-desk/analytics/<slug>/` (project-local, v5.7 §4.11.b).
|
|
326
|
+
- `--flywheel on-fail` and `--flywheel-guard on` are fully supported in tmux mode (v5.7 §4.1). The Node leader handles pane creation, sendKeys dispatch, signal polling, and Guard retry semantics identically to agent mode.
|
|
327
|
+
- Legacy `zsh ~/.claude/ralph-desk/run_ralph_desk.zsh` (deprecated in 0.12.0) still runs for non-flywheel/non-SV invocations but emits a deprecation `[notice]`. Calling it with `FLYWHEEL` or `WITH_SELF_VERIFICATION` env vars exits 2 with a migration banner pointing to the Node leader.
|
|
317
328
|
|
|
318
329
|
**tmux UX model (5 items):**
|
|
319
330
|
- The session returns immediately after launch (`run_in_background: true`) so the command returns control to the parent CLI.
|
|
@@ -324,12 +335,24 @@ WITH_SELF_VERIFICATION=<1 if --with-self-verification, else 0> \
|
|
|
324
335
|
|
|
325
336
|
#### Agent Mode (`--mode agent` or default)
|
|
326
337
|
|
|
338
|
+
**Why Agent mode is structurally immune to Bug 4/5 (mid-execution prompt hang
|
|
339
|
+
& A4 premature dispatch):** Worker/Verifier are dispatched as `Agent(...,
|
|
340
|
+
mode="bypassPermissions", ...)`. The subagent runs non-interactively under
|
|
341
|
+
the platform's bypass — it has no tmux pane, no TUI surface, and cannot
|
|
342
|
+
surface a `[y/N]` prompt to the parent Leader. The auto-dismiss /
|
|
343
|
+
prompt-stall / no-progress timeouts in `run_ralph_desk.zsh` (v5.7 §4.13.b /
|
|
344
|
+
§4.16 / §4.17) are therefore tmux-only by design. **Tradeoff**: because
|
|
345
|
+
`Agent()` has no timeout API, agent-mode iterations are not bounded — if
|
|
346
|
+
the platform's `bypassPermissions` ever fails to suppress an interactive
|
|
347
|
+
prompt at the SDK level, the call hangs indefinitely with no rlp-desk-side
|
|
348
|
+
watchdog. Use `--mode tmux` if you need bounded execution time.
|
|
349
|
+
|
|
327
350
|
### Preparation
|
|
328
|
-
1. Validate scaffold: `.
|
|
351
|
+
1. Validate scaffold: `.rlp-desk/prompts/<slug>.worker.prompt.md` etc.
|
|
329
352
|
2. **Codex CLI pre-validation**: If `--consensus` is not `off` OR `--worker-model` uses codex format (contains `:`) OR `--verifier-model` / `--final-verifier-model` / `--consensus-model` / `--final-consensus-model` uses codex format, check that `codex` CLI exists in PATH. If codex CLI not found → STOP immediately, print install instructions (`npm install -g @openai/codex`), do not start the loop.
|
|
330
353
|
3. Check sentinels (complete/blocked). Found → tell user `/rlp-desk clean <slug>`.
|
|
331
354
|
4. Clean previous `done-claim.json`, `verify-verdict.json`.
|
|
332
|
-
5. **Always**: write baseline log entry to `.
|
|
355
|
+
5. **Always**: write baseline log entry to `.rlp-desk/logs/<slug>/baseline.log`: `[timestamp] iter=0 phase=start slug=<slug> worker_model=<model> verifier_model=<model>`. Baseline.log captures 1 line per iteration for lightweight post-mortem (always-on, no flag needed).
|
|
333
356
|
6. If `--debug`: also create/clear `~/.claude/ralph-desk/analytics/<slug>/debug.log`. Define a helper: to "debug_log" means append a timestamped line to this file via `Bash("echo \"[$(date '+%Y-%m-%d %H:%M:%S')] $msg\" >> ~/.claude/ralph-desk/analytics/<slug>/debug.log")`. When `--debug` is active, debug.log contains all baseline.log fields plus detailed phase logs.
|
|
334
357
|
- **4-category log system**: all debug_log entries use exactly one of: `[GOV]` (governance checks: IL enforcement, CB triggers, scope lock, verdict evaluation), `[DECIDE]` (leader decisions: model selection, fix contracts, escalation), `[OPTION]` (configuration snapshot at loop start: thresholds, modes, models), `[FLOW]` (execution progress: worker/verifier dispatch, signal reads, phase transitions)
|
|
335
358
|
- **Re-execution versioning**: If `debug.log` already exists at `--debug` start, rename it to `debug-v{N}.log` (N = next available integer ≥ 1) before creating a fresh `debug.log`.
|
|
@@ -355,14 +378,14 @@ For each iteration (1 to max_iter):
|
|
|
355
378
|
|
|
356
379
|
**① Check sentinels**
|
|
357
380
|
```bash
|
|
358
|
-
test -f .
|
|
359
|
-
test -f .
|
|
381
|
+
test -f .rlp-desk/memos/<slug>-complete.md # → done
|
|
382
|
+
test -f .rlp-desk/memos/<slug>-blocked.md # → stop
|
|
360
383
|
```
|
|
361
384
|
|
|
362
385
|
**①½ Prep-stage cleanup**
|
|
363
386
|
```bash
|
|
364
|
-
rm -f .
|
|
365
|
-
rm -f .
|
|
387
|
+
rm -f .rlp-desk/memos/<slug>-done-claim.json
|
|
388
|
+
rm -f .rlp-desk/memos/<slug>-verify-verdict.json
|
|
366
389
|
```
|
|
367
390
|
|
|
368
391
|
**② Read memory.md** → Stop Status, Next Iteration Contract
|
|
@@ -378,15 +401,15 @@ rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
|
|
|
378
401
|
|
|
379
402
|
**④ Build worker prompt (Prompt Assembly Protocol)**
|
|
380
403
|
1. Capture `WORKING_DIR` once: use `$PWD` from when `/rlp-desk run` was invoked. Store for all prompt construction.
|
|
381
|
-
2. Read `.
|
|
404
|
+
2. Read `.rlp-desk/prompts/<slug>.worker.prompt.md` — use its content **verbatim**. Do NOT rewrite, paraphrase, or regenerate paths. The prompt file contains correct absolute paths from init.
|
|
382
405
|
2a. **Per-US PRD injection** (when targeting a specific `us_id`, not "ALL"):
|
|
383
|
-
- Check if `.
|
|
406
|
+
- Check if `.rlp-desk/plans/prd-<slug>-{us_id}.md` exists (created by init split)
|
|
384
407
|
- If yes: in the assembled prompt text, replace the full PRD reference (`prd-<slug>.md`) with the per-US file path (`prd-<slug>-{us_id}.md`) — so Worker reads only the relevant US section
|
|
385
408
|
- If no per-US file: fall back to full PRD (`prd-<slug>.md`) with no change needed
|
|
386
409
|
- Note: this absolute-path substitution is permitted — only absolute→relative rewrites are forbidden.
|
|
387
410
|
3. Prepend meta comment: `## WORKING_DIR: {absolute path}` — Worker must use this as its working directory.
|
|
388
411
|
4. Append iteration number + memory contract.
|
|
389
|
-
5. Write to `.
|
|
412
|
+
5. Write to `.rlp-desk/logs/<slug>/iter-NNN.worker-prompt.md` (audit trail).
|
|
390
413
|
- Note: Worker ALWAYS records execution_steps in done-claim.json per governance §1f. No flag needed.
|
|
391
414
|
- **Rewriting paths from absolute to relative WILL break worktree campaigns. Only additions (WORKING_DIR header, iteration context) are allowed.**
|
|
392
415
|
|
|
@@ -637,7 +660,7 @@ When `--consensus` is not `off`, also track in `status.json`:
|
|
|
637
660
|
---
|
|
638
661
|
|
|
639
662
|
## `status <slug>`
|
|
640
|
-
Read `.
|
|
663
|
+
Read `.rlp-desk/logs/<slug>/runtime/status.json` and display a detailed report:
|
|
641
664
|
|
|
642
665
|
```
|
|
643
666
|
Campaign: <slug>
|
|
@@ -660,22 +683,22 @@ Read the last `verify-verdict.json` to show the most recent verdict summary and
|
|
|
660
683
|
|
|
661
684
|
## `clean <slug> [--kill-session]`
|
|
662
685
|
Remove:
|
|
663
|
-
- `.
|
|
664
|
-
- `.
|
|
665
|
-
- `.
|
|
666
|
-
- `.
|
|
667
|
-
- `.
|
|
668
|
-
- `.
|
|
669
|
-
- `.
|
|
670
|
-
- `.
|
|
671
|
-
- `.
|
|
672
|
-
- `.
|
|
686
|
+
- `.rlp-desk/memos/<slug>-complete.md`
|
|
687
|
+
- `.rlp-desk/memos/<slug>-blocked.md`
|
|
688
|
+
- `.rlp-desk/memos/<slug>-done-claim.json`
|
|
689
|
+
- `.rlp-desk/memos/<slug>-verify-verdict.json`
|
|
690
|
+
- `.rlp-desk/memos/<slug>-iter-signal.json`
|
|
691
|
+
- `.rlp-desk/logs/<slug>/circuit-breaker.json`
|
|
692
|
+
- `.rlp-desk/logs/<slug>/runtime/session-config.json`
|
|
693
|
+
- `.rlp-desk/logs/<slug>/runtime/worker-heartbeat.json`
|
|
694
|
+
- `.rlp-desk/logs/<slug>/runtime/verifier-heartbeat.json`
|
|
695
|
+
- `.rlp-desk/memos/<slug>-escalation.md`
|
|
673
696
|
Note: `campaign-report.md`, `campaign-report-v{N}.md`, `iter-NNN-done-claim.json`, and `iter-NNN-verify-verdict.json` are intentionally preserved across clean for historical comparison. Analytics files (`debug.log`, `campaign.jsonl`, `self-verification-data.json`, `self-verification-report-NNN.md`) at `~/.claude/ralph-desk/analytics/<slug>/` are NOT affected by project-level clean.
|
|
674
697
|
|
|
675
698
|
If `--kill-session` is passed, clean up Worker/Verifier tmux panes using session-config.json:
|
|
676
699
|
```bash
|
|
677
700
|
# Read pane IDs from session-config.json (safe — targets only Worker/Verifier panes)
|
|
678
|
-
SESSION_CONFIG=".
|
|
701
|
+
SESSION_CONFIG=".rlp-desk/logs/<slug>/runtime/session-config.json"
|
|
679
702
|
if [ -f "$SESSION_CONFIG" ] && command -v jq &>/dev/null; then
|
|
680
703
|
WORKER_PANE=$(jq -r '.panes.worker // empty' "$SESSION_CONFIG")
|
|
681
704
|
VERIFIER_PANE=$(jq -r '.panes.verifier // empty' "$SESSION_CONFIG")
|
|
@@ -715,8 +738,8 @@ Data sources:
|
|
|
715
738
|
|
|
716
739
|
Resume a previously interrupted campaign. Equivalent to `run <slug>` but explicitly restores state:
|
|
717
740
|
|
|
718
|
-
1. Read `.
|
|
719
|
-
2. Read `.
|
|
741
|
+
1. Read `.rlp-desk/logs/<slug>/runtime/status.json` for `verified_us`, `iteration`, `consecutive_failures`
|
|
742
|
+
2. Read `.rlp-desk/memos/<slug>-memory.md` for completed stories and next iteration contract
|
|
720
743
|
3. Check for sentinels (`complete.md`, `blocked.md`) — if present, inform user and stop
|
|
721
744
|
4. If no sentinels, invoke `run <slug>` with the same options from the previous session (stored in status.json fields: `worker_model`, `verifier_model`, `final_verifier_model`, `verify_mode`, `consensus_mode`)
|
|
722
745
|
5. The runner automatically restores `verified_us` from memory or status.json on startup
|
package/src/governance.md
CHANGED
|
@@ -297,13 +297,54 @@ BLOCKED writes a JSON sidecar (`<slug>-blocked.json`) alongside the markdown sen
|
|
|
297
297
|
- English: `depends on US-`, `blocking US-`, `awaits US-`, `post-iter US-`, `requires US-N`, `cross-US`
|
|
298
298
|
- Korean: `US-N 산출물`, `신규 US-`, `post-iter`
|
|
299
299
|
|
|
300
|
-
**Write Order Contract (atomicity invariant)
|
|
301
|
-
1.
|
|
302
|
-
2.
|
|
303
|
-
3. Invariant: **markdown exists ⇒ JSON exists** (
|
|
304
|
-
4. Wrappers SHOULD watch markdown sentinel, then read JSON sidecar. If JSON not yet visible (rare), retry up to 5 × 50ms before failing.
|
|
300
|
+
**Write Order Contract (atomicity invariant)** — v5.7 §4.24 reversed:
|
|
301
|
+
1. **markdown sentinel written FIRST** via `writeSentinelExclusive` (`fs.open(path, 'wx')` — O_EXCL first-writer-wins). The md acts as the race lock.
|
|
302
|
+
2. **JSON sidecar written SECOND**, only by the winning writer.
|
|
303
|
+
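3. Invariant: **markdown exists ⇒ JSON exists** once the winning writer has completed step 2 (winner writes both; losers see EEXIST and return without touching JSON, preserving the winner's content). Because the markdown is written first, there is a brief window between steps 1 and 2 in which the markdown is visible without its JSON sidecar; item 4 covers how readers handle it.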
|
|
304
|
+
4. Wrappers SHOULD watch the markdown sentinel, then read the JSON sidecar. If the JSON is not yet visible (rare; the gap is normally ≤50ms), retry up to 5 × 50ms before failing.
|
|
305
305
|
|
|
306
|
-
`
|
|
306
|
+
`writeSentinelExclusive` (in `src/node/shared/fs.mjs`) provides per-file first-writer-wins; cross-file ordering is enforced by the explicit md-then-JSON sequence inside `writeSentinel`.
|
|
307
|
+
|
|
308
|
+
## 1g. Sentinel Guarantee Invariant (file-guarantee contract)
|
|
309
|
+
|
|
310
|
+
**Every terminal exit of `runCampaign()` MUST leave exactly one sentinel on disk: `<slug>-blocked.md` XOR `<slug>-complete.md`.**
|
|
311
|
+
|
|
312
|
+
This invariant is the foundation of the fresh-context architecture. If a campaign exits without any sentinel, future iterations cannot determine campaign state — Worker/Verifier are dispatched into a campaign whose history they cannot reconstruct.
|
|
313
|
+
|
|
314
|
+
### Enforcement (3-layer defense)
|
|
315
|
+
|
|
316
|
+
1. **Per-poll-site sentinel write** (`_handlePollFailure` helper at `src/node/runner/campaign-main-loop.mjs`). Every `pollForSignal` call site (Worker, VerifierPerUS, VerifierFinal, Flywheel, Guard) is wrapped in `try { … } catch (error) { return _handlePollFailure(error, { role, … }); }`. The helper classifies via `BLOCK_TAGS` typed enum, calls `writeSentinel` (idempotent via O_EXCL), and returns `{status:'blocked', …}` so the caller exits the loop cleanly.
|
|
317
|
+
|
|
318
|
+
2. **Run-level try/finally backstop** (`_ensureTerminalSentinel`). After the campaign body executes, a `finally` block checks `exists(blockedSentinel) XOR exists(completeSentinel)`. If neither (paused state `continue` excepted), writes a synthetic BLOCKED `infra_failure/leader_exited_without_terminal_state` so even unhandled exceptions cannot escape silently.
|
|
319
|
+
|
|
320
|
+
3. **Schema validator at READ boundary** (`validateArtifact`). After every `pollForSignal` returns parsed JSON, validates `(slug, iteration ≥ floor, signal_type matches read context, us_id ∈ usList ∪ {ALL})`. Throws `MalformedArtifactError({field, expected, got})` → caught by same `_handlePollFailure` → BLOCKED `contract_violation/malformed_artifact` (recoverable).
|
|
321
|
+
|
|
322
|
+
### Per-role failure-category enum
|
|
323
|
+
|
|
324
|
+
`_classifyBlock` (in `campaign-main-loop.mjs`) maps each `BLOCK_TAGS` value to one of the locked taxonomy categories:
|
|
325
|
+
|
|
326
|
+
| Tag | reason_category | recoverable | Example trigger |
|
|
327
|
+
|-----|----------------|-------------|-----------------|
|
|
328
|
+
| `WORKER_EXITED` | `infra_failure` | false | Worker pane returned to shell without writing signal |
|
|
329
|
+
| `VERIFIER_EXITED` | `infra_failure` | false | Per-US Verifier exited without writing verdict |
|
|
330
|
+
| `FINAL_VERIFIER_EXITED` | `infra_failure` | false | Final ALL-verifier exited without writing verdict |
|
|
331
|
+
| `FLYWHEEL_EXITED` | `infra_failure` | false | Flywheel pane crashed |
|
|
332
|
+
| `GUARD_EXITED` | `infra_failure` | false | Guard pane crashed |
|
|
333
|
+
| `PROMPT_BLOCKED` | `infra_failure` | false | Default-No prompt — auto-Enter would CANCEL |
|
|
334
|
+
| `<role>_TIMEOUT` | `infra_failure` | false | pollForSignal timed out without exit detected |
|
|
335
|
+
| `MALFORMED_ARTIFACT` | `contract_violation` | true | Worker/Verifier wrote schema-violating JSON |
|
|
336
|
+
| `LEADER_EXITED_WITHOUT_TERMINAL_STATE` | `infra_failure` | false | Backstop fired (uncaught exception or paths outside controlled scope) |
|
|
337
|
+
|
|
338
|
+
### Auditing
|
|
339
|
+
|
|
340
|
+
Operators can verify the invariant for any campaign by running:
|
|
341
|
+
|
|
342
|
+
```sh
|
|
343
|
+
zsh tests/sv-gate-fast.sh # 30s mechanical check (greps + units)
|
|
344
|
+
zsh tests/sv-gate-full.sh # 5min including REAL tmux + REAL campaign E2E
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
The fast gate fails immediately if any pollForSignal call site lacks a `_handlePollFailure` wiring or the writeSentinelExclusive primitive is bypassed.
|
|
307
348
|
|
|
308
349
|
## 2. Roles
|
|
309
350
|
|
|
@@ -468,7 +509,7 @@ Characteristics:
|
|
|
468
509
|
|
|
469
510
|
### Project-local
|
|
470
511
|
```
|
|
471
|
-
.
|
|
512
|
+
.rlp-desk/
|
|
472
513
|
├── prompts/
|
|
473
514
|
│ ├── <slug>.worker.prompt.md # Worker base prompt (regenerated on re-execution)
|
|
474
515
|
│ └── <slug>.verifier.prompt.md # Verifier base prompt (regenerated on re-execution)
|
|
@@ -553,6 +594,14 @@ for iteration in 1..max_iter:
|
|
|
553
594
|
• fail + retries exhausted → BLOCKED
|
|
554
595
|
• inconclusive → BLOCKED (escalate to user)
|
|
555
596
|
- Guard count tracked per-US in status.json
|
|
597
|
+
- **Mode support (v0.12.0+, v5.7 §4.3)**: flywheel runs identically in
|
|
598
|
+
--mode agent and --mode tmux when routed through the Node leader
|
|
599
|
+
(`node ~/.claude/ralph-desk/node/run.mjs run --mode tmux`). The legacy
|
|
600
|
+
`run_ralph_desk.zsh` runner rejects --flywheel/--flywheel-guard with
|
|
601
|
+
exit 2 + migration banner; users must use the Node entry. Same applies
|
|
602
|
+
to --with-self-verification: SV report generation is supported in
|
|
603
|
+
tmux mode via the Node leader's generateSVReport() (no longer
|
|
604
|
+
agent-mode-only).
|
|
556
605
|
|
|
557
606
|
⑦ Execute Verifier (see §7a for per-US and §7b for consensus details)
|
|
558
607
|
- Build prompt (scoped to us_id if per-us mode) → log
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
cli/command-builder.mjs
|
|
2
|
+
constants.mjs
|
|
3
|
+
init/campaign-initializer.mjs
|
|
4
|
+
polling/signal-poller.mjs
|
|
5
|
+
prompts/prompt-assembler.mjs
|
|
6
|
+
reporting/campaign-reporting.mjs
|
|
7
|
+
run.mjs
|
|
8
|
+
runner/campaign-main-loop.mjs
|
|
9
|
+
runner/leader-registry.mjs
|
|
10
|
+
runner/prompt-dismisser.mjs
|
|
11
|
+
shared/fs.mjs
|
|
12
|
+
shared/paths.mjs
|
|
13
|
+
tmux/pane-manager.mjs
|
|
14
|
+
util/debug-log.mjs
|
|
15
|
+
util/shell-quote.mjs
|
|
@@ -1,7 +1,28 @@
|
|
|
1
|
+
import { shellQuote } from '../util/shell-quote.mjs';
|
|
2
|
+
import { OPUS_1M_BETA, isOpusModel } from '../constants.mjs';
|
|
3
|
+
|
|
1
4
|
const CLAUDE_BIN = 'claude';
|
|
2
5
|
const CODEX_BIN = 'codex';
|
|
3
6
|
const CLAUDE_MODELS = new Set(['haiku', 'sonnet', 'opus']);
|
|
4
7
|
|
|
8
|
+
// v0.13.0: surface engine classification for tmux+claude warning + observability.
|
|
9
|
+
/**
 * Reports whether a model flag selects the Claude engine.
 *
 * Only the text before the first ':' is inspected. That prefix counts as
 * Claude when it is one of the short aliases in CLAUDE_MODELS or when it
 * starts with 'claude-'. Matching is case-sensitive.
 *
 * @param {unknown} modelFlag - Raw model flag value.
 * @returns {boolean} true for a Claude model flag; false for anything else,
 *   including non-strings and empty strings.
 */
export function isClaudeEngine(modelFlag) {
  // Anything that is not a non-empty string cannot name an engine.
  if (typeof modelFlag !== 'string' || modelFlag === '') return false;

  // Keep only the part before the first ':'; it is empty for input like ':x'.
  const [engineId] = modelFlag.split(':', 1);
  if (!engineId) return false;

  return CLAUDE_MODELS.has(engineId) || engineId.startsWith('claude-');
}
|
|
25
|
+
|
|
5
26
|
function assertTuiMode(mode, builderName) {
|
|
6
27
|
if (mode !== 'tui') {
|
|
7
28
|
throw new Error(`${builderName} unknown mode '${mode}'`);
|
|
@@ -11,19 +32,36 @@ function assertTuiMode(mode, builderName) {
|
|
|
11
32
|
export function buildClaudeCmd(mode, model, options = {}) {
|
|
12
33
|
assertTuiMode(mode, 'buildClaudeCmd');
|
|
13
34
|
|
|
14
|
-
|
|
15
|
-
|
|
35
|
+
// v5.7 §4.9: auto-enable 1M-token context for Opus models. Long campaigns
|
|
36
|
+
// no longer silently truncate at 200K. Header is benign for non-Opus calls
|
|
37
|
+
// but we omit it there to keep the cmdline tidy.
|
|
38
|
+
const parts = ['DISABLE_OMC=1'];
|
|
39
|
+
if (isOpusModel(model)) {
|
|
40
|
+
parts.push(`ANTHROPIC_BETA=${shellQuote(OPUS_1M_BETA)}`);
|
|
41
|
+
}
|
|
42
|
+
parts.push(
|
|
16
43
|
CLAUDE_BIN,
|
|
17
44
|
'--model',
|
|
18
|
-
model,
|
|
45
|
+
shellQuote(model),
|
|
19
46
|
'--mcp-config',
|
|
20
47
|
'\'{"mcpServers":{}}\'',
|
|
21
48
|
'--strict-mcp-config',
|
|
22
49
|
'--dangerously-skip-permissions',
|
|
23
|
-
|
|
50
|
+
);
|
|
51
|
+
|
|
52
|
+
// v5.7 §4.11.a: explicit --add-dir whitelist. With --dangerously-skip-permissions
|
|
53
|
+
// alone, claude CLI still surfaces TUI prompts for cwd-adjacent paths in some
|
|
54
|
+
// versions. Add the home rlp-desk tree (where Leader writes registry.jsonl
|
|
55
|
+
// and reads governance docs) plus the campaign cwd, so Worker has full
|
|
56
|
+
// authorized access without prompts.
|
|
57
|
+
if (options.addDirs && Array.isArray(options.addDirs)) {
|
|
58
|
+
for (const dir of options.addDirs) {
|
|
59
|
+
if (dir) parts.push('--add-dir', shellQuote(dir));
|
|
60
|
+
}
|
|
61
|
+
}
|
|
24
62
|
|
|
25
63
|
if (options.effort !== undefined && options.effort !== '') {
|
|
26
|
-
parts.push('--effort', options.effort);
|
|
64
|
+
parts.push('--effort', shellQuote(options.effort));
|
|
27
65
|
}
|
|
28
66
|
|
|
29
67
|
return parts.join(' ');
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
// Shared runtime constants. Single-source for cross-module values.
|
|
2
|
+
|
|
3
|
+
// Anthropic Claude API beta header that activates the 1M-token context window
|
|
4
|
+
// for Opus models. Auto-prepended to every claude CLI invocation that uses
|
|
5
|
+
// --model opus so long campaigns no longer silently truncate at 200K.
|
|
6
|
+
//
|
|
7
|
+
// Docs: https://docs.anthropic.com/en/docs/build-with-claude/context-windows
|
|
8
|
+
// (search "1M context") — header rotates with each beta phase.
|
|
9
|
+
export const OPUS_1M_BETA = 'context-1m-2025-08-07';
|
|
10
|
+
|
|
11
|
+
// Model id that triggers Opus 1M auto-enable. Plain string match against the
|
|
12
|
+
// --model value (post-shellQuote stripping). Bracketed form
|
|
13
|
+
// 'claude-opus-4-7[1m]' is also Opus and benefits from this; pattern match
|
|
14
|
+
// covers both.
|
|
15
|
+
/**
 * Reports whether a model value names an Opus model.
 *
 * The value is stringified and lower-cased, then accepted when it is exactly
 * 'opus' or begins with 'claude-opus-' (which also covers bracketed ids such
 * as 'claude-opus-4-7[1m]').
 *
 * @param {unknown} model - Model value; any falsy value yields false.
 * @returns {boolean} true when the value names an Opus model.
 */
export function isOpusModel(model) {
  if (!model) {
    return false;
  }

  const normalized = String(model).toLowerCase();
  if (normalized === 'opus') {
    return true;
  }
  return normalized.startsWith('claude-opus-');
}
|
|
@@ -1,8 +1,79 @@
|
|
|
1
1
|
import fs from 'node:fs/promises';
|
|
2
|
+
import fsSync from 'node:fs';
|
|
2
3
|
import path from 'node:path';
|
|
3
4
|
|
|
5
|
+
import { LEGACY_DESK_REL, resolveDeskRoot } from '../util/desk-root.mjs';
|
|
6
|
+
|
|
4
7
|
const GITIGNORE_MARKER = '# RLP Desk runtime artifacts';
|
|
5
|
-
const GITIGNORE_RULE = '.
|
|
8
|
+
const GITIGNORE_RULE = '.rlp-desk/';
|
|
9
|
+
const LEGACY_GITIGNORE_RULE = '.claude/ralph-desk/';
|
|
10
|
+
const MIGRATION_LOCK_FILE = '.rlp-desk-migration.lock';
|
|
11
|
+
const STALE_LOCK_MS = 5 * 60 * 1000;
|
|
12
|
+
|
|
13
|
+
/**
 * Acquires the migration lock by exclusively creating `lockPath`.
 *
 * An existing lock whose mtime is older than STALE_LOCK_MS is removed and
 * re-created; a younger one means another migration is running.
 *
 * @param {string} lockPath - Absolute path of the lock file.
 * @returns {number} File descriptor of the newly created lock file.
 * @throws {Error} "Migration already in progress" when a live lock exists or
 *   another process re-creates the lock first; any other fs error is rethrown.
 */
function acquireMigrationLock(lockPath) {
  // 'wx' fails with EEXIST when the file already exists (first creator wins).
  const openExclusive = () => fsSync.openSync(lockPath, 'wx');

  try {
    return openExclusive();
  } catch (error) {
    if (error.code !== 'EEXIST') {
      throw error;
    }
  }

  // A lock file is present: measure its age, tolerating the holder removing
  // it between our failed open and this stat.
  let ageMs = null;
  try {
    const stats = fsSync.statSync(lockPath);
    ageMs = Date.now() - stats.mtimeMs;
  } catch (statError) {
    if (statError.code !== 'ENOENT') {
      throw statError;
    }
  }

  if (ageMs !== null) {
    if (!(ageMs > STALE_LOCK_MS)) {
      throw new Error(`Migration already in progress (lock at ${lockPath}, age ${Math.round(ageMs / 1000)}s)`);
    }

    // NOTE(review): stat-then-unlink is not atomic; two processes that both
    // observe the same stale lock can each remove it. Confirm whether
    // concurrent stale-lock recovery needs stronger protection.
    try {
      fsSync.unlinkSync(lockPath);
    } catch (unlinkError) {
      if (unlinkError.code !== 'ENOENT') {
        throw unlinkError;
      }
    }
  }

  try {
    return openExclusive();
  } catch (error) {
    if (error.code === 'EEXIST') {
      // Another process created the lock after we looked: report it the same
      // way as a live lock instead of leaking a raw errno error.
      throw new Error(`Migration already in progress (lock at ${lockPath})`, { cause: error });
    }
    throw error;
  }
}

/**
 * Moves a legacy desk directory (`LEGACY_DESK_REL` under `rootDir`) to the
 * location returned by `resolveDeskRoot(rootDir, env)`.
 *
 * The move is guarded by a lock file in `rootDir` so that concurrent callers
 * do not both attempt the rename. The lock is always closed and removed once
 * it has been acquired, whether the migration succeeds or throws.
 *
 * @param {string} rootDir - Project root that may contain the legacy directory.
 * @param {object} [env=process.env] - Environment passed to `resolveDeskRoot`.
 * @returns {{action: 'noop', reason: 'new-only'|'neither-exists'}
 *   | {action: 'migrated', from: string, to: string}} Outcome of the call.
 * @throws {Error} When another migration holds the lock, or when both the
 *   legacy and the new directory exist; fs errors are propagated.
 */
export function migrateLegacyDesk(rootDir, env = process.env) {
  const legacyPath = path.join(rootDir, LEGACY_DESK_REL);
  const newPath = resolveDeskRoot(rootDir, env);
  const lockPath = path.join(rootDir, MIGRATION_LOCK_FILE);

  // Cheap pre-lock check: with no legacy directory there is nothing to move,
  // so the lock is never touched.
  if (!fsSync.existsSync(legacyPath)) {
    return { action: 'noop', reason: fsSync.existsSync(newPath) ? 'new-only' : 'neither-exists' };
  }

  const lockFd = acquireMigrationLock(lockPath);

  try {
    // The PID is written into the lock file; nothing in this block reads it back.
    fsSync.writeSync(lockFd, String(process.pid));

    // Re-check under the lock: a competing process may have migrated (or
    // created the new directory) between the pre-check and lock acquisition.
    const legacyExistsLocked = fsSync.existsSync(legacyPath);
    const newExistsLocked = fsSync.existsSync(newPath);

    if (!legacyExistsLocked) {
      return { action: 'noop', reason: newExistsLocked ? 'new-only' : 'neither-exists' };
    }

    if (newExistsLocked) {
      throw new Error(
        `Migration aborted: both directories exist. Remove one before re-run. legacy=${legacyPath}, new=${newPath}`,
      );
    }

    fsSync.mkdirSync(path.dirname(newPath), { recursive: true });
    fsSync.renameSync(legacyPath, newPath);
    return { action: 'migrated', from: legacyPath, to: newPath };
  } finally {
    // Best-effort cleanup: a failure to close or remove the lock must not
    // mask the migration result or the original error.
    try { fsSync.closeSync(lockFd); } catch (_) { /* noop */ }
    try { fsSync.unlinkSync(lockPath); } catch (_) { /* noop */ }
  }
}
|
|
6
77
|
|
|
7
78
|
export async function initCampaign(slug, objective, options = {}) {
|
|
8
79
|
const normalizedSlug = normalizeSlug(slug);
|
|
@@ -10,17 +81,21 @@ export async function initCampaign(slug, objective, options = {}) {
|
|
|
10
81
|
const mode = options.mode ?? 'agent';
|
|
11
82
|
const rootDir = path.resolve(options.rootDir ?? process.cwd());
|
|
12
83
|
const tmuxEnv = options.tmuxEnv ?? process.env.TMUX ?? '';
|
|
13
|
-
const
|
|
84
|
+
const env = options.env ?? process.env;
|
|
14
85
|
|
|
15
86
|
if (mode === 'tmux' && !tmuxEnv) {
|
|
16
87
|
throw new Error('tmux required');
|
|
17
88
|
}
|
|
18
89
|
|
|
90
|
+
migrateLegacyDesk(rootDir, env);
|
|
91
|
+
|
|
92
|
+
const deskRoot = resolveDeskRoot(rootDir, env);
|
|
93
|
+
|
|
19
94
|
if (mode === 'fresh') {
|
|
20
95
|
await fs.rm(deskRoot, { recursive: true, force: true });
|
|
21
96
|
}
|
|
22
97
|
|
|
23
|
-
const paths = buildPaths(rootDir, normalizedSlug);
|
|
98
|
+
const paths = buildPaths(rootDir, normalizedSlug, env);
|
|
24
99
|
await ensureDirectories(paths);
|
|
25
100
|
await ensureGitignore(rootDir);
|
|
26
101
|
|
|
@@ -55,8 +130,8 @@ function normalizeSlug(value) {
|
|
|
55
130
|
return slug;
|
|
56
131
|
}
|
|
57
132
|
|
|
58
|
-
function buildPaths(rootDir, slug) {
|
|
59
|
-
const deskRoot =
|
|
133
|
+
function buildPaths(rootDir, slug, env = process.env) {
|
|
134
|
+
const deskRoot = resolveDeskRoot(rootDir, env);
|
|
60
135
|
const promptsDir = path.join(deskRoot, 'prompts');
|
|
61
136
|
const plansDir = path.join(deskRoot, 'plans');
|
|
62
137
|
const memosDir = path.join(deskRoot, 'memos');
|
|
@@ -105,13 +180,28 @@ async function ensureGitignore(rootDir) {
|
|
|
105
180
|
}
|
|
106
181
|
}
|
|
107
182
|
|
|
108
|
-
|
|
109
|
-
|
|
183
|
+
let updated = content;
|
|
184
|
+
let changed = false;
|
|
185
|
+
|
|
186
|
+
// v0.13.0: drop the legacy .claude/ralph-desk/ rule if present.
|
|
187
|
+
if (updated.includes(LEGACY_GITIGNORE_RULE)) {
|
|
188
|
+
const legacyLineRegex = new RegExp(
|
|
189
|
+
`^${LEGACY_GITIGNORE_RULE.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\r?\\n`,
|
|
190
|
+
'gm',
|
|
191
|
+
);
|
|
192
|
+
updated = updated.replace(legacyLineRegex, '');
|
|
193
|
+
changed = true;
|
|
110
194
|
}
|
|
111
195
|
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
196
|
+
if (!(updated.includes(GITIGNORE_MARKER) && updated.includes(GITIGNORE_RULE))) {
|
|
197
|
+
const prefix = updated.length > 0 && !updated.endsWith('\n') ? '\n' : '';
|
|
198
|
+
updated = `${updated}${prefix}${GITIGNORE_MARKER}\n${GITIGNORE_RULE}\n`;
|
|
199
|
+
changed = true;
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
if (changed) {
|
|
203
|
+
await fs.writeFile(gitignorePath, updated, 'utf8');
|
|
204
|
+
}
|
|
115
205
|
}
|
|
116
206
|
|
|
117
207
|
async function writeIfMissing(targetPath, content) {
|