@ai-dev-methodologies/rlp-desk 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -8
- package/docs/architecture.md +34 -8
- package/docs/getting-started.md +2 -2
- package/docs/protocol-reference.md +267 -14
- package/examples/calculator/.claude/ralph-desk/context/loop-test-latest.md +12 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-output.log +0 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-prompt.md +38 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-trigger.sh +28 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/session-config.json +25 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/status.json +10 -0
- package/examples/calculator/.claude/ralph-desk/logs/loop-test/worker-heartbeat.json +1 -0
- package/examples/calculator/.claude/ralph-desk/memos/loop-test-memory.md +17 -0
- package/examples/calculator/.claude/ralph-desk/prompts/loop-test.worker.prompt.md +1 -1
- package/install.sh +14 -0
- package/package.json +1 -1
- package/scripts/postinstall.js +17 -1
- package/scripts/uninstall.js +1 -0
- package/src/commands/rlp-desk.md +112 -21
- package/src/governance.md +92 -7
- package/src/scripts/init_ralph_desk.zsh +51 -30
- package/src/scripts/run_ralph_desk.zsh +1259 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
{
|
|
2
|
+
"session_name": "rlp-desk-loop-test-20260318-232859",
|
|
3
|
+
"slug": "loop-test",
|
|
4
|
+
"created_at": "2026-03-18T14:28:59Z",
|
|
5
|
+
"panes": {
|
|
6
|
+
"leader": "%99",
|
|
7
|
+
"worker": "%100",
|
|
8
|
+
"verifier": "%101"
|
|
9
|
+
},
|
|
10
|
+
"pid": 65962,
|
|
11
|
+
"root": "/Users/kyjin/dev/own/ai-dev-methodologies/rlp-desk/examples/calculator",
|
|
12
|
+
"models": {
|
|
13
|
+
"worker": "sonnet",
|
|
14
|
+
"verifier": "opus"
|
|
15
|
+
},
|
|
16
|
+
"config": {
|
|
17
|
+
"max_iter": 20,
|
|
18
|
+
"poll_interval": 5,
|
|
19
|
+
"iter_timeout": 600,
|
|
20
|
+
"heartbeat_stale_threshold": 120,
|
|
21
|
+
"max_restarts": 3,
|
|
22
|
+
"idle_nudge_threshold": 30,
|
|
23
|
+
"max_nudges": 3
|
|
24
|
+
}
|
|
25
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"ts":"2026-03-18T14:29:15Z","pid":66349}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# loop-test - Campaign Memory
|
|
2
|
+
|
|
3
|
+
## Stop Status
|
|
4
|
+
continue
|
|
5
|
+
|
|
6
|
+
## Objective
|
|
7
|
+
Implement a Python calculator module: calc.py (4 functions + type hints + ValueError) + test_calc.py (pytest, 8+ tests, all passed)
|
|
8
|
+
|
|
9
|
+
## Current State
|
|
10
|
+
Iteration 0 - not started
|
|
11
|
+
|
|
12
|
+
## Next Iteration Contract
|
|
13
|
+
Start from the beginning: read PRD and implement US-001 (calc.py with 4 functions).
|
|
14
|
+
|
|
15
|
+
## Patterns Discovered
|
|
16
|
+
## Learnings
|
|
17
|
+
## Evidence Chain
|
|
@@ -18,7 +18,7 @@ Iteration rules:
|
|
|
18
18
|
|
|
19
19
|
MANDATORY: When done, write the following signal file:
|
|
20
20
|
- Path: .claude/ralph-desk/memos/loop-test-iter-signal.json
|
|
21
|
-
- Format: {"iteration": N, "status": "continue|verify|blocked", "timestamp": "ISO"}
|
|
21
|
+
- Format: {"iteration": N, "status": "continue|verify|blocked", "summary": "what was done", "timestamp": "ISO"}
|
|
22
22
|
- Status values:
|
|
23
23
|
- "continue" = current story done but other stories remain
|
|
24
24
|
- "verify" = all stories complete + done-claim written
|
package/install.sh
CHANGED
|
@@ -35,19 +35,33 @@ echo " Downloading init script..."
|
|
|
35
35
|
curl -sSL "$REPO_URL/src/scripts/init_ralph_desk.zsh" -o "$DESK_DIR/init_ralph_desk.zsh"
|
|
36
36
|
chmod +x "$DESK_DIR/init_ralph_desk.zsh"
|
|
37
37
|
|
|
38
|
+
# Download tmux runner script
|
|
39
|
+
echo " Downloading tmux runner script..."
|
|
40
|
+
curl -sSL "$REPO_URL/src/scripts/run_ralph_desk.zsh" -o "$DESK_DIR/run_ralph_desk.zsh"
|
|
41
|
+
chmod +x "$DESK_DIR/run_ralph_desk.zsh"
|
|
42
|
+
|
|
38
43
|
# Download governance protocol
|
|
39
44
|
echo " Downloading governance protocol..."
|
|
40
45
|
curl -sSL "$REPO_URL/src/governance.md" -o "$DESK_DIR/governance.md"
|
|
41
46
|
|
|
47
|
+
# Check tmux availability
|
|
48
|
+
if ! command -v tmux &>/dev/null; then
|
|
49
|
+
echo ""
|
|
50
|
+
echo " [warn] tmux not found. Tmux execution mode (--mode tmux) will not be available."
|
|
51
|
+
echo " Install tmux to use lean mode: https://github.com/tmux/tmux/wiki/Installing"
|
|
52
|
+
fi
|
|
53
|
+
|
|
42
54
|
echo ""
|
|
43
55
|
echo " Done! Installed to:"
|
|
44
56
|
echo ""
|
|
45
57
|
echo " Slash command: $COMMANDS_DIR/rlp-desk.md"
|
|
46
58
|
echo " Init script: $DESK_DIR/init_ralph_desk.zsh"
|
|
59
|
+
echo " Tmux runner: $DESK_DIR/run_ralph_desk.zsh"
|
|
47
60
|
echo " Governance: $DESK_DIR/governance.md"
|
|
48
61
|
echo ""
|
|
49
62
|
echo " Usage:"
|
|
50
63
|
echo " 1. Open Claude Code in your project directory"
|
|
51
64
|
echo " 2. Run: /rlp-desk brainstorm \"your task description\""
|
|
52
65
|
echo " 3. Run: /rlp-desk run <slug>"
|
|
66
|
+
echo " 4. Run: /rlp-desk run <slug> --mode tmux (lean mode)"
|
|
53
67
|
echo ""
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-dev-methodologies/rlp-desk",
|
|
3
|
-
"version": "0.0.1",
|
|
3
|
+
"version": "0.1.0",
|
|
4
4
|
"description": "Fresh-context iterative loops for Claude Code — autonomous task completion with independent verification",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"postinstall": "node scripts/postinstall.js",
|
package/scripts/postinstall.js
CHANGED
|
@@ -4,15 +4,17 @@
|
|
|
4
4
|
const fs = require("fs");
|
|
5
5
|
const path = require("path");
|
|
6
6
|
const os = require("os");
|
|
7
|
+
const { execSync } = require("child_process");
|
|
7
8
|
|
|
8
9
|
const home = os.homedir();
|
|
9
10
|
const claudeDir = path.join(home, ".claude");
|
|
10
11
|
const commandsDir = path.join(claudeDir, "commands");
|
|
11
12
|
const deskDir = path.join(claudeDir, "ralph-desk");
|
|
12
13
|
const pkgDir = path.join(__dirname, "..");
|
|
14
|
+
const pkg = require(path.join(pkgDir, "package.json"));
|
|
13
15
|
|
|
14
16
|
console.log("");
|
|
15
|
-
console.log(" RLP Desk
|
|
17
|
+
console.log(" RLP Desk v" + pkg.version);
|
|
16
18
|
console.log(" ================");
|
|
17
19
|
console.log("");
|
|
18
20
|
|
|
@@ -27,6 +29,10 @@ const copies = [
|
|
|
27
29
|
"src/scripts/init_ralph_desk.zsh",
|
|
28
30
|
path.join(deskDir, "init_ralph_desk.zsh"),
|
|
29
31
|
],
|
|
32
|
+
[
|
|
33
|
+
"src/scripts/run_ralph_desk.zsh",
|
|
34
|
+
path.join(deskDir, "run_ralph_desk.zsh"),
|
|
35
|
+
],
|
|
30
36
|
["src/governance.md", path.join(deskDir, "governance.md")],
|
|
31
37
|
];
|
|
32
38
|
|
|
@@ -38,10 +44,20 @@ for (const [src, dest] of copies) {
|
|
|
38
44
|
// Make scripts executable
|
|
39
45
|
try {
|
|
40
46
|
fs.chmodSync(path.join(deskDir, "init_ralph_desk.zsh"), 0o755);
|
|
47
|
+
fs.chmodSync(path.join(deskDir, "run_ralph_desk.zsh"), 0o755);
|
|
41
48
|
} catch (_) {
|
|
42
49
|
// chmod may fail on Windows — not critical
|
|
43
50
|
}
|
|
44
51
|
|
|
52
|
+
// Check tmux availability
|
|
53
|
+
try {
|
|
54
|
+
execSync("which tmux", { stdio: "ignore" });
|
|
55
|
+
} catch (_) {
|
|
56
|
+
console.log(" [warn] tmux not found. Tmux execution mode (--mode tmux) will not be available.");
|
|
57
|
+
console.log(" Install tmux to use lean mode: https://github.com/tmux/tmux/wiki/Installing");
|
|
58
|
+
console.log("");
|
|
59
|
+
}
|
|
60
|
+
|
|
45
61
|
console.log("");
|
|
46
62
|
console.log(" Done! Open Claude Code and run:");
|
|
47
63
|
console.log(" /rlp-desk brainstorm \"your task description\"");
|
package/scripts/uninstall.js
CHANGED
package/src/commands/rlp-desk.md
CHANGED
|
@@ -15,20 +15,28 @@ Parse the first word of `$ARGUMENTS` as the subcommand.
|
|
|
15
15
|
|
|
16
16
|
## `brainstorm <description>`
|
|
17
17
|
|
|
18
|
-
Planning phase BEFORE init. Interactively define the contract with the user
|
|
18
|
+
Planning phase BEFORE init. Interactively define the contract **with the user**.
|
|
19
19
|
|
|
20
|
-
|
|
21
|
-
|
|
20
|
+
You MUST ask the user about each item below. Do NOT decide for them.
|
|
21
|
+
Present your suggestion, then wait for the user's confirmation or change.
|
|
22
|
+
|
|
23
|
+
Ask about these items one by one (or in small groups):
|
|
24
|
+
1. **Slug** — short identifier (e.g., `auth-refactor`). Suggest one, ask if OK.
|
|
22
25
|
2. **Objective** — what the loop achieves
|
|
23
|
-
3. **User Stories** — discrete units with testable acceptance criteria
|
|
24
|
-
4. **Iteration Unit** — what one worker does per iteration
|
|
26
|
+
3. **User Stories** — discrete units with testable acceptance criteria. Propose a breakdown, ask the user to confirm/modify.
|
|
27
|
+
4. **Iteration Unit** — what one worker does per iteration. Explicitly ask:
|
|
28
|
+
- "One US per iteration (bounded, incremental verification)?"
|
|
29
|
+
- "All stories at once (faster, single verification)?"
|
|
30
|
+
- Default recommendation: one US per iteration for 3+ stories.
|
|
25
31
|
5. **Verification Commands** — build, test, lint commands
|
|
26
32
|
6. **Completion / Blocked Criteria**
|
|
27
|
-
7. **Worker / Verifier Model** — haiku, sonnet, opus
|
|
28
|
-
8. **Max Iterations**
|
|
33
|
+
7. **Worker / Verifier Model** — haiku, sonnet, opus. Suggest defaults (worker: sonnet, verifier: opus), ask if OK.
|
|
34
|
+
8. **Max Iterations** — suggest based on story count, ask if OK.
|
|
29
35
|
|
|
30
|
-
|
|
36
|
+
After all items are confirmed, present the full contract summary.
|
|
37
|
+
On approval, offer to run `init`.
|
|
31
38
|
Do NOT create files during brainstorm.
|
|
39
|
+
Do NOT auto-decide iteration unit — the user MUST explicitly choose.
|
|
32
40
|
|
|
33
41
|
---
|
|
34
42
|
|
|
@@ -44,9 +52,40 @@ If brainstorm was done, auto-fill PRD and test-spec with the results.
|
|
|
44
52
|
**YOU are the leader. Do NOT delegate leadership.**
|
|
45
53
|
|
|
46
54
|
Options (parse from `$ARGUMENTS`):
|
|
55
|
+
- `--mode agent|tmux` (default: `agent`) — execution mode
|
|
47
56
|
- `--max-iter N` (default: 100)
|
|
48
57
|
- `--worker-model MODEL` (default: sonnet)
|
|
49
|
-
- `--verifier-model MODEL` (default:
|
|
58
|
+
- `--verifier-model MODEL` (default: opus)
|
|
59
|
+
- `--debug` — enable debug logging (tmux mode only; writes to `logs/<slug>/debug.log`)
|
|
60
|
+
|
|
61
|
+
### Mode Selection
|
|
62
|
+
|
|
63
|
+
Parse the `--mode` flag. If absent or `agent`, use the Agent() path below. If `tmux`, use the Tmux path.
|
|
64
|
+
|
|
65
|
+
#### Tmux Mode (`--mode tmux`)
|
|
66
|
+
|
|
67
|
+
When `--mode tmux` is specified:
|
|
68
|
+
|
|
69
|
+
1. **Validate scaffold** — same as Agent() mode: check `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
|
|
70
|
+
2. **Check sentinels** — same as Agent() mode.
|
|
71
|
+
3. **Check prerequisites** — verify `tmux` and `jq` are installed. If not, report what is missing and stop.
|
|
72
|
+
4. **Locate runner script** — find `run_ralph_desk.zsh` at `~/.claude/ralph-desk/run_ralph_desk.zsh`. If not found, tell the user to reinstall (`npm install` or `install.sh`).
|
|
73
|
+
5. **Launch** — shell out to the runner script with env vars derived from flags:
|
|
74
|
+
```bash
|
|
75
|
+
LOOP_NAME="<slug>" \
|
|
76
|
+
ROOT="$PWD" \
|
|
77
|
+
MAX_ITER=<--max-iter value> \
|
|
78
|
+
WORKER_MODEL=<--worker-model value> \
|
|
79
|
+
VERIFIER_MODEL=<--verifier-model value> \
|
|
80
|
+
DEBUG=<1 if --debug, else 0> \
|
|
81
|
+
zsh ~/.claude/ralph-desk/run_ralph_desk.zsh
|
|
82
|
+
```
|
|
83
|
+
6. **If the script exits with an error (exit code 1)** — report the error to the user and STOP. Do NOT attempt to work around it. Do NOT create tmux sessions yourself. Do NOT re-launch the script in a different way. Just tell the user what went wrong and suggest using Agent mode instead.
|
|
84
|
+
7. **If successful** — tell the user the tmux session has been started. The shell script takes over as the deterministic Leader. No Agent() calls are made in tmux mode.
|
|
85
|
+
|
|
86
|
+
**IMPORTANT:** Tmux mode requires the user to already be inside a tmux session. If the runner script refuses to start because `$TMUX` is not set, do NOT try to create a tmux session yourself. Tell the user: "Start tmux first, then retry."
|
|
87
|
+
|
|
88
|
+
#### Agent Mode (`--mode agent` or default)
|
|
50
89
|
|
|
51
90
|
### Preparation
|
|
52
91
|
1. Validate scaffold: `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
|
|
@@ -55,6 +94,8 @@ Options (parse from `$ARGUMENTS`):
|
|
|
55
94
|
|
|
56
95
|
### Leader Loop
|
|
57
96
|
|
|
97
|
+
**CRITICAL: DO NOT STOP between iterations.** You MUST continue the loop automatically until a sentinel is written (COMPLETE or BLOCKED) or max_iter is reached. Do NOT pause to ask the user. Do NOT wait for confirmation. The loop is fully autonomous — just report each iteration result briefly and immediately proceed to the next iteration.
|
|
98
|
+
|
|
58
99
|
For each iteration (1 to max_iter):
|
|
59
100
|
|
|
60
101
|
**① Check sentinels**
|
|
@@ -63,7 +104,15 @@ test -f .claude/ralph-desk/memos/<slug>-complete.md # → done
|
|
|
63
104
|
test -f .claude/ralph-desk/memos/<slug>-blocked.md # → stop
|
|
64
105
|
```
|
|
65
106
|
|
|
107
|
+
**①½ Prep-stage cleanup**
|
|
108
|
+
```bash
|
|
109
|
+
rm -f .claude/ralph-desk/memos/<slug>-done-claim.json
|
|
110
|
+
rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
|
|
111
|
+
```
|
|
112
|
+
|
|
66
113
|
**② Read memory.md** → Stop Status, Next Iteration Contract
|
|
114
|
+
- Also read **Completed Stories** → verified work so far
|
|
115
|
+
- Also read **Key Decisions** → settled architectural choices
|
|
67
116
|
|
|
68
117
|
**③ Decide model** (§4 of governance.md)
|
|
69
118
|
- Previous iteration failed → upgrade model
|
|
@@ -106,19 +155,31 @@ Agent(
|
|
|
106
155
|
```
|
|
107
156
|
- Read `verify-verdict.json`:
|
|
108
157
|
- `pass` + `complete` → write COMPLETE sentinel, report done!
|
|
109
|
-
- `fail` + `continue` →
|
|
158
|
+
- `fail` + `continue` → **run Fix Loop** (governance.md §7½):
|
|
159
|
+
1. Read `issues` array, sort by severity (`critical` → `major` → `minor`)
|
|
160
|
+
2. Build structured fix contract with traceability rule
|
|
161
|
+
3. Include `fix_hint` values labeled `(suggestion, non-authoritative)` if present
|
|
162
|
+
4. Increment `consecutive_failures` in `status.json`
|
|
163
|
+
5. Go to ⑧ with fix contract as next Worker contract
|
|
164
|
+
- `request_info` → Leader reads Verifier's questions, decides outcome (or relays to Worker in next contract) → go to ⑧
|
|
110
165
|
- `blocked` → write BLOCKED sentinel, stop
|
|
111
166
|
|
|
112
|
-
**⑧
|
|
167
|
+
**⑧ Write result log and report to user, continue loop**
|
|
168
|
+
- Write `logs/<slug>/iter-NNN.result.md`:
|
|
169
|
+
- Result status `[leader-measured]`
|
|
170
|
+
- Files changed via `git diff --stat HEAD~1 HEAD` `[git-measured]`
|
|
171
|
+
- Verifier verdict `[leader-measured]`
|
|
113
172
|
- Write `status.json`
|
|
114
173
|
- Report: iteration N, phase, model used, result
|
|
115
|
-
- Clean `done-claim.json`, `verify-verdict.json` for next iteration
|
|
116
174
|
|
|
117
175
|
### Circuit Breaker
|
|
118
176
|
- context-latest.md unchanged 3 iterations → BLOCKED
|
|
119
|
-
- Same
|
|
177
|
+
- Same acceptance criterion fails 2 consecutive iterations → upgrade model, retry once, then BLOCKED
|
|
178
|
+
- 3 consecutive **fail** verdicts on 3 unique criterion IDs → upgrade to opus, retry once, then BLOCKED
|
|
120
179
|
- max_iter reached → TIMEOUT, report to user
|
|
121
180
|
|
|
181
|
+
Track `consecutive_failures` in `status.json` (increment on `fail`, reset on `pass`, unchanged by `request_info`). Only **fail** verdicts count for CB chains — `request_info` does not break or contribute.
|
|
182
|
+
|
|
122
183
|
### Important Rules
|
|
123
184
|
- Each Agent() = new process = fresh context
|
|
124
185
|
- YOU track iteration count
|
|
@@ -134,27 +195,38 @@ Read `.claude/ralph-desk/logs/<slug>/status.json` and display.
|
|
|
134
195
|
- No N: show latest `iter-*.worker-prompt.md` summary
|
|
135
196
|
- With N: read `iter-N.worker-prompt.md` and `iter-N.verifier-prompt.md`
|
|
136
197
|
|
|
137
|
-
## `clean <slug>`
|
|
198
|
+
## `clean <slug> [--kill-session]`
|
|
138
199
|
Remove:
|
|
139
200
|
- `.claude/ralph-desk/memos/<slug>-complete.md`
|
|
140
201
|
- `.claude/ralph-desk/memos/<slug>-blocked.md`
|
|
141
202
|
- `.claude/ralph-desk/memos/<slug>-done-claim.json`
|
|
142
203
|
- `.claude/ralph-desk/memos/<slug>-verify-verdict.json`
|
|
204
|
+
- `.claude/ralph-desk/memos/<slug>-iter-signal.json`
|
|
143
205
|
- `.claude/ralph-desk/logs/<slug>/circuit-breaker.json`
|
|
206
|
+
- `.claude/ralph-desk/logs/<slug>/session-config.json`
|
|
207
|
+
- `.claude/ralph-desk/logs/<slug>/worker-heartbeat.json`
|
|
208
|
+
- `.claude/ralph-desk/logs/<slug>/verifier-heartbeat.json`
|
|
209
|
+
|
|
210
|
+
If `--kill-session` is passed, also kill any tmux session matching `rlp-desk-<slug>-*`:
|
|
211
|
+
```bash
|
|
212
|
+
tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "^rlp-desk-<slug>-" | while read s; do tmux kill-session -t "$s"; done
|
|
213
|
+
```
|
|
144
214
|
|
|
145
215
|
## No args or `help`
|
|
146
216
|
```
|
|
147
|
-
/rlp-desk brainstorm <description>
|
|
148
|
-
/rlp-desk init <slug> [objective]
|
|
149
|
-
/rlp-desk run <slug> [--
|
|
150
|
-
/rlp-desk status <slug>
|
|
151
|
-
/rlp-desk logs <slug> [N]
|
|
152
|
-
/rlp-desk clean <slug>
|
|
217
|
+
/rlp-desk brainstorm <description> Plan before init (interactive)
|
|
218
|
+
/rlp-desk init <slug> [objective] Create project scaffold
|
|
219
|
+
/rlp-desk run <slug> [--mode agent|tmux] Run loop (agent=LLM leader, tmux=shell leader)
|
|
220
|
+
/rlp-desk status <slug> Show loop status
|
|
221
|
+
/rlp-desk logs <slug> [N] Show iteration log
|
|
222
|
+
/rlp-desk clean <slug> [--kill-session] Reset for re-run (--kill-session kills tmux)
|
|
153
223
|
```
|
|
154
224
|
|
|
155
225
|
## Architecture
|
|
226
|
+
|
|
227
|
+
### Agent Mode (default: `--mode agent`)
|
|
156
228
|
```
|
|
157
|
-
[This session = LEADER]
|
|
229
|
+
[This session = LEADER (LLM)]
|
|
158
230
|
│
|
|
159
231
|
Agent()├──▶ [Worker: executor (fresh context)]
|
|
160
232
|
│ └── reads desk files, implements, updates memory
|
|
@@ -162,3 +234,22 @@ Remove:
|
|
|
162
234
|
Agent()└──▶ [Verifier: executor (fresh context)]
|
|
163
235
|
└── reads done-claim, runs checks, writes verdict
|
|
164
236
|
```
|
|
237
|
+
|
|
238
|
+
### Tmux Mode (`--mode tmux`)
|
|
239
|
+
```
|
|
240
|
+
[tmux session: rlp-desk-<slug>-<timestamp>]
|
|
241
|
+
+-------------------------------------+
|
|
242
|
+
| Leader pane (shell loop) |
|
|
243
|
+
| - writes prompts to files |
|
|
244
|
+
| - sends short triggers via send-keys|
|
|
245
|
+
| - polls iter-signal.json |
|
|
246
|
+
| - monitors heartbeat files |
|
|
247
|
+
| - writes sentinels |
|
|
248
|
+
+------------------+------------------+
|
|
249
|
+
| Worker pane | Verifier pane |
|
|
250
|
+
| bash trigger.sh | bash trigger.sh |
|
|
251
|
+
| -> claude -p ... | -> claude -p ... |
|
|
252
|
+
| heartbeat writer | heartbeat writer |
|
|
253
|
+
| (fresh context) | (fresh context) |
|
|
254
|
+
+------------------+------------------+
|
|
255
|
+
```
|
package/src/governance.md
CHANGED
|
@@ -29,9 +29,13 @@ The Leader orchestrates, while Worker/Verifier run in isolated fresh contexts ev
|
|
|
29
29
|
|
|
30
30
|
### Verifier (fresh context)
|
|
31
31
|
- Independently verifies Worker's done claim
|
|
32
|
+
- Identifies scope via `git diff --name-only` — reads changed files and related imports only
|
|
32
33
|
- Runs commands directly to collect fresh evidence
|
|
33
|
-
-
|
|
34
|
-
-
|
|
34
|
+
- Campaign Memory is for orientation only — not the source of truth
|
|
35
|
+
- Writes verdict (`pass` | `fail` | `request_info`) — if uncertain, use `request_info` with specific questions; Leader decides
|
|
36
|
+
- Delegates deterministic checks (type hints, linting, security) to tools defined in test-spec
|
|
37
|
+
- Focuses on AC verification, semantic review, and smoke tests
|
|
38
|
+
- **Must NEVER modify code or write sentinel files**
|
|
35
39
|
|
|
36
40
|
## 3. State Flow
|
|
37
41
|
|
|
@@ -46,15 +50,15 @@ RUNNING → DONE_CLAIMED → VERIFYING → COMPLETE | CONTINUE | BLOCKED
|
|
|
46
50
|
| Worker (simple) | haiku | Single file, clear change |
|
|
47
51
|
| Worker (standard) | sonnet | Most tasks (default) |
|
|
48
52
|
| Worker (complex) | opus | Architecture changes, multi-file, prior iteration failure |
|
|
49
|
-
| Verifier |
|
|
50
|
-
| Verifier (
|
|
53
|
+
| Verifier | opus | Independent verification requires thoroughness |
|
|
54
|
+
| Verifier (lightweight) | sonnet | Simple, well-defined checks only |
|
|
51
55
|
|
|
52
56
|
The Leader decides each iteration. Decision criteria:
|
|
53
57
|
- Previous iteration failed → upgrade model
|
|
54
58
|
- Simple repetitive task → downgrade model
|
|
55
59
|
- User explicitly specified → use as given
|
|
56
60
|
|
|
57
|
-
##
|
|
61
|
+
## 5a. Execution: Agent() Approach (default) — "Smart Mode"
|
|
58
62
|
|
|
59
63
|
All environments (Claude Code, OpenCode) use the same Agent tool.
|
|
60
64
|
|
|
@@ -83,6 +87,46 @@ Characteristics:
|
|
|
83
87
|
- No tmux required.
|
|
84
88
|
- Monitor in real-time via ctrl+o (Claude Code UI).
|
|
85
89
|
- Prompts are still logged to logs/ for audit trail.
|
|
90
|
+
- Leader is an LLM — can dynamically route models, reason about context, and adapt.
|
|
91
|
+
|
|
92
|
+
## 5b. Execution: Tmux Runner (alternative) — "Lean Mode"
|
|
93
|
+
|
|
94
|
+
For long campaigns, observability, headless/CI execution, or when zero-token orchestration is preferred.
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# Launched via slash command:
|
|
98
|
+
/rlp-desk run <slug> --mode tmux
|
|
99
|
+
|
|
100
|
+
# Or directly:
|
|
101
|
+
LOOP_NAME=<slug> ROOT=$(pwd) ~/.claude/ralph-desk/run_ralph_desk.zsh
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
The tmux runner (`run_ralph_desk.zsh`) creates a tmux session with three panes:
|
|
105
|
+
- **Leader pane** — deterministic shell loop (no LLM)
|
|
106
|
+
- **Worker pane** — receives `claude -p` invocations via trigger scripts
|
|
107
|
+
- **Verifier pane** — receives `claude -p` invocations via trigger scripts
|
|
108
|
+
|
|
109
|
+
All `claude` CLI calls use `--dangerously-skip-permissions`:
|
|
110
|
+
```bash
|
|
111
|
+
claude -p "$(cat /path/to/prompt.md)" \
|
|
112
|
+
--model sonnet \
|
|
113
|
+
--dangerously-skip-permissions
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
**Security implication:** `--dangerously-skip-permissions` allows the CLI to execute code without user confirmation. The tmux runner requires this because there is no interactive user to approve each action. Only run tmux mode in trusted environments with trusted prompts.
|
|
117
|
+
|
|
118
|
+
Characteristics:
|
|
119
|
+
- Leader is a shell script, not an LLM — zero tokens consumed for orchestration.
|
|
120
|
+
- Leader reads ONLY `iter-signal.json` and `verify-verdict.json` for control flow (structured JSON via `jq`). No markdown parsing.
|
|
121
|
+
- Model routing is static via environment variables (`WORKER_MODEL`, `VERIFIER_MODEL`). This is an explicit trade-off vs Agent() mode's dynamic routing.
|
|
122
|
+
- **Write-then-notify:** All prompts and payloads are written to files first. Only short trigger commands (`bash /path/to/trigger.sh`) are sent via `tmux send-keys`.
|
|
123
|
+
- **Pane IDs (`%N` format):** Captured at pane creation, stored in `session-config.json`. Never uses positional indices.
|
|
124
|
+
- **Copy-mode guard:** Checks `#{pane_in_mode}` before every `send-keys` to avoid sending into scrollback.
|
|
125
|
+
- **Heartbeat monitoring:** Trigger scripts write heartbeat files; Leader checks freshness.
|
|
126
|
+
- **Atomic file writes:** All file writes use `{path}.tmp.{pid}` + `mv` for crash safety.
|
|
127
|
+
- Can run detached (`tmux detach`) for overnight/CI campaigns.
|
|
128
|
+
- User can watch Worker/Verifier execution in real-time via tmux panes.
|
|
129
|
+
- Traceability: governance section 7 step numbers appear as comments throughout the shell script.
|
|
86
130
|
|
|
87
131
|
## 6. File Structure
|
|
88
132
|
|
|
@@ -105,6 +149,7 @@ Characteristics:
|
|
|
105
149
|
├── memos/
|
|
106
150
|
│ ├── <slug>-memory.md # Campaign memory (Worker updates)
|
|
107
151
|
│ ├── <slug>-done-claim.json # Worker's completion claim (runtime)
|
|
152
|
+
│ ├── <slug>-iter-signal.json # Worker's iteration signal (runtime)
|
|
108
153
|
│ ├── <slug>-verify-verdict.json # Verifier's verdict (runtime)
|
|
109
154
|
│ ├── <slug>-complete.md # SENTINEL (Leader only)
|
|
110
155
|
│ └── <slug>-blocked.md # SENTINEL (Leader only)
|
|
@@ -114,6 +159,7 @@ Characteristics:
|
|
|
114
159
|
└── logs/<slug>/
|
|
115
160
|
├── iter-NNN.worker-prompt.md # Audit trail prompt copy
|
|
116
161
|
├── iter-NNN.verifier-prompt.md # Audit trail prompt copy
|
|
162
|
+
├── iter-NNN.result.md # Iteration result (leader-measured + git-measured)
|
|
117
163
|
└── status.json # Leader's loop state
|
|
118
164
|
```
|
|
119
165
|
|
|
@@ -126,7 +172,13 @@ for iteration in 1..max_iter:
|
|
|
126
172
|
- complete.md exists → stop
|
|
127
173
|
- blocked.md exists → stop
|
|
128
174
|
|
|
175
|
+
①½ Prep-stage cleanup
|
|
176
|
+
- Delete done-claim.json if exists
|
|
177
|
+
- Delete verify-verdict.json if exists
|
|
178
|
+
|
|
129
179
|
② Read memory.md → check Stop Status, Next Iteration Contract
|
|
180
|
+
- Also parse Completed Stories (verified work so far)
|
|
181
|
+
- Also parse Key Decisions (settled architectural choices)
|
|
130
182
|
|
|
131
183
|
③ Select model
|
|
132
184
|
- Default or situational decision (see §4)
|
|
@@ -143,6 +195,9 @@ for iteration in 1..max_iter:
|
|
|
143
195
|
- "continue" → go to ⑧
|
|
144
196
|
- "verify" → go to ⑦
|
|
145
197
|
- "blocked" → write BLOCKED sentinel, stop
|
|
198
|
+
Note: In tmux mode, the Leader polls `<slug>-iter-signal.json` instead of
|
|
199
|
+
parsing memory.md. In Agent() mode, the Leader MAY read iter-signal.json
|
|
200
|
+
as a structured alternative to parsing the Stop Status from memory.md.
|
|
146
201
|
|
|
147
202
|
⑦ Execute Verifier
|
|
148
203
|
- Build prompt → log to logs/<slug>/iter-NNN.verifier-prompt.md
|
|
@@ -152,7 +207,31 @@ for iteration in 1..max_iter:
|
|
|
152
207
|
• fail + continue → go to ⑧
|
|
153
208
|
• blocked → write BLOCKED sentinel, stop
|
|
154
209
|
|
|
155
|
-
⑧
|
|
210
|
+
⑧ Write iter-NNN.result.md to logs/<slug>/ (result status + git diff --stat)
|
|
211
|
+
Update status.json, report to user, continue to next iteration
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## 7½. Fix Loop Protocol
|
|
215
|
+
|
|
216
|
+
When the Verifier returns `fail`, the Leader runs the Fix Loop before issuing the next Worker contract:
|
|
217
|
+
|
|
218
|
+
1. **Read issues** from `verify-verdict.json` — sort by severity (`critical` → `major` → `minor`)
|
|
219
|
+
2. **Build fix contract** — include each issue as a numbered task with criterion reference
|
|
220
|
+
- `fix_hint` (if present) is passed as `(suggestion, non-authoritative)` — Worker may ignore
|
|
221
|
+
3. **Traceability rule**: "Only changes that resolve a listed issue are allowed — every change must be justified by the issue it addresses"
|
|
222
|
+
4. **Update status.json** — increment `consecutive_failures`; reset to 0 on any `pass`
|
|
223
|
+
|
|
224
|
+
The `consecutive_failures` counter is maintained by the Leader in `status.json`.
|
|
225
|
+
|
|
226
|
+
**Fix contract format:**
|
|
227
|
+
```
|
|
228
|
+
Fix issues from Verifier verdict (iter-NNN):
|
|
229
|
+
|
|
230
|
+
1. [critical] US-002 AC3: <description> — fix_hint: (suggestion, non-authoritative) <hint>
|
|
231
|
+
2. [major] US-001 AC1: <description>
|
|
232
|
+
|
|
233
|
+
Traceability: only changes that resolve a listed issue are allowed.
|
|
234
|
+
Every change must be justified by the issue it addresses.
|
|
156
235
|
```
|
|
157
236
|
|
|
158
237
|
## 8. Circuit Breaker
|
|
@@ -160,9 +239,15 @@ for iteration in 1..max_iter:
|
|
|
160
239
|
| Condition | Verdict |
|
|
161
240
|
|-----------|---------|
|
|
162
241
|
| context-latest.md unchanged for 3 consecutive iterations | BLOCKED |
|
|
163
|
-
|
|
|
242
|
+
| Same acceptance criterion fails 2 consecutive iterations | Upgrade model, retry once; if still failing → BLOCKED |
|
|
243
|
+
| 3 consecutive **fail** verdicts on 3 unique criterion IDs | Upgrade to opus, retry once; if still failing → BLOCKED |
|
|
164
244
|
| max_iter reached | TIMEOUT (report to user) |
|
|
165
245
|
|
|
246
|
+
The Leader tracks `consecutive_failures` in `status.json`:
|
|
247
|
+
- Increments on `fail`, resets on `pass`, **unchanged by `request_info`**.
|
|
248
|
+
- "Same error" = same acceptance criterion ID in two consecutive **fail** verdicts (`request_info` does not break or contribute to this chain).
|
|
249
|
+
- "Diverse failures" = 3 most recent `fail` verdicts each have a unique criterion ID.
|
|
250
|
+
|
|
166
251
|
## 9. Change Policy
|
|
167
252
|
|
|
168
253
|
- Changes to the shared workflow → modify this document
|