@ai-dev-methodologies/rlp-desk 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,25 @@
1
+ {
2
+ "session_name": "rlp-desk-loop-test-20260318-232859",
3
+ "slug": "loop-test",
4
+ "created_at": "2026-03-18T14:28:59Z",
5
+ "panes": {
6
+ "leader": "%99",
7
+ "worker": "%100",
8
+ "verifier": "%101"
9
+ },
10
+ "pid": 65962,
11
+ "root": "/Users/kyjin/dev/own/ai-dev-methodologies/rlp-desk/examples/calculator",
12
+ "models": {
13
+ "worker": "sonnet",
14
+ "verifier": "opus"
15
+ },
16
+ "config": {
17
+ "max_iter": 20,
18
+ "poll_interval": 5,
19
+ "iter_timeout": 600,
20
+ "heartbeat_stale_threshold": 120,
21
+ "max_restarts": 3,
22
+ "idle_nudge_threshold": 30,
23
+ "max_nudges": 3
24
+ }
25
+ }
@@ -0,0 +1,10 @@
1
+ {
2
+ "slug": "loop-test",
3
+ "iteration": 1,
4
+ "max_iter": 20,
5
+ "phase": "worker",
6
+ "worker_model": "sonnet",
7
+ "verifier_model": "opus",
8
+ "last_result": "running",
9
+ "updated_at_utc": "2026-03-18T14:28:59Z"
10
+ }
@@ -0,0 +1 @@
1
+ {"ts":"2026-03-18T14:29:15Z","pid":66349}
@@ -0,0 +1,17 @@
1
+ # loop-test - Campaign Memory
2
+
3
+ ## Stop Status
4
+ continue
5
+
6
+ ## Objective
7
+ Implement a Python calculator module: calc.py (4 functions + type hints + ValueError) + test_calc.py (pytest, 8+ tests, all passed)
8
+
9
+ ## Current State
10
+ Iteration 0 - not started
11
+
12
+ ## Next Iteration Contract
13
+ Start from the beginning: read PRD and implement US-001 (calc.py with 4 functions).
14
+
15
+ ## Patterns Discovered
16
+ ## Learnings
17
+ ## Evidence Chain
@@ -18,7 +18,7 @@ Iteration rules:
18
18
 
19
19
  MANDATORY: When done, write the following signal file:
20
20
  - Path: .claude/ralph-desk/memos/loop-test-iter-signal.json
21
- - Format: {"iteration": N, "status": "continue|verify|blocked", "timestamp": "ISO"}
21
+ - Format: {"iteration": N, "status": "continue|verify|blocked", "summary": "what was done", "timestamp": "ISO"}
22
22
  - Status values:
23
23
  - "continue" = current story done but other stories remain
24
24
  - "verify" = all stories complete + done-claim written
package/install.sh CHANGED
@@ -35,19 +35,33 @@ echo " Downloading init script..."
35
35
  curl -sSL "$REPO_URL/src/scripts/init_ralph_desk.zsh" -o "$DESK_DIR/init_ralph_desk.zsh"
36
36
  chmod +x "$DESK_DIR/init_ralph_desk.zsh"
37
37
 
38
+ # Download tmux runner script
39
+ echo " Downloading tmux runner script..."
40
+ curl -sSL "$REPO_URL/src/scripts/run_ralph_desk.zsh" -o "$DESK_DIR/run_ralph_desk.zsh"
41
+ chmod +x "$DESK_DIR/run_ralph_desk.zsh"
42
+
38
43
  # Download governance protocol
39
44
  echo " Downloading governance protocol..."
40
45
  curl -sSL "$REPO_URL/src/governance.md" -o "$DESK_DIR/governance.md"
41
46
 
47
+ # Check tmux availability
48
+ if ! command -v tmux &>/dev/null; then
49
+ echo ""
50
+ echo " [warn] tmux not found. Tmux execution mode (--mode tmux) will not be available."
51
+ echo " Install tmux to use lean mode: https://github.com/tmux/tmux/wiki/Installing"
52
+ fi
53
+
42
54
  echo ""
43
55
  echo " Done! Installed to:"
44
56
  echo ""
45
57
  echo " Slash command: $COMMANDS_DIR/rlp-desk.md"
46
58
  echo " Init script: $DESK_DIR/init_ralph_desk.zsh"
59
+ echo " Tmux runner: $DESK_DIR/run_ralph_desk.zsh"
47
60
  echo " Governance: $DESK_DIR/governance.md"
48
61
  echo ""
49
62
  echo " Usage:"
50
63
  echo " 1. Open Claude Code in your project directory"
51
64
  echo " 2. Run: /rlp-desk brainstorm \"your task description\""
52
65
  echo " 3. Run: /rlp-desk run <slug>"
66
+ echo " 4. Run: /rlp-desk run <slug> --mode tmux (lean mode)"
53
67
  echo ""
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@ai-dev-methodologies/rlp-desk",
3
- "version": "0.0.1",
3
+ "version": "0.1.0",
4
4
  "description": "Fresh-context iterative loops for Claude Code — autonomous task completion with independent verification",
5
5
  "scripts": {
6
6
  "postinstall": "node scripts/postinstall.js",
@@ -4,15 +4,17 @@
4
4
  const fs = require("fs");
5
5
  const path = require("path");
6
6
  const os = require("os");
7
+ const { execSync } = require("child_process");
7
8
 
8
9
  const home = os.homedir();
9
10
  const claudeDir = path.join(home, ".claude");
10
11
  const commandsDir = path.join(claudeDir, "commands");
11
12
  const deskDir = path.join(claudeDir, "ralph-desk");
12
13
  const pkgDir = path.join(__dirname, "..");
14
+ const pkg = require(path.join(pkgDir, "package.json"));
13
15
 
14
16
  console.log("");
15
- console.log(" RLP Desk v0.0.1");
17
+ console.log(" RLP Desk v" + pkg.version);
16
18
  console.log(" ================");
17
19
  console.log("");
18
20
 
@@ -27,6 +29,10 @@ const copies = [
27
29
  "src/scripts/init_ralph_desk.zsh",
28
30
  path.join(deskDir, "init_ralph_desk.zsh"),
29
31
  ],
32
+ [
33
+ "src/scripts/run_ralph_desk.zsh",
34
+ path.join(deskDir, "run_ralph_desk.zsh"),
35
+ ],
30
36
  ["src/governance.md", path.join(deskDir, "governance.md")],
31
37
  ];
32
38
 
@@ -38,10 +44,20 @@ for (const [src, dest] of copies) {
38
44
  // Make scripts executable
39
45
  try {
40
46
  fs.chmodSync(path.join(deskDir, "init_ralph_desk.zsh"), 0o755);
47
+ fs.chmodSync(path.join(deskDir, "run_ralph_desk.zsh"), 0o755);
41
48
  } catch (_) {
42
49
  // chmod may fail on Windows — not critical
43
50
  }
44
51
 
52
+ // Check tmux availability
53
+ try {
54
+ execSync("which tmux", { stdio: "ignore" });
55
+ } catch (_) {
56
+ console.log(" [warn] tmux not found. Tmux execution mode (--mode tmux) will not be available.");
57
+ console.log(" Install tmux to use lean mode: https://github.com/tmux/tmux/wiki/Installing");
58
+ console.log("");
59
+ }
60
+
45
61
  console.log("");
46
62
  console.log(" Done! Open Claude Code and run:");
47
63
  console.log(" /rlp-desk brainstorm \"your task description\"");
@@ -17,6 +17,7 @@ console.log("");
17
17
  const files = [
18
18
  path.join(commandsDir, "rlp-desk.md"),
19
19
  path.join(deskDir, "init_ralph_desk.zsh"),
20
+ path.join(deskDir, "run_ralph_desk.zsh"),
20
21
  path.join(deskDir, "governance.md"),
21
22
  ];
22
23
 
@@ -15,20 +15,28 @@ Parse the first word of `$ARGUMENTS` as the subcommand.
15
15
 
16
16
  ## `brainstorm <description>`
17
17
 
18
- Planning phase BEFORE init. Interactively define the contract with the user.
18
+ Planning phase BEFORE init. Interactively define the contract **with the user**.
19
19
 
20
- Determine all of the following:
21
- 1. **Slug** short identifier (e.g., `auth-refactor`)
20
+ You MUST ask the user about each item below. Do NOT decide for them.
21
+ Present your suggestion, then wait for the user's confirmation or change.
22
+
23
+ Ask about these items one by one (or in small groups):
24
+ 1. **Slug** — short identifier (e.g., `auth-refactor`). Suggest one, ask if OK.
22
25
  2. **Objective** — what the loop achieves
23
- 3. **User Stories** — discrete units with testable acceptance criteria
24
- 4. **Iteration Unit** — one worker does per iteration (default: one user story)
26
+ 3. **User Stories** — discrete units with testable acceptance criteria. Propose a breakdown, ask the user to confirm/modify.
27
+ 4. **Iteration Unit** — what one worker does per iteration. Explicitly ask:
28
+ - "One US per iteration (bounded, incremental verification)?"
29
+ - "All stories at once (faster, single verification)?"
30
+ - Default recommendation: one US per iteration for 3+ stories.
25
31
  5. **Verification Commands** — build, test, lint commands
26
32
  6. **Completion / Blocked Criteria**
27
- 7. **Worker / Verifier Model** — haiku, sonnet, opus
28
- 8. **Max Iterations**
33
+ 7. **Worker / Verifier Model** — haiku, sonnet, opus. Suggest defaults (worker: sonnet, verifier: opus), ask if OK.
34
+ 8. **Max Iterations** — suggest based on story count, ask if OK.
29
35
 
30
- Present the contract summary. On approval, offer to run `init`.
36
+ After all items are confirmed, present the full contract summary.
37
+ On approval, offer to run `init`.
31
38
  Do NOT create files during brainstorm.
39
+ Do NOT auto-decide iteration unit — the user MUST explicitly choose.
32
40
 
33
41
  ---
34
42
 
@@ -44,9 +52,40 @@ If brainstorm was done, auto-fill PRD and test-spec with the results.
44
52
  **YOU are the leader. Do NOT delegate leadership.**
45
53
 
46
54
  Options (parse from `$ARGUMENTS`):
55
+ - `--mode agent|tmux` (default: `agent`) — execution mode
47
56
  - `--max-iter N` (default: 100)
48
57
  - `--worker-model MODEL` (default: sonnet)
49
- - `--verifier-model MODEL` (default: sonnet)
58
+ - `--verifier-model MODEL` (default: opus)
59
+ - `--debug` — enable debug logging (tmux mode only, writes to logs/<slug>/debug.log)
60
+
61
+ ### Mode Selection
62
+
63
+ Parse the `--mode` flag. If absent or `agent`, use the Agent() path below. If `tmux`, use the Tmux path.
64
+
65
+ #### Tmux Mode (`--mode tmux`)
66
+
67
+ When `--mode tmux` is specified:
68
+
69
+ 1. **Validate scaffold** — same as Agent() mode: check `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
70
+ 2. **Check sentinels** — same as Agent() mode.
71
+ 3. **Check prerequisites** — verify `tmux` and `jq` are installed. If not, report what is missing and stop.
72
+ 4. **Locate runner script** — find `run_ralph_desk.zsh` at `~/.claude/ralph-desk/run_ralph_desk.zsh`. If not found, tell the user to reinstall (`npm install` or `install.sh`).
73
+ 5. **Launch** — shell out to the runner script with env vars derived from flags:
74
+ ```bash
75
+ LOOP_NAME="<slug>" \
76
+ ROOT="$PWD" \
77
+ MAX_ITER=<--max-iter value> \
78
+ WORKER_MODEL=<--worker-model value> \
79
+ VERIFIER_MODEL=<--verifier-model value> \
80
+ DEBUG=<1 if --debug, else 0> \
81
+ zsh ~/.claude/ralph-desk/run_ralph_desk.zsh
82
+ ```
83
+ 6. **If the script exits with error (exit code 1)** — report the error to the user and STOP. Do NOT attempt to work around it. Do NOT create tmux sessions yourself. Do NOT re-launch the script in a different way. Just tell the user what went wrong and suggest using Agent mode instead.
84
+ 7. **If successful** — tell the user the tmux session has been started. The shell script takes over as the deterministic Leader. No Agent() calls are made in tmux mode.
85
+
86
+ **IMPORTANT:** Tmux mode requires the user to already be inside a tmux session. If the runner script refuses to start because `$TMUX` is not set, do NOT try to create a tmux session yourself. Tell the user: "Start tmux first, then retry."
87
+
88
+ #### Agent Mode (`--mode agent` or default)
50
89
 
51
90
  ### Preparation
52
91
  1. Validate scaffold: `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
@@ -55,6 +94,8 @@ Options (parse from `$ARGUMENTS`):
55
94
 
56
95
  ### Leader Loop
57
96
 
97
+ **CRITICAL: DO NOT STOP between iterations.** You MUST continue the loop automatically until a sentinel is written (COMPLETE or BLOCKED) or max_iter is reached. Do NOT pause to ask the user. Do NOT wait for confirmation. The loop is fully autonomous — just report each iteration result briefly and immediately proceed to the next iteration.
98
+
58
99
  For each iteration (1 to max_iter):
59
100
 
60
101
  **① Check sentinels**
@@ -63,7 +104,15 @@ test -f .claude/ralph-desk/memos/<slug>-complete.md # → done
63
104
  test -f .claude/ralph-desk/memos/<slug>-blocked.md # → stop
64
105
  ```
65
106
 
107
+ **①½ Prep-stage cleanup**
108
+ ```bash
109
+ rm -f .claude/ralph-desk/memos/<slug>-done-claim.json
110
+ rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
111
+ ```
112
+
66
113
  **② Read memory.md** → Stop Status, Next Iteration Contract
114
+ - Also read **Completed Stories** → verified work so far
115
+ - Also read **Key Decisions** → settled architectural choices
67
116
 
68
117
  **③ Decide model** (§4 of governance.md)
69
118
  - Previous iteration failed → upgrade model
@@ -106,19 +155,31 @@ Agent(
106
155
  ```
107
156
  - Read `verify-verdict.json`:
108
157
  - `pass` + `complete` → write COMPLETE sentinel, report done!
109
- - `fail` + `continue` → go to
158
+ - `fail` + `continue` → **run Fix Loop** (governance.md §7½):
159
+ 1. Read `issues` array, sort by severity (`critical` → `major` → `minor`)
160
+ 2. Build structured fix contract with traceability rule
161
+ 3. Include `fix_hint` values labeled `(suggestion, non-authoritative)` if present
162
+ 4. Increment `consecutive_failures` in `status.json`
163
+ 5. Go to ⑧ with fix contract as next Worker contract
164
+ - `request_info` → Leader reads Verifier's questions, decides outcome (or relays to Worker in next contract) → go to ⑧
110
165
  - `blocked` → write BLOCKED sentinel, stop
111
166
 
112
- **⑧ Report iteration result to user, continue loop**
167
+ **⑧ Write result log and report to user, continue loop**
168
+ - Write `logs/<slug>/iter-NNN.result.md`:
169
+ - Result status `[leader-measured]`
170
+ - Files changed via `git diff --stat HEAD~1 HEAD` `[git-measured]`
171
+ - Verifier verdict `[leader-measured]`
113
172
  - Write `status.json`
114
173
  - Report: iteration N, phase, model used, result
115
- - Clean `done-claim.json`, `verify-verdict.json` for next iteration
116
174
 
117
175
  ### Circuit Breaker
118
176
  - context-latest.md unchanged 3 iterations → BLOCKED
119
- - Same error 2x → upgrade model, retry once, then BLOCKED
177
+ - Same acceptance criterion fails 2 consecutive iterations → upgrade model, retry once, then BLOCKED
178
+ - 3 consecutive **fail** verdicts on 3 unique criterion IDs → upgrade to opus, retry once, then BLOCKED
120
179
  - max_iter reached → TIMEOUT, report to user
121
180
 
181
+ Track `consecutive_failures` in `status.json` (increment on `fail`, reset on `pass`, unchanged by `request_info`). Only **fail** verdicts count toward Circuit Breaker chains — a `request_info` verdict neither breaks a chain nor contributes to it.
182
+
122
183
  ### Important Rules
123
184
  - Each Agent() = new process = fresh context
124
185
  - YOU track iteration count
@@ -134,27 +195,38 @@ Read `.claude/ralph-desk/logs/<slug>/status.json` and display.
134
195
  - No N: show latest `iter-*.worker-prompt.md` summary
135
196
  - With N: read `iter-N.worker-prompt.md` and `iter-N.verifier-prompt.md`
136
197
 
137
- ## `clean <slug>`
198
+ ## `clean <slug> [--kill-session]`
138
199
  Remove:
139
200
  - `.claude/ralph-desk/memos/<slug>-complete.md`
140
201
  - `.claude/ralph-desk/memos/<slug>-blocked.md`
141
202
  - `.claude/ralph-desk/memos/<slug>-done-claim.json`
142
203
  - `.claude/ralph-desk/memos/<slug>-verify-verdict.json`
204
+ - `.claude/ralph-desk/memos/<slug>-iter-signal.json`
143
205
  - `.claude/ralph-desk/logs/<slug>/circuit-breaker.json`
206
+ - `.claude/ralph-desk/logs/<slug>/session-config.json`
207
+ - `.claude/ralph-desk/logs/<slug>/worker-heartbeat.json`
208
+ - `.claude/ralph-desk/logs/<slug>/verifier-heartbeat.json`
209
+
210
+ If `--kill-session` is passed, also kill any tmux session matching `rlp-desk-<slug>-*`:
211
+ ```bash
212
+ tmux list-sessions -F '#{session_name}' 2>/dev/null | grep "^rlp-desk-<slug>-" | while read s; do tmux kill-session -t "$s"; done
213
+ ```
144
214
 
145
215
  ## No args or `help`
146
216
  ```
147
- /rlp-desk brainstorm <description> Plan before init (interactive)
148
- /rlp-desk init <slug> [objective] Create project scaffold
149
- /rlp-desk run <slug> [--opts] Run loop (this session = leader)
150
- /rlp-desk status <slug> Show loop status
151
- /rlp-desk logs <slug> [N] Show iteration log
152
- /rlp-desk clean <slug> Reset for re-run
217
+ /rlp-desk brainstorm <description> Plan before init (interactive)
218
+ /rlp-desk init <slug> [objective] Create project scaffold
219
+ /rlp-desk run <slug> [--mode agent|tmux] Run loop (agent=LLM leader, tmux=shell leader)
220
+ /rlp-desk status <slug> Show loop status
221
+ /rlp-desk logs <slug> [N] Show iteration log
222
+ /rlp-desk clean <slug> [--kill-session] Reset for re-run (--kill-session kills tmux)
153
223
  ```
154
224
 
155
225
  ## Architecture
226
+
227
+ ### Agent Mode (default: `--mode agent`)
156
228
  ```
157
- [This session = LEADER]
229
+ [This session = LEADER (LLM)]
158
230
 
159
231
  Agent()├──▶ [Worker: executor (fresh context)]
160
232
  │ └── reads desk files, implements, updates memory
@@ -162,3 +234,22 @@ Remove:
162
234
  Agent()└──▶ [Verifier: executor (fresh context)]
163
235
  └── reads done-claim, runs checks, writes verdict
164
236
  ```
237
+
238
+ ### Tmux Mode (`--mode tmux`)
239
+ ```
240
+ [tmux session: rlp-desk-<slug>-<timestamp>]
241
+ +-------------------------------------+
242
+ | Leader pane (shell loop) |
243
+ | - writes prompts to files |
244
+ | - sends short triggers via send-keys|
245
+ | - polls iter-signal.json |
246
+ | - monitors heartbeat files |
247
+ | - writes sentinels |
248
+ +------------------+------------------+
249
+ | Worker pane | Verifier pane |
250
+ | bash trigger.sh | bash trigger.sh |
251
+ | -> claude -p ... | -> claude -p ... |
252
+ | heartbeat writer | heartbeat writer |
253
+ | (fresh context) | (fresh context) |
254
+ +------------------+------------------+
255
+ ```
package/src/governance.md CHANGED
@@ -29,9 +29,13 @@ The Leader orchestrates, while Worker/Verifier run in isolated fresh contexts ev
29
29
 
30
30
  ### Verifier (fresh context)
31
31
  - Independently verifies Worker's done claim
32
+ - Identifies scope via `git diff --name-only` — reads changed files and related imports only
32
33
  - Runs commands directly to collect fresh evidence
33
- - Writes verdict (pass/fail/blocked)
34
- - **Must NEVER modify code**
34
+ - Campaign Memory is for orientation only — not the source of truth
35
+ - Writes verdict (`pass` | `fail` | `request_info`) — if uncertain, use `request_info` with specific questions; Leader decides
36
+ - Delegates deterministic checks (type hints, linting, security) to tools defined in test-spec
37
+ - Focuses on AC verification, semantic review, and smoke tests
38
+ - **Must NEVER modify code or write sentinel files**
35
39
 
36
40
  ## 3. State Flow
37
41
 
@@ -46,15 +50,15 @@ RUNNING → DONE_CLAIMED → VERIFYING → COMPLETE | CONTINUE | BLOCKED
46
50
  | Worker (simple) | haiku | Single file, clear change |
47
51
  | Worker (standard) | sonnet | Most tasks (default) |
48
52
  | Worker (complex) | opus | Architecture changes, multi-file, prior iteration failure |
49
- | Verifier | sonnet | Sufficient for most cases |
50
- | Verifier (strict) | opus | Security/critical logic verification |
53
+ | Verifier | opus | Independent verification requires thoroughness |
54
+ | Verifier (lightweight) | sonnet | Simple, well-defined checks only |
51
55
 
52
56
  The Leader decides each iteration. Decision criteria:
53
57
  - Previous iteration failed → upgrade model
54
58
  - Simple repetitive task → downgrade model
55
59
  - User explicitly specified → use as given
56
60
 
57
- ## 5. Execution: Unified Agent() Approach
61
+ ## 5a. Execution: Agent() Approach (default) — "Smart Mode"
58
62
 
59
63
  All environments (Claude Code, OpenCode) use the same Agent tool.
60
64
 
@@ -83,6 +87,46 @@ Characteristics:
83
87
  - No tmux required.
84
88
  - Monitor in real-time via ctrl+o (Claude Code UI).
85
89
  - Prompts are still logged to logs/ for audit trail.
90
+ - Leader is an LLM — can dynamically route models, reason about context, and adapt.
91
+
92
+ ## 5b. Execution: Tmux Runner (alternative) — "Lean Mode"
93
+
94
+ Use this mode for long campaigns, for observability, for headless/CI execution, or when zero-token orchestration is preferred.
95
+
96
+ ```bash
97
+ # Launched via slash command:
98
+ /rlp-desk run <slug> --mode tmux
99
+
100
+ # Or directly:
101
+ LOOP_NAME=<slug> ROOT=$(pwd) ~/.claude/ralph-desk/run_ralph_desk.zsh
102
+ ```
103
+
104
+ The tmux runner (`run_ralph_desk.zsh`) creates a tmux session with three panes:
105
+ - **Leader pane** — deterministic shell loop (no LLM)
106
+ - **Worker pane** — receives `claude -p` invocations via trigger scripts
107
+ - **Verifier pane** — receives `claude -p` invocations via trigger scripts
108
+
109
+ All `claude` CLI calls use `--dangerously-skip-permissions`:
110
+ ```bash
111
+ claude -p "$(cat /path/to/prompt.md)" \
112
+ --model sonnet \
113
+ --dangerously-skip-permissions
114
+ ```
115
+
116
+ **Security implication:** `--dangerously-skip-permissions` allows the CLI to execute code without user confirmation. The tmux runner requires this because there is no interactive user to approve each action. Only run tmux mode in trusted environments with trusted prompts.
117
+
118
+ Characteristics:
119
+ - Leader is a shell script, not an LLM — zero tokens consumed for orchestration.
120
+ - Leader reads ONLY `iter-signal.json` and `verify-verdict.json` for control flow (structured JSON via `jq`). No markdown parsing.
121
+ - Model routing is static via environment variables (`WORKER_MODEL`, `VERIFIER_MODEL`). This is an explicit trade-off vs Agent() mode's dynamic routing.
122
+ - **Write-then-notify:** All prompts and payloads are written to files first. Only short trigger commands (`bash /path/to/trigger.sh`) are sent via `tmux send-keys`.
123
+ - **Pane IDs (`%N` format):** Captured at pane creation, stored in `session-config.json`. Never uses positional indices.
124
+ - **Copy-mode guard:** Checks `#{pane_in_mode}` before every `send-keys` to avoid sending into scrollback.
125
+ - **Heartbeat monitoring:** Trigger scripts write heartbeat files; Leader checks freshness.
126
+ - **Atomic file writes:** All file writes use `{path}.tmp.{pid}` + `mv` for crash safety.
127
+ - Can run detached (`tmux detach`) for overnight/CI campaigns.
128
+ - User can watch Worker/Verifier execution in real-time via tmux panes.
129
+ - Traceability: governance section 7 step numbers appear as comments throughout the shell script.
86
130
 
87
131
  ## 6. File Structure
88
132
 
@@ -105,6 +149,7 @@ Characteristics:
105
149
  ├── memos/
106
150
  │ ├── <slug>-memory.md # Campaign memory (Worker updates)
107
151
  │ ├── <slug>-done-claim.json # Worker's completion claim (runtime)
152
+ │ ├── <slug>-iter-signal.json # Worker's iteration signal (runtime)
108
153
  │ ├── <slug>-verify-verdict.json # Verifier's verdict (runtime)
109
154
  │ ├── <slug>-complete.md # SENTINEL (Leader only)
110
155
  │ └── <slug>-blocked.md # SENTINEL (Leader only)
@@ -114,6 +159,7 @@ Characteristics:
114
159
  └── logs/<slug>/
115
160
  ├── iter-NNN.worker-prompt.md # Audit trail prompt copy
116
161
  ├── iter-NNN.verifier-prompt.md # Audit trail prompt copy
162
+ ├── iter-NNN.result.md # Iteration result (leader-measured + git-measured)
117
163
  └── status.json # Leader's loop state
118
164
  ```
119
165
 
@@ -126,7 +172,13 @@ for iteration in 1..max_iter:
126
172
  - complete.md exists → stop
127
173
  - blocked.md exists → stop
128
174
 
175
+ ①½ Prep-stage cleanup
176
+ - Delete done-claim.json if exists
177
+ - Delete verify-verdict.json if exists
178
+
129
179
  ② Read memory.md → check Stop Status, Next Iteration Contract
180
+ - Also parse Completed Stories (verified work so far)
181
+ - Also parse Key Decisions (settled architectural choices)
130
182
 
131
183
  ③ Select model
132
184
  - Default or situational decision (see §4)
@@ -143,6 +195,9 @@ for iteration in 1..max_iter:
143
195
  - "continue" → go to ⑧
144
196
  - "verify" → go to ⑦
145
197
  - "blocked" → write BLOCKED sentinel, stop
198
+ Note: In tmux mode, the Leader polls `<slug>-iter-signal.json` instead of
199
+ parsing memory.md. In Agent() mode, the Leader MAY read iter-signal.json
200
+ as a structured alternative to parsing the Stop Status from memory.md.
146
201
 
147
202
  ⑦ Execute Verifier
148
203
  - Build prompt → log to logs/<slug>/iter-NNN.verifier-prompt.md
@@ -152,7 +207,31 @@ for iteration in 1..max_iter:
152
207
  • fail + continue → run Fix Loop (§7½), then go to ⑧
153
208
  • blocked → write BLOCKED sentinel, stop
154
209
 
155
- Update status.json, report to user, continue to next iteration
210
+ Write iter-NNN.result.md to logs/<slug>/ (result status + git diff --stat)
211
+ Update status.json, report to user, continue to next iteration
212
+ ```
213
+
214
+ ## 7½. Fix Loop Protocol
215
+
216
+ When the Verifier returns `fail`, the Leader runs the Fix Loop before issuing the next Worker contract:
217
+
218
+ 1. **Read issues** from `verify-verdict.json` — sort by severity (`critical` → `major` → `minor`)
219
+ 2. **Build fix contract** — include each issue as a numbered task with criterion reference
220
+ - `fix_hint` (if present) is passed along labeled `(suggestion, non-authoritative)` — the Worker may ignore it
221
+ 3. **Traceability rule**: "Only changes that resolve a listed issue are allowed — every change must be justified by the issue it addresses"
222
+ 4. **Update status.json** — increment `consecutive_failures`; reset to 0 on any `pass`
223
+
224
+ The `consecutive_failures` counter is maintained by the Leader in `status.json`.
225
+
226
+ **Fix contract format:**
227
+ ```
228
+ Fix issues from Verifier verdict (iter-NNN):
229
+
230
+ 1. [critical] US-002 AC3: <description> — fix_hint: (suggestion, non-authoritative) <hint>
231
+ 2. [major] US-001 AC1: <description>
232
+
233
+ Traceability: only changes that resolve a listed issue are allowed.
234
+ Every change must be justified by the issue it addresses.
156
235
  ```
157
236
 
158
237
  ## 8. Circuit Breaker
@@ -160,9 +239,15 @@ for iteration in 1..max_iter:
160
239
  | Condition | Verdict |
161
240
  |-----------|---------|
162
241
  | context-latest.md unchanged for 3 consecutive iterations | BLOCKED |
163
- | Worker repeats the same error twice | Upgrade model, retry once; if still failing → BLOCKED |
242
+ | Same acceptance criterion fails 2 consecutive iterations | Upgrade model, retry once; if still failing → BLOCKED |
243
+ | 3 consecutive **fail** verdicts on 3 unique criterion IDs | Upgrade to opus, retry once; if still failing → BLOCKED |
164
244
  | max_iter reached | TIMEOUT (report to user) |
165
245
 
246
+ The Leader tracks `consecutive_failures` in `status.json`:
247
+ - Increments on `fail`, resets on `pass`, **unchanged by `request_info`**.
248
+ - "Same acceptance criterion fails" = the same acceptance criterion ID appears in two consecutive **fail** verdicts (a `request_info` verdict neither breaks nor contributes to this chain).
249
+ - "Diverse failures" = the 3 most recent `fail` verdicts each cite a different criterion ID.
250
+
166
251
  ## 9. Change Policy
167
252
 
168
253
  - Changes to the shared workflow → modify this document