@ai-dev-methodologies/rlp-desk 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@ai-dev-methodologies/rlp-desk",
|
|
3
|
-
"version": "0.2.2",
|
|
3
|
+
"version": "0.2.4",
|
|
4
4
|
"description": "Fresh-context iterative loops for Claude Code — autonomous task completion with independent verification",
|
|
5
5
|
"scripts": {
|
|
6
6
|
"postinstall": "node scripts/postinstall.js",
|
package/src/commands/rlp-desk.md
CHANGED
|
@@ -97,7 +97,7 @@ Options (parse from `$ARGUMENTS`):
|
|
|
97
97
|
- `--consensus-scope all|final-only` — when consensus runs (default: `all`)
|
|
98
98
|
- `all`: consensus runs on every verify (current behavior)
|
|
99
99
|
- `final-only`: consensus only on final ALL verify
|
|
100
|
-
- `--debug` — enable debug logging (
|
|
100
|
+
- `--debug` — enable debug logging (writes to logs/<slug>/debug.log)
|
|
101
101
|
|
|
102
102
|
### Mode Selection
|
|
103
103
|
|
|
@@ -144,11 +144,14 @@ DEBUG=<1 if --debug, else 0> \
|
|
|
144
144
|
1. Validate scaffold: `.claude/ralph-desk/prompts/<slug>.worker.prompt.md` etc.
|
|
145
145
|
2. Check sentinels (complete/blocked). Found → tell user `/rlp-desk clean <slug>`.
|
|
146
146
|
3. Clean previous `done-claim.json`, `verify-verdict.json`.
|
|
147
|
+
4. If `--debug`: create/clear `logs/<slug>/debug.log`. Define a helper: to "debug_log" means append a timestamped line to this file via `Bash("echo \"[$(date '+%Y-%m-%d %H:%M:%S')] $msg\" >> .claude/ralph-desk/logs/<slug>/debug.log")`.
|
|
147
148
|
|
|
148
149
|
### Leader Loop
|
|
149
150
|
|
|
150
151
|
**CRITICAL: DO NOT STOP between iterations.** You MUST continue the loop automatically until a sentinel is written (COMPLETE or BLOCKED) or max_iter is reached. Do NOT pause to ask the user. Do NOT wait for confirmation. The loop is fully autonomous — just report each iteration result briefly and immediately proceed to the next iteration.
|
|
151
152
|
|
|
153
|
+
If `--debug`, at loop start debug_log: `[PLAN] slug=<slug> max_iter=<N> worker_engine=<engine> worker_model=<model> verifier_engine=<engine> verifier_model=<model> verify_mode=<mode> consensus=<0|1> consensus_scope=<scope>`
|
|
154
|
+
|
|
152
155
|
For each iteration (1 to max_iter):
|
|
153
156
|
|
|
154
157
|
**① Check sentinels**
|
|
@@ -166,11 +169,13 @@ rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
|
|
|
166
169
|
**② Read memory.md** → Stop Status, Next Iteration Contract
|
|
167
170
|
- Also read **Completed Stories** → verified work so far
|
|
168
171
|
- Also read **Key Decisions** → settled architectural choices
|
|
172
|
+
- If `--debug`: debug_log `[EXEC] iter=N phase=read_memory stop_status=<status> contract="<summary>"`
|
|
169
173
|
|
|
170
174
|
**③ Decide model** (§4 of governance.md)
|
|
171
175
|
- Previous iteration failed → upgrade model
|
|
172
176
|
- Simple task → downgrade
|
|
173
177
|
- User specified → use that
|
|
178
|
+
- If `--debug`: debug_log `[EXEC] iter=N phase=model_select worker_model=<model> reason=<reason>`
|
|
174
179
|
|
|
175
180
|
**④ Build worker prompt**
|
|
176
181
|
- Read `.claude/ralph-desk/prompts/<slug>.worker.prompt.md`
|
|
@@ -178,6 +183,7 @@ rm -f .claude/ralph-desk/memos/<slug>-verify-verdict.json
|
|
|
178
183
|
- Write to `.claude/ralph-desk/logs/<slug>/iter-NNN.worker-prompt.md` (audit trail)
|
|
179
184
|
|
|
180
185
|
**⑤ Execute Worker**
|
|
186
|
+
- If `--debug`: debug_log `[EXEC] iter=N phase=worker engine=<engine> model=<model> dispatched=true`
|
|
181
187
|
|
|
182
188
|
If `--worker-engine claude` (default):
|
|
183
189
|
```
|
|
@@ -199,11 +205,14 @@ Bash("codex exec --model <worker_codex_model> --reasoning-effort <worker_codex_r
|
|
|
199
205
|
- Codex runs as a subprocess via Bash(), not Agent().
|
|
200
206
|
- Each Bash() call = fresh context for codex.
|
|
201
207
|
|
|
208
|
+
- If `--debug`: debug_log `[EXEC] iter=N phase=worker_done engine=<engine>`
|
|
209
|
+
|
|
202
210
|
**⑥ Read memory.md again** (Worker updated it)
|
|
203
211
|
- `stop=continue` → go to ⑧
|
|
204
212
|
- `stop=verify` → go to ⑦
|
|
205
213
|
- `stop=blocked` → write BLOCKED sentinel, stop
|
|
206
214
|
- Also read `iter-signal.json` for `us_id` field (which US was just completed)
|
|
215
|
+
- If `--debug`: debug_log `[EXEC] iter=N phase=worker_signal status=<stop_status> us_id=<us_id>`
|
|
207
216
|
|
|
208
217
|
**⑦ Execute Verifier**
|
|
209
218
|
|
|
@@ -225,6 +234,7 @@ Bash("codex exec --model <worker_codex_model> --reasoning-effort <worker_codex_r
|
|
|
225
234
|
- Verifier checks all AC at once
|
|
226
235
|
|
|
227
236
|
**⑦a Dispatch Verifier**
|
|
237
|
+
- If `--debug`: debug_log `[EXEC] iter=N phase=verifier engine=<engine> model=<model> scope=<us_id> dispatched=true`
|
|
228
238
|
|
|
229
239
|
If `--verifier-engine claude` (default):
|
|
230
240
|
```
|
|
@@ -263,6 +273,8 @@ After the primary verifier runs, run a second verifier with the OTHER engine:
|
|
|
263
273
|
5. Go to ⑧ with fix contract as next Worker contract
|
|
264
274
|
- `request_info` → Leader reads Verifier's questions, decides outcome (or relays to Worker in next contract) → go to ⑧
|
|
265
275
|
- `blocked` → write BLOCKED sentinel, stop
|
|
276
|
+
- If `--debug`: debug_log `[EXEC] iter=N phase=verdict engine=<engine> verdict=<pass|fail|request_info> us_id=<us_id>`
|
|
277
|
+
- If `--debug` and consensus: debug_log `[EXEC] iter=N phase=consensus claude=<verdict> codex=<verdict> round=<N>`
|
|
266
278
|
|
|
267
279
|
**⑧ Write result log and report to user, continue loop**
|
|
268
280
|
- Write `logs/<slug>/iter-NNN.result.md`:
|
|
@@ -271,6 +283,10 @@ After the primary verifier runs, run a second verifier with the OTHER engine:
|
|
|
271
283
|
- Verifier verdict `[leader-measured]`
|
|
272
284
|
- Write `status.json`
|
|
273
285
|
- Report: iteration N, phase, model used, result
|
|
286
|
+
- If `--debug`: debug_log `[EXEC] iter=N phase=result status=<result> consecutive_failures=<N> verified_us=<list>`
|
|
287
|
+
|
|
288
|
+
At loop end (COMPLETE, BLOCKED, or TIMEOUT):
|
|
289
|
+
- If `--debug`: debug_log `[VALIDATE] result=<COMPLETE|BLOCKED|TIMEOUT> iterations=<N> verified_us=<list>`
|
|
274
290
|
|
|
275
291
|
### Circuit Breaker
|
|
276
292
|
- context-latest.md unchanged 3 iterations → BLOCKED
|
|
@@ -342,7 +358,7 @@ Run options:
|
|
|
342
358
|
--verify-mode per-us|batch Verification strategy (default: per-us)
|
|
343
359
|
--verify-consensus Cross-engine consensus verification
|
|
344
360
|
--consensus-scope SCOPE When consensus runs: all|final-only (default: all)
|
|
345
|
-
--debug Debug logging (
|
|
361
|
+
--debug Debug logging (logs/<slug>/debug.log)
|
|
346
362
|
```
|
|
347
363
|
|
|
348
364
|
## Architecture
|
|
@@ -1411,7 +1411,56 @@ run_consensus_verification() {
|
|
|
1411
1411
|
# Consensus disagreement
|
|
1412
1412
|
log_debug "[EXEC] iter=$iter phase=consensus_disagreement round=$CONSENSUS_ROUND claude=$CLAUDE_VERDICT codex=$CODEX_VERDICT action=fix_contract"
|
|
1413
1413
|
|
|
1414
|
-
#
|
|
1414
|
+
# --- Pre-existing failure detection ---
|
|
1415
|
+
# Get files changed by Worker in this iteration
|
|
1416
|
+
local worker_changed_files=""
|
|
1417
|
+
worker_changed_files=$(cd "$ROOT" && git diff --name-only HEAD~1 HEAD 2>/dev/null || echo "")
|
|
1418
|
+
log_debug "[EXEC] iter=$iter worker_changed_files=\"$worker_changed_files\""
|
|
1419
|
+
|
|
1420
|
+
# Check if ALL failing issues reference files NOT touched by the worker
|
|
1421
|
+
local has_worker_caused_issues=0
|
|
1422
|
+
local failing_verdict_file=""
|
|
1423
|
+
if [[ "$CLAUDE_VERDICT" = "fail" ]]; then failing_verdict_file="$claude_verdict_file"
|
|
1424
|
+
elif [[ "$CODEX_VERDICT" = "fail" ]]; then failing_verdict_file="$codex_verdict_file"
|
|
1425
|
+
fi
|
|
1426
|
+
|
|
1427
|
+
if [[ -n "$failing_verdict_file" && -n "$worker_changed_files" ]]; then
|
|
1428
|
+
# Extract file paths mentioned in issues and check against worker changes
|
|
1429
|
+
local issue_files
|
|
1430
|
+
issue_files=$(jq -r '.issues[]? | .description // ""' "$failing_verdict_file" 2>/dev/null)
|
|
1431
|
+
for changed_file in $(echo "$worker_changed_files"); do
|
|
1432
|
+
if echo "$issue_files" | grep -q "$changed_file" 2>/dev/null; then
|
|
1433
|
+
has_worker_caused_issues=1
|
|
1434
|
+
break
|
|
1435
|
+
fi
|
|
1436
|
+
done
|
|
1437
|
+
|
|
1438
|
+
if (( ! has_worker_caused_issues )); then
|
|
1439
|
+
# None of the failing issues reference files the worker changed
|
|
1440
|
+
log " Pre-existing failure detected: failing tests are NOT in files changed by Worker."
|
|
1441
|
+
log_debug "[EXEC] iter=$iter pre_existing_failure=true failing_engine=$([ \"$CLAUDE_VERDICT\" = 'fail' ] && echo claude || echo codex)"
|
|
1442
|
+
|
|
1443
|
+
# Treat as pass — the other engine passed, and failures are pre-existing
|
|
1444
|
+
{
|
|
1445
|
+
echo '{'
|
|
1446
|
+
echo ' "verdict": "pass",'
|
|
1447
|
+
echo ' "verified_at_utc": "'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'",'
|
|
1448
|
+
echo ' "summary": "Consensus PASS (pre-existing failure filtered): claude='"$CLAUDE_VERDICT"' codex='"$CODEX_VERDICT"'. Failing tests not in worker-changed files.",'
|
|
1449
|
+
echo ' "recommended_state_transition": "complete",'
|
|
1450
|
+
echo ' "pre_existing_failure": true,'
|
|
1451
|
+
echo ' "worker_changed_files": "'"$(echo $worker_changed_files | tr '\n' ',')"'",'
|
|
1452
|
+
echo ' "consensus": {'
|
|
1453
|
+
echo ' "claude": { "verdict": "'"$CLAUDE_VERDICT"'" },'
|
|
1454
|
+
echo ' "codex": { "verdict": "'"$CODEX_VERDICT"'" },'
|
|
1455
|
+
echo ' "round": '"$CONSENSUS_ROUND"
|
|
1456
|
+
echo ' }'
|
|
1457
|
+
echo '}'
|
|
1458
|
+
} | atomic_write "$VERDICT_FILE"
|
|
1459
|
+
return 0
|
|
1460
|
+
fi
|
|
1461
|
+
fi
|
|
1462
|
+
|
|
1463
|
+
# --- Worker-caused failure: build fix contract as before ---
|
|
1415
1464
|
local fix_contract="$LOGS_DIR/iter-$(printf '%03d' $iter).fix-contract.md"
|
|
1416
1465
|
{
|
|
1417
1466
|
echo "# Fix Contract (Consensus Round $CONSENSUS_ROUND, iteration $iter)"
|
|
@@ -1577,9 +1626,13 @@ main() {
|
|
|
1577
1626
|
PREV_CONTEXT_HASH=$(compute_context_hash)
|
|
1578
1627
|
|
|
1579
1628
|
# --- governance.md s7: Leader Loop ---
|
|
1629
|
+
local HARD_CEILING=$(( ITER_TIMEOUT * 3 )) # absolute max per iteration (no extensions beyond this)
|
|
1630
|
+
|
|
1580
1631
|
for (( ITERATION = 1; ITERATION <= MAX_ITER; ITERATION++ )); do
|
|
1581
1632
|
log ""
|
|
1582
1633
|
log "========== Iteration $ITERATION / $MAX_ITER =========="
|
|
1634
|
+
local ITER_START_TIME
|
|
1635
|
+
ITER_START_TIME=$(date +%s)
|
|
1583
1636
|
local _iter_contract=""
|
|
1584
1637
|
_iter_contract=$(sed -n '/^## Next Iteration Contract$/,/^## /{ /^## Next/d; /^## [^N]/d; p; }' "$MEMORY_FILE" 2>/dev/null | head -1 | tr '\n' ' ')
|
|
1585
1638
|
log_debug "[EXEC] iter=$ITERATION start contract=\"${_iter_contract:-none}\""
|
|
@@ -1692,8 +1745,20 @@ main() {
|
|
|
1692
1745
|
local worker_cmd
|
|
1693
1746
|
worker_cmd=$(tmux display-message -p -t "$WORKER_PANE" '#{pane_current_command}' 2>/dev/null)
|
|
1694
1747
|
if [[ "$worker_cmd" == "node" || "$worker_cmd" == "claude" || "$worker_cmd" == "codex" ]]; then
|
|
1695
|
-
|
|
1696
|
-
|
|
1748
|
+
# Check hard ceiling before extending
|
|
1749
|
+
local iter_elapsed=$(( $(date +%s) - ITER_START_TIME ))
|
|
1750
|
+
if (( iter_elapsed >= HARD_CEILING )); then
|
|
1751
|
+
log_error "Worker hit hard ceiling (${HARD_CEILING}s = 3x iter_timeout). Killing iteration."
|
|
1752
|
+
log_debug "[EXEC] iter=$ITERATION hard_ceiling_hit=true elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s process=$worker_cmd"
|
|
1753
|
+
tmux send-keys -t "$WORKER_PANE" C-c 2>/dev/null
|
|
1754
|
+
sleep 1
|
|
1755
|
+
WORKER_PANE=$(replace_worker_pane "$WORKER_PANE" "worker")
|
|
1756
|
+
update_status "worker" "hard_timeout"
|
|
1757
|
+
worker_poll_done=1
|
|
1758
|
+
break
|
|
1759
|
+
fi
|
|
1760
|
+
log " Worker timed out but still active ($worker_cmd). Extending poll... (${iter_elapsed}s/${HARD_CEILING}s)"
|
|
1761
|
+
log_debug "[EXEC] iter=$ITERATION timeout_active=true process=$worker_cmd elapsed=${iter_elapsed}s ceiling=${HARD_CEILING}s"
|
|
1697
1762
|
log_debug "[EXEC] iter=$ITERATION poll_extended=true worker_cmd=$worker_cmd"
|
|
1698
1763
|
update_status "worker" "slow"
|
|
1699
1764
|
# Loop continues — re-poll same iteration
|