@ai-dev-methodologies/rlp-desk 0.7.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,885 +0,0 @@
1
- #!/bin/zsh
2
- set -euo pipefail
3
-
4
- # =============================================================================
5
- # Ralph Desk Project Initializer for Claude Code
6
- #
7
- # User-level tool: ~/.claude/ralph-desk/init_ralph_desk.zsh
8
- # Creates project-local scaffold in: .claude/ralph-desk/
9
- #
10
- # Usage:
11
- # ~/.claude/ralph-desk/init_ralph_desk.zsh <slug> [objective] [--mode fresh|improve]
12
- # =============================================================================
13
-
14
# -----------------------------------------------------------------------------
# Command-line handling.
#
#   $1                    campaign slug (required)
#   [objective]           free-text objective; the last bare argument wins
#   --mode fresh|improve  re-execution mode (optional)
#   --server-cmd CMD      command that starts the server (optional)
#   --server-port PORT    port the server listens on (optional)
#   --server-health URL   health-check URL (optional)
#
# Every option accepts both "--opt value" and "--opt=value".
# -----------------------------------------------------------------------------
SLUG="${1:?Usage: $0 <slug> [objective] [--mode fresh|improve] [--server-cmd CMD] [--server-port PORT] [--server-health URL]}"
MODE=""
OBJECTIVE="TBD - fill in the objective"
SERVER_CMD=""
SERVER_PORT=""
SERVER_HEALTH=""

# Parse remaining arguments
shift
while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode)
      MODE="${2:?--mode requires an argument: fresh|improve}"
      shift 2
      ;;
    --mode=*)
      MODE="${1#--mode=}"
      shift
      ;;
    --server-cmd)
      SERVER_CMD="${2:?--server-cmd requires a command}"
      shift 2
      ;;
    --server-cmd=*)
      SERVER_CMD="${1#--server-cmd=}"
      shift
      ;;
    --server-port)
      SERVER_PORT="${2:?--server-port requires a port number}"
      shift 2
      ;;
    --server-port=*)
      SERVER_PORT="${1#--server-port=}"
      shift
      ;;
    --server-health)
      SERVER_HEALTH="${2:?--server-health requires a URL}"
      shift 2
      ;;
    --server-health=*)
      SERVER_HEALTH="${1#--server-health=}"
      shift
      ;;
    *)
      OBJECTIVE="$1"
      shift
      ;;
  esac
done

# Any non-empty MODE triggers the destructive re-execution cleanup further
# down, so refuse values other than the two documented ones instead of
# deleting artifacts because of a typo.
case "$MODE" in
  "" | fresh | improve) ;;
  *)
    echo "ERROR: invalid --mode '$MODE' (expected: fresh|improve)" >&2
    exit 2
    ;;
esac

# ROOT may be supplied through the environment; it defaults to the current
# working directory.
ROOT="${ROOT:-$PWD}"
DESK="$ROOT/.claude/ralph-desk"
# Absolute directory that contains this script.
RUNNER_DIR="$(cd "$(dirname "$0")" && pwd)"
67
-
68
- # --- Re-execution versioning helpers ---
69
- # Handles ONLY debug.log and campaign-report.md versioning.
70
- # SV reports use their own -NNN auto-increment pattern and are NOT handled here.
71
-
72
# detect_next_version FILE_PATH
#
# Prints the smallest N >= 1 for which "<dir>/<name>-vN<.ext>" is not an
# existing regular file. <.ext> is the text after the last dot of the file
# name, or empty when the name contains no dot.
detect_next_version() {
  local target="$1"
  local parent stem suffix="" counter=1

  parent="$(dirname "$target")"
  stem="$(basename "$target")"

  # Split "name.ext" into stem + ".ext"; the suffix must be taken before the
  # stem is shortened.
  case "$stem" in
    *.*)
      suffix=".${stem##*.}"
      stem="${stem%.*}"
      ;;
  esac

  # Walk v1, v2, ... until an unused slot is found.
  until [[ ! -f "$parent/${stem}-v${counter}${suffix}" ]]; do
    counter=$(( counter + 1 ))
  done

  echo "$counter"
}
88
-
89
# version_file FILE_PATH
#
# When FILE_PATH is an existing regular file, renames it to the first unused
# "<name>-vN<.ext>" sibling (N counted up from 1) and reports the rename on
# stdout. A missing file is skipped silently and is not an error.
version_file() {
  local source_path="$1"

  # Nothing to archive.
  [[ -f "$source_path" ]] || return 0

  local folder leaf stem suffix="" idx=1
  folder="$(dirname "$source_path")"
  leaf="$(basename "$source_path")"
  stem="$leaf"

  # Separate the extension (text after the last dot), if any.
  case "$leaf" in
    *.*)
      suffix=".${leaf##*.}"
      stem="${leaf%.*}"
      ;;
  esac

  # Find the first free version slot.
  while [[ -f "$folder/${stem}-v${idx}${suffix}" ]]; do
    idx=$(( idx + 1 ))
  done

  mv "$source_path" "$folder/${stem}-v${idx}${suffix}"
  echo " Versioned: ${leaf} → ${stem}-v${idx}${suffix}"
}
107
-
108
- # --- PRD/test-spec per-US splitting helpers ---
109
-
110
# split_prd_by_us PRD_FILE SLUG
#
# Splits PRD_FILE into one file per user story, written next to it as
# "prd-<SLUG>-US-<NNN>.md". A story starts at a "### US-<digits>:" heading and
# runs up to the next such heading (or end of file).
#
# Per-US files left over from earlier runs are always removed first, so a story
# deleted from the PRD cannot survive as a stale artifact. When the PRD has no
# story heading, a warning is written to stderr and no files are produced.
#
# A missing PRD_FILE is a no-op. Returns 0 in all handled cases.
split_prd_by_us() {
  local prd_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$prd_file")"

  [[ -f "$prd_file" ]] || return 0

  # The directory holds at least PRD_FILE itself, so the plain "*" glob always
  # matches and needs no nullglob handling. Names are filtered with a case
  # pattern in which the slug is quoted, i.e. taken literally.
  local entry
  local stale_count=0
  for entry in "$plans_dir"/*; do
    case "${entry##*/}" in
      "prd-${slug}-US-"*.md)
        if [[ -f "$entry" && "$entry" != "$prd_file" ]]; then
          rm -f -- "$entry"
          stale_count=$(( stale_count + 1 ))
        fi
        ;;
    esac
  done

  # Detect stories with the same pattern the splitter uses; a looser check
  # would let headings through that the splitter then ignores.
  if ! grep -Eq '^### US-[0-9]+:' "$prd_file"; then
    echo " WARNING: No US markers (### US-NNN:) found in PRD — falling back to full PRD injection" >&2
    if [[ $stale_count -gt 0 ]]; then
      echo " Cleaned $stale_count stale prd per-US file(s)"
    fi
    return 0
  fi

  # Each story heading switches the output file; lines before the first
  # heading are dropped.
  awk -v dir="$plans_dir" -v slug="$slug" '
    /^### US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      us_id = substr($0, RSTART, RLENGTH)
      out = dir "/prd-" slug "-" us_id ".md"
    }
    out != "" { print > out }
  ' "$prd_file"

  # Count the generated files without parsing ls output.
  local count=0
  for entry in "$plans_dir"/*; do
    case "${entry##*/}" in
      "prd-${slug}-US-"*.md)
        if [[ -f "$entry" && "$entry" != "$prd_file" ]]; then
          count=$(( count + 1 ))
        fi
        ;;
    esac
  done
  echo " Split PRD: $count per-US files"
}
145
-
146
# split_test_spec_by_us TS_FILE SLUG
#
# Splits TS_FILE into one file per user story, written next to it as
# "test-spec-<SLUG>-US-<NNN>.md". A story section starts at a "## US-<digits>:"
# heading. Everything before the first story heading (the global header, e.g.
# Verification Commands) is prepended to every per-US file.
#
# Per-US files from earlier runs are removed first. Otherwise a file belonging
# to a story that no longer exists would be kept and would receive another copy
# of the header on every run.
#
# A missing TS_FILE is a no-op. When no story heading exists, a warning goes to
# stderr and no files are produced. Returns 0 in all handled cases.
split_test_spec_by_us() {
  local ts_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$ts_file")"

  [[ -f "$ts_file" ]] || return 0

  # The directory holds at least TS_FILE, so "*" always matches; the slug is
  # quoted inside the case pattern and therefore matched literally.
  local entry
  for entry in "$plans_dir"/*; do
    case "${entry##*/}" in
      "test-spec-${slug}-US-"*.md)
        if [[ -f "$entry" && "$entry" != "$ts_file" ]]; then
          rm -f -- "$entry"
        fi
        ;;
    esac
  done

  # Same pattern as the splitter below, so detection and splitting agree.
  if ! grep -Eq '^## US-[0-9]+:' "$ts_file"; then
    echo " WARNING: No US section markers (## US-NNN:) in test-spec — skipping split" >&2
    return 0
  fi

  # Extract global header (everything before first ## US- section).
  local header_tmp="${plans_dir}/test-spec-${slug}-header.tmp.$$"
  awk '/^## US-[0-9]+:/{exit} {print}' "$ts_file" > "$header_tmp"

  awk -v dir="$plans_dir" -v slug="$slug" '
    /^## US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      us_id = substr($0, RSTART, RLENGTH)
      out = dir "/test-spec-" slug "-" us_id ".md"
    }
    out != "" { print > out }
  ' "$ts_file"

  # Prepend the global header to each freshly written per-US file and count
  # them in the same pass.
  local count=0
  local tmp
  for entry in "$plans_dir"/*; do
    case "${entry##*/}" in
      "test-spec-${slug}-US-"*.md)
        if [[ -f "$entry" && "$entry" != "$ts_file" ]]; then
          tmp="${entry}.tmp.$$"
          cat "$header_tmp" "$entry" > "$tmp" && mv "$tmp" "$entry"
          count=$(( count + 1 ))
        fi
        ;;
    esac
  done
  rm -f "$header_tmp"

  echo " Split test-spec: $count per-US files (with global header)"
}
191
-
192
- # --- Run command presets ---
193
- # Detects codex CLI availability and shows appropriate run command presets.
194
- # AC1: codex installed → cross-engine preset first, spark Pro, claude-only, basic
195
- # AC2: codex not installed → tmux + claude-only first, install recommendation
196
- # AC3: full options reference with defaults always shown
197
# print_run_presets SLUG
#
# Prints copy-paste "/rlp-desk run" command presets for SLUG to stdout,
# followed by the option reference. Which presets are listed depends on
# whether a `codex` command can be resolved.
print_run_presets() {
  local slug="$1"
  local rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
  local run="/rlp-desk run $slug"

  echo "$rule"
  echo "Available run commands (copy the one you want):"
  echo ""

  if command -v codex &>/dev/null; then
    # codex present: lead with the cross-engine preset.
    echo "# Recommended: cross-engine + final-consensus (full context + blind-spot coverage):"
    echo "$run --worker-model gpt-5.4:medium --final-consensus --debug"
    echo ""
    echo "# Small tasks only (single-file, AC <= 4, simple logic — spark 100k context limit):"
    echo "$run --worker-model gpt-5.3-codex-spark:high --debug"
    echo ""
    echo "# Claude-only:"
    echo "$run --debug"
    echo ""
    echo "# Basic agent:"
    echo "$run"
  else
    # codex absent: lead with tmux + claude-only and suggest installing codex.
    echo "# Recommended: tmux mode + claude-only (real-time visibility):"
    echo "$run --mode tmux --debug"
    echo ""
    echo "# Agent mode:"
    echo "$run --debug"
    echo ""
    echo "# Install codex for cost savings + cross-engine blind-spot coverage:"
    echo "npm install -g @openai/codex"
  fi

  # The option reference is always shown.
  printf '%s\n' \
    "" \
    "# Full options reference:" \
    "# --mode agent|tmux (default: agent)" \
    "# --worker-model MODEL haiku|sonnet|opus or gpt-5.4:low|medium|high (default: sonnet)" \
    "# --verifier-model MODEL haiku|sonnet|opus (default: opus)" \
    "# --verify-consensus both claude+codex must pass" \
    "# --verify-mode per-us|batch (default: per-us)" \
    "# --max-iter N (default: 100)" \
    "# --debug enable debug logging" \
    "# --with-self-verification post-campaign analysis report"
  echo "$rule"
}
239
-
240
- echo "Initializing Ralph Desk: $SLUG"
241
- echo " Root: $ROOT"
242
- echo " Desk: $DESK"
243
- [[ -n "$MODE" ]] && echo " Mode: $MODE"
244
- echo ""
245
-
246
- mkdir -p "$DESK/prompts" "$DESK/context" "$DESK/memos" "$DESK/plans" "$DESK/logs/$SLUG"
247
-
248
- # --- Re-execution lifecycle (--mode handling) ---
249
- PRD_FILE="$DESK/plans/prd-$SLUG.md"
250
- LOGS_DIR="$DESK/logs/$SLUG"
251
-
252
- if [[ -n "$MODE" ]]; then
253
- echo "Re-execution mode: --mode $MODE"
254
- echo ""
255
-
256
- DELETED_COUNT=0
257
-
258
- # Version debug.log and campaign-report.md (NOT self-verification-report — uses -NNN)
259
- version_file "$LOGS_DIR/debug.log"
260
- version_file "$LOGS_DIR/campaign-report.md"
261
-
262
- # Delete iter-* artifacts (archived done-claims, verdicts, prompt logs, results)
263
- for f in "$LOGS_DIR"/iter-*(N); do
264
- [[ -f "$f" ]] && { rm "$f"; (( ++DELETED_COUNT )); }
265
- done
266
-
267
- # Delete runtime memos
268
- for f in \
269
- "$DESK/memos/$SLUG-done-claim.json" \
270
- "$DESK/memos/$SLUG-iter-signal.json" \
271
- "$DESK/memos/$SLUG-verify-verdict.json" \
272
- "$DESK/memos/$SLUG-complete.md" \
273
- "$DESK/memos/$SLUG-blocked.md"; do
274
- [[ -f "$f" ]] && { rm "$f"; (( ++DELETED_COUNT )); }
275
- done
276
-
277
- # Delete status.json, baseline.log, cost-log.jsonl
278
- for f in "$LOGS_DIR/runtime/status.json" "$LOGS_DIR/status.json" "$LOGS_DIR/baseline.log" "$LOGS_DIR/cost-log.jsonl"; do
279
- [[ -f "$f" ]] && { rm "$f"; (( ++DELETED_COUNT )); }
280
- done
281
-
282
- # Delete test-spec only for fresh re-execution mode; improve preserves custom edits
283
- # and reruns split logic on the existing file.
284
- for f in \
285
- "$DESK/plans/test-spec-$SLUG.md" \
286
- "$DESK/prompts/$SLUG.worker.prompt.md" \
287
- "$DESK/prompts/$SLUG.verifier.prompt.md"; do
288
- [[ -f "$f" ]] &&
289
- if [[ "$MODE" == "fresh" ]] || [[ "$f" != "$DESK/plans/test-spec-$SLUG.md" ]]; then
290
- rm "$f"; (( ++DELETED_COUNT ));
291
- fi
292
- done
293
-
294
- # Reset memory and context to fresh templates (rm here; scaffold below regenerates them)
295
- rm -f "$DESK/memos/$SLUG-memory.md" "$DESK/context/$SLUG-latest.md"
296
-
297
- # PRD handling: --mode fresh deletes PRD; --mode improve preserves PRD in-place
298
- if [[ "$MODE" == "fresh" ]]; then
299
- [[ -f "$PRD_FILE" ]] && { rm "$PRD_FILE"; (( ++DELETED_COUNT )); echo " Deleted: prd-$SLUG.md (--mode fresh: PRD deleted for fresh start)"; }
300
- fi
301
-
302
- # Re-execution summary
303
- echo " Re-execution summary:"
304
- if [[ "$MODE" == "improve" ]]; then
305
- echo " Preserved: prd-$SLUG.md (--mode improve: PRD kept in-place)"
306
- fi
307
- echo " Deleted: $DELETED_COUNT runtime artifacts"
308
- echo " Reset: memory.md + context.md (regenerating from templates)"
309
- echo ""
310
- fi
311
-
312
- # --- Worker Prompt ---
313
- F="$DESK/prompts/$SLUG.worker.prompt.md"
314
- if [[ ! -f "$F" ]]; then
315
- cat > "$F" <<EOF
316
- Execute the plan for $SLUG.
317
-
318
- ## Before you start
319
- Read these files in order:
320
- 1. Campaign Memory: $DESK/memos/$SLUG-memory.md → Next Iteration Contract is your mission
321
- 2. PRD: $DESK/plans/prd-$SLUG.md → acceptance criteria
322
- 3. Test Spec: $DESK/plans/test-spec-$SLUG.md → verification methods
323
- 4. Latest Context: $DESK/context/$SLUG-latest.md → current state
324
-
325
- ## TDD MANDATE (hard constraint — violation = automatic FAIL)
326
- > Write failing tests FIRST → confirm RED (exit_code=1) → implement minimum code → confirm GREEN.
327
- > Every NEW AC requires: write_test → verify_red → implement → verify_green in execution_steps.
328
- > No exceptions. Verifier rejects missing RED evidence. For already-passing ACs, use verify_existing.
329
-
330
- ## SCOPE LOCK (hard constraint — violation causes verification failure)
331
- - You MUST only implement the work described in the "Next Iteration Contract" from campaign memory.
332
- - If the contract says "implement US-001 only", do ONLY that. Do NOT touch other stories.
333
- - If the contract says "implement all remaining stories", you may do all of them.
334
- - Do NOT go beyond the contracted scope, even if you can see more work in the PRD.
335
- - No file creation or modification outside the project root.
336
- - Do not modify this prompt file or any PRD/test-spec files.
337
-
338
- ## Forbidden Shortcuts (Verifier will check these)
339
- - Do not mock external services when L2 integration test is required by test-spec.
340
- - Do not delete or weaken existing assertions to make tests pass.
341
- - Do not skip boundary cases listed in the PRD.
342
- - Do not write code before tests — if you did, delete it and start with tests.
343
- - **NEVER modify rlp-desk infrastructure files** (~/.claude/ralph-desk/*, ~/.claude/commands/rlp-desk.md). If you discover a bug in rlp-desk itself, report it in done-claim.json with {"status": "blocked", "reason": "rlp-desk bug: <description>"} and signal blocked. Do NOT attempt to fix rlp-desk — it is the orchestration tool, not your project code.
344
- - **NEVER modify Claude Code settings** (~/.claude/settings.json, .claude/settings.local.json, or any settings files). Do NOT add permissions, change models, or alter configuration. If a permission prompt blocks you, report it as blocked — do NOT try to edit settings to bypass it.
345
-
346
- ## When Stuck (do NOT guess-and-fix)
347
- > 1. STOP and READ the error. Trace the call stack. Identify the root cause before touching code.
348
- > 2. Write a minimal test that reproduces the failure, then fix the root cause only.
349
- > 3. If 3+ fixes fail on the same issue, signal "blocked" with your diagnosis.
350
-
351
- ## Iteration rules
352
- - Use fresh context only; do NOT depend on prior chat history.
353
- - Execute exactly the work specified in the Next Iteration Contract.
354
- - Refresh context file with the current frontier.
355
- - Rewrite campaign memory in full.
356
- - Write evidence artifacts.
357
- - **After writing tests, update test-spec Criteria Mapping with actual test file paths and function names** (replace placeholder -k filters).
358
- - Ensure **each AC has >= 3 tests** (happy + negative + boundary). Do not just meet the total count — distribute evenly per AC.
359
- - **Commit all changes when the iteration is complete** (include iteration number and story ID in commit message).
360
-
361
- MANDATORY: When done with this iteration, write the following signal file:
362
- - Path: $DESK/memos/$SLUG-iter-signal.json
363
- - Format: {"iteration": N, "status": "continue|verify|blocked", "us_id": "US-NNN or null", "summary": "what was done", "timestamp": "ISO"}
364
- - Status values:
365
- - "continue" = current action done but more work remains (no verify needed yet)
366
- - "verify" = current US complete + done-claim written → Verifier checks this US
367
- - "blocked" = autonomous blocker
368
-
369
- ## Signal rules (per-US verification)
370
- - After completing EACH user story → signal "verify" with "us_id" set to the story you just finished (e.g., "US-001").
371
- - The Verifier will check ONLY that story's acceptance criteria.
372
- - After ALL stories individually pass verification → signal "verify" with "us_id": "ALL" for a final full verify of all AC.
373
- - Do NOT signal "continue" when a US is done — always signal "verify" per US.
374
- - Signal "continue" ONLY when you have more work to do within the same US (e.g., a multi-step task).
375
-
376
- ## Done Claim Format
377
- When writing done-claim JSON, ALWAYS include execution_steps — what you did, in what order, with evidence:
378
- \`\`\`json
379
- {
380
- "us_id": "US-NNN",
381
- "claims": ["AC1: ...", "AC2: ..."],
382
- "execution_steps": [
383
- {"step": "write_test", "ac_id": "AC1", "command": null, "summary": "wrote tests/test_add.py with 3 tests"},
384
- {"step": "verify_red", "ac_id": "AC1", "command": "pytest tests/...", "exit_code": 1, "summary": "RED: test fails as expected"},
385
- {"step": "implement", "ac_id": "AC1", "command": null, "summary": "created add() function"},
386
- {"step": "verify_green", "ac_id": "AC1", "command": "pytest tests/...", "exit_code": 0, "summary": "GREEN: 3 passed"},
387
- {"step": "verify_e2e", "ac_id": "AC1", "command": "python -c '...'", "exit_code": 0, "summary": "E2E output matches expected"},
388
- {"step": "commit", "ac_id": "AC1", "command": "git commit ...", "exit_code": 0, "summary": "committed abc1234"}
389
- ]
390
- }
391
- \`\`\`
392
- This is NOT optional. Every done-claim must include the steps you took and the evidence for each.
393
- execution_steps MUST be a JSON array of objects (not a dict with string keys). Each object MUST have: "step", "ac_id", "command", "exit_code", "summary".
394
-
395
- ## Stop behavior
396
- - Single US achieved → write done-claim JSON to $DESK/memos/$SLUG-done-claim.json with the specific US, signal verify, exit
397
- - All US achieved → write done-claim JSON with all US, signal verify with us_id "ALL", exit
398
- - Autonomous blocker → write to $DESK/memos/$SLUG-blocked.md, exit
399
- - Otherwise → set stop=continue, define next iteration contract in memory, exit
400
-
401
- ## Objective
402
- $OBJECTIVE
403
- EOF
404
-
405
# Inject operational context if server options provided

# _worker_operational_context CMD PORT HEALTH
#
# Prints the "Operational Context" section for the worker prompt. Any of the
# three arguments may be empty; the bullet for an empty value is rendered as an
# empty line.
#
# The "verify the server responds" hint uses the health URL when one is given
# and falls back to http://localhost:PORT/ only when there is no health URL.
# Writing this as `A && echo … || B && echo …` is wrong: the shell evaluates it
# as `((A && echo) || B) && echo`, so the fallback is printed in addition to
# the health URL.
_worker_operational_context() {
  local cmd="$1" port="$2" health="$3"
  local cmd_line="" port_line="" health_line=""
  local restart_hint="" verify_hint=""

  if [[ -n "$cmd" ]]; then
    cmd_line="- **Server Start Command**: \`$cmd\`"
    restart_hint=": \`$cmd\`"
  fi
  if [[ -n "$port" ]]; then
    port_line="- **Server Port**: $port"
  fi
  if [[ -n "$health" ]]; then
    health_line="- **Health Check URL**: $health"
  fi

  # Exactly one probe: explicit health URL first, port-based URL otherwise.
  if [[ -n "$health" ]]; then
    verify_hint=": \`curl -sf $health\`"
  elif [[ -n "$port" ]]; then
    verify_hint=": \`curl -sf http://localhost:$port/\`"
  fi

  # printf '%s\n' emits every value literally, one line per argument.
  printf '%s\n' \
    "" \
    "## Operational Context" \
    "$cmd_line" \
    "$port_line" \
    "$health_line" \
    "" \
    "### Operational Rules (always apply when server context is present)" \
    "- After modifying server/application code, restart the server${restart_hint}" \
    "- Before signaling done, verify the server responds${verify_hint}" \
    "- Do NOT modify dependency files (package.json, requirements.txt, etc.) unless the AC explicitly requires it" \
    "- Do NOT run package install commands (npm install, pip install, etc.) unless the AC explicitly requires it"
}

# The section is appended only when a start command or a port was supplied.
if [[ -n "${SERVER_CMD:-}" || -n "${SERVER_PORT:-}" ]]; then
  _worker_operational_context "${SERVER_CMD:-}" "${SERVER_PORT:-}" "${SERVER_HEALTH:-}" >> "$F"
fi
421
-
422
- echo " + $F"
423
- else echo " · $F"; fi
424
-
425
- # --- Verifier Prompt ---
426
- F="$DESK/prompts/$SLUG.verifier.prompt.md"
427
- if [[ ! -f "$F" ]]; then
428
- cat > "$F" <<EOF
429
- Independent verifier for Ralph Desk: $SLUG
430
-
431
- ## Iron Law (ABSOLUTE — no exceptions)
432
- > NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE
433
- > "should pass", "probably works", "seems to" = automatic FAIL
434
-
435
- ## Evidence Gate (MANDATORY before any verdict)
436
- 1. IDENTIFY: What command proves this claim?
437
- 2. RUN: Execute the FULL command (fresh, complete)
438
- 3. READ: Full output, check exit code, count failures
439
- 4. VERIFY: Does output confirm the claim?
440
- 5. ONLY THEN: Issue verdict
441
-
442
- Required reads:
443
- - PRD: $DESK/plans/prd-$SLUG.md
444
- - Test Spec: $DESK/plans/test-spec-$SLUG.md
445
- - Campaign Memory: $DESK/memos/$SLUG-memory.md (orientation only — not source of truth)
446
- - Latest Context: $DESK/context/$SLUG-latest.md
447
- - Done Claim: $DESK/memos/$SLUG-done-claim.json
448
- - Iteration Signal: $DESK/memos/$SLUG-iter-signal.json (check us_id field)
449
-
450
- ## Verification Scope
451
- Check the iter-signal.json "us_id" field:
452
- - If us_id is a specific story (e.g., "US-001"): verify ONLY that story's acceptance criteria from the PRD.
453
- - If us_id is "ALL": verify ALL acceptance criteria from the PRD (final full verify).
454
- - If us_id is absent or null: verify all criteria in the done-claim (legacy/batch mode).
455
-
456
- ## Verification Process
457
- 1. Read PRD acceptance criteria (scoped to us_id if present)
458
- 2. Read done claim
459
- 3. Identify scope: run \`git diff --name-only\` to find changed files, then read those files + related imports only
460
- 4. **Scope Lock check**: (a) Read the Next Iteration Contract from campaign memory to identify the contracted US. (b) Run \`git diff --name-only\` to list all changed files. (c) For each changed file, verify it is plausibly related to the contracted US's acceptance criteria. (d) Flag files that appear unrelated. (e) Shared infrastructure (types, configs, common utilities) and dependency files are permitted if the AC implies them.
461
- 5. **Layer Enforcement**: check test-spec L1/L2/L3/L4 sections. ANY section with TODO or blank = FAIL (IL-3).
462
- 6. Run fresh verification: execute ALL commands from test-spec verification layers (L1, L2, L3, L4 as applicable)
463
- **Skip detection (IL-5)**: After running tests, check output for "skip", "pending", "not run", or "0 items collected". Tests that did not actually execute do NOT count as passed. If test_count_executed < test_count_expected, verdict = FAIL ("skipped tests detected").
464
- 7. Check each criterion against fresh evidence (only for the scoped US, or all if us_id=ALL)
465
- 8. Run smoke test if defined in PRD
466
- 9. **Test Sufficiency (IL-4)**: count test functions exercising each AC. Count < 3 per AC = FAIL.
467
- Check diversity: at least 2 of 3 categories (happy, negative, boundary) per AC.
468
- 10. **Anti-Gaming Detection**:
469
- - Assertion integrity: compare assertion count/strength via \`git diff HEAD~1\` — assertions not deleted or weakened
470
- - Test-specific logic: no environment-detection patterns
471
- - "Code inspection" claims: Worker must run actual commands
472
- - Tautological tests: expected values that mirror implementation logic
473
- 10¼. **Anti-Rubber-Stamp Self-Check**:
474
- - If your verdict history shows a 100% pass rate, re-examine your last verdict with increased scrutiny — a 100% pass rate is a red flag for insufficient rigor
475
- - When issuing PASS with explicit warning: note any concerning patterns (e.g., low test diversity, marginal coverage) even if technically passing
476
- - Never issue a silent PASS — every pass verdict must cite specific evidence for each AC checked
477
- - Rationalization red flags: "tests pass so it works" (passing ≠ correct), "Worker is confident" (confidence ≠ evidence), "changes are minimal" (scope ≠ correctness)
478
- 10½. **Worker Process Audit**:
479
- - Test-first compliance: done-claim execution_steps must show write_test step before implement step for each AC
480
- - RED phase evidence: at least one verify_red step with exit_code=1 per AC (proves tests were written before passing)
481
- - Forbidden shortcuts: check done-claim claims and summary for forbidden phrases ("code inspection", "I'm confident", "too simple", "I'll test after", "already manually tested", "partial check")
482
- - Step completeness: each AC should have write_test → verify_red → implement → verify_green sequence in execution_steps
483
- 11. **Reproducibility check**: verify lock file committed, clean install succeeds, security scan passes, env vars documented (per test-spec Reproducibility Gate). Skip if test-spec says "N/A."
484
- 12. Write verdict JSON to: $DESK/memos/$SLUG-verify-verdict.json
485
-
486
- Verdict JSON:
487
- {
488
- "verdict": "pass|fail|request_info",
489
- "us_id": "US-NNN or ALL (matches the scope you verified)",
490
- "verified_at_utc": "ISO timestamp",
491
- "summary": "...",
492
- "per_us_results": {"US-001": "pass|fail|not_started", "US-002": "pass|fail|not_started"},
493
- "criteria_results": [{"criterion":"...","met":true/false,"evidence":"..."}],
494
- "missing_evidence": [],
495
- "issues": [{"id":"...","severity":"critical|major|minor","description":"...","fix_hint":"(suggestion, non-authoritative)"}],
496
- "reasoning": [
497
- {"check": "IL-1 Evidence Gate", "decision": "pass|fail", "basis": "what command was run, what output confirmed the decision"},
498
- {"check": "Layer Enforcement", "decision": "pass|fail", "basis": "which layers checked, any TODO found"},
499
- {"check": "Test Sufficiency", "decision": "pass|fail", "basis": "test count per AC, category coverage"},
500
- {"check": "Anti-Gaming", "decision": "pass|fail", "basis": "what was checked, any suspicious patterns"},
501
- {"check": "Worker Process Audit", "decision": "pass|fail", "basis": "test-first followed: verify_red present per AC, no forbidden shortcuts in claims, execution_steps complete"}
502
- ],
503
- "layer_status": {"L1":"pass|fail|todo|na","L2":"pass|fail|todo|na","L3":"pass|fail|todo|na","L4":"pass|fail|todo|na"},
504
- "test_quality": {"test_count":0,"ac_count":0,"sufficiency":"pass|fail","anti_patterns_found":[]},
505
- "recommended_state_transition": "complete|continue|blocked",
506
- "next_iteration_contract": "...",
507
- "evidence_paths": []
508
- }
509
-
510
- Rules:
511
- - Do NOT trust the worker's claim. Verify with fresh evidence.
512
- - If uncertain, verdict = request_info (describe your specific question in summary so Leader can decide).
513
- - Campaign Memory is for orientation only — do NOT use it as source of truth for AC verification.
514
- - Deterministic checks (type hints, linting, security) delegate to test-spec tools; focus on AC verification + semantic review + smoke test.
515
- - Do NOT modify code or write sentinel files.
516
- - If Worker claims "inspection" or "review" for an AC that requires an automated command, verdict = FAIL.
517
- - **ALWAYS include per_us_results** in verdict JSON — map each US to "pass", "fail", or "not_started". This is required for partial progress tracking in both batch and per-us modes.
518
- EOF
519
-
520
# Inject operational verification if server options provided
if [[ -n "$SERVER_CMD" || -n "$SERVER_PORT" ]]; then
  # Optional suffixes for the "server is running" bullet. They are produced
  # with echo inside a command substitution, the same way the text is rendered
  # when the substitution sits directly in the here-document.
  port_note=""
  if [ -n "$SERVER_PORT" ]; then
    port_note="$(echo " on port $SERVER_PORT")"
  fi
  health_note=""
  if [ -n "$SERVER_HEALTH" ]; then
    health_note="$(echo ": \`curl -sf $SERVER_HEALTH\`")"
  fi

  # Append the section to the verifier prompt file.
  cat >> "$F" <<OPVER

## Operational Verification (server context present)
- Before verifying ACs, check that the server is running${port_note}${health_note}
- If the server is not running, verdict = FAIL with issue: "server not running on expected port"
- If Worker modified server code but did not restart the server, verdict = FAIL with issue: "server not restarted after code change"
OPVER
fi
530
-
531
- echo " + $F"
532
- else echo " · $F"; fi
533
-
534
- # --- Context ---
535
# Latest-context file for this campaign; created only when absent.
F="$DESK/context/$SLUG-latest.md"
if [[ -f "$F" ]]; then
  # Already present: leave untouched and mark it as existing.
  echo " · $F"
else
  # One argument per output line.
  printf '%s\n' \
    "# $SLUG - Latest Context" \
    "" \
    "## Current Frontier" \
    "### Completed" \
    "### In Progress" \
    "### Next" \
    "- (TBD by first worker)" \
    "" \
    "## Key Decisions" \
    "## Known Issues" \
    "## Files Changed This Iteration" \
    "## Verification Status" \
    > "$F"
  echo " + $F"
fi
553
-
554
- # --- Campaign Memory ---
555
# Campaign memory file for this campaign; created only when absent.
F="$DESK/memos/$SLUG-memory.md"
if [[ -f "$F" ]]; then
  # Already present: leave untouched and mark it as existing.
  echo " · $F"
else
  # One argument per output line; $OBJECTIVE is inserted verbatim.
  printf '%s\n' \
    "# $SLUG - Campaign Memory" \
    "" \
    "## Stop Status" \
    "continue" \
    "" \
    "## Objective" \
    "$OBJECTIVE" \
    "" \
    "## Current State" \
    "Iteration 0 - not started" \
    "" \
    "## Completed Stories" \
    "" \
    "## Next Iteration Contract" \
    "Start from the beginning: read PRD and plan the first bounded action." \
    "" \
    "**Criteria**:" \
    "- (to be defined by first worker after reading PRD)" \
    "" \
    "## Key Decisions" \
    "" \
    "## Patterns Discovered" \
    "## Learnings" \
    "## Evidence Chain" \
    > "$F"
  echo " + $F"
fi
585
-
586
- # --- PRD ---
587
- F="$DESK/plans/prd-$SLUG.md"
588
- if [[ ! -f "$F" ]]; then
589
- cat > "$F" <<EOF
590
- # PRD: $SLUG
591
-
592
- ## Objective
593
- $OBJECTIVE
594
-
595
- ## User Stories
596
-
597
- ### US-001: [Title]
598
- - **Priority**: P0
599
- - **Size**: S|M|L
600
- - **Type**: code|visual|content|integration|infra
601
- - **Risk**: LOW|MEDIUM|HIGH|CRITICAL (governance §1c)
602
- - **Depends on**: []
603
- - **Acceptance Criteria** (Given/When/Then — domain language only):
604
- - AC1:
605
- - Given: [precondition in domain language]
606
- - When: [action in domain language]
607
- - Then: [expected outcome with quantitative criteria]
608
- - AC2:
609
- - Given: [precondition]
610
- - When: [action]
611
- - Then: [expected outcome with quantitative criteria]
612
- - **Boundary Cases**: [edge cases — empty input, max values, error conditions, concurrent access]
613
- - **Verification Layers**: [Fill per Risk level — LOW: L1+L3, MEDIUM: L1+L2(if ext deps)+L3, HIGH: L1+L2+L3+L4, CRITICAL: L1+L2+L3+L4+mutation (governance §1c)]
614
- - **Status**: not started
615
-
616
- ## Non-Goals
617
- ## Technical Constraints
618
- ## Done When
619
- - All acceptance criteria pass with quantitative evidence
620
- - All boundary cases covered
621
- - All required verification layers executed (no TODO remaining)
622
- - Independent verifier confirms via Evidence Gate (governance §1b)
623
- EOF
624
- echo " + $F"
625
- else echo " · $F"; fi
626
-
627
- # Split PRD into per-US files (no-op with warning if no US markers)
628
- split_prd_by_us "$DESK/plans/prd-$SLUG.md" "$SLUG"
629
-
630
- # --- Test Spec ---
631
- F="$DESK/plans/test-spec-$SLUG.md"
632
- if [[ ! -f "$F" ]]; then
633
- cat > "$F" <<EOF
634
- # Test Specification: $SLUG
635
-
636
- ## Iron Law Reference
637
- > IL-3: NO PASS WITH TODO IN ANY REQUIRED VERIFICATION LAYER
638
- > IL-4: NO PASS WITHOUT TEST COUNT >= AC COUNT x 3
639
-
640
- ---
641
-
642
- ## Verification Commands
643
- ### Build
644
- \`\`\`bash
645
- # TODO
646
- \`\`\`
647
- ### Test
648
- \`\`\`bash
649
- # TODO
650
- \`\`\`
651
- ### Lint
652
- \`\`\`bash
653
- # TODO
654
- \`\`\`
655
-
656
- ---
657
-
658
- ## Verification Context (fill BEFORE implementation)
659
-
660
- ### Target Behavior
661
- What behavior does this project change or introduce?
662
- - TODO
663
-
664
- ### Impacted Tests
665
- Existing tests that may break due to this change:
666
- - TODO (acceptable at init; Worker fills during first iteration)
667
-
668
- ### Required New Tests
669
- Tests that MUST be written (minimum 3 per AC: happy + negative + boundary):
670
- - TODO
671
-
672
- ### Forbidden Shortcuts (see Worker prompt for full list)
673
- - Do not mock external services when L2 integration test is required
674
- - Do not delete or weaken existing assertions to make tests pass
675
- - Do not add test-specific logic (if __name__ == '__test__' patterns)
676
- - Do not skip boundary cases listed in the PRD
677
- - Do not claim "code inspection" as verification — run the actual command
678
- - Do not say "too simple to test" — simple code breaks
679
- - Do not say "I'll test after" — tests passing immediately prove nothing
680
- - Do not say "already manually tested" — ad-hoc is not systematic
681
- - Do not say "partial check is enough" — partial proves nothing
682
- - Do not say "I'm confident" — confidence is not evidence
683
- - Do not say "existing code has no tests" — you are improving it, add tests
684
- - Do not write code before tests — delete it and start with tests
685
-
686
- ### Pass/Fail Evidence Format
687
- - Command output with exit code 0
688
- - Quantitative result matching expected value
689
- - Screenshot comparison (for visual tasks)
690
-
691
- ---
692
-
693
- ## Verification Layers (ALL required sections — TODO in required layer = Verifier FAIL)
694
-
695
- ### L1: Unit Test (REQUIRED)
696
- \`\`\`bash
697
- # TODO — unit test command (e.g., pytest, jest, go test)
698
- \`\`\`
699
-
700
- ### L2: Integration (required if external services exist, otherwise "N/A — reason")
701
- \`\`\`bash
702
- # TODO — integration test command, or write: N/A — no external services (pure computation/transformation)
703
- \`\`\`
704
-
705
- ### L3: E2E Simulation (REQUIRED)
706
- Known input → full pipeline → quantitative output comparison.
707
- Must cover ALL AC types: happy path + boundary + error path.
708
- - **Happy path input**: TODO (specific test data)
709
- - **Happy path expected output**: TODO (quantitative value)
710
- - **Happy path command**:
711
- \`\`\`bash
712
- # TODO — E2E happy path command
713
- \`\`\`
714
- - **Error path input**: TODO (invalid/boundary input that triggers error)
715
- - **Error path expected**: TODO (error type + non-zero exit code)
716
- - **Error path command**:
717
- \`\`\`bash
718
- # TODO — E2E error path command (expected exit ≠ 0)
719
- \`\`\`
720
-
721
- ### L4: Deploy Verification (required if deploying, otherwise "N/A — reason")
722
- \`\`\`bash
723
- # TODO — deploy verification command, or write: N/A — no deployment (library/tool, local-only change)
724
- \`\`\`
725
-
726
- ---
727
-
728
- ## Mutation Testing Gate (CRITICAL risk only)
729
- - Required: only for CRITICAL risk classification (governance §1c)
730
- - Tool: TODO (e.g., mutmut, Stryker, go-mutesting) or "N/A — not CRITICAL risk"
731
- - Target: >= 60% mutation score on core business logic (project default; override in PRD if justified)
732
- - Scope: core business logic files (not config/tests/docs)
733
- - Command:
734
- \`\`\`bash
735
- # TODO — mutation testing command, or write: N/A — not CRITICAL risk
736
- \`\`\`
737
-
738
- ---
739
-
740
- ## Test Quality Checklist (Verifier checks these)
741
- - [ ] Tests verify behavior, not implementation details
742
- - [ ] Each test has meaningful assertions (not just "no error thrown")
743
- - [ ] Boundary cases covered (empty, max, zero, null, concurrent)
744
- - [ ] No tautological tests (expected value copied from implementation)
745
- - [ ] Mock usage limited to external boundaries only
746
- - [ ] No test-specific logic in production code
747
- - [ ] Each AC has >= 3 tests (happy + negative + boundary) per IL-4
748
-
749
- ## Traceability Matrix (Worker fills during implementation)
750
-
751
- | US | AC | Test File :: Function | Layer | Evidence | Status |
752
- |----|----|----------------------|-------|----------|--------|
753
- | US-001 | AC1 | TODO | L1 | TODO | pending |
754
-
755
- ---
756
-
757
- ## Code Quality Gates (defaults — override in PRD with justification)
758
- - **Code duplication**: <= 3% (project-appropriate tool, e.g., jscpd, pylint, sonar)
759
- - **Mock ratio**: mock-based assertions <= 30% of total assertions
760
- - **Cyclomatic complexity**: <= 10 per function
761
- - **Function length**: <= 50 lines per function
762
- - **File length**: <= 800 lines per file
763
-
764
- ---
765
-
766
- ## Reproducibility Gate
767
- - [ ] Lock file exists and committed (package-lock.json, poetry.lock, go.sum, etc.) or "N/A — no external dependencies"
768
- - [ ] Clean install succeeds (npm ci, pip install, etc.) or "N/A — no external dependencies"
769
- - [ ] Security scan passes (or known vulnerabilities documented and acknowledged in PRD) or "N/A — no dependencies"
770
- - [ ] Environment variables documented (.env.example or equivalent) or "N/A — no env vars"
771
-
772
- ---
773
-
774
- ## Criteria → Verification Mapping
775
-
776
- | US | AC | Layer | Method | Command | Expected Output | Pass Criteria |
777
- |----|----|-------|--------|---------|-----------------|---------------|
778
- | US-001 | AC1 | L1 | TODO | TODO | TODO | TODO |
779
- EOF
780
- echo " + $F"
781
- else echo " · $F"; fi
782
-
783
# Break the freshly written test-spec into one file per user story.
# (The helper warns and does nothing when the spec has no US section markers.)
split_test_spec_by_us "$DESK/plans/test-spec-$SLUG.md" "$SLUG"

# --- .gitignore for runtime artifacts ---
# Ensure the project's .gitignore ignores the Ralph Desk scaffold directory.
# Three outcomes: create the file, append the rules, or leave it untouched
# when the marker comment is already present.
GITIGNORE="$ROOT/.gitignore"
MARKER="# RLP Desk runtime artifacts"
if [[ ! -f "$GITIGNORE" ]]; then
  # No .gitignore yet: write marker + rule as the whole file.
  printf '%s\n' "$MARKER" ".claude/ralph-desk/" > "$GITIGNORE"
  echo " + .gitignore (created with rlp-desk rules)"
elif grep -qF "$MARKER" "$GITIGNORE"; then
  echo " · .gitignore (rlp-desk rules already present)"
else
  # Existing file without our marker: blank separator line, then marker + rule.
  printf '\n%s\n%s\n' "$MARKER" ".claude/ralph-desk/" >> "$GITIGNORE"
  echo " + .gitignore (rlp-desk rules appended)"
fi
807
-
808
# --- Claude Code sensitive-file permissions for .claude/ralph-desk/ ---
# Worker/Verifier need Read/Edit/Write access to .claude/ralph-desk/ files.
# --dangerously-skip-permissions does NOT cover "sensitive file" access for .claude/ paths.
# Without these, every file operation triggers an interactive permission prompt that blocks automation.
#
# Behaviour:
#   - settings file already mentions the Read(...) rule -> nothing to do
#   - settings file exists, jq available                -> merge the three rules into permissions.allow
#   - settings file exists, jq missing or merge fails   -> warn and ask for a manual edit (best effort, no abort)
#   - settings file missing                             -> create it with just the three rules
# The explanatory NOTE is printed only when the rules were actually written.
SETTINGS_FILE="$ROOT/.claude/settings.local.json"
PERM_MARKER="Read(.claude/ralph-desk/**)"

if [[ -f "$SETTINGS_FILE" ]] && grep -qF "$PERM_MARKER" "$SETTINGS_FILE" 2>/dev/null; then
  echo " · .claude/settings.local.json (rlp-desk permissions already present)"
else
  PERMS='["Read(.claude/ralph-desk/**)", "Edit(.claude/ralph-desk/**)", "Write(.claude/ralph-desk/**)"]'
  PERMS_APPLIED=0

  if [[ -f "$SETTINGS_FILE" ]]; then
    if command -v jq &>/dev/null; then
      # Test the jq+mv result explicitly: inside an `a && b` list, errexit does
      # not fire when `a` fails, so a malformed settings file would otherwise
      # be reported as "merged" while nothing was changed.
      if jq --argjson perms "$PERMS" '
          .permissions //= {} |
          .permissions.allow //= [] |
          .permissions.allow += ($perms - .permissions.allow)
        ' "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp" \
        && mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE"; then
        PERMS_APPLIED=1
        echo " + .claude/settings.local.json (rlp-desk permissions merged)"
      else
        # Do not leave a partial/empty temp file next to the real settings.
        rm -f "${SETTINGS_FILE}.tmp"
        echo " ⚠ Could not merge permissions (is .claude/settings.local.json valid JSON?). Add manually:"
        echo " permissions.allow: Read/Edit/Write(.claude/ralph-desk/**)"
      fi
    else
      echo " ⚠ jq not found. Add to .claude/settings.local.json manually:"
      echo " permissions.allow: Read/Edit/Write(.claude/ralph-desk/**)"
    fi
  else
    mkdir -p "$(dirname "$SETTINGS_FILE")"
    cat > "$SETTINGS_FILE" <<'SETEOF'
{
  "permissions": {
    "allow": [
      "Read(.claude/ralph-desk/**)",
      "Edit(.claude/ralph-desk/**)",
      "Write(.claude/ralph-desk/**)"
    ]
  }
}
SETEOF
    PERMS_APPLIED=1
    echo " + .claude/settings.local.json (created with rlp-desk permissions)"
  fi

  # Only claim the permissions were added when a write really happened.
  if [[ $PERMS_APPLIED -eq 1 ]]; then
    echo ""
    echo " NOTE: Added Read/Edit/Write permissions for .claude/ralph-desk/ to"
    echo " .claude/settings.local.json (local, not committed to git)."
    echo " This prevents Worker/Verifier from being blocked by Claude Code's"
    echo " sensitive-file prompts during automated loop execution."
    echo " See: https://github.com/ai-dev-methodologies/rlp-desk#project-structure"
  fi
fi
854
-
855
# --- Post-init validation gate ---
# Confirm that every file the scaffold is expected to contain exists.
# Each missing path is reported; if any is missing the script exits 1.
INIT_FAIL=0
for REQUIRED_FILE in \
  "$DESK/prompts/$SLUG.worker.prompt.md" \
  "$DESK/prompts/$SLUG.verifier.prompt.md" \
  "$DESK/context/$SLUG-latest.md" \
  "$DESK/memos/$SLUG-memory.md" \
  "$DESK/plans/prd-$SLUG.md" \
  "$DESK/plans/test-spec-$SLUG.md"; do
  # Present files need no further handling.
  [[ -f "$REQUIRED_FILE" ]] && continue
  echo " ✗ MISSING: $REQUIRED_FILE"
  INIT_FAIL=1
done
if (( INIT_FAIL == 1 )); then
  printf '%s\n' \
    "" \
    "ERROR: Scaffold incomplete. Some required files were not created." \
    "Re-run init or check filesystem permissions."
  exit 1
fi
875
-
876
# Closing summary: announce the finished scaffold, list the follow-up steps,
# then hand over to print_run_presets for the ready-to-copy run commands.
# Each line still goes through `echo`, one call per line, exactly as before.
for _banner_line in \
  "" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Scaffold ready: $SLUG" \
  "" \
  "Next:" \
  " 1. Edit PRD: $DESK/plans/prd-$SLUG.md" \
  " 2. Edit test spec: $DESK/plans/test-spec-$SLUG.md" \
  " 3. Run (copy a command below):" \
  ""; do
  echo "$_banner_line"
done
print_run_presets "$SLUG"