@ai-dev-methodologies/rlp-desk 0.14.3 → 0.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1309 @@
1
+ #!/bin/zsh
2
+ set -euo pipefail
3
+
4
+ # =============================================================================
5
+ # Ralph Desk Project Initializer for Claude Code
6
+ #
7
+ # User-level tool: ~/.claude/ralph-desk/init_ralph_desk.zsh
8
+ # Creates project-local scaffold in: .rlp-desk/ (v0.13.0+; auto-migrates from
9
+ # legacy .claude/ralph-desk/ to avoid Claude Code's hardcoded sensitive-file
10
+ # policy that hung worker sentinel writes).
11
+ #
12
+ # Usage:
13
+ # ~/.claude/ralph-desk/init_ralph_desk.zsh <slug> [objective] [--mode fresh|improve]
14
+ # =============================================================================
15
+
16
# --- Command-line parsing ---
# $1 is the mandatory campaign slug. Every remaining argument is either a
# recognised option (both "--flag VALUE" and "--flag=VALUE" spellings are
# accepted) or a free-form objective string; if several non-option arguments
# are given, the last one wins.
SLUG="${1:?Usage: $0 <slug> [objective] [--mode fresh|improve] [--verify-mode per-us|batch] [--server-cmd CMD] [--server-port PORT] [--server-health URL]}"
MODE=""
OBJECTIVE="TBD - fill in the objective"
SERVER_CMD=""
SERVER_PORT=""
SERVER_HEALTH=""
# --verify-mode is parsed here so the PRD cross-US lint matches the mode the
# user actually plans to run with. Falls back to the VERIFY_MODE env var (which
# the wrapper may already export) and finally to the per-us default.
VERIFY_MODE_ARG=""

# Parse remaining arguments
shift
while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode)
      MODE="${2:?--mode requires an argument: fresh|improve}"
      shift 2
      ;;
    --mode=*)
      MODE="${1#--mode=}"
      shift
      ;;
    --verify-mode)
      VERIFY_MODE_ARG="${2:?--verify-mode requires an argument: per-us|batch}"
      shift 2
      ;;
    --verify-mode=*)
      VERIFY_MODE_ARG="${1#--verify-mode=}"
      shift
      ;;
    --server-cmd)
      SERVER_CMD="${2:?--server-cmd requires a command}"
      shift 2
      ;;
    --server-cmd=*)
      SERVER_CMD="${1#--server-cmd=}"
      shift
      ;;
    --server-port)
      SERVER_PORT="${2:?--server-port requires a port number}"
      shift 2
      ;;
    --server-port=*)
      SERVER_PORT="${1#--server-port=}"
      shift
      ;;
    --server-health)
      SERVER_HEALTH="${2:?--server-health requires a URL}"
      shift 2
      ;;
    --server-health=*)
      SERVER_HEALTH="${1#--server-health=}"
      shift
      ;;
    *)
      OBJECTIVE="$1"
      shift
      ;;
  esac
done

# Reject unknown --mode values before anything is touched. Any non-empty MODE
# enables the re-execution cleanup further down, so a typo such as
# "--mode frsh" would otherwise delete runtime artifacts and prompts while
# matching neither the "fresh" nor the "improve" branch. An empty value
# (e.g. "--mode=") keeps its previous meaning: no re-execution mode.
case "$MODE" in
  ""|fresh|improve)
    ;;
  *)
    echo "ERROR: invalid --mode '$MODE' (expected: fresh|improve)" >&2
    exit 1
    ;;
esac
77
+
78
# Project root: an exported ROOT wins, otherwise the current directory.
ROOT="${ROOT:-$PWD}"
# v0.13.0: the project-local runtime lives outside .claude/ because Claude
# Code's hardcoded sensitive-file policy hung worker sentinel writes there.
# RLP_DESK_RUNTIME_DIR can override the directory name so a future platform
# change can be worked around without shipping a release.
DESK="$ROOT/${RLP_DESK_RUNTIME_DIR:-.rlp-desk}"
# Absolute directory containing this script.
RUNNER_DIR="$(cd "$(dirname "$0")" && pwd)"

# v0.13.0: one-time migration of the legacy .claude/ralph-desk/ directory.
# If both the legacy and the new directory exist there is no safe way to
# merge them, so stop and let the user decide which one to keep.
LEGACY_DESK="$ROOT/.claude/ralph-desk"
if [[ -d "$LEGACY_DESK" && -d "$DESK" ]]; then
  echo "ERROR: both directories exist (legacy=$LEGACY_DESK, new=$DESK)." >&2
  echo "Remove one before re-running init." >&2
  exit 1
elif [[ -d "$LEGACY_DESK" ]]; then
  echo "[v0.13.0] migrating $LEGACY_DESK -> $DESK"
  mkdir -p "$(dirname "$DESK")"
  mv "$LEGACY_DESK" "$DESK"
fi
98
+
99
+ # --- Re-execution versioning helpers ---
100
+ # Handles ONLY debug.log and campaign-report.md versioning.
101
+ # SV reports use their own -NNN auto-increment pattern and are NOT handled here.
102
+
103
# Print the path that version <n> of <file_path> is stored under:
#   <dir>/<stem>-v<n><ext>
# The extension is everything from the last dot of the basename. A basename
# with no dot, or whose only dot is the leading one (e.g. ".env"), is treated
# as having no extension so the original name stays intact (".env-v1").
# Arguments: $1 - file path, $2 - version number
# Outputs:   the versioned path on stdout
_rlp_versioned_path() {
  local file_path="$1" n="$2"
  local dir base stem ext
  dir="$(dirname "$file_path")"
  base="$(basename "$file_path")"
  stem="${base%.*}"
  if [[ "$base" == *.* && -n "$stem" ]]; then
    ext=".${base##*.}"
  else
    stem="$base"
    ext=""
  fi
  echo "$dir/${stem}-v${n}${ext}"
}

# Print the lowest version number N (starting at 1) for which the versioned
# copy of <file_path> does not exist yet as a regular file.
# Arguments: $1 - file path (the file itself does not need to exist)
# Outputs:   N on stdout
detect_next_version() {
  local file_path="$1"
  local n=1
  while [[ -f "$(_rlp_versioned_path "$file_path" "$n")" ]]; do
    n=$(( n + 1 ))
  done
  echo "$n"
}

# Rename <file_path> to its next free versioned name and report the rename.
# A path that is not an existing regular file is skipped silently (status 0).
# Arguments: $1 - file path
# Outputs:   one " Versioned: old → new" line on stdout when a rename happened
version_file() {
  local file_path="$1"
  [[ -f "$file_path" ]] || return 0

  local n target
  n="$(detect_next_version "$file_path")"
  # Same helper as detect_next_version, so the name that was probed is exactly
  # the name that gets written.
  target="$(_rlp_versioned_path "$file_path" "$n")"
  mv -- "$file_path" "$target"
  echo " Versioned: $(basename "$file_path") → $(basename "$target")"
}
138
+
139
+ # --- PRD/test-spec per-US splitting helpers ---
140
+
141
# Split a PRD into one file per user story.
#
# Every "### US-<digits>:" heading starts a new section that is written to
# <plans_dir>/prd-<slug>-US-<digits>.md, where <plans_dir> is the directory
# containing the PRD. Text before the first heading is not copied anywhere.
# Per-US files left over from an earlier run are removed first, so a story
# that was deleted from the PRD does not survive as a stale file.
#
# Arguments: $1 - path to the PRD file, $2 - campaign slug
# Outputs:   a summary line on stdout; a warning on stderr when the PRD has
#            no US headings
# Returns:   0 when the PRD is missing or has no US headings
split_prd_by_us() {
  local prd_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$prd_file")"

  [[ -f "$prd_file" ]] || return 0

  # find -name pattern for the generated files. The slug is used verbatim, so
  # it is assumed to contain no glob metacharacters.
  local split_pattern="prd-${slug}-US-*.md"

  # Drop leftovers from previous runs before (re)generating.
  local stale_count
  stale_count=$(find "$plans_dir" -maxdepth 1 -type f -name "$split_pattern" 2>/dev/null | wc -l | tr -d ' ') || stale_count=0
  if [[ "$stale_count" -gt 0 ]]; then
    find "$plans_dir" -maxdepth 1 -type f -name "$split_pattern" -exec rm -f -- {} +
  fi

  # Count headings with the same pattern awk splits on. A looser pattern here
  # would let a heading without the colon through, after which awk writes
  # nothing and the file count below has nothing to match.
  local us_count
  us_count=$(grep -c -E '^### US-[0-9]+:' "$prd_file" 2>/dev/null) || us_count=0
  if [[ "$us_count" -eq 0 ]]; then
    echo " WARNING: No US markers (### US-NNN:) found in PRD — falling back to full PRD injection" >&2
    if [[ "$stale_count" -gt 0 ]]; then
      echo " Cleaned $stale_count stale prd per-US file(s)"
    fi
    return 0
  fi

  awk -v dir="$plans_dir" -v slug="$slug" '
    /^### US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      us_id = substr($0, RSTART, RLENGTH)
      out = dir "/prd-" slug "-" us_id ".md"
    }
    out != "" { print > out }
  ' "$prd_file"

  local count
  count=$(find "$plans_dir" -maxdepth 1 -type f -name "$split_pattern" 2>/dev/null | wc -l | tr -d ' ') || count=0
  echo " Split PRD: $count per-US files"
}
176
+
177
# Split a test-spec into one file per user story, each prefixed with the
# global header.
#
# The global header is everything before the first "## US-<digits>:" heading
# (e.g. Verification Commands). Every such heading starts a section written to
# <plans_dir>/test-spec-<slug>-US-<digits>.md, with the header prepended.
# Per-US files left over from an earlier run are removed first.
#
# Arguments: $1 - path to the test-spec file, $2 - campaign slug
# Outputs:   a summary line on stdout; a warning on stderr when the file has
#            no US headings
# Returns:   0 when the file is missing or has no US headings
split_test_spec_by_us() {
  local ts_file="$1"
  local slug="$2"
  local plans_dir
  plans_dir="$(dirname "$ts_file")"

  [[ -f "$ts_file" ]] || return 0

  # find -name pattern for the generated files. The slug is used verbatim, so
  # it is assumed to contain no glob metacharacters.
  local split_pattern="test-spec-${slug}-US-*.md"

  # Drop leftovers from previous runs so removed stories do not linger.
  find "$plans_dir" -maxdepth 1 -type f -name "$split_pattern" -exec rm -f -- {} + 2>/dev/null || true

  # Count headings with the same pattern awk splits on, so this guard and the
  # split below always agree on what a section is.
  local us_count
  us_count=$(grep -c -E '^## US-[0-9]+:' "$ts_file" 2>/dev/null) || us_count=0
  if [[ "$us_count" -eq 0 ]]; then
    echo " WARNING: No US section markers (## US-NNN:) in test-spec — skipping split" >&2
    return 0
  fi

  # Single pass: collect the header until the first US heading, then start each
  # per-US file with that header. This needs no temporary files, so nothing is
  # left behind if the run is interrupted.
  awk -v dir="$plans_dir" -v slug="$slug" '
    /^## US-[0-9]+:/ {
      if (out != "") close(out)
      match($0, /US-[0-9]+/)
      us_id = substr($0, RSTART, RLENGTH)
      out = dir "/test-spec-" slug "-" us_id ".md"
      printf("%s", header) > out
    }
    out == "" { header = header $0 "\n"; next }
    { print > out }
  ' "$ts_file"

  local count
  count=$(find "$plans_dir" -maxdepth 1 -type f -name "$split_pattern" 2>/dev/null | wc -l | tr -d ' ') || count=0
  echo " Split test-spec: $count per-US files (with global header)"
}
222
+
223
+ # --- Run command presets ---
224
+ # Detects codex CLI availability and shows appropriate run command presets.
225
+ # AC1: codex installed → cross-engine preset first, spark Pro, claude-only, basic
226
+ # AC2: codex not installed → tmux + claude-only first, install recommendation
227
+ # AC3: full options reference with defaults always shown
228
+ print_run_presets() {
229
+ local slug="$1"
230
+ local codex_available=0
231
+ command -v codex &>/dev/null && codex_available=1
232
+
233
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
234
+ echo "Available run commands (copy the one you want):"
235
+ echo ""
236
+ if [[ $codex_available -eq 1 ]]; then
237
+ echo "# Recommended: cross-engine + final-consensus (full context + blind-spot coverage):"
238
+ echo "/rlp-desk run $slug --mode tmux --worker-model gpt-5.5:medium --consensus final-only --debug"
239
+ echo ""
240
+ echo "# Small tasks only (single-file, AC <= 4, simple logic — spark 100k context limit):"
241
+ echo "/rlp-desk run $slug --mode tmux --worker-model spark:high --consensus final-only --debug"
242
+ echo ""
243
+ echo "# Critical (full consensus on every verify):"
244
+ echo "/rlp-desk run $slug --mode tmux --worker-model gpt-5.5:high --consensus all --debug"
245
+ echo ""
246
+ echo "# Claude-only:"
247
+ echo "/rlp-desk run $slug --debug"
248
+ else
249
+ echo "# Recommended: tmux mode + claude-only (real-time visibility):"
250
+ echo "/rlp-desk run $slug --mode tmux --debug"
251
+ echo ""
252
+ echo "# Agent mode:"
253
+ echo "/rlp-desk run $slug --debug"
254
+ echo ""
255
+ echo "# Install codex for cost savings + cross-engine blind-spot coverage:"
256
+ echo "npm install -g @openai/codex"
257
+ fi
258
+ echo ""
259
+ echo "# Full options reference:"
260
+ echo "# --mode agent|tmux (default: agent)"
261
+ echo "# --worker-model MODEL haiku|sonnet|opus or gpt-5.5:high|spark:high (default: haiku)"
262
+ echo "# --lock-worker-model disable auto model upgrade"
263
+ echo "# --verifier-model MODEL per-US verifier (default: sonnet)"
264
+ echo "# --final-verifier-model MODEL final ALL verifier (default: opus)"
265
+ echo "# --consensus off|all|final-only cross-engine consensus (default: off)"
266
+ echo "# --consensus-model MODEL per-US cross-verifier (default: gpt-5.5:medium)"
267
+ echo "# --final-consensus-model MODEL final cross-verifier (default: gpt-5.5:high)"
268
+ echo "# --verify-mode per-us|batch (default: per-us)"
269
+ echo "# --cb-threshold N (default: 6)"
270
+ echo "# --max-iter N (default: 100)"
271
+ echo "# --iter-timeout N tmux only (default: 600)"
272
+ echo "# --debug debug logging"
273
+ echo "# --with-self-verification post-campaign SV report"
274
+ echo "# --flywheel off|on-fail direction review on fail (default: off)"
275
+ echo "# --flywheel-model MODEL flywheel reviewer model (default: opus)"
276
+ echo "# --flywheel-guard off|on guard validates flywheel decisions (default: off)"
277
+ echo "# --flywheel-guard-model MODEL guard reviewer model (default: opus)"
278
+ echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
279
+ }
280
+
281
+ echo "Initializing Ralph Desk: $SLUG"
282
+ echo " Root: $ROOT"
283
+ echo " Desk: $DESK"
284
+ [[ -n "$MODE" ]] && echo " Mode: $MODE"
285
+ echo ""
286
+
287
+ mkdir -p "$DESK/prompts" "$DESK/context" "$DESK/memos" "$DESK/plans" "$DESK/logs/$SLUG"
288
+
289
+ # --- Re-execution lifecycle (--mode handling) ---
290
+ PRD_FILE="$DESK/plans/prd-$SLUG.md"
291
+ LOGS_DIR="$DESK/logs/$SLUG"
292
+
293
+ # No-PRD fallback: --mode provided but no PRD exists yet → treat as first-run.
294
+ # Print a note so the user knows the requested mode was ignored, reset MODE so
295
+ # the re-execution lifecycle below is skipped, and let the rest of the script
296
+ # scaffold a fresh PRD template alongside the other prompt/test-spec files.
297
+ if [[ -n "$MODE" ]] && [[ ! -f "$PRD_FILE" ]]; then
298
+ echo "Note: --mode $MODE provided but no PRD found at $PRD_FILE — treating as first-run."
299
+ MODE=""
300
+ fi
301
+
302
+ if [[ -n "$MODE" ]]; then
303
+ echo "Re-execution mode: --mode $MODE"
304
+ echo ""
305
+
306
+ DELETED_COUNT=0
307
+
308
+ # Version debug.log and campaign-report.md (NOT self-verification-report — uses -NNN)
309
+ version_file "$LOGS_DIR/debug.log"
310
+ version_file "$LOGS_DIR/campaign-report.md"
311
+
312
+ # Delete iter-* artifacts (archived done-claims, verdicts, prompt logs, results)
313
+ for f in "$LOGS_DIR"/iter-*(N); do
314
+ [[ -f "$f" ]] && { rm "$f"; (( ++DELETED_COUNT )); }
315
+ done
316
+
317
+ # US-022 R10 P2-J: quarantine cross-mission stale iter-signal.json before
318
+ # the normal reset deletes it. Cleanup (.sisyphus/quarantine/) preserves the
319
+ # foreign signal so the operator can recover it; only signals whose us_id is
320
+ # absent from the current PRD are quarantined.
321
+ local _r10_signal="$DESK/memos/$SLUG-iter-signal.json"
322
+ local _r10_prd="$DESK/plans/prd-$SLUG.md"
323
+ if [[ -f "$_r10_signal" ]]; then
324
+ _quarantine_stale_signal "$_r10_signal" "$_r10_prd" "$DESK" 2>/dev/null || true
325
+ fi
326
+
327
+ # Delete runtime memos
328
+ for f in \
329
+ "$DESK/memos/$SLUG-done-claim.json" \
330
+ "$DESK/memos/$SLUG-iter-signal.json" \
331
+ "$DESK/memos/$SLUG-verify-verdict.json" \
332
+ "$DESK/memos/$SLUG-complete.md" \
333
+ "$DESK/memos/$SLUG-blocked.md" \
334
+ "$DESK/memos/$SLUG-flywheel-signal.json" \
335
+ "$DESK/memos/$SLUG-flywheel-review.md" \
336
+ "$DESK/memos/$SLUG-flywheel-guard-verdict.json"; do
337
+ [[ -f "$f" ]] && { rm "$f"; (( ++DELETED_COUNT )); }
338
+ done
339
+
340
+ # Delete status.json, baseline.log, cost-log.jsonl
341
+ for f in "$LOGS_DIR/runtime/status.json" "$LOGS_DIR/status.json" "$LOGS_DIR/baseline.log" "$LOGS_DIR/cost-log.jsonl"; do
342
+ [[ -f "$f" ]] && { rm "$f"; (( ++DELETED_COUNT )); }
343
+ done
344
+
345
+ # Delete test-spec only for fresh re-execution mode; improve preserves custom edits
346
+ # and reruns split logic on the existing file.
347
+ for f in \
348
+ "$DESK/plans/test-spec-$SLUG.md" \
349
+ "$DESK/prompts/$SLUG.worker.prompt.md" \
350
+ "$DESK/prompts/$SLUG.verifier.prompt.md" \
351
+ "$DESK/prompts/$SLUG.flywheel.prompt.md" \
352
+ "$DESK/prompts/$SLUG.flywheel-guard.prompt.md"; do
353
+ [[ -f "$f" ]] &&
354
+ if [[ "$MODE" == "fresh" ]] || [[ "$f" != "$DESK/plans/test-spec-$SLUG.md" ]]; then
355
+ rm "$f"; (( ++DELETED_COUNT ));
356
+ fi
357
+ done
358
+
359
+ # Reset memory and context to fresh templates (rm here; scaffold below regenerates them)
360
+ rm -f "$DESK/memos/$SLUG-memory.md" "$DESK/context/$SLUG-latest.md"
361
+
362
+ # PRD handling: --mode fresh deletes PRD; --mode improve preserves PRD in-place
363
+ if [[ "$MODE" == "fresh" ]]; then
364
+ [[ -f "$PRD_FILE" ]] && { rm "$PRD_FILE"; (( ++DELETED_COUNT )); echo " Deleted: prd-$SLUG.md (--mode fresh: PRD deleted for fresh start)"; }
365
+ fi
366
+
367
+ # Re-execution summary
368
+ echo " Re-execution summary:"
369
+ if [[ "$MODE" == "improve" ]]; then
370
+ echo " Preserved: prd-$SLUG.md (--mode improve: PRD kept in-place)"
371
+ fi
372
+ echo " Deleted: $DELETED_COUNT runtime artifacts"
373
+ echo " Reset: memory.md + context.md (regenerating from templates)"
374
+ echo ""
375
+ fi
376
+
377
+ # --- Worker Prompt ---
378
+ F="$DESK/prompts/$SLUG.worker.prompt.md"
379
+ if [[ ! -f "$F" ]]; then
380
+ cat > "$F" <<EOF
381
+ Execute the plan for $SLUG.
382
+
383
+ ## Coding Principles (applies to ALL work in this iteration)
384
+
385
+ 1. Think Before Coding
386
+ Don't assume. Don't hide confusion. Surface tradeoffs.
387
+ - State assumptions explicitly. If uncertain, signal blocked with your options
388
+ listed — do not guess.
389
+ - If multiple interpretations exist, present them in blocked signal — do not
390
+ pick silently.
391
+ - If a simpler approach exists, note it in your plan.
392
+ - If something important is unclear, stop and name what is confusing.
393
+
394
+ 2. Simplicity First
395
+ Minimum code that solves the problem. Nothing speculative.
396
+ - No features beyond what was asked.
397
+ - No abstractions for single-use code.
398
+ - No configurability that was not specified.
399
+ - No defensive handling for implausible scenarios unless the context requires it.
400
+ - If 200 lines could be 50, rewrite it.
401
+ Ask: "Would a strong senior engineer call this overcomplicated?" If yes, simplify.
402
+
403
+ 3. Surgical Changes
404
+ Touch only what you must. Clean up only your own mess.
405
+ - Do not improve adjacent code, comments, or formatting unless required by the task.
406
+ - Do not refactor unrelated code.
407
+ - Match the local style unless there is a compelling reason not to.
408
+ - If unrelated dead code is noticed, mention it in done-claim — do not delete it.
409
+ - Remove imports, variables, or functions that YOUR changes made unused.
410
+ - Do not remove pre-existing dead code.
411
+ Test: every changed line should trace directly to the contract.
412
+
413
+ 4. Goal-Driven Execution
414
+ Define success criteria. Loop until verified.
415
+ These principles are enforced by the TDD Mandate and Planning step below.
416
+ If success criteria for any AC are unclear, signal blocked.
417
+
418
+ ## Planning (before writing any code)
419
+ After reading all files, BEFORE writing any test or code:
420
+ 1. List the specific files you will create or modify
421
+ 2. For each AC in the contract, state your approach in 1 sentence
422
+ 3. Identify ordering constraints (which AC depends on which)
423
+ 4. Record as first execution_step: {"step": "plan", "ac_id": "all", "command": null, "exit_code": null, "summary": "Plan: [files], [approach], [order]"}
424
+ Keep planning lightweight — 1-2 sentences per AC, not a detailed analysis.
425
+ If the plan reveals the contract is unclear or infeasible, signal "blocked" immediately.
426
+
427
+ ## Before you start
428
+ Read these files in order:
429
+ 1. Campaign Memory: $DESK/memos/$SLUG-memory.md → Next Iteration Contract is your mission
430
+ 2. PRD: $DESK/plans/prd-$SLUG.md → acceptance criteria
431
+ 3. Test Spec: $DESK/plans/test-spec-$SLUG.md → verification methods
432
+ 4. Latest Context: $DESK/context/$SLUG-latest.md → current state
433
+
434
+ ## TDD MANDATE (hard constraint — violation = automatic FAIL)
435
+ > Write failing tests FIRST → confirm RED (exit_code=1) → implement minimum code → confirm GREEN.
436
+ > Every NEW AC requires: write_test → verify_red → implement → verify_green in execution_steps.
437
+ > No exceptions. Verifier rejects missing RED evidence. For already-passing ACs, use verify_existing.
438
+
439
+ ## SCOPE LOCK (hard constraint — violation causes verification failure)
440
+ - You MUST only implement the work described in the "Next Iteration Contract" from campaign memory.
441
+ - If the contract says "implement US-001 only", do ONLY that. Do NOT touch other stories.
442
+ - If the contract says "implement all remaining stories", you may do all of them.
443
+ - Do NOT go beyond the contracted scope, even if you can see more work in the PRD.
444
+ - No file creation or modification outside the project root.
445
+ - Do not modify this prompt file or any PRD/test-spec files.
446
+ - **Lane discipline (governance §7e)**: PRD, test-spec, and campaign memory
447
+ are leader/owner artifacts. Worker MUST NOT edit them directly. Drift on
448
+ these files triggers a `lane_violation_warning` event in default mode, or
449
+ a sentinel BLOCKED with `infra_failure` + `recoverable=true` in `--lane-strict` mode.
450
+
451
+ ## Forbidden Shortcuts (Verifier will check these)
452
+ - Do not mock external services when L2 integration test is required by test-spec.
453
+ - Do not delete or weaken existing assertions to make tests pass.
454
+ - Do not skip boundary cases listed in the PRD.
455
+ - Do not write code before tests — if you did, delete it and start with tests.
456
+ - **NEVER modify rlp-desk infrastructure files** (~/.claude/ralph-desk/*, ~/.claude/commands/rlp-desk.md). If you discover a bug in rlp-desk itself, report it in done-claim.json with {"status": "blocked", "reason": "rlp-desk bug: <description>"} and signal blocked. Do NOT attempt to fix rlp-desk — it is the orchestration tool, not your project code.
457
+ - **NEVER modify Claude Code settings** (~/.claude/settings.json, .claude/settings.local.json, or any settings files). Do NOT add permissions, change models, or alter configuration. If a permission prompt blocks you, report it as blocked — do NOT try to edit settings to bypass it.
458
+
459
+ ## When Stuck (do NOT guess-and-fix)
460
+ > 1. STOP and READ the error. Trace the call stack. Identify the root cause before touching code.
461
+ > 2. Write a minimal test that reproduces the failure, then fix the root cause only.
462
+ > 3. If 3+ fixes fail on the same issue, signal "blocked" with your diagnosis.
463
+
464
+ ## Iteration rules
465
+ - Use fresh context only; do NOT depend on prior chat history.
466
+ - Execute exactly the work specified in the Next Iteration Contract.
467
+ - Refresh context file with the current frontier.
468
+ - Rewrite campaign memory in full.
469
+ - When rewriting campaign memory, PRESERVE the Key Decisions and Patterns Discovered sections from prior iterations — append new entries, do not erase existing ones.
470
+ - Write evidence artifacts.
471
+ - **After writing tests, update test-spec Criteria Mapping with actual test file paths and function names** (replace placeholder -k filters).
472
+ - Ensure **each AC has >= 3 tests** (happy + negative + boundary). Do not just meet the total count — distribute evenly per AC.
473
+ - **Commit all changes when the iteration is complete** (include iteration number and story ID in commit message).
474
+
475
+ MANDATORY: When done with this iteration, write the following signal file:
476
+ - Path: $DESK/memos/$SLUG-iter-signal.json
477
+ - Format: {"iteration": N, "status": "continue|verify|verify_partial|blocked", "us_id": "US-NNN or null", "summary": "what was done", "timestamp": "ISO"}
478
+ - Status values:
479
+ - "continue" = current action done but more work remains (no verify needed yet)
480
+ - "verify" = current US complete + done-claim written → Verifier checks this US
481
+ - "verify_partial" = subset of ACs verified in this iteration. Required fields: "verified_acs": ["AC1","AC2"], "deferred_acs": ["AC3"], "defer_reason": "<why deferred>". Verifier evaluates only verified_acs; deferred_acs queue for next iter.
482
+ - "blocked" = autonomous blocker
483
+
484
+ ## Signal rules (per-US verification)
485
+ - After completing EACH user story → signal "verify" with "us_id" set to the story you just finished (e.g., "US-001").
486
+ - The Verifier will check ONLY that story's acceptance criteria.
487
+ - After ALL stories individually pass verification → signal "verify" with "us_id": "ALL" for a final full verify of all AC.
488
+ - Do NOT signal "continue" when a US is done — always signal "verify" per US.
489
+ - Signal "continue" ONLY when you have more work to do within the same US (e.g., a multi-step task).
490
+
491
+ ## Step N+1 (MANDATORY — US-017 R5 P0-D)
492
+ After done-claim.json, you MUST also write iter-signal.json with SPECIFIC summary including verified ACs and key evidence paths (e.g., "US-001: AC1+AC2 verified via tests/test_us001_ac1.py:test_happy_path; AC3 implementation in src/foo.py:42").
493
+ The auto-generated A4 fallback summary ("auto-generated by A4 fallback (done-claim without signal)") is a debugging context loss and triggers verifier-side WARN with meta.iter_signal_quality='auto_generated'. Per-mission A4 fallback ratio < 10% is required (governance §1f).
494
+
495
+ ## Blocked exit hygiene (MANDATORY — US-020 R8 P1-H)
496
+ On blocked exit (status=blocked), BEFORE writing iter-signal.json you MUST:
497
+ 1. Append to memory.md § Blocking History an entry: `{iter, us, reason, suggested_repair}`. The next iteration must be able to read why the previous one blocked without re-running the worker.
498
+ 2. Update latest.md § Known Issues with the same context so the Frontier section reflects the blocker.
499
+ The runner verifies memory.md and latest.md mtimes against the sentinel write time. If either file is older than 5 minutes when the sentinel is written, the JSON sidecar's `meta.blocked_hygiene_violated=true` flag is set automatically and an analytics event is emitted (governance §1f, 5th channel).
500
+
501
+ ## Done Claim Format
502
+ When writing done-claim JSON, ALWAYS include execution_steps — what you did, in what order, with evidence:
503
+ \`\`\`json
504
+ {
505
+ "us_id": "US-NNN",
506
+ "claims": ["AC1: ...", "AC2: ..."],
507
+ "execution_steps": [
508
+ {"step": "write_test", "ac_id": "AC1", "command": null, "summary": "wrote tests/test_add.py with 3 tests"},
509
+ {"step": "verify_red", "ac_id": "AC1", "command": "pytest tests/...", "exit_code": 1, "summary": "RED: test fails as expected"},
510
+ {"step": "implement", "ac_id": "AC1", "command": null, "summary": "created add() function"},
511
+ {"step": "verify_green", "ac_id": "AC1", "command": "pytest tests/...", "exit_code": 0, "summary": "GREEN: 3 passed"},
512
+ {"step": "verify_e2e", "ac_id": "AC1", "command": "python -c '...'", "exit_code": 0, "summary": "E2E output matches expected"},
513
+ {"step": "commit", "ac_id": "AC1", "command": "git commit ...", "exit_code": 0, "summary": "committed abc1234"}
514
+ ]
515
+ }
516
+ \`\`\`
517
+ This is NOT optional. Every done-claim must include the steps you took and the evidence for each.
518
+ execution_steps MUST be a JSON array of objects (not a dict with string keys). Each object MUST have: "step", "ac_id", "command", "exit_code", "summary".
519
+
520
+ ## Stop behavior
521
+ - Single US achieved → write done-claim JSON to $DESK/memos/$SLUG-done-claim.json with the specific US, signal verify, exit
522
+ - All US achieved → write done-claim JSON with all US, signal verify with us_id "ALL", exit
523
+ - Autonomous blocker → write to $DESK/memos/$SLUG-blocked.md, exit
524
+ - Otherwise → set stop=continue, define next iteration contract in memory, exit
525
+
526
+ ## Objective
527
+ $OBJECTIVE
528
+ EOF
529
+
530
# Inject operational context if any server option was provided.

# Print the "Operational Context" markdown section for the worker prompt.
# Reads SERVER_CMD, SERVER_PORT and SERVER_HEALTH; only the options that are
# set get a bullet. The "verify the server responds" rule carries exactly one
# curl hint: the health URL when given, otherwise the localhost port URL,
# otherwise none.
# Outputs: the section on stdout
_rlp_operational_context() {
  local restart_hint="" verify_hint=""

  if [[ -n "${SERVER_CMD:-}" ]]; then
    restart_hint=": \`${SERVER_CMD}\`"
  fi
  # An explicit if/elif is used because "a && b || c && d" groups as
  # "((a && b) || c) && d", which would append the port hint even when the
  # health URL had already been printed.
  if [[ -n "${SERVER_HEALTH:-}" ]]; then
    verify_hint=": \`curl -sf ${SERVER_HEALTH}\`"
  elif [[ -n "${SERVER_PORT:-}" ]]; then
    verify_hint=": \`curl -sf http://localhost:${SERVER_PORT}/\`"
  fi

  printf '%s\n' "" "## Operational Context"
  if [[ -n "${SERVER_CMD:-}" ]]; then
    printf '%s\n' "- **Server Start Command**: \`${SERVER_CMD}\`"
  fi
  if [[ -n "${SERVER_PORT:-}" ]]; then
    printf '%s\n' "- **Server Port**: ${SERVER_PORT}"
  fi
  if [[ -n "${SERVER_HEALTH:-}" ]]; then
    printf '%s\n' "- **Health Check URL**: ${SERVER_HEALTH}"
  fi
  printf '%s\n' \
    "" \
    "### Operational Rules (always apply when server context is present)" \
    "- After modifying server/application code, restart the server${restart_hint}" \
    "- Before signaling done, verify the server responds${verify_hint}" \
    "- Do NOT modify dependency files (package.json, requirements.txt, etc.) unless the AC explicitly requires it" \
    "- Do NOT run package install commands (npm install, pip install, etc.) unless the AC explicitly requires it"
}

# --server-health on its own also counts as server context.
if [[ -n "${SERVER_CMD:-}" || -n "${SERVER_PORT:-}" || -n "${SERVER_HEALTH:-}" ]]; then
  _rlp_operational_context >> "$F"
fi
546
+
547
+ echo " + $F"
548
+ else echo " · $F"; fi
549
+
550
+ # --- Verifier Prompt ---
551
+ F="$DESK/prompts/$SLUG.verifier.prompt.md"
552
+ if [[ ! -f "$F" ]]; then
553
+ cat > "$F" <<EOF
554
+ Independent verifier for Ralph Desk: $SLUG
555
+
556
+ ## Verification Principles
557
+
558
+ 1. Think Before Judging
559
+ Don't assume. Don't default to PASS or FAIL without evidence.
560
+ - State your assumptions about what PASS looks like for each AC before
561
+ checking evidence.
562
+ - If evidence is ambiguous or incomplete, say what is unclear and why —
563
+ do not default to either verdict.
564
+ - If multiple interpretations of an AC exist, flag it as a spec issue.
565
+
566
+ 2. Goal-Driven Verification
567
+ Define the specific evidence required for PASS before you start checking.
568
+ - For each AC, state: "PASS requires [specific evidence]."
569
+ - Verify against that criteria, not against a general impression of code quality.
570
+ - If success criteria are unclear, note it in reasoning — do not invent criteria.
571
+
572
+ ## Iron Law (ABSOLUTE — no exceptions)
573
+ > NO COMPLETION CLAIMS WITHOUT FRESH VERIFICATION EVIDENCE
574
+ > "should pass", "probably works", "seems to" = automatic FAIL
575
+
576
+ ## Evidence Gate (MANDATORY before any verdict)
577
+ 1. IDENTIFY: What command proves this claim?
578
+ 2. RUN: Execute the FULL command (fresh, complete)
579
+ 3. READ: Full output, check exit code, count failures
580
+ 4. VERIFY: Does output confirm the claim?
581
+ 5. ONLY THEN: Issue verdict
582
+
583
+ Required reads:
584
+ - PRD: $DESK/plans/prd-$SLUG.md
585
+ - Test Spec: $DESK/plans/test-spec-$SLUG.md
586
+ - Campaign Memory: $DESK/memos/$SLUG-memory.md (orientation only — not source of truth)
587
+ - Latest Context: $DESK/context/$SLUG-latest.md
588
+ - Done Claim: $DESK/memos/$SLUG-done-claim.json
589
+ - Iteration Signal: $DESK/memos/$SLUG-iter-signal.json (check us_id field)
590
+
591
+ ## Verification Scope
592
+ Check the iter-signal.json "us_id" field:
593
+ - If us_id is a specific story (e.g., "US-001"): verify ONLY that story's acceptance criteria from the PRD.
594
+ - If us_id is "ALL": verify ALL acceptance criteria from the PRD (final full verify).
595
+ - If us_id is absent or null: verify all criteria in the done-claim (legacy/batch mode).
596
+
597
+ ## Verification Process
598
+ 1. Read PRD acceptance criteria (scoped to us_id if present)
599
+ 2. Read done claim
600
+ 3. Identify scope: run \`git diff --name-only\` to find changed files, then read those files + related imports only
601
+ 4. **Scope Lock check**: (a) Read the Next Iteration Contract from campaign memory to identify the contracted US. (b) Run \`git diff --name-only\` to list all changed files. (c) For each changed file, verify it is plausibly related to the contracted US's acceptance criteria. (d) Flag files that appear unrelated. (e) Shared infrastructure (types, configs, common utilities) and dependency files are permitted if the AC implies them.
602
+ 5. **Layer Enforcement**: check test-spec L1/L2/L3/L4 sections. ANY section with TODO or blank = FAIL (IL-3).
603
+ 6. Run fresh verification: execute ALL commands from test-spec verification layers (L1, L2, L3, L4 as applicable)
604
+ **Skip detection (IL-5)**: After running tests, check output for "skip", "pending", "not run", or "0 items collected". Tests that did not actually execute do NOT count as passed. If test_count_executed < test_count_expected, verdict = FAIL ("skipped tests detected").
605
+ 7. Check each criterion against fresh evidence (only for the scoped US, or all if us_id=ALL)
606
+ 8. Run smoke test if defined in PRD
607
+ 9. **Test Sufficiency (IL-4)**: count test functions exercising each AC. Count < 3 per AC = FAIL.
608
+ Check diversity: at least 2 of 3 categories (happy, negative, boundary) per AC.
609
+ 10. **Anti-Gaming Detection**:
610
+ - Assertion integrity: compare assertion count/strength via \`git diff HEAD~1\` — assertions not deleted or weakened
611
+ - Test-specific logic: no environment-detection patterns
612
+ - "Code inspection" claims: Worker must run actual commands
613
+ - Tautological tests: expected values that mirror implementation logic
614
+ 10¼. **Anti-Rubber-Stamp Self-Check**:
615
+ - If your verdict history shows a 100% pass rate, re-examine your last verdict with increased scrutiny — a 100% pass rate is a red flag for insufficient rigor
616
+ - When issuing PASS with explicit warning: note any concerning patterns (e.g., low test diversity, marginal coverage) even if technically passing
617
+ - Never issue a silent PASS — every pass verdict must cite specific evidence for each AC checked
618
+ - Rationalization red flags: "tests pass so it works" (passing ≠ correct), "Worker is confident" (confidence ≠ evidence), "changes are minimal" (scope ≠ correctness)
619
+ 10½. **Worker Process Audit**:
620
+ - Test-first compliance: done-claim execution_steps must show write_test step before implement step for each AC
621
+ - RED phase evidence: at least one verify_red step with exit_code=1 per AC (proves tests were written before passing)
622
+ - Forbidden shortcuts: check done-claim claims and summary for forbidden phrases ("code inspection", "I'm confident", "too simple", "I'll test after", "already manually tested", "partial check")
623
+ - Step completeness: each AC should have write_test → verify_red → implement → verify_green sequence in execution_steps
624
+ - Planning Step presence: done-claim execution_steps should include a \`plan\` step as the first entry. If missing, record in reasoning as {"check": "Planning Step", "decision": "info", "basis": "plan step present/absent"} — informational only (does not affect pass/fail verdict)
625
+ 11. **Reproducibility check**: verify lock file committed, clean install succeeds, security scan passes, env vars documented (per test-spec Reproducibility Gate). Skip if test-spec says "N/A."
626
+ 12. Write verdict JSON to: $DESK/memos/$SLUG-verify-verdict.json
627
+ **CRITICAL: You MUST write the verdict as a FILE (not stdout/echo/cat). The Leader polls this file path — terminal output is lost. Evidence strings: include key metrics and exit codes only, do NOT quote full command output or logs verbatim.**
628
+
629
+ Verdict JSON:
630
+ {
631
+ "verdict": "pass|fail|request_info",
632
+ "us_id": "US-NNN or ALL (matches the scope you verified)",
633
+ "verified_at_utc": "ISO timestamp",
634
+ "summary": "...",
635
+ "per_us_results": {"US-001": "pass|fail|not_started", "US-002": "pass|fail|not_started"},
636
+ "criteria_results": [{"criterion":"...","met":true/false,"evidence":"..."}],
637
+ "missing_evidence": [],
638
+ "issues": [{"id":"...","severity":"critical|major|minor","description":"...","fix_hint":"(suggestion, non-authoritative)"}],
639
+ "reasoning": [
640
+ {"check": "IL-1 Evidence Gate", "decision": "pass|fail", "basis": "what command was run, what output confirmed the decision"},
641
+ {"check": "Layer Enforcement", "decision": "pass|fail", "basis": "which layers checked, any TODO found"},
642
+ {"check": "Test Sufficiency", "decision": "pass|fail", "basis": "test count per AC, category coverage"},
643
+ {"check": "Anti-Gaming", "decision": "pass|fail", "basis": "what was checked, any suspicious patterns"},
644
+ {"check": "Worker Process Audit", "decision": "pass|fail", "basis": "test-first followed: verify_red present per AC, no forbidden shortcuts in claims, execution_steps complete"}
645
+ ],
646
+ "layer_status": {"L1":"pass|fail|todo|na","L2":"pass|fail|todo|na","L3":"pass|fail|todo|na","L4":"pass|fail|todo|na"},
647
+ "test_quality": {"test_count":0,"ac_count":0,"sufficiency":"pass|fail","anti_patterns_found":[]},
648
+ "recommended_state_transition": "complete|continue|blocked",
649
+ "next_iteration_contract": "...",
650
+ "evidence_paths": []
651
+ }
652
+
653
+ Rules:
654
+ - Do NOT trust the worker's claim. Verify with fresh evidence.
655
+ - If uncertain, verdict = request_info (describe your specific question in summary so Leader can decide).
656
+ - Campaign Memory is for orientation only — do NOT use it as source of truth for AC verification.
657
+ - Deterministic checks (type hints, linting, security) delegate to test-spec tools; focus on AC verification + semantic review + smoke test.
658
+ - Do NOT modify code or write sentinel files.
659
+ - If Worker claims "inspection" or "review" for an AC that requires an automated command, verdict = FAIL.
660
+ - **US-017 R5 P0-D**: Inspect iter-signal.json's "summary" field. If it matches the auto-generated A4 fallback pattern (e.g., contains "auto-generated by A4 fallback" or "auto-generated after codex exit"), set meta.iter_signal_quality='auto_generated' in the verdict to flag the debugging context loss. Otherwise meta.iter_signal_quality='specific'.
661
+ - **US-019 R7 P1-G**: If signal status=verify_partial, evaluate ONLY verified_acs. Treat deferred_acs as out-of-scope (not fail). If verified_acs is empty/missing, the signal is malformed — do not pass; the runner will downgrade to blocked with reason='verify_partial_malformed'.
662
+ - **ALWAYS include per_us_results** in verdict JSON — map each US to "pass", "fail", or "not_started". This is required for partial progress tracking in both batch and per-us modes.
663
+ EOF
664
+
665
+ # Inject operational verification if server options provided
666
+ if [[ -n "$SERVER_CMD" || -n "$SERVER_PORT" ]]; then
667
+ cat >> "$F" <<OPVER
668
+
669
+ ## Operational Verification (server context present)
670
+ - Before verifying ACs, check that the server is running$([ -n "$SERVER_PORT" ] && echo " on port $SERVER_PORT")$([ -n "$SERVER_HEALTH" ] && echo ": \`curl -sf $SERVER_HEALTH\`")
671
+ - If the server is not running, verdict = FAIL with issue: "server not running on expected port"
672
+ - If Worker modified server code but did not restart the server, verdict = FAIL with issue: "server not restarted after code change"
673
+ OPVER
674
+ fi
675
+
676
+ echo " + $F"
677
+ else echo " · $F"; fi
678
+
679
+ # --- Flywheel Prompt ---
680
+ F="$DESK/prompts/$SLUG.flywheel.prompt.md"
681
+ if [[ ! -f "$F" ]]; then
682
+ cat > "$F" <<'FLYWHEEL_EOF'
683
+ # Flywheel Direction Review
684
+
685
+ You are an independent direction reviewer with fresh context. After a Worker iteration failed verification, you decide whether the current approach should continue, pivot, or change scope.
686
+
687
+ ## Context Files
688
+ Read these in order:
689
+ 1. Campaign Memory: {DESK}/memos/{SLUG}-memory.md — especially Next Iteration Contract, Key Decisions, Rejected Directions
690
+ 2. PRD: {DESK}/plans/prd-{SLUG}.md — acceptance criteria
691
+ 3. Done Claim: {DESK}/memos/{SLUG}-done-claim.json — what Worker actually did
692
+ 4. Verify Verdict: {DESK}/memos/{SLUG}-verify-verdict.json — why Verifier failed it
693
+ 5. Latest Context: {DESK}/context/{SLUG}-latest.md — current state
694
+
695
+ ## CEO Cognitive Patterns (apply throughout your review)
696
+ 1. First-principles — ignore convention, start from the problem itself
697
+ 2. 10x check — can 2x effort yield 10x better result?
698
+ 3. Inversion — what must be true for this approach to fail?
699
+ 4. Simplicity bias — prefer simple over complex solutions
700
+ 5. User-back — reason backwards from end-user experience
701
+ 6. Time-value — does this direction change save 3+ iterations?
702
+ 7. Sunk cost immunity — ignore what was already invested
703
+ 8. Blast radius — assess impact scope of direction change
704
+ 9. Reversibility — prefer easily reversible decisions
705
+ 10. Evidence > opinion — judge only by this iteration's actual results
706
+ 11. Proxy skepticism — is the optimization metric the right proxy for the real goal?
707
+ 12. Classification — hard-to-reverse + large-magnitude changes need stronger evidence
708
+
709
+ ## Review Process
710
+
711
+ ### Step 0A: Premise Challenge
712
+ List every assumption the current approach depends on.
713
+ For each assumption, state whether THIS iteration's evidence supports or contradicts it.
714
+ - Supported: "Assumption X — SUPPORTED: [evidence from done-claim/verdict]"
715
+ - Contradicted: "Assumption X — BROKEN: [evidence]. This means [implication]."
716
+ If any premise is broken, PIVOT or REDUCE is likely the right call.
717
+
718
+ ### Step 0B: Existing Code Leverage
719
+ - Did the Worker miss reusable code that already exists in the project?
720
+ - Would a different approach align better with existing patterns?
721
+ - Check: are there utilities, helpers, or patterns the Worker could have used?
722
+
723
+ ### Step 0C: Ideal State Mapping
724
+ Describe what this US looks like when perfectly implemented (2-3 sentences).
725
+ How far is the current approach from this ideal? What is the gap?
726
+
727
+ ### Step 0D: Implementation Alternatives (MANDATORY)
728
+ Propose at least 2 alternative approaches. For each:
729
+ - Summary (1-2 sentences)
730
+ - Effort: S (< 1 iteration) / M (1-2 iterations) / L (3+ iterations)
731
+ - Risk: low / medium / high
732
+ - Key tradeoff vs current approach
733
+
734
+ Do NOT skip this step. Even if the current approach seems correct, articulate alternatives.
735
+
736
+ ### Step 0E: Scope Decision
737
+ Choose ONE. Justify with evidence from this iteration only:
738
+ - **HOLD**: Premises valid, current approach correct. Refine the contract with specific fixes: "[fix 1], [fix 2]"
739
+ - **PIVOT**: Premise [X] broken. Switch to Alternative [A]. Reason: [evidence]
740
+ - **REDUCE**: AC [N] too complex at current scope. Split into [parts] or simplify to [simpler version]
741
+ - **EXPAND**: Missing prerequisite [Y] discovered. Add to contract: [what to add]
742
+
743
+ ### Step 0F: Contract Rewrite
744
+ Based on your decision, update campaign memory:
745
+ 1. Rewrite "Next Iteration Contract" with the new direction
746
+ 2. Append your decision and reasoning to "Key Decisions"
747
+ 3. If rejecting an approach, append to "Rejected Directions" section:
748
+ "DO NOT retry: [approach description]. Reason: [why it failed]. Evidence: [from iteration N]."
749
+ The next Worker MUST read Rejected Directions before starting.
750
+
751
+ ## Output Files
752
+
753
+ 1. Write analysis to: {DESK}/memos/{SLUG}-flywheel-review.md
754
+ 2. Update campaign memory: {DESK}/memos/{SLUG}-memory.md
755
+ 3. Write signal: {DESK}/memos/{SLUG}-flywheel-signal.json
756
+ Format: {"iteration": N, "decision": "hold|pivot|reduce|expand", "summary": "one line", "rejected_directions": ["approach X because Y"], "contract_updated": true, "next_mission_candidate": null, "timestamp": "ISO"}
757
+
758
+ Optional field — `next_mission_candidate` (string | null):
759
+ - null: no specific next mission suggested (default).
760
+ - "<slug>": suggest a slug the consumer wrapper should chain next, given the
761
+ current direction. The wrapper polls this field for autonomous
762
+ multi-mission orchestration (rlp-desk does not auto-launch missions —
763
+ the consumer wrapper owns that policy). Field is OPTIONAL; absence is
764
+ treated as null. See docs/multi-mission-orchestration.md for the
765
+ consumer-side polling pattern.
766
+ FLYWHEEL_EOF
767
+
768
# Replace the {DESK} / {SLUG} placeholders in the freshly written prompt with
# the real paths.
# - The values are escaped first so that "\", "&" or "|" inside $DESK/$SLUG are
#   inserted literally instead of being parsed as sed replacement syntax.
# - "-i.bak" (suffix attached to -i) is the in-place spelling accepted by both
#   BSD and GNU sed; the bare "-i ''" form only works on BSD/macOS and makes
#   GNU sed treat the script as a file name. The backup is removed right away.
_desk_repl=$(printf '%s' "$DESK" | sed -e 's/[\\&|]/\\&/g')
_slug_repl=$(printf '%s' "$SLUG" | sed -e 's/[\\&|]/\\&/g')
sed -i.bak "s|{DESK}|${_desk_repl}|g; s|{SLUG}|${_slug_repl}|g" "$F"
rm -f "${F}.bak"
770
+
771
+ echo " + $F"
772
+ else echo " · $F"; fi
773
+
774
+ # --- Flywheel Guard Prompt ---
775
+ F="$DESK/prompts/$SLUG.flywheel-guard.prompt.md"
776
+ if [[ ! -f "$F" ]]; then
777
+ cat > "$F" <<'GUARD_EOF'
778
+ # Flywheel Guard Review
779
+
780
+ You are an independent reviewer verifying whether a flywheel direction decision is safe to execute.
781
+ You have NO prior context about this campaign. Read the files below and evaluate the decision objectively.
782
+
783
+ ## Files to Read (in order)
784
+ 1. PRD: {DESK}/plans/prd-{SLUG}.md — the ground truth for what success means
785
+ 2. Flywheel Decision: {DESK}/memos/{SLUG}-flywheel-signal.json — what the flywheel decided
786
+ 3. Flywheel Analysis: {DESK}/memos/{SLUG}-flywheel-review.md — the flywheel's reasoning
787
+ 4. Campaign Memory: {DESK}/memos/{SLUG}-memory.md — history, rejected directions, key decisions
788
+ 5. Done Claim: {DESK}/memos/{SLUG}-done-claim.json — what the Worker actually produced
789
+ 6. Verify Verdict: {DESK}/memos/{SLUG}-verify-verdict.json — why the Verifier failed it
790
+
791
+ ## Validation Checks
792
+
793
+ ### Check 1: Look-ahead Bias
794
+ List every data feature the flywheel's proposed direction depends on.
795
+ For each: "feature X — available at decision time: YES/NO/UNCLEAR"
796
+ - YES: feature is known before the event (entry time, session start price, order book state)
797
+ - NO: feature requires future information (peak price, session end, outcome)
798
+ - UNCLEAR: cannot determine from available context → mark inconclusive
799
+ If ANY feature is NO and used in a deployable strategy (not just upper-bound analysis): FAIL.
800
+
801
+ ### Check 2: Metric Alignment
802
+ 1. What metric does the PRD define as the optimization target?
803
+ 2. What metric does the flywheel's direction optimize?
804
+ 3. Are they the same?
805
+ - Same metric → pass
806
+ - Different metric, not flagged → FAIL (silent metric switch)
807
+ - Different metric, flagged with evidence → FAIL with recommendation: "metric mismatch requires PRD update or user approval before proceeding"
808
+ PRD is ground truth. The guard cannot approve off-PRD metric changes autonomously.
809
+
810
+ ### Check 3: Deployability
811
+ Can the proposed direction's output be used in production as-is?
812
+ - Requires post-hoc data → FAIL
813
+ - Requires infrastructure not mentioned in PRD → FAIL
814
+ - Labeled as "upper-bound only" or "reference" → pass, but you MUST include "analysis_only": true in your verdict so Leader skips Worker dispatch (no implementation, analysis record only)
815
+
816
+ ### Check 4: Repeat Pattern (same-US scoped)
817
+ Compare to prior flywheel decisions for the current US only in campaign memory's Key Decisions section.
818
+ - Same scope decision + same underlying approach as a prior flywheel for this US → FAIL
819
+ - Reframing of a previously rejected direction (check Rejected Directions) → FAIL
820
+ - Genuinely new approach → pass
821
+ Before writing your verdict, you MUST append any rejected flywheel direction to campaign memory's Rejected Directions section. This persists the record before cleanup can erase it.
822
+
823
+ ## Output
824
+ Write verdict to: {DESK}/memos/{SLUG}-flywheel-guard-verdict.json
825
+
826
+ Use this format:
827
+ {
828
+ "verdict": "pass|fail|inconclusive",
829
+ "issues": [{"check": "check-name", "status": "pass|fail|inconclusive", "detail": "finding", "evidence": "reference"}],
830
+ "analysis_only": false,
831
+ "recommendation": "proceed|retry-flywheel|escalate-to-user",
832
+ "timestamp": "ISO"
833
+ }
834
+
835
+ Rules:
836
+ - If ALL checks pass → verdict: pass, recommendation: proceed
837
+ - If ANY check is fail → verdict: fail, recommendation: retry-flywheel
838
+ - If ANY check is inconclusive and none are fail → verdict: inconclusive, recommendation: escalate-to-user
839
+ - Include specific evidence for every check. No "seems fine" or "probably ok."
840
+ GUARD_EOF
841
+
842
# Replace the {DESK} / {SLUG} placeholders in the freshly written guard prompt
# with the real paths.
# - The values are escaped first so that "\", "&" or "|" inside $DESK/$SLUG are
#   inserted literally instead of being parsed as sed replacement syntax.
# - "-i.bak" (suffix attached to -i) is the in-place spelling accepted by both
#   BSD and GNU sed; the bare "-i ''" form only works on BSD/macOS and makes
#   GNU sed treat the script as a file name. The backup is removed right away.
_desk_repl=$(printf '%s' "$DESK" | sed -e 's/[\\&|]/\\&/g')
_slug_repl=$(printf '%s' "$SLUG" | sed -e 's/[\\&|]/\\&/g')
sed -i.bak "s|{DESK}|${_desk_repl}|g; s|{SLUG}|${_slug_repl}|g" "$F"
rm -f "${F}.bak"
844
+
845
+ echo " + $F"
846
+ else echo " · $F"; fi
847
+
848
+ # --- Context ---
849
+ F="$DESK/context/$SLUG-latest.md"
850
+ if [[ ! -f "$F" ]]; then
851
+ cat > "$F" <<EOF
852
+ # $SLUG - Latest Context
853
+
854
+ ## Current Frontier
855
+ ### Completed
856
+ ### In Progress
857
+ ### Next
858
+ - (TBD by first worker)
859
+
860
+ ## Key Decisions
861
+ ## Known Issues
862
+ ## Files Changed This Iteration
863
+ ## Verification Status
864
+ EOF
865
+ echo " + $F"
866
+ else echo " · $F"; fi
867
+
868
+ # --- Campaign Memory ---
869
+ F="$DESK/memos/$SLUG-memory.md"
870
+ if [[ ! -f "$F" ]]; then
871
+ cat > "$F" <<EOF
872
+ # $SLUG - Campaign Memory
873
+
874
+ ## Stop Status
875
+ continue
876
+
877
+ ## Objective
878
+ $OBJECTIVE
879
+
880
+ ## Current State
881
+ Iteration 0 - not started
882
+
883
+ ## Completed Stories
884
+
885
+ ## Next Iteration Contract
886
+ Start from the beginning: read PRD and plan the first bounded action.
887
+
888
+ **Criteria**:
889
+ - (to be defined by first worker after reading PRD)
890
+
891
+ ## Key Decisions
892
+ (seeded from brainstorm — do not erase, only append)
893
+
894
+ ## Patterns Discovered
895
+ (seeded from brainstorm codebase exploration — do not erase, only append)
896
+ ## Learnings
897
+ ## Evidence Chain
898
+ EOF
899
+ echo " + $F"
900
+ else echo " · $F"; fi
901
+
902
+ # --- PRD ---
903
+ F="$DESK/plans/prd-$SLUG.md"
904
+ if [[ ! -f "$F" ]]; then
905
+ cat > "$F" <<EOF
906
+ # PRD: $SLUG
907
+
908
+ ## Objective
909
+ $OBJECTIVE
910
+
911
+ ## User Stories
912
+
913
+ ### US-001: [Title]
914
+ - **Priority**: P0
915
+ - **Size**: S|M|L
916
+ - **Type**: code|visual|content|integration|infra
917
+ - **Risk**: LOW|MEDIUM|HIGH|CRITICAL (governance §1c)
918
+ - **Depends on**: []
919
+ - **Acceptance Criteria** (Given/When/Then — domain language only):
920
+ - AC1:
921
+ - Given: [precondition in domain language]
922
+ - When: [action in domain language]
923
+ - Then: [expected outcome with quantitative criteria]
924
+ - AC2:
925
+ - Given: [precondition]
926
+ - When: [action]
927
+ - Then: [expected outcome with quantitative criteria]
928
+ - **Boundary Cases**: [edge cases — empty input, max values, error conditions, concurrent access]
929
+ - **Verification Layers**: [Fill per Risk level — LOW: L1+L3, MEDIUM: L1+L2(if ext deps)+L3, HIGH: L1+L2+L3+L4, CRITICAL: L1+L2+L3+L4+mutation (governance §1c)]
930
+ - **Status**: not started
931
+
932
+ ## Non-Goals
933
+ ## Technical Constraints
934
+ ## Done When
935
+ - All acceptance criteria pass with quantitative evidence
936
+ - All boundary cases covered
937
+ - All required verification layers executed (no TODO remaining)
938
+ - Independent verifier confirms via Evidence Gate (governance §1b)
939
+ EOF
940
+ echo " + $F"
941
+ else echo " · $F"; fi
942
+
943
+ # Split PRD into per-US files (no-op with warning if no US markers)
944
+ split_prd_by_us "$DESK/plans/prd-$SLUG.md" "$SLUG"
945
+
946
+ # --- Test Spec ---
947
+ F="$DESK/plans/test-spec-$SLUG.md"
948
+ if [[ ! -f "$F" ]]; then
949
+ cat > "$F" <<EOF
950
+ # Test Specification: $SLUG
951
+
952
+ ## Iron Law Reference
953
+ > IL-3: NO PASS WITH TODO IN ANY REQUIRED VERIFICATION LAYER
954
+ > IL-4: NO PASS WITHOUT TEST COUNT >= AC COUNT x 3
955
+
956
+ ---
957
+
958
+ ## Verification Commands
959
+ ### Build
960
+ \`\`\`bash
961
+ # TODO
962
+ \`\`\`
963
+ ### Test
964
+ \`\`\`bash
965
+ # TODO
966
+ \`\`\`
967
+ ### Lint
968
+ \`\`\`bash
969
+ # TODO
970
+ \`\`\`
971
+
972
+ ---
973
+
974
+ ## Verification Context (fill BEFORE implementation)
975
+
976
+ ### Target Behavior
977
+ What behavior does this project change or introduce?
978
+ - TODO
979
+
980
+ ### Impacted Tests
981
+ Existing tests that may break due to this change:
982
+ - TODO (acceptable at init; Worker fills during first iteration)
983
+
984
+ ### Required New Tests
985
+ Tests that MUST be written (minimum 3 per AC: happy + negative + boundary):
986
+ - TODO
987
+
988
+ ### Forbidden Shortcuts (see Worker prompt for full list)
989
+ - Do not mock external services when L2 integration test is required
990
+ - Do not delete or weaken existing assertions to make tests pass
991
+ - Do not add test-specific logic (if __name__ == '__test__' patterns)
992
+ - Do not skip boundary cases listed in the PRD
993
+ - Do not claim "code inspection" as verification — run the actual command
994
+ - Do not say "too simple to test" — simple code breaks
995
+ - Do not say "I'll test after" — tests passing immediately prove nothing
996
+ - Do not say "already manually tested" — ad-hoc is not systematic
997
+ - Do not say "partial check is enough" — partial proves nothing
998
+ - Do not say "I'm confident" — confidence is not evidence
999
+ - Do not say "existing code has no tests" — you are improving it, add tests
1000
+ - Do not write code before tests — delete it and start with tests
1001
+
1002
+ ### Pass/Fail Evidence Format
1003
+ - Command output with exit code 0
1004
+ - Quantitative result matching expected value
1005
+ - Screenshot comparison (for visual tasks)
1006
+
1007
+ ---
1008
+
1009
+ ## Verification Layers (ALL required sections — TODO in required layer = Verifier FAIL)
1010
+
1011
+ ### L1: Unit Test (REQUIRED)
1012
+ \`\`\`bash
1013
+ # TODO — unit test command (e.g., pytest, jest, go test)
1014
+ \`\`\`
1015
+
1016
+ ### L2: Integration (required if external services exist, otherwise "N/A — reason")
1017
+ \`\`\`bash
1018
+ # TODO — integration test command, or write: N/A — no external services (pure computation/transformation)
1019
+ \`\`\`
1020
+
1021
+ ### L3: E2E Simulation (REQUIRED)
1022
+ Known input → full pipeline → quantitative output comparison.
1023
+ Must cover ALL AC types: happy path + boundary + error path.
1024
+ - **Happy path input**: TODO (specific test data)
1025
+ - **Happy path expected output**: TODO (quantitative value)
1026
+ - **Happy path command**:
1027
+ \`\`\`bash
1028
+ # TODO — E2E happy path command
1029
+ \`\`\`
1030
+ - **Error path input**: TODO (invalid/boundary input that triggers error)
1031
+ - **Error path expected**: TODO (error type + non-zero exit code)
1032
+ - **Error path command**:
1033
+ \`\`\`bash
1034
+ # TODO — E2E error path command (expected exit ≠ 0)
1035
+ \`\`\`
1036
+
1037
+ ### L4: Deploy Verification (required if deploying, otherwise "N/A — reason")
1038
+ \`\`\`bash
1039
+ # TODO — deploy verification command, or write: N/A — no deployment (library/tool, local-only change)
1040
+ \`\`\`
1041
+
1042
+ ---
1043
+
1044
+ ## Mutation Testing Gate (CRITICAL risk only)
1045
+ - Required: only for CRITICAL risk classification (governance §1c)
1046
+ - Tool: TODO (e.g., mutmut, Stryker, go-mutesting) or "N/A — not CRITICAL risk"
1047
+ - Target: >= 60% mutation score on core business logic (project default; override in PRD if justified)
1048
+ - Scope: core business logic files (not config/tests/docs)
1049
+ - Command:
1050
+ \`\`\`bash
1051
+ # TODO — mutation testing command, or write: N/A — not CRITICAL risk
1052
+ \`\`\`
1053
+
1054
+ ---
1055
+
1056
+ ## Test Quality Checklist (Verifier checks these)
1057
+ - [ ] Tests verify behavior, not implementation details
1058
+ - [ ] Each test has meaningful assertions (not just "no error thrown")
1059
+ - [ ] Boundary cases covered (empty, max, zero, null, concurrent)
1060
+ - [ ] No tautological tests (expected value copied from implementation)
1061
+ - [ ] Mock usage limited to external boundaries only
1062
+ - [ ] No test-specific logic in production code
1063
+ - [ ] Each AC has >= 3 tests (happy + negative + boundary) per IL-4
1064
+
1065
+ ## Traceability Matrix (Worker fills during implementation)
1066
+
1067
+ | US | AC | Test File :: Function | Layer | Evidence | Status |
1068
+ |----|----|----------------------|-------|----------|--------|
1069
+ | US-001 | AC1 | TODO | L1 | TODO | pending |
1070
+
1071
+ ---
1072
+
1073
+ ## Code Quality Gates (defaults — override in PRD with justification)
1074
+ - **Code duplication**: <= 3% (project-appropriate tool, e.g., jscpd, pylint, sonar)
1075
+ - **Mock ratio**: mock-based assertions <= 30% of total assertions
1076
+ - **Cyclomatic complexity**: <= 10 per function
1077
+ - **Function length**: <= 50 lines per function
1078
+ - **File length**: <= 800 lines per file
1079
+
1080
+ ---
1081
+
1082
+ ## Reproducibility Gate
1083
+ - [ ] Lock file exists and committed (package-lock.json, poetry.lock, go.sum, etc.) or "N/A — no external dependencies"
1084
+ - [ ] Clean install succeeds (npm ci, pip install, etc.) or "N/A — no external dependencies"
1085
+ - [ ] Security scan passes (or known vulnerabilities documented and acknowledged in PRD) or "N/A — no dependencies"
1086
+ - [ ] Environment variables documented (.env.example or equivalent) or "N/A — no env vars"
1087
+
1088
+ ---
1089
+
1090
+ ## Criteria → Verification Mapping
1091
+
1092
+ | US | AC | Layer | Method | Command | Expected Output | Pass Criteria |
1093
+ |----|----|-------|--------|---------|-----------------|---------------|
1094
+ | US-001 | AC1 | L1 | TODO | TODO | TODO | TODO |
1095
+ EOF
1096
+ echo " + $F"
1097
+ else echo " · $F"; fi
1098
+
1099
+ # Split test-spec into per-US files (no-op with warning if no US section markers)
1100
+ split_test_spec_by_us "$DESK/plans/test-spec-$SLUG.md" "$SLUG"
1101
+
1102
# --- .gitignore for runtime artifacts ---
# Makes sure the runtime directory (RLP_DESK_RUNTIME_DIR, default .rlp-desk) is
# ignored by git:
#   - existing .gitignore: drop the legacy .claude/ralph-desk/ rule, then append
#     the marker comment and the "<dir>/" rule unless the rule is already there;
#   - no .gitignore: create one containing just the marker and the rule.
# Sets GITIGNORE, MARKER and DESK_REL (DESK_REL is reused further down).
GITIGNORE="$ROOT/.gitignore"
MARKER="# RLP Desk runtime artifacts"
DESK_REL="${RLP_DESK_RUNTIME_DIR:-.rlp-desk}"
if [[ -f "$GITIGNORE" ]]; then
  # v0.13.0: drop legacy ".claude/ralph-desk/" line if present.
  if grep -qE '^\.claude/ralph-desk/$' "$GITIGNORE"; then
    sed -i.bak -E '/^\.claude\/ralph-desk\/$/d' "$GITIGNORE"
    rm -f "${GITIGNORE}.bak"
    echo " · .gitignore (legacy .claude/ralph-desk/ rule removed)"
  fi
  # -x -F compares the whole line as a literal string. Interpolating DESK_REL
  # into a regex would let the "." in ".rlp-desk" match any character and
  # report an unrelated line as "already present".
  if ! grep -qxF -- "${DESK_REL}/" "$GITIGNORE"; then
    # If the last line has no trailing newline, terminate it first so the
    # appended text does not get glued onto an existing rule. ($(...) strips a
    # trailing newline, so a non-empty result means the file ends mid-line.)
    if [[ -s "$GITIGNORE" && -n "$(tail -c 1 "$GITIGNORE")" ]]; then
      echo "" >> "$GITIGNORE"
    fi
    if ! grep -qF -- "$MARKER" "$GITIGNORE"; then
      echo "" >> "$GITIGNORE"
      printf '%s\n' "$MARKER" >> "$GITIGNORE"
    fi
    printf '%s\n' "${DESK_REL}/" >> "$GITIGNORE"
    echo " + .gitignore (rlp-desk rule for ${DESK_REL}/ appended)"
  else
    echo " · .gitignore (${DESK_REL}/ already present)"
  fi
else
  # Fresh file: marker comment followed by the ignore rule.
  printf '%s\n' "$MARKER" "${DESK_REL}/" > "$GITIGNORE"
  echo " + .gitignore (created with rlp-desk rule for ${DESK_REL}/)"
fi
1130
+
1131
# --- Claude Code sensitive-file permissions for .rlp-desk/ ---
# Worker/Verifier need Read/Edit/Write access to .rlp-desk/ files. With the
# project-local tree outside .claude/, Claude Code's hardcoded sensitive
# policy no longer triggers, but explicit permissions still help when the
# user has configured stricter defaults.
#
# Behaviour:
#   - permissions already present (Read(...) marker found)  -> report, no change
#   - settings file exists and jq is available              -> merge into
#     permissions.allow without duplicating entries
#   - settings file exists, jq missing or merge failed      -> warn, file untouched
#   - settings file missing                                 -> create it
# The closing NOTE is printed only when the file was really written.
SETTINGS_FILE="$ROOT/.claude/settings.local.json"
PERM_MARKER="Read(${DESK_REL}/**)"

if [[ -f "$SETTINGS_FILE" ]] && grep -qF "$PERM_MARKER" "$SETTINGS_FILE" 2>/dev/null; then
  echo " · .claude/settings.local.json (rlp-desk permissions already present)"
else
  PERMS=$(printf '["Read(%s/**)", "Edit(%s/**)", "Write(%s/**)"]' "$DESK_REL" "$DESK_REL" "$DESK_REL")
  PERMS_ADDED=0

  if [[ -f "$SETTINGS_FILE" ]]; then
    if command -v jq &>/dev/null; then
      # Test jq explicitly: in an "a && b" list a failing "a" is ignored by
      # set -e, which previously let a parse error fall through to the
      # "merged" message and left the .tmp file behind. The -s check also
      # rejects the empty output jq produces for an empty input file.
      if jq --argjson perms "$PERMS" '
        .permissions //= {} |
        .permissions.allow //= [] |
        .permissions.allow += ($perms - .permissions.allow)
      ' "$SETTINGS_FILE" > "${SETTINGS_FILE}.tmp" && [[ -s "${SETTINGS_FILE}.tmp" ]]; then
        mv "${SETTINGS_FILE}.tmp" "$SETTINGS_FILE"
        PERMS_ADDED=1
        echo " + .claude/settings.local.json (rlp-desk permissions merged)"
      else
        rm -f "${SETTINGS_FILE}.tmp"
        echo " ⚠ jq could not update .claude/settings.local.json (invalid or empty JSON?). Add manually:"
        echo " permissions.allow: Read/Edit/Write(${DESK_REL}/**)"
      fi
    else
      echo " ⚠ jq not found. Add to .claude/settings.local.json manually:"
      echo " permissions.allow: Read/Edit/Write(${DESK_REL}/**)"
    fi
  else
    mkdir -p "$(dirname "$SETTINGS_FILE")"
    cat > "$SETTINGS_FILE" <<SETEOF
{
  "permissions": {
    "allow": [
      "Read(${DESK_REL}/**)",
      "Edit(${DESK_REL}/**)",
      "Write(${DESK_REL}/**)"
    ]
  }
}
SETEOF
    PERMS_ADDED=1
    echo " + .claude/settings.local.json (created with rlp-desk permissions)"
  fi

  # Only claim the permissions were added when a write actually happened.
  if [[ $PERMS_ADDED -eq 1 ]]; then
    echo ""
    echo " NOTE: Added Read/Edit/Write permissions for ${DESK_REL}/ to"
    echo " .claude/settings.local.json (local, not committed to git)."
  fi
fi
1175
+
1176
# --- Post-init validation gate ---
# Confirms that every file the campaign cannot start without exists after
# scaffolding. Each missing file is listed, then the script exits with status 1
# (generic init failure). Diagnostics are written to stderr, matching the PRD
# lint gate below, so they are not mixed into the stdout progress listing.
INIT_FAIL=0
for REQUIRED_FILE in \
  "$DESK/prompts/$SLUG.worker.prompt.md" \
  "$DESK/prompts/$SLUG.verifier.prompt.md" \
  "$DESK/context/$SLUG-latest.md" \
  "$DESK/memos/$SLUG-memory.md" \
  "$DESK/plans/prd-$SLUG.md" \
  "$DESK/plans/test-spec-$SLUG.md"; do
  if [[ ! -f "$REQUIRED_FILE" ]]; then
    echo " ✗ MISSING: $REQUIRED_FILE" >&2
    INIT_FAIL=1
  fi
done
if [[ $INIT_FAIL -eq 1 ]]; then
  {
    echo ""
    echo "ERROR: Scaffold incomplete. Some required files were not created."
    echo "Re-run init or check filesystem permissions."
  } >&2
  exit 1
fi
1196
+
1197
# --- PRD cross-US dependency lint (governance §7a) ---
# When VERIFY_MODE=per-us (default), each AC must reference only the same US or
# earlier verified US' artifacts. Detect future-US references and exit 2 so the
# wrapper can distinguish lint reject (2) from generic init failure (1).
#
# _detect_cross_us_refs PRD_FILE
#
# Scans PRD_FILE and writes one record per offending line to stdout:
#
#     US-NNN:<line number in PRD_FILE>:<full text of the offending line>
#
# where US-NNN is the story the line belongs to. Nothing is printed for a clean
# PRD. The exit status is that of awk.
#
# How a line becomes a violation:
#   - the PRD is partitioned into story blocks by "### US-NNN" headers;
#   - only lines that look like AC content are inspected (a "-" / "*" bullet,
#     or a line carrying a Given/When/Then keyword); narrative prose, roadmap
#     mentions and sub-headings are skipped;
#   - inside story N, a "US-R" token with R > N is a violation, provided US-R
#     is itself defined by a header in the same PRD (typos and placeholders
#     pointing at a non-existent story are ignored);
#   - "US-R" embedded in a longer word (e.g. "BONUS-2") is not a reference;
#   - a line is reported once, however many forward references it contains.
_detect_cross_us_refs() {
  local prd_file="$1"
  # POSIX/BSD-awk compatible: match(s, regex) sets RSTART/RLENGTH only.
  # The file is read twice (pass=0, then pass=1) so that a story defined
  # further down the PRD is already known when an earlier story refers to it.
  awk '
    # True when the line looks like AC content: a bullet, or a Given/When/Then
    # keyword followed by ":" or whitespace.
    function is_ac_line(s) {
      if (s ~ /^[[:space:]]*[-*][[:space:]]/) return 1
      if (s ~ /(^|[[:space:]])[Gg]iven[:[:space:]]/) return 1
      if (s ~ /(^|[[:space:]])[Ww]hen[:[:space:]]/) return 1
      return (s ~ /(^|[[:space:]])[Tt]hen[:[:space:]]/)
    }

    # Numeric story id of a "### US-NNN" header line ("US-007" gives 7).
    function header_us_number(s) {
      match(s, "US-[0-9]+")
      return substr(s, RSTART + 3, RLENGTH - 3) + 0
    }

    BEGIN { current = 0; pass = 0 }

    # Pass 0: collect the story numbers defined by the PRD.
    pass == 0 {
      if ($0 ~ /^### US-[0-9]+/) known[header_us_number($0)] = 1
      next
    }

    # Pass 1: follow the current story and report forward references.
    pass == 1 {
      if ($0 ~ /^### US-[0-9]+/) {
        current = header_us_number($0)
        next
      }
      if (current == 0 || !is_ac_line($0)) next

      rest = $0
      prev = ""   # last character consumed so far, for the boundary check
      while (match(rest, "US-[0-9]+") > 0) {
        if (RSTART > 1) {
          before = substr(rest, RSTART - 1, 1)
        } else {
          before = prev
        }
        ref = substr(rest, RSTART + 3, RLENGTH - 3) + 0
        prev = substr(rest, RSTART + RLENGTH - 1, 1)
        rest = substr(rest, RSTART + RLENGTH)

        # Part of a longer identifier such as BONUS-2: not a story reference.
        if (before ~ /[A-Za-z0-9]/) continue

        if (ref > current && (ref in known)) {
          # FNR (file-local) gives the PRD line number; NR would accumulate
          # across both awk passes and report inflated line numbers.
          printf("US-%03d:%d:%s\n", current, FNR, $0)
          break   # one record per line is enough
        }
      }
    }
  ' pass=0 "$prd_file" pass=1 "$prd_file"
}
1263
+
1264
# Run the cross-US lint against the scaffolded PRD.
#   - per-us mode: violations are fatal; the report goes to stderr and the
#     script exits with status 2 (lint reject).
#   - any other mode: violations are listed as a warning and init continues.
PRD_FILE_LINT="$DESK/plans/prd-$SLUG.md"
# Mode resolution priority (highest first):
#   1. --verify-mode CLI arg passed to init
#   2. VERIFY_MODE env var (already exported by the wrapper for run)
#   3. governance default: per-us
LINT_VERIFY_MODE="${VERIFY_MODE_ARG:-${VERIFY_MODE:-per-us}}"
if [[ -f "$PRD_FILE_LINT" ]]; then
  LINT_VIOLATIONS=$(_detect_cross_us_refs "$PRD_FILE_LINT")
  if [[ -n "$LINT_VIOLATIONS" ]]; then
    # PRD text is printed with printf '%s' rather than echo: zsh's echo
    # interprets backslash escapes, so an AC line containing e.g. "\c" or "\n"
    # would be truncated or reflowed in the report.
    if [[ "$LINT_VERIFY_MODE" == "per-us" ]]; then
      # The whole report, including its leading blank line, goes to stderr.
      {
        echo ""
        echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        echo "ERROR: PRD contains cross-US dependency AC incompatible with --verify-mode per-us"
        echo ""
        # Records are "US-NNN:<line>:<text>"; the last field keeps any further
        # colons that appear in the PRD line itself.
        printf '%s\n' "$LINT_VIOLATIONS" | while IFS=: read -r us_id lineno body; do
          printf ' %s:%s (%s references a higher-numbered US)\n' "$PRD_FILE_LINT" "$lineno" "$us_id"
          printf ' > %s\n' "${body# }"
        done
        echo ""
        echo "Fix options:"
        echo " - Move the cross-US AC into the higher-numbered US (the one being referenced)."
        echo " - OR re-run with VERIFY_MODE=batch to allow cross-US AC."
        echo " - See governance.md §7a for the cross-US dependency rule."
        echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
      } >&2
      exit 2
    else
      echo ""
      echo "WARN: PRD has cross-US dependency AC. Allowed under VERIFY_MODE=$LINT_VERIFY_MODE,"
      echo " but blocking under per-us. Locations:"
      printf '%s\n' "$LINT_VIOLATIONS" | while IFS=: read -r us_id lineno _body; do
        printf ' %s:%s (%s)\n' "$PRD_FILE_LINT" "$lineno" "$us_id"
      done
    fi
  fi
fi
1299
+
1300
# Closing summary: announce the finished scaffold, point at the two files the
# user is expected to edit next, then hand over to print_run_presets for the
# ready-to-copy run commands.
for _banner_line in \
  "" \
  "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" \
  "Scaffold ready: $SLUG" \
  "" \
  "Next:" \
  " 1. Edit PRD: $DESK/plans/prd-$SLUG.md" \
  " 2. Edit test spec: $DESK/plans/test-spec-$SLUG.md" \
  " 3. Run (copy a command below):" \
  ""; do
  echo "$_banner_line"
done
print_run_presets "$SLUG"