npm - @ai-dev-methodologies/rlp-desk - Versions diffs - 0.15.3 → 0.15.4 - Mend

@ai-dev-methodologies/rlp-desk 0.15.3 → 0.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

package/CHANGELOG.md +82 -0
package/README.md +34 -4
package/docs/rlp-desk/failure-modes.md +191 -0
package/package.json +3 -2
package/src/node/runner/campaign-main-loop.mjs +84 -11
package/src/node/util/debug-log.mjs +10 -6
package/src/node/util/lifecycle-metrics.mjs +102 -0
package/src/scripts/lib_ralph_desk.zsh +66 -0
package/src/scripts/run_ralph_desk.zsh +18 -0
package/docs/plans/bug-report-overhaul-backlog.md +0 -49
package/docs/plans/bug-report-overhaul-v0.md +0 -238
package/docs/plans/bug-report-overhaul-v1.md +0 -319
package/docs/plans/native-agent-revert.md +0 -184
package/docs/plans/polished-gliding-toucan.md +0 -234
package/docs/plans/pr-e-phase-c1-blocked-recovery-hygiene-v0.md +0 -233
package/docs/plans/spicy-booping-galaxy.md +0 -717
package/docs/plans/strategic-review/rlp-desk-strategic-review.md +0 -125
package/docs/plans/v0.15-stabilization-phase-a-prep.md +0 -130
package/docs/plans/v0.15-stabilization-plan.md +0 -178
package/docs/plans/v0.16-real-llm-sv-gate-spec.md +0 -177

package/src/node/util/lifecycle-metrics.mjs ADDED Viewed

@@ -0,0 +1,102 @@
+// v0.15.4 PR-B4 — Lifecycle observability helper.
+//
+// Plan: docs/plans/v0.15-phase-b-plan-v3.md §B4.
+// Audit: docs/plans/v0.15-phase-b-lifecycle-audit.md §3 Table 2.
+//
+// Five metrics tracked, all gated on RLP_LIFECYCLE_METRICS=1 env flag:
+//   - iter_signal_write_to_read_ms     leader-poll-resolves vs worker-FS-write
+//   - verdict_write_to_read_ms          leader-poll-resolves vs verifier-FS-write
+//   - pane_eof_to_cleanup_ms            pane process exit vs killPaneProcess return
+//   - pane_reap_latency_ms              done-claim observed vs C-c×2 + waitForExit
+//   - sentinel_lock_to_unlock_ms        per type, _lock vs _unlock (object)
+//
+// Emission discipline:
+//   - debug.log: tagged [LIFECYCLE] per record (when flag set)
+//   - campaign.jsonl: ONE batched lifecycle_metrics object per iteration
+//                     (the collector accumulates, the iter-end flush emits)
+// When flag is unset:
+//   - record() is a no-op (early return) — zero overhead beyond a boolean flag check
+//   - flush() returns null so analytics writer can branch on the field
+const ENV_FLAG_NAME = 'RLP_LIFECYCLE_METRICS';
+/**
+ * Report whether lifecycle metrics are switched on.
+ *
+ * @param {object} [env=process.env] Environment-style map to inspect.
+ * @returns {boolean} true only when the flag is exactly the string '1'.
+ */
+export function lifecycleMetricsEnabled(env = process.env) {
+  const { [ENV_FLAG_NAME]: flagValue } = env;
+  return flagValue === '1';
+}
+/**
+ * Accumulates lifecycle timing metrics between flushes.
+ *
+ * Every mutating method is a no-op unless the collector was constructed
+ * with the RLP_LIFECYCLE_METRICS flag set; the flag is read once, in the
+ * constructor. Recording is best-effort instrumentation: neither a bad
+ * sample nor a failing debug logger may throw into the caller.
+ */
+export class LifecycleMetricsCollector {
+  /**
+   * @param {object} [options]
+   * @param {object} [options.env=process.env] Environment consulted for the flag.
+   * @param {Function|null} [options.debugLog=null] Optional logger, invoked as
+   *   debugLog('LIFECYCLE', fields) once per recorded metric.
+   */
+  constructor({ env = process.env, debugLog = null } = {}) {
+    this._enabled = lifecycleMetricsEnabled(env);
+    this._debugLog = debugLog;
+    this._records = [];
+    this._sentinelLockTimes = new Map();
+  }
+  /** @returns {boolean} Whether the metrics flag was set at construction. */
+  get enabled() {
+    return this._enabled;
+  }
+  /**
+   * Record a single timing metric.
+   *
+   * @param {string} name Metric name; becomes the grouping key in flush().
+   * @param {number} valueMs Duration in milliseconds. Rounded to an integer
+   *   and clamped to >= 0. NaN / +Infinity samples are dropped.
+   * @param {object} [ctx={}] Flat object of audit fields (iter, us_id,
+   *   pane_id, sentinel_type, etc). It is spread last, so a ctx key named
+   *   metric, value_ms or ts overrides the built-in field.
+   * @returns {void}
+   */
+  record(name, valueMs, ctx = {}) {
+    if (!this._enabled) return;
+    const clampedMs = Math.max(0, Math.round(valueMs));
+    // A NaN or Infinity sample would serialise as `null` in campaign.jsonl
+    // and corrupt downstream aggregates; drop it instead of recording it.
+    if (!Number.isFinite(clampedMs)) return;
+    const entry = {
+      metric: name,
+      value_ms: clampedMs,
+      ts: new Date().toISOString(),
+      ...ctx,
+    };
+    this._records.push(entry);
+    if (this._debugLog) {
+      // Best-effort fire-and-forget: we do not await the logger. Guard both
+      // failure modes (synchronous throw, rejected promise) so that a broken
+      // logger cannot abort the caller or raise an unhandled rejection.
+      try {
+        const pending = this._debugLog('LIFECYCLE', {
+          metric: name,
+          value_ms: entry.value_ms,
+          ...ctx,
+        });
+        if (pending && typeof pending.catch === 'function') {
+          pending.catch(() => {});
+        }
+      } catch (_err) {
+        // Deliberately ignored: the entry is already queued for flush().
+      }
+    }
+  }
+  // Convenience: pair-bookkeeping for sentinel_lock_to_unlock_ms (object-
+  // valued metric keyed by sentinel type). Call markLockStart at chmod 0o444
+  // time, markUnlock at chmod 0o644 time (or end-of-iter for never-unlocked).
+  //
+  // v0.15.4 audit H2: done-claim is intentionally NOT instrumented with this
+  // pair. In production happy path done-claim is locked-but-never-unlocked
+  // (campaign-main-loop unlocks only signalFile + verdictFile at iter start);
+  // markUnlock for done-claim never fires, so the metric would silently never
+  // emit. Future work: emit at lib_ralph_desk.zsh:602 archival site if needed.
+  //
+  // v0.15.4 audit H3: callers must invoke markLockStart BEFORE the chmod
+  // operation, not after, so the metric covers full lock duration including
+  // chmod execution time. Sub-ms skew, but semantically correct.
+  /**
+   * Remember when a sentinel was locked. A second call for the same type
+   * overwrites the earlier start time.
+   *
+   * @param {string} sentinelType Key identifying the sentinel.
+   * @param {number} [t=Date.now()] Lock time in epoch milliseconds.
+   * @returns {void}
+   */
+  markLockStart(sentinelType, t = Date.now()) {
+    if (!this._enabled) return;
+    this._sentinelLockTimes.set(sentinelType, t);
+  }
+  /**
+   * Close a lock interval and record sentinel_lock_to_unlock_ms for it.
+   * Does nothing when no matching markLockStart is pending.
+   *
+   * @param {string} sentinelType Key passed earlier to markLockStart.
+   * @param {object} [ctx={}] Extra audit fields; sentinel_type is appended
+   *   after them and therefore always reflects sentinelType.
+   * @param {number} [t=Date.now()] Unlock time in epoch milliseconds.
+   * @returns {void}
+   */
+  markUnlock(sentinelType, ctx = {}, t = Date.now()) {
+    if (!this._enabled) return;
+    const start = this._sentinelLockTimes.get(sentinelType);
+    if (start === undefined) return;
+    this.record('sentinel_lock_to_unlock_ms', t - start, {
+      ...ctx,
+      sentinel_type: sentinelType,
+    });
+    this._sentinelLockTimes.delete(sentinelType);
+  }
+  /**
+   * Snapshot + reset for end-of-iteration flush. Pending lock start times
+   * are left untouched.
+   *
+   * @returns {object|null} null when disabled (so the analytics writer can
+   *   omit the field cleanly); otherwise records grouped by metric name:
+   *   { iter_signal_write_to_read_ms: [{value_ms,ts,...}, ...], ... }
+   */
+  flush() {
+    if (!this._enabled) return null;
+    const records = this._records;
+    this._records = [];
+    const grouped = {};
+    for (const r of records) {
+      const { metric, ...rest } = r;
+      if (!grouped[metric]) grouped[metric] = [];
+      grouped[metric].push(rest);
+    }
+    return grouped;
+  }
+}

package/src/scripts/lib_ralph_desk.zsh CHANGED Viewed

@@ -261,6 +261,19 @@ _kill_pane_process() {
   if typeset -f log_debug >/dev/null 2>&1; then
     log_debug "[bug7] kill_pane_process pane=$pane_id role=$role"
   fi
+  # v0.15.4 PR-B4: pane_eof_to_cleanup_ms instrumentation (flag-gated).
+  # Records the wallclock from kill-start to wait_for_pane_ready return so
+  # B3 can value-assert the substrate fix actually closes the race window.
+  # Uses zsh native $EPOCHREALTIME (microsec) — portable to macOS BSD where
+  # `date +%N` is not supported.
+  local _b4_t0_ms=0
+  if [[ "${RLP_LIFECYCLE_METRICS:-0}" == "1" ]]; then
+    zmodload -e zsh/datetime || zmodload zsh/datetime 2>/dev/null
+    if [[ -n "${EPOCHREALTIME:-}" ]]; then
+      local _b4_t0_str="${EPOCHREALTIME//./}"
+      _b4_t0_ms=${_b4_t0_str:0:13}
+    fi
+  fi
   tmux send-keys -t "$pane_id" C-c 2>/dev/null
   sleep 0.5
   tmux send-keys -t "$pane_id" C-c 2>/dev/null
@@ -268,6 +281,12 @@ _kill_pane_process() {
   if typeset -f wait_for_pane_ready >/dev/null 2>&1; then
     wait_for_pane_ready "$pane_id" 5 2>/dev/null || true
   fi
+  if (( _b4_t0_ms > 0 )); then
+    local _b4_t1_str="${EPOCHREALTIME//./}"
+    local _b4_t1_ms=${_b4_t1_str:0:13}
+    log_lifecycle_metric "pane_eof_to_cleanup_ms" $((_b4_t1_ms - _b4_t0_ms)) \
+      "pane=$pane_id role=$role"
+  fi
   return 0
 }
@@ -285,6 +304,53 @@ _unlock_sentinel() {
   return 0
 }
+# =============================================================================
+# v0.15.4 PR-B4: Lifecycle observability — log_lifecycle_metric
+# =============================================================================
+# Plan: docs/plans/v0.15-phase-b-plan-v3.md §B4 (P2.1 critic-round-2 fix).
+# Helper is GATED on $RLP_LIFECYCLE_METRICS=1 (no-op when unset). Emits to
+# debug.log via log_debug, in a backgrounded subshell so the caller does not
+# block on the FS write. The Node-side mirror is src/node/util/lifecycle-
+# metrics.mjs LifecycleMetricsCollector.
+#
+# v0.15.4 audit M2: concurrent-appender semantics — `( ... ) &!` spawns a
+# disowned subshell per metric. Multiple metrics can fire in rapid succession
+# (e.g., during iter teardown) and race on debug.log. Strictly, POSIX's
+# PIPE_BUF atomicity guarantee covers pipes/FIFOs only (and PIPE_BUF is
+# platform-specific: 4096 on Linux, 512 on macOS); for a regular file opened
+# in append mode POSIX only guarantees that each write() lands at the current
+# end of file. In practice a single ~150-byte LIFECYCLE line emitted as one
+# write() on a local filesystem (APFS, ext4, xfs) stays intact and
+# non-interleaved — assuming log_debug appends each line with a single write.
+# On NFS / FUSE / some Docker overlay setups append semantics are weaker; in
+# those environments, expect possible interleaving. This is best-effort logging
+# by design — the metric values land in campaign.jsonl via the Node leader's
+# batched flush as the canonical authoritative record. debug.log is an
+# audit aid, not the source of truth.
+#
+# Args:
+#   $1  metric_name       e.g. iter_signal_write_to_read_ms
+#   $2  value_ms          integer milliseconds (logged verbatim; not coerced or validated)
+#   $3  context (optional, free-form key=val pairs joined with spaces)
+#
+# Side effects:
+#   - When flag unset: returns 0 immediately (no fork, no FS call).
+#   - When flag set:   forks `( log_debug "..." ) &!` to debug.log.
+#
+# Examples:
+#   log_lifecycle_metric "iter_signal_write_to_read_ms" "$delta" \
+#     "iter=$ITERATION us=$us_id pane=$WORKER_PANE"
+#   log_lifecycle_metric "pane_reap_latency_ms" "$delta" \
+#     "iter=$ITERATION sentinel=done-claim"
+log_lifecycle_metric() {
+  # Flag gate first: when metrics are off, return before touching arguments.
+  [[ "${RLP_LIFECYCLE_METRICS:-0}" == "1" ]] || return 0
+  # ${N:-} defaults (matching the existing ${3:-}) keep a missing argument
+  # from aborting the caller if it runs with `setopt nounset`; the emptiness
+  # guard below then turns the call into the intended silent no-op.
+  local metric="${1:-}"
+  local value_ms="${2:-}"
+  local ctx="${3:-}"
+  [[ -n "$metric" && -n "$value_ms" ]] || return 0
+  # log_debug is optional: skip silently when no such function is defined.
+  if typeset -f log_debug >/dev/null 2>&1; then
+    # `&!` backgrounds and disowns the subshell so the caller never waits on
+    # the log write.
+    ( log_debug "[LIFECYCLE] metric=$metric value_ms=$value_ms $ctx" ) &!
+  fi
+  return 0
+}
 # PR-A (Bug #10) — validate operator-written manual recovery artifacts.
 # Returns 0 when all 5 checks pass; 1 otherwise. Sets RECOVERY_FAIL_REASON
 # (global) on failure for caller logging. Mirrors the Node-side helper

package/src/scripts/run_ralph_desk.zsh CHANGED Viewed

@@ -710,6 +710,10 @@ handle_worker_exit_codex() {
   dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
   log "  Codex worker completed with done-claim (us_id=$dc_us_id) and clean tree. Auto-generating signal."
   echo '{"iteration":'"$iter"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated after codex exit (clean tree)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
+  # v0.15.4 PR-B2-FIX: codex worker pane already exited — reaper would no-op,
+  # but lock done-claim as defense-in-depth so any orphaned subprocess cannot
+  # rewrite the file before lib_ralph_desk.zsh:602 archives it.
+  _lock_sentinel "$DONE_CLAIM_FILE"
   _emit_a4_fallback_audit "$dc_us_id" "$iter" "codex_exit_with_done_claim_clean"
   return 0
 }
@@ -2292,6 +2296,15 @@ poll_for_signal() {
           if _bug8_check_synth_allowed "$ITERATION" "$dc_us_id" "inline_polling_a4_clean"; then
             log "  WARNING: done-claim exists for $dc_us_id but no iter-signal. Tree clean — auto-generating signal (A4 fallback)."
             log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
+            # v0.15.4 PR-B2-FIX: Worker pane is alive and idling post-done-claim
+            # (the canonical Bug #5/7 race window). Reap before synthesizing the
+            # signal so the worker cannot revise done-claim or emit a late
+            # iter-signal that races the leader's synthesized one. Mirror of
+            # Bug #7 Fix-Q parity at run_ralph_desk.zsh:3181 — kill before lock,
+            # lock before synth-write so the next leader read sees a frozen
+            # done-claim and a fresh signal_file in that order.
+            _kill_pane_process "$pane_id" "worker-a4"
+            _lock_sentinel "$DONE_CLAIM_FILE"
             echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim + clean tree)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
             _emit_a4_fallback_audit "$dc_us_id" "$ITERATION" "inline_polling_a4_clean"
             return 0
@@ -3180,6 +3193,11 @@ main() {
         # self-review and rewrite iter-signal.json (1m43s drift observed).
         _kill_pane_process "$WORKER_PANE" "worker"
         _lock_sentinel "$SIGNAL_FILE"
+        # v0.15.4 PR-B2-FIX: same worker pass also produced done-claim. Freeze
+        # it alongside iter-signal so Bug #8 gates and the iter-NNN-done-claim
+        # archive (lib_ralph_desk.zsh:602) read a snapshot the worker can no
+        # longer revise. Symmetric with iter-signal/verdict lock contract.
+        _lock_sentinel "$DONE_CLAIM_FILE"
         # PR-0b-narrow: stamp leader handshake ack on the iter-signal (audit-only).
         _stamp_ack_field "$SIGNAL_FILE"
       else

package/docs/plans/bug-report-overhaul-backlog.md DELETED Viewed

@@ -1,49 +0,0 @@
-# Bug Report Overhaul — P2/P3 Backlog
-> Companion to `bug-report-overhaul-v1.md` (PR-A/B/C plan).
-> User stop-rule: ralplan iterates only until P0+P1 = 0; P2 and below are captured here, NOT blockers.
-> Re-prioritize from this file in a future ralplan when the operator-minutes-saved metric from PR-A/B/C lands.
----
-## P2 — should fix in a follow-up PR after PR-A/B/C land
-### From v0 plan (Option B/C, deferred features)
-- **Heartbeat-warning sidecar (Option B from v0)** — emit `<slug>-warning.{md,json}` when heartbeat anomaly crosses 50% of `iter-timeout`. Lets operator pre-empt a BLOCKED before the 30-min wall hits. Decoupled from this PR set because (a) report-quality is the dominant pain (D1), and (b) warning sidecar adds a second sentinel surface that risks false-positive fatigue. Revisit after PR-A/B land and we measure how many BLOCKEDs would have been pre-empted.
-- **GitHub Issues integration (Option C from v0)** — POST blocked context to a configured GitHub repo issue. Requires per-repo authn story (token storage, network retry, rate-limits) — violates principle 3 in the current PR set. Re-evaluate after a credible authn proposal exists.
-- **Pattern-learning loop** — mine `~/.claude/ralph-desk/analytics/*/bug-reports/` for emerging clusters. Auto-extends `docs/bug-patterns.json` with new candidate signatures for human review.
-- **Cross-campaign bug-report dashboard in `/rlp-desk analytics`** — surface patterns across projects.
-- **Auto-suggest "this looks like Bug #N — try fix-X" inline in CLI output** — operationalize PR-C's `pattern_match` data with an inline suggestion. Held back so the deterministic Jaccard implementation can be calibrated against real campaign data first.
-- **Operator-CLI `/rlp-desk recover <slug> --to verify`** — write the manual recovery artifacts (`iter-signal.json`, `done-claim.json`, `status.json` patch) deterministically. Currently a hand-rolled `jq` pipeline per Bug #10 §7 workaround.
-### From Codex Critic Round 2 (BACKLOG)
-- **[P2-1]** PR-A `_validateOperatorRecoveryArtifacts` return shape — current pseudo-code mixes `if (valid)` (boolean coercion) with `valid.reason` (object access). Resolve at implementation time to either `{ ok: bool, reason: string }` (object) or pure boolean + separate side-channel for the warning text. Affects the audit log line shape.
-- **[P2-2]** PR-A test summary in §5 says "5 ACs (R1–R5)" but §8 added AC-R6 (`_skipNextWorkerDispatch` cleared after one use). Update §5 to "6 ACs (R1–R6)" for consistency before PR-A merges.
-### From Codex Critic Round 3 (BACKLOG)
-- **[P2-3]** §9 step 5 banner-aware diff command only covers `run_ralph_desk.zsh`. PR-A and PR-B both also touch `lib_ralph_desk.zsh`. Add a matching `diff <(cat src/scripts/lib_ralph_desk.zsh) <(tail -n +N ~/.claude/ralph-desk/scripts/lib_ralph_desk.zsh)` step in the implementation runbook (verify the right `tail -n +N` offset at impl time — `lib_*.zsh` is sourced and may have no shebang). Extend to `init_ralph_desk.zsh` if PR-B touches it.
-## P3 — nice-to-have polish
-### From Codex Critic Round 2
-- **[P3-1]** Option C/D/E rejection rationale in v1 §4 says "Same as v0" — acceptable because v0 is co-located, but inline one-sentence rationale would make the v1 plan self-contained for future readers who do not have the v0 file.
-### From Architect Round 1 (residual notes)
-- Validate the `bug-patterns.json` Jaccard threshold (0.7) against actual past blocks once we have ≥20 historical reports — current threshold is hand-picked. Likely needs a small calibration script in `scripts/`.
-- Consider whether `bug-reports/` should ship in the npm tarball default `.gitignore` of newly initialized projects — currently the schema doc only recommends operators add it themselves.
----
-## Promotion criteria (when to re-ralplan one of these)
-A backlog item moves back into a planner draft when **any** of these is true:
-1. PR-A/B/C lands and we measure ≥3 BLOCKEDs where the deferred item would have moved D1 by ≥10 minutes (e.g. heartbeat warning would have pre-empted a 30-min wait).
-2. Operator hand-files ≥2 bug reports about the same backlog gap (signal that the deferral was wrong).
-3. The `bug-patterns.json` seed becomes too large for human authoring (≥30 entries) — triggers the pattern-learning loop item.
-4. A user explicitly asks for one (e.g. operator-CLI `/rlp-desk recover` once they fatigue of jq pipelines).

package/docs/plans/bug-report-overhaul-v0.md DELETED Viewed

@@ -1,238 +0,0 @@
-# Bug Report Mechanism Overhaul — v0 (RALPLAN-DR Planner Draft)
-> **Status**: Planner draft awaiting Architect → Codex Critic.
-> **Mode**: deliberate (auto-enabled — touches governance, runner, slash command, test infra).
-> **Stop rule**: iterate until codex critic returns 0 P0 + 0 P1. P2 → backlog.
-> **Critic instruction**: *approve unless P0 or P1 found.*
----
-## 1. Problem statement
-10 hand-written 200-line bug reports (`Bug #1`–`Bug #10`, BOS dev `2026-05-01..05-07`) point at one root frustration: **bugs are endless and each one costs 30+ min of operator time to package** before the rlp-desk side can even start triage. Examples:
-| Pain | Evidence |
-|---|---|
-| Manual context capture | Each report re-collects: env, version, command, status snapshot, pane logs, settings, gitignore — all already on disk |
-| No similarity search | Bug #6/#7/#8 are all "worker hang variants"; operator re-discovers the cluster each time |
-| Recovery is broken | Bug #10 — leader resets `phase=worker` ignoring operator's `phase=verify` manual recovery; operator's iter-signal/done-claim files deleted |
-| Reactive only | Bugs surface only after full BLOCKED (~30 min poll timeout); no early warning on heartbeat anomalies |
-| No deterministic repro pack | rlp-desk side has to chase BOS for missing context (logs, env, version) → fix latency multiplier |
-The blocked-sentinel JSON (`schema_version: 2.0`) already classifies (`reason_category` / `recoverable` / `suggested_action`) but stops at the campaign boundary — it does not become a *bug report*. That gap is the target.
----
-## 2. Principles (5)
-1. **Capture-by-default, not by-request.** When the campaign blocks, the operator should not have to gather anything that already exists on disk.
-2. **One canonical schema, two consumers.** A single `bug-report.json` feeds both BOS-side templates and rlp-desk-side triage; no divergent representations.
-3. **Surgical diffs over new infra.** Extend the existing `blocked.{md,json}` writer + `/rlp-desk` subcommand surface; do not introduce a new daemon, queue, or service.
-4. **Recovery must be idempotent.** Manual recovery of a BLOCKED campaign must not be silently overwritten on relaunch (Bug #10 contract).
-5. **Earlier is cheaper.** A heartbeat-anomaly *warning* costs nothing; a 30-min BLOCKED poll-timeout is the most expensive form of feedback.
----
-## 3. Decision drivers (top 3)
-| # | Driver | Why it dominates |
-|---|---|---|
-| D1 | **Operator minutes per BLOCKED → first actionable report** | Today: 30+ min hand-writing + log collection. Target: ≤2 min (review + 1-line headline edit). Drives the "auto-bundle" choice below. |
-| D2 | **Cluster recognition (avoid duplicate `Bug #N` for same root cause)** | 5 of 10 reports cluster around "worker hang on sentinel" or "verifier post-sentinel race". Without similarity hinting we keep paying triage cost N times. |
-| D3 | **Zero regression on `--mode tmux` 19th launch** | Per `docs/plans/native-agent-revert.md`, the production tmux path is mid-flight. Any change must be additive there; default behavior unchanged. |
----
-## 4. Viable options
-### Option A — **Bundle-first**: `/rlp-desk report <slug>` + auto-emit on BLOCKED *(recommended)*
-Add a single subcommand and one auto-trigger. Mechanics:
-- **Trigger**: every `_handlePollFailure` / `_emitBlockedSentinel` call already in `campaign-main-loop.mjs` and `write_blocked_sentinel` in `run_ralph_desk.zsh` ALSO writes `bug-reports/<slug>-<UTCISO>.json` + `<...>.md` (template-rendered).
-- **Schema**: extends current blocked-sentinel JSON v2.0 with: `repro.command`, `repro.env_snapshot`, `repro.git_head_sha`, `pane_tail.{worker,verifier}` (last 200 lines, redacted), `recent_iter_artifacts[]` (last 3 iterations' done-claim/verdict paths), `pattern_match.{candidate_bug_ids[], score}` (similarity vs known reports).
-- **Subcommand**: `/rlp-desk report <slug>` to (a) regenerate from saved campaign state, (b) attach a custom headline, (c) print the markdown to stdout for paste-into-issue-tracker.
-- **Pattern match**: deterministic — hashed signature on `{reason_category, failure_category, suggested_action, top-level pane stem}` against a `docs/bug-patterns.json` lookup (seeded with #1–#10).
-- **Bug #10 fix**: leader on relaunch honors `status.phase == "verify"` + valid manual artifacts (validated against schema) and skips worker dispatch. Same surgical injection point used by P1-D classifier.
-**Pros**: low-surface; reuses `_classifyBlock`, `writeSentinelExclusive`, `~/.claude/ralph-desk/analytics`; produces the same output regardless of whether BLOCKED came from `--mode tmux` or `--mode native`/`--mode agent`. Operator's job collapses to "edit headline".
-**Cons**: pattern-match is naive (string-stem); will need iteration. Pane-tail capture risks PII/secret leak — must redact (governance §1f already has redaction precedent).
-### Option B — **Heartbeat-first**: pre-BLOCKED early warning channel
-Introduce a `<slug>-warning.{md,json}` sidecar emitted whenever a heartbeat anomaly crosses a soft threshold (50% of `iter-timeout`, no progress). Operator can opt into pre-empting the BLOCKED with `/rlp-desk warn <slug>` before the 30-min wall hits.
-**Pros**: shortens the perceived "bug is endless" tail by surfacing earlier.
-**Cons**: orthogonal to the *report quality* problem. Adds a second sentinel surface; risks false positives that train operators to ignore. Does not solve D1 (hand-writing) or D2 (clusters).
-### Option C — **External tracker integration** (GitHub Issues auto-file)
-Instead of file artifacts, POST blocked context to a configured GitHub repo issue.
-**Pros**: makes rlp-desk-side triage visible without operator handoff.
-**Cons**: violates principle 3 (new infra: secrets, network, retry, rate-limits). Couples to an external service. Out-of-scope per ABSOLUTE rule "NEVER push to remote without explicit user approval" — would need a per-campaign auth path. **Invalidated.**
-### Option D — **Status-quo + better doc template**
-Just publish a clearer template under `docs/rlp-desk/bug-report-template.md` and call it done.
-**Pros**: zero code change.
-**Cons**: does not move D1 (operator minutes) or D2 (clusters) at all. Bug #10 (Recovery breakage) untouched. **Invalidated.**
-### Why A wins (with optional B as future-PR)
-A directly addresses D1 (auto-bundle), D2 (pattern_match seeded with #1–#10), and Bug #10 (relaunch hygiene). B is complementary but orthogonal — defer to a separate PR after A lands and we measure operator minutes-saved. C/D fail principle 3 / D1 respectively.
----
-## 5. Scope (this PR)
-### P0 — must land
-1. **`bug-report.json` writer** — extend `_emitBlockedSentinel` (Node, `campaign-main-loop.mjs:923-968`) and `write_blocked_sentinel` (zsh, `lib_ralph_desk.zsh`) to also emit a per-block bug-report under `.rlp-desk/bug-reports/<slug>-<iso>.{json,md}`. Schema documented at `docs/rlp-desk/bug-report-schema.md`.
-2. **Bug #10 relaunch hygiene** — in launch-time entry of `campaign-main-loop.mjs` (currently forces `phase=worker`+`iter=1`), branch on `status.phase == 'verify'` and validate operator-written `iter-signal.json` + `done-claim.json` against existing artifact-validators. If valid → skip worker dispatch, enter verifier directly. If invalid → log warning + fall through to current behavior.
-3. **Redaction pass** — `pane_tail` and `env_snapshot` go through a deny-list (governance §1f redaction precedent: any `/(api[_-]?key|token|secret|password|bearer|authorization)/i` line replaced with `<REDACTED>`).
-### P1 — must land
-4. **`/rlp-desk report <slug>` subcommand** — added to `src/commands/rlp-desk.md` per current command-handler patterns; reads the latest blocked-sentinel JSON + most recent `bug-reports/*.json`, prints the markdown render to stdout. Optional `--headline "..."` flag rewrites the title line in-place. No auto-publish to any remote.
-5. **`pattern_match` seed** — `docs/bug-patterns.json` shipped with deterministic signatures for Bug #1–#10 (manually authored from BOS reports). Bug-report writer fills `pattern_match.candidate_bug_ids[]` + `score` (Jaccard on `{reason_category, failure_category, top-level pane-tail token bag}`).
-6. **Self-Verification gate compliance** — `src/scripts/run_ralph_desk.zsh` is touched → CLAUDE.md mandates 3 self-verification scenarios (LOW + MEDIUM + CRITICAL). Spelled out in §8.
-### P2+ → `docs/plans/bug-report-overhaul-backlog.md` (separate file, not this PR)
-- Heartbeat-warning sidecar (Option B).
-- GitHub Issues integration (Option C, after authn story).
-- Pattern-learning loop that mines `~/.claude/ralph-desk/analytics/*/bug-reports/` for emerging clusters.
-- Cross-campaign bug-report dashboard in `/rlp-desk analytics`.
-- Auto-suggest "this looks like Bug #N — try fix-X" inline in CLI output (today: `pattern_match` is data-only).
----
-## 6. Files to modify
-| File | Change | Risk |
-|---|---|---|
-| `src/node/runner/campaign-main-loop.mjs` | Extend `_emitBlockedSentinel` to call new `writeBugReport` helper; add Bug #10 relaunch-phase-honor branch in `_runCampaignBody` entry | MED |
-| `src/node/shared/bug-report.mjs` (NEW) | `writeBugReport({slug, classification, reason, paths, env, paneTails, recentArtifacts})` + redaction + pattern-match | LOW (new isolated module) |
-| `src/scripts/lib_ralph_desk.zsh` | New `_write_bug_report` helper called from `write_blocked_sentinel` | MED |
-| `src/scripts/run_ralph_desk.zsh` | Wire `_write_bug_report` after each `write_blocked_sentinel` site (≈10 sites, all already in one taxonomy) | MED |
-| `src/commands/rlp-desk.md` | Add `## report <slug>` section; document `bug-reports/` directory + schema link; add `/rlp-desk report` to help block | LOW |
-| `src/governance.md` | Add §1g "Bug Report Capture" — invariant: every BLOCKED writes a bug-report; redaction rules; relaunch hygiene contract (Bug #10) | LOW (additive) |
-| `docs/rlp-desk/bug-report-schema.md` (NEW) | JSON schema doc + worked example | LOW |
-| `docs/bug-patterns.json` (NEW) | Seed with #1–#10 signatures | LOW |
-| `tests/node/test-bug-report-writer.test.mjs` (NEW) | Schema, redaction, pattern-match unit tests | LOW |
-| `tests/node/test-relaunch-phase-verify-hygiene.test.mjs` (NEW) | Bug #10 fix unit + integration | MED |
-| `tests/test-bug-report-zsh-emit.sh` (NEW) | zsh side bug-report emit verification | MED |
-Total: 5 modified + 6 new. No deletions. Single PR — review surface bounded.
----
-## 7. Pre-mortem (deliberate mode — 3 scenarios)
-### S1 — Pane-tail leaks a secret into a committed bug-report
-A worker pane prints `Authorization: Bearer eyJ...` from a vendor SDK debug log. `pane_tail` captures it; operator commits the bug-report markdown without re-reading.
-**Mitigation**: redaction deny-list runs on the JSON writer side (not at view-time); deny-list is unit-tested in `test-bug-report-writer.test.mjs` with a fuzz-style fixture (10+ secret-shaped strings). Markdown render reads from JSON post-redaction, so it can never out-leak. Additional belt: `bug-reports/` is added to a sample `.gitignore` snippet in the schema doc; we do not auto-add to user repo `.gitignore`.
-**Residual risk**: vendor-specific secret formats not in deny-list. Acceptable: schema doc tells operator to scan before committing, and the bug-report writer leaves a `meta.redacted_line_count` audit field (see AC-W2) that flags how many lines hit the deny-list (operator can sanity-check).
-### S2 — Bug #10 fix accidentally honors a stale `phase=verify` from a CRASHED leader and re-enters verifier on garbage state
-If the leader crashed mid-worker after writing `phase=verify` (race), relaunch could enter verifier with an inconsistent on-disk state.
-**Mitigation**: validation gate is strict — both `iter-signal.json` AND `done-claim.json` must (a) exist, (b) have `us_id` matching `status.target_us`, (c) have `iteration` matching `status.iteration`, (d) have `iter_signal_quality == 'specific'`, (e) be newer than the most recent `worker-prompt.md` mtime. Failure of ANY check → fall through to current behavior + log "phase=verify ignored: <reason>". This preserves backward compat and matches `_checkBlockedHygiene` precedent.
-**Residual risk**: a clever filesystem race can still pass all five checks. We accept this — the existing "every relaunch resets to worker" behavior is itself a bug (#10), and Option C's stricter "operator must pass `--resume-from-verify` flag" is named in P2 backlog as an opt-in escalation if operators report false positives.
-### S3 — `pattern_match` false-positive trains operators to dismiss real bugs
-Two unrelated `infra_failure` blocks both score ≥0.8 against the same Bug #N signature; operator stops reading.
-**Mitigation**: `pattern_match` is **data-only** in P1 — no inline CLI suggestion. Score + candidate IDs are written to JSON; markdown render places them in a "Possible related bugs" footer with explicit "score: 0.83 — review before assuming match". Auto-suggest is deferred to P2 backlog precisely because we have not validated the signature space yet. Also: ship with **deterministic** Jaccard over a small token bag, not ML — failures are inspectable.
-**Residual risk**: low — operator opt-in to act on `pattern_match`.
----
-## 8. Expanded test plan (deliberate mode)
-### Unit (Node)
-`tests/node/test-bug-report-writer.test.mjs`:
-- AC-W1: schema fields all present + types match `docs/rlp-desk/bug-report-schema.md`.
-- AC-W2: redaction — 12 secret-shaped fixtures (Bearer token, AWS key, GH PAT, OpenAI key, generic `password=...`, etc.) all replaced by `<REDACTED>`; `meta.redacted_line_count` reflects count.
-- AC-W3: pane-tail truncates at 200 lines; preserves last lines (most recent diagnostic value).
-- AC-W4: `pattern_match` against seeded `docs/bug-patterns.json` — synthetic block matching Bug #6 signature returns `score >= 0.7` + correct `candidate_bug_ids`.
-- AC-W5: idempotent — second call with same `(slug, classification, iso)` is a no-op (uses `writeSentinelExclusive` semantics).
-`tests/node/test-relaunch-phase-verify-hygiene.test.mjs`:
-- AC-R1: status.phase=verify + valid artifacts → verifier-only entry (no worker dispatch).
-- AC-R2: status.phase=verify + missing `done-claim.json` → fall through to worker, log warning.
-- AC-R3: status.phase=verify + `us_id` mismatch → fall through, warning.
-- AC-R4: status.phase=verify + `iter-signal.json` older than worker-prompt.md → fall through, warning.
-- AC-R5: status.phase=verify + `iter_signal_quality != 'specific'` → fall through, warning.
-### Integration (Node)
-`tests/node/us006-campaign-main-loop.test.mjs` extension:
-- AC-I1: BLOCKED via `flywheel_inconclusive` → bug-report file written to `.rlp-desk/bug-reports/`; JSON parses; `reason_category == 'mission_abort'`.
-- AC-I2: BLOCKED via `worker_exited` → bug-report `pattern_match.candidate_bug_ids` includes `Bug-7` (worker pane death lineage).
-- AC-I3: relaunch with valid `phase=verify` artifacts → no `iter-002.worker-prompt.md` created; verifier dispatched directly.
-### Integration (zsh)
-`tests/test-bug-report-zsh-emit.sh` (NEW, mirrors `test-bug7-post-sentinel-race.sh` style):
-- Sc-1: stub `dispatch_worker` exits 1 → `write_blocked_sentinel` runs → `<slug>-<iso>.json` exists in `bug-reports/` + parses with `jq`.
-- Sc-2: redaction — pre-injected pane log with `Bearer X` → `jq .pane_tail.worker` does not contain `Bearer X`.
-### Self-Verification scenarios (CLAUDE.md gate, MANDATORY since `run_ralph_desk.zsh` is touched)
-- **LOW**: redaction unit fixture passes; existing zsh + Node regression tests green.
-- **MEDIUM**: real campaign with stub worker that fails → bug-report appears; markdown render contains all required sections; operator can `cat` it; `pattern_match` populated.
-- **CRITICAL**: 2-iter campaign with deliberate BLOCKED at iter-1, then operator manual recovery (write iter-signal/done-claim by hand, set `phase=verify`), relaunch → verifier-only path runs (no worker iter-2 dispatch); Bug #10 reproduction scenario reversed; verdict accepted; `complete.md` written.
-All 3 must PASS before commit. If any FAIL: fix root cause, re-run failing scenario, then re-verify all 3.
----
-## 9. Verification end-to-end
-1. `node --test 'tests/node/*.test.mjs'` — all green; new tests visible.
-2. `bash tests/test-bug-report-zsh-emit.sh` — green.
-3. `bash tests/test-bug7-post-sentinel-race.sh` + `bash tests/test-bug7-poll-partial-write.sh` — unchanged green (no Bug #7 regression).
-4. CLAUDE.md self-verification gate × 3 (above) — all PASS.
-5. Manual: trigger BLOCKED in a sandbox campaign; verify `.rlp-desk/bug-reports/<slug>-<iso>.md` is human-readable + has `Possible related bugs` footer.
-6. Banner-aware diff `src/` ⇆ `~/.claude/ralph-desk/` after `node scripts/postinstall.js`.
----
-## 10. ADR (preview — final once Critic approves)
-- **Decision**: Adopt Option A (bundle-first, auto-emit on BLOCKED) for v0.16.0; defer heartbeat-warning (B) and external-tracker (C) to backlog.
-- **Drivers**: D1 operator-minutes, D2 cluster-recognition, D3 zero `--mode tmux` regression.
-- **Alternatives considered**: B (orthogonal — does not solve D1/D2), C (violates principle 3, requires authn/network), D (does not move any driver).
-- **Why chosen**: A reuses `_classifyBlock` + `writeSentinelExclusive`; surgical-diff principle satisfied; pattern_match seeded from real history.
-- **Consequences**: BLOCKED writes additional artifact (`bug-reports/<slug>-<iso>.{json,md}`); operator workflow shifts from "hand-write 200 lines" to "review + edit headline"; `bug-patterns.json` becomes a living artifact maintained alongside reports.
-- **Follow-ups**: Backlog file lists P2+ items. Heartbeat warning revisited after we measure operator minutes-saved on first 3 BLOCKED post-land.
----
-## 11. Round-by-round resolution log
-| Round | Reviewer | Verdict | Findings closed |
-|---|---|---|---|
-| 0 | — | Planner v0 | initial draft |
-| 1 | Architect | _pending_ | _to fill_ |
-| 2 | Codex Critic | _pending_ | _to fill_ |
-| ... | | | |