npm - @ai-dev-methodologies/rlp-desk - Versions diffs - 0.15.3 → 0.15.5 - Mend

@ai-dev-methodologies/rlp-desk 0.15.3 → 0.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/CHANGELOG.md +98 -0
package/README.md +34 -4
package/docs/rlp-desk/failure-modes.md +191 -0
package/package.json +10 -3
package/src/node/MANIFEST.txt +3 -0
package/src/node/prompts/prompt-assembler.mjs +2 -2
package/src/node/run.mjs +70 -3
package/src/node/runner/campaign-main-loop.mjs +97 -13
package/src/node/util/debug-log.mjs +10 -6
package/src/node/util/lifecycle-metrics.mjs +102 -0
package/src/scripts/lib_ralph_desk.zsh +66 -0
package/src/scripts/run_ralph_desk.zsh +23 -3
package/docs/plans/bug-report-overhaul-backlog.md +0 -49
package/docs/plans/bug-report-overhaul-v0.md +0 -238
package/docs/plans/bug-report-overhaul-v1.md +0 -319
package/docs/plans/native-agent-revert.md +0 -184
package/docs/plans/polished-gliding-toucan.md +0 -234
package/docs/plans/pr-e-phase-c1-blocked-recovery-hygiene-v0.md +0 -233
package/docs/plans/spicy-booping-galaxy.md +0 -717
package/docs/plans/strategic-review/rlp-desk-strategic-review.md +0 -125
package/docs/plans/v0.15-stabilization-phase-a-prep.md +0 -130
package/docs/plans/v0.15-stabilization-plan.md +0 -178
package/docs/plans/v0.16-real-llm-sv-gate-spec.md +0 -177
package/docs/rlp-desk/internal/verification-policy-gap-analysis.md +0 -523
package/docs/rlp-desk/internal/verification-strategy-research.md +0 -2097
package/docs/rlp-desk/plans/cozy-gliding-trinket.md +0 -53
package/docs/rlp-desk/plans/frolicking-churning-honey.md +0 -253
package/docs/rlp-desk/plans/keen-sauteeing-snowflake.md +0 -245
package/docs/rlp-desk/plans/mutable-booping-corbato.md +0 -163
package/docs/rlp-desk/plans/rlp-desk-0.11-handoff-7fixes.md +0 -352
package/docs/rlp-desk/plans/rlp-desk-0.11.1-tmux-pane-disappearance.md +0 -260
package/docs/rlp-desk/plans/rlp-desk-elegant-papert-agent-a8cd695ffca2a3ad8.md +0 -84
package/docs/rlp-desk/plans/rlp-desk-elegant-papert.md +0 -270
package/docs/rlp-desk/plans/rlp-desk-tmux-flywheel-routing.md +0 -730
package/docs/rlp-desk/plans/toasty-whistling-diffie-agent-a6814625642e956da.md +0 -201
package/docs/rlp-desk/plans/toasty-whistling-diffie.md +0 -117
package/docs/rlp-desk/plans/validated-snacking-crayon.md +0 -204
package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-output.log +0 -0
package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-prompt.md +0 -38
package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-trigger.sh +0 -28
package/examples/calculator/.claude/ralph-desk/logs/loop-test/session-config.json +0 -25
package/examples/calculator/.claude/ralph-desk/logs/loop-test/status.json +0 -10
package/examples/calculator/.claude/ralph-desk/logs/loop-test/worker-heartbeat.json +0 -1

package/src/node/runner/campaign-main-loop.mjs CHANGED Viewed

@@ -32,6 +32,8 @@ import {
   generateSVReport,
   prepareCampaignAnalytics,
 } from '../reporting/campaign-reporting.mjs';
+import { LifecycleMetricsCollector } from '../util/lifecycle-metrics.mjs';
+import { makeDebugLogger } from '../util/debug-log.mjs';
 import {
   createPane as defaultCreatePane,
   killPaneProcess as defaultKillPaneProcess,
@@ -91,7 +93,7 @@ export function detectLegacyDeskInRunMode(rootDir, env = process.env) {
   return { legacyPath, newPath, message };
 }
-function buildPaths(rootDir, slug, env = process.env) {
+export function buildPaths(rootDir, slug, env = process.env) {
   const deskRoot = resolveDeskRoot(rootDir, env);
   const campaignLogDir = path.join(deskRoot, 'logs', slug);
@@ -133,6 +135,10 @@ function buildPaths(rootDir, slug, env = process.env) {
     flywheelGuardPromptFile: path.join(deskRoot, 'prompts', `${slug}.flywheel-guard.prompt.md`),
     flywheelGuardVerdictFile: path.join(deskRoot, 'memos', `${slug}-flywheel-guard-verdict.json`),
     laneAuditFile: path.join(campaignLogDir, 'lane-audit.json'),
+    // v0.15.4 PR-B4: structured debug.log. log_lifecycle_metric (zsh) and
+    // LifecycleMetricsCollector (Node) both emit here when
+    // RLP_LIFECYCLE_METRICS=1.
+    debugLogFile: path.join(campaignLogDir, 'debug.log'),
 };
 }
@@ -555,7 +561,11 @@ async function _archiveRecoveredSidecar(paths) {
   }
 }
-async function appendIterationAnalytics(paths, state, usId, verdict, options) {
+async function appendIterationAnalytics(paths, state, usId, verdict, options, lifecycleMetrics = null) {
+  // v0.15.4 PR-B4: lifecycle_metrics field — null when flag unset (collector
+  // returns null), object grouped by metric name when flag set. Test:
+  // tests/node/test-campaign-jsonl-shape.mjs.
+  const lifecycleSnapshot = lifecycleMetrics ? lifecycleMetrics.flush() : null;
   await appendCampaignAnalytics(paths.analyticsFile, {
     iter: state.iteration,
     us_id: usId,
@@ -564,6 +574,7 @@ async function appendIterationAnalytics(paths, state, usId, verdict, options) {
     verdict,
     duration: 0,
     timestamp: toIso(resolveNow(options.now)),
+    lifecycle_metrics: lifecycleSnapshot,
   });
 }
@@ -1170,7 +1181,7 @@ async function runFinalSequentialVerify({
     });
     if (typeof reapProducer === 'function') {
-      await reapProducer(verifierPaneId, paths.verdictFile);
+      await reapProducer(verifierPaneId, paths.verdictFile, 'verify-verdict');
     }
     if (verdict.verdict !== 'pass') {
@@ -1368,8 +1379,20 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
   const killPaneProcess = options.killPaneProcess ?? defaultKillPaneProcess;
   const lockSentinel = options.lockSentinelFile ?? defaultLockSentinelFile;
   const stampAckField = options.stampAckField ?? defaultStampAckField;
-  const reapProducer = async (paneId, sentinelFile) => {
+  // v0.15.4 PR-B4: lifecycle observability collector. Tests inject
+  // options.lifecycleMetrics for shape-contract verification; production
+  // path constructs from process.env (RLP_LIFECYCLE_METRICS=1 enables).
+  const debugLogger = makeDebugLogger(paths.debugLogFile);
+  const lifecycleMetrics = options.lifecycleMetrics ?? new LifecycleMetricsCollector({
+    env: options.env ?? process.env,
+    debugLog: (cat, fields) => debugLogger(cat, fields),
+  });
+  const reapProducer = async (paneId, sentinelFile, sentinelType = null) => {
     if (!paneId) return;
+    // v0.15.4 PR-B4: pane_eof_to_cleanup_ms = wallclock from kill-start to
+    // killPaneProcess return. pane_reap_latency_ms tracks the same window
+    // when the trigger was a sentinel observation (i.e. sentinelType set).
+    const reapStart = Date.now();
     await killPaneProcess(paneId, {
       sendRawKey,
       waitForExit: waitForProcessExit,
@@ -1384,7 +1407,22 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
     } catch (err) {
       console.error(`[handshake] waitForProcessExit failed on ${paneId} (${err?.message ?? err}); continuing`);
     }
+    const reapMs = Date.now() - reapStart;
+    lifecycleMetrics.record('pane_eof_to_cleanup_ms', reapMs, { pane_id: paneId });
+    if (sentinelType) {
+      lifecycleMetrics.record('pane_reap_latency_ms', reapMs, {
+        pane_id: paneId,
+        sentinel_type: sentinelType,
+      });
+    }
     if (sentinelFile) {
+      // v0.15.4 audit H3 fix: markLockStart BEFORE lockSentinel so the
+      // sentinel_lock_to_unlock_ms metric covers the full lock duration
+      // including chmod 0o444 execution time. Previous code recorded
+      // post-chmod timestamp — sub-ms skew but semantically inverted.
+      // v0.15.4 PR-B4: open lock-to-unlock pair tracking. markUnlock fires
+      // at unlockSentinelFile call sites or end-of-iter for never-unlocked.
+      lifecycleMetrics.markLockStart(path.basename(sentinelFile));
       await lockSentinel(sentinelFile, { log: (msg) => console.error(msg) });
       // PR-0b-narrow AC-H2: stamp the leader_ack audit field. Best-effort,
       // does not block subsequent dispatch.
@@ -1424,7 +1462,18 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
   const usList = await readUsList(paths, slug);
   if (usList.length === 0) {
-    throw new Error(`No user stories found for ${slug}`);
+    // D-5 (dogfood): both leaders parse only H2 `## US-NNN:`. A common mistake is
+    // authoring `### US-NNN` (H3+), which yields zero stories. Surface an actionable
+    // hint instead of a bare "not found" (the zsh leader silently degrades here;
+    // Node fail-closes — the safer behavior, now recoverable via `clean`).
+    let hint = '';
+    try {
+      const prdRaw = await fs.readFile(paths.prdFile, 'utf8');
+      if (/^#{3,}\s+US-\d{3}\b/m.test(prdRaw)) {
+        hint = ' — found US-NNN heading(s) at level ### or deeper; US headings must be H2 ("## US-NNN:")';
+      }
+    } catch { /* best-effort hint */ }
+    throw new Error(`No user stories found for ${slug}${hint}`);
   }
   if (!state.current_us) {
@@ -1516,13 +1565,15 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
     // iteration must not block the next producer's atomic-rename write.
     // Idempotent: missing-file calls are no-ops.
     await unlockSentinelFile(paths.signalFile);
+    lifecycleMetrics.markUnlock(path.basename(paths.signalFile), { iter: state.iteration });
     await unlockSentinelFile(paths.verdictFile);
+    lifecycleMetrics.markUnlock(path.basename(paths.verdictFile), { iter: state.iteration });
     // Audit drift from the prior iteration before doing anything new.
     const _laneSnapshotAfter = await _snapshotLaneMtimes(paths);
     const _laneViolations = await _checkLaneViolations(paths, _laneSnapshot, _laneSnapshotAfter, state, options);
     if (_laneViolations) {
       for (const v of _laneViolations) {
-        await appendIterationAnalytics(paths, state, state.current_us ?? 'ALL', 'lane_violation_warning', { ...options, lane_violation: v });
+        await appendIterationAnalytics(paths, state, state.current_us ?? 'ALL', 'lane_violation_warning', { ...options, lane_violation: v }, lifecycleMetrics);
       }
       if (options.laneStrict) {
         // Strict mode: escalate to BLOCKED with downgrade
@@ -1658,7 +1709,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
       }
       // Bug #7 Fix-Q/R: reap flywheel pane before consuming the signal.
-      await reapProducer(state.flywheel_pane_id ?? state.verifier_pane_id, paths.flywheelSignalFile);
+      await reapProducer(state.flywheel_pane_id ?? state.verifier_pane_id, paths.flywheelSignalFile, 'flywheel-signal');
       state.last_flywheel_decision = flywheelSignal.decision;
       // P0-A multi-mission orchestration: optionally captured from flywheel signal.
@@ -1701,7 +1752,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
         }
         // Bug #7 Fix-Q/R: reap guard pane before mutating state.
-        await reapProducer(guardPaneId, paths.flywheelGuardVerdictFile);
+        await reapProducer(guardPaneId, paths.flywheelGuardVerdictFile, 'flywheel-guard-verdict');
         if (!state.flywheel_guard_count[state.current_us]) {
           state.flywheel_guard_count[state.current_us] = 0;
@@ -1887,10 +1938,35 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
       }
     }
+    // v0.15.4 PR-B4: iter_signal_write_to_read_ms = wallclock from worker FS
+    // write to leader poll resolve. Sentinel mtime is the producer-side anchor;
+    // Date.now() is the leader-side anchor. Best-effort stat — if the file
+    // already lacks read perms (race vs prior lock), fall back to skip.
+    try {
+      const sigStat = fsSync.statSync(paths.signalFile);
+      lifecycleMetrics.record('iter_signal_write_to_read_ms', Date.now() - sigStat.mtimeMs, {
+        iter: state.iteration,
+        us_id: state.current_us,
+      });
+    } catch { /* fail-open: skip on stat error */ }
     // Bug #7 Fix-Q/R: reap the worker pane the instant we accept the signal so
     // claude/codex cannot self-review and rewrite iter-signal.json. Runs even
     // for the codex-fallback synthesized signal (no-op on a dead pane).
-    await reapProducer(state.worker_pane_id, paths.signalFile);
+    await reapProducer(state.worker_pane_id, paths.signalFile, 'iter-signal');
+    // v0.15.4 PR-B2-FIX: same worker pass produced done-claim. The pane is
+    // already reaped above; lock done-claim so the iter-NNN-done-claim archive
+    // and any post-iter Bug #8 gate read a snapshot the worker can no longer
+    // revise. Symmetric with the zsh lock-on-iter-signal contract at
+    // run_ralph_desk.zsh:3197. Best-effort: missing-file is fail-open.
+    //
+    // v0.15.4 audit H2 fix: NO markLockStart for done-claim. In production
+    // happy path done-claim is locked-but-never-unlocked (only signalFile +
+    // verdictFile receive iter-start unlockSentinelFile at L1552-1555), so
+    // markUnlock would never fire and the metric would silently never emit.
+    // done-claim is intentionally excluded from sentinel_lock_to_unlock_ms;
+    // the lib_ralph_desk.zsh:602 archival step is the practical lock-end
+    // event but is not currently instrumented (deferred — not B4 scope).
+    await lockSentinel(paths.doneClaimFile, { log: (msg) => console.error(msg) });
     // US-019 R7 P1-G: verify_partial malformed downgrade.
     // verify_partial requires verified_acs[] to be a non-empty array. Otherwise the verifier
@@ -1961,10 +2037,18 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
       });
     }
+    // v0.15.4 PR-B4: verdict_write_to_read_ms parallel to iter_signal metric.
+    try {
+      const verdStat = fsSync.statSync(paths.verdictFile);
+      lifecycleMetrics.record('verdict_write_to_read_ms', Date.now() - verdStat.mtimeMs, {
+        iter: state.iteration,
+        us_id: state.current_us,
+      });
+    } catch { /* fail-open */ }
     // Bug #7 Fix-Q/R: reap verifier pane immediately after accepting the
     // verdict — without this the codex/claude TUI keeps running for ~2min and
     // can rewrite verify-verdict.json (mtime drift observed in 19th launch).
-    await reapProducer(state.verifier_pane_id, paths.verdictFile);
+    await reapProducer(state.verifier_pane_id, paths.verdictFile, 'verify-verdict');
     if (verdict.verdict === 'pass') {
       state.consecutive_failures = 0;
@@ -1973,7 +2057,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
       }
       state.current_us = getNextUs(usList, state.verified_us, null);
       fixContractPath = null;
-      await appendIterationAnalytics(paths, state, usId, 'pass', options);
+      await appendIterationAnalytics(paths, state, usId, 'pass', options, lifecycleMetrics);
       await writeStatus(paths, state, options.onStatusChange, options.now);
       if (state.verified_us.length === usList.length) {
@@ -1989,7 +2073,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
       const blockedReason = verdict.reason || verdict.summary || 'verifier-blocked';
       const blockedClassification = _classifyBlock('verifier', { verdict, state, slug });
       await writeSentinel(paths.blockedSentinel, 'blocked', usId, blockedReason, blockedClassification, paths);
-      await appendIterationAnalytics(paths, state, usId, 'blocked', options);
+      await appendIterationAnalytics(paths, state, usId, 'blocked', options, lifecycleMetrics);
       await writeStatus(paths, state, options.onStatusChange, options.now);
       let svSummary;
       if (options.withSelfVerification) {
@@ -2028,7 +2112,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
     }
     state.consecutive_failures += 1;
-    await appendIterationAnalytics(paths, state, usId, 'fail', options);
+    await appendIterationAnalytics(paths, state, usId, 'fail', options, lifecycleMetrics);
     const upgradedModel = nextWorkerModel(options.workerModel ?? state.worker_model, state.consecutive_failures);
     if (upgradedModel === 'BLOCKED') {
       state.phase = 'blocked';

package/src/node/util/debug-log.mjs CHANGED Viewed

@@ -6,15 +6,19 @@
 // SHOULD use debugLog() instead of console/manual writes.
 //
 // Categories (governance §1f traceability):
-// - GOV   : governance enforcement (IL, CB triggers, scope locks, verdicts)
-// - DECIDE: leader decisions (model selection, fix contracts, escalation)
-// - OPTION: configuration snapshot at loop start
-// - FLOW  : execution progress (worker/verifier dispatch, signal reads, transitions)
+// - GOV       : governance enforcement (IL, CB triggers, scope locks, verdicts)
+// - DECIDE    : leader decisions (model selection, fix contracts, escalation)
+// - OPTION    : configuration snapshot at loop start
+// - FLOW      : execution progress (worker/verifier dispatch, signal reads, transitions)
+// - LIFECYCLE : v0.15.4 PR-B4 — tmux/process lifecycle metrics gated on
+//               RLP_LIFECYCLE_METRICS=1. Emission rules: see plan v3 §B4
+//               Table (5 metrics). Helper is no-op when flag unset (verified
+//               by tests/node/test-campaign-jsonl-shape.mjs).
 import fs from 'node:fs/promises';
 import path from 'node:path';
-const VALID_CATEGORIES = new Set(['GOV', 'DECIDE', 'OPTION', 'FLOW']);
+const VALID_CATEGORIES = new Set(['GOV', 'DECIDE', 'OPTION', 'FLOW', 'LIFECYCLE']);
 /**
  * Append a structured log line to debug.log. Format mirrors zsh log_debug:
@@ -22,7 +26,7 @@ const VALID_CATEGORIES = new Set(['GOV', 'DECIDE', 'OPTION', 'FLOW']);
  *
  * @param {Object} args
  * @param {string} args.debugLogPath — absolute path to debug.log
- * @param {'GOV'|'DECIDE'|'OPTION'|'FLOW'} args.category
+ * @param {'GOV'|'DECIDE'|'OPTION'|'FLOW'|'LIFECYCLE'} args.category
  * @param {Object<string,string|number|boolean>} args.fields — flat key/value
  *   pairs, serialized as `key=value`. Avoid nested objects; pre-stringify.
  * @returns {Promise<void>} — resolves even on filesystem errors (best-effort).

package/src/node/util/lifecycle-metrics.mjs ADDED Viewed

@@ -0,0 +1,102 @@
+// v0.15.4 PR-B4 — Lifecycle observability helper.
+//
+// Plan: docs/plans/v0.15-phase-b-plan-v3.md §B4.
+// Audit: docs/plans/v0.15-phase-b-lifecycle-audit.md §3 Table 2.
+//
+// Five metrics tracked, all gated on RLP_LIFECYCLE_METRICS=1 env flag:
+//   - iter_signal_write_to_read_ms     leader-poll-resolves vs worker-FS-write
+//   - verdict_write_to_read_ms          leader-poll-resolves vs verifier-FS-write
+//   - pane_eof_to_cleanup_ms            reap kill-start vs kill + exit-wait completion (every reap)
+//   - pane_reap_latency_ms              same window, recorded only when a sentinel observation triggered the reap
+//   - sentinel_lock_to_unlock_ms        per type, _lock vs _unlock (object)
+//
+// Emission discipline:
+//   - debug.log: tagged [LIFECYCLE] per record (when flag set)
+//   - campaign.jsonl: ONE batched lifecycle_metrics object per iteration
+//                     (the collector accumulates, the iter-end flush emits)
+// When flag is unset:
+//   - record() is a no-op (early return) — zero overhead beyond one boolean flag check
+//   - flush() returns null so analytics writer can branch on the field
+const ENV_FLAG_NAME = 'RLP_LIFECYCLE_METRICS';
+export function lifecycleMetricsEnabled(env = process.env) {
+  return env[ENV_FLAG_NAME] === '1'; // strict match: only the exact string '1' enables metrics
+}
+export class LifecycleMetricsCollector {
+  constructor({ env = process.env, debugLog = null } = {}) {
+    this._enabled = lifecycleMetricsEnabled(env);
+    this._debugLog = debugLog;
+    this._records = [];
+    this._sentinelLockTimes = new Map();
+  }
+  get enabled() {
+    return this._enabled;
+  }
+  // Record one timing metric (no-op when disabled). valueMs is in milliseconds and is
+  // stored rounded to an integer and clamped at 0. ctx is a flat object of audit fields (iter, us_id, pane_id, sentinel_type, etc).
+  record(name, valueMs, ctx = {}) {
+    if (!this._enabled) return;
+    const entry = {
+      metric: name,
+      value_ms: Math.max(0, Math.round(valueMs)),
+      ts: new Date().toISOString(),
+      ...ctx, // spread last: a ctx key named metric/value_ms/ts overrides the field above
+    };
+    this._records.push(entry);
+    if (this._debugLog) {
+      // Best-effort fire-and-forget. The debug-log helper is itself best-
+      // effort (appendFile error swallowed), so we don't await it.
+      this._debugLog('LIFECYCLE', { metric: name, value_ms: entry.value_ms, ...ctx });
+    }
+  }
+  // Convenience: pair-bookkeeping for sentinel_lock_to_unlock_ms (object-
+  // valued metric keyed by sentinel type). Call markLockStart at chmod 0o444
+  // time, markUnlock at chmod 0o644 time (or end-of-iter for never-unlocked).
+  //
+  // v0.15.4 audit H2: done-claim is intentionally NOT instrumented with this
+  // pair. In production happy path done-claim is locked-but-never-unlocked
+  // (campaign-main-loop unlocks only signalFile + verdictFile at iter start);
+  // markUnlock for done-claim never fires, so the metric would silently never
+  // emit. Future work: emit at lib_ralph_desk.zsh:602 archival site if needed.
+  //
+  // v0.15.4 audit H3: callers must invoke markLockStart BEFORE the chmod
+  // operation, not after, so the metric covers full lock duration including
+  // chmod execution time. Sub-ms skew, but semantically correct.
+  markLockStart(sentinelType, t = Date.now()) {
+    if (!this._enabled) return;
+    this._sentinelLockTimes.set(sentinelType, t); // a repeated lock for the same type overwrites the earlier start
+  }
+  markUnlock(sentinelType, ctx = {}, t = Date.now()) {
+    if (!this._enabled) return;
+    const start = this._sentinelLockTimes.get(sentinelType);
+    if (start === undefined) return; // no pending markLockStart for this type — nothing to pair, emit nothing
+    this.record('sentinel_lock_to_unlock_ms', t - start, {
+      ...ctx,
+      sentinel_type: sentinelType,
+    });
+    this._sentinelLockTimes.delete(sentinelType);
+  }
+  // Snapshot + reset of the accumulated records. Returns null when disabled;
+  // when enabled, returns an object keyed by metric name (empty object if nothing was recorded).
+  flush() {
+    if (!this._enabled) return null;
+    const records = this._records;
+    this._records = [];
+    // Group by metric name for compact campaign.jsonl shape:
+    //   { iter_signal_write_to_read_ms: [{value_ms,ts,...}, ...], ... }
+    const grouped = {};
+    for (const r of records) {
+      const { metric, ...rest } = r;
+      if (!grouped[metric]) grouped[metric] = [];
+      grouped[metric].push(rest);
+    }
+    return grouped;
+  }
+}

package/src/scripts/lib_ralph_desk.zsh CHANGED Viewed

@@ -261,6 +261,19 @@ _kill_pane_process() {
   if typeset -f log_debug >/dev/null 2>&1; then
     log_debug "[bug7] kill_pane_process pane=$pane_id role=$role"
   fi
+  # v0.15.4 PR-B4: pane_eof_to_cleanup_ms instrumentation (flag-gated).
+  # Records the wallclock from kill-start to wait_for_pane_ready return so
+  # B3 can value-assert the substrate fix actually closes the race window.
+  # Uses zsh native $EPOCHREALTIME (microsec) — portable to macOS BSD where
+  # `date +%N` is not supported.
+  local _b4_t0_ms=0
+  if [[ "${RLP_LIFECYCLE_METRICS:-0}" == "1" ]]; then
+    zmodload -e zsh/datetime || zmodload zsh/datetime 2>/dev/null
+    if [[ -n "${EPOCHREALTIME:-}" ]]; then
+      local _b4_t0_str="${EPOCHREALTIME//./}"
+      _b4_t0_ms=${_b4_t0_str:0:13}
+    fi
+  fi
   tmux send-keys -t "$pane_id" C-c 2>/dev/null
   sleep 0.5
   tmux send-keys -t "$pane_id" C-c 2>/dev/null
@@ -268,6 +281,12 @@ _kill_pane_process() {
   if typeset -f wait_for_pane_ready >/dev/null 2>&1; then
     wait_for_pane_ready "$pane_id" 5 2>/dev/null || true
   fi
+  if (( _b4_t0_ms > 0 )); then
+    local _b4_t1_str="${EPOCHREALTIME//./}"
+    local _b4_t1_ms=${_b4_t1_str:0:13}
+    log_lifecycle_metric "pane_eof_to_cleanup_ms" $((_b4_t1_ms - _b4_t0_ms)) \
+      "pane=$pane_id role=$role"
+  fi
   return 0
 }
@@ -285,6 +304,53 @@ _unlock_sentinel() {
   return 0
 }
+# =============================================================================
+# v0.15.4 PR-B4: Lifecycle observability — log_lifecycle_metric
+# =============================================================================
+# Plan: docs/plans/v0.15-phase-b-plan-v3.md §B4 (P2.1 critic-round-2 fix).
+# Helper is GATED on $RLP_LIFECYCLE_METRICS=1 (no-op when unset). Emits to
+# debug.log via log_debug, in a backgrounded subshell so the caller does not
+# block on the FS write. The Node-side mirror is src/node/util/lifecycle-
+# metrics.mjs LifecycleMetricsCollector.
+#
+# v0.15.4 audit M2: concurrent-appender semantics — `( ... ) &!` spawns a
+# disowned subshell per metric. Multiple metrics can fire in rapid succession
+# (e.g., during iter teardown) and race on debug.log. Note that the POSIX
+# PIPE_BUF atomicity guarantee covers pipes and FIFOs only, not regular
+# files. What keeps lines intact here is append-mode writing (assuming
+# log_debug appends with `>>`, i.e. O_APPEND): each write(2) is positioned at
+# the current end of file, and a single LIFECYCLE line (~150 bytes) is
+# normally emitted in one write. On local filesystems (APFS, ext4, xfs)
+# concurrent appends therefore produce intact non-interleaved lines in
+# practice. On NFS / FUSE / some Docker overlay setups append semantics may
+# not hold; in those environments, expect possible interleaving. This is
+# best-effort logging by design — the metric values land in campaign.jsonl
+# via the Node leader's batched flush as the canonical authoritative record.
+# debug.log is an audit aid, not the source of truth.
+#
+# Args:
+#   $1  metric_name       e.g. iter_signal_write_to_read_ms
+#   $2  value_ms          integer milliseconds (logged verbatim — no coercion; caller must pass an integer)
+#   $3  context (optional, free-form key=val pairs joined with spaces)
+#
+# Side effects:
+#   - When flag unset: returns 0 immediately (no fork, no FS call).
+#   - When flag set:   forks `( log_debug "..." ) &!` to debug.log.
+#
+# Examples:
+#   log_lifecycle_metric "iter_signal_write_to_read_ms" "$delta" \
+#     "iter=$ITERATION us=$us_id pane=$WORKER_PANE"
+#   log_lifecycle_metric "pane_reap_latency_ms" "$delta" \
+#     "iter=$ITERATION sentinel=done-claim"
+log_lifecycle_metric() {
+  [[ "${RLP_LIFECYCLE_METRICS:-0}" == "1" ]] || return 0  # flag gate: silent no-op unless the flag is exactly "1"
+  local metric="$1"
+  local value_ms="$2"
+  local ctx="${3:-}"
+  [[ -n "$metric" && -n "$value_ms" ]] || return 0  # name and value are both required; otherwise skip quietly
+  if typeset -f log_debug >/dev/null 2>&1; then  # emit only when log_debug is defined in this shell
+    ( log_debug "[LIFECYCLE] metric=$metric value_ms=$value_ms $ctx" ) &!  # disowned background subshell: caller never waits on it
+  fi
+  return 0
+}
 # PR-A (Bug #10) — validate operator-written manual recovery artifacts.
 # Returns 0 when all 5 checks pass; 1 otherwise. Sets RECOVERY_FAIL_REASON
 # (global) on failure for caller logging. Mirrors the Node-side helper

package/src/scripts/run_ralph_desk.zsh CHANGED Viewed

@@ -710,6 +710,10 @@ handle_worker_exit_codex() {
   dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
   log "  Codex worker completed with done-claim (us_id=$dc_us_id) and clean tree. Auto-generating signal."
   echo '{"iteration":'"$iter"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated after codex exit (clean tree)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
+  # v0.15.4 PR-B2-FIX: codex worker pane already exited — reaper would no-op,
+  # but lock done-claim as defense-in-depth so any orphaned subprocess cannot
+  # rewrite the file before lib_ralph_desk.zsh:602 archives it.
+  _lock_sentinel "$DONE_CLAIM_FILE"
   _emit_a4_fallback_audit "$dc_us_id" "$iter" "codex_exit_with_done_claim_clean"
   return 0
 }
@@ -925,7 +929,9 @@ create_session() {
   BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")
   # Truncate cost-log for fresh run (previous data in versioned campaign reports)
-  > "$COST_LOG"
+  # NOTE: ': >' not bare '>' — in zsh a bare redirect with no command runs $NULLCMD
+  # (=cat), which blocks reading stdin when the leader has an open TTY (D-1 dogfood hang).
+  : > "$COST_LOG"
   # v5.7 §4.2: WITH_SELF_VERIFICATION=1 is hard-rejected at script entry now,
   # so by the time we reach create_session() the flag is guaranteed to be 0.
@@ -1849,8 +1855,8 @@ write_worker_trigger() {
         else
           echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
         fi
-        echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
-        echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
+        echo "When done, you MUST WRITE (not just print) the verify signal to the iter-signal FILE at: ${SIGNAL_FILE}"
+        echo "Write this exact JSON to that file (us_id=\"${next_us}\", not \"ALL\"): {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", \"summary\": \"what was done\", \"timestamp\": \"ISO\"}"
         echo ""
         echo "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
       elif [[ -n "$VERIFIED_US" ]]; then
@@ -2292,6 +2298,15 @@ poll_for_signal() {
           if _bug8_check_synth_allowed "$ITERATION" "$dc_us_id" "inline_polling_a4_clean"; then
             log "  WARNING: done-claim exists for $dc_us_id but no iter-signal. Tree clean — auto-generating signal (A4 fallback)."
             log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
+            # v0.15.4 PR-B2-FIX: Worker pane is alive and idling post-done-claim
+            # (the canonical Bug #5/7 race window). Reap before synthesizing the
+            # signal so the worker cannot revise done-claim or emit a late
+            # iter-signal that races the leader's synthesized one. Mirror of
+            # Bug #7 Fix-Q parity at run_ralph_desk.zsh:3181 — kill before lock,
+            # lock before synth-write so the next leader read sees a frozen
+            # done-claim and a fresh signal_file in that order.
+            _kill_pane_process "$pane_id" "worker-a4"
+            _lock_sentinel "$DONE_CLAIM_FILE"
             echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim + clean tree)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
             _emit_a4_fallback_audit "$dc_us_id" "$ITERATION" "inline_polling_a4_clean"
             return 0
@@ -3180,6 +3195,11 @@ main() {
         # self-review and rewrite iter-signal.json (1m43s drift observed).
         _kill_pane_process "$WORKER_PANE" "worker"
         _lock_sentinel "$SIGNAL_FILE"
+        # v0.15.4 PR-B2-FIX: same worker pass also produced done-claim. Freeze
+        # it alongside iter-signal so Bug #8 gates and the iter-NNN-done-claim
+        # archive (lib_ralph_desk.zsh:602) read a snapshot the worker can no
+        # longer revise. Symmetric with iter-signal/verdict lock contract.
+        _lock_sentinel "$DONE_CLAIM_FILE"
         # PR-0b-narrow: stamp leader handshake ack on the iter-signal (audit-only).
         _stamp_ack_field "$SIGNAL_FILE"
       else

package/docs/plans/bug-report-overhaul-backlog.md DELETED Viewed

@@ -1,49 +0,0 @@
-# Bug Report Overhaul — P2/P3 Backlog
-> Companion to `bug-report-overhaul-v1.md` (PR-A/B/C plan).
-> User stop-rule: ralplan iterates only until P0+P1 = 0; P2 and below are captured here, NOT blockers.
-> Re-prioritize from this file in a future ralplan when the operator-minutes-saved metric from PR-A/B/C lands.
----
-## P2 — should fix in a follow-up PR after PR-A/B/C land
-### From v0 plan (Option C/D, deferred features)
-- **Heartbeat-warning sidecar (Option B from v0)** — emit `<slug>-warning.{md,json}` when heartbeat anomaly crosses 50% of `iter-timeout`. Lets operator pre-empt a BLOCKED before the 30-min wall hits. Decoupled from this PR set because (a) report-quality is the dominant pain (D1), and (b) warning sidecar adds a second sentinel surface that risks false-positive fatigue. Revisit after PR-A/B land and we measure how many BLOCKEDs would have been pre-empted.
-- **GitHub Issues integration (Option D from v0)** — POST blocked context to a configured GitHub repo issue. Requires per-repo authn story (token storage, network retry, rate-limits) — violates principle 3 in the current PR set. Re-evaluate after a credible authn proposal exists.
-- **Pattern-learning loop** — mine `~/.claude/ralph-desk/analytics/*/bug-reports/` for emerging clusters. Auto-extends `docs/bug-patterns.json` with new candidate signatures for human review.
-- **Cross-campaign bug-report dashboard in `/rlp-desk analytics`** — surface patterns across projects.
-- **Auto-suggest "this looks like Bug #N — try fix-X" inline in CLI output** — operationalize PR-C's `pattern_match` data with an inline suggestion. Held back so the deterministic Jaccard implementation can be calibrated against real campaign data first.
-- **Operator-CLI `/rlp-desk recover <slug> --to verify`** — write the manual recovery artifacts (`iter-signal.json`, `done-claim.json`, `status.json` patch) deterministically. Currently a hand-rolled `jq` pipeline per Bug #10 §7 workaround.
-### From Codex Critic Round 2 (BACKLOG)
-- **[P2-1]** PR-A `_validateOperatorRecoveryArtifacts` return shape — current pseudo-code mixes `if (valid)` (boolean coercion) with `valid.reason` (object access). Resolve at implementation time to either `{ ok: bool, reason: string }` (object) or pure boolean + separate side-channel for the warning text. Affects the audit log line shape.
-- **[P2-2]** PR-A test summary in §5 says "5 ACs (R1–R5)" but §8 added AC-R6 (`_skipNextWorkerDispatch` cleared after one use). Update §5 to "6 ACs (R1–R6)" for consistency before PR-A merges.
-### From Codex Critic Round 3 (BACKLOG)
-- **[P2-3]** §9 step 5 banner-aware diff command only covers `run_ralph_desk.zsh`. PR-A and PR-B both also touch `lib_ralph_desk.zsh`. Add a matching `diff <(cat src/scripts/lib_ralph_desk.zsh) <(tail -n +N ~/.claude/ralph-desk/scripts/lib_ralph_desk.zsh)` step in the implementation runbook (verify the right `tail -n +N` offset at impl time — `lib_*.zsh` is sourced and may have no shebang). Extend to `init_ralph_desk.zsh` if PR-B touches it.
-## P3 — nice-to-have polish
-### From Codex Critic Round 2
-- **[P3-1]** Option C/D/E rejection rationale in v1 §4 says "Same as v0" — acceptable because v0 is co-located, but inline one-sentence rationale would make the v1 plan self-contained for future readers who do not have the v0 file.
-### From Architect Round 1 (residual notes)
-- Validate the `bug-patterns.json` Jaccard threshold (0.7) against actual past blocks once we have ≥20 historical reports — current threshold is hand-picked. Likely needs a small calibration script in `scripts/`.
-- Consider whether `bug-reports/` should ship in the npm tarball default `.gitignore` of newly initialized projects — currently the schema doc only recommends operators add it themselves.
----
-## Promotion criteria (when to re-ralplan one of these)
-A backlog item moves back into a planner draft when **any** of these is true:
-1. PR-A/B/C lands and we measure ≥3 BLOCKEDs where the deferred item would have moved D1 by ≥10 minutes (e.g. heartbeat warning would have pre-empted a 30-min wait).
-2. Operator hand-files ≥2 bug reports about the same backlog gap (signal that the deferral was wrong).
-3. The `bug-patterns.json` seed becomes too large for human authoring (≥30 entries) — triggers the pattern-learning loop item.
-4. A user explicitly asks for one (e.g. operator-CLI `/rlp-desk recover` once they tire of hand-rolled jq pipelines).