@ai-dev-methodologies/rlp-desk 0.15.3 → 0.15.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/CHANGELOG.md +98 -0
  2. package/README.md +34 -4
  3. package/docs/rlp-desk/failure-modes.md +191 -0
  4. package/package.json +10 -3
  5. package/src/node/MANIFEST.txt +3 -0
  6. package/src/node/prompts/prompt-assembler.mjs +2 -2
  7. package/src/node/run.mjs +70 -3
  8. package/src/node/runner/campaign-main-loop.mjs +97 -13
  9. package/src/node/util/debug-log.mjs +10 -6
  10. package/src/node/util/lifecycle-metrics.mjs +102 -0
  11. package/src/scripts/lib_ralph_desk.zsh +66 -0
  12. package/src/scripts/run_ralph_desk.zsh +23 -3
  13. package/docs/plans/bug-report-overhaul-backlog.md +0 -49
  14. package/docs/plans/bug-report-overhaul-v0.md +0 -238
  15. package/docs/plans/bug-report-overhaul-v1.md +0 -319
  16. package/docs/plans/native-agent-revert.md +0 -184
  17. package/docs/plans/polished-gliding-toucan.md +0 -234
  18. package/docs/plans/pr-e-phase-c1-blocked-recovery-hygiene-v0.md +0 -233
  19. package/docs/plans/spicy-booping-galaxy.md +0 -717
  20. package/docs/plans/strategic-review/rlp-desk-strategic-review.md +0 -125
  21. package/docs/plans/v0.15-stabilization-phase-a-prep.md +0 -130
  22. package/docs/plans/v0.15-stabilization-plan.md +0 -178
  23. package/docs/plans/v0.16-real-llm-sv-gate-spec.md +0 -177
  24. package/docs/rlp-desk/internal/verification-policy-gap-analysis.md +0 -523
  25. package/docs/rlp-desk/internal/verification-strategy-research.md +0 -2097
  26. package/docs/rlp-desk/plans/cozy-gliding-trinket.md +0 -53
  27. package/docs/rlp-desk/plans/frolicking-churning-honey.md +0 -253
  28. package/docs/rlp-desk/plans/keen-sauteeing-snowflake.md +0 -245
  29. package/docs/rlp-desk/plans/mutable-booping-corbato.md +0 -163
  30. package/docs/rlp-desk/plans/rlp-desk-0.11-handoff-7fixes.md +0 -352
  31. package/docs/rlp-desk/plans/rlp-desk-0.11.1-tmux-pane-disappearance.md +0 -260
  32. package/docs/rlp-desk/plans/rlp-desk-elegant-papert-agent-a8cd695ffca2a3ad8.md +0 -84
  33. package/docs/rlp-desk/plans/rlp-desk-elegant-papert.md +0 -270
  34. package/docs/rlp-desk/plans/rlp-desk-tmux-flywheel-routing.md +0 -730
  35. package/docs/rlp-desk/plans/toasty-whistling-diffie-agent-a6814625642e956da.md +0 -201
  36. package/docs/rlp-desk/plans/toasty-whistling-diffie.md +0 -117
  37. package/docs/rlp-desk/plans/validated-snacking-crayon.md +0 -204
  38. package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-output.log +0 -0
  39. package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-prompt.md +0 -38
  40. package/examples/calculator/.claude/ralph-desk/logs/loop-test/iter-001.worker-trigger.sh +0 -28
  41. package/examples/calculator/.claude/ralph-desk/logs/loop-test/session-config.json +0 -25
  42. package/examples/calculator/.claude/ralph-desk/logs/loop-test/status.json +0 -10
  43. package/examples/calculator/.claude/ralph-desk/logs/loop-test/worker-heartbeat.json +0 -1
@@ -32,6 +32,8 @@ import {
32
32
  generateSVReport,
33
33
  prepareCampaignAnalytics,
34
34
  } from '../reporting/campaign-reporting.mjs';
35
+ import { LifecycleMetricsCollector } from '../util/lifecycle-metrics.mjs';
36
+ import { makeDebugLogger } from '../util/debug-log.mjs';
35
37
  import {
36
38
  createPane as defaultCreatePane,
37
39
  killPaneProcess as defaultKillPaneProcess,
@@ -91,7 +93,7 @@ export function detectLegacyDeskInRunMode(rootDir, env = process.env) {
91
93
  return { legacyPath, newPath, message };
92
94
  }
93
95
 
94
- function buildPaths(rootDir, slug, env = process.env) {
96
+ export function buildPaths(rootDir, slug, env = process.env) {
95
97
  const deskRoot = resolveDeskRoot(rootDir, env);
96
98
  const campaignLogDir = path.join(deskRoot, 'logs', slug);
97
99
 
@@ -133,6 +135,10 @@ function buildPaths(rootDir, slug, env = process.env) {
133
135
  flywheelGuardPromptFile: path.join(deskRoot, 'prompts', `${slug}.flywheel-guard.prompt.md`),
134
136
  flywheelGuardVerdictFile: path.join(deskRoot, 'memos', `${slug}-flywheel-guard-verdict.json`),
135
137
  laneAuditFile: path.join(campaignLogDir, 'lane-audit.json'),
138
+ // v0.15.4 PR-B4: structured debug.log. log_lifecycle_metric (zsh) and
139
+ // LifecycleMetricsCollector (Node) both emit here when
140
+ // RLP_LIFECYCLE_METRICS=1.
141
+ debugLogFile: path.join(campaignLogDir, 'debug.log'),
136
142
  };
137
143
  }
138
144
 
@@ -555,7 +561,11 @@ async function _archiveRecoveredSidecar(paths) {
555
561
  }
556
562
  }
557
563
 
558
- async function appendIterationAnalytics(paths, state, usId, verdict, options) {
564
+ async function appendIterationAnalytics(paths, state, usId, verdict, options, lifecycleMetrics = null) {
565
+ // v0.15.4 PR-B4: lifecycle_metrics field — null when flag unset (collector
566
+ // returns null), object grouped by metric name when flag set. Test:
567
+ // tests/node/test-campaign-jsonl-shape.mjs.
568
+ const lifecycleSnapshot = lifecycleMetrics ? lifecycleMetrics.flush() : null;
559
569
  await appendCampaignAnalytics(paths.analyticsFile, {
560
570
  iter: state.iteration,
561
571
  us_id: usId,
@@ -564,6 +574,7 @@ async function appendIterationAnalytics(paths, state, usId, verdict, options) {
564
574
  verdict,
565
575
  duration: 0,
566
576
  timestamp: toIso(resolveNow(options.now)),
577
+ lifecycle_metrics: lifecycleSnapshot,
567
578
  });
568
579
  }
569
580
 
@@ -1170,7 +1181,7 @@ async function runFinalSequentialVerify({
1170
1181
  });
1171
1182
 
1172
1183
  if (typeof reapProducer === 'function') {
1173
- await reapProducer(verifierPaneId, paths.verdictFile);
1184
+ await reapProducer(verifierPaneId, paths.verdictFile, 'verify-verdict');
1174
1185
  }
1175
1186
 
1176
1187
  if (verdict.verdict !== 'pass') {
@@ -1368,8 +1379,20 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1368
1379
  const killPaneProcess = options.killPaneProcess ?? defaultKillPaneProcess;
1369
1380
  const lockSentinel = options.lockSentinelFile ?? defaultLockSentinelFile;
1370
1381
  const stampAckField = options.stampAckField ?? defaultStampAckField;
1371
- const reapProducer = async (paneId, sentinelFile) => {
1382
+ // v0.15.4 PR-B4: lifecycle observability collector. Tests inject
1383
+ // options.lifecycleMetrics for shape-contract verification; production
1384
+ // path constructs from process.env (RLP_LIFECYCLE_METRICS=1 enables).
1385
+ const debugLogger = makeDebugLogger(paths.debugLogFile);
1386
+ const lifecycleMetrics = options.lifecycleMetrics ?? new LifecycleMetricsCollector({
1387
+ env: options.env ?? process.env,
1388
+ debugLog: (cat, fields) => debugLogger(cat, fields),
1389
+ });
1390
+ const reapProducer = async (paneId, sentinelFile, sentinelType = null) => {
1372
1391
  if (!paneId) return;
1392
+ // v0.15.4 PR-B4: pane_eof_to_cleanup_ms = wallclock from kill-start to
1393
+ // killPaneProcess return. pane_reap_latency_ms tracks the same window
1394
+ // when the trigger was a sentinel observation (i.e. sentinelType set).
1395
+ const reapStart = Date.now();
1373
1396
  await killPaneProcess(paneId, {
1374
1397
  sendRawKey,
1375
1398
  waitForExit: waitForProcessExit,
@@ -1384,7 +1407,22 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1384
1407
  } catch (err) {
1385
1408
  console.error(`[handshake] waitForProcessExit failed on ${paneId} (${err?.message ?? err}); continuing`);
1386
1409
  }
1410
+ const reapMs = Date.now() - reapStart;
1411
+ lifecycleMetrics.record('pane_eof_to_cleanup_ms', reapMs, { pane_id: paneId });
1412
+ if (sentinelType) {
1413
+ lifecycleMetrics.record('pane_reap_latency_ms', reapMs, {
1414
+ pane_id: paneId,
1415
+ sentinel_type: sentinelType,
1416
+ });
1417
+ }
1387
1418
  if (sentinelFile) {
1419
+ // v0.15.4 audit H3 fix: markLockStart BEFORE lockSentinel so the
1420
+ // sentinel_lock_to_unlock_ms metric covers the full lock duration
1421
+ // including chmod 0o444 execution time. Previous code recorded
1422
+ // post-chmod timestamp — sub-ms skew but semantically inverted.
1423
+ // v0.15.4 PR-B4: open lock-to-unlock pair tracking. markUnlock fires
1424
+ // at unlockSentinelFile call sites or end-of-iter for never-unlocked.
1425
+ lifecycleMetrics.markLockStart(path.basename(sentinelFile));
1388
1426
  await lockSentinel(sentinelFile, { log: (msg) => console.error(msg) });
1389
1427
  // PR-0b-narrow AC-H2: stamp the leader_ack audit field. Best-effort,
1390
1428
  // does not block subsequent dispatch.
@@ -1424,7 +1462,18 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1424
1462
  const usList = await readUsList(paths, slug);
1425
1463
 
1426
1464
  if (usList.length === 0) {
1427
- throw new Error(`No user stories found for ${slug}`);
1465
+ // D-5 (dogfood): both leaders parse only H2 `## US-NNN:`. A common mistake is
1466
+ // authoring `### US-NNN` (H3+), which yields zero stories. Surface an actionable
1467
+ // hint instead of a bare "not found" (the zsh leader silently degrades here;
1468
+ // Node fail-closes — the safer behavior, now recoverable via `clean`).
1469
+ let hint = '';
1470
+ try {
1471
+ const prdRaw = await fs.readFile(paths.prdFile, 'utf8');
1472
+ if (/^#{3,}\s+US-\d{3}\b/m.test(prdRaw)) {
1473
+ hint = ' — found US-NNN heading(s) at level ### or deeper; US headings must be H2 ("## US-NNN:")';
1474
+ }
1475
+ } catch { /* best-effort hint */ }
1476
+ throw new Error(`No user stories found for ${slug}${hint}`);
1428
1477
  }
1429
1478
 
1430
1479
  if (!state.current_us) {
@@ -1516,13 +1565,15 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1516
1565
  // iteration must not block the next producer's atomic-rename write.
1517
1566
  // Idempotent: missing-file calls are no-ops.
1518
1567
  await unlockSentinelFile(paths.signalFile);
1568
+ lifecycleMetrics.markUnlock(path.basename(paths.signalFile), { iter: state.iteration });
1519
1569
  await unlockSentinelFile(paths.verdictFile);
1570
+ lifecycleMetrics.markUnlock(path.basename(paths.verdictFile), { iter: state.iteration });
1520
1571
  // Audit drift from the prior iteration before doing anything new.
1521
1572
  const _laneSnapshotAfter = await _snapshotLaneMtimes(paths);
1522
1573
  const _laneViolations = await _checkLaneViolations(paths, _laneSnapshot, _laneSnapshotAfter, state, options);
1523
1574
  if (_laneViolations) {
1524
1575
  for (const v of _laneViolations) {
1525
- await appendIterationAnalytics(paths, state, state.current_us ?? 'ALL', 'lane_violation_warning', { ...options, lane_violation: v });
1576
+ await appendIterationAnalytics(paths, state, state.current_us ?? 'ALL', 'lane_violation_warning', { ...options, lane_violation: v }, lifecycleMetrics);
1526
1577
  }
1527
1578
  if (options.laneStrict) {
1528
1579
  // Strict mode: escalate to BLOCKED with downgrade
@@ -1658,7 +1709,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1658
1709
  }
1659
1710
 
1660
1711
  // Bug #7 Fix-Q/R: reap flywheel pane before consuming the signal.
1661
- await reapProducer(state.flywheel_pane_id ?? state.verifier_pane_id, paths.flywheelSignalFile);
1712
+ await reapProducer(state.flywheel_pane_id ?? state.verifier_pane_id, paths.flywheelSignalFile, 'flywheel-signal');
1662
1713
 
1663
1714
  state.last_flywheel_decision = flywheelSignal.decision;
1664
1715
  // P0-A multi-mission orchestration: optionally captured from flywheel signal.
@@ -1701,7 +1752,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1701
1752
  }
1702
1753
 
1703
1754
  // Bug #7 Fix-Q/R: reap guard pane before mutating state.
1704
- await reapProducer(guardPaneId, paths.flywheelGuardVerdictFile);
1755
+ await reapProducer(guardPaneId, paths.flywheelGuardVerdictFile, 'flywheel-guard-verdict');
1705
1756
 
1706
1757
  if (!state.flywheel_guard_count[state.current_us]) {
1707
1758
  state.flywheel_guard_count[state.current_us] = 0;
@@ -1887,10 +1938,35 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1887
1938
  }
1888
1939
  }
1889
1940
 
1941
+ // v0.15.4 PR-B4: iter_signal_write_to_read_ms = wallclock from worker FS
1942
+ // write to leader poll resolve. Sentinel mtime is the producer-side anchor;
1943
+ // Date.now() is the leader-side anchor. Best-effort stat — if the file
1944
+ // already lacks read perms (race vs prior lock), fall back to skip.
1945
+ try {
1946
+ const sigStat = fsSync.statSync(paths.signalFile);
1947
+ lifecycleMetrics.record('iter_signal_write_to_read_ms', Date.now() - sigStat.mtimeMs, {
1948
+ iter: state.iteration,
1949
+ us_id: state.current_us,
1950
+ });
1951
+ } catch { /* fail-open: skip on stat error */ }
1890
1952
  // Bug #7 Fix-Q/R: reap the worker pane the instant we accept the signal so
1891
1953
  // claude/codex cannot self-review and rewrite iter-signal.json. Runs even
1892
1954
  // for the codex-fallback synthesized signal (no-op on a dead pane).
1893
- await reapProducer(state.worker_pane_id, paths.signalFile);
1955
+ await reapProducer(state.worker_pane_id, paths.signalFile, 'iter-signal');
1956
+ // v0.15.4 PR-B2-FIX: same worker pass produced done-claim. The pane is
1957
+ // already reaped above; lock done-claim so the iter-NNN-done-claim archive
1958
+ // and any post-iter Bug #8 gate read a snapshot the worker can no longer
1959
+ // revise. Symmetric with the zsh lock-on-iter-signal contract at
1960
+ // run_ralph_desk.zsh:3197. Best-effort: missing-file is fail-open.
1961
+ //
1962
+ // v0.15.4 audit H2 fix: NO markLockStart for done-claim. In production
1963
+ // happy path done-claim is locked-but-never-unlocked (only signalFile +
1964
+ // verdictFile receive iter-start unlockSentinelFile in _runCampaignBody), so
1965
+ // markUnlock would never fire and the metric would silently never emit.
1966
+ // done-claim is intentionally excluded from sentinel_lock_to_unlock_ms;
1967
+ // the lib_ralph_desk.zsh:602 archival step is the practical lock-end
1968
+ // event but is not currently instrumented (deferred — not B4 scope).
1969
+ await lockSentinel(paths.doneClaimFile, { log: (msg) => console.error(msg) });
1894
1970
 
1895
1971
  // US-019 R7 P1-G: verify_partial malformed downgrade.
1896
1972
  // verify_partial requires verified_acs[] to be a non-empty array. Otherwise the verifier
@@ -1961,10 +2037,18 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1961
2037
  });
1962
2038
  }
1963
2039
 
2040
+ // v0.15.4 PR-B4: verdict_write_to_read_ms parallel to iter_signal metric.
2041
+ try {
2042
+ const verdStat = fsSync.statSync(paths.verdictFile);
2043
+ lifecycleMetrics.record('verdict_write_to_read_ms', Date.now() - verdStat.mtimeMs, {
2044
+ iter: state.iteration,
2045
+ us_id: state.current_us,
2046
+ });
2047
+ } catch { /* fail-open */ }
1964
2048
  // Bug #7 Fix-Q/R: reap verifier pane immediately after accepting the
1965
2049
  // verdict — without this the codex/claude TUI keeps running for ~2min and
1966
2050
  // can rewrite verify-verdict.json (mtime drift observed in 19th launch).
1967
- await reapProducer(state.verifier_pane_id, paths.verdictFile);
2051
+ await reapProducer(state.verifier_pane_id, paths.verdictFile, 'verify-verdict');
1968
2052
 
1969
2053
  if (verdict.verdict === 'pass') {
1970
2054
  state.consecutive_failures = 0;
@@ -1973,7 +2057,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1973
2057
  }
1974
2058
  state.current_us = getNextUs(usList, state.verified_us, null);
1975
2059
  fixContractPath = null;
1976
- await appendIterationAnalytics(paths, state, usId, 'pass', options);
2060
+ await appendIterationAnalytics(paths, state, usId, 'pass', options, lifecycleMetrics);
1977
2061
  await writeStatus(paths, state, options.onStatusChange, options.now);
1978
2062
 
1979
2063
  if (state.verified_us.length === usList.length) {
@@ -1989,7 +2073,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1989
2073
  const blockedReason = verdict.reason || verdict.summary || 'verifier-blocked';
1990
2074
  const blockedClassification = _classifyBlock('verifier', { verdict, state, slug });
1991
2075
  await writeSentinel(paths.blockedSentinel, 'blocked', usId, blockedReason, blockedClassification, paths);
1992
- await appendIterationAnalytics(paths, state, usId, 'blocked', options);
2076
+ await appendIterationAnalytics(paths, state, usId, 'blocked', options, lifecycleMetrics);
1993
2077
  await writeStatus(paths, state, options.onStatusChange, options.now);
1994
2078
  let svSummary;
1995
2079
  if (options.withSelfVerification) {
@@ -2028,7 +2112,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
2028
2112
  }
2029
2113
 
2030
2114
  state.consecutive_failures += 1;
2031
- await appendIterationAnalytics(paths, state, usId, 'fail', options);
2115
+ await appendIterationAnalytics(paths, state, usId, 'fail', options, lifecycleMetrics);
2032
2116
  const upgradedModel = nextWorkerModel(options.workerModel ?? state.worker_model, state.consecutive_failures);
2033
2117
  if (upgradedModel === 'BLOCKED') {
2034
2118
  state.phase = 'blocked';
@@ -6,15 +6,19 @@
6
6
  // SHOULD use debugLog() instead of console/manual writes.
7
7
  //
8
8
  // Categories (governance §1f traceability):
9
- // - GOV : governance enforcement (IL, CB triggers, scope locks, verdicts)
10
- // - DECIDE: leader decisions (model selection, fix contracts, escalation)
11
- // - OPTION: configuration snapshot at loop start
12
- // - FLOW : execution progress (worker/verifier dispatch, signal reads, transitions)
9
+ // - GOV : governance enforcement (IL, CB triggers, scope locks, verdicts)
10
+ // - DECIDE : leader decisions (model selection, fix contracts, escalation)
11
+ // - OPTION : configuration snapshot at loop start
12
+ // - FLOW : execution progress (worker/verifier dispatch, signal reads, transitions)
13
+ // - LIFECYCLE : v0.15.4 PR-B4 — tmux/process lifecycle metrics gated on
14
+ // RLP_LIFECYCLE_METRICS=1. Emission rules: see plan v3 §B4
15
+ // Table (5 metrics). Helper is no-op when flag unset (verified
16
+ // by tests/node/test-campaign-jsonl-shape.mjs).
13
17
 
14
18
  import fs from 'node:fs/promises';
15
19
  import path from 'node:path';
16
20
 
17
- const VALID_CATEGORIES = new Set(['GOV', 'DECIDE', 'OPTION', 'FLOW']);
21
+ const VALID_CATEGORIES = new Set(['GOV', 'DECIDE', 'OPTION', 'FLOW', 'LIFECYCLE']);
18
22
 
19
23
  /**
20
24
  * Append a structured log line to debug.log. Format mirrors zsh log_debug:
@@ -22,7 +26,7 @@ const VALID_CATEGORIES = new Set(['GOV', 'DECIDE', 'OPTION', 'FLOW']);
22
26
  *
23
27
  * @param {Object} args
24
28
  * @param {string} args.debugLogPath — absolute path to debug.log
25
- * @param {'GOV'|'DECIDE'|'OPTION'|'FLOW'} args.category
29
+ * @param {'GOV'|'DECIDE'|'OPTION'|'FLOW'|'LIFECYCLE'} args.category
26
30
  * @param {Object<string,string|number|boolean>} args.fields — flat key/value
27
31
  * pairs, serialized as `key=value`. Avoid nested objects; pre-stringify.
28
32
  * @returns {Promise<void>} — resolves even on filesystem errors (best-effort).
@@ -0,0 +1,102 @@
1
+ // v0.15.4 PR-B4 — Lifecycle observability helper.
2
+ //
3
+ // Plan: docs/plans/v0.15-phase-b-plan-v3.md §B4.
4
+ // Audit: docs/plans/v0.15-phase-b-lifecycle-audit.md §3 Table 2.
5
+ //
6
+ // Five metrics tracked, all gated on RLP_LIFECYCLE_METRICS=1 env flag:
7
+ // - iter_signal_write_to_read_ms leader-poll-resolves vs worker-FS-write
8
+ // - verdict_write_to_read_ms leader-poll-resolves vs verifier-FS-write
9
+ // - pane_eof_to_cleanup_ms kill-start vs end of pane reap (wallclock)
10
+ // - pane_reap_latency_ms same reap window, recorded only when a sentinel triggered the reap
11
+ // - sentinel_lock_to_unlock_ms per type, _lock vs _unlock (object)
12
+ //
13
+ // Emission discipline:
14
+ // - debug.log: tagged [LIFECYCLE] per record (when flag set)
15
+ // - campaign.jsonl: ONE batched lifecycle_metrics object per iteration
16
+ // (the collector accumulates, the iter-end flush emits)
17
+ // When flag is unset:
18
+ // - record() is a no-op (early return) — zero overhead beyond a boolean flag check
19
+ // - flush() returns null so analytics writer can branch on the field
20
+
21
+ const ENV_FLAG_NAME = 'RLP_LIFECYCLE_METRICS';
22
+
23
+ export function lifecycleMetricsEnabled(env = process.env) {
24
+ return env[ENV_FLAG_NAME] === '1';
25
+ }
26
+
27
+ export class LifecycleMetricsCollector {
28
+ constructor({ env = process.env, debugLog = null } = {}) {
29
+ this._enabled = lifecycleMetricsEnabled(env);
30
+ this._debugLog = debugLog;
31
+ this._records = [];
32
+ this._sentinelLockTimes = new Map();
33
+ }
34
+
35
+ get enabled() {
36
+ return this._enabled;
37
+ }
38
+
39
+ // Record a single timing metric. value is in milliseconds. ctx is a flat
40
+ // object of audit fields (iter, us_id, pane_id, sentinel_type, etc).
41
+ record(name, valueMs, ctx = {}) {
42
+ if (!this._enabled) return;
43
+ const entry = {
44
+ metric: name,
45
+ value_ms: Math.max(0, Math.round(valueMs)),
46
+ ts: new Date().toISOString(),
47
+ ...ctx,
48
+ };
49
+ this._records.push(entry);
50
+ if (this._debugLog) {
51
+ // Best-effort fire-and-forget. The debug-log helper is itself best-
52
+ // effort (appendFile error swallowed), so we don't await it.
53
+ this._debugLog('LIFECYCLE', { metric: name, value_ms: entry.value_ms, ...ctx });
54
+ }
55
+ }
56
+
57
+ // Convenience: pair-bookkeeping for sentinel_lock_to_unlock_ms (object-
58
+ // valued metric keyed by sentinel type). Call markLockStart at chmod 0o444
59
+ // time, markUnlock at chmod 0o644 time (or end-of-iter for never-unlocked).
60
+ //
61
+ // v0.15.4 audit H2: done-claim is intentionally NOT instrumented with this
62
+ // pair. In production happy path done-claim is locked-but-never-unlocked
63
+ // (campaign-main-loop unlocks only signalFile + verdictFile at iter start);
64
+ // markUnlock for done-claim never fires, so the metric would silently never
65
+ // emit. Future work: emit at lib_ralph_desk.zsh:602 archival site if needed.
66
+ //
67
+ // v0.15.4 audit H3: callers must invoke markLockStart BEFORE the chmod
68
+ // operation, not after, so the metric covers full lock duration including
69
+ // chmod execution time. Sub-ms skew, but semantically correct.
70
+ markLockStart(sentinelType, t = Date.now()) {
71
+ if (!this._enabled) return;
72
+ this._sentinelLockTimes.set(sentinelType, t);
73
+ }
74
+
75
+ markUnlock(sentinelType, ctx = {}, t = Date.now()) {
76
+ if (!this._enabled) return;
77
+ const start = this._sentinelLockTimes.get(sentinelType);
78
+ if (start === undefined) return;
79
+ this.record('sentinel_lock_to_unlock_ms', t - start, {
80
+ ...ctx,
81
+ sentinel_type: sentinelType,
82
+ });
83
+ this._sentinelLockTimes.delete(sentinelType);
84
+ }
85
+
86
+ // Snapshot + reset for end-of-iteration flush. Returns null when disabled
87
+ // so the analytics writer emits lifecycle_metrics: null.
88
+ flush() {
89
+ if (!this._enabled) return null;
90
+ const records = this._records;
91
+ this._records = [];
92
+ // Group by metric name for compact campaign.jsonl shape:
93
+ // { iter_signal_write_to_read_ms: [{value_ms,ts,...}, ...], ... }
94
+ const grouped = {};
95
+ for (const r of records) {
96
+ const { metric, ...rest } = r;
97
+ if (!grouped[metric]) grouped[metric] = [];
98
+ grouped[metric].push(rest);
99
+ }
100
+ return grouped;
101
+ }
102
+ }
@@ -261,6 +261,19 @@ _kill_pane_process() {
261
261
  if typeset -f log_debug >/dev/null 2>&1; then
262
262
  log_debug "[bug7] kill_pane_process pane=$pane_id role=$role"
263
263
  fi
264
+ # v0.15.4 PR-B4: pane_eof_to_cleanup_ms instrumentation (flag-gated).
265
+ # Records the wallclock from kill-start to wait_for_pane_ready return so
266
+ # B3 can value-assert the substrate fix actually closes the race window.
267
+ # Uses zsh native $EPOCHREALTIME (microsec) — portable to macOS BSD where
268
+ # `date +%N` is not supported.
269
+ local _b4_t0_ms=0
270
+ if [[ "${RLP_LIFECYCLE_METRICS:-0}" == "1" ]]; then
271
+ zmodload -e zsh/datetime || zmodload zsh/datetime 2>/dev/null
272
+ if [[ -n "${EPOCHREALTIME:-}" ]]; then
273
+ local _b4_t0_str="${EPOCHREALTIME//./}"
274
+ _b4_t0_ms=${_b4_t0_str:0:13}
275
+ fi
276
+ fi
264
277
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
265
278
  sleep 0.5
266
279
  tmux send-keys -t "$pane_id" C-c 2>/dev/null
@@ -268,6 +281,12 @@ _kill_pane_process() {
268
281
  if typeset -f wait_for_pane_ready >/dev/null 2>&1; then
269
282
  wait_for_pane_ready "$pane_id" 5 2>/dev/null || true
270
283
  fi
284
+ if (( _b4_t0_ms > 0 )); then
285
+ local _b4_t1_str="${EPOCHREALTIME//./}"
286
+ local _b4_t1_ms=${_b4_t1_str:0:13}
287
+ log_lifecycle_metric "pane_eof_to_cleanup_ms" $((_b4_t1_ms - _b4_t0_ms)) \
288
+ "pane=$pane_id role=$role"
289
+ fi
271
290
  return 0
272
291
  }
273
292
 
@@ -285,6 +304,53 @@ _unlock_sentinel() {
285
304
  return 0
286
305
  }
287
306
 
307
+ # =============================================================================
308
+ # v0.15.4 PR-B4: Lifecycle observability — log_lifecycle_metric
309
+ # =============================================================================
310
+ # Plan: docs/plans/v0.15-phase-b-plan-v3.md §B4 (P2.1 critic-round-2 fix).
311
+ # Helper is GATED on $RLP_LIFECYCLE_METRICS=1 (no-op when unset). Emits to
312
+ # debug.log via log_debug, in a backgrounded subshell so the caller does not
313
+ # block on the FS write. The Node-side mirror is src/node/util/lifecycle-
314
+ # metrics.mjs LifecycleMetricsCollector.
315
+ #
316
+ # v0.15.4 audit M2: concurrent-appender semantics — `( ... ) &!` spawns a
317
+ # disowned subshell per metric. Multiple metrics can fire in rapid succession
318
+ # (e.g., during iter teardown) and race on debug.log. Strictly, POSIX ties
319
+ # PIPE_BUF atomicity to pipes/FIFOs, not regular files; for files, O_APPEND
320
+ # only guarantees each write starts at end-of-file. A single LIFECYCLE line is
321
+ # ~150 bytes, so on local filesystems (APFS, ext4, xfs) concurrent appends
322
+ # stay intact in practice. On NFS / FUSE / some Docker overlay setups that may not hold; in those
323
+ # environments, expect possible interleaving. This is best-effort logging
324
+ # by design — the metric values land in campaign.jsonl via the Node leader's
325
+ # batched flush as the canonical authoritative record. debug.log is an
326
+ # audit aid, not the source of truth.
327
+ #
328
+ # Args:
329
+ # $1 metric_name e.g. iter_signal_write_to_read_ms
330
+ # $2 value_ms integer milliseconds (will be coerced via printf %d)
331
+ # $3 context (optional, free-form key=val pairs joined with spaces)
332
+ #
333
+ # Side effects:
334
+ # - When flag unset: returns 0 immediately (no fork, no FS call).
335
+ # - When flag set: forks `( log_debug "..." ) &!` to debug.log.
336
+ #
337
+ # Examples:
338
+ # log_lifecycle_metric "iter_signal_write_to_read_ms" "$delta" \
339
+ # "iter=$ITERATION us=$us_id pane=$WORKER_PANE"
340
+ # log_lifecycle_metric "pane_reap_latency_ms" "$delta" \
341
+ # "iter=$ITERATION sentinel=done-claim"
342
+ log_lifecycle_metric() {
343
+ [[ "${RLP_LIFECYCLE_METRICS:-0}" == "1" ]] || return 0
344
+ local metric="$1"
345
+ local value_ms="$2"
346
+ local ctx="${3:-}"
347
+ [[ -n "$metric" && -n "$value_ms" ]] || return 0
348
+ if typeset -f log_debug >/dev/null 2>&1; then
349
+ ( log_debug "[LIFECYCLE] metric=$metric value_ms=$value_ms $ctx" ) &!
350
+ fi
351
+ return 0
352
+ }
353
+
288
354
  # PR-A (Bug #10) — validate operator-written manual recovery artifacts.
289
355
  # Returns 0 when all 5 checks pass; 1 otherwise. Sets RECOVERY_FAIL_REASON
290
356
  # (global) on failure for caller logging. Mirrors the Node-side helper
@@ -710,6 +710,10 @@ handle_worker_exit_codex() {
710
710
  dc_us_id=$(jq -r '.us_id // "unknown"' "$DONE_CLAIM_FILE" 2>/dev/null)
711
711
  log " Codex worker completed with done-claim (us_id=$dc_us_id) and clean tree. Auto-generating signal."
712
712
  echo '{"iteration":'"$iter"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated after codex exit (clean tree)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
713
+ # v0.15.4 PR-B2-FIX: codex worker pane already exited — reaper would no-op,
714
+ # but lock done-claim as defense-in-depth so any orphaned subprocess cannot
715
+ # rewrite the file before lib_ralph_desk.zsh:602 archives it.
716
+ _lock_sentinel "$DONE_CLAIM_FILE"
713
717
  _emit_a4_fallback_audit "$dc_us_id" "$iter" "codex_exit_with_done_claim_clean"
714
718
  return 0
715
719
  }
@@ -925,7 +929,9 @@ create_session() {
925
929
  BASELINE_COMMIT=$(git -C "$ROOT" rev-parse HEAD 2>/dev/null || echo "none")
926
930
 
927
931
  # Truncate cost-log for fresh run (previous data in versioned campaign reports)
928
- > "$COST_LOG"
932
+ # NOTE: ': >' not bare '>' — in zsh a bare redirect with no command runs $NULLCMD
933
+ # (=cat), which blocks reading stdin when the leader has an open TTY (D-1 dogfood hang).
934
+ : > "$COST_LOG"
929
935
 
930
936
  # v5.7 §4.2: WITH_SELF_VERIFICATION=1 is hard-rejected at script entry now,
931
937
  # so by the time we reach create_session() the flag is guaranteed to be 0.
@@ -1849,8 +1855,8 @@ write_worker_trigger() {
1849
1855
  else
1850
1856
  echo "- **Test Spec**: Read \`$DESK/plans/test-spec-${SLUG}.md\` (full — find ${next_us} section)"
1851
1857
  fi
1852
- echo "When done, signal verify with us_id=\"${next_us}\" (not \"ALL\")."
1853
- echo "Signal format: {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", ...}"
1858
+ echo "When done, you MUST WRITE (not just print) the verify signal to the iter-signal FILE at: ${SIGNAL_FILE}"
1859
+ echo "Write this exact JSON to that file (us_id=\"${next_us}\", not \"ALL\"): {\"iteration\": N, \"status\": \"verify\", \"us_id\": \"${next_us}\", \"summary\": \"what was done\", \"timestamp\": \"ISO\"}"
1854
1860
  echo ""
1855
1861
  echo "**Update the campaign memory's 'Next Iteration Contract' to reflect ${next_us}.**"
1856
1862
  elif [[ -n "$VERIFIED_US" ]]; then
@@ -2292,6 +2298,15 @@ poll_for_signal() {
2292
2298
  if _bug8_check_synth_allowed "$ITERATION" "$dc_us_id" "inline_polling_a4_clean"; then
2293
2299
  log " WARNING: done-claim exists for $dc_us_id but no iter-signal. Tree clean — auto-generating signal (A4 fallback)."
2294
2300
  log_debug "[GOV] iter=$ITERATION done_claim_without_signal=true us_id=$dc_us_id action=auto_generate_signal"
2301
+ # v0.15.4 PR-B2-FIX: Worker pane is alive and idling post-done-claim
2302
+ # (the canonical Bug #5/7 race window). Reap before synthesizing the
2303
+ # signal so the worker cannot revise done-claim or emit a late
2304
+ # iter-signal that races the leader's synthesized one. Mirror of
2305
+ # Bug #7 Fix-Q parity at run_ralph_desk.zsh:3181 — kill before lock,
2306
+ # lock before synth-write so the next leader read sees a frozen
2307
+ # done-claim and a fresh signal_file in that order.
2308
+ _kill_pane_process "$pane_id" "worker-a4"
2309
+ _lock_sentinel "$DONE_CLAIM_FILE"
2295
2310
  echo '{"iteration":'"$ITERATION"',"status":"verify","us_id":"'"$dc_us_id"'","summary":"auto-generated by A4 fallback (done-claim + clean tree)","timestamp":"'"$(date -u +%Y-%m-%dT%H:%M:%SZ)"'"}' > "$signal_file"
2296
2311
  _emit_a4_fallback_audit "$dc_us_id" "$ITERATION" "inline_polling_a4_clean"
2297
2312
  return 0
@@ -3180,6 +3195,11 @@ main() {
3180
3195
  # self-review and rewrite iter-signal.json (1m43s drift observed).
3181
3196
  _kill_pane_process "$WORKER_PANE" "worker"
3182
3197
  _lock_sentinel "$SIGNAL_FILE"
3198
+ # v0.15.4 PR-B2-FIX: same worker pass also produced done-claim. Freeze
3199
+ # it alongside iter-signal so Bug #8 gates and the iter-NNN-done-claim
3200
+ # archive (lib_ralph_desk.zsh:602) read a snapshot the worker can no
3201
+ # longer revise. Symmetric with iter-signal/verdict lock contract.
3202
+ _lock_sentinel "$DONE_CLAIM_FILE"
3183
3203
  # PR-0b-narrow: stamp leader handshake ack on the iter-signal (audit-only).
3184
3204
  _stamp_ack_field "$SIGNAL_FILE"
3185
3205
  else
@@ -1,49 +0,0 @@
1
- # Bug Report Overhaul — P2/P3 Backlog
2
-
3
- > Companion to `bug-report-overhaul-v1.md` (PR-A/B/C plan).
4
- > User stop-rule: ralplan iterates only until P0+P1 = 0; P2 and below are captured here, NOT blockers.
5
- > Re-prioritize from this file in a future ralplan when the operator-minutes-saved metric from PR-A/B/C lands.
6
-
7
- ---
8
-
9
- ## P2 — should fix in a follow-up PR after PR-A/B/C land
10
-
11
- ### From v0 plan (Option C/D, deferred features)
12
-
13
- - **Heartbeat-warning sidecar (Option B from v0)** — emit `<slug>-warning.{md,json}` when heartbeat anomaly crosses 50% of `iter-timeout`. Lets operator pre-empt a BLOCKED before the 30-min wall hits. Decoupled from this PR set because (a) report-quality is the dominant pain (D1), and (b) warning sidecar adds a second sentinel surface that risks false-positive fatigue. Revisit after PR-A/B land and we measure how many BLOCKEDs would have been pre-empted.
14
- - **GitHub Issues integration (Option D from v0)** — POST blocked context to a configured GitHub repo issue. Requires per-repo authn story (token storage, network retry, rate-limits) — violates principle 3 in the current PR set. Re-evaluate after a credible authn proposal exists.
15
- - **Pattern-learning loop** — mine `~/.claude/ralph-desk/analytics/*/bug-reports/` for emerging clusters. Auto-extends `docs/bug-patterns.json` with new candidate signatures for human review.
16
- - **Cross-campaign bug-report dashboard in `/rlp-desk analytics`** — surface patterns across projects.
17
- - **Auto-suggest "this looks like Bug #N — try fix-X" inline in CLI output** — operationalize PR-C's `pattern_match` data with an inline suggestion. Held back so the deterministic Jaccard implementation can be calibrated against real campaign data first.
18
- - **Operator-CLI `/rlp-desk recover <slug> --to verify`** — write the manual recovery artifacts (`iter-signal.json`, `done-claim.json`, `status.json` patch) deterministically. Currently a hand-rolled `jq` pipeline per Bug #10 §7 workaround.
19
-
20
- ### From Codex Critic Round 2 (BACKLOG)
21
-
22
- - **[P2-1]** PR-A `_validateOperatorRecoveryArtifacts` return shape — current pseudo-code mixes `if (valid)` (boolean coercion) with `valid.reason` (object access). Resolve at implementation time to either `{ ok: bool, reason: string }` (object) or pure boolean + separate side-channel for the warning text. Affects the audit log line shape.
23
- - **[P2-2]** PR-A test summary in §5 says "5 ACs (R1–R5)" but §8 added AC-R6 (`_skipNextWorkerDispatch` cleared after one use). Update §5 to "6 ACs (R1–R6)" for consistency before PR-A merges.
24
-
25
- ### From Codex Critic Round 3 (BACKLOG)
26
-
27
- - **[P2-3]** §9 step 5 banner-aware diff command only covers `run_ralph_desk.zsh`. PR-A and PR-B both also touch `lib_ralph_desk.zsh`. Add a matching `diff <(cat src/scripts/lib_ralph_desk.zsh) <(tail -n +N ~/.claude/ralph-desk/scripts/lib_ralph_desk.zsh)` step in the implementation runbook (verify the right `tail -n +N` offset at impl time — `lib_*.zsh` is sourced and may have no shebang). Extend to `init_ralph_desk.zsh` if PR-B touches it.
28
-
29
- ## P3 — nice-to-have polish
30
-
31
- ### From Codex Critic Round 2
32
-
33
- - **[P3-1]** Option C/D/E rejection rationale in v1 §4 says "Same as v0" — acceptable because v0 is co-located, but an inline one-sentence rationale would make the v1 plan self-contained for future readers who do not have the v0 file.
34
-
35
- ### From Architect Round 1 (residual notes)
36
-
37
- - Validate the `bug-patterns.json` Jaccard threshold (0.7) against actual past blocks once we have ≥20 historical reports — current threshold is hand-picked. Likely needs a small calibration script in `scripts/`.
38
- - Consider whether `bug-reports/` should be listed in the default `.gitignore` that the npm tarball ships for newly initialized projects — currently the schema doc only recommends that operators add it themselves.
39
-
40
- ---
41
-
42
- ## Promotion criteria (when to re-ralplan one of these)
43
-
44
- A backlog item moves back into a planner draft when **any** of these is true:
45
-
46
- 1. PR-A/B/C land and we measure ≥3 BLOCKEDs where the deferred item would have moved D1 by ≥10 minutes (e.g. heartbeat warning would have pre-empted a 30-min wait).
47
- 2. An operator hand-files ≥2 bug reports about the same backlog gap (a signal that the deferral was wrong).
48
- 3. The `bug-patterns.json` seed becomes too large for human authoring (≥30 entries) — triggers the pattern-learning loop item.
49
- 4. A user explicitly asks for one (e.g. operator-CLI `/rlp-desk recover` once they tire of hand-rolled jq pipelines).