@ai-dev-methodologies/rlp-desk 0.14.6 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,12 @@ import { shellQuote } from '../util/shell-quote.mjs';
10
10
  import { ONE_MILLION_BETA, wantsOneMillionContext } from '../constants.mjs';
11
11
  import { initCampaign } from '../init/campaign-initializer.mjs';
12
12
  import { LEGACY_DESK_REL, resolveDeskRoot } from '../util/desk-root.mjs';
13
- import { writeSentinelExclusive } from '../shared/fs.mjs';
13
+ import {
14
+ lockSentinelFile as defaultLockSentinelFile,
15
+ stampAckField as defaultStampAckField,
16
+ unlockSentinelFile,
17
+ writeSentinelExclusive,
18
+ } from '../shared/fs.mjs';
14
19
  import {
15
20
  TimeoutError,
16
21
  WorkerExitedError,
@@ -29,7 +34,10 @@ import {
29
34
  } from '../reporting/campaign-reporting.mjs';
30
35
  import {
31
36
  createPane as defaultCreatePane,
37
+ killPaneProcess as defaultKillPaneProcess,
32
38
  sendKeys as defaultSendKeys,
39
+ sendRawKey as defaultSendRawKey,
40
+ waitForProcessExit as defaultWaitForProcessExit,
33
41
  } from '../tmux/pane-manager.mjs';
34
42
 
35
43
  const execFileAsync = promisify(execFile);
@@ -128,6 +136,39 @@ function buildPaths(rootDir, slug, env = process.env) {
128
136
  };
129
137
  }
130
138
 
139
// Bug #8 PR-B: default git working-tree probe used when run() is not given a
// `checkWorkingTree` stub.
//
// Result shapes:
//   - { ok: false, error }              when any git invocation fails, or when
//                                       the repository top-level is not rootDir.
//   - { ok: true, dirty, dirtyFiles }   otherwise; dirtyFiles holds the raw
//                                       `git status --porcelain` lines (the
//                                       caller is responsible for truncation).
async function _defaultCheckWorkingTree(rootDir) {
  // Resolve symlinks (e.g. macOS `/var` → `/private/var`); if realpath itself
  // fails, fall back to the path as given so the comparison still runs.
  const canonical = (candidate) => fs.realpath(candidate).catch(() => candidate);
  const git = (...args) => execFileAsync('git', ['-C', rootDir, ...args]);

  try {
    const reportedTop = (await git('rev-parse', '--show-toplevel')).stdout.trim();
    const [topCanon, rootCanon] = await Promise.all([
      canonical(reportedTop),
      canonical(rootDir),
    ]);
    if (topCanon !== rootCanon) {
      // rootDir is a sub-tree of some other repository: refuse to classify.
      return { ok: false, error: `git toplevel ${reportedTop} != ${rootDir}` };
    }

    const porcelain = (await git('status', '--porcelain')).stdout;
    const dirtyFiles = porcelain.split('\n').filter((line) => line.length > 0);
    return { ok: true, dirty: dirtyFiles.length > 0, dirtyFiles };
  } catch (err) {
    return { ok: false, error: err?.message ?? String(err) };
  }
}
171
+
131
172
  async function exists(targetPath) {
132
173
  try {
133
174
  await fs.access(targetPath);
@@ -347,6 +388,110 @@ async function readCurrentState(paths, slug, options) {
347
388
  };
348
389
  }
349
390
 
391
// PR-A (Bug #10): validate operator-written recovery artifacts before the
// leader honors a hand-rolled `phase=verify` recovery.
//
// Checks, evaluated in this order (the first failure wins):
//   1. iter-signal.json and done-claim.json both exist and parse.
//   2. us_id in both artifacts equals state.current_us.
//   3. iteration in both artifacts equals state.iteration.
//   4. iter-signal.iter_signal_quality is exactly 'specific'.
//   5. When iter-NNN.worker-prompt.md exists for state.iteration, both
//      artifacts have an mtime strictly newer than it.
//
// Returns { ok: boolean, reason: string }. Never throws for a failed check;
// the caller decides what to do with a non-ok result.
async function _validateOperatorRecoveryArtifacts({ paths, state }) {
  const reject = (reason) => ({ ok: false, reason });
  const describe = (err) => err?.message ?? err;

  // Order matters: iter-signal is always examined before done-claim.
  const artifacts = [
    { label: 'iter-signal', file: paths.signalFile, data: undefined },
    { label: 'done-claim', file: paths.doneClaimFile, data: undefined },
  ];

  // 1. Both artifacts must exist and parse.
  for (const artifact of artifacts) {
    try {
      artifact.data = await readJsonIfExists(artifact.file);
    } catch (err) {
      return reject(`${artifact.label}.json parse error: ${describe(err)}`);
    }
    if (!artifact.data) {
      return reject(`${artifact.label}.json missing`);
    }
  }

  // 2 + 3. us_id, then iteration, must match status in BOTH artifacts.
  const fieldChecks = [
    { field: 'us_id', statusKey: 'current_us' },
    { field: 'iteration', statusKey: 'iteration' },
  ];
  for (const { field, statusKey } of fieldChecks) {
    for (const { label, data } of artifacts) {
      if (data[field] !== state[statusKey]) {
        return reject(
          `${label}.${field} (${data[field]}) != status.${statusKey} (${state[statusKey]})`,
        );
      }
    }
  }

  // 4. The signal must be flagged as 'specific'.
  const quality = artifacts[0].data.iter_signal_quality;
  if (quality !== 'specific') {
    return reject(`iter-signal.iter_signal_quality (${quality}) != 'specific'`);
  }

  // 5. Staleness guard against this iteration's worker prompt. A missing
  //    prompt file yields 0, which makes the check pass vacuously.
  const promptFile = path.join(
    paths.campaignLogDir,
    `iter-${String(state.iteration).padStart(3, '0')}.worker-prompt.md`,
  );
  const promptMtime = await fs.stat(promptFile).then(
    (promptStat) => promptStat.mtimeMs,
    () => 0,
  );
  if (promptMtime > 0) {
    const mtimes = [];
    try {
      for (const { file } of artifacts) {
        mtimes.push((await fs.stat(file)).mtimeMs);
      }
    } catch (err) {
      return reject(`mtime stat failed: ${describe(err)}`);
    }
    for (const [index, { label }] of artifacts.entries()) {
      if (mtimes[index] <= promptMtime) {
        return reject(
          `${label}.json mtime (${mtimes[index]}) is not strictly newer than worker-prompt mtime (${promptMtime})`,
        );
      }
    }
  }

  return { ok: true, reason: 'all five checks passed' };
}
494
+
350
495
  async function appendIterationAnalytics(paths, state, usId, verdict, options) {
351
496
  await appendCampaignAnalytics(paths.analyticsFile, {
352
497
  iter: state.iteration,
@@ -534,6 +679,12 @@ export const BLOCK_TAGS = Object.freeze({
534
679
  MALFORMED_ARTIFACT: 'malformed_artifact',
535
680
  // Backstop (run() try/finally)
536
681
  LEADER_EXITED_WITHOUT_TERMINAL_STATE: 'leader_exited_without_terminal_state',
682
+ // Bug #8 (Plan v6 PR-B): refuse to synthesize verify signal when codex
683
+ // worker exited without committing. Three new tags route through
684
+ // _handlePollFailure with reasonOverride/categoryOverride.
685
+ CODEX_EXIT_NO_DONE_CLAIM: 'codex_exit_no_done_claim',
686
+ GIT_STATE_UNVERIFIABLE: 'git_state_unverifiable',
687
+ WORKER_INCOMPLETE_UNCOMMITTED: 'worker_incomplete_uncommitted',
537
688
  });
538
689
 
539
690
  // P1-D Failure Taxonomy classifier. governance §1f locks the reason_category
@@ -619,6 +770,32 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
619
770
  action = 'investigate_leader_logs';
620
771
  failureCategory = 'leader_exited_without_terminal_state';
621
772
  break;
773
+ // Bug #8 PR-B — codex worker exited but did not write done-claim. Refuse
774
+ // to synthesize a verify signal; surface as infra_failure so wrapper does
775
+ // not retry blindly.
776
+ case BLOCK_TAGS.CODEX_EXIT_NO_DONE_CLAIM:
777
+ category = 'infra_failure';
778
+ recoverable = false;
779
+ action = 'investigate_pane_logs';
780
+ failureCategory = 'codex_exit_no_done_claim';
781
+ break;
782
+ // Bug #8 PR-B — git status could not be resolved (not a repo, git binary
783
+ // missing, etc). Without git we cannot prove the working tree is clean,
784
+ // so refuse to synthesize.
785
+ case BLOCK_TAGS.GIT_STATE_UNVERIFIABLE:
786
+ category = 'infra_failure';
787
+ recoverable = false;
788
+ action = 'investigate_git_state';
789
+ failureCategory = 'git_state_unverifiable';
790
+ break;
791
+ // Bug #8 PR-B — worker said it was done (done-claim present) but the tree
792
+ // is dirty. Recoverable: next iteration's worker can finish committing.
793
+ case BLOCK_TAGS.WORKER_INCOMPLETE_UNCOMMITTED:
794
+ category = 'metric_failure';
795
+ recoverable = true;
796
+ action = 'retry_after_fix';
797
+ failureCategory = 'worker_incomplete_uncommitted';
798
+ break;
622
799
  default:
623
800
  category = 'metric_failure';
624
801
  recoverable = false;
@@ -650,9 +827,41 @@ async function _handlePollFailure(error, ctx) {
650
827
  options,
651
828
  role, // 'worker' | 'verifier' | 'final_verifier' | 'flywheel' | 'guard'
652
829
  usIdOverride,
830
+ // Bug #8 PR-B: when the caller has already classified the failure (e.g.
831
+ // codex done-claim/git gate), forward an explicit BLOCK_TAGS value as
832
+ // categoryOverride and a reason string. Named `categoryOverride` per
833
+ // Plan v6 PRD (it overrides the tag→reason_category mapping). Existing 5
834
+ // callers omit both and the legacy error→tag mapping below runs unchanged.
835
+ categoryOverride,
836
+ reasonOverride,
653
837
  } = ctx;
654
838
  const usId = usIdOverride ?? state.current_us;
655
839
 
840
+ if (categoryOverride) {
841
+ state.phase = 'blocked';
842
+ const classification = _classifyBlock(categoryOverride, { state, slug });
843
+ const reasonText = reasonOverride ?? `${role} blocked: ${categoryOverride}`;
844
+ await writeSentinel(paths.blockedSentinel, 'blocked', usId, reasonText, classification, paths);
845
+ await writeStatus(paths, state, options.onStatusChange, options.now);
846
+ await generateCampaignReport({
847
+ slug,
848
+ reportFile: paths.reportFile,
849
+ prdFile: paths.prdFile,
850
+ statusFile: paths.statusFile,
851
+ analyticsFile: paths.analyticsFile,
852
+ now: resolveNow(options.now),
853
+ blockedReason: reasonText,
854
+ blockedCategory: classification.reason_category,
855
+ });
856
+ return {
857
+ status: 'blocked',
858
+ usId,
859
+ reason: reasonText,
860
+ category: classification.reason_category,
861
+ statusFile: paths.statusFile,
862
+ };
863
+ }
864
+
656
865
  let tag;
657
866
  let reason;
658
867
  if (error instanceof WorkerExitedError) {
@@ -872,6 +1081,10 @@ async function runFinalSequentialVerify({
872
1081
  pollForSignal,
873
1082
  runIntegrationCheck,
874
1083
  iterTimeoutMs,
1084
+ // Bug #7 Fix-Q/R: optional reaper. Passed from _runCampaignBody so each
1085
+ // per-US verdict kills the verifier TUI before the next per-US dispatch
1086
+ // reuses the same pane. No-op when undefined (legacy/test callers).
1087
+ reapProducer,
875
1088
  }) {
876
1089
  const verifierModel = state.final_verifier_model;
877
1090
 
@@ -893,6 +1106,10 @@ async function runFinalSequentialVerify({
893
1106
  timeoutMs: iterTimeoutMs,
894
1107
  });
895
1108
 
1109
+ if (typeof reapProducer === 'function') {
1110
+ await reapProducer(verifierPaneId, paths.verdictFile);
1111
+ }
1112
+
896
1113
  if (verdict.verdict !== 'pass') {
897
1114
  return {
898
1115
  status: 'continue',
@@ -1078,6 +1295,46 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1078
1295
  const createPane = options.createPane ?? defaultCreatePane;
1079
1296
  const createSession = options.createSession ?? defaultCreateSession;
1080
1297
  const pollForSignal = options.pollForSignal ?? defaultPollForSignal;
1298
+ // Bug #7 Fix-Q/R: post-sentinel reaper. Producer (claude/codex TUI) must be
1299
+ // interrupted the moment leader has consumed the sentinel; otherwise the
1300
+ // pane lingers in idle prompt and self-reviews for ~2min. lockSentinel
1301
+ // freezes the file mtime as defense-in-depth. All four are injectable so
1302
+ // existing tests with fake sendKeys keep working (us006 createTmuxFakes).
1303
+ const sendRawKey = options.sendRawKey ?? defaultSendRawKey;
1304
+ const waitForProcessExit = options.waitForProcessExit ?? defaultWaitForProcessExit;
1305
+ const killPaneProcess = options.killPaneProcess ?? defaultKillPaneProcess;
1306
+ const lockSentinel = options.lockSentinelFile ?? defaultLockSentinelFile;
1307
+ const stampAckField = options.stampAckField ?? defaultStampAckField;
1308
+ const reapProducer = async (paneId, sentinelFile) => {
1309
+ if (!paneId) return;
1310
+ await killPaneProcess(paneId, {
1311
+ sendRawKey,
1312
+ waitForExit: waitForProcessExit,
1313
+ log: (msg) => console.error(msg),
1314
+ });
1315
+ // PR-0b-narrow AC-H1: after killPaneProcess, wait for the producing
1316
+ // process to actually exit before continuing. waitForProcessExit returns
1317
+ // when pane_current_command resolves to a shell (zsh/bash/sh). Wrapped
1318
+ // in try/catch — failure here is non-fatal but emits a log entry.
1319
+ try {
1320
+ await waitForProcessExit(paneId, { timeoutMs: 5000 });
1321
+ } catch (err) {
1322
+ console.error(`[handshake] waitForProcessExit failed on ${paneId} (${err?.message ?? err}); continuing`);
1323
+ }
1324
+ if (sentinelFile) {
1325
+ await lockSentinel(sentinelFile, { log: (msg) => console.error(msg) });
1326
+ // PR-0b-narrow AC-H2: stamp the leader_ack audit field. Best-effort,
1327
+ // does not block subsequent dispatch.
1328
+ await stampAckField(sentinelFile, {
1329
+ acked_by: 'leader',
1330
+ acked_at: new Date(resolveNow(options.now)).toISOString(),
1331
+ ack_pane_state: 'shell',
1332
+ }, { log: (msg) => console.error(msg) });
1333
+ }
1334
+ };
1335
+ // Bug #8 PR-B: working-tree probe injected (or default execFile git).
1336
+ // Returns { ok: boolean, dirty?: boolean, dirtyFiles?: string[], error?: string }.
1337
+ const checkWorkingTree = options.checkWorkingTree ?? _defaultCheckWorkingTree;
1081
1338
  const runIntegrationCheck = options.runIntegrationCheck ?? (async () => ({ exitCode: 0, summary: 'integration skipped' }));
1082
1339
  const maxIterations = options.maxIterations ?? 100;
1083
1340
  // v5.7 §4.19: campaign-level pollForSignal timeout (Node leader fix).
@@ -1135,6 +1392,28 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1135
1392
 
1136
1393
  let fixContractPath = null;
1137
1394
 
1395
+ // PR-A (Bug #10): operator-recovery hygiene. If the operator hand-rolled a
1396
+ // `phase=verify` recovery (jq-patches status.json, writes manual artifacts,
1397
+ // deletes the blocked sentinel), the leader MUST honor that work instead of
1398
+ // resetting to phase=worker on relaunch. The validator runs five checks
1399
+ // (see _validateOperatorRecoveryArtifacts); on full pass, _skipNextWorkerDispatch
1400
+ // is set as a one-shot flag consumed at the worker dispatch call site below.
1401
+ // On any failure the leader logs the reason and falls through to default
1402
+ // behavior.
1403
+ if (state.phase === 'verify' && state.iteration > 0) {
1404
+ const validation = await _validateOperatorRecoveryArtifacts({ paths, state });
1405
+ if (validation.ok) {
1406
+ console.error(
1407
+ `[recovery] Resuming verify phase — operator manual recovery detected (us=${state.current_us} iter=${state.iteration}): ${validation.reason}`,
1408
+ );
1409
+ state._skipNextWorkerDispatch = true;
1410
+ } else {
1411
+ console.error(
1412
+ `[recovery] phase=verify ignored, falling through to worker dispatch: ${validation.reason}`,
1413
+ );
1414
+ }
1415
+ }
1416
+
1138
1417
  // P1-E Lane Enforcement: snapshot lane mtimes before each iteration,
1139
1418
  // compare at the top of the next iteration. Drift on read-only artifacts
1140
1419
  // (PRD, test-spec, context) emits a lane_violation_warning event + audit
@@ -1143,6 +1422,11 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1143
1422
  let _laneSnapshot = await _snapshotLaneMtimes(paths);
1144
1423
 
1145
1424
  while (state.iteration <= maxIterations) {
1425
+ // Bug #7 Fix-R defensive unlock: a 0o444 sentinel left from the previous
1426
+ // iteration must not block the next producer's atomic-rename write.
1427
+ // Idempotent: missing-file calls are no-ops.
1428
+ await unlockSentinelFile(paths.signalFile);
1429
+ await unlockSentinelFile(paths.verdictFile);
1146
1430
  // Audit drift from the prior iteration before doing anything new.
1147
1431
  const _laneSnapshotAfter = await _snapshotLaneMtimes(paths);
1148
1432
  const _laneViolations = await _checkLaneViolations(paths, _laneSnapshot, _laneSnapshotAfter, state, options);
@@ -1191,6 +1475,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1191
1475
  pollForSignal,
1192
1476
  runIntegrationCheck,
1193
1477
  iterTimeoutMs,
1478
+ reapProducer,
1194
1479
  });
1195
1480
  } catch (error) {
1196
1481
  // v5.7 §4.25 — uniform poll-failure handling for final verifier.
@@ -1282,12 +1567,17 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1282
1567
  });
1283
1568
  }
1284
1569
 
1570
+ // Bug #7 Fix-Q/R: reap flywheel pane before consuming the signal.
1571
+ await reapProducer(state.flywheel_pane_id ?? state.verifier_pane_id, paths.flywheelSignalFile);
1572
+
1285
1573
  state.last_flywheel_decision = flywheelSignal.decision;
1286
1574
  // P0-A multi-mission orchestration: optionally captured from flywheel signal.
1287
1575
  // null when the flywheel did not suggest a next mission. Consumer wrappers
1288
1576
  // poll status.next_mission_candidate to chain missions without code edits.
1289
1577
  // See docs/multi-mission-orchestration.md.
1290
1578
  state.next_mission_candidate = flywheelSignal.next_mission_candidate ?? null;
1579
+ // Bug #7 Fix-R cleanup: unlock before unlink so 0o444 doesn't block.
1580
+ await unlockSentinelFile(paths.flywheelSignalFile);
1291
1581
  await fs.unlink(paths.flywheelSignalFile).catch(() => {});
1292
1582
 
1293
1583
  // Flywheel Guard (independent validation of flywheel decision)
@@ -1320,11 +1610,15 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1320
1610
  });
1321
1611
  }
1322
1612
 
1613
+ // Bug #7 Fix-Q/R: reap guard pane before mutating state.
1614
+ await reapProducer(guardPaneId, paths.flywheelGuardVerdictFile);
1615
+
1323
1616
  if (!state.flywheel_guard_count[state.current_us]) {
1324
1617
  state.flywheel_guard_count[state.current_us] = 0;
1325
1618
  }
1326
1619
  state.flywheel_guard_count[state.current_us] += 1;
1327
1620
 
1621
+ await unlockSentinelFile(paths.flywheelGuardVerdictFile);
1328
1622
  await fs.unlink(paths.flywheelGuardVerdictFile).catch(() => {});
1329
1623
 
1330
1624
  if (guardVerdict.verdict === 'inconclusive') {
@@ -1404,18 +1698,36 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1404
1698
  }
1405
1699
  }
1406
1700
 
1407
- state.phase = 'worker';
1408
- await writeStatus(paths, state, options.onStatusChange, options.now);
1409
- await dispatchWorker({
1410
- iteration: state.iteration,
1411
- paths,
1412
- slug,
1413
- usList,
1414
- state,
1415
- sendKeys,
1416
- workerPaneId: state.worker_pane_id,
1417
- fixContractPath,
1418
- });
1701
+ // PR-A (Bug #10): one-shot guard. When the operator's `phase=verify`
1702
+ // recovery was honored at campaign entry, skip both the phase reset and
1703
+ // the worker dispatch — the operator already wrote a valid iter-signal.json
1704
+ // and done-claim.json, so pollForSignal below will pick them up immediately
1705
+ // and the loop continues into the verifier phase. The flag is cleared
1706
+ // after consumption so subsequent iterations dispatch the worker normally.
1707
+ if (state._skipNextWorkerDispatch) {
1708
+ state._skipNextWorkerDispatch = false;
1709
+ console.error(
1710
+ `[recovery] Skipping worker dispatch for iter=${state.iteration} (honoring operator manual recovery)`,
1711
+ );
1712
+ // Persist phase=verify so a subsequent crash-and-relaunch sees the same
1713
+ // contract. writeStatus is intentionally called BEFORE pollForSignal so
1714
+ // the on-disk state matches what we are about to do.
1715
+ state.phase = 'verify';
1716
+ await writeStatus(paths, state, options.onStatusChange, options.now);
1717
+ } else {
1718
+ state.phase = 'worker';
1719
+ await writeStatus(paths, state, options.onStatusChange, options.now);
1720
+ await dispatchWorker({
1721
+ iteration: state.iteration,
1722
+ paths,
1723
+ slug,
1724
+ usList,
1725
+ state,
1726
+ sendKeys,
1727
+ workerPaneId: state.worker_pane_id,
1728
+ fixContractPath,
1729
+ });
1730
+ }
1419
1731
 
1420
1732
  let signal;
1421
1733
  try {
@@ -1432,8 +1744,43 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1432
1744
  });
1433
1745
  } catch (error) {
1434
1746
  if (error instanceof TimeoutError && parseModelFlag(state.worker_model).engine === 'codex') {
1435
- // v5.7 codex CLI exits cleanly after writing signal; if pollForSignal
1436
- // timed out for codex, synthesize a verify signal so the loop continues.
1747
+ // Bug #8 PR-B 4-way gate: refuse to synthesize verify signal when
1748
+ // codex worker exited without committing real work.
1749
+ // 1. done-claim absent → BLOCKED infra_failure
1750
+ // 2. git unverifiable → BLOCKED infra_failure
1751
+ // 3. done-claim + dirty tree → BLOCKED metric_failure
1752
+ // 4. done-claim + clean tree → synthesize verify (legacy path)
1753
+ const doneClaimExists = await exists(paths.doneClaimFile);
1754
+ if (!doneClaimExists) {
1755
+ return _handlePollFailure(error, {
1756
+ paths, state, slug, options,
1757
+ role: 'worker',
1758
+ categoryOverride: BLOCK_TAGS.CODEX_EXIT_NO_DONE_CLAIM,
1759
+ reasonOverride:
1760
+ 'codex worker exited (timeout) without writing done-claim; refusing to synthesize verify signal',
1761
+ });
1762
+ }
1763
+ const tree = await checkWorkingTree(rootDir);
1764
+ if (!tree.ok) {
1765
+ return _handlePollFailure(error, {
1766
+ paths, state, slug, options,
1767
+ role: 'worker',
1768
+ categoryOverride: BLOCK_TAGS.GIT_STATE_UNVERIFIABLE,
1769
+ reasonOverride:
1770
+ `git status unverifiable (${tree.error ?? 'unknown'}); refusing to synthesize verify signal`,
1771
+ });
1772
+ }
1773
+ if (tree.dirty) {
1774
+ const sample = (tree.dirtyFiles ?? []).slice(0, 5).join(', ');
1775
+ return _handlePollFailure(error, {
1776
+ paths, state, slug, options,
1777
+ role: 'worker',
1778
+ categoryOverride: BLOCK_TAGS.WORKER_INCOMPLETE_UNCOMMITTED,
1779
+ reasonOverride:
1780
+ `worker_incomplete_uncommitted: done-claim present but tree dirty (${sample || 'no file list'})`,
1781
+ });
1782
+ }
1783
+ // Clean tree — preserve the legacy synthesize behaviour.
1437
1784
  signal = {
1438
1785
  iteration: state.iteration,
1439
1786
  status: 'verify',
@@ -1450,6 +1797,11 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1450
1797
  }
1451
1798
  }
1452
1799
 
1800
+ // Bug #7 Fix-Q/R: reap the worker pane the instant we accept the signal so
1801
+ // claude/codex cannot self-review and rewrite iter-signal.json. Runs even
1802
+ // for the codex-fallback synthesized signal (no-op on a dead pane).
1803
+ await reapProducer(state.worker_pane_id, paths.signalFile);
1804
+
1453
1805
  // US-019 R7 P1-G: verify_partial malformed downgrade.
1454
1806
  // verify_partial requires verified_acs[] to be a non-empty array. Otherwise the verifier
1455
1807
  // has nothing to evaluate and we must treat the signal as broken contract → blocked.
@@ -1519,6 +1871,11 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
1519
1871
  });
1520
1872
  }
1521
1873
 
1874
+ // Bug #7 Fix-Q/R: reap verifier pane immediately after accepting the
1875
+ // verdict — without this the codex/claude TUI keeps running for ~2min and
1876
+ // can rewrite verify-verdict.json (mtime drift observed in 19th launch).
1877
+ await reapProducer(state.verifier_pane_id, paths.verdictFile);
1878
+
1522
1879
  if (verdict.verdict === 'pass') {
1523
1880
  state.consecutive_failures = 0;
1524
1881
  if (!state.verified_us.includes(usId)) {
@@ -59,3 +59,86 @@ export async function writeSentinelExclusive(targetPath, content) {
59
59
  }
60
60
  return { wrote: true };
61
61
  }
62
+
63
// Bug #7 Fix-R: freeze an accepted sentinel by chmod-ing it to 0o444.
// Strictly best-effort:
//   - a missing file (ENOENT) is a silent no-op;
//   - any other chmod failure is reported through `log` at most once per
//     process, then ignored.
// Process kill (Fix-Q) is the primary defense; this lock is defense-in-depth.
let _sentinelLockWarningEmitted = false;
export async function lockSentinelFile(filePath, { log = (msg) => console.error(msg) } = {}) {
  let failureCode;
  try {
    await fs.chmod(filePath, 0o444);
    return;
  } catch (err) {
    failureCode = err?.code;
  }

  // The sentinel may already have been consumed and unlinked elsewhere.
  if (failureCode === 'ENOENT') return;
  // Only the first non-ENOENT failure is worth a log line.
  if (_sentinelLockWarningEmitted) return;

  log(`[bug7] chmod 0444 on ${filePath} failed (${failureCode ?? 'unknown'}); post-sentinel write-protection unavailable on this FS.`);
  _sentinelLockWarningEmitted = true;
}
83
+
84
// Counterpart of lockSentinelFile: restore mode 0o644 so a later unlink or
// atomic-rename write is not rejected because of the read-only mode.
// Best-effort and idempotent: every chmod failure (missing file included) is
// deliberately ignored so cleanup can proceed.
export async function unlockSentinelFile(filePath) {
  await fs.chmod(filePath, 0o644).catch(() => {});
}
94
+
95
// PR-0b-narrow (Plan v6) — stamp the leader handshake ack onto an
// already-locked sentinel as `content.leader_ack`. Audit-only and best-effort:
// callers must NOT rely on the ack landing for ordering guarantees.
//
// Sequence:
//   1. chmod 0o644 (the sentinel was locked by lockSentinelFile)
//   2. read + JSON.parse
//   3. merge the ack as content.leader_ack
//   4. atomic write: serialize to a sibling temp file, then rename over the
//      sentinel so a concurrent reader never observes truncated JSON
//   5. chmod 0o444 (re-lock)
//
// Failure modes that are swallowed on purpose (the function never throws for
// them):
//   - File missing (ENOENT on the initial chmod): return immediately.
//   - Unreadable file or malformed JSON: logged, ack dropped, file re-locked.
//   - Parsed value is not a plain JSON object (null, scalar, or array): ack
//     dropped and the file is left byte-for-byte untouched. An array cannot
//     carry a named field through JSON.stringify, so rewriting it would only
//     churn the file without recording the ack.
//   - Write/rename failure: logged, temp file removed, original left intact.
//   - chmod failures other than ENOENT: ignored.
//
// @param {string} filePath  Sentinel JSON file to stamp.
// @param {object} ack       Value stored under `leader_ack`.
// @param {{ log?: (msg: string) => void }} [options]  Sink for diagnostics.
// @returns {Promise<void>}
export async function stampAckField(filePath, ack, { log = (msg) => console.error(msg) } = {}) {
  const relock = async () => {
    try {
      await fs.chmod(filePath, 0o444);
    } catch {
      // best-effort: the lock is defense-in-depth only.
    }
  };

  try {
    await fs.chmod(filePath, 0o644);
  } catch (err) {
    if (err?.code === 'ENOENT') return; // sentinel gone — nothing to stamp
    // Any other chmod failure is non-fatal: still attempt the stamp in case
    // the filesystem already permits the write.
  }

  let content;
  try {
    content = JSON.parse(await fs.readFile(filePath, 'utf8'));
  } catch (err) {
    log(`[stamp-ack] read/parse failed for ${filePath} (${err?.code ?? err?.message ?? 'unknown'}); ack dropped (audit-only)`);
    await relock();
    return;
  }

  if (content === null || typeof content !== 'object' || Array.isArray(content)) {
    await relock();
    return;
  }

  content.leader_ack = ack;

  // Write-then-rename keeps the sentinel whole at every instant: readers see
  // either the old content or the fully stamped content, never a partial file.
  const tmpPath = `${filePath}.${process.pid}.ack.tmp`;
  try {
    await fs.writeFile(tmpPath, `${JSON.stringify(content, null, 2)}\n`, 'utf8');
    await fs.rename(tmpPath, filePath);
  } catch (err) {
    log(`[stamp-ack] write failed for ${filePath} (${err?.code ?? err?.message ?? 'unknown'}); ack dropped`);
    // Do not leave the temp file behind in the sentinel directory.
    await fs.unlink(tmpPath).catch(() => {});
  }

  await relock();
}
@@ -52,6 +52,12 @@ export async function sendKeys(paneId, command) {
52
52
  await runTmux(['send-keys', '-t', paneId, 'Enter'], { paneId });
53
53
  }
54
54
 
55
// Bug #7 Fix-Q: forward a single tmux key name (e.g. `C-c`) to a pane via
// `send-keys` without the `-l --` literal-text flag. Kept separate from
// sendKeys() so callers can interrupt a running TUI.
export async function sendRawKey(paneId, key) {
  const tmuxArgs = ['send-keys', '-t', paneId, key];
  await runTmux(tmuxArgs, { paneId });
}
60
+
55
61
  export async function waitForProcessExit(
56
62
  paneId,
57
63
  { pollIntervalMs = 100, timeoutMs = 5000 } = {},
@@ -75,3 +81,36 @@ export async function waitForProcessExit(
75
81
  paneId,
76
82
  });
77
83
  }
84
+
85
// Bug #7 Fix-Q: interrupt the TUI process in a pane once the leader has
// accepted its sentinel.
//
// Behavior: send `C-c`, pause for `gracePeriodMs`, send `C-c` again, then
// await `waitForExit(paneId, { timeoutMs: exitTimeoutMs })`.
//
// Fail-open: a failed key send or a failed/timed-out wait is reported through
// `log` and otherwise ignored, so this function does not reject because of
// them.
//
// Options (all injectable for tests):
//   sendRawKey     key sender, defaults to the module's sendRawKey
//   waitForExit    exit waiter, defaults to the module's waitForProcessExit
//   gracePeriodMs  pause between the two interrupts (default 800)
//   exitTimeoutMs  timeout handed to waitForExit (default 5000)
//   log            diagnostic sink (default: discard)
export async function killPaneProcess(
  paneId,
  {
    sendRawKey: sendRawKeyImpl = sendRawKey,
    waitForExit = waitForProcessExit,
    gracePeriodMs = 800,
    exitTimeoutMs = 5000,
    log = () => {},
  } = {},
) {
  const describe = (err) => err?.message ?? err;
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // One interrupt attempt; a send failure (e.g. pane already gone) is logged
  // and swallowed.
  const interrupt = async () => {
    try {
      await sendRawKeyImpl(paneId, 'C-c');
    } catch (err) {
      log(`[bug7] killPaneProcess sendRawKey C-c failed for ${paneId}: ${describe(err)}`);
    }
  };

  await interrupt();
  await pause(gracePeriodMs);
  await interrupt();

  try {
    await waitForExit(paneId, { timeoutMs: exitTimeoutMs });
  } catch (err) {
    log(`[bug7] killPaneProcess waitForExit failed for ${paneId}: ${describe(err)}`);
  }
}