@ai-dev-methodologies/rlp-desk 0.14.6 → 0.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/plans/bug-report-overhaul-backlog.md +49 -0
- package/docs/plans/bug-report-overhaul-v0.md +238 -0
- package/docs/plans/bug-report-overhaul-v1.md +319 -0
- package/docs/plans/native-agent-revert.md +184 -0
- package/docs/plans/polished-gliding-toucan.md +234 -0
- package/docs/plans/strategic-review/rlp-desk-strategic-review.md +125 -0
- package/docs/rlp-desk/signal-protocol.md +93 -0
- package/install.sh +2 -0
- package/package.json +1 -1
- package/scripts/postinstall.js +2 -0
- package/src/commands/rlp-desk.md +56 -46
- package/src/node/run.mjs +45 -7
- package/src/node/runner/campaign-main-loop.mjs +372 -15
- package/src/node/shared/fs.mjs +83 -0
- package/src/node/tmux/pane-manager.mjs +39 -0
- package/src/scripts/lib_ralph_desk.zsh +152 -0
- package/src/scripts/run_ralph_desk.zsh +218 -59
|
@@ -10,7 +10,12 @@ import { shellQuote } from '../util/shell-quote.mjs';
|
|
|
10
10
|
import { ONE_MILLION_BETA, wantsOneMillionContext } from '../constants.mjs';
|
|
11
11
|
import { initCampaign } from '../init/campaign-initializer.mjs';
|
|
12
12
|
import { LEGACY_DESK_REL, resolveDeskRoot } from '../util/desk-root.mjs';
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
lockSentinelFile as defaultLockSentinelFile,
|
|
15
|
+
stampAckField as defaultStampAckField,
|
|
16
|
+
unlockSentinelFile,
|
|
17
|
+
writeSentinelExclusive,
|
|
18
|
+
} from '../shared/fs.mjs';
|
|
14
19
|
import {
|
|
15
20
|
TimeoutError,
|
|
16
21
|
WorkerExitedError,
|
|
@@ -29,7 +34,10 @@ import {
|
|
|
29
34
|
} from '../reporting/campaign-reporting.mjs';
|
|
30
35
|
import {
|
|
31
36
|
createPane as defaultCreatePane,
|
|
37
|
+
killPaneProcess as defaultKillPaneProcess,
|
|
32
38
|
sendKeys as defaultSendKeys,
|
|
39
|
+
sendRawKey as defaultSendRawKey,
|
|
40
|
+
waitForProcessExit as defaultWaitForProcessExit,
|
|
33
41
|
} from '../tmux/pane-manager.mjs';
|
|
34
42
|
|
|
35
43
|
const execFileAsync = promisify(execFile);
|
|
@@ -128,6 +136,39 @@ function buildPaths(rootDir, slug, env = process.env) {
|
|
|
128
136
|
};
|
|
129
137
|
}
|
|
130
138
|
|
|
139
|
+
// Bug #8 PR-B: default git working-tree probe. Inline (~20 LoC) — no new
|
|
140
|
+
// module per Architect/Critic codex iter 6 consensus. Tests inject a stub via
|
|
141
|
+
// run() option `checkWorkingTree`.
|
|
142
|
+
// - returns { ok: false, error } when git rev-parse fails (not a repo, etc).
|
|
143
|
+
// - returns { ok: true, dirty: bool, dirtyFiles[] } otherwise.
|
|
144
|
+
// - dirtyFiles are raw `git status --porcelain` lines (caller truncates).
|
|
145
|
+
/**
 * Default git working-tree probe used when the caller does not inject one.
 *
 * Runs `git rev-parse --show-toplevel` and `git status --porcelain` against
 * `rootDir` (via `git -C`).
 *
 * @param {string} rootDir - Directory expected to be the git toplevel.
 * @returns {Promise<{ok: boolean, dirty?: boolean, dirtyFiles?: string[], error?: string}>}
 *   `{ ok: false, error }` when either git command fails or when the git
 *   toplevel is not `rootDir`; otherwise `{ ok: true, dirty, dirtyFiles }`
 *   where `dirtyFiles` holds the raw non-empty porcelain lines.
 */
async function _defaultCheckWorkingTree(rootDir) {
  const runGit = (...gitArgs) => execFileAsync('git', ['-C', rootDir, ...gitArgs]);
  // Fall back to the un-resolved path when realpath itself fails, so that a
  // resolution error never masks the toplevel comparison below.
  const canonicalize = (candidate) => fs.realpath(candidate).catch(() => candidate);

  try {
    const { stdout: toplevelRaw } = await runGit('rev-parse', '--show-toplevel');
    const toplevel = toplevelRaw.trim();

    // Compare canonical paths so symlink-equivalent locations (e.g. macOS
    // `/var` vs `/private/var`) are treated as the same directory.
    const [canonicalToplevel, canonicalRoot] = await Promise.all([
      canonicalize(toplevel),
      canonicalize(rootDir),
    ]);
    if (canonicalToplevel !== canonicalRoot) {
      // rootDir is a sub-tree of some other repository: refuse to classify.
      return { ok: false, error: `git toplevel ${toplevel} != ${rootDir}` };
    }

    const { stdout: porcelain } = await runGit('status', '--porcelain');
    const dirtyFiles = porcelain.split('\n').filter((entry) => entry !== '');
    return { ok: true, dirty: dirtyFiles.length !== 0, dirtyFiles };
  } catch (failure) {
    return { ok: false, error: failure?.message ?? String(failure) };
  }
}
|
|
171
|
+
|
|
131
172
|
async function exists(targetPath) {
|
|
132
173
|
try {
|
|
133
174
|
await fs.access(targetPath);
|
|
@@ -347,6 +388,110 @@ async function readCurrentState(paths, slug, options) {
|
|
|
347
388
|
};
|
|
348
389
|
}
|
|
349
390
|
|
|
391
|
+
// PR-A (Bug #10): validate operator-written recovery artifacts. When the
|
|
392
|
+
// operator hand-rolls a `phase=verify` recovery (jq-patches status.json,
|
|
393
|
+
// writes iter-signal.json + done-claim.json by hand, deletes the blocked
|
|
394
|
+
// sentinel), the leader must NOT silently overwrite that work on relaunch.
|
|
395
|
+
// All five checks must pass for the leader to honor the recovery.
|
|
396
|
+
//
|
|
397
|
+
// Returns { ok: boolean, reason: string }. On any failure the caller falls
|
|
398
|
+
// through to the default behavior (worker dispatch) — defensive by design.
|
|
399
|
+
/**
 * Validate operator-written recovery artifacts before a hand-rolled
 * `phase=verify` resume is honored.
 *
 * Five checks run in order; the first failure short-circuits:
 *   1. iter-signal.json and done-claim.json both load (a falsy result from
 *      `readJsonIfExists` is reported as "missing", a throw as "parse error").
 *   2. `us_id` in both artifacts equals `state.current_us`.
 *   3. `iteration` in both artifacts equals `state.iteration`.
 *   4. `signal.iter_signal_quality` is exactly `'specific'`.
 *   5. Both artifact mtimes are strictly newer than this iteration's
 *      `iter-NNN.worker-prompt.md` mtime (skipped when that file does not exist).
 *
 * @param {{paths: object, state: object}} args - `paths` supplies
 *   `signalFile`, `doneClaimFile` and `campaignLogDir`; `state` supplies
 *   `current_us` and `iteration`.
 * @returns {Promise<{ok: boolean, reason: string}>} Never throws for the
 *   failure modes handled below; `reason` explains the first failed check.
 */
async function _validateOperatorRecoveryArtifacts({ paths, state }) {
  // 1. iter-signal.json + done-claim.json must both exist and parse.
  let signal;
  try {
    signal = await readJsonIfExists(paths.signalFile);
  } catch (err) {
    return { ok: false, reason: `iter-signal.json parse error: ${err?.message ?? err}` };
  }
  if (!signal) return { ok: false, reason: 'iter-signal.json missing' };

  let doneClaim;
  try {
    doneClaim = await readJsonIfExists(paths.doneClaimFile);
  } catch (err) {
    return { ok: false, reason: `done-claim.json parse error: ${err?.message ?? err}` };
  }
  if (!doneClaim) return { ok: false, reason: 'done-claim.json missing' };

  // 2 + 3. us_id and iteration must match status in BOTH artifacts. The outer
  // loop is per field so the reporting order stays: signal.us_id,
  // done-claim.us_id, signal.iteration, done-claim.iteration.
  const artifacts = [
    ['iter-signal', signal],
    ['done-claim', doneClaim],
  ];
  const identityFields = [
    ['us_id', 'current_us'],
    ['iteration', 'iteration'],
  ];
  for (const [artifactKey, statusKey] of identityFields) {
    for (const [label, artifact] of artifacts) {
      if (artifact[artifactKey] !== state[statusKey]) {
        return {
          ok: false,
          reason: `${label}.${artifactKey} (${artifact[artifactKey]}) != status.${statusKey} (${state[statusKey]})`,
        };
      }
    }
  }

  // 4. iter_signal_quality must be 'specific' (not generic / vague).
  if (signal.iter_signal_quality !== 'specific') {
    return {
      ok: false,
      reason: `iter-signal.iter_signal_quality (${signal.iter_signal_quality}) != 'specific'`,
    };
  }

  // 5. Both artifact mtimes must be NEWER than this iteration's worker-prompt
  //    mtime — guards against running `phase=verify` against stale artifacts.
  const promptFile = path.join(
    paths.campaignLogDir,
    `iter-${String(state.iteration).padStart(3, '0')}.worker-prompt.md`,
  );
  let promptMtime = 0;
  try {
    promptMtime = (await fs.stat(promptFile)).mtimeMs;
  } catch (err) {
    // Only a genuinely absent prompt file lets the check pass vacuously (the
    // iteration never dispatched). Any other stat failure means freshness
    // cannot be proven, so the recovery is refused rather than trusted.
    if (err?.code !== 'ENOENT') {
      return { ok: false, reason: `worker-prompt stat failed: ${err?.message ?? err}` };
    }
  }

  if (promptMtime > 0) {
    let signalMtime = 0;
    let doneClaimMtime = 0;
    try {
      signalMtime = (await fs.stat(paths.signalFile)).mtimeMs;
      doneClaimMtime = (await fs.stat(paths.doneClaimFile)).mtimeMs;
    } catch (err) {
      return { ok: false, reason: `mtime stat failed: ${err?.message ?? err}` };
    }
    if (signalMtime <= promptMtime) {
      return {
        ok: false,
        reason: `iter-signal.json mtime (${signalMtime}) is not strictly newer than worker-prompt mtime (${promptMtime})`,
      };
    }
    if (doneClaimMtime <= promptMtime) {
      return {
        ok: false,
        reason: `done-claim.json mtime (${doneClaimMtime}) is not strictly newer than worker-prompt mtime (${promptMtime})`,
      };
    }
  }

  return { ok: true, reason: 'all five checks passed' };
}
|
|
494
|
+
|
|
350
495
|
async function appendIterationAnalytics(paths, state, usId, verdict, options) {
|
|
351
496
|
await appendCampaignAnalytics(paths.analyticsFile, {
|
|
352
497
|
iter: state.iteration,
|
|
@@ -534,6 +679,12 @@ export const BLOCK_TAGS = Object.freeze({
|
|
|
534
679
|
MALFORMED_ARTIFACT: 'malformed_artifact',
|
|
535
680
|
// Backstop (run() try/finally)
|
|
536
681
|
LEADER_EXITED_WITHOUT_TERMINAL_STATE: 'leader_exited_without_terminal_state',
|
|
682
|
+
// Bug #8 (Plan v6 PR-B): refuse to synthesize verify signal when codex
|
|
683
|
+
// worker exited without committing. Three new tags route through
|
|
684
|
+
// _handlePollFailure with reasonOverride/categoryOverride.
|
|
685
|
+
CODEX_EXIT_NO_DONE_CLAIM: 'codex_exit_no_done_claim',
|
|
686
|
+
GIT_STATE_UNVERIFIABLE: 'git_state_unverifiable',
|
|
687
|
+
WORKER_INCOMPLETE_UNCOMMITTED: 'worker_incomplete_uncommitted',
|
|
537
688
|
});
|
|
538
689
|
|
|
539
690
|
// P1-D Failure Taxonomy classifier. governance §1f locks the reason_category
|
|
@@ -619,6 +770,32 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
|
|
|
619
770
|
action = 'investigate_leader_logs';
|
|
620
771
|
failureCategory = 'leader_exited_without_terminal_state';
|
|
621
772
|
break;
|
|
773
|
+
// Bug #8 PR-B — codex worker exited but did not write done-claim. Refuse
|
|
774
|
+
// to synthesize a verify signal; surface as infra_failure so wrapper does
|
|
775
|
+
// not retry blindly.
|
|
776
|
+
case BLOCK_TAGS.CODEX_EXIT_NO_DONE_CLAIM:
|
|
777
|
+
category = 'infra_failure';
|
|
778
|
+
recoverable = false;
|
|
779
|
+
action = 'investigate_pane_logs';
|
|
780
|
+
failureCategory = 'codex_exit_no_done_claim';
|
|
781
|
+
break;
|
|
782
|
+
// Bug #8 PR-B — git status could not be resolved (not a repo, git binary
|
|
783
|
+
// missing, etc). Without git we cannot prove the working tree is clean,
|
|
784
|
+
// so refuse to synthesize.
|
|
785
|
+
case BLOCK_TAGS.GIT_STATE_UNVERIFIABLE:
|
|
786
|
+
category = 'infra_failure';
|
|
787
|
+
recoverable = false;
|
|
788
|
+
action = 'investigate_git_state';
|
|
789
|
+
failureCategory = 'git_state_unverifiable';
|
|
790
|
+
break;
|
|
791
|
+
// Bug #8 PR-B — worker said it was done (done-claim present) but the tree
|
|
792
|
+
// is dirty. Recoverable: next iteration's worker can finish committing.
|
|
793
|
+
case BLOCK_TAGS.WORKER_INCOMPLETE_UNCOMMITTED:
|
|
794
|
+
category = 'metric_failure';
|
|
795
|
+
recoverable = true;
|
|
796
|
+
action = 'retry_after_fix';
|
|
797
|
+
failureCategory = 'worker_incomplete_uncommitted';
|
|
798
|
+
break;
|
|
622
799
|
default:
|
|
623
800
|
category = 'metric_failure';
|
|
624
801
|
recoverable = false;
|
|
@@ -650,9 +827,41 @@ async function _handlePollFailure(error, ctx) {
|
|
|
650
827
|
options,
|
|
651
828
|
role, // 'worker' | 'verifier' | 'final_verifier' | 'flywheel' | 'guard'
|
|
652
829
|
usIdOverride,
|
|
830
|
+
// Bug #8 PR-B: when the caller has already classified the failure (e.g.
|
|
831
|
+
// codex done-claim/git gate), forward an explicit BLOCK_TAGS value as
|
|
832
|
+
// categoryOverride and a reason string. Named `categoryOverride` per
|
|
833
|
+
// Plan v6 PRD (it overrides the tag→reason_category mapping). Existing 5
|
|
834
|
+
// callers omit both and the legacy error→tag mapping below runs unchanged.
|
|
835
|
+
categoryOverride,
|
|
836
|
+
reasonOverride,
|
|
653
837
|
} = ctx;
|
|
654
838
|
const usId = usIdOverride ?? state.current_us;
|
|
655
839
|
|
|
840
|
+
if (categoryOverride) {
|
|
841
|
+
state.phase = 'blocked';
|
|
842
|
+
const classification = _classifyBlock(categoryOverride, { state, slug });
|
|
843
|
+
const reasonText = reasonOverride ?? `${role} blocked: ${categoryOverride}`;
|
|
844
|
+
await writeSentinel(paths.blockedSentinel, 'blocked', usId, reasonText, classification, paths);
|
|
845
|
+
await writeStatus(paths, state, options.onStatusChange, options.now);
|
|
846
|
+
await generateCampaignReport({
|
|
847
|
+
slug,
|
|
848
|
+
reportFile: paths.reportFile,
|
|
849
|
+
prdFile: paths.prdFile,
|
|
850
|
+
statusFile: paths.statusFile,
|
|
851
|
+
analyticsFile: paths.analyticsFile,
|
|
852
|
+
now: resolveNow(options.now),
|
|
853
|
+
blockedReason: reasonText,
|
|
854
|
+
blockedCategory: classification.reason_category,
|
|
855
|
+
});
|
|
856
|
+
return {
|
|
857
|
+
status: 'blocked',
|
|
858
|
+
usId,
|
|
859
|
+
reason: reasonText,
|
|
860
|
+
category: classification.reason_category,
|
|
861
|
+
statusFile: paths.statusFile,
|
|
862
|
+
};
|
|
863
|
+
}
|
|
864
|
+
|
|
656
865
|
let tag;
|
|
657
866
|
let reason;
|
|
658
867
|
if (error instanceof WorkerExitedError) {
|
|
@@ -872,6 +1081,10 @@ async function runFinalSequentialVerify({
|
|
|
872
1081
|
pollForSignal,
|
|
873
1082
|
runIntegrationCheck,
|
|
874
1083
|
iterTimeoutMs,
|
|
1084
|
+
// Bug #7 Fix-Q/R: optional reaper. Passed from _runCampaignBody so each
|
|
1085
|
+
// per-US verdict kills the verifier TUI before the next per-US dispatch
|
|
1086
|
+
// reuses the same pane. No-op when undefined (legacy/test callers).
|
|
1087
|
+
reapProducer,
|
|
875
1088
|
}) {
|
|
876
1089
|
const verifierModel = state.final_verifier_model;
|
|
877
1090
|
|
|
@@ -893,6 +1106,10 @@ async function runFinalSequentialVerify({
|
|
|
893
1106
|
timeoutMs: iterTimeoutMs,
|
|
894
1107
|
});
|
|
895
1108
|
|
|
1109
|
+
if (typeof reapProducer === 'function') {
|
|
1110
|
+
await reapProducer(verifierPaneId, paths.verdictFile);
|
|
1111
|
+
}
|
|
1112
|
+
|
|
896
1113
|
if (verdict.verdict !== 'pass') {
|
|
897
1114
|
return {
|
|
898
1115
|
status: 'continue',
|
|
@@ -1078,6 +1295,46 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1078
1295
|
const createPane = options.createPane ?? defaultCreatePane;
|
|
1079
1296
|
const createSession = options.createSession ?? defaultCreateSession;
|
|
1080
1297
|
const pollForSignal = options.pollForSignal ?? defaultPollForSignal;
|
|
1298
|
+
// Bug #7 Fix-Q/R: post-sentinel reaper. Producer (claude/codex TUI) must be
|
|
1299
|
+
// interrupted the moment leader has consumed the sentinel; otherwise the
|
|
1300
|
+
// pane lingers in idle prompt and self-reviews for ~2min. lockSentinel
|
|
1301
|
+
// freezes the file mtime as defense-in-depth. All four are injectable so
|
|
1302
|
+
// existing tests with fake sendKeys keep working (us006 createTmuxFakes).
|
|
1303
|
+
const sendRawKey = options.sendRawKey ?? defaultSendRawKey;
|
|
1304
|
+
const waitForProcessExit = options.waitForProcessExit ?? defaultWaitForProcessExit;
|
|
1305
|
+
const killPaneProcess = options.killPaneProcess ?? defaultKillPaneProcess;
|
|
1306
|
+
const lockSentinel = options.lockSentinelFile ?? defaultLockSentinelFile;
|
|
1307
|
+
const stampAckField = options.stampAckField ?? defaultStampAckField;
|
|
1308
|
+
const reapProducer = async (paneId, sentinelFile) => {
|
|
1309
|
+
if (!paneId) return;
|
|
1310
|
+
await killPaneProcess(paneId, {
|
|
1311
|
+
sendRawKey,
|
|
1312
|
+
waitForExit: waitForProcessExit,
|
|
1313
|
+
log: (msg) => console.error(msg),
|
|
1314
|
+
});
|
|
1315
|
+
// PR-0b-narrow AC-H1: after killPaneProcess, wait for the producing
|
|
1316
|
+
// process to actually exit before continuing. waitForProcessExit returns
|
|
1317
|
+
// when pane_current_command resolves to a shell (zsh/bash/sh). Wrapped
|
|
1318
|
+
// in try/catch — failure here is non-fatal but emits a log entry.
|
|
1319
|
+
try {
|
|
1320
|
+
await waitForProcessExit(paneId, { timeoutMs: 5000 });
|
|
1321
|
+
} catch (err) {
|
|
1322
|
+
console.error(`[handshake] waitForProcessExit failed on ${paneId} (${err?.message ?? err}); continuing`);
|
|
1323
|
+
}
|
|
1324
|
+
if (sentinelFile) {
|
|
1325
|
+
await lockSentinel(sentinelFile, { log: (msg) => console.error(msg) });
|
|
1326
|
+
// PR-0b-narrow AC-H2: stamp the leader_ack audit field. Best-effort,
|
|
1327
|
+
// does not block subsequent dispatch.
|
|
1328
|
+
await stampAckField(sentinelFile, {
|
|
1329
|
+
acked_by: 'leader',
|
|
1330
|
+
acked_at: new Date(resolveNow(options.now)).toISOString(),
|
|
1331
|
+
ack_pane_state: 'shell',
|
|
1332
|
+
}, { log: (msg) => console.error(msg) });
|
|
1333
|
+
}
|
|
1334
|
+
};
|
|
1335
|
+
// Bug #8 PR-B: working-tree probe injected (or default execFile git).
|
|
1336
|
+
// Returns { ok: boolean, dirty?: boolean, dirtyFiles?: string[], error?: string }.
|
|
1337
|
+
const checkWorkingTree = options.checkWorkingTree ?? _defaultCheckWorkingTree;
|
|
1081
1338
|
const runIntegrationCheck = options.runIntegrationCheck ?? (async () => ({ exitCode: 0, summary: 'integration skipped' }));
|
|
1082
1339
|
const maxIterations = options.maxIterations ?? 100;
|
|
1083
1340
|
// v5.7 §4.19: campaign-level pollForSignal timeout (Node leader fix).
|
|
@@ -1135,6 +1392,28 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1135
1392
|
|
|
1136
1393
|
let fixContractPath = null;
|
|
1137
1394
|
|
|
1395
|
+
// PR-A (Bug #10): operator-recovery hygiene. If the operator hand-rolled a
|
|
1396
|
+
// `phase=verify` recovery (jq-patches status.json, writes manual artifacts,
|
|
1397
|
+
// deletes the blocked sentinel), the leader MUST honor that work instead of
|
|
1398
|
+
// resetting to phase=worker on relaunch. The validator runs five checks
|
|
1399
|
+
// (see _validateOperatorRecoveryArtifacts); on full pass, _skipNextWorkerDispatch
|
|
1400
|
+
// is set as a one-shot flag consumed at the worker dispatch call site below.
|
|
1401
|
+
// On any failure the leader logs the reason and falls through to default
|
|
1402
|
+
// behavior.
|
|
1403
|
+
if (state.phase === 'verify' && state.iteration > 0) {
|
|
1404
|
+
const validation = await _validateOperatorRecoveryArtifacts({ paths, state });
|
|
1405
|
+
if (validation.ok) {
|
|
1406
|
+
console.error(
|
|
1407
|
+
`[recovery] Resuming verify phase — operator manual recovery detected (us=${state.current_us} iter=${state.iteration}): ${validation.reason}`,
|
|
1408
|
+
);
|
|
1409
|
+
state._skipNextWorkerDispatch = true;
|
|
1410
|
+
} else {
|
|
1411
|
+
console.error(
|
|
1412
|
+
`[recovery] phase=verify ignored, falling through to worker dispatch: ${validation.reason}`,
|
|
1413
|
+
);
|
|
1414
|
+
}
|
|
1415
|
+
}
|
|
1416
|
+
|
|
1138
1417
|
// P1-E Lane Enforcement: snapshot lane mtimes before each iteration,
|
|
1139
1418
|
// compare at the top of the next iteration. Drift on read-only artifacts
|
|
1140
1419
|
// (PRD, test-spec, context) emits a lane_violation_warning event + audit
|
|
@@ -1143,6 +1422,11 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1143
1422
|
let _laneSnapshot = await _snapshotLaneMtimes(paths);
|
|
1144
1423
|
|
|
1145
1424
|
while (state.iteration <= maxIterations) {
|
|
1425
|
+
// Bug #7 Fix-R defensive unlock: a 0o444 sentinel left from the previous
|
|
1426
|
+
// iteration must not block the next producer's atomic-rename write.
|
|
1427
|
+
// Idempotent: missing-file calls are no-ops.
|
|
1428
|
+
await unlockSentinelFile(paths.signalFile);
|
|
1429
|
+
await unlockSentinelFile(paths.verdictFile);
|
|
1146
1430
|
// Audit drift from the prior iteration before doing anything new.
|
|
1147
1431
|
const _laneSnapshotAfter = await _snapshotLaneMtimes(paths);
|
|
1148
1432
|
const _laneViolations = await _checkLaneViolations(paths, _laneSnapshot, _laneSnapshotAfter, state, options);
|
|
@@ -1191,6 +1475,7 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1191
1475
|
pollForSignal,
|
|
1192
1476
|
runIntegrationCheck,
|
|
1193
1477
|
iterTimeoutMs,
|
|
1478
|
+
reapProducer,
|
|
1194
1479
|
});
|
|
1195
1480
|
} catch (error) {
|
|
1196
1481
|
// v5.7 §4.25 — uniform poll-failure handling for final verifier.
|
|
@@ -1282,12 +1567,17 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1282
1567
|
});
|
|
1283
1568
|
}
|
|
1284
1569
|
|
|
1570
|
+
// Bug #7 Fix-Q/R: reap flywheel pane before consuming the signal.
|
|
1571
|
+
await reapProducer(state.flywheel_pane_id ?? state.verifier_pane_id, paths.flywheelSignalFile);
|
|
1572
|
+
|
|
1285
1573
|
state.last_flywheel_decision = flywheelSignal.decision;
|
|
1286
1574
|
// P0-A multi-mission orchestration: optionally captured from flywheel signal.
|
|
1287
1575
|
// null when the flywheel did not suggest a next mission. Consumer wrappers
|
|
1288
1576
|
// poll status.next_mission_candidate to chain missions without code edits.
|
|
1289
1577
|
// See docs/multi-mission-orchestration.md.
|
|
1290
1578
|
state.next_mission_candidate = flywheelSignal.next_mission_candidate ?? null;
|
|
1579
|
+
// Bug #7 Fix-R cleanup: unlock before unlink so 0o444 doesn't block.
|
|
1580
|
+
await unlockSentinelFile(paths.flywheelSignalFile);
|
|
1291
1581
|
await fs.unlink(paths.flywheelSignalFile).catch(() => {});
|
|
1292
1582
|
|
|
1293
1583
|
// Flywheel Guard (independent validation of flywheel decision)
|
|
@@ -1320,11 +1610,15 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1320
1610
|
});
|
|
1321
1611
|
}
|
|
1322
1612
|
|
|
1613
|
+
// Bug #7 Fix-Q/R: reap guard pane before mutating state.
|
|
1614
|
+
await reapProducer(guardPaneId, paths.flywheelGuardVerdictFile);
|
|
1615
|
+
|
|
1323
1616
|
if (!state.flywheel_guard_count[state.current_us]) {
|
|
1324
1617
|
state.flywheel_guard_count[state.current_us] = 0;
|
|
1325
1618
|
}
|
|
1326
1619
|
state.flywheel_guard_count[state.current_us] += 1;
|
|
1327
1620
|
|
|
1621
|
+
await unlockSentinelFile(paths.flywheelGuardVerdictFile);
|
|
1328
1622
|
await fs.unlink(paths.flywheelGuardVerdictFile).catch(() => {});
|
|
1329
1623
|
|
|
1330
1624
|
if (guardVerdict.verdict === 'inconclusive') {
|
|
@@ -1404,18 +1698,36 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1404
1698
|
}
|
|
1405
1699
|
}
|
|
1406
1700
|
|
|
1407
|
-
|
|
1408
|
-
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1414
|
-
state
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1701
|
+
// PR-A (Bug #10): one-shot guard. When the operator's `phase=verify`
|
|
1702
|
+
// recovery was honored at campaign entry, skip both the phase reset and
|
|
1703
|
+
// the worker dispatch — the operator already wrote a valid iter-signal.json
|
|
1704
|
+
// and done-claim.json, so pollForSignal below will pick them up immediately
|
|
1705
|
+
// and the loop continues into the verifier phase. The flag is cleared
|
|
1706
|
+
// after consumption so subsequent iterations dispatch the worker normally.
|
|
1707
|
+
if (state._skipNextWorkerDispatch) {
|
|
1708
|
+
state._skipNextWorkerDispatch = false;
|
|
1709
|
+
console.error(
|
|
1710
|
+
`[recovery] Skipping worker dispatch for iter=${state.iteration} (honoring operator manual recovery)`,
|
|
1711
|
+
);
|
|
1712
|
+
// Persist phase=verify so a subsequent crash-and-relaunch sees the same
|
|
1713
|
+
// contract. writeStatus is intentionally called BEFORE pollForSignal so
|
|
1714
|
+
// the on-disk state matches what we are about to do.
|
|
1715
|
+
state.phase = 'verify';
|
|
1716
|
+
await writeStatus(paths, state, options.onStatusChange, options.now);
|
|
1717
|
+
} else {
|
|
1718
|
+
state.phase = 'worker';
|
|
1719
|
+
await writeStatus(paths, state, options.onStatusChange, options.now);
|
|
1720
|
+
await dispatchWorker({
|
|
1721
|
+
iteration: state.iteration,
|
|
1722
|
+
paths,
|
|
1723
|
+
slug,
|
|
1724
|
+
usList,
|
|
1725
|
+
state,
|
|
1726
|
+
sendKeys,
|
|
1727
|
+
workerPaneId: state.worker_pane_id,
|
|
1728
|
+
fixContractPath,
|
|
1729
|
+
});
|
|
1730
|
+
}
|
|
1419
1731
|
|
|
1420
1732
|
let signal;
|
|
1421
1733
|
try {
|
|
@@ -1432,8 +1744,43 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1432
1744
|
});
|
|
1433
1745
|
} catch (error) {
|
|
1434
1746
|
if (error instanceof TimeoutError && parseModelFlag(state.worker_model).engine === 'codex') {
|
|
1435
|
-
//
|
|
1436
|
-
//
|
|
1747
|
+
// Bug #8 PR-B 4-way gate: refuse to synthesize verify signal when
|
|
1748
|
+
// codex worker exited without committing real work.
|
|
1749
|
+
// 1. done-claim absent → BLOCKED infra_failure
|
|
1750
|
+
// 2. git unverifiable → BLOCKED infra_failure
|
|
1751
|
+
// 3. done-claim + dirty tree → BLOCKED metric_failure
|
|
1752
|
+
// 4. done-claim + clean tree → synthesize verify (legacy path)
|
|
1753
|
+
const doneClaimExists = await exists(paths.doneClaimFile);
|
|
1754
|
+
if (!doneClaimExists) {
|
|
1755
|
+
return _handlePollFailure(error, {
|
|
1756
|
+
paths, state, slug, options,
|
|
1757
|
+
role: 'worker',
|
|
1758
|
+
categoryOverride: BLOCK_TAGS.CODEX_EXIT_NO_DONE_CLAIM,
|
|
1759
|
+
reasonOverride:
|
|
1760
|
+
'codex worker exited (timeout) without writing done-claim; refusing to synthesize verify signal',
|
|
1761
|
+
});
|
|
1762
|
+
}
|
|
1763
|
+
const tree = await checkWorkingTree(rootDir);
|
|
1764
|
+
if (!tree.ok) {
|
|
1765
|
+
return _handlePollFailure(error, {
|
|
1766
|
+
paths, state, slug, options,
|
|
1767
|
+
role: 'worker',
|
|
1768
|
+
categoryOverride: BLOCK_TAGS.GIT_STATE_UNVERIFIABLE,
|
|
1769
|
+
reasonOverride:
|
|
1770
|
+
`git status unverifiable (${tree.error ?? 'unknown'}); refusing to synthesize verify signal`,
|
|
1771
|
+
});
|
|
1772
|
+
}
|
|
1773
|
+
if (tree.dirty) {
|
|
1774
|
+
const sample = (tree.dirtyFiles ?? []).slice(0, 5).join(', ');
|
|
1775
|
+
return _handlePollFailure(error, {
|
|
1776
|
+
paths, state, slug, options,
|
|
1777
|
+
role: 'worker',
|
|
1778
|
+
categoryOverride: BLOCK_TAGS.WORKER_INCOMPLETE_UNCOMMITTED,
|
|
1779
|
+
reasonOverride:
|
|
1780
|
+
`worker_incomplete_uncommitted: done-claim present but tree dirty (${sample || 'no file list'})`,
|
|
1781
|
+
});
|
|
1782
|
+
}
|
|
1783
|
+
// Clean tree — preserve the legacy synthesize behaviour.
|
|
1437
1784
|
signal = {
|
|
1438
1785
|
iteration: state.iteration,
|
|
1439
1786
|
status: 'verify',
|
|
@@ -1450,6 +1797,11 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1450
1797
|
}
|
|
1451
1798
|
}
|
|
1452
1799
|
|
|
1800
|
+
// Bug #7 Fix-Q/R: reap the worker pane the instant we accept the signal so
|
|
1801
|
+
// claude/codex cannot self-review and rewrite iter-signal.json. Runs even
|
|
1802
|
+
// for the codex-fallback synthesized signal (no-op on a dead pane).
|
|
1803
|
+
await reapProducer(state.worker_pane_id, paths.signalFile);
|
|
1804
|
+
|
|
1453
1805
|
// US-019 R7 P1-G: verify_partial malformed downgrade.
|
|
1454
1806
|
// verify_partial requires verified_acs[] to be a non-empty array. Otherwise the verifier
|
|
1455
1807
|
// has nothing to evaluate and we must treat the signal as broken contract → blocked.
|
|
@@ -1519,6 +1871,11 @@ async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
|
1519
1871
|
});
|
|
1520
1872
|
}
|
|
1521
1873
|
|
|
1874
|
+
// Bug #7 Fix-Q/R: reap verifier pane immediately after accepting the
|
|
1875
|
+
// verdict — without this the codex/claude TUI keeps running for ~2min and
|
|
1876
|
+
// can rewrite verify-verdict.json (mtime drift observed in 19th launch).
|
|
1877
|
+
await reapProducer(state.verifier_pane_id, paths.verdictFile);
|
|
1878
|
+
|
|
1522
1879
|
if (verdict.verdict === 'pass') {
|
|
1523
1880
|
state.consecutive_failures = 0;
|
|
1524
1881
|
if (!state.verified_us.includes(usId)) {
|
package/src/node/shared/fs.mjs
CHANGED
|
@@ -59,3 +59,86 @@ export async function writeSentinelExclusive(targetPath, content) {
|
|
|
59
59
|
}
|
|
60
60
|
return { wrote: true };
|
|
61
61
|
}
|
|
62
|
+
|
|
63
|
+
// Bug #7 Fix-R: best-effort chmod 0o444 to freeze a sentinel file once the
|
|
64
|
+
// leader has accepted it. Mirror of scripts/postinstall.js tryLockFile (L104).
|
|
65
|
+
// Some filesystems silently ignore chmod (WSL1/NTFS, tmpfs); we log once and
|
|
66
|
+
// continue. Q (process kill) is the primary defense; R is defense-in-depth.
|
|
67
|
+
// Module-level latch: the "chmod unsupported" warning is emitted at most once
// per process, no matter how many sentinels fail to lock.
let _sentinelLockWarningEmitted = false;

/**
 * Best-effort freeze of a sentinel file by setting its mode to 0o444.
 *
 * @param {string} filePath - Sentinel file to make read-only.
 * @param {{log?: (msg: string) => void}} [options] - `log` receives the
 *   one-time warning; defaults to `console.error`.
 * @returns {Promise<void>} Always resolves; chmod failures are never rethrown.
 */
export async function lockSentinelFile(filePath, { log = (msg) => console.error(msg) } = {}) {
  try {
    await fs.chmod(filePath, 0o444);
  } catch (chmodError) {
    // A missing file is not an error: nothing is left to protect.
    const fileIsGone = Boolean(chmodError) && chmodError.code === 'ENOENT';
    if (fileIsGone || _sentinelLockWarningEmitted) {
      return;
    }
    const failureCode = chmodError?.code ?? 'unknown';
    log(`[bug7] chmod 0444 on ${filePath} failed (${failureCode}); post-sentinel write-protection unavailable on this FS.`);
    _sentinelLockWarningEmitted = true;
  }
}
|
|
83
|
+
|
|
84
|
+
// Pair to lockSentinelFile. Called before fs.unlink in iter-cleanup paths so
|
|
85
|
+
// subsequent atomic-rename writes never see EACCES on the destination mode.
|
|
86
|
+
// Idempotent — missing file or already-writable is fine.
|
|
87
|
+
/**
 * Counterpart to the 0o444 sentinel lock: restore mode 0o644 on `filePath`.
 *
 * Best-effort — every chmod failure (including a missing file) is ignored so
 * the caller's cleanup can proceed regardless.
 *
 * @param {string} filePath - Sentinel file to make writable again.
 * @returns {Promise<void>} Always resolves.
 */
export async function unlockSentinelFile(filePath) {
  const writableMode = 0o644;
  await fs.chmod(filePath, writableMode).catch(() => {
    // Deliberately ignored: unlocking is advisory only.
  });
}
|
|
94
|
+
|
|
95
|
+
// PR-0b-narrow (Plan v6) — stamp leader handshake ack onto an already-locked
|
|
96
|
+
// sentinel. Best-effort, audit-only: the contract is "if we can write, do; if
|
|
97
|
+
// not, swallow". Callers must NOT depend on the ack landing for hard ordering
|
|
98
|
+
// semantics (use waitForProcessExit + the chmod 0o444 lock for that). The
|
|
99
|
+
// resulting `content.leader_ack` is auxiliary metadata so post-mortem audits
|
|
100
|
+
// can prove which Leader iteration consumed which sentinel.
|
|
101
|
+
//
|
|
102
|
+
// Sequence (mirrored in src/scripts/lib_ralph_desk.zsh::_stamp_ack_field):
|
|
103
|
+
// 1. chmod 0o644 (so we can write — sentinel was locked by lockSentinelFile)
|
|
104
|
+
// 2. JSON.parse
|
|
105
|
+
// 3. merge ack as content.leader_ack
|
|
106
|
+
// 4. atomic write
|
|
107
|
+
// 5. chmod 0o444 (re-lock)
|
|
108
|
+
//
|
|
109
|
+
// All steps wrapped in try/catch; any failure is silently dropped. Failure
|
|
110
|
+
// modes that we deliberately swallow:
|
|
111
|
+
// - File missing (sentinel was unlinked by a concurrent path).
|
|
112
|
+
// - Malformed JSON (race with a partial-write window — Bug #7 already gates
|
|
113
|
+
// this on the read side, but stampAckField may still observe it during
|
|
114
|
+
// transitional iterations).
|
|
115
|
+
// - chmod ENOTSUP / WSL1 / NTFS (recorded in Bug #7 fixes).
|
|
116
|
+
/**
 * Stamp a leader handshake acknowledgement onto a sentinel JSON file as
 * `leader_ack`. Best-effort and audit-only: every failure is swallowed (some
 * are logged) and the function always resolves.
 *
 * Sequence:
 *   1. chmod 0o644 so a previously locked sentinel can be replaced.
 *   2. Read and JSON.parse the file.
 *   3. Merge `ack` in as `leader_ack`.
 *   4. Atomic write: write a sibling temp file, then rename it over the
 *      sentinel so readers never observe a truncated or half-written file.
 *   5. chmod 0o444 to re-lock.
 *
 * The file is left untouched (apart from the re-lock) when it is missing,
 * unreadable, not valid JSON, or when its JSON root is not a plain object
 * (null, primitive, or array — an array cannot carry a named field through
 * JSON.stringify, so rewriting it would only drop the ack).
 *
 * @param {string} filePath - Sentinel file to stamp.
 * @param {object} ack - Value stored under `leader_ack`.
 * @param {{log?: (msg: string) => void}} [options] - `log` receives
 *   diagnostics for read/parse and write failures; defaults to `console.error`.
 * @returns {Promise<void>}
 */
export async function stampAckField(filePath, ack, { log = (msg) => console.error(msg) } = {}) {
  const relock = async () => {
    try {
      await fs.chmod(filePath, 0o444);
    } catch {
      // Re-locking is best-effort; some filesystems reject or ignore chmod.
    }
  };

  try {
    await fs.chmod(filePath, 0o644);
  } catch (err) {
    if (err?.code === 'ENOENT') return; // sentinel gone — nothing to stamp
    // Any other chmod failure is non-fatal: still attempt the stamp in case
    // the filesystem already permits the write.
  }

  let content;
  try {
    content = JSON.parse(await fs.readFile(filePath, 'utf8'));
  } catch (err) {
    log(`[stamp-ack] read/parse failed for ${filePath} (${err?.code ?? err?.message ?? 'unknown'}); ack dropped (audit-only)`);
    await relock();
    return;
  }

  if (content === null || typeof content !== 'object' || Array.isArray(content)) {
    await relock();
    return;
  }

  const payload = `${JSON.stringify({ ...content, leader_ack: ack }, null, 2)}\n`;
  // Temp file lives next to the sentinel so the rename stays on one
  // filesystem; the pid suffix keeps concurrent processes from colliding.
  const tempPath = `${filePath}.ack-${process.pid}.tmp`;
  try {
    await fs.writeFile(tempPath, payload, 'utf8');
    await fs.rename(tempPath, filePath);
  } catch (err) {
    log(`[stamp-ack] write failed for ${filePath} (${err?.code ?? err?.message ?? 'unknown'}); ack dropped`);
    // Do not leave a stray temp file behind when the write or rename failed.
    await fs.unlink(tempPath).catch(() => {});
  }

  await relock();
}
|
|
@@ -52,6 +52,12 @@ export async function sendKeys(paneId, command) {
|
|
|
52
52
|
await runTmux(['send-keys', '-t', paneId, 'Enter'], { paneId });
|
|
53
53
|
}
|
|
54
54
|
|
|
55
|
+
// Bug #7 Fix-Q: send a raw tmux key (e.g. C-c) without the `-l --` literal-text
|
|
56
|
+
// flag. Distinct from sendKeys() so callers can interrupt a running TUI.
|
|
57
|
+
/**
 * Send a single tmux key name (for example `C-c`) to a pane via
 * `tmux send-keys -t <paneId> <key>`, with no trailing Enter.
 *
 * @param {string} paneId - Target tmux pane.
 * @param {string} key - tmux key name to send.
 * @returns {Promise<void>} Settles when the underlying `runTmux` call does.
 */
export async function sendRawKey(paneId, key) {
  const tmuxArgs = ['send-keys', '-t', paneId, key];
  await runTmux(tmuxArgs, { paneId });
}
|
|
60
|
+
|
|
55
61
|
export async function waitForProcessExit(
|
|
56
62
|
paneId,
|
|
57
63
|
{ pollIntervalMs = 100, timeoutMs = 5000 } = {},
|
|
@@ -75,3 +81,36 @@ export async function waitForProcessExit(
|
|
|
75
81
|
paneId,
|
|
76
82
|
});
|
|
77
83
|
}
|
|
84
|
+
|
|
85
|
+
// Bug #7 Fix-Q: terminate the TUI process producing a sentinel file the moment
|
|
86
|
+
// the leader has accepted it. Without this, claude/codex returns to its idle
|
|
87
|
+
// prompt and continues self-review for 1-2 minutes, racing the next iteration.
|
|
88
|
+
// Mirror of zsh pattern at run_ralph_desk.zsh:2384-2397, 375-376, 529-530.
|
|
89
|
+
// Fail-open: pane may already be dead from prior teardown, or waitForExit may
|
|
90
|
+
// time out — neither aborts the iteration.
|
|
91
|
+
/**
 * Interrupt whatever is running in a tmux pane and wait for it to exit.
 *
 * Sends `C-c`, sleeps `gracePeriodMs`, sends `C-c` again, then awaits
 * `waitForExit(paneId, { timeoutMs: exitTimeoutMs })`. Fail-open: errors from
 * the key sender or the exit wait are passed to `log` and never rethrown.
 *
 * @param {string} paneId - Target tmux pane.
 * @param {object} [options]
 * @param {(paneId: string, key: string) => Promise<void>} [options.sendRawKey]
 *   Key sender; defaults to the module's `sendRawKey`.
 * @param {(paneId: string, opts: {timeoutMs: number}) => Promise<unknown>} [options.waitForExit]
 *   Exit waiter; defaults to the module's `waitForProcessExit`.
 * @param {number} [options.gracePeriodMs=800] - Pause between the two interrupts.
 * @param {number} [options.exitTimeoutMs=5000] - Timeout handed to `waitForExit`.
 * @param {(msg: string) => void} [options.log] - Diagnostic sink; no-op by default.
 * @returns {Promise<void>}
 */
export async function killPaneProcess(
  paneId,
  {
    sendRawKey: sendRawKeyImpl = sendRawKey,
    waitForExit = waitForProcessExit,
    gracePeriodMs = 800,
    exitTimeoutMs = 5000,
    log = () => {},
  } = {},
) {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const interruptSequence = ['C-c', 'C-c'];

  for (const [position, key] of interruptSequence.entries()) {
    if (position > 0) {
      // Give the process a moment to react to the previous interrupt.
      await pause(gracePeriodMs);
    }
    try {
      await sendRawKeyImpl(paneId, key);
    } catch (sendError) {
      log(`[bug7] killPaneProcess sendRawKey ${key} failed for ${paneId}: ${sendError?.message ?? sendError}`);
    }
  }

  try {
    await waitForExit(paneId, { timeoutMs: exitTimeoutMs });
  } catch (waitError) {
    log(`[bug7] killPaneProcess waitForExit failed for ${paneId}: ${waitError?.message ?? waitError}`);
  }
}
|