@ai-dev-methodologies/rlp-desk 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/docs/rlp-desk/artifact-schema.md +99 -0
  2. package/docs/rlp-desk/ci-setup.md +100 -0
  3. package/docs/rlp-desk/e2e-scenarios.md +102 -0
  4. package/docs/rlp-desk/plans/rlp-desk-tmux-flywheel-routing.md +730 -0
  5. package/install.sh +93 -20
  6. package/package.json +8 -2
  7. package/scripts/build-node-manifest.js +52 -0
  8. package/scripts/postinstall.js +162 -8
  9. package/src/commands/rlp-desk.md +48 -25
  10. package/src/governance.md +55 -6
  11. package/src/node/MANIFEST.txt +15 -0
  12. package/src/node/cli/command-builder.mjs +25 -5
  13. package/src/node/constants.mjs +19 -0
  14. package/src/node/polling/signal-poller.mjs +119 -3
  15. package/src/node/runner/campaign-main-loop.mjs +470 -41
  16. package/src/node/runner/leader-registry.mjs +100 -0
  17. package/src/node/runner/prompt-dismisser.mjs +200 -0
  18. package/src/node/shared/fs.mjs +38 -0
  19. package/src/node/util/debug-log.mjs +56 -0
  20. package/src/node/util/shell-quote.mjs +12 -0
  21. package/docs/superpowers/plans/2026-04-24-gpt-5-5-default.md +0 -517
  22. package/docs/superpowers/specs/2026-04-24-gpt-5-5-default.md +0 -107
  23. /package/docs/{TODO-verification-next.md → rlp-desk/TODO-verification-next.md} +0 -0
  24. /package/docs/{architecture.md → rlp-desk/architecture.md} +0 -0
  25. /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-flywheel-enhancement.md +0 -0
  26. /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-pivot-step.md +0 -0
  27. /package/docs/{blueprints → rlp-desk/blueprints}/plan-flywheel-enhancement.md +0 -0
  28. /package/docs/{blueprints → rlp-desk/blueprints}/sv-architecture-rethink.md +0 -0
  29. /package/docs/{getting-started.md → rlp-desk/getting-started.md} +0 -0
  30. /package/docs/{internal → rlp-desk/internal}/verification-policy-gap-analysis.md +0 -0
  31. /package/docs/{internal → rlp-desk/internal}/verification-strategy-research.md +0 -0
  32. /package/docs/{multi-mission-orchestration.md → rlp-desk/multi-mission-orchestration.md} +0 -0
  33. /package/docs/{plans → rlp-desk/plans}/cozy-gliding-trinket.md +0 -0
  34. /package/docs/{plans → rlp-desk/plans}/frolicking-churning-honey.md +0 -0
  35. /package/docs/{plans → rlp-desk/plans}/keen-sauteeing-snowflake.md +0 -0
  36. /package/docs/{plans → rlp-desk/plans}/mutable-booping-corbato.md +0 -0
  37. /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11-handoff-7fixes.md +0 -0
  38. /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11.1-tmux-pane-disappearance.md +0 -0
  39. /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert-agent-a8cd695ffca2a3ad8.md +0 -0
  40. /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert.md +0 -0
  41. /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie-agent-a6814625642e956da.md +0 -0
  42. /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie.md +0 -0
  43. /package/docs/{plans → rlp-desk/plans}/validated-snacking-crayon.md +0 -0
  44. /package/docs/{protocol-reference.md → rlp-desk/protocol-reference.md} +0 -0
@@ -5,8 +5,16 @@ import { execFile } from 'node:child_process';
5
5
  import { promisify } from 'node:util';
6
6
 
7
7
  import { buildClaudeCmd, buildCodexCmd, parseModelFlag } from '../cli/command-builder.mjs';
8
+ import { shellQuote } from '../util/shell-quote.mjs';
9
+ import { OPUS_1M_BETA, isOpusModel } from '../constants.mjs';
8
10
  import { initCampaign } from '../init/campaign-initializer.mjs';
9
- import { TimeoutError, pollForSignal as defaultPollForSignal } from '../polling/signal-poller.mjs';
11
+ import { writeSentinelExclusive } from '../shared/fs.mjs';
12
+ import {
13
+ TimeoutError,
14
+ WorkerExitedError,
15
+ PromptBlockedError,
16
+ pollForSignal as defaultPollForSignal,
17
+ } from '../polling/signal-poller.mjs';
10
18
  import {
11
19
  assembleVerifierPrompt,
12
20
  assembleWorkerPrompt,
@@ -57,7 +65,11 @@ function buildPaths(rootDir, slug) {
57
65
  prdFile: path.join(deskRoot, 'plans', `prd-${slug}.md`),
58
66
  testSpecFile: path.join(deskRoot, 'plans', `test-spec-${slug}.md`),
59
67
  analyticsFile: path.join(campaignLogDir, 'campaign.jsonl'),
60
- analyticsDir: path.join(os.homedir(), '.claude', 'ralph-desk', 'analytics', slug),
68
+ // v5.7 §4.11.b: project-local analytics so Worker/Verifier prompts that
69
+ // reference this path stay inside cwd-tree (no `--add-dir` whitelist needed
70
+ // for cross-cwd writes). Cross-project rollup uses ~/.claude/ralph-desk/registry.jsonl
71
+ // (Leader-only, never appears in Worker prompts) — see §4.11.c.
72
+ analyticsDir: path.join(deskRoot, 'analytics', slug),
61
73
  reportFile: path.join(campaignLogDir, 'campaign-report.md'),
62
74
  statusFile: path.join(campaignLogDir, 'runtime', 'status.json'),
63
75
  flywheelPromptFile: path.join(deskRoot, 'prompts', `${slug}.flywheel.prompt.md`),
@@ -418,7 +430,37 @@ async function _checkLaneViolations(paths, snapshotBefore, snapshotAfter, state,
418
430
  // the zsh helper _classify_cross_us_or_metric in lib_ralph_desk.zsh.
419
431
  const CROSS_US_TOKEN_RE = /depends on US-|blocking US-|awaits US-|post-iter US-|requires US-\d+|cross-US|US-\d+ 산출물|신규 US-|post-iter/i;
420
432
 
421
- // P1-D Failure Taxonomy classifier. governance §1f locks the 6 reason_category
433
+ // v5.7 §4.25 typed enum for _classifyBlock tags. Replaces ad-hoc string
434
+ // literals scattered across writeSentinel call sites. Typo-safe via Object.freeze.
435
+ export const BLOCK_TAGS = Object.freeze({
436
+ // Verdict-driven (Verifier 'fail')
437
+ VERIFIER: 'verifier',
438
+ // Flywheel/Guard verdicts
439
+ FLYWHEEL_INCONCLUSIVE: 'flywheel_inconclusive',
440
+ FLYWHEEL_EXHAUSTED: 'flywheel_exhausted',
441
+ // Model upgrade chain exhausted
442
+ MODEL_UPGRADE: 'model_upgrade',
443
+ // Worker/Verifier/Flywheel/Guard pane exited without artifacts (file-guarantee)
444
+ WORKER_EXITED: 'worker_exited_without_artifacts',
445
+ VERIFIER_EXITED: 'verifier_exited_without_artifacts',
446
+ FINAL_VERIFIER_EXITED: 'final_verifier_exited_without_artifacts',
447
+ FLYWHEEL_EXITED: 'flywheel_pane_exited_without_artifacts',
448
+ GUARD_EXITED: 'guard_pane_exited_without_artifacts',
449
+ // Auto-Enter unsafe (default-No prompt)
450
+ PROMPT_BLOCKED: 'prompt_blocked',
451
+ // Persistent timeout without exit (different from EXITED)
452
+ WORKER_TIMEOUT: 'worker_timeout',
453
+ VERIFIER_TIMEOUT: 'verifier_timeout',
454
+ FINAL_VERIFIER_TIMEOUT: 'final_verifier_timeout',
455
+ FLYWHEEL_TIMEOUT: 'flywheel_timeout',
456
+ GUARD_TIMEOUT: 'guard_timeout',
457
+ // Schema validator (P1)
458
+ MALFORMED_ARTIFACT: 'malformed_artifact',
459
+ // Backstop (run() try/finally)
460
+ LEADER_EXITED_WITHOUT_TERMINAL_STATE: 'leader_exited_without_terminal_state',
461
+ });
462
+
463
+ // P1-D Failure Taxonomy classifier. governance §1f locks the reason_category
422
464
  // values + recoverable + suggested_action defaults per source. wrapper MUST
423
465
  // branch on reason_category; failure_category is diagnostic only.
424
466
  function _classifyBlock(source, { verdict, state, slug } = {}) {
@@ -427,18 +469,18 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
427
469
  let action;
428
470
  let failureCategory = null;
429
471
  switch (source) {
430
- case 'flywheel_inconclusive':
431
- case 'flywheel_exhausted':
472
+ case BLOCK_TAGS.FLYWHEEL_INCONCLUSIVE:
473
+ case BLOCK_TAGS.FLYWHEEL_EXHAUSTED:
432
474
  category = 'mission_abort';
433
475
  recoverable = false;
434
476
  action = 'terminal_alert';
435
477
  break;
436
- case 'model_upgrade':
478
+ case BLOCK_TAGS.MODEL_UPGRADE:
437
479
  category = 'repeat_axis';
438
480
  recoverable = false;
439
481
  action = 'next_mission_chain';
440
482
  break;
441
- case 'verifier': {
483
+ case BLOCK_TAGS.VERIFIER: {
442
484
  const text = `${verdict?.reason ?? ''} ${verdict?.summary ?? ''}`;
443
485
  category = CROSS_US_TOKEN_RE.test(text) ? 'cross_us_dep' : 'metric_failure';
444
486
  recoverable = true;
@@ -446,6 +488,54 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
446
488
  failureCategory = verdict?.failure_category ?? null;
447
489
  break;
448
490
  }
491
+ // v5.7 §4.22 §4.24 — pane-exit-without-artifacts variants. All
492
+ // infra_failure, not recoverable (Worker/Verifier/Flywheel/Guard pane
493
+ // process is gone; campaign cannot proceed). failure_category preserved
494
+ // for telemetry.
495
+ case BLOCK_TAGS.WORKER_EXITED:
496
+ case BLOCK_TAGS.VERIFIER_EXITED:
497
+ case BLOCK_TAGS.FINAL_VERIFIER_EXITED:
498
+ case BLOCK_TAGS.FLYWHEEL_EXITED:
499
+ case BLOCK_TAGS.GUARD_EXITED:
500
+ category = 'infra_failure';
501
+ recoverable = false;
502
+ action = 'investigate_pane_logs';
503
+ failureCategory = source;
504
+ break;
505
+ // v5.7 §4.17 — auto-Enter on default-No would CANCEL; refuse and BLOCK.
506
+ case BLOCK_TAGS.PROMPT_BLOCKED:
507
+ category = 'infra_failure';
508
+ recoverable = false;
509
+ action = 'manual_prompt_response';
510
+ failureCategory = 'prompt_blocked';
511
+ break;
512
+ // Persistent timeout (no exit detected) — different from EXITED.
513
+ case BLOCK_TAGS.WORKER_TIMEOUT:
514
+ case BLOCK_TAGS.VERIFIER_TIMEOUT:
515
+ case BLOCK_TAGS.FINAL_VERIFIER_TIMEOUT:
516
+ case BLOCK_TAGS.FLYWHEEL_TIMEOUT:
517
+ case BLOCK_TAGS.GUARD_TIMEOUT:
518
+ category = 'infra_failure';
519
+ recoverable = false;
520
+ action = 'increase_iter_timeout_or_investigate';
521
+ failureCategory = source;
522
+ break;
523
+ // v5.7 §4.25 P1 — schema validator caught a malformed/incoherent artifact.
524
+ // Recoverable: next iteration's Worker prompt can include the schema
525
+ // error (P2 feedback loop closure) and try again.
526
+ case BLOCK_TAGS.MALFORMED_ARTIFACT:
527
+ category = 'contract_violation';
528
+ recoverable = true;
529
+ action = 'retry_with_schema_feedback';
530
+ failureCategory = 'malformed_artifact';
531
+ break;
532
+ // Backstop: run() exited without terminal sentinel.
533
+ case BLOCK_TAGS.LEADER_EXITED_WITHOUT_TERMINAL_STATE:
534
+ category = 'infra_failure';
535
+ recoverable = false;
536
+ action = 'investigate_leader_logs';
537
+ failureCategory = 'leader_exited_without_terminal_state';
538
+ break;
449
539
  default:
450
540
  category = 'metric_failure';
451
541
  recoverable = false;
@@ -461,22 +551,198 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
461
551
  };
462
552
  }
463
553
 
554
+ // v5.7 §4.25 — uniform poll-failure → BLOCKED handler, used by every
555
+ // `pollForSignal` call site (Worker, VerifierPerUS, VerifierFinal, Flywheel,
556
+ // Guard). Mirrors the canonical Worker pattern previously inlined at line
557
+ // ~1037-1110. Idempotent via writeSentinelExclusive (first-writer-wins).
558
+ //
559
+ // Returns the early-exit object the call site should `return` to its
560
+ // orchestrator. Callers MUST `return` it (not throw), so the run() loop
561
+ // terminates cleanly with phase=blocked.
562
+ async function _handlePollFailure(error, ctx) {
563
+ const {
564
+ paths,
565
+ state,
566
+ slug,
567
+ options,
568
+ role, // 'worker' | 'verifier' | 'final_verifier' | 'flywheel' | 'guard'
569
+ usIdOverride,
570
+ } = ctx;
571
+ const usId = usIdOverride ?? state.current_us;
572
+
573
+ let tag;
574
+ let reason;
575
+ if (error instanceof WorkerExitedError) {
576
+ tag = ({
577
+ worker: BLOCK_TAGS.WORKER_EXITED,
578
+ verifier: BLOCK_TAGS.VERIFIER_EXITED,
579
+ final_verifier: BLOCK_TAGS.FINAL_VERIFIER_EXITED,
580
+ flywheel: BLOCK_TAGS.FLYWHEEL_EXITED,
581
+ guard: BLOCK_TAGS.GUARD_EXITED,
582
+ })[role] ?? BLOCK_TAGS.WORKER_EXITED;
583
+ reason = `${error.reason ?? 'pane exited without artifacts'}: ${error.message}`;
584
+ } else if (error instanceof PromptBlockedError) {
585
+ tag = BLOCK_TAGS.PROMPT_BLOCKED;
586
+ reason = `${error.reason ?? 'default-No prompt'}: ${error.message}`;
587
+ } else if (error instanceof MalformedArtifactError) {
588
+ tag = BLOCK_TAGS.MALFORMED_ARTIFACT;
589
+ reason = `Malformed artifact at ${error.field}: expected ${error.expected}, got ${error.got}`;
590
+ } else if (error instanceof TimeoutError) {
591
+ tag = ({
592
+ worker: BLOCK_TAGS.WORKER_TIMEOUT,
593
+ verifier: BLOCK_TAGS.VERIFIER_TIMEOUT,
594
+ final_verifier: BLOCK_TAGS.FINAL_VERIFIER_TIMEOUT,
595
+ flywheel: BLOCK_TAGS.FLYWHEEL_TIMEOUT,
596
+ guard: BLOCK_TAGS.GUARD_TIMEOUT,
597
+ })[role] ?? BLOCK_TAGS.WORKER_TIMEOUT;
598
+ reason = `${role} pollForSignal timed out: ${error.message}`;
599
+ } else {
600
+ // Unknown error — treat as infra_failure so backstop doesn't have to
601
+ // synthesize. Re-throw after writing so caller's outer try/finally
602
+ // (run() backstop) sees something but doesn't double-write.
603
+ tag = BLOCK_TAGS.LEADER_EXITED_WITHOUT_TERMINAL_STATE;
604
+ reason = `Unexpected error in ${role} poll: ${error?.message ?? error}`;
605
+ }
606
+
607
+ state.phase = 'blocked';
608
+ const classification = _classifyBlock(tag, { state, slug });
609
+ await writeSentinel(paths.blockedSentinel, 'blocked', usId, reason, classification, paths);
610
+ await writeStatus(paths, state, options.onStatusChange, options.now);
611
+ await generateCampaignReport({
612
+ slug,
613
+ reportFile: paths.reportFile,
614
+ prdFile: paths.prdFile,
615
+ statusFile: paths.statusFile,
616
+ analyticsFile: paths.analyticsFile,
617
+ now: resolveNow(options.now),
618
+ blockedReason: reason,
619
+ blockedCategory: classification.reason_category,
620
+ });
621
+
622
+ return {
623
+ status: 'blocked',
624
+ usId,
625
+ reason,
626
+ category: classification.reason_category,
627
+ statusFile: paths.statusFile,
628
+ };
629
+ }
630
+
631
+ // v5.7 §4.25 P1 — schema validator. Throws MalformedArtifactError if the
632
+ // parsed artifact violates the contract. Caller catches via _handlePollFailure.
633
+ // Hooks AFTER pollForSignal returns parsed JSON, BEFORE state mutation.
634
+ //
635
+ // Validates:
636
+ // - slug matches campaign slug (or absent — backwards compat)
637
+ // - iteration is integer ≥ state.iteration_floor (worker may advance, never regress)
638
+ // - signal_type matches read context ('signal' | 'verdict' | 'flywheel_signal' | 'flywheel_guard_verdict')
639
+ // The signal_type field is OPTIONAL for backwards compat — existing artifacts
640
+ // don't include it. Future writers should.
641
+ // - us_id ∈ usList ∪ {'ALL'} (closed-set)
642
+ export class MalformedArtifactError extends Error {
643
+ constructor(message, info = {}) {
644
+ super(message);
645
+ this.name = 'MalformedArtifactError';
646
+ this.field = info.field ?? null;
647
+ this.expected = info.expected ?? null;
648
+ this.got = info.got ?? null;
649
+ this.raw = info.raw ?? null;
650
+ }
651
+ }
652
+
653
+ function validateArtifact(parsed, ctx) {
654
+ const { expectedSlug, expectedSignalType, allowedUsIds } = ctx;
655
+ if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
656
+ throw new MalformedArtifactError('Artifact is not a JSON object', {
657
+ field: '<root>',
658
+ expected: 'object',
659
+ got: Array.isArray(parsed) ? 'array' : typeof parsed,
660
+ raw: parsed,
661
+ });
662
+ }
663
+ if (parsed.slug !== undefined && expectedSlug && parsed.slug !== expectedSlug) {
664
+ throw new MalformedArtifactError('slug mismatch', {
665
+ field: 'slug',
666
+ expected: expectedSlug,
667
+ got: parsed.slug,
668
+ raw: parsed,
669
+ });
670
+ }
671
+ if (parsed.iteration !== undefined) {
672
+ if (!Number.isInteger(parsed.iteration)) {
673
+ throw new MalformedArtifactError('iteration must be integer', {
674
+ field: 'iteration',
675
+ expected: 'integer',
676
+ got: typeof parsed.iteration,
677
+ raw: parsed,
678
+ });
679
+ }
680
+ // v5.7 §4.25 P1 — iteration validation is STRUCTURAL ONLY (must be integer).
681
+ // Originally proposed as a strict lower bound (worker can never regress
682
+ // below state.iteration_floor), this caused false BLOCKs in real campaigns
683
+ // because (a) workers may carry over a previous iteration value across
684
+ // multiple iterations without updating the field, and (b) the leader's
685
+ // state.iteration is authoritative regardless of what the worker writes.
686
+ // The leader owns iteration tracking; the worker's value is informational
687
+ // only. State-consistency enforcement is a higher-layer concern (analytics
688
+ // post-mortem), not a contract-violation BLOCK trigger. We deliberately
689
+ // accept any integer here; iterationFloor parameter is retained in ctx for
690
+ // backwards compatibility with call sites but no longer gates this check.
691
+ }
692
+ if (parsed.signal_type !== undefined && expectedSignalType && parsed.signal_type !== expectedSignalType) {
693
+ throw new MalformedArtifactError('signal_type mismatch', {
694
+ field: 'signal_type',
695
+ expected: expectedSignalType,
696
+ got: parsed.signal_type,
697
+ raw: parsed,
698
+ });
699
+ }
700
+ if (parsed.us_id !== undefined && Array.isArray(allowedUsIds) && allowedUsIds.length > 0) {
701
+ if (!allowedUsIds.includes(parsed.us_id)) {
702
+ throw new MalformedArtifactError(
703
+ `us_id ${parsed.us_id} not in allowed set [${allowedUsIds.join(', ')}]`,
704
+ {
705
+ field: 'us_id',
706
+ expected: `one of [${allowedUsIds.join(', ')}]`,
707
+ got: parsed.us_id,
708
+ raw: parsed,
709
+ },
710
+ );
711
+ }
712
+ }
713
+ return parsed;
714
+ }
715
+
464
716
  async function writeSentinel(filePath, status, usId, reason, classification = null, paths = null) {
465
717
  // governance §1f BLOCKED Surfacing: BLOCKED is surfaced on FIVE channels —
466
718
  // sentinel (markdown + JSON sidecar), status, console (stderr), report,
467
719
  // and (US-020 R8 P1-H, 5th channel) memory.md/latest.md hygiene update.
468
720
  // Legacy 1-line parsers still work because line 1 is unchanged.
721
+ //
722
+ // v5.7 §4.24 — Write Order Contract REVERSED for first-writer-wins:
723
+ // 1. markdown sentinel FIRST via writeSentinelExclusive (O_EXCL lock).
724
+ // Whoever wins this is the canonical writer for this campaign exit.
725
+ // 2. JSON sidecar SECOND, only if we won the md write.
726
+ // Invariant: md exists ⇒ JSON exists (within ≤50ms; watchers retry).
727
+ // If two paths race to write blocked.md/complete.md, exactly ONE wins;
728
+ // the loser sees `wrote=false, reason=already_exists` and returns silently
729
+ // (the campaign is already classified). Cross-path category collisions
730
+ // resolve by first-fired timestamp (existing return-on-first-error pattern).
469
731
  const lines = [`${status.toUpperCase()}: ${usId}`];
470
732
  if (reason) lines.push(`Reason: ${reason}`);
471
733
  if (classification?.reason_category) {
472
734
  lines.push(`Category: ${classification.reason_category}`);
473
735
  }
736
+ const mdBody = `${lines.join('\n')}\n`;
737
+
738
+ const result = await writeSentinelExclusive(filePath, mdBody);
739
+ if (!result.wrote) {
740
+ // Another path already wrote the sentinel for this campaign. Idempotent
741
+ // no-op — we are NOT the canonical writer; do not overwrite the JSON
742
+ // sidecar either or we'll desynchronize from the winning md.
743
+ return result;
744
+ }
474
745
 
475
- // P1-D Write Order Contract:
476
- // 1. JSON sidecar FIRST (atomic per-file rename via writeFile).
477
- // 2. markdown sentinel SECOND.
478
- // Invariant: markdown exists ⇒ JSON exists. Wrappers watch markdown,
479
- // then read JSON; if JSON not yet visible (rare race), retry up to 5×50ms.
480
746
  if (status === 'blocked' && classification) {
481
747
  const jsonPath = filePath.replace(/\.md$/, '.json');
482
748
  let hygieneViolated = false;
@@ -503,7 +769,7 @@ async function writeSentinel(filePath, status, usId, reason, classification = nu
503
769
  await fs.writeFile(jsonPath, `${JSON.stringify(jsonBody, null, 2)}\n`, 'utf8');
504
770
  }
505
771
 
506
- await fs.writeFile(filePath, `${lines.join('\n')}\n`, 'utf8');
772
+ return result;
507
773
  }
508
774
 
509
775
  async function runFinalSequentialVerify({
@@ -514,6 +780,7 @@ async function runFinalSequentialVerify({
514
780
  verifierPaneId,
515
781
  pollForSignal,
516
782
  runIntegrationCheck,
783
+ iterTimeoutMs,
517
784
  }) {
518
785
  const verifierModel = state.final_verifier_model;
519
786
 
@@ -532,6 +799,7 @@ async function runFinalSequentialVerify({
532
799
  const verdict = await pollForSignal(paths.verdictFile, {
533
800
  mode: parseModelFlag(verifierModel, 'verifier').engine,
534
801
  paneId: verifierPaneId,
802
+ timeoutMs: iterTimeoutMs,
535
803
  });
536
804
 
537
805
  if (verdict.verdict !== 'pass') {
@@ -568,8 +836,31 @@ async function runFinalSequentialVerify({
568
836
  };
569
837
  }
570
838
 
571
- function buildFlywheelTriggerCmd({ flywheelPromptFile, flywheelModel, rootDir }) {
572
- return `cd ${JSON.stringify(rootDir)} && DISABLE_OMC=1 claude --model ${flywheelModel} --no-mcp -p "$(cat ${JSON.stringify(flywheelPromptFile)})"`;
839
+ // v5.7 §4.11.a (refactored per code-review HIGH): single source-of-truth for
840
+ // the home rlp-desk dir and the autonomous claude command shape. Was duplicated
841
+ // across buildFlywheelTriggerCmd/buildGuardTriggerCmd byte-for-byte.
842
+ const HOME_DESK_DIR = path.join(os.homedir(), '.claude', 'ralph-desk');
843
+
844
+ function buildAutonomousClaudeCmd({ promptFile, model, rootDir, homeDeskDir = HOME_DESK_DIR }) {
845
+ // §4.9: ANTHROPIC_BETA prefix for Opus 1M context.
846
+ const betaPrefix = isOpusModel(model)
847
+ ? `ANTHROPIC_BETA=${shellQuote(OPUS_1M_BETA)} `
848
+ : '';
849
+ // §4.11.a: --add-dir whitelist (home rlp-desk + campaign cwd) for true autonomy.
850
+ const addDirParts = [];
851
+ if (homeDeskDir) addDirParts.push(`--add-dir ${shellQuote(homeDeskDir)}`);
852
+ if (rootDir) addDirParts.push(`--add-dir ${shellQuote(rootDir)}`);
853
+ const addDir = addDirParts.length ? ' ' + addDirParts.join(' ') : '';
854
+ return `cd ${JSON.stringify(rootDir)} && DISABLE_OMC=1 ${betaPrefix}claude --model ${shellQuote(model)} --no-mcp${addDir} -p "$(cat ${JSON.stringify(promptFile)})"`;
855
+ }
856
+
857
+ // Thin wrappers retained for call-site clarity + possible per-role customization.
858
+ function buildFlywheelTriggerCmd({ flywheelPromptFile, flywheelModel, rootDir, homeDeskDir }) {
859
+ return buildAutonomousClaudeCmd({ promptFile: flywheelPromptFile, model: flywheelModel, rootDir, homeDeskDir });
860
+ }
861
+
862
+ function buildGuardTriggerCmd({ guardPromptFile, guardModel, rootDir, homeDeskDir }) {
863
+ return buildAutonomousClaudeCmd({ promptFile: guardPromptFile, model: guardModel, rootDir, homeDeskDir });
573
864
  }
574
865
 
575
866
  async function dispatchFlywheel({ paths, sendKeys, flywheelPaneId, flywheelModel, rootDir }) {
@@ -581,10 +872,6 @@ async function dispatchFlywheel({ paths, sendKeys, flywheelPaneId, flywheelModel
581
872
  await sendKeys(flywheelPaneId, triggerCmd);
582
873
  }
583
874
 
584
- function buildGuardTriggerCmd({ guardPromptFile, guardModel, rootDir }) {
585
- return `cd ${JSON.stringify(rootDir)} && DISABLE_OMC=1 claude --model ${guardModel} --no-mcp -p "$(cat ${JSON.stringify(guardPromptFile)})"`;
586
- }
587
-
588
875
  async function dispatchGuard({ paths, sendKeys, guardPaneId, guardModel, rootDir }) {
589
876
  const triggerCmd = buildGuardTriggerCmd({
590
877
  guardPromptFile: paths.flywheelGuardPromptFile,
@@ -610,12 +897,83 @@ export function shouldRunGuard(flywheelGuard, state, usId) {
610
897
  export async function run(slug, options = {}) {
611
898
  const rootDir = path.resolve(options.rootDir ?? process.cwd());
612
899
  const paths = buildPaths(rootDir, slug);
900
+ // v5.7 §4.24 §1g — runtime invariant: every terminal exit of run() MUST
901
+ // leave exactly one sentinel on disk (blocked.md XOR complete.md). The
902
+ // try/finally below is the last-resort backstop that writes a synthetic
903
+ // BLOCKED if the body throws or returns without a terminal sentinel.
904
+ // Idempotent via writeSentinelExclusive — a real BLOCKED already in place
905
+ // is not overwritten.
906
+ let runResult;
907
+ let runThrew;
908
+ try {
909
+ runResult = await _runCampaignBody(slug, options, paths, rootDir);
910
+ return runResult;
911
+ } catch (error) {
912
+ runThrew = error;
913
+ throw error;
914
+ } finally {
915
+ await _ensureTerminalSentinel({
916
+ paths,
917
+ slug,
918
+ result: runResult,
919
+ threwError: runThrew,
920
+ });
921
+ }
922
+ }
923
+
924
+ async function _ensureTerminalSentinel({ paths, slug, result, threwError }) {
925
+ // 'continue' is paused, not terminal. Real terminal: 'blocked' or 'complete'.
926
+ // If neither sentinel exists at exit, leader exited unexpectedly. Write
927
+ // synthetic BLOCKED `infra_failure/leader_exited_without_terminal_state`.
928
+ if (result && result.status === 'continue') {
929
+ return;
930
+ }
931
+ let blockedExists = false;
932
+ let completeExists = false;
933
+ try { blockedExists = await exists(paths.blockedSentinel); } catch {}
934
+ try { completeExists = await exists(paths.completeSentinel); } catch {}
935
+ if (blockedExists || completeExists) {
936
+ return;
937
+ }
938
+ const reason = threwError
939
+ ? `Leader exited unexpectedly (no terminal sentinel): ${threwError?.message ?? threwError}`
940
+ : 'Leader exited without writing terminal sentinel';
941
+ const classification = {
942
+ slug,
943
+ iteration: 0,
944
+ reason_category: 'infra_failure',
945
+ failure_category: 'leader_exited_without_terminal_state',
946
+ recoverable: false,
947
+ suggested_action: 'investigate_leader_logs',
948
+ };
949
+ try {
950
+ await writeSentinel(
951
+ paths.blockedSentinel,
952
+ 'blocked',
953
+ 'ALL',
954
+ reason,
955
+ classification,
956
+ paths,
957
+ );
958
+ } catch (sentinelError) {
959
+ // Best-effort. If even the backstop write fails, log to stderr so the
960
+ // operator has SOME signal. Do NOT swallow the original error.
961
+ console.error('[run] failed to write backstop BLOCKED sentinel:', sentinelError);
962
+ }
963
+ }
964
+
965
+ async function _runCampaignBody(slug, options, paths, rootDir) {
613
966
  const sendKeys = options.sendKeys ?? defaultSendKeys;
614
967
  const createPane = options.createPane ?? defaultCreatePane;
615
968
  const createSession = options.createSession ?? defaultCreateSession;
616
969
  const pollForSignal = options.pollForSignal ?? defaultPollForSignal;
617
970
  const runIntegrationCheck = options.runIntegrationCheck ?? (async () => ({ exitCode: 0, summary: 'integration skipped' }));
618
971
  const maxIterations = options.maxIterations ?? 100;
972
+ // v5.7 §4.19: campaign-level pollForSignal timeout (Node leader fix).
973
+ // The CLI parses --iter-timeout but never forwarded it to pollForSignal,
974
+ // so every campaign hit the 5s signal-poller default and exited
975
+ // immediately. Default 600s (10 min) per CLI documentation; convert to ms.
976
+ const iterTimeoutMs = ((options.iterTimeout ?? 600) * 1000);
619
977
 
620
978
  await ensureDirs(paths);
621
979
  await ensureScaffold(paths);
@@ -710,15 +1068,26 @@ export async function run(slug, options = {}) {
710
1068
 
711
1069
  state.current_us = getNextUs(usList, state.verified_us, state.current_us);
712
1070
  if (state.current_us === 'ALL') {
713
- const finalResult = await runFinalSequentialVerify({
714
- paths,
715
- state,
716
- usList,
717
- sendKeys,
718
- verifierPaneId: state.verifier_pane_id,
719
- pollForSignal,
720
- runIntegrationCheck,
721
- });
1071
+ let finalResult;
1072
+ try {
1073
+ finalResult = await runFinalSequentialVerify({
1074
+ paths,
1075
+ state,
1076
+ usList,
1077
+ sendKeys,
1078
+ verifierPaneId: state.verifier_pane_id,
1079
+ pollForSignal,
1080
+ runIntegrationCheck,
1081
+ iterTimeoutMs,
1082
+ });
1083
+ } catch (error) {
1084
+ // v5.7 §4.25 — uniform poll-failure handling for final verifier.
1085
+ return _handlePollFailure(error, {
1086
+ paths, state, slug, options,
1087
+ role: 'final_verifier',
1088
+ usIdOverride: 'ALL',
1089
+ });
1090
+ }
722
1091
 
723
1092
  if (finalResult.status === 'complete') {
724
1093
  state.phase = 'complete';
@@ -781,10 +1150,25 @@ export async function run(slug, options = {}) {
781
1150
  rootDir,
782
1151
  });
783
1152
 
784
- const flywheelSignal = await pollForSignal(paths.flywheelSignalFile, {
785
- mode: 'claude',
786
- paneId: state.flywheel_pane_id ?? state.verifier_pane_id,
787
- });
1153
+ let flywheelSignal;
1154
+ try {
1155
+ flywheelSignal = await pollForSignal(paths.flywheelSignalFile, {
1156
+ mode: 'claude',
1157
+ paneId: state.flywheel_pane_id ?? state.verifier_pane_id,
1158
+ timeoutMs: iterTimeoutMs,
1159
+ });
1160
+ validateArtifact(flywheelSignal, {
1161
+ expectedSlug: slug,
1162
+ iterationFloor: state.iteration,
1163
+ expectedSignalType: 'flywheel_signal',
1164
+ allowedUsIds: [...usList, 'ALL'],
1165
+ });
1166
+ } catch (error) {
1167
+ return _handlePollFailure(error, {
1168
+ paths, state, slug, options,
1169
+ role: 'flywheel',
1170
+ });
1171
+ }
788
1172
 
789
1173
  state.last_flywheel_decision = flywheelSignal.decision;
790
1174
  // P0-A multi-mission orchestration: optionally captured from flywheel signal.
@@ -804,10 +1188,25 @@ export async function run(slug, options = {}) {
804
1188
 
805
1189
  await dispatchGuard({ paths, sendKeys, guardPaneId, guardModel, rootDir });
806
1190
 
807
- const guardVerdict = await pollForSignal(paths.flywheelGuardVerdictFile, {
808
- mode: 'claude',
809
- paneId: guardPaneId,
810
- });
1191
+ let guardVerdict;
1192
+ try {
1193
+ guardVerdict = await pollForSignal(paths.flywheelGuardVerdictFile, {
1194
+ mode: 'claude',
1195
+ paneId: guardPaneId,
1196
+ timeoutMs: iterTimeoutMs,
1197
+ });
1198
+ validateArtifact(guardVerdict, {
1199
+ expectedSlug: slug,
1200
+ iterationFloor: state.iteration,
1201
+ expectedSignalType: 'flywheel_guard_verdict',
1202
+ allowedUsIds: [...usList, 'ALL'],
1203
+ });
1204
+ } catch (error) {
1205
+ return _handlePollFailure(error, {
1206
+ paths, state, slug, options,
1207
+ role: 'guard',
1208
+ });
1209
+ }
811
1210
 
812
1211
  if (!state.flywheel_guard_count[state.current_us]) {
813
1212
  state.flywheel_guard_count[state.current_us] = 0;
@@ -911,9 +1310,18 @@ export async function run(slug, options = {}) {
911
1310
  signal = await pollForSignal(paths.signalFile, {
912
1311
  mode: parseModelFlag(state.worker_model).engine,
913
1312
  paneId: state.worker_pane_id,
1313
+ timeoutMs: iterTimeoutMs,
1314
+ });
1315
+ validateArtifact(signal, {
1316
+ expectedSlug: slug,
1317
+ iterationFloor: state.iteration,
1318
+ expectedSignalType: 'signal',
1319
+ allowedUsIds: [...usList, 'ALL'],
914
1320
  });
915
1321
  } catch (error) {
916
1322
  if (error instanceof TimeoutError && parseModelFlag(state.worker_model).engine === 'codex') {
1323
+ // v5.7 — codex CLI exits cleanly after writing signal; if pollForSignal
1324
+ // timed out for codex, synthesize a verify signal so the loop continues.
917
1325
  signal = {
918
1326
  iteration: state.iteration,
919
1327
  status: 'verify',
@@ -921,7 +1329,12 @@ export async function run(slug, options = {}) {
921
1329
  summary: 'auto-generated after codex exit fallback',
922
1330
  };
923
1331
  } else {
924
- throw error;
1332
+ // v5.7 §4.25 — uniform handling for WorkerExitedError, PromptBlockedError,
1333
+ // MalformedArtifactError, TimeoutError, and unknown errors.
1334
+ return _handlePollFailure(error, {
1335
+ paths, state, slug, options,
1336
+ role: 'worker',
1337
+ });
925
1338
  }
926
1339
  }
927
1340
 
@@ -959,10 +1372,26 @@ export async function run(slug, options = {}) {
959
1372
  verifierModel,
960
1373
  });
961
1374
 
962
- const verdict = await pollForSignal(paths.verdictFile, {
963
- mode: parseModelFlag(verifierModel, 'verifier').engine,
964
- paneId: state.verifier_pane_id,
965
- });
1375
+ let verdict;
1376
+ try {
1377
+ verdict = await pollForSignal(paths.verdictFile, {
1378
+ mode: parseModelFlag(verifierModel, 'verifier').engine,
1379
+ paneId: state.verifier_pane_id,
1380
+ timeoutMs: iterTimeoutMs,
1381
+ });
1382
+ validateArtifact(verdict, {
1383
+ expectedSlug: slug,
1384
+ iterationFloor: state.iteration,
1385
+ expectedSignalType: 'verdict',
1386
+ allowedUsIds: [...usList, 'ALL'],
1387
+ });
1388
+ } catch (error) {
1389
+ return _handlePollFailure(error, {
1390
+ paths, state, slug, options,
1391
+ role: 'verifier',
1392
+ usIdOverride: usId,
1393
+ });
1394
+ }
966
1395
 
967
1396
  if (verdict.verdict === 'pass') {
968
1397
  state.consecutive_failures = 0;