@ai-dev-methodologies/rlp-desk 0.11.1 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/docs/plans/spicy-booping-galaxy.md +322 -0
  2. package/docs/rlp-desk/artifact-schema.md +99 -0
  3. package/docs/rlp-desk/ci-setup.md +100 -0
  4. package/docs/rlp-desk/e2e-scenarios.md +102 -0
  5. package/docs/rlp-desk/plans/rlp-desk-tmux-flywheel-routing.md +730 -0
  6. package/install.sh +93 -20
  7. package/package.json +9 -3
  8. package/scripts/build-node-manifest.js +52 -0
  9. package/scripts/postinstall.js +162 -8
  10. package/src/commands/rlp-desk.md +73 -50
  11. package/src/governance.md +56 -7
  12. package/src/node/MANIFEST.txt +15 -0
  13. package/src/node/cli/command-builder.mjs +43 -5
  14. package/src/node/constants.mjs +19 -0
  15. package/src/node/init/campaign-initializer.mjs +100 -10
  16. package/src/node/polling/signal-poller.mjs +139 -3
  17. package/src/node/reporting/campaign-reporting.mjs +5 -1
  18. package/src/node/run.mjs +31 -2
  19. package/src/node/runner/campaign-main-loop.mjs +521 -44
  20. package/src/node/runner/leader-registry.mjs +100 -0
  21. package/src/node/runner/prompt-detector.mjs +41 -0
  22. package/src/node/runner/prompt-dismisser.mjs +200 -0
  23. package/src/node/shared/fs.mjs +38 -0
  24. package/src/node/util/debug-log.mjs +56 -0
  25. package/src/node/util/desk-root.mjs +24 -0
  26. package/src/node/util/shell-quote.mjs +12 -0
  27. package/docs/superpowers/plans/2026-04-24-gpt-5-5-default.md +0 -517
  28. package/docs/superpowers/specs/2026-04-24-gpt-5-5-default.md +0 -107
  29. /package/docs/{TODO-verification-next.md → rlp-desk/TODO-verification-next.md} +0 -0
  30. /package/docs/{architecture.md → rlp-desk/architecture.md} +0 -0
  31. /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-flywheel-enhancement.md +0 -0
  32. /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-pivot-step.md +0 -0
  33. /package/docs/{blueprints → rlp-desk/blueprints}/plan-flywheel-enhancement.md +0 -0
  34. /package/docs/{blueprints → rlp-desk/blueprints}/sv-architecture-rethink.md +0 -0
  35. /package/docs/{getting-started.md → rlp-desk/getting-started.md} +0 -0
  36. /package/docs/{internal → rlp-desk/internal}/verification-policy-gap-analysis.md +0 -0
  37. /package/docs/{internal → rlp-desk/internal}/verification-strategy-research.md +0 -0
  38. /package/docs/{multi-mission-orchestration.md → rlp-desk/multi-mission-orchestration.md} +0 -0
  39. /package/docs/{plans → rlp-desk/plans}/cozy-gliding-trinket.md +0 -0
  40. /package/docs/{plans → rlp-desk/plans}/frolicking-churning-honey.md +0 -0
  41. /package/docs/{plans → rlp-desk/plans}/keen-sauteeing-snowflake.md +0 -0
  42. /package/docs/{plans → rlp-desk/plans}/mutable-booping-corbato.md +0 -0
  43. /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11-handoff-7fixes.md +0 -0
  44. /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11.1-tmux-pane-disappearance.md +0 -0
  45. /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert-agent-a8cd695ffca2a3ad8.md +0 -0
  46. /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert.md +0 -0
  47. /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie-agent-a6814625642e956da.md +0 -0
  48. /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie.md +0 -0
  49. /package/docs/{plans → rlp-desk/plans}/validated-snacking-crayon.md +0 -0
  50. /package/docs/{protocol-reference.md → rlp-desk/protocol-reference.md} +0 -0
@@ -1,12 +1,22 @@
1
1
  import fs from 'node:fs/promises';
2
+ import fsSync from 'node:fs';
2
3
  import os from 'node:os';
3
4
  import path from 'node:path';
4
5
  import { execFile } from 'node:child_process';
5
6
  import { promisify } from 'node:util';
6
7
 
7
8
  import { buildClaudeCmd, buildCodexCmd, parseModelFlag } from '../cli/command-builder.mjs';
9
+ import { shellQuote } from '../util/shell-quote.mjs';
10
+ import { OPUS_1M_BETA, isOpusModel } from '../constants.mjs';
8
11
  import { initCampaign } from '../init/campaign-initializer.mjs';
9
- import { TimeoutError, pollForSignal as defaultPollForSignal } from '../polling/signal-poller.mjs';
12
+ import { LEGACY_DESK_REL, resolveDeskRoot } from '../util/desk-root.mjs';
13
+ import { writeSentinelExclusive } from '../shared/fs.mjs';
14
+ import {
15
+ TimeoutError,
16
+ WorkerExitedError,
17
+ PromptBlockedError,
18
+ pollForSignal as defaultPollForSignal,
19
+ } from '../polling/signal-poller.mjs';
10
20
  import {
11
21
  assembleVerifierPrompt,
12
22
  assembleWorkerPrompt,
@@ -33,8 +43,23 @@ const MODEL_UPGRADES = {
33
43
  'gpt-5.3-codex-spark:xhigh': 'BLOCKED',
34
44
  };
35
45
 
36
- function buildPaths(rootDir, slug) {
37
- const deskRoot = path.join(rootDir, '.claude', 'ralph-desk');
46
+ // v0.13.0: legacy .claude/ralph-desk/ guidance for run mode (no auto-mv).
47
+ export function detectLegacyDeskInRunMode(rootDir, env = process.env) {
48
+ const legacyPath = path.join(rootDir, LEGACY_DESK_REL);
49
+ if (!fsSync.existsSync(legacyPath)) {
50
+ return null;
51
+ }
52
+
53
+ const newPath = resolveDeskRoot(rootDir, env);
54
+ const newRel = path.relative(rootDir, newPath) || path.basename(newPath);
55
+ const message =
56
+ `Legacy ${LEGACY_DESK_REL}/ detected. Run mode does not auto-migrate to protect in-flight campaigns. ` +
57
+ `Run: mv ${LEGACY_DESK_REL} ${newRel} then re-run.`;
58
+ return { legacyPath, newPath, message };
59
+ }
60
+
61
+ function buildPaths(rootDir, slug, env = process.env) {
62
+ const deskRoot = resolveDeskRoot(rootDir, env);
38
63
  const campaignLogDir = path.join(deskRoot, 'logs', slug);
39
64
 
40
65
  return {
@@ -57,7 +82,11 @@ function buildPaths(rootDir, slug) {
57
82
  prdFile: path.join(deskRoot, 'plans', `prd-${slug}.md`),
58
83
  testSpecFile: path.join(deskRoot, 'plans', `test-spec-${slug}.md`),
59
84
  analyticsFile: path.join(campaignLogDir, 'campaign.jsonl'),
60
- analyticsDir: path.join(os.homedir(), '.claude', 'ralph-desk', 'analytics', slug),
85
+ // v5.7 §4.11.b: project-local analytics so Worker/Verifier prompts that
86
+ // reference this path stay inside cwd-tree (no `--add-dir` whitelist needed
87
+ // for cross-cwd writes). Cross-project rollup uses ~/.claude/ralph-desk/registry.jsonl
88
+ // (Leader-only, never appears in Worker prompts) — see §4.11.c.
89
+ analyticsDir: path.join(deskRoot, 'analytics', slug),
61
90
  reportFile: path.join(campaignLogDir, 'campaign-report.md'),
62
91
  statusFile: path.join(campaignLogDir, 'runtime', 'status.json'),
63
92
  flywheelPromptFile: path.join(deskRoot, 'prompts', `${slug}.flywheel.prompt.md`),
@@ -418,7 +447,41 @@ async function _checkLaneViolations(paths, snapshotBefore, snapshotAfter, state,
418
447
  // the zsh helper _classify_cross_us_or_metric in lib_ralph_desk.zsh.
419
448
  const CROSS_US_TOKEN_RE = /depends on US-|blocking US-|awaits US-|post-iter US-|requires US-\d+|cross-US|US-\d+ 산출물|신규 US-|post-iter/i;
420
449
 
421
- // P1-D Failure Taxonomy classifier. governance §1f locks the 6 reason_category
450
+ // v5.7 §4.25 typed enum for _classifyBlock tags. Replaces ad-hoc string
451
+ // literals scattered across writeSentinel call sites. Frozen via Object.freeze (immutable; note a misspelled key still reads as undefined).
452
+ export const BLOCK_TAGS = Object.freeze({
453
+ // Verdict-driven (Verifier 'fail')
454
+ VERIFIER: 'verifier',
455
+ // Flywheel/Guard verdicts
456
+ FLYWHEEL_INCONCLUSIVE: 'flywheel_inconclusive',
457
+ FLYWHEEL_EXHAUSTED: 'flywheel_exhausted',
458
+ // Model upgrade chain exhausted
459
+ MODEL_UPGRADE: 'model_upgrade',
460
+ // Worker/Verifier/Flywheel/Guard pane exited without artifacts (file-guarantee)
461
+ WORKER_EXITED: 'worker_exited_without_artifacts',
462
+ VERIFIER_EXITED: 'verifier_exited_without_artifacts',
463
+ FINAL_VERIFIER_EXITED: 'final_verifier_exited_without_artifacts',
464
+ FLYWHEEL_EXITED: 'flywheel_pane_exited_without_artifacts',
465
+ GUARD_EXITED: 'guard_pane_exited_without_artifacts',
466
+ // Auto-Enter unsafe (default-No prompt)
467
+ PROMPT_BLOCKED: 'prompt_blocked',
468
+ // v0.13.0: Claude Code self-modification permission prompt (cannot be
469
+ // dismissed by --dangerously-skip-permissions). Surfaced separately so
470
+ // wrappers know to switch worker engine, not retry.
471
+ PERMISSION_PROMPT: 'permission_prompt',
472
+ // Persistent timeout without exit (different from EXITED)
473
+ WORKER_TIMEOUT: 'worker_timeout',
474
+ VERIFIER_TIMEOUT: 'verifier_timeout',
475
+ FINAL_VERIFIER_TIMEOUT: 'final_verifier_timeout',
476
+ FLYWHEEL_TIMEOUT: 'flywheel_timeout',
477
+ GUARD_TIMEOUT: 'guard_timeout',
478
+ // Schema validator (P1)
479
+ MALFORMED_ARTIFACT: 'malformed_artifact',
480
+ // Backstop (run() try/finally)
481
+ LEADER_EXITED_WITHOUT_TERMINAL_STATE: 'leader_exited_without_terminal_state',
482
+ });
483
+
484
+ // P1-D Failure Taxonomy classifier. governance §1f locks the reason_category
422
485
  // values + recoverable + suggested_action defaults per source. wrapper MUST
423
486
  // branch on reason_category; failure_category is diagnostic only.
424
487
  function _classifyBlock(source, { verdict, state, slug } = {}) {
@@ -427,18 +490,18 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
427
490
  let action;
428
491
  let failureCategory = null;
429
492
  switch (source) {
430
- case 'flywheel_inconclusive':
431
- case 'flywheel_exhausted':
493
+ case BLOCK_TAGS.FLYWHEEL_INCONCLUSIVE:
494
+ case BLOCK_TAGS.FLYWHEEL_EXHAUSTED:
432
495
  category = 'mission_abort';
433
496
  recoverable = false;
434
497
  action = 'terminal_alert';
435
498
  break;
436
- case 'model_upgrade':
499
+ case BLOCK_TAGS.MODEL_UPGRADE:
437
500
  category = 'repeat_axis';
438
501
  recoverable = false;
439
502
  action = 'next_mission_chain';
440
503
  break;
441
- case 'verifier': {
504
+ case BLOCK_TAGS.VERIFIER: {
442
505
  const text = `${verdict?.reason ?? ''} ${verdict?.summary ?? ''}`;
443
506
  category = CROSS_US_TOKEN_RE.test(text) ? 'cross_us_dep' : 'metric_failure';
444
507
  recoverable = true;
@@ -446,6 +509,61 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
446
509
  failureCategory = verdict?.failure_category ?? null;
447
510
  break;
448
511
  }
512
+ // v5.7 §4.22 §4.24 — pane-exit-without-artifacts variants. All
513
+ // infra_failure, not recoverable (Worker/Verifier/Flywheel/Guard pane
514
+ // process is gone; campaign cannot proceed). failure_category preserved
515
+ // for telemetry.
516
+ case BLOCK_TAGS.WORKER_EXITED:
517
+ case BLOCK_TAGS.VERIFIER_EXITED:
518
+ case BLOCK_TAGS.FINAL_VERIFIER_EXITED:
519
+ case BLOCK_TAGS.FLYWHEEL_EXITED:
520
+ case BLOCK_TAGS.GUARD_EXITED:
521
+ category = 'infra_failure';
522
+ recoverable = false;
523
+ action = 'investigate_pane_logs';
524
+ failureCategory = source;
525
+ break;
526
+ // v5.7 §4.17 — auto-Enter on default-No would CANCEL; refuse and BLOCK.
527
+ case BLOCK_TAGS.PROMPT_BLOCKED:
528
+ category = 'infra_failure';
529
+ recoverable = false;
530
+ action = 'manual_prompt_response';
531
+ failureCategory = 'prompt_blocked';
532
+ break;
533
+ // v0.13.0: Claude Code self-modification gate — switch worker engine.
534
+ case BLOCK_TAGS.PERMISSION_PROMPT:
535
+ category = 'infra_failure';
536
+ recoverable = false;
537
+ action = 'switch_worker_to_codex_or_use_agent_mode';
538
+ failureCategory = 'permission_prompt';
539
+ break;
540
+ // Persistent timeout (no exit detected) — different from EXITED.
541
+ case BLOCK_TAGS.WORKER_TIMEOUT:
542
+ case BLOCK_TAGS.VERIFIER_TIMEOUT:
543
+ case BLOCK_TAGS.FINAL_VERIFIER_TIMEOUT:
544
+ case BLOCK_TAGS.FLYWHEEL_TIMEOUT:
545
+ case BLOCK_TAGS.GUARD_TIMEOUT:
546
+ category = 'infra_failure';
547
+ recoverable = false;
548
+ action = 'increase_iter_timeout_or_investigate';
549
+ failureCategory = source;
550
+ break;
551
+ // v5.7 §4.25 P1 — schema validator caught a malformed/incoherent artifact.
552
+ // Recoverable: next iteration's Worker prompt can include the schema
553
+ // error (P2 feedback loop closure) and try again.
554
+ case BLOCK_TAGS.MALFORMED_ARTIFACT:
555
+ category = 'contract_violation';
556
+ recoverable = true;
557
+ action = 'retry_with_schema_feedback';
558
+ failureCategory = 'malformed_artifact';
559
+ break;
560
+ // Backstop: run() exited without terminal sentinel.
561
+ case BLOCK_TAGS.LEADER_EXITED_WITHOUT_TERMINAL_STATE:
562
+ category = 'infra_failure';
563
+ recoverable = false;
564
+ action = 'investigate_leader_logs';
565
+ failureCategory = 'leader_exited_without_terminal_state';
566
+ break;
449
567
  default:
450
568
  category = 'metric_failure';
451
569
  recoverable = false;
@@ -461,22 +579,206 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
461
579
  };
462
580
  }
463
581
 
582
+ // v5.7 §4.25 — uniform poll-failure → BLOCKED handler, used by every
583
+ // `pollForSignal` call site (Worker, VerifierPerUS, VerifierFinal, Flywheel,
584
+ // Guard). Mirrors the canonical Worker pattern previously inlined at line
585
+ // ~1037-1110. Idempotent via writeSentinelExclusive (first-writer-wins).
586
+ //
587
+ // Returns the early-exit object the call site should `return` to its
588
+ // orchestrator. Callers MUST `return` it (not throw), so the run() loop
589
+ // terminates cleanly with phase=blocked.
590
+ async function _handlePollFailure(error, ctx) {
591
+ const {
592
+ paths,
593
+ state,
594
+ slug,
595
+ options,
596
+ role, // 'worker' | 'verifier' | 'final_verifier' | 'flywheel' | 'guard'
597
+ usIdOverride,
598
+ } = ctx;
599
+ const usId = usIdOverride ?? state.current_us;
600
+
601
+ let tag;
602
+ let reason;
603
+ if (error instanceof WorkerExitedError) {
604
+ tag = ({
605
+ worker: BLOCK_TAGS.WORKER_EXITED,
606
+ verifier: BLOCK_TAGS.VERIFIER_EXITED,
607
+ final_verifier: BLOCK_TAGS.FINAL_VERIFIER_EXITED,
608
+ flywheel: BLOCK_TAGS.FLYWHEEL_EXITED,
609
+ guard: BLOCK_TAGS.GUARD_EXITED,
610
+ })[role] ?? BLOCK_TAGS.WORKER_EXITED;
611
+ reason = `${error.reason ?? 'pane exited without artifacts'}: ${error.message}`;
612
+ } else if (error instanceof PromptBlockedError) {
613
+ // v0.13.0: error.category is set by signal-poller when Claude Code
614
+ // self-modification prompt is detected. Distinct tag drives a different
615
+ // failure_category + suggested_action than the default-No prompt path.
616
+ if (error.category === 'permission_prompt') {
617
+ tag = BLOCK_TAGS.PERMISSION_PROMPT;
618
+ reason = `${error.reason ?? 'permission prompt'}: ${error.message}`;
619
+ } else {
620
+ tag = BLOCK_TAGS.PROMPT_BLOCKED;
621
+ reason = `${error.reason ?? 'default-No prompt'}: ${error.message}`;
622
+ }
623
+ } else if (error instanceof MalformedArtifactError) {
624
+ tag = BLOCK_TAGS.MALFORMED_ARTIFACT;
625
+ reason = `Malformed artifact at ${error.field}: expected ${error.expected}, got ${error.got}`;
626
+ } else if (error instanceof TimeoutError) {
627
+ tag = ({
628
+ worker: BLOCK_TAGS.WORKER_TIMEOUT,
629
+ verifier: BLOCK_TAGS.VERIFIER_TIMEOUT,
630
+ final_verifier: BLOCK_TAGS.FINAL_VERIFIER_TIMEOUT,
631
+ flywheel: BLOCK_TAGS.FLYWHEEL_TIMEOUT,
632
+ guard: BLOCK_TAGS.GUARD_TIMEOUT,
633
+ })[role] ?? BLOCK_TAGS.WORKER_TIMEOUT;
634
+ reason = `${role} pollForSignal timed out: ${error.message}`;
635
+ } else {
636
+ // Unknown error — classify as infra_failure so the run() backstop does not
637
+ // have to synthesize a sentinel. The error is NOT re-thrown: it is converted
638
+ // into the same blocked early-exit object returned by the branches above.
639
+ tag = BLOCK_TAGS.LEADER_EXITED_WITHOUT_TERMINAL_STATE;
640
+ reason = `Unexpected error in ${role} poll: ${error?.message ?? error}`;
641
+ }
642
+
643
+ state.phase = 'blocked';
644
+ const classification = _classifyBlock(tag, { state, slug });
645
+ await writeSentinel(paths.blockedSentinel, 'blocked', usId, reason, classification, paths);
646
+ await writeStatus(paths, state, options.onStatusChange, options.now);
647
+ await generateCampaignReport({
648
+ slug,
649
+ reportFile: paths.reportFile,
650
+ prdFile: paths.prdFile,
651
+ statusFile: paths.statusFile,
652
+ analyticsFile: paths.analyticsFile,
653
+ now: resolveNow(options.now),
654
+ blockedReason: reason,
655
+ blockedCategory: classification.reason_category,
656
+ });
657
+
658
+ return {
659
+ status: 'blocked',
660
+ usId,
661
+ reason,
662
+ category: classification.reason_category,
663
+ statusFile: paths.statusFile,
664
+ };
665
+ }
666
+
667
+ // v5.7 §4.25 P1 — schema validator. Throws MalformedArtifactError if the
668
+ // parsed artifact violates the contract. Caller catches via _handlePollFailure.
669
+ // Hooks AFTER pollForSignal returns parsed JSON, BEFORE state mutation.
670
+ //
671
+ // Validates:
672
+ // - slug matches campaign slug (or absent — backwards compat)
673
+ // - iteration, when present, is an integer (structural check only; no lower bound is enforced)
674
+ // - signal_type matches read context ('signal' | 'verdict' | 'flywheel_signal' | 'flywheel_guard_verdict')
675
+ // The signal_type field is OPTIONAL for backwards compat — existing artifacts
676
+ // don't include it. Future writers should.
677
+ // - us_id ∈ usList ∪ {'ALL'} (closed-set)
678
+ export class MalformedArtifactError extends Error {
679
+ constructor(message, info = {}) {
680
+ super(message);
681
+ this.name = 'MalformedArtifactError';
682
+ this.field = info.field ?? null;
683
+ this.expected = info.expected ?? null;
684
+ this.got = info.got ?? null;
685
+ this.raw = info.raw ?? null;
686
+ }
687
+ }
688
+
689
+ function validateArtifact(parsed, ctx) {
690
+ const { expectedSlug, expectedSignalType, allowedUsIds } = ctx;
691
+ if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
692
+ throw new MalformedArtifactError('Artifact is not a JSON object', {
693
+ field: '<root>',
694
+ expected: 'object',
695
+ got: Array.isArray(parsed) ? 'array' : typeof parsed,
696
+ raw: parsed,
697
+ });
698
+ }
699
+ if (parsed.slug !== undefined && expectedSlug && parsed.slug !== expectedSlug) {
700
+ throw new MalformedArtifactError('slug mismatch', {
701
+ field: 'slug',
702
+ expected: expectedSlug,
703
+ got: parsed.slug,
704
+ raw: parsed,
705
+ });
706
+ }
707
+ if (parsed.iteration !== undefined) {
708
+ if (!Number.isInteger(parsed.iteration)) {
709
+ throw new MalformedArtifactError('iteration must be integer', {
710
+ field: 'iteration',
711
+ expected: 'integer',
712
+ got: typeof parsed.iteration,
713
+ raw: parsed,
714
+ });
715
+ }
716
+ // v5.7 §4.25 P1 — iteration validation is STRUCTURAL ONLY (must be integer).
717
+ // Originally proposed as a strict lower bound (worker can never regress
718
+ // below state.iteration_floor), this caused false BLOCKs in real campaigns
719
+ // because (a) workers may carry over a previous iteration value across
720
+ // multiple iterations without updating the field, and (b) the leader's
721
+ // state.iteration is authoritative regardless of what the worker writes.
722
+ // The leader owns iteration tracking; the worker's value is informational
723
+ // only. State-consistency enforcement is a higher-layer concern (analytics
724
+ // post-mortem), not a contract-violation BLOCK trigger. We deliberately
725
+ // accept any integer here; iterationFloor parameter is retained in ctx for
726
+ // backwards compatibility with call sites but no longer gates this check.
727
+ }
728
+ if (parsed.signal_type !== undefined && expectedSignalType && parsed.signal_type !== expectedSignalType) {
729
+ throw new MalformedArtifactError('signal_type mismatch', {
730
+ field: 'signal_type',
731
+ expected: expectedSignalType,
732
+ got: parsed.signal_type,
733
+ raw: parsed,
734
+ });
735
+ }
736
+ if (parsed.us_id !== undefined && Array.isArray(allowedUsIds) && allowedUsIds.length > 0) {
737
+ if (!allowedUsIds.includes(parsed.us_id)) {
738
+ throw new MalformedArtifactError(
739
+ `us_id ${parsed.us_id} not in allowed set [${allowedUsIds.join(', ')}]`,
740
+ {
741
+ field: 'us_id',
742
+ expected: `one of [${allowedUsIds.join(', ')}]`,
743
+ got: parsed.us_id,
744
+ raw: parsed,
745
+ },
746
+ );
747
+ }
748
+ }
749
+ return parsed;
750
+ }
751
+
464
752
  async function writeSentinel(filePath, status, usId, reason, classification = null, paths = null) {
465
753
  // governance §1f BLOCKED Surfacing: BLOCKED is surfaced on FIVE channels —
466
754
  // sentinel (markdown + JSON sidecar), status, console (stderr), report,
467
755
  // and (US-020 R8 P1-H, 5th channel) memory.md/latest.md hygiene update.
468
756
  // Legacy 1-line parsers still work because line 1 is unchanged.
757
+ //
758
+ // v5.7 §4.24 — Write Order Contract REVERSED for first-writer-wins:
759
+ // 1. markdown sentinel FIRST via writeSentinelExclusive (O_EXCL lock).
760
+ // Whoever wins this is the canonical writer for this campaign exit.
761
+ // 2. JSON sidecar SECOND, only if we won the md write.
762
+ // Invariant: md exists ⇒ JSON exists (within ≤50ms; watchers retry).
763
+ // If two paths race to write blocked.md/complete.md, exactly ONE wins;
764
+ // the loser sees `wrote=false, reason=already_exists` and returns silently
765
+ // (the campaign is already classified). Cross-path category collisions
766
+ // resolve by first-fired timestamp (existing return-on-first-error pattern).
469
767
  const lines = [`${status.toUpperCase()}: ${usId}`];
470
768
  if (reason) lines.push(`Reason: ${reason}`);
471
769
  if (classification?.reason_category) {
472
770
  lines.push(`Category: ${classification.reason_category}`);
473
771
  }
772
+ const mdBody = `${lines.join('\n')}\n`;
773
+
774
+ const result = await writeSentinelExclusive(filePath, mdBody);
775
+ if (!result.wrote) {
776
+ // Another path already wrote the sentinel for this campaign. Idempotent
777
+ // no-op — we are NOT the canonical writer; do not overwrite the JSON
778
+ // sidecar either or we'll desynchronize from the winning md.
779
+ return result;
780
+ }
474
781
 
475
- // P1-D Write Order Contract:
476
- // 1. JSON sidecar FIRST (atomic per-file rename via writeFile).
477
- // 2. markdown sentinel SECOND.
478
- // Invariant: markdown exists ⇒ JSON exists. Wrappers watch markdown,
479
- // then read JSON; if JSON not yet visible (rare race), retry up to 5×50ms.
480
782
  if (status === 'blocked' && classification) {
481
783
  const jsonPath = filePath.replace(/\.md$/, '.json');
482
784
  let hygieneViolated = false;
@@ -503,7 +805,7 @@ async function writeSentinel(filePath, status, usId, reason, classification = nu
503
805
  await fs.writeFile(jsonPath, `${JSON.stringify(jsonBody, null, 2)}\n`, 'utf8');
504
806
  }
505
807
 
506
- await fs.writeFile(filePath, `${lines.join('\n')}\n`, 'utf8');
808
+ return result;
507
809
  }
508
810
 
509
811
  async function runFinalSequentialVerify({
@@ -514,6 +816,7 @@ async function runFinalSequentialVerify({
514
816
  verifierPaneId,
515
817
  pollForSignal,
516
818
  runIntegrationCheck,
819
+ iterTimeoutMs,
517
820
  }) {
518
821
  const verifierModel = state.final_verifier_model;
519
822
 
@@ -532,6 +835,7 @@ async function runFinalSequentialVerify({
532
835
  const verdict = await pollForSignal(paths.verdictFile, {
533
836
  mode: parseModelFlag(verifierModel, 'verifier').engine,
534
837
  paneId: verifierPaneId,
838
+ timeoutMs: iterTimeoutMs,
535
839
  });
536
840
 
537
841
  if (verdict.verdict !== 'pass') {
@@ -568,8 +872,31 @@ async function runFinalSequentialVerify({
568
872
  };
569
873
  }
570
874
 
571
- function buildFlywheelTriggerCmd({ flywheelPromptFile, flywheelModel, rootDir }) {
572
- return `cd ${JSON.stringify(rootDir)} && DISABLE_OMC=1 claude --model ${flywheelModel} --no-mcp -p "$(cat ${JSON.stringify(flywheelPromptFile)})"`;
875
+ // v5.7 §4.11.a (refactored per code-review HIGH): single source-of-truth for
876
+ // the home rlp-desk dir and the autonomous claude command shape. Was duplicated
877
+ // across buildFlywheelTriggerCmd/buildGuardTriggerCmd byte-for-byte.
878
+ const HOME_DESK_DIR = path.join(os.homedir(), '.claude', 'ralph-desk');
879
+
880
+ function buildAutonomousClaudeCmd({ promptFile, model, rootDir, homeDeskDir = HOME_DESK_DIR }) {
881
+ // §4.9: ANTHROPIC_BETA prefix for Opus 1M context.
882
+ const betaPrefix = isOpusModel(model)
883
+ ? `ANTHROPIC_BETA=${shellQuote(OPUS_1M_BETA)} `
884
+ : '';
885
+ // §4.11.a: --add-dir whitelist (home rlp-desk + campaign cwd) for true autonomy.
886
+ const addDirParts = [];
887
+ if (homeDeskDir) addDirParts.push(`--add-dir ${shellQuote(homeDeskDir)}`);
888
+ if (rootDir) addDirParts.push(`--add-dir ${shellQuote(rootDir)}`);
889
+ const addDir = addDirParts.length ? ' ' + addDirParts.join(' ') : '';
890
+ return `cd ${JSON.stringify(rootDir)} && DISABLE_OMC=1 ${betaPrefix}claude --model ${shellQuote(model)} --no-mcp${addDir} -p "$(cat ${JSON.stringify(promptFile)})"`;
891
+ }
892
+
893
+ // Thin wrappers retained for call-site clarity + possible per-role customization.
894
+ function buildFlywheelTriggerCmd({ flywheelPromptFile, flywheelModel, rootDir, homeDeskDir }) {
895
+ return buildAutonomousClaudeCmd({ promptFile: flywheelPromptFile, model: flywheelModel, rootDir, homeDeskDir });
896
+ }
897
+
898
+ function buildGuardTriggerCmd({ guardPromptFile, guardModel, rootDir, homeDeskDir }) {
899
+ return buildAutonomousClaudeCmd({ promptFile: guardPromptFile, model: guardModel, rootDir, homeDeskDir });
573
900
  }
574
901
 
575
902
  async function dispatchFlywheel({ paths, sendKeys, flywheelPaneId, flywheelModel, rootDir }) {
@@ -581,10 +908,6 @@ async function dispatchFlywheel({ paths, sendKeys, flywheelPaneId, flywheelModel
581
908
  await sendKeys(flywheelPaneId, triggerCmd);
582
909
  }
583
910
 
584
- function buildGuardTriggerCmd({ guardPromptFile, guardModel, rootDir }) {
585
- return `cd ${JSON.stringify(rootDir)} && DISABLE_OMC=1 claude --model ${guardModel} --no-mcp -p "$(cat ${JSON.stringify(guardPromptFile)})"`;
586
- }
587
-
588
911
  async function dispatchGuard({ paths, sendKeys, guardPaneId, guardModel, rootDir }) {
589
912
  const triggerCmd = buildGuardTriggerCmd({
590
913
  guardPromptFile: paths.flywheelGuardPromptFile,
@@ -609,13 +932,96 @@ export function shouldRunGuard(flywheelGuard, state, usId) {
609
932
 
610
933
  export async function run(slug, options = {}) {
611
934
  const rootDir = path.resolve(options.rootDir ?? process.cwd());
612
- const paths = buildPaths(rootDir, slug);
935
+ const env = options.env ?? process.env;
936
+
937
+ // v0.13.0: refuse to run when legacy .claude/ralph-desk/ is present.
938
+ // init mode auto-migrates; run mode protects in-flight campaigns and
939
+ // surfaces a clear manual command to the operator.
940
+ const legacy = detectLegacyDeskInRunMode(rootDir, env);
941
+ if (legacy) {
942
+ const err = new Error(legacy.message);
943
+ err.code = 'LEGACY_DESK_DETECTED';
944
+ throw err;
945
+ }
946
+
947
+ const paths = buildPaths(rootDir, slug, env);
948
+ // v5.7 §4.24 §1g — runtime invariant: every terminal exit of run() MUST
949
+ // leave exactly one sentinel on disk (blocked.md XOR complete.md). The
950
+ // try/finally below is the last-resort backstop that writes a synthetic
951
+ // BLOCKED if the body throws or returns without a terminal sentinel.
952
+ // Idempotent via writeSentinelExclusive — a real BLOCKED already in place
953
+ // is not overwritten.
954
+ let runResult;
955
+ let runThrew;
956
+ try {
957
+ runResult = await _runCampaignBody(slug, options, paths, rootDir);
958
+ return runResult;
959
+ } catch (error) {
960
+ runThrew = error;
961
+ throw error;
962
+ } finally {
963
+ await _ensureTerminalSentinel({
964
+ paths,
965
+ slug,
966
+ result: runResult,
967
+ threwError: runThrew,
968
+ });
969
+ }
970
+ }
971
+
972
+ async function _ensureTerminalSentinel({ paths, slug, result, threwError }) {
973
+ // 'continue' is paused, not terminal. Real terminal: 'blocked' or 'complete'.
974
+ // If neither sentinel exists at exit, leader exited unexpectedly. Write
975
+ // synthetic BLOCKED `infra_failure/leader_exited_without_terminal_state`.
976
+ if (result && result.status === 'continue') {
977
+ return;
978
+ }
979
+ let blockedExists = false;
980
+ let completeExists = false;
981
+ try { blockedExists = await exists(paths.blockedSentinel); } catch {}
982
+ try { completeExists = await exists(paths.completeSentinel); } catch {}
983
+ if (blockedExists || completeExists) {
984
+ return;
985
+ }
986
+ const reason = threwError
987
+ ? `Leader exited unexpectedly (no terminal sentinel): ${threwError?.message ?? threwError}`
988
+ : 'Leader exited without writing terminal sentinel';
989
+ const classification = {
990
+ slug,
991
+ iteration: 0,
992
+ reason_category: 'infra_failure',
993
+ failure_category: 'leader_exited_without_terminal_state',
994
+ recoverable: false,
995
+ suggested_action: 'investigate_leader_logs',
996
+ };
997
+ try {
998
+ await writeSentinel(
999
+ paths.blockedSentinel,
1000
+ 'blocked',
1001
+ 'ALL',
1002
+ reason,
1003
+ classification,
1004
+ paths,
1005
+ );
1006
+ } catch (sentinelError) {
1007
+ // Best-effort. If even the backstop write fails, log to stderr so the
1008
+ // operator has SOME signal. Do NOT swallow the original error.
1009
+ console.error('[run] failed to write backstop BLOCKED sentinel:', sentinelError);
1010
+ }
1011
+ }
1012
+
1013
+ async function _runCampaignBody(slug, options, paths, rootDir) {
613
1014
  const sendKeys = options.sendKeys ?? defaultSendKeys;
614
1015
  const createPane = options.createPane ?? defaultCreatePane;
615
1016
  const createSession = options.createSession ?? defaultCreateSession;
616
1017
  const pollForSignal = options.pollForSignal ?? defaultPollForSignal;
617
1018
  const runIntegrationCheck = options.runIntegrationCheck ?? (async () => ({ exitCode: 0, summary: 'integration skipped' }));
618
1019
  const maxIterations = options.maxIterations ?? 100;
1020
+ // v5.7 §4.19: campaign-level pollForSignal timeout (Node leader fix).
1021
+ // The CLI parses --iter-timeout, but the value was never forwarded to pollForSignal,
1022
+ // so every campaign hit the 5s signal-poller default and exited
1023
+ // immediately. Default 600s (10 min) per CLI documentation; convert to ms.
1024
+ const iterTimeoutMs = ((options.iterTimeout ?? 600) * 1000);
619
1025
 
620
1026
  await ensureDirs(paths);
621
1027
  await ensureScaffold(paths);
@@ -710,15 +1116,26 @@ export async function run(slug, options = {}) {
710
1116
 
711
1117
  state.current_us = getNextUs(usList, state.verified_us, state.current_us);
712
1118
  if (state.current_us === 'ALL') {
713
- const finalResult = await runFinalSequentialVerify({
714
- paths,
715
- state,
716
- usList,
717
- sendKeys,
718
- verifierPaneId: state.verifier_pane_id,
719
- pollForSignal,
720
- runIntegrationCheck,
721
- });
1119
+ let finalResult;
1120
+ try {
1121
+ finalResult = await runFinalSequentialVerify({
1122
+ paths,
1123
+ state,
1124
+ usList,
1125
+ sendKeys,
1126
+ verifierPaneId: state.verifier_pane_id,
1127
+ pollForSignal,
1128
+ runIntegrationCheck,
1129
+ iterTimeoutMs,
1130
+ });
1131
+ } catch (error) {
1132
+ // v5.7 §4.25 — uniform poll-failure handling for final verifier.
1133
+ return _handlePollFailure(error, {
1134
+ paths, state, slug, options,
1135
+ role: 'final_verifier',
1136
+ usIdOverride: 'ALL',
1137
+ });
1138
+ }
722
1139
 
723
1140
  if (finalResult.status === 'complete') {
724
1141
  state.phase = 'complete';
@@ -781,10 +1198,25 @@ export async function run(slug, options = {}) {
781
1198
  rootDir,
782
1199
  });
783
1200
 
784
- const flywheelSignal = await pollForSignal(paths.flywheelSignalFile, {
785
- mode: 'claude',
786
- paneId: state.flywheel_pane_id ?? state.verifier_pane_id,
787
- });
1201
+ let flywheelSignal;
1202
+ try {
1203
+ flywheelSignal = await pollForSignal(paths.flywheelSignalFile, {
1204
+ mode: 'claude',
1205
+ paneId: state.flywheel_pane_id ?? state.verifier_pane_id,
1206
+ timeoutMs: iterTimeoutMs,
1207
+ });
1208
+ validateArtifact(flywheelSignal, {
1209
+ expectedSlug: slug,
1210
+ iterationFloor: state.iteration,
1211
+ expectedSignalType: 'flywheel_signal',
1212
+ allowedUsIds: [...usList, 'ALL'],
1213
+ });
1214
+ } catch (error) {
1215
+ return _handlePollFailure(error, {
1216
+ paths, state, slug, options,
1217
+ role: 'flywheel',
1218
+ });
1219
+ }
788
1220
 
789
1221
  state.last_flywheel_decision = flywheelSignal.decision;
790
1222
  // P0-A multi-mission orchestration: optionally captured from flywheel signal.
@@ -804,10 +1236,25 @@ export async function run(slug, options = {}) {
804
1236
 
805
1237
  await dispatchGuard({ paths, sendKeys, guardPaneId, guardModel, rootDir });
806
1238
 
807
- const guardVerdict = await pollForSignal(paths.flywheelGuardVerdictFile, {
808
- mode: 'claude',
809
- paneId: guardPaneId,
810
- });
1239
+ let guardVerdict;
1240
+ try {
1241
+ guardVerdict = await pollForSignal(paths.flywheelGuardVerdictFile, {
1242
+ mode: 'claude',
1243
+ paneId: guardPaneId,
1244
+ timeoutMs: iterTimeoutMs,
1245
+ });
1246
+ validateArtifact(guardVerdict, {
1247
+ expectedSlug: slug,
1248
+ iterationFloor: state.iteration,
1249
+ expectedSignalType: 'flywheel_guard_verdict',
1250
+ allowedUsIds: [...usList, 'ALL'],
1251
+ });
1252
+ } catch (error) {
1253
+ return _handlePollFailure(error, {
1254
+ paths, state, slug, options,
1255
+ role: 'guard',
1256
+ });
1257
+ }
811
1258
 
812
1259
  if (!state.flywheel_guard_count[state.current_us]) {
813
1260
  state.flywheel_guard_count[state.current_us] = 0;
@@ -911,9 +1358,18 @@ export async function run(slug, options = {}) {
911
1358
  signal = await pollForSignal(paths.signalFile, {
912
1359
  mode: parseModelFlag(state.worker_model).engine,
913
1360
  paneId: state.worker_pane_id,
1361
+ timeoutMs: iterTimeoutMs,
1362
+ });
1363
+ validateArtifact(signal, {
1364
+ expectedSlug: slug,
1365
+ iterationFloor: state.iteration,
1366
+ expectedSignalType: 'signal',
1367
+ allowedUsIds: [...usList, 'ALL'],
914
1368
  });
915
1369
  } catch (error) {
916
1370
  if (error instanceof TimeoutError && parseModelFlag(state.worker_model).engine === 'codex') {
1371
+ // v5.7 — codex CLI exits cleanly after writing signal; if pollForSignal
1372
+ // timed out for codex, synthesize a verify signal so the loop continues.
917
1373
  signal = {
918
1374
  iteration: state.iteration,
919
1375
  status: 'verify',
@@ -921,7 +1377,12 @@ export async function run(slug, options = {}) {
921
1377
  summary: 'auto-generated after codex exit fallback',
922
1378
  };
923
1379
  } else {
924
- throw error;
1380
+ // v5.7 §4.25 — uniform handling for WorkerExitedError, PromptBlockedError,
1381
+ // MalformedArtifactError, TimeoutError, and unknown errors.
1382
+ return _handlePollFailure(error, {
1383
+ paths, state, slug, options,
1384
+ role: 'worker',
1385
+ });
925
1386
  }
926
1387
  }
927
1388
 
@@ -959,10 +1420,26 @@ export async function run(slug, options = {}) {
959
1420
  verifierModel,
960
1421
  });
961
1422
 
962
- const verdict = await pollForSignal(paths.verdictFile, {
963
- mode: parseModelFlag(verifierModel, 'verifier').engine,
964
- paneId: state.verifier_pane_id,
965
- });
1423
+ let verdict;
1424
+ try {
1425
+ verdict = await pollForSignal(paths.verdictFile, {
1426
+ mode: parseModelFlag(verifierModel, 'verifier').engine,
1427
+ paneId: state.verifier_pane_id,
1428
+ timeoutMs: iterTimeoutMs,
1429
+ });
1430
+ validateArtifact(verdict, {
1431
+ expectedSlug: slug,
1432
+ iterationFloor: state.iteration,
1433
+ expectedSignalType: 'verdict',
1434
+ allowedUsIds: [...usList, 'ALL'],
1435
+ });
1436
+ } catch (error) {
1437
+ return _handlePollFailure(error, {
1438
+ paths, state, slug, options,
1439
+ role: 'verifier',
1440
+ usIdOverride: usId,
1441
+ });
1442
+ }
966
1443
 
967
1444
  if (verdict.verdict === 'pass') {
968
1445
  state.consecutive_failures = 0;