@ai-dev-methodologies/rlp-desk 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/rlp-desk/artifact-schema.md +99 -0
- package/docs/rlp-desk/ci-setup.md +100 -0
- package/docs/rlp-desk/e2e-scenarios.md +102 -0
- package/docs/rlp-desk/plans/rlp-desk-0.11.1-tmux-pane-disappearance.md +260 -0
- package/docs/rlp-desk/plans/rlp-desk-tmux-flywheel-routing.md +730 -0
- package/install.sh +93 -20
- package/package.json +8 -2
- package/scripts/build-node-manifest.js +52 -0
- package/scripts/postinstall.js +162 -8
- package/src/commands/rlp-desk.md +48 -25
- package/src/governance.md +68 -6
- package/src/node/MANIFEST.txt +15 -0
- package/src/node/cli/command-builder.mjs +25 -5
- package/src/node/constants.mjs +19 -0
- package/src/node/polling/signal-poller.mjs +119 -3
- package/src/node/runner/campaign-main-loop.mjs +470 -41
- package/src/node/runner/leader-registry.mjs +100 -0
- package/src/node/runner/prompt-dismisser.mjs +200 -0
- package/src/node/shared/fs.mjs +38 -0
- package/src/node/util/debug-log.mjs +56 -0
- package/src/node/util/shell-quote.mjs +12 -0
- package/docs/superpowers/plans/2026-04-24-gpt-5-5-default.md +0 -517
- package/docs/superpowers/specs/2026-04-24-gpt-5-5-default.md +0 -107
- /package/docs/{TODO-verification-next.md → rlp-desk/TODO-verification-next.md} +0 -0
- /package/docs/{architecture.md → rlp-desk/architecture.md} +0 -0
- /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-flywheel-enhancement.md +0 -0
- /package/docs/{blueprints → rlp-desk/blueprints}/blueprint-pivot-step.md +0 -0
- /package/docs/{blueprints → rlp-desk/blueprints}/plan-flywheel-enhancement.md +0 -0
- /package/docs/{blueprints → rlp-desk/blueprints}/sv-architecture-rethink.md +0 -0
- /package/docs/{getting-started.md → rlp-desk/getting-started.md} +0 -0
- /package/docs/{internal → rlp-desk/internal}/verification-policy-gap-analysis.md +0 -0
- /package/docs/{internal → rlp-desk/internal}/verification-strategy-research.md +0 -0
- /package/docs/{multi-mission-orchestration.md → rlp-desk/multi-mission-orchestration.md} +0 -0
- /package/docs/{plans → rlp-desk/plans}/cozy-gliding-trinket.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/frolicking-churning-honey.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/keen-sauteeing-snowflake.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/mutable-booping-corbato.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/rlp-desk-0.11-handoff-7fixes.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert-agent-a8cd695ffca2a3ad8.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/rlp-desk-elegant-papert.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie-agent-a6814625642e956da.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/toasty-whistling-diffie.md +0 -0
- /package/docs/{plans → rlp-desk/plans}/validated-snacking-crayon.md +0 -0
- /package/docs/{protocol-reference.md → rlp-desk/protocol-reference.md} +0 -0
|
@@ -5,8 +5,16 @@ import { execFile } from 'node:child_process';
|
|
|
5
5
|
import { promisify } from 'node:util';
|
|
6
6
|
|
|
7
7
|
import { buildClaudeCmd, buildCodexCmd, parseModelFlag } from '../cli/command-builder.mjs';
|
|
8
|
+
import { shellQuote } from '../util/shell-quote.mjs';
|
|
9
|
+
import { OPUS_1M_BETA, isOpusModel } from '../constants.mjs';
|
|
8
10
|
import { initCampaign } from '../init/campaign-initializer.mjs';
|
|
9
|
-
import {
|
|
11
|
+
import { writeSentinelExclusive } from '../shared/fs.mjs';
|
|
12
|
+
import {
|
|
13
|
+
TimeoutError,
|
|
14
|
+
WorkerExitedError,
|
|
15
|
+
PromptBlockedError,
|
|
16
|
+
pollForSignal as defaultPollForSignal,
|
|
17
|
+
} from '../polling/signal-poller.mjs';
|
|
10
18
|
import {
|
|
11
19
|
assembleVerifierPrompt,
|
|
12
20
|
assembleWorkerPrompt,
|
|
@@ -57,7 +65,11 @@ function buildPaths(rootDir, slug) {
|
|
|
57
65
|
prdFile: path.join(deskRoot, 'plans', `prd-${slug}.md`),
|
|
58
66
|
testSpecFile: path.join(deskRoot, 'plans', `test-spec-${slug}.md`),
|
|
59
67
|
analyticsFile: path.join(campaignLogDir, 'campaign.jsonl'),
|
|
60
|
-
|
|
68
|
+
// v5.7 §4.11.b: project-local analytics so Worker/Verifier prompts that
|
|
69
|
+
// reference this path stay inside cwd-tree (no `--add-dir` whitelist needed
|
|
70
|
+
// for cross-cwd writes). Cross-project rollup uses ~/.claude/ralph-desk/registry.jsonl
|
|
71
|
+
// (Leader-only, never appears in Worker prompts) — see §4.11.c.
|
|
72
|
+
analyticsDir: path.join(deskRoot, 'analytics', slug),
|
|
61
73
|
reportFile: path.join(campaignLogDir, 'campaign-report.md'),
|
|
62
74
|
statusFile: path.join(campaignLogDir, 'runtime', 'status.json'),
|
|
63
75
|
flywheelPromptFile: path.join(deskRoot, 'prompts', `${slug}.flywheel.prompt.md`),
|
|
@@ -418,7 +430,37 @@ async function _checkLaneViolations(paths, snapshotBefore, snapshotAfter, state,
|
|
|
418
430
|
// the zsh helper _classify_cross_us_or_metric in lib_ralph_desk.zsh.
|
|
419
431
|
const CROSS_US_TOKEN_RE = /depends on US-|blocking US-|awaits US-|post-iter US-|requires US-\d+|cross-US|US-\d+ 산출물|신규 US-|post-iter/i;
|
|
420
432
|
|
|
421
|
-
//
|
|
433
|
+
/**
 * Canonical tag values understood by `_classifyBlock` (v5.7 §4.25).
 *
 * Call sites reference these members instead of repeating raw string
 * literals. The object is frozen, so members cannot be reassigned, added, or
 * removed at runtime. Note that freezing does not guard against misspelled
 * member names: reading an unknown key still yields `undefined`.
 */
export const BLOCK_TAGS = Object.freeze({
  // Verifier returned a 'fail' verdict.
  VERIFIER: 'verifier',

  // Flywheel / Guard verdicts.
  FLYWHEEL_INCONCLUSIVE: 'flywheel_inconclusive',
  FLYWHEEL_EXHAUSTED: 'flywheel_exhausted',

  // The model upgrade chain has no further step to try.
  MODEL_UPGRADE: 'model_upgrade',

  // A Worker/Verifier/Flywheel/Guard pane exited without producing its
  // artifacts (file-guarantee violation).
  WORKER_EXITED: 'worker_exited_without_artifacts',
  VERIFIER_EXITED: 'verifier_exited_without_artifacts',
  FINAL_VERIFIER_EXITED: 'final_verifier_exited_without_artifacts',
  FLYWHEEL_EXITED: 'flywheel_pane_exited_without_artifacts',
  GUARD_EXITED: 'guard_pane_exited_without_artifacts',

  // Auto-Enter would be unsafe (the pane shows a default-No prompt).
  PROMPT_BLOCKED: 'prompt_blocked',

  // Poll timed out while the pane was still alive (distinct from *_EXITED).
  WORKER_TIMEOUT: 'worker_timeout',
  VERIFIER_TIMEOUT: 'verifier_timeout',
  FINAL_VERIFIER_TIMEOUT: 'final_verifier_timeout',
  FLYWHEEL_TIMEOUT: 'flywheel_timeout',
  GUARD_TIMEOUT: 'guard_timeout',

  // Schema validator rejected an artifact (P1).
  MALFORMED_ARTIFACT: 'malformed_artifact',

  // Backstop written by the run() try/finally.
  LEADER_EXITED_WITHOUT_TERMINAL_STATE: 'leader_exited_without_terminal_state',
});
|
|
462
|
+
|
|
463
|
+
// P1-D Failure Taxonomy classifier. governance §1f locks the reason_category
|
|
422
464
|
// values + recoverable + suggested_action defaults per source. wrapper MUST
|
|
423
465
|
// branch on reason_category; failure_category is diagnostic only.
|
|
424
466
|
function _classifyBlock(source, { verdict, state, slug } = {}) {
|
|
@@ -427,18 +469,18 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
|
|
|
427
469
|
let action;
|
|
428
470
|
let failureCategory = null;
|
|
429
471
|
switch (source) {
|
|
430
|
-
case
|
|
431
|
-
case
|
|
472
|
+
case BLOCK_TAGS.FLYWHEEL_INCONCLUSIVE:
|
|
473
|
+
case BLOCK_TAGS.FLYWHEEL_EXHAUSTED:
|
|
432
474
|
category = 'mission_abort';
|
|
433
475
|
recoverable = false;
|
|
434
476
|
action = 'terminal_alert';
|
|
435
477
|
break;
|
|
436
|
-
case
|
|
478
|
+
case BLOCK_TAGS.MODEL_UPGRADE:
|
|
437
479
|
category = 'repeat_axis';
|
|
438
480
|
recoverable = false;
|
|
439
481
|
action = 'next_mission_chain';
|
|
440
482
|
break;
|
|
441
|
-
case
|
|
483
|
+
case BLOCK_TAGS.VERIFIER: {
|
|
442
484
|
const text = `${verdict?.reason ?? ''} ${verdict?.summary ?? ''}`;
|
|
443
485
|
category = CROSS_US_TOKEN_RE.test(text) ? 'cross_us_dep' : 'metric_failure';
|
|
444
486
|
recoverable = true;
|
|
@@ -446,6 +488,54 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
|
|
|
446
488
|
failureCategory = verdict?.failure_category ?? null;
|
|
447
489
|
break;
|
|
448
490
|
}
|
|
491
|
+
// v5.7 §4.22 §4.24 — pane-exit-without-artifacts variants. All
|
|
492
|
+
// infra_failure, not recoverable (Worker/Verifier/Flywheel/Guard pane
|
|
493
|
+
// process is gone; campaign cannot proceed). failure_category preserved
|
|
494
|
+
// for telemetry.
|
|
495
|
+
case BLOCK_TAGS.WORKER_EXITED:
|
|
496
|
+
case BLOCK_TAGS.VERIFIER_EXITED:
|
|
497
|
+
case BLOCK_TAGS.FINAL_VERIFIER_EXITED:
|
|
498
|
+
case BLOCK_TAGS.FLYWHEEL_EXITED:
|
|
499
|
+
case BLOCK_TAGS.GUARD_EXITED:
|
|
500
|
+
category = 'infra_failure';
|
|
501
|
+
recoverable = false;
|
|
502
|
+
action = 'investigate_pane_logs';
|
|
503
|
+
failureCategory = source;
|
|
504
|
+
break;
|
|
505
|
+
// v5.7 §4.17 — auto-Enter on default-No would CANCEL; refuse and BLOCK.
|
|
506
|
+
case BLOCK_TAGS.PROMPT_BLOCKED:
|
|
507
|
+
category = 'infra_failure';
|
|
508
|
+
recoverable = false;
|
|
509
|
+
action = 'manual_prompt_response';
|
|
510
|
+
failureCategory = 'prompt_blocked';
|
|
511
|
+
break;
|
|
512
|
+
// Persistent timeout (no exit detected) — different from EXITED.
|
|
513
|
+
case BLOCK_TAGS.WORKER_TIMEOUT:
|
|
514
|
+
case BLOCK_TAGS.VERIFIER_TIMEOUT:
|
|
515
|
+
case BLOCK_TAGS.FINAL_VERIFIER_TIMEOUT:
|
|
516
|
+
case BLOCK_TAGS.FLYWHEEL_TIMEOUT:
|
|
517
|
+
case BLOCK_TAGS.GUARD_TIMEOUT:
|
|
518
|
+
category = 'infra_failure';
|
|
519
|
+
recoverable = false;
|
|
520
|
+
action = 'increase_iter_timeout_or_investigate';
|
|
521
|
+
failureCategory = source;
|
|
522
|
+
break;
|
|
523
|
+
// v5.7 §4.25 P1 — schema validator caught a malformed/incoherent artifact.
|
|
524
|
+
// Recoverable: next iteration's Worker prompt can include the schema
|
|
525
|
+
// error (P2 feedback loop closure) and try again.
|
|
526
|
+
case BLOCK_TAGS.MALFORMED_ARTIFACT:
|
|
527
|
+
category = 'contract_violation';
|
|
528
|
+
recoverable = true;
|
|
529
|
+
action = 'retry_with_schema_feedback';
|
|
530
|
+
failureCategory = 'malformed_artifact';
|
|
531
|
+
break;
|
|
532
|
+
// Backstop: run() exited without terminal sentinel.
|
|
533
|
+
case BLOCK_TAGS.LEADER_EXITED_WITHOUT_TERMINAL_STATE:
|
|
534
|
+
category = 'infra_failure';
|
|
535
|
+
recoverable = false;
|
|
536
|
+
action = 'investigate_leader_logs';
|
|
537
|
+
failureCategory = 'leader_exited_without_terminal_state';
|
|
538
|
+
break;
|
|
449
539
|
default:
|
|
450
540
|
category = 'metric_failure';
|
|
451
541
|
recoverable = false;
|
|
@@ -461,22 +551,198 @@ function _classifyBlock(source, { verdict, state, slug } = {}) {
|
|
|
461
551
|
};
|
|
462
552
|
}
|
|
463
553
|
|
|
554
|
+
/**
 * Uniform poll-failure → BLOCKED handler (v5.7 §4.25), shared by every
 * `pollForSignal` call site (Worker, per-US Verifier, final Verifier,
 * Flywheel, Guard).
 *
 * The caught error is mapped to a BLOCK_TAGS member, classified through
 * `_classifyBlock`, and surfaced by writing the blocked sentinel, the status
 * file and the campaign report, in that order.
 *
 * The error is never re-thrown: every branch, including the unknown-error
 * fallback, resolves to the early-exit object. Call sites MUST `return` that
 * object so the run() loop ends with phase=blocked.
 *
 * @param {unknown} error - Value caught around pollForSignal/validateArtifact.
 * @param {object} ctx
 * @param {object} ctx.paths - Campaign paths; blockedSentinel, reportFile,
 *   prdFile, statusFile and analyticsFile are read here.
 * @param {object} ctx.state - Campaign state; `phase` is set to 'blocked'.
 * @param {string} ctx.slug - Campaign slug.
 * @param {object} ctx.options - run() options; `onStatusChange` and `now` are
 *   forwarded to the status/report writers.
 * @param {string} ctx.role - 'worker' | 'verifier' | 'final_verifier' |
 *   'flywheel' | 'guard'. Unrecognised roles fall back to the worker tags.
 * @param {string} [ctx.usIdOverride] - User-story id to report; defaults to
 *   `state.current_us`.
 * @returns {Promise<{status: 'blocked', usId: string, reason: string,
 *   category: string, statusFile: string}>}
 */
async function _handlePollFailure(error, ctx) {
  const { paths, state, slug, options, role, usIdOverride } = ctx;
  const usId = usIdOverride ?? state.current_us;

  // Role → tag tables for the two failure kinds that are reported per role.
  const exitedTagByRole = {
    worker: BLOCK_TAGS.WORKER_EXITED,
    verifier: BLOCK_TAGS.VERIFIER_EXITED,
    final_verifier: BLOCK_TAGS.FINAL_VERIFIER_EXITED,
    flywheel: BLOCK_TAGS.FLYWHEEL_EXITED,
    guard: BLOCK_TAGS.GUARD_EXITED,
  };
  const timeoutTagByRole = {
    worker: BLOCK_TAGS.WORKER_TIMEOUT,
    verifier: BLOCK_TAGS.VERIFIER_TIMEOUT,
    final_verifier: BLOCK_TAGS.FINAL_VERIFIER_TIMEOUT,
    flywheel: BLOCK_TAGS.FLYWHEEL_TIMEOUT,
    guard: BLOCK_TAGS.GUARD_TIMEOUT,
  };

  // The instanceof checks run in a fixed order; keep it, in case the error
  // classes are related by inheritance (their definitions are not in view).
  const describeFailure = () => {
    if (error instanceof WorkerExitedError) {
      return {
        tag: exitedTagByRole[role] ?? BLOCK_TAGS.WORKER_EXITED,
        reason: `${error.reason ?? 'pane exited without artifacts'}: ${error.message}`,
      };
    }
    if (error instanceof PromptBlockedError) {
      return {
        tag: BLOCK_TAGS.PROMPT_BLOCKED,
        reason: `${error.reason ?? 'default-No prompt'}: ${error.message}`,
      };
    }
    if (error instanceof MalformedArtifactError) {
      return {
        tag: BLOCK_TAGS.MALFORMED_ARTIFACT,
        reason: `Malformed artifact at ${error.field}: expected ${error.expected}, got ${error.got}`,
      };
    }
    if (error instanceof TimeoutError) {
      return {
        tag: timeoutTagByRole[role] ?? BLOCK_TAGS.WORKER_TIMEOUT,
        reason: `${role} pollForSignal timed out: ${error.message}`,
      };
    }
    // Unknown error: record it here as a leader-level infra failure so the
    // run() backstop does not have to synthesize a sentinel afterwards.
    return {
      tag: BLOCK_TAGS.LEADER_EXITED_WITHOUT_TERMINAL_STATE,
      reason: `Unexpected error in ${role} poll: ${error?.message ?? error}`,
    };
  };
  const { tag, reason } = describeFailure();

  state.phase = 'blocked';
  const classification = _classifyBlock(tag, { state, slug });
  const category = classification.reason_category;

  // Surface the BLOCKED state: sentinel first, then status, then report.
  await writeSentinel(paths.blockedSentinel, 'blocked', usId, reason, classification, paths);
  await writeStatus(paths, state, options.onStatusChange, options.now);
  await generateCampaignReport({
    slug,
    reportFile: paths.reportFile,
    prdFile: paths.prdFile,
    statusFile: paths.statusFile,
    analyticsFile: paths.analyticsFile,
    now: resolveNow(options.now),
    blockedReason: reason,
    blockedCategory: category,
  });

  return {
    status: 'blocked',
    usId,
    reason,
    category,
    statusFile: paths.statusFile,
  };
}
|
|
630
|
+
|
|
631
|
+
// v5.7 §4.25 P1 — artifact schema validation.

/**
 * Raised by `validateArtifact` when a parsed artifact violates the contract.
 *
 * @property {?string} field - Offending field ('<root>' for the artifact itself).
 * @property {*} expected - What the validator required.
 * @property {*} got - What the artifact contained (a type name or the value).
 * @property {*} raw - The parsed artifact, kept for diagnostics.
 */
export class MalformedArtifactError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {?{field?: string, expected?: *, got?: *, raw?: *, cause?: *}} [info]
   *   Optional details; `null`/`undefined` is treated as "no details".
   */
  constructor(message, info = {}) {
    // The default parameter only covers `undefined`; tolerate an explicit null.
    const details = info ?? {};
    super(message, details.cause === undefined ? undefined : { cause: details.cause });
    this.name = 'MalformedArtifactError';
    this.field = details.field ?? null;
    this.expected = details.expected ?? null;
    this.got = details.got ?? null;
    this.raw = details.raw ?? null;
  }
}

/**
 * Validates a parsed artifact before any state mutation. Intended to run
 * after `pollForSignal` has returned parsed JSON.
 *
 * Checks, each applied only when the field is present (missing fields are
 * accepted for backwards compatibility with older writers):
 *   - root is a plain JSON object (not null, not an array, not a primitive);
 *   - `slug` equals `ctx.expectedSlug` (when an expected slug is given);
 *   - `iteration` is an integer. Structural check only, no lower bound;
 *   - `signal_type` equals `ctx.expectedSignalType` (when one is given);
 *   - `us_id` is a member of `ctx.allowedUsIds` (when that list is non-empty).
 *
 * @param {*} parsed - Parsed artifact.
 * @param {object} [ctx]
 * @param {string} [ctx.expectedSlug]
 * @param {string} [ctx.expectedSignalType]
 * @param {string[]} [ctx.allowedUsIds]
 * @returns {object} `parsed`, unchanged, when every check passes.
 * @throws {MalformedArtifactError} On the first violated check.
 */
function validateArtifact(parsed, ctx = {}) {
  const { expectedSlug, expectedSignalType, allowedUsIds } = ctx ?? {};

  // `typeof null` is 'object' and arrays are objects too; name them explicitly
  // so messages never read "expected object, got object".
  const jsonTypeOf = (value) => {
    if (value === null) return 'null';
    return Array.isArray(value) ? 'array' : typeof value;
  };

  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    throw new MalformedArtifactError('Artifact is not a JSON object', {
      field: '<root>',
      expected: 'object',
      got: jsonTypeOf(parsed),
      raw: parsed,
    });
  }

  if (parsed.slug !== undefined && expectedSlug && parsed.slug !== expectedSlug) {
    throw new MalformedArtifactError('slug mismatch', {
      field: 'slug',
      expected: expectedSlug,
      got: parsed.slug,
      raw: parsed,
    });
  }

  // v5.7 §4.25 P1 — iteration validation is STRUCTURAL ONLY (must be integer).
  // A strict lower bound (never below state.iteration_floor) was originally
  // proposed but caused false BLOCKs in real campaigns: workers may carry a
  // previous iteration value across iterations without updating the field,
  // and the leader's state.iteration is authoritative regardless of what the
  // worker writes. State-consistency enforcement is a higher-layer concern
  // (analytics post-mortem), not a contract-violation BLOCK trigger. Call
  // sites may still pass `iterationFloor` in ctx; it is ignored here.
  if (parsed.iteration !== undefined && !Number.isInteger(parsed.iteration)) {
    throw new MalformedArtifactError('iteration must be integer', {
      field: 'iteration',
      expected: 'integer',
      got: jsonTypeOf(parsed.iteration),
      raw: parsed,
    });
  }

  if (
    parsed.signal_type !== undefined &&
    expectedSignalType &&
    parsed.signal_type !== expectedSignalType
  ) {
    throw new MalformedArtifactError('signal_type mismatch', {
      field: 'signal_type',
      expected: expectedSignalType,
      got: parsed.signal_type,
      raw: parsed,
    });
  }

  const hasClosedUsSet = Array.isArray(allowedUsIds) && allowedUsIds.length > 0;
  if (parsed.us_id !== undefined && hasClosedUsSet && !allowedUsIds.includes(parsed.us_id)) {
    const allowedList = allowedUsIds.join(', ');
    throw new MalformedArtifactError(
      `us_id ${parsed.us_id} not in allowed set [${allowedList}]`,
      {
        field: 'us_id',
        expected: `one of [${allowedList}]`,
        got: parsed.us_id,
        raw: parsed,
      },
    );
  }

  return parsed;
}
|
|
715
|
+
|
|
464
716
|
async function writeSentinel(filePath, status, usId, reason, classification = null, paths = null) {
|
|
465
717
|
// governance §1f BLOCKED Surfacing: BLOCKED is surfaced on FIVE channels —
|
|
466
718
|
// sentinel (markdown + JSON sidecar), status, console (stderr), report,
|
|
467
719
|
// and (US-020 R8 P1-H, 5th channel) memory.md/latest.md hygiene update.
|
|
468
720
|
// Legacy 1-line parsers still work because line 1 is unchanged.
|
|
721
|
+
//
|
|
722
|
+
// v5.7 §4.24 — Write Order Contract REVERSED for first-writer-wins:
|
|
723
|
+
// 1. markdown sentinel FIRST via writeSentinelExclusive (O_EXCL lock).
|
|
724
|
+
// Whoever wins this is the canonical writer for this campaign exit.
|
|
725
|
+
// 2. JSON sidecar SECOND, only if we won the md write.
|
|
726
|
+
// Invariant: md exists ⇒ JSON exists (within ≤50ms; watchers retry).
|
|
727
|
+
// If two paths race to write blocked.md/complete.md, exactly ONE wins;
|
|
728
|
+
// the loser sees `wrote=false, reason=already_exists` and returns silently
|
|
729
|
+
// (the campaign is already classified). Cross-path category collisions
|
|
730
|
+
// resolve by first-fired timestamp (existing return-on-first-error pattern).
|
|
469
731
|
const lines = [`${status.toUpperCase()}: ${usId}`];
|
|
470
732
|
if (reason) lines.push(`Reason: ${reason}`);
|
|
471
733
|
if (classification?.reason_category) {
|
|
472
734
|
lines.push(`Category: ${classification.reason_category}`);
|
|
473
735
|
}
|
|
736
|
+
const mdBody = `${lines.join('\n')}\n`;
|
|
737
|
+
|
|
738
|
+
const result = await writeSentinelExclusive(filePath, mdBody);
|
|
739
|
+
if (!result.wrote) {
|
|
740
|
+
// Another path already wrote the sentinel for this campaign. Idempotent
|
|
741
|
+
// no-op — we are NOT the canonical writer; do not overwrite the JSON
|
|
742
|
+
// sidecar either or we'll desynchronize from the winning md.
|
|
743
|
+
return result;
|
|
744
|
+
}
|
|
474
745
|
|
|
475
|
-
// P1-D Write Order Contract:
|
|
476
|
-
// 1. JSON sidecar FIRST (atomic per-file rename via writeFile).
|
|
477
|
-
// 2. markdown sentinel SECOND.
|
|
478
|
-
// Invariant: markdown exists ⇒ JSON exists. Wrappers watch markdown,
|
|
479
|
-
// then read JSON; if JSON not yet visible (rare race), retry up to 5×50ms.
|
|
480
746
|
if (status === 'blocked' && classification) {
|
|
481
747
|
const jsonPath = filePath.replace(/\.md$/, '.json');
|
|
482
748
|
let hygieneViolated = false;
|
|
@@ -503,7 +769,7 @@ async function writeSentinel(filePath, status, usId, reason, classification = nu
|
|
|
503
769
|
await fs.writeFile(jsonPath, `${JSON.stringify(jsonBody, null, 2)}\n`, 'utf8');
|
|
504
770
|
}
|
|
505
771
|
|
|
506
|
-
|
|
772
|
+
return result;
|
|
507
773
|
}
|
|
508
774
|
|
|
509
775
|
async function runFinalSequentialVerify({
|
|
@@ -514,6 +780,7 @@ async function runFinalSequentialVerify({
|
|
|
514
780
|
verifierPaneId,
|
|
515
781
|
pollForSignal,
|
|
516
782
|
runIntegrationCheck,
|
|
783
|
+
iterTimeoutMs,
|
|
517
784
|
}) {
|
|
518
785
|
const verifierModel = state.final_verifier_model;
|
|
519
786
|
|
|
@@ -532,6 +799,7 @@ async function runFinalSequentialVerify({
|
|
|
532
799
|
const verdict = await pollForSignal(paths.verdictFile, {
|
|
533
800
|
mode: parseModelFlag(verifierModel, 'verifier').engine,
|
|
534
801
|
paneId: verifierPaneId,
|
|
802
|
+
timeoutMs: iterTimeoutMs,
|
|
535
803
|
});
|
|
536
804
|
|
|
537
805
|
if (verdict.verdict !== 'pass') {
|
|
@@ -568,8 +836,31 @@ async function runFinalSequentialVerify({
|
|
|
568
836
|
};
|
|
569
837
|
}
|
|
570
838
|
|
|
571
|
-
|
|
572
|
-
|
|
839
|
+
// v5.7 §4.11.a: single source of truth for the home rlp-desk directory and the
// autonomous `claude` command shape shared by the Flywheel and Guard triggers.
const HOME_DESK_DIR = path.join(os.homedir(), '.claude', 'ralph-desk');

/**
 * Builds the shell command line that launches an autonomous `claude` run fed
 * from a prompt file.
 *
 * Every interpolated value goes through `shellQuote`, including the `cd`
 * target and the prompt-file path. Those two were previously wrapped with
 * `JSON.stringify`, which produces a double-quoted string; inside shell
 * double quotes `$` and backticks are still expanded, so a path containing
 * them would be rewritten by the shell.
 * NOTE(review): assumes `shellQuote` returns a single POSIX-shell-safe word —
 * confirm against ../util/shell-quote.mjs.
 *
 * @param {object} args
 * @param {string} args.promptFile - File whose contents become the `-p` prompt.
 * @param {string} args.model - Model name passed to `--model`.
 * @param {string} [args.rootDir] - Campaign working directory; when set it is
 *   both the `cd` target and an `--add-dir` entry. When falsy, neither is
 *   emitted (previously the command began with `cd undefined`).
 * @param {string} [args.homeDeskDir=HOME_DESK_DIR] - Home rlp-desk directory
 *   added via `--add-dir`; pass a falsy non-undefined value to omit it.
 * @returns {string} Command line to send to the pane.
 */
function buildAutonomousClaudeCmd({ promptFile, model, rootDir, homeDeskDir = HOME_DESK_DIR }) {
  // §4.9: ANTHROPIC_BETA prefix for Opus 1M context.
  const betaPrefix = isOpusModel(model)
    ? `ANTHROPIC_BETA=${shellQuote(OPUS_1M_BETA)} `
    : '';

  // §4.11.a: --add-dir whitelist (home rlp-desk + campaign cwd) for true autonomy.
  const addDirFlags = [homeDeskDir, rootDir]
    .filter(Boolean)
    .map((dir) => ` --add-dir ${shellQuote(dir)}`)
    .join('');

  const cdPrefix = rootDir ? `cd ${shellQuote(rootDir)} && ` : '';

  return `${cdPrefix}DISABLE_OMC=1 ${betaPrefix}claude --model ${shellQuote(model)} --no-mcp${addDirFlags} -p "$(cat ${shellQuote(promptFile)})"`;
}
|
|
856
|
+
|
|
857
|
+
/**
 * Flywheel flavour of the autonomous claude command. Kept as a named wrapper
 * so the call site reads clearly and the role can be customised later.
 *
 * @param {object} args
 * @param {string} args.flywheelPromptFile - Prompt file for the Flywheel run.
 * @param {string} args.flywheelModel - Model used for the Flywheel run.
 * @param {string} args.rootDir - Campaign working directory.
 * @param {string} [args.homeDeskDir] - Forwarded as-is; when undefined the
 *   callee's default applies.
 * @returns {string} Command line produced by `buildAutonomousClaudeCmd`.
 */
function buildFlywheelTriggerCmd({ flywheelPromptFile, flywheelModel, rootDir, homeDeskDir }) {
  const request = {
    promptFile: flywheelPromptFile,
    model: flywheelModel,
    rootDir,
    homeDeskDir,
  };
  return buildAutonomousClaudeCmd(request);
}
|
|
861
|
+
|
|
862
|
+
/**
 * Guard flavour of the autonomous claude command.
 *
 * @param {object} args
 * @param {string} args.guardPromptFile - Prompt file for the Guard run.
 * @param {string} args.guardModel - Model used for the Guard run.
 * @param {string} args.rootDir - Campaign working directory.
 * @param {string} [args.homeDeskDir] - Forwarded as-is; when undefined the
 *   callee's default applies.
 * @returns {string} Command line produced by `buildAutonomousClaudeCmd`.
 */
function buildGuardTriggerCmd({ guardPromptFile, guardModel, rootDir, homeDeskDir }) {
  const request = {
    promptFile: guardPromptFile,
    model: guardModel,
    rootDir,
    homeDeskDir,
  };
  return buildAutonomousClaudeCmd(request);
}
|
|
574
865
|
|
|
575
866
|
async function dispatchFlywheel({ paths, sendKeys, flywheelPaneId, flywheelModel, rootDir }) {
|
|
@@ -581,10 +872,6 @@ async function dispatchFlywheel({ paths, sendKeys, flywheelPaneId, flywheelModel
|
|
|
581
872
|
await sendKeys(flywheelPaneId, triggerCmd);
|
|
582
873
|
}
|
|
583
874
|
|
|
584
|
-
function buildGuardTriggerCmd({ guardPromptFile, guardModel, rootDir }) {
|
|
585
|
-
return `cd ${JSON.stringify(rootDir)} && DISABLE_OMC=1 claude --model ${guardModel} --no-mcp -p "$(cat ${JSON.stringify(guardPromptFile)})"`;
|
|
586
|
-
}
|
|
587
|
-
|
|
588
875
|
async function dispatchGuard({ paths, sendKeys, guardPaneId, guardModel, rootDir }) {
|
|
589
876
|
const triggerCmd = buildGuardTriggerCmd({
|
|
590
877
|
guardPromptFile: paths.flywheelGuardPromptFile,
|
|
@@ -610,12 +897,83 @@ export function shouldRunGuard(flywheelGuard, state, usId) {
|
|
|
610
897
|
/**
 * Campaign entry point. Resolves the root directory and campaign paths, runs
 * the campaign body, and always finishes with the terminal-sentinel backstop
 * (v5.7 §4.24 §1g).
 *
 * Runtime invariant targeted by the backstop: a terminal exit of run() should
 * leave a sentinel on disk (blocked.md or complete.md). The `finally` clause
 * hands the outcome (the returned value, or the thrown error) to
 * `_ensureTerminalSentinel`, which writes a synthetic BLOCKED only when no
 * sentinel is present.
 *
 * @param {string} slug - Campaign slug.
 * @param {object} [options] - Run options; `rootDir` defaults to process.cwd().
 * @returns {Promise<*>} Whatever `_runCampaignBody` resolves to.
 * @throws Re-throws any error raised by the campaign body, unchanged, after
 *   the backstop has run.
 */
export async function run(slug, options = {}) {
  const rootDir = path.resolve(options.rootDir ?? process.cwd());
  const paths = buildPaths(rootDir, slug);

  // Exactly one of these is populated by the time `finally` executes.
  const outcome = { result: undefined, threwError: undefined };
  try {
    outcome.result = await _runCampaignBody(slug, options, paths, rootDir);
  } catch (error) {
    outcome.threwError = error;
    throw error;
  } finally {
    await _ensureTerminalSentinel({ paths, slug, ...outcome });
  }
  return outcome.result;
}
|
|
923
|
+
|
|
924
|
+
/**
 * Last-resort backstop invoked from run()'s `finally`.
 *
 * A result with status 'continue' means the campaign is paused, not finished,
 * so nothing is written. Otherwise, if neither the blocked nor the complete
 * sentinel is found, the leader exited unexpectedly and a synthetic BLOCKED
 * (`infra_failure` / `leader_exited_without_terminal_state`) is written.
 *
 * This function is best-effort: failures while probing or writing are not
 * propagated, so the error run() is already throwing is never masked.
 *
 * @param {object} args
 * @param {object} args.paths - Campaign paths (blockedSentinel, completeSentinel).
 * @param {string} args.slug - Campaign slug recorded in the classification.
 * @param {*} [args.result] - Value returned by the campaign body, if any.
 * @param {*} [args.threwError] - Error thrown by the campaign body, if any.
 * @returns {Promise<void>}
 */
async function _ensureTerminalSentinel({ paths, slug, result, threwError }) {
  if (result?.status === 'continue') {
    return;
  }

  // A probe that fails is treated as "sentinel not present".
  const sentinelOnDisk = async (key) => {
    try {
      return await exists(paths[key]);
    } catch {
      return false;
    }
  };
  const blockedExists = await sentinelOnDisk('blockedSentinel');
  const completeExists = await sentinelOnDisk('completeSentinel');
  if (blockedExists || completeExists) {
    return;
  }

  let reason = 'Leader exited without writing terminal sentinel';
  if (threwError) {
    reason = `Leader exited unexpectedly (no terminal sentinel): ${threwError?.message ?? threwError}`;
  }

  const classification = {
    slug,
    iteration: 0,
    reason_category: 'infra_failure',
    failure_category: 'leader_exited_without_terminal_state',
    recoverable: false,
    suggested_action: 'investigate_leader_logs',
  };

  try {
    await writeSentinel(paths.blockedSentinel, 'blocked', 'ALL', reason, classification, paths);
  } catch (sentinelError) {
    // If even the backstop write fails, report on stderr so the operator has
    // some signal; the original error (if any) still propagates from run().
    console.error('[run] failed to write backstop BLOCKED sentinel:', sentinelError);
  }
}
|
|
964
|
+
|
|
965
|
+
async function _runCampaignBody(slug, options, paths, rootDir) {
|
|
613
966
|
const sendKeys = options.sendKeys ?? defaultSendKeys;
|
|
614
967
|
const createPane = options.createPane ?? defaultCreatePane;
|
|
615
968
|
const createSession = options.createSession ?? defaultCreateSession;
|
|
616
969
|
const pollForSignal = options.pollForSignal ?? defaultPollForSignal;
|
|
617
970
|
const runIntegrationCheck = options.runIntegrationCheck ?? (async () => ({ exitCode: 0, summary: 'integration skipped' }));
|
|
618
971
|
const maxIterations = options.maxIterations ?? 100;
|
|
972
|
+
// v5.7 §4.19: campaign-level pollForSignal timeout (Node leader fix).
|
|
973
|
+
// The CLI parses --iter-timeout but never forwarded it to pollForSignal,
|
|
974
|
+
// so every campaign hit the 5s signal-poller default and exited
|
|
975
|
+
// immediately. Default 600s (10 min) per CLI documentation; convert to ms.
|
|
976
|
+
const iterTimeoutMs = ((options.iterTimeout ?? 600) * 1000);
|
|
619
977
|
|
|
620
978
|
await ensureDirs(paths);
|
|
621
979
|
await ensureScaffold(paths);
|
|
@@ -710,15 +1068,26 @@ export async function run(slug, options = {}) {
|
|
|
710
1068
|
|
|
711
1069
|
state.current_us = getNextUs(usList, state.verified_us, state.current_us);
|
|
712
1070
|
if (state.current_us === 'ALL') {
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
1071
|
+
let finalResult;
|
|
1072
|
+
try {
|
|
1073
|
+
finalResult = await runFinalSequentialVerify({
|
|
1074
|
+
paths,
|
|
1075
|
+
state,
|
|
1076
|
+
usList,
|
|
1077
|
+
sendKeys,
|
|
1078
|
+
verifierPaneId: state.verifier_pane_id,
|
|
1079
|
+
pollForSignal,
|
|
1080
|
+
runIntegrationCheck,
|
|
1081
|
+
iterTimeoutMs,
|
|
1082
|
+
});
|
|
1083
|
+
} catch (error) {
|
|
1084
|
+
// v5.7 §4.25 — uniform poll-failure handling for final verifier.
|
|
1085
|
+
return _handlePollFailure(error, {
|
|
1086
|
+
paths, state, slug, options,
|
|
1087
|
+
role: 'final_verifier',
|
|
1088
|
+
usIdOverride: 'ALL',
|
|
1089
|
+
});
|
|
1090
|
+
}
|
|
722
1091
|
|
|
723
1092
|
if (finalResult.status === 'complete') {
|
|
724
1093
|
state.phase = 'complete';
|
|
@@ -781,10 +1150,25 @@ export async function run(slug, options = {}) {
|
|
|
781
1150
|
rootDir,
|
|
782
1151
|
});
|
|
783
1152
|
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
1153
|
+
let flywheelSignal;
|
|
1154
|
+
try {
|
|
1155
|
+
flywheelSignal = await pollForSignal(paths.flywheelSignalFile, {
|
|
1156
|
+
mode: 'claude',
|
|
1157
|
+
paneId: state.flywheel_pane_id ?? state.verifier_pane_id,
|
|
1158
|
+
timeoutMs: iterTimeoutMs,
|
|
1159
|
+
});
|
|
1160
|
+
validateArtifact(flywheelSignal, {
|
|
1161
|
+
expectedSlug: slug,
|
|
1162
|
+
iterationFloor: state.iteration,
|
|
1163
|
+
expectedSignalType: 'flywheel_signal',
|
|
1164
|
+
allowedUsIds: [...usList, 'ALL'],
|
|
1165
|
+
});
|
|
1166
|
+
} catch (error) {
|
|
1167
|
+
return _handlePollFailure(error, {
|
|
1168
|
+
paths, state, slug, options,
|
|
1169
|
+
role: 'flywheel',
|
|
1170
|
+
});
|
|
1171
|
+
}
|
|
788
1172
|
|
|
789
1173
|
state.last_flywheel_decision = flywheelSignal.decision;
|
|
790
1174
|
// P0-A multi-mission orchestration: optionally captured from flywheel signal.
|
|
@@ -804,10 +1188,25 @@ export async function run(slug, options = {}) {
|
|
|
804
1188
|
|
|
805
1189
|
await dispatchGuard({ paths, sendKeys, guardPaneId, guardModel, rootDir });
|
|
806
1190
|
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
1191
|
+
let guardVerdict;
|
|
1192
|
+
try {
|
|
1193
|
+
guardVerdict = await pollForSignal(paths.flywheelGuardVerdictFile, {
|
|
1194
|
+
mode: 'claude',
|
|
1195
|
+
paneId: guardPaneId,
|
|
1196
|
+
timeoutMs: iterTimeoutMs,
|
|
1197
|
+
});
|
|
1198
|
+
validateArtifact(guardVerdict, {
|
|
1199
|
+
expectedSlug: slug,
|
|
1200
|
+
iterationFloor: state.iteration,
|
|
1201
|
+
expectedSignalType: 'flywheel_guard_verdict',
|
|
1202
|
+
allowedUsIds: [...usList, 'ALL'],
|
|
1203
|
+
});
|
|
1204
|
+
} catch (error) {
|
|
1205
|
+
return _handlePollFailure(error, {
|
|
1206
|
+
paths, state, slug, options,
|
|
1207
|
+
role: 'guard',
|
|
1208
|
+
});
|
|
1209
|
+
}
|
|
811
1210
|
|
|
812
1211
|
if (!state.flywheel_guard_count[state.current_us]) {
|
|
813
1212
|
state.flywheel_guard_count[state.current_us] = 0;
|
|
@@ -911,9 +1310,18 @@ export async function run(slug, options = {}) {
|
|
|
911
1310
|
signal = await pollForSignal(paths.signalFile, {
|
|
912
1311
|
mode: parseModelFlag(state.worker_model).engine,
|
|
913
1312
|
paneId: state.worker_pane_id,
|
|
1313
|
+
timeoutMs: iterTimeoutMs,
|
|
1314
|
+
});
|
|
1315
|
+
validateArtifact(signal, {
|
|
1316
|
+
expectedSlug: slug,
|
|
1317
|
+
iterationFloor: state.iteration,
|
|
1318
|
+
expectedSignalType: 'signal',
|
|
1319
|
+
allowedUsIds: [...usList, 'ALL'],
|
|
914
1320
|
});
|
|
915
1321
|
} catch (error) {
|
|
916
1322
|
if (error instanceof TimeoutError && parseModelFlag(state.worker_model).engine === 'codex') {
|
|
1323
|
+
// v5.7 — codex CLI exits cleanly after writing signal; if pollForSignal
|
|
1324
|
+
// timed out for codex, synthesize a verify signal so the loop continues.
|
|
917
1325
|
signal = {
|
|
918
1326
|
iteration: state.iteration,
|
|
919
1327
|
status: 'verify',
|
|
@@ -921,7 +1329,12 @@ export async function run(slug, options = {}) {
|
|
|
921
1329
|
summary: 'auto-generated after codex exit fallback',
|
|
922
1330
|
};
|
|
923
1331
|
} else {
|
|
924
|
-
|
|
1332
|
+
// v5.7 §4.25 — uniform handling for WorkerExitedError, PromptBlockedError,
|
|
1333
|
+
// MalformedArtifactError, TimeoutError, and unknown errors.
|
|
1334
|
+
return _handlePollFailure(error, {
|
|
1335
|
+
paths, state, slug, options,
|
|
1336
|
+
role: 'worker',
|
|
1337
|
+
});
|
|
925
1338
|
}
|
|
926
1339
|
}
|
|
927
1340
|
|
|
@@ -959,10 +1372,26 @@ export async function run(slug, options = {}) {
|
|
|
959
1372
|
verifierModel,
|
|
960
1373
|
});
|
|
961
1374
|
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
1375
|
+
let verdict;
|
|
1376
|
+
try {
|
|
1377
|
+
verdict = await pollForSignal(paths.verdictFile, {
|
|
1378
|
+
mode: parseModelFlag(verifierModel, 'verifier').engine,
|
|
1379
|
+
paneId: state.verifier_pane_id,
|
|
1380
|
+
timeoutMs: iterTimeoutMs,
|
|
1381
|
+
});
|
|
1382
|
+
validateArtifact(verdict, {
|
|
1383
|
+
expectedSlug: slug,
|
|
1384
|
+
iterationFloor: state.iteration,
|
|
1385
|
+
expectedSignalType: 'verdict',
|
|
1386
|
+
allowedUsIds: [...usList, 'ALL'],
|
|
1387
|
+
});
|
|
1388
|
+
} catch (error) {
|
|
1389
|
+
return _handlePollFailure(error, {
|
|
1390
|
+
paths, state, slug, options,
|
|
1391
|
+
role: 'verifier',
|
|
1392
|
+
usIdOverride: usId,
|
|
1393
|
+
});
|
|
1394
|
+
}
|
|
966
1395
|
|
|
967
1396
|
if (verdict.verdict === 'pass') {
|
|
968
1397
|
state.consecutive_failures = 0;
|