@chllming/wave-orchestration 0.8.5 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/CHANGELOG.md +46 -0
  2. package/README.md +14 -9
  3. package/docs/README.md +3 -1
  4. package/docs/context7/bundles.json +19 -20
  5. package/docs/context7/planner-agent/README.md +4 -1
  6. package/docs/guides/author-and-run-waves.md +4 -1
  7. package/docs/guides/planner.md +3 -1
  8. package/docs/guides/signal-wrappers.md +165 -0
  9. package/docs/guides/terminal-surfaces.md +15 -0
  10. package/docs/plans/context7-wave-orchestrator.md +24 -7
  11. package/docs/plans/current-state.md +7 -3
  12. package/docs/plans/end-state-architecture.md +16 -4
  13. package/docs/plans/examples/wave-example-design-handoff.md +1 -1
  14. package/docs/plans/examples/wave-example-live-proof.md +1 -1
  15. package/docs/plans/migration.md +179 -72
  16. package/docs/plans/wave-orchestrator.md +11 -5
  17. package/docs/reference/cli-reference.md +21 -4
  18. package/docs/reference/coordination-and-closure.md +26 -5
  19. package/docs/reference/live-proof-waves.md +9 -0
  20. package/docs/reference/npmjs-trusted-publishing.md +2 -2
  21. package/docs/reference/runtime-config/README.md +9 -3
  22. package/docs/reference/sample-waves.md +5 -5
  23. package/docs/reference/skills.md +9 -1
  24. package/docs/reference/wave-control.md +18 -0
  25. package/docs/reference/wave-planning-lessons.md +7 -1
  26. package/docs/research/coordination-failure-review.md +6 -6
  27. package/package.json +1 -1
  28. package/releases/manifest.json +38 -0
  29. package/scripts/context7-api-check.sh +57 -13
  30. package/scripts/wave-orchestrator/agent-state.mjs +42 -0
  31. package/scripts/wave-orchestrator/autonomous.mjs +42 -6
  32. package/scripts/wave-orchestrator/clarification-triage.mjs +4 -3
  33. package/scripts/wave-orchestrator/control-cli.mjs +145 -11
  34. package/scripts/wave-orchestrator/control-plane.mjs +12 -1
  35. package/scripts/wave-orchestrator/coordination-store.mjs +124 -4
  36. package/scripts/wave-orchestrator/coordination.mjs +35 -0
  37. package/scripts/wave-orchestrator/executors.mjs +11 -6
  38. package/scripts/wave-orchestrator/gate-engine.mjs +5 -5
  39. package/scripts/wave-orchestrator/install.mjs +2 -0
  40. package/scripts/wave-orchestrator/launcher-runtime.mjs +12 -1
  41. package/scripts/wave-orchestrator/launcher.mjs +236 -0
  42. package/scripts/wave-orchestrator/ledger.mjs +14 -12
  43. package/scripts/wave-orchestrator/reducer-snapshot.mjs +8 -6
  44. package/scripts/wave-orchestrator/retry-engine.mjs +19 -11
  45. package/scripts/wave-orchestrator/routing-state.mjs +50 -3
  46. package/scripts/wave-orchestrator/session-supervisor.mjs +119 -10
  47. package/scripts/wave-orchestrator/shared.mjs +1 -0
  48. package/scripts/wave-orchestrator/signals.mjs +681 -0
  49. package/scripts/wave-orchestrator/task-entity.mjs +4 -4
  50. package/scripts/wave-orchestrator/terminals.mjs +14 -14
  51. package/scripts/wave-orchestrator/wave-control-schema.mjs +2 -0
  52. package/scripts/wave-orchestrator/wave-files.mjs +15 -21
  53. package/scripts/wave-orchestrator/wave-state-reducer.mjs +72 -5
  54. package/scripts/wave-status.sh +200 -0
  55. package/scripts/wave-watch.sh +200 -0
  56. package/skills/README.md +3 -0
  57. package/skills/signal-hygiene/SKILL.md +51 -0
  58. package/skills/signal-hygiene/skill.json +20 -0
@@ -203,6 +203,8 @@ export function buildExecutionPrompt({
203
203
  sharedPlanDocs = null,
204
204
  designPacketPaths = null,
205
205
  designExecutionMode = null,
206
+ signalStatePath = null,
207
+ signalAckPath = null,
206
208
  contQaAgentId = "A0",
207
209
  contEvalAgentId = "E0",
208
210
  integrationAgentId = "A8",
@@ -213,6 +215,10 @@ export function buildExecutionPrompt({
213
215
  ? path.relative(REPO_ROOT, sharedSummaryPath)
214
216
  : null;
215
217
  const relativeInboxPath = inboxPath ? path.relative(REPO_ROOT, inboxPath) : null;
218
+ const relativeSignalStatePath = signalStatePath
219
+ ? path.relative(REPO_ROOT, signalStatePath)
220
+ : null;
221
+ const relativeSignalAckPath = signalAckPath ? path.relative(REPO_ROOT, signalAckPath) : null;
216
222
  const lanePlansDir = lane === DEFAULT_WAVE_LANE ? "docs/plans" : `docs/${lane}/plans`;
217
223
  const resolvedSharedPlanDocs =
218
224
  sharedPlanDocs && sharedPlanDocs.length > 0
@@ -531,6 +537,14 @@ export function buildExecutionPrompt({
531
537
  `Agent inbox repo-relative path: ${relativeInboxPath}`,
532
538
  ]
533
539
  : []),
540
+ ...(signalStatePath
541
+ ? [
542
+ `Signal state absolute path: ${signalStatePath}`,
543
+ `Signal state repo-relative path: ${relativeSignalStatePath}`,
544
+ `Signal ack absolute path: ${signalAckPath}`,
545
+ `Signal ack repo-relative path: ${relativeSignalAckPath}`,
546
+ ]
547
+ : []),
534
548
  "",
535
549
  ...(sharedSummaryText
536
550
  ? ["Current wave shared summary:", "```markdown", sharedSummaryText, "```", ""]
@@ -538,6 +552,17 @@ export function buildExecutionPrompt({
538
552
  ...(inboxText
539
553
  ? ["Current agent inbox:", "```markdown", inboxText, "```", ""]
540
554
  : []),
555
+ ...(signalStatePath
556
+ ? [
557
+ "Long-running signal loop:",
558
+ "- If you are operating as a resident or waiting agent, keep watching the signal state JSON instead of exiting early.",
559
+ "- When the signal `version` increases beyond the version recorded in the signal ack file, immediately write the ack file before acting.",
560
+ `- Write the ack file as JSON with exactly these keys: \`agentId\`, \`version\`, \`signal\`, and \`observedAt\`. Use \`${agent.agentId}\` as \`agentId\` and an ISO-8601 timestamp for \`observedAt\`.`,
561
+ "- After writing the ack, re-read the inbox, shared summary, and message board, then handle the new signal once.",
562
+ "- If the signal version has not changed, stay idle. Do not busy-loop or repeat unchanged work.",
563
+ "",
564
+ ]
565
+ : []),
541
566
  ...exitContractLines,
542
567
  ...promotedComponentLines,
543
568
  ...evalTargetLines,
@@ -564,6 +589,8 @@ export function buildResidentOrchestratorPrompt({
564
589
  sharedSummaryPath,
565
590
  dashboardPath,
566
591
  triagePath = null,
592
+ signalStatePath = null,
593
+ signalAckPath = null,
567
594
  rolePrompt = "",
568
595
  }) {
569
596
  const coordinationCommand = [
@@ -612,6 +639,8 @@ export function buildResidentOrchestratorPrompt({
612
639
  `- Wave dashboard: ${dashboardPath}`,
613
640
  `- Message board projection: ${messageBoardPath}`,
614
641
  ...(triagePath ? [`- Feedback triage log: ${triagePath}`] : []),
642
+ ...(signalStatePath ? [`- Signal state: ${signalStatePath}`] : []),
643
+ ...(signalAckPath ? [`- Signal ack: ${signalAckPath}`] : []),
615
644
  "",
616
645
  "Action surface:",
617
646
  `- Coordination command: \`${coordinationCommand}\``,
@@ -624,6 +653,12 @@ export function buildResidentOrchestratorPrompt({
624
653
  "2. Identify open clarifications, open clarification-linked requests, overdue acknowledgements, and human-feedback state.",
625
654
  "3. If action is needed, write a durable coordination update and explain the policy basis for the action.",
626
655
  "4. If nothing needs action, continue monitoring. Do not exit until the wave is clearly terminal or the launcher stops the session.",
656
+ ...(signalStatePath
657
+ ? [
658
+ "5. When the signal state `version` increases, immediately write the signal ack file before taking action so the launcher knows you observed the change.",
659
+ "6. After acknowledging the signal, re-read the shared summary, dashboard, coordination log, and triage artifacts before intervening.",
660
+ ]
661
+ : []),
627
662
  "",
628
663
  ...(roleSection
629
664
  ? [
@@ -305,6 +305,7 @@ function buildLaunchLimitsMetadata(agent) {
305
305
  const executor = agent?.executorResolved || {};
306
306
  const executorId = normalizeExecutorMode(executor.id || DEFAULT_EXECUTOR_MODE);
307
307
  const attemptTimeoutMinutes = executor?.budget?.minutes ?? null;
308
+ const advisoryTurnBudget = executor?.budget?.turns ?? null;
308
309
  if (executorId === "claude") {
309
310
  const source = executor?.claude?.maxTurnsSource || null;
310
311
  return {
@@ -312,9 +313,11 @@ function buildLaunchLimitsMetadata(agent) {
312
313
  knownTurnLimit: executor?.claude?.maxTurns ?? null,
313
314
  turnLimitSource: source,
314
315
  notes:
315
- source === "budget.turns"
316
- ? ["Known turn limit was derived from generic budget.turns."]
317
- : [],
316
+ source
317
+ ? []
318
+ : advisoryTurnBudget !== null
319
+ ? ["Generic budget.turns remained advisory; Wave emitted no Claude --max-turns flag."]
320
+ : [],
318
321
  };
319
322
  }
320
323
  if (executorId === "opencode") {
@@ -324,9 +327,11 @@ function buildLaunchLimitsMetadata(agent) {
324
327
  knownTurnLimit: executor?.opencode?.steps ?? null,
325
328
  turnLimitSource: source,
326
329
  notes:
327
- source === "budget.turns"
328
- ? ["Known turn limit was derived from generic budget.turns."]
329
- : [],
330
+ source
331
+ ? []
332
+ : advisoryTurnBudget !== null
333
+ ? ["Generic budget.turns remained advisory; Wave emitted no OpenCode --steps flag."]
334
+ : [],
330
335
  };
331
336
  }
332
337
  if (executorId === "codex") {
@@ -49,7 +49,7 @@ import {
49
49
  validateWaveComponentMatrixCurrentLevels,
50
50
  } from "./wave-files.mjs";
51
51
  import {
52
- isOpenCoordinationStatus,
52
+ coordinationRecordBlocksWave,
53
53
  openClarificationLinkedRequests,
54
54
  } from "./coordination-store.mjs";
55
55
  import { contradictionsBlockingGate } from "./contradiction-entity.mjs";
@@ -998,7 +998,7 @@ export function readWaveIntegrationBarrier(wave, agentRuns, derivedState, option
998
998
 
999
999
  export function readClarificationBarrier(derivedState) {
1000
1000
  const openClarifications = (derivedState?.coordinationState?.clarifications || []).filter(
1001
- (record) => isOpenCoordinationStatus(record.status),
1001
+ (record) => coordinationRecordBlocksWave(record),
1002
1002
  );
1003
1003
  if (openClarifications.length > 0) {
1004
1004
  return {
@@ -1009,7 +1009,7 @@ export function readClarificationBarrier(derivedState) {
1009
1009
  }
1010
1010
  const openClarificationRequests = openClarificationLinkedRequests(
1011
1011
  derivedState?.coordinationState,
1012
- );
1012
+ ).filter((record) => coordinationRecordBlocksWave(record));
1013
1013
  if (openClarificationRequests.length > 0) {
1014
1014
  return {
1015
1015
  ok: false,
@@ -1019,10 +1019,10 @@ export function readClarificationBarrier(derivedState) {
1019
1019
  }
1020
1020
  const pendingHuman = [
1021
1021
  ...((derivedState?.coordinationState?.humanEscalations || []).filter((record) =>
1022
- isOpenCoordinationStatus(record.status),
1022
+ coordinationRecordBlocksWave(record),
1023
1023
  )),
1024
1024
  ...((derivedState?.coordinationState?.humanFeedback || []).filter((record) =>
1025
- isOpenCoordinationStatus(record.status),
1025
+ coordinationRecordBlocksWave(record),
1026
1026
  )),
1027
1027
  ];
1028
1028
  if (pendingHuman.length > 0) {
@@ -27,6 +27,8 @@ export const CHANGELOG_MANIFEST_PATH = path.join(PACKAGE_ROOT, "releases", "mani
27
27
  export const WORKSPACE_PACKAGE_JSON_PATH = path.join(REPO_ROOT, "package.json");
28
28
  export const STARTER_TEMPLATE_PATHS = [
29
29
  "wave.config.json",
30
+ "scripts/wave-status.sh",
31
+ "scripts/wave-watch.sh",
30
32
  "docs/README.md",
31
33
  "docs/agents/wave-documentation-role.md",
32
34
  "docs/agents/wave-design-role.md",
@@ -20,6 +20,11 @@ import {
20
20
  summarizeResolvedSkills,
21
21
  writeResolvedSkillArtifacts,
22
22
  } from "./skills.mjs";
23
+ import {
24
+ agentSignalAckPath,
25
+ agentSignalPath,
26
+ agentUsesSignalHygiene,
27
+ } from "./signals.mjs";
23
28
 
24
29
  export function refreshResolvedSkillsForRun(runInfo, waveDefinition, lanePaths) {
25
30
  runInfo.agent.skillsResolved = resolveAgentSkills(
@@ -129,6 +134,12 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
129
134
  .map((waveAgent) => resolveDesignReportPath(waveAgent))
130
135
  .filter(Boolean),
131
136
  designExecutionMode,
137
+ signalStatePath: agentUsesSignalHygiene(agent)
138
+ ? agentSignalPath(lanePaths, wave, agent.agentId)
139
+ : null,
140
+ signalAckPath: agentUsesSignalHygiene(agent)
141
+ ? agentSignalAckPath(lanePaths, wave, agent.agentId)
142
+ : null,
132
143
  });
133
144
  const promptHash = hashAgentPromptFingerprint(agent);
134
145
  fs.writeFileSync(promptPath, `${prompt}\n`, "utf8");
@@ -224,7 +235,7 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
224
235
  "const fs=require('node:fs'); const statusPath=process.argv[1]; const payload={code:Number(process.argv[2]),promptHash:process.argv[3]||null,orchestratorId:process.argv[4]||null,attempt:Number(process.argv[5])||1,completedAt:new Date().toISOString()}; fs.writeFileSync(statusPath, JSON.stringify(payload, null, 2)+'\\n', 'utf8');",
225
236
  )} ${shellQuote(statusPath)} "$status" ${shellQuote(promptHash)} ${shellQuote(orchestratorId || "")} ${shellQuote(String(attempt || 1))}`,
226
237
  `echo "[${lanePaths.lane}-wave-launcher] ${sessionName} finished with code $status"`,
227
- "exec bash -l",
238
+ "exit \"$status\"",
228
239
  ].join("\n");
229
240
 
230
241
  runTmuxFn(
@@ -16,6 +16,7 @@ import {
16
16
  readWaveHumanFeedbackRequests,
17
17
  } from "./coordination.mjs";
18
18
  import {
19
+ appendCoordinationRecord,
19
20
  buildCoordinationResponseMetrics,
20
21
  } from "./coordination-store.mjs";
21
22
  import {
@@ -122,6 +123,7 @@ import {
122
123
  import {
123
124
  clearWaveRetryOverride,
124
125
  readWaveRetryOverride,
126
+ writeWaveRetryOverride,
125
127
  } from "./retry-control.mjs";
126
128
  import { appendWaveControlEvent, readControlPlaneEvents } from "./control-plane.mjs";
127
129
  import { materializeContradictionsFromControlPlaneEvents } from "./contradiction-entity.mjs";
@@ -209,7 +211,9 @@ import {
209
211
  recordAttemptState,
210
212
  recordWaveRunState,
211
213
  runTmux,
214
+ syncLiveWaveSignals,
212
215
  } from "./session-supervisor.mjs";
216
+ import { buildControlStatusPayload } from "./control-cli.mjs";
213
217
  import {
214
218
  planInitialWaveAttempt,
215
219
  planRetryWaveAttempt,
@@ -576,6 +580,100 @@ function buildFailureFromGate(gateName, gate, fallbackLogPath) {
576
580
  };
577
581
  }
578
582
 
583
/**
 * Normalize a failure status code so it can be compared case-insensitively.
 *
 * @param {unknown} value - Raw status code; `null` and `undefined` become "".
 * @returns {string} The trimmed, lower-cased string form of `value`.
 */
function normalizeFailureStatusCode(value) {
  // `??` instead of `||`: a numeric code such as 0 must survive as "0"
  // rather than collapsing into the empty "no status" string.
  return String(value ?? "").trim().toLowerCase();
}
586
+
587
/**
 * Decide whether a failure looks recoverable and, if so, name the reason.
 *
 * Checks run in this order and the first match wins:
 *   1. the failure's normalized `statusCode`,
 *   2. the summary's `terminationReason`,
 *   3. rate-limit wording in the failure `detail` or summary `terminationHint`.
 *
 * @param {{statusCode?: unknown, detail?: string}|null|undefined} failure
 *   Failure record to classify.
 * @param {{terminationReason?: string, terminationHint?: string}|null} [summary=null]
 *   Optional execution summary for the failing agent.
 * @returns {string|null} The matching status code or termination reason,
 *   the literal "rate-limit", or `null` when nothing matched.
 */
function recoverableFailureReason(failure, summary = null) {
  const statusCode = normalizeFailureStatusCode(failure?.statusCode);
  if (["timeout-no-status", "timed_out", "missing-status"].includes(statusCode)) {
    return statusCode;
  }
  const terminationReason = String(summary?.terminationReason || "").trim().toLowerCase();
  if (["timeout", "max-turns", "session-missing"].includes(terminationReason)) {
    return terminationReason;
  }
  const detailText = `${failure?.detail || ""} ${summary?.terminationHint || ""}`.toLowerCase();
  // Accept the common spellings ("rate limit", "rate-limit", "rate_limit_error",
  // "RateLimitError") instead of only the space-separated phrase, so throttling
  // reported with an error type name is still classified as recoverable.
  if (/rate[\s_-]?limit/.test(detailText) || detailText.includes("429 too many requests")) {
    return "rate-limit";
  }
  return null;
}
602
+
603
/**
 * Return a copy of each failure enriched with termination and recovery hints.
 *
 * For a failure whose `agentId` matches one of `agentRuns`, the agent's
 * execution summary is read from the run's status path and used to fill in
 * the termination fields. Failures with no matching run get `null` for those
 * fields. The input failure objects are not mutated.
 *
 * @param {Array<object>|null|undefined} failures - Failure records to annotate.
 * @param {Array<object>|null|undefined} agentRuns - Runs carrying `agent`,
 *   `statusPath` and `logPath`.
 * @returns {Array<object>} New failure objects with `detail`,
 *   `terminationReason`, `terminationHint`, `observedTurnLimit`,
 *   `recoverable` and `recoveryReason` set.
 */
function annotateFailuresWithRecoveryHints(failures, agentRuns) {
  // Skip runs without an agent id instead of throwing while building the
  // index; such runs can never match a failure anyway.
  const runsByAgentId = new Map(
    (agentRuns || [])
      .filter((run) => run?.agent?.agentId)
      .map((run) => [run.agent.agentId, run]),
  );
  return (failures || []).map((failure) => {
    const run = failure?.agentId ? runsByAgentId.get(failure.agentId) : null;
    const summary = run
      ? readAgentExecutionSummary(run.statusPath, {
          agent: run.agent,
          statusPath: run.statusPath,
          statusRecord: readStatusRecordIfPresent(run.statusPath),
          logPath: fs.existsSync(run.logPath) ? run.logPath : null,
        })
      : null;
    const recoveryReason = recoverableFailureReason(failure, summary);
    // Number(null) and Number("") are both 0, so only convert real numbers
    // and non-blank strings; otherwise a missing limit would be reported as 0.
    const rawTurnLimit = summary?.terminationObservedTurnLimit;
    const hasTurnLimit =
      typeof rawTurnLimit === "number" ||
      (typeof rawTurnLimit === "string" && rawTurnLimit.trim() !== "");
    const turnLimit = hasTurnLimit ? Number(rawTurnLimit) : Number.NaN;
    return {
      ...failure,
      detail: failure?.detail || summary?.terminationHint || null,
      terminationReason: summary?.terminationReason || null,
      terminationHint: summary?.terminationHint || null,
      observedTurnLimit: Number.isFinite(turnLimit) ? turnLimit : null,
      recoverable: Boolean(recoveryReason),
      recoveryReason,
    };
  });
}
630
+
631
/**
 * Report whether a failure list is non-empty and consists solely of
 * failures flagged as recoverable.
 *
 * @param {unknown} failures - Candidate list of failure records.
 * @returns {boolean} `false` for anything that is not a non-empty array,
 *   otherwise whether every entry has a truthy `recoverable` flag.
 */
function failuresAreRecoverable(failures) {
  if (!Array.isArray(failures) || failures.length === 0) {
    return false;
  }
  const hasUnrecoverable = failures.some((entry) => !entry?.recoverable);
  return !hasUnrecoverable;
}
634
+
635
/**
 * Append one launcher-authored repair request per selected run to the
 * coordination log.
 *
 * Each record is an open, non-blocking, soft-severity `request` targeted at
 * `agent:<agentId>`. When a failure with the same `agentId` is supplied, its
 * recovery reason (or status code) and detail are folded into the text.
 * Runs without an agent id are skipped.
 *
 * @param {object} params
 * @param {string} params.coordinationLogPath - Log path handed to `appendCoordinationRecord`.
 * @param {{lane: string}} params.lanePaths - Supplies the record's `lane`.
 * @param {{wave: number|string}} params.wave - Supplies the record's `wave` and id segment.
 * @param {number} params.attempt - Attempt number embedded in the record id.
 * @param {Array<object>|null|undefined} params.runs - Runs to request repairs for.
 * @param {Array<object>|null|undefined} params.failures - Failures used to describe each repair.
 * @returns {void}
 */
function appendRepairCoordinationRequests({
  coordinationLogPath,
  lanePaths,
  wave,
  attempt,
  runs,
  failures,
}) {
  const selectedRuns = Array.isArray(runs) ? runs : [];
  const failureByAgentId = new Map(
    (failures || [])
      .filter((failure) => failure?.agentId)
      .map((failure) => [failure.agentId, failure]),
  );
  for (const run of selectedRuns) {
    const agentId = run?.agent?.agentId;
    if (!agentId) {
      continue;
    }
    const failure = failureByAgentId.get(agentId) || null;
    // A failure may carry neither field; fall back to a readable label
    // so the record text never contains the literal string "undefined".
    const reason = failure ? failure.recoveryReason || failure.statusCode || "unknown" : null;
    appendCoordinationRecord(coordinationLogPath, {
      id: `repair-wave-${wave.wave}-attempt-${attempt}-${agentId}`,
      lane: lanePaths.lane,
      wave: wave.wave,
      agentId: "launcher",
      kind: "request",
      targets: [`agent:${agentId}`],
      priority: "normal",
      summary: failure
        ? `Repair ${agentId}: ${reason}`
        : `Repair ${agentId}: targeted follow-up`,
      detail: failure
        ? `Targeted recovery for ${agentId} after ${reason}. ${failure.detail || "Resume the bounded follow-up work and preserve reusable proof from other agents."}`
        : `Targeted recovery for ${agentId}. Resume the bounded follow-up work and preserve reusable proof from other agents.`,
      status: "open",
      source: "launcher",
      blocking: false,
      blockerSeverity: "soft",
    });
  }
}
676
+
579
677
  // --- Main entry point ---
580
678
 
581
679
  export async function runLauncherCli(argv) {
@@ -624,6 +722,7 @@ export async function runLauncherCli(argv) {
624
722
  ensureDirectory(lanePaths.controlDir);
625
723
  ensureDirectory(lanePaths.assignmentsDir);
626
724
  ensureDirectory(lanePaths.inboxesDir);
725
+ ensureDirectory(lanePaths.signalsDir);
627
726
  ensureDirectory(lanePaths.ledgerDir);
628
727
  ensureDirectory(lanePaths.integrationDir);
629
728
  ensureDirectory(lanePaths.proofDir);
@@ -1188,6 +1287,19 @@ export async function runLauncherCli(argv) {
1188
1287
  flushDashboards();
1189
1288
  return true;
1190
1289
  };
1290
+ const syncWaveSignals = () =>
1291
+ syncLiveWaveSignals({
1292
+ lanePaths,
1293
+ wave,
1294
+ statusPayload: buildControlStatusPayload({
1295
+ lanePaths,
1296
+ wave,
1297
+ }),
1298
+ agentRuns,
1299
+ residentEnabled: Boolean(residentOrchestratorRun),
1300
+ recordCombinedEvent,
1301
+ appendCoordination,
1302
+ });
1191
1303
 
1192
1304
  const proofRegistryForReuse = readWaveProofRegistry(lanePaths, wave.wave);
1193
1305
  const initialAttemptPlan = planInitialWaveAttempt({
@@ -1217,6 +1329,7 @@ export async function runLauncherCli(argv) {
1217
1329
  }
1218
1330
  flushDashboards();
1219
1331
  emitCoordinationAlertEvents(derivedState);
1332
+ syncWaveSignals();
1220
1333
 
1221
1334
  if (options.dashboard && currentWaveDashboardTerminalEntry) {
1222
1335
  launchWaveDashboardSession(lanePaths, {
@@ -1281,6 +1394,7 @@ export async function runLauncherCli(argv) {
1281
1394
  details: `session=${residentOrchestratorRun.sessionName}; executor=${residentOrchestratorRun.lastExecutorId || "unknown"}`,
1282
1395
  actionRequested: "None",
1283
1396
  });
1397
+ syncWaveSignals();
1284
1398
  }
1285
1399
  }
1286
1400
 
@@ -1487,6 +1601,7 @@ export async function runLauncherCli(argv) {
1487
1601
  updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
1488
1602
  flushDashboards();
1489
1603
  }
1604
+ syncWaveSignals();
1490
1605
  },
1491
1606
  {
1492
1607
  controlPlane: {
@@ -1500,7 +1615,9 @@ export async function runLauncherCli(argv) {
1500
1615
  }
1501
1616
 
1502
1617
  materializeAgentExecutionSummaries(wave, agentRuns);
1618
+ failures = annotateFailuresWithRecoveryHints(failures, agentRuns);
1503
1619
  refreshDerivedState(attempt);
1620
+ syncWaveSignals();
1504
1621
  lastLiveCoordinationRefreshAt = Date.now();
1505
1622
  emitCoordinationAlertEvents(derivedState);
1506
1623
  failures = reconcileFailuresAgainstSharedComponentState(wave, agentRuns, failures);
@@ -1686,6 +1803,7 @@ export async function runLauncherCli(argv) {
1686
1803
  failures = closureResult.failures;
1687
1804
  timedOut = timedOut || closureResult.timedOut;
1688
1805
  materializeAgentExecutionSummaries(wave, agentRuns);
1806
+ failures = annotateFailuresWithRecoveryHints(failures, agentRuns);
1689
1807
  refreshDerivedState(attempt);
1690
1808
  }
1691
1809
  } else {
@@ -1876,6 +1994,14 @@ export async function runLauncherCli(argv) {
1876
1994
  detail: "Queued for shared component closure",
1877
1995
  });
1878
1996
  }
1997
+ appendRepairCoordinationRequests({
1998
+ coordinationLogPath: derivedState.coordinationLogPath,
1999
+ lanePaths,
2000
+ wave,
2001
+ attempt,
2002
+ runs: runsToLaunch,
2003
+ failures,
2004
+ });
1879
2005
  writeWaveRelaunchProjection({
1880
2006
  lanePaths,
1881
2007
  wave,
@@ -1911,6 +2037,106 @@ export async function runLauncherCli(argv) {
1911
2037
  }
1912
2038
 
1913
2039
  if (attempt >= options.maxRetriesPerWave + 1) {
2040
+ const reducerDecision =
2041
+ latestReducerSnapshot || refreshReducerSnapshot(attempt);
2042
+ const recoveryPlan = planRetryWaveAttempt({
2043
+ agentRuns,
2044
+ failures,
2045
+ derivedState,
2046
+ lanePaths,
2047
+ wave,
2048
+ retryOverride: readWaveRetryOverride(lanePaths, wave.wave),
2049
+ waveState: reducerDecision?.reducerState || null,
2050
+ });
2051
+ const recoverySelectedAgentIds = Array.from(
2052
+ new Set([
2053
+ ...((recoveryPlan.selectedRuns || []).map((run) => run.agent.agentId)),
2054
+ ...((reducerDecision?.resumePlan?.invalidatedAgentIds || []).filter(Boolean)),
2055
+ ...((failures || []).map((failure) => failure.agentId).filter(Boolean)),
2056
+ ]),
2057
+ );
2058
+ if (failuresAreRecoverable(failures) && recoverySelectedAgentIds.length > 0) {
2059
+ const resumeCursor =
2060
+ reducerDecision?.resumePlan?.resumeFromPhase &&
2061
+ reducerDecision.resumePlan.resumeFromPhase !== "completed"
2062
+ ? reducerDecision.resumePlan.resumeFromPhase
2063
+ : null;
2064
+ const queuedRecovery = writeWaveRetryOverride(lanePaths, wave.wave, {
2065
+ lane: lanePaths.lane,
2066
+ wave: wave.wave,
2067
+ selectedAgentIds: recoverySelectedAgentIds,
2068
+ resumeCursor,
2069
+ clearReusableAgentIds: Array.from(
2070
+ new Set((failures || []).map((failure) => failure.agentId).filter(Boolean)),
2071
+ ),
2072
+ preserveReusableAgentIds: reducerDecision?.resumePlan?.reusableAgentIds || [],
2073
+ reuseProofBundleIds: reducerDecision?.resumePlan?.reusableProofBundleIds || [],
2074
+ requestedBy: "launcher-recovery",
2075
+ reason: `Auto recovery queued after recoverable execution issue(s): ${(failures || []).map((failure) => `${failure.agentId || "wave"}:${failure.recoveryReason || failure.statusCode}`).join(", ")}.`,
2076
+ applyOnce: true,
2077
+ });
2078
+ appendRepairCoordinationRequests({
2079
+ coordinationLogPath: derivedState.coordinationLogPath,
2080
+ lanePaths,
2081
+ wave,
2082
+ attempt: attempt + 1,
2083
+ runs: agentRuns.filter((run) => recoverySelectedAgentIds.includes(run.agent.agentId)),
2084
+ failures,
2085
+ });
2086
+ if (recoveryPlan.selectedRuns.length > 0) {
2087
+ writeWaveRelaunchProjection({
2088
+ lanePaths,
2089
+ wave,
2090
+ attempt: attempt + 1,
2091
+ runs: recoveryPlan.selectedRuns,
2092
+ failures,
2093
+ derivedState,
2094
+ });
2095
+ }
2096
+ recordAttemptState(lanePaths, wave.wave, attempt, "failed", {
2097
+ selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
2098
+ detail: failures
2099
+ .map((failure) => `${failure.agentId || "wave"}:${failure.recoveryReason || failure.statusCode}`)
2100
+ .join(", "),
2101
+ });
2102
+ recordWaveRunState(lanePaths, wave.wave, "blocked", {
2103
+ attempts: attempt,
2104
+ traceDir: completionTraceDir ? path.relative(REPO_ROOT, completionTraceDir) : null,
2105
+ gateSnapshot: completionGateSnapshot,
2106
+ recoverable: true,
2107
+ rerunRequestId: queuedRecovery.requestId,
2108
+ failures: failures.map((failure) => ({
2109
+ agentId: failure.agentId || null,
2110
+ statusCode: failure.statusCode,
2111
+ recoveryReason: failure.recoveryReason || null,
2112
+ detail: failure.detail || null,
2113
+ })),
2114
+ });
2115
+ dashboardState.status = "blocked";
2116
+ for (const failure of failures) {
2117
+ setWaveDashboardAgent(dashboardState, failure.agentId, {
2118
+ state: "blocked",
2119
+ detail:
2120
+ failure.detail ||
2121
+ `Recoverable ${failure.recoveryReason || failure.statusCode}; targeted resume queued.`,
2122
+ });
2123
+ }
2124
+ flushDashboards();
2125
+ appendCoordination({
2126
+ event: "wave_recovery_queued",
2127
+ waves: [wave.wave],
2128
+ status: "blocked",
2129
+ details: `attempt=${attempt}/${options.maxRetriesPerWave + 1}; request=${queuedRecovery.requestId}; agents=${recoverySelectedAgentIds.join(",")}; reason=${(failures || []).map((failure) => failure.recoveryReason || failure.statusCode).join(",")}`,
2130
+ actionRequested:
2131
+ `Lane ${lanePaths.lane} owners should resume the queued targeted recovery or let autonomous relaunch the selected agents.`,
2132
+ });
2133
+ await flushWaveControlTelemetry();
2134
+ const error = new Error(
2135
+ `Wave ${wave.wave} queued targeted recovery request ${queuedRecovery.requestId} after recoverable execution failures.`,
2136
+ );
2137
+ error.exitCode = 43;
2138
+ throw error;
2139
+ }
1914
2140
  recordAttemptState(lanePaths, wave.wave, attempt, "failed", {
1915
2141
  selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
1916
2142
  detail: failures
@@ -2055,6 +2281,16 @@ export async function runLauncherCli(argv) {
2055
2281
  detail: "Queued for retry",
2056
2282
  });
2057
2283
  }
2284
+ if (retryPlan.source !== "override") {
2285
+ appendRepairCoordinationRequests({
2286
+ coordinationLogPath: derivedState.coordinationLogPath,
2287
+ lanePaths,
2288
+ wave,
2289
+ attempt: attempt + 1,
2290
+ runs: runsToLaunch,
2291
+ failures,
2292
+ });
2293
+ }
2058
2294
  writeWaveRelaunchProjection({
2059
2295
  lanePaths,
2060
2296
  wave,
@@ -18,7 +18,11 @@ import {
18
18
  isImplementationOwningDesignAgent,
19
19
  isSecurityReviewAgent,
20
20
  } from "./role-helpers.mjs";
21
- import { openClarificationLinkedRequests } from "./coordination-store.mjs";
21
+ import {
22
+ coordinationRecordBlocksWave,
23
+ coordinationRecordIsHardBlocker,
24
+ openClarificationLinkedRequests,
25
+ } from "./coordination-store.mjs";
22
26
  import { buildHelperTasks } from "./routing-state.mjs";
23
27
  import { readJsonOrNull, toIsoTimestamp, writeJsonAtomic } from "./shared.mjs";
24
28
 
@@ -35,16 +39,12 @@ function taskStateFromValidation(validation) {
35
39
 
36
40
  function openHighPriorityBlockers(state) {
37
41
  return (state?.blockers || []).filter(
38
- (record) =>
39
- ["open", "acknowledged", "in_progress"].includes(record.status) &&
40
- ["high", "urgent"].includes(record.priority),
42
+ (record) => coordinationRecordIsHardBlocker(record),
41
43
  );
42
44
  }
43
45
 
44
46
  function openClarifications(state) {
45
- return (state?.clarifications || []).filter((record) =>
46
- ["open", "acknowledged", "in_progress"].includes(record.status),
47
- );
47
+ return (state?.clarifications || []).filter((record) => coordinationRecordBlocksWave(record));
48
48
  }
49
49
 
50
50
  export function buildSeedWaveLedger({
@@ -161,7 +161,7 @@ function derivePhase({
161
161
  }
162
162
  if (
163
163
  openClarifications(state).length > 0 ||
164
- openClarificationLinkedRequests(state).length > 0
164
+ openClarificationLinkedRequests(state).filter((record) => coordinationRecordBlocksWave(record)).length > 0
165
165
  ) {
166
166
  return "clarifying";
167
167
  }
@@ -407,7 +407,9 @@ export function deriveWaveLedger({
407
407
  tasks,
408
408
  blockers: (coordinationState?.blockers || []).map((record) => record.id),
409
409
  openClarifications: openClarifications(coordinationState).map((record) => record.id),
410
- clarificationLinkedRequests: openClarificationLinkedRequests(coordinationState).map(
410
+ clarificationLinkedRequests: openClarificationLinkedRequests(coordinationState)
411
+ .filter((record) => coordinationRecordBlocksWave(record))
412
+ .map(
411
413
  (record) => record.id,
412
414
  ),
413
415
  openRequests: (coordinationState?.requests || [])
@@ -437,14 +439,14 @@ export function deriveWaveLedger({
437
439
  : null,
438
440
  humanFeedback: [
439
441
  ...(coordinationState?.humanFeedback || [])
440
- .filter((record) => ["open", "acknowledged", "in_progress"].includes(record.status))
442
+ .filter((record) => coordinationRecordBlocksWave(record))
441
443
  .map((record) => record.id),
442
444
  ...(coordinationState?.humanEscalations || [])
443
- .filter((record) => ["open", "acknowledged", "in_progress"].includes(record.status))
445
+ .filter((record) => coordinationRecordBlocksWave(record))
444
446
  .map((record) => record.id),
445
447
  ],
446
448
  humanEscalations: (coordinationState?.humanEscalations || [])
447
- .filter((record) => ["open", "acknowledged", "in_progress"].includes(record.status))
449
+ .filter((record) => coordinationRecordBlocksWave(record))
448
450
  .map((record) => record.id),
449
451
  contEvalState: contEvalValidation.ok ? "pass" : "open",
450
452
  securityState: securityValidation.ok ? securityValidation.statusCode || "pass" : "open",
@@ -1,7 +1,7 @@
1
1
  import fs from "node:fs";
2
2
  import path from "node:path";
3
3
  import {
4
- isOpenCoordinationStatus,
4
+ coordinationRecordBlocksWave,
5
5
  openClarificationLinkedRequests,
6
6
  readMaterializedCoordinationState,
7
7
  } from "./coordination-store.mjs";
@@ -66,17 +66,19 @@ function compatibilityBlockerIds(derivedState) {
66
66
  const coordinationState = derivedState?.coordinationState || {};
67
67
  return normalizeShadowIdList([
68
68
  ...(coordinationState.blockers || [])
69
- .filter((record) => isOpenCoordinationStatus(record.status))
69
+ .filter((record) => coordinationRecordBlocksWave(record))
70
70
  .map((record) => record.id),
71
71
  ...(coordinationState.clarifications || [])
72
- .filter((record) => isOpenCoordinationStatus(record.status))
72
+ .filter((record) => coordinationRecordBlocksWave(record))
73
+ .map((record) => record.id),
74
+ ...openClarificationLinkedRequests(coordinationState)
75
+ .filter((record) => coordinationRecordBlocksWave(record))
73
76
  .map((record) => record.id),
74
- ...openClarificationLinkedRequests(coordinationState).map((record) => record.id),
75
77
  ...(coordinationState.humanFeedback || [])
76
- .filter((record) => isOpenCoordinationStatus(record.status))
78
+ .filter((record) => coordinationRecordBlocksWave(record))
77
79
  .map((record) => record.id),
78
80
  ...(coordinationState.humanEscalations || [])
79
- .filter((record) => isOpenCoordinationStatus(record.status))
81
+ .filter((record) => coordinationRecordBlocksWave(record))
80
82
  .map((record) => record.id),
81
83
  ...((derivedState?.capabilityAssignments || [])
82
84
  .filter((assignment) => assignment.blocking)