@chllming/wave-orchestration 0.8.6 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/CHANGELOG.md +38 -0
  2. package/README.md +5 -5
  3. package/docs/README.md +3 -1
  4. package/docs/guides/author-and-run-waves.md +1 -1
  5. package/docs/guides/planner.md +1 -1
  6. package/docs/guides/recommendations-0.8.8.md +133 -0
  7. package/docs/guides/terminal-surfaces.md +2 -0
  8. package/docs/plans/current-state.md +2 -1
  9. package/docs/plans/end-state-architecture.md +1 -1
  10. package/docs/plans/examples/wave-example-design-handoff.md +1 -1
  11. package/docs/plans/examples/wave-example-live-proof.md +1 -1
  12. package/docs/plans/migration.md +25 -8
  13. package/docs/plans/wave-orchestrator.md +8 -5
  14. package/docs/reference/cli-reference.md +11 -3
  15. package/docs/reference/coordination-and-closure.md +28 -5
  16. package/docs/reference/live-proof-waves.md +9 -0
  17. package/docs/reference/npmjs-trusted-publishing.md +2 -2
  18. package/docs/reference/runtime-config/README.md +10 -3
  19. package/docs/reference/sample-waves.md +5 -5
  20. package/docs/reference/skills.md +1 -1
  21. package/docs/reference/wave-control.md +16 -0
  22. package/docs/reference/wave-planning-lessons.md +7 -1
  23. package/docs/research/coordination-failure-review.md +6 -6
  24. package/package.json +1 -1
  25. package/releases/manifest.json +36 -0
  26. package/scripts/wave-orchestrator/agent-state.mjs +42 -0
  27. package/scripts/wave-orchestrator/autonomous.mjs +42 -6
  28. package/scripts/wave-orchestrator/clarification-triage.mjs +4 -3
  29. package/scripts/wave-orchestrator/control-cli.mjs +126 -11
  30. package/scripts/wave-orchestrator/control-plane.mjs +12 -1
  31. package/scripts/wave-orchestrator/coordination-store.mjs +124 -4
  32. package/scripts/wave-orchestrator/executors.mjs +11 -6
  33. package/scripts/wave-orchestrator/gate-engine.mjs +5 -5
  34. package/scripts/wave-orchestrator/launcher-runtime.mjs +1 -1
  35. package/scripts/wave-orchestrator/launcher.mjs +216 -0
  36. package/scripts/wave-orchestrator/ledger.mjs +14 -12
  37. package/scripts/wave-orchestrator/reducer-snapshot.mjs +8 -6
  38. package/scripts/wave-orchestrator/retry-engine.mjs +19 -11
  39. package/scripts/wave-orchestrator/routing-state.mjs +50 -3
  40. package/scripts/wave-orchestrator/session-supervisor.mjs +6 -10
  41. package/scripts/wave-orchestrator/task-entity.mjs +4 -4
  42. package/scripts/wave-orchestrator/terminals.mjs +14 -14
  43. package/scripts/wave-orchestrator/wave-files.mjs +15 -21
  44. package/scripts/wave-orchestrator/wave-state-reducer.mjs +72 -5
@@ -11,6 +11,8 @@ import {
11
11
  import {
12
12
  CLARIFICATION_CLOSURE_PREFIX,
13
13
  buildCoordinationResponseMetrics,
14
+ coordinationBlockerSeverity,
15
+ coordinationRecordBlocksWave,
14
16
  } from "./coordination-store.mjs";
15
17
  import {
16
18
  DEFAULT_COORDINATION_ACK_TIMEOUT_MS,
@@ -586,6 +588,8 @@ export function buildTaskSnapshots({
586
588
  const metrics = responseMetrics.recordMetricsById.get(record.id) || {};
587
589
  const feedbackRequest = feedbackById.get(record.id) || null;
588
590
  const taskState = taskStateForCoordinationRecord(record, feedbackRequest);
591
+ const blocking = coordinationRecordBlocksWave(record);
592
+ const blockerSeverity = coordinationBlockerSeverity(record);
589
593
  tasks.push({
590
594
  taskId: record.id,
591
595
  sourceRecordId: record.id,
@@ -598,6 +602,8 @@ export function buildTaskSnapshots({
598
602
  assigneeAgentId: firstTargetAgentId(record),
599
603
  leaseOwnerAgentId:
600
604
  ["acknowledged", "in_progress"].includes(record.status) ? firstTargetAgentId(record) : null,
605
+ blocking,
606
+ blockerSeverity,
601
607
  needsHuman:
602
608
  record.kind === "human-feedback" ||
603
609
  feedbackRequest?.status === "pending" ||
@@ -627,7 +633,7 @@ export function buildTaskSnapshots({
627
633
  ? feedbackRequest?.updatedAt || record.updatedAt || record.createdAt
628
634
  : null,
629
635
  overdueAck: metrics.overdueAck === true,
630
- stale: metrics.staleClarification === true,
636
+ stale: metrics.staleClarification === true || blockerSeverity === "stale",
631
637
  feedbackRequestId: feedbackRequest?.id || null,
632
638
  humanResponse: feedbackRequest?.responseText || null,
633
639
  humanOperator: feedbackRequest?.responseOperator || null,
@@ -648,6 +654,8 @@ export function buildTaskSnapshots({
648
654
  ownerAgentId: request.agentId || null,
649
655
  assigneeAgentId: request.agentId || null,
650
656
  leaseOwnerAgentId: null,
657
+ blocking: true,
658
+ blockerSeverity: "hard",
651
659
  needsHuman: request.status !== "answered",
652
660
  dependsOn: [],
653
661
  evidenceRefs: [],
@@ -676,6 +684,9 @@ export function buildTaskSnapshots({
676
684
  export function nextTaskDeadline(tasks) {
677
685
  const candidates = [];
678
686
  for (const task of tasks || []) {
687
+ if (task?.blocking === false) {
688
+ continue;
689
+ }
679
690
  for (const [kind, value] of [
680
691
  ["ack", task.ackDeadlineAt],
681
692
  ["resolve", task.resolveDeadlineAt],
@@ -43,7 +43,17 @@ export const COORDINATION_STATUS_VALUES = [
43
43
 
44
44
  export const COORDINATION_PRIORITY_VALUES = ["low", "normal", "high", "urgent"];
45
45
  export const COORDINATION_CONFIDENCE_VALUES = ["low", "medium", "high"];
46
+ export const COORDINATION_BLOCKER_SEVERITY_VALUES = [
47
+ "hard",
48
+ "soft",
49
+ "stale",
50
+ "advisory",
51
+ "proof-critical",
52
+ "closure-critical",
53
+ ];
46
54
  const OPEN_COORDINATION_STATUSES = new Set(["open", "acknowledged", "in_progress"]);
55
+ const NON_BLOCKING_BLOCKER_SEVERITIES = new Set(["stale", "advisory"]);
56
+ const HARD_BLOCKER_SEVERITIES = new Set(["hard", "proof-critical", "closure-critical"]);
47
57
  export const CLARIFICATION_CLOSURE_PREFIX = "clarification:";
48
58
 
49
59
  function normalizeString(value, fallback = "") {
@@ -65,6 +75,23 @@ function normalizeStringArray(values) {
65
75
  );
66
76
  }
67
77
 
78
+ function normalizeOptionalBoolean(value, fallback = null) {
79
+ if (value === undefined || value === null || value === "") {
80
+ return fallback;
81
+ }
82
+ if (typeof value === "boolean") {
83
+ return value;
84
+ }
85
+ const normalized = String(value).trim().toLowerCase();
86
+ if (["1", "true", "yes", "y", "on"].includes(normalized)) {
87
+ return true;
88
+ }
89
+ if (["0", "false", "no", "n", "off"].includes(normalized)) {
90
+ return false;
91
+ }
92
+ return Boolean(value);
93
+ }
94
+
68
95
  function validateEnum(value, allowed, label) {
69
96
  if (!allowed.includes(value)) {
70
97
  throw new Error(`${label} must be one of ${allowed.join(", ")} (got: ${value || "empty"})`);
@@ -75,6 +102,74 @@ function stableId(prefix) {
75
102
  return `${prefix}-${crypto.randomBytes(4).toString("hex")}`;
76
103
  }
77
104
 
105
+ function defaultBlockingForKind(kind) {
106
+ return ["request", "blocker", "clarification-request", "human-escalation", "human-feedback"].includes(
107
+ kind,
108
+ );
109
+ }
110
+
111
+ function defaultSeverityForRecord(kind, priority, blocking) {
112
+ if (blocking === false) {
113
+ return "advisory";
114
+ }
115
+ if (kind === "human-escalation" || kind === "human-feedback") {
116
+ return "hard";
117
+ }
118
+ if (kind === "request" || kind === "clarification-request") {
119
+ return "closure-critical";
120
+ }
121
+ if (kind === "blocker") {
122
+ return ["high", "urgent"].includes(priority) ? "hard" : "soft";
123
+ }
124
+ return "advisory";
125
+ }
126
+
127
+ function normalizeBlockerSeverity(value, defaults = {}) {
128
+ const normalized = normalizeString(value ?? defaults.blockerSeverity, "").toLowerCase();
129
+ return normalized || null;
130
+ }
131
+
132
+ export function coordinationBlockerSeverity(record) {
133
+ if (!record || typeof record !== "object") {
134
+ return "advisory";
135
+ }
136
+ const blocking =
137
+ record.blocking === undefined || record.blocking === null
138
+ ? defaultBlockingForKind(String(record.kind || "").trim().toLowerCase())
139
+ : record.blocking !== false;
140
+ const explicitSeverity = normalizeBlockerSeverity(record.blockerSeverity);
141
+ const derivedSeverity =
142
+ explicitSeverity ||
143
+ defaultSeverityForRecord(
144
+ String(record.kind || "").trim().toLowerCase(),
145
+ String(record.priority || "normal").trim().toLowerCase(),
146
+ blocking,
147
+ );
148
+ if (COORDINATION_BLOCKER_SEVERITY_VALUES.includes(derivedSeverity)) {
149
+ return derivedSeverity;
150
+ }
151
+ return defaultSeverityForRecord(
152
+ String(record.kind || "").trim().toLowerCase(),
153
+ String(record.priority || "normal").trim().toLowerCase(),
154
+ blocking,
155
+ );
156
+ }
157
+
158
+ export function coordinationRecordBlocksWave(record) {
159
+ if (!record || !isOpenCoordinationStatus(record.status)) {
160
+ return false;
161
+ }
162
+ if (record.blocking === false) {
163
+ return false;
164
+ }
165
+ return !NON_BLOCKING_BLOCKER_SEVERITIES.has(coordinationBlockerSeverity(record));
166
+ }
167
+
168
+ export function coordinationRecordIsHardBlocker(record) {
169
+ return coordinationRecordBlocksWave(record) &&
170
+ HARD_BLOCKER_SEVERITIES.has(coordinationBlockerSeverity(record));
171
+ }
172
+
78
173
  export function normalizeCoordinationRecord(rawRecord, defaults = {}) {
79
174
  if (!rawRecord || typeof rawRecord !== "object" || Array.isArray(rawRecord)) {
80
175
  throw new Error("Coordination record must be an object");
@@ -93,12 +188,25 @@ export function normalizeCoordinationRecord(rawRecord, defaults = {}) {
93
188
  ).toLowerCase();
94
189
  const priority = normalizeString(rawRecord.priority || defaults.priority || "normal").toLowerCase();
95
190
  const confidence = normalizeString(rawRecord.confidence || defaults.confidence || "medium").toLowerCase();
191
+ const explicitBlocking = normalizeOptionalBoolean(
192
+ rawRecord.blocking,
193
+ normalizeOptionalBoolean(defaults.blocking, null),
194
+ );
195
+ const blocking = explicitBlocking ?? defaultBlockingForKind(kind);
196
+ const blockerSeverity =
197
+ normalizeBlockerSeverity(rawRecord.blockerSeverity, defaults) ||
198
+ defaultSeverityForRecord(kind, priority, blocking);
96
199
  const createdAt = normalizeString(rawRecord.createdAt || defaults.createdAt || now);
97
200
  const updatedAt = normalizeString(rawRecord.updatedAt || defaults.updatedAt || createdAt);
98
201
  validateEnum(kind, COORDINATION_KIND_VALUES, "Coordination kind");
99
202
  validateEnum(status, COORDINATION_STATUS_VALUES, "Coordination status");
100
203
  validateEnum(priority, COORDINATION_PRIORITY_VALUES, "Coordination priority");
101
204
  validateEnum(confidence, COORDINATION_CONFIDENCE_VALUES, "Coordination confidence");
205
+ validateEnum(
206
+ blockerSeverity,
207
+ COORDINATION_BLOCKER_SEVERITY_VALUES,
208
+ "Coordination blockerSeverity",
209
+ );
102
210
  if (!lane) {
103
211
  throw new Error("Coordination lane is required");
104
212
  }
@@ -118,6 +226,8 @@ export function normalizeCoordinationRecord(rawRecord, defaults = {}) {
118
226
  targets: normalizeStringArray(rawRecord.targets ?? defaults.targets),
119
227
  status,
120
228
  priority,
229
+ blocking,
230
+ blockerSeverity,
121
231
  artifactRefs: normalizeStringArray(rawRecord.artifactRefs ?? defaults.artifactRefs),
122
232
  dependsOn: normalizeStringArray(rawRecord.dependsOn ?? defaults.dependsOn),
123
233
  closureCondition: normalizeString(rawRecord.closureCondition ?? defaults.closureCondition, ""),
@@ -180,6 +290,8 @@ export function appendCoordinationRecord(filePath, rawRecord, defaults = {}) {
180
290
  kind: record.kind,
181
291
  status: record.status,
182
292
  priority: record.priority,
293
+ blocking: record.blocking !== false,
294
+ blockerSeverity: record.blockerSeverity,
183
295
  confidence: record.confidence,
184
296
  summary: record.summary,
185
297
  detail: record.detail,
@@ -416,15 +528,16 @@ export function buildCoordinationResponseMetrics(state, options = {}) {
416
528
  for (const record of state?.openRecords || []) {
417
529
  const startMs = parseRecordStartMs(record);
418
530
  const ageMs = Number.isFinite(startMs) ? Math.max(0, nowMs - startMs) : null;
419
- const ackTracked = isAckTrackedRecord(record);
531
+ const blocking = coordinationRecordBlocksWave(record);
532
+ const ackTracked = blocking && isAckTrackedRecord(record);
420
533
  const ackPending = ackTracked && record.status === "open";
421
534
  const clarificationLinked =
422
- record.kind === "clarification-request" || isClarificationLinkedRequest(record);
535
+ blocking && (record.kind === "clarification-request" || isClarificationLinkedRequest(record));
423
536
  const overdueAck = ackPending && Number.isFinite(ageMs) && ageMs >= ackTimeoutMs;
424
537
  const staleClarification =
425
538
  clarificationLinked && Number.isFinite(ageMs) && ageMs >= resolutionStaleMs;
426
539
 
427
- if (Number.isFinite(ageMs)) {
540
+ if (blocking && Number.isFinite(ageMs)) {
428
541
  oldestOpenCoordinationAgeMs =
429
542
  oldestOpenCoordinationAgeMs === null
430
543
  ? ageMs
@@ -454,6 +567,7 @@ export function buildCoordinationResponseMetrics(state, options = {}) {
454
567
  overdueAck,
455
568
  clarificationLinked,
456
569
  staleClarification,
570
+ blocking,
457
571
  });
458
572
  }
459
573
 
@@ -469,7 +583,7 @@ export function buildCoordinationResponseMetrics(state, options = {}) {
469
583
  a.localeCompare(b),
470
584
  ),
471
585
  openHumanEscalationCount: (state?.humanEscalations || []).filter((record) =>
472
- isOpenCoordinationStatus(record.status),
586
+ coordinationRecordBlocksWave(record),
473
587
  ).length,
474
588
  recordMetricsById,
475
589
  };
@@ -490,6 +604,12 @@ function renderOpenRecord(record, responseMetrics = null) {
490
604
  if (recordMetrics?.staleClarification) {
491
605
  tags.push("stale-clarification");
492
606
  }
607
+ if (record.blocking === false) {
608
+ tags.push("non-blocking");
609
+ }
610
+ if (record.blockerSeverity) {
611
+ tags.push(`severity=${record.blockerSeverity}`);
612
+ }
493
613
  const timing = tags.length > 0 ? ` [${tags.join(", ")}]` : "";
494
614
  return `- [${record.priority}] ${record.kind}/${record.status} ${record.agentId}${targets}${timing} id=${record.id}: ${compactSingleLine(record.summary || record.detail || "no summary", 160)}${artifacts}`;
495
615
  }
@@ -305,6 +305,7 @@ function buildLaunchLimitsMetadata(agent) {
305
305
  const executor = agent?.executorResolved || {};
306
306
  const executorId = normalizeExecutorMode(executor.id || DEFAULT_EXECUTOR_MODE);
307
307
  const attemptTimeoutMinutes = executor?.budget?.minutes ?? null;
308
+ const advisoryTurnBudget = executor?.budget?.turns ?? null;
308
309
  if (executorId === "claude") {
309
310
  const source = executor?.claude?.maxTurnsSource || null;
310
311
  return {
@@ -312,9 +313,11 @@ function buildLaunchLimitsMetadata(agent) {
312
313
  knownTurnLimit: executor?.claude?.maxTurns ?? null,
313
314
  turnLimitSource: source,
314
315
  notes:
315
- source === "budget.turns"
316
- ? ["Known turn limit was derived from generic budget.turns."]
317
- : [],
316
+ source
317
+ ? []
318
+ : advisoryTurnBudget !== null
319
+ ? ["Generic budget.turns remained advisory; Wave emitted no Claude --max-turns flag."]
320
+ : [],
318
321
  };
319
322
  }
320
323
  if (executorId === "opencode") {
@@ -324,9 +327,11 @@ function buildLaunchLimitsMetadata(agent) {
324
327
  knownTurnLimit: executor?.opencode?.steps ?? null,
325
328
  turnLimitSource: source,
326
329
  notes:
327
- source === "budget.turns"
328
- ? ["Known turn limit was derived from generic budget.turns."]
329
- : [],
330
+ source
331
+ ? []
332
+ : advisoryTurnBudget !== null
333
+ ? ["Generic budget.turns remained advisory; Wave emitted no OpenCode --steps flag."]
334
+ : [],
330
335
  };
331
336
  }
332
337
  if (executorId === "codex") {
@@ -49,7 +49,7 @@ import {
49
49
  validateWaveComponentMatrixCurrentLevels,
50
50
  } from "./wave-files.mjs";
51
51
  import {
52
- isOpenCoordinationStatus,
52
+ coordinationRecordBlocksWave,
53
53
  openClarificationLinkedRequests,
54
54
  } from "./coordination-store.mjs";
55
55
  import { contradictionsBlockingGate } from "./contradiction-entity.mjs";
@@ -998,7 +998,7 @@ export function readWaveIntegrationBarrier(wave, agentRuns, derivedState, option
998
998
 
999
999
  export function readClarificationBarrier(derivedState) {
1000
1000
  const openClarifications = (derivedState?.coordinationState?.clarifications || []).filter(
1001
- (record) => isOpenCoordinationStatus(record.status),
1001
+ (record) => coordinationRecordBlocksWave(record),
1002
1002
  );
1003
1003
  if (openClarifications.length > 0) {
1004
1004
  return {
@@ -1009,7 +1009,7 @@ export function readClarificationBarrier(derivedState) {
1009
1009
  }
1010
1010
  const openClarificationRequests = openClarificationLinkedRequests(
1011
1011
  derivedState?.coordinationState,
1012
- );
1012
+ ).filter((record) => coordinationRecordBlocksWave(record));
1013
1013
  if (openClarificationRequests.length > 0) {
1014
1014
  return {
1015
1015
  ok: false,
@@ -1019,10 +1019,10 @@ export function readClarificationBarrier(derivedState) {
1019
1019
  }
1020
1020
  const pendingHuman = [
1021
1021
  ...((derivedState?.coordinationState?.humanEscalations || []).filter((record) =>
1022
- isOpenCoordinationStatus(record.status),
1022
+ coordinationRecordBlocksWave(record),
1023
1023
  )),
1024
1024
  ...((derivedState?.coordinationState?.humanFeedback || []).filter((record) =>
1025
- isOpenCoordinationStatus(record.status),
1025
+ coordinationRecordBlocksWave(record),
1026
1026
  )),
1027
1027
  ];
1028
1028
  if (pendingHuman.length > 0) {
@@ -235,7 +235,7 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
235
235
  "const fs=require('node:fs'); const statusPath=process.argv[1]; const payload={code:Number(process.argv[2]),promptHash:process.argv[3]||null,orchestratorId:process.argv[4]||null,attempt:Number(process.argv[5])||1,completedAt:new Date().toISOString()}; fs.writeFileSync(statusPath, JSON.stringify(payload, null, 2)+'\\n', 'utf8');",
236
236
  )} ${shellQuote(statusPath)} "$status" ${shellQuote(promptHash)} ${shellQuote(orchestratorId || "")} ${shellQuote(String(attempt || 1))}`,
237
237
  `echo "[${lanePaths.lane}-wave-launcher] ${sessionName} finished with code $status"`,
238
- "exec bash -l",
238
+ "exit \"$status\"",
239
239
  ].join("\n");
240
240
 
241
241
  runTmuxFn(
@@ -16,6 +16,7 @@ import {
16
16
  readWaveHumanFeedbackRequests,
17
17
  } from "./coordination.mjs";
18
18
  import {
19
+ appendCoordinationRecord,
19
20
  buildCoordinationResponseMetrics,
20
21
  } from "./coordination-store.mjs";
21
22
  import {
@@ -122,6 +123,7 @@ import {
122
123
  import {
123
124
  clearWaveRetryOverride,
124
125
  readWaveRetryOverride,
126
+ writeWaveRetryOverride,
125
127
  } from "./retry-control.mjs";
126
128
  import { appendWaveControlEvent, readControlPlaneEvents } from "./control-plane.mjs";
127
129
  import { materializeContradictionsFromControlPlaneEvents } from "./contradiction-entity.mjs";
@@ -578,6 +580,100 @@ function buildFailureFromGate(gateName, gate, fallbackLogPath) {
578
580
  };
579
581
  }
580
582
 
583
+ function normalizeFailureStatusCode(value) {
584
+ return String(value || "").trim().toLowerCase();
585
+ }
586
+
587
+ function recoverableFailureReason(failure, summary = null) {
588
+ const statusCode = normalizeFailureStatusCode(failure?.statusCode);
589
+ if (["timeout-no-status", "timed_out", "missing-status"].includes(statusCode)) {
590
+ return statusCode;
591
+ }
592
+ const terminationReason = String(summary?.terminationReason || "").trim().toLowerCase();
593
+ if (["timeout", "max-turns", "session-missing"].includes(terminationReason)) {
594
+ return terminationReason;
595
+ }
596
+ const detailText = `${failure?.detail || ""} ${summary?.terminationHint || ""}`.toLowerCase();
597
+ if (detailText.includes("rate limit") || detailText.includes("429 too many requests")) {
598
+ return "rate-limit";
599
+ }
600
+ return null;
601
+ }
602
+
603
+ function annotateFailuresWithRecoveryHints(failures, agentRuns) {
604
+ const runsByAgentId = new Map((agentRuns || []).map((run) => [run.agent.agentId, run]));
605
+ return (failures || []).map((failure) => {
606
+ const run = failure?.agentId ? runsByAgentId.get(failure.agentId) : null;
607
+ const summary = run
608
+ ? readAgentExecutionSummary(run.statusPath, {
609
+ agent: run.agent,
610
+ statusPath: run.statusPath,
611
+ statusRecord: readStatusRecordIfPresent(run.statusPath),
612
+ logPath: fs.existsSync(run.logPath) ? run.logPath : null,
613
+ })
614
+ : null;
615
+ const recoveryReason = recoverableFailureReason(failure, summary);
616
+ return {
617
+ ...failure,
618
+ detail: failure?.detail || summary?.terminationHint || null,
619
+ terminationReason: summary?.terminationReason || null,
620
+ terminationHint: summary?.terminationHint || null,
621
+ observedTurnLimit:
622
+ Number.isFinite(Number(summary?.terminationObservedTurnLimit))
623
+ ? Number(summary.terminationObservedTurnLimit)
624
+ : null,
625
+ recoverable: Boolean(recoveryReason),
626
+ recoveryReason,
627
+ };
628
+ });
629
+ }
630
+
631
+ function failuresAreRecoverable(failures) {
632
+ return Array.isArray(failures) && failures.length > 0 && failures.every((failure) => failure?.recoverable);
633
+ }
634
+
635
+ function appendRepairCoordinationRequests({
636
+ coordinationLogPath,
637
+ lanePaths,
638
+ wave,
639
+ attempt,
640
+ runs,
641
+ failures,
642
+ }) {
643
+ const selectedRuns = Array.isArray(runs) ? runs : [];
644
+ const failureByAgentId = new Map(
645
+ (failures || [])
646
+ .filter((failure) => failure?.agentId)
647
+ .map((failure) => [failure.agentId, failure]),
648
+ );
649
+ for (const run of selectedRuns) {
650
+ const agentId = run?.agent?.agentId;
651
+ if (!agentId) {
652
+ continue;
653
+ }
654
+ const failure = failureByAgentId.get(agentId) || null;
655
+ appendCoordinationRecord(coordinationLogPath, {
656
+ id: `repair-wave-${wave.wave}-attempt-${attempt}-${agentId}`,
657
+ lane: lanePaths.lane,
658
+ wave: wave.wave,
659
+ agentId: "launcher",
660
+ kind: "request",
661
+ targets: [`agent:${agentId}`],
662
+ priority: "normal",
663
+ summary: failure
664
+ ? `Repair ${agentId}: ${failure.recoveryReason || failure.statusCode}`
665
+ : `Repair ${agentId}: targeted follow-up`,
666
+ detail: failure
667
+ ? `Targeted recovery for ${agentId} after ${failure.recoveryReason || failure.statusCode}. ${failure.detail || "Resume the bounded follow-up work and preserve reusable proof from other agents."}`
668
+ : `Targeted recovery for ${agentId}. Resume the bounded follow-up work and preserve reusable proof from other agents.`,
669
+ status: "open",
670
+ source: "launcher",
671
+ blocking: false,
672
+ blockerSeverity: "soft",
673
+ });
674
+ }
675
+ }
676
+
581
677
  // --- Main entry point ---
582
678
 
583
679
  export async function runLauncherCli(argv) {
@@ -1519,6 +1615,7 @@ export async function runLauncherCli(argv) {
1519
1615
  }
1520
1616
 
1521
1617
  materializeAgentExecutionSummaries(wave, agentRuns);
1618
+ failures = annotateFailuresWithRecoveryHints(failures, agentRuns);
1522
1619
  refreshDerivedState(attempt);
1523
1620
  syncWaveSignals();
1524
1621
  lastLiveCoordinationRefreshAt = Date.now();
@@ -1706,6 +1803,7 @@ export async function runLauncherCli(argv) {
1706
1803
  failures = closureResult.failures;
1707
1804
  timedOut = timedOut || closureResult.timedOut;
1708
1805
  materializeAgentExecutionSummaries(wave, agentRuns);
1806
+ failures = annotateFailuresWithRecoveryHints(failures, agentRuns);
1709
1807
  refreshDerivedState(attempt);
1710
1808
  }
1711
1809
  } else {
@@ -1896,6 +1994,14 @@ export async function runLauncherCli(argv) {
1896
1994
  detail: "Queued for shared component closure",
1897
1995
  });
1898
1996
  }
1997
+ appendRepairCoordinationRequests({
1998
+ coordinationLogPath: derivedState.coordinationLogPath,
1999
+ lanePaths,
2000
+ wave,
2001
+ attempt,
2002
+ runs: runsToLaunch,
2003
+ failures,
2004
+ });
1899
2005
  writeWaveRelaunchProjection({
1900
2006
  lanePaths,
1901
2007
  wave,
@@ -1931,6 +2037,106 @@ export async function runLauncherCli(argv) {
1931
2037
  }
1932
2038
 
1933
2039
  if (attempt >= options.maxRetriesPerWave + 1) {
2040
+ const reducerDecision =
2041
+ latestReducerSnapshot || refreshReducerSnapshot(attempt);
2042
+ const recoveryPlan = planRetryWaveAttempt({
2043
+ agentRuns,
2044
+ failures,
2045
+ derivedState,
2046
+ lanePaths,
2047
+ wave,
2048
+ retryOverride: readWaveRetryOverride(lanePaths, wave.wave),
2049
+ waveState: reducerDecision?.reducerState || null,
2050
+ });
2051
+ const recoverySelectedAgentIds = Array.from(
2052
+ new Set([
2053
+ ...((recoveryPlan.selectedRuns || []).map((run) => run.agent.agentId)),
2054
+ ...((reducerDecision?.resumePlan?.invalidatedAgentIds || []).filter(Boolean)),
2055
+ ...((failures || []).map((failure) => failure.agentId).filter(Boolean)),
2056
+ ]),
2057
+ );
2058
+ if (failuresAreRecoverable(failures) && recoverySelectedAgentIds.length > 0) {
2059
+ const resumeCursor =
2060
+ reducerDecision?.resumePlan?.resumeFromPhase &&
2061
+ reducerDecision.resumePlan.resumeFromPhase !== "completed"
2062
+ ? reducerDecision.resumePlan.resumeFromPhase
2063
+ : null;
2064
+ const queuedRecovery = writeWaveRetryOverride(lanePaths, wave.wave, {
2065
+ lane: lanePaths.lane,
2066
+ wave: wave.wave,
2067
+ selectedAgentIds: recoverySelectedAgentIds,
2068
+ resumeCursor,
2069
+ clearReusableAgentIds: Array.from(
2070
+ new Set((failures || []).map((failure) => failure.agentId).filter(Boolean)),
2071
+ ),
2072
+ preserveReusableAgentIds: reducerDecision?.resumePlan?.reusableAgentIds || [],
2073
+ reuseProofBundleIds: reducerDecision?.resumePlan?.reusableProofBundleIds || [],
2074
+ requestedBy: "launcher-recovery",
2075
+ reason: `Auto recovery queued after recoverable execution issue(s): ${(failures || []).map((failure) => `${failure.agentId || "wave"}:${failure.recoveryReason || failure.statusCode}`).join(", ")}.`,
2076
+ applyOnce: true,
2077
+ });
2078
+ appendRepairCoordinationRequests({
2079
+ coordinationLogPath: derivedState.coordinationLogPath,
2080
+ lanePaths,
2081
+ wave,
2082
+ attempt: attempt + 1,
2083
+ runs: agentRuns.filter((run) => recoverySelectedAgentIds.includes(run.agent.agentId)),
2084
+ failures,
2085
+ });
2086
+ if (recoveryPlan.selectedRuns.length > 0) {
2087
+ writeWaveRelaunchProjection({
2088
+ lanePaths,
2089
+ wave,
2090
+ attempt: attempt + 1,
2091
+ runs: recoveryPlan.selectedRuns,
2092
+ failures,
2093
+ derivedState,
2094
+ });
2095
+ }
2096
+ recordAttemptState(lanePaths, wave.wave, attempt, "failed", {
2097
+ selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
2098
+ detail: failures
2099
+ .map((failure) => `${failure.agentId || "wave"}:${failure.recoveryReason || failure.statusCode}`)
2100
+ .join(", "),
2101
+ });
2102
+ recordWaveRunState(lanePaths, wave.wave, "blocked", {
2103
+ attempts: attempt,
2104
+ traceDir: completionTraceDir ? path.relative(REPO_ROOT, completionTraceDir) : null,
2105
+ gateSnapshot: completionGateSnapshot,
2106
+ recoverable: true,
2107
+ rerunRequestId: queuedRecovery.requestId,
2108
+ failures: failures.map((failure) => ({
2109
+ agentId: failure.agentId || null,
2110
+ statusCode: failure.statusCode,
2111
+ recoveryReason: failure.recoveryReason || null,
2112
+ detail: failure.detail || null,
2113
+ })),
2114
+ });
2115
+ dashboardState.status = "blocked";
2116
+ for (const failure of failures) {
2117
+ setWaveDashboardAgent(dashboardState, failure.agentId, {
2118
+ state: "blocked",
2119
+ detail:
2120
+ failure.detail ||
2121
+ `Recoverable ${failure.recoveryReason || failure.statusCode}; targeted resume queued.`,
2122
+ });
2123
+ }
2124
+ flushDashboards();
2125
+ appendCoordination({
2126
+ event: "wave_recovery_queued",
2127
+ waves: [wave.wave],
2128
+ status: "blocked",
2129
+ details: `attempt=${attempt}/${options.maxRetriesPerWave + 1}; request=${queuedRecovery.requestId}; agents=${recoverySelectedAgentIds.join(",")}; reason=${(failures || []).map((failure) => failure.recoveryReason || failure.statusCode).join(",")}`,
2130
+ actionRequested:
2131
+ `Lane ${lanePaths.lane} owners should resume the queued targeted recovery or let autonomous relaunch the selected agents.`,
2132
+ });
2133
+ await flushWaveControlTelemetry();
2134
+ const error = new Error(
2135
+ `Wave ${wave.wave} queued targeted recovery request ${queuedRecovery.requestId} after recoverable execution failures.`,
2136
+ );
2137
+ error.exitCode = 43;
2138
+ throw error;
2139
+ }
1934
2140
  recordAttemptState(lanePaths, wave.wave, attempt, "failed", {
1935
2141
  selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
1936
2142
  detail: failures
@@ -2075,6 +2281,16 @@ export async function runLauncherCli(argv) {
2075
2281
  detail: "Queued for retry",
2076
2282
  });
2077
2283
  }
2284
+ if (retryPlan.source !== "override") {
2285
+ appendRepairCoordinationRequests({
2286
+ coordinationLogPath: derivedState.coordinationLogPath,
2287
+ lanePaths,
2288
+ wave,
2289
+ attempt: attempt + 1,
2290
+ runs: runsToLaunch,
2291
+ failures,
2292
+ });
2293
+ }
2078
2294
  writeWaveRelaunchProjection({
2079
2295
  lanePaths,
2080
2296
  wave,