@chllming/wave-orchestration 0.8.6 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +38 -0
- package/README.md +5 -5
- package/docs/README.md +3 -1
- package/docs/guides/author-and-run-waves.md +1 -1
- package/docs/guides/planner.md +1 -1
- package/docs/guides/recommendations-0.8.8.md +133 -0
- package/docs/guides/terminal-surfaces.md +2 -0
- package/docs/plans/current-state.md +2 -1
- package/docs/plans/end-state-architecture.md +1 -1
- package/docs/plans/examples/wave-example-design-handoff.md +1 -1
- package/docs/plans/examples/wave-example-live-proof.md +1 -1
- package/docs/plans/migration.md +25 -8
- package/docs/plans/wave-orchestrator.md +8 -5
- package/docs/reference/cli-reference.md +11 -3
- package/docs/reference/coordination-and-closure.md +28 -5
- package/docs/reference/live-proof-waves.md +9 -0
- package/docs/reference/npmjs-trusted-publishing.md +2 -2
- package/docs/reference/runtime-config/README.md +10 -3
- package/docs/reference/sample-waves.md +5 -5
- package/docs/reference/skills.md +1 -1
- package/docs/reference/wave-control.md +16 -0
- package/docs/reference/wave-planning-lessons.md +7 -1
- package/docs/research/coordination-failure-review.md +6 -6
- package/package.json +1 -1
- package/releases/manifest.json +36 -0
- package/scripts/wave-orchestrator/agent-state.mjs +42 -0
- package/scripts/wave-orchestrator/autonomous.mjs +42 -6
- package/scripts/wave-orchestrator/clarification-triage.mjs +4 -3
- package/scripts/wave-orchestrator/control-cli.mjs +126 -11
- package/scripts/wave-orchestrator/control-plane.mjs +12 -1
- package/scripts/wave-orchestrator/coordination-store.mjs +124 -4
- package/scripts/wave-orchestrator/executors.mjs +11 -6
- package/scripts/wave-orchestrator/gate-engine.mjs +5 -5
- package/scripts/wave-orchestrator/launcher-runtime.mjs +1 -1
- package/scripts/wave-orchestrator/launcher.mjs +216 -0
- package/scripts/wave-orchestrator/ledger.mjs +14 -12
- package/scripts/wave-orchestrator/reducer-snapshot.mjs +8 -6
- package/scripts/wave-orchestrator/retry-engine.mjs +19 -11
- package/scripts/wave-orchestrator/routing-state.mjs +50 -3
- package/scripts/wave-orchestrator/session-supervisor.mjs +6 -10
- package/scripts/wave-orchestrator/task-entity.mjs +4 -4
- package/scripts/wave-orchestrator/terminals.mjs +14 -14
- package/scripts/wave-orchestrator/wave-files.mjs +15 -21
- package/scripts/wave-orchestrator/wave-state-reducer.mjs +72 -5
|
@@ -11,6 +11,8 @@ import {
|
|
|
11
11
|
import {
|
|
12
12
|
CLARIFICATION_CLOSURE_PREFIX,
|
|
13
13
|
buildCoordinationResponseMetrics,
|
|
14
|
+
coordinationBlockerSeverity,
|
|
15
|
+
coordinationRecordBlocksWave,
|
|
14
16
|
} from "./coordination-store.mjs";
|
|
15
17
|
import {
|
|
16
18
|
DEFAULT_COORDINATION_ACK_TIMEOUT_MS,
|
|
@@ -586,6 +588,8 @@ export function buildTaskSnapshots({
|
|
|
586
588
|
const metrics = responseMetrics.recordMetricsById.get(record.id) || {};
|
|
587
589
|
const feedbackRequest = feedbackById.get(record.id) || null;
|
|
588
590
|
const taskState = taskStateForCoordinationRecord(record, feedbackRequest);
|
|
591
|
+
const blocking = coordinationRecordBlocksWave(record);
|
|
592
|
+
const blockerSeverity = coordinationBlockerSeverity(record);
|
|
589
593
|
tasks.push({
|
|
590
594
|
taskId: record.id,
|
|
591
595
|
sourceRecordId: record.id,
|
|
@@ -598,6 +602,8 @@ export function buildTaskSnapshots({
|
|
|
598
602
|
assigneeAgentId: firstTargetAgentId(record),
|
|
599
603
|
leaseOwnerAgentId:
|
|
600
604
|
["acknowledged", "in_progress"].includes(record.status) ? firstTargetAgentId(record) : null,
|
|
605
|
+
blocking,
|
|
606
|
+
blockerSeverity,
|
|
601
607
|
needsHuman:
|
|
602
608
|
record.kind === "human-feedback" ||
|
|
603
609
|
feedbackRequest?.status === "pending" ||
|
|
@@ -627,7 +633,7 @@ export function buildTaskSnapshots({
|
|
|
627
633
|
? feedbackRequest?.updatedAt || record.updatedAt || record.createdAt
|
|
628
634
|
: null,
|
|
629
635
|
overdueAck: metrics.overdueAck === true,
|
|
630
|
-
stale: metrics.staleClarification === true,
|
|
636
|
+
stale: metrics.staleClarification === true || blockerSeverity === "stale",
|
|
631
637
|
feedbackRequestId: feedbackRequest?.id || null,
|
|
632
638
|
humanResponse: feedbackRequest?.responseText || null,
|
|
633
639
|
humanOperator: feedbackRequest?.responseOperator || null,
|
|
@@ -648,6 +654,8 @@ export function buildTaskSnapshots({
|
|
|
648
654
|
ownerAgentId: request.agentId || null,
|
|
649
655
|
assigneeAgentId: request.agentId || null,
|
|
650
656
|
leaseOwnerAgentId: null,
|
|
657
|
+
blocking: true,
|
|
658
|
+
blockerSeverity: "hard",
|
|
651
659
|
needsHuman: request.status !== "answered",
|
|
652
660
|
dependsOn: [],
|
|
653
661
|
evidenceRefs: [],
|
|
@@ -676,6 +684,9 @@ export function buildTaskSnapshots({
|
|
|
676
684
|
export function nextTaskDeadline(tasks) {
|
|
677
685
|
const candidates = [];
|
|
678
686
|
for (const task of tasks || []) {
|
|
687
|
+
if (task?.blocking === false) {
|
|
688
|
+
continue;
|
|
689
|
+
}
|
|
679
690
|
for (const [kind, value] of [
|
|
680
691
|
["ack", task.ackDeadlineAt],
|
|
681
692
|
["resolve", task.resolveDeadlineAt],
|
|
@@ -43,7 +43,17 @@ export const COORDINATION_STATUS_VALUES = [
|
|
|
43
43
|
|
|
44
44
|
export const COORDINATION_PRIORITY_VALUES = ["low", "normal", "high", "urgent"];
|
|
45
45
|
export const COORDINATION_CONFIDENCE_VALUES = ["low", "medium", "high"];
|
|
46
|
+
/**
 * Every blocker severity a coordination record may carry.
 * `normalizeCoordinationRecord` validates `blockerSeverity` against this list.
 */
export const COORDINATION_BLOCKER_SEVERITY_VALUES = [
  "hard",
  "soft",
  "stale",
  "advisory",
  "proof-critical",
  "closure-critical",
];

/** Statuses under which a coordination record still counts as open. */
const OPEN_COORDINATION_STATUSES = new Set(["open", "acknowledged", "in_progress"]);

/** Severities that never block a wave, even while the record is open. */
const NON_BLOCKING_BLOCKER_SEVERITIES = new Set(["stale", "advisory"]);

/**
 * Severities reported as hard blockers.
 * Note: "soft" is in neither set, so a soft record blocks the wave without
 * being classified as a hard blocker.
 */
const HARD_BLOCKER_SEVERITIES = new Set(["hard", "proof-critical", "closure-critical"]);
|
|
47
57
|
export const CLARIFICATION_CLOSURE_PREFIX = "clarification:";
|
|
48
58
|
|
|
49
59
|
function normalizeString(value, fallback = "") {
|
|
@@ -65,6 +75,23 @@ function normalizeStringArray(values) {
|
|
|
65
75
|
);
|
|
66
76
|
}
|
|
67
77
|
|
|
78
|
+
/**
 * Coerce a loosely-typed flag into a boolean, or return `fallback` when unset.
 *
 * @param {*} value - Raw flag: boolean, string ("yes", "off", "1", ...), number, etc.
 * @param {boolean|null} [fallback=null] - Returned when `value` is undefined,
 *   null, or a string that is empty after trimming.
 * @returns {boolean|null} The parsed flag, or `fallback` when the flag is unset.
 */
function normalizeOptionalBoolean(value, fallback = null) {
  if (value === undefined || value === null) {
    return fallback;
  }
  if (typeof value === "boolean") {
    return value;
  }
  const normalized = String(value).trim().toLowerCase();
  // A blank string (including whitespace-only) means "not provided". Without
  // this guard "   " would reach Boolean(value) below and be read as true.
  if (typeof value === "string" && normalized === "") {
    return fallback;
  }
  if (["1", "true", "yes", "y", "on"].includes(normalized)) {
    return true;
  }
  if (["0", "false", "no", "n", "off"].includes(normalized)) {
    return false;
  }
  // Unrecognised input falls back to plain JavaScript truthiness.
  return Boolean(value);
}
|
|
94
|
+
|
|
68
95
|
function validateEnum(value, allowed, label) {
|
|
69
96
|
if (!allowed.includes(value)) {
|
|
70
97
|
throw new Error(`${label} must be one of ${allowed.join(", ")} (got: ${value || "empty"})`);
|
|
@@ -75,6 +102,74 @@ function stableId(prefix) {
|
|
|
75
102
|
return `${prefix}-${crypto.randomBytes(4).toString("hex")}`;
|
|
76
103
|
}
|
|
77
104
|
|
|
105
|
+
/**
 * Decide whether a record of the given kind blocks by default, i.e. when the
 * record carries no explicit `blocking` flag.
 *
 * @param {string} kind - Coordination record kind; compared exactly, so callers
 *   pass it already trimmed and lower-cased.
 * @returns {boolean} True for request, blocker, clarification-request,
 *   human-escalation and human-feedback; false for any other kind.
 */
function defaultBlockingForKind(kind) {
  return ["request", "blocker", "clarification-request", "human-escalation", "human-feedback"].includes(
    kind,
  );
}
|
|
110
|
+
|
|
111
|
+
/**
 * Derive the blocker severity for a record that does not declare one.
 *
 * @param {string} kind - Normalised (lower-case) coordination kind.
 * @param {string} priority - Normalised priority; only consulted for "blocker".
 * @param {boolean|null|undefined} blocking - Only a literal `false` marks the
 *   record as non-blocking.
 * @returns {string} "advisory", "hard", "closure-critical" or "soft".
 */
function defaultSeverityForRecord(kind, priority, blocking) {
  // An explicitly non-blocking record is advisory whatever its kind.
  if (blocking === false) {
    return "advisory";
  }
  if (kind === "human-escalation" || kind === "human-feedback") {
    return "hard";
  }
  if (kind === "request" || kind === "clarification-request") {
    return "closure-critical";
  }
  if (kind === "blocker") {
    return ["high", "urgent"].includes(priority) ? "hard" : "soft";
  }
  return "advisory";
}
|
|
126
|
+
|
|
127
|
+
/**
 * Normalise a raw blocker severity to a lower-case string, or null when absent.
 * The result is not checked against the allowed severity list here.
 *
 * @param {*} value - Raw severity from the record.
 * @param {object|null} [defaults={}] - Optional defaults; `defaults.blockerSeverity`
 *   is used when `value` is null or undefined.
 * @returns {string|null} Lower-cased severity, or null when nothing was supplied.
 */
function normalizeBlockerSeverity(value, defaults = {}) {
  // `defaults?.` guards against an explicit null, which a default parameter
  // value does not cover.
  const normalized = normalizeString(value ?? defaults?.blockerSeverity, "").toLowerCase();
  return normalized || null;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Resolve the effective blocker severity of a coordination record.
 *
 * A recognised explicit `record.blockerSeverity` wins. Otherwise the severity
 * is derived from the record's kind, priority and blocking flag.
 *
 * @param {object} record - Coordination record; non-object input yields "advisory".
 * @returns {string} One of COORDINATION_BLOCKER_SEVERITY_VALUES.
 */
export function coordinationBlockerSeverity(record) {
  if (!record || typeof record !== "object") {
    return "advisory";
  }
  const kind = String(record.kind || "").trim().toLowerCase();
  const priority = String(record.priority || "normal").trim().toLowerCase();
  // With no blocking flag, fall back to the kind's default. Otherwise only a
  // literal `false` counts as non-blocking.
  const blocking =
    record.blocking === undefined || record.blocking === null
      ? defaultBlockingForKind(kind)
      : record.blocking !== false;
  const explicitSeverity = normalizeBlockerSeverity(record.blockerSeverity);
  if (explicitSeverity && COORDINATION_BLOCKER_SEVERITY_VALUES.includes(explicitSeverity)) {
    return explicitSeverity;
  }
  // A missing or unrecognised explicit severity uses the derived default.
  return defaultSeverityForRecord(kind, priority, blocking);
}
|
|
157
|
+
|
|
158
|
+
/**
 * Report whether a coordination record currently blocks its wave.
 *
 * @param {object} record - Coordination record to inspect.
 * @returns {boolean} False when the record is missing, not in an open status,
 *   explicitly `blocking: false`, or its severity is non-blocking
 *   ("stale" / "advisory"); true otherwise.
 */
export function coordinationRecordBlocksWave(record) {
  if (!record || !isOpenCoordinationStatus(record.status)) {
    return false;
  }
  if (record.blocking === false) {
    return false;
  }
  return !NON_BLOCKING_BLOCKER_SEVERITIES.has(coordinationBlockerSeverity(record));
}
|
|
167
|
+
|
|
168
|
+
/**
 * Report whether a record both blocks its wave and carries a hard severity
 * ("hard", "proof-critical" or "closure-critical").
 *
 * @param {object} record - Coordination record to inspect.
 * @returns {boolean} True only for open, blocking records with a hard severity.
 */
export function coordinationRecordIsHardBlocker(record) {
  return (
    coordinationRecordBlocksWave(record) &&
    HARD_BLOCKER_SEVERITIES.has(coordinationBlockerSeverity(record))
  );
}
|
|
172
|
+
|
|
78
173
|
export function normalizeCoordinationRecord(rawRecord, defaults = {}) {
|
|
79
174
|
if (!rawRecord || typeof rawRecord !== "object" || Array.isArray(rawRecord)) {
|
|
80
175
|
throw new Error("Coordination record must be an object");
|
|
@@ -93,12 +188,25 @@ export function normalizeCoordinationRecord(rawRecord, defaults = {}) {
|
|
|
93
188
|
).toLowerCase();
|
|
94
189
|
const priority = normalizeString(rawRecord.priority || defaults.priority || "normal").toLowerCase();
|
|
95
190
|
const confidence = normalizeString(rawRecord.confidence || defaults.confidence || "medium").toLowerCase();
|
|
191
|
+
const explicitBlocking = normalizeOptionalBoolean(
|
|
192
|
+
rawRecord.blocking,
|
|
193
|
+
normalizeOptionalBoolean(defaults.blocking, null),
|
|
194
|
+
);
|
|
195
|
+
const blocking = explicitBlocking ?? defaultBlockingForKind(kind);
|
|
196
|
+
const blockerSeverity =
|
|
197
|
+
normalizeBlockerSeverity(rawRecord.blockerSeverity, defaults) ||
|
|
198
|
+
defaultSeverityForRecord(kind, priority, blocking);
|
|
96
199
|
const createdAt = normalizeString(rawRecord.createdAt || defaults.createdAt || now);
|
|
97
200
|
const updatedAt = normalizeString(rawRecord.updatedAt || defaults.updatedAt || createdAt);
|
|
98
201
|
validateEnum(kind, COORDINATION_KIND_VALUES, "Coordination kind");
|
|
99
202
|
validateEnum(status, COORDINATION_STATUS_VALUES, "Coordination status");
|
|
100
203
|
validateEnum(priority, COORDINATION_PRIORITY_VALUES, "Coordination priority");
|
|
101
204
|
validateEnum(confidence, COORDINATION_CONFIDENCE_VALUES, "Coordination confidence");
|
|
205
|
+
validateEnum(
|
|
206
|
+
blockerSeverity,
|
|
207
|
+
COORDINATION_BLOCKER_SEVERITY_VALUES,
|
|
208
|
+
"Coordination blockerSeverity",
|
|
209
|
+
);
|
|
102
210
|
if (!lane) {
|
|
103
211
|
throw new Error("Coordination lane is required");
|
|
104
212
|
}
|
|
@@ -118,6 +226,8 @@ export function normalizeCoordinationRecord(rawRecord, defaults = {}) {
|
|
|
118
226
|
targets: normalizeStringArray(rawRecord.targets ?? defaults.targets),
|
|
119
227
|
status,
|
|
120
228
|
priority,
|
|
229
|
+
blocking,
|
|
230
|
+
blockerSeverity,
|
|
121
231
|
artifactRefs: normalizeStringArray(rawRecord.artifactRefs ?? defaults.artifactRefs),
|
|
122
232
|
dependsOn: normalizeStringArray(rawRecord.dependsOn ?? defaults.dependsOn),
|
|
123
233
|
closureCondition: normalizeString(rawRecord.closureCondition ?? defaults.closureCondition, ""),
|
|
@@ -180,6 +290,8 @@ export function appendCoordinationRecord(filePath, rawRecord, defaults = {}) {
|
|
|
180
290
|
kind: record.kind,
|
|
181
291
|
status: record.status,
|
|
182
292
|
priority: record.priority,
|
|
293
|
+
blocking: record.blocking !== false,
|
|
294
|
+
blockerSeverity: record.blockerSeverity,
|
|
183
295
|
confidence: record.confidence,
|
|
184
296
|
summary: record.summary,
|
|
185
297
|
detail: record.detail,
|
|
@@ -416,15 +528,16 @@ export function buildCoordinationResponseMetrics(state, options = {}) {
|
|
|
416
528
|
for (const record of state?.openRecords || []) {
|
|
417
529
|
const startMs = parseRecordStartMs(record);
|
|
418
530
|
const ageMs = Number.isFinite(startMs) ? Math.max(0, nowMs - startMs) : null;
|
|
419
|
-
const
|
|
531
|
+
const blocking = coordinationRecordBlocksWave(record);
|
|
532
|
+
const ackTracked = blocking && isAckTrackedRecord(record);
|
|
420
533
|
const ackPending = ackTracked && record.status === "open";
|
|
421
534
|
const clarificationLinked =
|
|
422
|
-
record.kind === "clarification-request" || isClarificationLinkedRequest(record);
|
|
535
|
+
blocking && (record.kind === "clarification-request" || isClarificationLinkedRequest(record));
|
|
423
536
|
const overdueAck = ackPending && Number.isFinite(ageMs) && ageMs >= ackTimeoutMs;
|
|
424
537
|
const staleClarification =
|
|
425
538
|
clarificationLinked && Number.isFinite(ageMs) && ageMs >= resolutionStaleMs;
|
|
426
539
|
|
|
427
|
-
if (Number.isFinite(ageMs)) {
|
|
540
|
+
if (blocking && Number.isFinite(ageMs)) {
|
|
428
541
|
oldestOpenCoordinationAgeMs =
|
|
429
542
|
oldestOpenCoordinationAgeMs === null
|
|
430
543
|
? ageMs
|
|
@@ -454,6 +567,7 @@ export function buildCoordinationResponseMetrics(state, options = {}) {
|
|
|
454
567
|
overdueAck,
|
|
455
568
|
clarificationLinked,
|
|
456
569
|
staleClarification,
|
|
570
|
+
blocking,
|
|
457
571
|
});
|
|
458
572
|
}
|
|
459
573
|
|
|
@@ -469,7 +583,7 @@ export function buildCoordinationResponseMetrics(state, options = {}) {
|
|
|
469
583
|
a.localeCompare(b),
|
|
470
584
|
),
|
|
471
585
|
openHumanEscalationCount: (state?.humanEscalations || []).filter((record) =>
|
|
472
|
-
|
|
586
|
+
coordinationRecordBlocksWave(record),
|
|
473
587
|
).length,
|
|
474
588
|
recordMetricsById,
|
|
475
589
|
};
|
|
@@ -490,6 +604,12 @@ function renderOpenRecord(record, responseMetrics = null) {
|
|
|
490
604
|
if (recordMetrics?.staleClarification) {
|
|
491
605
|
tags.push("stale-clarification");
|
|
492
606
|
}
|
|
607
|
+
if (record.blocking === false) {
|
|
608
|
+
tags.push("non-blocking");
|
|
609
|
+
}
|
|
610
|
+
if (record.blockerSeverity) {
|
|
611
|
+
tags.push(`severity=${record.blockerSeverity}`);
|
|
612
|
+
}
|
|
493
613
|
const timing = tags.length > 0 ? ` [${tags.join(", ")}]` : "";
|
|
494
614
|
return `- [${record.priority}] ${record.kind}/${record.status} ${record.agentId}${targets}${timing} id=${record.id}: ${compactSingleLine(record.summary || record.detail || "no summary", 160)}${artifacts}`;
|
|
495
615
|
}
|
|
@@ -305,6 +305,7 @@ function buildLaunchLimitsMetadata(agent) {
|
|
|
305
305
|
const executor = agent?.executorResolved || {};
|
|
306
306
|
const executorId = normalizeExecutorMode(executor.id || DEFAULT_EXECUTOR_MODE);
|
|
307
307
|
const attemptTimeoutMinutes = executor?.budget?.minutes ?? null;
|
|
308
|
+
const advisoryTurnBudget = executor?.budget?.turns ?? null;
|
|
308
309
|
if (executorId === "claude") {
|
|
309
310
|
const source = executor?.claude?.maxTurnsSource || null;
|
|
310
311
|
return {
|
|
@@ -312,9 +313,11 @@ function buildLaunchLimitsMetadata(agent) {
|
|
|
312
313
|
knownTurnLimit: executor?.claude?.maxTurns ?? null,
|
|
313
314
|
turnLimitSource: source,
|
|
314
315
|
notes:
|
|
315
|
-
source
|
|
316
|
-
? [
|
|
317
|
-
:
|
|
316
|
+
source
|
|
317
|
+
? []
|
|
318
|
+
: advisoryTurnBudget !== null
|
|
319
|
+
? ["Generic budget.turns remained advisory; Wave emitted no Claude --max-turns flag."]
|
|
320
|
+
: [],
|
|
318
321
|
};
|
|
319
322
|
}
|
|
320
323
|
if (executorId === "opencode") {
|
|
@@ -324,9 +327,11 @@ function buildLaunchLimitsMetadata(agent) {
|
|
|
324
327
|
knownTurnLimit: executor?.opencode?.steps ?? null,
|
|
325
328
|
turnLimitSource: source,
|
|
326
329
|
notes:
|
|
327
|
-
source
|
|
328
|
-
? [
|
|
329
|
-
:
|
|
330
|
+
source
|
|
331
|
+
? []
|
|
332
|
+
: advisoryTurnBudget !== null
|
|
333
|
+
? ["Generic budget.turns remained advisory; Wave emitted no OpenCode --steps flag."]
|
|
334
|
+
: [],
|
|
330
335
|
};
|
|
331
336
|
}
|
|
332
337
|
if (executorId === "codex") {
|
|
@@ -49,7 +49,7 @@ import {
|
|
|
49
49
|
validateWaveComponentMatrixCurrentLevels,
|
|
50
50
|
} from "./wave-files.mjs";
|
|
51
51
|
import {
|
|
52
|
-
|
|
52
|
+
coordinationRecordBlocksWave,
|
|
53
53
|
openClarificationLinkedRequests,
|
|
54
54
|
} from "./coordination-store.mjs";
|
|
55
55
|
import { contradictionsBlockingGate } from "./contradiction-entity.mjs";
|
|
@@ -998,7 +998,7 @@ export function readWaveIntegrationBarrier(wave, agentRuns, derivedState, option
|
|
|
998
998
|
|
|
999
999
|
export function readClarificationBarrier(derivedState) {
|
|
1000
1000
|
const openClarifications = (derivedState?.coordinationState?.clarifications || []).filter(
|
|
1001
|
-
(record) =>
|
|
1001
|
+
(record) => coordinationRecordBlocksWave(record),
|
|
1002
1002
|
);
|
|
1003
1003
|
if (openClarifications.length > 0) {
|
|
1004
1004
|
return {
|
|
@@ -1009,7 +1009,7 @@ export function readClarificationBarrier(derivedState) {
|
|
|
1009
1009
|
}
|
|
1010
1010
|
const openClarificationRequests = openClarificationLinkedRequests(
|
|
1011
1011
|
derivedState?.coordinationState,
|
|
1012
|
-
);
|
|
1012
|
+
).filter((record) => coordinationRecordBlocksWave(record));
|
|
1013
1013
|
if (openClarificationRequests.length > 0) {
|
|
1014
1014
|
return {
|
|
1015
1015
|
ok: false,
|
|
@@ -1019,10 +1019,10 @@ export function readClarificationBarrier(derivedState) {
|
|
|
1019
1019
|
}
|
|
1020
1020
|
const pendingHuman = [
|
|
1021
1021
|
...((derivedState?.coordinationState?.humanEscalations || []).filter((record) =>
|
|
1022
|
-
|
|
1022
|
+
coordinationRecordBlocksWave(record),
|
|
1023
1023
|
)),
|
|
1024
1024
|
...((derivedState?.coordinationState?.humanFeedback || []).filter((record) =>
|
|
1025
|
-
|
|
1025
|
+
coordinationRecordBlocksWave(record),
|
|
1026
1026
|
)),
|
|
1027
1027
|
];
|
|
1028
1028
|
if (pendingHuman.length > 0) {
|
|
@@ -235,7 +235,7 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
235
235
|
"const fs=require('node:fs'); const statusPath=process.argv[1]; const payload={code:Number(process.argv[2]),promptHash:process.argv[3]||null,orchestratorId:process.argv[4]||null,attempt:Number(process.argv[5])||1,completedAt:new Date().toISOString()}; fs.writeFileSync(statusPath, JSON.stringify(payload, null, 2)+'\\n', 'utf8');",
|
|
236
236
|
)} ${shellQuote(statusPath)} "$status" ${shellQuote(promptHash)} ${shellQuote(orchestratorId || "")} ${shellQuote(String(attempt || 1))}`,
|
|
237
237
|
`echo "[${lanePaths.lane}-wave-launcher] ${sessionName} finished with code $status"`,
|
|
238
|
-
"
|
|
238
|
+
"exit \"$status\"",
|
|
239
239
|
].join("\n");
|
|
240
240
|
|
|
241
241
|
runTmuxFn(
|
|
@@ -16,6 +16,7 @@ import {
|
|
|
16
16
|
readWaveHumanFeedbackRequests,
|
|
17
17
|
} from "./coordination.mjs";
|
|
18
18
|
import {
|
|
19
|
+
appendCoordinationRecord,
|
|
19
20
|
buildCoordinationResponseMetrics,
|
|
20
21
|
} from "./coordination-store.mjs";
|
|
21
22
|
import {
|
|
@@ -122,6 +123,7 @@ import {
|
|
|
122
123
|
import {
|
|
123
124
|
clearWaveRetryOverride,
|
|
124
125
|
readWaveRetryOverride,
|
|
126
|
+
writeWaveRetryOverride,
|
|
125
127
|
} from "./retry-control.mjs";
|
|
126
128
|
import { appendWaveControlEvent, readControlPlaneEvents } from "./control-plane.mjs";
|
|
127
129
|
import { materializeContradictionsFromControlPlaneEvents } from "./contradiction-entity.mjs";
|
|
@@ -578,6 +580,100 @@ function buildFailureFromGate(gateName, gate, fallbackLogPath) {
|
|
|
578
580
|
};
|
|
579
581
|
}
|
|
580
582
|
|
|
583
|
+
/**
 * Normalise a failure status code to a trimmed, lower-case string.
 *
 * @param {*} value - Raw status code; any falsy value becomes "".
 * @returns {string} The normalised status code.
 */
function normalizeFailureStatusCode(value) {
  return String(value || "").trim().toLowerCase();
}
|
|
586
|
+
|
|
587
|
+
/**
 * Classify a failure as recoverable and name the reason.
 *
 * Checks run in order: the failure's status code, the execution summary's
 * termination reason, then a rate-limit phrase in the failure detail or
 * termination hint.
 *
 * @param {object} failure - Failure record (`statusCode`, `detail`).
 * @param {object|null} [summary=null] - Execution summary
 *   (`terminationReason`, `terminationHint`), when available.
 * @returns {string|null} The matching reason, or null when nothing matched.
 */
function recoverableFailureReason(failure, summary = null) {
  const statusCode = normalizeFailureStatusCode(failure?.statusCode);
  if (["timeout-no-status", "timed_out", "missing-status"].includes(statusCode)) {
    return statusCode;
  }
  const terminationReason = String(summary?.terminationReason || "").trim().toLowerCase();
  if (["timeout", "max-turns", "session-missing"].includes(terminationReason)) {
    return terminationReason;
  }
  const detailText = `${failure?.detail || ""} ${summary?.terminationHint || ""}`.toLowerCase();
  if (detailText.includes("rate limit") || detailText.includes("429 too many requests")) {
    return "rate-limit";
  }
  return null;
}
|
|
602
|
+
|
|
603
|
+
/**
 * Attach termination and recoverability details to each failure.
 *
 * For a failure whose `agentId` matches a run, the run's execution summary is
 * read and used to fill in detail, termination reason/hint and the observed
 * turn limit. Failures are copied, not mutated.
 *
 * @param {Array<object>} failures - Failure records; a nullish list yields [].
 * @param {Array<object>} agentRuns - Runs exposing `agent.agentId`,
 *   `statusPath` and `logPath`.
 * @returns {Array<object>} Copies of the failures with `detail`,
 *   `terminationReason`, `terminationHint`, `observedTurnLimit`,
 *   `recoverable` and `recoveryReason` set.
 */
function annotateFailuresWithRecoveryHints(failures, agentRuns) {
  // Runs without an agent id can never match a failure, so leave them out
  // rather than throwing on `run.agent.agentId`.
  const runsByAgentId = new Map(
    (agentRuns || [])
      .filter((run) => run?.agent?.agentId)
      .map((run) => [run.agent.agentId, run]),
  );
  return (failures || []).map((failure) => {
    const run = failure?.agentId ? runsByAgentId.get(failure.agentId) : null;
    const summary = run
      ? readAgentExecutionSummary(run.statusPath, {
          agent: run.agent,
          statusPath: run.statusPath,
          statusRecord: readStatusRecordIfPresent(run.statusPath),
          logPath: fs.existsSync(run.logPath) ? run.logPath : null,
        })
      : null;
    const recoveryReason = recoverableFailureReason(failure, summary);
    // Number(null) and Number("") are both 0, which is finite. Check for
    // "absent" first so a missing limit is reported as null, not 0.
    const rawTurnLimit = summary?.terminationObservedTurnLimit;
    const hasTurnLimit = rawTurnLimit !== undefined && rawTurnLimit !== null && rawTurnLimit !== "";
    const observedTurnLimit =
      hasTurnLimit && Number.isFinite(Number(rawTurnLimit)) ? Number(rawTurnLimit) : null;
    return {
      ...failure,
      detail: failure?.detail || summary?.terminationHint || null,
      terminationReason: summary?.terminationReason || null,
      terminationHint: summary?.terminationHint || null,
      observedTurnLimit,
      recoverable: Boolean(recoveryReason),
      recoveryReason,
    };
  });
}
|
|
630
|
+
|
|
631
|
+
/**
 * Report whether a failure list is non-empty and every entry is recoverable.
 *
 * @param {Array<object>} failures - Failure records carrying a `recoverable` flag.
 * @returns {boolean} False for a non-array or empty list; otherwise true only
 *   when every entry has a truthy `recoverable`.
 */
function failuresAreRecoverable(failures) {
  // The length check matters: `every` is vacuously true on an empty array.
  return (
    Array.isArray(failures) &&
    failures.length > 0 &&
    failures.every((failure) => Boolean(failure?.recoverable))
  );
}
|
|
634
|
+
|
|
635
|
+
/**
 * Append one launcher-authored repair request per selected run to the
 * coordination log.
 *
 * Each record is an open "request" targeted at `agent:<agentId>`, written with
 * `blocking: false` and `blockerSeverity: "soft"`. Runs without an agent id
 * are skipped. When a failure exists for the agent, its recovery reason (or
 * status code) and detail are included in the summary and detail text.
 *
 * @param {object} params
 * @param {string} params.coordinationLogPath - Coordination log to append to.
 * @param {object} params.lanePaths - Lane paths; `lanePaths.lane` is recorded.
 * @param {object} params.wave - Wave descriptor; `wave.wave` is recorded.
 * @param {number} params.attempt - Attempt number embedded in the record id.
 * @param {Array<object>} params.runs - Runs to request repair for; a non-array is treated as empty.
 * @param {Array<object>} params.failures - Failures, matched to runs by `agentId`.
 * @returns {void}
 */
function appendRepairCoordinationRequests({
  coordinationLogPath,
  lanePaths,
  wave,
  attempt,
  runs,
  failures,
}) {
  const selectedRuns = Array.isArray(runs) ? runs : [];
  const failureByAgentId = new Map(
    (failures || [])
      .filter((failure) => failure?.agentId)
      .map((failure) => [failure.agentId, failure]),
  );
  for (const run of selectedRuns) {
    const agentId = run?.agent?.agentId;
    if (!agentId) {
      continue;
    }
    const failure = failureByAgentId.get(agentId) || null;
    // Prefer the classified recovery reason over the raw status code.
    const failureLabel = failure ? failure.recoveryReason || failure.statusCode : null;
    appendCoordinationRecord(coordinationLogPath, {
      id: `repair-wave-${wave.wave}-attempt-${attempt}-${agentId}`,
      lane: lanePaths.lane,
      wave: wave.wave,
      agentId: "launcher",
      kind: "request",
      targets: [`agent:${agentId}`],
      priority: "normal",
      summary: failure
        ? `Repair ${agentId}: ${failureLabel}`
        : `Repair ${agentId}: targeted follow-up`,
      detail: failure
        ? `Targeted recovery for ${agentId} after ${failureLabel}. ${failure.detail || "Resume the bounded follow-up work and preserve reusable proof from other agents."}`
        : `Targeted recovery for ${agentId}. Resume the bounded follow-up work and preserve reusable proof from other agents.`,
      status: "open",
      source: "launcher",
      blocking: false,
      blockerSeverity: "soft",
    });
  }
}
|
|
676
|
+
|
|
581
677
|
// --- Main entry point ---
|
|
582
678
|
|
|
583
679
|
export async function runLauncherCli(argv) {
|
|
@@ -1519,6 +1615,7 @@ export async function runLauncherCli(argv) {
|
|
|
1519
1615
|
}
|
|
1520
1616
|
|
|
1521
1617
|
materializeAgentExecutionSummaries(wave, agentRuns);
|
|
1618
|
+
failures = annotateFailuresWithRecoveryHints(failures, agentRuns);
|
|
1522
1619
|
refreshDerivedState(attempt);
|
|
1523
1620
|
syncWaveSignals();
|
|
1524
1621
|
lastLiveCoordinationRefreshAt = Date.now();
|
|
@@ -1706,6 +1803,7 @@ export async function runLauncherCli(argv) {
|
|
|
1706
1803
|
failures = closureResult.failures;
|
|
1707
1804
|
timedOut = timedOut || closureResult.timedOut;
|
|
1708
1805
|
materializeAgentExecutionSummaries(wave, agentRuns);
|
|
1806
|
+
failures = annotateFailuresWithRecoveryHints(failures, agentRuns);
|
|
1709
1807
|
refreshDerivedState(attempt);
|
|
1710
1808
|
}
|
|
1711
1809
|
} else {
|
|
@@ -1896,6 +1994,14 @@ export async function runLauncherCli(argv) {
|
|
|
1896
1994
|
detail: "Queued for shared component closure",
|
|
1897
1995
|
});
|
|
1898
1996
|
}
|
|
1997
|
+
appendRepairCoordinationRequests({
|
|
1998
|
+
coordinationLogPath: derivedState.coordinationLogPath,
|
|
1999
|
+
lanePaths,
|
|
2000
|
+
wave,
|
|
2001
|
+
attempt,
|
|
2002
|
+
runs: runsToLaunch,
|
|
2003
|
+
failures,
|
|
2004
|
+
});
|
|
1899
2005
|
writeWaveRelaunchProjection({
|
|
1900
2006
|
lanePaths,
|
|
1901
2007
|
wave,
|
|
@@ -1931,6 +2037,106 @@ export async function runLauncherCli(argv) {
|
|
|
1931
2037
|
}
|
|
1932
2038
|
|
|
1933
2039
|
if (attempt >= options.maxRetriesPerWave + 1) {
|
|
2040
|
+
const reducerDecision =
|
|
2041
|
+
latestReducerSnapshot || refreshReducerSnapshot(attempt);
|
|
2042
|
+
const recoveryPlan = planRetryWaveAttempt({
|
|
2043
|
+
agentRuns,
|
|
2044
|
+
failures,
|
|
2045
|
+
derivedState,
|
|
2046
|
+
lanePaths,
|
|
2047
|
+
wave,
|
|
2048
|
+
retryOverride: readWaveRetryOverride(lanePaths, wave.wave),
|
|
2049
|
+
waveState: reducerDecision?.reducerState || null,
|
|
2050
|
+
});
|
|
2051
|
+
const recoverySelectedAgentIds = Array.from(
|
|
2052
|
+
new Set([
|
|
2053
|
+
...((recoveryPlan.selectedRuns || []).map((run) => run.agent.agentId)),
|
|
2054
|
+
...((reducerDecision?.resumePlan?.invalidatedAgentIds || []).filter(Boolean)),
|
|
2055
|
+
...((failures || []).map((failure) => failure.agentId).filter(Boolean)),
|
|
2056
|
+
]),
|
|
2057
|
+
);
|
|
2058
|
+
if (failuresAreRecoverable(failures) && recoverySelectedAgentIds.length > 0) {
|
|
2059
|
+
const resumeCursor =
|
|
2060
|
+
reducerDecision?.resumePlan?.resumeFromPhase &&
|
|
2061
|
+
reducerDecision.resumePlan.resumeFromPhase !== "completed"
|
|
2062
|
+
? reducerDecision.resumePlan.resumeFromPhase
|
|
2063
|
+
: null;
|
|
2064
|
+
const queuedRecovery = writeWaveRetryOverride(lanePaths, wave.wave, {
|
|
2065
|
+
lane: lanePaths.lane,
|
|
2066
|
+
wave: wave.wave,
|
|
2067
|
+
selectedAgentIds: recoverySelectedAgentIds,
|
|
2068
|
+
resumeCursor,
|
|
2069
|
+
clearReusableAgentIds: Array.from(
|
|
2070
|
+
new Set((failures || []).map((failure) => failure.agentId).filter(Boolean)),
|
|
2071
|
+
),
|
|
2072
|
+
preserveReusableAgentIds: reducerDecision?.resumePlan?.reusableAgentIds || [],
|
|
2073
|
+
reuseProofBundleIds: reducerDecision?.resumePlan?.reusableProofBundleIds || [],
|
|
2074
|
+
requestedBy: "launcher-recovery",
|
|
2075
|
+
reason: `Auto recovery queued after recoverable execution issue(s): ${(failures || []).map((failure) => `${failure.agentId || "wave"}:${failure.recoveryReason || failure.statusCode}`).join(", ")}.`,
|
|
2076
|
+
applyOnce: true,
|
|
2077
|
+
});
|
|
2078
|
+
appendRepairCoordinationRequests({
|
|
2079
|
+
coordinationLogPath: derivedState.coordinationLogPath,
|
|
2080
|
+
lanePaths,
|
|
2081
|
+
wave,
|
|
2082
|
+
attempt: attempt + 1,
|
|
2083
|
+
runs: agentRuns.filter((run) => recoverySelectedAgentIds.includes(run.agent.agentId)),
|
|
2084
|
+
failures,
|
|
2085
|
+
});
|
|
2086
|
+
if (recoveryPlan.selectedRuns.length > 0) {
|
|
2087
|
+
writeWaveRelaunchProjection({
|
|
2088
|
+
lanePaths,
|
|
2089
|
+
wave,
|
|
2090
|
+
attempt: attempt + 1,
|
|
2091
|
+
runs: recoveryPlan.selectedRuns,
|
|
2092
|
+
failures,
|
|
2093
|
+
derivedState,
|
|
2094
|
+
});
|
|
2095
|
+
}
|
|
2096
|
+
recordAttemptState(lanePaths, wave.wave, attempt, "failed", {
|
|
2097
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
2098
|
+
detail: failures
|
|
2099
|
+
.map((failure) => `${failure.agentId || "wave"}:${failure.recoveryReason || failure.statusCode}`)
|
|
2100
|
+
.join(", "),
|
|
2101
|
+
});
|
|
2102
|
+
recordWaveRunState(lanePaths, wave.wave, "blocked", {
|
|
2103
|
+
attempts: attempt,
|
|
2104
|
+
traceDir: completionTraceDir ? path.relative(REPO_ROOT, completionTraceDir) : null,
|
|
2105
|
+
gateSnapshot: completionGateSnapshot,
|
|
2106
|
+
recoverable: true,
|
|
2107
|
+
rerunRequestId: queuedRecovery.requestId,
|
|
2108
|
+
failures: failures.map((failure) => ({
|
|
2109
|
+
agentId: failure.agentId || null,
|
|
2110
|
+
statusCode: failure.statusCode,
|
|
2111
|
+
recoveryReason: failure.recoveryReason || null,
|
|
2112
|
+
detail: failure.detail || null,
|
|
2113
|
+
})),
|
|
2114
|
+
});
|
|
2115
|
+
dashboardState.status = "blocked";
|
|
2116
|
+
for (const failure of failures) {
|
|
2117
|
+
setWaveDashboardAgent(dashboardState, failure.agentId, {
|
|
2118
|
+
state: "blocked",
|
|
2119
|
+
detail:
|
|
2120
|
+
failure.detail ||
|
|
2121
|
+
`Recoverable ${failure.recoveryReason || failure.statusCode}; targeted resume queued.`,
|
|
2122
|
+
});
|
|
2123
|
+
}
|
|
2124
|
+
flushDashboards();
|
|
2125
|
+
appendCoordination({
|
|
2126
|
+
event: "wave_recovery_queued",
|
|
2127
|
+
waves: [wave.wave],
|
|
2128
|
+
status: "blocked",
|
|
2129
|
+
details: `attempt=${attempt}/${options.maxRetriesPerWave + 1}; request=${queuedRecovery.requestId}; agents=${recoverySelectedAgentIds.join(",")}; reason=${(failures || []).map((failure) => failure.recoveryReason || failure.statusCode).join(",")}`,
|
|
2130
|
+
actionRequested:
|
|
2131
|
+
`Lane ${lanePaths.lane} owners should resume the queued targeted recovery or let autonomous relaunch the selected agents.`,
|
|
2132
|
+
});
|
|
2133
|
+
await flushWaveControlTelemetry();
|
|
2134
|
+
const error = new Error(
|
|
2135
|
+
`Wave ${wave.wave} queued targeted recovery request ${queuedRecovery.requestId} after recoverable execution failures.`,
|
|
2136
|
+
);
|
|
2137
|
+
error.exitCode = 43;
|
|
2138
|
+
throw error;
|
|
2139
|
+
}
|
|
1934
2140
|
recordAttemptState(lanePaths, wave.wave, attempt, "failed", {
|
|
1935
2141
|
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
1936
2142
|
detail: failures
|
|
@@ -2075,6 +2281,16 @@ export async function runLauncherCli(argv) {
|
|
|
2075
2281
|
detail: "Queued for retry",
|
|
2076
2282
|
});
|
|
2077
2283
|
}
|
|
2284
|
+
if (retryPlan.source !== "override") {
|
|
2285
|
+
appendRepairCoordinationRequests({
|
|
2286
|
+
coordinationLogPath: derivedState.coordinationLogPath,
|
|
2287
|
+
lanePaths,
|
|
2288
|
+
wave,
|
|
2289
|
+
attempt: attempt + 1,
|
|
2290
|
+
runs: runsToLaunch,
|
|
2291
|
+
failures,
|
|
2292
|
+
});
|
|
2293
|
+
}
|
|
2078
2294
|
writeWaveRelaunchProjection({
|
|
2079
2295
|
lanePaths,
|
|
2080
2296
|
wave,
|