@chllming/wave-orchestration 0.8.5 → 0.8.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +46 -0
- package/README.md +14 -9
- package/docs/README.md +3 -1
- package/docs/context7/bundles.json +19 -20
- package/docs/context7/planner-agent/README.md +4 -1
- package/docs/guides/author-and-run-waves.md +4 -1
- package/docs/guides/planner.md +3 -1
- package/docs/guides/signal-wrappers.md +165 -0
- package/docs/guides/terminal-surfaces.md +15 -0
- package/docs/plans/context7-wave-orchestrator.md +24 -7
- package/docs/plans/current-state.md +7 -3
- package/docs/plans/end-state-architecture.md +16 -4
- package/docs/plans/examples/wave-example-design-handoff.md +1 -1
- package/docs/plans/examples/wave-example-live-proof.md +1 -1
- package/docs/plans/migration.md +179 -72
- package/docs/plans/wave-orchestrator.md +11 -5
- package/docs/reference/cli-reference.md +21 -4
- package/docs/reference/coordination-and-closure.md +26 -5
- package/docs/reference/live-proof-waves.md +9 -0
- package/docs/reference/npmjs-trusted-publishing.md +2 -2
- package/docs/reference/runtime-config/README.md +9 -3
- package/docs/reference/sample-waves.md +5 -5
- package/docs/reference/skills.md +9 -1
- package/docs/reference/wave-control.md +18 -0
- package/docs/reference/wave-planning-lessons.md +7 -1
- package/docs/research/coordination-failure-review.md +6 -6
- package/package.json +1 -1
- package/releases/manifest.json +38 -0
- package/scripts/context7-api-check.sh +57 -13
- package/scripts/wave-orchestrator/agent-state.mjs +42 -0
- package/scripts/wave-orchestrator/autonomous.mjs +42 -6
- package/scripts/wave-orchestrator/clarification-triage.mjs +4 -3
- package/scripts/wave-orchestrator/control-cli.mjs +145 -11
- package/scripts/wave-orchestrator/control-plane.mjs +12 -1
- package/scripts/wave-orchestrator/coordination-store.mjs +124 -4
- package/scripts/wave-orchestrator/coordination.mjs +35 -0
- package/scripts/wave-orchestrator/executors.mjs +11 -6
- package/scripts/wave-orchestrator/gate-engine.mjs +5 -5
- package/scripts/wave-orchestrator/install.mjs +2 -0
- package/scripts/wave-orchestrator/launcher-runtime.mjs +12 -1
- package/scripts/wave-orchestrator/launcher.mjs +236 -0
- package/scripts/wave-orchestrator/ledger.mjs +14 -12
- package/scripts/wave-orchestrator/reducer-snapshot.mjs +8 -6
- package/scripts/wave-orchestrator/retry-engine.mjs +19 -11
- package/scripts/wave-orchestrator/routing-state.mjs +50 -3
- package/scripts/wave-orchestrator/session-supervisor.mjs +119 -10
- package/scripts/wave-orchestrator/shared.mjs +1 -0
- package/scripts/wave-orchestrator/signals.mjs +681 -0
- package/scripts/wave-orchestrator/task-entity.mjs +4 -4
- package/scripts/wave-orchestrator/terminals.mjs +14 -14
- package/scripts/wave-orchestrator/wave-control-schema.mjs +2 -0
- package/scripts/wave-orchestrator/wave-files.mjs +15 -21
- package/scripts/wave-orchestrator/wave-state-reducer.mjs +72 -5
- package/scripts/wave-status.sh +200 -0
- package/scripts/wave-watch.sh +200 -0
- package/skills/README.md +3 -0
- package/skills/signal-hygiene/SKILL.md +51 -0
- package/skills/signal-hygiene/skill.json +20 -0
|
@@ -203,6 +203,8 @@ export function buildExecutionPrompt({
|
|
|
203
203
|
sharedPlanDocs = null,
|
|
204
204
|
designPacketPaths = null,
|
|
205
205
|
designExecutionMode = null,
|
|
206
|
+
signalStatePath = null,
|
|
207
|
+
signalAckPath = null,
|
|
206
208
|
contQaAgentId = "A0",
|
|
207
209
|
contEvalAgentId = "E0",
|
|
208
210
|
integrationAgentId = "A8",
|
|
@@ -213,6 +215,10 @@ export function buildExecutionPrompt({
|
|
|
213
215
|
? path.relative(REPO_ROOT, sharedSummaryPath)
|
|
214
216
|
: null;
|
|
215
217
|
const relativeInboxPath = inboxPath ? path.relative(REPO_ROOT, inboxPath) : null;
|
|
218
|
+
const relativeSignalStatePath = signalStatePath
|
|
219
|
+
? path.relative(REPO_ROOT, signalStatePath)
|
|
220
|
+
: null;
|
|
221
|
+
const relativeSignalAckPath = signalAckPath ? path.relative(REPO_ROOT, signalAckPath) : null;
|
|
216
222
|
const lanePlansDir = lane === DEFAULT_WAVE_LANE ? "docs/plans" : `docs/${lane}/plans`;
|
|
217
223
|
const resolvedSharedPlanDocs =
|
|
218
224
|
sharedPlanDocs && sharedPlanDocs.length > 0
|
|
@@ -531,6 +537,14 @@ export function buildExecutionPrompt({
|
|
|
531
537
|
`Agent inbox repo-relative path: ${relativeInboxPath}`,
|
|
532
538
|
]
|
|
533
539
|
: []),
|
|
540
|
+
...(signalStatePath
|
|
541
|
+
? [
|
|
542
|
+
`Signal state absolute path: ${signalStatePath}`,
|
|
543
|
+
`Signal state repo-relative path: ${relativeSignalStatePath}`,
|
|
544
|
+
`Signal ack absolute path: ${signalAckPath}`,
|
|
545
|
+
`Signal ack repo-relative path: ${relativeSignalAckPath}`,
|
|
546
|
+
]
|
|
547
|
+
: []),
|
|
534
548
|
"",
|
|
535
549
|
...(sharedSummaryText
|
|
536
550
|
? ["Current wave shared summary:", "```markdown", sharedSummaryText, "```", ""]
|
|
@@ -538,6 +552,17 @@ export function buildExecutionPrompt({
|
|
|
538
552
|
...(inboxText
|
|
539
553
|
? ["Current agent inbox:", "```markdown", inboxText, "```", ""]
|
|
540
554
|
: []),
|
|
555
|
+
...(signalStatePath
|
|
556
|
+
? [
|
|
557
|
+
"Long-running signal loop:",
|
|
558
|
+
"- If you are operating as a resident or waiting agent, keep watching the signal state JSON instead of exiting early.",
|
|
559
|
+
"- When the signal `version` increases beyond the version recorded in the signal ack file, immediately write the ack file before acting.",
|
|
560
|
+
`- Write the ack file as JSON with exactly these keys: \`agentId\`, \`version\`, \`signal\`, and \`observedAt\`. Use \`${agent.agentId}\` as \`agentId\` and an ISO-8601 timestamp for \`observedAt\`.`,
|
|
561
|
+
"- After writing the ack, re-read the inbox, shared summary, and message board, then handle the new signal once.",
|
|
562
|
+
"- If the signal version has not changed, stay idle. Do not busy-loop or repeat unchanged work.",
|
|
563
|
+
"",
|
|
564
|
+
]
|
|
565
|
+
: []),
|
|
541
566
|
...exitContractLines,
|
|
542
567
|
...promotedComponentLines,
|
|
543
568
|
...evalTargetLines,
|
|
@@ -564,6 +589,8 @@ export function buildResidentOrchestratorPrompt({
|
|
|
564
589
|
sharedSummaryPath,
|
|
565
590
|
dashboardPath,
|
|
566
591
|
triagePath = null,
|
|
592
|
+
signalStatePath = null,
|
|
593
|
+
signalAckPath = null,
|
|
567
594
|
rolePrompt = "",
|
|
568
595
|
}) {
|
|
569
596
|
const coordinationCommand = [
|
|
@@ -612,6 +639,8 @@ export function buildResidentOrchestratorPrompt({
|
|
|
612
639
|
`- Wave dashboard: ${dashboardPath}`,
|
|
613
640
|
`- Message board projection: ${messageBoardPath}`,
|
|
614
641
|
...(triagePath ? [`- Feedback triage log: ${triagePath}`] : []),
|
|
642
|
+
...(signalStatePath ? [`- Signal state: ${signalStatePath}`] : []),
|
|
643
|
+
...(signalAckPath ? [`- Signal ack: ${signalAckPath}`] : []),
|
|
615
644
|
"",
|
|
616
645
|
"Action surface:",
|
|
617
646
|
`- Coordination command: \`${coordinationCommand}\``,
|
|
@@ -624,6 +653,12 @@ export function buildResidentOrchestratorPrompt({
|
|
|
624
653
|
"2. Identify open clarifications, open clarification-linked requests, overdue acknowledgements, and human-feedback state.",
|
|
625
654
|
"3. If action is needed, write a durable coordination update and explain the policy basis for the action.",
|
|
626
655
|
"4. If nothing needs action, continue monitoring. Do not exit until the wave is clearly terminal or the launcher stops the session.",
|
|
656
|
+
...(signalStatePath
|
|
657
|
+
? [
|
|
658
|
+
"5. When the signal state `version` increases, immediately write the signal ack file before taking action so the launcher knows you observed the change.",
|
|
659
|
+
"6. After acknowledging the signal, re-read the shared summary, dashboard, coordination log, and triage artifacts before intervening.",
|
|
660
|
+
]
|
|
661
|
+
: []),
|
|
627
662
|
"",
|
|
628
663
|
...(roleSection
|
|
629
664
|
? [
|
|
@@ -305,6 +305,7 @@ function buildLaunchLimitsMetadata(agent) {
|
|
|
305
305
|
const executor = agent?.executorResolved || {};
|
|
306
306
|
const executorId = normalizeExecutorMode(executor.id || DEFAULT_EXECUTOR_MODE);
|
|
307
307
|
const attemptTimeoutMinutes = executor?.budget?.minutes ?? null;
|
|
308
|
+
const advisoryTurnBudget = executor?.budget?.turns ?? null;
|
|
308
309
|
if (executorId === "claude") {
|
|
309
310
|
const source = executor?.claude?.maxTurnsSource || null;
|
|
310
311
|
return {
|
|
@@ -312,9 +313,11 @@ function buildLaunchLimitsMetadata(agent) {
|
|
|
312
313
|
knownTurnLimit: executor?.claude?.maxTurns ?? null,
|
|
313
314
|
turnLimitSource: source,
|
|
314
315
|
notes:
|
|
315
|
-
source
|
|
316
|
-
? [
|
|
317
|
-
:
|
|
316
|
+
source
|
|
317
|
+
? []
|
|
318
|
+
: advisoryTurnBudget !== null
|
|
319
|
+
? ["Generic budget.turns remained advisory; Wave emitted no Claude --max-turns flag."]
|
|
320
|
+
: [],
|
|
318
321
|
};
|
|
319
322
|
}
|
|
320
323
|
if (executorId === "opencode") {
|
|
@@ -324,9 +327,11 @@ function buildLaunchLimitsMetadata(agent) {
|
|
|
324
327
|
knownTurnLimit: executor?.opencode?.steps ?? null,
|
|
325
328
|
turnLimitSource: source,
|
|
326
329
|
notes:
|
|
327
|
-
source
|
|
328
|
-
? [
|
|
329
|
-
:
|
|
330
|
+
source
|
|
331
|
+
? []
|
|
332
|
+
: advisoryTurnBudget !== null
|
|
333
|
+
? ["Generic budget.turns remained advisory; Wave emitted no OpenCode --steps flag."]
|
|
334
|
+
: [],
|
|
330
335
|
};
|
|
331
336
|
}
|
|
332
337
|
if (executorId === "codex") {
|
|
@@ -49,7 +49,7 @@ import {
|
|
|
49
49
|
validateWaveComponentMatrixCurrentLevels,
|
|
50
50
|
} from "./wave-files.mjs";
|
|
51
51
|
import {
|
|
52
|
-
|
|
52
|
+
coordinationRecordBlocksWave,
|
|
53
53
|
openClarificationLinkedRequests,
|
|
54
54
|
} from "./coordination-store.mjs";
|
|
55
55
|
import { contradictionsBlockingGate } from "./contradiction-entity.mjs";
|
|
@@ -998,7 +998,7 @@ export function readWaveIntegrationBarrier(wave, agentRuns, derivedState, option
|
|
|
998
998
|
|
|
999
999
|
export function readClarificationBarrier(derivedState) {
|
|
1000
1000
|
const openClarifications = (derivedState?.coordinationState?.clarifications || []).filter(
|
|
1001
|
-
(record) =>
|
|
1001
|
+
(record) => coordinationRecordBlocksWave(record),
|
|
1002
1002
|
);
|
|
1003
1003
|
if (openClarifications.length > 0) {
|
|
1004
1004
|
return {
|
|
@@ -1009,7 +1009,7 @@ export function readClarificationBarrier(derivedState) {
|
|
|
1009
1009
|
}
|
|
1010
1010
|
const openClarificationRequests = openClarificationLinkedRequests(
|
|
1011
1011
|
derivedState?.coordinationState,
|
|
1012
|
-
);
|
|
1012
|
+
).filter((record) => coordinationRecordBlocksWave(record));
|
|
1013
1013
|
if (openClarificationRequests.length > 0) {
|
|
1014
1014
|
return {
|
|
1015
1015
|
ok: false,
|
|
@@ -1019,10 +1019,10 @@ export function readClarificationBarrier(derivedState) {
|
|
|
1019
1019
|
}
|
|
1020
1020
|
const pendingHuman = [
|
|
1021
1021
|
...((derivedState?.coordinationState?.humanEscalations || []).filter((record) =>
|
|
1022
|
-
|
|
1022
|
+
coordinationRecordBlocksWave(record),
|
|
1023
1023
|
)),
|
|
1024
1024
|
...((derivedState?.coordinationState?.humanFeedback || []).filter((record) =>
|
|
1025
|
-
|
|
1025
|
+
coordinationRecordBlocksWave(record),
|
|
1026
1026
|
)),
|
|
1027
1027
|
];
|
|
1028
1028
|
if (pendingHuman.length > 0) {
|
|
@@ -27,6 +27,8 @@ export const CHANGELOG_MANIFEST_PATH = path.join(PACKAGE_ROOT, "releases", "mani
|
|
|
27
27
|
export const WORKSPACE_PACKAGE_JSON_PATH = path.join(REPO_ROOT, "package.json");
|
|
28
28
|
export const STARTER_TEMPLATE_PATHS = [
|
|
29
29
|
"wave.config.json",
|
|
30
|
+
"scripts/wave-status.sh",
|
|
31
|
+
"scripts/wave-watch.sh",
|
|
30
32
|
"docs/README.md",
|
|
31
33
|
"docs/agents/wave-documentation-role.md",
|
|
32
34
|
"docs/agents/wave-design-role.md",
|
|
@@ -20,6 +20,11 @@ import {
|
|
|
20
20
|
summarizeResolvedSkills,
|
|
21
21
|
writeResolvedSkillArtifacts,
|
|
22
22
|
} from "./skills.mjs";
|
|
23
|
+
import {
|
|
24
|
+
agentSignalAckPath,
|
|
25
|
+
agentSignalPath,
|
|
26
|
+
agentUsesSignalHygiene,
|
|
27
|
+
} from "./signals.mjs";
|
|
23
28
|
|
|
24
29
|
export function refreshResolvedSkillsForRun(runInfo, waveDefinition, lanePaths) {
|
|
25
30
|
runInfo.agent.skillsResolved = resolveAgentSkills(
|
|
@@ -129,6 +134,12 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
129
134
|
.map((waveAgent) => resolveDesignReportPath(waveAgent))
|
|
130
135
|
.filter(Boolean),
|
|
131
136
|
designExecutionMode,
|
|
137
|
+
signalStatePath: agentUsesSignalHygiene(agent)
|
|
138
|
+
? agentSignalPath(lanePaths, wave, agent.agentId)
|
|
139
|
+
: null,
|
|
140
|
+
signalAckPath: agentUsesSignalHygiene(agent)
|
|
141
|
+
? agentSignalAckPath(lanePaths, wave, agent.agentId)
|
|
142
|
+
: null,
|
|
132
143
|
});
|
|
133
144
|
const promptHash = hashAgentPromptFingerprint(agent);
|
|
134
145
|
fs.writeFileSync(promptPath, `${prompt}\n`, "utf8");
|
|
@@ -224,7 +235,7 @@ export async function launchAgentSession(lanePaths, params, { runTmuxFn }) {
|
|
|
224
235
|
"const fs=require('node:fs'); const statusPath=process.argv[1]; const payload={code:Number(process.argv[2]),promptHash:process.argv[3]||null,orchestratorId:process.argv[4]||null,attempt:Number(process.argv[5])||1,completedAt:new Date().toISOString()}; fs.writeFileSync(statusPath, JSON.stringify(payload, null, 2)+'\\n', 'utf8');",
|
|
225
236
|
)} ${shellQuote(statusPath)} "$status" ${shellQuote(promptHash)} ${shellQuote(orchestratorId || "")} ${shellQuote(String(attempt || 1))}`,
|
|
226
237
|
`echo "[${lanePaths.lane}-wave-launcher] ${sessionName} finished with code $status"`,
|
|
227
|
-
"
|
|
238
|
+
"exit \"$status\"",
|
|
228
239
|
].join("\n");
|
|
229
240
|
|
|
230
241
|
runTmuxFn(
|
|
@@ -16,6 +16,7 @@ import {
|
|
|
16
16
|
readWaveHumanFeedbackRequests,
|
|
17
17
|
} from "./coordination.mjs";
|
|
18
18
|
import {
|
|
19
|
+
appendCoordinationRecord,
|
|
19
20
|
buildCoordinationResponseMetrics,
|
|
20
21
|
} from "./coordination-store.mjs";
|
|
21
22
|
import {
|
|
@@ -122,6 +123,7 @@ import {
|
|
|
122
123
|
import {
|
|
123
124
|
clearWaveRetryOverride,
|
|
124
125
|
readWaveRetryOverride,
|
|
126
|
+
writeWaveRetryOverride,
|
|
125
127
|
} from "./retry-control.mjs";
|
|
126
128
|
import { appendWaveControlEvent, readControlPlaneEvents } from "./control-plane.mjs";
|
|
127
129
|
import { materializeContradictionsFromControlPlaneEvents } from "./contradiction-entity.mjs";
|
|
@@ -209,7 +211,9 @@ import {
|
|
|
209
211
|
recordAttemptState,
|
|
210
212
|
recordWaveRunState,
|
|
211
213
|
runTmux,
|
|
214
|
+
syncLiveWaveSignals,
|
|
212
215
|
} from "./session-supervisor.mjs";
|
|
216
|
+
import { buildControlStatusPayload } from "./control-cli.mjs";
|
|
213
217
|
import {
|
|
214
218
|
planInitialWaveAttempt,
|
|
215
219
|
planRetryWaveAttempt,
|
|
@@ -576,6 +580,100 @@ function buildFailureFromGate(gateName, gate, fallbackLogPath) {
|
|
|
576
580
|
};
|
|
577
581
|
}
|
|
578
582
|
|
|
583
|
+
function normalizeFailureStatusCode(value) {
|
|
584
|
+
return String(value || "").trim().toLowerCase();
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
function recoverableFailureReason(failure, summary = null) {
|
|
588
|
+
const statusCode = normalizeFailureStatusCode(failure?.statusCode);
|
|
589
|
+
if (["timeout-no-status", "timed_out", "missing-status"].includes(statusCode)) {
|
|
590
|
+
return statusCode;
|
|
591
|
+
}
|
|
592
|
+
const terminationReason = String(summary?.terminationReason || "").trim().toLowerCase();
|
|
593
|
+
if (["timeout", "max-turns", "session-missing"].includes(terminationReason)) {
|
|
594
|
+
return terminationReason;
|
|
595
|
+
}
|
|
596
|
+
const detailText = `${failure?.detail || ""} ${summary?.terminationHint || ""}`.toLowerCase();
|
|
597
|
+
if (detailText.includes("rate limit") || detailText.includes("429 too many requests")) {
|
|
598
|
+
return "rate-limit";
|
|
599
|
+
}
|
|
600
|
+
return null;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
function annotateFailuresWithRecoveryHints(failures, agentRuns) {
|
|
604
|
+
const runsByAgentId = new Map((agentRuns || []).map((run) => [run.agent.agentId, run]));
|
|
605
|
+
return (failures || []).map((failure) => {
|
|
606
|
+
const run = failure?.agentId ? runsByAgentId.get(failure.agentId) : null;
|
|
607
|
+
const summary = run
|
|
608
|
+
? readAgentExecutionSummary(run.statusPath, {
|
|
609
|
+
agent: run.agent,
|
|
610
|
+
statusPath: run.statusPath,
|
|
611
|
+
statusRecord: readStatusRecordIfPresent(run.statusPath),
|
|
612
|
+
logPath: fs.existsSync(run.logPath) ? run.logPath : null,
|
|
613
|
+
})
|
|
614
|
+
: null;
|
|
615
|
+
const recoveryReason = recoverableFailureReason(failure, summary);
|
|
616
|
+
return {
|
|
617
|
+
...failure,
|
|
618
|
+
detail: failure?.detail || summary?.terminationHint || null,
|
|
619
|
+
terminationReason: summary?.terminationReason || null,
|
|
620
|
+
terminationHint: summary?.terminationHint || null,
|
|
621
|
+
observedTurnLimit:
|
|
622
|
+
Number.isFinite(Number(summary?.terminationObservedTurnLimit))
|
|
623
|
+
? Number(summary.terminationObservedTurnLimit)
|
|
624
|
+
: null,
|
|
625
|
+
recoverable: Boolean(recoveryReason),
|
|
626
|
+
recoveryReason,
|
|
627
|
+
};
|
|
628
|
+
});
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
function failuresAreRecoverable(failures) {
|
|
632
|
+
return Array.isArray(failures) && failures.length > 0 && failures.every((failure) => failure?.recoverable);
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
function appendRepairCoordinationRequests({
|
|
636
|
+
coordinationLogPath,
|
|
637
|
+
lanePaths,
|
|
638
|
+
wave,
|
|
639
|
+
attempt,
|
|
640
|
+
runs,
|
|
641
|
+
failures,
|
|
642
|
+
}) {
|
|
643
|
+
const selectedRuns = Array.isArray(runs) ? runs : [];
|
|
644
|
+
const failureByAgentId = new Map(
|
|
645
|
+
(failures || [])
|
|
646
|
+
.filter((failure) => failure?.agentId)
|
|
647
|
+
.map((failure) => [failure.agentId, failure]),
|
|
648
|
+
);
|
|
649
|
+
for (const run of selectedRuns) {
|
|
650
|
+
const agentId = run?.agent?.agentId;
|
|
651
|
+
if (!agentId) {
|
|
652
|
+
continue;
|
|
653
|
+
}
|
|
654
|
+
const failure = failureByAgentId.get(agentId) || null;
|
|
655
|
+
appendCoordinationRecord(coordinationLogPath, {
|
|
656
|
+
id: `repair-wave-${wave.wave}-attempt-${attempt}-${agentId}`,
|
|
657
|
+
lane: lanePaths.lane,
|
|
658
|
+
wave: wave.wave,
|
|
659
|
+
agentId: "launcher",
|
|
660
|
+
kind: "request",
|
|
661
|
+
targets: [`agent:${agentId}`],
|
|
662
|
+
priority: "normal",
|
|
663
|
+
summary: failure
|
|
664
|
+
? `Repair ${agentId}: ${failure.recoveryReason || failure.statusCode}`
|
|
665
|
+
: `Repair ${agentId}: targeted follow-up`,
|
|
666
|
+
detail: failure
|
|
667
|
+
? `Targeted recovery for ${agentId} after ${failure.recoveryReason || failure.statusCode}. ${failure.detail || "Resume the bounded follow-up work and preserve reusable proof from other agents."}`
|
|
668
|
+
: `Targeted recovery for ${agentId}. Resume the bounded follow-up work and preserve reusable proof from other agents.`,
|
|
669
|
+
status: "open",
|
|
670
|
+
source: "launcher",
|
|
671
|
+
blocking: false,
|
|
672
|
+
blockerSeverity: "soft",
|
|
673
|
+
});
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
|
|
579
677
|
// --- Main entry point ---
|
|
580
678
|
|
|
581
679
|
export async function runLauncherCli(argv) {
|
|
@@ -624,6 +722,7 @@ export async function runLauncherCli(argv) {
|
|
|
624
722
|
ensureDirectory(lanePaths.controlDir);
|
|
625
723
|
ensureDirectory(lanePaths.assignmentsDir);
|
|
626
724
|
ensureDirectory(lanePaths.inboxesDir);
|
|
725
|
+
ensureDirectory(lanePaths.signalsDir);
|
|
627
726
|
ensureDirectory(lanePaths.ledgerDir);
|
|
628
727
|
ensureDirectory(lanePaths.integrationDir);
|
|
629
728
|
ensureDirectory(lanePaths.proofDir);
|
|
@@ -1188,6 +1287,19 @@ export async function runLauncherCli(argv) {
|
|
|
1188
1287
|
flushDashboards();
|
|
1189
1288
|
return true;
|
|
1190
1289
|
};
|
|
1290
|
+
const syncWaveSignals = () =>
|
|
1291
|
+
syncLiveWaveSignals({
|
|
1292
|
+
lanePaths,
|
|
1293
|
+
wave,
|
|
1294
|
+
statusPayload: buildControlStatusPayload({
|
|
1295
|
+
lanePaths,
|
|
1296
|
+
wave,
|
|
1297
|
+
}),
|
|
1298
|
+
agentRuns,
|
|
1299
|
+
residentEnabled: Boolean(residentOrchestratorRun),
|
|
1300
|
+
recordCombinedEvent,
|
|
1301
|
+
appendCoordination,
|
|
1302
|
+
});
|
|
1191
1303
|
|
|
1192
1304
|
const proofRegistryForReuse = readWaveProofRegistry(lanePaths, wave.wave);
|
|
1193
1305
|
const initialAttemptPlan = planInitialWaveAttempt({
|
|
@@ -1217,6 +1329,7 @@ export async function runLauncherCli(argv) {
|
|
|
1217
1329
|
}
|
|
1218
1330
|
flushDashboards();
|
|
1219
1331
|
emitCoordinationAlertEvents(derivedState);
|
|
1332
|
+
syncWaveSignals();
|
|
1220
1333
|
|
|
1221
1334
|
if (options.dashboard && currentWaveDashboardTerminalEntry) {
|
|
1222
1335
|
launchWaveDashboardSession(lanePaths, {
|
|
@@ -1281,6 +1394,7 @@ export async function runLauncherCli(argv) {
|
|
|
1281
1394
|
details: `session=${residentOrchestratorRun.sessionName}; executor=${residentOrchestratorRun.lastExecutorId || "unknown"}`,
|
|
1282
1395
|
actionRequested: "None",
|
|
1283
1396
|
});
|
|
1397
|
+
syncWaveSignals();
|
|
1284
1398
|
}
|
|
1285
1399
|
}
|
|
1286
1400
|
|
|
@@ -1487,6 +1601,7 @@ export async function runLauncherCli(argv) {
|
|
|
1487
1601
|
updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
|
|
1488
1602
|
flushDashboards();
|
|
1489
1603
|
}
|
|
1604
|
+
syncWaveSignals();
|
|
1490
1605
|
},
|
|
1491
1606
|
{
|
|
1492
1607
|
controlPlane: {
|
|
@@ -1500,7 +1615,9 @@ export async function runLauncherCli(argv) {
|
|
|
1500
1615
|
}
|
|
1501
1616
|
|
|
1502
1617
|
materializeAgentExecutionSummaries(wave, agentRuns);
|
|
1618
|
+
failures = annotateFailuresWithRecoveryHints(failures, agentRuns);
|
|
1503
1619
|
refreshDerivedState(attempt);
|
|
1620
|
+
syncWaveSignals();
|
|
1504
1621
|
lastLiveCoordinationRefreshAt = Date.now();
|
|
1505
1622
|
emitCoordinationAlertEvents(derivedState);
|
|
1506
1623
|
failures = reconcileFailuresAgainstSharedComponentState(wave, agentRuns, failures);
|
|
@@ -1686,6 +1803,7 @@ export async function runLauncherCli(argv) {
|
|
|
1686
1803
|
failures = closureResult.failures;
|
|
1687
1804
|
timedOut = timedOut || closureResult.timedOut;
|
|
1688
1805
|
materializeAgentExecutionSummaries(wave, agentRuns);
|
|
1806
|
+
failures = annotateFailuresWithRecoveryHints(failures, agentRuns);
|
|
1689
1807
|
refreshDerivedState(attempt);
|
|
1690
1808
|
}
|
|
1691
1809
|
} else {
|
|
@@ -1876,6 +1994,14 @@ export async function runLauncherCli(argv) {
|
|
|
1876
1994
|
detail: "Queued for shared component closure",
|
|
1877
1995
|
});
|
|
1878
1996
|
}
|
|
1997
|
+
appendRepairCoordinationRequests({
|
|
1998
|
+
coordinationLogPath: derivedState.coordinationLogPath,
|
|
1999
|
+
lanePaths,
|
|
2000
|
+
wave,
|
|
2001
|
+
attempt,
|
|
2002
|
+
runs: runsToLaunch,
|
|
2003
|
+
failures,
|
|
2004
|
+
});
|
|
1879
2005
|
writeWaveRelaunchProjection({
|
|
1880
2006
|
lanePaths,
|
|
1881
2007
|
wave,
|
|
@@ -1911,6 +2037,106 @@ export async function runLauncherCli(argv) {
|
|
|
1911
2037
|
}
|
|
1912
2038
|
|
|
1913
2039
|
if (attempt >= options.maxRetriesPerWave + 1) {
|
|
2040
|
+
const reducerDecision =
|
|
2041
|
+
latestReducerSnapshot || refreshReducerSnapshot(attempt);
|
|
2042
|
+
const recoveryPlan = planRetryWaveAttempt({
|
|
2043
|
+
agentRuns,
|
|
2044
|
+
failures,
|
|
2045
|
+
derivedState,
|
|
2046
|
+
lanePaths,
|
|
2047
|
+
wave,
|
|
2048
|
+
retryOverride: readWaveRetryOverride(lanePaths, wave.wave),
|
|
2049
|
+
waveState: reducerDecision?.reducerState || null,
|
|
2050
|
+
});
|
|
2051
|
+
const recoverySelectedAgentIds = Array.from(
|
|
2052
|
+
new Set([
|
|
2053
|
+
...((recoveryPlan.selectedRuns || []).map((run) => run.agent.agentId)),
|
|
2054
|
+
...((reducerDecision?.resumePlan?.invalidatedAgentIds || []).filter(Boolean)),
|
|
2055
|
+
...((failures || []).map((failure) => failure.agentId).filter(Boolean)),
|
|
2056
|
+
]),
|
|
2057
|
+
);
|
|
2058
|
+
if (failuresAreRecoverable(failures) && recoverySelectedAgentIds.length > 0) {
|
|
2059
|
+
const resumeCursor =
|
|
2060
|
+
reducerDecision?.resumePlan?.resumeFromPhase &&
|
|
2061
|
+
reducerDecision.resumePlan.resumeFromPhase !== "completed"
|
|
2062
|
+
? reducerDecision.resumePlan.resumeFromPhase
|
|
2063
|
+
: null;
|
|
2064
|
+
const queuedRecovery = writeWaveRetryOverride(lanePaths, wave.wave, {
|
|
2065
|
+
lane: lanePaths.lane,
|
|
2066
|
+
wave: wave.wave,
|
|
2067
|
+
selectedAgentIds: recoverySelectedAgentIds,
|
|
2068
|
+
resumeCursor,
|
|
2069
|
+
clearReusableAgentIds: Array.from(
|
|
2070
|
+
new Set((failures || []).map((failure) => failure.agentId).filter(Boolean)),
|
|
2071
|
+
),
|
|
2072
|
+
preserveReusableAgentIds: reducerDecision?.resumePlan?.reusableAgentIds || [],
|
|
2073
|
+
reuseProofBundleIds: reducerDecision?.resumePlan?.reusableProofBundleIds || [],
|
|
2074
|
+
requestedBy: "launcher-recovery",
|
|
2075
|
+
reason: `Auto recovery queued after recoverable execution issue(s): ${(failures || []).map((failure) => `${failure.agentId || "wave"}:${failure.recoveryReason || failure.statusCode}`).join(", ")}.`,
|
|
2076
|
+
applyOnce: true,
|
|
2077
|
+
});
|
|
2078
|
+
appendRepairCoordinationRequests({
|
|
2079
|
+
coordinationLogPath: derivedState.coordinationLogPath,
|
|
2080
|
+
lanePaths,
|
|
2081
|
+
wave,
|
|
2082
|
+
attempt: attempt + 1,
|
|
2083
|
+
runs: agentRuns.filter((run) => recoverySelectedAgentIds.includes(run.agent.agentId)),
|
|
2084
|
+
failures,
|
|
2085
|
+
});
|
|
2086
|
+
if (recoveryPlan.selectedRuns.length > 0) {
|
|
2087
|
+
writeWaveRelaunchProjection({
|
|
2088
|
+
lanePaths,
|
|
2089
|
+
wave,
|
|
2090
|
+
attempt: attempt + 1,
|
|
2091
|
+
runs: recoveryPlan.selectedRuns,
|
|
2092
|
+
failures,
|
|
2093
|
+
derivedState,
|
|
2094
|
+
});
|
|
2095
|
+
}
|
|
2096
|
+
recordAttemptState(lanePaths, wave.wave, attempt, "failed", {
|
|
2097
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
2098
|
+
detail: failures
|
|
2099
|
+
.map((failure) => `${failure.agentId || "wave"}:${failure.recoveryReason || failure.statusCode}`)
|
|
2100
|
+
.join(", "),
|
|
2101
|
+
});
|
|
2102
|
+
recordWaveRunState(lanePaths, wave.wave, "blocked", {
|
|
2103
|
+
attempts: attempt,
|
|
2104
|
+
traceDir: completionTraceDir ? path.relative(REPO_ROOT, completionTraceDir) : null,
|
|
2105
|
+
gateSnapshot: completionGateSnapshot,
|
|
2106
|
+
recoverable: true,
|
|
2107
|
+
rerunRequestId: queuedRecovery.requestId,
|
|
2108
|
+
failures: failures.map((failure) => ({
|
|
2109
|
+
agentId: failure.agentId || null,
|
|
2110
|
+
statusCode: failure.statusCode,
|
|
2111
|
+
recoveryReason: failure.recoveryReason || null,
|
|
2112
|
+
detail: failure.detail || null,
|
|
2113
|
+
})),
|
|
2114
|
+
});
|
|
2115
|
+
dashboardState.status = "blocked";
|
|
2116
|
+
for (const failure of failures) {
|
|
2117
|
+
setWaveDashboardAgent(dashboardState, failure.agentId, {
|
|
2118
|
+
state: "blocked",
|
|
2119
|
+
detail:
|
|
2120
|
+
failure.detail ||
|
|
2121
|
+
`Recoverable ${failure.recoveryReason || failure.statusCode}; targeted resume queued.`,
|
|
2122
|
+
});
|
|
2123
|
+
}
|
|
2124
|
+
flushDashboards();
|
|
2125
|
+
appendCoordination({
|
|
2126
|
+
event: "wave_recovery_queued",
|
|
2127
|
+
waves: [wave.wave],
|
|
2128
|
+
status: "blocked",
|
|
2129
|
+
details: `attempt=${attempt}/${options.maxRetriesPerWave + 1}; request=${queuedRecovery.requestId}; agents=${recoverySelectedAgentIds.join(",")}; reason=${(failures || []).map((failure) => failure.recoveryReason || failure.statusCode).join(",")}`,
|
|
2130
|
+
actionRequested:
|
|
2131
|
+
`Lane ${lanePaths.lane} owners should resume the queued targeted recovery or let autonomous relaunch the selected agents.`,
|
|
2132
|
+
});
|
|
2133
|
+
await flushWaveControlTelemetry();
|
|
2134
|
+
const error = new Error(
|
|
2135
|
+
`Wave ${wave.wave} queued targeted recovery request ${queuedRecovery.requestId} after recoverable execution failures.`,
|
|
2136
|
+
);
|
|
2137
|
+
error.exitCode = 43;
|
|
2138
|
+
throw error;
|
|
2139
|
+
}
|
|
1914
2140
|
recordAttemptState(lanePaths, wave.wave, attempt, "failed", {
|
|
1915
2141
|
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
1916
2142
|
detail: failures
|
|
@@ -2055,6 +2281,16 @@ export async function runLauncherCli(argv) {
|
|
|
2055
2281
|
detail: "Queued for retry",
|
|
2056
2282
|
});
|
|
2057
2283
|
}
|
|
2284
|
+
if (retryPlan.source !== "override") {
|
|
2285
|
+
appendRepairCoordinationRequests({
|
|
2286
|
+
coordinationLogPath: derivedState.coordinationLogPath,
|
|
2287
|
+
lanePaths,
|
|
2288
|
+
wave,
|
|
2289
|
+
attempt: attempt + 1,
|
|
2290
|
+
runs: runsToLaunch,
|
|
2291
|
+
failures,
|
|
2292
|
+
});
|
|
2293
|
+
}
|
|
2058
2294
|
writeWaveRelaunchProjection({
|
|
2059
2295
|
lanePaths,
|
|
2060
2296
|
wave,
|
|
@@ -18,7 +18,11 @@ import {
|
|
|
18
18
|
isImplementationOwningDesignAgent,
|
|
19
19
|
isSecurityReviewAgent,
|
|
20
20
|
} from "./role-helpers.mjs";
|
|
21
|
-
import {
|
|
21
|
+
import {
|
|
22
|
+
coordinationRecordBlocksWave,
|
|
23
|
+
coordinationRecordIsHardBlocker,
|
|
24
|
+
openClarificationLinkedRequests,
|
|
25
|
+
} from "./coordination-store.mjs";
|
|
22
26
|
import { buildHelperTasks } from "./routing-state.mjs";
|
|
23
27
|
import { readJsonOrNull, toIsoTimestamp, writeJsonAtomic } from "./shared.mjs";
|
|
24
28
|
|
|
@@ -35,16 +39,12 @@ function taskStateFromValidation(validation) {
|
|
|
35
39
|
|
|
36
40
|
function openHighPriorityBlockers(state) {
|
|
37
41
|
return (state?.blockers || []).filter(
|
|
38
|
-
(record) =>
|
|
39
|
-
["open", "acknowledged", "in_progress"].includes(record.status) &&
|
|
40
|
-
["high", "urgent"].includes(record.priority),
|
|
42
|
+
(record) => coordinationRecordIsHardBlocker(record),
|
|
41
43
|
);
|
|
42
44
|
}
|
|
43
45
|
|
|
44
46
|
function openClarifications(state) {
|
|
45
|
-
return (state?.clarifications || []).filter((record) =>
|
|
46
|
-
["open", "acknowledged", "in_progress"].includes(record.status),
|
|
47
|
-
);
|
|
47
|
+
return (state?.clarifications || []).filter((record) => coordinationRecordBlocksWave(record));
|
|
48
48
|
}
|
|
49
49
|
|
|
50
50
|
export function buildSeedWaveLedger({
|
|
@@ -161,7 +161,7 @@ function derivePhase({
|
|
|
161
161
|
}
|
|
162
162
|
if (
|
|
163
163
|
openClarifications(state).length > 0 ||
|
|
164
|
-
openClarificationLinkedRequests(state).length > 0
|
|
164
|
+
openClarificationLinkedRequests(state).filter((record) => coordinationRecordBlocksWave(record)).length > 0
|
|
165
165
|
) {
|
|
166
166
|
return "clarifying";
|
|
167
167
|
}
|
|
@@ -407,7 +407,9 @@ export function deriveWaveLedger({
|
|
|
407
407
|
tasks,
|
|
408
408
|
blockers: (coordinationState?.blockers || []).map((record) => record.id),
|
|
409
409
|
openClarifications: openClarifications(coordinationState).map((record) => record.id),
|
|
410
|
-
clarificationLinkedRequests: openClarificationLinkedRequests(coordinationState)
|
|
410
|
+
clarificationLinkedRequests: openClarificationLinkedRequests(coordinationState)
|
|
411
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
412
|
+
.map(
|
|
411
413
|
(record) => record.id,
|
|
412
414
|
),
|
|
413
415
|
openRequests: (coordinationState?.requests || [])
|
|
@@ -437,14 +439,14 @@ export function deriveWaveLedger({
|
|
|
437
439
|
: null,
|
|
438
440
|
humanFeedback: [
|
|
439
441
|
...(coordinationState?.humanFeedback || [])
|
|
440
|
-
.filter((record) =>
|
|
442
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
441
443
|
.map((record) => record.id),
|
|
442
444
|
...(coordinationState?.humanEscalations || [])
|
|
443
|
-
.filter((record) =>
|
|
445
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
444
446
|
.map((record) => record.id),
|
|
445
447
|
],
|
|
446
448
|
humanEscalations: (coordinationState?.humanEscalations || [])
|
|
447
|
-
.filter((record) =>
|
|
449
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
448
450
|
.map((record) => record.id),
|
|
449
451
|
contEvalState: contEvalValidation.ok ? "pass" : "open",
|
|
450
452
|
securityState: securityValidation.ok ? securityValidation.statusCode || "pass" : "open",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import {
|
|
4
|
-
|
|
4
|
+
coordinationRecordBlocksWave,
|
|
5
5
|
openClarificationLinkedRequests,
|
|
6
6
|
readMaterializedCoordinationState,
|
|
7
7
|
} from "./coordination-store.mjs";
|
|
@@ -66,17 +66,19 @@ function compatibilityBlockerIds(derivedState) {
|
|
|
66
66
|
const coordinationState = derivedState?.coordinationState || {};
|
|
67
67
|
return normalizeShadowIdList([
|
|
68
68
|
...(coordinationState.blockers || [])
|
|
69
|
-
.filter((record) =>
|
|
69
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
70
70
|
.map((record) => record.id),
|
|
71
71
|
...(coordinationState.clarifications || [])
|
|
72
|
-
.filter((record) =>
|
|
72
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
73
|
+
.map((record) => record.id),
|
|
74
|
+
...openClarificationLinkedRequests(coordinationState)
|
|
75
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
73
76
|
.map((record) => record.id),
|
|
74
|
-
...openClarificationLinkedRequests(coordinationState).map((record) => record.id),
|
|
75
77
|
...(coordinationState.humanFeedback || [])
|
|
76
|
-
.filter((record) =>
|
|
78
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
77
79
|
.map((record) => record.id),
|
|
78
80
|
...(coordinationState.humanEscalations || [])
|
|
79
|
-
.filter((record) =>
|
|
81
|
+
.filter((record) => coordinationRecordBlocksWave(record))
|
|
80
82
|
.map((record) => record.id),
|
|
81
83
|
...((derivedState?.capabilityAssignments || [])
|
|
82
84
|
.filter((assignment) => assignment.blocking)
|