@chllming/wave-orchestration 0.6.3 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +57 -1
- package/README.md +39 -7
- package/docs/agents/wave-orchestrator-role.md +50 -0
- package/docs/agents/wave-planner-role.md +39 -0
- package/docs/context7/bundles.json +9 -0
- package/docs/context7/planner-agent/README.md +25 -0
- package/docs/context7/planner-agent/manifest.json +83 -0
- package/docs/context7/planner-agent/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md +3283 -0
- package/docs/context7/planner-agent/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md +1699 -0
- package/docs/context7/planner-agent/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md +2251 -0
- package/docs/context7/planner-agent/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md +1729 -0
- package/docs/context7/planner-agent/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md +3747 -0
- package/docs/context7/planner-agent/papers/todoevolve-learning-to-architect-agent-planning-systems.md +1675 -0
- package/docs/context7/planner-agent/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md +1173 -0
- package/docs/context7/planner-agent/papers/why-do-multi-agent-llm-systems-fail.md +5211 -0
- package/docs/context7/planner-agent/topics/planning-and-orchestration.md +24 -0
- package/docs/evals/README.md +96 -1
- package/docs/evals/arm-templates/README.md +13 -0
- package/docs/evals/arm-templates/full-wave.json +15 -0
- package/docs/evals/arm-templates/single-agent.json +15 -0
- package/docs/evals/benchmark-catalog.json +7 -0
- package/docs/evals/cases/README.md +47 -0
- package/docs/evals/cases/wave-blackboard-inbox-targeting.json +73 -0
- package/docs/evals/cases/wave-contradiction-conflict.json +104 -0
- package/docs/evals/cases/wave-expert-routing-preservation.json +69 -0
- package/docs/evals/cases/wave-hidden-profile-private-evidence.json +81 -0
- package/docs/evals/cases/wave-premature-closure-guard.json +71 -0
- package/docs/evals/cases/wave-silo-cross-agent-state.json +77 -0
- package/docs/evals/cases/wave-simultaneous-lockstep.json +92 -0
- package/docs/evals/cooperbench/real-world-mitigation.md +341 -0
- package/docs/evals/external-benchmarks.json +85 -0
- package/docs/evals/external-command-config.sample.json +9 -0
- package/docs/evals/external-command-config.swe-bench-pro.json +8 -0
- package/docs/evals/pilots/README.md +47 -0
- package/docs/evals/pilots/swe-bench-pro-public-full-wave-review-10.json +64 -0
- package/docs/evals/pilots/swe-bench-pro-public-pilot.json +111 -0
- package/docs/evals/wave-benchmark-program.md +302 -0
- package/docs/guides/planner.md +48 -11
- package/docs/plans/context7-wave-orchestrator.md +20 -0
- package/docs/plans/current-state.md +8 -1
- package/docs/plans/examples/wave-benchmark-improvement.md +108 -0
- package/docs/plans/examples/wave-example-live-proof.md +1 -1
- package/docs/plans/examples/wave-example-rollout-fidelity.md +340 -0
- package/docs/plans/wave-orchestrator.md +62 -11
- package/docs/plans/waves/reviews/wave-1-benchmark-operator.md +118 -0
- package/docs/reference/coordination-and-closure.md +436 -0
- package/docs/reference/live-proof-waves.md +25 -3
- package/docs/reference/npmjs-trusted-publishing.md +3 -3
- package/docs/reference/proof-metrics.md +90 -0
- package/docs/reference/runtime-config/README.md +61 -0
- package/docs/reference/sample-waves.md +29 -18
- package/docs/reference/wave-control.md +164 -0
- package/docs/reference/wave-planning-lessons.md +131 -0
- package/package.json +5 -4
- package/releases/manifest.json +18 -0
- package/scripts/research/agent-context-archive.mjs +18 -0
- package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +17 -0
- package/scripts/research/sync-planner-context7-bundle.mjs +133 -0
- package/scripts/wave-orchestrator/artifact-schemas.mjs +232 -0
- package/scripts/wave-orchestrator/autonomous.mjs +7 -0
- package/scripts/wave-orchestrator/benchmark-cases.mjs +374 -0
- package/scripts/wave-orchestrator/benchmark-external.mjs +1384 -0
- package/scripts/wave-orchestrator/benchmark.mjs +972 -0
- package/scripts/wave-orchestrator/clarification-triage.mjs +78 -12
- package/scripts/wave-orchestrator/config.mjs +175 -0
- package/scripts/wave-orchestrator/control-cli.mjs +1123 -0
- package/scripts/wave-orchestrator/control-plane.mjs +697 -0
- package/scripts/wave-orchestrator/coord-cli.mjs +360 -2
- package/scripts/wave-orchestrator/coordination-store.mjs +211 -9
- package/scripts/wave-orchestrator/coordination.mjs +84 -0
- package/scripts/wave-orchestrator/dashboard-renderer.mjs +38 -3
- package/scripts/wave-orchestrator/dashboard-state.mjs +22 -0
- package/scripts/wave-orchestrator/evals.mjs +23 -0
- package/scripts/wave-orchestrator/executors.mjs +3 -2
- package/scripts/wave-orchestrator/feedback.mjs +55 -0
- package/scripts/wave-orchestrator/install.mjs +55 -1
- package/scripts/wave-orchestrator/launcher-closure.mjs +4 -1
- package/scripts/wave-orchestrator/launcher-runtime.mjs +24 -21
- package/scripts/wave-orchestrator/launcher.mjs +796 -35
- package/scripts/wave-orchestrator/planner-context.mjs +75 -0
- package/scripts/wave-orchestrator/planner.mjs +2270 -136
- package/scripts/wave-orchestrator/proof-cli.mjs +195 -0
- package/scripts/wave-orchestrator/proof-registry.mjs +317 -0
- package/scripts/wave-orchestrator/replay.mjs +10 -4
- package/scripts/wave-orchestrator/retry-cli.mjs +184 -0
- package/scripts/wave-orchestrator/retry-control.mjs +225 -0
- package/scripts/wave-orchestrator/shared.mjs +26 -0
- package/scripts/wave-orchestrator/swe-bench-pro-task.mjs +1004 -0
- package/scripts/wave-orchestrator/traces.mjs +157 -2
- package/scripts/wave-orchestrator/wave-control-client.mjs +532 -0
- package/scripts/wave-orchestrator/wave-control-schema.mjs +309 -0
- package/scripts/wave-orchestrator/wave-files.mjs +17 -5
- package/scripts/wave.mjs +27 -0
- package/skills/repo-coding-rules/SKILL.md +1 -0
- package/skills/role-cont-eval/SKILL.md +1 -0
- package/skills/role-cont-qa/SKILL.md +13 -6
- package/skills/role-deploy/SKILL.md +1 -0
- package/skills/role-documentation/SKILL.md +4 -0
- package/skills/role-implementation/SKILL.md +4 -0
- package/skills/role-infra/SKILL.md +2 -1
- package/skills/role-integration/SKILL.md +15 -8
- package/skills/role-planner/SKILL.md +39 -0
- package/skills/role-planner/skill.json +21 -0
- package/skills/role-research/SKILL.md +1 -0
- package/skills/role-security/SKILL.md +2 -2
- package/skills/runtime-claude/SKILL.md +2 -1
- package/skills/runtime-codex/SKILL.md +1 -0
- package/skills/runtime-local/SKILL.md +2 -0
- package/skills/runtime-opencode/SKILL.md +1 -0
- package/skills/wave-core/SKILL.md +25 -6
- package/skills/wave-core/references/marker-syntax.md +16 -8
- package/wave.config.json +45 -0
|
@@ -11,12 +11,14 @@ import {
|
|
|
11
11
|
} from "./config.mjs";
|
|
12
12
|
import {
|
|
13
13
|
appendOrchestratorBoardEntry,
|
|
14
|
+
buildResidentOrchestratorPrompt,
|
|
14
15
|
ensureOrchestratorBoard,
|
|
15
16
|
feedbackStateSignature,
|
|
16
17
|
readWaveHumanFeedbackRequests,
|
|
17
18
|
} from "./coordination.mjs";
|
|
18
19
|
import {
|
|
19
20
|
appendCoordinationRecord,
|
|
21
|
+
buildCoordinationResponseMetrics,
|
|
20
22
|
compileAgentInbox,
|
|
21
23
|
compileSharedSummary,
|
|
22
24
|
isOpenCoordinationStatus,
|
|
@@ -54,6 +56,8 @@ import {
|
|
|
54
56
|
DEFAULT_AGENT_RATE_LIMIT_BASE_DELAY_SECONDS,
|
|
55
57
|
DEFAULT_AGENT_RATE_LIMIT_MAX_DELAY_SECONDS,
|
|
56
58
|
DEFAULT_AGENT_RATE_LIMIT_RETRIES,
|
|
59
|
+
DEFAULT_COORDINATION_ACK_TIMEOUT_MS,
|
|
60
|
+
DEFAULT_LIVE_COORDINATION_REFRESH_MS,
|
|
57
61
|
DEFAULT_MAX_RETRIES_PER_WAVE,
|
|
58
62
|
DEFAULT_TIMEOUT_MINUTES,
|
|
59
63
|
DEFAULT_WAVE_LANE,
|
|
@@ -129,7 +133,21 @@ import {
|
|
|
129
133
|
} from "./agent-state.mjs";
|
|
130
134
|
import { buildDocsQueue, readDocsQueue, writeDocsQueue } from "./docs-queue.mjs";
|
|
131
135
|
import { deriveWaveLedger, readWaveLedger, writeWaveLedger } from "./ledger.mjs";
|
|
136
|
+
import {
|
|
137
|
+
augmentSummaryWithProofRegistry,
|
|
138
|
+
readWaveProofRegistry,
|
|
139
|
+
waveProofRegistryPath,
|
|
140
|
+
} from "./proof-registry.mjs";
|
|
141
|
+
import {
|
|
142
|
+
clearWaveRetryOverride,
|
|
143
|
+
readWaveRelaunchPlanSnapshot,
|
|
144
|
+
readWaveRetryOverride,
|
|
145
|
+
resolveRetryOverrideRuns,
|
|
146
|
+
waveRelaunchPlanPath,
|
|
147
|
+
} from "./retry-control.mjs";
|
|
148
|
+
import { appendWaveControlEvent } from "./control-plane.mjs";
|
|
132
149
|
import { buildQualityMetrics, writeTraceBundle } from "./traces.mjs";
|
|
150
|
+
import { flushWaveControlQueue } from "./wave-control-client.mjs";
|
|
133
151
|
import { triageClarificationRequests } from "./clarification-triage.mjs";
|
|
134
152
|
import { readProjectProfile, resolveDefaultTerminalSurface } from "./project-profile.mjs";
|
|
135
153
|
import {
|
|
@@ -149,7 +167,6 @@ import {
|
|
|
149
167
|
writeDependencySnapshotMarkdown,
|
|
150
168
|
} from "./routing-state.mjs";
|
|
151
169
|
import {
|
|
152
|
-
readRelaunchPlan,
|
|
153
170
|
writeAssignmentSnapshot,
|
|
154
171
|
writeDependencySnapshot,
|
|
155
172
|
writeRelaunchPlan,
|
|
@@ -219,6 +236,9 @@ Options:
|
|
|
219
236
|
Disable orchestrator coordination board updates for this run
|
|
220
237
|
--coordination-note <text>
|
|
221
238
|
Optional startup intent note appended to orchestrator board
|
|
239
|
+
--resident-orchestrator
|
|
240
|
+
Launch an additional long-running resident orchestrator session for the wave
|
|
241
|
+
--no-telemetry Disable Wave Control reporting for this launcher run
|
|
222
242
|
--no-context7 Disable launcher-side Context7 prefetch/injection
|
|
223
243
|
--help Show this help message
|
|
224
244
|
`);
|
|
@@ -249,6 +269,8 @@ function parseArgs(argv) {
|
|
|
249
269
|
cleanupSessions: true,
|
|
250
270
|
keepTerminals: false,
|
|
251
271
|
context7Enabled: true,
|
|
272
|
+
telemetryEnabled: true,
|
|
273
|
+
residentOrchestrator: false,
|
|
252
274
|
orchestratorId: null,
|
|
253
275
|
orchestratorBoardPath: null,
|
|
254
276
|
coordinationNote: "",
|
|
@@ -285,6 +307,8 @@ function parseArgs(argv) {
|
|
|
285
307
|
options.keepTerminals = true;
|
|
286
308
|
} else if (arg === "--no-context7") {
|
|
287
309
|
options.context7Enabled = false;
|
|
310
|
+
} else if (arg === "--no-telemetry") {
|
|
311
|
+
options.telemetryEnabled = false;
|
|
288
312
|
} else if (arg === "--no-orchestrator-board") {
|
|
289
313
|
options.orchestratorBoardPath = null;
|
|
290
314
|
orchestratorBoardProvided = true;
|
|
@@ -305,6 +329,8 @@ function parseArgs(argv) {
|
|
|
305
329
|
orchestratorBoardProvided = true;
|
|
306
330
|
} else if (arg === "--coordination-note") {
|
|
307
331
|
options.coordinationNote = String(argv[++i] || "").trim();
|
|
332
|
+
} else if (arg === "--resident-orchestrator") {
|
|
333
|
+
options.residentOrchestrator = true;
|
|
308
334
|
} else if (arg === "--state-file") {
|
|
309
335
|
options.runStatePath = path.resolve(REPO_ROOT, argv[++i] || "");
|
|
310
336
|
stateFileProvided = true;
|
|
@@ -359,7 +385,18 @@ function parseArgs(argv) {
|
|
|
359
385
|
if (!executorProvided) {
|
|
360
386
|
options.executorMode = lanePaths.executors.default;
|
|
361
387
|
}
|
|
388
|
+
if (!options.telemetryEnabled) {
|
|
389
|
+
lanePaths.waveControl = {
|
|
390
|
+
...(lanePaths.waveControl || {}),
|
|
391
|
+
enabled: false,
|
|
392
|
+
};
|
|
393
|
+
lanePaths.laneProfile = {
|
|
394
|
+
...(lanePaths.laneProfile || {}),
|
|
395
|
+
waveControl: lanePaths.waveControl,
|
|
396
|
+
};
|
|
397
|
+
}
|
|
362
398
|
options.orchestratorId ||= sanitizeOrchestratorId(`${lanePaths.lane}-orch-${process.pid}`);
|
|
399
|
+
lanePaths.orchestratorId = options.orchestratorId;
|
|
363
400
|
if (options.agentRateLimitMaxDelaySeconds < options.agentRateLimitBaseDelaySeconds) {
|
|
364
401
|
throw new Error(
|
|
365
402
|
"--agent-rate-limit-max-delay-seconds must be >= --agent-rate-limit-base-delay-seconds",
|
|
@@ -530,17 +567,19 @@ function materializeAgentExecutionSummaryForRun(wave, runInfo) {
|
|
|
530
567
|
}
|
|
531
568
|
|
|
532
569
|
function readRunExecutionSummary(runInfo, wave = null) {
|
|
570
|
+
const applyProofRegistry = (summary) =>
|
|
571
|
+
runInfo?.proofRegistry ? augmentSummaryWithProofRegistry(runInfo.agent, summary, runInfo.proofRegistry) : summary;
|
|
533
572
|
if (runInfo?.summary && typeof runInfo.summary === "object") {
|
|
534
|
-
return runInfo.summary;
|
|
573
|
+
return applyProofRegistry(runInfo.summary);
|
|
535
574
|
}
|
|
536
575
|
if (runInfo?.summaryPath && fs.existsSync(runInfo.summaryPath)) {
|
|
537
|
-
return readAgentExecutionSummary(runInfo.summaryPath);
|
|
576
|
+
return applyProofRegistry(readAgentExecutionSummary(runInfo.summaryPath));
|
|
538
577
|
}
|
|
539
578
|
if (runInfo?.statusPath && fs.existsSync(agentSummaryPathFromStatusPath(runInfo.statusPath))) {
|
|
540
|
-
return readAgentExecutionSummary(runInfo.statusPath);
|
|
579
|
+
return applyProofRegistry(readAgentExecutionSummary(runInfo.statusPath));
|
|
541
580
|
}
|
|
542
581
|
if (wave && runInfo?.statusPath && runInfo?.logPath && fs.existsSync(runInfo.statusPath)) {
|
|
543
|
-
return materializeAgentExecutionSummaryForRun(wave, runInfo);
|
|
582
|
+
return applyProofRegistry(materializeAgentExecutionSummaryForRun(wave, runInfo));
|
|
544
583
|
}
|
|
545
584
|
return null;
|
|
546
585
|
}
|
|
@@ -594,12 +633,8 @@ function waveIntegrationMarkdownPath(lanePaths, waveNumber) {
|
|
|
594
633
|
return path.join(lanePaths.integrationDir, `wave-${waveNumber}.md`);
|
|
595
634
|
}
|
|
596
635
|
|
|
597
|
-
function waveRelaunchPlanPath(lanePaths, waveNumber) {
|
|
598
|
-
return path.join(lanePaths.statusDir, `relaunch-plan-wave-${waveNumber}.json`);
|
|
599
|
-
}
|
|
600
|
-
|
|
601
636
|
function readWaveRelaunchPlan(lanePaths, waveNumber) {
|
|
602
|
-
return
|
|
637
|
+
return readWaveRelaunchPlanSnapshot(lanePaths, waveNumber);
|
|
603
638
|
}
|
|
604
639
|
|
|
605
640
|
function writeWaveRelaunchPlan(lanePaths, waveNumber, payload) {
|
|
@@ -1377,6 +1412,7 @@ function writeWaveDerivedState({
|
|
|
1377
1412
|
capabilityAssignments,
|
|
1378
1413
|
dependencySnapshot,
|
|
1379
1414
|
});
|
|
1415
|
+
const responseMetrics = buildCoordinationResponseMetrics(coordinationState);
|
|
1380
1416
|
const messageBoardPath = path.join(lanePaths.messageboardsDir, `wave-${wave.wave}.md`);
|
|
1381
1417
|
writeCoordinationBoardProjection(messageBoardPath, {
|
|
1382
1418
|
wave: wave.wave,
|
|
@@ -1398,6 +1434,7 @@ function writeWaveDerivedState({
|
|
|
1398
1434
|
integrationMarkdownPath: waveIntegrationMarkdownPath(lanePaths, wave.wave),
|
|
1399
1435
|
securityMarkdownPath: waveSecurityMarkdownPath(lanePaths, wave.wave),
|
|
1400
1436
|
ledger,
|
|
1437
|
+
responseMetrics,
|
|
1401
1438
|
sharedSummaryPath,
|
|
1402
1439
|
sharedSummaryText: sharedSummary.text,
|
|
1403
1440
|
inboxesByAgentId,
|
|
@@ -1415,6 +1452,23 @@ function applyDerivedStateToDashboard(dashboardState, derivedState) {
|
|
|
1415
1452
|
).length;
|
|
1416
1453
|
dashboardState.inboundDependenciesOpen = (derivedState.dependencySnapshot?.openInbound || []).length;
|
|
1417
1454
|
dashboardState.outboundDependenciesOpen = (derivedState.dependencySnapshot?.openOutbound || []).length;
|
|
1455
|
+
dashboardState.coordinationOpen = derivedState.coordinationState?.openRecords?.length || 0;
|
|
1456
|
+
dashboardState.openClarifications =
|
|
1457
|
+
(derivedState.coordinationState?.clarifications || []).filter((record) =>
|
|
1458
|
+
isOpenCoordinationStatus(record.status),
|
|
1459
|
+
).length;
|
|
1460
|
+
dashboardState.openHumanEscalations =
|
|
1461
|
+
derivedState.responseMetrics?.openHumanEscalationCount ||
|
|
1462
|
+
(derivedState.coordinationState?.humanEscalations || []).filter((record) =>
|
|
1463
|
+
isOpenCoordinationStatus(record.status),
|
|
1464
|
+
).length;
|
|
1465
|
+
dashboardState.oldestOpenCoordinationAgeMs =
|
|
1466
|
+
derivedState.responseMetrics?.oldestOpenCoordinationAgeMs ?? null;
|
|
1467
|
+
dashboardState.oldestUnackedRequestAgeMs =
|
|
1468
|
+
derivedState.responseMetrics?.oldestUnackedRequestAgeMs ?? null;
|
|
1469
|
+
dashboardState.overdueAckCount = derivedState.responseMetrics?.overdueAckCount || 0;
|
|
1470
|
+
dashboardState.overdueClarificationCount =
|
|
1471
|
+
derivedState.responseMetrics?.overdueClarificationCount || 0;
|
|
1418
1472
|
}
|
|
1419
1473
|
|
|
1420
1474
|
export function readWaveImplementationGate(wave, agentRuns) {
|
|
@@ -1849,6 +1903,198 @@ function listLaneTmuxSessionNames(lanePaths) {
|
|
|
1849
1903
|
);
|
|
1850
1904
|
}
|
|
1851
1905
|
|
|
1906
|
+
function residentOrchestratorRolePromptPath() {
|
|
1907
|
+
return path.join(REPO_ROOT, "docs", "agents", "wave-orchestrator-role.md");
|
|
1908
|
+
}
|
|
1909
|
+
|
|
1910
|
+
function loadResidentOrchestratorRolePrompt() {
|
|
1911
|
+
const filePath = residentOrchestratorRolePromptPath();
|
|
1912
|
+
if (!fs.existsSync(filePath)) {
|
|
1913
|
+
return "Monitor the wave, triage clarification timing, and intervene through coordination records only.";
|
|
1914
|
+
}
|
|
1915
|
+
return fs.readFileSync(filePath, "utf8");
|
|
1916
|
+
}
|
|
1917
|
+
|
|
1918
|
+
function defaultResidentExecutorState(options) {
|
|
1919
|
+
if (options.executorMode === "claude") {
|
|
1920
|
+
return {
|
|
1921
|
+
id: "claude",
|
|
1922
|
+
role: "orchestrator",
|
|
1923
|
+
selectedBy: "resident-orchestrator",
|
|
1924
|
+
budget: { minutes: options.timeoutMinutes },
|
|
1925
|
+
claude: {
|
|
1926
|
+
command: "claude",
|
|
1927
|
+
},
|
|
1928
|
+
};
|
|
1929
|
+
}
|
|
1930
|
+
if (options.executorMode === "opencode") {
|
|
1931
|
+
return {
|
|
1932
|
+
id: "opencode",
|
|
1933
|
+
role: "orchestrator",
|
|
1934
|
+
selectedBy: "resident-orchestrator",
|
|
1935
|
+
budget: { minutes: options.timeoutMinutes },
|
|
1936
|
+
opencode: {
|
|
1937
|
+
command: "opencode",
|
|
1938
|
+
},
|
|
1939
|
+
};
|
|
1940
|
+
}
|
|
1941
|
+
return {
|
|
1942
|
+
id: "codex",
|
|
1943
|
+
role: "orchestrator",
|
|
1944
|
+
selectedBy: "resident-orchestrator",
|
|
1945
|
+
budget: { minutes: options.timeoutMinutes },
|
|
1946
|
+
codex: {
|
|
1947
|
+
command: "codex",
|
|
1948
|
+
sandbox: options.codexSandboxMode,
|
|
1949
|
+
},
|
|
1950
|
+
};
|
|
1951
|
+
}
|
|
1952
|
+
|
|
1953
|
+
function buildResidentExecutorState(executorTemplate, options) {
|
|
1954
|
+
const source = executorTemplate
|
|
1955
|
+
? JSON.parse(JSON.stringify(executorTemplate))
|
|
1956
|
+
: defaultResidentExecutorState(options);
|
|
1957
|
+
source.role = "orchestrator";
|
|
1958
|
+
source.selectedBy = "resident-orchestrator";
|
|
1959
|
+
source.budget = {
|
|
1960
|
+
...(source.budget || {}),
|
|
1961
|
+
minutes: Math.max(
|
|
1962
|
+
Number.parseInt(String(source?.budget?.minutes || 0), 10) || 0,
|
|
1963
|
+
options.timeoutMinutes,
|
|
1964
|
+
),
|
|
1965
|
+
};
|
|
1966
|
+
if (source.id === "codex") {
|
|
1967
|
+
source.codex = {
|
|
1968
|
+
...(source.codex || {}),
|
|
1969
|
+
command: source?.codex?.command || "codex",
|
|
1970
|
+
sandbox: source?.codex?.sandbox || options.codexSandboxMode,
|
|
1971
|
+
};
|
|
1972
|
+
} else if (source.id === "claude") {
|
|
1973
|
+
source.claude = {
|
|
1974
|
+
...(source.claude || {}),
|
|
1975
|
+
command: source?.claude?.command || "claude",
|
|
1976
|
+
};
|
|
1977
|
+
} else if (source.id === "opencode") {
|
|
1978
|
+
source.opencode = {
|
|
1979
|
+
...(source.opencode || {}),
|
|
1980
|
+
command: source?.opencode?.command || "opencode",
|
|
1981
|
+
};
|
|
1982
|
+
}
|
|
1983
|
+
return source;
|
|
1984
|
+
}
|
|
1985
|
+
|
|
1986
|
+
function buildResidentOrchestratorRun({
|
|
1987
|
+
lanePaths,
|
|
1988
|
+
wave,
|
|
1989
|
+
agentRuns,
|
|
1990
|
+
derivedState,
|
|
1991
|
+
dashboardPath,
|
|
1992
|
+
runTag,
|
|
1993
|
+
options,
|
|
1994
|
+
}) {
|
|
1995
|
+
const executorTemplate =
|
|
1996
|
+
agentRuns.find((run) => run.agent.executorResolved?.id === options.executorMode)?.agent
|
|
1997
|
+
?.executorResolved ||
|
|
1998
|
+
agentRuns.find((run) => run.agent.executorResolved)?.agent?.executorResolved ||
|
|
1999
|
+
null;
|
|
2000
|
+
const executorResolved = buildResidentExecutorState(executorTemplate, options);
|
|
2001
|
+
if (executorResolved.id === "local") {
|
|
2002
|
+
return {
|
|
2003
|
+
run: null,
|
|
2004
|
+
skipReason: "Resident orchestrator requires codex, claude, or opencode; local executor is not suitable.",
|
|
2005
|
+
};
|
|
2006
|
+
}
|
|
2007
|
+
const agent = {
|
|
2008
|
+
agentId: "ORCH",
|
|
2009
|
+
title: "Resident Orchestrator",
|
|
2010
|
+
slug: `${wave.wave}-resident-orchestrator`,
|
|
2011
|
+
prompt: loadResidentOrchestratorRolePrompt(),
|
|
2012
|
+
executorResolved,
|
|
2013
|
+
};
|
|
2014
|
+
const baseName = `wave-${wave.wave}-resident-orchestrator`;
|
|
2015
|
+
const sessionName = `${lanePaths.tmuxSessionPrefix}${wave.wave}_resident_orchestrator_${runTag}`.replace(
|
|
2016
|
+
/[^a-zA-Z0-9_-]/g,
|
|
2017
|
+
"_",
|
|
2018
|
+
);
|
|
2019
|
+
return {
|
|
2020
|
+
run: {
|
|
2021
|
+
agent,
|
|
2022
|
+
sessionName,
|
|
2023
|
+
promptPath: path.join(lanePaths.promptsDir, `${baseName}.prompt.md`),
|
|
2024
|
+
logPath: path.join(lanePaths.logsDir, `${baseName}.log`),
|
|
2025
|
+
statusPath: path.join(lanePaths.statusDir, `${baseName}.status`),
|
|
2026
|
+
promptOverride: buildResidentOrchestratorPrompt({
|
|
2027
|
+
lane: lanePaths.lane,
|
|
2028
|
+
wave: wave.wave,
|
|
2029
|
+
waveFile: wave.file,
|
|
2030
|
+
orchestratorId: options.orchestratorId,
|
|
2031
|
+
coordinationLogPath: derivedState.coordinationLogPath,
|
|
2032
|
+
messageBoardPath: derivedState.messageBoardPath,
|
|
2033
|
+
sharedSummaryPath: derivedState.sharedSummaryPath,
|
|
2034
|
+
dashboardPath,
|
|
2035
|
+
triagePath: derivedState.clarificationTriage?.triagePath || null,
|
|
2036
|
+
rolePrompt: agent.prompt,
|
|
2037
|
+
}),
|
|
2038
|
+
},
|
|
2039
|
+
skipReason: "",
|
|
2040
|
+
};
|
|
2041
|
+
}
|
|
2042
|
+
|
|
2043
|
+
function monitorResidentOrchestratorSession({
|
|
2044
|
+
lanePaths,
|
|
2045
|
+
run,
|
|
2046
|
+
waveNumber,
|
|
2047
|
+
recordCombinedEvent,
|
|
2048
|
+
appendCoordination,
|
|
2049
|
+
sessionState,
|
|
2050
|
+
}) {
|
|
2051
|
+
if (!run || sessionState?.closed === true) {
|
|
2052
|
+
return false;
|
|
2053
|
+
}
|
|
2054
|
+
if (fs.existsSync(run.statusPath)) {
|
|
2055
|
+
sessionState.closed = true;
|
|
2056
|
+
const exitCode = readStatusCodeIfPresent(run.statusPath);
|
|
2057
|
+
recordCombinedEvent({
|
|
2058
|
+
level: exitCode === 0 ? "info" : "warn",
|
|
2059
|
+
agentId: run.agent.agentId,
|
|
2060
|
+
message:
|
|
2061
|
+
exitCode === 0
|
|
2062
|
+
? "Resident orchestrator exited; launcher continues as the control plane."
|
|
2063
|
+
: `Resident orchestrator exited with code ${exitCode}; launcher continues as the control plane.`,
|
|
2064
|
+
});
|
|
2065
|
+
appendCoordination({
|
|
2066
|
+
event: "resident_orchestrator_exit",
|
|
2067
|
+
waves: [waveNumber],
|
|
2068
|
+
status: exitCode === 0 ? "resolved" : "warn",
|
|
2069
|
+
details:
|
|
2070
|
+
exitCode === 0
|
|
2071
|
+
? "Resident orchestrator session ended before wave completion."
|
|
2072
|
+
: `Resident orchestrator session ended with code ${exitCode} before wave completion.`,
|
|
2073
|
+
actionRequested: "None",
|
|
2074
|
+
});
|
|
2075
|
+
return true;
|
|
2076
|
+
}
|
|
2077
|
+
const activeSessions = new Set(listLaneTmuxSessionNames(lanePaths));
|
|
2078
|
+
if (!activeSessions.has(run.sessionName)) {
|
|
2079
|
+
sessionState.closed = true;
|
|
2080
|
+
recordCombinedEvent({
|
|
2081
|
+
level: "warn",
|
|
2082
|
+
agentId: run.agent.agentId,
|
|
2083
|
+
message:
|
|
2084
|
+
"Resident orchestrator session disappeared before writing a status file; launcher continues as the control plane.",
|
|
2085
|
+
});
|
|
2086
|
+
appendCoordination({
|
|
2087
|
+
event: "resident_orchestrator_missing",
|
|
2088
|
+
waves: [waveNumber],
|
|
2089
|
+
status: "warn",
|
|
2090
|
+
details: `tmux session ${run.sessionName} disappeared before ${path.relative(REPO_ROOT, run.statusPath)} was written.`,
|
|
2091
|
+
actionRequested: "None",
|
|
2092
|
+
});
|
|
2093
|
+
return true;
|
|
2094
|
+
}
|
|
2095
|
+
return false;
|
|
2096
|
+
}
|
|
2097
|
+
|
|
1852
2098
|
function isWaveDashboardBackedByLiveSession(lanePaths, dashboardPath, activeSessionNames) {
|
|
1853
2099
|
const waveMatch = path.basename(dashboardPath).match(/^wave-(\d+)\.json$/);
|
|
1854
2100
|
if (!waveMatch) {
|
|
@@ -2107,12 +2353,14 @@ function monitorWaveHumanFeedback({
|
|
|
2107
2353
|
agentIds: agentRuns.map((run) => run.agent.agentId),
|
|
2108
2354
|
orchestratorId,
|
|
2109
2355
|
});
|
|
2356
|
+
let changed = false;
|
|
2110
2357
|
for (const request of requests) {
|
|
2111
2358
|
const signature = feedbackStateSignature(request);
|
|
2112
2359
|
if (feedbackStateByRequestId.get(request.id) === signature) {
|
|
2113
2360
|
continue;
|
|
2114
2361
|
}
|
|
2115
2362
|
feedbackStateByRequestId.set(request.id, signature);
|
|
2363
|
+
changed = true;
|
|
2116
2364
|
const question = request.question || "n/a";
|
|
2117
2365
|
const context = request.context ? `; context=${request.context}` : "";
|
|
2118
2366
|
const responseOperator = request.responseOperator || "human-operator";
|
|
@@ -2127,14 +2375,14 @@ function monitorWaveHumanFeedback({
|
|
|
2127
2375
|
`[human-feedback] wave=${waveNumber} agent=${request.agentId} request=${request.id} pending: ${question}`,
|
|
2128
2376
|
);
|
|
2129
2377
|
console.warn(
|
|
2130
|
-
`[human-feedback] respond with: pnpm exec wave
|
|
2378
|
+
`[human-feedback] respond with: pnpm exec wave control task act answer --lane ${lanePaths.lane} --wave ${waveNumber} --id ${request.id} --response "<answer>" --operator "<name>"`,
|
|
2131
2379
|
);
|
|
2132
2380
|
appendCoordination({
|
|
2133
2381
|
event: "human_feedback_requested",
|
|
2134
2382
|
waves: [waveNumber],
|
|
2135
2383
|
status: "waiting-human",
|
|
2136
2384
|
details: `request_id=${request.id}; agent=${request.agentId}; question=${question}${context}`,
|
|
2137
|
-
actionRequested: `Launcher operator should ask or answer in the parent session, then run: pnpm exec wave
|
|
2385
|
+
actionRequested: `Launcher operator should ask or answer in the parent session, then run: pnpm exec wave control task act answer --lane ${lanePaths.lane} --wave ${waveNumber} --id ${request.id} --response "<answer>" --operator "<name>"`,
|
|
2138
2386
|
});
|
|
2139
2387
|
if (coordinationLogPath) {
|
|
2140
2388
|
appendCoordinationRecord(coordinationLogPath, {
|
|
@@ -2225,6 +2473,7 @@ function monitorWaveHumanFeedback({
|
|
|
2225
2473
|
}
|
|
2226
2474
|
}
|
|
2227
2475
|
}
|
|
2476
|
+
return changed;
|
|
2228
2477
|
}
|
|
2229
2478
|
|
|
2230
2479
|
function proofCentricReuseBlocked(derivedState) {
|
|
@@ -2238,6 +2487,33 @@ function proofCentricReuseBlocked(derivedState) {
|
|
|
2238
2487
|
);
|
|
2239
2488
|
}
|
|
2240
2489
|
|
|
2490
|
+
function sameAgentIdSet(left = [], right = []) {
|
|
2491
|
+
const leftIds = Array.from(new Set((left || []).filter(Boolean))).toSorted();
|
|
2492
|
+
const rightIds = Array.from(new Set((right || []).filter(Boolean))).toSorted();
|
|
2493
|
+
return leftIds.length === rightIds.length && leftIds.every((agentId, index) => agentId === rightIds[index]);
|
|
2494
|
+
}
|
|
2495
|
+
|
|
2496
|
+
export function persistedRelaunchPlanMatchesCurrentState(
|
|
2497
|
+
agentRuns,
|
|
2498
|
+
persistedPlan,
|
|
2499
|
+
lanePaths,
|
|
2500
|
+
waveDefinition,
|
|
2501
|
+
) {
|
|
2502
|
+
if (!persistedPlan || !Array.isArray(persistedPlan.selectedAgentIds)) {
|
|
2503
|
+
return false;
|
|
2504
|
+
}
|
|
2505
|
+
const componentGate = readWaveComponentGate(waveDefinition, agentRuns, {
|
|
2506
|
+
laneProfile: lanePaths?.laneProfile,
|
|
2507
|
+
});
|
|
2508
|
+
if (componentGate?.statusCode !== "shared-component-sibling-pending") {
|
|
2509
|
+
return true;
|
|
2510
|
+
}
|
|
2511
|
+
return sameAgentIdSet(
|
|
2512
|
+
persistedPlan.selectedAgentIds,
|
|
2513
|
+
componentGate.waitingOnAgentIds || [],
|
|
2514
|
+
);
|
|
2515
|
+
}
|
|
2516
|
+
|
|
2241
2517
|
function applyPersistedRelaunchPlan(agentRuns, persistedPlan, lanePaths, waveDefinition) {
|
|
2242
2518
|
if (!persistedPlan || !Array.isArray(persistedPlan.selectedAgentIds)) {
|
|
2243
2519
|
return [];
|
|
@@ -2256,6 +2532,42 @@ function applyPersistedRelaunchPlan(agentRuns, persistedPlan, lanePaths, waveDef
|
|
|
2256
2532
|
.filter(Boolean);
|
|
2257
2533
|
}
|
|
2258
2534
|
|
|
2535
|
+
export function resolveSharedComponentContinuationRuns(
|
|
2536
|
+
currentRuns,
|
|
2537
|
+
agentRuns,
|
|
2538
|
+
failures,
|
|
2539
|
+
derivedState,
|
|
2540
|
+
lanePaths,
|
|
2541
|
+
waveDefinition = null,
|
|
2542
|
+
) {
|
|
2543
|
+
if (!Array.isArray(currentRuns) || currentRuns.length === 0 || !Array.isArray(failures) || failures.length === 0) {
|
|
2544
|
+
return [];
|
|
2545
|
+
}
|
|
2546
|
+
if (!failures.every((failure) => failure.statusCode === "shared-component-sibling-pending")) {
|
|
2547
|
+
return [];
|
|
2548
|
+
}
|
|
2549
|
+
const currentRunIds = new Set(currentRuns.map((run) => run.agent.agentId));
|
|
2550
|
+
const waitingAgentIds = new Set(
|
|
2551
|
+
failures.flatMap((failure) => failure.waitingOnAgentIds || []).filter(Boolean),
|
|
2552
|
+
);
|
|
2553
|
+
if (Array.from(currentRunIds).some((agentId) => waitingAgentIds.has(agentId))) {
|
|
2554
|
+
return [];
|
|
2555
|
+
}
|
|
2556
|
+
const relaunchResolution = resolveRelaunchRuns(
|
|
2557
|
+
agentRuns,
|
|
2558
|
+
failures,
|
|
2559
|
+
derivedState,
|
|
2560
|
+
lanePaths,
|
|
2561
|
+
waveDefinition,
|
|
2562
|
+
);
|
|
2563
|
+
if (relaunchResolution.barrier || relaunchResolution.runs.length === 0) {
|
|
2564
|
+
return [];
|
|
2565
|
+
}
|
|
2566
|
+
return relaunchResolution.runs.some((run) => !currentRunIds.has(run.agent.agentId))
|
|
2567
|
+
? relaunchResolution.runs
|
|
2568
|
+
: [];
|
|
2569
|
+
}
|
|
2570
|
+
|
|
2259
2571
|
function relaunchReasonBuckets(runs, failures, derivedState) {
|
|
2260
2572
|
const selectedAgentIds = new Set((runs || []).map((run) => run.agent.agentId));
|
|
2261
2573
|
return {
|
|
@@ -2361,7 +2673,10 @@ export function hasReusableSuccessStatus(agent, statusPath, options = {}) {
|
|
|
2361
2673
|
if (!summary) {
|
|
2362
2674
|
return false;
|
|
2363
2675
|
}
|
|
2364
|
-
|
|
2676
|
+
const effectiveSummary = options.proofRegistry
|
|
2677
|
+
? augmentSummaryWithProofRegistry(agent, summary, options.proofRegistry)
|
|
2678
|
+
: summary;
|
|
2679
|
+
if (!validateImplementationSummary(agent, effectiveSummary).ok) {
|
|
2365
2680
|
return false;
|
|
2366
2681
|
}
|
|
2367
2682
|
if (proofCentricReuseBlocked(options.derivedState)) {
|
|
@@ -2379,6 +2694,28 @@ function isClosureAgentId(agent, lanePaths) {
|
|
|
2379
2694
|
].includes(agent?.agentId) || isSecurityReviewAgent(agent);
|
|
2380
2695
|
}
|
|
2381
2696
|
|
|
2697
|
+
/**
 * Collect the ids of agents whose earlier run may be reused instead of relaunched.
 *
 * A run is kept only when all three checks hold, evaluated in this order:
 *   1. its agent id is not listed in `retryOverride.clearReusableAgentIds`,
 *   2. `isClosureAgentId` does not classify the agent as a closure agent,
 *   3. `hasReusableSuccessStatus` accepts the run's status file.
 *
 * @param {Array<object>|null|undefined} agentRuns - Runs to inspect; a falsy value is treated as empty.
 * @param {object} lanePaths - Passed through to `isClosureAgentId`.
 * @param {object} [options]
 * @param {object|null} [options.retryOverride] - Optional override; only `clearReusableAgentIds` is read here.
 * @param {object|null} [options.wave] - Forwarded to `hasReusableSuccessStatus`.
 * @param {object|null} [options.derivedState] - Forwarded to `hasReusableSuccessStatus`.
 * @param {object|null} [options.proofRegistry] - Forwarded to `hasReusableSuccessStatus`.
 * @returns {Set<string>} Agent ids that passed every check.
 */
export function selectReusablePreCompletedAgentIds(
  agentRuns,
  lanePaths,
  { retryOverride = null, wave = null, derivedState = null, proofRegistry = null } = {},
) {
  const excludedAgentIds = new Set(retryOverride?.clearReusableAgentIds || []);
  const reusableAgentIds = new Set();
  for (const run of agentRuns || []) {
    // Ids explicitly cleared by the retry override are never reused.
    if (excludedAgentIds.has(run.agent.agentId)) {
      continue;
    }
    if (isClosureAgentId(run.agent, lanePaths)) {
      continue;
    }
    const reusable = hasReusableSuccessStatus(run.agent, run.statusPath, {
      wave,
      derivedState,
      proofRegistry,
    });
    if (!reusable) {
      continue;
    }
    reusableAgentIds.add(run.agent.agentId);
  }
  return reusableAgentIds;
}
|
|
2718
|
+
|
|
2382
2719
|
export function selectInitialWaveRuns(agentRuns, lanePaths) {
|
|
2383
2720
|
const implementationRuns = (agentRuns || []).filter(
|
|
2384
2721
|
(run) => !isClosureAgentId(run?.agent, lanePaths),
|
|
@@ -3008,10 +3345,12 @@ export async function runLauncherCli(argv) {
|
|
|
3008
3345
|
ensureDirectory(lanePaths.messageboardsDir);
|
|
3009
3346
|
ensureDirectory(lanePaths.dashboardsDir);
|
|
3010
3347
|
ensureDirectory(lanePaths.coordinationDir);
|
|
3348
|
+
ensureDirectory(lanePaths.controlDir);
|
|
3011
3349
|
ensureDirectory(lanePaths.assignmentsDir);
|
|
3012
3350
|
ensureDirectory(lanePaths.inboxesDir);
|
|
3013
3351
|
ensureDirectory(lanePaths.ledgerDir);
|
|
3014
3352
|
ensureDirectory(lanePaths.integrationDir);
|
|
3353
|
+
ensureDirectory(lanePaths.proofDir);
|
|
3015
3354
|
ensureDirectory(lanePaths.securityDir);
|
|
3016
3355
|
ensureDirectory(lanePaths.dependencySnapshotsDir);
|
|
3017
3356
|
ensureDirectory(lanePaths.docsQueueDir);
|
|
@@ -3330,6 +3669,8 @@ export async function runLauncherCli(argv) {
|
|
|
3330
3669
|
let dashboardState = null;
|
|
3331
3670
|
let terminalEntries = [];
|
|
3332
3671
|
let terminalsAppended = false;
|
|
3672
|
+
let residentOrchestratorRun = null;
|
|
3673
|
+
const residentOrchestratorState = { closed: false };
|
|
3333
3674
|
|
|
3334
3675
|
const flushDashboards = () => {
|
|
3335
3676
|
if (!dashboardState) {
|
|
@@ -3353,6 +3694,13 @@ export async function runLauncherCli(argv) {
|
|
|
3353
3694
|
message: `${globalMessagePrefix}${message}`,
|
|
3354
3695
|
});
|
|
3355
3696
|
};
|
|
3697
|
+
// Await flushWaveControlQueue for this lane. Any error it raises is ignored
// so that a failed flush never propagates to the caller.
const flushWaveControlTelemetry = async () => {
  try {
    await flushWaveControlQueue(lanePaths);
  } catch {
    // Intentionally ignored: remote telemetry delivery is best-effort only.
  }
};
|
|
3356
3704
|
|
|
3357
3705
|
try {
|
|
3358
3706
|
terminalEntries = createTemporaryTerminalEntries(
|
|
@@ -3392,6 +3740,10 @@ export async function runLauncherCli(argv) {
|
|
|
3392
3740
|
});
|
|
3393
3741
|
|
|
3394
3742
|
const refreshDerivedState = (attemptNumber = 0) => {
|
|
3743
|
+
const proofRegistry = readWaveProofRegistry(lanePaths, wave.wave);
|
|
3744
|
+
for (const run of agentRuns) {
|
|
3745
|
+
run.proofRegistry = proofRegistry;
|
|
3746
|
+
}
|
|
3395
3747
|
const summariesByAgentId = Object.fromEntries(
|
|
3396
3748
|
agentRuns
|
|
3397
3749
|
.map((run) => [run.agent.agentId, readRunExecutionSummary(run, wave)])
|
|
@@ -3425,7 +3777,8 @@ export async function runLauncherCli(argv) {
|
|
|
3425
3777
|
};
|
|
3426
3778
|
|
|
3427
3779
|
refreshDerivedState(0);
|
|
3428
|
-
|
|
3780
|
+
let persistedRelaunchPlan = readWaveRelaunchPlan(lanePaths, wave.wave);
|
|
3781
|
+
let retryOverride = readWaveRetryOverride(lanePaths, wave.wave);
|
|
3429
3782
|
|
|
3430
3783
|
dashboardState = buildWaveDashboardState({
|
|
3431
3784
|
lane: lanePaths.lane,
|
|
@@ -3437,19 +3790,77 @@ export async function runLauncherCli(argv) {
|
|
|
3437
3790
|
agentRuns,
|
|
3438
3791
|
});
|
|
3439
3792
|
applyDerivedStateToDashboard(dashboardState, derivedState);
|
|
3793
|
+
const feedbackStateByRequestId = new Map();
|
|
3794
|
+
const coordinationAlertState = {
|
|
3795
|
+
overdueAckSignature: "",
|
|
3796
|
+
overdueClarificationSignature: "",
|
|
3797
|
+
};
|
|
3798
|
+
let lastLiveCoordinationRefreshAt = 0;
|
|
3799
|
+
// Emit warning events for overdue acknowledgements and stale clarification chains.
//
// Each alert family is reduced to a comma-joined "signature" of record ids. A
// warning is emitted only when the signature is non-empty and differs from the
// one stored in coordinationAlertState; the stored signature is then
// overwritten with the current value (including the empty string).
//
// Metrics come from currentDerivedState.responseMetrics when present,
// otherwise from buildCoordinationResponseMetrics(coordinationState).
const emitCoordinationAlertEvents = (currentDerivedState = derivedState) => {
  const metrics =
    currentDerivedState?.responseMetrics ||
    buildCoordinationResponseMetrics(currentDerivedState?.coordinationState);

  // Shared emitter: one combined warning event plus one coordination-log record.
  const raiseWarning = (message, event, details, actionRequested) => {
    recordCombinedEvent({
      level: "warn",
      message,
    });
    appendCoordination({
      event,
      waves: [wave.wave],
      status: "warn",
      details,
      actionRequested,
    });
  };

  const ackSignature = (metrics?.overdueAckRecordIds || []).join(",");
  if (ackSignature && ackSignature !== coordinationAlertState.overdueAckSignature) {
    raiseWarning(
      `Overdue acknowledgements in coordination state: ${ackSignature}.`,
      "coordination_ack_overdue",
      `records=${ackSignature}; ack_timeout_ms=${DEFAULT_COORDINATION_ACK_TIMEOUT_MS}`,
      "Assigned owners should acknowledge, resolve, or reroute the targeted coordination items.",
    );
  }
  coordinationAlertState.overdueAckSignature = ackSignature;

  const clarificationSignature = (metrics?.overdueClarificationIds || []).join(",");
  if (
    clarificationSignature &&
    clarificationSignature !== coordinationAlertState.overdueClarificationSignature
  ) {
    raiseWarning(
      `Stale clarification chains remain open: ${clarificationSignature}.`,
      "clarification_chain_stale",
      `clarifications=${clarificationSignature}`,
      "The orchestrator should reroute, resolve, or escalate the stale clarification chain.",
    );
  }
  coordinationAlertState.overdueClarificationSignature = clarificationSignature;
};
|
|
3844
|
+
// Throttled refresh of derived coordination state.
//
// Unless `force` is truthy, nothing is done (and false is returned) while fewer
// than DEFAULT_LIVE_COORDINATION_REFRESH_MS milliseconds have passed since
// lastLiveCoordinationRefreshAt. Otherwise it refreshes derived state, records
// the refresh time, updates the dashboard message board, emits coordination
// alerts, flushes dashboards, and returns true.
const refreshActiveCoordinationState = (attemptNumber = 0, { force = false } = {}) => {
  const checkedAtMs = Date.now();
  const elapsedMs = checkedAtMs - lastLiveCoordinationRefreshAt;
  const refreshDue = force || !(elapsedMs < DEFAULT_LIVE_COORDINATION_REFRESH_MS);
  if (!refreshDue) {
    return false;
  }

  refreshDerivedState(attemptNumber);
  // Stamp with the time sampled before the refresh, not the time it finished.
  lastLiveCoordinationRefreshAt = checkedAtMs;
  updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
  emitCoordinationAlertEvents(derivedState);
  flushDashboards();
  return true;
};
|
|
3440
3856
|
|
|
3441
|
-
const
|
|
3442
|
-
|
|
3443
|
-
|
|
3444
|
-
|
|
3445
|
-
|
|
3446
|
-
|
|
3447
|
-
|
|
3448
|
-
derivedState,
|
|
3449
|
-
}),
|
|
3450
|
-
)
|
|
3451
|
-
.map((run) => run.agent.agentId),
|
|
3452
|
-
);
|
|
3857
|
+
const proofRegistryForReuse = readWaveProofRegistry(lanePaths, wave.wave);
|
|
3858
|
+
const preCompletedAgentIds = selectReusablePreCompletedAgentIds(agentRuns, lanePaths, {
|
|
3859
|
+
retryOverride,
|
|
3860
|
+
wave,
|
|
3861
|
+
derivedState,
|
|
3862
|
+
proofRegistry: proofRegistryForReuse,
|
|
3863
|
+
});
|
|
3453
3864
|
for (const agentId of preCompletedAgentIds) {
|
|
3454
3865
|
setWaveDashboardAgent(dashboardState, agentId, {
|
|
3455
3866
|
state: "completed",
|
|
@@ -3472,6 +3883,7 @@ export async function runLauncherCli(argv) {
|
|
|
3472
3883
|
});
|
|
3473
3884
|
}
|
|
3474
3885
|
flushDashboards();
|
|
3886
|
+
emitCoordinationAlertEvents(derivedState);
|
|
3475
3887
|
|
|
3476
3888
|
if (options.dashboard && currentWaveDashboardTerminalEntry) {
|
|
3477
3889
|
launchWaveDashboardSession(lanePaths, {
|
|
@@ -3481,28 +3893,162 @@ export async function runLauncherCli(argv) {
|
|
|
3481
3893
|
});
|
|
3482
3894
|
}
|
|
3483
3895
|
|
|
3896
|
+
if (options.residentOrchestrator) {
|
|
3897
|
+
const residentSetup = buildResidentOrchestratorRun({
|
|
3898
|
+
lanePaths,
|
|
3899
|
+
wave,
|
|
3900
|
+
agentRuns,
|
|
3901
|
+
derivedState,
|
|
3902
|
+
dashboardPath,
|
|
3903
|
+
runTag,
|
|
3904
|
+
options,
|
|
3905
|
+
});
|
|
3906
|
+
if (residentSetup.skipReason) {
|
|
3907
|
+
recordCombinedEvent({
|
|
3908
|
+
level: "warn",
|
|
3909
|
+
message: residentSetup.skipReason,
|
|
3910
|
+
});
|
|
3911
|
+
} else if (residentSetup.run) {
|
|
3912
|
+
residentOrchestratorRun = residentSetup.run;
|
|
3913
|
+
const launchResult = await launchAgentSession(lanePaths, {
|
|
3914
|
+
wave: wave.wave,
|
|
3915
|
+
waveDefinition: wave,
|
|
3916
|
+
agent: residentOrchestratorRun.agent,
|
|
3917
|
+
sessionName: residentOrchestratorRun.sessionName,
|
|
3918
|
+
promptPath: residentOrchestratorRun.promptPath,
|
|
3919
|
+
logPath: residentOrchestratorRun.logPath,
|
|
3920
|
+
statusPath: residentOrchestratorRun.statusPath,
|
|
3921
|
+
messageBoardPath: derivedState.messageBoardPath,
|
|
3922
|
+
messageBoardSnapshot: derivedState.messageBoardText,
|
|
3923
|
+
sharedSummaryPath: derivedState.sharedSummaryPath,
|
|
3924
|
+
sharedSummaryText: derivedState.sharedSummaryText,
|
|
3925
|
+
inboxPath: null,
|
|
3926
|
+
inboxText: "",
|
|
3927
|
+
promptOverride: residentOrchestratorRun.promptOverride,
|
|
3928
|
+
orchestratorId: options.orchestratorId,
|
|
3929
|
+
agentRateLimitRetries: options.agentRateLimitRetries,
|
|
3930
|
+
agentRateLimitBaseDelaySeconds: options.agentRateLimitBaseDelaySeconds,
|
|
3931
|
+
agentRateLimitMaxDelaySeconds: options.agentRateLimitMaxDelaySeconds,
|
|
3932
|
+
context7Enabled: options.context7Enabled,
|
|
3933
|
+
});
|
|
3934
|
+
residentOrchestratorRun.lastPromptHash = launchResult?.promptHash || null;
|
|
3935
|
+
residentOrchestratorRun.lastExecutorId =
|
|
3936
|
+
launchResult?.executorId || residentOrchestratorRun.agent.executorResolved?.id || null;
|
|
3937
|
+
recordCombinedEvent({
|
|
3938
|
+
agentId: residentOrchestratorRun.agent.agentId,
|
|
3939
|
+
message: `Resident orchestrator launched in tmux session ${residentOrchestratorRun.sessionName}`,
|
|
3940
|
+
});
|
|
3941
|
+
appendCoordination({
|
|
3942
|
+
event: "resident_orchestrator_start",
|
|
3943
|
+
waves: [wave.wave],
|
|
3944
|
+
status: "running",
|
|
3945
|
+
details: `session=${residentOrchestratorRun.sessionName}; executor=${residentOrchestratorRun.lastExecutorId || "unknown"}`,
|
|
3946
|
+
actionRequested: "None",
|
|
3947
|
+
});
|
|
3948
|
+
}
|
|
3949
|
+
}
|
|
3950
|
+
|
|
3484
3951
|
const availableRuns = agentRuns.filter((run) => !preCompletedAgentIds.has(run.agent.agentId));
|
|
3952
|
+
if (
|
|
3953
|
+
persistedRelaunchPlan &&
|
|
3954
|
+
!persistedRelaunchPlanMatchesCurrentState(
|
|
3955
|
+
agentRuns,
|
|
3956
|
+
persistedRelaunchPlan,
|
|
3957
|
+
lanePaths,
|
|
3958
|
+
wave,
|
|
3959
|
+
)
|
|
3960
|
+
) {
|
|
3961
|
+
clearWaveRelaunchPlan(lanePaths, wave.wave);
|
|
3962
|
+
persistedRelaunchPlan = null;
|
|
3963
|
+
}
|
|
3485
3964
|
const persistedRuns = applyPersistedRelaunchPlan(
|
|
3486
3965
|
availableRuns,
|
|
3487
3966
|
persistedRelaunchPlan,
|
|
3488
3967
|
lanePaths,
|
|
3489
3968
|
wave,
|
|
3490
3969
|
);
|
|
3970
|
+
const overrideRuns = resolveRetryOverrideRuns(availableRuns, retryOverride, lanePaths, wave);
|
|
3971
|
+
if (overrideRuns.unknownAgentIds.length > 0) {
|
|
3972
|
+
appendCoordination({
|
|
3973
|
+
event: "retry_override_invalid",
|
|
3974
|
+
waves: [wave.wave],
|
|
3975
|
+
status: "warn",
|
|
3976
|
+
details: `unknown_agents=${overrideRuns.unknownAgentIds.join(",")}`,
|
|
3977
|
+
actionRequested:
|
|
3978
|
+
"Retry override references agent ids that do not exist in the current wave definition.",
|
|
3979
|
+
});
|
|
3980
|
+
clearWaveRetryOverride(lanePaths, wave.wave);
|
|
3981
|
+
retryOverride = null;
|
|
3982
|
+
}
|
|
3491
3983
|
let runsToLaunch =
|
|
3492
|
-
|
|
3984
|
+
overrideRuns.unknownAgentIds.length === 0 && overrideRuns.runs.length > 0
|
|
3985
|
+
? overrideRuns.runs
|
|
3986
|
+
: persistedRuns.length > 0
|
|
3987
|
+
? persistedRuns
|
|
3988
|
+
: selectInitialWaveRuns(availableRuns, lanePaths);
|
|
3989
|
+
if (overrideRuns.runs.length > 0) {
|
|
3990
|
+
appendCoordination({
|
|
3991
|
+
event: "retry_override_applied",
|
|
3992
|
+
waves: [wave.wave],
|
|
3993
|
+
status: "running",
|
|
3994
|
+
details: `agents=${overrideRuns.selectedAgentIds.join(",")}; requested_by=${retryOverride?.requestedBy || "human-operator"}`,
|
|
3995
|
+
actionRequested: "None",
|
|
3996
|
+
});
|
|
3997
|
+
if (retryOverride?.applyOnce !== false) {
|
|
3998
|
+
clearWaveRetryOverride(lanePaths, wave.wave);
|
|
3999
|
+
retryOverride = null;
|
|
4000
|
+
}
|
|
4001
|
+
}
|
|
3493
4002
|
let attempt = 1;
|
|
3494
|
-
|
|
4003
|
+
let traceAttempt = 1;
|
|
3495
4004
|
let completionGateSnapshot = null;
|
|
3496
4005
|
let completionTraceDir = null;
|
|
4006
|
+
// Append an "attempt" entity event to the wave control log and return whatever
// appendWaveControlEvent returns.
//
// attemptNumber - attempt counter embedded in the entity/attempt id.
// state         - used both as the event action and as data.state.
// data          - optional extras: selectedAgentIds (defaults to []), detail
//                 (defaults to null), createdAt (included only when truthy).
const recordAttemptState = (attemptNumber, state, data = {}) => {
  const attemptId = `wave-${wave.wave}-attempt-${attemptNumber}`;
  const attemptData = {
    attemptId,
    attemptNumber,
    state,
    selectedAgentIds: data.selectedAgentIds || [],
    detail: data.detail || null,
    updatedAt: toIsoTimestamp(),
  };
  // createdAt is appended last and only when the caller supplied one.
  if (data.createdAt) {
    attemptData.createdAt = data.createdAt;
  }
  return appendWaveControlEvent(lanePaths, wave.wave, {
    entityType: "attempt",
    entityId: attemptId,
    action: state,
    source: "launcher",
    actor: "launcher",
    data: attemptData,
  });
};
|
|
4023
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4024
|
+
entityType: "wave_run",
|
|
4025
|
+
entityId: `wave-${wave.wave}`,
|
|
4026
|
+
action: "started",
|
|
4027
|
+
source: "launcher",
|
|
4028
|
+
actor: "launcher",
|
|
4029
|
+
data: {
|
|
4030
|
+
waveId: `wave-${wave.wave}`,
|
|
4031
|
+
waveNumber: wave.wave,
|
|
4032
|
+
agentIds: wave.agents.map((agent) => agent.agentId),
|
|
4033
|
+
runVariant: lanePaths.runVariant || "live",
|
|
4034
|
+
},
|
|
4035
|
+
});
|
|
3497
4036
|
|
|
3498
4037
|
while (attempt <= options.maxRetriesPerWave + 1) {
|
|
3499
4038
|
refreshDerivedState(attempt - 1);
|
|
4039
|
+
lastLiveCoordinationRefreshAt = Date.now();
|
|
3500
4040
|
dashboardState.attempt = attempt;
|
|
3501
4041
|
updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
|
|
4042
|
+
emitCoordinationAlertEvents(derivedState);
|
|
3502
4043
|
flushDashboards();
|
|
3503
4044
|
recordCombinedEvent({
|
|
3504
4045
|
message: `Attempt ${attempt}/${options.maxRetriesPerWave + 1}; launching agents: ${runsToLaunch.map((run) => run.agent.agentId).join(", ") || "none"}`,
|
|
3505
4046
|
});
|
|
4047
|
+
recordAttemptState(attempt, "running", {
|
|
4048
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4049
|
+
detail: `Launching ${runsToLaunch.map((run) => run.agent.agentId).join(", ") || "no"} agents.`,
|
|
4050
|
+
createdAt: toIsoTimestamp(),
|
|
4051
|
+
});
|
|
3506
4052
|
|
|
3507
4053
|
const launchedImplementationRuns = runsToLaunch.filter(
|
|
3508
4054
|
(run) =>
|
|
@@ -3593,6 +4139,23 @@ export async function runLauncherCli(argv) {
|
|
|
3593
4139
|
state: "running",
|
|
3594
4140
|
detail: "Session launched",
|
|
3595
4141
|
});
|
|
4142
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4143
|
+
entityType: "agent_run",
|
|
4144
|
+
entityId: `wave-${wave.wave}-attempt-${attempt}-agent-${runInfo.agent.agentId}`,
|
|
4145
|
+
action: "started",
|
|
4146
|
+
source: "launcher",
|
|
4147
|
+
actor: runInfo.agent.agentId,
|
|
4148
|
+
attempt,
|
|
4149
|
+
data: {
|
|
4150
|
+
agentId: runInfo.agent.agentId,
|
|
4151
|
+
attemptNumber: attempt,
|
|
4152
|
+
sessionName: runInfo.sessionName,
|
|
4153
|
+
executorId: runInfo.lastExecutorId,
|
|
4154
|
+
promptPath: path.relative(REPO_ROOT, runInfo.promptPath),
|
|
4155
|
+
statusPath: path.relative(REPO_ROOT, runInfo.statusPath),
|
|
4156
|
+
logPath: path.relative(REPO_ROOT, runInfo.logPath),
|
|
4157
|
+
},
|
|
4158
|
+
});
|
|
3596
4159
|
recordCombinedEvent({
|
|
3597
4160
|
agentId: runInfo.agent.agentId,
|
|
3598
4161
|
message: `Launched in tmux session ${runInfo.sessionName}`,
|
|
@@ -3627,7 +4190,7 @@ export async function runLauncherCli(argv) {
|
|
|
3627
4190
|
pendingAgentIds,
|
|
3628
4191
|
(event) => recordCombinedEvent(event),
|
|
3629
4192
|
);
|
|
3630
|
-
monitorWaveHumanFeedback({
|
|
4193
|
+
const feedbackChanged = monitorWaveHumanFeedback({
|
|
3631
4194
|
lanePaths,
|
|
3632
4195
|
waveNumber: wave.wave,
|
|
3633
4196
|
agentRuns: runsToLaunch,
|
|
@@ -3637,8 +4200,21 @@ export async function runLauncherCli(argv) {
|
|
|
3637
4200
|
recordCombinedEvent,
|
|
3638
4201
|
appendCoordination,
|
|
3639
4202
|
});
|
|
3640
|
-
|
|
3641
|
-
|
|
4203
|
+
const residentChanged = monitorResidentOrchestratorSession({
|
|
4204
|
+
lanePaths,
|
|
4205
|
+
run: residentOrchestratorRun,
|
|
4206
|
+
waveNumber: wave.wave,
|
|
4207
|
+
recordCombinedEvent,
|
|
4208
|
+
appendCoordination,
|
|
4209
|
+
sessionState: residentOrchestratorState,
|
|
4210
|
+
});
|
|
4211
|
+
const refreshed = refreshActiveCoordinationState(attempt, {
|
|
4212
|
+
force: feedbackChanged || residentChanged,
|
|
4213
|
+
});
|
|
4214
|
+
if (!refreshed) {
|
|
4215
|
+
updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
|
|
4216
|
+
flushDashboards();
|
|
4217
|
+
}
|
|
3642
4218
|
},
|
|
3643
4219
|
);
|
|
3644
4220
|
failures = waitResult.failures;
|
|
@@ -3646,7 +4222,31 @@ export async function runLauncherCli(argv) {
|
|
|
3646
4222
|
}
|
|
3647
4223
|
|
|
3648
4224
|
materializeAgentExecutionSummaries(wave, agentRuns);
|
|
4225
|
+
for (const runInfo of runsToLaunch) {
|
|
4226
|
+
const statusRecord = readStatusRecordIfPresent(runInfo.statusPath);
|
|
4227
|
+
const action = Number(statusRecord?.code) === 0 ? "completed" : "failed";
|
|
4228
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4229
|
+
entityType: "agent_run",
|
|
4230
|
+
entityId: `wave-${wave.wave}-attempt-${attempt}-agent-${runInfo.agent.agentId}`,
|
|
4231
|
+
action,
|
|
4232
|
+
source: "launcher",
|
|
4233
|
+
actor: runInfo.agent.agentId,
|
|
4234
|
+
attempt,
|
|
4235
|
+
data: {
|
|
4236
|
+
agentId: runInfo.agent.agentId,
|
|
4237
|
+
attemptNumber: attempt,
|
|
4238
|
+
exitCode: statusRecord?.code ?? null,
|
|
4239
|
+
completedAt: statusRecord?.completedAt || null,
|
|
4240
|
+
promptHash: statusRecord?.promptHash || runInfo.lastPromptHash || null,
|
|
4241
|
+
executorId: runInfo.lastExecutorId || null,
|
|
4242
|
+
logPath: path.relative(REPO_ROOT, runInfo.logPath),
|
|
4243
|
+
statusPath: path.relative(REPO_ROOT, runInfo.statusPath),
|
|
4244
|
+
},
|
|
4245
|
+
});
|
|
4246
|
+
}
|
|
3649
4247
|
refreshDerivedState(attempt);
|
|
4248
|
+
lastLiveCoordinationRefreshAt = Date.now();
|
|
4249
|
+
emitCoordinationAlertEvents(derivedState);
|
|
3650
4250
|
failures = reconcileFailuresAgainstSharedComponentState(wave, agentRuns, failures);
|
|
3651
4251
|
for (const failure of failures) {
|
|
3652
4252
|
if (failure.statusCode === "shared-component-sibling-pending") {
|
|
@@ -4075,7 +4675,7 @@ export async function runLauncherCli(argv) {
|
|
|
4075
4675
|
lanePaths,
|
|
4076
4676
|
launcherOptions: options,
|
|
4077
4677
|
wave,
|
|
4078
|
-
attempt,
|
|
4678
|
+
attempt: traceAttempt,
|
|
4079
4679
|
manifest: buildManifest(lanePaths, [wave]),
|
|
4080
4680
|
coordinationLogPath: derivedState.coordinationLogPath,
|
|
4081
4681
|
coordinationState: derivedState.coordinationState,
|
|
@@ -4086,6 +4686,8 @@ export async function runLauncherCli(argv) {
|
|
|
4086
4686
|
securitySummary: derivedState.securitySummary,
|
|
4087
4687
|
integrationSummary: derivedState.integrationSummary,
|
|
4088
4688
|
integrationMarkdownPath: derivedState.integrationMarkdownPath,
|
|
4689
|
+
proofRegistryPath: waveProofRegistryPath(lanePaths, wave.wave),
|
|
4690
|
+
controlPlanePath: path.join(lanePaths.controlPlaneDir, `wave-${wave.wave}.jsonl`),
|
|
4089
4691
|
clarificationTriage: derivedState.clarificationTriage,
|
|
4090
4692
|
agentRuns,
|
|
4091
4693
|
structuredSignals,
|
|
@@ -4102,13 +4704,103 @@ export async function runLauncherCli(argv) {
|
|
|
4102
4704
|
summariesByAgentId,
|
|
4103
4705
|
agentRuns,
|
|
4104
4706
|
gateSnapshot,
|
|
4105
|
-
attempt,
|
|
4707
|
+
attempt: traceAttempt,
|
|
4106
4708
|
coordinationLogPath: derivedState.coordinationLogPath,
|
|
4107
4709
|
}),
|
|
4108
4710
|
});
|
|
4109
4711
|
completionTraceDir = traceDir;
|
|
4712
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4713
|
+
entityType: "gate",
|
|
4714
|
+
entityId: `wave-${wave.wave}-attempt-${attempt}-gate`,
|
|
4715
|
+
action: "evaluated",
|
|
4716
|
+
source: "launcher",
|
|
4717
|
+
actor: "launcher",
|
|
4718
|
+
attempt,
|
|
4719
|
+
data: {
|
|
4720
|
+
attemptNumber: attempt,
|
|
4721
|
+
traceDir: path.relative(REPO_ROOT, traceDir),
|
|
4722
|
+
gateSnapshot,
|
|
4723
|
+
qualitySummary: {
|
|
4724
|
+
contradictionCount: gateSnapshot?.integration?.conflictingClaims?.length || 0,
|
|
4725
|
+
finalRecommendation: derivedState.integrationSummary?.recommendation || "unknown",
|
|
4726
|
+
},
|
|
4727
|
+
},
|
|
4728
|
+
});
|
|
4729
|
+
await flushWaveControlTelemetry();
|
|
4730
|
+
|
|
4731
|
+
const sharedComponentContinuationRuns = resolveSharedComponentContinuationRuns(
|
|
4732
|
+
runsToLaunch,
|
|
4733
|
+
agentRuns,
|
|
4734
|
+
failures,
|
|
4735
|
+
derivedState,
|
|
4736
|
+
lanePaths,
|
|
4737
|
+
wave,
|
|
4738
|
+
);
|
|
4739
|
+
if (sharedComponentContinuationRuns.length > 0) {
|
|
4740
|
+
recordAttemptState(attempt, "completed", {
|
|
4741
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4742
|
+
detail: `Attempt completed; continuing with sibling owners ${sharedComponentContinuationRuns.map((run) => run.agent.agentId).join(", ")}.`,
|
|
4743
|
+
});
|
|
4744
|
+
runsToLaunch = sharedComponentContinuationRuns;
|
|
4745
|
+
const nextAgentIds = runsToLaunch.map((run) => run.agent.agentId);
|
|
4746
|
+
const nextAgentSummary = nextAgentIds.join(", ");
|
|
4747
|
+
recordCombinedEvent({
|
|
4748
|
+
message: `Shared component closure now depends on sibling owners: ${nextAgentSummary}.`,
|
|
4749
|
+
});
|
|
4750
|
+
appendCoordination({
|
|
4751
|
+
event: "wave_shared_component_continue",
|
|
4752
|
+
waves: [wave.wave],
|
|
4753
|
+
status: "running",
|
|
4754
|
+
details: `attempt=${attempt}/${options.maxRetriesPerWave + 1}; next_agents=${nextAgentSummary}`,
|
|
4755
|
+
actionRequested: `Lane ${lanePaths.lane} owners should let the remaining shared-component owners finish their proof before further retries.`,
|
|
4756
|
+
});
|
|
4757
|
+
for (const run of runsToLaunch) {
|
|
4758
|
+
setWaveDashboardAgent(dashboardState, run.agent.agentId, {
|
|
4759
|
+
state: "pending",
|
|
4760
|
+
detail: "Queued for shared component closure",
|
|
4761
|
+
});
|
|
4762
|
+
}
|
|
4763
|
+
writeWaveRelaunchPlan(lanePaths, wave.wave, {
|
|
4764
|
+
wave: wave.wave,
|
|
4765
|
+
attempt,
|
|
4766
|
+
phase: derivedState?.ledger?.phase || null,
|
|
4767
|
+
selectedAgentIds: nextAgentIds,
|
|
4768
|
+
reasonBuckets: relaunchReasonBuckets(runsToLaunch, failures, derivedState),
|
|
4769
|
+
executorStates: Object.fromEntries(
|
|
4770
|
+
runsToLaunch.map((run) => [run.agent.agentId, run.agent.executorResolved || null]),
|
|
4771
|
+
),
|
|
4772
|
+
fallbackHistory: Object.fromEntries(
|
|
4773
|
+
runsToLaunch.map((run) => [
|
|
4774
|
+
run.agent.agentId,
|
|
4775
|
+
run.agent.executorResolved?.executorHistory || [],
|
|
4776
|
+
]),
|
|
4777
|
+
),
|
|
4778
|
+
createdAt: toIsoTimestamp(),
|
|
4779
|
+
});
|
|
4780
|
+
flushDashboards();
|
|
4781
|
+
traceAttempt += 1;
|
|
4782
|
+
continue;
|
|
4783
|
+
}
|
|
4110
4784
|
|
|
4111
4785
|
if (failures.length === 0) {
|
|
4786
|
+
recordAttemptState(attempt, "completed", {
|
|
4787
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4788
|
+
detail: "Wave gates passed for this attempt.",
|
|
4789
|
+
});
|
|
4790
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4791
|
+
entityType: "wave_run",
|
|
4792
|
+
entityId: `wave-${wave.wave}`,
|
|
4793
|
+
action: "completed",
|
|
4794
|
+
source: "launcher",
|
|
4795
|
+
actor: "launcher",
|
|
4796
|
+
data: {
|
|
4797
|
+
waveId: `wave-${wave.wave}`,
|
|
4798
|
+
waveNumber: wave.wave,
|
|
4799
|
+
attempts: attempt,
|
|
4800
|
+
traceDir: completionTraceDir ? path.relative(REPO_ROOT, completionTraceDir) : null,
|
|
4801
|
+
gateSnapshot: completionGateSnapshot,
|
|
4802
|
+
},
|
|
4803
|
+
});
|
|
4112
4804
|
dashboardState.status = "completed";
|
|
4113
4805
|
recordCombinedEvent({ message: `Wave ${wave.wave} completed successfully.` });
|
|
4114
4806
|
refreshWaveDashboardAgentStates(dashboardState, agentRuns, new Set(), (event) =>
|
|
@@ -4116,10 +4808,36 @@ export async function runLauncherCli(argv) {
|
|
|
4116
4808
|
);
|
|
4117
4809
|
updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
|
|
4118
4810
|
flushDashboards();
|
|
4811
|
+
await flushWaveControlTelemetry();
|
|
4119
4812
|
break;
|
|
4120
4813
|
}
|
|
4121
4814
|
|
|
4122
4815
|
if (attempt >= options.maxRetriesPerWave + 1) {
|
|
4816
|
+
recordAttemptState(attempt, "failed", {
|
|
4817
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4818
|
+
detail: failures
|
|
4819
|
+
.map((failure) => `${failure.agentId || "wave"}:${failure.statusCode}`)
|
|
4820
|
+
.join(", "),
|
|
4821
|
+
});
|
|
4822
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4823
|
+
entityType: "wave_run",
|
|
4824
|
+
entityId: `wave-${wave.wave}`,
|
|
4825
|
+
action: "failed",
|
|
4826
|
+
source: "launcher",
|
|
4827
|
+
actor: "launcher",
|
|
4828
|
+
data: {
|
|
4829
|
+
waveId: `wave-${wave.wave}`,
|
|
4830
|
+
waveNumber: wave.wave,
|
|
4831
|
+
attempts: attempt,
|
|
4832
|
+
traceDir: completionTraceDir ? path.relative(REPO_ROOT, completionTraceDir) : null,
|
|
4833
|
+
gateSnapshot: completionGateSnapshot,
|
|
4834
|
+
failures: failures.map((failure) => ({
|
|
4835
|
+
agentId: failure.agentId || null,
|
|
4836
|
+
statusCode: failure.statusCode,
|
|
4837
|
+
detail: failure.detail || null,
|
|
4838
|
+
})),
|
|
4839
|
+
},
|
|
4840
|
+
});
|
|
4123
4841
|
dashboardState.status = timedOut ? "timed_out" : "failed";
|
|
4124
4842
|
for (const failure of failures) {
|
|
4125
4843
|
setWaveDashboardAgent(dashboardState, failure.agentId, {
|
|
@@ -4137,6 +4855,7 @@ export async function runLauncherCli(argv) {
|
|
|
4137
4855
|
const error = new Error(
|
|
4138
4856
|
`Wave ${wave.wave} failed after ${attempt} attempt(s):\n${details}`,
|
|
4139
4857
|
);
|
|
4858
|
+
await flushWaveControlTelemetry();
|
|
4140
4859
|
if (
|
|
4141
4860
|
failures.every(
|
|
4142
4861
|
(failure) =>
|
|
@@ -4151,6 +4870,12 @@ export async function runLauncherCli(argv) {
|
|
|
4151
4870
|
|
|
4152
4871
|
const failedAgentIds = new Set(failures.map((failure) => failure.agentId));
|
|
4153
4872
|
const failedList = Array.from(failedAgentIds).join(", ");
|
|
4873
|
+
recordAttemptState(attempt, "failed", {
|
|
4874
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4875
|
+
detail: failures
|
|
4876
|
+
.map((failure) => `${failure.agentId || "wave"}:${failure.statusCode}`)
|
|
4877
|
+
.join(", "),
|
|
4878
|
+
});
|
|
4154
4879
|
console.warn(
|
|
4155
4880
|
`[retry] Wave ${wave.wave} had failures for agents: ${failedList}. Evaluating safe relaunch targets.`,
|
|
4156
4881
|
);
|
|
@@ -4168,7 +4893,38 @@ export async function runLauncherCli(argv) {
|
|
|
4168
4893
|
lanePaths,
|
|
4169
4894
|
wave,
|
|
4170
4895
|
);
|
|
4171
|
-
|
|
4896
|
+
retryOverride = readWaveRetryOverride(lanePaths, wave.wave);
|
|
4897
|
+
const overrideResolution = resolveRetryOverrideRuns(
|
|
4898
|
+
agentRuns,
|
|
4899
|
+
retryOverride,
|
|
4900
|
+
lanePaths,
|
|
4901
|
+
wave,
|
|
4902
|
+
);
|
|
4903
|
+
if (overrideResolution.unknownAgentIds.length > 0) {
|
|
4904
|
+
appendCoordination({
|
|
4905
|
+
event: "retry_override_invalid",
|
|
4906
|
+
waves: [wave.wave],
|
|
4907
|
+
status: "warn",
|
|
4908
|
+
details: `unknown_agents=${overrideResolution.unknownAgentIds.join(",")}`,
|
|
4909
|
+
actionRequested:
|
|
4910
|
+
"Retry override references agent ids that do not exist in the current wave definition.",
|
|
4911
|
+
});
|
|
4912
|
+
clearWaveRetryOverride(lanePaths, wave.wave);
|
|
4913
|
+
retryOverride = null;
|
|
4914
|
+
} else if (overrideResolution.runs.length > 0) {
|
|
4915
|
+
runsToLaunch = overrideResolution.runs;
|
|
4916
|
+
appendCoordination({
|
|
4917
|
+
event: "retry_override_applied",
|
|
4918
|
+
waves: [wave.wave],
|
|
4919
|
+
status: "running",
|
|
4920
|
+
details: `agents=${overrideResolution.selectedAgentIds.join(",")}; requested_by=${retryOverride?.requestedBy || "human-operator"}`,
|
|
4921
|
+
actionRequested: "None",
|
|
4922
|
+
});
|
|
4923
|
+
if (retryOverride?.applyOnce !== false) {
|
|
4924
|
+
clearWaveRetryOverride(lanePaths, wave.wave);
|
|
4925
|
+
retryOverride = null;
|
|
4926
|
+
}
|
|
4927
|
+
} else if (relaunchResolution.barrier) {
|
|
4172
4928
|
clearWaveRelaunchPlan(lanePaths, wave.wave);
|
|
4173
4929
|
for (const failure of relaunchResolution.barrier.failures) {
|
|
4174
4930
|
recordCombinedEvent({
|
|
@@ -4194,8 +4950,9 @@ export async function runLauncherCli(argv) {
|
|
|
4194
4950
|
);
|
|
4195
4951
|
error.exitCode = 43;
|
|
4196
4952
|
throw error;
|
|
4953
|
+
} else {
|
|
4954
|
+
runsToLaunch = relaunchResolution.runs;
|
|
4197
4955
|
}
|
|
4198
|
-
runsToLaunch = relaunchResolution.runs;
|
|
4199
4956
|
if (runsToLaunch.length === 0) {
|
|
4200
4957
|
clearWaveRelaunchPlan(lanePaths, wave.wave);
|
|
4201
4958
|
const error = new Error(
|
|
@@ -4229,6 +4986,7 @@ export async function runLauncherCli(argv) {
|
|
|
4229
4986
|
});
|
|
4230
4987
|
flushDashboards();
|
|
4231
4988
|
attempt += 1;
|
|
4989
|
+
traceAttempt += 1;
|
|
4232
4990
|
}
|
|
4233
4991
|
|
|
4234
4992
|
clearWaveRelaunchPlan(lanePaths, wave.wave);
|
|
@@ -4256,6 +5014,9 @@ export async function runLauncherCli(argv) {
|
|
|
4256
5014
|
details: `attempts_used=${dashboardState?.attempt ?? "n/a"}; completed_waves=${runState.completedWaves.join(", ") || "none"}`,
|
|
4257
5015
|
});
|
|
4258
5016
|
} finally {
|
|
5017
|
+
if (residentOrchestratorRun) {
|
|
5018
|
+
killTmuxSessionIfExists(lanePaths.tmuxSocketName, residentOrchestratorRun.sessionName);
|
|
5019
|
+
}
|
|
4259
5020
|
if (terminalsAppended && !options.keepTerminals) {
|
|
4260
5021
|
removeTerminalEntries(lanePaths.terminalsPath, terminalEntries);
|
|
4261
5022
|
}
|