@chllming/wave-orchestration 0.6.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +64 -1
- package/README.md +44 -8
- package/docs/agents/wave-orchestrator-role.md +50 -0
- package/docs/agents/wave-planner-role.md +39 -0
- package/docs/context7/bundles.json +9 -0
- package/docs/context7/planner-agent/README.md +25 -0
- package/docs/context7/planner-agent/manifest.json +83 -0
- package/docs/context7/planner-agent/papers/cooperbench-why-coding-agents-cannot-be-your-teammates-yet.md +3283 -0
- package/docs/context7/planner-agent/papers/dova-deliberation-first-multi-agent-orchestration-for-autonomous-research-automation.md +1699 -0
- package/docs/context7/planner-agent/papers/dpbench-large-language-models-struggle-with-simultaneous-coordination.md +2251 -0
- package/docs/context7/planner-agent/papers/incremental-planning-to-control-a-blackboard-based-problem-solver.md +1729 -0
- package/docs/context7/planner-agent/papers/silo-bench-a-scalable-environment-for-evaluating-distributed-coordination-in-multi-agent-llm-systems.md +3747 -0
- package/docs/context7/planner-agent/papers/todoevolve-learning-to-architect-agent-planning-systems.md +1675 -0
- package/docs/context7/planner-agent/papers/verified-multi-agent-orchestration-a-plan-execute-verify-replan-framework-for-complex-query-resolution.md +1173 -0
- package/docs/context7/planner-agent/papers/why-do-multi-agent-llm-systems-fail.md +5211 -0
- package/docs/context7/planner-agent/topics/planning-and-orchestration.md +24 -0
- package/docs/evals/README.md +96 -1
- package/docs/evals/arm-templates/README.md +13 -0
- package/docs/evals/arm-templates/full-wave.json +15 -0
- package/docs/evals/arm-templates/single-agent.json +15 -0
- package/docs/evals/benchmark-catalog.json +7 -0
- package/docs/evals/cases/README.md +47 -0
- package/docs/evals/cases/wave-blackboard-inbox-targeting.json +73 -0
- package/docs/evals/cases/wave-contradiction-conflict.json +104 -0
- package/docs/evals/cases/wave-expert-routing-preservation.json +69 -0
- package/docs/evals/cases/wave-hidden-profile-private-evidence.json +81 -0
- package/docs/evals/cases/wave-premature-closure-guard.json +71 -0
- package/docs/evals/cases/wave-silo-cross-agent-state.json +77 -0
- package/docs/evals/cases/wave-simultaneous-lockstep.json +92 -0
- package/docs/evals/cooperbench/real-world-mitigation.md +341 -0
- package/docs/evals/external-benchmarks.json +85 -0
- package/docs/evals/external-command-config.sample.json +9 -0
- package/docs/evals/external-command-config.swe-bench-pro.json +8 -0
- package/docs/evals/pilots/README.md +47 -0
- package/docs/evals/pilots/swe-bench-pro-public-full-wave-review-10.json +64 -0
- package/docs/evals/pilots/swe-bench-pro-public-pilot.json +111 -0
- package/docs/evals/wave-benchmark-program.md +302 -0
- package/docs/guides/planner.md +48 -11
- package/docs/plans/context7-wave-orchestrator.md +20 -0
- package/docs/plans/current-state.md +9 -1
- package/docs/plans/examples/wave-benchmark-improvement.md +108 -0
- package/docs/plans/examples/wave-example-live-proof.md +1 -1
- package/docs/plans/examples/wave-example-rollout-fidelity.md +340 -0
- package/docs/plans/wave-orchestrator.md +73 -11
- package/docs/plans/waves/reviews/wave-1-benchmark-operator.md +118 -0
- package/docs/reference/coordination-and-closure.md +436 -0
- package/docs/reference/live-proof-waves.md +25 -3
- package/docs/reference/npmjs-trusted-publishing.md +3 -3
- package/docs/reference/proof-metrics.md +90 -0
- package/docs/reference/runtime-config/README.md +61 -0
- package/docs/reference/sample-waves.md +29 -18
- package/docs/reference/wave-control.md +164 -0
- package/docs/reference/wave-planning-lessons.md +131 -0
- package/package.json +5 -4
- package/releases/manifest.json +33 -0
- package/scripts/research/agent-context-archive.mjs +18 -0
- package/scripts/research/manifests/agent-context-expanded-2026-03-22.mjs +17 -0
- package/scripts/research/sync-planner-context7-bundle.mjs +133 -0
- package/scripts/wave-autonomous.mjs +2 -4
- package/scripts/wave-orchestrator/adhoc.mjs +32 -11
- package/scripts/wave-orchestrator/artifact-schemas.mjs +232 -0
- package/scripts/wave-orchestrator/autonomous.mjs +27 -6
- package/scripts/wave-orchestrator/benchmark-cases.mjs +374 -0
- package/scripts/wave-orchestrator/benchmark-external.mjs +1384 -0
- package/scripts/wave-orchestrator/benchmark.mjs +972 -0
- package/scripts/wave-orchestrator/clarification-triage.mjs +78 -12
- package/scripts/wave-orchestrator/config.mjs +175 -0
- package/scripts/wave-orchestrator/control-cli.mjs +1123 -0
- package/scripts/wave-orchestrator/control-plane.mjs +697 -0
- package/scripts/wave-orchestrator/coord-cli.mjs +360 -2
- package/scripts/wave-orchestrator/coordination-store.mjs +211 -9
- package/scripts/wave-orchestrator/coordination.mjs +84 -0
- package/scripts/wave-orchestrator/dashboard-renderer.mjs +38 -3
- package/scripts/wave-orchestrator/dashboard-state.mjs +22 -0
- package/scripts/wave-orchestrator/evals.mjs +23 -0
- package/scripts/wave-orchestrator/executors.mjs +3 -2
- package/scripts/wave-orchestrator/feedback.mjs +55 -0
- package/scripts/wave-orchestrator/install.mjs +253 -26
- package/scripts/wave-orchestrator/launcher-closure.mjs +4 -1
- package/scripts/wave-orchestrator/launcher-runtime.mjs +24 -21
- package/scripts/wave-orchestrator/launcher.mjs +800 -35
- package/scripts/wave-orchestrator/package-update-notice.mjs +230 -0
- package/scripts/wave-orchestrator/package-version.mjs +32 -0
- package/scripts/wave-orchestrator/planner-context.mjs +75 -0
- package/scripts/wave-orchestrator/planner.mjs +2270 -136
- package/scripts/wave-orchestrator/proof-cli.mjs +195 -0
- package/scripts/wave-orchestrator/proof-registry.mjs +317 -0
- package/scripts/wave-orchestrator/replay.mjs +10 -4
- package/scripts/wave-orchestrator/retry-cli.mjs +184 -0
- package/scripts/wave-orchestrator/retry-control.mjs +225 -0
- package/scripts/wave-orchestrator/shared.mjs +26 -0
- package/scripts/wave-orchestrator/swe-bench-pro-task.mjs +1004 -0
- package/scripts/wave-orchestrator/traces.mjs +157 -2
- package/scripts/wave-orchestrator/wave-control-client.mjs +532 -0
- package/scripts/wave-orchestrator/wave-control-schema.mjs +309 -0
- package/scripts/wave-orchestrator/wave-files.mjs +17 -5
- package/scripts/wave.mjs +39 -2
- package/skills/repo-coding-rules/SKILL.md +1 -0
- package/skills/role-cont-eval/SKILL.md +1 -0
- package/skills/role-cont-qa/SKILL.md +13 -6
- package/skills/role-deploy/SKILL.md +1 -0
- package/skills/role-documentation/SKILL.md +4 -0
- package/skills/role-implementation/SKILL.md +4 -0
- package/skills/role-infra/SKILL.md +2 -1
- package/skills/role-integration/SKILL.md +15 -8
- package/skills/role-planner/SKILL.md +39 -0
- package/skills/role-planner/skill.json +21 -0
- package/skills/role-research/SKILL.md +1 -0
- package/skills/role-security/SKILL.md +2 -2
- package/skills/runtime-claude/SKILL.md +2 -1
- package/skills/runtime-codex/SKILL.md +1 -0
- package/skills/runtime-local/SKILL.md +2 -0
- package/skills/runtime-opencode/SKILL.md +1 -0
- package/skills/wave-core/SKILL.md +25 -6
- package/skills/wave-core/references/marker-syntax.md +16 -8
- package/wave.config.json +45 -0
|
@@ -11,12 +11,14 @@ import {
|
|
|
11
11
|
} from "./config.mjs";
|
|
12
12
|
import {
|
|
13
13
|
appendOrchestratorBoardEntry,
|
|
14
|
+
buildResidentOrchestratorPrompt,
|
|
14
15
|
ensureOrchestratorBoard,
|
|
15
16
|
feedbackStateSignature,
|
|
16
17
|
readWaveHumanFeedbackRequests,
|
|
17
18
|
} from "./coordination.mjs";
|
|
18
19
|
import {
|
|
19
20
|
appendCoordinationRecord,
|
|
21
|
+
buildCoordinationResponseMetrics,
|
|
20
22
|
compileAgentInbox,
|
|
21
23
|
compileSharedSummary,
|
|
22
24
|
isOpenCoordinationStatus,
|
|
@@ -54,6 +56,8 @@ import {
|
|
|
54
56
|
DEFAULT_AGENT_RATE_LIMIT_BASE_DELAY_SECONDS,
|
|
55
57
|
DEFAULT_AGENT_RATE_LIMIT_MAX_DELAY_SECONDS,
|
|
56
58
|
DEFAULT_AGENT_RATE_LIMIT_RETRIES,
|
|
59
|
+
DEFAULT_COORDINATION_ACK_TIMEOUT_MS,
|
|
60
|
+
DEFAULT_LIVE_COORDINATION_REFRESH_MS,
|
|
57
61
|
DEFAULT_MAX_RETRIES_PER_WAVE,
|
|
58
62
|
DEFAULT_TIMEOUT_MINUTES,
|
|
59
63
|
DEFAULT_WAVE_LANE,
|
|
@@ -98,6 +102,7 @@ import {
|
|
|
98
102
|
commandForExecutor,
|
|
99
103
|
isExecutorCommandAvailable,
|
|
100
104
|
} from "./executors.mjs";
|
|
105
|
+
import { maybeAnnouncePackageUpdate } from "./package-update-notice.mjs";
|
|
101
106
|
import {
|
|
102
107
|
agentRequiresProofCentricValidation,
|
|
103
108
|
buildRunStateEvidence,
|
|
@@ -128,7 +133,21 @@ import {
|
|
|
128
133
|
} from "./agent-state.mjs";
|
|
129
134
|
import { buildDocsQueue, readDocsQueue, writeDocsQueue } from "./docs-queue.mjs";
|
|
130
135
|
import { deriveWaveLedger, readWaveLedger, writeWaveLedger } from "./ledger.mjs";
|
|
136
|
+
import {
|
|
137
|
+
augmentSummaryWithProofRegistry,
|
|
138
|
+
readWaveProofRegistry,
|
|
139
|
+
waveProofRegistryPath,
|
|
140
|
+
} from "./proof-registry.mjs";
|
|
141
|
+
import {
|
|
142
|
+
clearWaveRetryOverride,
|
|
143
|
+
readWaveRelaunchPlanSnapshot,
|
|
144
|
+
readWaveRetryOverride,
|
|
145
|
+
resolveRetryOverrideRuns,
|
|
146
|
+
waveRelaunchPlanPath,
|
|
147
|
+
} from "./retry-control.mjs";
|
|
148
|
+
import { appendWaveControlEvent } from "./control-plane.mjs";
|
|
131
149
|
import { buildQualityMetrics, writeTraceBundle } from "./traces.mjs";
|
|
150
|
+
import { flushWaveControlQueue } from "./wave-control-client.mjs";
|
|
132
151
|
import { triageClarificationRequests } from "./clarification-triage.mjs";
|
|
133
152
|
import { readProjectProfile, resolveDefaultTerminalSurface } from "./project-profile.mjs";
|
|
134
153
|
import {
|
|
@@ -148,7 +167,6 @@ import {
|
|
|
148
167
|
writeDependencySnapshotMarkdown,
|
|
149
168
|
} from "./routing-state.mjs";
|
|
150
169
|
import {
|
|
151
|
-
readRelaunchPlan,
|
|
152
170
|
writeAssignmentSnapshot,
|
|
153
171
|
writeDependencySnapshot,
|
|
154
172
|
writeRelaunchPlan,
|
|
@@ -218,6 +236,9 @@ Options:
|
|
|
218
236
|
Disable orchestrator coordination board updates for this run
|
|
219
237
|
--coordination-note <text>
|
|
220
238
|
Optional startup intent note appended to orchestrator board
|
|
239
|
+
--resident-orchestrator
|
|
240
|
+
Launch an additional long-running resident orchestrator session for the wave
|
|
241
|
+
--no-telemetry Disable Wave Control reporting for this launcher run
|
|
221
242
|
--no-context7 Disable launcher-side Context7 prefetch/injection
|
|
222
243
|
--help Show this help message
|
|
223
244
|
`);
|
|
@@ -248,6 +269,8 @@ function parseArgs(argv) {
|
|
|
248
269
|
cleanupSessions: true,
|
|
249
270
|
keepTerminals: false,
|
|
250
271
|
context7Enabled: true,
|
|
272
|
+
telemetryEnabled: true,
|
|
273
|
+
residentOrchestrator: false,
|
|
251
274
|
orchestratorId: null,
|
|
252
275
|
orchestratorBoardPath: null,
|
|
253
276
|
coordinationNote: "",
|
|
@@ -284,6 +307,8 @@ function parseArgs(argv) {
|
|
|
284
307
|
options.keepTerminals = true;
|
|
285
308
|
} else if (arg === "--no-context7") {
|
|
286
309
|
options.context7Enabled = false;
|
|
310
|
+
} else if (arg === "--no-telemetry") {
|
|
311
|
+
options.telemetryEnabled = false;
|
|
287
312
|
} else if (arg === "--no-orchestrator-board") {
|
|
288
313
|
options.orchestratorBoardPath = null;
|
|
289
314
|
orchestratorBoardProvided = true;
|
|
@@ -304,6 +329,8 @@ function parseArgs(argv) {
|
|
|
304
329
|
orchestratorBoardProvided = true;
|
|
305
330
|
} else if (arg === "--coordination-note") {
|
|
306
331
|
options.coordinationNote = String(argv[++i] || "").trim();
|
|
332
|
+
} else if (arg === "--resident-orchestrator") {
|
|
333
|
+
options.residentOrchestrator = true;
|
|
307
334
|
} else if (arg === "--state-file") {
|
|
308
335
|
options.runStatePath = path.resolve(REPO_ROOT, argv[++i] || "");
|
|
309
336
|
stateFileProvided = true;
|
|
@@ -358,7 +385,18 @@ function parseArgs(argv) {
|
|
|
358
385
|
if (!executorProvided) {
|
|
359
386
|
options.executorMode = lanePaths.executors.default;
|
|
360
387
|
}
|
|
388
|
+
if (!options.telemetryEnabled) {
|
|
389
|
+
lanePaths.waveControl = {
|
|
390
|
+
...(lanePaths.waveControl || {}),
|
|
391
|
+
enabled: false,
|
|
392
|
+
};
|
|
393
|
+
lanePaths.laneProfile = {
|
|
394
|
+
...(lanePaths.laneProfile || {}),
|
|
395
|
+
waveControl: lanePaths.waveControl,
|
|
396
|
+
};
|
|
397
|
+
}
|
|
361
398
|
options.orchestratorId ||= sanitizeOrchestratorId(`${lanePaths.lane}-orch-${process.pid}`);
|
|
399
|
+
lanePaths.orchestratorId = options.orchestratorId;
|
|
362
400
|
if (options.agentRateLimitMaxDelaySeconds < options.agentRateLimitBaseDelaySeconds) {
|
|
363
401
|
throw new Error(
|
|
364
402
|
"--agent-rate-limit-max-delay-seconds must be >= --agent-rate-limit-base-delay-seconds",
|
|
@@ -529,17 +567,19 @@ function materializeAgentExecutionSummaryForRun(wave, runInfo) {
|
|
|
529
567
|
}
|
|
530
568
|
|
|
531
569
|
function readRunExecutionSummary(runInfo, wave = null) {
|
|
570
|
+
const applyProofRegistry = (summary) =>
|
|
571
|
+
runInfo?.proofRegistry ? augmentSummaryWithProofRegistry(runInfo.agent, summary, runInfo.proofRegistry) : summary;
|
|
532
572
|
if (runInfo?.summary && typeof runInfo.summary === "object") {
|
|
533
|
-
return runInfo.summary;
|
|
573
|
+
return applyProofRegistry(runInfo.summary);
|
|
534
574
|
}
|
|
535
575
|
if (runInfo?.summaryPath && fs.existsSync(runInfo.summaryPath)) {
|
|
536
|
-
return readAgentExecutionSummary(runInfo.summaryPath);
|
|
576
|
+
return applyProofRegistry(readAgentExecutionSummary(runInfo.summaryPath));
|
|
537
577
|
}
|
|
538
578
|
if (runInfo?.statusPath && fs.existsSync(agentSummaryPathFromStatusPath(runInfo.statusPath))) {
|
|
539
|
-
return readAgentExecutionSummary(runInfo.statusPath);
|
|
579
|
+
return applyProofRegistry(readAgentExecutionSummary(runInfo.statusPath));
|
|
540
580
|
}
|
|
541
581
|
if (wave && runInfo?.statusPath && runInfo?.logPath && fs.existsSync(runInfo.statusPath)) {
|
|
542
|
-
return materializeAgentExecutionSummaryForRun(wave, runInfo);
|
|
582
|
+
return applyProofRegistry(materializeAgentExecutionSummaryForRun(wave, runInfo));
|
|
543
583
|
}
|
|
544
584
|
return null;
|
|
545
585
|
}
|
|
@@ -593,12 +633,8 @@ function waveIntegrationMarkdownPath(lanePaths, waveNumber) {
|
|
|
593
633
|
return path.join(lanePaths.integrationDir, `wave-${waveNumber}.md`);
|
|
594
634
|
}
|
|
595
635
|
|
|
596
|
-
function waveRelaunchPlanPath(lanePaths, waveNumber) {
|
|
597
|
-
return path.join(lanePaths.statusDir, `relaunch-plan-wave-${waveNumber}.json`);
|
|
598
|
-
}
|
|
599
|
-
|
|
600
636
|
function readWaveRelaunchPlan(lanePaths, waveNumber) {
|
|
601
|
-
return
|
|
637
|
+
return readWaveRelaunchPlanSnapshot(lanePaths, waveNumber);
|
|
602
638
|
}
|
|
603
639
|
|
|
604
640
|
function writeWaveRelaunchPlan(lanePaths, waveNumber, payload) {
|
|
@@ -1376,6 +1412,7 @@ function writeWaveDerivedState({
|
|
|
1376
1412
|
capabilityAssignments,
|
|
1377
1413
|
dependencySnapshot,
|
|
1378
1414
|
});
|
|
1415
|
+
const responseMetrics = buildCoordinationResponseMetrics(coordinationState);
|
|
1379
1416
|
const messageBoardPath = path.join(lanePaths.messageboardsDir, `wave-${wave.wave}.md`);
|
|
1380
1417
|
writeCoordinationBoardProjection(messageBoardPath, {
|
|
1381
1418
|
wave: wave.wave,
|
|
@@ -1397,6 +1434,7 @@ function writeWaveDerivedState({
|
|
|
1397
1434
|
integrationMarkdownPath: waveIntegrationMarkdownPath(lanePaths, wave.wave),
|
|
1398
1435
|
securityMarkdownPath: waveSecurityMarkdownPath(lanePaths, wave.wave),
|
|
1399
1436
|
ledger,
|
|
1437
|
+
responseMetrics,
|
|
1400
1438
|
sharedSummaryPath,
|
|
1401
1439
|
sharedSummaryText: sharedSummary.text,
|
|
1402
1440
|
inboxesByAgentId,
|
|
@@ -1414,6 +1452,23 @@ function applyDerivedStateToDashboard(dashboardState, derivedState) {
|
|
|
1414
1452
|
).length;
|
|
1415
1453
|
dashboardState.inboundDependenciesOpen = (derivedState.dependencySnapshot?.openInbound || []).length;
|
|
1416
1454
|
dashboardState.outboundDependenciesOpen = (derivedState.dependencySnapshot?.openOutbound || []).length;
|
|
1455
|
+
dashboardState.coordinationOpen = derivedState.coordinationState?.openRecords?.length || 0;
|
|
1456
|
+
dashboardState.openClarifications =
|
|
1457
|
+
(derivedState.coordinationState?.clarifications || []).filter((record) =>
|
|
1458
|
+
isOpenCoordinationStatus(record.status),
|
|
1459
|
+
).length;
|
|
1460
|
+
dashboardState.openHumanEscalations =
|
|
1461
|
+
derivedState.responseMetrics?.openHumanEscalationCount ||
|
|
1462
|
+
(derivedState.coordinationState?.humanEscalations || []).filter((record) =>
|
|
1463
|
+
isOpenCoordinationStatus(record.status),
|
|
1464
|
+
).length;
|
|
1465
|
+
dashboardState.oldestOpenCoordinationAgeMs =
|
|
1466
|
+
derivedState.responseMetrics?.oldestOpenCoordinationAgeMs ?? null;
|
|
1467
|
+
dashboardState.oldestUnackedRequestAgeMs =
|
|
1468
|
+
derivedState.responseMetrics?.oldestUnackedRequestAgeMs ?? null;
|
|
1469
|
+
dashboardState.overdueAckCount = derivedState.responseMetrics?.overdueAckCount || 0;
|
|
1470
|
+
dashboardState.overdueClarificationCount =
|
|
1471
|
+
derivedState.responseMetrics?.overdueClarificationCount || 0;
|
|
1417
1472
|
}
|
|
1418
1473
|
|
|
1419
1474
|
export function readWaveImplementationGate(wave, agentRuns) {
|
|
@@ -1848,6 +1903,198 @@ function listLaneTmuxSessionNames(lanePaths) {
|
|
|
1848
1903
|
);
|
|
1849
1904
|
}
|
|
1850
1905
|
|
|
1906
|
+
/**
 * Resolve where the resident orchestrator role prompt is expected to live.
 *
 * @returns {string} REPO_ROOT joined with docs/agents/wave-orchestrator-role.md.
 */
function residentOrchestratorRolePromptPath() {
  const relativeSegments = ["docs", "agents", "wave-orchestrator-role.md"];
  return path.join(REPO_ROOT, ...relativeSegments);
}
|
|
1909
|
+
|
|
1910
|
+
/**
 * Load the role prompt text used for the resident orchestrator session.
 *
 * Reads the file named by residentOrchestratorRolePromptPath(). When that file
 * is absent, a built-in one-sentence instruction is returned instead.
 *
 * @returns {string} UTF-8 contents of the role prompt file, or the built-in fallback.
 * @throws {Error} Any read failure other than the file being absent.
 */
function loadResidentOrchestratorRolePrompt() {
  const filePath = residentOrchestratorRolePromptPath();
  try {
    // Read in one step instead of existsSync + readFileSync: the file could be
    // removed between the check and the read.
    return fs.readFileSync(filePath, "utf8");
  } catch (error) {
    // ENOENT/ENOTDIR are the "path does not exist" cases that previously took the fallback.
    if (error?.code === "ENOENT" || error?.code === "ENOTDIR") {
      return "Monitor the wave, triage clarification timing, and intervene through coordination records only.";
    }
    throw error;
  }
}
|
|
1917
|
+
|
|
1918
|
+
/**
 * Build the executor state used for the resident orchestrator when no agent
 * executor is available to copy from.
 *
 * @param {object} options Launcher options; reads executorMode, timeoutMinutes
 *   and (for codex only) codexSandboxMode.
 * @returns {object} Executor state with id, role, selectedBy, budget and a
 *   runtime-specific section keyed by the executor id.
 */
function defaultResidentExecutorState(options) {
  // Anything other than claude/opencode resolves to codex.
  const executorId =
    options.executorMode === "claude" || options.executorMode === "opencode"
      ? options.executorMode
      : "codex";
  const runtimeSettings = { command: executorId };
  if (executorId === "codex") {
    // Only the codex section carries a sandbox setting.
    runtimeSettings.sandbox = options.codexSandboxMode;
  }
  return {
    id: executorId,
    role: "orchestrator",
    selectedBy: "resident-orchestrator",
    budget: { minutes: options.timeoutMinutes },
    [executorId]: runtimeSettings,
  };
}
|
|
1952
|
+
|
|
1953
|
+
/**
 * Derive the resident orchestrator's executor state from an agent's resolved
 * executor, or from defaultResidentExecutorState(options) when none is given.
 *
 * The template is copied through a JSON round-trip, so the caller's object is
 * not modified.
 *
 * @param {object|null} executorTemplate Resolved executor state to copy, if any.
 * @param {object} options Launcher options; reads timeoutMinutes and codexSandboxMode.
 * @returns {object} Executor state with role/selectedBy forced to the resident
 *   orchestrator values, budget.minutes raised to at least options.timeoutMinutes,
 *   and a default command filled in for codex, claude, or opencode.
 */
function buildResidentExecutorState(executorTemplate, options) {
  const state = executorTemplate
    ? JSON.parse(JSON.stringify(executorTemplate))
    : defaultResidentExecutorState(options);

  // Non-numeric or missing template minutes count as 0.
  const templateMinutes = Number.parseInt(String(state?.budget?.minutes || 0), 10) || 0;

  state.role = "orchestrator";
  state.selectedBy = "resident-orchestrator";
  state.budget = {
    ...(state.budget || {}),
    minutes: Math.max(templateMinutes, options.timeoutMinutes),
  };

  switch (state.id) {
    case "codex":
      state.codex = {
        ...(state.codex || {}),
        command: state?.codex?.command || "codex",
        sandbox: state?.codex?.sandbox || options.codexSandboxMode,
      };
      break;
    case "claude":
      state.claude = {
        ...(state.claude || {}),
        command: state?.claude?.command || "claude",
      };
      break;
    case "opencode":
      state.opencode = {
        ...(state.opencode || {}),
        command: state?.opencode?.command || "opencode",
      };
      break;
    default:
      // Other executor ids get no runtime-specific section here.
      break;
  }
  return state;
}
|
|
1985
|
+
|
|
1986
|
+
/**
 * Assemble the run descriptor for the resident orchestrator session of a wave.
 *
 * The executor is copied from the first agent run whose resolved executor id
 * equals options.executorMode, otherwise from the first agent run with any
 * resolved executor, otherwise built from defaults.
 *
 * @param {object} params
 * @param {object} params.lanePaths Reads lane, tmuxSessionPrefix, promptsDir, logsDir, statusDir.
 * @param {object} params.wave Reads wave (number) and file.
 * @param {Array<object>} params.agentRuns Agent runs of the wave.
 * @param {object} params.derivedState Reads coordinationLogPath, messageBoardPath,
 *   sharedSummaryPath and clarificationTriage.triagePath.
 * @param {string} params.dashboardPath Passed through to the prompt builder.
 * @param {string} params.runTag Suffix for the tmux session name.
 * @param {object} params.options Launcher options; reads executorMode and orchestratorId.
 * @returns {{run: object|null, skipReason: string}} run is null, with a reason,
 *   when the resolved executor id is "local".
 */
function buildResidentOrchestratorRun({
  lanePaths,
  wave,
  agentRuns,
  derivedState,
  dashboardPath,
  runTag,
  options,
}) {
  const templateFrom = (predicate) => agentRuns.find(predicate)?.agent?.executorResolved;
  const executorTemplate =
    templateFrom((run) => run.agent.executorResolved?.id === options.executorMode) ||
    templateFrom((run) => run.agent.executorResolved) ||
    null;

  const executorResolved = buildResidentExecutorState(executorTemplate, options);
  if (executorResolved.id === "local") {
    return {
      run: null,
      skipReason: "Resident orchestrator requires codex, claude, or opencode; local executor is not suitable.",
    };
  }

  const rolePrompt = loadResidentOrchestratorRolePrompt();
  const agent = {
    agentId: "ORCH",
    title: "Resident Orchestrator",
    slug: `${wave.wave}-resident-orchestrator`,
    prompt: rolePrompt,
    executorResolved,
  };

  const baseName = `wave-${wave.wave}-resident-orchestrator`;
  // Replace every character outside [a-zA-Z0-9_-] so the name is a plain token.
  const rawSessionName = `${lanePaths.tmuxSessionPrefix}${wave.wave}_resident_orchestrator_${runTag}`;
  const sessionName = rawSessionName.replace(/[^a-zA-Z0-9_-]/g, "_");

  const promptOverride = buildResidentOrchestratorPrompt({
    lane: lanePaths.lane,
    wave: wave.wave,
    waveFile: wave.file,
    orchestratorId: options.orchestratorId,
    coordinationLogPath: derivedState.coordinationLogPath,
    messageBoardPath: derivedState.messageBoardPath,
    sharedSummaryPath: derivedState.sharedSummaryPath,
    dashboardPath,
    triagePath: derivedState.clarificationTriage?.triagePath || null,
    rolePrompt: agent.prompt,
  });

  return {
    run: {
      agent,
      sessionName,
      promptPath: path.join(lanePaths.promptsDir, `${baseName}.prompt.md`),
      logPath: path.join(lanePaths.logsDir, `${baseName}.log`),
      statusPath: path.join(lanePaths.statusDir, `${baseName}.status`),
      promptOverride,
    },
    skipReason: "",
  };
}
|
|
2042
|
+
|
|
2043
|
+
/**
 * Check whether the resident orchestrator session has ended and, if so, report it.
 *
 * Two endings are detected: the run's status file exists, or the run's tmux
 * session name is no longer among listLaneTmuxSessionNames(lanePaths). Either
 * way one event is sent to recordCombinedEvent, one record to appendCoordination,
 * and the session tracker (when supplied) is marked closed.
 *
 * @param {object} params
 * @param {object} params.lanePaths Passed to listLaneTmuxSessionNames.
 * @param {object|null} params.run Resident orchestrator run; reads statusPath,
 *   sessionName and agent.agentId. Nothing is done when it is missing.
 * @param {number} params.waveNumber Wave number recorded on the coordination entry.
 * @param {Function} params.recordCombinedEvent Receives { level, agentId, message }.
 * @param {Function} params.appendCoordination Receives the coordination record.
 * @param {{closed?: boolean}} [params.sessionState] Mutable tracker; once closed
 *   is true this function returns false without checking anything.
 * @returns {boolean} true when this call observed and reported the session ending.
 */
function monitorResidentOrchestratorSession({
  lanePaths,
  run,
  waveNumber,
  recordCombinedEvent,
  appendCoordination,
  sessionState,
}) {
  if (!run || sessionState?.closed === true) {
    return false;
  }
  // The guard above tolerates a missing tracker, so the writes must as well;
  // an unconditional assignment would throw when sessionState is absent.
  const markClosed = () => {
    if (sessionState) {
      sessionState.closed = true;
    }
  };

  if (fs.existsSync(run.statusPath)) {
    markClosed();
    const exitCode = readStatusCodeIfPresent(run.statusPath);
    const exitedCleanly = exitCode === 0;
    recordCombinedEvent({
      level: exitedCleanly ? "info" : "warn",
      agentId: run.agent.agentId,
      message: exitedCleanly
        ? "Resident orchestrator exited; launcher continues as the control plane."
        : `Resident orchestrator exited with code ${exitCode}; launcher continues as the control plane.`,
    });
    appendCoordination({
      event: "resident_orchestrator_exit",
      waves: [waveNumber],
      status: exitedCleanly ? "resolved" : "warn",
      details: exitedCleanly
        ? "Resident orchestrator session ended before wave completion."
        : `Resident orchestrator session ended with code ${exitCode} before wave completion.`,
      actionRequested: "None",
    });
    return true;
  }

  const activeSessions = new Set(listLaneTmuxSessionNames(lanePaths));
  if (activeSessions.has(run.sessionName)) {
    return false;
  }

  // No status file and no live tmux session: the session vanished without reporting.
  markClosed();
  recordCombinedEvent({
    level: "warn",
    agentId: run.agent.agentId,
    message:
      "Resident orchestrator session disappeared before writing a status file; launcher continues as the control plane.",
  });
  appendCoordination({
    event: "resident_orchestrator_missing",
    waves: [waveNumber],
    status: "warn",
    details: `tmux session ${run.sessionName} disappeared before ${path.relative(REPO_ROOT, run.statusPath)} was written.`,
    actionRequested: "None",
  });
  return true;
}
|
|
2097
|
+
|
|
1851
2098
|
function isWaveDashboardBackedByLiveSession(lanePaths, dashboardPath, activeSessionNames) {
|
|
1852
2099
|
const waveMatch = path.basename(dashboardPath).match(/^wave-(\d+)\.json$/);
|
|
1853
2100
|
if (!waveMatch) {
|
|
@@ -2106,12 +2353,14 @@ function monitorWaveHumanFeedback({
|
|
|
2106
2353
|
agentIds: agentRuns.map((run) => run.agent.agentId),
|
|
2107
2354
|
orchestratorId,
|
|
2108
2355
|
});
|
|
2356
|
+
let changed = false;
|
|
2109
2357
|
for (const request of requests) {
|
|
2110
2358
|
const signature = feedbackStateSignature(request);
|
|
2111
2359
|
if (feedbackStateByRequestId.get(request.id) === signature) {
|
|
2112
2360
|
continue;
|
|
2113
2361
|
}
|
|
2114
2362
|
feedbackStateByRequestId.set(request.id, signature);
|
|
2363
|
+
changed = true;
|
|
2115
2364
|
const question = request.question || "n/a";
|
|
2116
2365
|
const context = request.context ? `; context=${request.context}` : "";
|
|
2117
2366
|
const responseOperator = request.responseOperator || "human-operator";
|
|
@@ -2126,14 +2375,14 @@ function monitorWaveHumanFeedback({
|
|
|
2126
2375
|
`[human-feedback] wave=${waveNumber} agent=${request.agentId} request=${request.id} pending: ${question}`,
|
|
2127
2376
|
);
|
|
2128
2377
|
console.warn(
|
|
2129
|
-
`[human-feedback] respond with: pnpm exec wave
|
|
2378
|
+
`[human-feedback] respond with: pnpm exec wave control task act answer --lane ${lanePaths.lane} --wave ${waveNumber} --id ${request.id} --response "<answer>" --operator "<name>"`,
|
|
2130
2379
|
);
|
|
2131
2380
|
appendCoordination({
|
|
2132
2381
|
event: "human_feedback_requested",
|
|
2133
2382
|
waves: [waveNumber],
|
|
2134
2383
|
status: "waiting-human",
|
|
2135
2384
|
details: `request_id=${request.id}; agent=${request.agentId}; question=${question}${context}`,
|
|
2136
|
-
actionRequested: `Launcher operator should ask or answer in the parent session, then run: pnpm exec wave
|
|
2385
|
+
actionRequested: `Launcher operator should ask or answer in the parent session, then run: pnpm exec wave control task act answer --lane ${lanePaths.lane} --wave ${waveNumber} --id ${request.id} --response "<answer>" --operator "<name>"`,
|
|
2137
2386
|
});
|
|
2138
2387
|
if (coordinationLogPath) {
|
|
2139
2388
|
appendCoordinationRecord(coordinationLogPath, {
|
|
@@ -2224,6 +2473,7 @@ function monitorWaveHumanFeedback({
|
|
|
2224
2473
|
}
|
|
2225
2474
|
}
|
|
2226
2475
|
}
|
|
2476
|
+
return changed;
|
|
2227
2477
|
}
|
|
2228
2478
|
|
|
2229
2479
|
function proofCentricReuseBlocked(derivedState) {
|
|
@@ -2237,6 +2487,33 @@ function proofCentricReuseBlocked(derivedState) {
|
|
|
2237
2487
|
);
|
|
2238
2488
|
}
|
|
2239
2489
|
|
|
2490
|
+
/**
 * Tell whether two agent-id lists contain the same ids, ignoring order,
 * duplicates, and falsy entries.
 *
 * @param {Array|null} [left=[]] First list of agent ids; null is treated as empty.
 * @param {Array|null} [right=[]] Second list of agent ids; null is treated as empty.
 * @returns {boolean} true when both lists reduce to the same set of ids.
 */
function sameAgentIdSet(left = [], right = []) {
  const leftIds = new Set((left || []).filter(Boolean));
  const rightIds = new Set((right || []).filter(Boolean));
  // Equal sizes plus one-way containment implies set equality. This avoids
  // sorting and Array.prototype.toSorted, which older Node releases lack.
  if (leftIds.size !== rightIds.size) {
    return false;
  }
  for (const agentId of leftIds) {
    if (!rightIds.has(agentId)) {
      return false;
    }
  }
  return true;
}
|
|
2495
|
+
|
|
2496
|
+
/**
 * Decide whether a persisted relaunch plan is still consistent with the
 * current component gate of the wave.
 *
 * @param {Array<object>} agentRuns Agent runs handed to readWaveComponentGate.
 * @param {object|null} persistedPlan Plan read from disk; must carry a
 *   selectedAgentIds array to be considered at all.
 * @param {object|null} lanePaths Reads laneProfile, passed on to the gate.
 * @param {object} waveDefinition Wave definition handed to readWaveComponentGate.
 * @returns {boolean} false when the plan has no selectedAgentIds array; true when
 *   the gate status is anything other than "shared-component-sibling-pending";
 *   otherwise whether the plan's agents equal the agents the gate is waiting on.
 */
export function persistedRelaunchPlanMatchesCurrentState(
  agentRuns,
  persistedPlan,
  lanePaths,
  waveDefinition,
) {
  const selectedAgentIds = persistedPlan?.selectedAgentIds;
  if (!Array.isArray(selectedAgentIds)) {
    return false;
  }
  const gate = readWaveComponentGate(waveDefinition, agentRuns, {
    laneProfile: lanePaths?.laneProfile,
  });
  const siblingPending = gate?.statusCode === "shared-component-sibling-pending";
  return siblingPending ? sameAgentIdSet(selectedAgentIds, gate.waitingOnAgentIds || []) : true;
}
|
|
2516
|
+
|
|
2240
2517
|
function applyPersistedRelaunchPlan(agentRuns, persistedPlan, lanePaths, waveDefinition) {
|
|
2241
2518
|
if (!persistedPlan || !Array.isArray(persistedPlan.selectedAgentIds)) {
|
|
2242
2519
|
return [];
|
|
@@ -2255,6 +2532,42 @@ function applyPersistedRelaunchPlan(agentRuns, persistedPlan, lanePaths, waveDef
|
|
|
2255
2532
|
.filter(Boolean);
|
|
2256
2533
|
}
|
|
2257
2534
|
|
|
2535
|
+
/**
 * Work out which runs should continue a wave whose only failures are
 * "shared-component-sibling-pending".
 *
 * @param {Array<object>} currentRuns Runs that were just executed.
 * @param {Array<object>} agentRuns All agent runs of the wave.
 * @param {Array<object>} failures Gate failures; each may list waitingOnAgentIds.
 * @param {object} derivedState Passed through to resolveRelaunchRuns.
 * @param {object} lanePaths Passed through to resolveRelaunchRuns.
 * @param {object|null} [waveDefinition=null] Passed through to resolveRelaunchRuns.
 * @returns {Array<object>} The relaunch runs from resolveRelaunchRuns when they
 *   include at least one agent outside currentRuns; otherwise an empty array.
 *   Also empty when either list is empty or not an array, when any failure has
 *   a different status code, when a current run is itself being waited on, or
 *   when the relaunch resolution reports a barrier or no runs.
 */
export function resolveSharedComponentContinuationRuns(
  currentRuns,
  agentRuns,
  failures,
  derivedState,
  lanePaths,
  waveDefinition = null,
) {
  const hasEntries = (value) => Array.isArray(value) && value.length > 0;
  if (!hasEntries(currentRuns) || !hasEntries(failures)) {
    return [];
  }
  const hasOtherFailure = failures.some(
    (failure) => failure.statusCode !== "shared-component-sibling-pending",
  );
  if (hasOtherFailure) {
    return [];
  }

  const currentRunIds = new Set();
  for (const run of currentRuns) {
    currentRunIds.add(run.agent.agentId);
  }
  const waitingAgentIds = new Set();
  for (const failure of failures) {
    for (const agentId of failure.waitingOnAgentIds || []) {
      if (agentId) {
        waitingAgentIds.add(agentId);
      }
    }
  }
  for (const agentId of currentRunIds) {
    if (waitingAgentIds.has(agentId)) {
      return [];
    }
  }

  const { barrier, runs } = resolveRelaunchRuns(
    agentRuns,
    failures,
    derivedState,
    lanePaths,
    waveDefinition,
  );
  if (barrier || runs.length === 0) {
    return [];
  }
  const addsNewAgent = runs.some((run) => !currentRunIds.has(run.agent.agentId));
  return addsNewAgent ? runs : [];
}
|
|
2570
|
+
|
|
2258
2571
|
function relaunchReasonBuckets(runs, failures, derivedState) {
|
|
2259
2572
|
const selectedAgentIds = new Set((runs || []).map((run) => run.agent.agentId));
|
|
2260
2573
|
return {
|
|
@@ -2360,7 +2673,10 @@ export function hasReusableSuccessStatus(agent, statusPath, options = {}) {
|
|
|
2360
2673
|
if (!summary) {
|
|
2361
2674
|
return false;
|
|
2362
2675
|
}
|
|
2363
|
-
|
|
2676
|
+
const effectiveSummary = options.proofRegistry
|
|
2677
|
+
? augmentSummaryWithProofRegistry(agent, summary, options.proofRegistry)
|
|
2678
|
+
: summary;
|
|
2679
|
+
if (!validateImplementationSummary(agent, effectiveSummary).ok) {
|
|
2364
2680
|
return false;
|
|
2365
2681
|
}
|
|
2366
2682
|
if (proofCentricReuseBlocked(options.derivedState)) {
|
|
@@ -2378,6 +2694,28 @@ function isClosureAgentId(agent, lanePaths) {
|
|
|
2378
2694
|
].includes(agent?.agentId) || isSecurityReviewAgent(agent);
|
|
2379
2695
|
}
|
|
2380
2696
|
|
|
2697
|
+
/**
 * Collect the ids of agents whose earlier successful run can be reused for this wave.
 *
 * A run contributes its agent id when all of the following hold:
 *  - the id is not listed in `retryOverride.clearReusableAgentIds`,
 *  - `isClosureAgentId(agent, lanePaths)` is false,
 *  - `hasReusableSuccessStatus(agent, statusPath, { wave, derivedState, proofRegistry })` is true.
 *
 * Entries that are null/undefined or carry no `agent` object are skipped rather than
 * raising a TypeError, mirroring the `run?.agent` tolerance in `selectInitialWaveRuns`.
 *
 * @param {Array<object>|null|undefined} agentRuns - Run records; `agent` and `statusPath` are read from each.
 * @param {object} lanePaths - Lane paths, forwarded unchanged to `isClosureAgentId`.
 * @param {object} [options]
 * @param {object|null} [options.retryOverride] - Optional override; only `clearReusableAgentIds` is read here.
 * @param {object|null} [options.wave] - Forwarded to `hasReusableSuccessStatus`.
 * @param {object|null} [options.derivedState] - Forwarded to `hasReusableSuccessStatus`.
 * @param {object|null} [options.proofRegistry] - Forwarded to `hasReusableSuccessStatus`.
 * @returns {Set} Agent ids of the reusable runs, in `agentRuns` order.
 */
export function selectReusablePreCompletedAgentIds(
  agentRuns,
  lanePaths,
  { retryOverride = null, wave = null, derivedState = null, proofRegistry = null } = {},
) {
  const retryOverrideClearedAgentIds = new Set(retryOverride?.clearReusableAgentIds || []);
  return new Set(
    (agentRuns || [])
      .filter((run) => {
        const agent = run?.agent;
        // A record without an agent cannot be matched to a status file; never reuse it.
        if (!agent) {
          return false;
        }
        return (
          !retryOverrideClearedAgentIds.has(agent.agentId) &&
          !isClosureAgentId(agent, lanePaths) &&
          hasReusableSuccessStatus(agent, run.statusPath, {
            wave,
            derivedState,
            proofRegistry,
          })
        );
      })
      .map((run) => run.agent.agentId),
  );
}
|
|
2718
|
+
|
|
2381
2719
|
export function selectInitialWaveRuns(agentRuns, lanePaths) {
|
|
2382
2720
|
const implementationRuns = (agentRuns || []).filter(
|
|
2383
2721
|
(run) => !isClosureAgentId(run?.agent, lanePaths),
|
|
@@ -2971,6 +3309,9 @@ export async function runLauncherCli(argv) {
|
|
|
2971
3309
|
return;
|
|
2972
3310
|
}
|
|
2973
3311
|
const { lanePaths, options } = parsed;
|
|
3312
|
+
if (!options.reconcileStatus) {
|
|
3313
|
+
await maybeAnnouncePackageUpdate();
|
|
3314
|
+
}
|
|
2974
3315
|
let lockHeld = false;
|
|
2975
3316
|
let globalDashboard = null;
|
|
2976
3317
|
let globalDashboardTerminalEntry = null;
|
|
@@ -3004,10 +3345,12 @@ export async function runLauncherCli(argv) {
|
|
|
3004
3345
|
ensureDirectory(lanePaths.messageboardsDir);
|
|
3005
3346
|
ensureDirectory(lanePaths.dashboardsDir);
|
|
3006
3347
|
ensureDirectory(lanePaths.coordinationDir);
|
|
3348
|
+
ensureDirectory(lanePaths.controlDir);
|
|
3007
3349
|
ensureDirectory(lanePaths.assignmentsDir);
|
|
3008
3350
|
ensureDirectory(lanePaths.inboxesDir);
|
|
3009
3351
|
ensureDirectory(lanePaths.ledgerDir);
|
|
3010
3352
|
ensureDirectory(lanePaths.integrationDir);
|
|
3353
|
+
ensureDirectory(lanePaths.proofDir);
|
|
3011
3354
|
ensureDirectory(lanePaths.securityDir);
|
|
3012
3355
|
ensureDirectory(lanePaths.dependencySnapshotsDir);
|
|
3013
3356
|
ensureDirectory(lanePaths.docsQueueDir);
|
|
@@ -3326,6 +3669,8 @@ export async function runLauncherCli(argv) {
|
|
|
3326
3669
|
let dashboardState = null;
|
|
3327
3670
|
let terminalEntries = [];
|
|
3328
3671
|
let terminalsAppended = false;
|
|
3672
|
+
let residentOrchestratorRun = null;
|
|
3673
|
+
const residentOrchestratorState = { closed: false };
|
|
3329
3674
|
|
|
3330
3675
|
const flushDashboards = () => {
|
|
3331
3676
|
if (!dashboardState) {
|
|
@@ -3349,6 +3694,13 @@ export async function runLauncherCli(argv) {
|
|
|
3349
3694
|
message: `${globalMessagePrefix}${message}`,
|
|
3350
3695
|
});
|
|
3351
3696
|
};
|
|
3697
|
+
/**
 * Flush this lane's wave-control queue via `flushWaveControlQueue(lanePaths)`.
 * The returned promise always fulfils: a synchronous throw or a rejection from the
 * flush is discarded on purpose.
 */
const flushWaveControlTelemetry = async () => {
  // Wrapping the call in an async thunk turns a synchronous throw into a rejection,
  // so one handler covers both failure shapes.
  const pendingFlush = (async () => flushWaveControlQueue(lanePaths))();
  await pendingFlush.catch(() => {
    // Remote telemetry delivery is best-effort only.
  });
};
|
|
3352
3704
|
|
|
3353
3705
|
try {
|
|
3354
3706
|
terminalEntries = createTemporaryTerminalEntries(
|
|
@@ -3388,6 +3740,10 @@ export async function runLauncherCli(argv) {
|
|
|
3388
3740
|
});
|
|
3389
3741
|
|
|
3390
3742
|
const refreshDerivedState = (attemptNumber = 0) => {
|
|
3743
|
+
const proofRegistry = readWaveProofRegistry(lanePaths, wave.wave);
|
|
3744
|
+
for (const run of agentRuns) {
|
|
3745
|
+
run.proofRegistry = proofRegistry;
|
|
3746
|
+
}
|
|
3391
3747
|
const summariesByAgentId = Object.fromEntries(
|
|
3392
3748
|
agentRuns
|
|
3393
3749
|
.map((run) => [run.agent.agentId, readRunExecutionSummary(run, wave)])
|
|
@@ -3421,7 +3777,8 @@ export async function runLauncherCli(argv) {
|
|
|
3421
3777
|
};
|
|
3422
3778
|
|
|
3423
3779
|
refreshDerivedState(0);
|
|
3424
|
-
|
|
3780
|
+
let persistedRelaunchPlan = readWaveRelaunchPlan(lanePaths, wave.wave);
|
|
3781
|
+
let retryOverride = readWaveRetryOverride(lanePaths, wave.wave);
|
|
3425
3782
|
|
|
3426
3783
|
dashboardState = buildWaveDashboardState({
|
|
3427
3784
|
lane: lanePaths.lane,
|
|
@@ -3433,19 +3790,77 @@ export async function runLauncherCli(argv) {
|
|
|
3433
3790
|
agentRuns,
|
|
3434
3791
|
});
|
|
3435
3792
|
applyDerivedStateToDashboard(dashboardState, derivedState);
|
|
3793
|
+
const feedbackStateByRequestId = new Map();
|
|
3794
|
+
const coordinationAlertState = {
|
|
3795
|
+
overdueAckSignature: "",
|
|
3796
|
+
overdueClarificationSignature: "",
|
|
3797
|
+
};
|
|
3798
|
+
let lastLiveCoordinationRefreshAt = 0;
|
|
3799
|
+
/**
 * Raise warnings for overdue acknowledgements and stale clarification chains.
 *
 * Each category is reduced to a comma-joined "signature" of its overdue ids. A warning
 * (one `recordCombinedEvent` plus one `appendCoordination`) is emitted only when the
 * signature is non-empty and differs from the one stored in `coordinationAlertState`.
 * The stored signature is always overwritten with the latest value, including the
 * empty string.
 *
 * @param {object} [currentDerivedState=derivedState] - State to inspect. Its
 *   `responseMetrics` are used when truthy; otherwise metrics are built from its
 *   `coordinationState` via `buildCoordinationResponseMetrics`.
 */
const emitCoordinationAlertEvents = (currentDerivedState = derivedState) => {
  const metrics =
    currentDerivedState?.responseMetrics ||
    buildCoordinationResponseMetrics(currentDerivedState?.coordinationState);

  // Shared "alert when the signature changed" step for both categories.
  const alertOnChange = (stateKey, signature, describe) => {
    const changed = Boolean(signature) && signature !== coordinationAlertState[stateKey];
    if (changed) {
      recordCombinedEvent({
        level: "warn",
        message: describe.message(signature),
      });
      appendCoordination(describe.coordination(signature));
    }
    coordinationAlertState[stateKey] = signature;
  };

  alertOnChange("overdueAckSignature", (metrics?.overdueAckRecordIds || []).join(","), {
    message: (ids) => `Overdue acknowledgements in coordination state: ${ids}.`,
    coordination: (ids) => ({
      event: "coordination_ack_overdue",
      waves: [wave.wave],
      status: "warn",
      details: `records=${ids}; ack_timeout_ms=${DEFAULT_COORDINATION_ACK_TIMEOUT_MS}`,
      actionRequested:
        "Assigned owners should acknowledge, resolve, or reroute the targeted coordination items.",
    }),
  });

  alertOnChange(
    "overdueClarificationSignature",
    (metrics?.overdueClarificationIds || []).join(","),
    {
      message: (ids) => `Stale clarification chains remain open: ${ids}.`,
      coordination: (ids) => ({
        event: "clarification_chain_stale",
        waves: [wave.wave],
        status: "warn",
        details: `clarifications=${ids}`,
        actionRequested:
          "The orchestrator should reroute, resolve, or escalate the stale clarification chain.",
      }),
    },
  );
};
|
|
3844
|
+
/**
 * Throttled refresh of derived coordination state.
 *
 * Unless `force` is set, nothing happens when less than
 * `DEFAULT_LIVE_COORDINATION_REFRESH_MS` has elapsed since the last recorded refresh.
 * Otherwise the derived state is rebuilt, the refresh time is recorded, the message
 * board is updated, coordination alerts are emitted and dashboards are flushed.
 *
 * @param {number} [attemptNumber=0] - Passed through to `refreshDerivedState`.
 * @param {object} [options]
 * @param {boolean} [options.force=false] - Skip the throttle check.
 * @returns {boolean} `true` when a refresh ran, `false` when it was throttled.
 */
const refreshActiveCoordinationState = (attemptNumber = 0, { force = false } = {}) => {
  const checkedAt = Date.now();
  const throttled =
    !force && checkedAt - lastLiveCoordinationRefreshAt < DEFAULT_LIVE_COORDINATION_REFRESH_MS;
  if (throttled) {
    return false;
  }

  refreshDerivedState(attemptNumber);
  // The timestamp taken before the refresh is the one recorded, not the time it finished.
  lastLiveCoordinationRefreshAt = checkedAt;
  updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
  emitCoordinationAlertEvents(derivedState);
  flushDashboards();
  return true;
};
|
|
3436
3856
|
|
|
3437
|
-
const
|
|
3438
|
-
|
|
3439
|
-
|
|
3440
|
-
|
|
3441
|
-
|
|
3442
|
-
|
|
3443
|
-
|
|
3444
|
-
derivedState,
|
|
3445
|
-
}),
|
|
3446
|
-
)
|
|
3447
|
-
.map((run) => run.agent.agentId),
|
|
3448
|
-
);
|
|
3857
|
+
const proofRegistryForReuse = readWaveProofRegistry(lanePaths, wave.wave);
|
|
3858
|
+
const preCompletedAgentIds = selectReusablePreCompletedAgentIds(agentRuns, lanePaths, {
|
|
3859
|
+
retryOverride,
|
|
3860
|
+
wave,
|
|
3861
|
+
derivedState,
|
|
3862
|
+
proofRegistry: proofRegistryForReuse,
|
|
3863
|
+
});
|
|
3449
3864
|
for (const agentId of preCompletedAgentIds) {
|
|
3450
3865
|
setWaveDashboardAgent(dashboardState, agentId, {
|
|
3451
3866
|
state: "completed",
|
|
@@ -3468,6 +3883,7 @@ export async function runLauncherCli(argv) {
|
|
|
3468
3883
|
});
|
|
3469
3884
|
}
|
|
3470
3885
|
flushDashboards();
|
|
3886
|
+
emitCoordinationAlertEvents(derivedState);
|
|
3471
3887
|
|
|
3472
3888
|
if (options.dashboard && currentWaveDashboardTerminalEntry) {
|
|
3473
3889
|
launchWaveDashboardSession(lanePaths, {
|
|
@@ -3477,28 +3893,162 @@ export async function runLauncherCli(argv) {
|
|
|
3477
3893
|
});
|
|
3478
3894
|
}
|
|
3479
3895
|
|
|
3896
|
+
if (options.residentOrchestrator) {
|
|
3897
|
+
const residentSetup = buildResidentOrchestratorRun({
|
|
3898
|
+
lanePaths,
|
|
3899
|
+
wave,
|
|
3900
|
+
agentRuns,
|
|
3901
|
+
derivedState,
|
|
3902
|
+
dashboardPath,
|
|
3903
|
+
runTag,
|
|
3904
|
+
options,
|
|
3905
|
+
});
|
|
3906
|
+
if (residentSetup.skipReason) {
|
|
3907
|
+
recordCombinedEvent({
|
|
3908
|
+
level: "warn",
|
|
3909
|
+
message: residentSetup.skipReason,
|
|
3910
|
+
});
|
|
3911
|
+
} else if (residentSetup.run) {
|
|
3912
|
+
residentOrchestratorRun = residentSetup.run;
|
|
3913
|
+
const launchResult = await launchAgentSession(lanePaths, {
|
|
3914
|
+
wave: wave.wave,
|
|
3915
|
+
waveDefinition: wave,
|
|
3916
|
+
agent: residentOrchestratorRun.agent,
|
|
3917
|
+
sessionName: residentOrchestratorRun.sessionName,
|
|
3918
|
+
promptPath: residentOrchestratorRun.promptPath,
|
|
3919
|
+
logPath: residentOrchestratorRun.logPath,
|
|
3920
|
+
statusPath: residentOrchestratorRun.statusPath,
|
|
3921
|
+
messageBoardPath: derivedState.messageBoardPath,
|
|
3922
|
+
messageBoardSnapshot: derivedState.messageBoardText,
|
|
3923
|
+
sharedSummaryPath: derivedState.sharedSummaryPath,
|
|
3924
|
+
sharedSummaryText: derivedState.sharedSummaryText,
|
|
3925
|
+
inboxPath: null,
|
|
3926
|
+
inboxText: "",
|
|
3927
|
+
promptOverride: residentOrchestratorRun.promptOverride,
|
|
3928
|
+
orchestratorId: options.orchestratorId,
|
|
3929
|
+
agentRateLimitRetries: options.agentRateLimitRetries,
|
|
3930
|
+
agentRateLimitBaseDelaySeconds: options.agentRateLimitBaseDelaySeconds,
|
|
3931
|
+
agentRateLimitMaxDelaySeconds: options.agentRateLimitMaxDelaySeconds,
|
|
3932
|
+
context7Enabled: options.context7Enabled,
|
|
3933
|
+
});
|
|
3934
|
+
residentOrchestratorRun.lastPromptHash = launchResult?.promptHash || null;
|
|
3935
|
+
residentOrchestratorRun.lastExecutorId =
|
|
3936
|
+
launchResult?.executorId || residentOrchestratorRun.agent.executorResolved?.id || null;
|
|
3937
|
+
recordCombinedEvent({
|
|
3938
|
+
agentId: residentOrchestratorRun.agent.agentId,
|
|
3939
|
+
message: `Resident orchestrator launched in tmux session ${residentOrchestratorRun.sessionName}`,
|
|
3940
|
+
});
|
|
3941
|
+
appendCoordination({
|
|
3942
|
+
event: "resident_orchestrator_start",
|
|
3943
|
+
waves: [wave.wave],
|
|
3944
|
+
status: "running",
|
|
3945
|
+
details: `session=${residentOrchestratorRun.sessionName}; executor=${residentOrchestratorRun.lastExecutorId || "unknown"}`,
|
|
3946
|
+
actionRequested: "None",
|
|
3947
|
+
});
|
|
3948
|
+
}
|
|
3949
|
+
}
|
|
3950
|
+
|
|
3480
3951
|
const availableRuns = agentRuns.filter((run) => !preCompletedAgentIds.has(run.agent.agentId));
|
|
3952
|
+
if (
|
|
3953
|
+
persistedRelaunchPlan &&
|
|
3954
|
+
!persistedRelaunchPlanMatchesCurrentState(
|
|
3955
|
+
agentRuns,
|
|
3956
|
+
persistedRelaunchPlan,
|
|
3957
|
+
lanePaths,
|
|
3958
|
+
wave,
|
|
3959
|
+
)
|
|
3960
|
+
) {
|
|
3961
|
+
clearWaveRelaunchPlan(lanePaths, wave.wave);
|
|
3962
|
+
persistedRelaunchPlan = null;
|
|
3963
|
+
}
|
|
3481
3964
|
const persistedRuns = applyPersistedRelaunchPlan(
|
|
3482
3965
|
availableRuns,
|
|
3483
3966
|
persistedRelaunchPlan,
|
|
3484
3967
|
lanePaths,
|
|
3485
3968
|
wave,
|
|
3486
3969
|
);
|
|
3970
|
+
const overrideRuns = resolveRetryOverrideRuns(availableRuns, retryOverride, lanePaths, wave);
|
|
3971
|
+
if (overrideRuns.unknownAgentIds.length > 0) {
|
|
3972
|
+
appendCoordination({
|
|
3973
|
+
event: "retry_override_invalid",
|
|
3974
|
+
waves: [wave.wave],
|
|
3975
|
+
status: "warn",
|
|
3976
|
+
details: `unknown_agents=${overrideRuns.unknownAgentIds.join(",")}`,
|
|
3977
|
+
actionRequested:
|
|
3978
|
+
"Retry override references agent ids that do not exist in the current wave definition.",
|
|
3979
|
+
});
|
|
3980
|
+
clearWaveRetryOverride(lanePaths, wave.wave);
|
|
3981
|
+
retryOverride = null;
|
|
3982
|
+
}
|
|
3487
3983
|
let runsToLaunch =
|
|
3488
|
-
|
|
3984
|
+
overrideRuns.unknownAgentIds.length === 0 && overrideRuns.runs.length > 0
|
|
3985
|
+
? overrideRuns.runs
|
|
3986
|
+
: persistedRuns.length > 0
|
|
3987
|
+
? persistedRuns
|
|
3988
|
+
: selectInitialWaveRuns(availableRuns, lanePaths);
|
|
3989
|
+
if (overrideRuns.runs.length > 0) {
|
|
3990
|
+
appendCoordination({
|
|
3991
|
+
event: "retry_override_applied",
|
|
3992
|
+
waves: [wave.wave],
|
|
3993
|
+
status: "running",
|
|
3994
|
+
details: `agents=${overrideRuns.selectedAgentIds.join(",")}; requested_by=${retryOverride?.requestedBy || "human-operator"}`,
|
|
3995
|
+
actionRequested: "None",
|
|
3996
|
+
});
|
|
3997
|
+
if (retryOverride?.applyOnce !== false) {
|
|
3998
|
+
clearWaveRetryOverride(lanePaths, wave.wave);
|
|
3999
|
+
retryOverride = null;
|
|
4000
|
+
}
|
|
4001
|
+
}
|
|
3489
4002
|
let attempt = 1;
|
|
3490
|
-
|
|
4003
|
+
let traceAttempt = 1;
|
|
3491
4004
|
let completionGateSnapshot = null;
|
|
3492
4005
|
let completionTraceDir = null;
|
|
4006
|
+
/**
 * Append an "attempt" control event for the current wave.
 *
 * @param {number} attemptNumber - Attempt index; also embedded in the entity id.
 * @param {string} state - Used both as the event `action` and as `data.state`.
 * @param {object} [data] - Optional extras: `selectedAgentIds` (defaults to `[]`),
 *   `detail` (defaults to `null`) and `createdAt` (included only when truthy).
 * @returns {*} Whatever `appendWaveControlEvent` returns.
 */
const recordAttemptState = (attemptNumber, state, data = {}) => {
  const attemptId = `wave-${wave.wave}-attempt-${attemptNumber}`;
  const payload = {
    attemptId,
    attemptNumber,
    state,
    selectedAgentIds: data.selectedAgentIds || [],
    detail: data.detail || null,
    updatedAt: toIsoTimestamp(),
  };
  // createdAt is set only when the caller supplied one.
  if (data.createdAt) {
    payload.createdAt = data.createdAt;
  }
  return appendWaveControlEvent(lanePaths, wave.wave, {
    entityType: "attempt",
    entityId: attemptId,
    action: state,
    source: "launcher",
    actor: "launcher",
    data: payload,
  });
};
|
|
4023
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4024
|
+
entityType: "wave_run",
|
|
4025
|
+
entityId: `wave-${wave.wave}`,
|
|
4026
|
+
action: "started",
|
|
4027
|
+
source: "launcher",
|
|
4028
|
+
actor: "launcher",
|
|
4029
|
+
data: {
|
|
4030
|
+
waveId: `wave-${wave.wave}`,
|
|
4031
|
+
waveNumber: wave.wave,
|
|
4032
|
+
agentIds: wave.agents.map((agent) => agent.agentId),
|
|
4033
|
+
runVariant: lanePaths.runVariant || "live",
|
|
4034
|
+
},
|
|
4035
|
+
});
|
|
3493
4036
|
|
|
3494
4037
|
while (attempt <= options.maxRetriesPerWave + 1) {
|
|
3495
4038
|
refreshDerivedState(attempt - 1);
|
|
4039
|
+
lastLiveCoordinationRefreshAt = Date.now();
|
|
3496
4040
|
dashboardState.attempt = attempt;
|
|
3497
4041
|
updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
|
|
4042
|
+
emitCoordinationAlertEvents(derivedState);
|
|
3498
4043
|
flushDashboards();
|
|
3499
4044
|
recordCombinedEvent({
|
|
3500
4045
|
message: `Attempt ${attempt}/${options.maxRetriesPerWave + 1}; launching agents: ${runsToLaunch.map((run) => run.agent.agentId).join(", ") || "none"}`,
|
|
3501
4046
|
});
|
|
4047
|
+
recordAttemptState(attempt, "running", {
|
|
4048
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4049
|
+
detail: `Launching ${runsToLaunch.map((run) => run.agent.agentId).join(", ") || "no"} agents.`,
|
|
4050
|
+
createdAt: toIsoTimestamp(),
|
|
4051
|
+
});
|
|
3502
4052
|
|
|
3503
4053
|
const launchedImplementationRuns = runsToLaunch.filter(
|
|
3504
4054
|
(run) =>
|
|
@@ -3589,6 +4139,23 @@ export async function runLauncherCli(argv) {
|
|
|
3589
4139
|
state: "running",
|
|
3590
4140
|
detail: "Session launched",
|
|
3591
4141
|
});
|
|
4142
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4143
|
+
entityType: "agent_run",
|
|
4144
|
+
entityId: `wave-${wave.wave}-attempt-${attempt}-agent-${runInfo.agent.agentId}`,
|
|
4145
|
+
action: "started",
|
|
4146
|
+
source: "launcher",
|
|
4147
|
+
actor: runInfo.agent.agentId,
|
|
4148
|
+
attempt,
|
|
4149
|
+
data: {
|
|
4150
|
+
agentId: runInfo.agent.agentId,
|
|
4151
|
+
attemptNumber: attempt,
|
|
4152
|
+
sessionName: runInfo.sessionName,
|
|
4153
|
+
executorId: runInfo.lastExecutorId,
|
|
4154
|
+
promptPath: path.relative(REPO_ROOT, runInfo.promptPath),
|
|
4155
|
+
statusPath: path.relative(REPO_ROOT, runInfo.statusPath),
|
|
4156
|
+
logPath: path.relative(REPO_ROOT, runInfo.logPath),
|
|
4157
|
+
},
|
|
4158
|
+
});
|
|
3592
4159
|
recordCombinedEvent({
|
|
3593
4160
|
agentId: runInfo.agent.agentId,
|
|
3594
4161
|
message: `Launched in tmux session ${runInfo.sessionName}`,
|
|
@@ -3623,7 +4190,7 @@ export async function runLauncherCli(argv) {
|
|
|
3623
4190
|
pendingAgentIds,
|
|
3624
4191
|
(event) => recordCombinedEvent(event),
|
|
3625
4192
|
);
|
|
3626
|
-
monitorWaveHumanFeedback({
|
|
4193
|
+
const feedbackChanged = monitorWaveHumanFeedback({
|
|
3627
4194
|
lanePaths,
|
|
3628
4195
|
waveNumber: wave.wave,
|
|
3629
4196
|
agentRuns: runsToLaunch,
|
|
@@ -3633,8 +4200,21 @@ export async function runLauncherCli(argv) {
|
|
|
3633
4200
|
recordCombinedEvent,
|
|
3634
4201
|
appendCoordination,
|
|
3635
4202
|
});
|
|
3636
|
-
|
|
3637
|
-
|
|
4203
|
+
const residentChanged = monitorResidentOrchestratorSession({
|
|
4204
|
+
lanePaths,
|
|
4205
|
+
run: residentOrchestratorRun,
|
|
4206
|
+
waveNumber: wave.wave,
|
|
4207
|
+
recordCombinedEvent,
|
|
4208
|
+
appendCoordination,
|
|
4209
|
+
sessionState: residentOrchestratorState,
|
|
4210
|
+
});
|
|
4211
|
+
const refreshed = refreshActiveCoordinationState(attempt, {
|
|
4212
|
+
force: feedbackChanged || residentChanged,
|
|
4213
|
+
});
|
|
4214
|
+
if (!refreshed) {
|
|
4215
|
+
updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
|
|
4216
|
+
flushDashboards();
|
|
4217
|
+
}
|
|
3638
4218
|
},
|
|
3639
4219
|
);
|
|
3640
4220
|
failures = waitResult.failures;
|
|
@@ -3642,7 +4222,31 @@ export async function runLauncherCli(argv) {
|
|
|
3642
4222
|
}
|
|
3643
4223
|
|
|
3644
4224
|
materializeAgentExecutionSummaries(wave, agentRuns);
|
|
4225
|
+
for (const runInfo of runsToLaunch) {
|
|
4226
|
+
const statusRecord = readStatusRecordIfPresent(runInfo.statusPath);
|
|
4227
|
+
const action = Number(statusRecord?.code) === 0 ? "completed" : "failed";
|
|
4228
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4229
|
+
entityType: "agent_run",
|
|
4230
|
+
entityId: `wave-${wave.wave}-attempt-${attempt}-agent-${runInfo.agent.agentId}`,
|
|
4231
|
+
action,
|
|
4232
|
+
source: "launcher",
|
|
4233
|
+
actor: runInfo.agent.agentId,
|
|
4234
|
+
attempt,
|
|
4235
|
+
data: {
|
|
4236
|
+
agentId: runInfo.agent.agentId,
|
|
4237
|
+
attemptNumber: attempt,
|
|
4238
|
+
exitCode: statusRecord?.code ?? null,
|
|
4239
|
+
completedAt: statusRecord?.completedAt || null,
|
|
4240
|
+
promptHash: statusRecord?.promptHash || runInfo.lastPromptHash || null,
|
|
4241
|
+
executorId: runInfo.lastExecutorId || null,
|
|
4242
|
+
logPath: path.relative(REPO_ROOT, runInfo.logPath),
|
|
4243
|
+
statusPath: path.relative(REPO_ROOT, runInfo.statusPath),
|
|
4244
|
+
},
|
|
4245
|
+
});
|
|
4246
|
+
}
|
|
3645
4247
|
refreshDerivedState(attempt);
|
|
4248
|
+
lastLiveCoordinationRefreshAt = Date.now();
|
|
4249
|
+
emitCoordinationAlertEvents(derivedState);
|
|
3646
4250
|
failures = reconcileFailuresAgainstSharedComponentState(wave, agentRuns, failures);
|
|
3647
4251
|
for (const failure of failures) {
|
|
3648
4252
|
if (failure.statusCode === "shared-component-sibling-pending") {
|
|
@@ -4071,7 +4675,7 @@ export async function runLauncherCli(argv) {
|
|
|
4071
4675
|
lanePaths,
|
|
4072
4676
|
launcherOptions: options,
|
|
4073
4677
|
wave,
|
|
4074
|
-
attempt,
|
|
4678
|
+
attempt: traceAttempt,
|
|
4075
4679
|
manifest: buildManifest(lanePaths, [wave]),
|
|
4076
4680
|
coordinationLogPath: derivedState.coordinationLogPath,
|
|
4077
4681
|
coordinationState: derivedState.coordinationState,
|
|
@@ -4082,6 +4686,8 @@ export async function runLauncherCli(argv) {
|
|
|
4082
4686
|
securitySummary: derivedState.securitySummary,
|
|
4083
4687
|
integrationSummary: derivedState.integrationSummary,
|
|
4084
4688
|
integrationMarkdownPath: derivedState.integrationMarkdownPath,
|
|
4689
|
+
proofRegistryPath: waveProofRegistryPath(lanePaths, wave.wave),
|
|
4690
|
+
controlPlanePath: path.join(lanePaths.controlPlaneDir, `wave-${wave.wave}.jsonl`),
|
|
4085
4691
|
clarificationTriage: derivedState.clarificationTriage,
|
|
4086
4692
|
agentRuns,
|
|
4087
4693
|
structuredSignals,
|
|
@@ -4098,13 +4704,103 @@ export async function runLauncherCli(argv) {
|
|
|
4098
4704
|
summariesByAgentId,
|
|
4099
4705
|
agentRuns,
|
|
4100
4706
|
gateSnapshot,
|
|
4101
|
-
attempt,
|
|
4707
|
+
attempt: traceAttempt,
|
|
4102
4708
|
coordinationLogPath: derivedState.coordinationLogPath,
|
|
4103
4709
|
}),
|
|
4104
4710
|
});
|
|
4105
4711
|
completionTraceDir = traceDir;
|
|
4712
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4713
|
+
entityType: "gate",
|
|
4714
|
+
entityId: `wave-${wave.wave}-attempt-${attempt}-gate`,
|
|
4715
|
+
action: "evaluated",
|
|
4716
|
+
source: "launcher",
|
|
4717
|
+
actor: "launcher",
|
|
4718
|
+
attempt,
|
|
4719
|
+
data: {
|
|
4720
|
+
attemptNumber: attempt,
|
|
4721
|
+
traceDir: path.relative(REPO_ROOT, traceDir),
|
|
4722
|
+
gateSnapshot,
|
|
4723
|
+
qualitySummary: {
|
|
4724
|
+
contradictionCount: gateSnapshot?.integration?.conflictingClaims?.length || 0,
|
|
4725
|
+
finalRecommendation: derivedState.integrationSummary?.recommendation || "unknown",
|
|
4726
|
+
},
|
|
4727
|
+
},
|
|
4728
|
+
});
|
|
4729
|
+
await flushWaveControlTelemetry();
|
|
4730
|
+
|
|
4731
|
+
const sharedComponentContinuationRuns = resolveSharedComponentContinuationRuns(
|
|
4732
|
+
runsToLaunch,
|
|
4733
|
+
agentRuns,
|
|
4734
|
+
failures,
|
|
4735
|
+
derivedState,
|
|
4736
|
+
lanePaths,
|
|
4737
|
+
wave,
|
|
4738
|
+
);
|
|
4739
|
+
if (sharedComponentContinuationRuns.length > 0) {
|
|
4740
|
+
recordAttemptState(attempt, "completed", {
|
|
4741
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4742
|
+
detail: `Attempt completed; continuing with sibling owners ${sharedComponentContinuationRuns.map((run) => run.agent.agentId).join(", ")}.`,
|
|
4743
|
+
});
|
|
4744
|
+
runsToLaunch = sharedComponentContinuationRuns;
|
|
4745
|
+
const nextAgentIds = runsToLaunch.map((run) => run.agent.agentId);
|
|
4746
|
+
const nextAgentSummary = nextAgentIds.join(", ");
|
|
4747
|
+
recordCombinedEvent({
|
|
4748
|
+
message: `Shared component closure now depends on sibling owners: ${nextAgentSummary}.`,
|
|
4749
|
+
});
|
|
4750
|
+
appendCoordination({
|
|
4751
|
+
event: "wave_shared_component_continue",
|
|
4752
|
+
waves: [wave.wave],
|
|
4753
|
+
status: "running",
|
|
4754
|
+
details: `attempt=${attempt}/${options.maxRetriesPerWave + 1}; next_agents=${nextAgentSummary}`,
|
|
4755
|
+
actionRequested: `Lane ${lanePaths.lane} owners should let the remaining shared-component owners finish their proof before further retries.`,
|
|
4756
|
+
});
|
|
4757
|
+
for (const run of runsToLaunch) {
|
|
4758
|
+
setWaveDashboardAgent(dashboardState, run.agent.agentId, {
|
|
4759
|
+
state: "pending",
|
|
4760
|
+
detail: "Queued for shared component closure",
|
|
4761
|
+
});
|
|
4762
|
+
}
|
|
4763
|
+
writeWaveRelaunchPlan(lanePaths, wave.wave, {
|
|
4764
|
+
wave: wave.wave,
|
|
4765
|
+
attempt,
|
|
4766
|
+
phase: derivedState?.ledger?.phase || null,
|
|
4767
|
+
selectedAgentIds: nextAgentIds,
|
|
4768
|
+
reasonBuckets: relaunchReasonBuckets(runsToLaunch, failures, derivedState),
|
|
4769
|
+
executorStates: Object.fromEntries(
|
|
4770
|
+
runsToLaunch.map((run) => [run.agent.agentId, run.agent.executorResolved || null]),
|
|
4771
|
+
),
|
|
4772
|
+
fallbackHistory: Object.fromEntries(
|
|
4773
|
+
runsToLaunch.map((run) => [
|
|
4774
|
+
run.agent.agentId,
|
|
4775
|
+
run.agent.executorResolved?.executorHistory || [],
|
|
4776
|
+
]),
|
|
4777
|
+
),
|
|
4778
|
+
createdAt: toIsoTimestamp(),
|
|
4779
|
+
});
|
|
4780
|
+
flushDashboards();
|
|
4781
|
+
traceAttempt += 1;
|
|
4782
|
+
continue;
|
|
4783
|
+
}
|
|
4106
4784
|
|
|
4107
4785
|
if (failures.length === 0) {
|
|
4786
|
+
recordAttemptState(attempt, "completed", {
|
|
4787
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4788
|
+
detail: "Wave gates passed for this attempt.",
|
|
4789
|
+
});
|
|
4790
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4791
|
+
entityType: "wave_run",
|
|
4792
|
+
entityId: `wave-${wave.wave}`,
|
|
4793
|
+
action: "completed",
|
|
4794
|
+
source: "launcher",
|
|
4795
|
+
actor: "launcher",
|
|
4796
|
+
data: {
|
|
4797
|
+
waveId: `wave-${wave.wave}`,
|
|
4798
|
+
waveNumber: wave.wave,
|
|
4799
|
+
attempts: attempt,
|
|
4800
|
+
traceDir: completionTraceDir ? path.relative(REPO_ROOT, completionTraceDir) : null,
|
|
4801
|
+
gateSnapshot: completionGateSnapshot,
|
|
4802
|
+
},
|
|
4803
|
+
});
|
|
4108
4804
|
dashboardState.status = "completed";
|
|
4109
4805
|
recordCombinedEvent({ message: `Wave ${wave.wave} completed successfully.` });
|
|
4110
4806
|
refreshWaveDashboardAgentStates(dashboardState, agentRuns, new Set(), (event) =>
|
|
@@ -4112,10 +4808,36 @@ export async function runLauncherCli(argv) {
|
|
|
4112
4808
|
);
|
|
4113
4809
|
updateWaveDashboardMessageBoard(dashboardState, messageBoardPath);
|
|
4114
4810
|
flushDashboards();
|
|
4811
|
+
await flushWaveControlTelemetry();
|
|
4115
4812
|
break;
|
|
4116
4813
|
}
|
|
4117
4814
|
|
|
4118
4815
|
if (attempt >= options.maxRetriesPerWave + 1) {
|
|
4816
|
+
recordAttemptState(attempt, "failed", {
|
|
4817
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4818
|
+
detail: failures
|
|
4819
|
+
.map((failure) => `${failure.agentId || "wave"}:${failure.statusCode}`)
|
|
4820
|
+
.join(", "),
|
|
4821
|
+
});
|
|
4822
|
+
appendWaveControlEvent(lanePaths, wave.wave, {
|
|
4823
|
+
entityType: "wave_run",
|
|
4824
|
+
entityId: `wave-${wave.wave}`,
|
|
4825
|
+
action: "failed",
|
|
4826
|
+
source: "launcher",
|
|
4827
|
+
actor: "launcher",
|
|
4828
|
+
data: {
|
|
4829
|
+
waveId: `wave-${wave.wave}`,
|
|
4830
|
+
waveNumber: wave.wave,
|
|
4831
|
+
attempts: attempt,
|
|
4832
|
+
traceDir: completionTraceDir ? path.relative(REPO_ROOT, completionTraceDir) : null,
|
|
4833
|
+
gateSnapshot: completionGateSnapshot,
|
|
4834
|
+
failures: failures.map((failure) => ({
|
|
4835
|
+
agentId: failure.agentId || null,
|
|
4836
|
+
statusCode: failure.statusCode,
|
|
4837
|
+
detail: failure.detail || null,
|
|
4838
|
+
})),
|
|
4839
|
+
},
|
|
4840
|
+
});
|
|
4119
4841
|
dashboardState.status = timedOut ? "timed_out" : "failed";
|
|
4120
4842
|
for (const failure of failures) {
|
|
4121
4843
|
setWaveDashboardAgent(dashboardState, failure.agentId, {
|
|
@@ -4133,6 +4855,7 @@ export async function runLauncherCli(argv) {
|
|
|
4133
4855
|
const error = new Error(
|
|
4134
4856
|
`Wave ${wave.wave} failed after ${attempt} attempt(s):\n${details}`,
|
|
4135
4857
|
);
|
|
4858
|
+
await flushWaveControlTelemetry();
|
|
4136
4859
|
if (
|
|
4137
4860
|
failures.every(
|
|
4138
4861
|
(failure) =>
|
|
@@ -4147,6 +4870,12 @@ export async function runLauncherCli(argv) {
|
|
|
4147
4870
|
|
|
4148
4871
|
const failedAgentIds = new Set(failures.map((failure) => failure.agentId));
|
|
4149
4872
|
const failedList = Array.from(failedAgentIds).join(", ");
|
|
4873
|
+
recordAttemptState(attempt, "failed", {
|
|
4874
|
+
selectedAgentIds: runsToLaunch.map((run) => run.agent.agentId),
|
|
4875
|
+
detail: failures
|
|
4876
|
+
.map((failure) => `${failure.agentId || "wave"}:${failure.statusCode}`)
|
|
4877
|
+
.join(", "),
|
|
4878
|
+
});
|
|
4150
4879
|
console.warn(
|
|
4151
4880
|
`[retry] Wave ${wave.wave} had failures for agents: ${failedList}. Evaluating safe relaunch targets.`,
|
|
4152
4881
|
);
|
|
@@ -4164,7 +4893,38 @@ export async function runLauncherCli(argv) {
|
|
|
4164
4893
|
lanePaths,
|
|
4165
4894
|
wave,
|
|
4166
4895
|
);
|
|
4167
|
-
|
|
4896
|
+
retryOverride = readWaveRetryOverride(lanePaths, wave.wave);
|
|
4897
|
+
const overrideResolution = resolveRetryOverrideRuns(
|
|
4898
|
+
agentRuns,
|
|
4899
|
+
retryOverride,
|
|
4900
|
+
lanePaths,
|
|
4901
|
+
wave,
|
|
4902
|
+
);
|
|
4903
|
+
if (overrideResolution.unknownAgentIds.length > 0) {
|
|
4904
|
+
appendCoordination({
|
|
4905
|
+
event: "retry_override_invalid",
|
|
4906
|
+
waves: [wave.wave],
|
|
4907
|
+
status: "warn",
|
|
4908
|
+
details: `unknown_agents=${overrideResolution.unknownAgentIds.join(",")}`,
|
|
4909
|
+
actionRequested:
|
|
4910
|
+
"Retry override references agent ids that do not exist in the current wave definition.",
|
|
4911
|
+
});
|
|
4912
|
+
clearWaveRetryOverride(lanePaths, wave.wave);
|
|
4913
|
+
retryOverride = null;
|
|
4914
|
+
} else if (overrideResolution.runs.length > 0) {
|
|
4915
|
+
runsToLaunch = overrideResolution.runs;
|
|
4916
|
+
appendCoordination({
|
|
4917
|
+
event: "retry_override_applied",
|
|
4918
|
+
waves: [wave.wave],
|
|
4919
|
+
status: "running",
|
|
4920
|
+
details: `agents=${overrideResolution.selectedAgentIds.join(",")}; requested_by=${retryOverride?.requestedBy || "human-operator"}`,
|
|
4921
|
+
actionRequested: "None",
|
|
4922
|
+
});
|
|
4923
|
+
if (retryOverride?.applyOnce !== false) {
|
|
4924
|
+
clearWaveRetryOverride(lanePaths, wave.wave);
|
|
4925
|
+
retryOverride = null;
|
|
4926
|
+
}
|
|
4927
|
+
} else if (relaunchResolution.barrier) {
|
|
4168
4928
|
clearWaveRelaunchPlan(lanePaths, wave.wave);
|
|
4169
4929
|
for (const failure of relaunchResolution.barrier.failures) {
|
|
4170
4930
|
recordCombinedEvent({
|
|
@@ -4190,8 +4950,9 @@ export async function runLauncherCli(argv) {
|
|
|
4190
4950
|
);
|
|
4191
4951
|
error.exitCode = 43;
|
|
4192
4952
|
throw error;
|
|
4953
|
+
} else {
|
|
4954
|
+
runsToLaunch = relaunchResolution.runs;
|
|
4193
4955
|
}
|
|
4194
|
-
runsToLaunch = relaunchResolution.runs;
|
|
4195
4956
|
if (runsToLaunch.length === 0) {
|
|
4196
4957
|
clearWaveRelaunchPlan(lanePaths, wave.wave);
|
|
4197
4958
|
const error = new Error(
|
|
@@ -4225,6 +4986,7 @@ export async function runLauncherCli(argv) {
|
|
|
4225
4986
|
});
|
|
4226
4987
|
flushDashboards();
|
|
4227
4988
|
attempt += 1;
|
|
4989
|
+
traceAttempt += 1;
|
|
4228
4990
|
}
|
|
4229
4991
|
|
|
4230
4992
|
clearWaveRelaunchPlan(lanePaths, wave.wave);
|
|
@@ -4252,6 +5014,9 @@ export async function runLauncherCli(argv) {
|
|
|
4252
5014
|
details: `attempts_used=${dashboardState?.attempt ?? "n/a"}; completed_waves=${runState.completedWaves.join(", ") || "none"}`,
|
|
4253
5015
|
});
|
|
4254
5016
|
} finally {
|
|
5017
|
+
if (residentOrchestratorRun) {
|
|
5018
|
+
killTmuxSessionIfExists(lanePaths.tmuxSocketName, residentOrchestratorRun.sessionName);
|
|
5019
|
+
}
|
|
4255
5020
|
if (terminalsAppended && !options.keepTerminals) {
|
|
4256
5021
|
removeTerminalEntries(lanePaths.terminalsPath, terminalEntries);
|
|
4257
5022
|
}
|