@bridge_gpt/mcp-server 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -15
- package/build/agent-config-credential-migration.js +272 -0
- package/build/agents.generated.js +1 -1
- package/build/chain-orchestrator.js +16 -1
- package/build/commands.generated.js +9 -7
- package/build/conductor/bridge-api-client.js +625 -0
- package/build/conductor/claude-hook.js +251 -0
- package/build/conductor/cli.js +1048 -0
- package/build/conductor/data-normalization.js +114 -0
- package/build/conductor/doctor.js +164 -0
- package/build/conductor/done-gate.js +325 -0
- package/build/conductor/epic-reconcile.js +139 -0
- package/build/conductor/epic-runtime.js +611 -0
- package/build/conductor/epic-state.js +125 -0
- package/build/conductor/errors.js +85 -0
- package/build/conductor/git-ci-types.js +129 -0
- package/build/conductor/git-hooks.js +218 -0
- package/build/conductor/git-inspection.js +185 -0
- package/build/conductor/git-producer.js +137 -0
- package/build/conductor/merge-ledger.js +198 -0
- package/build/conductor/paths.js +224 -0
- package/build/conductor/plan.js +77 -0
- package/build/conductor/pr-ci-producer.js +427 -0
- package/build/conductor/pr-discovery.js +135 -0
- package/build/conductor/producer-ledger.js +125 -0
- package/build/conductor/redaction.js +112 -0
- package/build/conductor/store.js +1156 -0
- package/build/conductor/supervisor-config.js +150 -0
- package/build/conductor/supervisor-escalation.js +244 -0
- package/build/conductor/supervisor-judgment-python.js +141 -0
- package/build/conductor/supervisor-judgment.js +215 -0
- package/build/conductor/supervisor-ledger.js +119 -0
- package/build/conductor/supervisor-merge.js +127 -0
- package/build/conductor/supervisor-message-relay.js +61 -0
- package/build/conductor/supervisor-notification.js +39 -0
- package/build/conductor/supervisor-runtime.js +351 -0
- package/build/conductor/supervisor-state.js +572 -0
- package/build/conductor/supervisor-types.js +16 -0
- package/build/conductor/taxonomy.js +58 -0
- package/build/conductor/tools.js +367 -0
- package/build/conductor/types.js +9 -0
- package/build/conductor-bin.js +21 -0
- package/build/conductor-claude-hook-bin.js +21 -0
- package/build/credential-store.js +175 -4
- package/build/credentials-cli.js +223 -0
- package/build/decision-page-schema.js +60 -0
- package/build/decision-page-template.js +262 -10
- package/build/doctor.js +5 -1
- package/build/index.js +468 -59
- package/build/pipeline-orchestrator.js +5 -1
- package/build/pipeline-utils.js +45 -5
- package/build/pipelines.generated.js +37 -9
- package/build/readme.generated.js +1 -1
- package/build/review-tickets.js +596 -0
- package/build/scheduled-prompt.js +16 -10
- package/build/start-tickets-conductor.js +496 -0
- package/build/start-tickets-prereqs.js +32 -23
- package/build/start-tickets-repo.js +49 -0
- package/build/start-tickets.js +682 -81
- package/build/version.generated.js +1 -1
- package/design-assets/favicon/android-chrome-192x192.png +0 -0
- package/design-assets/favicon/android-chrome-512x512.png +0 -0
- package/design-assets/favicon/apple-touch-icon.png +0 -0
- package/design-assets/favicon/favicon-16x16.png +0 -0
- package/design-assets/favicon/favicon-32x32.png +0 -0
- package/design-assets/favicon/favicon.ico +0 -0
- package/design-assets/favicon/site.webmanifest +1 -0
- package/design-assets/just-logo-rough-draft.png +0 -0
- package/package.json +17 -5
- package/pipelines/idea-to-ticket.json +5 -0
- package/pipelines/plan-epic.json +16 -1
- package/pipelines/review-ticket.json +2 -1
- package/public/css/main.min.css +2 -0
- package/public/css/main.min.css.map +1 -0
- package/public/fonts/OFL.txt +93 -0
- package/public/fonts/SourceSansPro-Black.ttf +0 -0
- package/public/fonts/SourceSansPro-BlackItalic.ttf +0 -0
- package/public/fonts/SourceSansPro-Bold.ttf +0 -0
- package/public/fonts/SourceSansPro-BoldItalic.ttf +0 -0
- package/public/fonts/SourceSansPro-ExtraLight.ttf +0 -0
- package/public/fonts/SourceSansPro-ExtraLightItalic.ttf +0 -0
- package/public/fonts/SourceSansPro-Italic.ttf +0 -0
- package/public/fonts/SourceSansPro-Light.ttf +0 -0
- package/public/fonts/SourceSansPro-LightItalic.ttf +0 -0
- package/public/fonts/SourceSansPro-Regular.ttf +0 -0
- package/public/fonts/SourceSansPro-SemiBold.ttf +0 -0
- package/public/fonts/SourceSansPro-SemiBoldItalic.ttf +0 -0
- package/public/img/bridge-logo-160x51.webp +0 -0
- package/public/img/bridge-logo-300x92.webp +0 -0
- package/public/img/favicon/android-chrome-192x192.png +0 -0
- package/public/img/favicon/android-chrome-512x512.png +0 -0
- package/public/img/favicon/apple-touch-icon.png +0 -0
- package/public/img/favicon/favicon-16x16.png +0 -0
- package/public/img/favicon/favicon-32x32.png +0 -0
- package/public/img/favicon/favicon.ico +0 -0
- package/public/img/favicon/site.webmanifest +1 -0
- package/public/img/installation/bitbucket/app-password-1.png +0 -0
- package/public/img/installation/bitbucket/app-password-2.png +0 -0
- package/public/img/installation/bitbucket/create-token-1.png +0 -0
- package/public/img/installation/bitbucket/create-token-2.png +0 -0
- package/public/img/installation/bitbucket/webhook-1.png +0 -0
- package/public/img/installation/github/github-review-webhook.png +0 -0
- package/public/img/installation/jira/credentials/api-key.png +0 -0
- package/public/img/installation/jira/webhook/create-rule.png +0 -0
- package/public/img/installation/jira/webhook/project-settings.png +0 -0
- package/public/img/installation/jira/webhook/rule-create-1.png +0 -0
- package/public/img/installation/jira/webhook/rule-create-2.png +0 -0
- package/public/img/installation/jira/webhook/rule-create-3.png +0 -0
- package/public/img/installation/pinecone/pinecone-api-key.png +0 -0
- package/public/img/installation/pinecone/pinecone-index.png +0 -0
- package/public/js/main.min.js +2 -0
- package/public/js/main.min.js.map +1 -0
- package/smoke-test/SMOKE-TEST.md +16 -8
|
@@ -0,0 +1,351 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Foreground supervisor runtime loop (BAPI-396, conductor C4).
|
|
3
|
+
*
|
|
4
|
+
* `conductor supervise --run-id <id>` runs this run-scoped, visible, ephemeral
|
|
5
|
+
* loop. It polls the conductor ledger, reduces raw events into deterministic
|
|
6
|
+
* worker watchdog state, persists an operational projection, escalates stuck
|
|
7
|
+
* workers (terminal output + idempotent ledger event), and terminates when all
|
|
8
|
+
* workers are terminal or the global timeout elapses.
|
|
9
|
+
*
|
|
10
|
+
* ## STATE PRECEDENCE (source-of-truth ordering) ##
|
|
11
|
+
* 1. Raw ledger events — the ONLY source of operational truth.
|
|
12
|
+
* 2. supervisor_projection — the resumable operational mirror (this loop owns
|
|
13
|
+
* writes; crash/restart re-hydrates from it).
|
|
14
|
+
* 3. supervisor.assessment events — AUDIT ONLY; never override raw truth.
|
|
15
|
+
* 4. LLM judgment context — DISPOSABLE; advisory text only, never persisted as
|
|
16
|
+
* truth and never able to execute an action.
|
|
17
|
+
*
|
|
18
|
+
* The deterministic loop owns correctness. The LLM is consulted only to classify
|
|
19
|
+
* AMBIGUOUS stalls and draft escalation text, within a per-run budget, and
|
|
20
|
+
* degrades to deterministic-only behavior whenever it is disabled, exhausted, or
|
|
21
|
+
* fails.
|
|
22
|
+
*/
|
|
23
|
+
import { waitForConductorEvent, getSupervisorSnapshot, upsertSupervisorProjection, sendWorkerMessage, } from "./store.js";
|
|
24
|
+
import { emitSupervisorAssessmentIfNew } from "./supervisor-ledger.js";
|
|
25
|
+
import { sendSupervisorEscalationWorkerMessageIfNew } from "./supervisor-message-relay.js";
|
|
26
|
+
import { resolveSupervisorConfig } from "./supervisor-config.js";
|
|
27
|
+
import { hydrateSupervisorRunStateFromSnapshot, applyConductorEventToSupervisorState, applySupervisorHousekeeping, isSupervisorRunTerminal, hasSupervisorGlobalTimeoutElapsed, toSupervisorProjectionInput, } from "./supervisor-state.js";
|
|
28
|
+
import { findSupervisorEscalationCandidates, shouldEmitEscalation, recordEscalationResult, formatEscalationForTerminal, } from "./supervisor-escalation.js";
|
|
29
|
+
import { assessSupervisorCandidate } from "./supervisor-judgment.js";
|
|
30
|
+
import { makeSupervisorIdempotencyKey } from "./supervisor-ledger.js";
|
|
31
|
+
import { createDefaultSupervisorJudgmentClient } from "./supervisor-judgment-python.js";
|
|
32
|
+
import { resolveConductorBridgeApiAccess, } from "./bridge-api-client.js";
|
|
33
|
+
import { extractMergeActionIdentityFromGateEvent } from "./merge-ledger.js";
|
|
34
|
+
import { processGateMetMerge } from "./supervisor-merge.js";
|
|
35
|
+
import { dispatchSupervisorNotification } from "./supervisor-notification.js";
|
|
36
|
+
import { ConductorValidationError } from "./errors.js";
|
|
37
|
+
/**
 * Project a worker record down to the compact snapshot sent with a judgment
 * request. Only the six identity/state/timing fields below are copied, so any
 * other property on the worker record never reaches the request.
 *
 * @param {object|null|undefined} worker - Worker record from the run state.
 * @returns {object|null} The six-field snapshot, or `null` when `worker` is falsy.
 */
function compactWorkerForJudgment(worker) {
    if (!worker) {
        return null;
    }
    const { worker_id, ticket_key, state, liveness, last_event_time, last_progress_time } = worker;
    return { worker_id, ticket_key, state, liveness, last_event_time, last_progress_time };
}
|
|
50
|
+
/**
 * Build the fixed, non-LLM assessment used for an unambiguous candidate.
 * The result always classifies the worker as "stuck" with full confidence,
 * always requests escalation, carries the candidate's own reason, has no draft
 * escalation text, and is tagged with source "degraded".
 *
 * @param {object} candidate - Escalation candidate; only `reason` is read.
 * @returns {object} The assessment record.
 */
function deterministicAssessment(candidate) {
    const { reason } = candidate;
    const assessment = {
        classification: "stuck",
        confidence: 1,
        should_escalate: true,
        reason,
        draft_escalation_text: null,
        source: "degraded",
    };
    return assessment;
}
|
|
61
|
+
/**
 * Derive the run's terminal status from its workers: "failed" as soon as any
 * worker's state is "failed", otherwise "complete" (including when there are
 * no workers at all).
 *
 * @param {object} state - Run state; `state.workers` maps worker ids to records.
 * @returns {"failed"|"complete"} The terminal run status.
 */
function terminalStatus(state) {
    for (const worker of Object.values(state.workers)) {
        if (worker.state === "failed") {
            return "failed";
        }
    }
    return "complete";
}
|
|
66
|
+
/**
 * Run one pass of the escalation pipeline over the current run state.
 *
 * For every candidate that `shouldEmitEscalation` allows in the current
 * cooldown window, this:
 *   1. obtains an assessment (judgment client for ambiguous candidates, the
 *      fixed deterministic assessment otherwise);
 *   2. records a `suppressed` result when the assessment declines to escalate;
 *   3. otherwise emits the assessment event, records the outcome, relays the
 *      escalation to the worker, and — for newly emitted escalations only —
 *      prints a terminal line and dispatches the optional notification.
 *
 * Emission, relay and notification are best-effort: their failures are
 * swallowed so supervision continues. The function mutates `state` and
 * resolves to nothing.
 *
 * @param {object} state - Mutable supervisor run state.
 * @param {object} config - Resolved supervisor configuration.
 * @param {object} client - Judgment client used for ambiguous candidates.
 * @param {object} deps - `{ now, emitAssessment, sendWorkerMessage, log, dispatchNotification? }`.
 * @returns {Promise<void>}
 */
async function processEscalations(state, config, client, deps) {
    const now = deps.now();
    for (const candidate of findSupervisorEscalationCandidates(state, config, now)) {
        const decision = shouldEmitEscalation(state, candidate, config, now);
        if (!decision.emit) {
            continue;
        }
        // The idempotency key is derived before any judgment so that EVERY
        // decision taken in this window — an LLM "do not escalate" included —
        // is recorded under the same key.
        const idempotency = {
            run_id: state.run_id,
            worker_id: candidate.worker_id,
            reason: candidate.reason,
            kind: candidate.kind,
            cooldown_window: decision.cooldown_window,
        };
        const idempotencyKey = makeSupervisorIdempotencyKey(idempotency);
        const assessment = candidate.ambiguous
            ? await assessSupervisorCandidate({
                run_id: state.run_id,
                candidate,
                worker: candidate.worker_id ? compactWorkerForJudgment(state.workers[candidate.worker_id]) : null,
            }, config, state.llm_budget, client)
            : deterministicAssessment(candidate);
        state.latest_assessment = assessment;
        if (!assessment.should_escalate) {
            // Judgment said "no": remember that as `suppressed` for this
            // cooldown window. Otherwise a persistently ambiguous worker would
            // be re-judged on every wake, drain the per-run LLM budget, and
            // then get escalated anyway by the degraded fallback. The decision
            // holds until the window rolls over.
            recordEscalationResult(state, candidate, decision.cooldown_window, idempotencyKey, "suppressed", now);
            continue;
        }
        let outcome;
        try {
            const { emitted } = deps.emitAssessment({
                run_id: state.run_id,
                worker_id: candidate.worker_id,
                assessment,
                details: { elapsed_ms: candidate.elapsed_ms, ...candidate.context },
                idempotency,
            });
            outcome = emitted ? "emitted" : "duplicate";
        } catch {
            // Best-effort emission: note it as skipped and carry on supervising.
            outcome = "skipped";
        }
        recordEscalationResult(state, candidate, decision.cooldown_window, idempotencyKey, outcome, now);
        // BAPI-397: hand the escalation to the worker via the cooperative relay.
        // Both `emitted` and `duplicate` outcomes enqueue, so a crash between
        // emitting the assessment and enqueuing the message self-heals on
        // restart; the store's idempotency key (`cause_seq = last_seq`) plus
        // the per-type cooldown prevent a second delivery. Run-level candidates
        // (e.g. `global_timeout`) have no worker to message and are skipped.
        // Relay failures are swallowed and message payloads / draft text are
        // never logged on failure.
        if (outcome !== "skipped" && candidate.worker_id) {
            try {
                sendSupervisorEscalationWorkerMessageIfNew(candidate, assessment, state, {
                    sendMessage: deps.sendWorkerMessage,
                });
            } catch {
                /* best-effort relay; keep supervising */
            }
        }
        // Terminal output and notifications are for genuinely NEW escalations
        // only, so a restart inside the same cooldown window never re-spams.
        if (outcome !== "emitted") {
            continue;
        }
        deps.log(formatEscalationForTerminal(state.run_id, candidate, assessment.draft_escalation_text));
        // S7: out-of-band human notification for newly emitted escalations.
        if (!deps.dispatchNotification) {
            continue;
        }
        try {
            await deps.dispatchNotification(state.run_id, candidate, assessment, idempotencyKey);
        } catch {
            /* best-effort notification; keep supervising */
        }
    }
}
|
|
162
|
+
/**
 * Run the foreground supervisor loop for exactly one run.
 *
 * Each iteration: exit if every worker is terminal or the global timeout has
 * elapsed; otherwise wait (up to one wake interval) for new ledger events,
 * reduce them into `state`, dispatch eligible `gate.met` events to the merge
 * pipeline, run housekeeping, process escalations, and persist the projection.
 *
 * @param {object} options
 * @param {string} options.run_id - Run to supervise; trimmed, must be non-empty.
 * @param {object} [options.config] - Fully resolved supervisor config. When
 *   absent, it is resolved from `options.overrides`.
 * @param {object} [options.overrides] - Overrides passed to `resolveSupervisorConfig`.
 * @param {object} [deps] - Optional injection points, each defaulting to the
 *   real implementation: `now`, `log`, `errorLog`, `waitForEvent`,
 *   `getSnapshot`, `upsertProjection`, `emitAssessment`, `sendWorkerMessage`,
 *   `judgmentClient`, `resolveBridgeAccess`, `processMerge`,
 *   `dispatchNotification`.
 * @returns {Promise<object>} A run result
 *   `{ run_id, status, exit_code, reason, last_seq, worker_count }`.
 *   `exit_code` is 0 when all workers reached a terminal state (even if some
 *   failed — see `status`) and 2 for a snapshot-load failure, timeout,
 *   iteration-cap exit, or unexpected error.
 * @throws {ConductorValidationError} When `options.run_id` is not a non-empty
 *   string. This is the only failure that is thrown rather than returned.
 */
export async function runSupervisor(options, deps = {}) {
    const runId = typeof options.run_id === "string" ? options.run_id.trim() : "";
    if (runId.length === 0) {
        throw new ConductorValidationError("Supervisor requires exactly one non-empty --run-id.");
    }
    const config = options.config ?? resolveSupervisorConfig(options.overrides ?? {});
    const now = deps.now ?? (() => Date.now());
    const log = deps.log ?? ((m) => process.stdout.write(`${m}\n`));
    const errorLog = deps.errorLog ?? ((m) => process.stderr.write(`${m}\n`));
    const waitForEvent = deps.waitForEvent ?? waitForConductorEvent;
    const getSnapshot = deps.getSnapshot ?? getSupervisorSnapshot;
    const upsertProjection = deps.upsertProjection ?? upsertSupervisorProjection;
    const emitAssessment = deps.emitAssessment ?? emitSupervisorAssessmentIfNew;
    const sendMessage = deps.sendWorkerMessage ?? sendWorkerMessage;
    const judgmentClient = deps.judgmentClient ?? createDefaultSupervisorJudgmentClient(config);
    const resolveBridgeAccess = deps.resolveBridgeAccess ?? (() => resolveConductorBridgeApiAccess());
    const processMerge = deps.processMerge ?? processGateMetMerge;
    const dispatchNotification = deps.dispatchNotification ?? dispatchSupervisorNotification;
    // Bridge API access for the C6 merge pipeline is resolved at most once, lazily on
    // the first eligible gate.met event. Unavailable credentials/repo identity
    // disable autonomous merge gracefully — no local VCS operation, no loop crash.
    let bridgeAccess = null;
    let bridgeAccessResolved = false;
    const processGateMetMerges = async (events) => {
        const eligible = events.filter((e) => extractMergeActionIdentityFromGateEvent(e) !== null);
        if (eligible.length === 0) {
            return;
        }
        if (!bridgeAccessResolved) {
            bridgeAccessResolved = true;
            try {
                const result = await resolveBridgeAccess();
                bridgeAccess = result.ok ? result.access : null;
                if (!result.ok) {
                    errorLog(`[supervisor] run=${runId} auto-merge disabled: ${result.kind}`);
                }
            } catch {
                bridgeAccess = null;
                // Resolution is never retried, so auto-merge is now off for the
                // rest of the run. Say so (without error details, to keep the
                // output sanitized) instead of disabling it silently.
                try {
                    errorLog(`[supervisor] run=${runId} auto-merge disabled: bridge access resolution failed`);
                } catch {
                    /* logging must never crash the supervisor loop */
                }
            }
        }
        if (!bridgeAccess) {
            return;
        }
        for (const event of eligible) {
            try {
                await processMerge(bridgeAccess, event);
            } catch {
                // A merge-processing failure must never crash the supervisor loop.
            }
        }
    };
    const persist = (state) => {
        try {
            upsertProjection(toSupervisorProjectionInput(state));
        } catch {
            // Persistence is best-effort within an iteration; the next iteration retries.
        }
    };
    let state;
    try {
        const snapshot = getSnapshot(runId);
        state = hydrateSupervisorRunStateFromSnapshot(snapshot, runId, config, now());
    } catch {
        // A snapshot read/hydration failure is fatal for this invocation: there
        // is no state to supervise or persist, so report a sanitized failure.
        errorLog(`[supervisor] run=${runId} failed to load initial snapshot`);
        return {
            run_id: runId,
            status: "failed",
            exit_code: 2,
            reason: "supervisor failed to load initial snapshot",
            last_seq: 0,
            worker_count: 0,
        };
    }
    // Shared shape for every result produced once `state` exists.
    const buildResult = (status, exitCode, reason) => ({
        run_id: runId,
        status,
        exit_code: exitCode,
        reason,
        last_seq: state.last_seq,
        worker_count: Object.keys(state.workers).length,
    });
    // Resume from last_seq+1 when a projection was hydrated; else from 1.
    let sinceSeq = state.last_seq > 0 ? state.last_seq + 1 : 1;
    // Defensive iteration cap so a frozen clock can never spin forever; the global
    // timeout is the real bound under a live clock. A missing or non-numeric
    // timeout/interval would make the estimate NaN, and `Math.max(1000, NaN)`
    // is NaN — the loop would then never run — so fall back to the floor.
    const estimatedWakes = Math.ceil(config.global_timeout_ms / Math.max(1, config.wake_interval_ms)) + 16;
    const iterationCap = Number.isNaN(estimatedWakes) ? 1000 : Math.max(1000, estimatedWakes);
    try {
        // Refresh liveness from the hydrated state BEFORE the first terminal check so
        // a resumed, already-terminal run (crash/restart) can exit promptly without
        // blocking on a full wake interval first.
        applySupervisorHousekeeping(state, config, now());
        for (let iteration = 0; iteration < iterationCap; iteration += 1) {
            // Terminal / global-timeout checks run at the TOP of the loop — BEFORE the
            // blocking wait — so a run that is already (or has just become) terminal
            // exits immediately rather than waiting one wake interval for events that
            // will never arrive. The blocking wait below only happens while the run is
            // still in progress.
            if (isSupervisorRunTerminal(state)) {
                state.status = terminalStatus(state);
                persist(state);
                log(`[supervisor] run=${runId} complete status=${state.status} workers=${Object.keys(state.workers).length}`);
                return buildResult(state.status, 0, "all workers terminal");
            }
            if (hasSupervisorGlobalTimeoutElapsed(state, now())) {
                state.status = "timed_out";
                persist(state);
                errorLog(`[supervisor] run=${runId} global timeout elapsed`);
                return buildResult("timed_out", 2, "global timeout elapsed");
            }
            const waitResult = await waitForEvent({
                since_seq: sinceSeq,
                filter: { run_id: runId },
                data_mode: "full",
                timeout_ms: config.wake_interval_ms,
                limit: config.poll_limit,
            });
            if (waitResult.events.length > 0) {
                // Copy before sorting so the caller-owned array is not mutated.
                const sorted = [...waitResult.events].sort((a, b) => a.seq - b.seq);
                for (const event of sorted) {
                    applyConductorEventToSupervisorState(state, event, now());
                }
                sinceSeq = sorted[sorted.length - 1].seq + 1;
                // C6: after reducing the batch into state, process any worker-scoped
                // gate.met events through the API-owned merge pipeline. Eligibility,
                // idempotency, and the dry-run/enabled decision are all enforced
                // downstream; this loop only dispatches and never merges locally.
                await processGateMetMerges(sorted);
            }
            // Housekeeping (liveness + state-specific stalls) runs every iteration so a
            // timeout with no events still advances liveness/stall state.
            applySupervisorHousekeeping(state, config, now());
            await processEscalations(state, config, judgmentClient, {
                emitAssessment,
                sendWorkerMessage: sendMessage,
                log,
                now,
                dispatchNotification,
            });
            // Persist after the batch AND after housekeeping/escalation, because state
            // can change even when no events arrived. The terminal/timeout exit is
            // re-evaluated at the top of the next iteration (no extra blocking wait).
            persist(state);
        }
        // Iteration cap reached without a terminal/global-timeout exit (frozen clock):
        // treat as a timeout rather than spinning.
        state.status = "timed_out";
        persist(state);
        return buildResult("timed_out", 2, "supervisor iteration cap reached");
    } catch {
        // Unexpected boundary exception — sanitize (the error itself is never
        // logged) and make a best-effort attempt to persist the failed status.
        state.status = "failed";
        persist(state);
        errorLog(`[supervisor] run=${runId} failed unexpectedly`);
        return buildResult("failed", 2, "supervisor failed unexpectedly");
    }
}
|