@bridge_gpt/mcp-server 0.2.9 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -4
- package/build/conductor/bridge-api-client.js +262 -35
- package/build/conductor/cli.js +22 -1
- package/build/conductor/doctor.js +34 -1
- package/build/conductor/done-gate.js +301 -58
- package/build/conductor/epic-reconcile.js +121 -4
- package/build/conductor/epic-runtime.js +298 -17
- package/build/conductor/epic-state.js +108 -9
- package/build/conductor/git-ci-types.js +6 -0
- package/build/conductor/pr-ci-producer.js +114 -15
- package/build/conductor/pr-review-producer.js +116 -0
- package/build/conductor/store.js +8 -1
- package/build/conductor/supervisor-message-relay.js +31 -0
- package/build/conductor/taxonomy.js +3 -0
- package/build/conductor/tools.js +2 -2
- package/build/index.js +356 -1086
- package/build/init.js +481 -0
- package/build/install-bridge.js +692 -0
- package/build/mcp-profile.js +43 -0
- package/build/readme.generated.js +1 -1
- package/build/start-tickets-conductor.js +1 -0
- package/build/start-tickets.js +186 -10
- package/build/upgrade-cli.js +154 -0
- package/build/version.generated.js +1 -1
- package/package.json +2 -2
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Deterministic observed→desired reconciliation for the Epic Supervisor
|
|
3
|
-
* (BAPI-408).
|
|
3
|
+
* (BAPI-408, BAPI-436).
|
|
4
4
|
*
|
|
5
5
|
* Executes five steps in order:
|
|
6
6
|
* 1. Fold terminal signals into Postgres via CAS
|
|
@@ -9,10 +9,15 @@
|
|
|
9
9
|
* 4. Action approved merges via C6 delegation
|
|
10
10
|
* 5. Schedule post-action wait hooks
|
|
11
11
|
*
|
|
12
|
+
* Dispatch is purely merge-gated (BAPI-436): dependents are dispatched only
|
|
13
|
+
* after their predecessor reaches "done", which requires a merge.succeeded
|
|
14
|
+
* signal. gate.met and run.stopped fold to the intermediate "ready_for_review"
|
|
15
|
+
* state — dependents do NOT dispatch on these signals.
|
|
16
|
+
*
|
|
12
17
|
* All durable mutations go through injected seams so the logic is testable
|
|
13
18
|
* without real network, ledger, or terminal access.
|
|
14
19
|
*/
|
|
15
|
-
import { computeReadySet } from "./epic-state.js";
|
|
20
|
+
import { computeReadySet, decideRemediation } from "./epic-state.js";
|
|
16
21
|
import { extractMergeActionIdentityFromGateEvent } from "./merge-ledger.js";
|
|
17
22
|
// ---------------------------------------------------------------------------
|
|
18
23
|
// reconcileEpic
|
|
@@ -21,7 +26,7 @@ import { extractMergeActionIdentityFromGateEvent } from "./merge-ledger.js";
|
|
|
21
26
|
* Execute the deterministic observed→desired reconciliation pass. All I/O is
|
|
22
27
|
* behind injected seams; the ready-set is computed by pure code (no LLM).
|
|
23
28
|
*/
|
|
24
|
-
export async function reconcileEpic(access, observed, plan, deps) {
|
|
29
|
+
export async function reconcileEpic(access, observed, plan, deps, supervisorConfig) {
|
|
25
30
|
const result = {
|
|
26
31
|
signals_folded: 0,
|
|
27
32
|
dispatched: 0,
|
|
@@ -43,6 +48,22 @@ export async function reconcileEpic(access, observed, plan, deps) {
|
|
|
43
48
|
if (casResult.ok) {
|
|
44
49
|
result.signals_folded += 1;
|
|
45
50
|
deps.log(`[epic-reconcile] folded ${signal.signal_type} for ${signal.ticket_key} → ${signal.next_status}`);
|
|
51
|
+
// BAPI-442: fire teardown + Jira transition strictly after merge.succeeded
|
|
52
|
+
// CAS → done. Both are fail-open: errors are logged and never abort the pass.
|
|
53
|
+
if (signal.signal_type === "merge.succeeded") {
|
|
54
|
+
if (supervisorConfig?.teardown_enabled && deps.teardownSeam) {
|
|
55
|
+
await deps.teardownSeam(observed.epic_key, signal.ticket_key).catch((e) => {
|
|
56
|
+
const safeMsg = e instanceof Error ? e.constructor.name : "teardown error";
|
|
57
|
+
deps.log(`[epic-reconcile] teardown error for ${signal.ticket_key}: ${safeMsg}`);
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
if (deps.jiraTransitionSeam) {
|
|
61
|
+
await deps.jiraTransitionSeam(observed.epic_key, signal.ticket_key).catch((e) => {
|
|
62
|
+
const safeMsg = e instanceof Error ? e.constructor.name : "jira error";
|
|
63
|
+
deps.log(`[epic-reconcile] jira-transition error for ${signal.ticket_key}: ${safeMsg}`);
|
|
64
|
+
});
|
|
65
|
+
}
|
|
66
|
+
}
|
|
46
67
|
}
|
|
47
68
|
else {
|
|
48
69
|
// CAS conflict: another tick already advanced this ticket — non-fatal
|
|
@@ -61,8 +82,24 @@ export async function reconcileEpic(access, observed, plan, deps) {
|
|
|
61
82
|
}
|
|
62
83
|
// Step 2: Compute the ready-set (pure — never calls LLM)
|
|
63
84
|
const readySet = computeReadySet(plan, observed.ticket_statuses);
|
|
64
|
-
// Step 3: Dispatch each ready ticket idempotently
|
|
85
|
+
// Step 3: Dispatch each ready ticket idempotently.
|
|
86
|
+
//
|
|
87
|
+
// NOTE (BAPI-442): an earlier draft performed a synchronous two-phase
|
|
88
|
+
// planned → ready_for_review → ready spec re-review here — it spawned
|
|
89
|
+
// /review-ticket and then dispatched implementation in the SAME tick. That did
|
|
90
|
+
// not actually gate implementation on the review (the verdict was never
|
|
91
|
+
// consulted and the review run_id was discarded), and it overloaded the
|
|
92
|
+
// BAPI-436 "ready_for_review" (awaiting-merge) state, leaving a liveness gap
|
|
93
|
+
// if the conductor crashed between the CAS and the dispatch. That path has
|
|
94
|
+
// been removed. `auto_rereview_enabled` is reserved until real review-gating —
|
|
95
|
+
// a distinct `reviewing` status, review-run correlation, multi-tick re-entry,
|
|
96
|
+
// and a spec-review verdict signal — is built (BAPI-445). Until then a ready
|
|
97
|
+
// ticket dispatches implementation directly regardless of the flag.
|
|
65
98
|
for (const ticketKey of readySet) {
|
|
99
|
+
if (supervisorConfig?.auto_rereview_enabled) {
|
|
100
|
+
deps.log(`[epic-reconcile] auto_rereview_enabled is set but review-gating is not yet ` +
|
|
101
|
+
`implemented (BAPI-445); dispatching ${ticketKey} directly`);
|
|
102
|
+
}
|
|
66
103
|
let claimResult;
|
|
67
104
|
try {
|
|
68
105
|
claimResult = await deps.claimDispatchKey(observed.epic_key, ticketKey, observed.plan_version);
|
|
@@ -118,6 +155,86 @@ export async function reconcileEpic(access, observed, plan, deps) {
|
|
|
118
155
|
result.warnings.push(`correlate-failed for ${ticketKey}: ${safeMsg}`);
|
|
119
156
|
}
|
|
120
157
|
}
|
|
158
|
+
// Step 3.5: Remediation pass (BAPI-441) — re-act on blocked tickets under
|
|
159
|
+
// budget. Keyed off the folded "blocked" status + per-ticket counters, NOT a
|
|
160
|
+
// computeReadySet change (the ready-set still returns only planned tickets).
|
|
161
|
+
// Skipped entirely unless the remediation seams + supervisorConfig are wired.
|
|
162
|
+
const remediationWired = supervisorConfig !== undefined &&
|
|
163
|
+
deps.readWorkerLiveness !== undefined &&
|
|
164
|
+
deps.remediateCas !== undefined &&
|
|
165
|
+
deps.sendNudge !== undefined &&
|
|
166
|
+
deps.resumeDispatch !== undefined;
|
|
167
|
+
if (remediationWired) {
|
|
168
|
+
const cfg = supervisorConfig;
|
|
169
|
+
const readWorkerLiveness = deps.readWorkerLiveness;
|
|
170
|
+
const sendNudge = deps.sendNudge;
|
|
171
|
+
const resumeDispatch = deps.resumeDispatch;
|
|
172
|
+
const remediateCas = deps.remediateCas;
|
|
173
|
+
for (const [ticketKey, status] of observed.ticket_statuses) {
|
|
174
|
+
if (status !== "blocked")
|
|
175
|
+
continue;
|
|
176
|
+
// Per-ticket try/catch so a single ticket's failure never aborts the pass.
|
|
177
|
+
try {
|
|
178
|
+
const counters = observed.ticket_remediation_counters?.get(ticketKey) ?? {
|
|
179
|
+
attempts: 0,
|
|
180
|
+
no_progress: 0,
|
|
181
|
+
};
|
|
182
|
+
const liveness = await readWorkerLiveness(observed.epic_key, ticketKey);
|
|
183
|
+
const decision = decideRemediation(counters.attempts, counters.no_progress, liveness.alive, cfg);
|
|
184
|
+
if (decision === "escalate") {
|
|
185
|
+
await escalate(observed.epic_key, `remediation-budget-exhausted:${ticketKey}`);
|
|
186
|
+
deps.log(`[epic-reconcile] remediation escalate ${ticketKey} ` +
|
|
187
|
+
`(attempts=${counters.attempts} no_progress=${counters.no_progress})`);
|
|
188
|
+
continue;
|
|
189
|
+
}
|
|
190
|
+
// The attempt being recorded is the next one (1-based).
|
|
191
|
+
const attempt = counters.attempts + 1;
|
|
192
|
+
const attemptKind = decision;
|
|
193
|
+
// The folding reason frames the nudge (message type + digest). Default to
|
|
194
|
+
// the review path when the ledger no longer carries the blocking event.
|
|
195
|
+
const reason = observed.ticket_blocked_reasons?.get(ticketKey) ?? "review.changes_requested";
|
|
196
|
+
// A nudge needs a worker to address it to. The liveness scan already
|
|
197
|
+
// resolved the worker id from the same heartbeat that proved the worker
|
|
198
|
+
// alive; if it is missing we cannot relay, so skip BEFORE recording an
|
|
199
|
+
// attempt — otherwise the CAS would burn a budget unit with nothing sent.
|
|
200
|
+
if (decision === "nudge" && !liveness.workerId) {
|
|
201
|
+
result.warnings.push(`remediation nudge skipped for ${ticketKey}: alive worker has no worker_id`);
|
|
202
|
+
continue;
|
|
203
|
+
}
|
|
204
|
+
// Record the attempt durably FIRST. The remediate endpoint builds the
|
|
205
|
+
// (backend-redacted) review digest for a nudge and returns it, and is
|
|
206
|
+
// idempotent (a 409 replay returns conflict, not throw). An unexpected
|
|
207
|
+
// remediate failure is absorbed as a per-ticket warning so the rest of
|
|
208
|
+
// the pass still runs (crash-replay safe).
|
|
209
|
+
let casOutcome;
|
|
210
|
+
try {
|
|
211
|
+
casOutcome = await remediateCas(observed.epic_key, ticketKey, attemptKind, reason);
|
|
212
|
+
}
|
|
213
|
+
catch (err) {
|
|
214
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "remediate error";
|
|
215
|
+
result.warnings.push(`remediate-cas-failed for ${ticketKey} (${attemptKind}): ${safeMsg}`);
|
|
216
|
+
continue;
|
|
217
|
+
}
|
|
218
|
+
if (casOutcome.conflict) {
|
|
219
|
+
// Idempotency replay: the attempt was already recorded (and acted on)
|
|
220
|
+
// on a prior tick. Do not re-act — the crash-replay self-heals here.
|
|
221
|
+
result.warnings.push(`remediation replay swallowed for ${ticketKey} (${attemptKind})`);
|
|
222
|
+
continue;
|
|
223
|
+
}
|
|
224
|
+
if (decision === "nudge") {
|
|
225
|
+
await sendNudge(observed.epic_key, ticketKey, attempt, casOutcome.reviewDigest, casOutcome.truncated, reason, liveness.workerId);
|
|
226
|
+
}
|
|
227
|
+
else {
|
|
228
|
+
await resumeDispatch(observed.epic_key, ticketKey, attempt);
|
|
229
|
+
}
|
|
230
|
+
deps.log(`[epic-reconcile] remediation ${decision} ${ticketKey} attempt=${attempt}`);
|
|
231
|
+
}
|
|
232
|
+
catch (err) {
|
|
233
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "remediation error";
|
|
234
|
+
result.warnings.push(`remediation-error for ${ticketKey}: ${safeMsg}`);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
}
|
|
121
238
|
// Step 4: Action approved merges via C6 delegation
|
|
122
239
|
for (const event of observed.pending_merge_events) {
|
|
123
240
|
const identity = extractMergeActionIdentityFromGateEvent(event);
|
|
@@ -17,16 +17,20 @@
|
|
|
17
17
|
* All durable mutations go through the injectable seams that call the sibling
|
|
18
18
|
* Epic Run TS client (already available in bridge-api-client.ts as of BAPI-407).
|
|
19
19
|
*/
|
|
20
|
-
import {
|
|
20
|
+
import { spawnSync } from "child_process";
|
|
21
|
+
import { resolveConductorBridgeApiAccess, claimEpicSupervisionLease, fetchEpicRunState, advanceEpicTicketStatus, createEpicTicketStatus, recordEpicDispatch, transitionEpicDispatch, fetchParseStatus, triggerRepositoryParse, getEpicPlan, buildEpicDispatchKey, fetchEffectiveSupervisorConfig, fetchEffectiveSupervisorSetup, fetchPrReviewStatus, remediateEpicTicket, deletePullRequestBranch, transitionJiraStatus, } from "./bridge-api-client.js";
|
|
21
22
|
import { processGateMetMerge } from "./supervisor-merge.js";
|
|
22
|
-
import { rebuildObservedState, } from "./epic-state.js";
|
|
23
|
+
import { rebuildObservedState, extractWorkerLiveness, } from "./epic-state.js";
|
|
23
24
|
import { reconcileEpic } from "./epic-reconcile.js";
|
|
25
|
+
import { buildSupervisorRemediationWorkerMessage } from "./supervisor-message-relay.js";
|
|
26
|
+
import { sendWorkerMessage } from "./store.js";
|
|
24
27
|
import { hashPlan } from "./plan.js";
|
|
25
|
-
import { pollConductorEvents } from "./store.js";
|
|
28
|
+
import { pollConductorEvents, POLL_LIMIT_MAX } from "./store.js";
|
|
26
29
|
import { dispatchSupervisorNotification } from "./supervisor-notification.js";
|
|
27
30
|
import { makeSupervisorIdempotencyKey } from "./supervisor-ledger.js";
|
|
28
31
|
import { createDefaultStartTicketsDeps, orchestrateStartTickets } from "../start-tickets.js";
|
|
29
32
|
import { orchestrateReviewTickets } from "../review-tickets.js";
|
|
33
|
+
import { createStartTicketsConductorContext, provisionConductorHooksForRows, emitStartTicketsRunStarted, } from "../start-tickets-conductor.js";
|
|
30
34
|
// ---------------------------------------------------------------------------
|
|
31
35
|
// Constants
|
|
32
36
|
// ---------------------------------------------------------------------------
|
|
@@ -46,7 +50,7 @@ function defaultLeaseOwner() {
|
|
|
46
50
|
async function defaultEscalateOnce(epicKey, reason) {
|
|
47
51
|
process.stderr.write(`[epic-tick] ESCALATION epic=${epicKey} reason=${reason}\n`);
|
|
48
52
|
}
|
|
49
|
-
async function defaultDispatchSeam(_epicKey, ticketKey) {
|
|
53
|
+
async function defaultDispatchSeam(_epicKey, ticketKey, _attempt = 0) {
|
|
50
54
|
throw new Error(`dispatch seam not wired for ticket ${ticketKey}`);
|
|
51
55
|
}
|
|
52
56
|
async function defaultPostActionWaitSeam(_epicKey, _ticketKey) {
|
|
@@ -73,7 +77,8 @@ export async function runEpicTick(options, deps = {}) {
|
|
|
73
77
|
const dispatchSeam = deps.dispatchSeam ?? defaultDispatchSeam;
|
|
74
78
|
const processMergeFn = deps.processMerge ?? processGateMetMerge;
|
|
75
79
|
const postActionWaitSeam = deps.postActionWaitSeam ?? defaultPostActionWaitSeam;
|
|
76
|
-
const fetchLocalEvents = deps.fetchLocalEvents ??
|
|
80
|
+
const fetchLocalEvents = deps.fetchLocalEvents ??
|
|
81
|
+
((_key, _runIds) => []);
|
|
77
82
|
const resolveBridgeAccess = deps.resolveBridgeAccess ?? resolveConductorBridgeApiAccess;
|
|
78
83
|
const claimLeaseFn = deps.claimLease ?? claimEpicSupervisionLease;
|
|
79
84
|
const fetchEpicStateFn = deps.fetchEpicState ?? fetchEpicRunState;
|
|
@@ -190,7 +195,15 @@ export async function runEpicTick(options, deps = {}) {
|
|
|
190
195
|
worker_count: 0,
|
|
191
196
|
};
|
|
192
197
|
}
|
|
193
|
-
|
|
198
|
+
// Scope the local-ledger read to this epic's dispatched run_ids. The shared
|
|
199
|
+
// ~/.config/bridge/events.db ledger accumulates events for every epic/worker
|
|
200
|
+
// on the machine; rebuildObservedState only folds signals whose run_id maps
|
|
201
|
+
// to one of these dispatches, so scoping the read here avoids loading the
|
|
202
|
+
// entire (up to 50K-row) ledger on every tick.
|
|
203
|
+
const dispatchedRunIds = epicRunState.dispatches
|
|
204
|
+
.map((d) => d.run_id)
|
|
205
|
+
.filter((rid) => typeof rid === "string" && rid.length > 0);
|
|
206
|
+
const localEvents = fetchLocalEvents(epic_key, dispatchedRunIds);
|
|
194
207
|
const observed = rebuildObservedState(epicRunState, localEvents, nowFn());
|
|
195
208
|
workerCount = [...observed.ticket_statuses.values()].filter((s) => ACTIVE_WORKER_STATUSES.has(s)).length;
|
|
196
209
|
// Step 3.5: Run post-action waits (parse-after-merge)
|
|
@@ -330,6 +343,82 @@ export async function runEpicTick(options, deps = {}) {
|
|
|
330
343
|
}
|
|
331
344
|
// Step 5: Reconcile observed→desired
|
|
332
345
|
if (plan !== null) {
|
|
346
|
+
// BAPI-441: fetch the effective supervisor config (budget ceilings +
|
|
347
|
+
// liveness window) and setup (pr_bindings) once. Fail-open: if the config
|
|
348
|
+
// read fails, remediationConfig stays undefined and reconcile skips the
|
|
349
|
+
// remediation pass entirely (dispatch/merge steps unaffected).
|
|
350
|
+
let remediationConfig;
|
|
351
|
+
let livenessWindowSeconds = 120;
|
|
352
|
+
let prBindings = {};
|
|
353
|
+
try {
|
|
354
|
+
const cfg = await fetchEffectiveSupervisorConfig(access, epic_key);
|
|
355
|
+
remediationConfig = {
|
|
356
|
+
max_remediation_attempts: cfg.max_remediation_attempts,
|
|
357
|
+
max_remediation_no_progress_attempts: cfg.max_remediation_no_progress_attempts,
|
|
358
|
+
auto_rereview_enabled: cfg.auto_rereview_enabled ?? false,
|
|
359
|
+
teardown_enabled: cfg.teardown_enabled ?? false,
|
|
360
|
+
};
|
|
361
|
+
livenessWindowSeconds = cfg.worker_liveness_window_seconds;
|
|
362
|
+
}
|
|
363
|
+
catch (err) {
|
|
364
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "config error";
|
|
365
|
+
errorLog(`[epic-tick] supervisor-config fetch failed (${safeMsg}); skipping remediation for epic=${epic_key}`);
|
|
366
|
+
}
|
|
367
|
+
if (remediationConfig) {
|
|
368
|
+
try {
|
|
369
|
+
const setup = await fetchEffectiveSupervisorSetup(access, epic_key);
|
|
370
|
+
if (setup.pr_bindings && typeof setup.pr_bindings === "object") {
|
|
371
|
+
prBindings = setup.pr_bindings;
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
catch (err) {
|
|
375
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "setup error";
|
|
376
|
+
errorLog(`[epic-tick] supervisor-setup fetch failed (${safeMsg}); remediation PR resolution degraded for epic=${epic_key}`);
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
// ticket_key → dispatched run_id (the run whose heartbeat liveness reads).
|
|
380
|
+
// Seed from ticket_status.dispatch_run_id, then prefer the most-recent
|
|
381
|
+
// dispatch-ledger run_id per ticket so that after a remediation re-dispatch
|
|
382
|
+
// (a new attempt-scoped epic_dispatch row correlated with the fresh run_id)
|
|
383
|
+
// liveness tracks the NEW worker rather than the stale original.
|
|
384
|
+
const ticketRunIdMap = new Map();
|
|
385
|
+
for (const ts of epicRunState.ticket_statuses) {
|
|
386
|
+
if (ts.dispatch_run_id)
|
|
387
|
+
ticketRunIdMap.set(ts.ticket_key, ts.dispatch_run_id);
|
|
388
|
+
}
|
|
389
|
+
const latestDispatchByTicket = new Map();
|
|
390
|
+
for (const d of epicRunState.dispatches) {
|
|
391
|
+
if (!d.run_id)
|
|
392
|
+
continue;
|
|
393
|
+
const updatedAt = new Date(d.updated_at).getTime();
|
|
394
|
+
const prev = latestDispatchByTicket.get(d.ticket_key);
|
|
395
|
+
if (!prev || updatedAt >= prev.updatedAt) {
|
|
396
|
+
latestDispatchByTicket.set(d.ticket_key, { runId: d.run_id, updatedAt });
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
for (const [tk, info] of latestDispatchByTicket) {
|
|
400
|
+
ticketRunIdMap.set(tk, info.runId);
|
|
401
|
+
}
|
|
402
|
+
const resolvePrNumber = (ticketKey) => {
|
|
403
|
+
const raw = prBindings[ticketKey];
|
|
404
|
+
if (typeof raw === "number" && Number.isInteger(raw) && raw >= 1)
|
|
405
|
+
return raw;
|
|
406
|
+
if (raw && typeof raw === "object") {
|
|
407
|
+
const obj = raw;
|
|
408
|
+
const pr = obj.pr_number ?? obj.pr;
|
|
409
|
+
if (typeof pr === "number" && Number.isInteger(pr) && pr >= 1)
|
|
410
|
+
return pr;
|
|
411
|
+
}
|
|
412
|
+
return null;
|
|
413
|
+
};
|
|
414
|
+
const maxSeqForRun = (runId) => {
|
|
415
|
+
let maxSeq = 0;
|
|
416
|
+
for (const ev of localEvents) {
|
|
417
|
+
if (ev.run_id === runId && ev.seq > maxSeq)
|
|
418
|
+
maxSeq = ev.seq;
|
|
419
|
+
}
|
|
420
|
+
return maxSeq;
|
|
421
|
+
};
|
|
333
422
|
const reconcileDeps = {
|
|
334
423
|
casTicketStatus: async (ek, tk, rowVersion, nextStatus, planVersion) => advanceEpicTicketStatus(access, {
|
|
335
424
|
epicKey: ek,
|
|
@@ -360,13 +449,146 @@ export async function runEpicTick(options, deps = {}) {
|
|
|
360
449
|
runId,
|
|
361
450
|
});
|
|
362
451
|
},
|
|
363
|
-
dispatchSeam: async (ek, tk) => dispatchSeam(ek, tk),
|
|
452
|
+
dispatchSeam: async (ek, tk, attempt = 0) => dispatchSeam(ek, tk, attempt),
|
|
364
453
|
processMerge: async (acc, event) => processMergeFn(acc, event),
|
|
365
454
|
postActionWaitSeam: async (ek, tk) => postActionWaitSeam(ek, tk),
|
|
366
455
|
escalateOnce: async (ek, reason) => escalateOnce(ek, reason),
|
|
367
456
|
log,
|
|
457
|
+
// BAPI-442: teardown and Jira-transition seams (fail-open, optional).
|
|
458
|
+
teardownSeam: async (_ek, tk) => {
|
|
459
|
+
// Resolve the PR number for the ticket.
|
|
460
|
+
const prNumber = resolvePrNumber(tk);
|
|
461
|
+
if (prNumber === null) {
|
|
462
|
+
errorLog(`[epic-tick] teardown: no PR binding for ${tk}; skipping`);
|
|
463
|
+
return;
|
|
464
|
+
}
|
|
465
|
+
// Fetch setup to get the expected head SHA if available; fall back to empty.
|
|
466
|
+
let expectedSha = "";
|
|
467
|
+
try {
|
|
468
|
+
const setup = await fetchEffectiveSupervisorSetup(access, epic_key);
|
|
469
|
+
const binding = (setup.pr_bindings ?? {})[tk];
|
|
470
|
+
if (binding && typeof binding === "object") {
|
|
471
|
+
const b = binding;
|
|
472
|
+
if (typeof b.head_sha === "string")
|
|
473
|
+
expectedSha = b.head_sha;
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
catch {
|
|
477
|
+
// Best-effort; proceed with empty SHA (endpoint still deletes by PR number)
|
|
478
|
+
}
|
|
479
|
+
try {
|
|
480
|
+
await deletePullRequestBranch(access, prNumber, expectedSha || "");
|
|
481
|
+
log(`[epic-tick] teardown: branch deleted for PR #${prNumber} (ticket=${tk})`);
|
|
482
|
+
}
|
|
483
|
+
catch (err) {
|
|
484
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "error";
|
|
485
|
+
errorLog(`[epic-tick] teardown: branch-delete failed (${safeMsg}) for ${tk}`);
|
|
486
|
+
}
|
|
487
|
+
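// Remove local worktree idempotently; errors are benign. NOTE(review): spawnSync does not throw on a non-zero git exit, so the success log below fires even when removal fails — confirm intent.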
|
|
488
|
+
try {
|
|
489
|
+
spawnSync("git", ["worktree", "remove", "--force", tk], { stdio: "ignore" });
|
|
490
|
+
log(`[epic-tick] teardown: worktree removed for ${tk}`);
|
|
491
|
+
}
|
|
492
|
+
catch {
|
|
493
|
+
// Already removed or never created — idempotent skip.
|
|
494
|
+
}
|
|
495
|
+
},
|
|
496
|
+
jiraTransitionSeam: async (_ek, tk) => {
|
|
497
|
+
try {
|
|
498
|
+
const result = await transitionJiraStatus(access, tk, "auto");
|
|
499
|
+
if (result.status === "skipped") {
|
|
500
|
+
log(`[epic-tick] jira-transition: no matching transition for ${tk} (skipped)`);
|
|
501
|
+
}
|
|
502
|
+
else {
|
|
503
|
+
log(`[epic-tick] jira-transition: transitioned ${tk}`);
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
catch (err) {
|
|
507
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "error";
|
|
508
|
+
errorLog(`[epic-tick] jira-transition failed (${safeMsg}) for ${tk}`);
|
|
509
|
+
}
|
|
510
|
+
},
|
|
511
|
+
// BAPI-441 remediation seams.
|
|
512
|
+
readWorkerLiveness: async (_ek, tk) => {
|
|
513
|
+
const runId = ticketRunIdMap.get(tk);
|
|
514
|
+
if (!runId)
|
|
515
|
+
return { alive: false, workerId: null };
|
|
516
|
+
return extractWorkerLiveness(localEvents, runId, nowFn(), livenessWindowSeconds);
|
|
517
|
+
},
|
|
518
|
+
remediateCas: async (ek, tk, attemptKind, reason) => {
|
|
519
|
+
const prNumber = resolvePrNumber(tk);
|
|
520
|
+
if (prNumber === null) {
|
|
521
|
+
throw new Error(`remediate: no PR binding for ${tk}`);
|
|
522
|
+
}
|
|
523
|
+
const reviewStatus = (await fetchPrReviewStatus(access, prNumber));
|
|
524
|
+
const headSha = reviewStatus?.detail?.head_sha ?? null;
|
|
525
|
+
if (!headSha) {
|
|
526
|
+
throw new Error(`remediate: no head_sha for PR ${prNumber}`);
|
|
527
|
+
}
|
|
528
|
+
const rowVersion = observed.ticket_row_versions.get(tk) ?? 0;
|
|
529
|
+
// Deterministic block-state idempotency key: stable for a given durable
|
|
530
|
+
// row_version so a same-tick retry replays (409, swallowed); advances
|
|
531
|
+
// with the next attempt.
|
|
532
|
+
const idempotencyKey = `remediate:${ek}:${tk}:${rowVersion}`;
|
|
533
|
+
const result = await remediateEpicTicket(access, {
|
|
534
|
+
pr_number: prNumber,
|
|
535
|
+
epic_run_id: ek,
|
|
536
|
+
ticket_key: tk,
|
|
537
|
+
expected_row_version: rowVersion,
|
|
538
|
+
head_sha: headSha,
|
|
539
|
+
idempotency_key: idempotencyKey,
|
|
540
|
+
attempt_kind: attemptKind,
|
|
541
|
+
reason,
|
|
542
|
+
});
|
|
543
|
+
if (result.conflict) {
|
|
544
|
+
return { conflict: true, reviewDigest: null, truncated: false };
|
|
545
|
+
}
|
|
546
|
+
return {
|
|
547
|
+
conflict: false,
|
|
548
|
+
reviewDigest: result.response.review_digest,
|
|
549
|
+
truncated: result.response.truncated,
|
|
550
|
+
};
|
|
551
|
+
},
|
|
552
|
+
sendNudge: async (_ek, tk, attempt, reviewDigest, truncated, reason, workerId) => {
|
|
553
|
+
const runId = ticketRunIdMap.get(tk);
|
|
554
|
+
if (!runId)
|
|
555
|
+
throw new Error(`nudge: no run_id for ${tk}`);
|
|
556
|
+
// workerId is resolved by readWorkerLiveness from the same heartbeat
|
|
557
|
+
// scan and null-checked by the reconcile pass before remediateCas, so
|
|
558
|
+
// the two seams stay consistent and no budget is burned on a missing id.
|
|
559
|
+
const input = buildSupervisorRemediationWorkerMessage({
|
|
560
|
+
runId,
|
|
561
|
+
workerId,
|
|
562
|
+
ticketKey: tk,
|
|
563
|
+
reason,
|
|
564
|
+
attempt,
|
|
565
|
+
reviewDigest: reviewDigest ?? "",
|
|
566
|
+
truncated,
|
|
567
|
+
causeSeq: maxSeqForRun(runId),
|
|
568
|
+
});
|
|
569
|
+
sendWorkerMessage(input);
|
|
570
|
+
},
|
|
571
|
+
resumeDispatch: async (ek, tk, attempt) => {
|
|
572
|
+
// Claim an attempt-scoped pending dispatch row FIRST so the spawn's
|
|
573
|
+
// run_spawned correlation (inside orchestrateStartTickets) has a row to
|
|
574
|
+
// transition and the re-dispatched run_id is durably recorded against
|
|
575
|
+
// the ticket. The claim is idempotent (lease-held/already-spawned are
|
|
576
|
+
// returned, not thrown).
|
|
577
|
+
await recordEpicDispatch(access, {
|
|
578
|
+
epicKey: ek,
|
|
579
|
+
ticketKey: tk,
|
|
580
|
+
planVersion: plan.plan_version,
|
|
581
|
+
leaseOwner: lease_owner,
|
|
582
|
+
ttlSeconds: DEFAULT_DISPATCH_KEY_TTL_SECONDS,
|
|
583
|
+
attempt,
|
|
584
|
+
});
|
|
585
|
+
// dispatchSeam returns the new run_id; orchestrate correlates it into
|
|
586
|
+
// the attempt-scoped epic_dispatch row, so the next tick's liveness map
|
|
587
|
+
// (built from the dispatch ledger) tracks the fresh worker.
|
|
588
|
+
await dispatchSeam(ek, tk, attempt);
|
|
589
|
+
},
|
|
368
590
|
};
|
|
369
|
-
const reconcileResult = await reconcileEpic(access, observed, plan, reconcileDeps);
|
|
591
|
+
const reconcileResult = await reconcileEpic(access, observed, plan, reconcileDeps, remediationConfig);
|
|
370
592
|
log(`[epic-tick] reconcile done: epic=${epic_key} ` +
|
|
371
593
|
`signals=${reconcileResult.signals_folded} ` +
|
|
372
594
|
`dispatched=${reconcileResult.dispatched} ` +
|
|
@@ -494,7 +716,7 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
494
716
|
const planHash = hashPlan(dag);
|
|
495
717
|
return { plan_hash: planHash, plan_version: response.plan_version, tickets };
|
|
496
718
|
};
|
|
497
|
-
const dispatchSeam = async (ek, tk) => {
|
|
719
|
+
const dispatchSeam = async (ek, tk, attempt = 0) => {
|
|
498
720
|
// Guard: fetchPlan must run before dispatchSeam so cachedPlanVersion and
|
|
499
721
|
// automationMap are populated. A zero version means the factory seam was
|
|
500
722
|
// wired but fetchPlan was never called — fail explicitly rather than silently
|
|
@@ -502,6 +724,12 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
502
724
|
if (cachedPlanVersion === 0) {
|
|
503
725
|
throw new Error(`dispatchSeam called before fetchPlan for epic ${ek} ticket ${tk}; cachedPlanVersion is 0`);
|
|
504
726
|
}
|
|
727
|
+
// BAPI-441: a remediation re-dispatch (attempt > 0) reuses the existing
|
|
728
|
+
// branch/worktree (resume mode) and claims an attempt-scoped dispatch key so
|
|
729
|
+
// it is not deduped against the original epic dispatch.
|
|
730
|
+
const isResume = attempt > 0;
|
|
731
|
+
// The dispatch kind comes from the plan node's automation (start-tickets or
|
|
732
|
+
// review-tickets); default to start-tickets when unspecified.
|
|
505
733
|
const kind = automationMap.get(tk) ?? "start-tickets";
|
|
506
734
|
// Operator dry-run: when BAPI_CONDUCTOR_DISPATCH_DRY_RUN=1, dispatch resolves
|
|
507
735
|
// the spawn command + model routing but opens NO terminal, creates NO worktree,
|
|
@@ -513,7 +741,7 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
513
741
|
epic_key: ek,
|
|
514
742
|
epic_run_id: ek,
|
|
515
743
|
plan_version: cachedPlanVersion,
|
|
516
|
-
dispatch_key: buildEpicDispatchKey(ek, tk, cachedPlanVersion),
|
|
744
|
+
dispatch_key: buildEpicDispatchKey(ek, tk, cachedPlanVersion, attempt),
|
|
517
745
|
};
|
|
518
746
|
const deps = createDefaultStartTicketsDeps();
|
|
519
747
|
let runId;
|
|
@@ -533,6 +761,14 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
533
761
|
runId = result.rows[0]?.runId;
|
|
534
762
|
}
|
|
535
763
|
else {
|
|
764
|
+
// BAPI-409 / IH-1: epic dispatch (dispatch_key set) requires the conductor
|
|
765
|
+
// stage to mint a run_id and provision per-worker env/supervisor context.
|
|
766
|
+
// `conductorEnabled: true` alone is necessary but not sufficient — the
|
|
767
|
+
// BAPI-409 guard in orchestrateStartTickets fails closed unless the
|
|
768
|
+
// createConductorContext seam (and its siblings) is injected via the third
|
|
769
|
+
// `overrides` argument, exactly as the packaged start-tickets CLI does. The
|
|
770
|
+
// orchestrator short-circuits on dryRun before using them, so passing them
|
|
771
|
+
// unconditionally is safe; dispatchDryRun preserves the operator dry-run seam.
|
|
536
772
|
const result = await orchestrateStartTickets(deps, {
|
|
537
773
|
keys: [tk],
|
|
538
774
|
epic: identity,
|
|
@@ -543,11 +779,13 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
543
779
|
refreshMain: false,
|
|
544
780
|
branchOverrides: {},
|
|
545
781
|
baseBranch: "main",
|
|
546
|
-
// Epic dispatch always uses the Conductor system (epic-tick coordinates
|
|
547
|
-
// workers through it), so the message-relay instruction must be present
|
|
548
|
-
// on epic-dispatched worker prompts regardless of the user-facing
|
|
549
|
-
// `--conductor` default.
|
|
550
782
|
conductorEnabled: true,
|
|
783
|
+
// BAPI-441: re-dispatch reuses the existing branch/worktree.
|
|
784
|
+
resumeMode: isResume,
|
|
785
|
+
}, {
|
|
786
|
+
createConductorContext: createStartTicketsConductorContext,
|
|
787
|
+
provisionConductorHooksForRows,
|
|
788
|
+
emitStartTicketsRunStarted,
|
|
551
789
|
});
|
|
552
790
|
if (!result.ok) {
|
|
553
791
|
throw new Error(`start-tickets dispatch failed: ${result.error}`);
|
|
@@ -564,11 +802,50 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
564
802
|
}
|
|
565
803
|
return runId;
|
|
566
804
|
};
|
|
567
|
-
const fetchLocalEvents = (_ek) => {
|
|
805
|
+
const fetchLocalEvents = (_ek, runIds) => {
|
|
568
806
|
// Workers and the epic-tick process share the same local SQLite ledger
|
|
569
807
|
// (~/.config/bridge/events.db). pollConductorEvents opens it read-only.
|
|
570
|
-
|
|
571
|
-
|
|
808
|
+
//
|
|
809
|
+
// Scope the read to this epic's dispatched run_ids. The shared ledger holds
|
|
810
|
+
// events for every epic/worker on the machine (up to RETENTION_MAX_ROWS),
|
|
811
|
+
// but rebuildObservedState only folds signals whose run_id maps to one of
|
|
812
|
+
// these dispatches — so the run_ids filter pushes that scoping into SQL and
|
|
813
|
+
// avoids loading sibling-epic events on every tick. With no known run_ids
|
|
814
|
+
// (first tick before any dispatch) there is nothing to fold, so skip the
|
|
815
|
+
// read entirely.
|
|
816
|
+
//
|
|
817
|
+
// pollConductorEvents returns at most POLL_LIMIT_MAX events per call
|
|
818
|
+
// (default 100, capped at 1000) starting at `since_seq`. rebuildObservedState
|
|
819
|
+
// folds terminal signals (gate.met/run.stopped/merge.succeeded/ci.failed)
|
|
820
|
+
// ONLY from the events it is handed, so a single capped page silently hides
|
|
821
|
+
// recent terminal signals once the (scoped) result grows past one page —
|
|
822
|
+
// done-detection then breaks. Drain the COMPLETE history by paginating on the
|
|
823
|
+
// `next_seq` cursor until a short (or empty) page signals the tail.
|
|
824
|
+
if (runIds !== undefined && runIds.length === 0) {
|
|
825
|
+
return [];
|
|
826
|
+
}
|
|
827
|
+
const runIdsFilter = runIds && runIds.length > 0 ? { run_ids: [...runIds] } : undefined;
|
|
828
|
+
const events = [];
|
|
829
|
+
let sinceSeq = 1;
|
|
830
|
+
// Retention caps (retention_days/retention_max_rows) bound the ledger, but
|
|
831
|
+
// cap total iterations defensively against a non-advancing cursor.
|
|
832
|
+
const MAX_PAGES = 10_000;
|
|
833
|
+
for (let page = 0; page < MAX_PAGES; page += 1) {
|
|
834
|
+
const result = pollConductorEvents({
|
|
835
|
+
data_mode: "full",
|
|
836
|
+
since_seq: sinceSeq,
|
|
837
|
+
limit: POLL_LIMIT_MAX,
|
|
838
|
+
filter: runIdsFilter,
|
|
839
|
+
});
|
|
840
|
+
events.push(...result.events);
|
|
841
|
+
// Stop on a short/empty page (no more rows) or a cursor that fails to
|
|
842
|
+
// advance (guards against an infinite loop).
|
|
843
|
+
if (result.count < POLL_LIMIT_MAX || result.next_seq <= sinceSeq) {
|
|
844
|
+
break;
|
|
845
|
+
}
|
|
846
|
+
sinceSeq = result.next_seq;
|
|
847
|
+
}
|
|
848
|
+
return events;
|
|
572
849
|
};
|
|
573
850
|
const escalateOnce = async (ek, reason) => {
|
|
574
851
|
const candidate = {
|
|
@@ -623,5 +900,9 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
623
900
|
fetchLocalEvents,
|
|
624
901
|
escalateOnce,
|
|
625
902
|
postActionWaitSeam,
|
|
903
|
+
// BAPI-442 seams are wired at the reconcileDeps level inside runEpicTick
|
|
904
|
+
// (they need the per-tick `access` and `prBindings` closure). The factory
|
|
905
|
+
// returns the dispatchSeam with resume-mode (attempt > 0) support; the other two seams are
|
|
906
|
+
// defined inline in the reconcileDeps object in runEpicTick.
|
|
626
907
|
};
|
|
627
908
|
}
|