@bridge_gpt/mcp-server 0.2.6 → 0.2.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -5
- package/build/commands.generated.js +1 -1
- package/build/conductor/bridge-api-client.js +262 -35
- package/build/conductor/cli.js +22 -1
- package/build/conductor/doctor.js +34 -1
- package/build/conductor/done-gate.js +301 -58
- package/build/conductor/epic-reconcile.js +121 -4
- package/build/conductor/epic-runtime.js +299 -13
- package/build/conductor/epic-state.js +108 -9
- package/build/conductor/git-ci-types.js +6 -0
- package/build/conductor/pr-ci-producer.js +114 -15
- package/build/conductor/pr-review-producer.js +116 -0
- package/build/conductor/store.js +8 -1
- package/build/conductor/supervisor-message-relay.js +31 -0
- package/build/conductor/taxonomy.js +3 -0
- package/build/conductor/tools.js +2 -2
- package/build/index.js +356 -1086
- package/build/init.js +481 -0
- package/build/install-bridge.js +692 -0
- package/build/mcp-profile.js +43 -0
- package/build/readme.generated.js +1 -1
- package/build/start-tickets-conductor.js +1 -0
- package/build/start-tickets.js +328 -36
- package/build/upgrade-cli.js +154 -0
- package/build/version.generated.js +1 -1
- package/package.json +3 -2
|
@@ -17,16 +17,20 @@
|
|
|
17
17
|
* All durable mutations go through the injectable seams that call the sibling
|
|
18
18
|
* Epic Run TS client (already available in bridge-api-client.ts as of BAPI-407).
|
|
19
19
|
*/
|
|
20
|
-
import {
|
|
20
|
+
import { spawnSync } from "child_process";
|
|
21
|
+
import { resolveConductorBridgeApiAccess, claimEpicSupervisionLease, fetchEpicRunState, advanceEpicTicketStatus, createEpicTicketStatus, recordEpicDispatch, transitionEpicDispatch, fetchParseStatus, triggerRepositoryParse, getEpicPlan, buildEpicDispatchKey, fetchEffectiveSupervisorConfig, fetchEffectiveSupervisorSetup, fetchPrReviewStatus, remediateEpicTicket, deletePullRequestBranch, transitionJiraStatus, } from "./bridge-api-client.js";
|
|
21
22
|
import { processGateMetMerge } from "./supervisor-merge.js";
|
|
22
|
-
import { rebuildObservedState, } from "./epic-state.js";
|
|
23
|
+
import { rebuildObservedState, extractWorkerLiveness, } from "./epic-state.js";
|
|
23
24
|
import { reconcileEpic } from "./epic-reconcile.js";
|
|
25
|
+
import { buildSupervisorRemediationWorkerMessage } from "./supervisor-message-relay.js";
|
|
26
|
+
import { sendWorkerMessage } from "./store.js";
|
|
24
27
|
import { hashPlan } from "./plan.js";
|
|
25
|
-
import { pollConductorEvents } from "./store.js";
|
|
28
|
+
import { pollConductorEvents, POLL_LIMIT_MAX } from "./store.js";
|
|
26
29
|
import { dispatchSupervisorNotification } from "./supervisor-notification.js";
|
|
27
30
|
import { makeSupervisorIdempotencyKey } from "./supervisor-ledger.js";
|
|
28
31
|
import { createDefaultStartTicketsDeps, orchestrateStartTickets } from "../start-tickets.js";
|
|
29
32
|
import { orchestrateReviewTickets } from "../review-tickets.js";
|
|
33
|
+
import { createStartTicketsConductorContext, provisionConductorHooksForRows, emitStartTicketsRunStarted, } from "../start-tickets-conductor.js";
|
|
30
34
|
// ---------------------------------------------------------------------------
|
|
31
35
|
// Constants
|
|
32
36
|
// ---------------------------------------------------------------------------
|
|
@@ -46,7 +50,7 @@ function defaultLeaseOwner() {
|
|
|
46
50
|
async function defaultEscalateOnce(epicKey, reason) {
|
|
47
51
|
process.stderr.write(`[epic-tick] ESCALATION epic=${epicKey} reason=${reason}\n`);
|
|
48
52
|
}
|
|
49
|
-
async function defaultDispatchSeam(_epicKey, ticketKey) {
|
|
53
|
+
async function defaultDispatchSeam(_epicKey, ticketKey, _attempt = 0) {
|
|
50
54
|
throw new Error(`dispatch seam not wired for ticket ${ticketKey}`);
|
|
51
55
|
}
|
|
52
56
|
async function defaultPostActionWaitSeam(_epicKey, _ticketKey) {
|
|
@@ -73,7 +77,8 @@ export async function runEpicTick(options, deps = {}) {
|
|
|
73
77
|
const dispatchSeam = deps.dispatchSeam ?? defaultDispatchSeam;
|
|
74
78
|
const processMergeFn = deps.processMerge ?? processGateMetMerge;
|
|
75
79
|
const postActionWaitSeam = deps.postActionWaitSeam ?? defaultPostActionWaitSeam;
|
|
76
|
-
const fetchLocalEvents = deps.fetchLocalEvents ??
|
|
80
|
+
const fetchLocalEvents = deps.fetchLocalEvents ??
|
|
81
|
+
((_key, _runIds) => []);
|
|
77
82
|
const resolveBridgeAccess = deps.resolveBridgeAccess ?? resolveConductorBridgeApiAccess;
|
|
78
83
|
const claimLeaseFn = deps.claimLease ?? claimEpicSupervisionLease;
|
|
79
84
|
const fetchEpicStateFn = deps.fetchEpicState ?? fetchEpicRunState;
|
|
@@ -190,7 +195,15 @@ export async function runEpicTick(options, deps = {}) {
|
|
|
190
195
|
worker_count: 0,
|
|
191
196
|
};
|
|
192
197
|
}
|
|
193
|
-
|
|
198
|
+
// Scope the local-ledger read to this epic's dispatched run_ids. The shared
|
|
199
|
+
// ~/.config/bridge/events.db ledger accumulates events for every epic/worker
|
|
200
|
+
// on the machine; rebuildObservedState only folds signals whose run_id maps
|
|
201
|
+
// to one of these dispatches, so scoping the read here avoids loading the
|
|
202
|
+
// entire (up to 50K-row) ledger on every tick.
|
|
203
|
+
const dispatchedRunIds = epicRunState.dispatches
|
|
204
|
+
.map((d) => d.run_id)
|
|
205
|
+
.filter((rid) => typeof rid === "string" && rid.length > 0);
|
|
206
|
+
const localEvents = fetchLocalEvents(epic_key, dispatchedRunIds);
|
|
194
207
|
const observed = rebuildObservedState(epicRunState, localEvents, nowFn());
|
|
195
208
|
workerCount = [...observed.ticket_statuses.values()].filter((s) => ACTIVE_WORKER_STATUSES.has(s)).length;
|
|
196
209
|
// Step 3.5: Run post-action waits (parse-after-merge)
|
|
@@ -330,6 +343,82 @@ export async function runEpicTick(options, deps = {}) {
|
|
|
330
343
|
}
|
|
331
344
|
// Step 5: Reconcile observed→desired
|
|
332
345
|
if (plan !== null) {
|
|
346
|
+
// BAPI-441: fetch the effective supervisor config (budget ceilings +
|
|
347
|
+
// liveness window) and setup (pr_bindings) once. Fail-open: if the config
|
|
348
|
+
// read fails, remediationConfig stays undefined and reconcile skips the
|
|
349
|
+
// remediation pass entirely (dispatch/merge steps unaffected).
|
|
350
|
+
let remediationConfig;
|
|
351
|
+
let livenessWindowSeconds = 120;
|
|
352
|
+
let prBindings = {};
|
|
353
|
+
try {
|
|
354
|
+
const cfg = await fetchEffectiveSupervisorConfig(access, epic_key);
|
|
355
|
+
remediationConfig = {
|
|
356
|
+
max_remediation_attempts: cfg.max_remediation_attempts,
|
|
357
|
+
max_remediation_no_progress_attempts: cfg.max_remediation_no_progress_attempts,
|
|
358
|
+
auto_rereview_enabled: cfg.auto_rereview_enabled ?? false,
|
|
359
|
+
teardown_enabled: cfg.teardown_enabled ?? false,
|
|
360
|
+
};
|
|
361
|
+
livenessWindowSeconds = cfg.worker_liveness_window_seconds;
|
|
362
|
+
}
|
|
363
|
+
catch (err) {
|
|
364
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "config error";
|
|
365
|
+
errorLog(`[epic-tick] supervisor-config fetch failed (${safeMsg}); skipping remediation for epic=${epic_key}`);
|
|
366
|
+
}
|
|
367
|
+
if (remediationConfig) {
|
|
368
|
+
try {
|
|
369
|
+
const setup = await fetchEffectiveSupervisorSetup(access, epic_key);
|
|
370
|
+
if (setup.pr_bindings && typeof setup.pr_bindings === "object") {
|
|
371
|
+
prBindings = setup.pr_bindings;
|
|
372
|
+
}
|
|
373
|
+
}
|
|
374
|
+
catch (err) {
|
|
375
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "setup error";
|
|
376
|
+
errorLog(`[epic-tick] supervisor-setup fetch failed (${safeMsg}); remediation PR resolution degraded for epic=${epic_key}`);
|
|
377
|
+
}
|
|
378
|
+
}
|
|
379
|
+
// ticket_key → dispatched run_id (the run whose heartbeat liveness reads).
|
|
380
|
+
// Seed from ticket_status.dispatch_run_id, then prefer the most-recent
|
|
381
|
+
// dispatch-ledger run_id per ticket so that after a remediation re-dispatch
|
|
382
|
+
// (a new attempt-scoped epic_dispatch row correlated with the fresh run_id)
|
|
383
|
+
// liveness tracks the NEW worker rather than the stale original.
|
|
384
|
+
const ticketRunIdMap = new Map();
|
|
385
|
+
for (const ts of epicRunState.ticket_statuses) {
|
|
386
|
+
if (ts.dispatch_run_id)
|
|
387
|
+
ticketRunIdMap.set(ts.ticket_key, ts.dispatch_run_id);
|
|
388
|
+
}
|
|
389
|
+
const latestDispatchByTicket = new Map();
|
|
390
|
+
for (const d of epicRunState.dispatches) {
|
|
391
|
+
if (!d.run_id)
|
|
392
|
+
continue;
|
|
393
|
+
const updatedAt = new Date(d.updated_at).getTime();
|
|
394
|
+
const prev = latestDispatchByTicket.get(d.ticket_key);
|
|
395
|
+
if (!prev || updatedAt >= prev.updatedAt) {
|
|
396
|
+
latestDispatchByTicket.set(d.ticket_key, { runId: d.run_id, updatedAt });
|
|
397
|
+
}
|
|
398
|
+
}
|
|
399
|
+
for (const [tk, info] of latestDispatchByTicket) {
|
|
400
|
+
ticketRunIdMap.set(tk, info.runId);
|
|
401
|
+
}
|
|
402
|
+
// Resolve the PR number bound to a ticket from the per-tick `prBindings` map.
// A binding may be the PR number itself or an object carrying it under
// `pr_number` (preferred) or `pr`. Returns null when no positive-integer PR
// number can be found.
const resolvePrNumber = (ticketKey) => {
    const isPositiveInteger = (value) => typeof value === "number" && Number.isInteger(value) && value >= 1;
    const binding = prBindings[ticketKey];
    if (isPositiveInteger(binding)) {
        return binding;
    }
    if (binding && typeof binding === "object") {
        const candidate = binding.pr_number ?? binding.pr;
        if (isPositiveInteger(candidate)) {
            return candidate;
        }
    }
    return null;
};
|
|
414
|
+
// Highest `seq` among this tick's local ledger events that belong to `runId`;
// 0 when the run has no events (or none with a seq above 0).
const maxSeqForRun = (runId) => {
    let highest = 0;
    for (const { run_id: eventRunId, seq } of localEvents) {
        if (eventRunId !== runId) {
            continue;
        }
        if (seq > highest) {
            highest = seq;
        }
    }
    return highest;
};
|
|
333
422
|
const reconcileDeps = {
|
|
334
423
|
casTicketStatus: async (ek, tk, rowVersion, nextStatus, planVersion) => advanceEpicTicketStatus(access, {
|
|
335
424
|
epicKey: ek,
|
|
@@ -360,13 +449,146 @@ export async function runEpicTick(options, deps = {}) {
|
|
|
360
449
|
runId,
|
|
361
450
|
});
|
|
362
451
|
},
|
|
363
|
-
dispatchSeam: async (ek, tk) => dispatchSeam(ek, tk),
|
|
452
|
+
dispatchSeam: async (ek, tk, attempt = 0) => dispatchSeam(ek, tk, attempt),
|
|
364
453
|
processMerge: async (acc, event) => processMergeFn(acc, event),
|
|
365
454
|
postActionWaitSeam: async (ek, tk) => postActionWaitSeam(ek, tk),
|
|
366
455
|
escalateOnce: async (ek, reason) => escalateOnce(ek, reason),
|
|
367
456
|
log,
|
|
457
|
+
// BAPI-442: teardown and Jira-transition seams (fail-open, optional).
|
|
458
|
+
teardownSeam: async (_ek, tk) => {
|
|
459
|
+
// Resolve the PR number for the ticket.
|
|
460
|
+
const prNumber = resolvePrNumber(tk);
|
|
461
|
+
if (prNumber === null) {
|
|
462
|
+
errorLog(`[epic-tick] teardown: no PR binding for ${tk}; skipping`);
|
|
463
|
+
return;
|
|
464
|
+
}
|
|
465
|
+
// Fetch setup to get the expected head SHA if available; fall back to empty.
|
|
466
|
+
let expectedSha = "";
|
|
467
|
+
try {
|
|
468
|
+
const setup = await fetchEffectiveSupervisorSetup(access, epic_key);
|
|
469
|
+
const binding = (setup.pr_bindings ?? {})[tk];
|
|
470
|
+
if (binding && typeof binding === "object") {
|
|
471
|
+
const b = binding;
|
|
472
|
+
if (typeof b.head_sha === "string")
|
|
473
|
+
expectedSha = b.head_sha;
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
catch {
|
|
477
|
+
// Best-effort; proceed with empty SHA (endpoint still deletes by PR number)
|
|
478
|
+
}
|
|
479
|
+
try {
|
|
480
|
+
await deletePullRequestBranch(access, prNumber, expectedSha || "");
|
|
481
|
+
log(`[epic-tick] teardown: branch deleted for PR #${prNumber} (ticket=${tk})`);
|
|
482
|
+
}
|
|
483
|
+
catch (err) {
|
|
484
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "error";
|
|
485
|
+
errorLog(`[epic-tick] teardown: branch-delete failed (${safeMsg}) for ${tk}`);
|
|
486
|
+
}
|
|
487
|
+
// Remove local worktree idempotently; errors are benign.
|
|
488
|
+
try {
|
|
489
|
+
spawnSync("git", ["worktree", "remove", "--force", tk], { stdio: "ignore" });
|
|
490
|
+
log(`[epic-tick] teardown: worktree removed for ${tk}`);
|
|
491
|
+
}
|
|
492
|
+
catch {
|
|
493
|
+
// Already removed or never created — idempotent skip.
|
|
494
|
+
}
|
|
495
|
+
},
|
|
496
|
+
jiraTransitionSeam: async (_ek, tk) => {
|
|
497
|
+
try {
|
|
498
|
+
const result = await transitionJiraStatus(access, tk, "auto");
|
|
499
|
+
if (result.status === "skipped") {
|
|
500
|
+
log(`[epic-tick] jira-transition: no matching transition for ${tk} (skipped)`);
|
|
501
|
+
}
|
|
502
|
+
else {
|
|
503
|
+
log(`[epic-tick] jira-transition: transitioned ${tk}`);
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
catch (err) {
|
|
507
|
+
const safeMsg = err instanceof Error ? err.constructor.name : "error";
|
|
508
|
+
errorLog(`[epic-tick] jira-transition failed (${safeMsg}) for ${tk}`);
|
|
509
|
+
}
|
|
510
|
+
},
|
|
511
|
+
// BAPI-441 remediation seams.
|
|
512
|
+
readWorkerLiveness: async (_ek, tk) => {
|
|
513
|
+
const runId = ticketRunIdMap.get(tk);
|
|
514
|
+
if (!runId)
|
|
515
|
+
return { alive: false, workerId: null };
|
|
516
|
+
return extractWorkerLiveness(localEvents, runId, nowFn(), livenessWindowSeconds);
|
|
517
|
+
},
|
|
518
|
+
remediateCas: async (ek, tk, attemptKind, reason) => {
|
|
519
|
+
const prNumber = resolvePrNumber(tk);
|
|
520
|
+
if (prNumber === null) {
|
|
521
|
+
throw new Error(`remediate: no PR binding for ${tk}`);
|
|
522
|
+
}
|
|
523
|
+
const reviewStatus = (await fetchPrReviewStatus(access, prNumber));
|
|
524
|
+
const headSha = reviewStatus?.detail?.head_sha ?? null;
|
|
525
|
+
if (!headSha) {
|
|
526
|
+
throw new Error(`remediate: no head_sha for PR ${prNumber}`);
|
|
527
|
+
}
|
|
528
|
+
const rowVersion = observed.ticket_row_versions.get(tk) ?? 0;
|
|
529
|
+
// Deterministic block-state idempotency key: stable for a given durable
|
|
530
|
+
// row_version so a same-tick retry replays (409, swallowed); advances
|
|
531
|
+
// with the next attempt.
|
|
532
|
+
const idempotencyKey = `remediate:${ek}:${tk}:${rowVersion}`;
|
|
533
|
+
const result = await remediateEpicTicket(access, {
|
|
534
|
+
pr_number: prNumber,
|
|
535
|
+
epic_run_id: ek,
|
|
536
|
+
ticket_key: tk,
|
|
537
|
+
expected_row_version: rowVersion,
|
|
538
|
+
head_sha: headSha,
|
|
539
|
+
idempotency_key: idempotencyKey,
|
|
540
|
+
attempt_kind: attemptKind,
|
|
541
|
+
reason,
|
|
542
|
+
});
|
|
543
|
+
if (result.conflict) {
|
|
544
|
+
return { conflict: true, reviewDigest: null, truncated: false };
|
|
545
|
+
}
|
|
546
|
+
return {
|
|
547
|
+
conflict: false,
|
|
548
|
+
reviewDigest: result.response.review_digest,
|
|
549
|
+
truncated: result.response.truncated,
|
|
550
|
+
};
|
|
551
|
+
},
|
|
552
|
+
sendNudge: async (_ek, tk, attempt, reviewDigest, truncated, reason, workerId) => {
|
|
553
|
+
const runId = ticketRunIdMap.get(tk);
|
|
554
|
+
if (!runId)
|
|
555
|
+
throw new Error(`nudge: no run_id for ${tk}`);
|
|
556
|
+
// workerId is resolved by readWorkerLiveness from the same heartbeat
|
|
557
|
+
// scan and null-checked by the reconcile pass before remediateCas, so
|
|
558
|
+
// the two seams stay consistent and no budget is burned on a missing id.
|
|
559
|
+
const input = buildSupervisorRemediationWorkerMessage({
|
|
560
|
+
runId,
|
|
561
|
+
workerId,
|
|
562
|
+
ticketKey: tk,
|
|
563
|
+
reason,
|
|
564
|
+
attempt,
|
|
565
|
+
reviewDigest: reviewDigest ?? "",
|
|
566
|
+
truncated,
|
|
567
|
+
causeSeq: maxSeqForRun(runId),
|
|
568
|
+
});
|
|
569
|
+
sendWorkerMessage(input);
|
|
570
|
+
},
|
|
571
|
+
resumeDispatch: async (ek, tk, attempt) => {
|
|
572
|
+
// Claim an attempt-scoped pending dispatch row FIRST so the spawn's
|
|
573
|
+
// run_spawned correlation (inside orchestrateStartTickets) has a row to
|
|
574
|
+
// transition and the re-dispatched run_id is durably recorded against
|
|
575
|
+
// the ticket. The claim is idempotent (lease-held/already-spawned are
|
|
576
|
+
// returned, not thrown).
|
|
577
|
+
await recordEpicDispatch(access, {
|
|
578
|
+
epicKey: ek,
|
|
579
|
+
ticketKey: tk,
|
|
580
|
+
planVersion: plan.plan_version,
|
|
581
|
+
leaseOwner: lease_owner,
|
|
582
|
+
ttlSeconds: DEFAULT_DISPATCH_KEY_TTL_SECONDS,
|
|
583
|
+
attempt,
|
|
584
|
+
});
|
|
585
|
+
// dispatchSeam returns the new run_id; orchestrate correlates it into
|
|
586
|
+
// the attempt-scoped epic_dispatch row, so the next tick's liveness map
|
|
587
|
+
// (built from the dispatch ledger) tracks the fresh worker.
|
|
588
|
+
await dispatchSeam(ek, tk, attempt);
|
|
589
|
+
},
|
|
368
590
|
};
|
|
369
|
-
const reconcileResult = await reconcileEpic(access, observed, plan, reconcileDeps);
|
|
591
|
+
const reconcileResult = await reconcileEpic(access, observed, plan, reconcileDeps, remediationConfig);
|
|
370
592
|
log(`[epic-tick] reconcile done: epic=${epic_key} ` +
|
|
371
593
|
`signals=${reconcileResult.signals_folded} ` +
|
|
372
594
|
`dispatched=${reconcileResult.dispatched} ` +
|
|
@@ -494,7 +716,7 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
494
716
|
const planHash = hashPlan(dag);
|
|
495
717
|
return { plan_hash: planHash, plan_version: response.plan_version, tickets };
|
|
496
718
|
};
|
|
497
|
-
const dispatchSeam = async (ek, tk) => {
|
|
719
|
+
const dispatchSeam = async (ek, tk, attempt = 0) => {
|
|
498
720
|
// Guard: fetchPlan must run before dispatchSeam so cachedPlanVersion and
|
|
499
721
|
// automationMap are populated. A zero version means the factory seam was
|
|
500
722
|
// wired but fetchPlan was never called — fail explicitly rather than silently
|
|
@@ -502,6 +724,12 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
502
724
|
if (cachedPlanVersion === 0) {
|
|
503
725
|
throw new Error(`dispatchSeam called before fetchPlan for epic ${ek} ticket ${tk}; cachedPlanVersion is 0`);
|
|
504
726
|
}
|
|
727
|
+
// BAPI-441: a remediation re-dispatch (attempt > 0) reuses the existing
|
|
728
|
+
// branch/worktree (resume mode) and claims an attempt-scoped dispatch key so
|
|
729
|
+
// it is not deduped against the original epic dispatch.
|
|
730
|
+
const isResume = attempt > 0;
|
|
731
|
+
// The dispatch kind comes from the plan node's automation (start-tickets or
|
|
732
|
+
// review-tickets); default to start-tickets when unspecified.
|
|
505
733
|
const kind = automationMap.get(tk) ?? "start-tickets";
|
|
506
734
|
// Operator dry-run: when BAPI_CONDUCTOR_DISPATCH_DRY_RUN=1, dispatch resolves
|
|
507
735
|
// the spawn command + model routing but opens NO terminal, creates NO worktree,
|
|
@@ -513,7 +741,7 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
513
741
|
epic_key: ek,
|
|
514
742
|
epic_run_id: ek,
|
|
515
743
|
plan_version: cachedPlanVersion,
|
|
516
|
-
dispatch_key: buildEpicDispatchKey(ek, tk, cachedPlanVersion),
|
|
744
|
+
dispatch_key: buildEpicDispatchKey(ek, tk, cachedPlanVersion, attempt),
|
|
517
745
|
};
|
|
518
746
|
const deps = createDefaultStartTicketsDeps();
|
|
519
747
|
let runId;
|
|
@@ -533,6 +761,14 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
533
761
|
runId = result.rows[0]?.runId;
|
|
534
762
|
}
|
|
535
763
|
else {
|
|
764
|
+
// BAPI-409 / IH-1: epic dispatch (dispatch_key set) requires the conductor
|
|
765
|
+
// stage to mint a run_id and provision per-worker env/supervisor context.
|
|
766
|
+
// `conductorEnabled: true` alone is necessary but not sufficient — the
|
|
767
|
+
// BAPI-409 guard in orchestrateStartTickets fails closed unless the
|
|
768
|
+
// createConductorContext seam (and its siblings) is injected via the third
|
|
769
|
+
// `overrides` argument, exactly as the packaged start-tickets CLI does. The
|
|
770
|
+
// orchestrator short-circuits on dryRun before using them, so passing them
|
|
771
|
+
// unconditionally is safe; dispatchDryRun preserves the operator dry-run seam.
|
|
536
772
|
const result = await orchestrateStartTickets(deps, {
|
|
537
773
|
keys: [tk],
|
|
538
774
|
epic: identity,
|
|
@@ -543,6 +779,13 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
543
779
|
refreshMain: false,
|
|
544
780
|
branchOverrides: {},
|
|
545
781
|
baseBranch: "main",
|
|
782
|
+
conductorEnabled: true,
|
|
783
|
+
// BAPI-441: re-dispatch reuses the existing branch/worktree.
|
|
784
|
+
resumeMode: isResume,
|
|
785
|
+
}, {
|
|
786
|
+
createConductorContext: createStartTicketsConductorContext,
|
|
787
|
+
provisionConductorHooksForRows,
|
|
788
|
+
emitStartTicketsRunStarted,
|
|
546
789
|
});
|
|
547
790
|
if (!result.ok) {
|
|
548
791
|
throw new Error(`start-tickets dispatch failed: ${result.error}`);
|
|
@@ -559,11 +802,50 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
559
802
|
}
|
|
560
803
|
return runId;
|
|
561
804
|
};
|
|
562
|
-
const fetchLocalEvents = (_ek) => {
|
|
805
|
+
const fetchLocalEvents = (_ek, runIds) => {
|
|
563
806
|
// Workers and the epic-tick process share the same local SQLite ledger
|
|
564
807
|
// (~/.config/bridge/events.db). pollConductorEvents opens it read-only.
|
|
565
|
-
|
|
566
|
-
|
|
808
|
+
//
|
|
809
|
+
// Scope the read to this epic's dispatched run_ids. The shared ledger holds
|
|
810
|
+
// events for every epic/worker on the machine (up to RETENTION_MAX_ROWS),
|
|
811
|
+
// but rebuildObservedState only folds signals whose run_id maps to one of
|
|
812
|
+
// these dispatches — so the run_ids filter pushes that scoping into SQL and
|
|
813
|
+
// avoids loading sibling-epic events on every tick. With no known run_ids
|
|
814
|
+
// (first tick before any dispatch) there is nothing to fold, so skip the
|
|
815
|
+
// read entirely.
|
|
816
|
+
//
|
|
817
|
+
// pollConductorEvents returns at most POLL_LIMIT_MAX events per call
|
|
818
|
+
// (default 100, capped at 1000) starting at `since_seq`. rebuildObservedState
|
|
819
|
+
// folds terminal signals (gate.met/run.stopped/merge.succeeded/ci.failed)
|
|
820
|
+
// ONLY from the events it is handed, so a single capped page silently hides
|
|
821
|
+
// recent terminal signals once the (scoped) result grows past one page —
|
|
822
|
+
// done-detection then breaks. Drain the COMPLETE history by paginating on the
|
|
823
|
+
// `next_seq` cursor until a short (or empty) page signals the tail.
|
|
824
|
+
if (runIds !== undefined && runIds.length === 0) {
|
|
825
|
+
return [];
|
|
826
|
+
}
|
|
827
|
+
const runIdsFilter = runIds && runIds.length > 0 ? { run_ids: [...runIds] } : undefined;
|
|
828
|
+
const events = [];
|
|
829
|
+
let sinceSeq = 1;
|
|
830
|
+
// Retention caps (retention_days/retention_max_rows) bound the ledger, but
|
|
831
|
+
// cap total iterations defensively against a non-advancing cursor.
|
|
832
|
+
const MAX_PAGES = 10_000;
|
|
833
|
+
for (let page = 0; page < MAX_PAGES; page += 1) {
|
|
834
|
+
const result = pollConductorEvents({
|
|
835
|
+
data_mode: "full",
|
|
836
|
+
since_seq: sinceSeq,
|
|
837
|
+
limit: POLL_LIMIT_MAX,
|
|
838
|
+
filter: runIdsFilter,
|
|
839
|
+
});
|
|
840
|
+
events.push(...result.events);
|
|
841
|
+
// Stop on a short/empty page (no more rows) or a cursor that fails to
|
|
842
|
+
// advance (guards against an infinite loop).
|
|
843
|
+
if (result.count < POLL_LIMIT_MAX || result.next_seq <= sinceSeq) {
|
|
844
|
+
break;
|
|
845
|
+
}
|
|
846
|
+
sinceSeq = result.next_seq;
|
|
847
|
+
}
|
|
848
|
+
return events;
|
|
567
849
|
};
|
|
568
850
|
const escalateOnce = async (ek, reason) => {
|
|
569
851
|
const candidate = {
|
|
@@ -618,5 +900,9 @@ export async function buildProductionEpicRuntimeDeps(epicKey) {
|
|
|
618
900
|
fetchLocalEvents,
|
|
619
901
|
escalateOnce,
|
|
620
902
|
postActionWaitSeam,
|
|
903
|
+
// BAPI-442 seams are wired at the reconcileDeps level inside runEpicTick
|
|
904
|
+
// (they need the per-tick `access` and `prBindings` closure). The factory
|
|
905
|
+
// returns the dispatchSeam with isReReview support; the other two seams are
|
|
906
|
+
// defined inline in the reconcileDeps object in runEpicTick.
|
|
621
907
|
};
|
|
622
908
|
}
|
|
@@ -1,44 +1,63 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Pure, deterministic observed-state rebuild + ready-set computation for the
|
|
3
|
-
* Epic Supervisor (BAPI-408).
|
|
3
|
+
* Epic Supervisor (BAPI-408, BAPI-436).
|
|
4
4
|
*
|
|
5
5
|
* This module has NO I/O, NO timers, and NO LLM calls. Every time-dependent
|
|
6
6
|
* function takes an explicit `now` (epoch ms) so tests are wall-clock
|
|
7
7
|
* independent. Truth precedence: raw local ledger events override non-terminal
|
|
8
8
|
* Postgres states.
|
|
9
|
+
*
|
|
10
|
+
* Status mapping (BAPI-436 — merge-gated dependent dispatch):
|
|
11
|
+
* gate.met → ready_for_review (implementation done; awaiting merge)
|
|
12
|
+
* run.stopped → ready_for_review (worker session ended; awaiting merge)
|
|
13
|
+
* merge.succeeded → done (merged; dependents may now dispatch)
|
|
14
|
+
* ci.failed → blocked (unchanged)
|
|
9
15
|
*/
|
|
10
16
|
// ---------------------------------------------------------------------------
|
|
11
17
|
// Status sets (module-private)
|
|
12
18
|
// ---------------------------------------------------------------------------
|
|
13
19
|
const NOT_STARTED_STATUS = "planned";
|
|
14
20
|
const DONE_STATUSES = new Set(["done"]);
|
|
21
|
+
// "blocked" is intentionally non-terminal: a merge.succeeded arriving in a
|
|
22
|
+
// subsequent tick (cross-tick) can still advance a blocked ticket to done.
|
|
23
|
+
// This is the expected path when an operator merges a PR despite failed CI.
|
|
24
|
+
// Contrast with the same-tick case: ci.failed wins over merge.succeeded in
|
|
25
|
+
// the same ledger batch (first-signal-wins; see the foldedTicketKeys guard).
|
|
15
26
|
const NON_TERMINAL_STATUSES = new Set([
|
|
16
27
|
"planned",
|
|
17
28
|
"ready",
|
|
18
29
|
"dispatched",
|
|
19
30
|
"running",
|
|
20
31
|
"blocked",
|
|
32
|
+
"ready_for_review",
|
|
21
33
|
]);
|
|
22
34
|
const TERMINAL_SIGNAL_TYPES = new Set([
|
|
23
35
|
"gate.met",
|
|
24
36
|
"merge.succeeded",
|
|
25
37
|
"ci.failed",
|
|
26
38
|
"run.stopped",
|
|
39
|
+
"review.changes_requested",
|
|
27
40
|
]);
|
|
28
41
|
function isNonTerminal(status) {
|
|
29
42
|
return NON_TERMINAL_STATUSES.has(status);
|
|
30
43
|
}
|
|
31
|
-
function signalToNextStatus(signalType) {
|
|
44
|
+
export function signalToNextStatus(signalType) {
|
|
32
45
|
if (signalType === "ci.failed")
|
|
33
46
|
return "blocked";
|
|
34
|
-
|
|
47
|
+
if (signalType === "review.changes_requested")
|
|
48
|
+
return "blocked";
|
|
49
|
+
if (signalType === "merge.succeeded")
|
|
50
|
+
return "done";
|
|
51
|
+
return "ready_for_review"; // gate.met and run.stopped: awaiting merge
|
|
35
52
|
}
|
|
36
53
|
// ---------------------------------------------------------------------------
|
|
37
54
|
// computeReadySet
|
|
38
55
|
// ---------------------------------------------------------------------------
|
|
39
56
|
/**
|
|
40
57
|
* Pure deterministic ready-set computation. Returns ticket keys that:
|
|
41
|
-
* 1. Have status "planned" (not yet started)
|
|
58
|
+
* 1. Have status "planned" (not yet started) or "ready" (crash-recovery —
|
|
59
|
+
* a ticket already advanced to ready on a prior tick that crashed before
|
|
60
|
+
* dispatch must not be silently dropped), AND
|
|
42
61
|
* 2. Whose full `depends_on` list is satisfied (all deps have "done" status).
|
|
43
62
|
*
|
|
44
63
|
* Never calls an LLM or performs I/O. Goal 8 invariant.
|
|
@@ -47,7 +66,7 @@ export function computeReadySet(plan, ticketStatuses) {
|
|
|
47
66
|
const ready = [];
|
|
48
67
|
for (const ticket of plan.tickets) {
|
|
49
68
|
const currentStatus = ticketStatuses.get(ticket.ticket_key) ?? "planned";
|
|
50
|
-
if (currentStatus !== NOT_STARTED_STATUS)
|
|
69
|
+
if (currentStatus !== NOT_STARTED_STATUS && currentStatus !== "ready")
|
|
51
70
|
continue;
|
|
52
71
|
const allDepsResolved = ticket.depends_on.every((dep) => DONE_STATUSES.has(ticketStatuses.get(dep) ?? "planned"));
|
|
53
72
|
if (allDepsResolved)
|
|
@@ -55,6 +74,46 @@ export function computeReadySet(plan, ticketStatuses) {
|
|
|
55
74
|
}
|
|
56
75
|
return ready;
|
|
57
76
|
}
|
|
77
|
+
/**
 * Pure, deterministic remediation decision (no I/O, no clock). Given the current
 * budget counters, the ledger-derived worker liveness, and the configured
 * ceilings, decide whether to NUDGE (worker still alive), RE-DISPATCH (worker
 * gone, budget remaining), or ESCALATE (budget exhausted).
 *
 * Budget exhaustion is checked FIRST so an at-ceiling ticket always escalates
 * regardless of liveness. A counter at-or-above its ceiling exhausts the budget.
 *
 * Fail-closed on a missing budget: when NEITHER ceiling coerces to a number
 * (e.g. both are undefined), no comparison could ever exhaust the budget and
 * remediation would repeat without bound, so the decision is "escalate".
 * When at least one ceiling is usable, only usable ceilings can exhaust the
 * budget (an unusable one never compares true).
 *
 * @param {number} attempts - Remediation attempts already spent.
 * @param {number} noProgressAttempts - Attempts spent without progress.
 * @param {boolean} alive - Whether the worker is considered alive.
 * @param {{max_remediation_attempts?: number, max_remediation_no_progress_attempts?: number}} config
 *   Configured ceilings.
 * @returns {"escalate"|"nudge"|"redispatch"} The remediation action.
 */
export function decideRemediation(attempts, noProgressAttempts, alive, config) {
    const attemptCeiling = config.max_remediation_attempts;
    const noProgressCeiling = config.max_remediation_no_progress_attempts;
    // `>=` coerces its operands to numbers; a ceiling that coerces to NaN makes
    // the comparison permanently false.
    const isComparable = (ceiling) => !Number.isNaN(Number(ceiling));
    if (!isComparable(attemptCeiling) && !isComparable(noProgressCeiling)) {
        return "escalate";
    }
    if (attempts >= attemptCeiling || noProgressAttempts >= noProgressCeiling) {
        return "escalate";
    }
    return alive ? "nudge" : "redispatch";
}
|
|
93
|
+
/**
 * Pure liveness extraction (no I/O, explicit `nowMs`). Finds the most recent
 * `message.delivered`/`message.acked` heartbeat event for `runId` and reports
 * the worker alive when that heartbeat's age is within `windowSeconds`.
 *
 * Heartbeats whose `time` does not parse to a valid timestamp are ignored, so a
 * malformed early event cannot shadow later valid heartbeats (any comparison
 * against NaN is false). With no usable heartbeat for the run the result is
 * `{ alive: false, workerId: null }` (fail-closed: never misjudge a worker
 * alive without evidence).
 *
 * @param {Iterable<{run_id?: string, type?: string, time?: string|number, worker_id?: string|null}>} events
 *   Ledger events to scan.
 * @param {string} runId - Run whose heartbeats are considered.
 * @param {number} nowMs - Current time in epoch milliseconds.
 * @param {number} windowSeconds - Maximum heartbeat age still counted as alive.
 * @returns {{alive: boolean, workerId: string|null}} Liveness verdict plus the
 *   `worker_id` of the latest usable heartbeat (null when absent).
 */
export function extractWorkerLiveness(events, runId, nowMs, windowSeconds) {
    let latest = null;
    let latestMs = Number.NEGATIVE_INFINITY;
    for (const ev of events) {
        if (ev.run_id !== runId) {
            continue;
        }
        if (ev.type !== "message.delivered" && ev.type !== "message.acked") {
            continue;
        }
        // Parse once per event; skip timestamps that cannot be ordered.
        const eventMs = new Date(ev.time).getTime();
        if (Number.isNaN(eventMs)) {
            continue;
        }
        // Strictly-greater keeps the first event on timestamp ties.
        if (eventMs > latestMs) {
            latest = ev;
            latestMs = eventMs;
        }
    }
    if (latest === null) {
        return { alive: false, workerId: null };
    }
    const ageMs = nowMs - latestMs;
    return { alive: ageMs <= windowSeconds * 1000, workerId: latest.worker_id ?? null };
}
|
|
58
117
|
// ---------------------------------------------------------------------------
|
|
59
118
|
// rebuildObservedState
|
|
60
119
|
// ---------------------------------------------------------------------------
|
|
@@ -75,14 +134,23 @@ export function rebuildObservedState(postgresState, events, _now) {
|
|
|
75
134
|
// Populate base maps from Postgres
|
|
76
135
|
const ticketStatusMap = new Map();
|
|
77
136
|
const ticketRowVersionMap = new Map();
|
|
137
|
+
const ticketRemediationMap = new Map();
|
|
78
138
|
for (const ts of ticket_statuses) {
|
|
79
139
|
ticketStatusMap.set(ts.ticket_key, ts.status);
|
|
80
140
|
ticketRowVersionMap.set(ts.ticket_key, ts.row_version);
|
|
141
|
+
ticketRemediationMap.set(ts.ticket_key, {
|
|
142
|
+
attempts: ts.remediation_attempts ?? 0,
|
|
143
|
+
no_progress: ts.remediation_no_progress_attempts ?? 0,
|
|
144
|
+
});
|
|
81
145
|
}
|
|
82
146
|
const unfoldedSignals = [];
|
|
83
147
|
const pendingMergeEvents = [];
|
|
84
148
|
// Track which tickets already have a folded signal (one override per ticket)
|
|
85
149
|
const foldedTicketKeys = new Set();
|
|
150
|
+
// BAPI-441: per-ticket latest blocking reason (ci.failed / review.changes_requested),
|
|
151
|
+
// tracked across the full ledger so an already-blocked ticket still carries a
|
|
152
|
+
// reason for the remediation pass to frame the nudge.
|
|
153
|
+
const ticketBlockedReasons = new Map();
|
|
86
154
|
for (const event of events) {
|
|
87
155
|
if (!TERMINAL_SIGNAL_TYPES.has(event.type))
|
|
88
156
|
continue;
|
|
@@ -91,16 +159,45 @@ export function rebuildObservedState(postgresState, events, _now) {
|
|
|
91
159
|
const ticketKey = runId ? runIdToTicketKey.get(runId) : undefined;
|
|
92
160
|
if (!ticketKey)
|
|
93
161
|
continue;
|
|
94
|
-
|
|
95
|
-
|
|
162
|
+
// Record the blocking reason (latest wins; events are seq-ordered) regardless
|
|
163
|
+
// of fold state, so a ticket blocked on a prior tick still resolves a reason.
|
|
164
|
+
if (event.type === "ci.failed" || event.type === "review.changes_requested") {
|
|
165
|
+
ticketBlockedReasons.set(ticketKey, event.type);
|
|
96
166
|
}
|
|
97
167
|
const postgresStatus = ticketStatusMap.get(ticketKey) ?? "planned";
|
|
98
168
|
if (!isNonTerminal(postgresStatus))
|
|
99
169
|
continue;
|
|
100
|
-
if
|
|
101
|
-
|
|
170
|
+
// Only queue for merge actioning if this ticket hasn't already been folded
|
|
171
|
+
// this tick. Without this guard, two gate.met events for the same ticket
|
|
172
|
+
// would both enqueue, and a ci.failed → gate.met sequence would enqueue a
|
|
173
|
+
// merge action for a ticket whose effective status is "blocked".
|
|
174
|
+
if (event.type === "gate.met" && !foldedTicketKeys.has(ticketKey)) {
|
|
175
|
+
pendingMergeEvents.push(event);
|
|
176
|
+
}
|
|
102
177
|
const signalType = event.type;
|
|
103
178
|
const nextStatus = signalToNextStatus(signalType);
|
|
179
|
+
if (foldedTicketKeys.has(ticketKey)) {
|
|
180
|
+
// Allow a same-tick upgrade from ready_for_review → done when
|
|
181
|
+
// merge.succeeded arrives after gate.met in the same ledger batch.
|
|
182
|
+
// First-signal-wins for everything else: if ci.failed arrived first,
|
|
183
|
+
// currentLocalStatus is "blocked" (not "ready_for_review"), so the
|
|
184
|
+
// upgrade guard below is false and merge.succeeded is intentionally
|
|
185
|
+
// dropped — a failed-CI ticket must not be silently advanced to done.
|
|
186
|
+
const currentLocalStatus = ticketStatusMap.get(ticketKey);
|
|
187
|
+
if (currentLocalStatus === "ready_for_review" && nextStatus === "done") {
|
|
188
|
+
const existingIdx = unfoldedSignals.findIndex((s) => s.ticket_key === ticketKey);
|
|
189
|
+
if (existingIdx >= 0) {
|
|
190
|
+
unfoldedSignals[existingIdx] = {
|
|
191
|
+
...unfoldedSignals[existingIdx],
|
|
192
|
+
next_status: nextStatus,
|
|
193
|
+
signal_type: signalType,
|
|
194
|
+
event,
|
|
195
|
+
};
|
|
196
|
+
ticketStatusMap.set(ticketKey, nextStatus);
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
continue;
|
|
200
|
+
}
|
|
104
201
|
const rowVersion = ticketRowVersionMap.get(ticketKey) ?? 0;
|
|
105
202
|
unfoldedSignals.push({
|
|
106
203
|
ticket_key: ticketKey,
|
|
@@ -119,6 +216,8 @@ export function rebuildObservedState(postgresState, events, _now) {
|
|
|
119
216
|
plan_version: epic_run.current_plan_version,
|
|
120
217
|
ticket_statuses: ticketStatusMap,
|
|
121
218
|
ticket_row_versions: ticketRowVersionMap,
|
|
219
|
+
ticket_remediation_counters: ticketRemediationMap,
|
|
220
|
+
ticket_blocked_reasons: ticketBlockedReasons,
|
|
122
221
|
unfolded_terminal_signals: unfoldedSignals,
|
|
123
222
|
pending_merge_events: pendingMergeEvents,
|
|
124
223
|
};
|
|
@@ -25,8 +25,14 @@ export const GIT_CI_PRODUCER = "git-pr-ci-producer";
|
|
|
25
25
|
export const GIT_HOOK_PRODUCER = "git-hook";
|
|
26
26
|
/** The single v1 done-gate condition type. */
|
|
27
27
|
export const REQUIRED_CI_CHECKS_GREEN = "required_ci_checks_green";
|
|
28
|
+
/** The v2 review-state done-gate condition type. */
|
|
29
|
+
export const REVIEW_STATE = "review_state";
|
|
28
30
|
/** Default gate name surfaced in `gate.met` event data. */
|
|
29
31
|
export const DEFAULT_GATE_NAME = "done";
|
|
32
|
+
/** Event type emitted when the configured review source is satisfied. */
|
|
33
|
+
export const REVIEW_PASSED = "review.passed";
|
|
34
|
+
/** Event type emitted when a reviewer requests changes. */
|
|
35
|
+
export const REVIEW_CHANGES_REQUESTED = "review.changes_requested";
|
|
30
36
|
/** Matches ASCII control characters (C0 range plus DEL). */
|
|
31
37
|
const CONTROL_CHAR_RE = /[\u0000-\u001F\u007F]/;
|
|
32
38
|
/** Matches a 40- or 64-character hex string (git SHA-1 / SHA-256 object ids). */
|