baxian 1.2.25 → 1.2.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/bootstrap-poller.d.ts.map +1 -1
- package/dist/agent/bootstrap-poller.js +0 -11
- package/dist/agent/bootstrap-poller.js.map +1 -1
- package/dist/agent/bootstrap.d.ts.map +1 -1
- package/dist/agent/bootstrap.js +0 -42
- package/dist/agent/bootstrap.js.map +1 -1
- package/dist/agent/diff-split.d.ts.map +1 -1
- package/dist/agent/diff-split.js +0 -4
- package/dist/agent/diff-split.js.map +1 -1
- package/dist/agent/image-input.d.ts.map +1 -1
- package/dist/agent/image-input.js +0 -3
- package/dist/agent/image-input.js.map +1 -1
- package/dist/agent/liveness.d.ts.map +1 -1
- package/dist/agent/liveness.js +0 -3
- package/dist/agent/liveness.js.map +1 -1
- package/dist/agent/manager.d.ts +0 -4
- package/dist/agent/manager.d.ts.map +1 -1
- package/dist/agent/manager.js +9 -951
- package/dist/agent/manager.js.map +1 -1
- package/dist/agent/pane-streamer-manager.d.ts.map +1 -1
- package/dist/agent/pane-streamer-manager.js +0 -9
- package/dist/agent/pane-streamer-manager.js.map +1 -1
- package/dist/agent/pane-streamer.d.ts.map +1 -1
- package/dist/agent/pane-streamer.js +2 -27
- package/dist/agent/pane-streamer.js.map +1 -1
- package/dist/agent/pet-input.d.ts.map +1 -1
- package/dist/agent/pet-input.js +0 -3
- package/dist/agent/pet-input.js.map +1 -1
- package/dist/agent/phase-signal-watcher.d.ts +0 -6
- package/dist/agent/phase-signal-watcher.d.ts.map +1 -1
- package/dist/agent/phase-signal-watcher.js +0 -41
- package/dist/agent/phase-signal-watcher.js.map +1 -1
- package/dist/agent/phase-signal.d.ts.map +1 -1
- package/dist/agent/phase-signal.js +1 -18
- package/dist/agent/phase-signal.js.map +1 -1
- package/dist/agent/preflight.d.ts.map +1 -1
- package/dist/agent/preflight.js +0 -12
- package/dist/agent/preflight.js.map +1 -1
- package/dist/agent/prompt.d.ts +0 -4
- package/dist/agent/prompt.d.ts.map +1 -1
- package/dist/agent/prompt.js +0 -66
- package/dist/agent/prompt.js.map +1 -1
- package/dist/agent/repo-store.d.ts.map +1 -1
- package/dist/agent/repo-store.js +0 -21
- package/dist/agent/repo-store.js.map +1 -1
- package/dist/agent/review-transport.d.ts +0 -1
- package/dist/agent/review-transport.d.ts.map +1 -1
- package/dist/agent/review-transport.js +0 -16
- package/dist/agent/review-transport.js.map +1 -1
- package/dist/agent/runner.d.ts +0 -3
- package/dist/agent/runner.d.ts.map +1 -1
- package/dist/agent/runner.js +0 -40
- package/dist/agent/runner.js.map +1 -1
- package/dist/agent/tmux-probe-poller.d.ts.map +1 -1
- package/dist/agent/tmux-probe-poller.js +1 -20
- package/dist/agent/tmux-probe-poller.js.map +1 -1
- package/dist/agent/tmux.d.ts +0 -1
- package/dist/agent/tmux.d.ts.map +1 -1
- package/dist/agent/tmux.js +6 -45
- package/dist/agent/tmux.js.map +1 -1
- package/dist/agent/worktree.d.ts.map +1 -1
- package/dist/agent/worktree.js +0 -9
- package/dist/agent/worktree.js.map +1 -1
- package/dist/api/agents.d.ts.map +1 -1
- package/dist/api/agents.js +0 -5
- package/dist/api/agents.js.map +1 -1
- package/dist/api/config.d.ts.map +1 -1
- package/dist/api/config.js +0 -16
- package/dist/api/config.js.map +1 -1
- package/dist/api/hosts.d.ts.map +1 -1
- package/dist/api/hosts.js +0 -37
- package/dist/api/hosts.js.map +1 -1
- package/dist/api/pets.d.ts.map +1 -1
- package/dist/api/pets.js +0 -5
- package/dist/api/pets.js.map +1 -1
- package/dist/api/probe.d.ts.map +1 -1
- package/dist/api/probe.js +0 -5
- package/dist/api/probe.js.map +1 -1
- package/dist/api/projects.d.ts.map +1 -1
- package/dist/api/projects.js +0 -40
- package/dist/api/projects.js.map +1 -1
- package/dist/api/tasks.d.ts.map +1 -1
- package/dist/api/tasks.js +0 -14
- package/dist/api/tasks.js.map +1 -1
- package/dist/app.d.ts +0 -3
- package/dist/app.d.ts.map +1 -1
- package/dist/app.js +0 -14
- package/dist/app.js.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +0 -13
- package/dist/cli.js.map +1 -1
- package/dist/config/hot-reload.d.ts.map +1 -1
- package/dist/config/hot-reload.js +0 -3
- package/dist/config/hot-reload.js.map +1 -1
- package/dist/config/loader.d.ts +0 -27
- package/dist/config/loader.d.ts.map +1 -1
- package/dist/config/loader.js +0 -56
- package/dist/config/loader.js.map +1 -1
- package/dist/config/normalizer.js +0 -1
- package/dist/config/normalizer.js.map +1 -1
- package/dist/config/validator.d.ts.map +1 -1
- package/dist/config/validator.js +0 -16
- package/dist/config/validator.js.map +1 -1
- package/dist/event/broker.d.ts.map +1 -1
- package/dist/event/broker.js +0 -2
- package/dist/event/broker.js.map +1 -1
- package/dist/event/bus.d.ts.map +1 -1
- package/dist/event/bus.js +0 -2
- package/dist/event/bus.js.map +1 -1
- package/dist/event/handlers.d.ts.map +1 -1
- package/dist/event/handlers.js +2 -150
- package/dist/event/handlers.js.map +1 -1
- package/dist/event/publish.d.ts.map +1 -1
- package/dist/event/publish.js +0 -8
- package/dist/event/publish.js.map +1 -1
- package/dist/event/server-handlers.d.ts.map +1 -1
- package/dist/event/server-handlers.js +0 -80
- package/dist/event/server-handlers.js.map +1 -1
- package/dist/event/ws.d.ts.map +1 -1
- package/dist/event/ws.js +0 -5
- package/dist/event/ws.js.map +1 -1
- package/dist/github/mapper.d.ts.map +1 -1
- package/dist/github/mapper.js +0 -13
- package/dist/github/mapper.js.map +1 -1
- package/dist/github/poller.d.ts.map +1 -1
- package/dist/github/poller.js +0 -15
- package/dist/github/poller.js.map +1 -1
- package/dist/github/resolver.d.ts.map +1 -1
- package/dist/github/resolver.js +0 -1
- package/dist/github/resolver.js.map +1 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +0 -26
- package/dist/index.js.map +1 -1
- package/dist/lifecycle/restart-sentinel.d.ts.map +1 -1
- package/dist/lifecycle/restart-sentinel.js +0 -1
- package/dist/lifecycle/restart-sentinel.js.map +1 -1
- package/dist/lifecycle/restart.d.ts +0 -1
- package/dist/lifecycle/restart.d.ts.map +1 -1
- package/dist/lifecycle/restart.js +0 -3
- package/dist/lifecycle/restart.js.map +1 -1
- package/dist/shared/constants.d.ts.map +1 -1
- package/dist/shared/constants.js +0 -19
- package/dist/shared/constants.js.map +1 -1
- package/dist/shared/git-url.d.ts +0 -2
- package/dist/shared/git-url.d.ts.map +1 -1
- package/dist/shared/git-url.js +0 -21
- package/dist/shared/git-url.js.map +1 -1
- package/dist/shared/types.d.ts +0 -54
- package/dist/shared/types.d.ts.map +1 -1
- package/dist/shared/types.js.map +1 -1
- package/dist/skill/registry.d.ts.map +1 -1
- package/dist/skill/registry.js +3 -25
- package/dist/skill/registry.js.map +1 -1
- package/dist/state/agent-store.d.ts +0 -1
- package/dist/state/agent-store.d.ts.map +1 -1
- package/dist/state/agent-store.js +0 -5
- package/dist/state/agent-store.js.map +1 -1
- package/dist/state/error-record-store.d.ts.map +1 -1
- package/dist/state/error-record-store.js +1 -27
- package/dist/state/error-record-store.js.map +1 -1
- package/dist/state/lock.d.ts.map +1 -1
- package/dist/state/lock.js +1 -6
- package/dist/state/lock.js.map +1 -1
- package/dist/state/pet-store.d.ts.map +1 -1
- package/dist/state/pet-store.js +0 -10
- package/dist/state/pet-store.js.map +1 -1
- package/dist/state/post-approve-store.d.ts.map +1 -1
- package/dist/state/post-approve-store.js +0 -1
- package/dist/state/post-approve-store.js.map +1 -1
- package/dist/state/process-lock.d.ts +0 -2
- package/dist/state/process-lock.d.ts.map +1 -1
- package/dist/state/process-lock.js +0 -5
- package/dist/state/process-lock.js.map +1 -1
- package/dist/state/review-store.d.ts.map +1 -1
- package/dist/state/review-store.js +0 -3
- package/dist/state/review-store.js.map +1 -1
- package/dist/state/snapshot.d.ts.map +1 -1
- package/dist/state/snapshot.js +0 -19
- package/dist/state/snapshot.js.map +1 -1
- package/dist/state/task-store.d.ts.map +1 -1
- package/dist/state/task-store.js +0 -4
- package/dist/state/task-store.js.map +1 -1
- package/dist/terminal/attach.d.ts.map +1 -1
- package/dist/terminal/attach.js +0 -12
- package/dist/terminal/attach.js.map +1 -1
- package/dist/terminal/key-sanitizer.d.ts.map +1 -1
- package/dist/terminal/key-sanitizer.js +0 -2
- package/dist/terminal/key-sanitizer.js.map +1 -1
- package/dist/terminal/stream-ws.d.ts.map +1 -1
- package/dist/terminal/stream-ws.js +0 -5
- package/dist/terminal/stream-ws.js.map +1 -1
- package/dist/timing/backoff.d.ts.map +1 -1
- package/dist/timing/backoff.js.map +1 -1
- package/dist/web/assets/index-DvVOHfHm.css +1 -0
- package/dist/web/index.html +2 -2
- package/package.json +2 -2
- package/dist/web/assets/index-DuER3MFf.css +0 -1
- /package/dist/web/assets/{index-B94unmLb.js → index-BFsxrk2C.js} +0 -0
package/dist/agent/manager.js
CHANGED
|
@@ -48,7 +48,6 @@ export class DispatchTerminalError extends Error {
|
|
|
48
48
|
this.name = 'DispatchTerminalError';
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
|
-
// shellQuote model/addDirs: spliced into a tmux command line; unquoted values allow injection.
|
|
52
51
|
export function buildLaunchCommand(agent) {
|
|
53
52
|
const segments = [];
|
|
54
53
|
switch (agent.runtime) {
|
|
@@ -74,41 +73,17 @@ function agentRuntimeKindFor(agent) {
|
|
|
74
73
|
}
|
|
75
74
|
const DEFAULT_DISPATCH_ACK_TIMEOUT_MS = 30_000;
|
|
76
75
|
const DEFAULT_DISPATCH_SETTLE_TIMEOUT_MS = 3_000;
|
|
77
|
-
// Dev-facing deliverable phases that weave the task's uploaded image paths into the prompt.
|
|
78
76
|
const IMAGE_DISPATCH_PHASES = new Set(['develop', 'code', 'fix', 'server-feedback']);
|
|
79
|
-
// Pane grabs taken to decide live-turn vs static composer; (N-1) * runtimeLivenessProbeMs must exceed 1s.
|
|
80
77
|
const RUNTIME_LIVENESS_SAMPLES = 3;
|
|
81
78
|
export function canDispatchWithBinding(binding) {
|
|
82
79
|
return !binding?.taskId && !binding?.creationToken && binding?.status !== 'awaiting_human';
|
|
83
80
|
}
|
|
84
|
-
// 部分 awaiting phase 表示 agent 这一轮 turn 已跑完,绑定是 stale 的——即使 task 不 terminal
|
|
85
|
-
// 也应 release 让 agent 被下一轮 acquire。outcome handler (review.submitted) 走 allowAwaitingHuman 即可。
|
|
86
|
-
//
|
|
87
|
-
// 当前集合为空:先前包含的 'dev-wait-gate-failed-after-qa-started' 和 'dispatch-failed:ack_unknown'
|
|
88
|
-
// 语义都是"QA prompt 已粘贴,可能仍在 pane 中跑"——任何在 outcome 到达前的 release(含 resumeAgent /
|
|
89
|
-
// recover 路径)都可能让第二个 prompt 派进同 pane 与旧 turn 混在一起。outcome handler 通过显式
|
|
90
|
-
// allowAwaitingHuman:true release,gate 单点放行。
|
|
91
81
|
const TURN_COMPLETED_AWAITING_PHASES = new Set();
|
|
92
|
-
// Pane is stopped (interrupt landed) but its session was not cleared: cancel is mid /clear
|
|
93
|
-
// (`cancel-clearing`) or /clear was unconfirmed (`cancel-clear-failed`). Resume can't fix it (it doesn't
|
|
94
|
-
// /clear), so these are DELETE-only: shouldReleaseHeldBinding returns false → recover()/escape/Resume all
|
|
95
|
-
// refuse. Persisted, so the protection survives a restart mid-cleanup.
|
|
96
82
|
const UNCLEARED_PANE_PHASES = new Set(['cancel-clearing', 'cancel-clear-failed']);
|
|
97
|
-
// All cancel-cleanup holds (the un-cleared ones plus `cancel-interrupt-failed`, where the interrupt failed
|
|
98
|
-
// so the pane may still be running the cancelled task). None may be AUTO-released — not by recover(), not by
|
|
99
|
-
// a terminal-task escape, not even by an allowAwaitingHuman caller — because that would reuse the cancelled
|
|
100
|
-
// session. Only cancel's own confirmed-/clear release (fromCancelCleanup) frees one automatically; the
|
|
101
|
-
// operator recovers via Resume (cancel-interrupt-failed only, after verifying) or DELETE (any).
|
|
102
83
|
const CANCEL_CLEANUP_HOLD_PHASES = new Set([...UNCLEARED_PANE_PHASES, 'cancel-interrupt-failed']);
|
|
103
|
-
// The cancel flow owns a binding in a cancel-cleanup hold: NO dispatch-failure cleanup may wipe it or
|
|
104
|
-
// overwrite it (else cancel's Phase 2 /clear is skipped and an un-cleared pane is reused). Every binding-wipe
|
|
105
|
-
// path must check this — releaseAgentForTask, rollbackFailedDispatch, startSession cleanup, markAwaitingHuman.
|
|
106
84
|
function isCancelCleanupHold(binding) {
|
|
107
85
|
return binding?.awaitingPhase != null && CANCEL_CLEANUP_HOLD_PHASES.has(binding.awaitingPhase);
|
|
108
86
|
}
|
|
109
|
-
// Cancel-cleanup phases escalate monotonically — a more-locked phase must never be downgraded. cancel-clear-failed
|
|
110
|
-
// (DELETE-only: /clear unconfirmed) outranks cancel-interrupt-failed (Resume-able) and the transient cancel-clearing,
|
|
111
|
-
// so a re-entrant terminal cleanup can't soften "un-cleared, DELETE-only" into a hold Resume would reuse.
|
|
112
87
|
const CANCEL_CLEANUP_PHASE_RANK = {
|
|
113
88
|
'cancel-clearing': 1,
|
|
114
89
|
'cancel-interrupt-failed': 2,
|
|
@@ -120,14 +95,8 @@ function cancelPhaseRank(phase) {
|
|
|
120
95
|
function cancelPhaseDowngrades(prev, next) {
|
|
121
96
|
return cancelPhaseRank(next) < cancelPhaseRank(prev);
|
|
122
97
|
}
|
|
123
|
-
// A prompt line still holding the typed `/clear` (e.g. `❯ /clear`, `› /clear`) = the Enter was swallowed,
|
|
124
|
-
// so /clear was never submitted. After a real submission /clear wipes the screen and the composer is empty.
|
|
125
98
|
const CLEAR_PENDING_IN_COMPOSER_RE = /(?:^|\n)[ \t]*[❯>›→][ \t]*\/clear\b/;
|
|
126
|
-
// A greeting capability failure is NOT cleared by a plain Resume or by recover()'s auto-release:
|
|
127
|
-
// the agent must re-prove it can signal (restart/retry re-runs the handshake). Auto-releasing it
|
|
128
|
-
// would slip an unverified agent back to dispatchable, defeating the whole bootstrap gate.
|
|
129
99
|
const REGREET_REQUIRED_HOLD_PHASES = new Set(['greeting_failed']);
|
|
130
|
-
// Resume / recover 共用:决定 Held agent 的 binding 是否随状态恢复一起清掉。
|
|
131
100
|
export function shouldReleaseHeldBinding(state, boundTask) {
|
|
132
101
|
if (state.awaitingPhase != null && UNCLEARED_PANE_PHASES.has(state.awaitingPhase))
|
|
133
102
|
return false;
|
|
@@ -155,16 +124,12 @@ export class AgentManager {
|
|
|
155
124
|
reviewTransportInstance;
|
|
156
125
|
dispatchAckTimeoutMs;
|
|
157
126
|
dispatchSettleTimeoutMs;
|
|
158
|
-
// Re-send Enter after this long of continuous post-paste idle — recovers a swallowed first Enter
|
|
159
|
-
// without risking a double-submit (a real submit goes busy well within this window).
|
|
160
127
|
dispatchAckResendIntervalMs = 3_000;
|
|
161
128
|
taskMutationQueue = Promise.resolve();
|
|
162
129
|
agentIndex;
|
|
163
130
|
platformRunner;
|
|
164
131
|
imageStagingRoot;
|
|
165
132
|
bootstrapTimeoutsMs;
|
|
166
|
-
// Bootstrap greeting handshake: total attempts before holding the agent, to absorb a
|
|
167
|
-
// single transient slow/garbled reply without failing a genuinely capable agent.
|
|
168
133
|
greetingMaxAttempts = 2;
|
|
169
134
|
runtimeMenuWatchers = new Map();
|
|
170
135
|
runtimeMenuPollIntervalMs = 10_000;
|
|
@@ -172,25 +137,13 @@ export class AgentManager {
|
|
|
172
137
|
compactIdlePollMs = 2_000;
|
|
173
138
|
manualCompactWaitMs = 5_000;
|
|
174
139
|
clearContextWaitMs = 30_000;
|
|
175
|
-
// Gap between liveness grabs in interruptPaneAndWaitReady; (SAMPLES-1)*this must exceed 1s + margin so a
|
|
176
|
-
// per-second elapsed-counter tick is always captured (700 * 2 = 1.4s).
|
|
177
140
|
runtimeLivenessProbeMs = 700;
|
|
178
|
-
// How long the post-C-c verify polls for the pane to reach a clean/empty composer before holding.
|
|
179
141
|
cleanComposerWaitMs = 5_000;
|
|
180
|
-
// How long cancel waits for an in-flight dispatch to release the pane mutex before holding (set in the
|
|
181
|
-
// constructor from the actual dispatchAckTimeoutMs). On timeout the pane is classified DELETE-only, not
|
|
182
|
-
// Resume-able, so a longer non-dispatch holder can't leave an un-cleared pane reusable.
|
|
183
142
|
cancelInterruptGuardWaitMs = DEFAULT_DISPATCH_ACK_TIMEOUT_MS + 5_000;
|
|
184
143
|
postMergeFetchTimeoutMs = 60_000;
|
|
185
144
|
postMergeBranchTimeoutMs = 10_000;
|
|
186
|
-
// taskIds with in-flight manual review — second concurrent POST gets 409.
|
|
187
145
|
manualReviewInFlight = new Set();
|
|
188
|
-
// taskIds with an in-flight mark-complete (slow external `gh pr merge`). While set, the
|
|
189
|
-
// task is being merged — Cancel / Call review / Continue must refuse so they can't act on
|
|
190
|
-
// the same max_rounds snapshot and interleave with the irreversible merge.
|
|
191
146
|
markCompleteInFlight = new Set();
|
|
192
|
-
// agentIds with in-flight DELETE — 第二个 DELETE 撞 awaiting_human stale-lock takeover 路径会
|
|
193
|
-
// 把第一个 DELETE 持有的占位也当 stale 接管,导致并发 cleanupRemovedAgentRuntime。
|
|
194
147
|
deletionInFlight = new Set();
|
|
195
148
|
compactInFlight = new Set();
|
|
196
149
|
constructor(deps) {
|
|
@@ -217,8 +170,6 @@ export class AgentManager {
|
|
|
217
170
|
this.reviewStore = deps.reviewStore;
|
|
218
171
|
this.dispatchAckTimeoutMs = deps.dispatchAckTimeoutMs ?? DEFAULT_DISPATCH_ACK_TIMEOUT_MS;
|
|
219
172
|
this.dispatchSettleTimeoutMs = deps.dispatchSettleTimeoutMs ?? DEFAULT_DISPATCH_SETTLE_TIMEOUT_MS;
|
|
220
|
-
// Track the ACTUAL ack timeout (a dispatch holds the pane mutex through waitSubmitAck), not a hardcoded
|
|
221
|
-
// default — else an overridden dispatchAckTimeoutMs would let cancel give up before the dispatch releases.
|
|
222
173
|
this.cancelInterruptGuardWaitMs = this.dispatchAckTimeoutMs + 5_000;
|
|
223
174
|
this.agentIndex = buildAgentIndex(config);
|
|
224
175
|
this.platformRunner = deps.platformRunner ?? new LocalRunner();
|
|
@@ -238,21 +189,15 @@ export class AgentManager {
|
|
|
238
189
|
getReviewStore() {
|
|
239
190
|
return this.reviewStore;
|
|
240
191
|
}
|
|
241
|
-
// Config validation guarantees non-GitHub projects resolve to server mode.
|
|
242
192
|
effectiveReviewMode(projectId) {
|
|
243
193
|
const project = this.getProjectConfig(projectId);
|
|
244
194
|
return project?.review?.mode ?? this.config.review.mode ?? 'github';
|
|
245
195
|
}
|
|
246
|
-
// Snapshot-aware afterDone read: an EXPLICIT null snapshot must win over hot
|
|
247
|
-
// config — `??` would swallow it and reroute an already-decided task.
|
|
248
196
|
resolveAfterDone(task) {
|
|
249
197
|
if (task.afterDone !== undefined)
|
|
250
198
|
return task.afterDone;
|
|
251
199
|
return this.coerceAfterDone(task.projectId, this.config.review.afterDone);
|
|
252
200
|
}
|
|
253
|
-
// Non-GitHub repos have no PR platform: 'pr' degrades to 'branch' (push + optional
|
|
254
|
-
// ff-merge). An unset afterDone defaults to 'branch' so reviewed work actually reaches
|
|
255
|
-
// the remote; an explicit null still means "don't publish". GitHub is unchanged.
|
|
256
201
|
coerceAfterDone(projectId, configured) {
|
|
257
202
|
const project = this.getProjectConfig(projectId);
|
|
258
203
|
if (project && !isGitHubRepo(project.repo)) {
|
|
@@ -269,9 +214,6 @@ export class AgentManager {
|
|
|
269
214
|
});
|
|
270
215
|
return this.reviewTransportInstance;
|
|
271
216
|
}
|
|
272
|
-
// ReviewTransport resolves worktrees synchronously; agentStore reads are async.
|
|
273
|
-
// The cache is refreshed by callers (server handlers) before transport use via
|
|
274
|
-
// refreshWorktreeCacheFor — a stale entry only costs one refresh round-trip.
|
|
275
217
|
bindingWorktreeCache = new Map();
|
|
276
218
|
async refreshWorktreeCacheFor(agentId) {
|
|
277
219
|
const state = await this.agentStore.get(agentId);
|
|
@@ -305,17 +247,12 @@ export class AgentManager {
|
|
|
305
247
|
this.config = config;
|
|
306
248
|
this.agentIndex = buildAgentIndex(config);
|
|
307
249
|
}
|
|
308
|
-
// Live view — handlers that read review.rounds etc. must go through this so PATCH /config
|
|
309
|
-
// takes effect on the very next event instead of frozen-at-boot closure capture.
|
|
310
250
|
getConfig() {
|
|
311
251
|
return this.config;
|
|
312
252
|
}
|
|
313
253
|
getAgentConfig(agentId) {
|
|
314
254
|
return this.agentIndex.get(agentId);
|
|
315
255
|
}
|
|
316
|
-
// DELETE phase1 (withConfigLock 内) 先调;返回冲突 id 表示另一 DELETE 已在跑此 agent,caller 应 409。
|
|
317
|
-
// 成功 claim 后所有出口(含 phase1 reply / phase2/3 完成 / rollback / throw)必须调
|
|
318
|
-
// releaseDeletionClaim 释放,否则 agent 永久卡在 "delete-in-flight" 状态。
|
|
319
256
|
tryClaimDeletion(agentIds) {
|
|
320
257
|
for (const id of agentIds) {
|
|
321
258
|
if (this.deletionInFlight.has(id))
|
|
@@ -362,10 +299,6 @@ export class AgentManager {
|
|
|
362
299
|
catch (err) {
|
|
363
300
|
throw new EnsureSessionError({ createdSession: false, agentId }, `ensureWorkdir failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
364
301
|
}
|
|
365
|
-
// Skills must be on disk at the repo root before the REPL launches OR is reused,
|
|
366
|
-
// so native discovery sees them and the dispatch's /skill / $skill resolves. Runs
|
|
367
|
-
// on every path — fresh launch, adopt of a live runtime, and shell/REPL restart —
|
|
368
|
-
// not just buildFreshSession; the version marker keeps the steady state a single cat.
|
|
369
302
|
try {
|
|
370
303
|
await this.provisionRepoSkills(runner, agent, workdir);
|
|
371
304
|
}
|
|
@@ -388,39 +321,19 @@ export class AgentManager {
|
|
|
388
321
|
}
|
|
389
322
|
return this.buildFreshSession(tmux, agent, agentId, workdir);
|
|
390
323
|
}
|
|
391
|
-
// Materialize baxian skills into the repo root the REPL launches in (cwd at
|
|
392
|
-
// launch), so the agent's `claude`/`codex` discovers them as native skills and
|
|
393
|
-
// the dispatch can force-load one with `/skill` / `$skill`. Each file is written
|
|
394
|
-
// atomically (stage + rename) and a current skill's dir is never removed, so a
|
|
395
|
-
// concurrent agent's lazy SKILL.md read never observes an absent/partial file.
|
|
396
324
|
async provisionRepoSkills(runner, agent, workdir) {
|
|
397
325
|
if (this.skillRegistry.names().length === 0)
|
|
398
326
|
return;
|
|
399
327
|
const subdir = agent.runtime === 'codex' ? '.agents/skills' : '.claude/skills';
|
|
400
328
|
const destRoot = `${workdir}/${subdir}`;
|
|
401
|
-
// Re-run on EVERY dispatch — do NOT cache the result. Config hot-reload
|
|
402
|
-
// (replaceConfig, no restart) can repoint this agent's workdir/runtime to another
|
|
403
|
-
// repo / skills dir, and repo code or a prior agent turn can tamper the on-disk
|
|
404
|
-
// skill tree between dispatches; a skip cache (in-memory or on-disk) would then
|
|
405
|
-
// serve a missing or repo-controlled tree. The files are tiny, so materialize
|
|
406
|
-
// unconditionally. The cleanup + git-exclude are idempotent + best-effort.
|
|
407
329
|
await this.excludeInjectedSkills(runner, workdir, subdir);
|
|
408
|
-
// Serialize the cleanup+materialize per target skills dir (shared with the launch-time
|
|
409
|
-
// scan in buildFreshSession): two same-runtime agents on one repo would otherwise let
|
|
410
|
-
// one's `rm` blank the tree while the other materializes or its REPL scans for skills.
|
|
411
330
|
await this.runUnderSkillDirLock(this.skillDirLockKey(agent, workdir), async () => {
|
|
412
331
|
await this.ensureSkillDirSafe(runner, workdir, subdir);
|
|
413
332
|
await this.skillRegistry.materialize((path, content) => this.atomicWriteFile(runner, path, content), destRoot);
|
|
414
333
|
});
|
|
415
334
|
}
|
|
416
|
-
// Per (host, workdir, runtime-subdir) in-process lock. Both the cleanup+materialize
|
|
417
|
-
// and the fresh REPL launch (which scans the skills dir at startup) run under it, so a
|
|
418
|
-
// concurrent same-dir agent never observes a transiently-empty skills tree.
|
|
419
335
|
skillDirChain = new Map();
|
|
420
336
|
skillDirLockKey(agent, workdir) {
|
|
421
|
-
// Canonicalize the host (a registry id and an equivalent inline host, or a blank vs default
|
|
422
|
-
// port, must collapse to one key) and the workdir (a trailing slash must not fork the lock),
|
|
423
|
-
// so two agents truly pointing at the same physical dir serialize instead of racing.
|
|
424
337
|
const host = hostGroupKey(agent.mode, resolveAgentHost(this.config.host, agent.host));
|
|
425
338
|
const subdir = agent.runtime === 'codex' ? '.agents/skills' : '.claude/skills';
|
|
426
339
|
const dir = workdir.replace(/\/+$/, '');
|
|
@@ -432,12 +345,6 @@ export class AgentManager {
|
|
|
432
345
|
this.skillDirChain.set(key, run.then(() => undefined, () => undefined));
|
|
433
346
|
return run;
|
|
434
347
|
}
|
|
435
|
-
// Atomic per-file replace. materialize() hands us each skill file's FINAL path; we stage it as a
|
|
436
|
-
// sibling `.baxian-tmp` and `mv -f` it into place. POSIX rename is atomic, so a claude/codex lazy
|
|
437
|
-
// SKILL.md body read — which happens at `/baxian-*` / `$baxian-*` INVOKE time, after this dir's
|
|
438
|
-
// provisioning lock has already been released — sees either the complete old file or the complete
|
|
439
|
-
// new one, never the truncate-in-place window of a bare writeFile or the blank window of a
|
|
440
|
-
// delete-then-rewrite. The tmp lives inside the `baxian-*` leaf, so the git-exclude rule covers it.
|
|
441
348
|
async atomicWriteFile(runner, finalPath, content) {
|
|
442
349
|
const tmp = `${finalPath}.baxian-tmp`;
|
|
443
350
|
await runner.writeFile(tmp, content);
|
|
@@ -447,18 +354,6 @@ export class AgentManager {
|
|
|
447
354
|
throw new Error(`atomic skill write failed (${finalPath}): ${res.stderr || 'unknown error'}`);
|
|
448
355
|
}
|
|
449
356
|
}
|
|
450
|
-
// Make the skills subtree symlink-safe before writing into it, WITHOUT blanking a live skill. A
|
|
451
|
-
// current skill's dir is left in place (its files are swapped atomically by atomicWriteFile); we
|
|
452
|
-
// prune `baxian-*` dirs no longer in the registry, strip EVERY symlink anywhere under a `baxian-*`
|
|
453
|
-
// tree (leaf OR a nested component like `baxian-pr-review/agents`) so the atomic write can't be
|
|
454
|
-
// redirected out of the workdir, and drop stale helper files left by a past skill version (a
|
|
455
|
-
// removed/renamed file) — SKILL.md is kept so a concurrent lazy read is never blanked, and
|
|
456
|
-
// materialize re-writes every current file atomically. The PARENT components (`.claude`/`.agents`
|
|
457
|
-
// + their `skills` subdir) fail fast when they are symlinks: following one could write OUTSIDE the
|
|
458
|
-
// workdir, and silently rm-ing it would destroy a user's legitimate symlinked skills dir (codex
|
|
459
|
-
// documents symlinked skill folders as supported). `find -name`/`-path` (not a bare glob) avoids
|
|
460
|
-
// zsh NOMATCH; the whole thing runs under POSIX `sh -c` since wrapRemoteCommand otherwise uses the
|
|
461
|
-
// login shell (maybe fish). `top`/`subdir` are fixed constants; names are baxian-owned slugs.
|
|
462
357
|
async ensureSkillDirSafe(runner, workdir, subdir) {
|
|
463
358
|
const top = subdir.split('/')[0];
|
|
464
359
|
const keep = this.skillRegistry.names().map((n) => `! -name ${shellQuote(n)}`).join(' ');
|
|
@@ -475,37 +370,21 @@ export class AgentManager {
|
|
|
475
370
|
throw new Error(`failed to prepare a symlink-safe ${subdir} in ${workdir}: ${res.stderr || 'unknown error'}`);
|
|
476
371
|
}
|
|
477
372
|
}
|
|
478
|
-
// Tag a freshly-launched session with the skills version it discovered at launch, so
|
|
479
|
-
// adoptOrRestartSession can tell when a live REPL predates the current skills.
|
|
480
373
|
async tagSessionSkillsVersion(tmux, agentId) {
|
|
481
374
|
if (this.skillRegistry.names().length === 0)
|
|
482
375
|
return;
|
|
483
376
|
await tmux.setOption(agentId, '@baxian-skills-version', this.skillRegistry.contentHash());
|
|
484
377
|
}
|
|
485
|
-
// True when a live REPL's launch-time skills version differs from the current one
|
|
486
|
-
// (or is absent — a pre-skills session): it cannot resolve a dispatched /baxian-*.
|
|
487
378
|
async replSkillsStale(tmux, agentId) {
|
|
488
379
|
if (this.skillRegistry.names().length === 0)
|
|
489
380
|
return false;
|
|
490
|
-
// getOption already maps a MISSING tag to null (→ stale). Do NOT swallow other
|
|
491
|
-
// errors: a thrown tmux probe failure must propagate so the caller surfaces it as an
|
|
492
|
-
// EnsureSessionError, instead of being read as stale and needlessly killing the REPL.
|
|
493
381
|
const tagged = await tmux.getOption(agentId, '@baxian-skills-version');
|
|
494
382
|
return tagged !== this.skillRegistry.contentHash();
|
|
495
383
|
}
|
|
496
|
-
// Hide ONLY what baxian writes — the `baxian-*` skill dirs — from the agent's
|
|
497
|
-
// `git status` / PRs. Excluding the whole skills dir would also hide a user repo's own
|
|
498
|
-
// untracked native skills there, defeating the `baxian-` prefix's coexistence intent.
|
|
499
|
-
// The `if git rev-parse` guard skips a non-git workdir, and failure only warns: skills
|
|
500
|
-
// are already on disk, so a git hiccup must not block the session.
|
|
501
384
|
async excludeInjectedSkills(runner, workdir, subdir) {
|
|
502
|
-
// info/exclude patterns anchor at the REPO ROOT, but the skills dir lives at the
|
|
503
|
-
// workdir; when workdir is a SUBDIR of the repo, prefix the rule with the workdir's
|
|
504
|
-
// path relative to the repo root (git rev-parse --show-prefix) so the pattern matches.
|
|
505
385
|
const inner = `cd ${shellQuote(workdir)} && if p="$(git rev-parse --git-path info/exclude 2>/dev/null)"; then ` +
|
|
506
386
|
`pre="$(git rev-parse --show-prefix 2>/dev/null)"; rule="\${pre}${subdir}/baxian-*"; ` +
|
|
507
387
|
`mkdir -p "$(dirname "$p")" && { grep -qxF "$rule" "$p" 2>/dev/null || printf '%s\\n' "$rule" >> "$p"; }; fi`;
|
|
508
|
-
// Run under POSIX sh (if/then/fi + $() are not fish syntax; wrapRemoteCommand uses $SHELL).
|
|
509
388
|
const res = await runner.exec(`sh -c ${shellQuote(inner)}`);
|
|
510
389
|
if (res.exitCode !== 0) {
|
|
511
390
|
console.warn(`[AgentManager] skill info/exclude best-effort failed in ${workdir} ` +
|
|
@@ -517,14 +396,11 @@ export class AgentManager {
|
|
|
517
396
|
await tmux.setOption(agentId, 'prefix2', 'None');
|
|
518
397
|
await tmux.setOption(agentId, 'mouse', 'on');
|
|
519
398
|
}
|
|
520
|
-
// New sessions start in latest mode; adopted sessions keep their current size owner.
|
|
521
399
|
async pinFreshSessionOptions(tmux, agentId) {
|
|
522
400
|
await tmux.setOption(agentId, 'window-size', 'latest');
|
|
523
401
|
await this.pinRuntimeSessionOptions(tmux, agentId);
|
|
524
402
|
}
|
|
525
403
|
async buildFreshSession(tmux, agent, agentId, workdir) {
|
|
526
|
-
// Hold the per-skills-dir lock across the launch so the REPL's startup skill scan
|
|
527
|
-
// can't overlap a concurrent same-dir agent's provisioning rm (see provisionRepoSkills).
|
|
528
404
|
return this.runUnderSkillDirLock(this.skillDirLockKey(agent, workdir), () => this.buildFreshSessionLocked(tmux, agent, agentId, workdir));
|
|
529
405
|
}
|
|
530
406
|
async buildFreshSessionLocked(tmux, agent, agentId, workdir) {
|
|
@@ -532,7 +408,6 @@ export class AgentManager {
|
|
|
532
408
|
const runtime = agentRuntimeKindFor(agent);
|
|
533
409
|
try {
|
|
534
410
|
await tmux.createSession(agentId, workdir);
|
|
535
|
-
// Mark BEFORE setOption — failure here must trigger caller's rollback.
|
|
536
411
|
createdSession = true;
|
|
537
412
|
await tmux.setOption(agentId, '@baxian-agent-id', agentId);
|
|
538
413
|
await tmux.setOption(agentId, '@baxian-runtime', agent.runtime);
|
|
@@ -565,7 +440,6 @@ export class AgentManager {
|
|
|
565
440
|
throw new EnsureSessionError(partial, `buildFreshSession failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
566
441
|
}
|
|
567
442
|
}
|
|
568
|
-
// Never throws — failures land in agentStore.
|
|
569
443
|
async startBootstrapAsync(agentId, creationToken) {
|
|
570
444
|
const cfgAtStart = this.getAgentConfig(agentId);
|
|
571
445
|
if (!cfgAtStart) {
|
|
@@ -583,13 +457,7 @@ export class AgentManager {
|
|
|
583
457
|
};
|
|
584
458
|
try {
|
|
585
459
|
const result = await this.ensureSession(agentId, 'create');
|
|
586
|
-
// Capability gate: hold the agent until it proves (via the baxian-signals skill)
|
|
587
|
-
// that it can load skills and echo a valid greeting signal back through its pane.
|
|
588
|
-
// A non-greeting agent that reached 'ok' would silently hang on its first real signal.
|
|
589
460
|
if (!(await this.runGreetingHandshake(agentId, cfgAtStart, result.paneId))) {
|
|
590
|
-
// A newer create may have rotated creationToken during the (slow) greeting wait.
|
|
591
|
-
// Mirror the success token-mismatch path: kill the orphan session we created so the
|
|
592
|
-
// next generation's `create` doesn't trip on a pre-existing tmux session.
|
|
593
461
|
const current = await this.agentStore.get(agentId);
|
|
594
462
|
if (current && current.creationToken !== creationToken) {
|
|
595
463
|
console.warn(`[bootstrap] ${agentId} creationToken changed during greeting — killing orphan session`);
|
|
@@ -651,27 +519,18 @@ export class AgentManager {
|
|
|
651
519
|
await this.markBootstrapFailed(agentId, creationToken, message);
|
|
652
520
|
}
|
|
653
521
|
}
|
|
654
|
-
// Bootstrap capability handshake: inject the greeting prompt and wait for the agent to
|
|
655
|
-
// echo [bx:greeting:<token>] per the baxian-signals skill. Returns true on a verified
|
|
656
|
-
// echo, false on timeout / lost session across all attempts. No task binding exists yet,
|
|
657
|
-
// so this drives the low-level inject + the pane-scoped awaitOnce directly.
|
|
658
522
|
async runGreetingHandshake(agentId, agent, paneId) {
|
|
659
523
|
const watcher = this.phaseSignalWatcher;
|
|
660
524
|
if (!watcher)
|
|
661
|
-
return true;
|
|
525
|
+
return true;
|
|
662
526
|
const tmux = new TmuxManager(this.createRunnerFor(agent));
|
|
663
527
|
for (let attempt = 1; attempt <= this.greetingMaxAttempts; attempt++) {
|
|
664
528
|
const token = createSignalToken();
|
|
665
529
|
try {
|
|
666
|
-
// Inject FIRST, then arm the wait: if the paste fails the agent never sees the
|
|
667
|
-
// prompt and cannot echo, so skip the (default 120s) wait entirely and retry.
|
|
668
530
|
await this.injectAndAwaitAckSteps(tmux, paneId, buildGreetingPrompt(token, agent.runtime), agentId, agent.runtime);
|
|
669
531
|
}
|
|
670
532
|
catch (err) {
|
|
671
533
|
console.warn(`[bootstrap] greeting inject failed for ${agentId} (attempt ${attempt}):`, err);
|
|
672
|
-
// ack_unknown = injectAndAwaitAckSteps could NOT confirm the composer was cleared, so the
|
|
673
|
-
// next paste would land on a live/unconfirmed input stream. Hold rather than concatenate.
|
|
674
|
-
// A raw (non-ack_unknown) throw means the composer was already C-c'd → safe to retry.
|
|
675
534
|
if (err instanceof DispatchTerminalError && err.reason === 'ack_unknown')
|
|
676
535
|
break;
|
|
677
536
|
continue;
|
|
@@ -685,23 +544,14 @@ export class AgentManager {
|
|
|
685
544
|
if (outcome === 'matched')
|
|
686
545
|
return true;
|
|
687
546
|
console.warn(`[bootstrap] greeting attempt ${attempt}/${this.greetingMaxAttempts} for ${agentId}: ${outcome}`);
|
|
688
|
-
// 'no-agent' = config removed, unrecoverable. 'timeout'/'session-gone' (incl. a transient
|
|
689
|
-
// subscribe fault disguised as session-gone) keep the remaining retries — a one-off pane
|
|
690
|
-
// jitter must not fail a genuinely capable agent.
|
|
691
547
|
if (outcome === 'no-agent')
|
|
692
548
|
break;
|
|
693
|
-
// An ack-timeout paste returns acked:false and leaves the unsubmitted greeting prompt in the
|
|
694
|
-
// composer; the next injectPrompt would concatenate onto it. Clear it before retrying — if the
|
|
695
|
-
// composer can't be confirmed clean, hold rather than paste onto a dirty/unsafe one.
|
|
696
549
|
if (attempt < this.greetingMaxAttempts && !(await this.clearComposerForReuse(tmux, paneId, agentId))) {
|
|
697
550
|
break;
|
|
698
551
|
}
|
|
699
552
|
}
|
|
700
553
|
return false;
|
|
701
554
|
}
|
|
702
|
-
// Greeting failed: hold the agent for a human (awaiting_human → not dispatchable) with a
|
|
703
|
-
// reason that names the capability gap. Clearing creationToken drops the "starting" pill;
|
|
704
|
-
// the operator fixes the runtime and restarts (re-greets) or Resumes to override.
|
|
705
555
|
async markGreetingFailed(agentId, creationToken) {
|
|
706
556
|
const existing = await this.agentStore.get(agentId);
|
|
707
557
|
if (!existing)
|
|
@@ -741,9 +591,6 @@ export class AgentManager {
|
|
|
741
591
|
data: { phase: 'greeting_failed', reason },
|
|
742
592
|
});
|
|
743
593
|
}
|
|
744
|
-
// Operator restart-repl/retry recovery for a greeting_failed agent: re-run the handshake on
|
|
745
|
-
// the freshly-restarted REPL. Only a passing greeting clears the hold; a failure re-holds it.
|
|
746
|
-
// Returns true when it took ownership of a greeting_failed agent (caller skips its normal clear).
|
|
747
594
|
async regreetHeldAgent(agentId) {
|
|
748
595
|
const agent = this.getAgentConfig(agentId);
|
|
749
596
|
if (!agent)
|
|
@@ -751,9 +598,6 @@ export class AgentManager {
|
|
|
751
598
|
const state = await this.agentStore.get(agentId);
|
|
752
599
|
if (state?.awaitingPhase !== 'greeting_failed')
|
|
753
600
|
return false;
|
|
754
|
-
// Identity of THIS hold: a greeting_failed binding carries no creationToken, so a DELETE+recreate
|
|
755
|
-
// during the (slow, up to 2× timeout) handshake is detected via awaitingSince — a stale regreet
|
|
756
|
-
// must never write onto the recreated generation.
|
|
757
601
|
const guardSince = state.awaitingSince;
|
|
758
602
|
let paneId = state.paneId;
|
|
759
603
|
if (!paneId) {
|
|
@@ -762,15 +606,12 @@ export class AgentManager {
|
|
|
762
606
|
}
|
|
763
607
|
catch (err) {
|
|
764
608
|
console.warn(`[regreet] cannot resolve pane for ${agentId}:`, err);
|
|
765
|
-
return true;
|
|
609
|
+
return true;
|
|
766
610
|
}
|
|
767
611
|
}
|
|
768
612
|
if (!(await this.runGreetingHandshake(agentId, agent, paneId))) {
|
|
769
|
-
// Failed → leave the existing hold untouched. Do NOT re-write it: an unguarded write could land
|
|
770
|
-
// on a DELETE+recreated generation that reused this id. Operator can restart/retry again.
|
|
771
613
|
return true;
|
|
772
614
|
}
|
|
773
|
-
// Passed → clear the hold, but only if this exact greeting_failed generation is still present.
|
|
774
615
|
await this.agentStore.update(agentId, (fresh) => {
|
|
775
616
|
if (!fresh || fresh.awaitingPhase !== 'greeting_failed' || fresh.awaitingSince !== guardSince) {
|
|
776
617
|
return AGENT_STORE_NOOP;
|
|
@@ -784,18 +625,12 @@ export class AgentManager {
|
|
|
784
625
|
const existing = await this.agentStore.get(agentId);
|
|
785
626
|
if (!existing)
|
|
786
627
|
return;
|
|
787
|
-
// A greeting capability failure must not be downgraded into a dialog-resolvable hold: doing so
|
|
788
|
-
// would let Resume release a never-re-greeted agent. Keep the greeting_failed hold; restart/retry
|
|
789
|
-
// re-runs the handshake.
|
|
790
628
|
if (existing.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(existing.awaitingPhase))
|
|
791
629
|
return;
|
|
792
|
-
// runtime path snapshot 全空时直接拒绝——既无 paneId 也无 taskId 作 generation 证据,
|
|
793
|
-
// 旧 callback 通过 guard 污染同样 idle 的新 agent 的风险无法排除。
|
|
794
630
|
if (opts.runtimePath && opts.expectedPaneId === undefined && opts.expectedTaskId === undefined) {
|
|
795
631
|
console.warn(`[AgentManager] markDialogPending runtime path: refusing to write without paneId/taskId snapshot (no generation guard available for ${agentId})`);
|
|
796
632
|
return;
|
|
797
633
|
}
|
|
798
|
-
// Pre-check(early exit;下面 closure 内会再 atomic 校验一次)
|
|
799
634
|
if (opts.runtimePath) {
|
|
800
635
|
if (existing.creationToken !== undefined)
|
|
801
636
|
return;
|
|
@@ -815,10 +650,8 @@ export class AgentManager {
|
|
|
815
650
|
paneId = await new TmuxManager(runner).getSinglePaneId(agentId);
|
|
816
651
|
}
|
|
817
652
|
catch {
|
|
818
|
-
// best-effort; slowPoll skips iterations without paneId
|
|
819
653
|
}
|
|
820
654
|
}
|
|
821
|
-
// 原子写入:guard + paneId + awaiting fields 一次性,避免 get→update 中间 race。
|
|
822
655
|
const now = new Date().toISOString();
|
|
823
656
|
let wrote = false;
|
|
824
657
|
let projectIdForEmit = '';
|
|
@@ -871,7 +704,6 @@ export class AgentManager {
|
|
|
871
704
|
const existing = await this.agentStore.get(agentId);
|
|
872
705
|
if (!existing)
|
|
873
706
|
return;
|
|
874
|
-
// generational guard: token mismatch means a newer create-recreate already won.
|
|
875
707
|
if (creationToken !== undefined && existing.creationToken !== creationToken)
|
|
876
708
|
return;
|
|
877
709
|
const now = new Date().toISOString();
|
|
@@ -896,11 +728,6 @@ export class AgentManager {
|
|
|
896
728
|
data: { error: errorMessage, phase: 'session' },
|
|
897
729
|
});
|
|
898
730
|
}
|
|
899
|
-
// Returns true when handled — caller skips its own kill cleanup.
|
|
900
|
-
// expectedFromStatuses: fail-task transition 的允许 fromStatus 集合。caller (startSession/continueSession)
|
|
901
|
-
// 已根据 phase + opts.dialogFailFromStatuses 计算好;并发 outcome 已把 task 推到此集合外的状态时
|
|
902
|
-
// transitionTaskStatus skip → 不覆盖已接受的 outcome。未传时退化为 [...ACTIVE_TASK_STATUSES] (retry
|
|
903
|
-
// endpoint 等无 phase 路径,且这些路径不绑 task,fail task 分支本就不进入)。
|
|
904
731
|
async handleDialogPendingFromRuntime(agentId, err, opts = {}) {
|
|
905
732
|
if (!(err instanceof EnsureSessionError) || !err.partial.dialogPending) {
|
|
906
733
|
return false;
|
|
@@ -908,10 +735,6 @@ export class AgentManager {
|
|
|
908
735
|
let state = await this.agentStore.get(agentId);
|
|
909
736
|
if (!state)
|
|
910
737
|
return false;
|
|
911
|
-
// retry path(state.paneId 未写入但 ensureSession 刚 createSession):从 tmux 取 paneId
|
|
912
|
-
// 写入 state 作 generation 证据,否则 markDialogPending 的 snapshot 全空 refuse 会 no-op
|
|
913
|
-
// → return true 让 caller 释放锁 202 返回 → agent 留 idle 但 tmux dialog 在跑 → 下一个
|
|
914
|
-
// dispatch 撞进 dialog pane。tmux 探 paneId 失败时返回 false,让 caller 走 killSession 回滚。
|
|
915
738
|
if (state.paneId === undefined && err.partial.createdSession) {
|
|
916
739
|
const cfg = this.getAgentConfig(agentId);
|
|
917
740
|
if (!cfg)
|
|
@@ -928,11 +751,6 @@ export class AgentManager {
|
|
|
928
751
|
if (!discoveredPaneId)
|
|
929
752
|
return false;
|
|
930
753
|
const probeNow = new Date().toISOString();
|
|
931
|
-
// 不用 updatedAt guard:updatedAt 太宽,正常 background updates
|
|
932
|
-
// (repoPath refresh / poller bump 等) 也会触发假阳性让合法 retry dialog 路径误拒。
|
|
933
|
-
// race ("DELETE+recreate 后旧回调写新 agent") 在持锁路径下是 theoretical (retry endpoint 持锁
|
|
934
|
-
// 全程到 handleDialogPendingFromRuntime 返回;startSession/continueSession 由 acquireAgentForTask
|
|
935
|
-
// 持锁),且 `fresh.paneId !== undefined` 已挡住新 agent 已写 paneId 的情况。
|
|
936
754
|
await this.agentStore.update(agentId, (fresh) => {
|
|
937
755
|
if (!fresh)
|
|
938
756
|
return AGENT_STORE_NOOP;
|
|
@@ -948,24 +766,12 @@ export class AgentManager {
|
|
|
948
766
|
console.warn(`[AgentManager] handleDialogPendingFromRuntime: ${agentId} has no paneId/taskId snapshot (no generation guard); refusing — caller should rollback`);
|
|
949
767
|
return false;
|
|
950
768
|
}
|
|
951
|
-
// runtime path: 显式 guard,不传 state.creationToken(race window 内可能已是新 generation)。
|
|
952
|
-
// 同时 snapshot paneId / taskId 作 atomic check,挡住"DELETE+recreate + 新 bootstrap 完成"的 race。
|
|
953
769
|
await this.markDialogPending(agentId, undefined, {
|
|
954
770
|
runtimePath: true,
|
|
955
771
|
...(state.paneId !== undefined ? { expectedPaneId: state.paneId } : {}),
|
|
956
772
|
expectedTaskId: state.taskId,
|
|
957
773
|
});
|
|
958
|
-
// runtime path (agent 已绑 active task + 无 creationToken):dialog 在 ensureSession 阶段抛错,
|
|
959
|
-
// prompt 还没 inject——直接 fail task 让 UI Retry 通路打开(无工作丢失)。
|
|
960
|
-
// 若不 fail:task 卡 in_progress、agent Held、operator Resume 后仍无人重发 prompt(owner 评审 #6
|
|
961
|
-
// 指出 transitionToCodePhase 会死锁)。task fail 后 Resume / recover 走 terminal-release 路径。
|
|
962
|
-
// 用 transitionTaskStatus(内含 withTaskLock + fromStatus guard)避免与 Cancel / merge /
|
|
963
|
-
// review outcome 等并发 mutation race,否则 stale 'failed' 会覆盖已经到达的 terminal 状态。
|
|
964
774
|
if (state.taskId && state.creationToken === undefined) {
|
|
965
|
-
// fromStatus 来自 caller 显式计算:startSession/continueSession 用 opts.dialogFailFromStatuses ??
|
|
966
|
-
// PHASE_EXPECTED_STATUS[phase],dispatchReviewToQa 走 bypassTaskStatusGate 时显式传 [taskStatusAtClaim]
|
|
967
|
-
// (manual review 入口可能是 approved/fixing/in_progress,但 phase='review' 的 default 只接受 'review' →
|
|
968
|
-
// 不传就 skip → task 卡 active 死锁)。
|
|
969
775
|
const expectedFromStatuses = opts.expectedFromStatuses ?? [...ACTIVE_TASK_STATUSES];
|
|
970
776
|
const transitioned = await this.transitionTaskStatus(state.taskId, 'failed', { fromStatus: expectedFromStatuses });
|
|
971
777
|
if (transitioned) {
|
|
@@ -977,19 +783,10 @@ export class AgentManager {
|
|
|
977
783
|
taskId: state.taskId,
|
|
978
784
|
data: { status: 'failed', reason: 'agent_dialog_pending_runtime' },
|
|
979
785
|
});
|
|
980
|
-
// 同步释放 partner agent 的 binding。task 已 terminal 不会再走 cancel 清理,否则 partner
|
|
981
|
-
// 永远指向 terminal task → retryTask 走 validateTaskDispatch 看 dev 仍 bound → 409。
|
|
982
786
|
await this.releasePartnersAndDrain(agentId, [state.taskId], [transitioned.task.projectId]);
|
|
983
787
|
}
|
|
984
788
|
}
|
|
985
|
-
// 通知 caller (startSession / continueSession catch) 不要再调 releaseAgentForTask 清理——
|
|
986
|
-
// task 已 terminal + agent Held 时,shouldReleaseHeldBinding 第一条规则会放行 release,
|
|
987
|
-
// 把仍卡 dialog 的 pane 解锁让下个 dispatch 派进来。set partial.handled 让 caller 跳过 release。
|
|
988
789
|
err.partial.handled = true;
|
|
989
|
-
// slowPoll 是 fire-and-forget,在 caller 释放锁后继续运行。runtime path 下 creationToken=undefined
|
|
990
|
-
// 不足以挡 DELETE+recreate 后旧 poll 撞新 agent(新 agent ack_unknown/dev-wait-gate-failed 时
|
|
991
|
-
// creationToken 也 undefined)→ 旧 poll 会把新 agent phase 覆为 resolved_runtime → Resume 不再拒。
|
|
992
|
-
// 传入当前 paneId/taskId snapshot 作 generation 证据,atomic update 校验匹配才写。
|
|
993
790
|
const snapshotPaneId = state.paneId;
|
|
994
791
|
const snapshotTaskId = state.taskId;
|
|
995
792
|
void this.slowPollDialogPending(agentId, state.creationToken, {
|
|
@@ -1012,10 +809,6 @@ export class AgentManager {
|
|
|
1012
809
|
}
|
|
1013
810
|
throw new Error(`waitForBootstrapSettled(${agentId}) timed out after ${timeoutMs}ms`);
|
|
1014
811
|
}
|
|
1015
|
-
// 无硬上限——配合 markDialogPending 的 human.intervention emit 让 operator 来;
|
|
1016
|
-
// DELETE/recreate 通过 creationToken 失配让循环自然退出。
|
|
1017
|
-
// runtime path(creationToken=undefined)下旧 poll 会撞 DELETE+recreate 后的新 agent(也无 token),
|
|
1018
|
-
// 需要 opts.expectedPaneId/expectedTaskId 作 generation 证据,loop top + atomic update 双重校验。
|
|
1019
812
|
async slowPollDialogPending(agentId, creationToken, opts = {}) {
|
|
1020
813
|
const POLL_INTERVAL_MS = 5_000;
|
|
1021
814
|
const cfg = this.getAgentConfig(agentId);
|
|
@@ -1027,7 +820,6 @@ export class AgentManager {
|
|
|
1027
820
|
const generationMismatch = (state) => {
|
|
1028
821
|
if (state.creationToken !== creationToken)
|
|
1029
822
|
return true;
|
|
1030
|
-
// runtime path: 校验 paneId/taskId snapshot 匹配
|
|
1031
823
|
if (creationToken === undefined) {
|
|
1032
824
|
if (opts.expectedPaneId !== undefined && state.paneId !== opts.expectedPaneId)
|
|
1033
825
|
return true;
|
|
@@ -1043,9 +835,6 @@ export class AgentManager {
|
|
|
1043
835
|
return;
|
|
1044
836
|
if (generationMismatch(state))
|
|
1045
837
|
return;
|
|
1046
|
-
// The session's live pane is authoritative — never the stored snapshot. A runtime relaunch
|
|
1047
|
-
// (skills-stale rebuild, crash / Ctrl-C recovery) gives the session a fresh pane id; trusting
|
|
1048
|
-
// a stale state.paneId would poll a dead pane forever and the Held would never clear.
|
|
1049
838
|
let paneId;
|
|
1050
839
|
try {
|
|
1051
840
|
paneId = await tmux.getSinglePaneId(agentId);
|
|
@@ -1071,14 +860,7 @@ export class AgentManager {
|
|
|
1071
860
|
const now = new Date().toISOString();
|
|
1072
861
|
let projectIdForEmit = '';
|
|
1073
862
|
let wrote = false;
|
|
1074
|
-
// bootstrap path: creationToken set,agent 未绑 task;ready 后自动清 Held(无需 operator)。
|
|
1075
|
-
// runtime path: creationToken undefined,agent 仍绑 task(已被 handleDialogPendingFromRuntime
|
|
1076
|
-
// 推 failed)+ lock 在;ready 后切到 'agent_dialog_resolved_runtime' phase,让 resumeAgent 放行
|
|
1077
|
-
// 让 operator 显式确认。仍保留 awaiting_human + lock 防止"dialog ready 自动派下一 task 撞 pane"。
|
|
1078
863
|
const isBootstrapPath = creationToken !== undefined;
|
|
1079
|
-
// A create bootstrap that was blocked on a startup dialog still owes the greeting gate:
|
|
1080
|
-
// now that the dialog is dismissed and the REPL is ready, run it before clearing to 'ok',
|
|
1081
|
-
// else a dialog-resolved agent would reach the dispatch pool without proving capability.
|
|
1082
864
|
if (isBootstrapPath && !(await this.runGreetingHandshake(agentId, cfg, paneId))) {
|
|
1083
865
|
await this.markGreetingFailed(agentId, creationToken);
|
|
1084
866
|
return;
|
|
@@ -1088,9 +870,6 @@ export class AgentManager {
|
|
|
1088
870
|
return AGENT_STORE_NOOP;
|
|
1089
871
|
if (generationMismatch(fresh))
|
|
1090
872
|
return AGENT_STORE_NOOP;
|
|
1091
|
-
// A greeting capability hold must not be downgraded to a dialog-resolvable phase here (the
|
|
1092
|
-
// runtime branch would otherwise rewrite it to agent_dialog_resolved_runtime, which Resume
|
|
1093
|
-
// then releases un-regreeted). Preserve it; restart/retry's regreet is its recovery path.
|
|
1094
873
|
if (fresh.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(fresh.awaitingPhase)) {
|
|
1095
874
|
return AGENT_STORE_NOOP;
|
|
1096
875
|
}
|
|
@@ -1129,7 +908,6 @@ export class AgentManager {
|
|
|
1129
908
|
});
|
|
1130
909
|
}
|
|
1131
910
|
else {
|
|
1132
|
-
// runtime dialog 解决,phase 切到 resolved_runtime;emit 通知 operator 现在可以 Resume。
|
|
1133
911
|
await this.safeEmit({
|
|
1134
912
|
id: '',
|
|
1135
913
|
type: 'human.intervention',
|
|
@@ -1145,7 +923,6 @@ export class AgentManager {
|
|
|
1145
923
|
return;
|
|
1146
924
|
}
|
|
1147
925
|
}
|
|
1148
|
-
// Idempotent; self-exits when taskId clears, so callers never need a paired stop.
|
|
1149
926
|
startRuntimeMenuWatch(agentId) {
|
|
1150
927
|
if (this.runtimeMenuWatchers.has(agentId))
|
|
1151
928
|
return;
|
|
@@ -1191,7 +968,6 @@ export class AgentManager {
|
|
|
1191
968
|
catch {
|
|
1192
969
|
continue;
|
|
1193
970
|
}
|
|
1194
|
-
// Re-fetch after the async capture; release/reassign may have rewritten state.
|
|
1195
971
|
const fresh = await this.agentStore.get(agentId);
|
|
1196
972
|
if (!fresh)
|
|
1197
973
|
return;
|
|
@@ -1277,17 +1053,12 @@ export class AgentManager {
|
|
|
1277
1053
|
stale = await this.replSkillsStale(tmux, agentId);
|
|
1278
1054
|
}
|
|
1279
1055
|
catch (err) {
|
|
1280
|
-
// A tmux probe failure here is transient — surface it, do NOT kill the REPL.
|
|
1281
1056
|
throw new EnsureSessionError({ createdSession: false, agentId }, `skills-version probe failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
1282
1057
|
}
|
|
1283
1058
|
if (stale) {
|
|
1284
|
-
// This REPL launched before the current skills were on disk; claude/codex only
|
|
1285
|
-
// discover a freshly-created top-level skills dir at launch, so a dispatched
|
|
1286
|
-
// /baxian-* / $baxian-* would not resolve. Rebuild so the command works.
|
|
1287
1059
|
await tmux.killSession(agentId).catch(() => { });
|
|
1288
1060
|
return this.buildFreshSession(tmux, agent, agentId, workdir);
|
|
1289
1061
|
}
|
|
1290
|
-
// 复用既有 REPL,上下文未中断——freshRuntime=false,post-approve 可走增量 nudge。
|
|
1291
1062
|
return { ok: true, createdSession: false, freshRuntime: false, paneId, workdir };
|
|
1292
1063
|
}
|
|
1293
1064
|
case 'startup-dialog':
|
|
@@ -1298,7 +1069,6 @@ export class AgentManager {
|
|
|
1298
1069
|
lastScreen: state.lastScreen,
|
|
1299
1070
|
}, `adoptOrRestartSession: REPL blocked on startup dialog`);
|
|
1300
1071
|
case 'other':
|
|
1301
|
-
// Refuse send-keys — would land as input inside vim/make/etc instead of spawning REPL.
|
|
1302
1072
|
throw new EnsureSessionError({ createdSession: false, agentId }, `pane foreground "${state.paneCurrentCommand}" is neither runtime ` +
|
|
1303
1073
|
`(${runtime}) nor shell; refusing to send launch keys — operator ` +
|
|
1304
1074
|
`must reset the pane manually`);
|
|
@@ -1309,10 +1079,8 @@ export class AgentManager {
|
|
|
1309
1079
|
});
|
|
1310
1080
|
await tmux.waitReplReady(paneId, runtime, {
|
|
1311
1081
|
timeoutMs: this.bootstrapTimeoutsMs.waitReplReady,
|
|
1312
|
-
// scrollback>0 risks matching a stale ready anchor from before trust prompt.
|
|
1313
1082
|
scrollback: 0,
|
|
1314
1083
|
});
|
|
1315
|
-
// 信任弹窗刚被答完,REPL 从启动态进入可用——上下文是新的。
|
|
1316
1084
|
await this.tagSessionSkillsVersion(tmux, agentId);
|
|
1317
1085
|
return { ok: true, createdSession: false, freshRuntime: true, paneId, workdir };
|
|
1318
1086
|
}
|
|
@@ -1340,7 +1108,6 @@ export class AgentManager {
|
|
|
1340
1108
|
timeoutMs: this.bootstrapTimeoutsMs.waitReplReady,
|
|
1341
1109
|
scrollback: 0,
|
|
1342
1110
|
});
|
|
1343
|
-
// shell 路径:在原 pane 里重新启动了 REPL,新进程没有旧 prompt 上下文。
|
|
1344
1111
|
await this.tagSessionSkillsVersion(tmux, agentId);
|
|
1345
1112
|
return { ok: true, createdSession: false, freshRuntime: true, paneId, workdir };
|
|
1346
1113
|
}
|
|
@@ -1375,7 +1142,6 @@ export class AgentManager {
|
|
|
1375
1142
|
return false;
|
|
1376
1143
|
}
|
|
1377
1144
|
if (!reuseLock) {
|
|
1378
|
-
// Tag the lock with the taskId so post-merge cleanup can later prove it still owns it.
|
|
1379
1145
|
const ok = await this.lockManager.acquire(agentId, taskId);
|
|
1380
1146
|
if (!ok)
|
|
1381
1147
|
return false;
|
|
@@ -1390,10 +1156,6 @@ export class AgentManager {
|
|
|
1390
1156
|
}));
|
|
1391
1157
|
return true;
|
|
1392
1158
|
}
|
|
1393
|
-
// waiting: dev keeps lock across review/fix. idle: terminal release.
|
|
1394
|
-
// 纯状态更新——REPL 是否 ready 不在此处守门,dispatch 路径自己处理就绪问题。
|
|
1395
|
-
// awaiting_human 状态拒释:避免上游 catch(如 EnsureSessionError(dialogPending) 的 generic
|
|
1396
|
-
// fallback)撕掉 markAwaitingHuman 已标的 await 标记。resumeAgent 用 allowAwaitingHuman 接管。
|
|
1397
1159
|
async releaseAgentForTask(agentId, expectedTaskId, mode, opts = {}) {
|
|
1398
1160
|
return this.withTaskLock(async () => {
|
|
1399
1161
|
const state = await this.agentStore.get(agentId);
|
|
@@ -1404,20 +1166,12 @@ export class AgentManager {
|
|
|
1404
1166
|
`(expected ${expectedTaskId}, got ${state.taskId}); skipping`);
|
|
1405
1167
|
return false;
|
|
1406
1168
|
}
|
|
1407
|
-
// Cancel-cleanup hold: only cancel's own release may free it. Checked BEFORE the allowAwaitingHuman
|
|
1408
|
-
// gate below, because that gate skips shouldReleaseHeldBinding entirely — so without this an
|
|
1409
|
-
// allowAwaitingHuman caller (startup false-start, review/max-rounds handlers, a terminal-task escape)
|
|
1410
|
-
// could reassign the un-cleared/maybe-running pane before cancel confirms /clear. (Operator recovery
|
|
1411
|
-
// via resumeAgent / DELETE doesn't go through this path.)
|
|
1412
1169
|
if (isCancelCleanupHold(state) && !opts.fromCancelCleanup) {
|
|
1413
1170
|
console.warn(`[AgentManager] releaseAgentForTask: agent ${agentId} ${state.awaitingPhase} (cancel-cleanup hold); refusing auto-release`);
|
|
1414
1171
|
return false;
|
|
1415
1172
|
}
|
|
1416
1173
|
const boundTask = await this.taskStore.get(expectedTaskId);
|
|
1417
1174
|
if (state.status === 'awaiting_human' && !opts.allowAwaitingHuman) {
|
|
1418
|
-
// gate 例外:bound task 已 terminal / turn-completed phase 都属于正常 cleanup 路径,
|
|
1419
|
-
// 必须能清绑定,否则 stale binding 永久指向终态 task → 后续 acquire 全卡。
|
|
1420
|
-
// shouldReleaseHeldBinding 和 Resume 共享同一规则。
|
|
1421
1175
|
if (!shouldReleaseHeldBinding(state, boundTask)) {
|
|
1422
1176
|
console.warn(`[AgentManager] releaseAgentForTask: agent ${agentId} is awaiting_human (${state.awaitingPhase}); refusing to release`);
|
|
1423
1177
|
return false;
|
|
@@ -1435,8 +1189,6 @@ export class AgentManager {
|
|
|
1435
1189
|
await this.agentStore.update(agentId, (latest) => {
|
|
1436
1190
|
if (!latest)
|
|
1437
1191
|
return AGENT_STORE_NOOP;
|
|
1438
|
-
// clearAwaitingHuman: restart-repl/retry 显式 operator op 已确认 REPL 重启,前面的
|
|
1439
|
-
// ack_unknown/dialog_pending Held 不再成立——清掉 awaiting_human 字段让 agent 可派遣。
|
|
1440
1192
|
if (opts.clearAwaitingHuman && latest.status === 'awaiting_human') {
|
|
1441
1193
|
const cleared = {
|
|
1442
1194
|
id: latest.id,
|
|
@@ -1504,13 +1256,6 @@ export class AgentManager {
|
|
|
1504
1256
|
});
|
|
1505
1257
|
return cleared;
|
|
1506
1258
|
}
|
|
1507
|
-
// baxian 把 agent 标为"自动调度走不通,等 operator 显式 resume"。
|
|
1508
|
-
// 唯一禁区入口:cancel C-c 失败 / dispatch ack_unknown / dialog 卡住等场景。
|
|
1509
|
-
// 保留绑定 + 锁,靠 canDispatchWithBinding 的 status 检查拦住自动派遣。
|
|
1510
|
-
// generation guard 防 DELETE+recreate race:
|
|
1511
|
-
// - expectedCreationToken: 'tok' → store 当前 token 必须等于 'tok'
|
|
1512
|
-
// - expectedCreationToken: null → store 当前必须仍 *无* token(runtime path 用)
|
|
1513
|
-
// - 不传 → 不校验 generation
|
|
1514
1259
|
async markAwaitingHuman(agentId, phase, reason, opts = {}) {
|
|
1515
1260
|
const now = new Date().toISOString();
|
|
1516
1261
|
let projectId = '';
|
|
@@ -1520,22 +1265,17 @@ export class AgentManager {
|
|
|
1520
1265
|
if (!existing)
|
|
1521
1266
|
return AGENT_STORE_NOOP;
|
|
1522
1267
|
if (opts.expectedCreationToken !== undefined) {
|
|
1523
|
-
const expected = opts.expectedCreationToken;
|
|
1268
|
+
const expected = opts.expectedCreationToken;
|
|
1524
1269
|
const actual = existing.creationToken ?? null;
|
|
1525
1270
|
if (actual !== expected)
|
|
1526
1271
|
return AGENT_STORE_NOOP;
|
|
1527
1272
|
}
|
|
1528
|
-
// taskId guard:迟到 mark 撞已 release+reassign 的 binding 时 noop
|
|
1529
|
-
// (caller 观察到 expectedTaskId 时 binding 还是它,update 时已变 = race lost)。
|
|
1530
1273
|
if (opts.expectedTaskId !== undefined) {
|
|
1531
|
-
const expectedTask = opts.expectedTaskId;
|
|
1274
|
+
const expectedTask = opts.expectedTaskId;
|
|
1532
1275
|
const actualTask = existing.taskId ?? null;
|
|
1533
1276
|
if (actualTask !== expectedTask)
|
|
1534
1277
|
return AGENT_STORE_NOOP;
|
|
1535
1278
|
}
|
|
1536
|
-
// A cancel-cleanup hold is owned by the cancel flow: a generic dispatch-failure hold must not overwrite
|
|
1537
|
-
// it, and even a cancel-cleanup phase must not DOWNGRADE it (cancel-clear-failed is DELETE-only — softening
|
|
1538
|
-
// it to cancel-clearing/cancel-interrupt-failed would let Resume reuse an un-cleared pane).
|
|
1539
1279
|
if (isCancelCleanupHold(existing)) {
|
|
1540
1280
|
if (!CANCEL_CLEANUP_HOLD_PHASES.has(phase))
|
|
1541
1281
|
return AGENT_STORE_NOOP;
|
|
@@ -1569,11 +1309,6 @@ export class AgentManager {
|
|
|
1569
1309
|
},
|
|
1570
1310
|
});
|
|
1571
1311
|
}
|
|
1572
|
-
// dispatch catch helper:caller 调 startSession/continueSession 抛 DispatchTerminalError
|
|
1573
|
-
// 时统一区分 ack_unknown vs 其他 reason。返回 true 表示已 markAwaitingHuman(caller
|
|
1574
|
-
// 应跳过 release / rollback),返回 false 表示其他错误(caller 走常规清理)。
|
|
1575
|
-
// expectedTaskId: caller 当时观察到的 binding;mark 在 lock 释放后才执行的话,binding 可能
|
|
1576
|
-
// 已被 outcome/cancel release 给新任务,传 taskId 用作 atomic guard 避免污染无关 binding。
|
|
1577
1312
|
async markAwaitingIfAckUnknown(agentId, err, expectedTaskId) {
|
|
1578
1313
|
if (err instanceof DispatchTerminalError && err.reason === 'ack_unknown') {
|
|
1579
1314
|
await this.markAwaitingHuman(agentId, `dispatch-failed:${err.reason}`, `${err.message}. Prompt may still be running in the pane; verify before resuming.`, { expectedTaskId });
|
|
@@ -1581,9 +1316,6 @@ export class AgentManager {
|
|
|
1581
1316
|
}
|
|
1582
1317
|
return false;
|
|
1583
1318
|
}
|
|
1584
|
-
// operator 显式恢复 awaiting_human 的 agent。
|
|
1585
|
-
// 如果 taskId 指向已 terminal 的 task,连带清掉绑定 + 锁——回归 idle 可派遣。
|
|
1586
|
-
// 如果 taskId 指向仍 active 的 task(罕见,比如 dialog_pending 期间 task 没 fail),保留绑定。
|
|
1587
1319
|
async resumeAgent(agentId) {
|
|
1588
1320
|
const result = await this.withTaskLock(async () => {
|
|
1589
1321
|
const state = await this.agentStore.get(agentId);
|
|
@@ -1592,21 +1324,12 @@ export class AgentManager {
|
|
|
1592
1324
|
if (state.status !== 'awaiting_human') {
|
|
1593
1325
|
return { resumed: false, releasedBinding: false, reason: 'Agent is not awaiting human; nothing to resume.' };
|
|
1594
1326
|
}
|
|
1595
|
-
// creationToken 仍 set = bootstrap dialog 仍未解决。Resume 不能让它"继续"——
|
|
1596
|
-
// dialog 在 pane 里需要 operator 通过 web terminal 处理,slowPoll 解决后自动清状态。
|
|
1597
|
-
// 如果 operator 想放弃这个 agent,应该走 DELETE 路径。
|
|
1598
1327
|
if (state.creationToken) {
|
|
1599
1328
|
const reason = 'Bootstrap dialog still unresolved; resolve it via the web terminal or DELETE the agent.';
|
|
1600
1329
|
console.warn(`[AgentManager] resumeAgent: agent ${agentId} still has creationToken — ${reason}`);
|
|
1601
1330
|
return { resumed: false, releasedBinding: false, reason };
|
|
1602
1331
|
}
|
|
1603
1332
|
const boundTask = state.taskId ? await this.taskStore.get(state.taskId) : null;
|
|
1604
|
-
// "prompt 可能仍在 pane 中跑"类 phase + bound task 仍 active 时 refuse:Resume 让
|
|
1605
|
-
// shouldReleaseHeldBinding 放行清 binding 后下一次 dispatchPendingTask 会把第二个 prompt
|
|
1606
|
-
// 派进同 pane 与旧 turn 混在一起。outcome 到达时 review.submitted handler 通过
|
|
1607
|
-
// allowAwaitingHuman:true 显式 release;这里不必再走 Resume。task terminal/missing 时则放行
|
|
1608
|
-
// — failTaskForDispatchError 的 ack_unknown 分支会把 task 推 failed 后保留 Held,
|
|
1609
|
-
// 此时唯一恢复路径就是 Resume。
|
|
1610
1333
|
const PROMPT_MAYBE_RUNNING_PHASES = new Set([
|
|
1611
1334
|
'dispatch-failed:ack_unknown',
|
|
1612
1335
|
'dev-wait-gate-failed-after-qa-started',
|
|
@@ -1618,32 +1341,17 @@ export class AgentManager {
|
|
|
1618
1341
|
console.warn(`[AgentManager] resumeAgent: agent ${agentId} — ${reason}`);
|
|
1619
1342
|
return { resumed: false, releasedBinding: false, reason };
|
|
1620
1343
|
}
|
|
1621
|
-
// agent_dialog_pending: pane 仍卡 startup dialog,REPL 未 ready。Resume 让
|
|
1622
|
-
// shouldReleaseHeldBinding 看 task terminal/missing 放行后会清 binding/lock,下一次
|
|
1623
|
-
// dispatchPendingTask 就会把新 prompt 派进仍卡 dialog 的 pane。dialog 的恢复路径只能是
|
|
1624
|
-
// operator 通过 web terminal dismiss → slowPollDialogPending 转 phase 到
|
|
1625
|
-
// agent_dialog_resolved_runtime(Resume 放行)或 bootstrap path 直接清 Held → status='ok',
|
|
1626
|
-
// 或 DELETE agent。
|
|
1627
1344
|
if (state.awaitingPhase === 'agent_dialog_pending') {
|
|
1628
1345
|
const reason = 'Startup dialog still pending; Resume cannot dismiss it. Dismiss the dialog via the web terminal (baxian will auto-resume) or DELETE the agent.';
|
|
1629
1346
|
console.warn(`[AgentManager] resumeAgent: agent ${agentId} — ${reason}`);
|
|
1630
1347
|
return { resumed: false, releasedBinding: false, reason };
|
|
1631
1348
|
}
|
|
1632
|
-
// agent_dialog_resolved_runtime + active task:正常路径下 handleDialogPendingFromRuntime
|
|
1633
|
-
// 已 fail task → boundTask 应 terminal。bound task 仍 active 表示 crash window
|
|
1634
|
-
// (handleDialogPendingFromRuntime 写 awaiting_human 后 transitionTaskStatus 前 crash);
|
|
1635
|
-
// Resume 走 release path 会切 status=ok 但保留 binding + lock,prompt 从未发送 → task 静默卡死。
|
|
1636
|
-
// refuse Resume,提示 operator 显式 cancel task 或 DELETE agent。
|
|
1637
1349
|
if (state.awaitingPhase === 'agent_dialog_resolved_runtime'
|
|
1638
1350
|
&& boundTask && ACTIVE_TASK_STATUSES.has(boundTask.status)) {
|
|
1639
1351
|
const reason = `Dialog resolved but task ${state.taskId} is still active and its prompt was never injected; Resume would strand it. Cancel the task or DELETE the agent.`;
|
|
1640
1352
|
console.warn(`[AgentManager] resumeAgent: agent ${agentId} — ${reason}`);
|
|
1641
1353
|
return { resumed: false, releasedBinding: false, reason };
|
|
1642
1354
|
}
|
|
1643
|
-
// code-dispatch-failed: the code-phase prompt never reached the pane (spec
|
|
1644
|
-
// approval already transitioned the task). Resume = clear the hold AND
|
|
1645
|
-
// redispatch the code prompt (outside this lock) — without the redispatch
|
|
1646
|
-
// the task would stay in_progress with nothing running.
|
|
1647
1355
|
if (state.awaitingPhase === 'code-dispatch-failed'
|
|
1648
1356
|
&& boundTask && ACTIVE_TASK_STATUSES.has(boundTask.status)
|
|
1649
1357
|
&& state.taskId) {
|
|
@@ -1662,27 +1370,17 @@ export class AgentManager {
|
|
|
1662
1370
|
});
|
|
1663
1371
|
return { resumed: true, releasedBinding: false, redispatchCodeTaskId: state.taskId };
|
|
1664
1372
|
}
|
|
1665
|
-
// signal-arm-failed: the prompt was already dispatched but its pane-signal watcher never
|
|
1666
|
-
// armed. Resume here would only flip status→ok WITHOUT rebuilding the watcher (Resume has no
|
|
1667
|
-
// re-arm path), so the prompt's signal would still have no consumer — silent deadlock again.
|
|
1668
|
-
// Refuse while the task is active; operator must cancel the task or DELETE the agent to retry.
|
|
1669
1373
|
if (state.awaitingPhase?.startsWith('signal-arm-failed')
|
|
1670
1374
|
&& boundTask && ACTIVE_TASK_STATUSES.has(boundTask.status)) {
|
|
1671
1375
|
const reason = `The dispatched prompt's pane signal has no consumer and Resume cannot rebuild the watcher; cancel task ${state.taskId} or DELETE the agent to retry.`;
|
|
1672
1376
|
console.warn(`[AgentManager] resumeAgent: agent ${agentId} ${state.awaitingPhase} — ${reason}`);
|
|
1673
1377
|
return { resumed: false, releasedBinding: false, reason };
|
|
1674
1378
|
}
|
|
1675
|
-
// Un-cleared pane (cancel mid-clear or /clear unconfirmed): Resume would free + reuse it (terminal
|
|
1676
|
-
// task → shouldReleaseHeldBinding) and leak the cancelled task's context. Refuse; only DELETE (which
|
|
1677
|
-
// destroys the pane) is a safe recovery.
|
|
1678
1379
|
if (state.awaitingPhase != null && UNCLEARED_PANE_PHASES.has(state.awaitingPhase)) {
|
|
1679
1380
|
const reason = 'The pane holds un-cleared context from a cancelled task; Resume would leak it into the next task. DELETE the agent to discard it.';
|
|
1680
1381
|
console.warn(`[AgentManager] resumeAgent: agent ${agentId} ${state.awaitingPhase} — ${reason}`);
|
|
1681
1382
|
return { resumed: false, releasedBinding: false, reason };
|
|
1682
1383
|
}
|
|
1683
|
-
// A greeting capability failure must be RE-PROVEN, not Resumed away: the default path below
|
|
1684
|
-
// flips status→'ok' regardless of shouldReleaseHeldBinding, which would put an unverified
|
|
1685
|
-
// agent back in the dispatch pool. The recovery path is restart-repl / retry (re-greets).
|
|
1686
1384
|
if (state.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(state.awaitingPhase)) {
|
|
1687
1385
|
const reason = 'Greeting capability check failed; the runtime must re-prove it. Resume cannot clear this hold — use Restart REPL to re-run the greeting check.';
|
|
1688
1386
|
console.warn(`[AgentManager] resumeAgent: agent ${agentId} ${state.awaitingPhase} — ${reason}`);
|
|
@@ -1744,7 +1442,6 @@ export class AgentManager {
|
|
|
1744
1442
|
});
|
|
1745
1443
|
return { resumed: true, releasedBinding: shouldReleaseBinding };
|
|
1746
1444
|
});
|
|
1747
|
-
// Outside the task lock: continueSession takes it internally.
|
|
1748
1445
|
if (result.redispatchCodeTaskId) {
|
|
1749
1446
|
try {
|
|
1750
1447
|
const resumed = await this.continueSession(result.redispatchCodeTaskId, agentId, 'code');
|
|
@@ -1774,21 +1471,14 @@ export class AgentManager {
|
|
|
1774
1471
|
return null;
|
|
1775
1472
|
}
|
|
1776
1473
|
}
|
|
1777
|
-
// ESC can't clear un-submitted composer text (pane never reaches ready); Ctrl-C does. Under the pane mutex
|
|
1778
|
-
// (no key interleave with a concurrent dispatch), C-c'd only while still the runtime and not mid-turn.
|
|
1779
1474
|
async interruptPaneAndWaitReady(state, cfg) {
|
|
1780
1475
|
const paneId = await this.resolvePaneId(state, cfg);
|
|
1781
1476
|
if (!paneId)
|
|
1782
1477
|
return false;
|
|
1783
1478
|
const tmux = new TmuxManager(this.createRunnerFor(cfg));
|
|
1784
1479
|
const runtime = agentRuntimeKindFor(cfg);
|
|
1785
|
-
// An in-flight dispatch holds the mutex during its paste/ack; it releases once it sees the now-terminal task,
|
|
1786
|
-
// so wait briefly rather than dropping a cancel-during-paste straight to manual hold.
|
|
1787
1480
|
if (!(await this.acquireCompactGuardWithin(cfg.id, this.cancelInterruptGuardWaitMs))) {
|
|
1788
1481
|
console.warn(`[AgentManager] interruptPaneAndWaitReady: ${cfg.id} pane mutex still busy after wait; holding (un-cleared)`);
|
|
1789
|
-
// A longer-running holder (post-merge compaction, a slow image write) kept the mutex: cancel never
|
|
1790
|
-
// verified the pane, so classify it un-cleared/DELETE-only. The monotonic phase guard then blocks the
|
|
1791
|
-
// caller from softening it to a Resume-able cancel-interrupt-failed and reusing an un-cleared pane.
|
|
1792
1482
|
await this.markAwaitingHuman(cfg.id, 'cancel-clear-failed', 'Cancel could not acquire the pane mutex to interrupt/clear (a dispatch/compact/upload held it); the ' +
|
|
1793
1483
|
'pane state is unverified and may still hold the cancelled session. DELETE the agent to discard it.', { expectedTaskId: state.taskId });
|
|
1794
1484
|
return false;
|
|
@@ -1804,16 +1494,12 @@ export class AgentManager {
|
|
|
1804
1494
|
}
|
|
1805
1495
|
if (await this.paneReachedReplReady(tmux, paneId, runtime, 10_000))
|
|
1806
1496
|
return true;
|
|
1807
|
-
// Ctrl-C can only restore a runtime prompt: if the pane crashed back to a shell or a human took it
|
|
1808
|
-
// over, it would hit their session instead — hold for a human.
|
|
1809
1497
|
if (!(await this.paneRunsRuntime(tmux, paneId, runtime)))
|
|
1810
1498
|
return false;
|
|
1811
1499
|
if (await this.paneHasLiveTurn(tmux, paneId, runtime)) {
|
|
1812
1500
|
console.warn(`[AgentManager] interruptPaneAndWaitReady: pane ${paneId} still running a turn after ESC; holding`);
|
|
1813
1501
|
return false;
|
|
1814
1502
|
}
|
|
1815
|
-
// Re-confirm: the runtime could have crashed to a shell during the ~1.4s liveness window; C-c must not
|
|
1816
|
-
// land in a foreign session.
|
|
1817
1503
|
if (!(await this.paneRunsRuntime(tmux, paneId, runtime)))
|
|
1818
1504
|
return false;
|
|
1819
1505
|
try {
|
|
@@ -1824,15 +1510,12 @@ export class AgentManager {
|
|
|
1824
1510
|
console.warn(`[AgentManager] interruptPaneAndWaitReady: send C-c (composer clear) failed for pane ${paneId}:`, err);
|
|
1825
1511
|
return false;
|
|
1826
1512
|
}
|
|
1827
|
-
// Verify the OUTCOME via the canonical readiness check, not a guessed pre-state.
|
|
1828
1513
|
return this.paneReachedReplReady(tmux, paneId, runtime, this.cleanComposerWaitMs);
|
|
1829
1514
|
}
|
|
1830
1515
|
finally {
|
|
1831
1516
|
this.compactInFlight.delete(cfg.id);
|
|
1832
1517
|
}
|
|
1833
1518
|
}
|
|
1834
|
-
// Liveness = change across samples (pollution-immune): a static screen AND a static title are inert; only a
|
|
1835
|
-
// repaint or an advancing OSC braille title is a live turn. (A stale working-shaped title doesn't change.)
|
|
1836
1519
|
async paneHasLiveTurn(tmux, paneId, runtime) {
|
|
1837
1520
|
let first;
|
|
1838
1521
|
let firstTitle;
|
|
@@ -1857,15 +1540,14 @@ export class AgentManager {
|
|
|
1857
1540
|
return true;
|
|
1858
1541
|
}
|
|
1859
1542
|
if (title !== firstTitle && hasOscTitleWorking(title))
|
|
1860
|
-
return true;
|
|
1543
|
+
return true;
|
|
1861
1544
|
if (hasRuntimeReadyView(frame, runtime))
|
|
1862
|
-
return false;
|
|
1545
|
+
return false;
|
|
1863
1546
|
if (frame !== first)
|
|
1864
1547
|
return true;
|
|
1865
1548
|
}
|
|
1866
1549
|
return false;
|
|
1867
1550
|
}
|
|
1868
|
-
// Ctrl-C must only hit the runtime: confirm the pane's process is still codex/claude, else hold for a human.
|
|
1869
1551
|
async paneRunsRuntime(tmux, paneId, runtime) {
|
|
1870
1552
|
let proc;
|
|
1871
1553
|
try {
|
|
@@ -1891,17 +1573,11 @@ export class AgentManager {
|
|
|
1891
1573
|
return false;
|
|
1892
1574
|
}
|
|
1893
1575
|
}
|
|
1894
|
-
// Persist a "cancel is interrupting + /clearing this pane" hold BEFORE the ESC→/clear window so the
|
|
1895
|
-
// protection survives a restart (recover() holds UNCLEARED_PANE_PHASES) and the escape can't reassign the
|
|
1896
|
-
// un-cleared pane. Direct update (no intervention event): a normal cancel clears it on release; only a
|
|
1897
|
-
// crash/failure leaves it. Conditional on the binding so a stale cancel can't mark an agent rebound away.
|
|
1898
1576
|
async markPaneCancelClearing(agentId, taskId) {
|
|
1899
1577
|
const now = new Date().toISOString();
|
|
1900
1578
|
await this.agentStore.update(agentId, (latest) => {
|
|
1901
1579
|
if (!latest || latest.taskId !== taskId)
|
|
1902
1580
|
return AGENT_STORE_NOOP;
|
|
1903
|
-
// Don't downgrade a more-locked hold (e.g. cancel-clear-failed, DELETE-only) back to the transient
|
|
1904
|
-
// cancel-clearing: a re-entrant terminal cleanup must not soften the un-cleared protection.
|
|
1905
1581
|
if (cancelPhaseDowngrades(latest.awaitingPhase, 'cancel-clearing'))
|
|
1906
1582
|
return AGENT_STORE_NOOP;
|
|
1907
1583
|
return {
|
|
@@ -1914,8 +1590,6 @@ export class AgentManager {
|
|
|
1914
1590
|
};
|
|
1915
1591
|
});
|
|
1916
1592
|
}
|
|
1917
|
-
// Returns whether /clear was confirmed (a real busy→idle, not the stale pre-/clear idle frame); the
|
|
1918
|
-
// caller must hold the agent, not release it, on false or the un-cleared context leaks to the next dispatch.
|
|
1919
1593
|
async clearPaneContext(state, cfg) {
|
|
1920
1594
|
const paneId = await this.resolvePaneId(state, cfg);
|
|
1921
1595
|
if (!paneId)
|
|
@@ -1927,13 +1601,9 @@ export class AgentManager {
|
|
|
1927
1601
|
try {
|
|
1928
1602
|
const tmux = new TmuxManager(this.createRunnerFor(cfg));
|
|
1929
1603
|
const runtime = agentRuntimeKindFor(cfg);
|
|
1930
|
-
// C-c clears any prompt an ack-timeout left in the composer, so /clear isn't appended to it and submitted.
|
|
1931
1604
|
await tmux.sendKeysToPane(paneId, 'C-c');
|
|
1932
1605
|
await this.waitForReplPromptReady(tmux, paneId, runtime, this.clearContextWaitMs);
|
|
1933
1606
|
await tmux.sendKeysLiteral(paneId, '/clear');
|
|
1934
|
-
// Snapshot the composer holding the typed /clear; require submission proof so a swallowed Enter
|
|
1935
|
-
// (which would leave /clear idle in the composer and let waitForReplPromptReady pass on the stale
|
|
1936
|
-
// frame) is resent rather than treated as cleared.
|
|
1937
1607
|
const beforeSubmit = await tmux.capturePaneSnapshot(paneId);
|
|
1938
1608
|
await tmux.sendEnter(paneId);
|
|
1939
1609
|
await tmux.waitSubmitAck(paneId, beforeSubmit, runtime, {
|
|
@@ -1943,13 +1613,10 @@ export class AgentManager {
|
|
|
1943
1613
|
resendIntervalMs: this.compactIdlePollMs,
|
|
1944
1614
|
});
|
|
1945
1615
|
await this.waitForReplPromptReady(tmux, paneId, runtime, this.clearContextWaitMs);
|
|
1946
|
-
// A rejected /clear ("…is disabled while a task is in progress.") returns to a bare prompt that now reads
|
|
1947
|
-
// ready — but the context was NOT cleared, so hold the pane instead of releasing it.
|
|
1948
1616
|
if (await this.hasRuntimeSlashCommandRejection(tmux, paneId, '/clear')) {
|
|
1949
1617
|
console.warn(`[AgentManager] clearPaneContext: /clear rejected (task still in progress) for ${cfg.id}; unconfirmed`);
|
|
1950
1618
|
return false;
|
|
1951
1619
|
}
|
|
1952
|
-
// /clear still parked in the composer → its Enter was swallowed, never submitted → unconfirmed.
|
|
1953
1620
|
const afterClear = await tmux.capturePaneById(paneId, { ansi: false, scrollback: 0 });
|
|
1954
1621
|
if (CLEAR_PENDING_IN_COMPOSER_RE.test(afterClear)) {
|
|
1955
1622
|
console.warn(`[AgentManager] clearPaneContext: /clear still in composer for ${cfg.id}; unconfirmed`);
|
|
@@ -1985,13 +1652,8 @@ export class AgentManager {
|
|
|
1985
1652
|
},
|
|
1986
1653
|
recommendation: 'Inspect the runtime pane, then retry or cancel the task.',
|
|
1987
1654
|
});
|
|
1988
|
-
// ack_unknown: sendEnter 已发,prompt 可能已被 REPL 接收并正在执行——
|
|
1989
|
-
// 不能 release 让下一任务排队进同一 pane。pre-Enter 错误(prompt_too_large /
|
|
1990
|
-
// required_skills_missing / gate_failed)则正常 release。
|
|
1991
1655
|
if (err.reason === 'ack_unknown') {
|
|
1992
1656
|
await this.markAwaitingHuman(agentId, `dispatch-failed:${err.reason}`, `${err.message}. Prompt may still be running in the pane; verify before resuming.`, { expectedTaskId: taskId });
|
|
1993
|
-
// task 已 terminal: 同步释放 partner agent binding,否则 partner(如 dev)永远绑 terminal task,
|
|
1994
|
-
// retryTask 走 validateTaskDispatch 时会看 dev 仍 bound → 409,UI Retry 通路被堵。
|
|
1995
1657
|
if (transitioned) {
|
|
1996
1658
|
await this.releasePartnersAndDrain(agentId, [taskId], [transitioned.task.projectId]);
|
|
1997
1659
|
}
|
|
@@ -2024,9 +1686,6 @@ export class AgentManager {
|
|
|
2024
1686
|
const out = [];
|
|
2025
1687
|
for (const t of tasks) {
|
|
2026
1688
|
const bound = t.agentId === agentId || t.qaAgentId === agentId;
|
|
2027
|
-
// Human gates are decision states, not running work: an absent agent
|
|
2028
|
-
// session must not terminally fail a task whose published PR/branch
|
|
2029
|
-
// would then be orphaned — Confirm/Cancel remain the only exits.
|
|
2030
1689
|
if (t.status === 'ready' || t.status === 'merge-ready')
|
|
2031
1690
|
continue;
|
|
2032
1691
|
if (ACTIVE_TASK_STATUSES.has(t.status) && bound) {
|
|
@@ -2064,8 +1723,6 @@ export class AgentManager {
|
|
|
2064
1723
|
if (!partnerId || partnerId === excludeAgentId)
|
|
2065
1724
|
continue;
|
|
2066
1725
|
try {
|
|
2067
|
-
// allowAwaitingHuman: task 已 terminal 必须能完整清理;partner 即使被标 Held(罕见)
|
|
2068
|
-
// 也应释放,否则 partner stale binding 永远指向 terminal task → 后续 acquire 全卡。
|
|
2069
1726
|
const ok = await this.releaseAgentForTask(partnerId, taskId, 'idle', { allowAwaitingHuman: true });
|
|
2070
1727
|
if (!ok) {
|
|
2071
1728
|
console.warn(`[AgentManager] failTasksForAgent: partner ${partnerId} release returned false ` +
|
|
@@ -2078,7 +1735,6 @@ export class AgentManager {
|
|
|
2078
1735
|
}
|
|
2079
1736
|
}
|
|
2080
1737
|
}
|
|
2081
|
-
// 100ms poll: fixed 200ms races runtimes that take >200ms to ack SIGINT.
|
|
2082
1738
|
async pollPaneCommandStable(tmux, paneId, opts) {
|
|
2083
1739
|
const deadline = Date.now() + opts.timeoutMs;
|
|
2084
1740
|
const SHELL = /^(?:zsh|bash|sh|fish)$/;
|
|
@@ -2118,10 +1774,6 @@ export class AgentManager {
|
|
|
2118
1774
|
else if (!SHELL.test(cmd)) {
|
|
2119
1775
|
throw new Error(`restart-repl precondition failed: unexpected pane state "${cmd}"`);
|
|
2120
1776
|
}
|
|
2121
|
-
// Re-materialize skills BEFORE the relaunch so the fresh REPL scans the current tree. restart-repl
|
|
2122
|
-
// is the operator's recovery for a greeting_failed agent whose on-disk skill tree was stale/missing,
|
|
2123
|
-
// and unlike retry it does not go through ensureSession's provisionRepoSkills. Best-effort: a
|
|
2124
|
-
// provisioning blip must not block the REPL restart (a still-broken tree surfaces on the next regreet).
|
|
2125
1777
|
const project = this.getProjectConfig(cfg.projectId);
|
|
2126
1778
|
let workdir;
|
|
2127
1779
|
let provisioned = false;
|
|
@@ -2145,18 +1797,10 @@ export class AgentManager {
|
|
|
2145
1797
|
timeoutMs: this.bootstrapTimeoutsMs.waitReplReady,
|
|
2146
1798
|
scrollback: 0,
|
|
2147
1799
|
});
|
|
2148
|
-
// Only re-tag when the tree was actually re-provisioned. Tagging after a FAILED provision
|
|
2149
|
-
// would stamp the current version onto a REPL that scanned the stale/missing tree, so the
|
|
2150
|
-
// next ensureSession reads it as fresh and reuses it instead of self-healing (rebuild). A
|
|
2151
|
-
// successful provision DOES need the tag, else ensureSession needlessly kills this REPL and
|
|
2152
|
-
// drops the agent onto a different, ungreeted one.
|
|
2153
1800
|
if (provisioned) {
|
|
2154
1801
|
await this.tagSessionSkillsVersion(tmux, agentId);
|
|
2155
1802
|
}
|
|
2156
1803
|
};
|
|
2157
|
-
// Hold the per-skills-dir lock ACROSS the relaunch (like buildFreshSessionLocked) so a concurrent
|
|
2158
|
-
// same-dir agent's provisioning — which transiently removes helper files (agents/openai.yaml) —
|
|
2159
|
-
// can't make this fresh REPL scan an incomplete skill tree.
|
|
2160
1804
|
if (workdir !== undefined) {
|
|
2161
1805
|
await this.runUnderSkillDirLock(this.skillDirLockKey(cfg, workdir), relaunch);
|
|
2162
1806
|
}
|
|
@@ -2190,7 +1834,6 @@ export class AgentManager {
|
|
|
2190
1834
|
}
|
|
2191
1835
|
return { targets: [agentId] };
|
|
2192
1836
|
}
|
|
2193
|
-
// Aggregates failures so DELETE rolls back the session claim on remote IO error.
|
|
2194
1837
|
async cleanupRemovedAgentRuntime(targets) {
|
|
2195
1838
|
const failures = [];
|
|
2196
1839
|
for (const id of targets) {
|
|
@@ -2201,7 +1844,6 @@ export class AgentManager {
|
|
|
2201
1844
|
const tmux = new TmuxManager(runner);
|
|
2202
1845
|
const worktree = new WorktreeManager(runner);
|
|
2203
1846
|
this.stopRuntimeMenuWatch(id);
|
|
2204
|
-
// Streamer first so subscribers see session_gone.
|
|
2205
1847
|
if (this.paneStreamerManager) {
|
|
2206
1848
|
try {
|
|
2207
1849
|
await this.paneStreamerManager.destroy(id);
|
|
@@ -2246,9 +1888,6 @@ export class AgentManager {
|
|
|
2246
1888
|
throw new CleanupFailedError(`cleanupRemovedAgentRuntime: ${failures.length} step(s) failed: ${summary}`, failures);
|
|
2247
1889
|
}
|
|
2248
1890
|
}
|
|
2249
|
-
// IO-free preview; caller compares vs MAX_PROMPT_BYTES_ROUTE_LIMIT before allocating worktree.
|
|
2250
|
-
// preferredAgentId 为空 → 用项目内 dev 的最长 workdir 估上界(unassigned 路径),dispatch 时仍
|
|
2251
|
-
// 会按真实 dev 再算一次;preferredAgentId 有值 → 按该 dev 的 config 算,避免按全局上界误拒。
|
|
2252
1891
|
previewPromptBytesForTaskInput(projectId, input) {
|
|
2253
1892
|
let cfg;
|
|
2254
1893
|
let workdirForEstimate;
|
|
@@ -2294,9 +1933,6 @@ export class AgentManager {
|
|
|
2294
1933
|
agent: cfg,
|
|
2295
1934
|
worktreePath: worktreePathBound,
|
|
2296
1935
|
skillRegistry: this.skillRegistry,
|
|
2297
|
-
// A representative token so the preview exercises the SAME required-skill set (baxian-signals)
|
|
2298
|
-
// and worst-case byte size the real signal-emitting dispatch will build — else a missing
|
|
2299
|
-
// baxian-signals only surfaces async after the task is already created (201).
|
|
2300
1936
|
signalToken: 'preview-signal-token',
|
|
2301
1937
|
});
|
|
2302
1938
|
return Buffer.byteLength(fullPrompt, 'utf8');
|
|
@@ -2349,7 +1985,6 @@ export class AgentManager {
|
|
|
2349
1985
|
return this.postApproveStore.get(taskId);
|
|
2350
1986
|
}
|
|
2351
1987
|
async setPostApproveCompletion(taskId, value) {
|
|
2352
|
-
// Store write + watcher.start under one lock — otherwise concurrent clear zombies the sub.
|
|
2353
1988
|
await this.withTaskLock(async () => {
|
|
2354
1989
|
await this.postApproveStore.set(taskId, value);
|
|
2355
1990
|
if (!this.phaseSignalWatcher)
|
|
@@ -2380,7 +2015,6 @@ export class AgentManager {
|
|
|
2380
2015
|
return cleared;
|
|
2381
2016
|
});
|
|
2382
2017
|
}
|
|
2383
|
-
// Recovery snapshot replay is safe because the completion token is cleared after merge-ready.
|
|
2384
2018
|
async setupRecoveredPostApproveSignals() {
|
|
2385
2019
|
if (!this.phaseSignalWatcher)
|
|
2386
2020
|
return;
|
|
@@ -2405,10 +2039,6 @@ export class AgentManager {
|
|
|
2405
2039
|
}
|
|
2406
2040
|
}
|
|
2407
2041
|
}
|
|
2408
|
-
// snapshot 扫描按协议族决定:server 协议(含全模式 spec 阶段)恢复时必扫,github code 阶段仅 review/fixing 扫。
|
|
2409
|
-
// 只对 spec verdict / spec-fixed emit intervention — spec-done 在 develop
|
|
2410
|
-
// prompt 里是 optional, 报警会让所有 in_progress task 噪音化。
|
|
2411
|
-
// expectedKinds 必须覆盖 dispatch 时实际 set up 的 kind 集,否则真信号无法匹配。
|
|
2412
2042
|
async setupRecoveredSpecSignals() {
|
|
2413
2043
|
if (!this.phaseSignalWatcher)
|
|
2414
2044
|
return;
|
|
@@ -2420,23 +2050,11 @@ export class AgentManager {
|
|
|
2420
2050
|
if (!mapped)
|
|
2421
2051
|
continue;
|
|
2422
2052
|
const { expectedKinds, agentId } = mapped;
|
|
2423
|
-
// Only spec verdict / spec-fixed / PR verdict warrant an intervention —
|
|
2424
|
-
// optional kinds (spec-done, pr-created in develop) would spam every
|
|
2425
|
-
// in_progress task on restart.
|
|
2426
2053
|
const interventionKindLabel = task.phase === 'spec' && task.status === 'review' ? 'spec-reviewed'
|
|
2427
2054
|
: task.phase === 'spec' && task.status === 'fixing' ? 'spec-fixed'
|
|
2428
2055
|
: task.phase !== 'spec' && task.status === 'review' ? 'pr-approved|pr-changes-requested'
|
|
2429
2056
|
: task.phase !== 'spec' && task.status === 'fixing' ? 'pr-fixed'
|
|
2430
2057
|
: undefined;
|
|
2431
|
-
// spec 阶段恒为 server 协议(无 poller 兜底);code 阶段才按 reviewMode 区分。
|
|
2432
|
-
// Scan pane snapshot on recover for signals the agent emitted before the
|
|
2433
|
-
// server consumed them (lost on restart; agent won't re-emit).
|
|
2434
|
-
// github code states (review/fixing): replay-safe handlers — token + status
|
|
2435
|
-
// gates reject duplicates; PR verdict & pr-fixed covered.
|
|
2436
|
-
// github pre-spec (phase undefined, in_progress): spec-done has only the pane
|
|
2437
|
-
// channel (pr-created has a poller backstop, scanning it is idempotent).
|
|
2438
|
-
// server protocol incl. all-mode spec phase: no poller backstop, pane is the
|
|
2439
|
-
// only signal channel; handlers equally replay-safe via same gates.
|
|
2440
2058
|
const isServerProtocol = task.reviewMode === 'server' || task.phase === 'spec';
|
|
2441
2059
|
const scanSnapshotOnRecover = isServerProtocol
|
|
2442
2060
|
|| (task.phase === undefined && task.status === 'in_progress')
|
|
@@ -2475,14 +2093,6 @@ export class AgentManager {
|
|
|
2475
2093
|
}
|
|
2476
2094
|
}
|
|
2477
2095
|
}
|
|
2478
|
-
// Weak "did the dev do anything since the review was dispatched" heuristic for a
|
|
2479
|
-
// no-push fixing round: any inline thread reply (in_reply_to_id set) OR any
|
|
2480
|
-
// top-level PR/issue comment created after sinceIso. baxian can't attribute
|
|
2481
|
-
// comments to an agent (no GitHub identity), and findings may live in the review
|
|
2482
|
-
// body / issue comments / a same-identity `gh pr review --comment`, so this only
|
|
2483
|
-
// separates a true no-op (zero activity) from a real round — QA still does the
|
|
2484
|
-
// real per-finding check. THROWS on gh failure so the caller fails closed (a
|
|
2485
|
-
// swallowed error would masquerade as "no reply" → false no-op intervention).
|
|
2486
2096
|
async prHasDevReplySince(taskId, sinceIso) {
|
|
2487
2097
|
const task = await this.taskStore.get(taskId);
|
|
2488
2098
|
if (!task?.prNumber)
|
|
@@ -2493,9 +2103,6 @@ export class AgentManager {
|
|
|
2493
2103
|
const since = Date.parse(sinceIso);
|
|
2494
2104
|
if (Number.isNaN(since))
|
|
2495
2105
|
return false;
|
|
2496
|
-
// Three independent endpoints — fetch concurrently, not back-to-back. Reviews
|
|
2497
|
-
// cover the same-identity `gh pr review --comment` reply path (a PR review with
|
|
2498
|
-
// a body, surfaced via submitted_at, not an inline/issue comment).
|
|
2499
2106
|
const repo = repoSlug(project.repo);
|
|
2500
2107
|
const [inlineReplies, issueComments, reviews] = await Promise.all([
|
|
2501
2108
|
this.ghCreatedAt(`repos/${repo}/pulls/${task.prNumber}/comments`, '.[] | select(.in_reply_to_id != null) | .created_at'),
|
|
@@ -2508,10 +2115,6 @@ export class AgentManager {
|
|
|
2508
2115
|
return !Number.isNaN(t) && t > since;
|
|
2509
2116
|
});
|
|
2510
2117
|
}
|
|
2511
|
-
// Each matched `created_at` across ALL pages. `gh api --paginate` with `--jq`
|
|
2512
|
-
// runs the filter per page and concatenates output, so emitting one timestamp
|
|
2513
|
-
// per row (not a per-page `length`) is the only way to count across pages.
|
|
2514
|
-
// Throws on non-zero exit so callers can fail closed.
|
|
2515
2118
|
async ghCreatedAt(endpoint, jq) {
|
|
2516
2119
|
const result = await this.platformRunner.exec(`gh api --paginate ${shellQuote(endpoint)} --jq ${shellQuote(jq)}`);
|
|
2517
2120
|
if (result.exitCode !== 0) {
|
|
@@ -2538,13 +2141,6 @@ export class AgentManager {
|
|
|
2538
2141
|
}
|
|
2539
2142
|
return headSha;
|
|
2540
2143
|
}
|
|
2541
|
-
// Pane signals are agent-emitted text; their prNumber is whatever the agent
|
|
2542
|
-
// chose to print. Without branch-equality verification the server would
|
|
2543
|
-
// happily QA-review and later auto-merge a PR that belongs to a different
|
|
2544
|
-
// task (typo / hallucination / copy-paste). Poller-sourced events skip this
|
|
2545
|
-
// because the poller routes by `bx/<taskId>` branch already.
|
|
2546
|
-
// Returns the verified headSha on success, undefined on mismatch / lookup
|
|
2547
|
-
// failure (caller treats as "do not trust this prNumber").
|
|
2548
2144
|
async verifyPaneSignalPrNumber(taskId, prNumber) {
|
|
2549
2145
|
const task = await this.taskStore.get(taskId);
|
|
2550
2146
|
if (!task || !task.branch)
|
|
@@ -2635,15 +2231,6 @@ export class AgentManager {
|
|
|
2635
2231
|
return { task, previousStatus };
|
|
2636
2232
|
});
|
|
2637
2233
|
}
|
|
2638
|
-
// Count a review pass exactly once, on the dispatch success path (a failed dispatch
|
|
2639
|
-
// returns before calling this, so the round is never inflated → no rollback needed).
|
|
2640
|
-
// `expectedRound` is the reviewRound captured BEFORE the dispatch transition. Under the
|
|
2641
|
-
// lock: if a same-identity verdict raced in between startSession and this call, its
|
|
2642
|
-
// review.submitted handler already counted the pass — moving reviewRound off
|
|
2643
|
-
// expectedRound — so this no-ops. This is symmetric for the first review (expected 0,
|
|
2644
|
-
// verdict counts via reviewRound===0) and an approved re-review (expected ≥1, where the
|
|
2645
|
-
// verdict does NOT catch-count) — both end at exactly one increment. withTaskLock
|
|
2646
|
-
// serializes this against the verdict transition.
|
|
2647
2234
|
async bumpReviewRoundIfStillAt(taskId, expectedRound) {
|
|
2648
2235
|
await this.withTaskLock(async () => {
|
|
2649
2236
|
const task = await this.taskStore.get(taskId);
|
|
@@ -2681,7 +2268,6 @@ export class AgentManager {
|
|
|
2681
2268
|
return agent.workdir;
|
|
2682
2269
|
return agentState?.repoPath ?? null;
|
|
2683
2270
|
}
|
|
2684
|
-
// Empty repos lack origin/HEAD; undefined makes git use base repo HEAD.
|
|
2685
2271
|
async resolveAutoBaseRef(runner, workdir) {
|
|
2686
2272
|
const result = await runner.exec(`git -C ${shellQuote(workdir)} rev-parse --verify --quiet origin/HEAD`);
|
|
2687
2273
|
return result.exitCode === 0 ? 'origin/HEAD' : undefined;
|
|
@@ -2707,9 +2293,6 @@ export class AgentManager {
|
|
|
2707
2293
|
`skipping agent cleanup — agent already reassigned`);
|
|
2708
2294
|
return;
|
|
2709
2295
|
}
|
|
2710
|
-
// Cancel may have taken over (markPaneCancelClearing keeps taskId, flips to a cancel-cleanup hold). This
|
|
2711
|
-
// rollback would clear taskId + release the lock, making cancel's Phase 1 skip interrupt + /clear and leave
|
|
2712
|
-
// an un-cleared pane bound to nothing — leave the binding/lock to the cancel owner.
|
|
2713
2296
|
if (isCancelCleanupHold(existing)) {
|
|
2714
2297
|
console.warn(`[AgentManager] rollback: agent ${agentId} held by cancel cleanup (${existing?.awaitingPhase}); ` +
|
|
2715
2298
|
`leaving binding to the owner`);
|
|
@@ -2768,13 +2351,9 @@ export class AgentManager {
|
|
|
2768
2351
|
throw new ApiError(400, `Branch "${input.branch}" is already bound to task ${existing.id}`);
|
|
2769
2352
|
}
|
|
2770
2353
|
}
|
|
2771
|
-
// Stage images first so the task is written + emitted (task.created) WITH its images already
|
|
2772
|
-
// on disk — a pending task is never observable, or crash-recoverable, without them. A persist
|
|
2773
|
-
// failure here throws before any store write / lock, so nothing half-created survives.
|
|
2774
2354
|
const imageFilenames = input.images?.length
|
|
2775
2355
|
? await this.persistTaskImages(taskId, input.images)
|
|
2776
2356
|
: undefined;
|
|
2777
|
-
// Unassigned: no dev to pick, no qa to derive; goes straight to pending.
|
|
2778
2357
|
if (input.preferredAgentId === '') {
|
|
2779
2358
|
const unassigned = {
|
|
2780
2359
|
id: taskId,
|
|
@@ -2890,8 +2469,6 @@ export class AgentManager {
|
|
|
2890
2469
|
id: dev.id,
|
|
2891
2470
|
projectId,
|
|
2892
2471
|
taskId,
|
|
2893
|
-
// Mark not-yet-delivered from the moment the binding exists — before ensureSession/worktree
|
|
2894
|
-
// side-effects — so a crash anywhere in the bootstrap is recoverable (see recover()).
|
|
2895
2472
|
bootstrappingTaskId: taskId,
|
|
2896
2473
|
updatedAt: now,
|
|
2897
2474
|
...(existing?.paneId !== undefined ? { paneId: existing.paneId } : {}),
|
|
@@ -2911,18 +2488,12 @@ export class AgentManager {
|
|
|
2911
2488
|
});
|
|
2912
2489
|
}
|
|
2913
2490
|
async createAndStartTask(projectId, input, opts = {}) {
|
|
2914
|
-
// createTask stages images atomically before the task is visible (store + task.created),
|
|
2915
|
-
// so a pending task can never be observed — or crash-recovered — without its images.
|
|
2916
2491
|
const task = await this.createTask(projectId, input);
|
|
2917
2492
|
if (task.status === 'in_progress' && task.agentId) {
|
|
2918
|
-
// Persist token first — prompt build 和 watcher 验证共用 task.signalToken。
|
|
2919
2493
|
const signalToken = createSignalToken();
|
|
2920
2494
|
await this.updateTask(task.id, { signalToken });
|
|
2921
2495
|
const start = this.startCreatedTaskSession(task.id, task.agentId, signalToken);
|
|
2922
2496
|
if (opts.background) {
|
|
2923
|
-
// Agent bootstrap (worktree + REPL spawn + ready-poll) can take tens of seconds; don't block
|
|
2924
|
-
// the create response on it. Failures still land on the task (failed / rolled back) and reach
|
|
2925
|
-
// the UI through the same task-event stream as every other lifecycle transition.
|
|
2926
2497
|
void start.catch((err) => {
|
|
2927
2498
|
console.error(`[AgentManager] createAndStartTask background start failed for task=${task.id}:`, err);
|
|
2928
2499
|
});
|
|
@@ -2946,18 +2517,11 @@ export class AgentManager {
|
|
|
2946
2517
|
console.error(`[AgentManager] createAndStartTask startSession hard error for task=${taskId}:`, err);
|
|
2947
2518
|
}
|
|
2948
2519
|
if (started) {
|
|
2949
|
-
// A user Cancel can race the background bootstrap. startSession's success path may have already
|
|
2950
|
-
// injected the prompt, so the task can be terminal here with the cancelled prompt still live in the
|
|
2951
|
-
// pane. Mirror cancelTask: interrupt + confirm the pane is ready before idle-releasing, else hold —
|
|
2952
|
-
// a bare release would hand a pane still running the cancelled prompt to the next dispatch. (If
|
|
2953
|
-
// cancel already cleared the binding, releaseAgentForTask is a safe no-op.)
|
|
2954
2520
|
const fresh = await this.taskStore.get(taskId);
|
|
2955
2521
|
if (!fresh || TERMINAL_STATUSES.includes(fresh.status)) {
|
|
2956
2522
|
const cfg = this.getAgentConfig(agentId);
|
|
2957
2523
|
const state = await this.agentStore.get(agentId);
|
|
2958
2524
|
if (cfg && state && state.taskId === taskId) {
|
|
2959
|
-
// Persist the un-cleared hold before the ESC→/clear window so a restart mid-cleanup recovers it
|
|
2960
|
-
// held instead of releasing the still-dirty pane (mirrors cancelTask).
|
|
2961
2525
|
await this.markPaneCancelClearing(agentId, taskId);
|
|
2962
2526
|
if (!(await this.interruptPaneAndWaitReady(state, cfg))) {
|
|
2963
2527
|
await this.markAwaitingHuman(agentId, 'cancel-interrupt-failed', 'Task was cancelled during startup but ESC / REPL ready check failed; the agent may still be ' +
|
|
@@ -2969,19 +2533,12 @@ export class AgentManager {
|
|
|
2969
2533
|
'the pane holds un-cleared context. DELETE the agent to discard it (Resume will not reuse an un-cleared pane).', { expectedTaskId: taskId });
|
|
2970
2534
|
return null;
|
|
2971
2535
|
}
|
|
2972
|
-
// /clear confirmed → cancel cleanup is done; free the cancel-clearing hold (only path allowed to).
|
|
2973
2536
|
await this.releaseAgentForTask(agentId, taskId, 'idle', { allowAwaitingHuman: true, fromCancelCleanup: true });
|
|
2974
2537
|
return null;
|
|
2975
2538
|
}
|
|
2976
|
-
// No live pane to clean (agent gone / rebound): release without the cancel-cleanup bypass — if it
|
|
2977
|
-
// somehow holds an un-cleared phase, refusing here keeps the dirty pane for the owning cancel.
|
|
2978
2539
|
await this.releaseAgentForTask(agentId, taskId, 'idle', { allowAwaitingHuman: true });
|
|
2979
2540
|
return null;
|
|
2980
2541
|
}
|
|
2981
|
-
// 后台路径吞掉 reject(void start.catch):arm 抛异常时也要显式 hold agent,否则会留下一个没有
|
|
2982
|
-
// spec-done/pr-created 消费者的常驻任务(同步 caller 仍由上游收到异常)。
|
|
2983
|
-
// Kinds derive from the task's frozen reviewMode — a hot mode flip during the
|
|
2984
|
-
// startSession window must not desync the armed kinds from the sent prompt.
|
|
2985
2542
|
const initialKinds = this.devInitialSignalKinds(fresh.reviewMode);
|
|
2986
2543
|
try {
|
|
2987
2544
|
await this.armPostDispatchSignalOrHold(taskId, agentId, initialKinds, signalToken);
|
|
@@ -2999,15 +2556,8 @@ export class AgentManager {
|
|
|
2999
2556
|
await this.failTaskForDispatchError(taskId, 'develop', agentId, dispatchErr);
|
|
3000
2557
|
}
|
|
3001
2558
|
else if (dispatchErr instanceof EnsureSessionError && dispatchErr.partial.handled) {
|
|
3002
|
-
// handleDialogPendingFromRuntime 已标 Held + fail task + release partners;rollback 会清 taskId/lock
|
|
3003
|
-
// 让仍卡 dialog 的 pane 在 status='awaiting_human' 被清后可被新 dispatch 撞进——必须跳过。
|
|
3004
2559
|
}
|
|
3005
2560
|
else {
|
|
3006
|
-
// startSession returned false. Usually the task changed under us (cancelled/terminal). If it went
|
|
3007
|
-
// terminal mid-bootstrap (e.g. the user cancelled before the pane existed — cancelTask then held the
|
|
3008
|
-
// agent as cancel-interrupt-failed with nothing actually running), rollbackFailedDispatch only acts
|
|
3009
|
-
// on in_progress tasks, so it would leave the agent bound to the dead task needing a manual Resume.
|
|
3010
|
-
// Release it instead so the agent is free for the next dispatch.
|
|
3011
2561
|
const fresh = await this.taskStore.get(taskId);
|
|
3012
2562
|
if (fresh && TERMINAL_STATUSES.includes(fresh.status) && (await this.agentStore.get(agentId))?.taskId === taskId) {
|
|
3013
2563
|
await this.releaseAgentForTask(agentId, taskId, 'idle', { allowAwaitingHuman: true });
|
|
@@ -3018,7 +2568,6 @@ export class AgentManager {
|
|
|
3018
2568
|
}
|
|
3019
2569
|
return (await this.taskStore.get(taskId)) ?? null;
|
|
3020
2570
|
}
|
|
3021
|
-
/** Write an uploaded image to the running agent's host, paste its path (no Enter). */
|
|
3022
2571
|
async attachImageToRunningAgent(agentId, bytes, ext) {
|
|
3023
2572
|
const cfg = this.getAgentConfig(agentId);
|
|
3024
2573
|
if (!cfg)
|
|
@@ -3027,14 +2576,10 @@ export class AgentManager {
|
|
|
3027
2576
|
const paneId = state?.paneId;
|
|
3028
2577
|
if (!paneId)
|
|
3029
2578
|
throw new ApiError(409, `Agent ${agentId} has no live session`);
|
|
3030
|
-
// 写文件→粘贴全程持有 pane 互斥:写文件可能卡住,恢复后的粘贴若落进
|
|
3031
|
-
// compact 的 C-c→/compact 窗口会把路径拼进指令提交。
|
|
3032
2579
|
if (!this.tryAcquireCompactGuard(agentId)) {
|
|
3033
2580
|
throw new ApiError(409, `Agent ${agentId} compact or upload in progress; retry shortly`);
|
|
3034
2581
|
}
|
|
3035
2582
|
try {
|
|
3036
|
-
// Refuse to paste into a pane cancel is tearing down. Re-check BOTH before and after the (slow) host
|
|
3037
|
-
// write: cancel keeps taskId while flipping the hold, and it can land during writeImageToHost.
|
|
3038
2583
|
const assertUploadStillValid = async () => {
|
|
3039
2584
|
const held = await this.agentStore.get(agentId);
|
|
3040
2585
|
if (!held || held.paneId !== paneId) {
|
|
@@ -3044,8 +2589,6 @@ export class AgentManager {
|
|
|
3044
2589
|
if (boundTask && TERMINAL_STATUSES.includes(boundTask.status)) {
|
|
3045
2590
|
throw new ApiError(409, `Agent ${agentId} task ${held.taskId} is terminal; image upload refused`);
|
|
3046
2591
|
}
|
|
3047
|
-
// taskStore.get yielded the loop — re-read the cancel hold LAST so a hold that landed during that await
|
|
3048
|
-
// (e.g. while the slow host write was running) is caught before the paste.
|
|
3049
2592
|
const fresh = await this.agentStore.get(agentId);
|
|
3050
2593
|
if (!fresh || fresh.paneId !== paneId || isCancelCleanupHold(fresh)) {
|
|
3051
2594
|
throw new ApiError(409, `Agent ${agentId} is being cancelled (${fresh?.awaitingPhase}); image upload refused`);
|
|
@@ -3079,8 +2622,6 @@ export class AgentManager {
|
|
|
3079
2622
|
throw new ApiError(409, `Agent ${agentId} has no live session`);
|
|
3080
2623
|
const taskIdAtStart = state.taskId;
|
|
3081
2624
|
const updatedAtAtStart = state.updatedAt;
|
|
3082
|
-
// updatedAt 拦同任务 phase 派发(paneId/taskId 均不变,派发 paste 前必写 state);
|
|
3083
|
-
// 快照变了决不注入——中断键(C-c/Escape)会打断刚注入的 prompt。
|
|
3084
2625
|
const assertSessionUnchanged = async () => {
|
|
3085
2626
|
const now = await this.agentStore.get(agentId);
|
|
3086
2627
|
if (!now
|
|
@@ -3102,7 +2643,6 @@ export class AgentManager {
|
|
|
3102
2643
|
};
|
|
3103
2644
|
await waitReady();
|
|
3104
2645
|
await assertSessionUnchanged();
|
|
3105
|
-
// Codex quits on Ctrl-C at an empty composer (openai/codex#14708); interrupt it with Escape instead.
|
|
3106
2646
|
await tmux.sendKeysToPane(paneId, cfg.runtime === 'codex' ? 'Escape' : 'C-c');
|
|
3107
2647
|
await waitReady();
|
|
3108
2648
|
await assertSessionUnchanged();
|
|
@@ -3139,7 +2679,6 @@ export class AgentManager {
|
|
|
3139
2679
|
}
|
|
3140
2680
|
return filenames;
|
|
3141
2681
|
}
|
|
3142
|
-
// Retry reads staged bytes up-front; a missing source is a visible 409, never a silent drop.
|
|
3143
2682
|
async readStagedImages(taskId, filenames) {
|
|
3144
2683
|
const dir = join(this.imageStagingRoot, taskId);
|
|
3145
2684
|
const out = [];
|
|
@@ -3156,8 +2695,6 @@ export class AgentManager {
|
|
|
3156
2695
|
}
|
|
3157
2696
|
return out;
|
|
3158
2697
|
}
|
|
3159
|
-
// Materialize staged task images onto the agent host at dispatch; absolute host paths get
|
|
3160
|
-
// woven into the prompt. Missing staging aborts the dispatch loudly (no silent skip).
|
|
3161
2698
|
async materializeTaskImages(runner, task) {
|
|
3162
2699
|
const filenames = task.images ?? [];
|
|
3163
2700
|
if (filenames.length === 0)
|
|
@@ -3178,10 +2715,6 @@ export class AgentManager {
|
|
|
3178
2715
|
}
|
|
3179
2716
|
return hostPaths;
|
|
3180
2717
|
}
|
|
3181
|
-
// Dev-facing deliverable phases all carry the task's uploaded images, since the image is a
|
|
3182
|
-
// persistent task input the dev needs while producing or revising the spec/code — and a fresh
|
|
3183
|
-
// runtime (restart/recovery) loses the original context. IMAGE_DISPATCH_PHASES 中的后续阶段经
|
|
3184
|
-
// continueSession 触发此方法;QA 阶段和 post-approve 不传图。
|
|
3185
2718
|
async imagePathsForDispatch(runner, task, phase) {
|
|
3186
2719
|
if (!IMAGE_DISPATCH_PHASES.has(phase))
|
|
3187
2720
|
return [];
|
|
@@ -3228,7 +2761,6 @@ export class AgentManager {
|
|
|
3228
2761
|
return { task: fresh, errorCode: 409, error: `Agent ${agentId} lock acquisition failed` };
|
|
3229
2762
|
}
|
|
3230
2763
|
const now = new Date().toISOString();
|
|
3231
|
-
// 优先沿用 fresh.qaAgentId;缺失(unassigned claim / 旧 task 建时无 QA 伙伴)才查当前 config。
|
|
3232
2764
|
const qaId = fresh.qaAgentId ?? this.findQaPartner(agentId)?.id;
|
|
3233
2765
|
const claimedTask = {
|
|
3234
2766
|
...fresh,
|
|
@@ -3243,7 +2775,6 @@ export class AgentManager {
|
|
|
3243
2775
|
id: agentId,
|
|
3244
2776
|
projectId: cfg.projectId,
|
|
3245
2777
|
taskId,
|
|
3246
|
-
// Same not-yet-delivered marker as createTask, so a crash mid-redispatch is recoverable too.
|
|
3247
2778
|
bootstrappingTaskId: taskId,
|
|
3248
2779
|
updatedAt: now,
|
|
3249
2780
|
...(existing?.paneId !== undefined ? { paneId: existing.paneId } : {}),
|
|
@@ -3286,14 +2817,11 @@ export class AgentManager {
|
|
|
3286
2817
|
await this.failTaskForDispatchError(claimed.id, 'develop', claimed.agentId, dispatchErr);
|
|
3287
2818
|
}
|
|
3288
2819
|
else if (dispatchErr instanceof EnsureSessionError && dispatchErr.partial.handled) {
|
|
3289
|
-
// handleDialogPendingFromRuntime 已处理;跳过 rollback。
|
|
3290
2820
|
}
|
|
3291
2821
|
else {
|
|
3292
2822
|
await this.rollbackFailedDispatch(claimed.id, claimed.agentId);
|
|
3293
2823
|
}
|
|
3294
2824
|
const refreshed = await this.taskStore.get(claimed.id);
|
|
3295
|
-
// startSession 返回 false 而没抛 → 任务状态在锁外发生了变化(如并发 cancel / 已 terminal),
|
|
3296
|
-
// 客户端按 409 提示重试 / 刷新即可;只有真正的 dispatch 异常映射为 500。
|
|
3297
2825
|
if (dispatchErr === null) {
|
|
3298
2826
|
return { task: refreshed, errorCode: 409, error: 'task state changed during dispatch; startSession refused' };
|
|
3299
2827
|
}
|
|
@@ -3316,7 +2844,6 @@ export class AgentManager {
|
|
|
3316
2844
|
console.warn(`[AgentManager] startSession[${phase}]: pre-create task=${taskId} not found; aborting`);
|
|
3317
2845
|
return false;
|
|
3318
2846
|
}
|
|
3319
|
-
// bypassTaskStatusGate 只放过 expected gate,不放过 terminal。
|
|
3320
2847
|
if (TERMINAL_STATUSES.includes(preTask.status)) {
|
|
3321
2848
|
console.warn(`[AgentManager] startSession[${phase}]: pre-create task=${taskId} status=${preTask.status} ` +
|
|
3322
2849
|
`is terminal; aborting`);
|
|
@@ -3374,7 +2901,6 @@ export class AgentManager {
|
|
|
3374
2901
|
: phase === 'review' || phase === 'recheck'
|
|
3375
2902
|
? await worktree.createDetached(workdir, taskId, task.branch)
|
|
3376
2903
|
: await worktree.create(workdir, taskId, baseRef, customBranch);
|
|
3377
|
-
// Persist worktreePath now so a crash before set-running leaves a recoverable trail.
|
|
3378
2904
|
await this.agentStore.update(agentId, (stateNow) => {
|
|
3379
2905
|
if (!stateNow || stateNow.taskId !== taskId)
|
|
3380
2906
|
return AGENT_STORE_NOOP;
|
|
@@ -3386,10 +2912,8 @@ export class AgentManager {
|
|
|
3386
2912
|
updatedAt: new Date().toISOString(),
|
|
3387
2913
|
};
|
|
3388
2914
|
});
|
|
3389
|
-
// Caller-transmitted token/round take precedence — task fields are stale during dispatch.
|
|
3390
2915
|
const promptSignalToken = opts.signalToken ?? task.signalToken;
|
|
3391
2916
|
const promptSpecRound = opts.currentSpecRound ?? task.specReviewRound;
|
|
3392
|
-
// develop prompt 按 QA 有无裁剪 spec 路线(qaAgentId 快照优先,与 review 派发同一解析)。
|
|
3393
2917
|
const hasQaPartner = !!(task.qaAgentId ?? this.findQaPartner(agentId)?.id);
|
|
3394
2918
|
let prompt;
|
|
3395
2919
|
try {
|
|
@@ -3417,7 +2941,6 @@ export class AgentManager {
|
|
|
3417
2941
|
await worktree.removeWithBranch(workdir, worktreePath, customBranch);
|
|
3418
2942
|
}
|
|
3419
2943
|
catch { }
|
|
3420
|
-
// Terminal — rolling back to pending would loop on the same misconfiguration.
|
|
3421
2944
|
if (err instanceof PromptSizeError) {
|
|
3422
2945
|
throw new DispatchTerminalError('prompt_too_large', err.message);
|
|
3423
2946
|
}
|
|
@@ -3426,7 +2949,6 @@ export class AgentManager {
|
|
|
3426
2949
|
}
|
|
3427
2950
|
throw err;
|
|
3428
2951
|
}
|
|
3429
|
-
// Last cancellable boundary before paste.
|
|
3430
2952
|
const taskFresh = await this.taskStore.get(taskId);
|
|
3431
2953
|
if (!taskFresh) {
|
|
3432
2954
|
console.warn(`[AgentManager] startSession: task ${taskId} disappeared mid-dispatch; cleaning up worktree before paste`);
|
|
@@ -3475,8 +2997,6 @@ export class AgentManager {
|
|
|
3475
2997
|
catch { }
|
|
3476
2998
|
return false;
|
|
3477
2999
|
}
|
|
3478
|
-
// Pane exists now but the prompt is not out — arm here so a request it triggers is a live chunk,
|
|
3479
|
-
// not snapshot-suppressed scrollback. Abort cleanly (no binding written yet) if it cannot arm.
|
|
3480
3000
|
if (opts.armBeforeInject && !(await opts.armBeforeInject())) {
|
|
3481
3001
|
try {
|
|
3482
3002
|
await worktree.removeWithBranch(workdir, worktreePath, customBranch);
|
|
@@ -3489,7 +3009,6 @@ export class AgentManager {
|
|
|
3489
3009
|
try {
|
|
3490
3010
|
let cancelHoldWon = false;
|
|
3491
3011
|
await this.agentStore.update(agentId, (existing) => {
|
|
3492
|
-
// This fresh rebuild would drop awaitingPhase: if cancel raced a hold in, overwriting it skips /clear.
|
|
3493
3012
|
if (isCancelCleanupHold(existing)) {
|
|
3494
3013
|
cancelHoldWon = true;
|
|
3495
3014
|
return AGENT_STORE_NOOP;
|
|
@@ -3502,8 +3021,6 @@ export class AgentManager {
|
|
|
3502
3021
|
worktreePath,
|
|
3503
3022
|
repoPath: workdir,
|
|
3504
3023
|
startedAt: now,
|
|
3505
|
-
// Mark this dispatch as mid-bootstrap until the prompt is ack'd — recover() rolls back only a task
|
|
3506
|
-
// it can positively see was never delivered, so a crash here doesn't leave it silently stuck.
|
|
3507
3024
|
bootstrappingTaskId: taskId,
|
|
3508
3025
|
updatedAt: now,
|
|
3509
3026
|
...(existing?.creationToken !== undefined ? { creationToken: existing.creationToken } : {}),
|
|
@@ -3520,11 +3037,6 @@ export class AgentManager {
|
|
|
3520
3037
|
}
|
|
3521
3038
|
agentMarkedRunning = true;
|
|
3522
3039
|
await this.injectAndAwaitAck(tmux, paneId, prompt, agentId, agent.runtime);
|
|
3523
|
-
// Prompt delivered → clear the mid-bootstrap marker IMMEDIATELY, before the slower persist/emit/watch
|
|
3524
|
-
// steps: a crash between ack and the clear would otherwise leave recover() seeing a stale marker on
|
|
3525
|
-
// an already-running prompt and re-dispatching it. The clear is best-effort and NON-destructive — its
|
|
3526
|
-
// own catch holds for human rather than falling into the dispatch-failure teardown below (a storage
|
|
3527
|
-
// blip must not tear down a prompt that's already running, nor leave a stale marker recover re-runs).
|
|
3528
3040
|
try {
|
|
3529
3041
|
await this.clearBootstrapMarker(agentId, taskId);
|
|
3530
3042
|
}
|
|
@@ -3548,9 +3060,6 @@ export class AgentManager {
|
|
|
3548
3060
|
return true;
|
|
3549
3061
|
}
|
|
3550
3062
|
catch (err) {
|
|
3551
|
-
// ack_unknown 表示 sendEnter 已发,prompt 可能正在 REPL 中执行。
|
|
3552
|
-
// 清绑定/lock/worktree 会让下一任务复用仍在跑旧 prompt 的 pane——保留所有 state,
|
|
3553
|
-
// 由上游 failTaskForDispatchError → markAwaitingHuman 接手等人。
|
|
3554
3063
|
const isAckUnknown = err instanceof DispatchTerminalError && err.reason === 'ack_unknown';
|
|
3555
3064
|
if (!isAckUnknown) {
|
|
3556
3065
|
try {
|
|
@@ -3566,9 +3075,6 @@ export class AgentManager {
|
|
|
3566
3075
|
`(taskId=${agentNow?.taskId}, expected ${taskId}); skipping`);
|
|
3567
3076
|
return AGENT_STORE_NOOP;
|
|
3568
3077
|
}
|
|
3569
|
-
// Cancel may have taken over the binding while we waited for the pane mutex: markPaneCancelClearing
|
|
3570
|
-
// keeps taskId but flips it to a cancel-cleanup hold. Tearing it down here would drop that hold and
|
|
3571
|
-
// make cancel's Phase 2 skip /clear, reusing an un-cleared pane — leave it to the cancel owner.
|
|
3572
3078
|
if (isCancelCleanupHold(agentNow)) {
|
|
3573
3079
|
console.warn(`[AgentManager] startSession cleanup agentStore: agent ${agentId} held by cancel cleanup ` +
|
|
3574
3080
|
`(${agentNow.awaitingPhase}); leaving binding to the owner`);
|
|
@@ -3597,24 +3103,15 @@ export class AgentManager {
|
|
|
3597
3103
|
throw err;
|
|
3598
3104
|
}
|
|
3599
3105
|
}
|
|
3600
|
-
// 注入方必须持有 pane 互斥:compact 侧的快照校验关不死「校验→按键」
|
|
3601
|
-
// 之间的 async 边界,竞态只能在这里关死。
|
|
3602
3106
|
async injectAndAwaitAck(tmux, paneId, prompt, agentId, runtime) {
|
|
3603
3107
|
const before = await this.agentStore.get(agentId);
|
|
3604
3108
|
await this.acquireCompactGuard(agentId);
|
|
3605
3109
|
try {
|
|
3606
|
-
// guard 等待期间任务可能被 Cancel(释放绑定)或会话重建;过期派发
|
|
3607
|
-
// 决不落 pane。无快照(direct 调用)时跳过——真实派发必有绑定。
|
|
3608
3110
|
if (before) {
|
|
3609
3111
|
const now = await this.agentStore.get(agentId);
|
|
3610
3112
|
if (!now || now.paneId !== before.paneId || now.taskId !== before.taskId) {
|
|
3611
3113
|
throw new Error(`dispatch aborted: agent ${agentId} binding changed while waiting for pane mutex`);
|
|
3612
3114
|
}
|
|
3613
|
-
// Refuse to inject a cancelled task's prompt into a pane cancel is about to /clear — else this
|
|
3614
|
-
// dispatch wins the mutex in cancel's interrupt→/clear gap and clearPaneContext fails to re-acquire
|
|
3615
|
-
// it, stranding the agent at cancel-clear-failed. Cancel persists the agent hold
|
|
3616
|
-
// (markPaneCancelClearing, keeps taskId) BEFORE flipping the task terminal, so check the hold too —
|
|
3617
|
-
// a task-status-only check would slip through the window between the two writes.
|
|
3618
3115
|
if (isCancelCleanupHold(now)) {
|
|
3619
3116
|
throw new Error(`dispatch aborted: agent ${agentId} taken over by cancel (${now.awaitingPhase}) while waiting for pane mutex`);
|
|
3620
3117
|
}
|
|
@@ -3622,8 +3119,6 @@ export class AgentManager {
|
|
|
3622
3119
|
if (boundTask && TERMINAL_STATUSES.includes(boundTask.status)) {
|
|
3623
3120
|
throw new Error(`dispatch aborted: task ${now.taskId} for agent ${agentId} went terminal while waiting for pane mutex`);
|
|
3624
3121
|
}
|
|
3625
|
-
// taskStore.get yielded the loop — re-read right before the paste so a cancel hold that landed during
|
|
3626
|
-
// that await can't slip an injection into a pane cancel has taken over.
|
|
3627
3122
|
const fresh = await this.agentStore.get(agentId);
|
|
3628
3123
|
if (!fresh || fresh.paneId !== before.paneId || fresh.taskId !== before.taskId || isCancelCleanupHold(fresh)) {
|
|
3629
3124
|
throw new Error(`dispatch aborted: agent ${agentId} taken over by cancel before paste`);
|
|
@@ -3639,8 +3134,6 @@ export class AgentManager {
|
|
|
3639
3134
|
await tmux.injectPrompt(paneId, prompt, agentId);
|
|
3640
3135
|
let baseline;
|
|
3641
3136
|
try {
|
|
3642
|
-
// Pre-Enter failure is raw, not ack_unknown (3331269349); but the leftover prompt must not reach
|
|
3643
|
-
// a reused pane (#223), so only release when the composer is cleared or the session is gone.
|
|
3644
3137
|
baseline = await tmux.captureSettledSnapshot(paneId, { timeoutMs: this.dispatchSettleTimeoutMs });
|
|
3645
3138
|
await tmux.sendEnter(paneId);
|
|
3646
3139
|
}
|
|
@@ -3660,8 +3153,6 @@ export class AgentManager {
|
|
|
3660
3153
|
}
|
|
3661
3154
|
catch (err) {
|
|
3662
3155
|
const message = err instanceof Error ? err.message : String(err);
|
|
3663
|
-
// 仅 ack 超时走 intervention(REPL 排队 OK 等人查看);其他错误(capturePaneSnapshot
|
|
3664
|
-
// 等基础设施失败)当 dispatch 终态错误抛出,由上游标 task failed。
|
|
3665
3156
|
if (!(err instanceof Error && /runtime ack timeout/.test(err.message))) {
|
|
3666
3157
|
throw new DispatchTerminalError('ack_unknown', `ack_unknown for pane ${paneId}: ${message}`);
|
|
3667
3158
|
}
|
|
@@ -3683,17 +3174,10 @@ export class AgentManager {
|
|
|
3683
3174
|
'Attach via web terminal to verify and resolve.',
|
|
3684
3175
|
},
|
|
3685
3176
|
});
|
|
3686
|
-
// "pane already busy at baseline" means the paste landed on an already-running input stream,
|
|
3687
|
-
// NOT an idle composer — the skills did NOT become context, so callers must not record them
|
|
3688
|
-
// as injected. Any other timeout (idle composer / swallowed Enter) did
|
|
3689
|
-
// deliver the prompt text into the composer.
|
|
3690
3177
|
const composerDelivered = !/pane already busy at baseline/.test(message);
|
|
3691
3178
|
return { acked: false, composerDelivered };
|
|
3692
3179
|
}
|
|
3693
3180
|
}
|
|
3694
|
-
// Returns true once the pane is safe to release for reuse after a pre-Enter failure: C-c cleared the
|
|
3695
|
-
// unsubmitted prompt, or the session is gone (next dispatch rebuilds fresh). Returns false when
|
|
3696
|
-
// neither could be confirmed — the caller then holds the agent instead of reusing a dirty composer.
|
|
3697
3181
|
async clearComposerForReuse(tmux, paneId, agentId) {
|
|
3698
3182
|
try {
|
|
3699
3183
|
await tmux.sendKeysToPane(paneId, 'C-c');
|
|
@@ -3710,7 +3194,6 @@ export class AgentManager {
|
|
|
3710
3194
|
return false;
|
|
3711
3195
|
}
|
|
3712
3196
|
}
|
|
3713
|
-
// Ready gate prevents mid-paste webhook from flipping to 'waiting' on a busy REPL.
|
|
3714
3197
|
async markAgentWaiting(agentId, expectedTaskId, opts = {}) {
|
|
3715
3198
|
return this.releaseAgentForTask(agentId, expectedTaskId, 'waiting', opts);
|
|
3716
3199
|
}
|
|
@@ -3771,14 +3254,9 @@ export class AgentManager {
|
|
|
3771
3254
|
const promptSpecRound = opts.currentSpecRound ?? task.specReviewRound;
|
|
3772
3255
|
let prompt;
|
|
3773
3256
|
try {
|
|
3774
|
-
// freshRuntime=true 表示 tmux/REPL 刚新建或重启,旧 post-approve prompt 上下文已丢失。
|
|
3775
|
-
// 此时即使 redispatchCount>0 也必须发完整长段,否则 dev 拿不到 T_self / idempotency / final
|
|
3776
|
-
// re-fetch / 禁止 merge 等首轮规则。
|
|
3777
3257
|
const useIncrementalNudge = typeof opts.postApproveRedispatchCount === 'number'
|
|
3778
3258
|
&& opts.postApproveRedispatchCount > 0
|
|
3779
3259
|
&& !ensure.freshRuntime;
|
|
3780
|
-
// code phase (post spec-approval) flows through here, not startSession — materialize the
|
|
3781
|
-
// task's uploaded images so a fresh code-phase context still sees their paths.
|
|
3782
3260
|
const imagePaths = await this.imagePathsForDispatch(runner, task, phase);
|
|
3783
3261
|
prompt = buildPromptInline({
|
|
3784
3262
|
task,
|
|
@@ -3810,7 +3288,6 @@ export class AgentManager {
|
|
|
3810
3288
|
}
|
|
3811
3289
|
throw err;
|
|
3812
3290
|
}
|
|
3813
|
-
// Final state re-check before the irreversible paste — guards against IO-window races.
|
|
3814
3291
|
const expectedStatuses = PHASE_EXPECTED_STATUS[phase] ?? [];
|
|
3815
3292
|
const taskFresh = await this.taskStore.get(taskId);
|
|
3816
3293
|
if (!taskFresh || TERMINAL_STATUSES.includes(taskFresh.status)) {
|
|
@@ -3842,7 +3319,6 @@ export class AgentManager {
|
|
|
3842
3319
|
return false;
|
|
3843
3320
|
}
|
|
3844
3321
|
}
|
|
3845
|
-
// Arm before paste (same reasoning as startSession): pane exists, prompt not out yet.
|
|
3846
3322
|
if (opts.armBeforeInject && !(await opts.armBeforeInject())) {
|
|
3847
3323
|
return false;
|
|
3848
3324
|
}
|
|
@@ -3860,12 +3336,6 @@ export class AgentManager {
|
|
|
3860
3336
|
await this.injectAndAwaitAck(tmux, paneId, prompt, agentId, agent.runtime);
|
|
3861
3337
|
return true;
|
|
3862
3338
|
}
|
|
3863
|
-
// A bootstrappingTaskId marker means a dispatch began (binding written) but its prompt was never
|
|
3864
|
-
// ack-cleared. The one exception is a delivered task whose marker-clear write blipped (held as
|
|
3865
|
-
// bootstrap-marker-clear-failed) — its prompt is running, so leave it. (ack_unknown fails the task to a
|
|
3866
|
-
// terminal status, so the in_progress check excludes it.) Everything else with the marker — not yet
|
|
3867
|
-
// started, or held on a startup dialog — never ran, so rolling back to pending (and removing the empty
|
|
3868
|
-
// worktree, which rollbackFailedDispatch wouldn't) is safe and lets normal dispatch restart it.
|
|
3869
3339
|
async rollbackUndeliveredBootstrap(state, agentConfig) {
|
|
3870
3340
|
if (!state.taskId
|
|
3871
3341
|
|| state.bootstrappingTaskId !== state.taskId
|
|
@@ -3876,10 +3346,6 @@ export class AgentManager {
|
|
|
3876
3346
|
if (!boundTask || boundTask.phase || boundTask.status !== 'in_progress' || boundTask.agentId !== state.id) {
|
|
3877
3347
|
return false;
|
|
3878
3348
|
}
|
|
3879
|
-
// Durable cross-check: the session.started event lives in a separate log that survives an
|
|
3880
|
-
// agent-store-specific write failure. If one exists, the prompt WAS delivered (the marker is stale
|
|
3881
|
-
// because its clear — and the held fallback — both failed). Rolling back would duplicate a running
|
|
3882
|
-
// prompt; instead clear the stale marker and leave the task to the normal re-attach path.
|
|
3883
3349
|
if (await this.bootstrapPromptWasDelivered(state.taskId, boundTask.createdAt)) {
|
|
3884
3350
|
await this.clearBootstrapMarker(state.id, state.taskId);
|
|
3885
3351
|
return false;
|
|
@@ -3898,10 +3364,6 @@ export class AgentManager {
|
|
|
3898
3364
|
}
|
|
3899
3365
|
}
|
|
3900
3366
|
await this.rollbackFailedDispatch(state.taskId, state.id);
|
|
3901
|
-
// rollbackFailedDispatch spreads the old binding, keeping any awaiting_human/awaitingPhase (e.g. a
|
|
3902
|
-
// dialog-pending hold) — which would leave the now-unbound agent non-dispatchable
|
|
3903
|
-
// (canDispatchWithBinding refuses awaiting_human). Clear the held state so the preferred agent can
|
|
3904
|
-
// pick the re-queued task back up without a manual Resume.
|
|
3905
3367
|
await this.agentStore.update(state.id, (latest) => {
|
|
3906
3368
|
if (!latest || latest.status !== 'awaiting_human')
|
|
3907
3369
|
return AGENT_STORE_NOOP;
|
|
@@ -3910,9 +3372,6 @@ export class AgentManager {
|
|
|
3910
3372
|
});
|
|
3911
3373
|
return true;
|
|
3912
3374
|
}
|
|
3913
|
-
// session.started is emitted (to the event log) only after the prompt is delivered, so its presence is
|
|
3914
|
-
// durable proof of delivery even if the agent-store marker-clear failed. Task ids are never reused, so a
|
|
3915
|
-
// match is unambiguous; scan from the task's creation date forward.
|
|
3916
3375
|
async bootstrapPromptWasDelivered(taskId, createdAtIso) {
|
|
3917
3376
|
const today = new Date().toISOString().slice(0, 10);
|
|
3918
3377
|
const from = createdAtIso.slice(0, 10);
|
|
@@ -3942,18 +3401,8 @@ export class AgentManager {
|
|
|
3942
3401
|
continue;
|
|
3943
3402
|
try {
|
|
3944
3403
|
const result = await this.ensureSession(state.id, 'recover');
|
|
3945
|
-
// A develop bootstrap interrupted before its prompt was ack'd leaves an in_progress task whose
|
|
3946
|
-
// runtime never received it; recover() can't re-deliver, so roll it back to pending (helper). We do
|
|
3947
|
-
// NOT treat freshRuntime alone as missing (that would discard a delivered task's worktree on host
|
|
3948
|
-
// reboot) — only a positively-marked, never-ack'd dispatch is rolled back. (Same check runs in the
|
|
3949
|
-
// dialog-pending catch below, so a mid-bootstrap task blocked on a startup dialog isn't held forever.)
|
|
3950
3404
|
if (await this.rollbackUndeliveredBootstrap(state, agentConfig))
|
|
3951
3405
|
continue;
|
|
3952
|
-
// An incomplete create bootstrap (creationToken still set, no task) crashed before it
|
|
3953
|
-
// proved signal capability. Re-run the greeting gate — but in the BACKGROUND: recover() is
|
|
3954
|
-
// awaited before the server serves, and a synchronous handshake would block startup up to
|
|
3955
|
-
// 2×greeting-timeout per such agent (serially). creationToken stays set meanwhile, so
|
|
3956
|
-
// canDispatchWithBinding keeps the agent out of the pool until the handshake resolves.
|
|
3957
3406
|
if (state.creationToken && !state.taskId) {
|
|
3958
3407
|
const ct = state.creationToken;
|
|
3959
3408
|
const pane = result.paneId;
|
|
@@ -3974,9 +3423,6 @@ export class AgentManager {
|
|
|
3974
3423
|
})().catch((err) => console.warn(`[recover] background re-greet for ${agentId} crashed:`, err));
|
|
3975
3424
|
continue;
|
|
3976
3425
|
}
|
|
3977
|
-
// recover 成功 = server 重启前 dialog_pending 的 agent 现在 REPL ready。
|
|
3978
|
-
// 处理 Held:与 resumeAgent 共用 shouldReleaseHeldBinding 规则(task terminal/无 task /
|
|
3979
|
-
// turn-completed phase → 同步清 binding;task active 且 phase 不在 completed 集合 → 保留 binding)。
|
|
3980
3426
|
const boundTask = state.taskId ? await this.taskStore.get(state.taskId) : null;
|
|
3981
3427
|
if (state.taskId
|
|
3982
3428
|
&& boundTask?.id === state.taskId
|
|
@@ -4010,13 +3456,8 @@ export class AgentManager {
|
|
|
4010
3456
|
console.warn(`[recover] dispatchPostMergeCleanup(${state.id}, ${boundTask.id}) failed:`, cleanupErr);
|
|
4011
3457
|
}
|
|
4012
3458
|
}
|
|
4013
|
-
// A cancel-cleanup hold must NOT be auto-released on restart (it would reuse the cancelled,
|
|
4014
|
-
// un-cleared/maybe-running pane). cancel-interrupt-failed has shouldReleaseHeldBinding=true (it's
|
|
4015
|
-
// operator-Resume recoverable), so exclude the whole cancel-cleanup set here explicitly.
|
|
4016
3459
|
const cancelHold = isCancelCleanupHold(state);
|
|
4017
3460
|
const shouldReleaseBinding = shouldReleaseHeldBinding(state, boundTask) && !cancelHold;
|
|
4018
|
-
// 释放 binding 时同步清 worktree(与 resumeAgent 一致)——否则跨重启恢复后
|
|
4019
|
-
// worktreePath 在下面 update 中被丢弃,磁盘上的 worktree 永远无人回收。
|
|
4020
3461
|
if (shouldReleaseBinding && state.worktreePath) {
|
|
4021
3462
|
const cleanupDir = this.resolveWorkdir(agentConfig, state);
|
|
4022
3463
|
if (cleanupDir) {
|
|
@@ -4030,12 +3471,6 @@ export class AgentManager {
|
|
|
4030
3471
|
}
|
|
4031
3472
|
}
|
|
4032
3473
|
}
|
|
4033
|
-
// 所有 awaiting_human + non-releasable binding 都保留 Held。包括 agent_dialog_pending +
|
|
4034
|
-
// active task 这个 crash window 场景:handleDialogPendingFromRuntime 已写 awaiting_human
|
|
4035
|
-
// 但 transitionTaskStatus 之前 crash 重启 → task 仍 active;recover 切到 ok 会丢失 Resume
|
|
4036
|
-
// 入口、binding 仍指向 active task → 新 dispatch 撞 stale binding。
|
|
4037
|
-
// 注意:agent_dialog_pending + 无 taskId(最 common 的 dialog_pending) → shouldReleaseBinding=true
|
|
4038
|
-
// → preserveHeld=false → 走 release path 清 Held(recover 视为 dialog dismissed 的正常出口)。
|
|
4039
3474
|
const preserveHeld = !shouldReleaseBinding
|
|
4040
3475
|
&& state.status === 'awaiting_human';
|
|
4041
3476
|
await this.agentStore.update(state.id, (latest) => {
|
|
@@ -4060,7 +3495,6 @@ export class AgentManager {
|
|
|
4060
3495
|
};
|
|
4061
3496
|
if (!preserveHeld)
|
|
4062
3497
|
return withBinding;
|
|
4063
|
-
// 保留 awaiting_human 整套字段:operator 仍需干预(Resume / cancel task / DELETE agent)。
|
|
4064
3498
|
return {
|
|
4065
3499
|
...withBinding,
|
|
4066
3500
|
status: 'awaiting_human',
|
|
@@ -4072,22 +3506,15 @@ export class AgentManager {
|
|
|
4072
3506
|
if (shouldReleaseBinding) {
|
|
4073
3507
|
await this.lockManager.release(state.id);
|
|
4074
3508
|
}
|
|
4075
|
-
// Skip the menu-watch for cancel-cleanup holds: they await operator Resume/DELETE, and their taskId
|
|
4076
|
-
// won't clear on its own, so the watcher would poll forever.
|
|
4077
3509
|
if (state.taskId && !shouldReleaseBinding && !cancelHold) {
|
|
4078
3510
|
this.startRuntimeMenuWatch(state.id);
|
|
4079
3511
|
}
|
|
4080
3512
|
}
|
|
4081
3513
|
catch (err) {
|
|
4082
3514
|
if (err instanceof EnsureSessionError && err.partial.dialogPending) {
|
|
4083
|
-
// A mid-bootstrap dispatch (marker set, prompt never ack'd) that comes back blocked on a startup
|
|
4084
|
-
// dialog can't be Resumed for an active task — roll it back rather than hold it forever; the
|
|
4085
|
-
// re-dispatch handles the dialog fresh via ensureSession's trust-dialog path.
|
|
4086
3515
|
if (await this.rollbackUndeliveredBootstrap(state, agentConfig))
|
|
4087
3516
|
continue;
|
|
4088
3517
|
await this.markDialogPending(state.id, state.creationToken);
|
|
4089
|
-
// runtime path (creationToken=undefined) 时传 paneId/taskId snapshot 作 generation guard,
|
|
4090
|
-
// 否则 generationMismatch 看 expectedTaskId 默认 undefined 与 state.taskId 不匹配会立即退出。
|
|
4091
3518
|
void this.slowPollDialogPending(state.id, state.creationToken, {
|
|
4092
3519
|
...(state.paneId !== undefined ? { expectedPaneId: state.paneId } : {}),
|
|
4093
3520
|
expectedTaskId: state.taskId,
|
|
@@ -4161,9 +3588,6 @@ export class AgentManager {
|
|
|
4161
3588
|
return AGENT_STORE_NOOP;
|
|
4162
3589
|
if (existing.creationToken)
|
|
4163
3590
|
return AGENT_STORE_NOOP;
|
|
4164
|
-
// A greeting capability hold must survive a transient/real tmux disappearance — wiping it
|
|
4165
|
-
// (it carries a paneId on the dialog path) would slip an unverified agent back into the
|
|
4166
|
-
// dispatch pool. Operator restart/retry re-greets; recover/Resume already preserve it.
|
|
4167
3591
|
if (existing.awaitingPhase != null && REGREET_REQUIRED_HOLD_PHASES.has(existing.awaitingPhase)) {
|
|
4168
3592
|
return AGENT_STORE_NOOP;
|
|
4169
3593
|
}
|
|
@@ -4220,12 +3644,6 @@ export class AgentManager {
|
|
|
4220
3644
|
async cancelTask(taskId) {
|
|
4221
3645
|
let devToRelease;
|
|
4222
3646
|
let qaToRelease;
|
|
4223
|
-
// Server-mode ready gate may have already published remote artifacts
|
|
4224
|
-
// (pushed branch / open PR). Capture before flipping to cancelled so the
|
|
4225
|
-
// post-lock cleanup can retire them instead of orphaning.
|
|
4226
|
-
// mayBeInFlight: approved+marker means the publish prompt may STILL be
|
|
4227
|
-
// running — retirement must wait for the dev interrupt or the in-flight
|
|
4228
|
-
// push/pr-create would recreate the artifacts right after cleanup.
|
|
4229
3647
|
let publishedCleanup;
|
|
4230
3648
|
this.phaseSignalWatcher?.stop(taskId);
|
|
4231
3649
|
const result = await this.withTaskLock(async () => {
|
|
@@ -4234,9 +3652,6 @@ export class AgentManager {
|
|
|
4234
3652
|
throw new ApiError(404, 'Task not found');
|
|
4235
3653
|
if (TERMINAL_STATUSES.includes(task.status))
|
|
4236
3654
|
return task;
|
|
4237
|
-
// A mark-complete merge is mid-flight (task is merge-ready, PR being merged) — refuse
|
|
4238
|
-
// to cancel so the merge can't land while the task is flipped to cancelled (which would
|
|
4239
|
-
// make pr.merged a no-op and skip cleanup). Checked under the lock to close the window.
|
|
4240
3655
|
if (this.markCompleteInFlight.has(taskId)) {
|
|
4241
3656
|
throw new ApiError(409, `Task ${taskId} is being completed (merge in progress); try again shortly`);
|
|
4242
3657
|
}
|
|
@@ -4244,11 +3659,6 @@ export class AgentManager {
|
|
|
4244
3659
|
devToRelease = task.agentId;
|
|
4245
3660
|
if (task.qaAgentId)
|
|
4246
3661
|
qaToRelease = task.qaAgentId;
|
|
4247
|
-
// approved + publishDispatchedAt = the publish prompt reached the pane, so
|
|
4248
|
-
// remote artifacts may already exist even though code-ready never landed
|
|
4249
|
-
// (dispatch crash, or the reviewed-head mismatch gate refused ready —
|
|
4250
|
-
// whose documented exit is exactly this Cancel).
|
|
4251
|
-
// Truthy (not !== undefined): sanitizeTask passes hand-edited nulls through.
|
|
4252
3662
|
const publishedAtGate = task.status === 'ready'
|
|
4253
3663
|
|| (task.status === 'approved' && !!task.publishDispatchedAt);
|
|
4254
3664
|
if (task.reviewMode === 'server' && publishedAtGate && task.agentId) {
|
|
@@ -4259,14 +3669,11 @@ export class AgentManager {
|
|
|
4259
3669
|
branch: task.branch,
|
|
4260
3670
|
...(task.prNumber !== undefined ? { prNumber: task.prNumber } : {}),
|
|
4261
3671
|
devAgentId: task.agentId,
|
|
4262
|
-
// ready = code-ready consumed, publish finished; approved = no
|
|
4263
|
-
// completion signal yet, the publish may still be running.
|
|
4264
3672
|
mayBeInFlight: task.status === 'approved',
|
|
4265
3673
|
};
|
|
4266
3674
|
}
|
|
4267
3675
|
}
|
|
4268
3676
|
else if (task.status === 'merge-ready' && task.prNumber !== undefined && task.branch && task.agentId) {
|
|
4269
|
-
// GitHub-mode gate cancel leaves the same orphaned PR/branch.
|
|
4270
3677
|
publishedCleanup = {
|
|
4271
3678
|
afterDone: 'pr',
|
|
4272
3679
|
branch: task.branch,
|
|
@@ -4275,9 +3682,6 @@ export class AgentManager {
|
|
|
4275
3682
|
mayBeInFlight: false,
|
|
4276
3683
|
};
|
|
4277
3684
|
}
|
|
4278
|
-
// Mark the panes cancel-clearing BEFORE flipping the task terminal (still under the lock), so any
|
|
4279
|
-
// window — a concurrent escape, or a restart — sees a persisted hold instead of a plain binding to a
|
|
4280
|
-
// terminal task that recover()/the escape would release with the session still un-cleared.
|
|
4281
3685
|
for (const id of [devToRelease, qaToRelease]) {
|
|
4282
3686
|
if (id)
|
|
4283
3687
|
await this.markPaneCancelClearing(id, taskId);
|
|
@@ -4296,16 +3700,7 @@ export class AgentManager {
|
|
|
4296
3700
|
});
|
|
4297
3701
|
return task;
|
|
4298
3702
|
});
|
|
4299
|
-
// 唯一允许打断 agent 会话的入口(用户主动 Cancel)。Interrupt BEFORE remote
|
|
4300
|
-
// retirement: an in-flight publish prompt would re-push the branch / re-open
|
|
4301
|
-
// the PR right after cleanup, and a cancelled task gets no second pass.
|
|
4302
|
-
// Only a successful interrupt PROVES the pane stopped — skipped paths
|
|
4303
|
-
// (config hot-removed: the pane outlives the config; state gone; rebound)
|
|
4304
|
-
// leave an in-flight publish possible.
|
|
4305
3703
|
let devStopConfirmed = false;
|
|
4306
|
-
// Phase 1 — interrupt every still-bound pane first, so a slow /clear on one agent can't keep another
|
|
4307
|
-
// running the cancelled task. The persisted cancel-clearing hold (set under the lock) blocks any
|
|
4308
|
-
// concurrent escape from releasing these panes until they are /cleared.
|
|
4309
3704
|
const stopped = [];
|
|
4310
3705
|
for (const id of [devToRelease, qaToRelease]) {
|
|
4311
3706
|
if (!id)
|
|
@@ -4320,13 +3715,10 @@ export class AgentManager {
|
|
|
4320
3715
|
await this.markAwaitingHuman(id, 'cancel-interrupt-failed', 'Task marked cancelled but ESC / REPL ready check failed; agent may still be running the cancelled prompt. Attach via web terminal to verify, then Resume or Delete.', { expectedTaskId: taskId });
|
|
4321
3716
|
continue;
|
|
4322
3717
|
}
|
|
4323
|
-
// devStopConfirmed reflects only "the pane stopped" — set before /clear so published-artifact
|
|
4324
|
-
// retirement still proceeds even when /clear can't be confirmed.
|
|
4325
3718
|
if (id === publishedCleanup?.devAgentId)
|
|
4326
3719
|
devStopConfirmed = true;
|
|
4327
3720
|
stopped.push(id);
|
|
4328
3721
|
}
|
|
4329
|
-
// Phase 2 — every pane is stopped; /clear + release each.
|
|
4330
3722
|
for (const id of stopped) {
|
|
4331
3723
|
const cfg = this.getAgentConfig(id);
|
|
4332
3724
|
const state = await this.agentStore.get(id);
|
|
@@ -4335,26 +3727,18 @@ export class AgentManager {
|
|
|
4335
3727
|
continue;
|
|
4336
3728
|
}
|
|
4337
3729
|
if (!(await this.clearPaneContext(state, cfg))) {
|
|
4338
|
-
// /clear unconfirmed → hold; the un-cleared pane stays bound (UNCLEARED_PANE_PHASES) until DELETE.
|
|
4339
3730
|
await this.markAwaitingHuman(id, 'cancel-clear-failed', 'Task marked cancelled and the session interrupted, but /clear was not confirmed; the pane holds un-cleared context. DELETE the agent to discard it (Resume will not reuse an un-cleared pane).', { expectedTaskId: taskId });
|
|
4340
3731
|
continue;
|
|
4341
3732
|
}
|
|
4342
3733
|
try {
|
|
4343
|
-
// fromCancelCleanup: this IS the owning cancel, having confirmed /clear — the only release allowed
|
|
4344
|
-
// to free the cancel-clearing hold. allowAwaitingHuman: cross the awaiting_human gate too.
|
|
4345
3734
|
await this.releaseAgentForTask(id, taskId, 'idle', { allowAwaitingHuman: true, fromCancelCleanup: true });
|
|
4346
3735
|
}
|
|
4347
3736
|
catch (err) {
|
|
4348
3737
|
console.error(`[AgentManager] cancelTask releaseAgentForTask(${id}) failed:`, err);
|
|
4349
3738
|
}
|
|
4350
3739
|
}
|
|
4351
|
-
// Best-effort remote retirement for a cancelled published gate: close the PR
|
|
4352
|
-
// and delete the pushed branch so they don't outlive the task. Failures only
|
|
4353
|
-
// warn + intervene — cancel must not be blocked by remote faults.
|
|
4354
3740
|
if (publishedCleanup) {
|
|
4355
3741
|
if (publishedCleanup.mayBeInFlight && !devStopConfirmed) {
|
|
4356
|
-
// No proof the publish prompt stopped; cleaning now would race its
|
|
4357
|
-
// push/pr-create. Leave the artifacts to the operator.
|
|
4358
3742
|
await this.safeEmit({
|
|
4359
3743
|
id: '',
|
|
4360
3744
|
type: 'human.intervention',
|
|
@@ -4409,9 +3793,6 @@ export class AgentManager {
|
|
|
4409
3793
|
}
|
|
4410
3794
|
return result;
|
|
4411
3795
|
}
|
|
4412
|
-
// create-time 不再校验 awaiting_human / creating / bound —— 这些都是"忙",
|
|
4413
|
-
// 允许入队(落 pending);可执行性判断下沉到 dispatchPendingTask(或 createTask 已空闲分支)。
|
|
4414
|
-
// 仍保留:agent 存在/同 project/role=dev(非空时)+ prompt size 上界。
|
|
4415
3796
|
async validateTaskDispatch(projectId, input) {
|
|
4416
3797
|
if (input.preferredAgentId !== '') {
|
|
4417
3798
|
const cfg = this.getAgentConfig(input.preferredAgentId);
|
|
@@ -4440,30 +3821,20 @@ export class AgentManager {
|
|
|
4440
3821
|
`reduce task description or remove some skills from AGENT_PHASES[develop]`);
|
|
4441
3822
|
}
|
|
4442
3823
|
}
|
|
4443
|
-
// Force a fresh QA review pass; bumps reviewRound only after startSession succeeds.
|
|
4444
3824
|
async dispatchReviewToQa(taskId) {
|
|
4445
3825
|
const claim = await this.withTaskLock(async () => {
|
|
4446
3826
|
if (this.manualReviewInFlight.has(taskId)) {
|
|
4447
3827
|
throw new ApiError(409, `Manual review already in progress for task ${taskId}`);
|
|
4448
3828
|
}
|
|
4449
|
-
// A mark-complete merge is mid-flight — refuse so Call review can't flip the
|
|
4450
|
-
// merge-ready task back to review while the PR is being merged.
|
|
4451
3829
|
if (this.markCompleteInFlight.has(taskId)) {
|
|
4452
3830
|
throw new ApiError(409, `Task ${taskId} is being completed (merge in progress); try again shortly`);
|
|
4453
3831
|
}
|
|
4454
3832
|
const task = await this.taskStore.get(taskId);
|
|
4455
3833
|
if (!task)
|
|
4456
3834
|
throw new ApiError(404, `Task ${taskId} not found`);
|
|
4457
|
-
// Server-mode tasks review via the exchange protocol; routing one into the
|
|
4458
|
-
// legacy GitHub review flow would cross-contaminate the state machines.
|
|
4459
3835
|
if (task.reviewMode === 'server') {
|
|
4460
3836
|
throw new ApiError(409, `Task ${taskId} uses server review mode; legacy Call review is not applicable`);
|
|
4461
3837
|
}
|
|
4462
|
-
// spec-phase max_rounds escapes via Retry/Cancel only. Call review dispatches the
|
|
4463
|
-
// CODE-review protocol, but review.submitted early-returns for spec phase — so a direct
|
|
4464
|
-
// /tasks/:id/review here would transition the task to review + bind QA, yet its verdict
|
|
4465
|
-
// could never advance it or release the QA. Guard the server entry (UI already hides it),
|
|
4466
|
-
// matching the continue/complete spec guards.
|
|
4467
3838
|
if (task.phase === 'spec' && task.status === 'max_rounds') {
|
|
4468
3839
|
throw new ApiError(409, `Call review is not supported for spec-phase max_rounds tasks (use Retry or Cancel)`);
|
|
4469
3840
|
}
|
|
@@ -4473,7 +3844,6 @@ export class AgentManager {
|
|
|
4473
3844
|
if (!task.branch) {
|
|
4474
3845
|
throw new ApiError(400, `Task ${taskId} has no branch; cannot dispatch review`);
|
|
4475
3846
|
}
|
|
4476
|
-
// Stale qaAgentId (deleted + recreated QA) → fall back to current partner.
|
|
4477
3847
|
let qaId = task.qaAgentId;
|
|
4478
3848
|
if (qaId && !this.getAgentConfig(qaId)) {
|
|
4479
3849
|
console.warn(`[dispatchReviewToQa] task ${taskId}.qaAgentId="${qaId}" no longer in config; ` +
|
|
@@ -4504,16 +3874,6 @@ export class AgentManager {
|
|
|
4504
3874
|
if (!acquired) {
|
|
4505
3875
|
throw new ApiError(409, `QA agent ${qaId} is busy or unavailable`);
|
|
4506
3876
|
}
|
|
4507
|
-
// dev 被 parked 到 waiting (mode='waiting' 仅 bump updatedAt 不发 C-c 不清 binding);旧实现
|
|
4508
|
-
// approved/其他状态走两条不同分支,但 release(waiting) 和 markAgentWaiting 实际都走相同的
|
|
4509
|
-
// releaseAgentForTask(waiting) — 现在统一调 markAgentWaiting,devParked 仅作 QA 失败时
|
|
4510
|
-
// emit dev-parked intervention 的旗标。
|
|
4511
|
-
// .catch→false: 旧 approved 分支已有此模式,markAgentWaiting reject (store/lock IO 异常) 时
|
|
4512
|
-
// 不能直接跳出 try/finally — QA 已 acquire (binding+lock) 必须先 release 清理才能 throw。
|
|
4513
|
-
// Park dev only when it is still bound to THIS task. A paused max_rounds task
|
|
4514
|
-
// released its dev (spec phase) or kept it reserved (code phase); a released
|
|
4515
|
-
// dev has no running session to park, and markAgentWaiting would fail the
|
|
4516
|
-
// taskId-match check (manager.ts releaseAgentForTask) → spurious 500.
|
|
4517
3877
|
let devParked = false;
|
|
4518
3878
|
if (!isTerminal && devAgentId) {
|
|
4519
3879
|
const devState = await this.agentStore.get(devAgentId);
|
|
@@ -4531,8 +3891,6 @@ export class AgentManager {
|
|
|
4531
3891
|
devParked = true;
|
|
4532
3892
|
}
|
|
4533
3893
|
}
|
|
4534
|
-
// PHASE 0 — snapshot fields PHASE 1/2 may overwrite, so rollback can
|
|
4535
|
-
// restore them exactly (qaAgentId / signalToken / reviewHeadAnchorSha).
|
|
4536
3894
|
const pre = await this.taskStore.get(taskId);
|
|
4537
3895
|
const snapshot = {
|
|
4538
3896
|
qaAgentId: pre?.qaAgentId,
|
|
@@ -4540,29 +3898,10 @@ export class AgentManager {
|
|
|
4540
3898
|
reviewHeadAnchorSha: pre?.reviewHeadAnchorSha,
|
|
4541
3899
|
reviewDispatchedAt: pre?.reviewDispatchedAt,
|
|
4542
3900
|
};
|
|
4543
|
-
// PHASE 1 — persist status/qaAgentId/reviewHeadAnchorSha/reviewDispatchedAt
|
|
4544
|
-
// BEFORE setting up the fallback watcher (PHASE 2). A same-identity QA can echo
|
|
4545
|
-
// pr-approved between watcher.start() and these mutations; if they aren't
|
|
4546
|
-
// committed first, the verdict handler reads stale state (fromStatus mismatch,
|
|
4547
|
-
// head-unavailable, or release-QA orphans the binding). reviewDispatchedAt
|
|
4548
|
-
// also anchors the poller-verdict freshness gate.
|
|
4549
|
-
//
|
|
4550
|
-
// Terminal tasks (cancelled/failed/merged/max_rounds) skip status
|
|
4551
|
-
// transition + anchor — they remain terminal — but still bump reviewRound
|
|
4552
|
-
// to record the manual review attempt.
|
|
4553
3901
|
const isTerminalAtClaim = TERMINAL_STATUSES.includes(taskStatusAtClaim);
|
|
4554
3902
|
if (!isTerminalAtClaim) {
|
|
4555
3903
|
const reviewAnchor = await this.fetchPrHeadSha(taskId).catch(() => undefined);
|
|
4556
|
-
const preDispatched = await this.transitionTaskStatus(taskId, 'review', { fromStatus: [taskStatusAtClaim] },
|
|
4557
|
-
// Always overwrite reviewHeadAnchorSha (even with undefined when fetch
|
|
4558
|
-
// failed) so a stale anchor from a prior round can never survive into this
|
|
4559
|
-
// round's verdict handling. Rotate signalToken + advance reviewDispatchedAt in
|
|
4560
|
-
// the SAME mutation that exposes the new anchor/status — otherwise there is a
|
|
4561
|
-
// window where the anchor is already new but the token/dispatch time are still
|
|
4562
|
-
// the old pass's, and an old QA's late stamped verdict would pass the
|
|
4563
|
-
// freshness/token gate. PHASE 0's snapshot captured the prior token, so a
|
|
4564
|
-
// failed dispatch still rolls back correctly.
|
|
4565
|
-
{
|
|
3904
|
+
const preDispatched = await this.transitionTaskStatus(taskId, 'review', { fromStatus: [taskStatusAtClaim] }, {
|
|
4566
3905
|
reviewHeadAnchorSha: reviewAnchor,
|
|
4567
3906
|
reviewDispatchedAt: new Date().toISOString(),
|
|
4568
3907
|
signalToken: createSignalToken(),
|
|
@@ -4574,9 +3913,6 @@ export class AgentManager {
|
|
|
4574
3913
|
throw new ApiError(409, `Task ${taskId} status changed during dispatch; cannot enter review`);
|
|
4575
3914
|
}
|
|
4576
3915
|
}
|
|
4577
|
-
// reviewRound bump + qaAgentId bind. The pass anchor/token/dispatch-time were
|
|
4578
|
-
// already advanced atomically in the transition above (non-terminal); terminal
|
|
4579
|
-
// tasks skip that transition and only record the review attempt here.
|
|
4580
3916
|
await this.withTaskLock(async () => {
|
|
4581
3917
|
const fresh = await this.taskStore.get(taskId);
|
|
4582
3918
|
if (!fresh)
|
|
@@ -4588,15 +3924,8 @@ export class AgentManager {
|
|
|
4588
3924
|
updatedAt: new Date().toISOString(),
|
|
4589
3925
|
});
|
|
4590
3926
|
});
|
|
4591
|
-
// PHASE 2 — set up the fallback verdict watcher, then inject the prompt. The
|
|
4592
|
-
// poller is the authoritative verdict source; this watcher only fires in the
|
|
4593
|
-
// same-identity (422) case where `gh pr review` leaves no GitHub state to poll.
|
|
4594
|
-
// The verdict can fire any time after `start({...})` returns; PHASE 1 state is
|
|
4595
|
-
// already committed so the handler sees a consistent task.
|
|
4596
3927
|
const { armed } = await this.rotateAndSetupPhaseSignal(taskId, qaId, ['pr-approved', 'pr-changes-requested']);
|
|
4597
3928
|
if (!armed) {
|
|
4598
|
-
// Verdict watcher didn't arm — a same-identity review would have no verdict source.
|
|
4599
|
-
// Roll back PHASE 1 and fail loudly instead of injecting a prompt nothing will consume.
|
|
4600
3929
|
await this.rollbackDispatchReviewPhase1(taskId, taskStatusAtClaim, isTerminalAtClaim, snapshot);
|
|
4601
3930
|
await this.releaseAgentForTask(qaId, taskId, 'idle').catch(() => undefined);
|
|
4602
3931
|
if (devParked)
|
|
@@ -4611,18 +3940,11 @@ export class AgentManager {
|
|
|
4611
3940
|
});
|
|
4612
3941
|
}
|
|
4613
3942
|
catch (err) {
|
|
4614
|
-
// ack_unknown: prompt 已发,QA 可能在跑;保留 binding 让 operator 接管。
|
|
4615
|
-
// 不需要再 transition / bump — PHASE 1 已经做完。
|
|
4616
3943
|
if (await this.markAwaitingIfAckUnknown(qaId, err, taskId)) {
|
|
4617
|
-
// no-op
|
|
4618
3944
|
}
|
|
4619
3945
|
else if (err instanceof EnsureSessionError && err.partial.handled) {
|
|
4620
|
-
// handleDialogPendingFromRuntime 已 Held QA + fail task + release partners;不能再 release
|
|
4621
|
-
// 否则 boundTask terminal 让 shouldReleaseHeldBinding 放行清掉仍卡 dialog 的 pane lock。
|
|
4622
3946
|
}
|
|
4623
3947
|
else {
|
|
4624
|
-
// Hard failure (not ack_unknown / dialog handled). Roll back PHASE 1
|
|
4625
|
-
// so the manual review attempt leaves no half-bumped state behind.
|
|
4626
3948
|
await this.rollbackDispatchReviewPhase1(taskId, taskStatusAtClaim, isTerminalAtClaim, snapshot);
|
|
4627
3949
|
await this.releaseAgentForTask(qaId, taskId, 'idle')
|
|
4628
3950
|
.catch(() => undefined);
|
|
@@ -4632,8 +3954,6 @@ export class AgentManager {
|
|
|
4632
3954
|
throw err;
|
|
4633
3955
|
}
|
|
4634
3956
|
if (!started) {
|
|
4635
|
-
// startSession resolved false (no exception). Same rollback as the
|
|
4636
|
-
// hard-failure catch branch above.
|
|
4637
3957
|
await this.rollbackDispatchReviewPhase1(taskId, taskStatusAtClaim, isTerminalAtClaim, snapshot);
|
|
4638
3958
|
await this.releaseAgentForTask(qaId, taskId, 'idle')
|
|
4639
3959
|
.catch(() => undefined);
|
|
@@ -4641,8 +3961,6 @@ export class AgentManager {
|
|
|
4641
3961
|
await this.emitManualReviewDevParkedQaFailedIntervention(devAgentId, taskId);
|
|
4642
3962
|
throw new ApiError(500, `Failed to start QA review session for ${taskId}`);
|
|
4643
3963
|
}
|
|
4644
|
-
// PHASE 1 already wrote the anchor + status + qaAgentId + reviewRound
|
|
4645
|
-
// bump under withTaskLock; no further mutations needed here.
|
|
4646
3964
|
const final = await this.taskStore.get(taskId);
|
|
4647
3965
|
return final;
|
|
4648
3966
|
}
|
|
@@ -4650,15 +3968,7 @@ export class AgentManager {
|
|
|
4650
3968
|
this.manualReviewInFlight.delete(taskId);
|
|
4651
3969
|
}
|
|
4652
3970
|
}
|
|
4653
|
-
// Manually push a code-phase max_rounds task through one more dev fix round.
|
|
4654
|
-
// Reuses the fixing dispatch chain (fixing → pr-fixed watcher → continueSession),
|
|
4655
|
-
// bypassing the review cap for this one round; the round still increments and the
|
|
4656
|
-
// task re-pauses at max_rounds if QA requests changes again. The dev is the
|
|
4657
|
-
// reserved one from the pause (§2.1), so its worktree is reused as-is.
|
|
4658
3971
|
async continueDevRound(taskId) {
|
|
4659
|
-
// A mark-complete merge may be mid-flight after claiming the task but before the
|
|
4660
|
-
// max_rounds → merge-ready transition lands; refuse so the two can't both act on the
|
|
4661
|
-
// same max_rounds snapshot (the merge-ready status guard covers the post-transition window).
|
|
4662
3972
|
if (this.markCompleteInFlight.has(taskId)) {
|
|
4663
3973
|
throw new ApiError(409, `Task ${taskId} is being completed (merge in progress); try again shortly`);
|
|
4664
3974
|
}
|
|
@@ -4671,8 +3981,6 @@ export class AgentManager {
|
|
|
4671
3981
|
if (task.phase === 'spec') {
|
|
4672
3982
|
throw new ApiError(409, `Continue one round is only supported for code-phase tasks`);
|
|
4673
3983
|
}
|
|
4674
|
-
// Server-mode continue: grant one round past the cap, then re-run the server
|
|
4675
|
-
// fix protocol from the stored findings — no PR exists at this point.
|
|
4676
3984
|
if (task.reviewMode === 'server') {
|
|
4677
3985
|
if (!task.agentId) {
|
|
4678
3986
|
throw new ApiError(400, `Task ${taskId} has no dev agent; cannot continue`);
|
|
@@ -4681,9 +3989,6 @@ export class AgentManager {
|
|
|
4681
3989
|
if (!stored?.findings) {
|
|
4682
3990
|
throw new ApiError(409, `Task ${taskId} has no stored findings to continue from; cancel instead`);
|
|
4683
3991
|
}
|
|
4684
|
-
// Re-check + grant under the task lock: the entry checks above ran lock-free,
|
|
4685
|
-
// so a concurrent mark-complete may have claimed the gate since (the
|
|
4686
|
-
// claimCompleteGate comment promises Continue re-checks under the same lock).
|
|
4687
3992
|
await this.withTaskLock(async () => {
|
|
4688
3993
|
if (this.markCompleteInFlight.has(taskId)) {
|
|
4689
3994
|
throw new ApiError(409, `Task ${taskId} is being completed (merge in progress); try again shortly`);
|
|
@@ -4701,9 +4006,6 @@ export class AgentManager {
|
|
|
4701
4006
|
dispatched = await this.dispatchServerFixToDev(taskId, JSON.stringify(stored.findings));
|
|
4702
4007
|
}
|
|
4703
4008
|
finally {
|
|
4704
|
-
// The grant is only spent when the fix prompt actually reached the dev.
|
|
4705
|
-
// Decrement (not restore-snapshot): a snapshot write-back would also
|
|
4706
|
-
// erase a concurrent Continue's grant.
|
|
4707
4009
|
if (!dispatched) {
|
|
4708
4010
|
await this.withTaskLock(async () => {
|
|
4709
4011
|
const fresh = await this.taskStore.get(taskId);
|
|
@@ -4726,14 +4028,9 @@ export class AgentManager {
|
|
|
4726
4028
|
if (!task.agentId) {
|
|
4727
4029
|
throw new ApiError(400, `Task ${taskId} has no dev agent; cannot continue`);
|
|
4728
4030
|
}
|
|
4729
|
-
// Retained-dev precondition: the paused dev must still hold this task and its
|
|
4730
|
-
// worktree. If broken (cancelled, reassigned, external interference), continueSession
|
|
4731
|
-
// would have no checkout to reuse — steer the user to Retry instead of recreating it.
|
|
4732
4031
|
const devAgentId = task.agentId;
|
|
4733
4032
|
const devState = await this.agentStore.get(devAgentId);
|
|
4734
4033
|
if (devState?.taskId !== taskId || !devState.worktreePath) {
|
|
4735
|
-
// code-phase max_rounds has no Retry (Continue/Complete/Cancel only), so don't
|
|
4736
|
-
// point at Retry: the work is on the PR — merge it (mark-complete) or abandon (cancel).
|
|
4737
4034
|
throw new ApiError(409, `Dev ${devAgentId} no longer holds task ${taskId}'s reserved worktree (cannot continue); ` +
|
|
4738
4035
|
`use mark-complete to merge the PR as-is, or cancel the task`);
|
|
4739
4036
|
}
|
|
@@ -4756,9 +4053,6 @@ export class AgentManager {
|
|
|
4756
4053
|
await rollback();
|
|
4757
4054
|
throw new ApiError(500, `Failed to arm pr-fixed watcher for task ${taskId}`);
|
|
4758
4055
|
}
|
|
4759
|
-
// rollback returns the task to max_rounds AND re-parks the dev to waiting, keeping
|
|
4760
|
-
// the reserved-dev invariant (bound + 'waiting' + worktree) so a later continue/cancel
|
|
4761
|
-
// sees a consistent state and the snapshot shows 'waiting', not a stale 'working'.
|
|
4762
4056
|
const rollbackAndRepark = async () => {
|
|
4763
4057
|
await rollback();
|
|
4764
4058
|
await this.markAgentWaiting(devAgentId, taskId).catch(() => undefined);
|
|
@@ -4782,12 +4076,6 @@ export class AgentManager {
|
|
|
4782
4076
|
const fresh = await this.taskStore.get(taskId);
|
|
4783
4077
|
return fresh;
|
|
4784
4078
|
}
|
|
4785
|
-
// Undo PHASE 1+2 of dispatchReviewToQa when startSession ultimately fails
|
|
4786
|
-
// (resolved false, or threw a hard error other than ack_unknown / dialog).
|
|
4787
|
-
// Restores task fields to the pre-dispatch snapshot and re-establishes the pane-signal
|
|
4788
|
-
// watcher matching the RESTORED state (develop spec/pr-created, spec verdict,
|
|
4789
|
-
// approved→pr-merge-ready, or the review fallback verdict watcher) so a later
|
|
4790
|
-
// emit using the prior token is still consumed.
|
|
4791
4079
|
async rollbackDispatchReviewPhase1(taskId, originalStatus, isTerminalAtClaim, snapshot) {
|
|
4792
4080
|
await this.withTaskLock(async () => {
|
|
4793
4081
|
const fresh = await this.taskStore.get(taskId);
|
|
@@ -4809,9 +4097,6 @@ export class AgentManager {
|
|
|
4809
4097
|
});
|
|
4810
4098
|
if (!this.phaseSignalWatcher)
|
|
4811
4099
|
return;
|
|
4812
|
-
// Special case: approved task with a pending PostApproveCompletion. PHASE 2
|
|
4813
|
-
// stopped its pr-merge-ready watcher; restore it so dev's later
|
|
4814
|
-
// pr-merge-ready emit gets consumed.
|
|
4815
4100
|
if (originalStatus === 'approved') {
|
|
4816
4101
|
const completion = await this.postApproveStore.get(taskId);
|
|
4817
4102
|
const task = await this.taskStore.get(taskId);
|
|
@@ -4831,9 +4116,6 @@ export class AgentManager {
|
|
|
4831
4116
|
return;
|
|
4832
4117
|
}
|
|
4833
4118
|
}
|
|
4834
|
-
// General case: rolled-back task may still want a watcher matching its
|
|
4835
|
-
// current (restored) state — develop dispatch still waiting on
|
|
4836
|
-
// spec-done/pr-created, recheck still waiting on verdict, etc.
|
|
4837
4119
|
const restored = await this.taskStore.get(taskId);
|
|
4838
4120
|
if (!restored || !restored.signalToken)
|
|
4839
4121
|
return;
|
|
@@ -4853,11 +4135,6 @@ export class AgentManager {
|
|
|
4853
4135
|
console.warn(`[AgentManager] rollback: re-establish ${mapped.expectedKinds.join(',')} failed for task=${taskId}:`, err);
|
|
4854
4136
|
}
|
|
4855
4137
|
}
|
|
4856
|
-
// Single source of truth for "what watcher should this task have, given its
|
|
4857
|
-
// current state". Used by both setupRecoveredSpecSignals (restart recovery)
|
|
4858
|
-
// and rollbackDispatchReviewPhase1 (manual dispatch failure).
|
|
4859
|
-
// Dev's first prompt offers the spec-first or straight-to-code path; the arm
|
|
4860
|
-
// must accept both completion signals for the task's protocol family.
|
|
4861
4138
|
devInitialSignalKinds(reviewMode) {
|
|
4862
4139
|
const mode = reviewMode ?? this.config.review.mode ?? 'github';
|
|
4863
4140
|
return mode === 'server'
|
|
@@ -4874,7 +4151,6 @@ export class AgentManager {
|
|
|
4874
4151
|
return { expectedKinds: ['spec-fixed'], agentId: task.agentId };
|
|
4875
4152
|
}
|
|
4876
4153
|
if (task.phase !== 'spec' && task.status === 'fixing' && task.agentId) {
|
|
4877
|
-
// Code-track fixing: dev emits pr-fixed when the round is done.
|
|
4878
4154
|
return { expectedKinds: ['pr-fixed'], agentId: task.agentId };
|
|
4879
4155
|
}
|
|
4880
4156
|
if (task.phase === undefined && task.status === 'in_progress' && task.agentId) {
|
|
@@ -4884,14 +4160,10 @@ export class AgentManager {
|
|
|
4884
4160
|
return { expectedKinds: ['pr-created'], agentId: task.agentId };
|
|
4885
4161
|
}
|
|
4886
4162
|
if (task.phase !== 'spec' && task.status === 'review' && task.qaAgentId) {
|
|
4887
|
-
// Fallback verdict watcher for the same-identity (422) case; the poller is
|
|
4888
|
-
// the primary, authoritative verdict source for distinct identities.
|
|
4889
4163
|
return { expectedKinds: ['pr-approved', 'pr-changes-requested'], agentId: task.qaAgentId };
|
|
4890
4164
|
}
|
|
4891
4165
|
return undefined;
|
|
4892
4166
|
}
|
|
4893
|
-
// Recovery mapping for server-mode tasks: the watcher is the ONLY verdict
|
|
4894
|
-
// channel (no poller backstop), so every awaiting state must re-arm on restart.
|
|
4895
4167
|
mapServerTaskToExpectedWatcher(task) {
|
|
4896
4168
|
const isSpec = task.phase === 'spec';
|
|
4897
4169
|
if (task.status === 'review' && task.qaAgentId) {
|
|
@@ -4910,10 +4182,6 @@ export class AgentManager {
|
|
|
4910
4182
|
}
|
|
4911
4183
|
return undefined;
|
|
4912
4184
|
}
|
|
4913
|
-
// Public re-establish helper for in-band recoveries that don't rotate the token
|
|
4914
|
-
// (e.g. handler reject path: agent's next emit must still match current
|
|
4915
|
-
// task.signalToken, so rotating would strand it). Returns whether a watcher
|
|
4916
|
-
// armed; callers that consumed a signal must hold on false or it has no consumer.
|
|
4917
4185
|
async setupPhaseSignal(taskId, agentId, expectedKinds, opts = {}) {
|
|
4918
4186
|
const task = await this.taskStore.get(taskId);
|
|
4919
4187
|
if (!task?.signalToken)
|
|
@@ -4945,8 +4213,6 @@ export class AgentManager {
|
|
|
4945
4213
|
const t = await this.taskStore.get(taskId);
|
|
4946
4214
|
if (!t)
|
|
4947
4215
|
throw new ApiError(404, 'Task not found');
|
|
4948
|
-
// max_rounds is non-terminal but still retryable for spec-phase tasks (their
|
|
4949
|
-
// only escape this iteration). code-phase max_rounds uses continue/complete/cancel.
|
|
4950
4216
|
const retryable = TERMINAL_STATUSES.includes(t.status)
|
|
4951
4217
|
|| (t.status === 'max_rounds' && t.phase === 'spec');
|
|
4952
4218
|
if (!retryable) {
|
|
@@ -4959,15 +4225,10 @@ export class AgentManager {
|
|
|
4959
4225
|
description: old.description,
|
|
4960
4226
|
preferredAgentId: old.preferredAgentId,
|
|
4961
4227
|
};
|
|
4962
|
-
// Retry preserves uploaded images: read the old task's staged bytes up-front
|
|
4963
|
-
// (missing → visible 409 before any new task/binding is created).
|
|
4964
4228
|
if (old.images?.length) {
|
|
4965
4229
|
input.images = await this.readStagedImages(old.id, old.images);
|
|
4966
4230
|
}
|
|
4967
4231
|
await this.validateTaskDispatch(old.projectId, input);
|
|
4968
|
-
// Non-terminal retry (spec-phase max_rounds) must finalize the old paused task so it
|
|
4969
|
-
// leaves the active list instead of lingering beside the fresh run. Terminal tasks
|
|
4970
|
-
// already are their own history record and are left untouched.
|
|
4971
4232
|
if (!TERMINAL_STATUSES.includes(old.status)) {
|
|
4972
4233
|
await this.cancelTask(old.id);
|
|
4973
4234
|
}
|
|
@@ -5031,33 +4292,19 @@ export class AgentManager {
|
|
|
5031
4292
|
throw new Error(`gh pr merge failed for PR #${task.prNumber}: ${result.stderr || result.stdout}`);
|
|
5032
4293
|
}
|
|
5033
4294
|
}
|
|
5034
|
-
// Manually finish a max_rounds task: merge its PR, then reuse the normal merged
|
|
5035
|
-
// cleanup chain (pr.merged handler → transition merged + post-merge worktree/branch
|
|
5036
|
-
// cleanup + /clear + release). Same path the poller drives when it detects the merge.
|
|
5037
4295
|
async markTaskComplete(taskId) {
|
|
5038
4296
|
const peek = await this.taskStore.get(taskId);
|
|
5039
4297
|
if (!peek)
|
|
5040
4298
|
throw new ApiError(404, `Task ${taskId} not found`);
|
|
5041
|
-
// Human gate (spec §10): ready / merge-ready confirm runs its own completion
|
|
5042
|
-
// matrix (with its own lock-claimed gate); the legacy max_rounds path below
|
|
5043
|
-
// is untouched.
|
|
5044
4299
|
if (peek.status === 'ready' || peek.status === 'merge-ready') {
|
|
5045
4300
|
return this.confirmHumanGate(taskId);
|
|
5046
4301
|
}
|
|
5047
|
-
// Claim under the task lock — the whole merge window. markCompleteInFlight
|
|
5048
|
-
// blocks Cancel / Call review / Continue (all re-check it under the same
|
|
5049
|
-
// lock) so they can't act on the same snapshot and interleave with the
|
|
5050
|
-
// irreversible `gh pr merge` (or, server mode, the publish dispatch).
|
|
5051
4302
|
const task = await this.claimCompleteGate(taskId, ['max_rounds', 'approved']);
|
|
5052
4303
|
try {
|
|
5053
|
-
// Server-mode publish retry: a failed afterDone dispatch leaves the task
|
|
5054
|
-
// 'approved' with dev released — mark-complete re-runs the publish.
|
|
5055
4304
|
const serverApprovedRetry = task.status === 'approved' && task.reviewMode === 'server';
|
|
5056
4305
|
if (!serverApprovedRetry && task.status !== 'max_rounds') {
|
|
5057
4306
|
throw new ApiError(409, `Task ${taskId} is not at max_rounds (status=${task.status})`);
|
|
5058
4307
|
}
|
|
5059
|
-
// spec-phase max_rounds escapes via Retry/Cancel only (the UI hides complete). Guard the
|
|
5060
|
-
// endpoint too so a direct API call / older client can't merge a spec cap through here.
|
|
5061
4308
|
if (task.phase === 'spec') {
|
|
5062
4309
|
throw new ApiError(409, `Mark complete is only supported for code-phase tasks`);
|
|
5063
4310
|
}
|
|
@@ -5073,7 +4320,6 @@ export class AgentManager {
|
|
|
5073
4320
|
throw new ApiError(409, `Task ${taskId} publish was delivered and is awaiting code-ready; ` +
|
|
5074
4321
|
`retry only after it fails (if the publish is verifiably dead, Cancel the task)`);
|
|
5075
4322
|
}
|
|
5076
|
-
// task.afterDone was snapshotted when the approve verdict routed it.
|
|
5077
4323
|
const afterDone = this.resolveAfterDone(task);
|
|
5078
4324
|
if (afterDone === null) {
|
|
5079
4325
|
throw new ApiError(409, `Task ${taskId} is approved with no afterDone step; nothing to retry`);
|
|
@@ -5081,13 +4327,7 @@ export class AgentManager {
|
|
|
5081
4327
|
await this.dispatchServerAfterDone(taskId, afterDone);
|
|
5082
4328
|
return (await this.taskStore.get(taskId));
|
|
5083
4329
|
}
|
|
5084
|
-
// Server-mode capped task, human accepts as-is: no PR exists yet — run the
|
|
5085
|
-
// afterDone flow (or finish directly) instead of the legacy PR merge.
|
|
5086
|
-
// Inside the in-flight claim so a concurrent Continue can't act on the same
|
|
5087
|
-
// max_rounds snapshot and release dev mid-publish.
|
|
5088
4330
|
if (task.reviewMode === 'server') {
|
|
5089
|
-
// Max_rounds never routed an approve verdict — snapshot afterDone NOW so
|
|
5090
|
-
// the eventual ready-confirm uses this decision, not future hot config.
|
|
5091
4331
|
const afterDone = this.coerceAfterDone(task.projectId, this.config.review.afterDone);
|
|
5092
4332
|
await this.updateTask(taskId, { afterDone });
|
|
5093
4333
|
if (afterDone === null) {
|
|
@@ -5103,11 +4343,6 @@ export class AgentManager {
|
|
|
5103
4343
|
await this.dispatchServerAfterDone(taskId, afterDone);
|
|
5104
4344
|
return (await this.taskStore.get(taskId));
|
|
5105
4345
|
}
|
|
5106
|
-
// Held-agent check AFTER claiming (the claim blocks a new continueDevRound from starting),
|
|
5107
|
-
// and re-reading agent state here catches a continue that Held an agent in the window just
|
|
5108
|
-
// before our claim. dispatchPostMergeCleanup early-returns on awaiting_human, so merging with
|
|
5109
|
-
// a held dev/QA still bound to this task would orphan the merged task on a locked agent.
|
|
5110
|
-
// Bound to *this* task only — a stale id whose agent moved on is harmless (cleanup early-returns).
|
|
5111
4346
|
for (const agentId of [task.agentId, task.qaAgentId]) {
|
|
5112
4347
|
if (!agentId)
|
|
5113
4348
|
continue;
|
|
@@ -5116,9 +4351,6 @@ export class AgentManager {
|
|
|
5116
4351
|
throw new ApiError(409, `Agent ${agentId} is awaiting human intervention on this task; resume/restart/delete it before marking complete`);
|
|
5117
4352
|
}
|
|
5118
4353
|
}
|
|
5119
|
-
// Atomically transition max_rounds → merge-ready under the task lock. merge-ready is
|
|
5120
|
-
// active + already in pr.merged's fromStatus, so the post-merge cleanup chain runs;
|
|
5121
|
-
// combined with the in-flight claim it fully serializes against the other actions.
|
|
5122
4354
|
const claimed = await this.transitionTaskStatus(taskId, 'merge-ready', { fromStatus: ['max_rounds'] });
|
|
5123
4355
|
if (!claimed) {
|
|
5124
4356
|
throw new ApiError(409, `Task ${taskId} changed status during mark-complete; aborted`);
|
|
@@ -5159,9 +4391,6 @@ export class AgentManager {
|
|
|
5159
4391
|
if (!dev)
|
|
5160
4392
|
return;
|
|
5161
4393
|
this.phaseSignalWatcher?.stop(taskId);
|
|
5162
|
-
// Keep the agent BOUND (non-dispatchable) until branch cleanup + context reset finish, then
|
|
5163
|
-
// release. dispatchPostMergeCleanup owns the whole lifecycle: worktree removal → branch
|
|
5164
|
-
// delete → /clear → release (no agent notification).
|
|
5165
4394
|
if (task.prNumber && task.branch) {
|
|
5166
4395
|
const ctx = {
|
|
5167
4396
|
taskId: task.id,
|
|
@@ -5170,7 +4399,6 @@ export class AgentManager {
|
|
|
5170
4399
|
await this.dispatchPostMergeCleanup(task.agentId, ctx).catch(err => console.warn(`[AgentManager] cleanupAfterMerge: dispatchPostMergeCleanup(${task.agentId}) failed:`, err));
|
|
5171
4400
|
}
|
|
5172
4401
|
else {
|
|
5173
|
-
// No PR/branch to clean up and nothing to compact — release immediately so the agent frees.
|
|
5174
4402
|
await this.releaseAgentForTask(task.agentId, taskId, 'idle').catch(err => console.warn(`[AgentManager] cleanupAfterMerge: releaseAgentForTask(${task.agentId}, ${taskId}) failed:`, err));
|
|
5175
4403
|
}
|
|
5176
4404
|
}
|
|
@@ -5215,22 +4443,13 @@ export class AgentManager {
|
|
|
5215
4443
|
await this.deleteLocalBranchInRepo(runner, state.repoPath, ctx.branch, agentId);
|
|
5216
4444
|
}
|
|
5217
4445
|
else {
|
|
5218
|
-
// No repoPath on the binding → can't run `git branch -D`. Keep the "can't clean → warn
|
|
5219
|
-
// server-side" contract: surface the skip (the merged branch may linger locally) rather than
|
|
5220
|
-
// dropping it silently now that there is no agent notification carrying `branch-cleanup: skipped`.
|
|
5221
4446
|
console.warn(`[AgentManager] dispatchPostMergeCleanup(${agentId}): no repoPath on binding; skipping local ` +
|
|
5222
4447
|
`branch delete for ${ctx.branch} (it may linger locally)`);
|
|
5223
4448
|
}
|
|
5224
|
-
// No agent dialogue: the merged task is already done and the agent is idle, so baxian just resets
|
|
5225
|
-
// its context with /clear and releases it. Worktree + local branch were cleaned above; a failed
|
|
5226
|
-
// branch delete is logged server-side (deleteLocalBranchInRepo), not surfaced into the pane.
|
|
5227
4449
|
const tmux = new TmuxManager(runner);
|
|
5228
4450
|
const runtime = agentRuntimeKindFor(agent);
|
|
5229
4451
|
void this.runPostMergeCompaction(tmux, state.paneId, agentId, ctx.taskId, runtime).catch(err => console.warn(`[AgentManager] runPostMergeCompaction(${agentId}) failed:`, err));
|
|
5230
4452
|
}
|
|
5231
|
-
// Release the post-merge binding (clears taskId + frees the lock we held → agent dispatchable
|
|
5232
|
-
// again). Shared success tail. Skips when the binding has already moved to another task, so it
|
|
5233
|
-
// never releases a lock owned by a different flow.
|
|
5234
4453
|
async releasePostMergeAgent(agentId, taskId) {
|
|
5235
4454
|
const state = await this.agentStore.get(agentId);
|
|
5236
4455
|
if (state?.taskId !== taskId)
|
|
@@ -5242,8 +4461,6 @@ export class AgentManager {
|
|
|
5242
4461
|
console.warn(`[AgentManager] releasePostMergeAgent: releaseAgentForTask(${agentId}, ${taskId}) failed:`, err);
|
|
5243
4462
|
}
|
|
5244
4463
|
}
|
|
5245
|
-
// Removes the merged worktree but KEEPS taskId on the binding, so the agent remains
|
|
5246
|
-
// non-dispatchable while branch delete + /clear run. Only worktreePath is dropped.
|
|
5247
4464
|
async removeMergedWorktree(cfg, agentId, expectedTaskId) {
|
|
5248
4465
|
await this.withTaskLock(async () => {
|
|
5249
4466
|
const state = await this.agentStore.get(agentId);
|
|
@@ -5270,12 +4487,7 @@ export class AgentManager {
|
|
|
5270
4487
|
});
|
|
5271
4488
|
});
|
|
5272
4489
|
}
|
|
5273
|
-
// Best-effort local cleanup after merge: prune the remote-tracking ref + worktree admin entry, then
|
|
5274
|
-
// delete the local branch. shellQuote prevents injection. Failures are logged server-side only — the
|
|
5275
|
-
// wrap-up resets the pane with /clear regardless, and there is no agent notification to keep honest.
|
|
5276
4490
|
async deleteLocalBranchInRepo(runner, repoPath, branch, agentId) {
|
|
5277
|
-
// --expire=now: bare `git worktree prune` honors gc.worktreePruneExpire (default 3 months),
|
|
5278
|
-
// so a worktree that the release just removed could still be tracked as occupying the ref.
|
|
5279
4491
|
const fetchCmd = `cd ${shellQuote(repoPath)} && git fetch --prune origin && git worktree prune --expire=now`;
|
|
5280
4492
|
try {
|
|
5281
4493
|
const fetchResult = await runner.exec(fetchCmd, { timeout: this.postMergeFetchTimeoutMs });
|
|
@@ -5290,7 +4502,6 @@ export class AgentManager {
|
|
|
5290
4502
|
const delCmd = `cd ${shellQuote(repoPath)} && git branch -D ${shellQuote(branch)}`;
|
|
5291
4503
|
try {
|
|
5292
4504
|
const delResult = await runner.exec(delCmd, { timeout: this.postMergeBranchTimeoutMs });
|
|
5293
|
-
// exit 0 → deleted; "not found"/"no such branch" → already absent (auto-delete-head-branches). Both fine.
|
|
5294
4505
|
if (delResult.exitCode !== 0 && !/not found|not a valid|no such branch/i.test(delResult.stderr)) {
|
|
5295
4506
|
console.warn(`[AgentManager] deleteLocalBranchInRepo(${agentId}, ${branch}): branch -D exit=${delResult.exitCode} ` +
|
|
5296
4507
|
`stderr=${delResult.stderr.trim()}`);
|
|
@@ -5301,8 +4512,6 @@ export class AgentManager {
|
|
|
5301
4512
|
}
|
|
5302
4513
|
}
|
|
5303
4514
|
async runPostMergeCompaction(tmux, paneId, agentId, originalTaskId, runtime) {
|
|
5304
|
-
// 等待获取(而非无条件 add):手动 compact 持锁时直接进入会并发,
|
|
5305
|
-
// 且 finally 会误删对方的 guard 放穿后续请求。
|
|
5306
4515
|
await this.acquireCompactGuard(agentId);
|
|
5307
4516
|
try {
|
|
5308
4517
|
await this.runPostMergeCompactionSteps(tmux, paneId, agentId, originalTaskId, runtime);
|
|
@@ -5338,9 +4547,6 @@ export class AgentManager {
|
|
|
5338
4547
|
};
|
|
5339
4548
|
if (!await bindingStillOurs())
|
|
5340
4549
|
return;
|
|
5341
|
-
// No notification, no dialogue: the merged task is done and the agent is idle, so just reset its
|
|
5342
|
-
// context. sendPostMergeSlashCommand interrupts any lingering turn (Esc), waits for idle, sends
|
|
5343
|
-
// /clear, verifies it wasn't rejected, and retries. Returns false if the binding moved mid-way.
|
|
5344
4550
|
let cleared = false;
|
|
5345
4551
|
try {
|
|
5346
4552
|
cleared = await this.sendPostMergeSlashCommand(tmux, paneId, agentId, runtime, bindingStillOurs);
|
|
@@ -5349,17 +4555,15 @@ export class AgentManager {
|
|
|
5349
4555
|
console.warn(`[AgentManager] runPostMergeCompaction(${agentId}) /clear failed:`, err);
|
|
5350
4556
|
}
|
|
5351
4557
|
if (!cleared) {
|
|
5352
|
-
// A runtime that exited to a shell can't take /clear; restarting it yields the same clean slate.
|
|
5353
4558
|
if (await this.recoverPostMergeExitedRuntime(tmux, paneId, agentId, originalTaskId, runtime)) {
|
|
5354
4559
|
cleared = true;
|
|
5355
4560
|
}
|
|
5356
4561
|
else if (!await bindingStillOurs()) {
|
|
5357
|
-
return;
|
|
4562
|
+
return;
|
|
5358
4563
|
}
|
|
5359
4564
|
else if (!await this.clearComposerForReuse(tmux, paneId, agentId)) {
|
|
5360
|
-
return;
|
|
4565
|
+
return;
|
|
5361
4566
|
}
|
|
5362
|
-
// else: composer cleaned → a stuck pane must not strand the agent bound to a merged task; release.
|
|
5363
4567
|
}
|
|
5364
4568
|
await this.releasePostMergeAgent(agentId, originalTaskId);
|
|
5365
4569
|
}
|
|
@@ -5409,9 +4613,6 @@ export class AgentManager {
|
|
|
5409
4613
|
async sendPostMergeSlashCommand(tmux, paneId, agentId, runtime, bindingStillOurs) {
|
|
5410
4614
|
let rejection;
|
|
5411
4615
|
for (let attempt = 1; attempt <= 2; attempt++) {
|
|
5412
|
-
// The runtime rejects /clear while a turn is in progress, and the agent's last task turn can
|
|
5413
|
-
// still be settling when our idle scrape passes. Esc interrupts it (the same stop the cancel
|
|
5414
|
-
// flow runs before /clear); a genuinely idle pane absorbs it harmlessly.
|
|
5415
4616
|
await tmux.sendKeysToPane(paneId, 'Escape');
|
|
5416
4617
|
await this.waitForReplPromptReady(tmux, paneId, runtime, this.compactIdleWaitMs);
|
|
5417
4618
|
if (!await bindingStillOurs())
|
|
@@ -5441,10 +4642,6 @@ export class AgentManager {
|
|
|
5441
4642
|
runtimeSlashCommandRejectedPattern(command) {
|
|
5442
4643
|
return new RegExp(`["'“”‘’]?${command}["'“”‘’]?\\s+is disabled while a task is in progress\\.`, 'gi');
|
|
5443
4644
|
}
|
|
5444
|
-
// pane_current_command 是 runtime 是否仍活的权威信号(不被 viewport stale frame 骗)。
|
|
5445
|
-
// anchor 在 codex busy 屏(`Working on it…\n esc to interrupt`)不存在,所以 busy 状态只看
|
|
5446
|
-
// procTitle;只有准备返回 idle 时才用 anchor 作双重证据,挡 stale-frame + shell 误报。
|
|
5447
|
-
// 入口先等一拍:上一步刚 sendEnter,给 runtime 时间进入 busy,避免观察到假 idle。
|
|
5448
4645
|
async waitForReplPromptReady(tmux, paneId, runtime, timeoutMs) {
|
|
5449
4646
|
const deadline = Date.now() + timeoutMs;
|
|
5450
4647
|
await tmux.waitReplReady(paneId, runtime, {
|
|
@@ -5477,9 +4674,6 @@ export class AgentManager {
|
|
|
5477
4674
|
stopPhaseSignalWatcher(taskId) {
|
|
5478
4675
|
this.phaseSignalWatcher?.stop(taskId);
|
|
5479
4676
|
}
|
|
5480
|
-
// Returns 'live' when a code-ready watcher is armed and a publish prompt is in flight,
|
|
5481
|
-
// 'delivered' when no watcher is running but publishDispatchedAt indicates delivery,
|
|
5482
|
-
// or false when the approved state is retryable (stops any stale recovered watcher).
|
|
5483
4677
|
checkPublishInFlight(taskId, publishDispatchedAt) {
|
|
5484
4678
|
if (this.phaseSignalWatcher?.expectedKindsFor(taskId).has('code-ready')) {
|
|
5485
4679
|
if (!this.phaseSignalWatcher.isRecovered(taskId) || publishDispatchedAt)
|
|
@@ -5489,11 +4683,6 @@ export class AgentManager {
|
|
|
5489
4683
|
}
|
|
5490
4684
|
return publishDispatchedAt ? 'delivered' : false;
|
|
5491
4685
|
}
|
|
5492
|
-
// Prompt build (via task.signalToken) and watcher must share the same token.
|
|
5493
|
-
// Returns whether dispatch may safely proceed. False ONLY when a configured watcher failed
|
|
5494
|
-
// to arm — the dangerous case where a same-identity verdict would have no consumer. When no
|
|
5495
|
-
// watcher subsystem is configured at all (poller-only deployment) the poller is the verdict
|
|
5496
|
-
// path, so this returns true and does not block. Best-effort callers ignore the result.
|
|
5497
4686
|
async setupPhaseSignalWatcher(taskId, agentId, expectedKinds, token, skipSnapshot = false, onReadFile) {
|
|
5498
4687
|
if (!this.phaseSignalWatcher)
|
|
5499
4688
|
return true;
|
|
@@ -5516,38 +4705,21 @@ export class AgentManager {
|
|
|
5516
4705
|
return false;
|
|
5517
4706
|
}
|
|
5518
4707
|
}
|
|
5519
|
-
// Arm a watcher for a signal the just-dispatched prompt will emit, then hold the agent if it
|
|
5520
|
-
// could not arm. Used by post-dispatch arms (develop/spec/code phases) whose pane only exists
|
|
5521
|
-
// after dispatch, so they can't gate before sending the prompt the way verdict dispatch does.
|
|
5522
4708
|
async armPostDispatchSignalOrHold(taskId, agentId, expectedKinds, token, skipSnapshot = false, onReadFile) {
|
|
5523
4709
|
const armed = await this.setupPhaseSignalWatcher(taskId, agentId, expectedKinds, token, skipSnapshot, onReadFile);
|
|
5524
4710
|
if (!armed)
|
|
5525
4711
|
await this.holdAgentForUnarmedSignal(taskId, agentId, expectedKinds);
|
|
5526
4712
|
}
|
|
5527
|
-
// The prompt expecting a pane signal is already out but its watcher failed to arm — the signal
|
|
5528
|
-
// would have no consumer. Hold the agent so the stuck state is explicit instead of silently
|
|
5529
|
-
// waiting forever (#218 item 4). resumeAgent refuses Resume for this phase under an active task
|
|
5530
|
-
// (Resume can't rebuild the watcher), so the operator cancels the task / deletes the agent to retry.
|
|
5531
4713
|
async holdAgentForUnarmedSignal(taskId, agentId, expectedKinds) {
|
|
5532
4714
|
const label = (Array.isArray(expectedKinds) ? [...expectedKinds] : [expectedKinds]).join(',');
|
|
5533
4715
|
await this.markAwaitingHuman(agentId, `signal-arm-failed:${label}`, 'Pane-signal watcher failed to arm after dispatch; the prompt expects a signal with no consumer. Cancel the task or delete the agent to retry.', { expectedTaskId: taskId });
|
|
5534
4716
|
}
|
|
5535
|
-
// Public helper for phases whose dispatch lives outside this class
|
|
5536
|
-
// (handlers.ts pr.created/pr.updated → review/recheck). Rotates the task's
|
|
5537
|
-
// signalToken atomically and sets up the watcher; returns the new token so the
|
|
5538
|
-
// caller can wire-up only when this resolves.
|
|
5539
4717
|
async rotateAndSetupPhaseSignal(taskId, agentId, expectedKinds) {
|
|
5540
4718
|
const newToken = createSignalToken();
|
|
5541
4719
|
await this.updateTask(taskId, { signalToken: newToken });
|
|
5542
4720
|
const armed = await this.setupPhaseSignalWatcher(taskId, agentId, expectedKinds, newToken);
|
|
5543
4721
|
return { token: newToken, armed };
|
|
5544
4722
|
}
|
|
5545
|
-
// Atomically undo a verdict dispatch (pr.created / pr.updated handlers) when the verdict watcher
|
|
5546
|
-
// failed to arm and the QA prompt was never sent: restore the pre-transition status/token/anchor,
|
|
5547
|
-
// drop the QA binding, and re-establish the watcher matching the restored state so an
|
|
5548
|
-
// already-emitted prior-phase signal (e.g. the dev's pr-created) is re-consumed via the snapshot
|
|
5549
|
-
// scan. Without restoring the rotated token, the dev's prior prompt signal would fail the token
|
|
5550
|
-
// gate and the task would stall. Mirrors rollbackDispatchReviewPhase1 for the automated handlers.
|
|
5551
4723
|
async rollbackVerdictArmFailure(taskId, restore) {
|
|
5552
4724
|
await this.withTaskLock(async () => {
|
|
5553
4725
|
const fresh = await this.taskStore.get(taskId);
|
|
@@ -5579,22 +4751,11 @@ export class AgentManager {
|
|
|
5579
4751
|
const devAgentId = task.agentId;
|
|
5580
4752
|
if (!devAgentId)
|
|
5581
4753
|
return null;
|
|
5582
|
-
// Rotate token for the code phase so dev's pr-created signal is fresh; old
|
|
5583
|
-
// spec token must not survive into a different expected-kind set.
|
|
5584
4754
|
const newToken = createSignalToken();
|
|
5585
|
-
// Atomic transition + persist: 旧版先 transition 再 updateTask, 中间崩溃 task 卡在
|
|
5586
|
-
// (phase='spec', status='in_progress') — setupRecoveredSpecSignals 三个 case 都不匹配,
|
|
5587
|
-
// freshness gate 也拒所有 spec.* event, 任务 stranded 无 auto-recovery。
|
|
5588
4755
|
const transition = await this.transitionTaskStatus(taskId, 'in_progress', { fromStatus: ['review', 'fixing', 'in_progress'] }, { phase: 'code', signalToken: newToken });
|
|
5589
4756
|
if (!transition)
|
|
5590
4757
|
return null;
|
|
5591
4758
|
this.stopPhaseSignalWatcher(taskId);
|
|
5592
|
-
// Best-effort arm (NOT hold-on-failure): this runs before the code prompt is dispatched
|
|
5593
|
-
// (acquire + continueSession below), so holding here would block that reentry. And pr-created
|
|
5594
|
-
// is authoritatively detected by the GitHub poller (PR creation isn't same-identity-gated), so
|
|
5595
|
-
// a missed pane watcher only costs one poll cycle of latency, never a stuck task.
|
|
5596
|
-
// Server mode has NO poller backstop: an unarmed code-done watcher means the
|
|
5597
|
-
// dev's completion signal would have no consumer — fail closed and hold.
|
|
5598
4759
|
const codeKind = task.reviewMode === 'server' ? 'code-done' : 'pr-created';
|
|
5599
4760
|
const codeArmed = await this.setupPhaseSignalWatcher(taskId, devAgentId, codeKind, newToken);
|
|
5600
4761
|
if (!codeArmed && task.reviewMode === 'server') {
|
|
@@ -5602,7 +4763,6 @@ export class AgentManager {
|
|
|
5602
4763
|
return null;
|
|
5603
4764
|
}
|
|
5604
4765
|
if (task.qaAgentId) {
|
|
5605
|
-
// release 失败留 stale qa binding → emit intervention 让其可见。
|
|
5606
4766
|
const released = await this.releaseAgentForTask(task.qaAgentId, taskId, 'idle')
|
|
5607
4767
|
.catch(() => false);
|
|
5608
4768
|
if (!released) {
|
|
@@ -5619,10 +4779,6 @@ export class AgentManager {
|
|
|
5619
4779
|
}
|
|
5620
4780
|
const acquired = await this.acquireAgentForTask(devAgentId, taskId, 'code');
|
|
5621
4781
|
if (!acquired) {
|
|
5622
|
-
// Task already shows phase='code' in_progress with the code-done watcher
|
|
5623
|
-
// armed, and server mode has no poller backstop — without a hold the dev
|
|
5624
|
-
// never receives the code prompt and the task dead-ends. The
|
|
5625
|
-
// code-dispatch-failed hold gives Resume a redispatch path.
|
|
5626
4782
|
await this.markAwaitingHuman(devAgentId, 'code-dispatch-failed', 'Dev could not be acquired for the code phase after spec approval; the task looks in_progress but the code prompt was never dispatched. Resume the agent to redispatch or cancel the task.', { expectedTaskId: taskId }).catch(() => undefined);
|
|
5627
4783
|
await this.safeEmit({
|
|
5628
4784
|
id: '',
|
|
@@ -5640,14 +4796,10 @@ export class AgentManager {
|
|
|
5640
4796
|
resumed = await this.continueSession(taskId, devAgentId, 'code');
|
|
5641
4797
|
}
|
|
5642
4798
|
catch (err) {
|
|
5643
|
-
// 同 dispatchServerReviewToQa/dispatchServerFixToDev:DispatchTerminalError 委托给 failTaskForDispatchError
|
|
5644
|
-
// (ack_unknown → markAwaitingHuman;其他 reason → release + task failed)。
|
|
5645
4799
|
if (err instanceof DispatchTerminalError) {
|
|
5646
4800
|
await this.failTaskForDispatchError(taskId, 'code', devAgentId, err);
|
|
5647
4801
|
}
|
|
5648
4802
|
else if (!(err instanceof EnsureSessionError && err.partial.handled)) {
|
|
5649
|
-
// Task already shows phase='code' in_progress but the prompt never landed
|
|
5650
|
-
// and there is no retry entry — hold explicitly instead of dead-ending.
|
|
5651
4803
|
await this.markAwaitingHuman(devAgentId, 'code-dispatch-failed', 'Code-phase prompt was not delivered after spec approval; the task looks in_progress but the dev never received it. Resume/restart the agent or cancel the task.', { expectedTaskId: taskId }).catch(() => undefined);
|
|
5652
4804
|
}
|
|
5653
4805
|
console.error(`[AgentManager] transitionToCodePhase continueSession(dev=${devAgentId}) failed:`, err);
|
|
@@ -5668,7 +4820,6 @@ export class AgentManager {
|
|
|
5668
4820
|
}
|
|
5669
4821
|
return await this.taskStore.get(taskId);
|
|
5670
4822
|
}
|
|
5671
|
-
// ── Server review mode (spec: docs/spec/server-review-mode.md) ──────────────
|
|
5672
4823
|
async dispatchServerReviewToQa(taskId, opts) {
|
|
5673
4824
|
const dispatchPhase = opts.phase === 'spec'
|
|
5674
4825
|
? 'server-spec-review'
|
|
@@ -5678,15 +4829,11 @@ export class AgentManager {
|
|
|
5678
4829
|
const task = await this.taskStore.get(taskId);
|
|
5679
4830
|
if (!task)
|
|
5680
4831
|
throw new Error(`dispatchServerReviewToQa: task ${taskId} not found`);
|
|
5681
|
-
// spec 阶段恒为 server 中转;code 阶段仍 server-only。
|
|
5682
4832
|
if (task.reviewMode !== 'server' && opts.phase !== 'spec') {
|
|
5683
4833
|
throw new Error(`dispatchServerReviewToQa: task ${taskId} is not in server review mode`);
|
|
5684
4834
|
}
|
|
5685
4835
|
const qaId = task.qaAgentId ?? this.findQaPartner(task.agentId)?.id;
|
|
5686
4836
|
if (!qaId) {
|
|
5687
|
-
// Config validation rejects qa-less server pairs, but a hot-removed QA
|
|
5688
|
-
// can still land here — re-arm the consumed entry signal so the task
|
|
5689
|
-
// is recoverable once a QA is configured again.
|
|
5690
4837
|
const entryKind = task.status === 'fixing'
|
|
5691
4838
|
? (opts.phase === 'spec' ? 'spec-fixed' : 'code-fixed')
|
|
5692
4839
|
: (opts.phase === 'spec' ? 'spec-done' : 'code-done');
|
|
@@ -5720,25 +4867,17 @@ export class AgentManager {
|
|
|
5720
4867
|
if (!claim)
|
|
5721
4868
|
return null;
|
|
5722
4869
|
const { qaId, devAgentId, projectId, newToken, newRound } = claim;
|
|
5723
|
-
// continueSession failure after the transition would otherwise strand the
|
|
5724
|
-
// task in 'review' with a fresh token nobody will ever signal.
|
|
5725
4870
|
const rollback = async () => {
|
|
5726
4871
|
await this.transitionTaskStatus(taskId, claim.originalStatus, { fromStatus: ['review'] }, {
|
|
5727
4872
|
signalToken: claim.originalToken,
|
|
5728
4873
|
batchIndex: claim.originalBatchIndex,
|
|
5729
4874
|
batchTotal: claim.originalBatchTotal,
|
|
5730
|
-
// spec transition 写入 phase:'spec';github 首轮失败若不还原,dev 直发
|
|
5731
|
-
// pr-created 会被 legacy freshness gate 拒(设计 §2)。
|
|
5732
4875
|
phase: claim.originalPhase,
|
|
5733
4876
|
...(opts.phase === 'spec'
|
|
5734
4877
|
? { specReviewRound: claim.originalRound }
|
|
5735
4878
|
: { reviewRound: claim.originalRound }),
|
|
5736
4879
|
}).catch(() => undefined);
|
|
5737
4880
|
};
|
|
5738
|
-
// The entry signal (code/spec-done|fixed) was already consumed by the
|
|
5739
|
-
// watcher; a pre-transition failure must re-arm it with the unrotated token
|
|
5740
|
-
// or the agent's re-emit after the operator fixes availability has no
|
|
5741
|
-
// consumer.
|
|
5742
4881
|
const rearmEntrySignal = async () => {
|
|
5743
4882
|
const entryKind = claim.originalStatus === 'fixing'
|
|
5744
4883
|
? (opts.phase === 'spec' ? 'spec-fixed' : 'code-fixed')
|
|
@@ -5806,8 +4945,6 @@ export class AgentManager {
|
|
|
5806
4945
|
});
|
|
5807
4946
|
return null;
|
|
5808
4947
|
}
|
|
5809
|
-
// First dispatch creates the QA's base-detached worktree (startSession);
|
|
5810
|
-
// batch continuations reuse the live session + worktree (continueSession).
|
|
5811
4948
|
const sessionOpts = {
|
|
5812
4949
|
bypassTaskStatusGate: true,
|
|
5813
4950
|
signalToken: newToken,
|
|
@@ -5818,14 +4955,8 @@ export class AgentManager {
|
|
|
5818
4955
|
...(opts.priorFindingsJson ? { serverPriorFindings: opts.priorFindingsJson } : {}),
|
|
5819
4956
|
...(opts.priorResponseJson ? { serverPriorResponse: opts.priorResponseJson } : {}),
|
|
5820
4957
|
...(opts.phase === 'spec' ? { currentSpecRound: newRound } : {}),
|
|
5821
|
-
// Arm the verdict + read-file watcher in the pane-exists / pre-paste window so a QA
|
|
5822
|
-
// [bx:read-file:...] emitted during the dispatch is a live chunk, not snapshot-suppressed.
|
|
5823
4958
|
armBeforeInject: () => this.setupPhaseSignalWatcher(taskId, qaId, expectedKind, newToken, false, (req) => { void this.handleReadFileRequest(taskId, qaId, req); }),
|
|
5824
4959
|
};
|
|
5825
|
-
// A continuation consumed the QA's reviewed signal (not the dev's entry
|
|
5826
|
-
// signal): rollback restores the prior slice's review/token, so re-arm the
|
|
5827
|
-
// reviewed watcher — the QA's re-emit replays the stored batch findings and
|
|
5828
|
-
// resumes the next-slice dispatch.
|
|
5829
4960
|
const rearmConsumedSignal = async () => {
|
|
5830
4961
|
if (opts.continuation) {
|
|
5831
4962
|
await this.setupPhaseSignal(taskId, qaId, expectedKind, { skipSnapshot: true });
|
|
@@ -5841,14 +4972,11 @@ export class AgentManager {
|
|
|
5841
4972
|
: await this.startSession(taskId, qaId, dispatchPhase, sessionOpts);
|
|
5842
4973
|
}
|
|
5843
4974
|
catch (err) {
|
|
5844
|
-
// armBeforeInject may have armed the watcher before the failing paste — drop it so a stale
|
|
5845
|
-
// entry can't fire on a rolled-back / failed task (no-op if it never armed).
|
|
5846
4975
|
this.stopPhaseSignalWatcher(taskId);
|
|
5847
4976
|
if (err instanceof DispatchTerminalError) {
|
|
5848
4977
|
await this.failTaskForDispatchError(taskId, dispatchPhase, qaId, err);
|
|
5849
4978
|
}
|
|
5850
4979
|
else if (err instanceof EnsureSessionError && err.partial.handled) {
|
|
5851
|
-
// handleDialogPendingFromRuntime already held + failed + released.
|
|
5852
4980
|
}
|
|
5853
4981
|
else {
|
|
5854
4982
|
await rollback();
|
|
@@ -5860,8 +4988,6 @@ export class AgentManager {
|
|
|
5860
4988
|
throw err;
|
|
5861
4989
|
}
|
|
5862
4990
|
if (!started) {
|
|
5863
|
-
// Covers armBeforeInject returning false (watcher couldn't arm) as well as any other
|
|
5864
|
-
// pre-paste abort; stop is a no-op when nothing armed.
|
|
5865
4991
|
this.stopPhaseSignalWatcher(taskId);
|
|
5866
4992
|
await rollback();
|
|
5867
4993
|
if (!opts.continuation) {
|
|
@@ -5898,8 +5024,6 @@ export class AgentManager {
|
|
|
5898
5024
|
newToken: createSignalToken(),
|
|
5899
5025
|
taskPhase: (task.phase ?? 'code'),
|
|
5900
5026
|
currentSpecRound: task.specReviewRound,
|
|
5901
|
-
// Continue-one-round enters from max_rounds — failure must restore THAT,
|
|
5902
|
-
// not silently demote the human's pause decision to 'review'.
|
|
5903
5027
|
originalStatus: task.status,
|
|
5904
5028
|
originalToken: task.signalToken,
|
|
5905
5029
|
};
|
|
@@ -5911,18 +5035,12 @@ export class AgentManager {
|
|
|
5911
5035
|
await this.transitionTaskStatus(taskId, claim.originalStatus, { fromStatus: ['fixing'] }, { signalToken: claim.originalToken }).catch(() => undefined);
|
|
5912
5036
|
};
|
|
5913
5037
|
const expectedKind = taskPhase === 'spec' ? 'spec-fixed' : 'code-fixed';
|
|
5914
|
-
// The QA's reviewed signal was consumed before this dispatch; pre-transition
|
|
5915
|
-
// failures must re-arm it (unrotated token) so a later re-emit is consumed.
|
|
5916
5038
|
const rearmReviewedSignal = async () => {
|
|
5917
5039
|
if (!qaAgentId)
|
|
5918
5040
|
return;
|
|
5919
5041
|
const reviewedKind = taskPhase === 'spec' ? 'spec-reviewed' : 'code-reviewed';
|
|
5920
5042
|
await this.setupPhaseSignal(taskId, qaAgentId, reviewedKind, { skipSnapshot: true });
|
|
5921
5043
|
};
|
|
5922
|
-
// Dev BEFORE QA: releasing the QA first is irreversible (binding cleared,
|
|
5923
|
-
// worktree removed, schedulable elsewhere) — a dev acquire failure after it
|
|
5924
|
-
// would leave the review-parked task with no stably-bound agent to retry
|
|
5925
|
-
// from. With the dev secured first, both failure exits keep the QA bound.
|
|
5926
5044
|
const acquired = await this.acquireAgentForTask(devAgentId, taskId, 'server-feedback');
|
|
5927
5045
|
if (!acquired) {
|
|
5928
5046
|
await rearmReviewedSignal();
|
|
@@ -5954,13 +5072,8 @@ export class AgentManager {
|
|
|
5954
5072
|
return null;
|
|
5955
5073
|
}
|
|
5956
5074
|
}
|
|
5957
|
-
// max_rounds entry = human "continue one round" via continueDevRound.
|
|
5958
5075
|
const transition = await this.transitionTaskStatus(taskId, 'fixing', { fromStatus: ['review', 'max_rounds'] }, { signalToken: newToken, fixDispatchedAt: new Date().toISOString() });
|
|
5959
5076
|
if (!transition) {
|
|
5960
|
-
// Refusal = the task left review/max_rounds concurrently (cancel / fail /
|
|
5961
|
-
// mark-complete publish). Ownership moved with it — releasing dev here
|
|
5962
|
-
// would strip a binding the winning chain may be actively using (e.g. a
|
|
5963
|
-
// publish prompt running in the pane); its own cleanup releases the dev.
|
|
5964
5077
|
await this.safeEmit({
|
|
5965
5078
|
id: '',
|
|
5966
5079
|
type: 'human.intervention',
|
|
@@ -5988,14 +5101,10 @@ export class AgentManager {
|
|
|
5988
5101
|
await this.failTaskForDispatchError(taskId, 'server-feedback', devAgentId, err);
|
|
5989
5102
|
}
|
|
5990
5103
|
else if (err instanceof EnsureSessionError && err.partial.handled) {
|
|
5991
|
-
// handled upstream
|
|
5992
5104
|
}
|
|
5993
5105
|
else {
|
|
5994
5106
|
await rollbackToEntry();
|
|
5995
5107
|
await this.releaseAgentForTask(devAgentId, taskId, 'idle').catch(() => undefined);
|
|
5996
|
-
// Rollback restored review/old-token, but the QA's reviewed signal was
|
|
5997
|
-
// consumed — without a subscriber its re-emit can never retry the fix
|
|
5998
|
-
// dispatch.
|
|
5999
5108
|
await rearmReviewedSignal();
|
|
6000
5109
|
}
|
|
6001
5110
|
throw err;
|
|
@@ -6029,9 +5138,6 @@ export class AgentManager {
|
|
|
6029
5138
|
const originalToken = task.signalToken;
|
|
6030
5139
|
const newToken = createSignalToken();
|
|
6031
5140
|
await this.updateTask(taskId, { signalToken: newToken });
|
|
6032
|
-
// The publish prompt never reached the pane — restore the pre-rotation token
|
|
6033
|
-
// (so recovery still matches the pre-dispatch arm) and clear the delivery
|
|
6034
|
-
// marker so retry knows this approved state is preemptible.
|
|
6035
5141
|
const rollbackToken = async () => {
|
|
6036
5142
|
await this.updateTask(taskId, { signalToken: originalToken, publishDispatchedAt: undefined })
|
|
6037
5143
|
.catch(() => undefined);
|
|
@@ -6050,11 +5156,6 @@ export class AgentManager {
|
|
|
6050
5156
|
});
|
|
6051
5157
|
return null;
|
|
6052
5158
|
}
|
|
6053
|
-
// Pessimistic delivery marker BEFORE the irreversible paste: every failure
|
|
6054
|
-
// path below clears it. The remaining crash window (marker written, paste
|
|
6055
|
-
// never ran) fails CLOSED — retry 409s on a publish that never started and
|
|
6056
|
-
// the operator escapes via Cancel — instead of the old window's fail-open
|
|
6057
|
-
// double publish (paste ran, marker missing, retry re-pastes).
|
|
6058
5159
|
await this.updateTask(taskId, { publishDispatchedAt: new Date().toISOString() });
|
|
6059
5160
|
let resumed = false;
|
|
6060
5161
|
try {
|
|
@@ -6069,8 +5170,6 @@ export class AgentManager {
|
|
|
6069
5170
|
await this.failTaskForDispatchError(taskId, 'server-after-done', devAgentId, err);
|
|
6070
5171
|
}
|
|
6071
5172
|
else if (!(err instanceof EnsureSessionError && err.partial.handled)) {
|
|
6072
|
-
// Keep dev BOUND — its worktree holds the reviewed (unpushed) commits.
|
|
6073
|
-
// mark-complete retries the publish via server-after-done same-task reentry.
|
|
6074
5173
|
await rollbackToken();
|
|
6075
5174
|
}
|
|
6076
5175
|
throw err;
|
|
@@ -6095,8 +5194,6 @@ export class AgentManager {
|
|
|
6095
5194
|
await this.armPostDispatchSignalOrHold(taskId, devAgentId, 'code-ready', newToken);
|
|
6096
5195
|
return await this.taskStore.get(taskId);
|
|
6097
5196
|
}
|
|
6098
|
-
// QA asked for file context during a server-mode review. Read from the DEV
|
|
6099
|
-
// worktree (the QA worktree sits on the base branch) and paste into QA's pane.
|
|
6100
5197
|
async handleReadFileRequest(taskId, qaAgentId, req) {
|
|
6101
5198
|
const task = await this.taskStore.get(taskId);
|
|
6102
5199
|
if (!task)
|
|
@@ -6114,8 +5211,6 @@ export class AgentManager {
|
|
|
6114
5211
|
const reason = err instanceof Error ? err.message : String(err);
|
|
6115
5212
|
body = `=== baxian read-file ${req.file}:${req.startLine}-${req.endLine} REFUSED: ${reason} ===`;
|
|
6116
5213
|
}
|
|
6117
|
-
// The read ran async — QA may have submitted its verdict and been released
|
|
6118
|
-
// or rebound meanwhile. Never paste old-task content into a new task's pane.
|
|
6119
5214
|
const qaState = await this.agentStore.get(qaAgentId);
|
|
6120
5215
|
if (qaState?.taskId !== taskId) {
|
|
6121
5216
|
console.warn(`[AgentManager] read-file response dropped: qa=${qaAgentId} no longer bound to ${taskId} (got ${qaState?.taskId})`);
|
|
@@ -6128,21 +5223,16 @@ export class AgentManager {
|
|
|
6128
5223
|
console.warn(`[AgentManager] read-file injection to ${qaAgentId} failed:`, err);
|
|
6129
5224
|
}
|
|
6130
5225
|
}
|
|
6131
|
-
// Plain text paste + submit into a live agent pane (no skills, no ack protocol).
|
|
6132
5226
|
async injectTextToAgent(agentId, text, opts = {}) {
|
|
6133
5227
|
const cfg = this.getAgentConfig(agentId);
|
|
6134
5228
|
if (!cfg)
|
|
6135
5229
|
throw new Error(`injectTextToAgent: unknown agent ${agentId}`);
|
|
6136
5230
|
await this.acquireCompactGuard(agentId);
|
|
6137
5231
|
try {
|
|
6138
|
-
// 锁内重读:guard 等待期间绑定可能已易主,过期文本决不落 pane。
|
|
6139
5232
|
const state = await this.agentStore.get(agentId);
|
|
6140
5233
|
if (opts.expectedTaskId !== undefined && state?.taskId !== opts.expectedTaskId) {
|
|
6141
5234
|
throw new Error(`injectTextToAgent: agent ${agentId} no longer bound to ${opts.expectedTaskId}`);
|
|
6142
5235
|
}
|
|
6143
|
-
// Same cancel race as the prompt dispatch: cancel persists the agent hold (keeps taskId) before the
|
|
6144
|
-
// task flips terminal, and this responder can win the mutex in cancel's interrupt→/clear gap. Refuse to
|
|
6145
|
-
// inject into a pane cancel is tearing down — by the hold AND by terminal task status.
|
|
6146
5236
|
if (isCancelCleanupHold(state)) {
|
|
6147
5237
|
throw new Error(`injectTextToAgent: agent ${agentId} taken over by cancel (${state?.awaitingPhase}); refusing injection`);
|
|
6148
5238
|
}
|
|
@@ -6153,8 +5243,6 @@ export class AgentManager {
|
|
|
6153
5243
|
const paneId = state?.paneId;
|
|
6154
5244
|
if (!paneId)
|
|
6155
5245
|
throw new Error(`injectTextToAgent: agent ${agentId} has no live pane`);
|
|
6156
|
-
// taskStore.get yielded the loop — re-read right before the paste so a cancel hold that landed during
|
|
6157
|
-
// that await can't slip text into a pane cancel has taken over.
|
|
6158
5246
|
const fresh = await this.agentStore.get(agentId);
|
|
6159
5247
|
if (!fresh || fresh.paneId !== paneId
|
|
6160
5248
|
|| (opts.expectedTaskId !== undefined && fresh.taskId !== opts.expectedTaskId)
|
|
@@ -6169,22 +5257,14 @@ export class AgentManager {
|
|
|
6169
5257
|
this.compactInFlight.delete(agentId);
|
|
6170
5258
|
}
|
|
6171
5259
|
}
|
|
6172
|
-
// Human gate confirm (spec §10): executes the configured completion for
|
|
6173
|
-
// ready (server mode) / merge-ready (github mode) tasks.
|
|
6174
5260
|
async confirmHumanGate(taskId) {
|
|
6175
|
-
// Claim under the task lock: a Cancel racing this read can no longer flip the
|
|
6176
|
-
// task to cancelled (and retire its artifacts) while we proceed on a stale
|
|
6177
|
-
// gate snapshot — cancelTask checks the in-flight flag inside the same lock.
|
|
6178
5261
|
const task = await this.claimCompleteGate(taskId, ['ready', 'merge-ready']);
|
|
6179
5262
|
try {
|
|
6180
5263
|
const project = this.getProjectConfig(task.projectId);
|
|
6181
5264
|
const mergeAuto = project?.merge === 'auto';
|
|
6182
|
-
// Snapshot from verdict time — a hot config flip between publish and
|
|
6183
|
-
// confirm must not reroute an already-published artifact.
|
|
6184
5265
|
const afterDone = this.resolveAfterDone(task);
|
|
6185
5266
|
if (task.status === 'merge-ready') {
|
|
6186
5267
|
if (mergeAuto && task.prNumber) {
|
|
6187
|
-
// Guard on the post-approve head persisted at the merge-ready transition.
|
|
6188
5268
|
if (!task.latestHeadSha) {
|
|
6189
5269
|
throw new ApiError(409, `Task ${taskId} has no approved head recorded; cannot safely merge`);
|
|
6190
5270
|
}
|
|
@@ -6204,18 +5284,13 @@ export class AgentManager {
|
|
|
6204
5284
|
}
|
|
6205
5285
|
return this.finishTaskAsDone(taskId);
|
|
6206
5286
|
}
|
|
6207
|
-
// status === 'ready' (server mode)
|
|
6208
5287
|
if (afterDone === 'pr' && mergeAuto && task.prNumber) {
|
|
6209
|
-
// Reviewed-head guard is mandatory here — publish fail-closes on capture,
|
|
6210
|
-
// so a missing sha means tampered/legacy state, not a soft fallback.
|
|
6211
5288
|
if (!task.latestHeadSha) {
|
|
6212
5289
|
throw new ApiError(409, `Task ${taskId} has no reviewed head recorded; cannot safely merge`);
|
|
6213
5290
|
}
|
|
6214
5291
|
await this.executeConfirmMerge(task, () => this.mergePr(taskId, {
|
|
6215
5292
|
matchHeadSha: task.latestHeadSha,
|
|
6216
5293
|
}));
|
|
6217
|
-
// pr.merged's fromStatus now includes 'ready' — let the handler own the
|
|
6218
|
-
// merged transition + full cleanup chain (branch delete, /clear, release).
|
|
6219
5294
|
await this.eventBus.emit({
|
|
6220
5295
|
id: '',
|
|
6221
5296
|
type: 'pr.merged',
|
|
@@ -6240,8 +5315,6 @@ export class AgentManager {
|
|
|
6240
5315
|
this.markCompleteInFlight.delete(taskId);
|
|
6241
5316
|
}
|
|
6242
5317
|
}
|
|
6243
|
-
// Atomic gate claim: re-read + status check + markCompleteInFlight.add under
|
|
6244
|
-
// the task lock, so confirm and cancel serialize on the same snapshot.
|
|
6245
5318
|
async claimCompleteGate(taskId, statuses) {
|
|
6246
5319
|
return this.withTaskLock(async () => {
|
|
6247
5320
|
const fresh = await this.taskStore.get(taskId);
|
|
@@ -6257,9 +5330,6 @@ export class AgentManager {
|
|
|
6257
5330
|
return fresh;
|
|
6258
5331
|
});
|
|
6259
5332
|
}
|
|
6260
|
-
// Merge failures keep the gate: transient gh/network errors retry via another
|
|
6261
|
-
// Confirm, a stale head resolves via Cancel or an external decision — terminal
|
|
6262
|
-
// 'failed' would orphan the published PR/branch outside the task flow.
|
|
6263
5333
|
async executeConfirmMerge(task, merge) {
|
|
6264
5334
|
try {
|
|
6265
5335
|
await merge();
|
|
@@ -6289,8 +5359,6 @@ export class AgentManager {
|
|
|
6289
5359
|
await this.releaseTaskAgents(taskId);
|
|
6290
5360
|
return (await this.taskStore.get(taskId));
|
|
6291
5361
|
}
|
|
6292
|
-
// Terminal-state resource release shared by done/merged(branch)/failed confirm
|
|
6293
|
-
// paths: stop the watcher, release dev+qa (worktree removal rides releaseAgentForTask).
|
|
6294
5362
|
async releaseTaskAgents(taskId) {
|
|
6295
5363
|
this.phaseSignalWatcher?.stop(taskId);
|
|
6296
5364
|
const task = await this.taskStore.get(taskId);
|
|
@@ -6308,10 +5376,6 @@ export class AgentManager {
|
|
|
6308
5376
|
});
|
|
6309
5377
|
}
|
|
6310
5378
|
}
|
|
6311
|
-
// afterDone:'branch' + merge:'auto' — fast-forward the remote default branch
|
|
6312
|
-
// to the reviewed branch ref-to-ref (`git push origin origin/bx/X:main`).
|
|
6313
|
-
// Never touches the repo working tree, and a plain push is ff-only by default:
|
|
6314
|
-
// a non-ff base is rejected by the remote and a human must rebase/decide (spec §6).
|
|
6315
5379
|
repoMergeQueue = new Map();
|
|
6316
5380
|
async ffMergeBranch(task) {
|
|
6317
5381
|
const dev = this.getAgentConfig(task.agentId);
|
|
@@ -6329,16 +5393,12 @@ export class AgentManager {
|
|
|
6329
5393
|
const db = await runner.exec(`${cd}git symbolic-ref --short refs/remotes/origin/HEAD`);
|
|
6330
5394
|
const defaultBranch = db.stdout.trim().replace(/^origin\//, '');
|
|
6331
5395
|
if (db.exitCode !== 0 || defaultBranch === '') {
|
|
6332
|
-
// A silent 'main' fallback would push the reviewed branch onto the wrong
|
|
6333
|
-
// ref for repos whose default branch differs.
|
|
6334
5396
|
throw new Error(`ffMergeBranch: cannot resolve default branch: ${db.stderr.trim() || 'empty origin/HEAD'}`);
|
|
6335
5397
|
}
|
|
6336
5398
|
const fetch = await runner.exec(`${cd}git fetch origin`);
|
|
6337
5399
|
if (fetch.exitCode !== 0) {
|
|
6338
5400
|
throw new Error(`ffMergeBranch [git fetch] failed: ${fetch.stderr.trim()}`);
|
|
6339
5401
|
}
|
|
6340
|
-
// Reviewed-head guard (branch path): refuse if origin/<branch> moved after
|
|
6341
|
-
// the gate — symmetric with the pr path's --match-head-commit.
|
|
6342
5402
|
if (task.latestHeadSha) {
|
|
6343
5403
|
const remoteHead = await runner.exec(`${cd}git rev-parse ${shellQuote(`origin/${branch}`)}`);
|
|
6344
5404
|
if (remoteHead.exitCode !== 0 || remoteHead.stdout.trim() !== task.latestHeadSha) {
|
|
@@ -6353,8 +5413,6 @@ export class AgentManager {
|
|
|
6353
5413
|
if (push.exitCode !== 0) {
|
|
6354
5414
|
throw new Error(`ffMergeBranch [push] failed: ${push.stderr.trim() || push.stdout.trim()}`);
|
|
6355
5415
|
}
|
|
6356
|
-
// The merge has landed; branch deletion is cleanup — a transient failure
|
|
6357
|
-
// here must not flip an already-merged task to failed.
|
|
6358
5416
|
const del = await runner.exec(`${cd}git push origin --delete ${shellQuote(branch)}`);
|
|
6359
5417
|
if (del.exitCode !== 0) {
|
|
6360
5418
|
console.warn(`[AgentManager] ffMergeBranch: post-merge branch delete failed for ${branch}: ${del.stderr.trim() || del.stdout.trim()}`);
|