baxian 0.0.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +19 -1
  3. package/dist/agent/bootstrap-poller.d.ts +34 -0
  4. package/dist/agent/bootstrap-poller.d.ts.map +1 -0
  5. package/dist/agent/bootstrap-poller.js +93 -0
  6. package/dist/agent/bootstrap-poller.js.map +1 -0
  7. package/dist/agent/bootstrap.d.ts +39 -0
  8. package/dist/agent/bootstrap.d.ts.map +1 -0
  9. package/dist/agent/bootstrap.js +214 -0
  10. package/dist/agent/bootstrap.js.map +1 -0
  11. package/dist/agent/index.d.ts +8 -0
  12. package/dist/agent/index.d.ts.map +1 -0
  13. package/dist/agent/index.js +8 -0
  14. package/dist/agent/index.js.map +1 -0
  15. package/dist/agent/manager.d.ts +280 -0
  16. package/dist/agent/manager.d.ts.map +1 -0
  17. package/dist/agent/manager.js +3666 -0
  18. package/dist/agent/manager.js.map +1 -0
  19. package/dist/agent/marker-protocol.d.ts +12 -0
  20. package/dist/agent/marker-protocol.d.ts.map +1 -0
  21. package/dist/agent/marker-protocol.js +52 -0
  22. package/dist/agent/marker-protocol.js.map +1 -0
  23. package/dist/agent/pane-streamer-manager.d.ts +24 -0
  24. package/dist/agent/pane-streamer-manager.d.ts.map +1 -0
  25. package/dist/agent/pane-streamer-manager.js +107 -0
  26. package/dist/agent/pane-streamer-manager.js.map +1 -0
  27. package/dist/agent/pane-streamer.d.ts +97 -0
  28. package/dist/agent/pane-streamer.d.ts.map +1 -0
  29. package/dist/agent/pane-streamer.js +382 -0
  30. package/dist/agent/pane-streamer.js.map +1 -0
  31. package/dist/agent/post-approve-marker-watcher.d.ts +29 -0
  32. package/dist/agent/post-approve-marker-watcher.d.ts.map +1 -0
  33. package/dist/agent/post-approve-marker-watcher.js +160 -0
  34. package/dist/agent/post-approve-marker-watcher.js.map +1 -0
  35. package/dist/agent/preflight.d.ts +9 -0
  36. package/dist/agent/preflight.d.ts.map +1 -0
  37. package/dist/agent/preflight.js +164 -0
  38. package/dist/agent/preflight.js.map +1 -0
  39. package/dist/agent/prompt.d.ts +44 -0
  40. package/dist/agent/prompt.d.ts.map +1 -0
  41. package/dist/agent/prompt.js +252 -0
  42. package/dist/agent/prompt.js.map +1 -0
  43. package/dist/agent/repo-store.d.ts +27 -0
  44. package/dist/agent/repo-store.d.ts.map +1 -0
  45. package/dist/agent/repo-store.js +152 -0
  46. package/dist/agent/repo-store.js.map +1 -0
  47. package/dist/agent/runner.d.ts +46 -0
  48. package/dist/agent/runner.d.ts.map +1 -0
  49. package/dist/agent/runner.js +241 -0
  50. package/dist/agent/runner.js.map +1 -0
  51. package/dist/agent/spec-review-marker-watcher.d.ts +33 -0
  52. package/dist/agent/spec-review-marker-watcher.d.ts.map +1 -0
  53. package/dist/agent/spec-review-marker-watcher.js +180 -0
  54. package/dist/agent/spec-review-marker-watcher.js.map +1 -0
  55. package/dist/agent/tmux-probe-poller.d.ts +78 -0
  56. package/dist/agent/tmux-probe-poller.d.ts.map +1 -0
  57. package/dist/agent/tmux-probe-poller.js +418 -0
  58. package/dist/agent/tmux-probe-poller.js.map +1 -0
  59. package/dist/agent/tmux.d.ts +78 -0
  60. package/dist/agent/tmux.d.ts.map +1 -0
  61. package/dist/agent/tmux.js +395 -0
  62. package/dist/agent/tmux.js.map +1 -0
  63. package/dist/agent/worktree.d.ts +10 -0
  64. package/dist/agent/worktree.d.ts.map +1 -0
  65. package/dist/agent/worktree.js +41 -0
  66. package/dist/agent/worktree.js.map +1 -0
  67. package/dist/api/agents.d.ts +3 -0
  68. package/dist/api/agents.d.ts.map +1 -0
  69. package/dist/api/agents.js +29 -0
  70. package/dist/api/agents.js.map +1 -0
  71. package/dist/api/config.d.ts +5 -0
  72. package/dist/api/config.d.ts.map +1 -0
  73. package/dist/api/config.js +114 -0
  74. package/dist/api/config.js.map +1 -0
  75. package/dist/api/events.d.ts +3 -0
  76. package/dist/api/events.d.ts.map +1 -0
  77. package/dist/api/events.js +11 -0
  78. package/dist/api/events.js.map +1 -0
  79. package/dist/api/pollers.d.ts +3 -0
  80. package/dist/api/pollers.d.ts.map +1 -0
  81. package/dist/api/pollers.js +4 -0
  82. package/dist/api/pollers.js.map +1 -0
  83. package/dist/api/probe.d.ts +9 -0
  84. package/dist/api/probe.d.ts.map +1 -0
  85. package/dist/api/probe.js +87 -0
  86. package/dist/api/probe.js.map +1 -0
  87. package/dist/api/projects.d.ts +3 -0
  88. package/dist/api/projects.d.ts.map +1 -0
  89. package/dist/api/projects.js +602 -0
  90. package/dist/api/projects.js.map +1 -0
  91. package/dist/api/restart.d.ts +3 -0
  92. package/dist/api/restart.d.ts.map +1 -0
  93. package/dist/api/restart.js +20 -0
  94. package/dist/api/restart.js.map +1 -0
  95. package/dist/api/tasks.d.ts +3 -0
  96. package/dist/api/tasks.d.ts.map +1 -0
  97. package/dist/api/tasks.js +136 -0
  98. package/dist/api/tasks.js.map +1 -0
  99. package/dist/app.d.ts +51 -0
  100. package/dist/app.d.ts.map +1 -0
  101. package/dist/app.js +169 -0
  102. package/dist/app.js.map +1 -0
  103. package/dist/cli.d.ts +20 -0
  104. package/dist/cli.d.ts.map +1 -0
  105. package/dist/cli.js +319 -0
  106. package/dist/cli.js.map +1 -0
  107. package/dist/config/backup.d.ts +3 -0
  108. package/dist/config/backup.d.ts.map +1 -0
  109. package/dist/config/backup.js +44 -0
  110. package/dist/config/backup.js.map +1 -0
  111. package/dist/config/index.d.ts +5 -0
  112. package/dist/config/index.d.ts.map +1 -0
  113. package/dist/config/index.js +5 -0
  114. package/dist/config/index.js.map +1 -0
  115. package/dist/config/loader.d.ts +42 -0
  116. package/dist/config/loader.d.ts.map +1 -0
  117. package/dist/config/loader.js +197 -0
  118. package/dist/config/loader.js.map +1 -0
  119. package/dist/config/mutex.d.ts +2 -0
  120. package/dist/config/mutex.d.ts.map +1 -0
  121. package/dist/config/mutex.js +7 -0
  122. package/dist/config/mutex.js.map +1 -0
  123. package/dist/config/normalizer.d.ts +2 -0
  124. package/dist/config/normalizer.d.ts.map +1 -0
  125. package/dist/config/normalizer.js +42 -0
  126. package/dist/config/normalizer.js.map +1 -0
  127. package/dist/config/validator.d.ts +7 -0
  128. package/dist/config/validator.d.ts.map +1 -0
  129. package/dist/config/validator.js +278 -0
  130. package/dist/config/validator.js.map +1 -0
  131. package/dist/errors.d.ts +5 -0
  132. package/dist/errors.d.ts.map +1 -0
  133. package/dist/errors.js +9 -0
  134. package/dist/errors.js.map +1 -0
  135. package/dist/event/broker.d.ts +11 -0
  136. package/dist/event/broker.d.ts.map +1 -0
  137. package/dist/event/broker.js +47 -0
  138. package/dist/event/broker.js.map +1 -0
  139. package/dist/event/bus.d.ts +12 -0
  140. package/dist/event/bus.d.ts.map +1 -0
  141. package/dist/event/bus.js +35 -0
  142. package/dist/event/bus.js.map +1 -0
  143. package/dist/event/handlers.d.ts +6 -0
  144. package/dist/event/handlers.d.ts.map +1 -0
  145. package/dist/event/handlers.js +1121 -0
  146. package/dist/event/handlers.js.map +1 -0
  147. package/dist/event/index.d.ts +4 -0
  148. package/dist/event/index.d.ts.map +1 -0
  149. package/dist/event/index.js +4 -0
  150. package/dist/event/index.js.map +1 -0
  151. package/dist/event/log.d.ts +9 -0
  152. package/dist/event/log.d.ts.map +1 -0
  153. package/dist/event/log.js +43 -0
  154. package/dist/event/log.js.map +1 -0
  155. package/dist/event/publish.d.ts +37 -0
  156. package/dist/event/publish.d.ts.map +1 -0
  157. package/dist/event/publish.js +197 -0
  158. package/dist/event/publish.js.map +1 -0
  159. package/dist/event/ws.d.ts +3 -0
  160. package/dist/event/ws.d.ts.map +1 -0
  161. package/dist/event/ws.js +169 -0
  162. package/dist/event/ws.js.map +1 -0
  163. package/dist/github/index.d.ts +4 -0
  164. package/dist/github/index.d.ts.map +1 -0
  165. package/dist/github/index.js +3 -0
  166. package/dist/github/index.js.map +1 -0
  167. package/dist/github/mapper.d.ts +51 -0
  168. package/dist/github/mapper.d.ts.map +1 -0
  169. package/dist/github/mapper.js +191 -0
  170. package/dist/github/mapper.js.map +1 -0
  171. package/dist/github/poller.d.ts +64 -0
  172. package/dist/github/poller.d.ts.map +1 -0
  173. package/dist/github/poller.js +513 -0
  174. package/dist/github/poller.js.map +1 -0
  175. package/dist/github/resolver.d.ts +8 -0
  176. package/dist/github/resolver.d.ts.map +1 -0
  177. package/dist/github/resolver.js +24 -0
  178. package/dist/github/resolver.js.map +1 -0
  179. package/dist/index.d.ts +11 -0
  180. package/dist/index.d.ts.map +1 -0
  181. package/dist/index.js +310 -0
  182. package/dist/index.js.map +1 -0
  183. package/dist/lifecycle/restart-sentinel.d.ts +18 -0
  184. package/dist/lifecycle/restart-sentinel.d.ts.map +1 -0
  185. package/dist/lifecycle/restart-sentinel.js +61 -0
  186. package/dist/lifecycle/restart-sentinel.js.map +1 -0
  187. package/dist/lifecycle/restart.d.ts +21 -0
  188. package/dist/lifecycle/restart.d.ts.map +1 -0
  189. package/dist/lifecycle/restart.js +57 -0
  190. package/dist/lifecycle/restart.js.map +1 -0
  191. package/dist/shared/constants.d.ts +18 -0
  192. package/dist/shared/constants.d.ts.map +1 -0
  193. package/dist/shared/constants.js +60 -0
  194. package/dist/shared/constants.js.map +1 -0
  195. package/dist/shared/index.d.ts +3 -0
  196. package/dist/shared/index.d.ts.map +1 -0
  197. package/dist/shared/index.js +3 -0
  198. package/dist/shared/index.js.map +1 -0
  199. package/dist/shared/types.d.ts +263 -0
  200. package/dist/shared/types.d.ts.map +1 -0
  201. package/dist/shared/types.js +2 -0
  202. package/dist/shared/types.js.map +1 -0
  203. package/dist/skill/index.d.ts +2 -0
  204. package/dist/skill/index.d.ts.map +1 -0
  205. package/dist/skill/index.js +2 -0
  206. package/dist/skill/index.js.map +1 -0
  207. package/dist/skill/registry.d.ts +30 -0
  208. package/dist/skill/registry.d.ts.map +1 -0
  209. package/dist/skill/registry.js +174 -0
  210. package/dist/skill/registry.js.map +1 -0
  211. package/dist/skills/UPSTREAM.md +38 -0
  212. package/dist/skills/baxian-rules/SKILL.md +68 -0
  213. package/dist/skills/merge-sync/SKILL.md +42 -0
  214. package/dist/skills/pr-feedback/SKILL.md +117 -0
  215. package/dist/skills/pr-recheck/SKILL.md +52 -0
  216. package/dist/skills/pr-review/SKILL.md +60 -0
  217. package/dist/skills/spells/SKILL.md +41 -0
  218. package/dist/skills/task-check/SKILL.md +26 -0
  219. package/dist/state/agent-store.d.ts +21 -0
  220. package/dist/state/agent-store.d.ts.map +1 -0
  221. package/dist/state/agent-store.js +145 -0
  222. package/dist/state/agent-store.js.map +1 -0
  223. package/dist/state/error-record-store.d.ts +40 -0
  224. package/dist/state/error-record-store.d.ts.map +1 -0
  225. package/dist/state/error-record-store.js +203 -0
  226. package/dist/state/error-record-store.js.map +1 -0
  227. package/dist/state/index.d.ts +5 -0
  228. package/dist/state/index.d.ts.map +1 -0
  229. package/dist/state/index.js +5 -0
  230. package/dist/state/index.js.map +1 -0
  231. package/dist/state/init.d.ts +2 -0
  232. package/dist/state/init.d.ts.map +1 -0
  233. package/dist/state/init.js +13 -0
  234. package/dist/state/init.js.map +1 -0
  235. package/dist/state/lock.d.ts +9 -0
  236. package/dist/state/lock.d.ts.map +1 -0
  237. package/dist/state/lock.js +36 -0
  238. package/dist/state/lock.js.map +1 -0
  239. package/dist/state/post-approve-store.d.ts +23 -0
  240. package/dist/state/post-approve-store.d.ts.map +1 -0
  241. package/dist/state/post-approve-store.js +79 -0
  242. package/dist/state/post-approve-store.js.map +1 -0
  243. package/dist/state/process-lock.d.ts +24 -0
  244. package/dist/state/process-lock.d.ts.map +1 -0
  245. package/dist/state/process-lock.js +175 -0
  246. package/dist/state/process-lock.js.map +1 -0
  247. package/dist/state/snapshot.d.ts +38 -0
  248. package/dist/state/snapshot.d.ts.map +1 -0
  249. package/dist/state/snapshot.js +134 -0
  250. package/dist/state/snapshot.js.map +1 -0
  251. package/dist/state/task-store.d.ts +25 -0
  252. package/dist/state/task-store.d.ts.map +1 -0
  253. package/dist/state/task-store.js +167 -0
  254. package/dist/state/task-store.js.map +1 -0
  255. package/dist/terminal/attach.d.ts +7 -0
  256. package/dist/terminal/attach.d.ts.map +1 -0
  257. package/dist/terminal/attach.js +26 -0
  258. package/dist/terminal/attach.js.map +1 -0
  259. package/dist/terminal/index.d.ts +3 -0
  260. package/dist/terminal/index.d.ts.map +1 -0
  261. package/dist/terminal/index.js +3 -0
  262. package/dist/terminal/index.js.map +1 -0
  263. package/dist/terminal/key-sanitizer.d.ts +2 -0
  264. package/dist/terminal/key-sanitizer.d.ts.map +1 -0
  265. package/dist/terminal/key-sanitizer.js +9 -0
  266. package/dist/terminal/key-sanitizer.js.map +1 -0
  267. package/dist/terminal/stream-ws.d.ts +3 -0
  268. package/dist/terminal/stream-ws.d.ts.map +1 -0
  269. package/dist/terminal/stream-ws.js +426 -0
  270. package/dist/terminal/stream-ws.js.map +1 -0
  271. package/dist/terminal/ws-auth.d.ts +5 -0
  272. package/dist/terminal/ws-auth.d.ts.map +1 -0
  273. package/dist/terminal/ws-auth.js +45 -0
  274. package/dist/terminal/ws-auth.js.map +1 -0
  275. package/dist/timing/debounced-task.d.ts +9 -0
  276. package/dist/timing/debounced-task.d.ts.map +1 -0
  277. package/dist/timing/debounced-task.js +23 -0
  278. package/dist/timing/debounced-task.js.map +1 -0
  279. package/dist/timing/periodic-task-runner.d.ts +21 -0
  280. package/dist/timing/periodic-task-runner.d.ts.map +1 -0
  281. package/dist/timing/periodic-task-runner.js +61 -0
  282. package/dist/timing/periodic-task-runner.js.map +1 -0
  283. package/dist/web/assets/index-53CBbz4w.js +4 -0
  284. package/dist/web/assets/index-B9D6BV08.css +1 -0
  285. package/dist/web/assets/react-BG4Iuztk.js +40 -0
  286. package/dist/web/assets/router-B_Nv0oRz.js +12 -0
  287. package/dist/web/assets/xterm-CFbL2ovg.css +32 -0
  288. package/dist/web/assets/xterm-D5X2JljJ.js +9 -0
  289. package/dist/web/index.html +17 -0
  290. package/package.json +44 -5
  291. package/index.js +0 -1
@@ -0,0 +1,3666 @@
1
+ import { createMarkerToken } from './marker-protocol.js';
2
+ import { BRANCH_PREFIX, PHASE_EXPECTED_STATUS, PHASE_REQUIRES_AGENT_BOUND_TO_TASK, TASK_TERMINAL_STATUSES as TERMINAL_STATUSES, } from '../shared/index.js';
3
+ import { AGENT_STORE_NOOP } from '../state/agent-store.js';
4
+ import { PostApproveStore } from '../state/post-approve-store.js';
5
+ import { SkillRegistry } from '../skill/registry.js';
6
+ import { createRunner, LocalRunner, shellQuote } from './runner.js';
7
+ import { TmuxManager, ReplNotReadyError, detectStartupDialog, detectRuntimeMenu, detectReplActiveBusy, hasReplReadyAnchor, hasReplProcTitle, } from './tmux.js';
8
+ import { WorktreeManager } from './worktree.js';
9
+ import { RepoStore, createRepoStoreCache } from './repo-store.js';
10
+ import { PostApproveMarkerWatcher } from './post-approve-marker-watcher.js';
11
+ import { SpecReviewMarkerWatcher } from './spec-review-marker-watcher.js';
12
+ import { buildPromptInline, buildPostMergeCleanupPrompt, PromptSizeError, RequiredSkillsMissingError, MAX_PROMPT_BYTES_ROUTE_LIMIT, } from './prompt.js';
13
+ import { ApiError } from '../errors.js';
14
/**
 * Raised when ensuring an agent's tmux session fails.
 *
 * `partial` records how far the attempt got (for example `createdSession`
 * and `agentId`) so the caller can decide what needs rolling back.
 */
export class EnsureSessionError extends Error {
    partial;
    name = 'EnsureSessionError';
    /**
     * @param partial - snapshot of the partially completed work.
     * @param message - human-readable failure description.
     */
    constructor(partial, message) {
        super(message);
        this.partial = partial;
    }
}
22
/**
 * Raised when a cleanup pass could not finish cleanly.
 *
 * `failures` carries whatever per-step failure details the thrower collected.
 */
export class CleanupFailedError extends Error {
    failures;
    name = 'CleanupFailedError';
    /**
     * @param message - human-readable summary.
     * @param failures - failure details supplied by the thrower.
     */
    constructor(message, failures) {
        super(message);
        this.failures = failures;
    }
}
30
/**
 * Dispatch failure flagged as transient by the thrower.
 */
export class DispatchTransientError extends Error {
    name = 'DispatchTransientError';
    /**
     * @param message - human-readable failure description.
     */
    constructor(message) {
        super(message);
    }
}
36
/**
 * Dispatch failure flagged as terminal by the thrower.
 *
 * `reason` is a machine-readable code; `replDrained` is a flag supplied by
 * the thrower and defaults to false.
 */
export class DispatchTerminalError extends Error {
    reason;
    replDrained;
    name = 'DispatchTerminalError';
    /**
     * @param reason - failure reason code.
     * @param message - human-readable failure description.
     * @param replDrained - optional flag, false when omitted.
     */
    constructor(reason, message, replDrained = false) {
        super(message);
        Object.assign(this, { reason, replDrained });
    }
}
46
/**
 * Build the shell command line that starts the agent's CLI runtime.
 *
 * `model` and every `addDirs` entry are passed through shellQuote because the
 * result is spliced into a tmux command line; unquoted values would allow
 * injection.
 *
 * @param agent - agent config; reads `runtime`, `model` and `addDirs`.
 * @returns the space-joined command. A runtime other than 'claude-code' or
 *          'codex' contributes no base command.
 */
export function buildLaunchCommand(agent) {
    const baseCommands = new Map([
        ['claude-code', 'env CLAUDE_CODE_NO_FLICKER=1 claude --permission-mode bypassPermissions'],
        ['codex', 'codex --dangerously-bypass-approvals-and-sandbox'],
    ]);
    const { runtime } = agent;
    const base = baseCommands.has(runtime) ? [baseCommands.get(runtime)] : [];
    const modelFlag = agent.model ? [`--model ${shellQuote(agent.model)}`] : [];
    const dirFlags = agent.addDirs?.length > 0
        ? [...agent.addDirs].map((dir) => `--add-dir ${shellQuote(dir)}`)
        : [];
    return [...base, ...modelFlag, ...dirFlags].join(' ');
}
67
/**
 * Return the runtime kind of an agent config (its `runtime` field).
 *
 * @param agent - agent config object.
 * @returns the value of `agent.runtime`.
 */
function agentRuntimeKindFor({ runtime }) {
    return runtime;
}
70
// Fallback dispatch-ack timeout in milliseconds; the AgentManager constructor
// uses it when deps.dispatchAckTimeoutMs is nullish.
const DEFAULT_DISPATCH_ACK_TIMEOUT_MS = 30_000;
// Task status names grouped as "active".
// NOTE(review): no use of this set is visible in this part of the file — confirm against its callers.
const ACTIVE_TASK_STATUSES = new Set(['in_progress', 'review', 'fixing', 'approved']);
72
/**
 * Decide whether an agent with the given binding record may take a dispatch.
 *
 * A missing binding is dispatchable. A binding blocks dispatch when it has a
 * truthy `taskId`, a truthy `creationToken`, or status 'awaiting_human'.
 *
 * @param binding - agent binding/state record, may be null or undefined.
 * @returns true when none of the blocking conditions hold.
 */
export function canDispatchWithBinding(binding) {
    if (binding?.taskId) {
        return false;
    }
    if (binding?.creationToken) {
        return false;
    }
    return binding?.status !== 'awaiting_human';
}
75
// Some awaiting phases mean the agent's current turn has already finished and its
// binding is stale — even when the task is not terminal, the binding should be
// released so the agent can be acquired for the next round. The outcome handler
// (review.submitted) only needs to go through allowAwaitingHuman.
//
// The set is currently empty: the previously included
// 'dev-wait-gate-failed-after-qa-started' and 'dispatch-failed:ack_unknown' both mean
// "the QA prompt has been pasted and may still be running in the pane" — any release
// before the outcome arrives (including the resumeAgent / recover paths) could send a
// second prompt into the same pane and mix it with the old turn. The outcome handler
// releases explicitly with allowAwaitingHuman:true, so the gate is opened at a single point.
const TURN_COMPLETED_AWAITING_PHASES = new Set();
83
/**
 * Shared by resume / recover: decides whether a held agent's binding should be
 * cleared together with the status restore.
 *
 * @param state - agent state record; its `awaitingPhase` is consulted.
 * @param boundTask - the task the agent is bound to, if it still exists.
 * @returns true when there is no bound task, the bound task's status is
 *          terminal, or the awaiting phase is listed in
 *          TURN_COMPLETED_AWAITING_PHASES.
 */
export function shouldReleaseHeldBinding(state, boundTask) {
    // Read up front so a null/undefined state throws even when boundTask is missing.
    const { awaitingPhase } = state;
    if (!boundTask) {
        return true;
    }
    if (TERMINAL_STATUSES.includes(boundTask.status)) {
        return true;
    }
    return awaitingPhase != null && TURN_COMPLETED_AWAITING_PHASES.has(awaitingPhase);
}
89
/**
 * Return the skills already injected into the current REPL session, if the
 * recorded injection context still matches.
 *
 * null means the context is no longer valid (task or pane changed) and the
 * caller should fall back to full injection. A returned array (possibly empty)
 * is the set of skill names the session already has and can be passed as
 * excludeSkills.
 *
 * @param state - agent state record, may be null or undefined.
 * @param taskId - task the next prompt is for.
 * @param paneId - pane the next prompt goes to.
 */
function reuseSkillsIfContextValid(state, taskId, paneId) {
    const injected = state?.injectedSkills;
    if (!injected) {
        return null;
    }
    const sameContext = injected.taskId === taskId && injected.paneId === paneId;
    return sameContext ? injected.skills : null;
}
99
+ export class AgentManager {
100
// --- Collaborators and settings assigned in the constructor ---
config;
agentStore;
taskStore;
lockManager;
eventBus;
skillRegistry;
runnerFactory;
repoStoreFactory;
repoCache;
paneStreamerManager;
postApproveStore;
postApproveMarkerWatcher;
specReviewMarkerWatcher;
errorRecordStore;
dispatchAckTimeoutMs;
// Tail of the promise chain used by withTaskLock to run callbacks one after another.
taskMutationQueue = Promise.resolve();
// agentId -> agent config lookup, rebuilt by replaceConfig.
agentIndex;
platformRunner;
bootstrapTimeoutsMs;
warnedStaleQueueDispatch = new Set();
runtimeMenuWatchers = new Map();
// Timing knobs, all in milliseconds.
runtimeMenuPollIntervalMs = 10_000;
compactIdleWaitMs = 5 * 60_000;
compactIdlePollMs = 2_000;
postMergeFetchTimeoutMs = 60_000;
postMergeBranchTimeoutMs = 10_000;
// taskIds with in-flight manual review — second concurrent POST gets 409.
manualReviewInFlight = new Set();
// agentIds with an in-flight DELETE. Without this, a second DELETE hitting the
// awaiting_human stale-lock takeover path would treat the placeholder held by the
// first DELETE as stale and take it over, causing concurrent cleanupRemovedAgentRuntime runs.
deletionInFlight = new Set();
131
+ constructor(deps) {
132
+ this.config = deps.config;
133
+ this.agentStore = deps.agentStore;
134
+ this.taskStore = deps.taskStore;
135
+ this.lockManager = deps.lockManager;
136
+ this.eventBus = deps.eventBus;
137
+ this.skillRegistry = deps.skillRegistry ?? new SkillRegistry('');
138
+ this.runnerFactory = deps.runnerFactory;
139
+ this.repoStoreFactory = deps.repoStoreFactory;
140
+ this.paneStreamerManager = deps.paneStreamerManager;
141
+ this.postApproveStore = deps.postApproveStore ?? new PostApproveStore();
142
+ this.errorRecordStore = deps.errorRecordStore;
143
+ this.postApproveMarkerWatcher = deps.postApproveMarkerWatcher
144
+ ?? (deps.paneStreamerManager
145
+ ? new PostApproveMarkerWatcher({
146
+ paneStreamerManager: deps.paneStreamerManager,
147
+ eventBus: deps.eventBus,
148
+ resolveAgent: (id) => this.getAgentConfig(id),
149
+ })
150
+ : undefined);
151
+ this.specReviewMarkerWatcher = deps.specReviewMarkerWatcher
152
+ ?? (deps.paneStreamerManager
153
+ ? new SpecReviewMarkerWatcher({
154
+ paneStreamerManager: deps.paneStreamerManager,
155
+ eventBus: deps.eventBus,
156
+ resolveAgent: (id) => this.getAgentConfig(id),
157
+ })
158
+ : undefined);
159
+ this.dispatchAckTimeoutMs = deps.dispatchAckTimeoutMs ?? DEFAULT_DISPATCH_ACK_TIMEOUT_MS;
160
+ this.agentIndex = buildAgentIndex(deps.config);
161
+ this.platformRunner = deps.platformRunner ?? new LocalRunner();
162
+ this.repoCache = createRepoStoreCache();
163
+ this.bootstrapTimeoutsMs = {
164
+ trustDialog: deps.bootstrapTimeoutsMs?.trustDialog ?? 10_000,
165
+ waitReplReady: deps.bootstrapTimeoutsMs?.waitReplReady ?? 30_000,
166
+ };
167
+ }
168
+ withTaskLock(fn) {
169
+ const next = this.taskMutationQueue.then(fn);
170
+ this.taskMutationQueue = next.catch(() => undefined);
171
+ return next;
172
+ }
173
/**
 * Emit an event on the event bus without letting a failure propagate.
 *
 * A failed emit is logged with console.warn and otherwise ignored.
 *
 * @param event - event object; `event.type` is included in the warning text.
 */
async safeEmit(event) {
    try {
        await this.eventBus.emit(event);
    }
    catch (err) {
        console.warn(`[AgentManager] safeEmit ${event.type} failed (audit log loss; state machine unaffected):`, err);
    }
}
181
/**
 * Append an error record if an errorRecordStore was provided.
 *
 * Does nothing without a store; a failing append is logged with console.warn
 * and not rethrown.
 *
 * @param input - record passed to errorRecordStore.append; `operation` and
 *                `reason` are used in the warning text.
 */
async recordError(input) {
    if (!this.errorRecordStore)
        return;
    try {
        await this.errorRecordStore.append(input);
    }
    catch (err) {
        console.warn(`[AgentManager] ErrorRecordStore.append failed (${input.operation}/${input.reason}):`, err);
    }
}
191
/**
 * Swap in a new config and rebuild the agent lookup index from it.
 *
 * @param validated - the new config object.
 *                    NOTE(review): the name suggests it is already validated — confirm with callers.
 */
replaceConfig(validated) {
    this.config = validated;
    this.agentIndex = buildAgentIndex(validated);
}
195
/**
 * Look up an agent's config in the agent index.
 *
 * @param agentId - agent id.
 * @returns whatever agentIndex.get yields for that id; callers in this class
 *          treat a falsy result as "unknown agent".
 */
getAgentConfig(agentId) {
    return this.agentIndex.get(agentId);
}
198
+ // DELETE phase1 (withConfigLock 内) 先调;返回冲突 id 表示另一 DELETE 已在跑此 agent,caller 应 409。
199
+ // 成功 claim 后所有出口(含 phase1 reply / phase2/3 完成 / rollback / throw)必须调
200
+ // releaseDeletionClaim 释放,否则 agent 永久卡在 "delete-in-flight" 状态。
201
+ tryClaimDeletion(agentIds) {
202
+ for (const id of agentIds) {
203
+ if (this.deletionInFlight.has(id))
204
+ return id;
205
+ }
206
+ for (const id of agentIds)
207
+ this.deletionInFlight.add(id);
208
+ return null;
209
+ }
210
/**
 * Drop the deletion claim for each given agent id.
 *
 * @param agentIds - ids to remove from deletionInFlight; ids that were never
 *                   claimed are ignored (Set.delete on a missing value is a no-op).
 */
releaseDeletionClaim(agentIds) {
    for (const id of agentIds)
        this.deletionInFlight.delete(id);
}
214
/**
 * Report whether a deletion claim is currently held for the agent.
 *
 * @param agentId - agent id.
 * @returns true when the id is in deletionInFlight.
 */
isDeletionInFlight(agentId) {
    return this.deletionInFlight.has(agentId);
}
217
/**
 * Ensure a tmux session exists for an agent, adopting or creating one depending on `mode`.
 *
 * Steps: resolve agent and project config, prepare the working directory,
 * probe tmux for a session named after the agent id, then either adopt/restart
 * the live session or build a fresh one.
 *
 * @param agentId - agent id; also used as the tmux session name.
 * @param mode - 'create' refuses a pre-existing session; 'runtime' and
 *               'recover' adopt a live one.
 * @returns the result of adoptOrRestartSession or buildFreshSession.
 * @throws {EnsureSessionError} errors raised directly here always carry
 *         `createdSession: false`; errors from the delegated calls propagate unchanged.
 */
async ensureSession(agentId, mode) {
    const agent = this.getAgentConfig(agentId);
    if (!agent) {
        throw new EnsureSessionError({ createdSession: false, agentId }, `Unknown agent: ${agentId}`);
    }
    const project = this.getProjectConfig(agent.projectId);
    if (!project) {
        throw new EnsureSessionError({ createdSession: false, agentId }, `Unknown project: ${agent.projectId}`);
    }
    const runner = this.createRunnerFor(agent);
    const tmux = new TmuxManager(runner);
    let workdir;
    try {
        const resolved = await this.ensureWorkdir(agent, project, runner);
        workdir = resolved.workdir;
        // When ensureWorkdir reports a repoStore, persist the resolved workdir as the agent's repoPath.
        if (resolved.repoStore) {
            await this.agentStore.update(agentId, (existing) => {
                if (!existing)
                    return AGENT_STORE_NOOP;
                return {
                    ...existing,
                    repoPath: workdir,
                    updatedAt: new Date().toISOString(),
                };
            });
        }
    }
    catch (err) {
        // Note: a failure of the agentStore.update above is also reported as "ensureWorkdir failed".
        throw new EnsureSessionError({ createdSession: false, agentId }, `ensureWorkdir failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    let alive;
    try {
        alive = await tmux.hasSession(agentId);
    }
    catch (err) {
        throw new EnsureSessionError({ createdSession: false, agentId }, `tmux probe failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    // 'create' must not take over a session it did not create.
    if (mode === 'create' && alive) {
        throw new EnsureSessionError({ createdSession: false, agentId }, `tmux session "${agentId}" already exists on host; baxian only manages sessions ` +
            `it creates itself — kill it manually or pick a different agent id`);
    }
    if (alive && (mode === 'runtime' || mode === 'recover')) {
        return this.adoptOrRestartSession(tmux, agent, agentId, workdir);
    }
    // Every remaining case (no live session, or a live session under some other mode) builds fresh.
    return this.buildFreshSession(tmux, agent, agentId, workdir);
}
263
/**
 * Set the tmux session options applied by this helper: prefix 'C-b',
 * prefix2 'None', and mouse 'on'.
 *
 * @param tmux - TmuxManager used to issue the setOption calls.
 * @param agentId - target passed to each setOption call.
 */
async pinRuntimeSessionOptions(tmux, agentId) {
    await tmux.setOption(agentId, 'prefix', 'C-b');
    await tmux.setOption(agentId, 'prefix2', 'None');
    await tmux.setOption(agentId, 'mouse', 'on');
}
268
/**
 * Options for a newly created session: window-size 'latest' plus everything
 * pinRuntimeSessionOptions sets.
 *
 * New sessions start in latest mode; adopted sessions keep their current size owner.
 *
 * @param tmux - TmuxManager used to issue the setOption calls.
 * @param agentId - target passed to each setOption call.
 */
async pinFreshSessionOptions(tmux, agentId) {
    await tmux.setOption(agentId, 'window-size', 'latest');
    await this.pinRuntimeSessionOptions(tmux, agentId);
}
273
/**
 * Create a new tmux session for the agent, configure it, launch the agent CLI
 * and wait until its REPL reports ready.
 *
 * @param tmux - TmuxManager bound to the agent's runner.
 * @param agent - agent config (runtime, model, addDirs feed the launch command).
 * @param agentId - agent id, used as the session target.
 * @param workdir - directory the session is created in.
 * @returns `{ ok: true, createdSession: true, freshRuntime: true, paneId, workdir }`.
 * @throws {EnsureSessionError} on any failure. `partial.createdSession` tells
 *         the caller whether a session now exists; for a ReplNotReadyError the
 *         last screen is attached, and `dialogPending` is set when that screen
 *         matches a startup dialog.
 */
async buildFreshSession(tmux, agent, agentId, workdir) {
    let createdSession = false;
    try {
        await tmux.createSession(agentId, workdir);
        // Mark BEFORE setOption — failure here must trigger caller's rollback.
        createdSession = true;
        // Tag the session with the agent id and runtime via user options.
        await tmux.setOption(agentId, '@baxian-agent-id', agentId);
        await tmux.setOption(agentId, '@baxian-runtime', agent.runtime);
        await tmux.setOption(agentId, 'allow-passthrough', 'on');
        await tmux.setOption(agentId, 'set-titles', 'on');
        await this.pinFreshSessionOptions(tmux, agentId);
        await tmux.setOption(agentId, 'status-right', '');
        // These two go through the server-option helpers rather than setOption.
        await tmux.setServerOption('extended-keys', 'on');
        await tmux.appendServerOptionIfMissing('terminal-features', 'xterm*:extkeys');
        const paneId = await tmux.getSinglePaneId(agentId);
        // Type the launch command into the pane, then handle the trust dialog and wait for the REPL.
        await tmux.sendKeysToPane(paneId, `${buildLaunchCommand(agent)}\n`);
        await tmux.handleTrustDialog(paneId, agentRuntimeKindFor(agent), {
            timeoutMs: this.bootstrapTimeoutsMs.trustDialog,
        });
        await tmux.waitReplReady(paneId, agentRuntimeKindFor(agent), {
            timeoutMs: this.bootstrapTimeoutsMs.waitReplReady,
            scrollback: 0,
        });
        return { ok: true, createdSession: true, freshRuntime: true, paneId, workdir };
    }
    catch (err) {
        const partial = { createdSession, agentId };
        if (err instanceof ReplNotReadyError) {
            partial.lastScreen = err.lastScreen;
            // Only flag a pending dialog when the session actually exists and the screen shows one.
            if (createdSession && detectStartupDialog(err.lastScreen)) {
                partial.dialogPending = true;
            }
        }
        throw new EnsureSessionError(partial, `buildFreshSession failed: ${err instanceof Error ? err.message : String(err)}`);
    }
}
309
/**
 * Bootstrap a newly created agent: create its session in 'create' mode and
 * record the outcome in agentStore.
 *
 * `creationToken` acts as a generation guard: the store is only updated when
 * the stored record still carries the same token. On a mismatch the session
 * that was just created is treated as an orphan and killed.
 *
 * Never throws — failures land in agentStore.
 * NOTE(review): the awaits inside the catch block (agentStore.get,
 * markDialogPending, markBootstrapFailed) are not themselves wrapped — confirm
 * they cannot reject, otherwise the "never throws" contract does not hold.
 *
 * @param agentId - agent to bootstrap.
 * @param creationToken - token identifying this creation attempt.
 */
async startBootstrapAsync(agentId, creationToken) {
    const cfgAtStart = this.getAgentConfig(agentId);
    if (!cfgAtStart) {
        console.warn(`[bootstrap] ${agentId} not in config at bootstrap start — aborting`);
        return;
    }
    // Best-effort kill of the session; uses the config captured at start, and only logs on failure.
    const tryKillOrphanSession = async (reason) => {
        try {
            const runner = this.createRunnerFor(cfgAtStart);
            await new TmuxManager(runner).killSession(agentId);
        }
        catch (cleanupErr) {
            console.warn(`[bootstrap] orphan killSession (${reason}) failed for ${agentId}:`, cleanupErr);
        }
    };
    try {
        const result = await this.ensureSession(agentId, 'create');
        let resolvedExisting = null;
        const now = new Date().toISOString();
        // Store the paneId and clear the token, but only if this attempt is still the current one.
        await this.agentStore.update(agentId, (existing) => {
            if (!existing || existing.creationToken !== creationToken)
                return AGENT_STORE_NOOP;
            resolvedExisting = existing;
            return {
                ...existing,
                paneId: result.paneId,
                creationToken: undefined,
                updatedAt: now,
            };
        });
        if (!resolvedExisting) {
            console.warn(`[bootstrap] ${agentId} creationToken mismatch on success — killing orphan session`);
            await tryKillOrphanSession('post-success token mismatch');
            return;
        }
        await this.safeEmit({
            id: '',
            type: 'agent.bootstrap_succeeded',
            timestamp: now,
            projectId: resolvedExisting.projectId,
            agentId,
            data: { paneId: result.paneId, phase: 'session' },
        });
        return;
    }
    catch (err) {
        // Startup dialog is blocking the REPL: park the agent as awaiting_human and poll in the background.
        if (err instanceof EnsureSessionError && err.partial.dialogPending) {
            if (err.partial.createdSession) {
                const fresh = await this.agentStore.get(agentId);
                if (!fresh || fresh.creationToken !== creationToken) {
                    console.warn(`[bootstrap] ${agentId} dialog-path token mismatch — killing orphan session`);
                    await tryKillOrphanSession('dialog-path token mismatch');
                    return;
                }
            }
            await this.markDialogPending(agentId, creationToken);
            // Fire-and-forget; a crash of the poll loop is only logged.
            void this.slowPollDialogPending(agentId, creationToken).catch((pollErr) => {
                console.warn(`[bootstrap] slowPoll for ${agentId} crashed:`, pollErr);
            });
            return;
        }
        // Hard failure after the session was created: roll the session back.
        if (err instanceof EnsureSessionError && err.partial.createdSession) {
            await tryKillOrphanSession('hard-failure rollback');
        }
        const message = err instanceof Error ? err.message : String(err);
        await this.markBootstrapFailed(agentId, creationToken, message);
    }
}
378
/**
 * Park an agent as 'awaiting_human' with phase 'agent_dialog_pending' and emit
 * a 'human.intervention' event.
 *
 * Two guard modes:
 *  - bootstrap path (default): when `creationToken` is given, the stored token must match;
 *  - runtime path (`opts.runtimePath`): the record must have no creationToken,
 *    its taskId must equal `opts.expectedTaskId`, and, when provided,
 *    its paneId must equal `opts.expectedPaneId`.
 * The guard is checked once up front and again inside the store update.
 *
 * @param agentId - agent to mark.
 * @param creationToken - generation token for the bootstrap path (may be undefined).
 * @param opts - `{ runtimePath?, expectedPaneId?, expectedTaskId? }`.
 */
async markDialogPending(agentId, creationToken, opts = {}) {
    const existing = await this.agentStore.get(agentId);
    if (!existing)
        return;
    // Runtime path: refuse outright when the snapshot is entirely empty — with neither paneId
    // nor taskId as generation evidence, we cannot rule out a stale callback passing the guard
    // and polluting a new, equally idle agent.
    if (opts.runtimePath && opts.expectedPaneId === undefined && opts.expectedTaskId === undefined) {
        console.warn(`[AgentManager] markDialogPending runtime path: refusing to write without paneId/taskId snapshot (no generation guard available for ${agentId})`);
        return;
    }
    // Pre-check (early exit; the update closure below re-validates atomically).
    if (opts.runtimePath) {
        if (existing.creationToken !== undefined)
            return;
        if (opts.expectedPaneId !== undefined && existing.paneId !== opts.expectedPaneId)
            return;
        if (existing.taskId !== opts.expectedTaskId)
            return;
    }
    else if (creationToken !== undefined && existing.creationToken !== creationToken) {
        return;
    }
    const cfg = this.getAgentConfig(agentId);
    let paneId = existing.paneId;
    // No paneId recorded yet: try to look it up from tmux so it can be stored with the awaiting state.
    if (cfg && !paneId) {
        try {
            const runner = this.createRunnerFor(cfg);
            paneId = await new TmuxManager(runner).getSinglePaneId(agentId);
        }
        catch {
            // best-effort; slowPoll skips iterations without paneId
        }
    }
    // Atomic write: guard + paneId + awaiting fields in one update, avoiding a race between get and update.
    const now = new Date().toISOString();
    let wrote = false;
    let projectIdForEmit = '';
    let taskIdForEmit;
    await this.agentStore.update(agentId, (fresh) => {
        if (!fresh)
            return AGENT_STORE_NOOP;
        if (opts.runtimePath) {
            if (fresh.creationToken !== undefined)
                return AGENT_STORE_NOOP;
            if (opts.expectedPaneId !== undefined && fresh.paneId !== opts.expectedPaneId)
                return AGENT_STORE_NOOP;
            if (fresh.taskId !== opts.expectedTaskId)
                return AGENT_STORE_NOOP;
        }
        else if (creationToken !== undefined && fresh.creationToken !== creationToken) {
            return AGENT_STORE_NOOP;
        }
        projectIdForEmit = fresh.projectId;
        taskIdForEmit = fresh.taskId;
        wrote = true;
        return {
            ...fresh,
            ...(paneId !== undefined ? { paneId } : {}),
            status: 'awaiting_human',
            awaitingPhase: 'agent_dialog_pending',
            awaitingReason: 'Agent REPL launched but blocked on a startup dialog (e.g. CLI update notice). Operator should attach via web terminal and dismiss it; baxian will auto-detect ready and resume.',
            awaitingSince: now,
            updatedAt: now,
        };
    });
    // The guard rejected the write: no event either.
    if (!wrote)
        return;
    await this.safeEmit({
        id: '',
        type: 'human.intervention',
        timestamp: now,
        projectId: projectIdForEmit,
        agentId,
        ...(taskIdForEmit ? { taskId: taskIdForEmit } : {}),
        data: {
            phase: 'agent_dialog_pending',
            reason: 'Agent REPL launched but blocked on a startup dialog (e.g. CLI update notice). Operator should attach via web terminal and dismiss it; baxian will auto-detect ready and resume.',
        },
    });
}
458
+ async markBootstrapFailed(agentId, creationToken, errorMessage) {
459
+ const existing = await this.agentStore.get(agentId);
460
+ if (!existing)
461
+ return;
462
+ // generational guard: token mismatch means a newer create-recreate already won.
463
+ if (creationToken !== undefined && existing.creationToken !== creationToken)
464
+ return;
465
+ const now = new Date().toISOString();
466
+ await this.agentStore.update(agentId, (fresh) => {
467
+ if (!fresh)
468
+ return AGENT_STORE_NOOP;
469
+ if (creationToken !== undefined && fresh.creationToken !== creationToken)
470
+ return AGENT_STORE_NOOP;
471
+ return {
472
+ ...fresh,
473
+ paneId: undefined,
474
+ creationToken: undefined,
475
+ updatedAt: now,
476
+ };
477
+ });
478
+ await this.safeEmit({
479
+ id: '',
480
+ type: 'agent.bootstrap_failed',
481
+ timestamp: now,
482
+ projectId: existing.projectId,
483
+ agentId,
484
+ data: { error: errorMessage, phase: 'session' },
485
+ });
486
+ }
487
+ // Handles an EnsureSessionError flagged dialogPending on the runtime path (mark Held, fail the bound task, start slowPoll). Returns true when handled — caller skips its own kill cleanup.
488
+ // expectedFromStatuses: the set of fromStatus values allowed for the fail-task transition. The caller (startSession/continueSession)
489
+ // has already computed it from phase + opts.dialogFailFromStatuses; if a concurrent outcome has moved the task outside this set,
490
+ // transitionTaskStatus skips, so an already-accepted outcome is not overwritten. When omitted it falls back to [...ACTIVE_TASK_STATUSES] (retry
491
+ // endpoint and other phase-less paths; per the original note those paths bind no task, so the fail-task branch is never entered).
492
+ async handleDialogPendingFromRuntime(agentId, err, opts = {}) {
493
+ if (!(err instanceof EnsureSessionError) || !err.partial.dialogPending) {
494
+ return false;
495
+ }
496
+ let state = await this.agentStore.get(agentId);
497
+ if (!state)
498
+ return false;
499
+ // Retry path (state.paneId not yet written but ensureSession just created the session): fetch the paneId from tmux
500
+ // and write it into state as generation evidence; otherwise markDialogPending refuses an all-empty snapshot and no-ops
501
+ // → returning true lets the caller release the lock and respond 202 → agent stays idle while the tmux dialog is still up → the next
502
+ // dispatch lands in the dialog pane. If probing tmux for the paneId fails, return false so the caller rolls back via killSession.
503
+ if (state.paneId === undefined && err.partial.createdSession) {
504
+ const cfg = this.getAgentConfig(agentId);
505
+ if (!cfg)
506
+ return false;
507
+ let discoveredPaneId;
508
+ try {
509
+ const runner = this.createRunnerFor(cfg);
510
+ discoveredPaneId = await new TmuxManager(runner).getSinglePaneId(agentId);
511
+ }
512
+ catch (probeErr) {
513
+ console.warn(`[AgentManager] handleDialogPendingFromRuntime: tmux probe paneId failed for ${agentId}:`, probeErr);
514
+ return false;
515
+ }
516
+ if (!discoveredPaneId)
517
+ return false;
518
+ const probeNow = new Date().toISOString();
519
+ // The updatedAt guard was removed (round-13 codex review): updatedAt is too broad; ordinary background updates
520
+ // (repoPath refresh / poller bump, etc.) also trigger false positives that wrongly reject the legitimate retry dialog path.
521
+ // The race ("stale callback writes the new agent after DELETE+recreate") is theoretical on lock-holding paths (the retry endpoint holds the lock
522
+ // all the way until handleDialogPendingFromRuntime returns; startSession/continueSession hold it via acquireAgentForTask),
523
+ // and `fresh.paneId !== undefined` already blocks the case where the new agent has written its paneId.
524
+ await this.agentStore.update(agentId, (fresh) => {
525
+ if (!fresh)
526
+ return AGENT_STORE_NOOP;
527
+ if (fresh.paneId !== undefined)
528
+ return AGENT_STORE_NOOP;
529
+ return { ...fresh, paneId: discoveredPaneId, updatedAt: probeNow };
530
+ });
531
+ state = await this.agentStore.get(agentId);
532
+ if (!state?.paneId)
533
+ return false;
534
+ }
535
+ if (state.paneId === undefined && state.taskId === undefined) {
536
+ console.warn(`[AgentManager] handleDialogPendingFromRuntime: ${agentId} has no paneId/taskId snapshot (no generation guard); refusing — caller should rollback`);
537
+ return false;
538
+ }
539
+ // Runtime path: guard explicitly and do not pass state.creationToken (within the race window it may already belong to a new generation).
540
+ // Also snapshot paneId / taskId for the atomic check, blocking the "DELETE+recreate + new bootstrap finished" race.
541
+ await this.markDialogPending(agentId, undefined, {
542
+ runtimePath: true,
543
+ ...(state.paneId !== undefined ? { expectedPaneId: state.paneId } : {}),
544
+ expectedTaskId: state.taskId,
545
+ });
546
+ // Runtime path (agent bound to an active task + no creationToken): the dialog error is thrown during ensureSession,
547
+ // before the prompt is injected — fail the task directly so the UI Retry path opens (no work is lost).
548
+ // Without failing: the task sticks in in_progress, the agent is Held, and after operator Resume nobody resends the prompt (owner review #6
549
+ // noted transitionToCodePhase would deadlock). Once the task is failed, Resume / recover take the terminal-release path.
550
+ // Use transitionTaskStatus (which includes withTaskLock + the fromStatus guard) to avoid racing with Cancel / merge /
551
+ // review outcome and other concurrent mutations; otherwise a stale 'failed' would overwrite a terminal status that already arrived.
552
+ if (state.taskId && state.creationToken === undefined) {
553
+ // fromStatus is computed explicitly by the caller: startSession/continueSession use opts.dialogFailFromStatuses ??
554
+ // PHASE_EXPECTED_STATUS[phase]; dispatchReviewToQa passes [taskStatusAtClaim] explicitly when it goes through bypassTaskStatusGate
555
+ // (the manual review entry may be approved/fixing/in_progress, but the default for phase='review' only accepts 'review' →
556
+ // omitting it would skip → task stuck active and deadlocked; see owner round-15 review).
557
+ const expectedFromStatuses = opts.expectedFromStatuses ?? [...ACTIVE_TASK_STATUSES];
558
+ const transitioned = await this.transitionTaskStatus(state.taskId, 'failed', { fromStatus: expectedFromStatuses });
559
+ if (transitioned) {
560
+ await this.safeEmit({
561
+ id: '',
562
+ type: 'task.updated',
563
+ timestamp: new Date().toISOString(),
564
+ projectId: transitioned.task.projectId,
565
+ taskId: state.taskId,
566
+ data: { status: 'failed', reason: 'agent_dialog_pending_runtime' },
567
+ });
568
+ // Release the partner agent's binding in step. The task is terminal and will not go through cancel cleanup again; otherwise the partner
569
+ // would point at the terminal task forever → retryTask's validateTaskDispatch sees dev still bound → 409.
570
+ await this.releasePartnersAndDrain(agentId, [state.taskId], [transitioned.task.projectId]);
571
+ }
572
+ }
573
+ // Tell the caller (startSession / continueSession catch) not to call releaseAgentForTask for cleanup —
574
+ // with the task terminal + agent Held, the first rule of shouldReleaseHeldBinding would allow the release,
575
+ // unlocking the pane still stuck on the dialog for the next dispatch. Setting partial.handled makes the caller skip the release.
576
+ err.partial.handled = true;
577
+ // slowPoll is fire-and-forget and keeps running after the caller releases the lock. On the runtime path creationToken=undefined
578
+ // is not enough to stop a stale poll from hitting the new agent after DELETE+recreate (when the new agent is ack_unknown/dev-wait-gate-failed
579
+ // its creationToken is undefined too) → the stale poll would overwrite the new agent's phase with resolved_runtime → Resume is no longer refused.
580
+ // Pass the current paneId/taskId snapshot as generation evidence; the atomic update writes only when it matches.
581
+ const snapshotPaneId = state.paneId;
582
+ const snapshotTaskId = state.taskId;
583
+ void this.slowPollDialogPending(agentId, state.creationToken, {
584
+ ...(snapshotPaneId !== undefined ? { expectedPaneId: snapshotPaneId } : {}),
585
+ expectedTaskId: snapshotTaskId,
586
+ }).catch((pollErr) => {
587
+ console.warn(`[runtime] slowPoll for ${agentId} crashed:`, pollErr);
588
+ });
589
+ return true;
590
+ }
591
+ async waitForBootstrapSettled(agentId, timeoutMs = 5_000) {
592
+ const deadline = Date.now() + timeoutMs;
593
+ while (Date.now() < deadline) {
594
+ const state = await this.agentStore.get(agentId);
595
+ if (!state)
596
+ return;
597
+ if (!state.creationToken)
598
+ return;
599
+ await new Promise(r => setTimeout(r, 25));
600
+ }
601
+ throw new Error(`waitForBootstrapSettled(${agentId}) timed out after ${timeoutMs}ms`);
602
+ }
603
+ // Polls every 5s until the pane's REPL is ready, then clears (bootstrap path) or re-phases (runtime path) the Held state. No hard upper bound — paired with markDialogPending's human.intervention emit so an operator steps in;
604
+ // on DELETE/recreate the creationToken mismatch makes the loop exit naturally.
605
+ // On the runtime path (creationToken=undefined) a stale poll could hit the new agent after DELETE+recreate (which also has no token),
606
+ // so opts.expectedPaneId/expectedTaskId serve as generation evidence, checked both at the loop top and inside the atomic update.
607
+ async slowPollDialogPending(agentId, creationToken, opts = {}) {
608
+ const POLL_INTERVAL_MS = 5_000;
609
+ const cfg = this.getAgentConfig(agentId);
610
+ if (!cfg)
611
+ return;
612
+ const runner = this.createRunnerFor(cfg);
613
+ const tmux = new TmuxManager(runner);
614
+ const runtime = agentRuntimeKindFor(cfg);
615
+ const generationMismatch = (state) => { // true when `state` no longer belongs to the generation this poll was started for
616
+ if (state.creationToken !== creationToken)
617
+ return true;
618
+ // runtime path: verify the paneId/taskId snapshot still matches
619
+ if (creationToken === undefined) {
620
+ if (opts.expectedPaneId !== undefined && state.paneId !== opts.expectedPaneId)
621
+ return true;
622
+ if (state.taskId !== opts.expectedTaskId)
623
+ return true;
624
+ }
625
+ return false;
626
+ };
627
+ while (true) {
628
+ await new Promise(r => setTimeout(r, POLL_INTERVAL_MS));
629
+ const state = await this.agentStore.get(agentId);
630
+ if (!state)
631
+ return;
632
+ if (generationMismatch(state))
633
+ return;
634
+ let paneId = state.paneId;
635
+ if (!paneId) { // no paneId stored yet: probe tmux and persist it under the same generation guard
636
+ try {
637
+ paneId = await tmux.getSinglePaneId(agentId);
638
+ const discoveredPaneId = paneId;
639
+ await this.agentStore.update(agentId, (s) => {
640
+ if (!s)
641
+ return AGENT_STORE_NOOP;
642
+ if (generationMismatch(s))
643
+ return AGENT_STORE_NOOP;
644
+ return {
645
+ ...s,
646
+ paneId: discoveredPaneId,
647
+ updatedAt: new Date().toISOString(),
648
+ };
649
+ });
650
+ }
651
+ catch {
652
+ continue;
653
+ }
654
+ }
655
+ try {
656
+ await tmux.waitReplReady(paneId, runtime, {
657
+ timeoutMs: 1_000,
658
+ intervalMs: 200,
659
+ scrollback: 0,
660
+ });
661
+ }
662
+ catch { // not ready yet (or probe failed): try again on the next interval
663
+ continue;
664
+ }
665
+ const preFresh = await this.agentStore.get(agentId);
666
+ if (!preFresh)
667
+ return;
668
+ if (generationMismatch(preFresh))
669
+ return;
670
+ const now = new Date().toISOString();
671
+ let projectIdForEmit = '';
672
+ let wrote = false;
673
+ // bootstrap path: creationToken set, agent not bound to a task; once ready, Held is cleared automatically (no operator needed).
674
+ // runtime path: creationToken undefined, agent still bound to the task (already pushed to failed by handleDialogPendingFromRuntime)
675
+ // + lock held; once ready, switch to the 'agent_dialog_resolved_runtime' phase so resumeAgent lets it through
676
+ // after explicit operator confirmation. awaiting_human + lock are kept to prevent "dialog ready auto-dispatches the next task into the pane".
677
+ const isBootstrapPath = creationToken !== undefined;
678
+ await this.agentStore.update(agentId, (fresh) => {
679
+ if (!fresh)
680
+ return AGENT_STORE_NOOP;
681
+ if (generationMismatch(fresh))
682
+ return AGENT_STORE_NOOP;
683
+ projectIdForEmit = fresh.projectId;
684
+ wrote = true;
685
+ if (isBootstrapPath) {
686
+ return {
687
+ ...fresh,
688
+ creationToken: undefined,
689
+ status: 'ok',
690
+ awaitingPhase: undefined,
691
+ awaitingReason: undefined,
692
+ awaitingSince: undefined,
693
+ updatedAt: now,
694
+ };
695
+ }
696
+ return {
697
+ ...fresh,
698
+ awaitingPhase: 'agent_dialog_resolved_runtime',
699
+ awaitingReason: 'Runtime dialog resolved; agent REPL ready. Click Resume to release the binding and let baxian pick the next task.',
700
+ updatedAt: now,
701
+ };
702
+ });
703
+ if (!wrote)
704
+ return;
705
+ if (isBootstrapPath) {
706
+ await this.safeEmit({
707
+ id: '',
708
+ type: 'agent.bootstrap_succeeded',
709
+ timestamp: now,
710
+ projectId: projectIdForEmit,
711
+ agentId,
712
+ data: { paneId, phase: 'session_dialog_resolved' },
713
+ });
714
+ }
715
+ else {
716
+ // Runtime dialog resolved, phase switched to resolved_runtime; emit to tell the operator Resume is now possible.
717
+ await this.safeEmit({
718
+ id: '',
719
+ type: 'human.intervention',
720
+ timestamp: now,
721
+ projectId: projectIdForEmit,
722
+ agentId,
723
+ data: {
724
+ phase: 'agent_dialog_resolved_runtime',
725
+ note: 'Runtime dialog resolved; agent REPL ready. Click Resume to continue.',
726
+ },
727
+ });
728
+ }
729
+ return;
730
+ }
731
+ }
732
+ // Idempotent; self-exits when taskId clears, so callers never need a paired stop.
733
+ startRuntimeMenuWatch(agentId) {
734
+ if (this.runtimeMenuWatchers.has(agentId))
735
+ return;
736
+ const controller = new AbortController();
737
+ this.runtimeMenuWatchers.set(agentId, controller);
738
+ void this.runtimeMenuWatchLoop(agentId, controller.signal)
739
+ .catch((err) => {
740
+ console.warn(`[runtimeMenuWatch] ${agentId} loop crashed:`, err);
741
+ })
742
+ .finally(() => {
743
+ const current = this.runtimeMenuWatchers.get(agentId);
744
+ if (current === controller)
745
+ this.runtimeMenuWatchers.delete(agentId);
746
+ });
747
+ }
748
+ stopRuntimeMenuWatch(agentId) {
749
+ const c = this.runtimeMenuWatchers.get(agentId);
750
+ if (c)
751
+ c.abort();
752
+ }
753
+ async runtimeMenuWatchLoop(agentId, signal) {
754
+ const cfg = this.getAgentConfig(agentId);
755
+ if (!cfg)
756
+ return;
757
+ const tmux = new TmuxManager(this.createRunnerFor(cfg));
758
+ let pendingMenu = false;
759
+ while (!signal.aborted) {
760
+ await this.sleep(this.runtimeMenuPollIntervalMs, signal);
761
+ if (signal.aborted)
762
+ return;
763
+ const state = await this.agentStore.get(agentId);
764
+ if (!state)
765
+ return;
766
+ if (!state.taskId) {
767
+ return;
768
+ }
769
+ if (!state.paneId)
770
+ continue;
771
+ let stripped;
772
+ try {
773
+ stripped = await tmux.capturePaneById(state.paneId, { ansi: false, scrollback: 0 });
774
+ }
775
+ catch {
776
+ continue;
777
+ }
778
+ // Re-fetch after the async capture; release/reassign may have rewritten state.
779
+ const fresh = await this.agentStore.get(agentId);
780
+ if (!fresh)
781
+ return;
782
+ if (!fresh.taskId) {
783
+ return;
784
+ }
785
+ if (fresh.taskId !== state.taskId ||
786
+ fresh.paneId !== state.paneId) {
787
+ continue;
788
+ }
789
+ const onMenu = detectRuntimeMenu(stripped);
790
+ if (onMenu && !pendingMenu) {
791
+ pendingMenu = true;
792
+ const now = new Date().toISOString();
793
+ await this.safeEmit({
794
+ id: '',
795
+ type: 'human.intervention',
796
+ timestamp: now,
797
+ projectId: fresh.projectId,
798
+ agentId,
799
+ taskId: fresh.taskId,
800
+ data: {
801
+ phase: 'agent_runtime_menu_pending',
802
+ note: 'Agent paused on an interactive menu mid-task. Attach via web terminal and respond; baxian will auto-clear once the menu closes.',
803
+ },
804
+ });
805
+ }
806
+ else if (!onMenu && pendingMenu) {
807
+ pendingMenu = false;
808
+ }
809
+ }
810
+ }
811
+ sleep(ms, signal) {
812
+ return new Promise((resolve) => {
813
+ const timer = setTimeout(() => {
814
+ signal.removeEventListener('abort', onAbort);
815
+ resolve();
816
+ }, ms);
817
+ const onAbort = () => {
818
+ clearTimeout(timer);
819
+ resolve();
820
+ };
821
+ signal.addEventListener('abort', onAbort, { once: true });
822
+ });
823
+ }
824
+ async adoptOrRestartSession(tmux, agent, agentId, workdir) { // Adopts the existing tmux session named agentId (relaunching the REPL if the pane is at a shell); every failure is rethrown as EnsureSessionError with createdSession: false.
825
+ let marker;
826
+ try {
827
+ marker = await tmux.getOption(agentId, '@baxian-agent-id');
828
+ }
829
+ catch (err) {
830
+ throw new EnsureSessionError({ createdSession: false, agentId }, `getOption(@baxian-agent-id) failed: ${err instanceof Error ? err.message : String(err)}`);
831
+ }
832
+ if (marker !== agentId) { // session exists but was not tagged for this agent: refuse to adopt it
833
+ throw new EnsureSessionError({ createdSession: false, agentId }, `tmux session "${agentId}" exists but @baxian-agent-id marker mismatch ` +
834
+ `(got "${marker ?? 'null'}"); baxian will not adopt foreign session — operator must intervene`);
835
+ }
836
+ try {
837
+ await this.pinRuntimeSessionOptions(tmux, agentId);
838
+ }
839
+ catch (err) {
840
+ throw new EnsureSessionError({ createdSession: false, agentId }, `pinRuntimeSessionOptions failed: ${err instanceof Error ? err.message : String(err)}`);
841
+ }
842
+ let paneId;
843
+ try {
844
+ paneId = await tmux.getSinglePaneId(agentId);
845
+ }
846
+ catch (err) {
847
+ throw new EnsureSessionError({ createdSession: false, agentId }, `getSinglePaneId failed: ${err instanceof Error ? err.message : String(err)}`);
848
+ }
849
+ const runtime = agentRuntimeKindFor(agent);
850
+ let state;
851
+ try {
852
+ state = await tmux.classifyPaneForAdopt(paneId, runtime);
853
+ }
854
+ catch (err) {
855
+ throw new EnsureSessionError({ createdSession: false, agentId }, `classifyPaneForAdopt failed: ${err instanceof Error ? err.message : String(err)}`);
856
+ }
857
+ switch (state.kind) { // every case returns or throws except 'shell', which falls through to the relaunch below
858
+ case 'live-runtime':
859
+ // Reuse the existing REPL; its context is uninterrupted — dedup can still be relied on.
860
+ return { ok: true, createdSession: false, freshRuntime: false, paneId, workdir };
861
+ case 'startup-dialog':
862
+ throw new EnsureSessionError({
863
+ createdSession: false,
864
+ agentId,
865
+ dialogPending: true,
866
+ lastScreen: state.lastScreen,
867
+ }, `adoptOrRestartSession: REPL blocked on startup dialog`);
868
+ case 'other':
869
+ // Refuse send-keys — would land as input inside vim/make/etc instead of spawning REPL.
870
+ throw new EnsureSessionError({ createdSession: false, agentId }, `pane foreground "${state.paneCurrentCommand}" is neither runtime ` +
871
+ `(${runtime}) nor shell; refusing to send launch keys — operator ` +
872
+ `must reset the pane manually`);
873
+ case 'trust-dialog':
874
+ try {
875
+ await tmux.handleTrustDialog(paneId, runtime, {
876
+ timeoutMs: this.bootstrapTimeoutsMs.trustDialog,
877
+ });
878
+ await tmux.waitReplReady(paneId, runtime, {
879
+ timeoutMs: this.bootstrapTimeoutsMs.waitReplReady,
880
+ // scrollback>0 risks matching a stale ready anchor from before trust prompt.
881
+ scrollback: 0,
882
+ });
883
+ // The trust dialog was just answered and the REPL went from startup to usable — the context is fresh.
884
+ return { ok: true, createdSession: false, freshRuntime: true, paneId, workdir };
885
+ }
886
+ catch (trustErr) {
887
+ if (trustErr instanceof ReplNotReadyError && detectStartupDialog(trustErr.lastScreen)) {
888
+ throw new EnsureSessionError({
889
+ createdSession: false,
890
+ agentId,
891
+ dialogPending: true,
892
+ lastScreen: trustErr.lastScreen,
893
+ }, `adoptOrRestartSession: REPL blocked on startup dialog after trust auto-answer`);
894
+ }
895
+ throw new EnsureSessionError({ createdSession: false, agentId }, `adoptOrRestartSession: trust dialog handling failed: ` +
896
+ `${trustErr instanceof Error ? trustErr.message : String(trustErr)}`);
897
+ }
898
+ case 'shell':
899
+ break;
900
+ }
901
+ try {
902
+ await tmux.sendKeysToPane(paneId, `${buildLaunchCommand(agent)}\n`);
903
+ await tmux.handleTrustDialog(paneId, runtime, {
904
+ timeoutMs: this.bootstrapTimeoutsMs.trustDialog,
905
+ });
906
+ await tmux.waitReplReady(paneId, runtime, {
907
+ timeoutMs: this.bootstrapTimeoutsMs.waitReplReady,
908
+ scrollback: 0,
909
+ });
910
+ // Shell path: the REPL was relaunched in the original pane; the new process has no prior prompt context.
911
+ return { ok: true, createdSession: false, freshRuntime: true, paneId, workdir };
912
+ }
913
+ catch (relErr) {
914
+ if (relErr instanceof ReplNotReadyError && detectStartupDialog(relErr.lastScreen)) {
915
+ throw new EnsureSessionError({
916
+ createdSession: false,
917
+ agentId,
918
+ dialogPending: true,
919
+ lastScreen: relErr.lastScreen,
920
+ }, `adoptOrRestartSession: REPL relaunch blocked on startup dialog`);
921
+ }
922
+ throw new EnsureSessionError({ createdSession: false, agentId }, `REPL relaunch failed: ${relErr instanceof Error ? relErr.message : String(relErr)}`);
923
+ }
924
+ }
925
+ async acquireAgentForTask(agentId, taskId, phase) {
926
+ const cfg = this.getAgentConfig(agentId);
927
+ if (!cfg)
928
+ throw new Error(`Unknown agent: ${agentId}`);
929
+ const state = await this.agentStore.get(agentId);
930
+ const sameTaskLocked = state?.taskId === taskId && (await this.lockManager.isLocked(agentId));
931
+ const reentryPhases = new Set(['fix', 'post_approve', 'spec-fix', 'code']);
932
+ const sameTaskReentry = state?.taskId === taskId &&
933
+ !state.creationToken &&
934
+ state.status !== 'awaiting_human' &&
935
+ reentryPhases.has(phase);
936
+ const reuseLock = sameTaskLocked && reentryPhases.has(phase);
937
+ if (!sameTaskReentry && !canDispatchWithBinding(state)) {
938
+ return false;
939
+ }
940
+ if (!reuseLock) {
941
+ const ok = await this.lockManager.acquire(agentId);
942
+ if (!ok)
943
+ return false;
944
+ }
945
+ const now = new Date().toISOString();
946
+ await this.agentStore.update(agentId, (existing) => ({
947
+ ...(existing ?? { id: agentId, projectId: cfg.projectId, updatedAt: now }),
948
+ id: agentId,
949
+ projectId: cfg.projectId,
950
+ taskId,
951
+ updatedAt: now,
952
+ }));
953
+ return true;
954
+ }
955
+ // waiting: dev keeps lock across review/fix. idle: terminal release.
956
+ // Pure state update — REPL readiness is not gated here; the dispatch path handles readiness itself.
957
+ // Release is refused while awaiting_human: this keeps an upstream catch (e.g. the generic fallback for EnsureSessionError(dialogPending))
958
+ // from tearing off the await marker set by markAwaitingHuman. Per the original note, resumeAgent takes over via allowAwaitingHuman.
959
+ async releaseAgentForTask(agentId, expectedTaskId, mode, opts = {}) {
960
+ return this.withTaskLock(async () => {
961
+ const state = await this.agentStore.get(agentId);
962
+ if (!state)
963
+ return false;
964
+ if (state.taskId !== expectedTaskId) {
965
+ console.warn(`[AgentManager] releaseAgentForTask: agent ${agentId} taskId mismatch ` +
966
+ `(expected ${expectedTaskId}, got ${state.taskId}); skipping`);
967
+ return false;
968
+ }
969
+ const boundTask = await this.taskStore.get(expectedTaskId);
970
+ if (state.status === 'awaiting_human' && !opts.allowAwaitingHuman) {
971
+ // Gate exception: a bound task that is already terminal / a turn-completed phase both count as normal cleanup paths
972
+ // and must be able to clear the binding; otherwise the stale binding points at a terminal task forever → every later acquire is stuck.
973
+ // shouldReleaseHeldBinding applies the same rule that Resume uses.
974
+ if (!shouldReleaseHeldBinding(state, boundTask)) {
975
+ console.warn(`[AgentManager] releaseAgentForTask: agent ${agentId} is awaiting_human (${state.awaitingPhase}); refusing to release`);
976
+ return false;
977
+ }
978
+ }
979
+ if (mode === 'waiting' && (!boundTask || !ACTIVE_TASK_STATUSES.has(boundTask.status))) {
980
+ console.warn(`[AgentManager] releaseAgentForTask: task ${expectedTaskId} is not active; skipping waiting transition`);
981
+ return false;
982
+ }
983
+ const cfg = this.getAgentConfig(agentId);
984
+ if (!cfg)
985
+ return false;
986
+ const now = new Date().toISOString();
987
+ if (mode === 'waiting') { // waiting: binding and lock stay; only updatedAt (and optionally the awaiting_human fields) change
988
+ await this.agentStore.update(agentId, (latest) => {
989
+ if (!latest)
990
+ return AGENT_STORE_NOOP;
991
+ // clearAwaitingHuman: an explicit operator op (restart-repl/retry) has confirmed the REPL restarted, so the earlier
992
+ // ack_unknown/dialog_pending Held no longer applies — drop the awaiting_human fields so the agent can be dispatched.
993
+ if (opts.clearAwaitingHuman && latest.status === 'awaiting_human') {
994
+ const cleared = {
995
+ id: latest.id,
996
+ projectId: latest.projectId,
997
+ updatedAt: now,
998
+ ...(latest.taskId !== undefined ? { taskId: latest.taskId } : {}),
999
+ ...(latest.paneId !== undefined ? { paneId: latest.paneId } : {}),
1000
+ ...(latest.repoPath !== undefined ? { repoPath: latest.repoPath } : {}),
1001
+ ...(latest.worktreePath !== undefined ? { worktreePath: latest.worktreePath } : {}),
1002
+ ...(latest.startedAt !== undefined ? { startedAt: latest.startedAt } : {}),
1003
+ ...(latest.creationToken !== undefined ? { creationToken: latest.creationToken } : {}),
1004
+ };
1005
+ return cleared;
1006
+ }
1007
+ return {
1008
+ ...latest,
1009
+ updatedAt: now,
1010
+ };
1011
+ });
1012
+ return true;
1013
+ }
1014
+ if (state.worktreePath) { // terminal release: best-effort worktree removal; a failure is logged and does not block the release
1015
+ const cleanupDir = this.resolveWorkdir(cfg, state);
1016
+ if (cleanupDir) {
1017
+ const runner = this.createRunnerFor(cfg);
1018
+ const worktree = new WorktreeManager(runner);
1019
+ try {
1020
+ await worktree.remove(cleanupDir, state.worktreePath);
1021
+ }
1022
+ catch (err) {
1023
+ console.warn(`[AgentManager] releaseAgentForTask worktree.remove failed for ${state.worktreePath}:`, err);
1024
+ }
1025
+ }
1026
+ }
1027
+ await this.agentStore.update(agentId, (existing) => {
1028
+ if (!existing)
1029
+ return AGENT_STORE_NOOP;
1030
+ if (existing.taskId !== expectedTaskId)
1031
+ return AGENT_STORE_NOOP;
1032
+ return { // rebuilt record keeps only these fields; taskId, worktreePath, status and awaiting* are dropped
1033
+ id: existing.id,
1034
+ projectId: existing.projectId,
1035
+ ...(existing.repoPath !== undefined ? { repoPath: existing.repoPath } : {}),
1036
+ ...(existing.paneId !== undefined ? { paneId: existing.paneId } : {}),
1037
+ ...(existing.creationToken !== undefined ? { creationToken: existing.creationToken } : {}),
1038
+ updatedAt: now,
1039
+ };
1040
+ });
1041
+ await this.lockManager.release(agentId); // NOTE(review): runs even if the update above returned AGENT_STORE_NOOP — confirm intended
1042
+ return true;
1043
+ });
1044
+ }
1045
+ // baxian 把 agent 标为"自动调度走不通,等 operator 显式 resume"。
1046
+ // 唯一禁区入口:cancel C-c 失败 / dispatch ack_unknown / dialog 卡住等场景。
1047
+ // 保留绑定 + 锁,靠 canDispatchWithBinding 的 status 检查拦住自动派遣。
1048
+ // generation guard 防 DELETE+recreate race:
1049
+ // - expectedCreationToken: 'tok' → store 当前 token 必须等于 'tok'
1050
+ // - expectedCreationToken: null → store 当前必须仍 *无* token(runtime path 用)
1051
+ // - 不传 → 不校验 generation
1052
+ async markAwaitingHuman(agentId, phase, reason, opts = {}) {
1053
+ const now = new Date().toISOString();
1054
+ let projectId = '';
1055
+ let taskId;
1056
+ let wrote = false;
1057
+ await this.agentStore.update(agentId, (existing) => {
1058
+ if (!existing)
1059
+ return AGENT_STORE_NOOP;
1060
+ if (opts.expectedCreationToken !== undefined) {
1061
+ const expected = opts.expectedCreationToken; // string | null
1062
+ const actual = existing.creationToken ?? null;
1063
+ if (actual !== expected)
1064
+ return AGENT_STORE_NOOP;
1065
+ }
1066
+ // taskId guard:迟到 mark 撞已 release+reassign 的 binding 时 noop
1067
+ // (caller 观察到 expectedTaskId 时 binding 还是它,update 时已变 = race lost)。
1068
+ if (opts.expectedTaskId !== undefined) {
1069
+ const expectedTask = opts.expectedTaskId; // string | null
1070
+ const actualTask = existing.taskId ?? null;
1071
+ if (actualTask !== expectedTask)
1072
+ return AGENT_STORE_NOOP;
1073
+ }
1074
+ projectId = existing.projectId;
1075
+ taskId = existing.taskId;
1076
+ wrote = true;
1077
+ return {
1078
+ ...existing,
1079
+ status: 'awaiting_human',
1080
+ awaitingPhase: phase,
1081
+ awaitingReason: reason,
1082
+ awaitingSince: now,
1083
+ updatedAt: now,
1084
+ };
1085
+ });
1086
+ if (!wrote)
1087
+ return;
1088
+ await this.safeEmit({
1089
+ id: '',
1090
+ type: 'human.intervention',
1091
+ timestamp: now,
1092
+ projectId,
1093
+ agentId,
1094
+ ...(taskId ? { taskId } : {}),
1095
+ data: {
1096
+ phase,
1097
+ reason,
1098
+ },
1099
+ });
1100
+ }
1101
+ // dispatch catch helper:caller 调 startSession/continueSession 抛 DispatchTerminalError
1102
+ // 时统一区分 ack_unknown vs 其他 reason。返回 true 表示已 markAwaitingHuman(caller
1103
+ // 应跳过 release / rollback),返回 false 表示其他错误(caller 走常规清理)。
1104
+ // expectedTaskId: caller 当时观察到的 binding;mark 在 lock 释放后才执行的话,binding 可能
1105
+ // 已被 outcome/cancel release 给新任务,传 taskId 用作 atomic guard 避免污染无关 binding。
1106
+ async markAwaitingIfAckUnknown(agentId, err, expectedTaskId) {
1107
+ if (err instanceof DispatchTerminalError && err.reason === 'ack_unknown') {
1108
+ await this.markAwaitingHuman(agentId, `dispatch-failed:${err.reason}`, `${err.message}. Prompt may still be running in the pane; verify before resuming.`, { expectedTaskId });
1109
+ return true;
1110
+ }
1111
+ return false;
1112
+ }
1113
+ // Operator explicitly resumes an agent that is awaiting_human.
1114
+ // If taskId points at an already-terminal task, the binding + lock are cleared as well — back to idle and dispatchable.
1115
+ // If taskId points at a still-active task (rare, e.g. the task was not failed during dialog_pending), the binding is kept.
1116
+ async resumeAgent(agentId) {
1117
+ // drainQueue takes withTaskLock for every pending task internally — it must not be called inside the outer withTaskLock,
1118
+ // or a wait cycle forms. Capture the projectId and drain after leaving the lock (same pattern as cancelTask).
1119
+ let projectIdToDrain;
1120
+ const result = await this.withTaskLock(async () => {
1121
+ const state = await this.agentStore.get(agentId);
1122
+ if (!state)
1123
+ return { resumed: false, releasedBinding: false };
1124
+ if (state.status !== 'awaiting_human') {
1125
+ return { resumed: false, releasedBinding: false };
1126
+ }
1127
+ // creationToken still set = bootstrap dialog still unresolved. Resume cannot make it "continue" —
1128
+ // the dialog in the pane needs the operator to handle it via the web terminal; slowPoll clears the state automatically once resolved.
1129
+ // If the operator wants to abandon this agent, the DELETE path is the way.
1130
+ if (state.creationToken) {
1131
+ console.warn(`[AgentManager] resumeAgent: agent ${agentId} still has creationToken (bootstrap dialog unresolved); refusing Resume — operator should resolve dialog via web terminal or DELETE the agent.`);
1132
+ return { resumed: false, releasedBinding: false };
1133
+ }
1134
+ const boundTask = state.taskId ? await this.taskStore.get(state.taskId) : null;
1135
+ // Refuse for "prompt may still be running in the pane" phases while the bound task is still active: Resume would let
1136
+ // shouldReleaseHeldBinding clear the binding, after which drainQueue would dispatch a second prompt into the same pane,
1137
+ // mixing with the old turn. When the outcome arrives, the review.submitted handler releases explicitly with allowAwaitingHuman:true,
1138
+ // so Resume is not needed here. When the task is terminal/missing it is allowed — the ack_unknown branch of failTaskForDispatchError
1139
+ // pushes the task to failed and keeps Held, in which case Resume is the only recovery path.
1140
+ const PROMPT_MAYBE_RUNNING_PHASES = new Set([
1141
+ 'dispatch-failed:ack_unknown',
1142
+ 'dev-wait-gate-failed-after-qa-started',
1143
+ ]);
1144
+ if (state.awaitingPhase != null
1145
+ && PROMPT_MAYBE_RUNNING_PHASES.has(state.awaitingPhase)
1146
+ && boundTask && ACTIVE_TASK_STATUSES.has(boundTask.status)) {
1147
+ console.warn(`[AgentManager] resumeAgent: agent ${agentId} ${state.awaitingPhase} with active task ${state.taskId} — prompt may still be running; refusing Resume until outcome arrives or operator cancels the task / deletes the agent.`);
1148
+ return { resumed: false, releasedBinding: false };
1149
+ }
1150
+ // agent_dialog_pending: the pane is still stuck on the startup dialog and the REPL is not ready. Resume would let
1151
+ // shouldReleaseHeldBinding allow release on a terminal/missing task, clearing binding/lock and running drainQueue,
1152
+ // which dispatches a new prompt into the pane still stuck on the dialog. The only recovery paths are: operator dismisses via web terminal
1153
+ // → slowPollDialogPending moves the phase to agent_dialog_resolved_runtime (Resume allowed),
1154
+ // or the bootstrap path clears Held directly → status='ok', or DELETE the agent.
1155
+ if (state.awaitingPhase === 'agent_dialog_pending') {
1156
+ console.warn(`[AgentManager] resumeAgent: agent ${agentId} dialog still pending; refusing Resume — operator should dismiss dialog via web terminal (slowPoll will mark agent_dialog_resolved_runtime, then Resume) or DELETE the agent.`);
1157
+ return { resumed: false, releasedBinding: false };
1158
+ }
1159
+ // agent_dialog_resolved_runtime + active task: on the normal path handleDialogPendingFromRuntime
1160
+ // has already failed the task → boundTask should be terminal. A still-active bound task indicates the crash window
1161
+ // (crash after handleDialogPendingFromRuntime wrote awaiting_human but before transitionTaskStatus);
1162
+ // Resume via the release path would set status=ok but keep binding + lock, and the prompt was never sent → the task silently hangs.
1163
+ // Refuse Resume and tell the operator to cancel the task or DELETE the agent explicitly.
1164
+ if (state.awaitingPhase === 'agent_dialog_resolved_runtime'
1165
+ && boundTask && ACTIVE_TASK_STATUSES.has(boundTask.status)) {
1166
+ console.warn(`[AgentManager] resumeAgent: agent ${agentId} dialog resolved but bound task ${state.taskId} still active (crash window) — prompt was never injected; refusing Resume. Operator should cancel the task or DELETE the agent.`);
1167
+ return { resumed: false, releasedBinding: false };
1168
+ }
1169
+ const now = new Date().toISOString();
1170
+ const shouldReleaseBinding = shouldReleaseHeldBinding(state, boundTask);
1171
+ const cfg = this.getAgentConfig(agentId);
1172
+ if (shouldReleaseBinding && state.worktreePath && cfg) { // best-effort worktree removal; a failure is logged and does not block the resume
1173
+ const cleanupDir = this.resolveWorkdir(cfg, state);
1174
+ if (cleanupDir) {
1175
+ const runner = this.createRunnerFor(cfg);
1176
+ const worktree = new WorktreeManager(runner);
1177
+ try {
1178
+ await worktree.remove(cleanupDir, state.worktreePath);
1179
+ }
1180
+ catch (err) {
1181
+ console.warn(`[AgentManager] resumeAgent worktree.remove failed for ${state.worktreePath}:`, err);
1182
+ }
1183
+ }
1184
+ }
1185
+ await this.agentStore.update(agentId, (existing) => {
1186
+ if (!existing)
1187
+ return AGENT_STORE_NOOP;
1188
+ const next = { // rebuilt record: the awaiting* fields are dropped and status becomes 'ok'
1189
+ id: existing.id,
1190
+ projectId: existing.projectId,
1191
+ updatedAt: now,
1192
+ ...(existing.repoPath !== undefined ? { repoPath: existing.repoPath } : {}),
1193
+ ...(existing.paneId !== undefined ? { paneId: existing.paneId } : {}),
1194
+ ...(existing.creationToken !== undefined ? { creationToken: existing.creationToken } : {}),
1195
+ status: 'ok',
1196
+ };
1197
+ if (!shouldReleaseBinding) { // binding kept: carry the task-related fields over
1198
+ if (existing.taskId !== undefined)
1199
+ next.taskId = existing.taskId;
1200
+ if (existing.worktreePath !== undefined)
1201
+ next.worktreePath = existing.worktreePath;
1202
+ if (existing.startedAt !== undefined)
1203
+ next.startedAt = existing.startedAt;
1204
+ }
1205
+ return next;
1206
+ });
1207
+ if (shouldReleaseBinding) {
1208
+ await this.lockManager.release(agentId);
1209
+ projectIdToDrain = state.projectId;
1210
+ }
1211
+ await this.safeEmit({
1212
+ id: '',
1213
+ type: 'human.intervention',
1214
+ timestamp: now,
1215
+ projectId: state.projectId,
1216
+ agentId,
1217
+ ...(state.taskId ? { taskId: state.taskId } : {}),
1218
+ data: {
1219
+ phase: 'resumed',
1220
+ previousPhase: state.awaitingPhase,
1221
+ releasedBinding: shouldReleaseBinding,
1222
+ },
1223
+ });
1224
+ return { resumed: true, releasedBinding: shouldReleaseBinding };
1225
+ });
1226
+ // Drain only after leaving withTaskLock — drainQueue takes withTaskLock per pending task, so nesting it here would deadlock.
1227
+ if (projectIdToDrain) {
1228
+ try {
1229
+ await this.drainQueue(projectIdToDrain);
1230
+ }
1231
+ catch (err) {
1232
+ console.warn(`[AgentManager] resumeAgent drainQueue(${projectIdToDrain}) failed:`, err);
1233
+ }
1234
+ }
1235
+ return result;
1236
+ }
1237
+ async interruptPaneAndWaitReady(state, cfg) {
1238
+ const runner = this.createRunnerFor(cfg);
1239
+ const tmux = new TmuxManager(runner);
1240
+ let paneId = state.paneId;
1241
+ if (!paneId) {
1242
+ try {
1243
+ paneId = await tmux.getSinglePaneId(cfg.id);
1244
+ }
1245
+ catch (err) {
1246
+ console.warn(`[AgentManager] interruptPaneAndWaitReady: getSinglePaneId failed for ${cfg.id}:`, err);
1247
+ return false;
1248
+ }
1249
+ }
1250
+ try {
1251
+ await tmux.sendKeysToPane(paneId, 'C-c');
1252
+ await new Promise(r => setTimeout(r, 200));
1253
+ }
1254
+ catch (err) {
1255
+ console.warn(`[AgentManager] interruptPaneAndWaitReady: send C-c failed for pane ${paneId}:`, err);
1256
+ return false;
1257
+ }
1258
+ try {
1259
+ await tmux.waitReplReady(paneId, agentRuntimeKindFor(cfg), {
1260
+ timeoutMs: 10_000,
1261
+ scrollback: 0,
1262
+ });
1263
+ return true;
1264
+ }
1265
+ catch (err) {
1266
+ console.warn(`[AgentManager] interruptPaneAndWaitReady: waitReplReady failed for pane ${paneId}:`, err);
1267
+ return false;
1268
+ }
1269
+ }
1270
+ async failTaskForDispatchError(taskId, phase, agentId, err) {
1271
+ const expected = PHASE_EXPECTED_STATUS[phase] ?? [];
1272
+ const transitioned = await this.transitionTaskStatus(taskId, 'failed', { fromStatus: expected.length > 0 ? expected : ['in_progress', 'review', 'fixing', 'approved'] });
1273
+ if (!transitioned) {
1274
+ console.warn(`[AgentManager] failTaskForDispatchError: task ${taskId} not in expected ` +
1275
+ `fromStatus for phase=${phase}; skipping task transition`);
1276
+ }
1277
+ await this.recordError({
1278
+ agentId,
1279
+ projectId: transitioned?.task.projectId ?? '',
1280
+ taskId,
1281
+ operation: 'dispatch',
1282
+ reason: `DISPATCH_${err.reason.toUpperCase()}`,
1283
+ message: err.message,
1284
+ observation: {
1285
+ phase,
1286
+ replDrained: err.replDrained,
1287
+ },
1288
+ recommendation: 'Inspect the runtime pane, then retry or cancel the task.',
1289
+ });
1290
+ // ack_unknown: sendEnter 已发,prompt 可能已被 REPL 接收并正在执行——
1291
+ // 不能 release 让下一任务排队进同一 pane。pre-Enter 错误(prompt_too_large /
1292
+ // required_skills_missing / gate_failed)则正常 release。
1293
+ if (err.reason === 'ack_unknown') {
1294
+ await this.markAwaitingHuman(agentId, `dispatch-failed:${err.reason}`, `${err.message}. Prompt may still be running in the pane; verify before resuming.`, { expectedTaskId: taskId });
1295
+ // task 已 terminal: 同步释放 partner agent binding,否则 partner(如 dev)永远绑 terminal task,
1296
+ // retryTask 走 validateTaskDispatch 时会看 dev 仍 bound → 409,UI Retry 通路被堵。
1297
+ if (transitioned) {
1298
+ await this.releasePartnersAndDrain(agentId, [taskId], [transitioned.task.projectId]);
1299
+ }
1300
+ return;
1301
+ }
1302
+ try {
1303
+ await this.releaseAgentForTask(agentId, taskId, 'idle');
1304
+ }
1305
+ catch (releaseErr) {
1306
+ console.warn(`[AgentManager] failTaskForDispatchError: releaseAgentForTask(${agentId}) failed:`, releaseErr);
1307
+ }
1308
+ await this.safeEmit({
1309
+ id: '',
1310
+ type: 'human.intervention',
1311
+ timestamp: new Date().toISOString(),
1312
+ projectId: transitioned?.task.projectId ?? '',
1313
+ agentId,
1314
+ taskId,
1315
+ data: {
1316
+ phase: `dispatch-failed:${err.reason}`,
1317
+ reason: err.reason,
1318
+ message: err.message,
1319
+ replDrained: err.replDrained,
1320
+ },
1321
+ });
1322
+ }
1323
+ async failTasksForAgent(agentId, reason, opts = {}) {
1324
+ const failed = await this.withTaskLock(async () => {
1325
+ const tasks = await this.taskStore.list({});
1326
+ const out = [];
1327
+ for (const t of tasks) {
1328
+ const active = ['in_progress', 'review', 'fixing', 'approved'];
1329
+ const bound = t.agentId === agentId || t.qaAgentId === agentId;
1330
+ if (active.includes(t.status) && bound) {
1331
+ t.status = 'failed';
1332
+ t.updatedAt = new Date().toISOString();
1333
+ await this.taskStore.set(t);
1334
+ out.push(t);
1335
+ }
1336
+ }
1337
+ return out;
1338
+ });
1339
+ for (const t of failed) {
1340
+ await this.safeEmit({
1341
+ id: '',
1342
+ type: 'task.updated',
1343
+ timestamp: new Date().toISOString(),
1344
+ projectId: t.projectId,
1345
+ taskId: t.id,
1346
+ data: { status: 'failed', reason },
1347
+ });
1348
+ }
1349
+ const failedTaskIds = failed.map(t => t.id);
1350
+ const projectIds = [...new Set(failed.map(t => t.projectId))];
1351
+ if (!opts.deferPartnerCleanup) {
1352
+ await this.releasePartnersAndDrain(agentId, failedTaskIds, projectIds);
1353
+ }
1354
+ return { failedTaskIds, projectIds };
1355
+ }
1356
+ async releasePartnersAndDrain(excludeAgentId, failedTaskIds, projectIds) {
1357
+ for (const taskId of failedTaskIds) {
1358
+ const t = await this.taskStore.get(taskId);
1359
+ if (!t)
1360
+ continue;
1361
+ for (const partnerId of [t.agentId, t.qaAgentId]) {
1362
+ if (!partnerId || partnerId === excludeAgentId)
1363
+ continue;
1364
+ try {
1365
+ // allowAwaitingHuman: task 已 terminal 必须能完整清理;partner 即使被标 Held(罕见)
1366
+ // 也应释放,否则 partner stale binding 永远指向 terminal task → 后续 acquire 全卡。
1367
+ const ok = await this.releaseAgentForTask(partnerId, taskId, 'idle', { allowAwaitingHuman: true });
1368
+ if (!ok) {
1369
+ console.warn(`[AgentManager] failTasksForAgent: partner ${partnerId} release returned false ` +
1370
+ `for failed task ${taskId}; operator must use restart-repl / retry if cleanup is still needed.`);
1371
+ }
1372
+ }
1373
+ catch (err) {
1374
+ console.warn(`[AgentManager] failTasksForAgent: releaseAgentForTask(partner=${partnerId}, task=${taskId}) failed:`, err);
1375
+ }
1376
+ }
1377
+ }
1378
+ for (const projectId of projectIds) {
1379
+ try {
1380
+ await this.drainQueue(projectId);
1381
+ }
1382
+ catch (err) {
1383
+ console.error(`[AgentManager] failTasksForAgent: drainQueue(${projectId}) failed:`, err);
1384
+ }
1385
+ }
1386
+ }
1387
+ // 100ms poll: fixed 200ms races runtimes that take >200ms to ack SIGINT.
1388
+ async pollPaneCommandStable(tmux, paneId, opts) {
1389
+ const deadline = Date.now() + opts.timeoutMs;
1390
+ const SHELL = /^(?:zsh|bash|sh|fish)$/;
1391
+ let last = '';
1392
+ while (Date.now() < deadline) {
1393
+ await new Promise(r => setTimeout(r, 100));
1394
+ const raw = await tmux.displayMessage(paneId, '#{pane_current_command}');
1395
+ last = raw.trim();
1396
+ if (opts.expectShell ? SHELL.test(last) : last !== '')
1397
+ return last;
1398
+ }
1399
+ return last;
1400
+ }
1401
+ async restartReplOnly(agentId) {
1402
+ const cfg = this.getAgentConfig(agentId);
1403
+ if (!cfg)
1404
+ throw new Error(`Unknown agent: ${agentId}`);
1405
+ const runner = this.createRunnerFor(cfg);
1406
+ const tmux = new TmuxManager(runner);
1407
+ const alive = await tmux.hasSession(agentId);
1408
+ if (!alive) {
1409
+ throw new Error(`restart-repl: tmux session ${agentId} does not exist; use retry to rebuild`);
1410
+ }
1411
+ const marker = await tmux.getOption(agentId, '@baxian-agent-id');
1412
+ if (marker !== agentId) {
1413
+ throw new Error(`restart-repl: marker mismatch (got "${marker ?? 'null'}"); refusing to touch foreign session`);
1414
+ }
1415
+ const paneId = await tmux.getSinglePaneId(agentId);
1416
+ await tmux.sendKeysToPane(paneId, 'C-c');
1417
+ const cmd = await this.pollPaneCommandStable(tmux, paneId, { timeoutMs: 2_000 });
1418
+ const RUNTIME = /^(?:claude|codex|node|\d+\.\d+\.\d+)$/;
1419
+ const SHELL = /^(?:zsh|bash|sh|fish)$/;
1420
+ if (RUNTIME.test(cmd)) {
1421
+ await tmux.sendKeysToPane(paneId, 'exit', 'Enter');
1422
+ await this.pollPaneCommandStable(tmux, paneId, { timeoutMs: 2_000, expectShell: true });
1423
+ }
1424
+ else if (!SHELL.test(cmd)) {
1425
+ throw new Error(`restart-repl precondition failed: unexpected pane state "${cmd}"`);
1426
+ }
1427
+ const runtime = agentRuntimeKindFor(cfg);
1428
+ await tmux.sendKeysToPane(paneId, `${buildLaunchCommand(cfg)}\n`);
1429
+ await tmux.handleTrustDialog(paneId, runtime, {
1430
+ timeoutMs: this.bootstrapTimeoutsMs.trustDialog,
1431
+ });
1432
+ await tmux.waitReplReady(paneId, runtime, {
1433
+ timeoutMs: this.bootstrapTimeoutsMs.waitReplReady,
1434
+ scrollback: 0,
1435
+ });
1436
+ await this.agentStore.update(agentId, (state) => {
1437
+ if (!state)
1438
+ return AGENT_STORE_NOOP;
1439
+ return {
1440
+ ...state,
1441
+ paneId,
1442
+ updatedAt: new Date().toISOString(),
1443
+ };
1444
+ });
1445
+ }
1446
+ prepareRemoveTargets(agentId) {
1447
+ const cfg = this.getAgentConfig(agentId);
1448
+ if (!cfg)
1449
+ throw new Error(`Unknown agent: ${agentId}`);
1450
+ const project = this.getProjectConfig(cfg.projectId);
1451
+ if (!project)
1452
+ throw new Error(`Unknown project: ${cfg.projectId}`);
1453
+ if (cfg.role === 'qa')
1454
+ return { targets: [agentId] };
1455
+ for (const pair of project.agent) {
1456
+ if (pair[0]?.id === agentId) {
1457
+ const qa = pair[1];
1458
+ return { targets: qa ? [agentId, qa.id] : [agentId] };
1459
+ }
1460
+ }
1461
+ return { targets: [agentId] };
1462
+ }
1463
+ // Aggregates failures so DELETE rolls back the marker on remote IO error.
1464
+ async cleanupRemovedAgentRuntime(targets) {
1465
+ const failures = [];
1466
+ for (const id of targets) {
1467
+ const cfg = this.getAgentConfig(id);
1468
+ if (!cfg)
1469
+ continue;
1470
+ const runner = this.createRunnerFor(cfg);
1471
+ const tmux = new TmuxManager(runner);
1472
+ const worktree = new WorktreeManager(runner);
1473
+ this.stopRuntimeMenuWatch(id);
1474
+ // Streamer first so subscribers see session_gone.
1475
+ if (this.paneStreamerManager) {
1476
+ try {
1477
+ await this.paneStreamerManager.destroy(id);
1478
+ }
1479
+ catch (err) {
1480
+ console.warn(`[AgentManager] cleanupRemovedAgentRuntime: paneStreamerManager.destroy(${id}) failed:`, err);
1481
+ }
1482
+ }
1483
+ try {
1484
+ const alive = await tmux.hasSession(id);
1485
+ if (alive) {
1486
+ const marker = await tmux.getOption(id, '@baxian-agent-id');
1487
+ if (marker === id) {
1488
+ await tmux.killSession(id);
1489
+ }
1490
+ else {
1491
+ console.warn(`[AgentManager] cleanupRemovedAgentRuntime: skipping kill for ${id} ` +
1492
+ `(marker=${marker ?? 'null'}; not baxian-managed)`);
1493
+ }
1494
+ }
1495
+ }
1496
+ catch (err) {
1497
+ failures.push({ agentId: id, step: 'tmux', error: err });
1498
+ }
1499
+ const state = await this.agentStore.get(id);
1500
+ if (state?.worktreePath) {
1501
+ const cleanupDir = this.resolveWorkdir(cfg, state);
1502
+ if (cleanupDir) {
1503
+ try {
1504
+ await worktree.remove(cleanupDir, state.worktreePath);
1505
+ }
1506
+ catch (err) {
1507
+ failures.push({ agentId: id, step: 'worktree.remove', error: err });
1508
+ }
1509
+ }
1510
+ }
1511
+ }
1512
+ if (failures.length > 0) {
1513
+ const summary = failures
1514
+ .map(f => `${f.agentId}/${f.step}: ${f.error instanceof Error ? f.error.message : String(f.error)}`)
1515
+ .join('; ');
1516
+ throw new CleanupFailedError(`cleanupRemovedAgentRuntime: ${failures.length} step(s) failed: ${summary}`, failures);
1517
+ }
1518
+ }
1519
+ // IO-free preview; caller compares vs MAX_PROMPT_BYTES_ROUTE_LIMIT before allocating worktree.
1520
+ previewPromptBytesForTaskInput(projectId, input) {
1521
+ const cfg = this.getAgentConfig(input.preferredAgentId);
1522
+ if (!cfg)
1523
+ throw new Error(`Unknown agent: ${input.preferredAgentId}`);
1524
+ if (cfg.projectId !== projectId) {
1525
+ throw new Error(`Agent ${input.preferredAgentId} not in project ${projectId}`);
1526
+ }
1527
+ const workdirGuess = cfg.workdir ?? '/'.padEnd(64, 'x');
1528
+ const worktreePathBound = `${workdirGuess}/.baxian-worktrees/task-9999999999_ffffffffffffffff`;
1529
+ const now = new Date().toISOString();
1530
+ const fakeTask = {
1531
+ id: 'task-9999999999',
1532
+ projectId,
1533
+ title: input.title,
1534
+ description: input.description,
1535
+ preferredAgentId: input.preferredAgentId,
1536
+ agentId: cfg.id,
1537
+ branch: `${BRANCH_PREFIX}task-9999999999`,
1538
+ reviewRound: 0,
1539
+ status: 'in_progress',
1540
+ createdAt: now,
1541
+ updatedAt: now,
1542
+ };
1543
+ const fullPrompt = buildPromptInline({
1544
+ task: fakeTask,
1545
+ phase: 'develop',
1546
+ agent: cfg,
1547
+ worktreePath: worktreePathBound,
1548
+ skillRegistry: this.skillRegistry,
1549
+ });
1550
+ return Buffer.byteLength(fullPrompt, 'utf8');
1551
+ }
1552
+ listAgents() {
1553
+ return [...this.agentIndex.values()];
1554
+ }
1555
+ getProjectConfig(projectId) {
1556
+ return this.config.project.find(p => p.id === projectId);
1557
+ }
1558
+ getProjectByRepo(repo) {
1559
+ return this.config.project.find(p => p.repo === repo);
1560
+ }
1561
+ findQaPartner(devAgentId) {
1562
+ for (const project of this.config.project) {
1563
+ for (const pair of project.agent) {
1564
+ if (pair[0]?.id === devAgentId) {
1565
+ return pair[1];
1566
+ }
1567
+ }
1568
+ }
1569
+ return undefined;
1570
+ }
1571
+ async getTask(taskId) {
1572
+ return this.taskStore.get(taskId);
1573
+ }
1574
+ async getAgentState(agentId) {
1575
+ return this.agentStore.get(agentId);
1576
+ }
1577
+ async getPostApproveCompletion(taskId) {
1578
+ return this.postApproveStore.get(taskId);
1579
+ }
1580
+ async setPostApproveCompletion(taskId, value) {
1581
+ // Store write + watcher.start under one lock — otherwise concurrent clear zombies the sub.
1582
+ await this.withTaskLock(async () => {
1583
+ await this.postApproveStore.set(taskId, value);
1584
+ if (!this.postApproveMarkerWatcher)
1585
+ return;
1586
+ const task = await this.taskStore.get(taskId);
1587
+ if (!task)
1588
+ return;
1589
+ await this.postApproveMarkerWatcher.start({
1590
+ taskId,
1591
+ projectId: task.projectId,
1592
+ agentId: task.agentId,
1593
+ token: value.token,
1594
+ });
1595
+ });
1596
+ }
1597
+ async clearPostApproveCompletion(taskId) {
1598
+ await this.withTaskLock(async () => {
1599
+ await this.postApproveStore.clear(taskId);
1600
+ this.postApproveMarkerWatcher?.stop(taskId);
1601
+ });
1602
+ }
1603
+ async clearPostApproveCompletionIfMatches(taskId, token) {
1604
+ return this.withTaskLock(async () => {
1605
+ const cleared = await this.postApproveStore.clearIfMatches(taskId, token);
1606
+ if (cleared)
1607
+ this.postApproveMarkerWatcher?.stop(taskId);
1608
+ return cleared;
1609
+ });
1610
+ }
1611
+ // manual-merge skips snapshot — a stale scrollback marker would re-fire every restart.
1612
+ async armRecoveredPostApproveMarkers() {
1613
+ if (!this.postApproveMarkerWatcher)
1614
+ return;
1615
+ const tasks = await this.taskStore.list({ status: 'approved' });
1616
+ for (const task of tasks) {
1617
+ const completion = await this.postApproveStore.get(task.id);
1618
+ if (!completion)
1619
+ continue;
1620
+ const project = this.getProjectConfig(task.projectId);
1621
+ const skipSnapshot = project?.merge !== 'auto';
1622
+ try {
1623
+ await this.postApproveMarkerWatcher.start({
1624
+ taskId: task.id,
1625
+ projectId: task.projectId,
1626
+ agentId: task.agentId,
1627
+ token: completion.token,
1628
+ skipSnapshot,
1629
+ });
1630
+ }
1631
+ catch (err) {
1632
+ console.warn(`[AgentManager] armRecoveredPostApproveMarkers: failed to arm task=${task.id}:`, err);
1633
+ }
1634
+ }
1635
+ }
1636
+ // skipSnapshot=true: 旧 scrollback 里的 marker 重启后不应再触发。
1637
+ // 只对 spec-review-complete / spec-fix-complete emit intervention — spec-ready 在 develop
1638
+ // prompt 里是 optional, 报警会让所有 in_progress task 噪音化。
1639
+ async armRecoveredSpecMarkers() {
1640
+ if (!this.specReviewMarkerWatcher)
1641
+ return;
1642
+ const tasks = await this.taskStore.list();
1643
+ for (const task of tasks) {
1644
+ if (!task.specMarkerToken)
1645
+ continue;
1646
+ let kind;
1647
+ let agentId;
1648
+ if (task.phase === 'spec' && task.status === 'review') {
1649
+ kind = 'spec-review-complete';
1650
+ agentId = task.qaAgentId;
1651
+ }
1652
+ else if (task.phase === 'spec' && task.status === 'fixing') {
1653
+ kind = 'spec-fix-complete';
1654
+ agentId = task.agentId;
1655
+ }
1656
+ else if (task.phase === undefined && task.status === 'in_progress') {
1657
+ kind = 'spec-ready';
1658
+ agentId = task.agentId;
1659
+ }
1660
+ if (!kind || !agentId)
1661
+ continue;
1662
+ try {
1663
+ await this.specReviewMarkerWatcher.start({
1664
+ taskId: task.id,
1665
+ projectId: task.projectId,
1666
+ agentId,
1667
+ kind,
1668
+ token: task.specMarkerToken,
1669
+ skipSnapshot: true,
1670
+ });
1671
+ if (kind !== 'spec-ready') {
1672
+ await this.safeEmit({
1673
+ id: '',
1674
+ type: 'human.intervention',
1675
+ timestamp: new Date().toISOString(),
1676
+ projectId: task.projectId,
1677
+ agentId,
1678
+ taskId: task.id,
1679
+ data: {
1680
+ phase: 'spec-marker-armed-during-recovery',
1681
+ kind,
1682
+ note: 'Task is waiting for a spec marker after server recovery; if no marker arrives, the prompt may not have been fully delivered before the previous crash. Inspect the agent pane and consider manual retry or transition.',
1683
+ },
1684
+ });
1685
+ }
1686
+ }
1687
+ catch (err) {
1688
+ console.warn(`[AgentManager] armRecoveredSpecMarkers: failed to arm task=${task.id} kind=${kind}:`, err);
1689
+ }
1690
+ }
1691
+ }
1692
+ async fetchPrHeadSha(taskId) {
1693
+ const task = await this.taskStore.get(taskId);
1694
+ if (!task || !task.prNumber) {
1695
+ throw new Error(`fetchPrHeadSha: no PR number for task ${taskId}`);
1696
+ }
1697
+ const project = this.getProjectConfig(task.projectId);
1698
+ if (!project) {
1699
+ throw new Error(`fetchPrHeadSha: unknown project ${task.projectId}`);
1700
+ }
1701
+ const result = await this.platformRunner.exec(`gh pr view ${task.prNumber} --repo ${shellQuote(project.repo)} --json headRefOid --jq .headRefOid`);
1702
+ if (result.exitCode !== 0) {
1703
+ throw new Error(`gh pr view failed for PR #${task.prNumber}: ${result.stderr || result.stdout}`);
1704
+ }
1705
+ const headSha = result.stdout.trim();
1706
+ if (!/^[0-9a-f]{40}$/i.test(headSha)) {
1707
+ throw new Error(`gh pr view returned invalid headRefOid for PR #${task.prNumber}`);
1708
+ }
1709
+ return headSha;
1710
+ }
1711
+ async listTasksByPrNumber(prNumber, projectId) {
1712
+ const all = await this.taskStore.list({ projectId });
1713
+ return all.filter(t => t.prNumber === prNumber);
1714
+ }
1715
+ async updateTaskStatus(taskId, status) {
1716
+ const task = await this.taskStore.get(taskId);
1717
+ if (!task)
1718
+ return;
1719
+ task.status = status;
1720
+ task.updatedAt = new Date().toISOString();
1721
+ await this.taskStore.set(task);
1722
+ }
1723
+ async updateTask(taskId, updates) {
1724
+ await this.withTaskLock(async () => {
1725
+ const task = await this.taskStore.get(taskId);
1726
+ if (!task)
1727
+ return;
1728
+ Object.assign(task, updates, { updatedAt: new Date().toISOString() });
1729
+ await this.taskStore.set(task);
1730
+ });
1731
+ }
1732
+ async transitionTaskStatus(taskId, toStatus, guard, patch) {
1733
+ return this.withTaskLock(async () => {
1734
+ const task = await this.taskStore.get(taskId);
1735
+ if (!task)
1736
+ return null;
1737
+ const previousStatus = task.status;
1738
+ if (TERMINAL_STATUSES.includes(previousStatus))
1739
+ return null;
1740
+ if (!guard.fromStatus.includes(previousStatus))
1741
+ return null;
1742
+ Object.assign(task, patch ?? {}, {
1743
+ status: toStatus,
1744
+ updatedAt: new Date().toISOString(),
1745
+ });
1746
+ await this.taskStore.set(task);
1747
+ return { task, previousStatus };
1748
+ });
1749
+ }
1750
+ createRunnerFor(agent) {
1751
+ if (this.runnerFactory) {
1752
+ return this.runnerFactory(agent);
1753
+ }
1754
+ return createRunner(agent.mode, agent.host);
1755
+ }
1756
+ createRepoStore(agent, project, runner) {
1757
+ if (this.repoStoreFactory) {
1758
+ return this.repoStoreFactory(runner, project.repo, agent.mode, agent.host, this.repoCache);
1759
+ }
1760
+ return new RepoStore(runner, project.repo, agent.mode, agent.host, this.repoCache);
1761
+ }
1762
+ async ensureWorkdir(agent, project, runner) {
1763
+ if (agent.workdir)
1764
+ return { workdir: agent.workdir, repoStore: null };
1765
+ const repoStore = this.createRepoStore(agent, project, runner);
1766
+ const workdir = await repoStore.ensure();
1767
+ return { workdir, repoStore };
1768
+ }
1769
+ resolveWorkdir(agent, agentState) {
1770
+ if (agent.workdir)
1771
+ return agent.workdir;
1772
+ return agentState?.repoPath ?? null;
1773
+ }
1774
+ // Empty repos lack origin/HEAD; undefined makes git use base repo HEAD.
1775
+ async resolveAutoBaseRef(runner, workdir) {
1776
+ const result = await runner.exec(`git -C ${shellQuote(workdir)} rev-parse --verify --quiet origin/HEAD`);
1777
+ return result.exitCode === 0 ? 'origin/HEAD' : undefined;
1778
+ }
1779
+ getRepoCache() {
1780
+ return this.repoCache;
1781
+ }
1782
+ async rollbackFailedDispatch(taskId, agentId) {
1783
+ await this.withTaskLock(async () => {
1784
+ const task = await this.taskStore.get(taskId);
1785
+ if (!task)
1786
+ return;
1787
+ if (task.status !== 'in_progress')
1788
+ return;
1789
+ task.agentId = '';
1790
+ task.status = 'pending';
1791
+ task.updatedAt = new Date().toISOString();
1792
+ await this.taskStore.set(task);
1793
+ });
1794
+ const existing = await this.agentStore.get(agentId);
1795
+ if (existing && existing.taskId !== taskId) {
1796
+ console.warn(`[AgentManager] rollback: agent ${agentId} taskId mismatch (expected ${taskId}, got ${existing.taskId}); ` +
1797
+ `skipping agent cleanup — agent already reassigned`);
1798
+ return;
1799
+ }
1800
+ const projectId = existing?.projectId ?? this.getAgentConfig(agentId)?.projectId;
1801
+ if (!projectId) {
1802
+ console.error(`[AgentManager] CRITICAL: cannot resolve projectId for agent ${agentId} during rollback; deleting agent state.`);
1803
+ await this.agentStore.delete(agentId);
1804
+ }
1805
+ else {
1806
+ const now = new Date().toISOString();
1807
+ await this.agentStore.update(agentId, (latest) => ({
1808
+ ...(latest ?? existing ?? { id: agentId, projectId, updatedAt: now }),
1809
+ id: agentId,
1810
+ projectId,
1811
+ taskId: undefined,
1812
+ worktreePath: undefined,
1813
+ updatedAt: now,
1814
+ }));
1815
+ }
1816
+ await this.lockManager.release(agentId);
1817
+ }
1818
+ async pickAgent(projectId, preferredAgentId) {
1819
+ const cfg = this.getAgentConfig(preferredAgentId);
1820
+ if (!cfg)
1821
+ throw new ApiError(400, `Unknown agent: ${preferredAgentId}`);
1822
+ if (cfg.projectId !== projectId) {
1823
+ throw new ApiError(400, `Agent ${preferredAgentId} not in project ${projectId}`);
1824
+ }
1825
+ if (cfg.role !== 'dev') {
1826
+ throw new ApiError(400, `Agent ${preferredAgentId} is not dev role`);
1827
+ }
1828
+ const state = await this.agentStore.get(preferredAgentId);
1829
+ if (!canDispatchWithBinding(state))
1830
+ return null;
1831
+ const { projectId: _projectId, ...rest } = cfg;
1832
+ return rest;
1833
+ }
1834
+ async createTask(projectId, input) {
1835
+ return this.withTaskLock(async () => {
1836
+ const taskId = await this.taskStore.nextId();
1837
+ const now = new Date().toISOString();
1838
+ const dev = await this.pickAgent(projectId, input.preferredAgentId);
1839
+ const qa = this.findQaPartner(input.preferredAgentId);
1840
+ if (!dev) {
1841
+ const queued = {
1842
+ id: taskId,
1843
+ projectId,
1844
+ title: input.title,
1845
+ description: input.description,
1846
+ preferredAgentId: input.preferredAgentId,
1847
+ agentId: '',
1848
+ reviewRound: 0,
1849
+ status: 'pending',
1850
+ branch: BRANCH_PREFIX + taskId,
1851
+ createdAt: now,
1852
+ updatedAt: now,
1853
+ ...(qa ? { qaAgentId: qa.id } : {}),
1854
+ };
1855
+ await this.taskStore.set(queued);
1856
+ await this.safeEmit({
1857
+ id: '',
1858
+ type: 'task.created',
1859
+ timestamp: now,
1860
+ projectId,
1861
+ taskId,
1862
+ data: {
1863
+ queued: true,
1864
+ queueReason: 'preferred_agent_busy',
1865
+ agentId: input.preferredAgentId,
1866
+ },
1867
+ });
1868
+ return queued;
1869
+ }
1870
+ const acquired = await this.lockManager.acquire(dev.id);
1871
+ if (!acquired) {
1872
+ const queued = {
1873
+ id: taskId,
1874
+ projectId,
1875
+ title: input.title,
1876
+ description: input.description,
1877
+ preferredAgentId: input.preferredAgentId,
1878
+ agentId: '',
1879
+ reviewRound: 0,
1880
+ status: 'pending',
1881
+ branch: BRANCH_PREFIX + taskId,
1882
+ createdAt: now,
1883
+ updatedAt: now,
1884
+ ...(qa ? { qaAgentId: qa.id } : {}),
1885
+ };
1886
+ await this.taskStore.set(queued);
1887
+ await this.safeEmit({
1888
+ id: '',
1889
+ type: 'task.created',
1890
+ timestamp: now,
1891
+ projectId,
1892
+ taskId,
1893
+ data: {
1894
+ queued: true,
1895
+ queueReason: 'agent_locked',
1896
+ agentId: input.preferredAgentId,
1897
+ },
1898
+ });
1899
+ return queued;
1900
+ }
1901
+ const task = {
1902
+ id: taskId,
1903
+ projectId,
1904
+ title: input.title,
1905
+ description: input.description,
1906
+ preferredAgentId: input.preferredAgentId,
1907
+ agentId: dev.id,
1908
+ ...(qa ? { qaAgentId: qa.id } : {}),
1909
+ reviewRound: 0,
1910
+ status: 'in_progress',
1911
+ branch: BRANCH_PREFIX + taskId,
1912
+ createdAt: now,
1913
+ updatedAt: now,
1914
+ };
1915
+ await this.taskStore.set(task);
1916
+ await this.agentStore.update(dev.id, (existing) => ({
1917
+ id: dev.id,
1918
+ projectId,
1919
+ taskId,
1920
+ updatedAt: now,
1921
+ ...(existing?.paneId !== undefined ? { paneId: existing.paneId } : {}),
1922
+ ...(existing?.repoPath !== undefined ? { repoPath: existing.repoPath } : {}),
1923
+ ...(existing?.creationToken !== undefined ? { creationToken: existing.creationToken } : {}),
1924
+ }));
1925
+ await this.safeEmit({
1926
+ id: '',
1927
+ type: 'task.assigned',
1928
+ timestamp: now,
1929
+ projectId,
1930
+ agentId: dev.id,
1931
+ taskId,
1932
+ data: { agentId: dev.id },
1933
+ });
1934
+ return task;
1935
+ });
1936
+ }
1937
+ async createAndStartTask(projectId, input) {
1938
+ const task = await this.createTask(projectId, input);
1939
+ if (task.status === 'in_progress' && task.agentId) {
1940
+ // Persist token first — prompt build 和 watcher 验证共用 task.specMarkerToken。
1941
+ const specMarkerToken = createMarkerToken();
1942
+ await this.updateTask(task.id, { specMarkerToken });
1943
+ let started = false;
1944
+ let dispatchErr = null;
1945
+ try {
1946
+ started = await this.startSession(task.id, task.agentId, 'develop');
1947
+ }
1948
+ catch (err) {
1949
+ dispatchErr = err;
1950
+ console.error(`[AgentManager] createAndStartTask startSession hard error for task=${task.id}:`, err);
1951
+ }
1952
+ if (started) {
1953
+ await this.armSpecMarkerWatcher(task.id, task.agentId, 'spec-ready', specMarkerToken);
1954
+ }
1955
+ if (!started) {
1956
+ if (dispatchErr instanceof DispatchTerminalError) {
1957
+ await this.failTaskForDispatchError(task.id, 'develop', task.agentId, dispatchErr);
1958
+ }
1959
+ else if (dispatchErr instanceof EnsureSessionError && dispatchErr.partial.handled) {
1960
+ // handleDialogPendingFromRuntime 已标 Held + fail task + release partners;rollback 会清 taskId/lock
1961
+ // 让仍卡 dialog 的 pane 在 status='awaiting_human' 被清后可被新 dispatch 撞进——必须跳过。
1962
+ }
1963
+ else {
1964
+ await this.rollbackFailedDispatch(task.id, task.agentId);
1965
+ }
1966
+ const refreshed = await this.taskStore.get(task.id);
1967
+ if (refreshed)
1968
+ return refreshed;
1969
+ }
1970
+ }
1971
+ return task;
1972
+ }
1973
+ async drainQueue(projectId) {
1974
+ const pending = await this.taskStore.list({ projectId, status: 'pending' });
1975
+ pending.sort((a, b) => a.createdAt.localeCompare(b.createdAt));
1976
+ for (const task of pending) {
1977
+ const taken = await this.withTaskLock(async () => {
1978
+ const fresh = await this.taskStore.get(task.id);
1979
+ if (!fresh || fresh.status !== 'pending')
1980
+ return null;
1981
+ const cfg = this.getAgentConfig(fresh.preferredAgentId);
1982
+ if (!cfg || cfg.projectId !== projectId || cfg.role !== 'dev') {
1983
+ if (!this.warnedStaleQueueDispatch.has(fresh.id)) {
1984
+ this.warnedStaleQueueDispatch.add(fresh.id);
1985
+ const reason = !cfg
1986
+ ? 'agent not configured'
1987
+ : cfg.projectId !== projectId
1988
+ ? `agent.projectId=${cfg.projectId} ≠ task.projectId=${projectId}`
1989
+ : `agent.role=${cfg.role} ≠ 'dev'`;
1990
+ console.warn(`[AgentManager] drainQueue: task ${fresh.id} preferredAgentId=${fresh.preferredAgentId} ` +
1991
+ `cannot be dispatched (${reason}); task will stay pending until config is fixed or task is edited / cancelled`);
1992
+ }
1993
+ return null;
1994
+ }
1995
+ const state = await this.agentStore.get(cfg.id);
1996
+ if (!canDispatchWithBinding(state))
1997
+ return null;
1998
+ const acquired = await this.lockManager.acquire(cfg.id);
1999
+ if (!acquired)
2000
+ return null;
2001
+ const { projectId: _projectId, ...dev } = cfg;
2002
+ const now = new Date().toISOString();
2003
+ fresh.agentId = dev.id;
2004
+ fresh.status = 'in_progress';
2005
+ fresh.updatedAt = now;
2006
+ await this.taskStore.set(fresh);
2007
+ await this.agentStore.update(dev.id, (existing) => ({
2008
+ id: dev.id,
2009
+ projectId,
2010
+ taskId: fresh.id,
2011
+ updatedAt: now,
2012
+ ...(existing?.paneId !== undefined ? { paneId: existing.paneId } : {}),
2013
+ ...(existing?.repoPath !== undefined ? { repoPath: existing.repoPath } : {}),
2014
+ ...(existing?.creationToken !== undefined ? { creationToken: existing.creationToken } : {}),
2015
+ }));
2016
+ await this.safeEmit({
2017
+ id: '',
2018
+ type: 'task.assigned',
2019
+ timestamp: now,
2020
+ projectId,
2021
+ agentId: dev.id,
2022
+ taskId: fresh.id,
2023
+ data: { agentId: dev.id, dequeued: true },
2024
+ });
2025
+ return fresh;
2026
+ });
2027
+ if (!taken)
2028
+ continue;
2029
+ const specMarkerToken = createMarkerToken();
2030
+ await this.updateTask(taken.id, { specMarkerToken });
2031
+ let started = false;
2032
+ let dispatchErr = null;
2033
+ try {
2034
+ started = await this.startSession(taken.id, taken.agentId, 'develop');
2035
+ }
2036
+ catch (err) {
2037
+ dispatchErr = err;
2038
+ console.error(`[AgentManager] drainQueue startSession hard error for task=${taken.id}:`, err);
2039
+ }
2040
+ if (started) {
2041
+ await this.armSpecMarkerWatcher(taken.id, taken.agentId, 'spec-ready', specMarkerToken);
2042
+ }
2043
+ if (!started) {
2044
+ if (dispatchErr instanceof DispatchTerminalError) {
2045
+ await this.failTaskForDispatchError(taken.id, 'develop', taken.agentId, dispatchErr);
2046
+ }
2047
+ else if (dispatchErr instanceof EnsureSessionError && dispatchErr.partial.handled) {
2048
+ // handleDialogPendingFromRuntime 已 Held + fail task + release partners;跳过 rollback。
2049
+ }
2050
+ else {
2051
+ await this.rollbackFailedDispatch(taken.id, taken.agentId);
2052
+ }
2053
+ }
2054
+ }
2055
+ }
2056
+ async startSession(taskId, agentId, phase, opts = {}) {
2057
+ const agent = this.getAgentConfig(agentId);
2058
+ if (!agent)
2059
+ throw new Error(`Unknown agent: ${agentId}`);
2060
+ const task = await this.taskStore.get(taskId);
2061
+ if (!task)
2062
+ throw new Error(`Unknown task: ${taskId}`);
2063
+ const project = this.getProjectConfig(agent.projectId);
2064
+ if (!project)
2065
+ throw new Error(`Unknown project: ${agent.projectId}`);
2066
+ const expectedStatuses = PHASE_EXPECTED_STATUS[phase] ?? [];
2067
+ const preTask = await this.taskStore.get(taskId);
2068
+ if (!preTask) {
2069
+ console.warn(`[AgentManager] startSession[${phase}]: pre-create task=${taskId} not found; aborting`);
2070
+ return false;
2071
+ }
2072
+ // bypassTaskStatusGate 只放过 expected gate,不放过 terminal。
2073
+ if (TERMINAL_STATUSES.includes(preTask.status)) {
2074
+ console.warn(`[AgentManager] startSession[${phase}]: pre-create task=${taskId} status=${preTask.status} ` +
2075
+ `is terminal; aborting`);
2076
+ return false;
2077
+ }
2078
+ if (!opts.bypassTaskStatusGate && !expectedStatuses.includes(preTask.status)) {
2079
+ console.warn(`[AgentManager] startSession[${phase}]: pre-create task=${taskId} status=${preTask.status} ` +
2080
+ `not in expected ${expectedStatuses.join('/')}; aborting`);
2081
+ return false;
2082
+ }
2083
+ const preAgent = await this.agentStore.get(agentId);
2084
+ if (PHASE_REQUIRES_AGENT_BOUND_TO_TASK[phase]) {
2085
+ if (!preAgent || preAgent.taskId !== taskId) {
2086
+ console.warn(`[AgentManager] startSession[${phase}]: pre-create agent=${agentId} not bound to ${taskId} ` +
2087
+ `(got ${preAgent?.taskId}); aborting`);
2088
+ return false;
2089
+ }
2090
+ }
2091
+ else if (preAgent && preAgent.taskId && preAgent.taskId !== taskId) {
2092
+ console.warn(`[AgentManager] startSession[${phase}]: pre-create agent=${agentId} already bound to ` +
2093
+ `${preAgent.taskId} (request ${taskId}); aborting`);
2094
+ return false;
2095
+ }
2096
+ const dialogFailFromStatuses = opts.dialogFailFromStatuses ?? PHASE_EXPECTED_STATUS[phase] ?? [...ACTIVE_TASK_STATUSES];
2097
+ let ensure;
2098
+ try {
2099
+ ensure = await this.ensureSession(agentId, 'runtime');
2100
+ }
2101
+ catch (err) {
2102
+ if (await this.handleDialogPendingFromRuntime(agentId, err, { expectedFromStatuses: dialogFailFromStatuses })) {
2103
+ throw err;
2104
+ }
2105
+ if (err instanceof EnsureSessionError && err.partial.createdSession) {
2106
+ try {
2107
+ const runner = this.createRunnerFor(agent);
2108
+ await new TmuxManager(runner).killSession(agentId);
2109
+ }
2110
+ catch (cleanupErr) {
2111
+ console.warn(`[AgentManager] startSession ensureSession rollback killSession failed:`, cleanupErr);
2112
+ }
2113
+ }
2114
+ throw err;
2115
+ }
2116
+ const { paneId, workdir } = ensure;
2117
+ const runner = this.createRunnerFor(agent);
2118
+ const worktree = new WorktreeManager(runner);
2119
+ const tmux = new TmuxManager(runner);
2120
+ const baseRef = agent.workdir
2121
+ ? undefined
2122
+ : await this.resolveAutoBaseRef(runner, workdir);
2123
+ const worktreePath = phase === 'review' || phase === 'recheck' || phase === 'spec-review'
2124
+ ? await worktree.createDetached(workdir, taskId, task.branch)
2125
+ : await worktree.create(workdir, taskId, baseRef);
2126
+ // Persist worktreePath now so a crash before set-running leaves a recoverable trail.
2127
+ await this.agentStore.update(agentId, (stateNow) => {
2128
+ if (!stateNow || stateNow.taskId !== taskId)
2129
+ return AGENT_STORE_NOOP;
2130
+ return {
2131
+ ...stateNow,
2132
+ paneId,
2133
+ worktreePath,
2134
+ repoPath: workdir,
2135
+ updatedAt: new Date().toISOString(),
2136
+ };
2137
+ });
2138
+ // Caller-transmitted token/round take precedence — task fields are stale during dispatch.
2139
+ const promptSpecMarkerToken = opts.specMarkerToken ?? task.specMarkerToken;
2140
+ const promptSpecRound = opts.currentSpecRound ?? task.specReviewRound;
2141
+ const beforeInjectAgent = await this.agentStore.get(agentId);
2142
+ // freshRuntime=true 覆盖两种场景:(a) buildFreshSession 全新 tmux session;
2143
+ // (b) adoptOrRestartSession 的 shell 重启 / trust-dialog 答完——pane 仍在但 REPL
2144
+ // 是新进程。两种情况下旧上下文都没了,必须重置 dedup baseline,决不能因为
2145
+ // paneId 字符串恰好相同就沿用旧 skill 集。
2146
+ const reuseInjectedSkills = ensure.freshRuntime
2147
+ ? null
2148
+ : reuseSkillsIfContextValid(beforeInjectAgent, taskId, paneId);
2149
+ let prompt;
2150
+ try {
2151
+ prompt = buildPromptInline({
2152
+ task,
2153
+ phase,
2154
+ agent,
2155
+ worktreePath,
2156
+ skillRegistry: this.skillRegistry,
2157
+ ...(promptSpecMarkerToken ? { specMarkerToken: promptSpecMarkerToken } : {}),
2158
+ ...(promptSpecRound !== undefined ? { currentSpecRound: promptSpecRound } : {}),
2159
+ ...(opts.specFindings ? { specFindings: opts.specFindings } : {}),
2160
+ ...(reuseInjectedSkills ? { excludeSkills: reuseInjectedSkills } : {}),
2161
+ });
2162
+ }
2163
+ catch (err) {
2164
+ try {
2165
+ await worktree.remove(workdir, worktreePath);
2166
+ }
2167
+ catch { }
2168
+ // Terminal — rolling back to pending would loop on the same misconfiguration.
2169
+ if (err instanceof PromptSizeError) {
2170
+ throw new DispatchTerminalError('prompt_too_large', err.message);
2171
+ }
2172
+ if (err instanceof RequiredSkillsMissingError) {
2173
+ throw new DispatchTerminalError('required_skills_missing', err.message);
2174
+ }
2175
+ throw err;
2176
+ }
2177
+ // Last cancellable boundary before paste.
2178
+ const taskFresh = await this.taskStore.get(taskId);
2179
+ if (!taskFresh) {
2180
+ console.warn(`[AgentManager] startSession: task ${taskId} disappeared mid-dispatch; cleaning up worktree before paste`);
2181
+ try {
2182
+ await worktree.remove(workdir, worktreePath);
2183
+ }
2184
+ catch { }
2185
+ return false;
2186
+ }
2187
+ if (TERMINAL_STATUSES.includes(taskFresh.status)) {
2188
+ console.warn(`[AgentManager] startSession: task ${taskId} status=${taskFresh.status} is terminal ` +
2189
+ `for phase=${phase}; cleaning up worktree before paste`);
2190
+ try {
2191
+ await worktree.remove(workdir, worktreePath);
2192
+ }
2193
+ catch { }
2194
+ return false;
2195
+ }
2196
+ if (!opts.bypassTaskStatusGate && !expectedStatuses.includes(taskFresh.status)) {
2197
+ console.warn(`[AgentManager] startSession: task ${taskId} status=${taskFresh.status} not in ` +
2198
+ `expected ${expectedStatuses.join('/')} for phase=${phase}; cleaning up worktree before paste`);
2199
+ try {
2200
+ await worktree.remove(workdir, worktreePath);
2201
+ }
2202
+ catch { }
2203
+ return false;
2204
+ }
2205
+ const agentFresh = await this.agentStore.get(agentId);
2206
+ if (PHASE_REQUIRES_AGENT_BOUND_TO_TASK[phase]) {
2207
+ if (!agentFresh || agentFresh.taskId !== taskId) {
2208
+ console.warn(`[AgentManager] startSession[${phase}]: agent ${agentId} not bound to ${taskId} ` +
2209
+ `(got taskId=${agentFresh?.taskId}); cleaning up worktree before paste`);
2210
+ try {
2211
+ await worktree.remove(workdir, worktreePath);
2212
+ }
2213
+ catch { }
2214
+ return false;
2215
+ }
2216
+ }
2217
+ else if (agentFresh && agentFresh.taskId && agentFresh.taskId !== taskId) {
2218
+ console.warn(`[AgentManager] startSession[${phase}]: agent ${agentId} reassigned to ${agentFresh.taskId} ` +
2219
+ `(was ${taskId}); cleaning up worktree before paste`);
2220
+ try {
2221
+ await worktree.remove(workdir, worktreePath);
2222
+ }
2223
+ catch { }
2224
+ return false;
2225
+ }
2226
+ const now = new Date().toISOString();
2227
+ let agentMarkedRunning = false;
2228
+ try {
2229
+ await this.agentStore.update(agentId, (existing) => ({
2230
+ id: agentId,
2231
+ projectId: agent.projectId,
2232
+ paneId,
2233
+ taskId,
2234
+ worktreePath,
2235
+ repoPath: workdir,
2236
+ startedAt: now,
2237
+ updatedAt: now,
2238
+ ...(existing?.creationToken !== undefined ? { creationToken: existing.creationToken } : {}),
2239
+ ...(reuseInjectedSkills
2240
+ ? { injectedSkills: { taskId, paneId, skills: reuseInjectedSkills } }
2241
+ : {}),
2242
+ }));
2243
+ agentMarkedRunning = true;
2244
+ const ackResult = await this.injectAndAwaitAck(tmux, paneId, prompt, agentId, agent.runtime);
2245
+ // ack-timeout 路径下 REPL 未确认接收,prompt 可能仍在排队——把 phase skills 记成 resident
2246
+ // 等于让下一轮 dedup 在事实尚未确认时短路,operator 复盘后重新 dispatch 会发空 <skills/>,
2247
+ // 必须只在 acked 通路上落盘。
2248
+ if (ackResult.acked) {
2249
+ await this.persistInjectedSkills(agentId, taskId, paneId, agent.role, phase, reuseInjectedSkills);
2250
+ }
2251
+ await this.eventBus.emit({
2252
+ id: '',
2253
+ type: 'session.started',
2254
+ timestamp: now,
2255
+ projectId: agent.projectId,
2256
+ agentId,
2257
+ taskId,
2258
+ data: { phase, worktreePath },
2259
+ });
2260
+ this.startRuntimeMenuWatch(agentId);
2261
+ return true;
2262
+ }
2263
+ catch (err) {
2264
+ // ack_unknown 表示 sendEnter 已发,prompt 可能正在 REPL 中执行。
2265
+ // 清绑定/lock/worktree 会让下一任务复用仍在跑旧 prompt 的 pane——保留所有 state,
2266
+ // 由上游 failTaskForDispatchError → markAwaitingHuman 接手等人。
2267
+ const isAckUnknown = err instanceof DispatchTerminalError && err.reason === 'ack_unknown';
2268
+ if (!isAckUnknown) {
2269
+ try {
2270
+ await worktree.remove(workdir, worktreePath);
2271
+ }
2272
+ catch { }
2273
+ if (agentMarkedRunning) {
2274
+ try {
2275
+ let released = false;
2276
+ await this.agentStore.update(agentId, (agentNow) => {
2277
+ if (!agentNow || agentNow.taskId !== taskId) {
2278
+ console.warn(`[AgentManager] startSession cleanup agentStore: agent ${agentId} already reassigned ` +
2279
+ `(taskId=${agentNow?.taskId}, expected ${taskId}); skipping`);
2280
+ return AGENT_STORE_NOOP;
2281
+ }
2282
+ released = true;
2283
+ void err;
2284
+ return {
2285
+ id: agentId,
2286
+ projectId: agent.projectId,
2287
+ paneId,
2288
+ repoPath: workdir,
2289
+ updatedAt: new Date().toISOString(),
2290
+ ...(agentNow.creationToken !== undefined ? { creationToken: agentNow.creationToken } : {}),
2291
+ };
2292
+ });
2293
+ if (released) {
2294
+ await this.lockManager.release(agentId);
2295
+ }
2296
+ }
2297
+ catch (cleanupErr) {
2298
+ console.warn(`[AgentManager] startSession cleanup agentStore failed:`, cleanupErr);
2299
+ }
2300
+ }
2301
+ }
2302
+ throw err;
2303
+ }
2304
+ }
2305
+ async injectAndAwaitAck(tmux, paneId, prompt, agentId, _runtime) {
2306
+ await tmux.injectPrompt(paneId, prompt, agentId);
2307
+ const baseline = await tmux.capturePaneSnapshot(paneId);
2308
+ await tmux.sendEnter(paneId);
2309
+ try {
2310
+ await tmux.waitAck(paneId, baseline, { timeoutMs: this.dispatchAckTimeoutMs });
2311
+ return { acked: true };
2312
+ }
2313
+ catch (err) {
2314
+ const message = err instanceof Error ? err.message : String(err);
2315
+ // 仅 ack 超时走 intervention(REPL 排队 OK 等人查看);其他错误(capturePaneSnapshot
2316
+ // 等基础设施失败)当 dispatch 终态错误抛出,由上游标 task failed。
2317
+ if (!(err instanceof Error && /runtime ack timeout/.test(err.message))) {
2318
+ throw new DispatchTerminalError('ack_unknown', `ack_unknown for pane ${paneId}: ${message}`);
2319
+ }
2320
+ console.warn(`[AgentManager] dispatch ack timeout for pane ${paneId} agent ${agentId}: ${message}`);
2321
+ const state = await this.agentStore.get(agentId).catch(() => null);
2322
+ await this.safeEmit({
2323
+ id: '',
2324
+ type: 'human.intervention',
2325
+ timestamp: new Date().toISOString(),
2326
+ projectId: state?.projectId ?? '',
2327
+ agentId,
2328
+ ...(state?.taskId ? { taskId: state.taskId } : {}),
2329
+ data: {
2330
+ phase: 'dispatch-ack-timeout',
2331
+ paneId,
2332
+ message,
2333
+ note: 'REPL did not acknowledge the pasted prompt within timeout. ' +
2334
+ 'baxian intentionally did NOT send C-c — the input may still be queued. ' +
2335
+ 'Attach via web terminal to verify and resolve.',
2336
+ },
2337
+ });
2338
+ return { acked: false };
2339
+ }
2340
+ }
2341
+ // Snapshot which skills are now resident in the REPL's context, union of
2342
+ // the pre-dispatch baseline (when context was still valid) and the phase's
2343
+ // declared skills. Guarded by (taskId, paneId) so a concurrent rebind never
2344
+ // overwrites a freshly-bound agent.
2345
+ async persistInjectedSkills(agentId, taskId, paneId, role, phase, reuseInjectedSkills) {
2346
+ const phaseSkills = this.skillRegistry.skillsForPhase(role, phase);
2347
+ const baseList = reuseInjectedSkills ?? [];
2348
+ // 已有有效 context 记录,且本 phase 没有引入新 skill → 写盘无信息增益,short-circuit。
2349
+ // reuseInjectedSkills === null 时缺基线记录,仍需建一份初始档。
2350
+ if (reuseInjectedSkills !== null && phaseSkills.every(s => baseList.includes(s)))
2351
+ return;
2352
+ const merged = Array.from(new Set([...baseList, ...phaseSkills]));
2353
+ const now = new Date().toISOString();
2354
+ await this.agentStore.update(agentId, (latest) => {
2355
+ if (!latest || latest.taskId !== taskId || latest.paneId !== paneId)
2356
+ return AGENT_STORE_NOOP;
2357
+ return {
2358
+ ...latest,
2359
+ injectedSkills: { taskId, paneId, skills: merged },
2360
+ updatedAt: now,
2361
+ };
2362
+ });
2363
+ }
2364
+ // Ready gate prevents mid-paste webhook from flipping to 'waiting' on a busy REPL.
2365
+ async markAgentWaiting(agentId, expectedTaskId, opts = {}) {
2366
+ return this.releaseAgentForTask(agentId, expectedTaskId, 'waiting', opts);
2367
+ }
2368
+ async continueSession(taskId, agentId, phase, opts = {}) {
2369
+ const agent = this.getAgentConfig(agentId);
2370
+ if (!agent)
2371
+ throw new Error(`Unknown agent: ${agentId}`);
2372
+ const task = await this.taskStore.get(taskId);
2373
+ if (!task)
2374
+ throw new Error(`Unknown task: ${taskId}`);
2375
+ const postApproveToken = opts.postApproveToken
2376
+ ?? (phase === 'post_approve'
2377
+ ? (await this.getPostApproveCompletion(taskId))?.token
2378
+ : undefined);
2379
+ if (phase === 'post_approve') {
2380
+ const completion = await this.getPostApproveCompletion(taskId);
2381
+ if (!completion || !postApproveToken || completion.token !== postApproveToken) {
2382
+ console.warn(`[AgentManager] continueSession[post_approve]: token missing or stale for task ${taskId}; skipping`);
2383
+ return false;
2384
+ }
2385
+ }
2386
+ const agentState = await this.agentStore.get(agentId);
2387
+ if (!agentState)
2388
+ throw new Error(`No agent state found for: ${agentId}`);
2389
+ if (PHASE_REQUIRES_AGENT_BOUND_TO_TASK[phase] && agentState.taskId !== taskId) {
2390
+ console.warn(`[AgentManager] continueSession[${phase}]: agent ${agentId} not bound to ${taskId} ` +
2391
+ `(pre-paste taskId=${agentState.taskId}); skipping`);
2392
+ return false;
2393
+ }
2394
+ if (!PHASE_REQUIRES_AGENT_BOUND_TO_TASK[phase]
2395
+ && agentState.taskId
2396
+ && agentState.taskId !== taskId) {
2397
+ console.warn(`[AgentManager] continueSession[${phase}]: agent ${agentId} reassigned ` +
2398
+ `(pre-paste taskId=${agentState.taskId} !== ${taskId}); skipping`);
2399
+ return false;
2400
+ }
2401
+ const worktreePath = agentState.worktreePath;
2402
+ const dialogFailFromStatuses = opts.dialogFailFromStatuses ?? PHASE_EXPECTED_STATUS[phase] ?? [...ACTIVE_TASK_STATUSES];
2403
+ let ensure;
2404
+ try {
2405
+ ensure = await this.ensureSession(agentId, 'runtime');
2406
+ }
2407
+ catch (err) {
2408
+ if (await this.handleDialogPendingFromRuntime(agentId, err, { expectedFromStatuses: dialogFailFromStatuses })) {
2409
+ throw err;
2410
+ }
2411
+ if (err instanceof EnsureSessionError && err.partial.createdSession) {
2412
+ try {
2413
+ const runner = this.createRunnerFor(agent);
2414
+ await new TmuxManager(runner).killSession(agentId);
2415
+ }
2416
+ catch (cleanupErr) {
2417
+ console.warn(`[AgentManager] continueSession ensureSession rollback killSession failed:`, cleanupErr);
2418
+ }
2419
+ }
2420
+ throw err;
2421
+ }
2422
+ const { paneId } = ensure;
2423
+ const runner = this.createRunnerFor(agent);
2424
+ const tmux = new TmuxManager(runner);
2425
+ const promptSpecMarkerToken = opts.specMarkerToken ?? task.specMarkerToken;
2426
+ const promptSpecRound = opts.currentSpecRound ?? task.specReviewRound;
2427
+ // 与 startSession 同步:任何 REPL 启动 / 重启路径(freshRuntime=true)都视为新上下文,
2428
+ // 强制重新注入完整 skill 集——既覆盖 fresh tmux session,也覆盖同 pane 里的 shell 重启
2429
+ // 与 trust-dialog 完成两种 adopt 场景。
2430
+ const reuseInjectedSkills = ensure.freshRuntime
2431
+ ? null
2432
+ : reuseSkillsIfContextValid(agentState, taskId, paneId);
2433
+ let prompt;
2434
+ try {
2435
+ prompt = buildPromptInline({
2436
+ task,
2437
+ phase,
2438
+ agent,
2439
+ worktreePath,
2440
+ skillRegistry: this.skillRegistry,
2441
+ ...(postApproveToken ? { postApproveToken } : {}),
2442
+ ...(promptSpecMarkerToken ? { specMarkerToken: promptSpecMarkerToken } : {}),
2443
+ ...(promptSpecRound !== undefined ? { currentSpecRound: promptSpecRound } : {}),
2444
+ ...(opts.specFindings ? { specFindings: opts.specFindings } : {}),
2445
+ ...(reuseInjectedSkills ? { excludeSkills: reuseInjectedSkills } : {}),
2446
+ });
2447
+ }
2448
+ catch (err) {
2449
+ if (err instanceof PromptSizeError) {
2450
+ throw new DispatchTerminalError('prompt_too_large', err.message);
2451
+ }
2452
+ if (err instanceof RequiredSkillsMissingError) {
2453
+ throw new DispatchTerminalError('required_skills_missing', err.message);
2454
+ }
2455
+ throw err;
2456
+ }
2457
+ // Final state re-check before the irreversible paste — guards against IO-window races.
2458
+ const expectedStatuses = PHASE_EXPECTED_STATUS[phase] ?? [];
2459
+ const taskFresh = await this.taskStore.get(taskId);
2460
+ if (!taskFresh || TERMINAL_STATUSES.includes(taskFresh.status)) {
2461
+ console.warn(`[AgentManager] continueSession: task ${taskId} status=${taskFresh?.status} terminal/missing; skipping paste`);
2462
+ return false;
2463
+ }
2464
+ if (!opts.bypassTaskStatusGate && !expectedStatuses.includes(taskFresh.status)) {
2465
+ console.warn(`[AgentManager] continueSession: task ${taskId} status=${taskFresh.status} not in ` +
2466
+ `expected ${expectedStatuses.join('/')} for phase=${phase}; skipping paste`);
2467
+ return false;
2468
+ }
2469
+ const agentFresh = await this.agentStore.get(agentId);
2470
+ if (PHASE_REQUIRES_AGENT_BOUND_TO_TASK[phase]) {
2471
+ if (!agentFresh || agentFresh.taskId !== taskId) {
2472
+ console.warn(`[AgentManager] continueSession[${phase}]: agent ${agentId} not bound to ${taskId} ` +
2473
+ `(got ${agentFresh?.taskId}); skipping`);
2474
+ return false;
2475
+ }
2476
+ }
2477
+ else if (agentFresh && agentFresh.taskId && agentFresh.taskId !== taskId) {
2478
+ console.warn(`[AgentManager] continueSession[${phase}]: agent ${agentId} reassigned ` +
2479
+ `(taskId=${agentFresh.taskId} !== ${taskId}); skipping`);
2480
+ return false;
2481
+ }
2482
+ if (phase === 'post_approve') {
2483
+ const completionFresh = await this.getPostApproveCompletion(taskId);
2484
+ if (!completionFresh || completionFresh.token !== postApproveToken) {
2485
+ console.warn(`[AgentManager] continueSession[post_approve]: token changed before paste for task ${taskId}; skipping`);
2486
+ return false;
2487
+ }
2488
+ }
2489
+ const now = new Date().toISOString();
2490
+ await this.agentStore.update(agentId, (latest) => {
2491
+ if (!latest)
2492
+ return AGENT_STORE_NOOP;
2493
+ return {
2494
+ ...latest,
2495
+ paneId,
2496
+ worktreePath,
2497
+ updatedAt: now,
2498
+ ...(reuseInjectedSkills
2499
+ ? { injectedSkills: { taskId, paneId, skills: reuseInjectedSkills } }
2500
+ : { injectedSkills: undefined }),
2501
+ };
2502
+ });
2503
+ const ackResult = await this.injectAndAwaitAck(tmux, paneId, prompt, agentId, agent.runtime);
2504
+ if (ackResult.acked) {
2505
+ await this.persistInjectedSkills(agentId, taskId, paneId, agent.role, phase, reuseInjectedSkills);
2506
+ }
2507
+ return true;
2508
+ }
2509
+ async recover() {
2510
+ const states = await this.agentStore.list();
2511
+ const deferredCleanups = [];
2512
+ for (const state of states) {
2513
+ const agentConfig = this.getAgentConfig(state.id);
2514
+ if (!agentConfig)
2515
+ continue;
2516
+ try {
2517
+ const result = await this.ensureSession(state.id, 'recover');
2518
+ // recover 成功 = server 重启前 dialog_pending 的 agent 现在 REPL ready。
2519
+ // 处理 Held:与 resumeAgent 共用 shouldReleaseHeldBinding 规则(task terminal/无 task /
2520
+ // turn-completed phase → 同步清 binding;task active 且 phase 不在 completed 集合 → 保留 binding)。
2521
+ const boundTask = state.taskId ? await this.taskStore.get(state.taskId) : null;
2522
+ const shouldReleaseBinding = shouldReleaseHeldBinding(state, boundTask);
2523
+ // 释放 binding 时同步清 worktree(与 resumeAgent 一致)——否则跨重启恢复后
2524
+ // worktreePath 在下面 update 中被丢弃,磁盘上的 worktree 永远无人回收。
2525
+ if (shouldReleaseBinding && state.worktreePath) {
2526
+ const cleanupDir = this.resolveWorkdir(agentConfig, state);
2527
+ if (cleanupDir) {
2528
+ const runner = this.createRunnerFor(agentConfig);
2529
+ const worktree = new WorktreeManager(runner);
2530
+ try {
2531
+ await worktree.remove(cleanupDir, state.worktreePath);
2532
+ }
2533
+ catch (worktreeErr) {
2534
+ console.warn(`[recover] worktree.remove failed for ${state.worktreePath}:`, worktreeErr);
2535
+ }
2536
+ }
2537
+ }
2538
+ // 所有 awaiting_human + non-releasable binding 都保留 Held。包括 agent_dialog_pending +
2539
+ // active task 这个 crash window 场景:handleDialogPendingFromRuntime 已写 awaiting_human
2540
+ // 但 transitionTaskStatus 之前 crash 重启 → task 仍 active;recover 切到 ok 会丢失 Resume
2541
+ // 入口、binding 仍指向 active task → 新 dispatch 撞 stale binding。
2542
+ // 注意:agent_dialog_pending + 无 taskId(最 common 的 dialog_pending) → shouldReleaseBinding=true
2543
+ // → preserveHeld=false → 走 release path 清 Held(recover 视为 dialog dismissed 的正常出口)。
2544
+ const preserveHeld = !shouldReleaseBinding
2545
+ && state.status === 'awaiting_human';
2546
+ await this.agentStore.update(state.id, (latest) => {
2547
+ if (!latest)
2548
+ return AGENT_STORE_NOOP;
2549
+ const base = {
2550
+ id: latest.id,
2551
+ projectId: latest.projectId,
2552
+ paneId: result.paneId,
2553
+ updatedAt: new Date().toISOString(),
2554
+ ...(latest.repoPath !== undefined ? { repoPath: latest.repoPath } : {}),
2555
+ status: 'ok',
2556
+ };
2557
+ if (shouldReleaseBinding) {
2558
+ return base;
2559
+ }
2560
+ const withBinding = {
2561
+ ...base,
2562
+ ...(latest.taskId !== undefined ? { taskId: latest.taskId } : {}),
2563
+ ...(latest.worktreePath !== undefined ? { worktreePath: latest.worktreePath } : {}),
2564
+ ...(latest.startedAt !== undefined ? { startedAt: latest.startedAt } : {}),
2565
+ };
2566
+ if (!preserveHeld)
2567
+ return withBinding;
2568
+ // 保留 awaiting_human 整套字段:operator 仍需干预(Resume / cancel task / DELETE agent)。
2569
+ return {
2570
+ ...withBinding,
2571
+ status: 'awaiting_human',
2572
+ ...(latest.awaitingPhase !== undefined ? { awaitingPhase: latest.awaitingPhase } : {}),
2573
+ ...(latest.awaitingReason !== undefined ? { awaitingReason: latest.awaitingReason } : {}),
2574
+ ...(latest.awaitingSince !== undefined ? { awaitingSince: latest.awaitingSince } : {}),
2575
+ };
2576
+ });
2577
+ if (shouldReleaseBinding) {
2578
+ await this.lockManager.release(state.id);
2579
+ }
2580
+ if (state.taskId && !shouldReleaseBinding) {
2581
+ this.startRuntimeMenuWatch(state.id);
2582
+ }
2583
+ }
2584
+ catch (err) {
2585
+ if (err instanceof EnsureSessionError && err.partial.dialogPending) {
2586
+ await this.markDialogPending(state.id, state.creationToken);
2587
+ // runtime path (creationToken=undefined) 时传 paneId/taskId snapshot 作 generation guard,
2588
+ // 否则 generationMismatch 看 expectedTaskId 默认 undefined 与 state.taskId 不匹配会立即退出。
2589
+ void this.slowPollDialogPending(state.id, state.creationToken, {
2590
+ ...(state.paneId !== undefined ? { expectedPaneId: state.paneId } : {}),
2591
+ expectedTaskId: state.taskId,
2592
+ }).catch((pollErr) => {
2593
+ console.warn(`[recover] slowPoll for ${state.id} crashed:`, pollErr);
2594
+ });
2595
+ continue;
2596
+ }
2597
+ if (err instanceof EnsureSessionError && err.partial.createdSession) {
2598
+ try {
2599
+ const runner = this.createRunnerFor(agentConfig);
2600
+ await new TmuxManager(runner).killSession(state.id);
2601
+ }
2602
+ catch (cleanupErr) {
2603
+ console.warn(`[recover] killSession rollback failed for agent=${state.id}:`, cleanupErr);
2604
+ }
2605
+ }
2606
+ const message = err instanceof Error ? err.message : String(err);
2607
+ console.warn(`[recover] ensureSession failed for agent=${state.id}: ${message}`);
2608
+ await this.agentStore.update(state.id, (latest) => {
2609
+ if (!latest)
2610
+ return AGENT_STORE_NOOP;
2611
+ return {
2612
+ ...latest,
2613
+ paneId: undefined,
2614
+ creationToken: undefined,
2615
+ updatedAt: new Date().toISOString(),
2616
+ };
2617
+ });
2618
+ await this.lockManager.release(state.id);
2619
+ const cleanup = await this.failTasksForAgent(state.id, `recovery: ${message}`, { deferPartnerCleanup: true });
2620
+ deferredCleanups.push({
2621
+ failingAgentId: state.id,
2622
+ failedTaskIds: cleanup.failedTaskIds,
2623
+ projectIds: cleanup.projectIds,
2624
+ });
2625
+ await this.recordError({
2626
+ agentId: state.id,
2627
+ projectId: state.projectId,
2628
+ ...(state.taskId ? { taskId: state.taskId } : {}),
2629
+ operation: 'recovery',
2630
+ reason: 'RECOVERY_ENSURE_SESSION_FAILED',
2631
+ message,
2632
+ observation: { phase: 'recovery-failed' },
2633
+ recommendation: 'Inspect or recreate the tmux session, then retry the affected task.',
2634
+ });
2635
+ await this.safeEmit({
2636
+ id: '',
2637
+ type: 'human.intervention',
2638
+ timestamp: new Date().toISOString(),
2639
+ projectId: state.projectId,
2640
+ agentId: state.id,
2641
+ taskId: state.taskId ?? '',
2642
+ data: { phase: 'recovery-failed', error: message },
2643
+ });
2644
+ }
2645
+ }
2646
+ for (const c of deferredCleanups) {
2647
+ await this.releasePartnersAndDrain(c.failingAgentId, c.failedTaskIds, c.projectIds);
2648
+ }
2649
+ }
2650
+ async reconcileFailedAgent(agentId) {
2651
+ const reconciled = await this.withTaskLock(async () => {
2652
+ let projectId = '';
2653
+ let timestamp = '';
2654
+ let hadBinding = false;
2655
+ let changed = false;
2656
+ let taskId;
2657
+ await this.agentStore.update(agentId, (existing) => {
2658
+ if (!existing)
2659
+ return AGENT_STORE_NOOP;
2660
+ if (existing.creationToken)
2661
+ return AGENT_STORE_NOOP;
2662
+ timestamp = new Date().toISOString();
2663
+ projectId = existing.projectId;
2664
+ hadBinding = !!existing.taskId;
2665
+ taskId = existing.taskId;
2666
+ if (!existing.taskId
2667
+ && !existing.worktreePath
2668
+ && !existing.startedAt
2669
+ && !existing.paneId
2670
+ && !existing.creationToken) {
2671
+ return AGENT_STORE_NOOP;
2672
+ }
2673
+ changed = true;
2674
+ return {
2675
+ id: existing.id,
2676
+ projectId: existing.projectId,
2677
+ ...(existing.repoPath !== undefined ? { repoPath: existing.repoPath } : {}),
2678
+ updatedAt: timestamp,
2679
+ };
2680
+ });
2681
+ if (!projectId || !changed)
2682
+ return null;
2683
+ await this.lockManager.release(agentId);
2684
+ return { projectId, timestamp, hadBinding, taskId };
2685
+ });
2686
+ if (!reconciled)
2687
+ return false;
2688
+ if (reconciled.hadBinding) {
2689
+ await this.failTasksForAgent(agentId, 'tmux-probe=absent');
2690
+ }
2691
+ await this.recordError({
2692
+ agentId,
2693
+ projectId: reconciled.projectId,
2694
+ ...(reconciled.taskId ? { taskId: reconciled.taskId } : {}),
2695
+ operation: 'recovery',
2696
+ reason: 'TMUX_SESSION_ABSENT',
2697
+ message: 'tmux probe reported the agent session absent',
2698
+ occurredAt: reconciled.timestamp,
2699
+ observation: { tmuxSessionStatus: 'absent' },
2700
+ recommendation: 'Restart the agent runtime before assigning more work.',
2701
+ });
2702
+ await this.safeEmit({
2703
+ id: '',
2704
+ type: 'agent.recovered',
2705
+ timestamp: reconciled.timestamp,
2706
+ projectId: reconciled.projectId,
2707
+ agentId,
2708
+ data: { reason: 'tmux-probe=absent' },
2709
+ });
2710
+ return true;
2711
+ }
2712
+ async cancelTask(taskId) {
2713
+ let devToRelease;
2714
+ let qaToRelease;
2715
+ let projectIdToDrain;
2716
+ this.specReviewMarkerWatcher?.stop(taskId);
2717
+ const result = await this.withTaskLock(async () => {
2718
+ const task = await this.taskStore.get(taskId);
2719
+ if (!task)
2720
+ throw new ApiError(404, 'Task not found');
2721
+ if (TERMINAL_STATUSES.includes(task.status))
2722
+ return task;
2723
+ if (task.agentId)
2724
+ devToRelease = task.agentId;
2725
+ if (task.qaAgentId)
2726
+ qaToRelease = task.qaAgentId;
2727
+ const now = new Date().toISOString();
2728
+ task.status = 'cancelled';
2729
+ task.updatedAt = now;
2730
+ await this.taskStore.set(task);
2731
+ await this.safeEmit({
2732
+ id: '',
2733
+ type: 'task.updated',
2734
+ timestamp: now,
2735
+ projectId: task.projectId,
2736
+ taskId,
2737
+ data: { status: 'cancelled' },
2738
+ });
2739
+ projectIdToDrain = task.projectId;
2740
+ return task;
2741
+ });
2742
+ // 唯一允许打断 agent 会话的入口(用户主动 Cancel)。
2743
+ for (const id of [devToRelease, qaToRelease]) {
2744
+ if (!id)
2745
+ continue;
2746
+ const cfg = this.getAgentConfig(id);
2747
+ const state = await this.agentStore.get(id);
2748
+ if (!cfg || !state)
2749
+ continue;
2750
+ // 重校验绑定:lock 释放后另一路 release+acquire 可能已把 agent 绑给新任务,
2751
+ // 此时不能 C-c 打到新会话上、也不能继续 idle release。
2752
+ if (state.taskId !== taskId) {
2753
+ console.warn(`[AgentManager] cancelTask: ${id} no longer bound to ${taskId} (got ${state.taskId}); skipping`);
2754
+ continue;
2755
+ }
2756
+ const ok = await this.interruptPaneAndWaitReady(state, cfg);
2757
+ if (!ok) {
2758
+ await this.markAwaitingHuman(id, 'cancel-interrupt-failed', 'Task marked cancelled but C-c / REPL ready check failed; agent may still be running. Attach via web terminal to verify, then Resume or Delete.');
2759
+ continue;
2760
+ }
2761
+ try {
2762
+ // allowAwaitingHuman: cancelTask 是显式回收入口,agent 之前可能因 ack_unknown 等被标 Held,
2763
+ // 用户主动 Cancel 应允许跨过 awaiting_human gate 清理 binding;release 默认 gate 是为了
2764
+ // 拦住非显式路径(如 generic catch)的意外清理,cancelTask 不属于那类。
2765
+ await this.releaseAgentForTask(id, taskId, 'idle', { allowAwaitingHuman: true });
2766
+ }
2767
+ catch (err) {
2768
+ console.error(`[AgentManager] cancelTask releaseAgentForTask(${id}) failed:`, err);
2769
+ }
2770
+ }
2771
+ if (projectIdToDrain) {
2772
+ try {
2773
+ await this.drainQueue(projectIdToDrain);
2774
+ }
2775
+ catch (err) {
2776
+ console.error(`[AgentManager] drainQueue failed for ${projectIdToDrain}:`, err);
2777
+ }
2778
+ }
2779
+ return result;
2780
+ }
2781
+ async validateTaskDispatch(projectId, input) {
2782
+ const agentState = await this.agentStore.get(input.preferredAgentId);
2783
+ if (agentState?.status === 'awaiting_human') {
2784
+ throw new ApiError(409, `agent "${input.preferredAgentId}" is awaiting human (${agentState.awaitingPhase ?? 'unknown phase'}); resume or delete before dispatching new tasks`);
2785
+ }
2786
+ if (agentState?.creationToken || agentState?.taskId) {
2787
+ const reason = agentState.creationToken ? 'being created' : `bound to task ${agentState.taskId}`;
2788
+ throw new ApiError(409, `agent "${input.preferredAgentId}" is ${reason}; please retry later`);
2789
+ }
2790
+ let previewBytes;
2791
+ try {
2792
+ previewBytes = this.previewPromptBytesForTaskInput(projectId, input);
2793
+ }
2794
+ catch (err) {
2795
+ // RequiredSkillsMissingError is a server config / sync failure, not a
2796
+ // bad client request — bubble as 500 so operators retry registry repair,
2797
+ // not callers retrying the same request.
2798
+ if (err instanceof RequiredSkillsMissingError) {
2799
+ throw new ApiError(500, err.message);
2800
+ }
2801
+ throw new ApiError(400, err instanceof Error ? err.message : String(err));
2802
+ }
2803
+ if (previewBytes > MAX_PROMPT_BYTES_ROUTE_LIMIT) {
2804
+ throw new ApiError(400, `estimated prompt size ${previewBytes} bytes exceeds ${MAX_PROMPT_BYTES_ROUTE_LIMIT} limit; ` +
2805
+ `reduce task description or remove some skills from AGENT_PHASES[develop]`);
2806
+ }
2807
+ }
2808
+ // Force a fresh QA review pass; bumps reviewRound only after startSession succeeds.
2809
+ async dispatchReviewToQa(taskId) {
2810
+ const claim = await this.withTaskLock(async () => {
2811
+ if (this.manualReviewInFlight.has(taskId)) {
2812
+ throw new ApiError(409, `Manual review already in progress for task ${taskId}`);
2813
+ }
2814
+ const task = await this.taskStore.get(taskId);
2815
+ if (!task)
2816
+ throw new ApiError(404, `Task ${taskId} not found`);
2817
+ if (!task.prNumber) {
2818
+ throw new ApiError(400, `Task ${taskId} has no PR yet; cannot dispatch review`);
2819
+ }
2820
+ if (!task.branch) {
2821
+ throw new ApiError(400, `Task ${taskId} has no branch; cannot dispatch review`);
2822
+ }
2823
+ // Stale qaAgentId (deleted + recreated QA) → fall back to current partner.
2824
+ let qaId = task.qaAgentId;
2825
+ if (qaId && !this.getAgentConfig(qaId)) {
2826
+ console.warn(`[dispatchReviewToQa] task ${taskId}.qaAgentId="${qaId}" no longer in config; ` +
2827
+ `falling back to findQaPartner(${task.agentId})`);
2828
+ qaId = undefined;
2829
+ }
2830
+ if (!qaId) {
2831
+ const qa = this.findQaPartner(task.agentId);
2832
+ if (!qa) {
2833
+ throw new ApiError(400, `Dev ${task.agentId} has no QA partner configured; cannot dispatch review`);
2834
+ }
2835
+ qaId = qa.id;
2836
+ }
2837
+ this.manualReviewInFlight.add(taskId);
2838
+ return { qaId, devAgentId: task.agentId, taskStatusAtClaim: task.status };
2839
+ });
2840
+ try {
2841
+ const { qaId, devAgentId, taskStatusAtClaim } = claim;
2842
+ const isTerminal = TERMINAL_STATUSES.includes(taskStatusAtClaim);
2843
+ const qaPhase = taskStatusAtClaim === 'pending' || taskStatusAtClaim === 'in_progress'
2844
+ ? 'review'
2845
+ : 'recheck';
2846
+ const prevQa = await this.agentStore.get(qaId);
2847
+ if (prevQa?.taskId === taskId) {
2848
+ await this.releaseAgentForTask(qaId, taskId, 'idle');
2849
+ }
2850
+ const acquired = await this.acquireAgentForTask(qaId, taskId, qaPhase);
2851
+ if (!acquired) {
2852
+ throw new ApiError(409, `QA agent ${qaId} is busy or unavailable`);
2853
+ }
2854
+ // dev 被 parked 到 waiting (mode='waiting' 仅 bump updatedAt 不发 C-c 不清 binding);旧实现
2855
+ // approved/其他状态走两条不同分支,但 release(waiting) 和 markAgentWaiting 实际都走相同的
2856
+ // releaseAgentForTask(waiting) — 现在统一调 markAgentWaiting,devParked 仅作 QA 失败时
2857
+ // emit dev-parked intervention 的旗标。
2858
+ // .catch→false: 旧 approved 分支已有此模式,markAgentWaiting reject (store/lock IO 异常) 时
2859
+ // 不能直接跳出 try/finally — QA 已 acquire (binding+lock) 必须先 release 清理才能 throw。
2860
+ let devParked = false;
2861
+ if (!isTerminal && devAgentId) {
2862
+ const devOk = await this.markAgentWaiting(devAgentId, taskId)
2863
+ .catch(err => {
2864
+ console.warn(`[dispatchReviewToQa] markAgentWaiting(dev=${devAgentId}) threw:`, err);
2865
+ return false;
2866
+ });
2867
+ if (!devOk) {
2868
+ await this.releaseAgentForTask(qaId, taskId, 'idle')
2869
+ .catch(() => undefined);
2870
+ throw new ApiError(500, `Cannot park dev ${devAgentId} into waiting for manual QA review (task status=${taskStatusAtClaim}); QA released`);
2871
+ }
2872
+ devParked = true;
2873
+ }
2874
+ // Start QA before any task mutation so failures don't leave a half-bumped state.
2875
+ let started = false;
2876
+ try {
2877
+ // bypassTaskStatusGate 路径下先 startSession 后 transition;dialog 抛时 task 仍是
2878
+ // taskStatusAtClaim (manual review 入口可能是 approved/fixing/in_progress/pending/review)。
2879
+ // 默认 PHASE_EXPECTED_STATUS[qaPhase]=['review'] 不匹配 → fail-task skip → QA Held + task active
2880
+ // 死锁。显式传 [taskStatusAtClaim] 让 handleDialogPendingFromRuntime 能正确 fail task。
2881
+ started = await this.startSession(taskId, qaId, qaPhase, {
2882
+ bypassTaskStatusGate: true,
2883
+ dialogFailFromStatuses: [taskStatusAtClaim],
2884
+ });
2885
+ }
2886
+ catch (err) {
2887
+ // ack_unknown: QA prompt 已发,保留绑定让 operator 接管;其他错误正常 release。
2888
+ if (await this.markAwaitingIfAckUnknown(qaId, err, taskId)) {
2889
+ // prompt 已发 → QA 可能仍在跑 review → outcome (review.submitted) 会到来。
2890
+ // 必须先完成"推 task 到 review + 写 qaAgentId + 增 reviewRound",否则
2891
+ // review.submitted handler 看 fromStatus 不匹配会丢弃 outcome。
2892
+ // 严格 fromStatus = [taskStatusAtClaim]:outcome 已被接受导致 task 离开 claim 状态时
2893
+ // (例如 approved → REQUEST_CHANGES 推到 fixing),迟到的 cleanup 跳过 transition + bump,
2894
+ // 避免把已接受的 outcome 回滚到 review。
2895
+ this.stopSpecMarkerWatcher(taskId);
2896
+ const transitioned = await this.transitionTaskStatus(taskId, 'review', { fromStatus: [taskStatusAtClaim] });
2897
+ if (transitioned) {
2898
+ await this.withTaskLock(async () => {
2899
+ const fresh = await this.taskStore.get(taskId);
2900
+ if (!fresh)
2901
+ return;
2902
+ await this.taskStore.set({
2903
+ ...fresh,
2904
+ reviewRound: fresh.reviewRound + 1,
2905
+ qaAgentId: qaId,
2906
+ updatedAt: new Date().toISOString(),
2907
+ });
2908
+ });
2909
+ }
2910
+ }
2911
+ else if (err instanceof EnsureSessionError && err.partial.handled) {
2912
+ // handleDialogPendingFromRuntime 已 Held QA + fail task + release partners;不能再 release
2913
+ // 否则 boundTask terminal 让 shouldReleaseHeldBinding 放行清掉仍卡 dialog 的 pane lock。
2914
+ }
2915
+ else {
2916
+ await this.releaseAgentForTask(qaId, taskId, 'idle')
2917
+ .catch(() => undefined);
2918
+ if (devParked)
2919
+ await this.emitManualReviewDevParkedQaFailedIntervention(devAgentId, taskId);
2920
+ }
2921
+ throw err;
2922
+ }
2923
+ if (!started) {
2924
+ await this.releaseAgentForTask(qaId, taskId, 'idle')
2925
+ .catch(() => undefined);
2926
+ if (devParked)
2927
+ await this.emitManualReviewDevParkedQaFailedIntervention(devAgentId, taskId);
2928
+ throw new ApiError(500, `Failed to start QA review session for ${taskId}`);
2929
+ }
2930
+ this.stopSpecMarkerWatcher(taskId);
2931
+ await this.withTaskLock(async () => {
2932
+ const fresh = await this.taskStore.get(taskId);
2933
+ if (!fresh)
2934
+ return;
2935
+ await this.taskStore.set({
2936
+ ...fresh,
2937
+ reviewRound: fresh.reviewRound + 1,
2938
+ qaAgentId: qaId,
2939
+ updatedAt: new Date().toISOString(),
2940
+ });
2941
+ });
2942
+ await this.transitionTaskStatus(taskId, 'review', { fromStatus: ['pending', 'in_progress', 'review', 'fixing', 'approved'] });
2943
+ const final = await this.taskStore.get(taskId);
2944
+ return final;
2945
+ }
2946
+ finally {
2947
+ this.manualReviewInFlight.delete(taskId);
2948
+ }
2949
+ }
2950
+ async emitManualReviewDevParkedQaFailedIntervention(agentId, expectedTaskId) {
2951
+ if (!agentId)
2952
+ return;
2953
+ const cur = await this.agentStore.get(agentId);
2954
+ if (!cur || cur.taskId !== expectedTaskId)
2955
+ return;
2956
+ await this.safeEmit({
2957
+ id: '',
2958
+ type: 'human.intervention',
2959
+ timestamp: new Date().toISOString(),
2960
+ projectId: cur.projectId,
2961
+ agentId,
2962
+ taskId: expectedTaskId,
2963
+ data: {
2964
+ phase: 'manual-review-dev-parked-qa-failed',
2965
+ note: 'Manual QA dispatch parked the dev agent into waiting; QA then failed to start. ' +
2966
+ 'Dev binding is kept but no new prompt is running — re-dispatch the manual review or cancel the task.',
2967
+ },
2968
+ });
2969
+ }
2970
+ async retryTask(taskId) {
2971
+ const old = await this.withTaskLock(async () => {
2972
+ const t = await this.taskStore.get(taskId);
2973
+ if (!t)
2974
+ throw new ApiError(404, 'Task not found');
2975
+ if (!TERMINAL_STATUSES.includes(t.status)) {
2976
+ throw new ApiError(409, `Task ${taskId} cannot be retried in status "${t.status}"; cancel it first or wait for completion`);
2977
+ }
2978
+ return t;
2979
+ });
2980
+ const input = {
2981
+ title: old.title,
2982
+ description: old.description,
2983
+ preferredAgentId: old.preferredAgentId,
2984
+ };
2985
+ await this.validateTaskDispatch(old.projectId, input);
2986
+ return this.createAndStartTask(old.projectId, input);
2987
+ }
2988
+ async editTask(taskId, patch) {
2989
+ let projectIdToDrain;
2990
+ const result = await this.withTaskLock(async () => {
2991
+ const task = await this.taskStore.get(taskId);
2992
+ if (!task)
2993
+ throw new ApiError(404, 'Task not found');
2994
+ if (task.status !== 'pending') {
2995
+ throw new ApiError(409, `Task not editable in status ${task.status}`);
2996
+ }
2997
+ let preferredChanged = false;
2998
+ if (patch.title !== undefined)
2999
+ task.title = patch.title;
3000
+ if (patch.description !== undefined)
3001
+ task.description = patch.description;
3002
+ if (patch.preferredAgentId !== undefined && patch.preferredAgentId !== task.preferredAgentId) {
3003
+ const cfg = this.getAgentConfig(patch.preferredAgentId);
3004
+ if (!cfg)
3005
+ throw new ApiError(400, `Unknown agent: ${patch.preferredAgentId}`);
3006
+ if (cfg.projectId !== task.projectId) {
3007
+ throw new ApiError(400, `Agent not in project ${task.projectId}`);
3008
+ }
3009
+ if (cfg.role !== 'dev')
3010
+ throw new ApiError(400, `Agent is not dev role`);
3011
+ task.preferredAgentId = patch.preferredAgentId;
3012
+ task.qaAgentId = this.findQaPartner(patch.preferredAgentId)?.id;
3013
+ preferredChanged = true;
3014
+ }
3015
+ task.updatedAt = new Date().toISOString();
3016
+ await this.taskStore.set(task);
3017
+ if (preferredChanged)
3018
+ projectIdToDrain = task.projectId;
3019
+ return task;
3020
+ });
3021
+ if (projectIdToDrain) {
3022
+ try {
3023
+ await this.drainQueue(projectIdToDrain);
3024
+ }
3025
+ catch (err) {
3026
+ console.error(`[AgentManager] drainQueue failed for ${projectIdToDrain}:`, err);
3027
+ }
3028
+ const refreshed = await this.taskStore.get(taskId);
3029
+ if (refreshed)
3030
+ return refreshed;
3031
+ }
3032
+ return result;
3033
+ }
3034
+ async mergePr(taskId, opts = {}) {
3035
+ const task = await this.taskStore.get(taskId);
3036
+ if (!task || !task.prNumber) {
3037
+ throw new Error(`mergePr: no PR number for task ${taskId}`);
3038
+ }
3039
+ const project = this.getProjectConfig(task.projectId);
3040
+ if (!project) {
3041
+ throw new Error(`mergePr: unknown project ${task.projectId}`);
3042
+ }
3043
+ const matchHead = opts.matchHeadSha
3044
+ ? ` --match-head-commit ${shellQuote(opts.matchHeadSha)}`
3045
+ : '';
3046
+ const result = await this.platformRunner.exec(`gh pr merge ${task.prNumber} --repo ${shellQuote(project.repo)}${matchHead} --squash --delete-branch`);
3047
+ if (result.exitCode !== 0) {
3048
+ throw new Error(`gh pr merge failed for PR #${task.prNumber}: ${result.stderr || result.stdout}`);
3049
+ }
3050
+ }
3051
+ async cleanupAfterMerge(taskId) {
3052
+ const task = await this.taskStore.get(taskId);
3053
+ if (!task || !task.agentId)
3054
+ return;
3055
+ const dev = this.getAgentConfig(task.agentId);
3056
+ if (!dev)
3057
+ return;
3058
+ this.specReviewMarkerWatcher?.stop(taskId);
3059
+ // Release BEFORE dispatch: 'idle' release removes the worktree, and the cleanup
3060
+ // must work in the stable post-release state (server-side git ops use the main
3061
+ // repo clone, not the worktree). dispatch reads state again so it survives this.
3062
+ try {
3063
+ await this.releaseAgentForTask(task.agentId, taskId, 'idle');
3064
+ }
3065
+ catch (err) {
3066
+ console.warn(`[AgentManager] cleanupAfterMerge: releaseAgentForTask(${task.agentId}, ${taskId}) failed:`, err);
3067
+ }
3068
+ if (task.prNumber && task.branch) {
3069
+ const ctx = {
3070
+ prNumber: task.prNumber,
3071
+ taskId: task.id,
3072
+ branch: task.branch,
3073
+ };
3074
+ await this.dispatchPostMergeCleanup(task.agentId, ctx).catch(err => console.warn(`[AgentManager] cleanupAfterMerge: dispatchPostMergeCleanup(${task.agentId}) failed:`, err));
3075
+ }
3076
+ }
3077
+ // Server-side branch delete + pane notification + /compact slash command. Server-side
3078
+ // path is required: the caller's releaseAgentForTask('idle') just removed the worktree,
3079
+ // so the agent itself cannot reliably cd + run git from inside the pane.
3080
+ async dispatchPostMergeCleanup(agentId, ctx) {
3081
+ const state = await this.agentStore.get(agentId);
3082
+ if (!state?.paneId)
3083
+ return;
3084
+ // Late-arriving pr.merged guard: APPROVE may have already released QA, drainQueue may have
3085
+ // re-assigned this pane to a new review. Refuse to dispatch into a pane bound to a different task.
3086
+ if (state.taskId && state.taskId !== ctx.taskId)
3087
+ return;
3088
+ const agent = this.getAgentConfig(agentId);
3089
+ if (!agent)
3090
+ return;
3091
+ const paneId = state.paneId;
3092
+ const runner = this.createRunnerFor(agent);
3093
+ const cleanupResult = state.repoPath
3094
+ ? await this.deleteLocalBranchInRepo(runner, state.repoPath, ctx.branch, agentId)
3095
+ : { outcome: 'skipped', detail: 'agent has no repoPath in binding' };
3096
+ // deleteLocalBranchInRepo opens a long await window. drainQueue / createTask can rebind
3097
+ // this pane in the meantime — re-validate before any pane write. The lock makes re-check
3098
+ // + inject mutually exclusive with startSession, so a binding cannot land between them.
3099
+ const tmux = new TmuxManager(runner);
3100
+ const prompt = buildPostMergeCleanupPrompt(ctx, cleanupResult);
3101
+ const acquired = await this.lockManager.acquire(agentId);
3102
+ if (!acquired)
3103
+ return;
3104
+ try {
3105
+ const after = await this.agentStore.get(agentId);
3106
+ if (!after || after.paneId !== paneId)
3107
+ return;
3108
+ if (after.taskId && after.taskId !== ctx.taskId)
3109
+ return;
3110
+ await tmux.injectPrompt(paneId, prompt, agentId);
3111
+ await tmux.sendEnter(paneId);
3112
+ }
3113
+ finally {
3114
+ await this.lockManager.release(agentId);
3115
+ }
3116
+ const runtime = agentRuntimeKindFor(agent);
3117
+ void this.sendCompactWhenIdle(tmux, paneId, agentId, ctx.taskId, runtime).catch(err => console.warn(`[AgentManager] sendCompactWhenIdle(${agentId}) failed:`, err));
3118
+ }
3119
+ // shellQuote prevents injection. Three outcomes the caller must distinguish so the
3120
+ // notification prompt to the agent doesn't lie about a deletion that actually failed:
3121
+ // - deleted: branch ref was removed (or no longer exists at the end of the call).
3122
+ // - absent: branch wasn't there at all (auto-delete-head-branches, never landed locally).
3123
+ // - failed: worktree still occupies the ref, permissions, etc. — agent must NOT be told "cleaned".
3124
+ async deleteLocalBranchInRepo(runner, repoPath, branch, agentId) {
3125
+ // --expire=now: bare `git worktree prune` honors gc.worktreePruneExpire (default 3 months),
3126
+ // so a worktree that the release just removed could still be tracked as occupying the ref.
3127
+ const fetchCmd = `cd ${shellQuote(repoPath)} && git fetch --prune origin && git worktree prune --expire=now`;
3128
+ try {
3129
+ const fetchResult = await runner.exec(fetchCmd, { timeout: this.postMergeFetchTimeoutMs });
3130
+ if (fetchResult.exitCode !== 0) {
3131
+ console.warn(`[AgentManager] deleteLocalBranchInRepo(${agentId}, ${branch}): fetch/prune exit=${fetchResult.exitCode} ` +
3132
+ `stderr=${fetchResult.stderr.trim()}`);
3133
+ }
3134
+ }
3135
+ catch (err) {
3136
+ console.warn(`[AgentManager] deleteLocalBranchInRepo(${agentId}, ${branch}) fetch/prune threw:`, err);
3137
+ }
3138
+ const delCmd = `cd ${shellQuote(repoPath)} && git branch -D ${shellQuote(branch)}`;
3139
+ try {
3140
+ const delResult = await runner.exec(delCmd, { timeout: this.postMergeBranchTimeoutMs });
3141
+ if (delResult.exitCode === 0) {
3142
+ return { outcome: 'deleted', detail: delResult.stdout.trim() };
3143
+ }
3144
+ if (/not found|not a valid|no such branch/i.test(delResult.stderr)) {
3145
+ return { outcome: 'absent', detail: delResult.stderr.trim() };
3146
+ }
3147
+ console.warn(`[AgentManager] deleteLocalBranchInRepo(${agentId}, ${branch}): branch -D exit=${delResult.exitCode} ` +
3148
+ `stderr=${delResult.stderr.trim()}`);
3149
+ return { outcome: 'failed', detail: delResult.stderr.trim() || `exit ${delResult.exitCode}` };
3150
+ }
3151
+ catch (err) {
3152
+ const detail = err instanceof Error ? err.message : String(err);
3153
+ console.warn(`[AgentManager] deleteLocalBranchInRepo(${agentId}, ${branch}) branch -D threw:`, err);
3154
+ return { outcome: 'failed', detail };
3155
+ }
3156
+ }
3157
+ async sendCompactWhenIdle(tmux, paneId, agentId, originalTaskId, runtime) {
3158
+ await this.waitForReplPromptReady(tmux, paneId, runtime, this.compactIdleWaitMs);
3159
+ // Atomic check + send: lockManager.acquire makes "read binding + sendKeys" mutually
3160
+ // exclusive with startSession (which is what drainQueue uses to re-bind this pane).
3161
+ // Failing to acquire means a dispatch is already in flight — let it own the pane.
3162
+ const acquired = await this.lockManager.acquire(agentId);
3163
+ if (!acquired)
3164
+ return;
3165
+ try {
3166
+ const latest = await this.agentStore.get(agentId);
3167
+ if (!latest || latest.paneId !== paneId)
3168
+ return;
3169
+ if (latest.taskId && latest.taskId !== originalTaskId)
3170
+ return;
3171
+ await tmux.sendKeysLiteral(paneId, '/compact');
3172
+ await tmux.sendEnter(paneId);
3173
+ }
3174
+ finally {
3175
+ await this.lockManager.release(agentId);
3176
+ }
3177
+ }
3178
+ // pane_current_command 是 runtime 是否仍活的权威信号(不被 viewport stale frame 骗)。
3179
+ // anchor 在 codex busy 屏(`Working on it…\n esc to interrupt`)不存在,所以 busy 状态只看
3180
+ // procTitle;只有准备返回 idle 时才用 anchor 作双重证据,挡 stale-frame + shell 误报。
3181
+ // 入口先等一拍:上一步刚 sendEnter,给 runtime 时间进入 busy,避免观察到假 idle。
3182
+ async waitForReplPromptReady(tmux, paneId, runtime, timeoutMs) {
3183
+ const deadline = Date.now() + timeoutMs;
3184
+ await tmux.waitReplReady(paneId, runtime, {
3185
+ timeoutMs,
3186
+ intervalMs: this.compactIdlePollMs,
3187
+ });
3188
+ await new Promise(r => setTimeout(r, this.compactIdlePollMs));
3189
+ while (true) {
3190
+ const current = await tmux.displayMessage(paneId, '#{pane_current_command}');
3191
+ if (!hasReplProcTitle(current, runtime)) {
3192
+ throw new Error(`waitForReplPromptReady: pane ${paneId} pane_current_command=${current.trim()} (not runtime, REPL may have exited)`);
3193
+ }
3194
+ const cap = await tmux.capturePaneById(paneId, { ansi: false, scrollback: 0 });
3195
+ if (detectRuntimeMenu(cap) || detectStartupDialog(cap)) {
3196
+ throw new Error(`waitForReplPromptReady: pane ${paneId} shows menu/dialog, not a ready REPL prompt`);
3197
+ }
3198
+ if (!detectReplActiveBusy(cap)) {
3199
+ if (!hasReplReadyAnchor(cap, runtime)) {
3200
+ throw new Error(`waitForReplPromptReady: pane ${paneId} observed idle but no ready anchor (stale frame?)`);
3201
+ }
3202
+ return;
3203
+ }
3204
+ if (Date.now() >= deadline) {
3205
+ throw new Error(`waitForReplPromptReady: pane ${paneId} stayed busy past ${timeoutMs}ms`);
3206
+ }
3207
+ await new Promise(r => setTimeout(r, this.compactIdlePollMs));
3208
+ }
3209
+ }
3210
+ stopSpecMarkerWatcher(taskId) {
3211
+ this.specReviewMarkerWatcher?.stop(taskId);
3212
+ }
3213
+ // Prompt build (via task.specMarkerToken) and watcher must share the same token.
3214
+ async armSpecMarkerWatcher(taskId, agentId, kind, token) {
3215
+ if (!this.specReviewMarkerWatcher)
3216
+ return;
3217
+ const task = await this.taskStore.get(taskId);
3218
+ if (!task)
3219
+ return;
3220
+ try {
3221
+ await this.specReviewMarkerWatcher.start({
3222
+ taskId,
3223
+ projectId: task.projectId,
3224
+ agentId,
3225
+ kind,
3226
+ token,
3227
+ });
3228
+ }
3229
+ catch (err) {
3230
+ console.warn(`[AgentManager] armSpecMarkerWatcher(task=${taskId}, kind=${kind}) failed:`, err);
3231
+ }
3232
+ }
3233
+ async readSpecReviewFile(taskId, fileName) {
3234
+ const task = await this.taskStore.get(taskId);
3235
+ if (!task)
3236
+ return null;
3237
+ if (!task.branch) {
3238
+ throw new Error(`readSpecReviewFile: task ${taskId} has no branch`);
3239
+ }
3240
+ const project = this.getProjectConfig(task.projectId);
3241
+ if (!project) {
3242
+ throw new Error(`readSpecReviewFile: unknown project ${task.projectId}`);
3243
+ }
3244
+ const dev = this.getAgentConfig(task.agentId);
3245
+ if (!dev) {
3246
+ throw new Error(`readSpecReviewFile: task ${taskId} has no dev agent bound`);
3247
+ }
3248
+ const runner = this.createRunnerFor(dev);
3249
+ const store = this.createRepoStore(dev, project, runner);
3250
+ const workdir = await this.resolveWorkdir(dev, await this.agentStore.get(dev.id))
3251
+ ?? await store.ensure();
3252
+ const filePath = `.baxian/spec-review/${fileName}`;
3253
+ return store.readFileFromBranch(workdir, task.branch, filePath);
3254
+ }
3255
+ async dispatchSpecReviewToQa(taskId) {
3256
+ // Phase 1 (lock): validate + decide qa + compute newToken/newRound (无 mutation, 无 park)。
3257
+ // 关键约束:task 不能在 startSession 之前被改 — startSession 内部调用
3258
+ // buildPromptInline,prompt 必须看到的是新 token 和新 round;这里只 *计算*,
3259
+ // 真正写回 task 放到 Phase 3。
3260
+ const claim = await this.withTaskLock(async () => {
3261
+ const task = await this.taskStore.get(taskId);
3262
+ if (!task)
3263
+ throw new Error(`dispatchSpecReviewToQa: task ${taskId} not found`);
3264
+ if (!task.branch)
3265
+ throw new Error(`dispatchSpecReviewToQa: task ${taskId} has no branch`);
3266
+ // Stale spec-ready guard: 一旦 task 离开 pre-spec 阶段 (phase='code' 或其他
3267
+ // 非 'spec'/undefined 值),迟到的 spec-ready marker 不应再 dispatch review。
3268
+ // 允许 phase==='spec' 是预留 dev 在 fix-complete 后再 emit spec-ready 的扩展点。
3269
+ if (task.phase !== undefined && task.phase !== 'spec') {
3270
+ await this.safeEmit({
3271
+ id: '',
3272
+ type: 'human.intervention',
3273
+ timestamp: new Date().toISOString(),
3274
+ projectId: task.projectId,
3275
+ agentId: task.agentId,
3276
+ taskId,
3277
+ data: { phase: 'spec-ready-stale-after-code', taskPhase: task.phase },
3278
+ });
3279
+ return null;
3280
+ }
3281
+ const qa = this.findQaPartner(task.agentId);
3282
+ if (!qa) {
3283
+ await this.safeEmit({
3284
+ id: '',
3285
+ type: 'human.intervention',
3286
+ timestamp: new Date().toISOString(),
3287
+ projectId: task.projectId,
3288
+ agentId: task.agentId,
3289
+ taskId,
3290
+ data: { phase: 'spec-review-no-qa-partner', devAgentId: task.agentId },
3291
+ });
3292
+ return null;
3293
+ }
3294
+ // 记录入口 status — fix-then-review 重派 (fromStatus 含 'fixing') 时,
3295
+ // spawn 失败 rollback 不能无差别回 in_progress;必须回到原 status 以保留 spec phase。
3296
+ // transitionTaskStatus 的 fromStatus 守门已限定为这三种之一; 其他 status 不会走到这里。
3297
+ const isReviewEntry = task.status === 'in_progress'
3298
+ || task.status === 'fixing'
3299
+ || task.status === 'pending';
3300
+ if (!isReviewEntry)
3301
+ return null;
3302
+ return {
3303
+ qaId: qa.id,
3304
+ devAgentId: task.agentId,
3305
+ projectId: task.projectId,
3306
+ newToken: createMarkerToken(),
3307
+ newRound: (task.specReviewRound ?? 0) + 1,
3308
+ originalStatus: task.status,
3309
+ // 记录原 spec-ready token — pre-spec entry rollback 时 restore,
3310
+ // 让 dev 后续 spec-ready marker (with 原 token) 经 handler freshness gate 通过 → auto retry。
3311
+ originalToken: task.specMarkerToken,
3312
+ // 回滚时 restore — round 是 "已完成轮次" 计数, 累计失败不应吃 round 配额。
3313
+ originalRound: task.specReviewRound,
3314
+ };
3315
+ });
3316
+ if (!claim)
3317
+ return null;
3318
+ const { qaId, devAgentId, projectId, newToken, newRound, originalStatus, originalToken, originalRound } = claim;
3319
+ // Phase 2a: 先 acquire QA — 失败时 dev 还未 park,直接 return 即可。
3320
+ const acquired = await this.acquireAgentForTask(qaId, taskId, 'spec-review');
3321
+ if (!acquired) {
3322
+ await this.safeEmit({
3323
+ id: '',
3324
+ type: 'human.intervention',
3325
+ timestamp: new Date().toISOString(),
3326
+ projectId,
3327
+ agentId: qaId,
3328
+ taskId,
3329
+ data: { phase: 'spec-review-qa-acquire-failed', qaAgentId: qaId },
3330
+ });
3331
+ return null;
3332
+ }
3333
+ // Phase 2b: dev gate — park dev so it stops editing the spec while QA reviews。
3334
+ // 顺序在 acquireQA 之后:避免 QA 失败时 dev 已 parked 但 task 仍 in_progress,
3335
+ // 无任何后续 dispatch 把 dev 拉出 waiting (即 dev 永久挂起)。
3336
+ if (devAgentId) {
3337
+ const devOk = await this.markAgentWaiting(devAgentId, taskId);
3338
+ if (!devOk) {
3339
+ await this.releaseAgentForTask(qaId, taskId, 'idle')
3340
+ .catch(() => undefined);
3341
+ await this.safeEmit({
3342
+ id: '',
3343
+ type: 'human.intervention',
3344
+ timestamp: new Date().toISOString(),
3345
+ projectId,
3346
+ agentId: devAgentId,
3347
+ taskId,
3348
+ data: { phase: 'spec-review-dev-park-failed', devAgentId },
3349
+ });
3350
+ return null;
3351
+ }
3352
+ }
3353
+ // Phase 2c (lock): atomic transition + persist newToken/newRound/phase/qaAgentId.
3354
+ // 必须在 startSession 之前;若顺序反过来,startSession 之后崩溃但 transition 没做时,
3355
+ // armRecoveredSpecMarkers 会读旧 status/token 推断错 kind/token,新 marker 无法匹配 → 链路死。
3356
+ const transition = await this.transitionTaskStatus(taskId, 'review', { fromStatus: ['in_progress', 'fixing', 'pending'] }, {
3357
+ specReviewRound: newRound,
3358
+ specMarkerToken: newToken,
3359
+ phase: 'spec',
3360
+ qaAgentId: qaId,
3361
+ });
3362
+ if (!transition) {
3363
+ await this.releaseAgentForTask(qaId, taskId, 'idle')
3364
+ .catch(() => undefined);
3365
+ // 不 re-acquire dev: markAgentWaiting (mode='waiting') 仅 bump updatedAt,
3366
+ // dev 仍 bound 到 task; develop phase 不在 reentry 集合, 重 acquire 必返回 false (dead code)。
3367
+ await this.safeEmit({
3368
+ id: '',
3369
+ type: 'human.intervention',
3370
+ timestamp: new Date().toISOString(),
3371
+ projectId,
3372
+ agentId: qaId,
3373
+ taskId,
3374
+ data: { phase: 'spec-review-transition-failed', qaAgentId: qaId },
3375
+ });
3376
+ return null;
3377
+ }
3378
+ // Phase 2d: startSession 用显式 newToken/newRound 透传到 prompt。
3379
+ // 失败时回滚 transition + 清新 persist 字段,避免 task 留在 review 但 qa 无 session 的 stuck。
3380
+ // 不调 acquireAgentForTask(dev, 'develop'):markAgentWaiting 走 mode='waiting' 仅 bump updatedAt
3381
+ // (不清 binding 也不真正 park REPL),dev 仍 bound 到 task;且 develop phase 不在
3382
+ // canDispatchWithBinding 的 reentry 集合,重 acquire 必返回 false — 是 dead code。
3383
+ let started = false;
3384
+ try {
3385
+ started = await this.startSession(taskId, qaId, 'spec-review', {
3386
+ bypassTaskStatusGate: true,
3387
+ specMarkerToken: newToken,
3388
+ currentSpecRound: newRound,
3389
+ });
3390
+ }
3391
+ catch (err) {
3392
+ // DispatchTerminalError 都委托给 failTaskForDispatchError:ack_unknown 会保留绑定走
3393
+ // markAwaitingHuman,其他 reason(prompt_too_large 等非 transient)让 task 进 failed
3394
+ // 而不是 rollback 让 cron 反复 retry。其他异常(瞬时 / 不明)才走 rollback + release。
3395
+ if (err instanceof DispatchTerminalError) {
3396
+ await this.failTaskForDispatchError(taskId, 'spec-review', qaId, err);
3397
+ }
3398
+ else if (err instanceof EnsureSessionError && err.partial.handled) {
3399
+ // handleDialogPendingFromRuntime 已 Held + fail task + release partners;跳过 rollback + release,
3400
+ // 否则 boundTask terminal 让 release gate 放行清掉仍卡 dialog 的 pane lock。
3401
+ }
3402
+ else {
3403
+ await this.rollbackSpecReviewTransition(taskId, originalStatus, originalToken, originalRound);
3404
+ await this.releaseAgentForTask(qaId, taskId, 'idle')
3405
+ .catch(() => undefined);
3406
+ }
3407
+ throw err;
3408
+ }
3409
+ if (!started) {
3410
+ await this.rollbackSpecReviewTransition(taskId, originalStatus, originalToken, originalRound);
3411
+ await this.releaseAgentForTask(qaId, taskId, 'idle')
3412
+ .catch(() => undefined);
3413
+ await this.safeEmit({
3414
+ id: '',
3415
+ type: 'human.intervention',
3416
+ timestamp: new Date().toISOString(),
3417
+ projectId,
3418
+ agentId: qaId,
3419
+ taskId,
3420
+ data: { phase: 'spec-review-start-failed', qaAgentId: qaId },
3421
+ });
3422
+ return null;
3423
+ }
3424
+ // Phase 3: arm watcher。spec-ready 已被消费,先 disarm 防止 dev 之后无关 marker 误触发。
3425
+ this.stopSpecMarkerWatcher(taskId);
3426
+ await this.armSpecMarkerWatcher(taskId, qaId, 'spec-review-complete', newToken);
3427
+ return await this.taskStore.get(taskId);
3428
+ }
3429
+ // startSession 失败回滚:
3430
+ // - pre-spec entry: restore originalToken 让 dev 后续 spec-ready marker 经 freshness gate 通过 → auto retry。
3431
+ // - fixing entry: 保留 phase='spec' + qaAgentId(否则 spec.* freshness gate 全 fail),清 token 防 stale。
3432
+ // round 必须 restore — round 是 "已完成轮次" 计数, 累计失败不应吃 round 配额。
3433
+ async rollbackSpecReviewTransition(taskId, originalStatus, originalToken, originalRound) {
3434
+ if (originalStatus === 'fixing') {
3435
+ await this.transitionTaskStatus(taskId, 'fixing', { fromStatus: ['review'] }, { specMarkerToken: undefined, specReviewRound: originalRound });
3436
+ return;
3437
+ }
3438
+ await this.transitionTaskStatus(taskId, originalStatus, { fromStatus: ['review'] }, {
3439
+ specMarkerToken: originalToken,
3440
+ phase: undefined,
3441
+ qaAgentId: undefined,
3442
+ specReviewRound: originalRound,
3443
+ });
3444
+ }
3445
+ async dispatchSpecFixToDev(taskId, findings) {
3446
+ // Phase 1 (lock): validate + phase guard + decide newToken。
3447
+ // fix 是同 round 的 dev 处理 QA findings,round 不递增;只刷新 token 让 prompt + watcher 唯一识别本轮 fix。
3448
+ const claim = await this.withTaskLock(async () => {
3449
+ const task = await this.taskStore.get(taskId);
3450
+ if (!task)
3451
+ throw new Error(`dispatchSpecFixToDev: task ${taskId} not found`);
3452
+ const devAgentId = task.agentId;
3453
+ if (!devAgentId) {
3454
+ throw new Error(`dispatchSpecFixToDev: task ${taskId} has no dev agent`);
3455
+ }
3456
+ // 离开 spec 阶段的 task 不应再被 spec-fix dispatch 击中 (defense in depth — handler 也 gate)。
3457
+ if (task.phase !== 'spec') {
3458
+ await this.safeEmit({
3459
+ id: '',
3460
+ type: 'human.intervention',
3461
+ timestamp: new Date().toISOString(),
3462
+ projectId: task.projectId,
3463
+ agentId: devAgentId,
3464
+ taskId,
3465
+ data: { phase: 'spec-fix-stale-phase', taskPhase: task.phase },
3466
+ });
3467
+ return null;
3468
+ }
3469
+ return {
3470
+ devAgentId,
3471
+ qaAgentId: task.qaAgentId,
3472
+ projectId: task.projectId,
3473
+ newToken: createMarkerToken(),
3474
+ currentRound: task.specReviewRound ?? 1,
3475
+ };
3476
+ });
3477
+ if (!claim)
3478
+ return null;
3479
+ const { devAgentId, qaAgentId, projectId, newToken, currentRound } = claim;
3480
+ if (qaAgentId) {
3481
+ // release 失败留 stale qa binding,下一轮 acquireAgentForTask(qa) 必拒;abort + emit intervention。
3482
+ const released = await this.releaseAgentForTask(qaAgentId, taskId, 'idle')
3483
+ .catch(err => {
3484
+ console.warn(`[AgentManager] dispatchSpecFixToDev release qa=${qaAgentId} failed:`, err);
3485
+ return false;
3486
+ });
3487
+ if (!released) {
3488
+ await this.safeEmit({
3489
+ id: '',
3490
+ type: 'human.intervention',
3491
+ timestamp: new Date().toISOString(),
3492
+ projectId,
3493
+ agentId: qaAgentId,
3494
+ taskId,
3495
+ data: { phase: 'spec-fix-qa-release-failed', qaAgentId },
3496
+ });
3497
+ return null;
3498
+ }
3499
+ }
3500
+ // Phase 2a: acquire dev。
3501
+ const acquired = await this.acquireAgentForTask(devAgentId, taskId, 'spec-fix');
3502
+ if (!acquired) {
3503
+ await this.safeEmit({
3504
+ id: '',
3505
+ type: 'human.intervention',
3506
+ timestamp: new Date().toISOString(),
3507
+ projectId,
3508
+ agentId: devAgentId,
3509
+ taskId,
3510
+ data: { phase: 'spec-fix-dev-acquire-failed', devAgentId },
3511
+ });
3512
+ return null;
3513
+ }
3514
+ // Phase 2b (lock): atomic transition + persist newToken/phase。
3515
+ // 必须在 continueSession 之前;否则崩溃后 armRecoveredSpecMarkers 读旧 token,
3516
+ // 与 dev 输出的 newToken marker 不匹配 → 链路死。
3517
+ const transition = await this.transitionTaskStatus(taskId, 'fixing', { fromStatus: ['review'] }, { specMarkerToken: newToken, phase: 'spec' });
3518
+ if (!transition) {
3519
+ await this.releaseAgentForTask(devAgentId, taskId, 'idle')
3520
+ .catch(() => undefined);
3521
+ await this.safeEmit({
3522
+ id: '',
3523
+ type: 'human.intervention',
3524
+ timestamp: new Date().toISOString(),
3525
+ projectId,
3526
+ agentId: devAgentId,
3527
+ taskId,
3528
+ data: { phase: 'spec-fix-transition-failed', devAgentId },
3529
+ });
3530
+ return null;
3531
+ }
3532
+ // Phase 2c: continueSession 透传 newToken + currentRound 给 prompt。
3533
+ // 失败时回滚 transition + 清新 token,避免 task 留在 fixing 但 dev 无 spec-fix prompt 的 stuck。
3534
+ let resumed = false;
3535
+ try {
3536
+ resumed = await this.continueSession(taskId, devAgentId, 'spec-fix', {
3537
+ specFindings: findings,
3538
+ specMarkerToken: newToken,
3539
+ currentSpecRound: currentRound,
3540
+ bypassTaskStatusGate: true,
3541
+ });
3542
+ }
3543
+ catch (err) {
3544
+ // 同 spec-review:DispatchTerminalError 走 failTaskForDispatchError 统一处理
3545
+ // (ack_unknown → markAwaitingHuman,其他 reason → release + task failed)。
3546
+ if (err instanceof DispatchTerminalError) {
3547
+ await this.failTaskForDispatchError(taskId, 'spec-fix', devAgentId, err);
3548
+ }
3549
+ else if (err instanceof EnsureSessionError && err.partial.handled) {
3550
+ // handleDialogPendingFromRuntime 已 Held + fail task + release partners;跳过 rollback + release。
3551
+ }
3552
+ else {
3553
+ await this.rollbackSpecFixTransition(taskId);
3554
+ await this.releaseAgentForTask(devAgentId, taskId, 'idle')
3555
+ .catch(() => undefined);
3556
+ }
3557
+ console.error(`[AgentManager] dispatchSpecFixToDev continueSession(dev=${devAgentId}) failed:`, err);
3558
+ throw err;
3559
+ }
3560
+ if (!resumed) {
3561
+ await this.rollbackSpecFixTransition(taskId);
3562
+ await this.releaseAgentForTask(devAgentId, taskId, 'idle')
3563
+ .catch(() => undefined);
3564
+ await this.safeEmit({
3565
+ id: '',
3566
+ type: 'human.intervention',
3567
+ timestamp: new Date().toISOString(),
3568
+ projectId,
3569
+ agentId: devAgentId,
3570
+ taskId,
3571
+ data: { phase: 'spec-fix-resume-failed', devAgentId },
3572
+ });
3573
+ return null;
3574
+ }
3575
+ // Phase 3: arm watcher。
3576
+ await this.armSpecMarkerWatcher(taskId, devAgentId, 'spec-fix-complete', newToken);
3577
+ return await this.taskStore.get(taskId);
3578
+ }
3579
+ // continueSession 失败回滚:fixing → review + 清新 token。
3580
+ // 保留 phase='spec' 与 qaAgentId — 失败后 review 状态需要人工 retry 或重新 dispatch。
3581
+ async rollbackSpecFixTransition(taskId) {
3582
+ await this.transitionTaskStatus(taskId, 'review', { fromStatus: ['fixing'] }, { specMarkerToken: undefined });
3583
+ }
3584
+ async transitionToCodePhase(taskId) {
3585
+ const task = await this.taskStore.get(taskId);
3586
+ if (!task)
3587
+ return null;
3588
+ const devAgentId = task.agentId;
3589
+ if (!devAgentId)
3590
+ return null;
3591
+ // Atomic transition + persist: 旧版先 transition 再 updateTask, 中间崩溃 task 卡在
3592
+ // (phase='spec', status='in_progress') — armRecoveredSpecMarkers 三个 case 都不匹配,
3593
+ // freshness gate 也拒所有 spec.* event, 任务 stranded 无 auto-recovery。
3594
+ const transition = await this.transitionTaskStatus(taskId, 'in_progress', { fromStatus: ['review', 'fixing'] }, { phase: 'code', specMarkerToken: undefined });
3595
+ if (!transition)
3596
+ return null;
3597
+ this.stopSpecMarkerWatcher(taskId);
3598
+ if (task.qaAgentId) {
3599
+ // release 失败留 stale qa binding → emit intervention 让其可见。
3600
+ const released = await this.releaseAgentForTask(task.qaAgentId, taskId, 'idle')
3601
+ .catch(() => false);
3602
+ if (!released) {
3603
+ await this.safeEmit({
3604
+ id: '',
3605
+ type: 'human.intervention',
3606
+ timestamp: new Date().toISOString(),
3607
+ projectId: task.projectId,
3608
+ agentId: task.qaAgentId,
3609
+ taskId,
3610
+ data: { phase: 'code-phase-qa-release-failed', qaAgentId: task.qaAgentId },
3611
+ });
3612
+ }
3613
+ }
3614
+ const acquired = await this.acquireAgentForTask(devAgentId, taskId, 'code');
3615
+ if (!acquired) {
3616
+ await this.safeEmit({
3617
+ id: '',
3618
+ type: 'human.intervention',
3619
+ timestamp: new Date().toISOString(),
3620
+ projectId: task.projectId,
3621
+ agentId: devAgentId,
3622
+ taskId,
3623
+ data: { phase: 'code-dev-acquire-failed', devAgentId },
3624
+ });
3625
+ return null;
3626
+ }
3627
+ let resumed = false;
3628
+ try {
3629
+ resumed = await this.continueSession(taskId, devAgentId, 'code');
3630
+ }
3631
+ catch (err) {
3632
+ // 同 spec-review/spec-fix:DispatchTerminalError 委托给 failTaskForDispatchError
3633
+ // (ack_unknown → markAwaitingHuman;其他 reason → release + task failed)。
3634
+ if (err instanceof DispatchTerminalError) {
3635
+ await this.failTaskForDispatchError(taskId, 'code', devAgentId, err);
3636
+ }
3637
+ console.error(`[AgentManager] transitionToCodePhase continueSession(dev=${devAgentId}) failed:`, err);
3638
+ throw err;
3639
+ }
3640
+ if (!resumed) {
3641
+ await this.safeEmit({
3642
+ id: '',
3643
+ type: 'human.intervention',
3644
+ timestamp: new Date().toISOString(),
3645
+ projectId: task.projectId,
3646
+ agentId: devAgentId,
3647
+ taskId,
3648
+ data: { phase: 'code-resume-failed', devAgentId },
3649
+ });
3650
+ return null;
3651
+ }
3652
+ return await this.taskStore.get(taskId);
3653
+ }
3654
+ }
3655
/**
 * Build a lookup table from agent id to agent record.
 *
 * Walks `config.project`; each project's `agent` collection is itself a
 * collection of groups ("pairs") of agent objects. Every agent is shallow-copied
 * and tagged with the `id` of the project it was found under. When the same
 * agent id occurs more than once, the last occurrence wins.
 *
 * @param config Object with a `project` collection; each project has `id` and `agent`.
 * @returns {Map} Map keyed by `agent.id` whose values are `{ ...agent, projectId }`.
 */
function buildAgentIndex(config) {
    // Pair an agent with its owning project as a [key, value] Map entry.
    const toEntry = (projectId) => (agent) => [agent.id, { ...agent, projectId }];
    const entries = Array.from(config.project).flatMap((project) => (
        Array.from(project.agent).flatMap((pair) => Array.from(pair, toEntry(project.id)))
    ));
    return new Map(entries);
}
3666
+ //# sourceMappingURL=manager.js.map