@songsid/agend 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (232)
  1. package/README.md +210 -0
  2. package/README.zh-TW.md +134 -0
  3. package/dist/access-path.d.ts +10 -0
  4. package/dist/access-path.js +32 -0
  5. package/dist/access-path.js.map +1 -0
  6. package/dist/adapter-world.d.ts +25 -0
  7. package/dist/adapter-world.js +41 -0
  8. package/dist/adapter-world.js.map +1 -0
  9. package/dist/agent-cli-instructions.md +50 -0
  10. package/dist/agent-cli.d.ts +2 -0
  11. package/dist/agent-cli.js +200 -0
  12. package/dist/agent-cli.js.map +1 -0
  13. package/dist/agent-endpoint.d.ts +25 -0
  14. package/dist/agent-endpoint.js +162 -0
  15. package/dist/agent-endpoint.js.map +1 -0
  16. package/dist/backend/antigravity.d.ts +17 -0
  17. package/dist/backend/antigravity.js +98 -0
  18. package/dist/backend/antigravity.js.map +1 -0
  19. package/dist/backend/claude-code.d.ts +23 -0
  20. package/dist/backend/claude-code.js +171 -0
  21. package/dist/backend/claude-code.js.map +1 -0
  22. package/dist/backend/codex.d.ts +18 -0
  23. package/dist/backend/codex.js +160 -0
  24. package/dist/backend/codex.js.map +1 -0
  25. package/dist/backend/factory.d.ts +2 -0
  26. package/dist/backend/factory.js +28 -0
  27. package/dist/backend/factory.js.map +1 -0
  28. package/dist/backend/gemini-cli.d.ts +17 -0
  29. package/dist/backend/gemini-cli.js +163 -0
  30. package/dist/backend/gemini-cli.js.map +1 -0
  31. package/dist/backend/index.d.ts +7 -0
  32. package/dist/backend/index.js +7 -0
  33. package/dist/backend/index.js.map +1 -0
  34. package/dist/backend/kiro.d.ts +17 -0
  35. package/dist/backend/kiro.js +147 -0
  36. package/dist/backend/kiro.js.map +1 -0
  37. package/dist/backend/marker-utils.d.ts +13 -0
  38. package/dist/backend/marker-utils.js +64 -0
  39. package/dist/backend/marker-utils.js.map +1 -0
  40. package/dist/backend/mock.d.ts +25 -0
  41. package/dist/backend/mock.js +85 -0
  42. package/dist/backend/mock.js.map +1 -0
  43. package/dist/backend/opencode.d.ts +16 -0
  44. package/dist/backend/opencode.js +136 -0
  45. package/dist/backend/opencode.js.map +1 -0
  46. package/dist/backend/types.d.ts +86 -0
  47. package/dist/backend/types.js +33 -0
  48. package/dist/backend/types.js.map +1 -0
  49. package/dist/channel/access-manager.d.ts +18 -0
  50. package/dist/channel/access-manager.js +153 -0
  51. package/dist/channel/access-manager.js.map +1 -0
  52. package/dist/channel/adapters/telegram.d.ts +63 -0
  53. package/dist/channel/adapters/telegram.js +646 -0
  54. package/dist/channel/adapters/telegram.js.map +1 -0
  55. package/dist/channel/attachment-handler.d.ts +15 -0
  56. package/dist/channel/attachment-handler.js +88 -0
  57. package/dist/channel/attachment-handler.js.map +1 -0
  58. package/dist/channel/factory.d.ts +12 -0
  59. package/dist/channel/factory.js +67 -0
  60. package/dist/channel/factory.js.map +1 -0
  61. package/dist/channel/ipc-bridge.d.ts +26 -0
  62. package/dist/channel/ipc-bridge.js +220 -0
  63. package/dist/channel/ipc-bridge.js.map +1 -0
  64. package/dist/channel/mcp-server.d.ts +10 -0
  65. package/dist/channel/mcp-server.js +288 -0
  66. package/dist/channel/mcp-server.js.map +1 -0
  67. package/dist/channel/mcp-tools.d.ts +17 -0
  68. package/dist/channel/mcp-tools.js +110 -0
  69. package/dist/channel/mcp-tools.js.map +1 -0
  70. package/dist/channel/message-bus.d.ts +17 -0
  71. package/dist/channel/message-bus.js +86 -0
  72. package/dist/channel/message-bus.js.map +1 -0
  73. package/dist/channel/message-queue.d.ts +39 -0
  74. package/dist/channel/message-queue.js +253 -0
  75. package/dist/channel/message-queue.js.map +1 -0
  76. package/dist/channel/tool-router.d.ts +6 -0
  77. package/dist/channel/tool-router.js +75 -0
  78. package/dist/channel/tool-router.js.map +1 -0
  79. package/dist/channel/tool-tracker.d.ts +13 -0
  80. package/dist/channel/tool-tracker.js +58 -0
  81. package/dist/channel/tool-tracker.js.map +1 -0
  82. package/dist/channel/types.d.ts +118 -0
  83. package/dist/channel/types.js +2 -0
  84. package/dist/channel/types.js.map +1 -0
  85. package/dist/chat-export.d.ts +4 -0
  86. package/dist/chat-export.js +91 -0
  87. package/dist/chat-export.js.map +1 -0
  88. package/dist/classic-channel-manager.d.ts +59 -0
  89. package/dist/classic-channel-manager.js +193 -0
  90. package/dist/classic-channel-manager.js.map +1 -0
  91. package/dist/cli.d.ts +2 -0
  92. package/dist/cli.js +1833 -0
  93. package/dist/cli.js.map +1 -0
  94. package/dist/config.d.ts +9 -0
  95. package/dist/config.js +118 -0
  96. package/dist/config.js.map +1 -0
  97. package/dist/context-guardian.d.ts +26 -0
  98. package/dist/context-guardian.js +73 -0
  99. package/dist/context-guardian.js.map +1 -0
  100. package/dist/cost-guard.d.ts +36 -0
  101. package/dist/cost-guard.js +147 -0
  102. package/dist/cost-guard.js.map +1 -0
  103. package/dist/daemon-entry.d.ts +1 -0
  104. package/dist/daemon-entry.js +29 -0
  105. package/dist/daemon-entry.js.map +1 -0
  106. package/dist/daemon.d.ts +152 -0
  107. package/dist/daemon.js +1714 -0
  108. package/dist/daemon.js.map +1 -0
  109. package/dist/daily-summary.d.ts +13 -0
  110. package/dist/daily-summary.js +55 -0
  111. package/dist/daily-summary.js.map +1 -0
  112. package/dist/event-log.d.ts +36 -0
  113. package/dist/event-log.js +100 -0
  114. package/dist/event-log.js.map +1 -0
  115. package/dist/export-import.d.ts +2 -0
  116. package/dist/export-import.js +162 -0
  117. package/dist/export-import.js.map +1 -0
  118. package/dist/fleet-context.d.ts +61 -0
  119. package/dist/fleet-context.js +4 -0
  120. package/dist/fleet-context.js.map +1 -0
  121. package/dist/fleet-dashboard-html.d.ts +6 -0
  122. package/dist/fleet-dashboard-html.js +443 -0
  123. package/dist/fleet-dashboard-html.js.map +1 -0
  124. package/dist/fleet-health-server.d.ts +35 -0
  125. package/dist/fleet-health-server.js +290 -0
  126. package/dist/fleet-health-server.js.map +1 -0
  127. package/dist/fleet-instructions.d.ts +5 -0
  128. package/dist/fleet-instructions.js +161 -0
  129. package/dist/fleet-instructions.js.map +1 -0
  130. package/dist/fleet-manager.d.ts +212 -0
  131. package/dist/fleet-manager.js +3655 -0
  132. package/dist/fleet-manager.js.map +1 -0
  133. package/dist/fleet-rpc-handlers.d.ts +42 -0
  134. package/dist/fleet-rpc-handlers.js +356 -0
  135. package/dist/fleet-rpc-handlers.js.map +1 -0
  136. package/dist/fleet-system-prompt.d.ts +11 -0
  137. package/dist/fleet-system-prompt.js +61 -0
  138. package/dist/fleet-system-prompt.js.map +1 -0
  139. package/dist/general-knowledge/skills.md +177 -0
  140. package/dist/hang-detector.d.ts +16 -0
  141. package/dist/hang-detector.js +53 -0
  142. package/dist/hang-detector.js.map +1 -0
  143. package/dist/index.d.ts +8 -0
  144. package/dist/index.js +6 -0
  145. package/dist/index.js.map +1 -0
  146. package/dist/instance-lifecycle.d.ts +90 -0
  147. package/dist/instance-lifecycle.js +592 -0
  148. package/dist/instance-lifecycle.js.map +1 -0
  149. package/dist/instructions.d.ts +15 -0
  150. package/dist/instructions.js +90 -0
  151. package/dist/instructions.js.map +1 -0
  152. package/dist/logger.d.ts +7 -0
  153. package/dist/logger.js +84 -0
  154. package/dist/logger.js.map +1 -0
  155. package/dist/outbound-handlers.d.ts +51 -0
  156. package/dist/outbound-handlers.js +739 -0
  157. package/dist/outbound-handlers.js.map +1 -0
  158. package/dist/outbound-schemas.d.ts +238 -0
  159. package/dist/outbound-schemas.js +248 -0
  160. package/dist/outbound-schemas.js.map +1 -0
  161. package/dist/paths.d.ts +10 -0
  162. package/dist/paths.js +42 -0
  163. package/dist/paths.js.map +1 -0
  164. package/dist/plugin/agend/.claude-plugin/plugin.json +5 -0
  165. package/dist/quickstart.d.ts +1 -0
  166. package/dist/quickstart.js +595 -0
  167. package/dist/quickstart.js.map +1 -0
  168. package/dist/routing-engine.d.ts +22 -0
  169. package/dist/routing-engine.js +44 -0
  170. package/dist/routing-engine.js.map +1 -0
  171. package/dist/safe-async.d.ts +6 -0
  172. package/dist/safe-async.js +20 -0
  173. package/dist/safe-async.js.map +1 -0
  174. package/dist/scheduler/db.d.ts +37 -0
  175. package/dist/scheduler/db.js +360 -0
  176. package/dist/scheduler/db.js.map +1 -0
  177. package/dist/scheduler/db.test.d.ts +1 -0
  178. package/dist/scheduler/db.test.js +92 -0
  179. package/dist/scheduler/db.test.js.map +1 -0
  180. package/dist/scheduler/index.d.ts +4 -0
  181. package/dist/scheduler/index.js +4 -0
  182. package/dist/scheduler/index.js.map +1 -0
  183. package/dist/scheduler/scheduler.d.ts +44 -0
  184. package/dist/scheduler/scheduler.js +197 -0
  185. package/dist/scheduler/scheduler.js.map +1 -0
  186. package/dist/scheduler/scheduler.test.d.ts +1 -0
  187. package/dist/scheduler/scheduler.test.js +119 -0
  188. package/dist/scheduler/scheduler.test.js.map +1 -0
  189. package/dist/scheduler/types.d.ts +107 -0
  190. package/dist/scheduler/types.js +7 -0
  191. package/dist/scheduler/types.js.map +1 -0
  192. package/dist/service-installer.d.ts +17 -0
  193. package/dist/service-installer.js +182 -0
  194. package/dist/service-installer.js.map +1 -0
  195. package/dist/setup-wizard.d.ts +48 -0
  196. package/dist/setup-wizard.js +701 -0
  197. package/dist/setup-wizard.js.map +1 -0
  198. package/dist/statusline-watcher.d.ts +34 -0
  199. package/dist/statusline-watcher.js +73 -0
  200. package/dist/statusline-watcher.js.map +1 -0
  201. package/dist/stt.d.ts +10 -0
  202. package/dist/stt.js +33 -0
  203. package/dist/stt.js.map +1 -0
  204. package/dist/tmux-control.d.ts +52 -0
  205. package/dist/tmux-control.js +207 -0
  206. package/dist/tmux-control.js.map +1 -0
  207. package/dist/tmux-manager.d.ts +44 -0
  208. package/dist/tmux-manager.js +218 -0
  209. package/dist/tmux-manager.js.map +1 -0
  210. package/dist/topic-archiver.d.ts +40 -0
  211. package/dist/topic-archiver.js +103 -0
  212. package/dist/topic-archiver.js.map +1 -0
  213. package/dist/topic-commands.d.ts +28 -0
  214. package/dist/topic-commands.js +359 -0
  215. package/dist/topic-commands.js.map +1 -0
  216. package/dist/transcript-monitor.d.ts +23 -0
  217. package/dist/transcript-monitor.js +164 -0
  218. package/dist/transcript-monitor.js.map +1 -0
  219. package/dist/types.d.ts +211 -0
  220. package/dist/types.js +2 -0
  221. package/dist/types.js.map +1 -0
  222. package/dist/ui/dashboard.html +719 -0
  223. package/dist/web-api.d.ts +101 -0
  224. package/dist/web-api.js +648 -0
  225. package/dist/web-api.js.map +1 -0
  226. package/dist/webhook-emitter.d.ts +15 -0
  227. package/dist/webhook-emitter.js +41 -0
  228. package/dist/webhook-emitter.js.map +1 -0
  229. package/dist/workflow-templates/default.md +35 -0
  230. package/package.json +76 -0
  231. package/templates/launchd.plist.ejs +31 -0
  232. package/templates/systemd.service.ejs +16 -0
package/dist/daemon.js ADDED
@@ -0,0 +1,1714 @@
1
+ import { join, dirname, basename, resolve } from "node:path";
2
+ import { mkdirSync, writeFileSync, readFileSync, existsSync, unlinkSync, rmSync, appendFileSync, statSync, chmodSync } from "node:fs";
3
+ import { fileURLToPath } from "node:url";
4
+ import { randomBytes } from "node:crypto";
5
+ import { EventEmitter } from "node:events";
6
+ import { createLogger } from "./logger.js";
7
+ import { TmuxManager } from "./tmux-manager.js";
8
+ import { TranscriptMonitor } from "./transcript-monitor.js";
9
+ import { ContextGuardian } from "./context-guardian.js";
10
+ import { IpcServer } from "./channel/ipc-bridge.js";
11
+ import { MessageBus } from "./channel/message-bus.js";
12
+ import { shellQuote } from "./backend/types.js";
13
+ import { getTmuxSession } from "./config.js";
14
+ import { routeToolCall } from "./channel/tool-router.js";
15
+ import { HangDetector } from "./hang-detector.js";
16
+ import { buildFleetInstructions } from "./instructions.js";
17
// ES modules have no built-in __filename/__dirname; derive both from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);

// Tool-name groups consulted when routing tool calls. Built once at module load
// so the sets are not re-created on every handleToolCall invocation.
const CROSS_INSTANCE_TOOLS = new Set([
    "send_to_instance",
    "list_instances",
    "start_instance",
    "restart_instance",
    "create_instance",
    "delete_instance",
    "replace_instance",
    "request_information",
    "delegate_task",
    "report_result",
    "describe_instance",
]);
const SCHEDULE_TOOLS = new Set([
    "create_schedule",
    "list_schedules",
    "update_schedule",
    "delete_schedule",
]);
const DECISION_TOOLS = new Set([
    "post_decision",
    "list_decisions",
    "update_decision",
]);
// Name of the single task tool (a plain string rather than a set).
const TASK_TOOL = "task";
24
+ export class Daemon extends EventEmitter {
25
+ name;
26
+ config;
27
+ instanceDir;
28
+ topicMode;
29
+ backend;
30
+ controlClient;
31
+ logger;
32
+ tmuxSessionName;
33
+ tmux = null;
34
+ ipcServer = null;
35
+ messageBus;
36
+ transcriptMonitor = null;
37
+ toolTracker = null;
38
+ guardian = null;
39
+ adapter = null;
40
+ pendingIpcRequests = new Map();
41
+ // Track chatId/threadId from inbound messages for automatic outbound routing
42
+ lastChatId;
43
+ lastThreadId;
44
+ // Pending ack: react 🫡 on first transcript activity after receiving a message
45
+ pendingAckMessage = null;
46
+ // Tool status tracking for channel adapter
47
+ toolStatusMessageId = null;
48
+ toolStatusLines = [];
49
+ toolStatusDebounce = null;
50
+ // Session identity: map IPC socket → sessionName (from mcp_ready)
51
+ socketSessionNames = new Map();
52
+ // Crash recovery
53
+ static tmuxServerCrashTimestamps = [];
54
+ static tmuxServerPaused = false;
55
+ static tmuxServerRecoveryTimer = null;
56
+ healthCheckTimer = null;
57
+ crashCount = 0;
58
+ lastCrashAt = 0;
59
+ lastSpawnAt = 0;
60
+ crashTimestamps = [];
61
+ healthCheckPaused = false;
62
+ spawning = false;
63
+ skipResume = false;
64
+ /** Whether the last spawn started a fresh session (not resumed). */
65
+ isNewSession = false;
66
+ // Context rotation quality tracking
67
+ rotationStartedAt = 0;
68
+ preRotationContextPct = 0;
69
+ hangDetector = null;
70
+ // Model failover: override model on next spawn when rate-limited
71
+ modelOverride;
72
+ // Context rotation v3: ring buffers for daemon-side snapshot
73
+ recentUserMessages = [];
74
+ recentEvents = [];
75
+ recentToolActivity = [];
76
+ snapshotConsumed = false;
77
+ pasteLock = Promise.resolve();
78
+ pendingInstructionsUpdate;
79
+ pendingInstructionsNotice = false;
80
+ pasteQueueDepth = 0;
81
+ // PTY error pattern monitoring
82
+ errorMonitorTimer = null;
83
+ errorWaitingForRecovery = false; // true = error detected, waiting for ready pattern
84
+ errorDetectedAt = 0;
85
+ /** Whether this instance is in an error state (rate-limited, paused, or crash loop). */
86
+ get isErrorState() {
87
+ return this.errorWaitingForRecovery || this.healthCheckPaused || Daemon.tmuxServerPaused;
88
+ }
89
+ lastFailoverAt = 0; // cooldown: prevent repeated failover triggers
90
+ static FAILOVER_COOLDOWN_MS = 5 * 60_000; // 5 minutes
91
+ lastErrorNotifiedAt = new Map(); // per-type cooldown for all actions
92
+ static ERROR_COOLDOWN_MS = 5 * 60_000;
93
+ /** Cheap hash for pane content dedup — not cryptographic, just identity check */
94
+ static cheapPaneHash(pane) {
95
+ return `${pane.length}:${pane.slice(-200)}`;
96
+ }
97
+ // Hash dedup: suppress stale error re-detection after recovery
98
+ lastRecoveryPaneHash = null;
99
+ lastRecoveredErrorType = null;
100
+ lastDetectedErrorType = null;
101
+ constructor(name, config, instanceDir, topicMode = false, backend, controlClient) {
102
+ super();
103
+ this.name = name;
104
+ this.config = config;
105
+ this.instanceDir = instanceDir;
106
+ this.topicMode = topicMode;
107
+ this.backend = backend;
108
+ this.controlClient = controlClient;
109
+ this.logger = createLogger(config.log_level);
110
+ this.tmuxSessionName = getTmuxSession();
111
+ this.messageBus = new MessageBus();
112
+ this.messageBus.setLogger(this.logger);
113
+ }
114
+ async start() {
115
+ mkdirSync(this.instanceDir, { recursive: true });
116
+ writeFileSync(join(this.instanceDir, "daemon.pid"), String(process.pid));
117
+ this.logger.info(`Starting ${this.name}`);
118
+ // P1: Read crash state from previous run — skip resume if last run was a crash loop
119
+ const crashStatePath = join(this.instanceDir, "crash-state.json");
120
+ try {
121
+ if (existsSync(crashStatePath)) {
122
+ const state = JSON.parse(readFileSync(crashStatePath, "utf-8"));
123
+ if (state.resumeDisabled) {
124
+ this.skipResume = true;
125
+ this.logger.warn("Previous crash loop detected — starting without resume");
126
+ }
127
+ unlinkSync(crashStatePath);
128
+ }
129
+ }
130
+ catch { /* corrupt file — ignore */ }
131
+ // 1. IPC server — bridge between MCP server (Claude's child) and daemon
132
+ const sockPath = join(this.instanceDir, "channel.sock");
133
+ this.ipcServer = new IpcServer(sockPath, this.logger);
134
+ // Forward IPC server errors as daemon events (prevents unhandled 'error' crash).
135
+ // Guard: only forward post-listen errors — startup errors are handled by listen() rejection.
136
+ let ipcListening = false;
137
+ this.ipcServer.on("error", (err) => {
138
+ if (!ipcListening)
139
+ return; // startup errors handled by listen() rejection
140
+ this.logger.error({ err, name: this.name }, "IPC server error");
141
+ this.emit("error", err);
142
+ });
143
+ await this.ipcServer.listen();
144
+ ipcListening = true;
145
+ // Permanent IPC dispatcher: routes responses to pending requests by type+id key
146
+ this.ipcServer.on("message", (msg) => {
147
+ const type = msg.type;
148
+ if (!type)
149
+ return;
150
+ // Build lookup key matching the pattern used when registering
151
+ let key;
152
+ if ((type === "fleet_schedule_response" || type === "fleet_outbound_response" || type === "fleet_decision_response" || type === "fleet_task_response" || type === "fleet_display_name_response" || type === "fleet_description_response") && msg.fleetRequestId) {
153
+ key = String(msg.fleetRequestId);
154
+ }
155
+ else if (type === "fleet_outbound_response" && msg.requestId != null) {
156
+ key = `fleet_out_${msg.requestId}`;
157
+ }
158
+ if (key && this.pendingIpcRequests.has(key)) {
159
+ const handler = this.pendingIpcRequests.get(key);
160
+ this.pendingIpcRequests.delete(key);
161
+ handler(msg);
162
+ }
163
+ });
164
+ // IPC message relay: when daemon wants to push a channel message to Claude,
165
+ // it broadcasts to all IPC clients (the MCP server is one of them).
166
+ // When MCP server sends a tool_call, daemon handles it via the messageBus.
167
+ this.ipcServer.on("message", (msg, socket) => {
168
+ if (msg.type === "tool_call") {
169
+ // MCP server forwarding a Claude tool call (reply, react, edit, download)
170
+ this.handleToolCall(msg, socket);
171
+ }
172
+ else if (msg.type === "mcp_ready") {
173
+ const sessionName = msg.sessionName;
174
+ if (sessionName) {
175
+ this.socketSessionNames.set(socket, sessionName);
176
+ socket.on("close", () => {
177
+ this.socketSessionNames.delete(socket);
178
+ // Notify fleet manager so it can clean up sessionRegistry
179
+ if (sessionName !== this.name) {
180
+ this.ipcServer?.broadcast({ type: "session_disconnected", sessionName });
181
+ }
182
+ });
183
+ }
184
+ this.logger.debug({ sessionName }, "MCP channel server connected and ready");
185
+ // Notify FleetManager's IPC client that MCP is ready
186
+ this.ipcServer?.broadcast({ type: "mcp_ready", sessionName });
187
+ }
188
+ else if (msg.type === "query_sessions") {
189
+ // Fleet manager asks for all registered session names (catches sessions
190
+ // that sent mcp_ready before fleet manager connected).
191
+ const sessions = [];
192
+ for (const [s, sessionName] of this.socketSessionNames) {
193
+ if (!s.destroyed && sessionName !== this.name) {
194
+ // Individual mcp_ready for initial registration path
195
+ this.ipcServer?.send(socket, { type: "mcp_ready", sessionName });
196
+ sessions.push(sessionName);
197
+ }
198
+ }
199
+ // Batch response for prune path
200
+ this.ipcServer?.send(socket, { type: "query_sessions_response", sessions });
201
+ }
202
+ else if (msg.type === "fleet_inbound") {
203
+ // Fleet manager routed a message to us (topic mode)
204
+ const meta = msg.meta;
205
+ const targetSession = msg.targetSession;
206
+ // Only update lastChatId/lastThreadId from real channel messages (non-empty chat_id).
207
+ // Cross-instance messages have empty chat_id and must not overwrite these.
208
+ if (meta.chat_id)
209
+ this.lastChatId = meta.chat_id;
210
+ if (meta.chat_id && meta.thread_id)
211
+ this.lastThreadId = meta.thread_id;
212
+ this.pushChannelMessage(msg.content, meta, targetSession);
213
+ }
214
+ else if (msg.type === "raw_paste") {
215
+ // Paste raw text directly to CLI without [user:] wrapping.
216
+ // Use pasteLock to serialize with other deliveries and wait for idle.
217
+ if (this.tmux) {
218
+ const rawText = msg.content;
219
+ this.pasteLock = this.pasteLock.then(async () => {
220
+ await this.deliverMessage(rawText);
221
+ this.logger.debug({ text: rawText.slice(0, 100) }, "Raw paste delivered");
222
+ }).catch(err => {
223
+ this.logger.warn({ err: err.message }, "raw_paste delivery error");
224
+ });
225
+ }
226
+ }
227
+ else if (msg.type === "fleet_schedule_trigger") {
228
+ const payload = msg.payload;
229
+ const meta = msg.meta;
230
+ this.lastChatId = meta.chat_id;
231
+ this.lastThreadId = meta.thread_id;
232
+ this.pushChannelMessage(payload.message, meta);
233
+ }
234
+ else if (msg.type === "fleet_tool_status_ack") {
235
+ // Fleet manager sent us the messageId for our tool status message
236
+ this.toolStatusMessageId = msg.messageId;
237
+ }
238
+ });
239
+ // 2. Tmux — ensure session, create window if not alive
240
+ await TmuxManager.ensureSession(this.tmuxSessionName);
241
+ this.tmux = new TmuxManager(this.tmuxSessionName, "");
242
+ // Strategy A: always start fresh Claude window (MCP server has no reconnection)
243
+ // Kill any existing window from previous run
244
+ const windowIdFile = join(this.instanceDir, "window-id");
245
+ if (existsSync(windowIdFile)) {
246
+ const savedId = readFileSync(windowIdFile, "utf-8").trim();
247
+ if (savedId) {
248
+ const oldTmux = new TmuxManager(this.tmuxSessionName, savedId);
249
+ if (await oldTmux.isWindowAlive()) {
250
+ this.saveSessionId();
251
+ await oldTmux.killWindow();
252
+ this.logger.info({ savedId }, "Killed old tmux window for fresh start");
253
+ }
254
+ }
255
+ }
256
+ const resumed = await this.spawnClaudeWindow();
257
+ this.isNewSession = !resumed;
258
+ if (!resumed) {
259
+ await this.injectSnapshotMessage();
260
+ }
261
+ else {
262
+ // Clean up stale snapshot file — resume restored full context, snapshot not needed
263
+ try {
264
+ unlinkSync(join(this.instanceDir, "rotation-state.json"));
265
+ }
266
+ catch { /* may not exist */ }
267
+ }
268
+ if (!this.config.lightweight) {
269
+ // 3. Pipe-pane for prompt detection
270
+ const outputLog = join(this.instanceDir, "output.log");
271
+ await this.tmux.pipeOutput(outputLog).catch(() => { });
272
+ // 4. Transcript monitor
273
+ this.transcriptMonitor = new TranscriptMonitor(this.instanceDir, this.logger);
274
+ // 5. Wire transcript events
275
+ const ackIfPending = () => {
276
+ if (!this.pendingAckMessage || !this.adapter)
277
+ return;
278
+ const { chatId, messageId } = this.pendingAckMessage;
279
+ this.pendingAckMessage = null;
280
+ this.adapter.react(chatId, messageId, "🫡")
281
+ .catch(e => this.logger.debug({ err: e.message }, "Ack react failed"));
282
+ };
283
+ this.transcriptMonitor.on("tool_use", (name, input) => {
284
+ this.logger.debug({ tool: name }, "Tool use");
285
+ ackIfPending();
286
+ this.hangDetector?.recordActivity();
287
+ this.recordRecentEvent({ type: "tool_use", name, preview: this.summarizeTool(name, input) });
288
+ this.recordRecentToolActivity(this.summarizeTool(name, input));
289
+ });
290
+ this.transcriptMonitor.on("tool_result", (name, _output) => {
291
+ this.hangDetector?.recordActivity();
292
+ this.recordRecentEvent({ type: "tool_result", name });
293
+ });
294
+ this.transcriptMonitor.on("assistant_text", (text) => {
295
+ this.logger.debug({ text: text.slice(0, 200) }, "Claude response");
296
+ ackIfPending();
297
+ this.hangDetector?.recordActivity();
298
+ this.recordRecentEvent({ type: "assistant_text", preview: text.slice(0, 100) });
299
+ });
300
+ this.transcriptMonitor.startPolling();
301
+ // Hang detector
302
+ this.hangDetector = new HangDetector(15);
303
+ this.hangDetector.start();
304
+ // 8. Context guardian
305
+ const statusFile = join(this.instanceDir, "statusline.json");
306
+ this.guardian = new ContextGuardian(this.config.context_guardian, this.logger, statusFile);
307
+ this.guardian.startWatching();
308
+ this.guardian.on("status_update", () => {
309
+ this.saveSessionId();
310
+ this.hangDetector?.recordStatuslineUpdate();
311
+ });
312
+ // Context rotation removed: all CLI backends have built-in auto-compact.
313
+ // Crash recovery (health check + respawn with snapshot) is retained below.
314
+ }
315
+ // NOTE: Do NOT set process.env.AGEND_SOCKET_PATH here — it pollutes the
316
+ // shared fleet manager process env. Each daemon overwrites it, so the last
317
+ // one wins, causing MCP servers (especially kiro-cli which inherits process
318
+ // env) to connect to the wrong socket. The socket path is passed via
319
+ // per-instance MCP config files or wrapper scripts instead.
320
+ // 10. Health check — detect crashed tmux window and respawn
321
+ // Re-enabled: orphan window issue fixed by killing same-name windows before respawn.
322
+ // Without this, a dead CLI window goes undetected and messages are silently lost.
323
+ this.startHealthCheck();
324
+ if (!this.config.lightweight) {
325
+ this.startErrorMonitor();
326
+ }
327
+ this.logger.info(`${this.name} ready`);
328
+ }
329
+ startHealthCheck() {
330
+ const { max_retries, backoff, reset_after } = this.config.restart_policy;
331
+ if (max_retries <= 0)
332
+ return; // restart disabled
333
+ const scheduleNext = () => {
334
+ this.healthCheckTimer = setTimeout(async () => {
335
+ // Instance directory removed externally (e.g. `rm -rf ~/.agend/instances/<name>`).
336
+ // Stop the loop permanently — otherwise every tick triggers a respawn, whose
337
+ // writeRotationSnapshot fails with ENOENT and gets caught as "Failed to respawn",
338
+ // spamming errors every ~30s forever.
339
+ if (!existsSync(this.instanceDir)) {
340
+ this.logger.warn({ instanceDir: this.instanceDir }, "Instance directory missing — stopping health check");
341
+ this.healthCheckPaused = true;
342
+ this.healthCheckTimer = null;
343
+ return;
344
+ }
345
+ if (!this.tmux || this.spawning || this.healthCheckPaused || Daemon.tmuxServerPaused) {
346
+ scheduleNext();
347
+ return;
348
+ }
349
+ const paneStatus = await this.tmux.getPaneStatus();
350
+ if (paneStatus?.alive) {
351
+ scheduleNext();
352
+ return;
353
+ }
354
+ // paneStatus === null → window gone entirely (e.g. tmux server crash)
355
+ // paneStatus.alive === false → pane dead, exit code available
356
+ const exitCode = paneStatus?.exitCode;
357
+ this.logger.debug({ exitCode }, `[health] pane exited with code: ${exitCode}`);
358
+ // Normal exit (e.g. user Ctrl+C or /exit) — no crash, no respawn
359
+ if (paneStatus && exitCode === 0) {
360
+ this.logger.info("CLI exited normally (code 0) — pausing health check");
361
+ await this.tmux.killWindow();
362
+ this.healthCheckPaused = true;
363
+ return;
364
+ }
365
+ // Distinguish tmux server crash from single window crash
366
+ let crashType = "window";
367
+ if (!paneStatus) {
368
+ const serverAlive = await TmuxManager.sessionExists(this.tmuxSessionName);
369
+ if (!serverAlive) {
370
+ crashType = "server";
371
+ this.logger.error("tmux server died — all windows lost");
372
+ // Fleet-level circuit breaker: pause all instances on repeated tmux server crashes
373
+ Daemon.tmuxServerCrashTimestamps.push(Date.now());
374
+ const cutoff = Date.now() - 5 * 60_000;
375
+ Daemon.tmuxServerCrashTimestamps = Daemon.tmuxServerCrashTimestamps.filter(t => t > cutoff);
376
+ if (Daemon.tmuxServerCrashTimestamps.length >= 2 && !Daemon.tmuxServerPaused) {
377
+ Daemon.tmuxServerPaused = true;
378
+ this.logger.error("Fleet-level tmux server circuit breaker triggered — pausing all respawns for 30s");
379
+ this.emit("tmux_server_crash", this.name);
380
+ if (!Daemon.tmuxServerRecoveryTimer) {
381
+ Daemon.tmuxServerRecoveryTimer = setTimeout(() => {
382
+ Daemon.tmuxServerRecoveryTimer = null;
383
+ Daemon.tmuxServerPaused = false;
384
+ }, 30_000);
385
+ }
386
+ scheduleNext();
387
+ return;
388
+ }
389
+ await new Promise(r => setTimeout(r, 2_000)); // let session stabilize
390
+ }
391
+ else {
392
+ this.logger.warn({ exitCode }, "Claude window died (tmux server alive)");
393
+ }
394
+ }
395
+ else {
396
+ this.logger.warn({ exitCode }, "Claude process exited");
397
+ }
398
+ // Capture last output from dead pane before killing
399
+ let lastOutput;
400
+ if (paneStatus) {
401
+ try {
402
+ const raw = await this.tmux.capturePaneWithHistory(50);
403
+ // Strip ANSI escape codes for readability
404
+ const cleaned = raw.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, "");
405
+ lastOutput = cleaned.trimEnd() || undefined;
406
+ }
407
+ catch { /* best effort */ }
408
+ }
409
+ // Kill the dead window (remain-on-exit keeps it around) before respawn
410
+ if (paneStatus) {
411
+ await this.tmux.killWindow();
412
+ }
413
+ // Append to crash history
414
+ this.appendCrashHistory({ exitCode, lastOutput, crashType });
415
+ // Detect rapid crash: sliding window — 3+ crashes in 5 minutes
416
+ this.crashTimestamps.push(Date.now());
417
+ const crashWindowMs = 5 * 60_000;
418
+ this.crashTimestamps = this.crashTimestamps.filter(t => t > Date.now() - crashWindowMs);
419
+ if (this.crashTimestamps.length >= 3) {
420
+ this.healthCheckPaused = true;
421
+ this.logger.error({ crashesInWindow: this.crashTimestamps.length }, "3+ crashes in 5 minutes — pausing respawn");
422
+ // P1: Persist crash state so next process restart skips resume
423
+ try {
424
+ writeFileSync(join(this.instanceDir, "crash-state.json"), JSON.stringify({
425
+ crashesInWindow: this.crashTimestamps.length,
426
+ lastCrashAt: Date.now(),
427
+ resumeDisabled: true,
428
+ }));
429
+ }
430
+ catch { /* best effort */ }
431
+ this.emit("crash_loop", this.name);
432
+ return; // don't schedule next — paused
433
+ }
434
+ // Reset crash count if enough time has passed
435
+ if (reset_after > 0 && Date.now() - this.lastCrashAt > reset_after) {
436
+ this.crashCount = 0;
437
+ }
438
+ this.crashCount++;
439
+ this.lastCrashAt = Date.now();
440
+ if (this.crashCount > max_retries) {
441
+ this.logger.error({ crashCount: this.crashCount, maxRetries: max_retries }, "Max crash retries exceeded — not respawning");
442
+ return; // don't schedule next — given up
443
+ }
444
+ // Calculate backoff delay
445
+ const delay = backoff === "exponential"
446
+ ? Math.min(1000 * Math.pow(2, this.crashCount - 1), 60_000)
447
+ : 1000 * this.crashCount;
448
+ this.logger.warn({ crashCount: this.crashCount, delay }, "Claude window died — respawning after backoff");
449
+ await new Promise(r => setTimeout(r, delay));
450
+ try {
451
+ this.saveSessionId();
452
+ this.transcriptMonitor?.resetOffset();
453
+ // Kill orphan MCP server from the crashed CLI session.
454
+ // MCP server writes its PID to channel.mcp.pid on startup.
455
+ try {
456
+ const pidFile = join(this.instanceDir, "channel.mcp.pid");
457
+ const pid = parseInt(readFileSync(pidFile, "utf-8").trim(), 10);
458
+ process.kill(pid, "SIGTERM");
459
+ this.logger.info({ pid }, "Killed orphan MCP server");
460
+ }
461
+ catch { /* no pid file or process already dead */ }
462
+ // Kill any same-name windows before respawn to prevent orphans.
463
+ // Wrapped in try-catch: if tmux server is dead, listWindows throws —
464
+ // must not block spawnClaudeWindow (which calls ensureSession).
465
+ try {
466
+ const windows = await TmuxManager.listWindows(this.tmuxSessionName);
467
+ for (const w of windows) {
468
+ if (w.name === this.name) {
469
+ const tm = new TmuxManager(this.tmuxSessionName, w.id);
470
+ await tm.killWindow();
471
+ }
472
+ }
473
+ }
474
+ catch { /* tmux server may be dead — ensureSession in trySpawn will recover */ }
475
+ // Write snapshot before spawn — consumed only if resume fails
476
+ this.writeRotationSnapshot("crash");
477
+ // Try --resume first; spawnClaudeWindow falls back to fresh session if resume fails
478
+ const resumed = await this.spawnClaudeWindow();
479
+ if (!resumed) {
480
+ // Resume failed → fresh session → inject snapshot for context
481
+ await this.injectSnapshotMessage();
482
+ }
483
+ else {
484
+ // Clean up stale snapshot — resume restored full context
485
+ try {
486
+ unlinkSync(join(this.instanceDir, "rotation-state.json"));
487
+ }
488
+ catch { /* may not exist */ }
489
+ }
490
+ this.logger.info({ resumed }, "Respawned Claude window after crash");
491
+ this.emit("crash_respawn", this.name);
492
+ }
493
+ catch (err) {
494
+ this.logger.error({ err }, "Failed to respawn Claude window");
495
+ }
496
+ scheduleNext();
497
+ }, this.config.restart_policy.health_check_interval_ms ?? 30_000);
498
+ };
499
+ scheduleNext();
500
+ }
501
+ /**
502
+ * Periodically scan PTY output for backend-defined error patterns.
503
+ *
504
+ * State machine to avoid false positives from stale buffer text:
505
+ * MONITORING → (error pattern match) → WAITING_FOR_RECOVERY → (ready pattern match) → MONITORING
506
+ *
507
+ * Only emits pty_error once per error occurrence. After the agent recovers
508
+ * (ready pattern visible), it goes back to monitoring for new errors.
509
+ */
510
+ startErrorMonitor() {
511
+ const patterns = this.backend?.getErrorPatterns?.() ?? [];
512
+ const dialogs = this.backend?.getRuntimeDialogs?.() ?? [];
513
+ if (!patterns.length && !dialogs.length)
514
+ return;
515
+ if (!this.tmux)
516
+ return;
517
+ if (!this.backend)
518
+ return; // lightweight mode has no backend
519
+ const readyPattern = this.backend.getReadyPattern();
520
+ this.errorMonitorTimer = setInterval(async () => {
521
+ if (!this.tmux || this.spawning)
522
+ return;
523
+ try {
524
+ const alive = await this.tmux.isWindowAlive();
525
+ if (!alive)
526
+ return;
527
+ const pane = await this.tmux.capturePane();
528
+ // Only scan text after the last prompt marker to avoid matching stale errors
529
+ // that remain in the capture-pane buffer after recovery.
530
+ let scanText = pane;
531
+ const rpg = new RegExp(readyPattern.source, readyPattern.flags.includes("g") ? readyPattern.flags : readyPattern.flags + "g");
532
+ let lastIdx = -1;
533
+ let m;
534
+ while ((m = rpg.exec(pane)) !== null)
535
+ lastIdx = m.index;
536
+ if (lastIdx >= 0)
537
+ scanText = pane.slice(lastIdx);
538
+ // Auto-dismiss runtime dialogs (e.g. Codex rate limit model switch)
539
+ for (const dialog of dialogs) {
540
+ if (!dialog.pattern.test(pane))
541
+ continue;
542
+ this.logger.info(`Auto-dismissing runtime dialog: ${dialog.description}`);
543
+ const SPECIAL_KEYS = new Set(["Up", "Down", "Enter", "Escape", "Right", "Left"]);
544
+ for (const key of dialog.keys) {
545
+ if (SPECIAL_KEYS.has(key)) {
546
+ await this.tmux.sendSpecialKey(key);
547
+ }
548
+ else {
549
+ await this.tmux.pasteText(key);
550
+ }
551
+ await new Promise(r => setTimeout(r, 200));
552
+ }
553
+ return; // Dialog dismissed, skip error checks this cycle
554
+ }
555
+ // State: waiting for recovery — check if agent is back to ready
556
+ if (this.errorWaitingForRecovery) {
557
+ if (readyPattern.test(pane)) {
558
+ const downtime = Math.round((Date.now() - this.errorDetectedAt) / 1000);
559
+ // Record pane hash at recovery to suppress stale re-detection
560
+ this.lastRecoveryPaneHash = Daemon.cheapPaneHash(pane);
561
+ this.lastRecoveredErrorType = this.lastDetectedErrorType;
562
+ this.errorWaitingForRecovery = false;
563
+ this.errorDetectedAt = 0;
564
+ this.logger.info({ downtime_s: downtime }, "PTY error recovered — agent is ready again");
565
+ this.emit("pty_recovered", { name: this.name, downtime_s: downtime });
566
+ }
567
+ return; // Don't check for errors while waiting for recovery
568
+ }
569
+ // State: monitoring — check for new errors
570
+ const currentPaneHash = Daemon.cheapPaneHash(pane);
571
+ for (const ep of patterns) {
572
+ if (!ep.pattern.test(scanText))
573
+ continue;
574
+ // Dedup: suppress if same error on same screen as last recovery
575
+ if (this.lastRecoveryPaneHash && ep.type === this.lastRecoveredErrorType) {
576
+ if (currentPaneHash === this.lastRecoveryPaneHash) {
577
+ break; // same screen, same error → stale
578
+ }
579
+ // Screen changed — stop suppressing
580
+ this.lastRecoveryPaneHash = null;
581
+ this.lastRecoveredErrorType = null;
582
+ }
583
+ // Cooldown: skip if same error type was recently notified
584
+ const lastNotified = this.lastErrorNotifiedAt.get(ep.type) ?? 0;
585
+ if (Date.now() - lastNotified < Daemon.ERROR_COOLDOWN_MS) {
586
+ this.logger.debug({ errorType: ep.type }, "PTY error suppressed (cooldown active)");
587
+ break;
588
+ }
589
+ if (ep.action === "failover" && Date.now() - this.lastFailoverAt < Daemon.FAILOVER_COOLDOWN_MS) {
590
+ this.logger.debug({ errorType: ep.type }, "PTY error suppressed (failover cooldown active)");
591
+ break;
592
+ }
593
+ this.errorWaitingForRecovery = true;
594
+ this.errorDetectedAt = Date.now();
595
+ this.lastDetectedErrorType = ep.type;
596
+ this.lastErrorNotifiedAt.set(ep.type, Date.now());
597
+ if (ep.action === "failover")
598
+ this.lastFailoverAt = Date.now();
599
+ this.logger.warn({ errorType: ep.type, action: ep.action }, `PTY error detected: ${ep.message}`);
600
+ this.emit("pty_error", { name: this.name, ...ep });
601
+ break; // Only handle first match per scan
602
+ }
603
+ }
604
+ catch {
605
+ // capturePane can fail if window is transitioning — ignore
606
+ }
607
+ }, 5_000); // Check every 5 seconds (runtime dialogs need fast response)
608
+ }
609
+ async stop() {
610
+ this.logger.info("Stopping daemon instance");
611
+ if (this.healthCheckTimer) {
612
+ clearTimeout(this.healthCheckTimer);
613
+ this.healthCheckTimer = null;
614
+ }
615
+ if (this.errorMonitorTimer) {
616
+ clearInterval(this.errorMonitorTimer);
617
+ this.errorMonitorTimer = null;
618
+ }
619
+ if (this.toolStatusDebounce) {
620
+ clearTimeout(this.toolStatusDebounce);
621
+ this.toolStatusDebounce = null;
622
+ }
623
+ this.pendingIpcRequests.clear();
624
+ this.hangDetector?.stop();
625
+ this.transcriptMonitor?.stop();
626
+ this.guardian?.stop();
627
+ if (this.adapter)
628
+ await this.adapter.stop();
629
+ // Notify MCP servers of graceful shutdown (prevents reconnect attempts)
630
+ this.ipcServer?.broadcast({ type: "shutdown" });
631
+ // Quit CLI FIRST — this kills MCP server child processes cleanly.
632
+ // IPC must stay open during quit so MCP servers receive the shutdown message.
633
+ if (this.tmux) {
634
+ this.saveSessionId();
635
+ this.healthCheckPaused = true;
636
+ let killed = false;
637
+ const quitCmd = this.backend?.getQuitCommand();
638
+ if (quitCmd) {
639
+ await this.tmux.sendKeys(quitCmd);
640
+ // Delay before Enter to prevent tmux server race when multiple
641
+ // instances stop in parallel (same pattern as pasteText).
642
+ await new Promise(r => setTimeout(r, 150));
643
+ await this.tmux.sendSpecialKey("Enter");
644
+ // Wait up to 10s for graceful exit
645
+ for (let i = 0; i < 20; i++) {
646
+ await new Promise(r => setTimeout(r, 500));
647
+ const status = await this.tmux.getPaneStatus();
648
+ if (!status || !status.alive) {
649
+ killed = true;
650
+ break;
651
+ }
652
+ }
653
+ }
654
+ if (!killed)
655
+ this.logger.warn("CLI did not exit gracefully within 10s, force killing window");
656
+ // Always kill window — remain-on-exit keeps dead panes around after CLI exits
657
+ await this.tmux.killWindow();
658
+ const windowIdFile = join(this.instanceDir, "window-id");
659
+ try {
660
+ unlinkSync(windowIdFile);
661
+ }
662
+ catch (e) {
663
+ this.logger.debug({ err: e }, "Failed to remove window-id file");
664
+ }
665
+ }
666
+ // Close IPC AFTER CLI has exited — MCP servers are already dead at this point
667
+ await this.ipcServer?.close();
668
+ // Clean up backend config files
669
+ if (this.backend?.cleanup) {
670
+ this.backend.cleanup(this.buildBackendConfig());
671
+ }
672
+ // Clean up checked-out repos
673
+ try {
674
+ rmSync(join(this.instanceDir, "repos"), { recursive: true, force: true });
675
+ }
676
+ catch { /* best effort */ }
677
+ const pidPath = join(this.instanceDir, "daemon.pid");
678
+ try {
679
+ unlinkSync(pidPath);
680
+ }
681
+ catch (e) {
682
+ this.logger.debug({ err: e }, "Failed to remove PID file");
683
+ }
684
+ }
685
+ getHangDetector() {
686
+ return this.hangDetector;
687
+ }
688
+ getMessageBus() {
689
+ return this.messageBus;
690
+ }
691
+ // ── Tool status tracking ──────────────────────────────────────
692
+ summarizeTool(name, input) {
693
+ const inp = input;
694
+ if (!inp)
695
+ return name;
696
+ if (name === "Read")
697
+ return `Read ${inp.file_path ?? ""}`;
698
+ if (name === "Edit")
699
+ return `Edit ${inp.file_path ?? ""}`;
700
+ if (name === "Write")
701
+ return `Write ${inp.file_path ?? ""}`;
702
+ if (name === "Bash")
703
+ return `$ ${String(inp.command ?? "").slice(0, 50)}`;
704
+ if (name === "Glob")
705
+ return `Glob ${inp.pattern ?? ""}`;
706
+ if (name === "Grep")
707
+ return `Grep ${inp.pattern ?? ""}`;
708
+ if (name === "Agent")
709
+ return "Agent (subagent)";
710
+ if (name.startsWith("mcp__agend__"))
711
+ return ""; // skip channel tools
712
+ return name;
713
+ }
714
+ addToolStatus(name, input, state) {
715
+ const summary = this.summarizeTool(name, input);
716
+ if (!summary)
717
+ return; // skip empty (e.g., channel tools)
718
+ if (state === "running") {
719
+ this.toolStatusLines.push(`⏳ ${summary}`);
720
+ }
721
+ else {
722
+ // Mark the last matching tool as done
723
+ for (let i = this.toolStatusLines.length - 1; i >= 0; i--) {
724
+ if (this.toolStatusLines[i].includes(name) && this.toolStatusLines[i].startsWith("⏳")) {
725
+ this.toolStatusLines[i] = this.toolStatusLines[i].replace("⏳", "✅");
726
+ break;
727
+ }
728
+ }
729
+ }
730
+ this.debouncedSendToolStatus();
731
+ }
732
+ /** Debounce tool status updates to avoid channel rate limits */
733
+ debouncedSendToolStatus() {
734
+ if (this.toolStatusDebounce)
735
+ clearTimeout(this.toolStatusDebounce);
736
+ this.toolStatusDebounce = setTimeout(() => this.sendToolStatus(), 500);
737
+ }
738
+ sendToolStatus() {
739
+ const text = this.toolStatusLines.join("\n");
740
+ if (!text)
741
+ return;
742
+ this.ipcServer?.broadcast({
743
+ type: "fleet_tool_status",
744
+ instanceName: this.name,
745
+ text,
746
+ editMessageId: this.toolStatusMessageId,
747
+ });
748
+ }
749
+ /** Called by fleet manager when tool status message is sent (returns messageId) */
750
+ setToolStatusMessageId(messageId) {
751
+ this.toolStatusMessageId = messageId;
752
+ }
753
+ /**
754
+ * Push an inbound channel message to a specific MCP session.
755
+ * If targetSession is provided, only send to the matching socket.
756
+ * Otherwise send to the instance's own session (this.name).
757
+ */
758
+ pushChannelMessage(content, meta, _targetSession) {
759
+ if (!this.tmux) {
760
+ this.logger.warn("Cannot push channel message: tmux not running");
761
+ return;
762
+ }
763
+ if (this.pendingInstructionsUpdate) {
764
+ writeFileSync(join(this.instanceDir, "prev-instructions"), this.pendingInstructionsUpdate);
765
+ this.pendingInstructionsUpdate = undefined;
766
+ }
767
+ this.hangDetector?.recordInbound();
768
+ // v3: record user messages for rotation snapshot
769
+ this.recordRecentUserMessage(content, meta);
770
+ // Format message with metadata prefix for the agent
771
+ const user = meta.user || "unknown";
772
+ const fromInstance = meta.from_instance;
773
+ // /raw prefix: paste directly without [user:] wrapping (topic mode only, protected by allowed_users upstream)
774
+ if (!fromInstance && content.startsWith("/raw ")) {
775
+ const rawText = content.slice(5);
776
+ this.logger.info({ user }, "Raw paste from topic mode user");
777
+ this.pasteLock = this.pasteLock.then(async () => {
778
+ await this.deliverMessage(rawText);
779
+ }).catch(err => {
780
+ this.logger.warn({ err: err.message }, "pasteLock raw delivery error");
781
+ });
782
+ return;
783
+ }
784
+ let formatted;
785
+ if (fromInstance) {
786
+ formatted = `[from:${fromInstance}] ${content}\n(Reply using send_to_instance tool, NOT direct text)`;
787
+ }
788
+ else {
789
+ const via = meta.source ? ` via ${meta.source}` : "";
790
+ formatted = `[user:${user}${via}] ${content}\n(Reply using the reply tool — do NOT respond with direct text)`;
791
+ }
792
+ if (meta.reply_to_text) {
793
+ formatted += `\n(reply_to: "${meta.reply_to_text}")`;
794
+ }
795
+ // Serialize deliveries: each message waits for the previous to complete,
796
+ // and each waits for the CLI to be idle before pasting.
797
+ const enqueuedAt = Date.now();
798
+ const isFromInstance = !!meta.from_instance;
799
+ const chatId = meta.chat_id;
800
+ const messageId = meta.message_id;
801
+ const wasQueued = this.pasteQueueDepth > 0;
802
+ this.pasteQueueDepth++;
803
+ if (this.pasteQueueDepth > 3) {
804
+ this.logger.warn({ depth: this.pasteQueueDepth }, "Message delivery queue backing up");
805
+ }
806
+ if (wasQueued && chatId && messageId) {
807
+ this.emit("message_queued", { chatId, messageId });
808
+ }
809
+ this.pasteLock = this.pasteLock.then(async () => {
810
+ try {
811
+ // Drop stale user messages (>60s in queue), but never drop cross-instance messages
812
+ if (!isFromInstance && Date.now() - enqueuedAt > 60_000) {
813
+ this.logger.warn({ age: Date.now() - enqueuedAt, user: meta.user }, "Dropping stale message");
814
+ return;
815
+ }
816
+ if (this.config.pre_task_command) {
817
+ await this.deliverMessage(this.config.pre_task_command);
818
+ }
819
+ if (this.pendingInstructionsNotice) {
820
+ this.pendingInstructionsNotice = false;
821
+ await this.deliverMessage("[system] Your instructions/steering files have been updated. Please re-read them for the latest guidelines.");
822
+ }
823
+ await this.deliverMessage(formatted);
824
+ if (chatId && messageId) {
825
+ this.emit("message_delivered", { chatId, messageId });
826
+ }
827
+ }
828
+ finally {
829
+ this.pasteQueueDepth--;
830
+ }
831
+ }).catch(err => {
832
+ this.logger.warn({ err: err.message }, "pasteLock delivery error — chain continues");
833
+ });
834
+ this.logger.debug({ user: meta.user, text: content.slice(0, 100) }, "Queued channel message for delivery");
835
+ }
836
+ /** Deliver a single message: wait for idle, then paste */
837
+ async deliverMessage(formatted) {
838
+ const windowId = this.getWindowId();
839
+ if (windowId && this.controlClient) {
840
+ const idle = await this.controlClient.waitForIdle(windowId, this.config.lightweight ? 30_000 : 120_000);
841
+ if (!idle) {
842
+ this.logger.warn("Delivering message after idle timeout (CLI may be busy)");
843
+ }
844
+ }
845
+ const ok = await this.tmux.pasteText(formatted);
846
+ if (!ok) {
847
+ // Window ID may be stale after crash/respawn — try to find by name
848
+ this.logger.warn("pasteText failed, looking up window by name");
849
+ try {
850
+ const windows = await TmuxManager.listWindows(this.tmuxSessionName);
851
+ const match = windows.find(w => w.name === this.name);
852
+ if (match) {
853
+ this.tmux = new TmuxManager(this.tmuxSessionName, match.id);
854
+ writeFileSync(join(this.instanceDir, "window-id"), match.id);
855
+ await this.controlClient?.registerWindow(match.id);
856
+ await this.tmux.pasteText(formatted);
857
+ this.logger.info({ windowId: match.id }, "Recovered window ID and delivered message");
858
+ }
859
+ }
860
+ catch (retryErr) {
861
+ this.logger.error({ err: retryErr }, "Failed to recover window for message delivery");
862
+ }
863
+ }
864
+ }
865
+ getWindowId() {
866
+ try {
867
+ return readFileSync(join(this.instanceDir, "window-id"), "utf-8").trim() || undefined;
868
+ }
869
+ catch {
870
+ return undefined;
871
+ }
872
+ }
873
+ /** Find the IPC socket for a given sessionName */
874
+ findSocketBySession(sessionName) {
875
+ for (const [socket, name] of this.socketSessionNames) {
876
+ if (name === sessionName && !socket.destroyed)
877
+ return socket;
878
+ }
879
+ return undefined;
880
+ }
881
+ /**
882
+ * Handle a tool call from the MCP server (forwarded by Claude).
883
+ * Routes to the channel adapter via MessageBus.
884
+ */
885
+ handleToolCall(msg, socket) {
886
+ const tool = msg.tool;
887
+ const args = (msg.args ?? {});
888
+ const requestId = msg.requestId;
889
+ this.logger.debug({ tool, requestId }, "Tool call from MCP server");
890
+ // For now, log and respond. Full adapter routing will be wired in fleet manager.
891
+ const respond = (result, error) => {
892
+ this.ipcServer?.send(socket, { requestId, result, error });
893
+ };
894
+ // Repo checkout — handled locally in daemon (no fleet-manager)
895
+ if (tool === "checkout_repo") {
896
+ this.handleCheckoutRepo(args, respond);
897
+ return;
898
+ }
899
+ if (tool === "release_repo") {
900
+ this.handleReleaseRepo(args, respond);
901
+ return;
902
+ }
903
+ if (tool === "set_display_name" || tool === "set_description") {
904
+ const type = tool === "set_display_name" ? "fleet_set_display_name" : "fleet_set_description";
905
+ const fleetReqId = `${tool === "set_display_name" ? "dn" : "desc"}_${requestId}`;
906
+ this.ipcServer?.broadcast({
907
+ type,
908
+ payload: args,
909
+ meta: { instance_name: this.name },
910
+ fleetRequestId: fleetReqId,
911
+ });
912
+ const timeout = setTimeout(() => {
913
+ this.pendingIpcRequests.delete(fleetReqId);
914
+ respond(null, `${tool} timed out`);
915
+ }, 10_000);
916
+ this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
917
+ clearTimeout(timeout);
918
+ respond(respMsg.result, respMsg.error);
919
+ });
920
+ return;
921
+ }
922
+ if (tool === TASK_TOOL) {
923
+ const fleetReqId = `task_${requestId}`;
924
+ this.ipcServer?.broadcast({
925
+ type: "fleet_task",
926
+ payload: args,
927
+ meta: { instance_name: this.name },
928
+ fleetRequestId: fleetReqId,
929
+ });
930
+ const timeout = setTimeout(() => {
931
+ this.pendingIpcRequests.delete(fleetReqId);
932
+ respond(null, "Task operation timed out after 30s");
933
+ }, 30_000);
934
+ this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
935
+ clearTimeout(timeout);
936
+ respond(respMsg.result, respMsg.error);
937
+ });
938
+ return;
939
+ }
940
+ if (DECISION_TOOLS.has(tool)) {
941
+ const typeMap = {
942
+ post_decision: "fleet_decision_create",
943
+ list_decisions: "fleet_decision_list",
944
+ update_decision: "fleet_decision_update",
945
+ };
946
+ const fleetReqId = `dec_${requestId}`;
947
+ this.ipcServer?.broadcast({
948
+ type: typeMap[tool],
949
+ payload: args,
950
+ meta: { instance_name: this.name, working_directory: this.config.working_directory },
951
+ fleetRequestId: fleetReqId,
952
+ });
953
+ const timeout = setTimeout(() => {
954
+ this.pendingIpcRequests.delete(fleetReqId);
955
+ respond(null, "Decision operation timed out after 30s");
956
+ }, 30_000);
957
+ this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
958
+ clearTimeout(timeout);
959
+ respond(respMsg.result, respMsg.error);
960
+ });
961
+ return;
962
+ }
963
+ if (SCHEDULE_TOOLS.has(tool)) {
964
+ const typeMap = {
965
+ create_schedule: "fleet_schedule_create",
966
+ list_schedules: "fleet_schedule_list",
967
+ update_schedule: "fleet_schedule_update",
968
+ delete_schedule: "fleet_schedule_delete",
969
+ };
970
+ // Use fleetRequestId (not requestId) to avoid MCP server resolving the
971
+ // pending tool call prematurely when it receives the broadcast.
972
+ const fleetReqId = `sched_${requestId}`;
973
+ this.ipcServer?.broadcast({
974
+ type: typeMap[tool],
975
+ payload: args,
976
+ meta: { chat_id: this.lastChatId, thread_id: this.lastThreadId, instance_name: this.name },
977
+ fleetRequestId: fleetReqId,
978
+ });
979
+ // Wait for fleet_schedule_response via pending request map
980
+ const timeout = setTimeout(() => {
981
+ this.pendingIpcRequests.delete(fleetReqId);
982
+ respond(null, "Schedule operation timed out after 30s");
983
+ }, 30_000);
984
+ this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
985
+ clearTimeout(timeout);
986
+ respond(respMsg.result, respMsg.error);
987
+ });
988
+ return;
989
+ }
990
+ if (CROSS_INSTANCE_TOOLS.has(tool)) {
991
+ // Route to fleet manager via IPC (topic mode only)
992
+ if (this.topicMode && this.ipcServer) {
993
+ // Use fleetRequestId (not requestId) to avoid MCP server resolving the
994
+ // pending tool call prematurely when it receives the broadcast.
995
+ const fleetReqId = `xmsg_${requestId}`;
996
+ const senderSessionName = this.socketSessionNames.get(socket);
997
+ this.ipcServer.broadcast({
998
+ type: "fleet_outbound",
999
+ tool,
1000
+ args,
1001
+ fleetRequestId: fleetReqId,
1002
+ senderSessionName,
1003
+ });
1004
+ const crossTimeoutMs = (tool === "start_instance" || tool === "create_instance" || tool === "replace_instance") ? 60_000 : 30_000;
1005
+ const timeout = setTimeout(() => {
1006
+ this.pendingIpcRequests.delete(fleetReqId);
1007
+ respond(null, `Cross-instance operation timed out after ${crossTimeoutMs / 1000}s`);
1008
+ }, crossTimeoutMs);
1009
+ this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
1010
+ clearTimeout(timeout);
1011
+ respond(respMsg.result, respMsg.error);
1012
+ });
1013
+ }
1014
+ else {
1015
+ respond(null, "Cross-instance messaging requires topic mode");
1016
+ }
1017
+ return;
1018
+ }
1019
+ // Context-bound routing: reply/react/edit_message always use the daemon's last known context.
1020
+ // chat_id and thread_id are not exposed in the tool schema — daemon is solely responsible for routing.
1021
+ // Must run before IPC forwarding so topic-mode (fleet manager) also receives the correct chat_id.
1022
+ if (["reply", "react", "edit_message"].includes(tool)) {
1023
+ if (!this.lastChatId) {
1024
+ respond(null, "No active chat context — awaiting inbound message");
1025
+ return;
1026
+ }
1027
+ args.chat_id = this.lastChatId;
1028
+ if (tool === "reply")
1029
+ args.thread_id = this.lastThreadId;
1030
+ }
1031
+ // Route to adapter via MessageBus
1032
+ const adapters = this.messageBus.getAllAdapters();
1033
+ if (adapters.length === 0) {
1034
+ // Topic mode: forward to fleet manager via IPC (fleet manager connected as IPC client)
1035
+ // The fleet manager's IPC client receives this and routes to shared adapter.
1036
+ // Use fleetRequestId (not requestId) to avoid other MCP sessions on this daemon
1037
+ // from prematurely resolving their pending requests when they receive the broadcast.
1038
+ const fleetReqId = `tool_${requestId}`;
1039
+ const outboundKey = fleetReqId;
1040
+ this.ipcServer?.broadcast({ type: "fleet_outbound", tool, args, fleetRequestId: fleetReqId });
1041
+ const timeout = setTimeout(() => {
1042
+ this.pendingIpcRequests.delete(outboundKey);
1043
+ respond(null, "Fleet outbound timed out after 30s");
1044
+ }, 30_000);
1045
+ this.pendingIpcRequests.set(outboundKey, (respMsg) => {
1046
+ clearTimeout(timeout);
1047
+ respond(respMsg.result, respMsg.error);
1048
+ });
1049
+ return;
1050
+ }
1051
+ const adapter = adapters[0];
1052
+ if (!routeToolCall(adapter, tool, args, this.lastThreadId, respond)) {
1053
+ respond(null, `Unknown tool: ${tool}`);
1054
+ }
1055
+ }
1056
+ /** Build config object for the CLI backend */
1057
+ buildBackendConfig() {
1058
+ const isCliMode = this.config.agent_mode === "cli";
1059
+ const sockPath = join(this.instanceDir, "channel.sock");
1060
+ let serverJs = join(__dirname, "channel", "mcp-server.js");
1061
+ if (!existsSync(serverJs)) {
1062
+ serverJs = join(__dirname, "..", "dist", "channel", "mcp-server.js");
1063
+ }
1064
+ // ── Resolve workflow and systemPrompt once, share between MCP env and instructions ──
1065
+ let resolvedWorkflow;
1066
+ if (this.config.workflow === false) {
1067
+ resolvedWorkflow = false;
1068
+ }
1069
+ else {
1070
+ const wf = this.config.workflow ?? "builtin";
1071
+ if (wf !== "builtin") {
1072
+ let content = wf;
1073
+ if (content.startsWith("file:")) {
1074
+ try {
1075
+ content = readFileSync(content.slice(5), "utf-8");
1076
+ }
1077
+ catch {
1078
+ content = "";
1079
+ }
1080
+ }
1081
+ resolvedWorkflow = content || undefined;
1082
+ }
1083
+ }
1084
+ let resolvedCustomPrompt;
1085
+ if (this.config.systemPrompt) {
1086
+ // Support comma-separated file: paths for prompt modularization:
1087
+ // systemPrompt: "file:prompts/role.md, file:prompts/rules.md, file:prompts/context.md"
1088
+ const parts = this.config.systemPrompt.split(",").map((s) => s.trim());
1089
+ const resolved = parts.map((part) => {
1090
+ if (part.startsWith("file:")) {
1091
+ try {
1092
+ return readFileSync(part.slice(5), "utf-8");
1093
+ }
1094
+ catch {
1095
+ return "";
1096
+ }
1097
+ }
1098
+ return part;
1099
+ }).filter(Boolean);
1100
+ if (resolved.length > 0)
1101
+ resolvedCustomPrompt = resolved.join("\n\n");
1102
+ }
1103
+ let decisions;
1104
+ if (process.env.AGEND_DECISIONS) {
1105
+ try {
1106
+ decisions = JSON.parse(process.env.AGEND_DECISIONS);
1107
+ }
1108
+ catch (err) {
1109
+ this.logger.warn({ err }, "AGEND_DECISIONS env var is not valid JSON — decisions will not be injected");
1110
+ }
1111
+ }
1112
+ // ── MCP server env (dual-track: still passes env vars for MCP instructions fallback) ──
1113
+ const mcpEnv = {
1114
+ AGEND_SOCKET_PATH: sockPath,
1115
+ AGEND_INSTANCE_NAME: this.name,
1116
+ AGEND_WORKING_DIR: this.config.working_directory,
1117
+ };
1118
+ if (this.config.tool_set)
1119
+ mcpEnv.AGEND_TOOL_SET = this.config.tool_set;
1120
+ if (this.config.display_name)
1121
+ mcpEnv.AGEND_DISPLAY_NAME = this.config.display_name;
1122
+ if (this.config.description)
1123
+ mcpEnv.AGEND_DESCRIPTION = this.config.description;
1124
+ if (resolvedWorkflow === false)
1125
+ mcpEnv.AGEND_WORKFLOW = "false";
1126
+ else if (resolvedWorkflow)
1127
+ mcpEnv.AGEND_WORKFLOW = resolvedWorkflow;
1128
+ if (resolvedCustomPrompt)
1129
+ mcpEnv.AGEND_CUSTOM_PROMPT = resolvedCustomPrompt;
1130
+ if (process.env.AGEND_DECISIONS)
1131
+ mcpEnv.AGEND_DECISIONS = process.env.AGEND_DECISIONS;
1132
+ // ── Fleet instructions for additive system prompt injection ──
1133
+ let instructions;
1134
+ if (isCliMode) {
1135
+ // CLI mode: inject CLI quick reference instead of MCP tool schema
1136
+ let cliRef = "";
1137
+ try {
1138
+ const cliInstrPath = join(__dirname, "agent-cli-instructions.md");
1139
+ if (!existsSync(cliInstrPath)) {
1140
+ const altPath = join(__dirname, "..", "dist", "agent-cli-instructions.md");
1141
+ if (existsSync(altPath))
1142
+ cliRef = readFileSync(altPath, "utf-8");
1143
+ }
1144
+ else {
1145
+ cliRef = readFileSync(cliInstrPath, "utf-8");
1146
+ }
1147
+ }
1148
+ catch { /* fallback to empty */ }
1149
+ instructions = buildFleetInstructions({
1150
+ instanceName: this.name,
1151
+ workingDirectory: this.config.working_directory,
1152
+ displayName: this.config.display_name,
1153
+ description: this.config.description,
1154
+ customPrompt: resolvedCustomPrompt,
1155
+ workflow: resolvedWorkflow,
1156
+ decisions,
1157
+ cliInstructions: cliRef || undefined,
1158
+ });
1159
+ }
1160
+ else {
1161
+ instructions = buildFleetInstructions({
1162
+ instanceName: this.name,
1163
+ workingDirectory: this.config.working_directory,
1164
+ displayName: this.config.display_name,
1165
+ description: this.config.description,
1166
+ customPrompt: resolvedCustomPrompt,
1167
+ workflow: resolvedWorkflow,
1168
+ decisions,
1169
+ });
1170
+ }
1171
+ const agentPort = parseInt(process.env.AGEND_PORT ?? "19280", 10);
1172
+ return {
1173
+ workingDirectory: this.config.working_directory,
1174
+ instanceDir: this.instanceDir,
1175
+ instanceName: this.name,
1176
+ mcpServers: isCliMode ? {} : {
1177
+ "agend": {
1178
+ command: "node",
1179
+ args: [serverJs],
1180
+ env: mcpEnv,
1181
+ },
1182
+ },
1183
+ skipPermissions: this.config.skipPermissions,
1184
+ model: this.modelOverride ?? this.config.model,
1185
+ skipResume: this.skipResume,
1186
+ instructions,
1187
+ agentMode: isCliMode ? "cli" : "mcp",
1188
+ agentPort: isCliMode ? agentPort : undefined,
1189
+ };
1190
+ }
1191
+ /**
1192
+ * After CLI is ready, paste any pending session snapshot as the first
1193
+ * user input so the agent picks up where the previous session left off.
1194
+ * This replaces the old system-prompt injection approach.
1195
+ */
1196
+ async injectSnapshotMessage() {
1197
+ if (this.snapshotConsumed)
1198
+ return;
1199
+ const snapshot = this.buildSnapshotPrompt();
1200
+ if (!snapshot || !this.tmux)
1201
+ return;
1202
+ if (this.pendingInstructionsUpdate) {
1203
+ writeFileSync(join(this.instanceDir, "prev-instructions"), this.pendingInstructionsUpdate);
1204
+ this.pendingInstructionsUpdate = undefined;
1205
+ }
1206
+ // Small delay to let the CLI fully render its ready prompt
1207
+ await new Promise(r => setTimeout(r, 1_000));
1208
+ try {
1209
+ await this.tmux.pasteText(`[system:session-snapshot]\n${snapshot}\n\nThis is a background context restore — do NOT reply to or acknowledge this message. Simply resume normal operation when the next user or instance message arrives.`);
1210
+ this.logger.info("Injected session snapshot as first message");
1211
+ this.emit("snapshot_injected", this.name);
1212
+ }
1213
+ catch (err) {
1214
+ this.logger.error({ err }, "Snapshot injection failed — session continues without context");
1215
+ this.emit("snapshot_failed", this.name);
1216
+ }
1217
+ }
1218
+ /** Spawn a CLI window. Returns true if --resume was used successfully. */
1219
+ async spawnClaudeWindow() {
1220
+ this.spawning = true;
1221
+ let resumedSuccessfully = false;
1222
+ try {
1223
+ this.toolStatusLines = [];
1224
+ this.toolStatusMessageId = null;
1225
+ if (!this.backend) {
1226
+ throw new Error("No backend configured — cannot spawn CLI window");
1227
+ }
1228
+ const attemptedResume = !this.skipResume;
1229
+ const alive = await this.trySpawn();
1230
+ if (!alive) {
1231
+ // First attempt failed (stale --resume, crash, rate limit, etc.)
1232
+ // Clean slate: clear session-id, skip resume, and retry once.
1233
+ this.logger.warn("CLI startup failed — clearing session-id and retrying without resume");
1234
+ const sidFile = join(this.instanceDir, "session-id");
1235
+ try {
1236
+ unlinkSync(sidFile);
1237
+ }
1238
+ catch { /* may not exist */ }
1239
+ this.skipResume = true;
1240
+ await this.killProcessTree();
1241
+ await this.tmux.killWindow();
1242
+ const retryAlive = await this.trySpawn();
1243
+ if (!retryAlive) {
1244
+ await this.killProcessTree();
1245
+ await this.tmux.killWindow();
1246
+ throw new Error("CLI failed to start after retry");
1247
+ }
1248
+ }
1249
+ else if (attemptedResume) {
1250
+ resumedSuccessfully = true;
1251
+ }
1252
+ this.lastSpawnAt = Date.now();
1253
+ this.skipResume = false; // CLI started successfully — reset for next spawn
1254
+ }
1255
+ finally {
1256
+ this.spawning = false;
1257
+ }
1258
+ return resumedSuccessfully;
1259
+ }
1260
+ /** Kill the entire process tree of the current tmux pane (CLI + MCP server). */
1261
+ async killProcessTree() {
1262
+ if (!this.tmux)
1263
+ return;
1264
+ try {
1265
+ const pid = await TmuxManager.getPanePid(this.tmuxSessionName, this.tmux.getWindowId());
1266
+ if (pid) {
1267
+ process.kill(-pid, "SIGTERM");
1268
+ this.logger.debug({ pid }, "Killed process group");
1269
+ }
1270
+ }
1271
+ catch { /* process group may not exist or already dead */ }
1272
+ }
1273
+ /**
1274
+ * Spawn a CLI window and verify it reaches a ready state.
1275
+ * Uses control mode to wait for output, then checks pane content.
1276
+ * Handles confirmation dialogs (trust folder, bypass permissions).
1277
+ * Returns true if CLI is ready, false if it failed or got stuck.
1278
+ */
1279
+ async trySpawn() {
1280
+ const backendConfig = this.buildBackendConfig();
1281
+ // Detect instructions change → notify agent on next message instead of
1282
+ // forcing a new session. Resume is preserved so context isn't lost.
1283
+ if (!backendConfig.skipResume && !this.backend.instructionsReloadedOnResume && backendConfig.instructions) {
1284
+ const prevFile = join(this.instanceDir, "prev-instructions");
1285
+ let prev = "";
1286
+ try {
1287
+ prev = readFileSync(prevFile, "utf-8");
1288
+ }
1289
+ catch { }
1290
+ if (prev !== backendConfig.instructions) {
1291
+ if (prev) {
1292
+ this.logger.info("Instructions changed — will notify agent on next message");
1293
+ this.pendingInstructionsNotice = true;
1294
+ }
1295
+ this.pendingInstructionsUpdate = backendConfig.instructions;
1296
+ }
1297
+ }
1298
+ this.backend.writeConfig(backendConfig);
1299
+ this.backend.preTrust?.(this.config.working_directory);
1300
+ // Generate a fresh per-instance agent token each spawn. agent-cli reads
1301
+ // this file from <instanceDir>/agent.token (mode 0o600) and sends its
1302
+ // value in the X-Agend-Instance-Token header; the daemon-side /agent
1303
+ // endpoint verifies it matches the on-disk value for the claimed
1304
+ // instance. This prevents other local processes (even those holding
1305
+ // the global web token) from impersonating instances.
1306
+ const agentTokenPath = join(this.instanceDir, "agent.token");
1307
+ const agentToken = randomBytes(32).toString("hex");
1308
+ writeFileSync(agentTokenPath, agentToken, { mode: 0o600 });
1309
+ try {
1310
+ chmodSync(agentTokenPath, 0o600);
1311
+ }
1312
+ catch { }
1313
+ // AGEND_HOME points the child's agent-cli at the same data dir the daemon
1314
+ // is using, so it can locate <instanceDir>/agent.token.
1315
+ const agendHome = join(this.instanceDir, "..", "..");
1316
+ let envPrefix = `TERM=xterm-256color AGEND_INSTANCE_NAME=${shellQuote(this.name)} AGEND_HOME=${shellQuote(agendHome)}`;
1317
+ if (backendConfig.agentMode === "cli" && backendConfig.agentPort) {
1318
+ envPrefix += ` AGEND_PORT=${backendConfig.agentPort}`;
1319
+ }
1320
+ const cmd = `${envPrefix} ` + this.backend.buildCommand(backendConfig);
1321
+ // Ensure tmux session exists (may have been destroyed if all windows died)
1322
+ await TmuxManager.ensureSession(this.tmuxSessionName);
1323
+ const windowId = await this.tmux.createWindow(cmd, this.config.working_directory, this.name);
1324
+ writeFileSync(join(this.instanceDir, "window-id"), windowId);
1325
+ // Enable remain-on-exit to capture exit codes on crash
1326
+ await this.tmux.setRemainOnExit().catch(err => {
1327
+ this.logger.warn({ err }, "Failed to set remain-on-exit — exit codes will not be captured");
1328
+ });
1329
+ // Register with control client and wait for output + idle
1330
+ await this.controlClient?.registerWindow(windowId);
1331
+ if (this.controlClient) {
1332
+ const total = this.config.startup_timeout_ms ?? 25_000;
1333
+ const outputTimeout = Math.round(total * 0.6);
1334
+ const idleTimeout = total - outputTimeout;
1335
+ const hasOutput = await this.controlClient.waitForOutput(windowId, outputTimeout);
1336
+ if (!hasOutput) {
1337
+ // Fallback: some TUI backends (e.g. opencode) don't trigger tmux %output events.
1338
+ // Check pane content directly for ready pattern before giving up.
1339
+ const pane = await this.tmux.capturePane();
1340
+ if (!this.backend.getReadyPattern().test(pane))
1341
+ return false;
1342
+ }
1343
+ else {
1344
+ await this.controlClient.waitForIdle(windowId, idleTimeout);
1345
+ }
1346
+ }
1347
+ else {
1348
+ await new Promise(r => setTimeout(r, 10_000));
1349
+ }
1350
+ // Dismiss confirmation dialogs and verify CLI reached prompt.
1351
+ // With remain-on-exit, isWindowAlive() returns true even for dead panes,
1352
+ // but a startup crash would already be caught by waitForOutput/waitForIdle above.
1353
+ if (!await this.tmux.isWindowAlive())
1354
+ return false;
1355
+ return this.dismissDialogsUntilReady(3);
1356
+ }
1357
+ /**
1358
+ * Repeatedly check pane content, dismiss any confirmation dialogs,
1359
+ * and return true once CLI reaches a ready prompt.
1360
+ */
1361
+ async dismissDialogsUntilReady(maxAttempts) {
1362
+ // Backend-specific startup dialogs, with hardcoded fallback for backward compat
1363
+ const startupDialogs = this.backend?.getStartupDialogs?.() ?? [
1364
+ { pattern: /[❯›]\s*\d+\.\s*No/m, keys: ["Down", "Enter"], description: "Confirmation dialog — navigate past No" },
1365
+ { pattern: /[❯›]\s*Don't trust/m, keys: ["Up", "Up", "Enter"], description: "Trust dialog — navigate to trust option" },
1366
+ { pattern: /No, exit|No, quit|Don't trust|I accept|I trust|Yes, continue|Trust folder/i, keys: ["Enter"], description: "Generic confirmation dialog" },
1367
+ { pattern: /Resume Session/i, keys: ["Escape"], description: "Resume session picker — start fresh" },
1368
+ ];
1369
+ for (let i = 0; i < maxAttempts; i++) {
1370
+ try {
1371
+ const pane = await this.tmux.capturePane();
1372
+ // Try each startup dialog pattern before checking ready state
1373
+ let matched = false;
1374
+ for (const dialog of startupDialogs) {
1375
+ if (dialog.pattern.test(pane)) {
1376
+ this.logger.debug(`Dismissing startup dialog: ${dialog.description}`);
1377
+ for (const key of dialog.keys) {
1378
+ if (key === "Up" || key === "Down" || key === "Enter" || key === "Escape") {
1379
+ await this.tmux.sendSpecialKey(key);
1380
+ }
1381
+ else {
1382
+ await this.tmux.sendKeys(key);
1383
+ }
1384
+ await new Promise(r => setTimeout(r, 200));
1385
+ }
1386
+ // Wait for next screen to render
1387
+ if (this.controlClient) {
1388
+ const wid = readFileSync(join(this.instanceDir, "window-id"), "utf-8").trim();
1389
+ await this.controlClient.waitForIdle(wid, 10_000);
1390
+ }
1391
+ else {
1392
+ await new Promise(r => setTimeout(r, 3_000));
1393
+ }
1394
+ if (!await this.tmux.isWindowAlive())
1395
+ return false;
1396
+ matched = true;
1397
+ break;
1398
+ }
1399
+ }
1400
+ if (matched)
1401
+ continue;
1402
+ // CLI is ready (pattern defined by each backend)
1403
+ if (this.backend.getReadyPattern().test(pane))
1404
+ return true;
1405
+ // Fatal: command not found (must match full phrase to avoid false positives
1406
+ // like Kiro's "agent X not found, using default")
1407
+ if (/command not found|: not found$/m.test(pane))
1408
+ return false;
1409
+ }
1410
+ catch {
1411
+ return false;
1412
+ }
1413
+ }
1414
+ // Exhausted attempts — assume ok for unknown CLI prompts
1415
+ return true;
1416
+ }
1417
+ saveSessionId() {
1418
+ const sid = this.backend?.getSessionId();
1419
+ if (sid) {
1420
+ writeFileSync(join(this.instanceDir, "session-id"), sid);
1421
+ }
1422
+ }
1423
+ readContextPercentage() {
1424
+ return this.backend?.getContextUsage() ?? 0;
1425
+ }
1426
+ /** Set a model override for next spawn (used by failover logic) */
1427
+ setModelOverride(model) {
1428
+ this.modelOverride = model;
1429
+ }
1430
+ /** Get the currently active model override */
1431
+ getModelOverride() {
1432
+ return this.modelOverride;
1433
+ }
1434
+ /** Public wrapper for graceful restart — wait for instance to be idle. */
1435
+ waitForIdle(quietMs = 5000) {
1436
+ return new Promise((resolve) => {
1437
+ const monitor = this.transcriptMonitor;
1438
+ // No transcript monitor (e.g. lightweight mode) — no events to wait for.
1439
+ if (!monitor) {
1440
+ setTimeout(resolve, quietMs);
1441
+ return;
1442
+ }
1443
+ const events = ["tool_use", "tool_result", "assistant_text"];
1444
+ let timer;
1445
+ let settled = false;
1446
+ const done = () => {
1447
+ if (settled)
1448
+ return;
1449
+ settled = true;
1450
+ // Always remove from the same monitor we registered on — avoids
1451
+ // imbalance if this.transcriptMonitor is later reassigned.
1452
+ events.forEach(e => monitor.removeListener(e, reset));
1453
+ resolve();
1454
+ };
1455
+ const reset = () => {
1456
+ clearTimeout(timer);
1457
+ timer = setTimeout(done, quietMs);
1458
+ };
1459
+ timer = setTimeout(done, quietMs);
1460
+ events.forEach(e => monitor.on(e, reset));
1461
+ });
1462
+ }
1463
+ // ── Context Rotation v3: Ring buffers ─────────────────────────
1464
+ recordRecentUserMessage(content, meta) {
1465
+ // Only record real user messages, not cross-instance messages
1466
+ if (!meta.user || meta.user.startsWith("instance:"))
1467
+ return;
1468
+ this.recentUserMessages.push({
1469
+ text: content.slice(0, 200),
1470
+ ts: meta.ts ?? new Date().toISOString(),
1471
+ });
1472
+ if (this.recentUserMessages.length > 10)
1473
+ this.recentUserMessages.shift();
1474
+ }
1475
+ recordRecentEvent(event) {
1476
+ this.recentEvents.push(event);
1477
+ if (this.recentEvents.length > 15)
1478
+ this.recentEvents.shift();
1479
+ }
1480
+ recordRecentToolActivity(summary) {
1481
+ if (!summary)
1482
+ return;
1483
+ this.recentToolActivity.push(summary);
1484
+ if (this.recentToolActivity.length > 10)
1485
+ this.recentToolActivity.shift();
1486
+ }
1487
+ // ── Context Rotation v3: Snapshot writer ──────────────────────
1488
+ writeRotationSnapshot(reason) {
1489
+ const statusline = this.readStatuslineData();
1490
+ const snapshot = {
1491
+ instance: this.name,
1492
+ reason,
1493
+ created_at: new Date().toISOString(),
1494
+ working_directory: this.config.working_directory,
1495
+ session_id: this.backend?.getSessionId() ?? null,
1496
+ context_pct: this.readContextPercentage(),
1497
+ recent_user_messages: [...this.recentUserMessages],
1498
+ recent_events: [...this.recentEvents],
1499
+ recent_tool_activity: [...this.recentToolActivity],
1500
+ last_statusline: statusline ? {
1501
+ model: statusline.model?.display_name,
1502
+ cost_usd: statusline.cost?.total_cost_usd,
1503
+ five_hour_pct: statusline.rate_limits?.five_hour?.used_percentage,
1504
+ seven_day_pct: statusline.rate_limits?.seven_day?.used_percentage,
1505
+ } : undefined,
1506
+ };
1507
+ const snapshotPath = join(this.instanceDir, "rotation-state.json");
1508
+ writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2));
1509
+ this.snapshotConsumed = false;
1510
+ this.logger.info({
1511
+ reason,
1512
+ context_pct: snapshot.context_pct,
1513
+ user_msg_count: snapshot.recent_user_messages?.length ?? 0,
1514
+ event_count: snapshot.recent_events?.length ?? 0,
1515
+ }, "Snapshot written");
1516
+ return snapshot;
1517
+ }
1518
+ /** Collect ring buffer data for handover to a replacement instance. */
1519
+ collectHandoverContext() {
1520
+ const lines = [];
1521
+ if (this.recentUserMessages.length > 0) {
1522
+ lines.push("Recent user messages:");
1523
+ for (const msg of this.recentUserMessages)
1524
+ lines.push(`- ${msg.text}`);
1525
+ lines.push("");
1526
+ }
1527
+ if (this.recentEvents.length > 0) {
1528
+ lines.push("Recent activity:");
1529
+ for (const ev of this.recentEvents) {
1530
+ if (ev.type === "assistant_text")
1531
+ lines.push(`- Assistant: ${ev.preview}`);
1532
+ else
1533
+ lines.push(`- ${ev.name}${ev.preview ? `: ${ev.preview}` : ""}`);
1534
+ }
1535
+ lines.push("");
1536
+ }
1537
+ if (this.recentToolActivity.length > 0) {
1538
+ lines.push("Recent tool activity:");
1539
+ for (const t of this.recentToolActivity)
1540
+ lines.push(`- ${t}`);
1541
+ lines.push("");
1542
+ }
1543
+ const pct = this.readContextPercentage();
1544
+ if (pct != null)
1545
+ lines.push(`Context usage: ${pct}%`);
1546
+ return lines.join("\n").slice(0, 4000);
1547
+ }
1548
+ appendCrashHistory(data) {
1549
+ try {
1550
+ const historyPath = join(this.instanceDir, "crash-history.jsonl");
1551
+ const entry = {
1552
+ timestamp: new Date().toISOString(),
1553
+ instance: this.name,
1554
+ crashType: data.crashType,
1555
+ exitCode: data.exitCode,
1556
+ lastOutput: data.lastOutput,
1557
+ crashCount: this.crashCount + 1,
1558
+ crashesInWindow: this.crashTimestamps.length,
1559
+ };
1560
+ appendFileSync(historyPath, JSON.stringify(entry) + "\n");
1561
+ // Rotate based on file size (cheaper than parsing every time)
1562
+ try {
1563
+ const stat = statSync(historyPath);
1564
+ if (stat.size > 512_000) {
1565
+ const content = readFileSync(historyPath, "utf-8");
1566
+ const lines = content.trim().split("\n").filter(Boolean);
1567
+ writeFileSync(historyPath, lines.slice(-50).join("\n") + "\n");
1568
+ }
1569
+ }
1570
+ catch { /* best effort */ }
1571
+ }
1572
+ catch { /* best effort */ }
1573
+ }
1574
+ readStatuslineData() {
1575
+ try {
1576
+ const sf = join(this.instanceDir, "statusline.json");
1577
+ return JSON.parse(readFileSync(sf, "utf-8"));
1578
+ }
1579
+ catch {
1580
+ return null;
1581
+ }
1582
+ }
1583
+ // ── Repo Checkout ─────────────────────────────────────────
1584
+ async handleCheckoutRepo(args, respond) {
1585
+ const { execFile: execFileCb } = await import("node:child_process");
1586
+ const { promisify } = await import("node:util");
1587
+ const execFileAsync = promisify(execFileCb);
1588
+ const rawSource = args.source;
1589
+ if (!rawSource) {
1590
+ respond(null, "checkout_repo: missing required argument 'source'");
1591
+ return;
1592
+ }
1593
+ const expanded = rawSource.replace(/^~/, process.env.HOME || "~");
1594
+ // Resolve instance name to working_directory via IPC query
1595
+ // If source doesn't look like a path, treat it as an instance name
1596
+ if (!expanded.startsWith("/")) {
1597
+ // Broadcast to get instance info — but we don't have fleet config in daemon.
1598
+ // Instead, rely on fleet manager to resolve. For now, reject non-path sources.
1599
+ respond(null, `Source must be an absolute path or ~-prefixed path. Use describe_instance to find a repo's working_directory.`);
1600
+ return;
1601
+ }
1602
+ // Normalize to collapse any `..` segments.
1603
+ const source = resolve(expanded);
1604
+ const branch = args.branch || "HEAD";
1605
+ // Validate branch ref: git refs allow [A-Za-z0-9._/-], reject `..` to prevent
1606
+ // worktreePath escape via basename(source)-${branch.replace("/", "-")}.
1607
+ // Reject leading `-` or `+` so git cannot interpret the value as an option
1608
+ // flag (e.g. `--upload-pack=...`), which execFile cannot prevent on its own.
1609
+ if (!/^[A-Za-z0-9._/-]+$/.test(branch) || branch.includes("..") || /^[-+]/.test(branch)) {
1610
+ respond(null, `Invalid branch name: ${branch}`);
1611
+ return;
1612
+ }
1613
+ // Verify it's a git repo
1614
+ try {
1615
+ await execFileAsync("git", ["rev-parse", "--git-dir"], { cwd: source });
1616
+ }
1617
+ catch {
1618
+ respond(null, `Not a git repository: ${source}`);
1619
+ return;
1620
+ }
1621
+ const repoDir = join(this.instanceDir, "repos");
1622
+ mkdirSync(repoDir, { recursive: true });
1623
+ const safeName = `${basename(source)}-${branch.replace(/\//g, "-")}`;
1624
+ const worktreePath = join(repoDir, safeName);
1625
+ try {
1626
+ // Resolve branch/ref to verify it exists. Use `--` so git never treats
1627
+ // branch as an option flag (defense in depth on top of the regex above).
1628
+ await execFileAsync("git", ["rev-parse", "--verify", "--", branch], { cwd: source });
1629
+ await execFileAsync("git", ["worktree", "add", "--detach", worktreePath, branch], { cwd: source });
1630
+ const { stdout: commitHash } = await execFileAsync("git", ["rev-parse", "--short", "HEAD"], { cwd: worktreePath });
1631
+ respond({ path: worktreePath, branch, source, commit: commitHash.trim() });
1632
+ }
1633
+ catch (err) {
1634
+ respond(null, `Failed to checkout: ${err.message}`);
1635
+ }
1636
+ }
1637
+ async handleReleaseRepo(args, respond) {
1638
+ const repoPath = args.path;
1639
+ const reposDir = join(this.instanceDir, "repos");
1640
+ // Safety: only allow releasing paths under our repos/ directory
1641
+ if (!repoPath.startsWith(reposDir)) {
1642
+ respond(null, `Cannot release path outside instance repos directory`);
1643
+ return;
1644
+ }
1645
+ try {
1646
+ const { execFile: execFileCb } = await import("node:child_process");
1647
+ const { promisify } = await import("node:util");
1648
+ const execFileAsync = promisify(execFileCb);
1649
+ await execFileAsync("git", ["worktree", "remove", "--force", repoPath]);
1650
+ }
1651
+ catch {
1652
+ // Fallback: rm directly if git worktree remove fails
1653
+ try {
1654
+ rmSync(repoPath, { recursive: true, force: true });
1655
+ }
1656
+ catch { /* best effort */ }
1657
+ }
1658
+ respond({ released: true, path: repoPath });
1659
+ }
1660
+ buildSnapshotPrompt() {
1661
+ const snapshotPath = join(this.instanceDir, "rotation-state.json");
1662
+ try {
1663
+ if (!existsSync(snapshotPath))
1664
+ return null;
1665
+ const snapshot = JSON.parse(readFileSync(snapshotPath, "utf-8"));
1666
+ // Mark consumed in-memory to prevent re-injection on crash respawn.
1667
+ // Delete file so subsequent daemon restarts don't re-inject stale snapshot.
1668
+ this.snapshotConsumed = true;
1669
+ try {
1670
+ unlinkSync(snapshotPath);
1671
+ }
1672
+ catch { /* best effort */ }
1673
+ const lines = ["## Previous Session Snapshot", ""];
1674
+ lines.push(`Restart reason: ${snapshot.reason}`);
1675
+ if (snapshot.context_pct != null)
1676
+ lines.push(`Previous context usage: ${snapshot.context_pct}%`);
1677
+ if (snapshot.session_id)
1678
+ lines.push(`Previous session id: ${snapshot.session_id}`);
1679
+ lines.push(`Working directory: ${snapshot.working_directory}`);
1680
+ lines.push("");
1681
+ if (snapshot.recent_user_messages && snapshot.recent_user_messages.length > 0) {
1682
+ lines.push("Recent user messages:");
1683
+ for (const msg of snapshot.recent_user_messages) {
1684
+ lines.push(`- ${msg.text}`);
1685
+ }
1686
+ lines.push("");
1687
+ }
1688
+ if (snapshot.recent_events && snapshot.recent_events.length > 0) {
1689
+ lines.push("Recent activity:");
1690
+ for (const ev of snapshot.recent_events) {
1691
+ if (ev.type === "assistant_text") {
1692
+ lines.push(`- Assistant: ${ev.preview}`);
1693
+ }
1694
+ else {
1695
+ lines.push(`- ${ev.name}${ev.preview ? `: ${ev.preview}` : ""}`);
1696
+ }
1697
+ }
1698
+ lines.push("");
1699
+ }
1700
+ lines.push("Instruction:");
1701
+ lines.push("Resume work from this snapshot when relevant. Do not assume anything not stated here.");
1702
+ // Enforce 2000-char budget
1703
+ let result = lines.join("\n");
1704
+ if (result.length > 2000) {
1705
+ result = result.slice(0, 1997) + "...";
1706
+ }
1707
+ return result;
1708
+ }
1709
+ catch {
1710
+ return null;
1711
+ }
1712
+ }
1713
+ }
1714
+ //# sourceMappingURL=daemon.js.map