@songsid/agend 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +210 -0
- package/README.zh-TW.md +134 -0
- package/dist/access-path.d.ts +10 -0
- package/dist/access-path.js +32 -0
- package/dist/access-path.js.map +1 -0
- package/dist/adapter-world.d.ts +25 -0
- package/dist/adapter-world.js +41 -0
- package/dist/adapter-world.js.map +1 -0
- package/dist/agent-cli-instructions.md +50 -0
- package/dist/agent-cli.d.ts +2 -0
- package/dist/agent-cli.js +200 -0
- package/dist/agent-cli.js.map +1 -0
- package/dist/agent-endpoint.d.ts +25 -0
- package/dist/agent-endpoint.js +162 -0
- package/dist/agent-endpoint.js.map +1 -0
- package/dist/backend/antigravity.d.ts +17 -0
- package/dist/backend/antigravity.js +98 -0
- package/dist/backend/antigravity.js.map +1 -0
- package/dist/backend/claude-code.d.ts +23 -0
- package/dist/backend/claude-code.js +171 -0
- package/dist/backend/claude-code.js.map +1 -0
- package/dist/backend/codex.d.ts +18 -0
- package/dist/backend/codex.js +160 -0
- package/dist/backend/codex.js.map +1 -0
- package/dist/backend/factory.d.ts +2 -0
- package/dist/backend/factory.js +28 -0
- package/dist/backend/factory.js.map +1 -0
- package/dist/backend/gemini-cli.d.ts +17 -0
- package/dist/backend/gemini-cli.js +163 -0
- package/dist/backend/gemini-cli.js.map +1 -0
- package/dist/backend/index.d.ts +7 -0
- package/dist/backend/index.js +7 -0
- package/dist/backend/index.js.map +1 -0
- package/dist/backend/kiro.d.ts +17 -0
- package/dist/backend/kiro.js +147 -0
- package/dist/backend/kiro.js.map +1 -0
- package/dist/backend/marker-utils.d.ts +13 -0
- package/dist/backend/marker-utils.js +64 -0
- package/dist/backend/marker-utils.js.map +1 -0
- package/dist/backend/mock.d.ts +25 -0
- package/dist/backend/mock.js +85 -0
- package/dist/backend/mock.js.map +1 -0
- package/dist/backend/opencode.d.ts +16 -0
- package/dist/backend/opencode.js +136 -0
- package/dist/backend/opencode.js.map +1 -0
- package/dist/backend/types.d.ts +86 -0
- package/dist/backend/types.js +33 -0
- package/dist/backend/types.js.map +1 -0
- package/dist/channel/access-manager.d.ts +18 -0
- package/dist/channel/access-manager.js +153 -0
- package/dist/channel/access-manager.js.map +1 -0
- package/dist/channel/adapters/telegram.d.ts +63 -0
- package/dist/channel/adapters/telegram.js +646 -0
- package/dist/channel/adapters/telegram.js.map +1 -0
- package/dist/channel/attachment-handler.d.ts +15 -0
- package/dist/channel/attachment-handler.js +88 -0
- package/dist/channel/attachment-handler.js.map +1 -0
- package/dist/channel/factory.d.ts +12 -0
- package/dist/channel/factory.js +67 -0
- package/dist/channel/factory.js.map +1 -0
- package/dist/channel/ipc-bridge.d.ts +26 -0
- package/dist/channel/ipc-bridge.js +220 -0
- package/dist/channel/ipc-bridge.js.map +1 -0
- package/dist/channel/mcp-server.d.ts +10 -0
- package/dist/channel/mcp-server.js +288 -0
- package/dist/channel/mcp-server.js.map +1 -0
- package/dist/channel/mcp-tools.d.ts +17 -0
- package/dist/channel/mcp-tools.js +110 -0
- package/dist/channel/mcp-tools.js.map +1 -0
- package/dist/channel/message-bus.d.ts +17 -0
- package/dist/channel/message-bus.js +86 -0
- package/dist/channel/message-bus.js.map +1 -0
- package/dist/channel/message-queue.d.ts +39 -0
- package/dist/channel/message-queue.js +253 -0
- package/dist/channel/message-queue.js.map +1 -0
- package/dist/channel/tool-router.d.ts +6 -0
- package/dist/channel/tool-router.js +75 -0
- package/dist/channel/tool-router.js.map +1 -0
- package/dist/channel/tool-tracker.d.ts +13 -0
- package/dist/channel/tool-tracker.js +58 -0
- package/dist/channel/tool-tracker.js.map +1 -0
- package/dist/channel/types.d.ts +118 -0
- package/dist/channel/types.js +2 -0
- package/dist/channel/types.js.map +1 -0
- package/dist/chat-export.d.ts +4 -0
- package/dist/chat-export.js +91 -0
- package/dist/chat-export.js.map +1 -0
- package/dist/classic-channel-manager.d.ts +59 -0
- package/dist/classic-channel-manager.js +193 -0
- package/dist/classic-channel-manager.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +1833 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +9 -0
- package/dist/config.js +118 -0
- package/dist/config.js.map +1 -0
- package/dist/context-guardian.d.ts +26 -0
- package/dist/context-guardian.js +73 -0
- package/dist/context-guardian.js.map +1 -0
- package/dist/cost-guard.d.ts +36 -0
- package/dist/cost-guard.js +147 -0
- package/dist/cost-guard.js.map +1 -0
- package/dist/daemon-entry.d.ts +1 -0
- package/dist/daemon-entry.js +29 -0
- package/dist/daemon-entry.js.map +1 -0
- package/dist/daemon.d.ts +152 -0
- package/dist/daemon.js +1714 -0
- package/dist/daemon.js.map +1 -0
- package/dist/daily-summary.d.ts +13 -0
- package/dist/daily-summary.js +55 -0
- package/dist/daily-summary.js.map +1 -0
- package/dist/event-log.d.ts +36 -0
- package/dist/event-log.js +100 -0
- package/dist/event-log.js.map +1 -0
- package/dist/export-import.d.ts +2 -0
- package/dist/export-import.js +162 -0
- package/dist/export-import.js.map +1 -0
- package/dist/fleet-context.d.ts +61 -0
- package/dist/fleet-context.js +4 -0
- package/dist/fleet-context.js.map +1 -0
- package/dist/fleet-dashboard-html.d.ts +6 -0
- package/dist/fleet-dashboard-html.js +443 -0
- package/dist/fleet-dashboard-html.js.map +1 -0
- package/dist/fleet-health-server.d.ts +35 -0
- package/dist/fleet-health-server.js +290 -0
- package/dist/fleet-health-server.js.map +1 -0
- package/dist/fleet-instructions.d.ts +5 -0
- package/dist/fleet-instructions.js +161 -0
- package/dist/fleet-instructions.js.map +1 -0
- package/dist/fleet-manager.d.ts +212 -0
- package/dist/fleet-manager.js +3655 -0
- package/dist/fleet-manager.js.map +1 -0
- package/dist/fleet-rpc-handlers.d.ts +42 -0
- package/dist/fleet-rpc-handlers.js +356 -0
- package/dist/fleet-rpc-handlers.js.map +1 -0
- package/dist/fleet-system-prompt.d.ts +11 -0
- package/dist/fleet-system-prompt.js +61 -0
- package/dist/fleet-system-prompt.js.map +1 -0
- package/dist/general-knowledge/skills.md +177 -0
- package/dist/hang-detector.d.ts +16 -0
- package/dist/hang-detector.js +53 -0
- package/dist/hang-detector.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/instance-lifecycle.d.ts +90 -0
- package/dist/instance-lifecycle.js +592 -0
- package/dist/instance-lifecycle.js.map +1 -0
- package/dist/instructions.d.ts +15 -0
- package/dist/instructions.js +90 -0
- package/dist/instructions.js.map +1 -0
- package/dist/logger.d.ts +7 -0
- package/dist/logger.js +84 -0
- package/dist/logger.js.map +1 -0
- package/dist/outbound-handlers.d.ts +51 -0
- package/dist/outbound-handlers.js +739 -0
- package/dist/outbound-handlers.js.map +1 -0
- package/dist/outbound-schemas.d.ts +238 -0
- package/dist/outbound-schemas.js +248 -0
- package/dist/outbound-schemas.js.map +1 -0
- package/dist/paths.d.ts +10 -0
- package/dist/paths.js +42 -0
- package/dist/paths.js.map +1 -0
- package/dist/plugin/agend/.claude-plugin/plugin.json +5 -0
- package/dist/quickstart.d.ts +1 -0
- package/dist/quickstart.js +595 -0
- package/dist/quickstart.js.map +1 -0
- package/dist/routing-engine.d.ts +22 -0
- package/dist/routing-engine.js +44 -0
- package/dist/routing-engine.js.map +1 -0
- package/dist/safe-async.d.ts +6 -0
- package/dist/safe-async.js +20 -0
- package/dist/safe-async.js.map +1 -0
- package/dist/scheduler/db.d.ts +37 -0
- package/dist/scheduler/db.js +360 -0
- package/dist/scheduler/db.js.map +1 -0
- package/dist/scheduler/db.test.d.ts +1 -0
- package/dist/scheduler/db.test.js +92 -0
- package/dist/scheduler/db.test.js.map +1 -0
- package/dist/scheduler/index.d.ts +4 -0
- package/dist/scheduler/index.js +4 -0
- package/dist/scheduler/index.js.map +1 -0
- package/dist/scheduler/scheduler.d.ts +44 -0
- package/dist/scheduler/scheduler.js +197 -0
- package/dist/scheduler/scheduler.js.map +1 -0
- package/dist/scheduler/scheduler.test.d.ts +1 -0
- package/dist/scheduler/scheduler.test.js +119 -0
- package/dist/scheduler/scheduler.test.js.map +1 -0
- package/dist/scheduler/types.d.ts +107 -0
- package/dist/scheduler/types.js +7 -0
- package/dist/scheduler/types.js.map +1 -0
- package/dist/service-installer.d.ts +17 -0
- package/dist/service-installer.js +182 -0
- package/dist/service-installer.js.map +1 -0
- package/dist/setup-wizard.d.ts +48 -0
- package/dist/setup-wizard.js +701 -0
- package/dist/setup-wizard.js.map +1 -0
- package/dist/statusline-watcher.d.ts +34 -0
- package/dist/statusline-watcher.js +73 -0
- package/dist/statusline-watcher.js.map +1 -0
- package/dist/stt.d.ts +10 -0
- package/dist/stt.js +33 -0
- package/dist/stt.js.map +1 -0
- package/dist/tmux-control.d.ts +52 -0
- package/dist/tmux-control.js +207 -0
- package/dist/tmux-control.js.map +1 -0
- package/dist/tmux-manager.d.ts +44 -0
- package/dist/tmux-manager.js +218 -0
- package/dist/tmux-manager.js.map +1 -0
- package/dist/topic-archiver.d.ts +40 -0
- package/dist/topic-archiver.js +103 -0
- package/dist/topic-archiver.js.map +1 -0
- package/dist/topic-commands.d.ts +28 -0
- package/dist/topic-commands.js +359 -0
- package/dist/topic-commands.js.map +1 -0
- package/dist/transcript-monitor.d.ts +23 -0
- package/dist/transcript-monitor.js +164 -0
- package/dist/transcript-monitor.js.map +1 -0
- package/dist/types.d.ts +211 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/ui/dashboard.html +719 -0
- package/dist/web-api.d.ts +101 -0
- package/dist/web-api.js +648 -0
- package/dist/web-api.js.map +1 -0
- package/dist/webhook-emitter.d.ts +15 -0
- package/dist/webhook-emitter.js +41 -0
- package/dist/webhook-emitter.js.map +1 -0
- package/dist/workflow-templates/default.md +35 -0
- package/package.json +76 -0
- package/templates/launchd.plist.ejs +31 -0
- package/templates/systemd.service.ejs +16 -0
package/dist/daemon.js
ADDED
|
@@ -0,0 +1,1714 @@
|
|
|
1
|
+
import { join, dirname, basename, resolve } from "node:path";
|
|
2
|
+
import { mkdirSync, writeFileSync, readFileSync, existsSync, unlinkSync, rmSync, appendFileSync, statSync, chmodSync } from "node:fs";
|
|
3
|
+
import { fileURLToPath } from "node:url";
|
|
4
|
+
import { randomBytes } from "node:crypto";
|
|
5
|
+
import { EventEmitter } from "node:events";
|
|
6
|
+
import { createLogger } from "./logger.js";
|
|
7
|
+
import { TmuxManager } from "./tmux-manager.js";
|
|
8
|
+
import { TranscriptMonitor } from "./transcript-monitor.js";
|
|
9
|
+
import { ContextGuardian } from "./context-guardian.js";
|
|
10
|
+
import { IpcServer } from "./channel/ipc-bridge.js";
|
|
11
|
+
import { MessageBus } from "./channel/message-bus.js";
|
|
12
|
+
import { shellQuote } from "./backend/types.js";
|
|
13
|
+
import { getTmuxSession } from "./config.js";
|
|
14
|
+
import { routeToolCall } from "./channel/tool-router.js";
|
|
15
|
+
import { HangDetector } from "./hang-detector.js";
|
|
16
|
+
import { buildFleetInstructions } from "./instructions.js";
|
|
17
|
+
// Absolute path of this module file, derived from the module URL.
const __filename = fileURLToPath(import.meta.url);
|
|
18
|
+
// Directory that contains this module file.
const __dirname = dirname(__filename);
|
|
19
|
+
// Tool routing sets — module-level to avoid re-creation on every handleToolCall
|
|
20
|
+
// Names of the tools in the cross-instance group (messaging, lifecycle and
// delegation between instances). Presumably consulted by handleToolCall,
// which is defined outside this view — verify against that method.
const CROSS_INSTANCE_TOOLS = new Set([
    "send_to_instance",
    "list_instances",
    "start_instance",
    "restart_instance",
    "create_instance",
    "delete_instance",
    "replace_instance",
    "request_information",
    "delegate_task",
    "report_result",
    "describe_instance",
]);
|
|
21
|
+
// Names of the schedule CRUD tools.
const SCHEDULE_TOOLS = new Set([
    "create_schedule",
    "list_schedules",
    "update_schedule",
    "delete_schedule",
]);
|
|
22
|
+
// Names of the decision tools.
const DECISION_TOOLS = new Set([
    "post_decision",
    "list_decisions",
    "update_decision",
]);
|
|
23
|
+
// Name of the single "task" tool. NOTE(review): its use site is outside this
// view — presumably matched alongside the tool sets above; confirm.
const TASK_TOOL = "task";
|
|
24
|
+
export class Daemon extends EventEmitter {
|
|
25
|
+
name;
|
|
26
|
+
config;
|
|
27
|
+
instanceDir;
|
|
28
|
+
topicMode;
|
|
29
|
+
backend;
|
|
30
|
+
controlClient;
|
|
31
|
+
logger;
|
|
32
|
+
tmuxSessionName;
|
|
33
|
+
tmux = null;
|
|
34
|
+
ipcServer = null;
|
|
35
|
+
messageBus;
|
|
36
|
+
transcriptMonitor = null;
|
|
37
|
+
toolTracker = null;
|
|
38
|
+
guardian = null;
|
|
39
|
+
adapter = null;
|
|
40
|
+
pendingIpcRequests = new Map();
|
|
41
|
+
// Track chatId/threadId from inbound messages for automatic outbound routing
|
|
42
|
+
lastChatId;
|
|
43
|
+
lastThreadId;
|
|
44
|
+
// Pending ack: react 🫡 on first transcript activity after receiving a message
|
|
45
|
+
pendingAckMessage = null;
|
|
46
|
+
// Tool status tracking for channel adapter
|
|
47
|
+
toolStatusMessageId = null;
|
|
48
|
+
toolStatusLines = [];
|
|
49
|
+
toolStatusDebounce = null;
|
|
50
|
+
// Session identity: map IPC socket → sessionName (from mcp_ready)
|
|
51
|
+
socketSessionNames = new Map();
|
|
52
|
+
// Crash recovery
|
|
53
|
+
static tmuxServerCrashTimestamps = [];
|
|
54
|
+
static tmuxServerPaused = false;
|
|
55
|
+
static tmuxServerRecoveryTimer = null;
|
|
56
|
+
healthCheckTimer = null;
|
|
57
|
+
crashCount = 0;
|
|
58
|
+
lastCrashAt = 0;
|
|
59
|
+
lastSpawnAt = 0;
|
|
60
|
+
crashTimestamps = [];
|
|
61
|
+
healthCheckPaused = false;
|
|
62
|
+
spawning = false;
|
|
63
|
+
skipResume = false;
|
|
64
|
+
/** Whether the last spawn started a fresh session (not resumed). */
|
|
65
|
+
isNewSession = false;
|
|
66
|
+
// Context rotation quality tracking
|
|
67
|
+
rotationStartedAt = 0;
|
|
68
|
+
preRotationContextPct = 0;
|
|
69
|
+
hangDetector = null;
|
|
70
|
+
// Model failover: override model on next spawn when rate-limited
|
|
71
|
+
modelOverride;
|
|
72
|
+
// Context rotation v3: ring buffers for daemon-side snapshot
|
|
73
|
+
recentUserMessages = [];
|
|
74
|
+
recentEvents = [];
|
|
75
|
+
recentToolActivity = [];
|
|
76
|
+
snapshotConsumed = false;
|
|
77
|
+
pasteLock = Promise.resolve();
|
|
78
|
+
pendingInstructionsUpdate;
|
|
79
|
+
pendingInstructionsNotice = false;
|
|
80
|
+
pasteQueueDepth = 0;
|
|
81
|
+
// PTY error pattern monitoring
|
|
82
|
+
errorMonitorTimer = null;
|
|
83
|
+
errorWaitingForRecovery = false; // true = error detected, waiting for ready pattern
|
|
84
|
+
errorDetectedAt = 0;
|
|
85
|
+
/** Whether this instance is in an error state (rate-limited, paused, or crash loop). */
|
|
86
|
+
get isErrorState() {
|
|
87
|
+
return this.errorWaitingForRecovery || this.healthCheckPaused || Daemon.tmuxServerPaused;
|
|
88
|
+
}
|
|
89
|
+
lastFailoverAt = 0; // cooldown: prevent repeated failover triggers
|
|
90
|
+
static FAILOVER_COOLDOWN_MS = 5 * 60_000; // 5 minutes
|
|
91
|
+
lastErrorNotifiedAt = new Map(); // per-type cooldown for all actions
|
|
92
|
+
static ERROR_COOLDOWN_MS = 5 * 60_000;
|
|
93
|
+
/** Cheap hash for pane content dedup — not cryptographic, just identity check */
|
|
94
|
+
static cheapPaneHash(pane) {
|
|
95
|
+
return `${pane.length}:${pane.slice(-200)}`;
|
|
96
|
+
}
|
|
97
|
+
// Hash dedup: suppress stale error re-detection after recovery
|
|
98
|
+
lastRecoveryPaneHash = null;
|
|
99
|
+
lastRecoveredErrorType = null;
|
|
100
|
+
lastDetectedErrorType = null;
|
|
101
|
+
constructor(name, config, instanceDir, topicMode = false, backend, controlClient) {
|
|
102
|
+
super();
|
|
103
|
+
this.name = name;
|
|
104
|
+
this.config = config;
|
|
105
|
+
this.instanceDir = instanceDir;
|
|
106
|
+
this.topicMode = topicMode;
|
|
107
|
+
this.backend = backend;
|
|
108
|
+
this.controlClient = controlClient;
|
|
109
|
+
this.logger = createLogger(config.log_level);
|
|
110
|
+
this.tmuxSessionName = getTmuxSession();
|
|
111
|
+
this.messageBus = new MessageBus();
|
|
112
|
+
this.messageBus.setLogger(this.logger);
|
|
113
|
+
}
|
|
114
|
+
async start() {
|
|
115
|
+
mkdirSync(this.instanceDir, { recursive: true });
|
|
116
|
+
writeFileSync(join(this.instanceDir, "daemon.pid"), String(process.pid));
|
|
117
|
+
this.logger.info(`Starting ${this.name}`);
|
|
118
|
+
// P1: Read crash state from previous run — skip resume if last run was a crash loop
|
|
119
|
+
const crashStatePath = join(this.instanceDir, "crash-state.json");
|
|
120
|
+
try {
|
|
121
|
+
if (existsSync(crashStatePath)) {
|
|
122
|
+
const state = JSON.parse(readFileSync(crashStatePath, "utf-8"));
|
|
123
|
+
if (state.resumeDisabled) {
|
|
124
|
+
this.skipResume = true;
|
|
125
|
+
this.logger.warn("Previous crash loop detected — starting without resume");
|
|
126
|
+
}
|
|
127
|
+
unlinkSync(crashStatePath);
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
catch { /* corrupt file — ignore */ }
|
|
131
|
+
// 1. IPC server — bridge between MCP server (Claude's child) and daemon
|
|
132
|
+
const sockPath = join(this.instanceDir, "channel.sock");
|
|
133
|
+
this.ipcServer = new IpcServer(sockPath, this.logger);
|
|
134
|
+
// Forward IPC server errors as daemon events (prevents unhandled 'error' crash).
|
|
135
|
+
// Guard: only forward post-listen errors — startup errors are handled by listen() rejection.
|
|
136
|
+
let ipcListening = false;
|
|
137
|
+
this.ipcServer.on("error", (err) => {
|
|
138
|
+
if (!ipcListening)
|
|
139
|
+
return; // startup errors handled by listen() rejection
|
|
140
|
+
this.logger.error({ err, name: this.name }, "IPC server error");
|
|
141
|
+
this.emit("error", err);
|
|
142
|
+
});
|
|
143
|
+
await this.ipcServer.listen();
|
|
144
|
+
ipcListening = true;
|
|
145
|
+
// Permanent IPC dispatcher: routes responses to pending requests by type+id key
|
|
146
|
+
this.ipcServer.on("message", (msg) => {
|
|
147
|
+
const type = msg.type;
|
|
148
|
+
if (!type)
|
|
149
|
+
return;
|
|
150
|
+
// Build lookup key matching the pattern used when registering
|
|
151
|
+
let key;
|
|
152
|
+
if ((type === "fleet_schedule_response" || type === "fleet_outbound_response" || type === "fleet_decision_response" || type === "fleet_task_response" || type === "fleet_display_name_response" || type === "fleet_description_response") && msg.fleetRequestId) {
|
|
153
|
+
key = String(msg.fleetRequestId);
|
|
154
|
+
}
|
|
155
|
+
else if (type === "fleet_outbound_response" && msg.requestId != null) {
|
|
156
|
+
key = `fleet_out_${msg.requestId}`;
|
|
157
|
+
}
|
|
158
|
+
if (key && this.pendingIpcRequests.has(key)) {
|
|
159
|
+
const handler = this.pendingIpcRequests.get(key);
|
|
160
|
+
this.pendingIpcRequests.delete(key);
|
|
161
|
+
handler(msg);
|
|
162
|
+
}
|
|
163
|
+
});
|
|
164
|
+
// IPC message relay: when daemon wants to push a channel message to Claude,
|
|
165
|
+
// it broadcasts to all IPC clients (the MCP server is one of them).
|
|
166
|
+
// When MCP server sends a tool_call, daemon handles it via the messageBus.
|
|
167
|
+
this.ipcServer.on("message", (msg, socket) => {
|
|
168
|
+
if (msg.type === "tool_call") {
|
|
169
|
+
// MCP server forwarding a Claude tool call (reply, react, edit, download)
|
|
170
|
+
this.handleToolCall(msg, socket);
|
|
171
|
+
}
|
|
172
|
+
else if (msg.type === "mcp_ready") {
|
|
173
|
+
const sessionName = msg.sessionName;
|
|
174
|
+
if (sessionName) {
|
|
175
|
+
this.socketSessionNames.set(socket, sessionName);
|
|
176
|
+
socket.on("close", () => {
|
|
177
|
+
this.socketSessionNames.delete(socket);
|
|
178
|
+
// Notify fleet manager so it can clean up sessionRegistry
|
|
179
|
+
if (sessionName !== this.name) {
|
|
180
|
+
this.ipcServer?.broadcast({ type: "session_disconnected", sessionName });
|
|
181
|
+
}
|
|
182
|
+
});
|
|
183
|
+
}
|
|
184
|
+
this.logger.debug({ sessionName }, "MCP channel server connected and ready");
|
|
185
|
+
// Notify FleetManager's IPC client that MCP is ready
|
|
186
|
+
this.ipcServer?.broadcast({ type: "mcp_ready", sessionName });
|
|
187
|
+
}
|
|
188
|
+
else if (msg.type === "query_sessions") {
|
|
189
|
+
// Fleet manager asks for all registered session names (catches sessions
|
|
190
|
+
// that sent mcp_ready before fleet manager connected).
|
|
191
|
+
const sessions = [];
|
|
192
|
+
for (const [s, sessionName] of this.socketSessionNames) {
|
|
193
|
+
if (!s.destroyed && sessionName !== this.name) {
|
|
194
|
+
// Individual mcp_ready for initial registration path
|
|
195
|
+
this.ipcServer?.send(socket, { type: "mcp_ready", sessionName });
|
|
196
|
+
sessions.push(sessionName);
|
|
197
|
+
}
|
|
198
|
+
}
|
|
199
|
+
// Batch response for prune path
|
|
200
|
+
this.ipcServer?.send(socket, { type: "query_sessions_response", sessions });
|
|
201
|
+
}
|
|
202
|
+
else if (msg.type === "fleet_inbound") {
|
|
203
|
+
// Fleet manager routed a message to us (topic mode)
|
|
204
|
+
const meta = msg.meta;
|
|
205
|
+
const targetSession = msg.targetSession;
|
|
206
|
+
// Only update lastChatId/lastThreadId from real channel messages (non-empty chat_id).
|
|
207
|
+
// Cross-instance messages have empty chat_id and must not overwrite these.
|
|
208
|
+
if (meta.chat_id)
|
|
209
|
+
this.lastChatId = meta.chat_id;
|
|
210
|
+
if (meta.chat_id && meta.thread_id)
|
|
211
|
+
this.lastThreadId = meta.thread_id;
|
|
212
|
+
this.pushChannelMessage(msg.content, meta, targetSession);
|
|
213
|
+
}
|
|
214
|
+
else if (msg.type === "raw_paste") {
|
|
215
|
+
// Paste raw text directly to CLI without [user:] wrapping.
|
|
216
|
+
// Use pasteLock to serialize with other deliveries and wait for idle.
|
|
217
|
+
if (this.tmux) {
|
|
218
|
+
const rawText = msg.content;
|
|
219
|
+
this.pasteLock = this.pasteLock.then(async () => {
|
|
220
|
+
await this.deliverMessage(rawText);
|
|
221
|
+
this.logger.debug({ text: rawText.slice(0, 100) }, "Raw paste delivered");
|
|
222
|
+
}).catch(err => {
|
|
223
|
+
this.logger.warn({ err: err.message }, "raw_paste delivery error");
|
|
224
|
+
});
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
else if (msg.type === "fleet_schedule_trigger") {
|
|
228
|
+
const payload = msg.payload;
|
|
229
|
+
const meta = msg.meta;
|
|
230
|
+
this.lastChatId = meta.chat_id;
|
|
231
|
+
this.lastThreadId = meta.thread_id;
|
|
232
|
+
this.pushChannelMessage(payload.message, meta);
|
|
233
|
+
}
|
|
234
|
+
else if (msg.type === "fleet_tool_status_ack") {
|
|
235
|
+
// Fleet manager sent us the messageId for our tool status message
|
|
236
|
+
this.toolStatusMessageId = msg.messageId;
|
|
237
|
+
}
|
|
238
|
+
});
|
|
239
|
+
// 2. Tmux — ensure session, create window if not alive
|
|
240
|
+
await TmuxManager.ensureSession(this.tmuxSessionName);
|
|
241
|
+
this.tmux = new TmuxManager(this.tmuxSessionName, "");
|
|
242
|
+
// Strategy A: always start fresh Claude window (MCP server has no reconnection)
|
|
243
|
+
// Kill any existing window from previous run
|
|
244
|
+
const windowIdFile = join(this.instanceDir, "window-id");
|
|
245
|
+
if (existsSync(windowIdFile)) {
|
|
246
|
+
const savedId = readFileSync(windowIdFile, "utf-8").trim();
|
|
247
|
+
if (savedId) {
|
|
248
|
+
const oldTmux = new TmuxManager(this.tmuxSessionName, savedId);
|
|
249
|
+
if (await oldTmux.isWindowAlive()) {
|
|
250
|
+
this.saveSessionId();
|
|
251
|
+
await oldTmux.killWindow();
|
|
252
|
+
this.logger.info({ savedId }, "Killed old tmux window for fresh start");
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
const resumed = await this.spawnClaudeWindow();
|
|
257
|
+
this.isNewSession = !resumed;
|
|
258
|
+
if (!resumed) {
|
|
259
|
+
await this.injectSnapshotMessage();
|
|
260
|
+
}
|
|
261
|
+
else {
|
|
262
|
+
// Clean up stale snapshot file — resume restored full context, snapshot not needed
|
|
263
|
+
try {
|
|
264
|
+
unlinkSync(join(this.instanceDir, "rotation-state.json"));
|
|
265
|
+
}
|
|
266
|
+
catch { /* may not exist */ }
|
|
267
|
+
}
|
|
268
|
+
if (!this.config.lightweight) {
|
|
269
|
+
// 3. Pipe-pane for prompt detection
|
|
270
|
+
const outputLog = join(this.instanceDir, "output.log");
|
|
271
|
+
await this.tmux.pipeOutput(outputLog).catch(() => { });
|
|
272
|
+
// 4. Transcript monitor
|
|
273
|
+
this.transcriptMonitor = new TranscriptMonitor(this.instanceDir, this.logger);
|
|
274
|
+
// 5. Wire transcript events
|
|
275
|
+
const ackIfPending = () => {
|
|
276
|
+
if (!this.pendingAckMessage || !this.adapter)
|
|
277
|
+
return;
|
|
278
|
+
const { chatId, messageId } = this.pendingAckMessage;
|
|
279
|
+
this.pendingAckMessage = null;
|
|
280
|
+
this.adapter.react(chatId, messageId, "🫡")
|
|
281
|
+
.catch(e => this.logger.debug({ err: e.message }, "Ack react failed"));
|
|
282
|
+
};
|
|
283
|
+
this.transcriptMonitor.on("tool_use", (name, input) => {
|
|
284
|
+
this.logger.debug({ tool: name }, "Tool use");
|
|
285
|
+
ackIfPending();
|
|
286
|
+
this.hangDetector?.recordActivity();
|
|
287
|
+
this.recordRecentEvent({ type: "tool_use", name, preview: this.summarizeTool(name, input) });
|
|
288
|
+
this.recordRecentToolActivity(this.summarizeTool(name, input));
|
|
289
|
+
});
|
|
290
|
+
this.transcriptMonitor.on("tool_result", (name, _output) => {
|
|
291
|
+
this.hangDetector?.recordActivity();
|
|
292
|
+
this.recordRecentEvent({ type: "tool_result", name });
|
|
293
|
+
});
|
|
294
|
+
this.transcriptMonitor.on("assistant_text", (text) => {
|
|
295
|
+
this.logger.debug({ text: text.slice(0, 200) }, "Claude response");
|
|
296
|
+
ackIfPending();
|
|
297
|
+
this.hangDetector?.recordActivity();
|
|
298
|
+
this.recordRecentEvent({ type: "assistant_text", preview: text.slice(0, 100) });
|
|
299
|
+
});
|
|
300
|
+
this.transcriptMonitor.startPolling();
|
|
301
|
+
// Hang detector
|
|
302
|
+
this.hangDetector = new HangDetector(15);
|
|
303
|
+
this.hangDetector.start();
|
|
304
|
+
// 8. Context guardian
|
|
305
|
+
const statusFile = join(this.instanceDir, "statusline.json");
|
|
306
|
+
this.guardian = new ContextGuardian(this.config.context_guardian, this.logger, statusFile);
|
|
307
|
+
this.guardian.startWatching();
|
|
308
|
+
this.guardian.on("status_update", () => {
|
|
309
|
+
this.saveSessionId();
|
|
310
|
+
this.hangDetector?.recordStatuslineUpdate();
|
|
311
|
+
});
|
|
312
|
+
// Context rotation removed: all CLI backends have built-in auto-compact.
|
|
313
|
+
// Crash recovery (health check + respawn with snapshot) is retained below.
|
|
314
|
+
}
|
|
315
|
+
// NOTE: Do NOT set process.env.AGEND_SOCKET_PATH here — it pollutes the
|
|
316
|
+
// shared fleet manager process env. Each daemon overwrites it, so the last
|
|
317
|
+
// one wins, causing MCP servers (especially kiro-cli which inherits process
|
|
318
|
+
// env) to connect to the wrong socket. The socket path is passed via
|
|
319
|
+
// per-instance MCP config files or wrapper scripts instead.
|
|
320
|
+
// 10. Health check — detect crashed tmux window and respawn
|
|
321
|
+
// Re-enabled: orphan window issue fixed by killing same-name windows before respawn.
|
|
322
|
+
// Without this, a dead CLI window goes undetected and messages are silently lost.
|
|
323
|
+
this.startHealthCheck();
|
|
324
|
+
if (!this.config.lightweight) {
|
|
325
|
+
this.startErrorMonitor();
|
|
326
|
+
}
|
|
327
|
+
this.logger.info(`${this.name} ready`);
|
|
328
|
+
}
|
|
329
|
+
startHealthCheck() {
|
|
330
|
+
const { max_retries, backoff, reset_after } = this.config.restart_policy;
|
|
331
|
+
if (max_retries <= 0)
|
|
332
|
+
return; // restart disabled
|
|
333
|
+
const scheduleNext = () => {
|
|
334
|
+
this.healthCheckTimer = setTimeout(async () => {
|
|
335
|
+
// Instance directory removed externally (e.g. `rm -rf ~/.agend/instances/<name>`).
|
|
336
|
+
// Stop the loop permanently — otherwise every tick triggers a respawn, whose
|
|
337
|
+
// writeRotationSnapshot fails with ENOENT and gets caught as "Failed to respawn",
|
|
338
|
+
// spamming errors every ~30s forever.
|
|
339
|
+
if (!existsSync(this.instanceDir)) {
|
|
340
|
+
this.logger.warn({ instanceDir: this.instanceDir }, "Instance directory missing — stopping health check");
|
|
341
|
+
this.healthCheckPaused = true;
|
|
342
|
+
this.healthCheckTimer = null;
|
|
343
|
+
return;
|
|
344
|
+
}
|
|
345
|
+
if (!this.tmux || this.spawning || this.healthCheckPaused || Daemon.tmuxServerPaused) {
|
|
346
|
+
scheduleNext();
|
|
347
|
+
return;
|
|
348
|
+
}
|
|
349
|
+
const paneStatus = await this.tmux.getPaneStatus();
|
|
350
|
+
if (paneStatus?.alive) {
|
|
351
|
+
scheduleNext();
|
|
352
|
+
return;
|
|
353
|
+
}
|
|
354
|
+
// paneStatus === null → window gone entirely (e.g. tmux server crash)
|
|
355
|
+
// paneStatus.alive === false → pane dead, exit code available
|
|
356
|
+
const exitCode = paneStatus?.exitCode;
|
|
357
|
+
this.logger.debug({ exitCode }, `[health] pane exited with code: ${exitCode}`);
|
|
358
|
+
// Normal exit (e.g. user Ctrl+C or /exit) — no crash, no respawn
|
|
359
|
+
if (paneStatus && exitCode === 0) {
|
|
360
|
+
this.logger.info("CLI exited normally (code 0) — pausing health check");
|
|
361
|
+
await this.tmux.killWindow();
|
|
362
|
+
this.healthCheckPaused = true;
|
|
363
|
+
return;
|
|
364
|
+
}
|
|
365
|
+
// Distinguish tmux server crash from single window crash
|
|
366
|
+
let crashType = "window";
|
|
367
|
+
if (!paneStatus) {
|
|
368
|
+
const serverAlive = await TmuxManager.sessionExists(this.tmuxSessionName);
|
|
369
|
+
if (!serverAlive) {
|
|
370
|
+
crashType = "server";
|
|
371
|
+
this.logger.error("tmux server died — all windows lost");
|
|
372
|
+
// Fleet-level circuit breaker: pause all instances on repeated tmux server crashes
|
|
373
|
+
Daemon.tmuxServerCrashTimestamps.push(Date.now());
|
|
374
|
+
const cutoff = Date.now() - 5 * 60_000;
|
|
375
|
+
Daemon.tmuxServerCrashTimestamps = Daemon.tmuxServerCrashTimestamps.filter(t => t > cutoff);
|
|
376
|
+
if (Daemon.tmuxServerCrashTimestamps.length >= 2 && !Daemon.tmuxServerPaused) {
|
|
377
|
+
Daemon.tmuxServerPaused = true;
|
|
378
|
+
this.logger.error("Fleet-level tmux server circuit breaker triggered — pausing all respawns for 30s");
|
|
379
|
+
this.emit("tmux_server_crash", this.name);
|
|
380
|
+
if (!Daemon.tmuxServerRecoveryTimer) {
|
|
381
|
+
Daemon.tmuxServerRecoveryTimer = setTimeout(() => {
|
|
382
|
+
Daemon.tmuxServerRecoveryTimer = null;
|
|
383
|
+
Daemon.tmuxServerPaused = false;
|
|
384
|
+
}, 30_000);
|
|
385
|
+
}
|
|
386
|
+
scheduleNext();
|
|
387
|
+
return;
|
|
388
|
+
}
|
|
389
|
+
await new Promise(r => setTimeout(r, 2_000)); // let session stabilize
|
|
390
|
+
}
|
|
391
|
+
else {
|
|
392
|
+
this.logger.warn({ exitCode }, "Claude window died (tmux server alive)");
|
|
393
|
+
}
|
|
394
|
+
}
|
|
395
|
+
else {
|
|
396
|
+
this.logger.warn({ exitCode }, "Claude process exited");
|
|
397
|
+
}
|
|
398
|
+
// Capture last output from dead pane before killing
|
|
399
|
+
let lastOutput;
|
|
400
|
+
if (paneStatus) {
|
|
401
|
+
try {
|
|
402
|
+
const raw = await this.tmux.capturePaneWithHistory(50);
|
|
403
|
+
// Strip ANSI escape codes for readability
|
|
404
|
+
const cleaned = raw.replace(/\x1b\[[0-9;]*[a-zA-Z]/g, "");
|
|
405
|
+
lastOutput = cleaned.trimEnd() || undefined;
|
|
406
|
+
}
|
|
407
|
+
catch { /* best effort */ }
|
|
408
|
+
}
|
|
409
|
+
// Kill the dead window (remain-on-exit keeps it around) before respawn
|
|
410
|
+
if (paneStatus) {
|
|
411
|
+
await this.tmux.killWindow();
|
|
412
|
+
}
|
|
413
|
+
// Append to crash history
|
|
414
|
+
this.appendCrashHistory({ exitCode, lastOutput, crashType });
|
|
415
|
+
// Detect rapid crash: sliding window — 3+ crashes in 5 minutes
|
|
416
|
+
this.crashTimestamps.push(Date.now());
|
|
417
|
+
const crashWindowMs = 5 * 60_000;
|
|
418
|
+
this.crashTimestamps = this.crashTimestamps.filter(t => t > Date.now() - crashWindowMs);
|
|
419
|
+
if (this.crashTimestamps.length >= 3) {
|
|
420
|
+
this.healthCheckPaused = true;
|
|
421
|
+
this.logger.error({ crashesInWindow: this.crashTimestamps.length }, "3+ crashes in 5 minutes — pausing respawn");
|
|
422
|
+
// P1: Persist crash state so next process restart skips resume
|
|
423
|
+
try {
|
|
424
|
+
writeFileSync(join(this.instanceDir, "crash-state.json"), JSON.stringify({
|
|
425
|
+
crashesInWindow: this.crashTimestamps.length,
|
|
426
|
+
lastCrashAt: Date.now(),
|
|
427
|
+
resumeDisabled: true,
|
|
428
|
+
}));
|
|
429
|
+
}
|
|
430
|
+
catch { /* best effort */ }
|
|
431
|
+
this.emit("crash_loop", this.name);
|
|
432
|
+
return; // don't schedule next — paused
|
|
433
|
+
}
|
|
434
|
+
// Reset crash count if enough time has passed
|
|
435
|
+
if (reset_after > 0 && Date.now() - this.lastCrashAt > reset_after) {
|
|
436
|
+
this.crashCount = 0;
|
|
437
|
+
}
|
|
438
|
+
this.crashCount++;
|
|
439
|
+
this.lastCrashAt = Date.now();
|
|
440
|
+
if (this.crashCount > max_retries) {
|
|
441
|
+
this.logger.error({ crashCount: this.crashCount, maxRetries: max_retries }, "Max crash retries exceeded — not respawning");
|
|
442
|
+
return; // don't schedule next — given up
|
|
443
|
+
}
|
|
444
|
+
// Calculate backoff delay
|
|
445
|
+
const delay = backoff === "exponential"
|
|
446
|
+
? Math.min(1000 * Math.pow(2, this.crashCount - 1), 60_000)
|
|
447
|
+
: 1000 * this.crashCount;
|
|
448
|
+
this.logger.warn({ crashCount: this.crashCount, delay }, "Claude window died — respawning after backoff");
|
|
449
|
+
await new Promise(r => setTimeout(r, delay));
|
|
450
|
+
try {
|
|
451
|
+
this.saveSessionId();
|
|
452
|
+
this.transcriptMonitor?.resetOffset();
|
|
453
|
+
// Kill orphan MCP server from the crashed CLI session.
|
|
454
|
+
// MCP server writes its PID to channel.mcp.pid on startup.
|
|
455
|
+
try {
|
|
456
|
+
const pidFile = join(this.instanceDir, "channel.mcp.pid");
|
|
457
|
+
const pid = parseInt(readFileSync(pidFile, "utf-8").trim(), 10);
|
|
458
|
+
process.kill(pid, "SIGTERM");
|
|
459
|
+
this.logger.info({ pid }, "Killed orphan MCP server");
|
|
460
|
+
}
|
|
461
|
+
catch { /* no pid file or process already dead */ }
|
|
462
|
+
// Kill any same-name windows before respawn to prevent orphans.
|
|
463
|
+
// Wrapped in try-catch: if tmux server is dead, listWindows throws —
|
|
464
|
+
// must not block spawnClaudeWindow (which calls ensureSession).
|
|
465
|
+
try {
|
|
466
|
+
const windows = await TmuxManager.listWindows(this.tmuxSessionName);
|
|
467
|
+
for (const w of windows) {
|
|
468
|
+
if (w.name === this.name) {
|
|
469
|
+
const tm = new TmuxManager(this.tmuxSessionName, w.id);
|
|
470
|
+
await tm.killWindow();
|
|
471
|
+
}
|
|
472
|
+
}
|
|
473
|
+
}
|
|
474
|
+
catch { /* tmux server may be dead — ensureSession in trySpawn will recover */ }
|
|
475
|
+
// Write snapshot before spawn — consumed only if resume fails
|
|
476
|
+
this.writeRotationSnapshot("crash");
|
|
477
|
+
// Try --resume first; spawnClaudeWindow falls back to fresh session if resume fails
|
|
478
|
+
const resumed = await this.spawnClaudeWindow();
|
|
479
|
+
if (!resumed) {
|
|
480
|
+
// Resume failed → fresh session → inject snapshot for context
|
|
481
|
+
await this.injectSnapshotMessage();
|
|
482
|
+
}
|
|
483
|
+
else {
|
|
484
|
+
// Clean up stale snapshot — resume restored full context
|
|
485
|
+
try {
|
|
486
|
+
unlinkSync(join(this.instanceDir, "rotation-state.json"));
|
|
487
|
+
}
|
|
488
|
+
catch { /* may not exist */ }
|
|
489
|
+
}
|
|
490
|
+
this.logger.info({ resumed }, "Respawned Claude window after crash");
|
|
491
|
+
this.emit("crash_respawn", this.name);
|
|
492
|
+
}
|
|
493
|
+
catch (err) {
|
|
494
|
+
this.logger.error({ err }, "Failed to respawn Claude window");
|
|
495
|
+
}
|
|
496
|
+
scheduleNext();
|
|
497
|
+
}, this.config.restart_policy.health_check_interval_ms ?? 30_000);
|
|
498
|
+
};
|
|
499
|
+
scheduleNext();
|
|
500
|
+
}
|
|
501
|
+
/**
|
|
502
|
+
* Periodically scan PTY output for backend-defined error patterns.
|
|
503
|
+
*
|
|
504
|
+
* State machine to avoid false positives from stale buffer text:
|
|
505
|
+
* MONITORING → (error pattern match) → WAITING_FOR_RECOVERY → (ready pattern match) → MONITORING
|
|
506
|
+
*
|
|
507
|
+
* Only emits pty_error once per error occurrence. After the agent recovers
|
|
508
|
+
* (ready pattern visible), it goes back to monitoring for new errors.
|
|
509
|
+
*/
|
|
510
|
+
startErrorMonitor() {
|
|
511
|
+
const patterns = this.backend?.getErrorPatterns?.() ?? [];
|
|
512
|
+
const dialogs = this.backend?.getRuntimeDialogs?.() ?? [];
|
|
513
|
+
if (!patterns.length && !dialogs.length)
|
|
514
|
+
return;
|
|
515
|
+
if (!this.tmux)
|
|
516
|
+
return;
|
|
517
|
+
if (!this.backend)
|
|
518
|
+
return; // lightweight mode has no backend
|
|
519
|
+
const readyPattern = this.backend.getReadyPattern();
|
|
520
|
+
this.errorMonitorTimer = setInterval(async () => {
|
|
521
|
+
if (!this.tmux || this.spawning)
|
|
522
|
+
return;
|
|
523
|
+
try {
|
|
524
|
+
const alive = await this.tmux.isWindowAlive();
|
|
525
|
+
if (!alive)
|
|
526
|
+
return;
|
|
527
|
+
const pane = await this.tmux.capturePane();
|
|
528
|
+
// Only scan text after the last prompt marker to avoid matching stale errors
|
|
529
|
+
// that remain in the capture-pane buffer after recovery.
|
|
530
|
+
let scanText = pane;
|
|
531
|
+
const rpg = new RegExp(readyPattern.source, readyPattern.flags.includes("g") ? readyPattern.flags : readyPattern.flags + "g");
|
|
532
|
+
let lastIdx = -1;
|
|
533
|
+
let m;
|
|
534
|
+
while ((m = rpg.exec(pane)) !== null)
|
|
535
|
+
lastIdx = m.index;
|
|
536
|
+
if (lastIdx >= 0)
|
|
537
|
+
scanText = pane.slice(lastIdx);
|
|
538
|
+
// Auto-dismiss runtime dialogs (e.g. Codex rate limit model switch)
|
|
539
|
+
for (const dialog of dialogs) {
|
|
540
|
+
if (!dialog.pattern.test(pane))
|
|
541
|
+
continue;
|
|
542
|
+
this.logger.info(`Auto-dismissing runtime dialog: ${dialog.description}`);
|
|
543
|
+
const SPECIAL_KEYS = new Set(["Up", "Down", "Enter", "Escape", "Right", "Left"]);
|
|
544
|
+
for (const key of dialog.keys) {
|
|
545
|
+
if (SPECIAL_KEYS.has(key)) {
|
|
546
|
+
await this.tmux.sendSpecialKey(key);
|
|
547
|
+
}
|
|
548
|
+
else {
|
|
549
|
+
await this.tmux.pasteText(key);
|
|
550
|
+
}
|
|
551
|
+
await new Promise(r => setTimeout(r, 200));
|
|
552
|
+
}
|
|
553
|
+
return; // Dialog dismissed, skip error checks this cycle
|
|
554
|
+
}
|
|
555
|
+
// State: waiting for recovery — check if agent is back to ready
|
|
556
|
+
if (this.errorWaitingForRecovery) {
|
|
557
|
+
if (readyPattern.test(pane)) {
|
|
558
|
+
const downtime = Math.round((Date.now() - this.errorDetectedAt) / 1000);
|
|
559
|
+
// Record pane hash at recovery to suppress stale re-detection
|
|
560
|
+
this.lastRecoveryPaneHash = Daemon.cheapPaneHash(pane);
|
|
561
|
+
this.lastRecoveredErrorType = this.lastDetectedErrorType;
|
|
562
|
+
this.errorWaitingForRecovery = false;
|
|
563
|
+
this.errorDetectedAt = 0;
|
|
564
|
+
this.logger.info({ downtime_s: downtime }, "PTY error recovered — agent is ready again");
|
|
565
|
+
this.emit("pty_recovered", { name: this.name, downtime_s: downtime });
|
|
566
|
+
}
|
|
567
|
+
return; // Don't check for errors while waiting for recovery
|
|
568
|
+
}
|
|
569
|
+
// State: monitoring — check for new errors
|
|
570
|
+
const currentPaneHash = Daemon.cheapPaneHash(pane);
|
|
571
|
+
for (const ep of patterns) {
|
|
572
|
+
if (!ep.pattern.test(scanText))
|
|
573
|
+
continue;
|
|
574
|
+
// Dedup: suppress if same error on same screen as last recovery
|
|
575
|
+
if (this.lastRecoveryPaneHash && ep.type === this.lastRecoveredErrorType) {
|
|
576
|
+
if (currentPaneHash === this.lastRecoveryPaneHash) {
|
|
577
|
+
break; // same screen, same error → stale
|
|
578
|
+
}
|
|
579
|
+
// Screen changed — stop suppressing
|
|
580
|
+
this.lastRecoveryPaneHash = null;
|
|
581
|
+
this.lastRecoveredErrorType = null;
|
|
582
|
+
}
|
|
583
|
+
// Cooldown: skip if same error type was recently notified
|
|
584
|
+
const lastNotified = this.lastErrorNotifiedAt.get(ep.type) ?? 0;
|
|
585
|
+
if (Date.now() - lastNotified < Daemon.ERROR_COOLDOWN_MS) {
|
|
586
|
+
this.logger.debug({ errorType: ep.type }, "PTY error suppressed (cooldown active)");
|
|
587
|
+
break;
|
|
588
|
+
}
|
|
589
|
+
if (ep.action === "failover" && Date.now() - this.lastFailoverAt < Daemon.FAILOVER_COOLDOWN_MS) {
|
|
590
|
+
this.logger.debug({ errorType: ep.type }, "PTY error suppressed (failover cooldown active)");
|
|
591
|
+
break;
|
|
592
|
+
}
|
|
593
|
+
this.errorWaitingForRecovery = true;
|
|
594
|
+
this.errorDetectedAt = Date.now();
|
|
595
|
+
this.lastDetectedErrorType = ep.type;
|
|
596
|
+
this.lastErrorNotifiedAt.set(ep.type, Date.now());
|
|
597
|
+
if (ep.action === "failover")
|
|
598
|
+
this.lastFailoverAt = Date.now();
|
|
599
|
+
this.logger.warn({ errorType: ep.type, action: ep.action }, `PTY error detected: ${ep.message}`);
|
|
600
|
+
this.emit("pty_error", { name: this.name, ...ep });
|
|
601
|
+
break; // Only handle first match per scan
|
|
602
|
+
}
|
|
603
|
+
}
|
|
604
|
+
catch {
|
|
605
|
+
// capturePane can fail if window is transitioning — ignore
|
|
606
|
+
}
|
|
607
|
+
}, 5_000); // Check every 5 seconds (runtime dialogs need fast response)
|
|
608
|
+
}
|
|
609
|
+
async stop() {
|
|
610
|
+
this.logger.info("Stopping daemon instance");
|
|
611
|
+
if (this.healthCheckTimer) {
|
|
612
|
+
clearTimeout(this.healthCheckTimer);
|
|
613
|
+
this.healthCheckTimer = null;
|
|
614
|
+
}
|
|
615
|
+
if (this.errorMonitorTimer) {
|
|
616
|
+
clearInterval(this.errorMonitorTimer);
|
|
617
|
+
this.errorMonitorTimer = null;
|
|
618
|
+
}
|
|
619
|
+
if (this.toolStatusDebounce) {
|
|
620
|
+
clearTimeout(this.toolStatusDebounce);
|
|
621
|
+
this.toolStatusDebounce = null;
|
|
622
|
+
}
|
|
623
|
+
this.pendingIpcRequests.clear();
|
|
624
|
+
this.hangDetector?.stop();
|
|
625
|
+
this.transcriptMonitor?.stop();
|
|
626
|
+
this.guardian?.stop();
|
|
627
|
+
if (this.adapter)
|
|
628
|
+
await this.adapter.stop();
|
|
629
|
+
// Notify MCP servers of graceful shutdown (prevents reconnect attempts)
|
|
630
|
+
this.ipcServer?.broadcast({ type: "shutdown" });
|
|
631
|
+
// Quit CLI FIRST — this kills MCP server child processes cleanly.
|
|
632
|
+
// IPC must stay open during quit so MCP servers receive the shutdown message.
|
|
633
|
+
if (this.tmux) {
|
|
634
|
+
this.saveSessionId();
|
|
635
|
+
this.healthCheckPaused = true;
|
|
636
|
+
let killed = false;
|
|
637
|
+
const quitCmd = this.backend?.getQuitCommand();
|
|
638
|
+
if (quitCmd) {
|
|
639
|
+
await this.tmux.sendKeys(quitCmd);
|
|
640
|
+
// Delay before Enter to prevent tmux server race when multiple
|
|
641
|
+
// instances stop in parallel (same pattern as pasteText).
|
|
642
|
+
await new Promise(r => setTimeout(r, 150));
|
|
643
|
+
await this.tmux.sendSpecialKey("Enter");
|
|
644
|
+
// Wait up to 10s for graceful exit
|
|
645
|
+
for (let i = 0; i < 20; i++) {
|
|
646
|
+
await new Promise(r => setTimeout(r, 500));
|
|
647
|
+
const status = await this.tmux.getPaneStatus();
|
|
648
|
+
if (!status || !status.alive) {
|
|
649
|
+
killed = true;
|
|
650
|
+
break;
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
}
|
|
654
|
+
if (!killed)
|
|
655
|
+
this.logger.warn("CLI did not exit gracefully within 10s, force killing window");
|
|
656
|
+
// Always kill window — remain-on-exit keeps dead panes around after CLI exits
|
|
657
|
+
await this.tmux.killWindow();
|
|
658
|
+
const windowIdFile = join(this.instanceDir, "window-id");
|
|
659
|
+
try {
|
|
660
|
+
unlinkSync(windowIdFile);
|
|
661
|
+
}
|
|
662
|
+
catch (e) {
|
|
663
|
+
this.logger.debug({ err: e }, "Failed to remove window-id file");
|
|
664
|
+
}
|
|
665
|
+
}
|
|
666
|
+
// Close IPC AFTER CLI has exited — MCP servers are already dead at this point
|
|
667
|
+
await this.ipcServer?.close();
|
|
668
|
+
// Clean up backend config files
|
|
669
|
+
if (this.backend?.cleanup) {
|
|
670
|
+
this.backend.cleanup(this.buildBackendConfig());
|
|
671
|
+
}
|
|
672
|
+
// Clean up checked-out repos
|
|
673
|
+
try {
|
|
674
|
+
rmSync(join(this.instanceDir, "repos"), { recursive: true, force: true });
|
|
675
|
+
}
|
|
676
|
+
catch { /* best effort */ }
|
|
677
|
+
const pidPath = join(this.instanceDir, "daemon.pid");
|
|
678
|
+
try {
|
|
679
|
+
unlinkSync(pidPath);
|
|
680
|
+
}
|
|
681
|
+
catch (e) {
|
|
682
|
+
this.logger.debug({ err: e }, "Failed to remove PID file");
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
getHangDetector() {
|
|
686
|
+
return this.hangDetector;
|
|
687
|
+
}
|
|
688
|
+
getMessageBus() {
|
|
689
|
+
return this.messageBus;
|
|
690
|
+
}
|
|
691
|
+
// ── Tool status tracking ──────────────────────────────────────
|
|
692
|
+
summarizeTool(name, input) {
|
|
693
|
+
const inp = input;
|
|
694
|
+
if (!inp)
|
|
695
|
+
return name;
|
|
696
|
+
if (name === "Read")
|
|
697
|
+
return `Read ${inp.file_path ?? ""}`;
|
|
698
|
+
if (name === "Edit")
|
|
699
|
+
return `Edit ${inp.file_path ?? ""}`;
|
|
700
|
+
if (name === "Write")
|
|
701
|
+
return `Write ${inp.file_path ?? ""}`;
|
|
702
|
+
if (name === "Bash")
|
|
703
|
+
return `$ ${String(inp.command ?? "").slice(0, 50)}`;
|
|
704
|
+
if (name === "Glob")
|
|
705
|
+
return `Glob ${inp.pattern ?? ""}`;
|
|
706
|
+
if (name === "Grep")
|
|
707
|
+
return `Grep ${inp.pattern ?? ""}`;
|
|
708
|
+
if (name === "Agent")
|
|
709
|
+
return "Agent (subagent)";
|
|
710
|
+
if (name.startsWith("mcp__agend__"))
|
|
711
|
+
return ""; // skip channel tools
|
|
712
|
+
return name;
|
|
713
|
+
}
|
|
714
|
+
addToolStatus(name, input, state) {
|
|
715
|
+
const summary = this.summarizeTool(name, input);
|
|
716
|
+
if (!summary)
|
|
717
|
+
return; // skip empty (e.g., channel tools)
|
|
718
|
+
if (state === "running") {
|
|
719
|
+
this.toolStatusLines.push(`⏳ ${summary}`);
|
|
720
|
+
}
|
|
721
|
+
else {
|
|
722
|
+
// Mark the last matching tool as done
|
|
723
|
+
for (let i = this.toolStatusLines.length - 1; i >= 0; i--) {
|
|
724
|
+
if (this.toolStatusLines[i].includes(name) && this.toolStatusLines[i].startsWith("⏳")) {
|
|
725
|
+
this.toolStatusLines[i] = this.toolStatusLines[i].replace("⏳", "✅");
|
|
726
|
+
break;
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
this.debouncedSendToolStatus();
|
|
731
|
+
}
|
|
732
|
+
/** Debounce tool status updates to avoid channel rate limits */
|
|
733
|
+
debouncedSendToolStatus() {
|
|
734
|
+
if (this.toolStatusDebounce)
|
|
735
|
+
clearTimeout(this.toolStatusDebounce);
|
|
736
|
+
this.toolStatusDebounce = setTimeout(() => this.sendToolStatus(), 500);
|
|
737
|
+
}
|
|
738
|
+
sendToolStatus() {
|
|
739
|
+
const text = this.toolStatusLines.join("\n");
|
|
740
|
+
if (!text)
|
|
741
|
+
return;
|
|
742
|
+
this.ipcServer?.broadcast({
|
|
743
|
+
type: "fleet_tool_status",
|
|
744
|
+
instanceName: this.name,
|
|
745
|
+
text,
|
|
746
|
+
editMessageId: this.toolStatusMessageId,
|
|
747
|
+
});
|
|
748
|
+
}
|
|
749
|
+
/** Called by fleet manager when tool status message is sent (returns messageId) */
|
|
750
|
+
setToolStatusMessageId(messageId) {
|
|
751
|
+
this.toolStatusMessageId = messageId;
|
|
752
|
+
}
|
|
753
|
+
/**
|
|
754
|
+
* Push an inbound channel message to a specific MCP session.
|
|
755
|
+
* If targetSession is provided, only send to the matching socket.
|
|
756
|
+
* Otherwise send to the instance's own session (this.name).
|
|
757
|
+
*/
|
|
758
|
+
pushChannelMessage(content, meta, _targetSession) {
|
|
759
|
+
if (!this.tmux) {
|
|
760
|
+
this.logger.warn("Cannot push channel message: tmux not running");
|
|
761
|
+
return;
|
|
762
|
+
}
|
|
763
|
+
if (this.pendingInstructionsUpdate) {
|
|
764
|
+
writeFileSync(join(this.instanceDir, "prev-instructions"), this.pendingInstructionsUpdate);
|
|
765
|
+
this.pendingInstructionsUpdate = undefined;
|
|
766
|
+
}
|
|
767
|
+
this.hangDetector?.recordInbound();
|
|
768
|
+
// v3: record user messages for rotation snapshot
|
|
769
|
+
this.recordRecentUserMessage(content, meta);
|
|
770
|
+
// Format message with metadata prefix for the agent
|
|
771
|
+
const user = meta.user || "unknown";
|
|
772
|
+
const fromInstance = meta.from_instance;
|
|
773
|
+
// /raw prefix: paste directly without [user:] wrapping (topic mode only, protected by allowed_users upstream)
|
|
774
|
+
if (!fromInstance && content.startsWith("/raw ")) {
|
|
775
|
+
const rawText = content.slice(5);
|
|
776
|
+
this.logger.info({ user }, "Raw paste from topic mode user");
|
|
777
|
+
this.pasteLock = this.pasteLock.then(async () => {
|
|
778
|
+
await this.deliverMessage(rawText);
|
|
779
|
+
}).catch(err => {
|
|
780
|
+
this.logger.warn({ err: err.message }, "pasteLock raw delivery error");
|
|
781
|
+
});
|
|
782
|
+
return;
|
|
783
|
+
}
|
|
784
|
+
let formatted;
|
|
785
|
+
if (fromInstance) {
|
|
786
|
+
formatted = `[from:${fromInstance}] ${content}\n(Reply using send_to_instance tool, NOT direct text)`;
|
|
787
|
+
}
|
|
788
|
+
else {
|
|
789
|
+
const via = meta.source ? ` via ${meta.source}` : "";
|
|
790
|
+
formatted = `[user:${user}${via}] ${content}\n(Reply using the reply tool — do NOT respond with direct text)`;
|
|
791
|
+
}
|
|
792
|
+
if (meta.reply_to_text) {
|
|
793
|
+
formatted += `\n(reply_to: "${meta.reply_to_text}")`;
|
|
794
|
+
}
|
|
795
|
+
// Serialize deliveries: each message waits for the previous to complete,
|
|
796
|
+
// and each waits for the CLI to be idle before pasting.
|
|
797
|
+
const enqueuedAt = Date.now();
|
|
798
|
+
const isFromInstance = !!meta.from_instance;
|
|
799
|
+
const chatId = meta.chat_id;
|
|
800
|
+
const messageId = meta.message_id;
|
|
801
|
+
const wasQueued = this.pasteQueueDepth > 0;
|
|
802
|
+
this.pasteQueueDepth++;
|
|
803
|
+
if (this.pasteQueueDepth > 3) {
|
|
804
|
+
this.logger.warn({ depth: this.pasteQueueDepth }, "Message delivery queue backing up");
|
|
805
|
+
}
|
|
806
|
+
if (wasQueued && chatId && messageId) {
|
|
807
|
+
this.emit("message_queued", { chatId, messageId });
|
|
808
|
+
}
|
|
809
|
+
this.pasteLock = this.pasteLock.then(async () => {
|
|
810
|
+
try {
|
|
811
|
+
// Drop stale user messages (>60s in queue), but never drop cross-instance messages
|
|
812
|
+
if (!isFromInstance && Date.now() - enqueuedAt > 60_000) {
|
|
813
|
+
this.logger.warn({ age: Date.now() - enqueuedAt, user: meta.user }, "Dropping stale message");
|
|
814
|
+
return;
|
|
815
|
+
}
|
|
816
|
+
if (this.config.pre_task_command) {
|
|
817
|
+
await this.deliverMessage(this.config.pre_task_command);
|
|
818
|
+
}
|
|
819
|
+
if (this.pendingInstructionsNotice) {
|
|
820
|
+
this.pendingInstructionsNotice = false;
|
|
821
|
+
await this.deliverMessage("[system] Your instructions/steering files have been updated. Please re-read them for the latest guidelines.");
|
|
822
|
+
}
|
|
823
|
+
await this.deliverMessage(formatted);
|
|
824
|
+
if (chatId && messageId) {
|
|
825
|
+
this.emit("message_delivered", { chatId, messageId });
|
|
826
|
+
}
|
|
827
|
+
}
|
|
828
|
+
finally {
|
|
829
|
+
this.pasteQueueDepth--;
|
|
830
|
+
}
|
|
831
|
+
}).catch(err => {
|
|
832
|
+
this.logger.warn({ err: err.message }, "pasteLock delivery error — chain continues");
|
|
833
|
+
});
|
|
834
|
+
this.logger.debug({ user: meta.user, text: content.slice(0, 100) }, "Queued channel message for delivery");
|
|
835
|
+
}
|
|
836
|
+
/** Deliver a single message: wait for idle, then paste */
|
|
837
|
+
async deliverMessage(formatted) {
|
|
838
|
+
const windowId = this.getWindowId();
|
|
839
|
+
if (windowId && this.controlClient) {
|
|
840
|
+
const idle = await this.controlClient.waitForIdle(windowId, this.config.lightweight ? 30_000 : 120_000);
|
|
841
|
+
if (!idle) {
|
|
842
|
+
this.logger.warn("Delivering message after idle timeout (CLI may be busy)");
|
|
843
|
+
}
|
|
844
|
+
}
|
|
845
|
+
const ok = await this.tmux.pasteText(formatted);
|
|
846
|
+
if (!ok) {
|
|
847
|
+
// Window ID may be stale after crash/respawn — try to find by name
|
|
848
|
+
this.logger.warn("pasteText failed, looking up window by name");
|
|
849
|
+
try {
|
|
850
|
+
const windows = await TmuxManager.listWindows(this.tmuxSessionName);
|
|
851
|
+
const match = windows.find(w => w.name === this.name);
|
|
852
|
+
if (match) {
|
|
853
|
+
this.tmux = new TmuxManager(this.tmuxSessionName, match.id);
|
|
854
|
+
writeFileSync(join(this.instanceDir, "window-id"), match.id);
|
|
855
|
+
await this.controlClient?.registerWindow(match.id);
|
|
856
|
+
await this.tmux.pasteText(formatted);
|
|
857
|
+
this.logger.info({ windowId: match.id }, "Recovered window ID and delivered message");
|
|
858
|
+
}
|
|
859
|
+
}
|
|
860
|
+
catch (retryErr) {
|
|
861
|
+
this.logger.error({ err: retryErr }, "Failed to recover window for message delivery");
|
|
862
|
+
}
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
getWindowId() {
|
|
866
|
+
try {
|
|
867
|
+
return readFileSync(join(this.instanceDir, "window-id"), "utf-8").trim() || undefined;
|
|
868
|
+
}
|
|
869
|
+
catch {
|
|
870
|
+
return undefined;
|
|
871
|
+
}
|
|
872
|
+
}
|
|
873
|
+
/** Find the IPC socket for a given sessionName */
|
|
874
|
+
findSocketBySession(sessionName) {
|
|
875
|
+
for (const [socket, name] of this.socketSessionNames) {
|
|
876
|
+
if (name === sessionName && !socket.destroyed)
|
|
877
|
+
return socket;
|
|
878
|
+
}
|
|
879
|
+
return undefined;
|
|
880
|
+
}
|
|
881
|
+
/**
|
|
882
|
+
* Handle a tool call from the MCP server (forwarded by Claude).
|
|
883
|
+
* Routes to the channel adapter via MessageBus.
|
|
884
|
+
*/
|
|
885
|
+
handleToolCall(msg, socket) {
|
|
886
|
+
const tool = msg.tool;
|
|
887
|
+
const args = (msg.args ?? {});
|
|
888
|
+
const requestId = msg.requestId;
|
|
889
|
+
this.logger.debug({ tool, requestId }, "Tool call from MCP server");
|
|
890
|
+
// For now, log and respond. Full adapter routing will be wired in fleet manager.
|
|
891
|
+
const respond = (result, error) => {
|
|
892
|
+
this.ipcServer?.send(socket, { requestId, result, error });
|
|
893
|
+
};
|
|
894
|
+
// Repo checkout — handled locally in daemon (no fleet-manager)
|
|
895
|
+
if (tool === "checkout_repo") {
|
|
896
|
+
this.handleCheckoutRepo(args, respond);
|
|
897
|
+
return;
|
|
898
|
+
}
|
|
899
|
+
if (tool === "release_repo") {
|
|
900
|
+
this.handleReleaseRepo(args, respond);
|
|
901
|
+
return;
|
|
902
|
+
}
|
|
903
|
+
if (tool === "set_display_name" || tool === "set_description") {
|
|
904
|
+
const type = tool === "set_display_name" ? "fleet_set_display_name" : "fleet_set_description";
|
|
905
|
+
const fleetReqId = `${tool === "set_display_name" ? "dn" : "desc"}_${requestId}`;
|
|
906
|
+
this.ipcServer?.broadcast({
|
|
907
|
+
type,
|
|
908
|
+
payload: args,
|
|
909
|
+
meta: { instance_name: this.name },
|
|
910
|
+
fleetRequestId: fleetReqId,
|
|
911
|
+
});
|
|
912
|
+
const timeout = setTimeout(() => {
|
|
913
|
+
this.pendingIpcRequests.delete(fleetReqId);
|
|
914
|
+
respond(null, `${tool} timed out`);
|
|
915
|
+
}, 10_000);
|
|
916
|
+
this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
|
|
917
|
+
clearTimeout(timeout);
|
|
918
|
+
respond(respMsg.result, respMsg.error);
|
|
919
|
+
});
|
|
920
|
+
return;
|
|
921
|
+
}
|
|
922
|
+
if (tool === TASK_TOOL) {
|
|
923
|
+
const fleetReqId = `task_${requestId}`;
|
|
924
|
+
this.ipcServer?.broadcast({
|
|
925
|
+
type: "fleet_task",
|
|
926
|
+
payload: args,
|
|
927
|
+
meta: { instance_name: this.name },
|
|
928
|
+
fleetRequestId: fleetReqId,
|
|
929
|
+
});
|
|
930
|
+
const timeout = setTimeout(() => {
|
|
931
|
+
this.pendingIpcRequests.delete(fleetReqId);
|
|
932
|
+
respond(null, "Task operation timed out after 30s");
|
|
933
|
+
}, 30_000);
|
|
934
|
+
this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
|
|
935
|
+
clearTimeout(timeout);
|
|
936
|
+
respond(respMsg.result, respMsg.error);
|
|
937
|
+
});
|
|
938
|
+
return;
|
|
939
|
+
}
|
|
940
|
+
if (DECISION_TOOLS.has(tool)) {
|
|
941
|
+
const typeMap = {
|
|
942
|
+
post_decision: "fleet_decision_create",
|
|
943
|
+
list_decisions: "fleet_decision_list",
|
|
944
|
+
update_decision: "fleet_decision_update",
|
|
945
|
+
};
|
|
946
|
+
const fleetReqId = `dec_${requestId}`;
|
|
947
|
+
this.ipcServer?.broadcast({
|
|
948
|
+
type: typeMap[tool],
|
|
949
|
+
payload: args,
|
|
950
|
+
meta: { instance_name: this.name, working_directory: this.config.working_directory },
|
|
951
|
+
fleetRequestId: fleetReqId,
|
|
952
|
+
});
|
|
953
|
+
const timeout = setTimeout(() => {
|
|
954
|
+
this.pendingIpcRequests.delete(fleetReqId);
|
|
955
|
+
respond(null, "Decision operation timed out after 30s");
|
|
956
|
+
}, 30_000);
|
|
957
|
+
this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
|
|
958
|
+
clearTimeout(timeout);
|
|
959
|
+
respond(respMsg.result, respMsg.error);
|
|
960
|
+
});
|
|
961
|
+
return;
|
|
962
|
+
}
|
|
963
|
+
if (SCHEDULE_TOOLS.has(tool)) {
|
|
964
|
+
const typeMap = {
|
|
965
|
+
create_schedule: "fleet_schedule_create",
|
|
966
|
+
list_schedules: "fleet_schedule_list",
|
|
967
|
+
update_schedule: "fleet_schedule_update",
|
|
968
|
+
delete_schedule: "fleet_schedule_delete",
|
|
969
|
+
};
|
|
970
|
+
// Use fleetRequestId (not requestId) to avoid MCP server resolving the
|
|
971
|
+
// pending tool call prematurely when it receives the broadcast.
|
|
972
|
+
const fleetReqId = `sched_${requestId}`;
|
|
973
|
+
this.ipcServer?.broadcast({
|
|
974
|
+
type: typeMap[tool],
|
|
975
|
+
payload: args,
|
|
976
|
+
meta: { chat_id: this.lastChatId, thread_id: this.lastThreadId, instance_name: this.name },
|
|
977
|
+
fleetRequestId: fleetReqId,
|
|
978
|
+
});
|
|
979
|
+
// Wait for fleet_schedule_response via pending request map
|
|
980
|
+
const timeout = setTimeout(() => {
|
|
981
|
+
this.pendingIpcRequests.delete(fleetReqId);
|
|
982
|
+
respond(null, "Schedule operation timed out after 30s");
|
|
983
|
+
}, 30_000);
|
|
984
|
+
this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
|
|
985
|
+
clearTimeout(timeout);
|
|
986
|
+
respond(respMsg.result, respMsg.error);
|
|
987
|
+
});
|
|
988
|
+
return;
|
|
989
|
+
}
|
|
990
|
+
if (CROSS_INSTANCE_TOOLS.has(tool)) {
|
|
991
|
+
// Route to fleet manager via IPC (topic mode only)
|
|
992
|
+
if (this.topicMode && this.ipcServer) {
|
|
993
|
+
// Use fleetRequestId (not requestId) to avoid MCP server resolving the
|
|
994
|
+
// pending tool call prematurely when it receives the broadcast.
|
|
995
|
+
const fleetReqId = `xmsg_${requestId}`;
|
|
996
|
+
const senderSessionName = this.socketSessionNames.get(socket);
|
|
997
|
+
this.ipcServer.broadcast({
|
|
998
|
+
type: "fleet_outbound",
|
|
999
|
+
tool,
|
|
1000
|
+
args,
|
|
1001
|
+
fleetRequestId: fleetReqId,
|
|
1002
|
+
senderSessionName,
|
|
1003
|
+
});
|
|
1004
|
+
const crossTimeoutMs = (tool === "start_instance" || tool === "create_instance" || tool === "replace_instance") ? 60_000 : 30_000;
|
|
1005
|
+
const timeout = setTimeout(() => {
|
|
1006
|
+
this.pendingIpcRequests.delete(fleetReqId);
|
|
1007
|
+
respond(null, `Cross-instance operation timed out after ${crossTimeoutMs / 1000}s`);
|
|
1008
|
+
}, crossTimeoutMs);
|
|
1009
|
+
this.pendingIpcRequests.set(fleetReqId, (respMsg) => {
|
|
1010
|
+
clearTimeout(timeout);
|
|
1011
|
+
respond(respMsg.result, respMsg.error);
|
|
1012
|
+
});
|
|
1013
|
+
}
|
|
1014
|
+
else {
|
|
1015
|
+
respond(null, "Cross-instance messaging requires topic mode");
|
|
1016
|
+
}
|
|
1017
|
+
return;
|
|
1018
|
+
}
|
|
1019
|
+
// Context-bound routing: reply/react/edit_message always use the daemon's last known context.
|
|
1020
|
+
// chat_id and thread_id are not exposed in the tool schema — daemon is solely responsible for routing.
|
|
1021
|
+
// Must run before IPC forwarding so topic-mode (fleet manager) also receives the correct chat_id.
|
|
1022
|
+
if (["reply", "react", "edit_message"].includes(tool)) {
|
|
1023
|
+
if (!this.lastChatId) {
|
|
1024
|
+
respond(null, "No active chat context — awaiting inbound message");
|
|
1025
|
+
return;
|
|
1026
|
+
}
|
|
1027
|
+
args.chat_id = this.lastChatId;
|
|
1028
|
+
if (tool === "reply")
|
|
1029
|
+
args.thread_id = this.lastThreadId;
|
|
1030
|
+
}
|
|
1031
|
+
// Route to adapter via MessageBus
|
|
1032
|
+
const adapters = this.messageBus.getAllAdapters();
|
|
1033
|
+
if (adapters.length === 0) {
|
|
1034
|
+
// Topic mode: forward to fleet manager via IPC (fleet manager connected as IPC client)
|
|
1035
|
+
// The fleet manager's IPC client receives this and routes to shared adapter.
|
|
1036
|
+
// Use fleetRequestId (not requestId) to avoid other MCP sessions on this daemon
|
|
1037
|
+
// from prematurely resolving their pending requests when they receive the broadcast.
|
|
1038
|
+
const fleetReqId = `tool_${requestId}`;
|
|
1039
|
+
const outboundKey = fleetReqId;
|
|
1040
|
+
this.ipcServer?.broadcast({ type: "fleet_outbound", tool, args, fleetRequestId: fleetReqId });
|
|
1041
|
+
const timeout = setTimeout(() => {
|
|
1042
|
+
this.pendingIpcRequests.delete(outboundKey);
|
|
1043
|
+
respond(null, "Fleet outbound timed out after 30s");
|
|
1044
|
+
}, 30_000);
|
|
1045
|
+
this.pendingIpcRequests.set(outboundKey, (respMsg) => {
|
|
1046
|
+
clearTimeout(timeout);
|
|
1047
|
+
respond(respMsg.result, respMsg.error);
|
|
1048
|
+
});
|
|
1049
|
+
return;
|
|
1050
|
+
}
|
|
1051
|
+
const adapter = adapters[0];
|
|
1052
|
+
if (!routeToolCall(adapter, tool, args, this.lastThreadId, respond)) {
|
|
1053
|
+
respond(null, `Unknown tool: ${tool}`);
|
|
1054
|
+
}
|
|
1055
|
+
}
|
|
1056
|
+
/** Build config object for the CLI backend */
|
|
1057
|
+
buildBackendConfig() {
|
|
1058
|
+
const isCliMode = this.config.agent_mode === "cli";
|
|
1059
|
+
const sockPath = join(this.instanceDir, "channel.sock");
|
|
1060
|
+
let serverJs = join(__dirname, "channel", "mcp-server.js");
|
|
1061
|
+
if (!existsSync(serverJs)) {
|
|
1062
|
+
serverJs = join(__dirname, "..", "dist", "channel", "mcp-server.js");
|
|
1063
|
+
}
|
|
1064
|
+
// ── Resolve workflow and systemPrompt once, share between MCP env and instructions ──
|
|
1065
|
+
let resolvedWorkflow;
|
|
1066
|
+
if (this.config.workflow === false) {
|
|
1067
|
+
resolvedWorkflow = false;
|
|
1068
|
+
}
|
|
1069
|
+
else {
|
|
1070
|
+
const wf = this.config.workflow ?? "builtin";
|
|
1071
|
+
if (wf !== "builtin") {
|
|
1072
|
+
let content = wf;
|
|
1073
|
+
if (content.startsWith("file:")) {
|
|
1074
|
+
try {
|
|
1075
|
+
content = readFileSync(content.slice(5), "utf-8");
|
|
1076
|
+
}
|
|
1077
|
+
catch {
|
|
1078
|
+
content = "";
|
|
1079
|
+
}
|
|
1080
|
+
}
|
|
1081
|
+
resolvedWorkflow = content || undefined;
|
|
1082
|
+
}
|
|
1083
|
+
}
|
|
1084
|
+
let resolvedCustomPrompt;
|
|
1085
|
+
if (this.config.systemPrompt) {
|
|
1086
|
+
// Support comma-separated file: paths for prompt modularization:
|
|
1087
|
+
// systemPrompt: "file:prompts/role.md, file:prompts/rules.md, file:prompts/context.md"
|
|
1088
|
+
const parts = this.config.systemPrompt.split(",").map((s) => s.trim());
|
|
1089
|
+
const resolved = parts.map((part) => {
|
|
1090
|
+
if (part.startsWith("file:")) {
|
|
1091
|
+
try {
|
|
1092
|
+
return readFileSync(part.slice(5), "utf-8");
|
|
1093
|
+
}
|
|
1094
|
+
catch {
|
|
1095
|
+
return "";
|
|
1096
|
+
}
|
|
1097
|
+
}
|
|
1098
|
+
return part;
|
|
1099
|
+
}).filter(Boolean);
|
|
1100
|
+
if (resolved.length > 0)
|
|
1101
|
+
resolvedCustomPrompt = resolved.join("\n\n");
|
|
1102
|
+
}
|
|
1103
|
+
let decisions;
|
|
1104
|
+
if (process.env.AGEND_DECISIONS) {
|
|
1105
|
+
try {
|
|
1106
|
+
decisions = JSON.parse(process.env.AGEND_DECISIONS);
|
|
1107
|
+
}
|
|
1108
|
+
catch (err) {
|
|
1109
|
+
this.logger.warn({ err }, "AGEND_DECISIONS env var is not valid JSON — decisions will not be injected");
|
|
1110
|
+
}
|
|
1111
|
+
}
|
|
1112
|
+
// ── MCP server env (dual-track: still passes env vars for MCP instructions fallback) ──
|
|
1113
|
+
const mcpEnv = {
|
|
1114
|
+
AGEND_SOCKET_PATH: sockPath,
|
|
1115
|
+
AGEND_INSTANCE_NAME: this.name,
|
|
1116
|
+
AGEND_WORKING_DIR: this.config.working_directory,
|
|
1117
|
+
};
|
|
1118
|
+
if (this.config.tool_set)
|
|
1119
|
+
mcpEnv.AGEND_TOOL_SET = this.config.tool_set;
|
|
1120
|
+
if (this.config.display_name)
|
|
1121
|
+
mcpEnv.AGEND_DISPLAY_NAME = this.config.display_name;
|
|
1122
|
+
if (this.config.description)
|
|
1123
|
+
mcpEnv.AGEND_DESCRIPTION = this.config.description;
|
|
1124
|
+
if (resolvedWorkflow === false)
|
|
1125
|
+
mcpEnv.AGEND_WORKFLOW = "false";
|
|
1126
|
+
else if (resolvedWorkflow)
|
|
1127
|
+
mcpEnv.AGEND_WORKFLOW = resolvedWorkflow;
|
|
1128
|
+
if (resolvedCustomPrompt)
|
|
1129
|
+
mcpEnv.AGEND_CUSTOM_PROMPT = resolvedCustomPrompt;
|
|
1130
|
+
if (process.env.AGEND_DECISIONS)
|
|
1131
|
+
mcpEnv.AGEND_DECISIONS = process.env.AGEND_DECISIONS;
|
|
1132
|
+
// ── Fleet instructions for additive system prompt injection ──
|
|
1133
|
+
let instructions;
|
|
1134
|
+
if (isCliMode) {
|
|
1135
|
+
// CLI mode: inject CLI quick reference instead of MCP tool schema
|
|
1136
|
+
let cliRef = "";
|
|
1137
|
+
try {
|
|
1138
|
+
const cliInstrPath = join(__dirname, "agent-cli-instructions.md");
|
|
1139
|
+
if (!existsSync(cliInstrPath)) {
|
|
1140
|
+
const altPath = join(__dirname, "..", "dist", "agent-cli-instructions.md");
|
|
1141
|
+
if (existsSync(altPath))
|
|
1142
|
+
cliRef = readFileSync(altPath, "utf-8");
|
|
1143
|
+
}
|
|
1144
|
+
else {
|
|
1145
|
+
cliRef = readFileSync(cliInstrPath, "utf-8");
|
|
1146
|
+
}
|
|
1147
|
+
}
|
|
1148
|
+
catch { /* fallback to empty */ }
|
|
1149
|
+
instructions = buildFleetInstructions({
|
|
1150
|
+
instanceName: this.name,
|
|
1151
|
+
workingDirectory: this.config.working_directory,
|
|
1152
|
+
displayName: this.config.display_name,
|
|
1153
|
+
description: this.config.description,
|
|
1154
|
+
customPrompt: resolvedCustomPrompt,
|
|
1155
|
+
workflow: resolvedWorkflow,
|
|
1156
|
+
decisions,
|
|
1157
|
+
cliInstructions: cliRef || undefined,
|
|
1158
|
+
});
|
|
1159
|
+
}
|
|
1160
|
+
else {
|
|
1161
|
+
instructions = buildFleetInstructions({
|
|
1162
|
+
instanceName: this.name,
|
|
1163
|
+
workingDirectory: this.config.working_directory,
|
|
1164
|
+
displayName: this.config.display_name,
|
|
1165
|
+
description: this.config.description,
|
|
1166
|
+
customPrompt: resolvedCustomPrompt,
|
|
1167
|
+
workflow: resolvedWorkflow,
|
|
1168
|
+
decisions,
|
|
1169
|
+
});
|
|
1170
|
+
}
|
|
1171
|
+
const agentPort = parseInt(process.env.AGEND_PORT ?? "19280", 10);
|
|
1172
|
+
return {
|
|
1173
|
+
workingDirectory: this.config.working_directory,
|
|
1174
|
+
instanceDir: this.instanceDir,
|
|
1175
|
+
instanceName: this.name,
|
|
1176
|
+
mcpServers: isCliMode ? {} : {
|
|
1177
|
+
"agend": {
|
|
1178
|
+
command: "node",
|
|
1179
|
+
args: [serverJs],
|
|
1180
|
+
env: mcpEnv,
|
|
1181
|
+
},
|
|
1182
|
+
},
|
|
1183
|
+
skipPermissions: this.config.skipPermissions,
|
|
1184
|
+
model: this.modelOverride ?? this.config.model,
|
|
1185
|
+
skipResume: this.skipResume,
|
|
1186
|
+
instructions,
|
|
1187
|
+
agentMode: isCliMode ? "cli" : "mcp",
|
|
1188
|
+
agentPort: isCliMode ? agentPort : undefined,
|
|
1189
|
+
};
|
|
1190
|
+
}
|
|
1191
|
+
/**
|
|
1192
|
+
* After CLI is ready, paste any pending session snapshot as the first
|
|
1193
|
+
* user input so the agent picks up where the previous session left off.
|
|
1194
|
+
* This replaces the old system-prompt injection approach.
|
|
1195
|
+
*/
|
|
1196
|
+
async injectSnapshotMessage() {
|
|
1197
|
+
if (this.snapshotConsumed)
|
|
1198
|
+
return;
|
|
1199
|
+
const snapshot = this.buildSnapshotPrompt();
|
|
1200
|
+
if (!snapshot || !this.tmux)
|
|
1201
|
+
return;
|
|
1202
|
+
if (this.pendingInstructionsUpdate) {
|
|
1203
|
+
writeFileSync(join(this.instanceDir, "prev-instructions"), this.pendingInstructionsUpdate);
|
|
1204
|
+
this.pendingInstructionsUpdate = undefined;
|
|
1205
|
+
}
|
|
1206
|
+
// Small delay to let the CLI fully render its ready prompt
|
|
1207
|
+
await new Promise(r => setTimeout(r, 1_000));
|
|
1208
|
+
try {
|
|
1209
|
+
await this.tmux.pasteText(`[system:session-snapshot]\n${snapshot}\n\nThis is a background context restore — do NOT reply to or acknowledge this message. Simply resume normal operation when the next user or instance message arrives.`);
|
|
1210
|
+
this.logger.info("Injected session snapshot as first message");
|
|
1211
|
+
this.emit("snapshot_injected", this.name);
|
|
1212
|
+
}
|
|
1213
|
+
catch (err) {
|
|
1214
|
+
this.logger.error({ err }, "Snapshot injection failed — session continues without context");
|
|
1215
|
+
this.emit("snapshot_failed", this.name);
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
/** Spawn a CLI window. Returns true if --resume was used successfully. */
|
|
1219
|
+
async spawnClaudeWindow() {
|
|
1220
|
+
this.spawning = true;
|
|
1221
|
+
let resumedSuccessfully = false;
|
|
1222
|
+
try {
|
|
1223
|
+
this.toolStatusLines = [];
|
|
1224
|
+
this.toolStatusMessageId = null;
|
|
1225
|
+
if (!this.backend) {
|
|
1226
|
+
throw new Error("No backend configured — cannot spawn CLI window");
|
|
1227
|
+
}
|
|
1228
|
+
const attemptedResume = !this.skipResume;
|
|
1229
|
+
const alive = await this.trySpawn();
|
|
1230
|
+
if (!alive) {
|
|
1231
|
+
// First attempt failed (stale --resume, crash, rate limit, etc.)
|
|
1232
|
+
// Clean slate: clear session-id, skip resume, and retry once.
|
|
1233
|
+
this.logger.warn("CLI startup failed — clearing session-id and retrying without resume");
|
|
1234
|
+
const sidFile = join(this.instanceDir, "session-id");
|
|
1235
|
+
try {
|
|
1236
|
+
unlinkSync(sidFile);
|
|
1237
|
+
}
|
|
1238
|
+
catch { /* may not exist */ }
|
|
1239
|
+
this.skipResume = true;
|
|
1240
|
+
await this.killProcessTree();
|
|
1241
|
+
await this.tmux.killWindow();
|
|
1242
|
+
const retryAlive = await this.trySpawn();
|
|
1243
|
+
if (!retryAlive) {
|
|
1244
|
+
await this.killProcessTree();
|
|
1245
|
+
await this.tmux.killWindow();
|
|
1246
|
+
throw new Error("CLI failed to start after retry");
|
|
1247
|
+
}
|
|
1248
|
+
}
|
|
1249
|
+
else if (attemptedResume) {
|
|
1250
|
+
resumedSuccessfully = true;
|
|
1251
|
+
}
|
|
1252
|
+
this.lastSpawnAt = Date.now();
|
|
1253
|
+
this.skipResume = false; // CLI started successfully — reset for next spawn
|
|
1254
|
+
}
|
|
1255
|
+
finally {
|
|
1256
|
+
this.spawning = false;
|
|
1257
|
+
}
|
|
1258
|
+
return resumedSuccessfully;
|
|
1259
|
+
}
|
|
1260
|
+
/** Kill the entire process tree of the current tmux pane (CLI + MCP server). */
|
|
1261
|
+
async killProcessTree() {
|
|
1262
|
+
if (!this.tmux)
|
|
1263
|
+
return;
|
|
1264
|
+
try {
|
|
1265
|
+
const pid = await TmuxManager.getPanePid(this.tmuxSessionName, this.tmux.getWindowId());
|
|
1266
|
+
if (pid) {
|
|
1267
|
+
process.kill(-pid, "SIGTERM");
|
|
1268
|
+
this.logger.debug({ pid }, "Killed process group");
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
catch { /* process group may not exist or already dead */ }
|
|
1272
|
+
}
|
|
1273
|
+
/**
|
|
1274
|
+
* Spawn a CLI window and verify it reaches a ready state.
|
|
1275
|
+
* Uses control mode to wait for output, then checks pane content.
|
|
1276
|
+
* Handles confirmation dialogs (trust folder, bypass permissions).
|
|
1277
|
+
* Returns true if CLI is ready, false if it failed or got stuck.
|
|
1278
|
+
*/
|
|
1279
|
+
async trySpawn() {
|
|
1280
|
+
const backendConfig = this.buildBackendConfig();
|
|
1281
|
+
// Detect instructions change → notify agent on next message instead of
|
|
1282
|
+
// forcing a new session. Resume is preserved so context isn't lost.
|
|
1283
|
+
if (!backendConfig.skipResume && !this.backend.instructionsReloadedOnResume && backendConfig.instructions) {
|
|
1284
|
+
const prevFile = join(this.instanceDir, "prev-instructions");
|
|
1285
|
+
let prev = "";
|
|
1286
|
+
try {
|
|
1287
|
+
prev = readFileSync(prevFile, "utf-8");
|
|
1288
|
+
}
|
|
1289
|
+
catch { }
|
|
1290
|
+
if (prev !== backendConfig.instructions) {
|
|
1291
|
+
if (prev) {
|
|
1292
|
+
this.logger.info("Instructions changed — will notify agent on next message");
|
|
1293
|
+
this.pendingInstructionsNotice = true;
|
|
1294
|
+
}
|
|
1295
|
+
this.pendingInstructionsUpdate = backendConfig.instructions;
|
|
1296
|
+
}
|
|
1297
|
+
}
|
|
1298
|
+
this.backend.writeConfig(backendConfig);
|
|
1299
|
+
this.backend.preTrust?.(this.config.working_directory);
|
|
1300
|
+
// Generate a fresh per-instance agent token each spawn. agent-cli reads
|
|
1301
|
+
// this file from <instanceDir>/agent.token (mode 0o600) and sends its
|
|
1302
|
+
// value in the X-Agend-Instance-Token header; the daemon-side /agent
|
|
1303
|
+
// endpoint verifies it matches the on-disk value for the claimed
|
|
1304
|
+
// instance. This prevents other local processes (even those holding
|
|
1305
|
+
// the global web token) from impersonating instances.
|
|
1306
|
+
const agentTokenPath = join(this.instanceDir, "agent.token");
|
|
1307
|
+
const agentToken = randomBytes(32).toString("hex");
|
|
1308
|
+
writeFileSync(agentTokenPath, agentToken, { mode: 0o600 });
|
|
1309
|
+
try {
|
|
1310
|
+
chmodSync(agentTokenPath, 0o600);
|
|
1311
|
+
}
|
|
1312
|
+
catch { }
|
|
1313
|
+
// AGEND_HOME points the child's agent-cli at the same data dir the daemon
|
|
1314
|
+
// is using, so it can locate <instanceDir>/agent.token.
|
|
1315
|
+
const agendHome = join(this.instanceDir, "..", "..");
|
|
1316
|
+
let envPrefix = `TERM=xterm-256color AGEND_INSTANCE_NAME=${shellQuote(this.name)} AGEND_HOME=${shellQuote(agendHome)}`;
|
|
1317
|
+
if (backendConfig.agentMode === "cli" && backendConfig.agentPort) {
|
|
1318
|
+
envPrefix += ` AGEND_PORT=${backendConfig.agentPort}`;
|
|
1319
|
+
}
|
|
1320
|
+
const cmd = `${envPrefix} ` + this.backend.buildCommand(backendConfig);
|
|
1321
|
+
// Ensure tmux session exists (may have been destroyed if all windows died)
|
|
1322
|
+
await TmuxManager.ensureSession(this.tmuxSessionName);
|
|
1323
|
+
const windowId = await this.tmux.createWindow(cmd, this.config.working_directory, this.name);
|
|
1324
|
+
writeFileSync(join(this.instanceDir, "window-id"), windowId);
|
|
1325
|
+
// Enable remain-on-exit to capture exit codes on crash
|
|
1326
|
+
await this.tmux.setRemainOnExit().catch(err => {
|
|
1327
|
+
this.logger.warn({ err }, "Failed to set remain-on-exit — exit codes will not be captured");
|
|
1328
|
+
});
|
|
1329
|
+
// Register with control client and wait for output + idle
|
|
1330
|
+
await this.controlClient?.registerWindow(windowId);
|
|
1331
|
+
if (this.controlClient) {
|
|
1332
|
+
const total = this.config.startup_timeout_ms ?? 25_000;
|
|
1333
|
+
const outputTimeout = Math.round(total * 0.6);
|
|
1334
|
+
const idleTimeout = total - outputTimeout;
|
|
1335
|
+
const hasOutput = await this.controlClient.waitForOutput(windowId, outputTimeout);
|
|
1336
|
+
if (!hasOutput) {
|
|
1337
|
+
// Fallback: some TUI backends (e.g. opencode) don't trigger tmux %output events.
|
|
1338
|
+
// Check pane content directly for ready pattern before giving up.
|
|
1339
|
+
const pane = await this.tmux.capturePane();
|
|
1340
|
+
if (!this.backend.getReadyPattern().test(pane))
|
|
1341
|
+
return false;
|
|
1342
|
+
}
|
|
1343
|
+
else {
|
|
1344
|
+
await this.controlClient.waitForIdle(windowId, idleTimeout);
|
|
1345
|
+
}
|
|
1346
|
+
}
|
|
1347
|
+
else {
|
|
1348
|
+
await new Promise(r => setTimeout(r, 10_000));
|
|
1349
|
+
}
|
|
1350
|
+
// Dismiss confirmation dialogs and verify CLI reached prompt.
|
|
1351
|
+
// With remain-on-exit, isWindowAlive() returns true even for dead panes,
|
|
1352
|
+
// but a startup crash would already be caught by waitForOutput/waitForIdle above.
|
|
1353
|
+
if (!await this.tmux.isWindowAlive())
|
|
1354
|
+
return false;
|
|
1355
|
+
return this.dismissDialogsUntilReady(3);
|
|
1356
|
+
}
|
|
1357
|
+
/**
|
|
1358
|
+
* Repeatedly check pane content, dismiss any confirmation dialogs,
|
|
1359
|
+
* and return true once CLI reaches a ready prompt.
|
|
1360
|
+
*/
|
|
1361
|
+
async dismissDialogsUntilReady(maxAttempts) {
|
|
1362
|
+
// Backend-specific startup dialogs, with hardcoded fallback for backward compat
|
|
1363
|
+
const startupDialogs = this.backend?.getStartupDialogs?.() ?? [
|
|
1364
|
+
{ pattern: /[❯›]\s*\d+\.\s*No/m, keys: ["Down", "Enter"], description: "Confirmation dialog — navigate past No" },
|
|
1365
|
+
{ pattern: /[❯›]\s*Don't trust/m, keys: ["Up", "Up", "Enter"], description: "Trust dialog — navigate to trust option" },
|
|
1366
|
+
{ pattern: /No, exit|No, quit|Don't trust|I accept|I trust|Yes, continue|Trust folder/i, keys: ["Enter"], description: "Generic confirmation dialog" },
|
|
1367
|
+
{ pattern: /Resume Session/i, keys: ["Escape"], description: "Resume session picker — start fresh" },
|
|
1368
|
+
];
|
|
1369
|
+
for (let i = 0; i < maxAttempts; i++) {
|
|
1370
|
+
try {
|
|
1371
|
+
const pane = await this.tmux.capturePane();
|
|
1372
|
+
// Try each startup dialog pattern before checking ready state
|
|
1373
|
+
let matched = false;
|
|
1374
|
+
for (const dialog of startupDialogs) {
|
|
1375
|
+
if (dialog.pattern.test(pane)) {
|
|
1376
|
+
this.logger.debug(`Dismissing startup dialog: ${dialog.description}`);
|
|
1377
|
+
for (const key of dialog.keys) {
|
|
1378
|
+
if (key === "Up" || key === "Down" || key === "Enter" || key === "Escape") {
|
|
1379
|
+
await this.tmux.sendSpecialKey(key);
|
|
1380
|
+
}
|
|
1381
|
+
else {
|
|
1382
|
+
await this.tmux.sendKeys(key);
|
|
1383
|
+
}
|
|
1384
|
+
await new Promise(r => setTimeout(r, 200));
|
|
1385
|
+
}
|
|
1386
|
+
// Wait for next screen to render
|
|
1387
|
+
if (this.controlClient) {
|
|
1388
|
+
const wid = readFileSync(join(this.instanceDir, "window-id"), "utf-8").trim();
|
|
1389
|
+
await this.controlClient.waitForIdle(wid, 10_000);
|
|
1390
|
+
}
|
|
1391
|
+
else {
|
|
1392
|
+
await new Promise(r => setTimeout(r, 3_000));
|
|
1393
|
+
}
|
|
1394
|
+
if (!await this.tmux.isWindowAlive())
|
|
1395
|
+
return false;
|
|
1396
|
+
matched = true;
|
|
1397
|
+
break;
|
|
1398
|
+
}
|
|
1399
|
+
}
|
|
1400
|
+
if (matched)
|
|
1401
|
+
continue;
|
|
1402
|
+
// CLI is ready (pattern defined by each backend)
|
|
1403
|
+
if (this.backend.getReadyPattern().test(pane))
|
|
1404
|
+
return true;
|
|
1405
|
+
// Fatal: command not found (must match full phrase to avoid false positives
|
|
1406
|
+
// like Kiro's "agent X not found, using default")
|
|
1407
|
+
if (/command not found|: not found$/m.test(pane))
|
|
1408
|
+
return false;
|
|
1409
|
+
}
|
|
1410
|
+
catch {
|
|
1411
|
+
return false;
|
|
1412
|
+
}
|
|
1413
|
+
}
|
|
1414
|
+
// Exhausted attempts — assume ok for unknown CLI prompts
|
|
1415
|
+
return true;
|
|
1416
|
+
}
|
|
1417
|
+
saveSessionId() {
|
|
1418
|
+
const sid = this.backend?.getSessionId();
|
|
1419
|
+
if (sid) {
|
|
1420
|
+
writeFileSync(join(this.instanceDir, "session-id"), sid);
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1423
|
+
readContextPercentage() {
|
|
1424
|
+
return this.backend?.getContextUsage() ?? 0;
|
|
1425
|
+
}
|
|
1426
|
+
/** Set a model override for next spawn (used by failover logic) */
|
|
1427
|
+
setModelOverride(model) {
|
|
1428
|
+
this.modelOverride = model;
|
|
1429
|
+
}
|
|
1430
|
+
/** Get the currently active model override */
|
|
1431
|
+
getModelOverride() {
|
|
1432
|
+
return this.modelOverride;
|
|
1433
|
+
}
|
|
1434
|
+
/** Public wrapper for graceful restart — wait for instance to be idle. */
|
|
1435
|
+
waitForIdle(quietMs = 5000) {
|
|
1436
|
+
return new Promise((resolve) => {
|
|
1437
|
+
const monitor = this.transcriptMonitor;
|
|
1438
|
+
// No transcript monitor (e.g. lightweight mode) — no events to wait for.
|
|
1439
|
+
if (!monitor) {
|
|
1440
|
+
setTimeout(resolve, quietMs);
|
|
1441
|
+
return;
|
|
1442
|
+
}
|
|
1443
|
+
const events = ["tool_use", "tool_result", "assistant_text"];
|
|
1444
|
+
let timer;
|
|
1445
|
+
let settled = false;
|
|
1446
|
+
const done = () => {
|
|
1447
|
+
if (settled)
|
|
1448
|
+
return;
|
|
1449
|
+
settled = true;
|
|
1450
|
+
// Always remove from the same monitor we registered on — avoids
|
|
1451
|
+
// imbalance if this.transcriptMonitor is later reassigned.
|
|
1452
|
+
events.forEach(e => monitor.removeListener(e, reset));
|
|
1453
|
+
resolve();
|
|
1454
|
+
};
|
|
1455
|
+
const reset = () => {
|
|
1456
|
+
clearTimeout(timer);
|
|
1457
|
+
timer = setTimeout(done, quietMs);
|
|
1458
|
+
};
|
|
1459
|
+
timer = setTimeout(done, quietMs);
|
|
1460
|
+
events.forEach(e => monitor.on(e, reset));
|
|
1461
|
+
});
|
|
1462
|
+
}
|
|
1463
|
+
// ── Context Rotation v3: Ring buffers ─────────────────────────
|
|
1464
|
+
recordRecentUserMessage(content, meta) {
|
|
1465
|
+
// Only record real user messages, not cross-instance messages
|
|
1466
|
+
if (!meta.user || meta.user.startsWith("instance:"))
|
|
1467
|
+
return;
|
|
1468
|
+
this.recentUserMessages.push({
|
|
1469
|
+
text: content.slice(0, 200),
|
|
1470
|
+
ts: meta.ts ?? new Date().toISOString(),
|
|
1471
|
+
});
|
|
1472
|
+
if (this.recentUserMessages.length > 10)
|
|
1473
|
+
this.recentUserMessages.shift();
|
|
1474
|
+
}
|
|
1475
|
+
recordRecentEvent(event) {
|
|
1476
|
+
this.recentEvents.push(event);
|
|
1477
|
+
if (this.recentEvents.length > 15)
|
|
1478
|
+
this.recentEvents.shift();
|
|
1479
|
+
}
|
|
1480
|
+
recordRecentToolActivity(summary) {
|
|
1481
|
+
if (!summary)
|
|
1482
|
+
return;
|
|
1483
|
+
this.recentToolActivity.push(summary);
|
|
1484
|
+
if (this.recentToolActivity.length > 10)
|
|
1485
|
+
this.recentToolActivity.shift();
|
|
1486
|
+
}
|
|
1487
|
+
// ── Context Rotation v3: Snapshot writer ──────────────────────
|
|
1488
|
+
writeRotationSnapshot(reason) {
|
|
1489
|
+
const statusline = this.readStatuslineData();
|
|
1490
|
+
const snapshot = {
|
|
1491
|
+
instance: this.name,
|
|
1492
|
+
reason,
|
|
1493
|
+
created_at: new Date().toISOString(),
|
|
1494
|
+
working_directory: this.config.working_directory,
|
|
1495
|
+
session_id: this.backend?.getSessionId() ?? null,
|
|
1496
|
+
context_pct: this.readContextPercentage(),
|
|
1497
|
+
recent_user_messages: [...this.recentUserMessages],
|
|
1498
|
+
recent_events: [...this.recentEvents],
|
|
1499
|
+
recent_tool_activity: [...this.recentToolActivity],
|
|
1500
|
+
last_statusline: statusline ? {
|
|
1501
|
+
model: statusline.model?.display_name,
|
|
1502
|
+
cost_usd: statusline.cost?.total_cost_usd,
|
|
1503
|
+
five_hour_pct: statusline.rate_limits?.five_hour?.used_percentage,
|
|
1504
|
+
seven_day_pct: statusline.rate_limits?.seven_day?.used_percentage,
|
|
1505
|
+
} : undefined,
|
|
1506
|
+
};
|
|
1507
|
+
const snapshotPath = join(this.instanceDir, "rotation-state.json");
|
|
1508
|
+
writeFileSync(snapshotPath, JSON.stringify(snapshot, null, 2));
|
|
1509
|
+
this.snapshotConsumed = false;
|
|
1510
|
+
this.logger.info({
|
|
1511
|
+
reason,
|
|
1512
|
+
context_pct: snapshot.context_pct,
|
|
1513
|
+
user_msg_count: snapshot.recent_user_messages?.length ?? 0,
|
|
1514
|
+
event_count: snapshot.recent_events?.length ?? 0,
|
|
1515
|
+
}, "Snapshot written");
|
|
1516
|
+
return snapshot;
|
|
1517
|
+
}
|
|
1518
|
+
/** Collect ring buffer data for handover to a replacement instance. */
|
|
1519
|
+
collectHandoverContext() {
|
|
1520
|
+
const lines = [];
|
|
1521
|
+
if (this.recentUserMessages.length > 0) {
|
|
1522
|
+
lines.push("Recent user messages:");
|
|
1523
|
+
for (const msg of this.recentUserMessages)
|
|
1524
|
+
lines.push(`- ${msg.text}`);
|
|
1525
|
+
lines.push("");
|
|
1526
|
+
}
|
|
1527
|
+
if (this.recentEvents.length > 0) {
|
|
1528
|
+
lines.push("Recent activity:");
|
|
1529
|
+
for (const ev of this.recentEvents) {
|
|
1530
|
+
if (ev.type === "assistant_text")
|
|
1531
|
+
lines.push(`- Assistant: ${ev.preview}`);
|
|
1532
|
+
else
|
|
1533
|
+
lines.push(`- ${ev.name}${ev.preview ? `: ${ev.preview}` : ""}`);
|
|
1534
|
+
}
|
|
1535
|
+
lines.push("");
|
|
1536
|
+
}
|
|
1537
|
+
if (this.recentToolActivity.length > 0) {
|
|
1538
|
+
lines.push("Recent tool activity:");
|
|
1539
|
+
for (const t of this.recentToolActivity)
|
|
1540
|
+
lines.push(`- ${t}`);
|
|
1541
|
+
lines.push("");
|
|
1542
|
+
}
|
|
1543
|
+
const pct = this.readContextPercentage();
|
|
1544
|
+
if (pct != null)
|
|
1545
|
+
lines.push(`Context usage: ${pct}%`);
|
|
1546
|
+
return lines.join("\n").slice(0, 4000);
|
|
1547
|
+
}
|
|
1548
|
+
appendCrashHistory(data) {
|
|
1549
|
+
try {
|
|
1550
|
+
const historyPath = join(this.instanceDir, "crash-history.jsonl");
|
|
1551
|
+
const entry = {
|
|
1552
|
+
timestamp: new Date().toISOString(),
|
|
1553
|
+
instance: this.name,
|
|
1554
|
+
crashType: data.crashType,
|
|
1555
|
+
exitCode: data.exitCode,
|
|
1556
|
+
lastOutput: data.lastOutput,
|
|
1557
|
+
crashCount: this.crashCount + 1,
|
|
1558
|
+
crashesInWindow: this.crashTimestamps.length,
|
|
1559
|
+
};
|
|
1560
|
+
appendFileSync(historyPath, JSON.stringify(entry) + "\n");
|
|
1561
|
+
// Rotate based on file size (cheaper than parsing every time)
|
|
1562
|
+
try {
|
|
1563
|
+
const stat = statSync(historyPath);
|
|
1564
|
+
if (stat.size > 512_000) {
|
|
1565
|
+
const content = readFileSync(historyPath, "utf-8");
|
|
1566
|
+
const lines = content.trim().split("\n").filter(Boolean);
|
|
1567
|
+
writeFileSync(historyPath, lines.slice(-50).join("\n") + "\n");
|
|
1568
|
+
}
|
|
1569
|
+
}
|
|
1570
|
+
catch { /* best effort */ }
|
|
1571
|
+
}
|
|
1572
|
+
catch { /* best effort */ }
|
|
1573
|
+
}
|
|
1574
|
+
readStatuslineData() {
|
|
1575
|
+
try {
|
|
1576
|
+
const sf = join(this.instanceDir, "statusline.json");
|
|
1577
|
+
return JSON.parse(readFileSync(sf, "utf-8"));
|
|
1578
|
+
}
|
|
1579
|
+
catch {
|
|
1580
|
+
return null;
|
|
1581
|
+
}
|
|
1582
|
+
}
|
|
1583
|
+
// ── Repo Checkout ─────────────────────────────────────────
|
|
1584
|
+
async handleCheckoutRepo(args, respond) {
|
|
1585
|
+
const { execFile: execFileCb } = await import("node:child_process");
|
|
1586
|
+
const { promisify } = await import("node:util");
|
|
1587
|
+
const execFileAsync = promisify(execFileCb);
|
|
1588
|
+
const rawSource = args.source;
|
|
1589
|
+
if (!rawSource) {
|
|
1590
|
+
respond(null, "checkout_repo: missing required argument 'source'");
|
|
1591
|
+
return;
|
|
1592
|
+
}
|
|
1593
|
+
const expanded = rawSource.replace(/^~/, process.env.HOME || "~");
|
|
1594
|
+
// Resolve instance name to working_directory via IPC query
|
|
1595
|
+
// If source doesn't look like a path, treat it as an instance name
|
|
1596
|
+
if (!expanded.startsWith("/")) {
|
|
1597
|
+
// Broadcast to get instance info — but we don't have fleet config in daemon.
|
|
1598
|
+
// Instead, rely on fleet manager to resolve. For now, reject non-path sources.
|
|
1599
|
+
respond(null, `Source must be an absolute path or ~-prefixed path. Use describe_instance to find a repo's working_directory.`);
|
|
1600
|
+
return;
|
|
1601
|
+
}
|
|
1602
|
+
// Normalize to collapse any `..` segments.
|
|
1603
|
+
const source = resolve(expanded);
|
|
1604
|
+
const branch = args.branch || "HEAD";
|
|
1605
|
+
// Validate branch ref: git refs allow [A-Za-z0-9._/-], reject `..` to prevent
|
|
1606
|
+
// worktreePath escape via basename(source)-${branch.replace("/", "-")}.
|
|
1607
|
+
// Reject leading `-` or `+` so git cannot interpret the value as an option
|
|
1608
|
+
// flag (e.g. `--upload-pack=...`), which execFile cannot prevent on its own.
|
|
1609
|
+
if (!/^[A-Za-z0-9._/-]+$/.test(branch) || branch.includes("..") || /^[-+]/.test(branch)) {
|
|
1610
|
+
respond(null, `Invalid branch name: ${branch}`);
|
|
1611
|
+
return;
|
|
1612
|
+
}
|
|
1613
|
+
// Verify it's a git repo
|
|
1614
|
+
try {
|
|
1615
|
+
await execFileAsync("git", ["rev-parse", "--git-dir"], { cwd: source });
|
|
1616
|
+
}
|
|
1617
|
+
catch {
|
|
1618
|
+
respond(null, `Not a git repository: ${source}`);
|
|
1619
|
+
return;
|
|
1620
|
+
}
|
|
1621
|
+
const repoDir = join(this.instanceDir, "repos");
|
|
1622
|
+
mkdirSync(repoDir, { recursive: true });
|
|
1623
|
+
const safeName = `${basename(source)}-${branch.replace(/\//g, "-")}`;
|
|
1624
|
+
const worktreePath = join(repoDir, safeName);
|
|
1625
|
+
try {
|
|
1626
|
+
// Resolve branch/ref to verify it exists. Use `--` so git never treats
|
|
1627
|
+
// branch as an option flag (defense in depth on top of the regex above).
|
|
1628
|
+
await execFileAsync("git", ["rev-parse", "--verify", "--", branch], { cwd: source });
|
|
1629
|
+
await execFileAsync("git", ["worktree", "add", "--detach", worktreePath, branch], { cwd: source });
|
|
1630
|
+
const { stdout: commitHash } = await execFileAsync("git", ["rev-parse", "--short", "HEAD"], { cwd: worktreePath });
|
|
1631
|
+
respond({ path: worktreePath, branch, source, commit: commitHash.trim() });
|
|
1632
|
+
}
|
|
1633
|
+
catch (err) {
|
|
1634
|
+
respond(null, `Failed to checkout: ${err.message}`);
|
|
1635
|
+
}
|
|
1636
|
+
}
|
|
1637
|
+
async handleReleaseRepo(args, respond) {
|
|
1638
|
+
const repoPath = args.path;
|
|
1639
|
+
const reposDir = join(this.instanceDir, "repos");
|
|
1640
|
+
// Safety: only allow releasing paths under our repos/ directory
|
|
1641
|
+
if (!repoPath.startsWith(reposDir)) {
|
|
1642
|
+
respond(null, `Cannot release path outside instance repos directory`);
|
|
1643
|
+
return;
|
|
1644
|
+
}
|
|
1645
|
+
try {
|
|
1646
|
+
const { execFile: execFileCb } = await import("node:child_process");
|
|
1647
|
+
const { promisify } = await import("node:util");
|
|
1648
|
+
const execFileAsync = promisify(execFileCb);
|
|
1649
|
+
await execFileAsync("git", ["worktree", "remove", "--force", repoPath]);
|
|
1650
|
+
}
|
|
1651
|
+
catch {
|
|
1652
|
+
// Fallback: rm directly if git worktree remove fails
|
|
1653
|
+
try {
|
|
1654
|
+
rmSync(repoPath, { recursive: true, force: true });
|
|
1655
|
+
}
|
|
1656
|
+
catch { /* best effort */ }
|
|
1657
|
+
}
|
|
1658
|
+
respond({ released: true, path: repoPath });
|
|
1659
|
+
}
|
|
1660
|
+
buildSnapshotPrompt() {
|
|
1661
|
+
const snapshotPath = join(this.instanceDir, "rotation-state.json");
|
|
1662
|
+
try {
|
|
1663
|
+
if (!existsSync(snapshotPath))
|
|
1664
|
+
return null;
|
|
1665
|
+
const snapshot = JSON.parse(readFileSync(snapshotPath, "utf-8"));
|
|
1666
|
+
// Mark consumed in-memory to prevent re-injection on crash respawn.
|
|
1667
|
+
// Delete file so subsequent daemon restarts don't re-inject stale snapshot.
|
|
1668
|
+
this.snapshotConsumed = true;
|
|
1669
|
+
try {
|
|
1670
|
+
unlinkSync(snapshotPath);
|
|
1671
|
+
}
|
|
1672
|
+
catch { /* best effort */ }
|
|
1673
|
+
const lines = ["## Previous Session Snapshot", ""];
|
|
1674
|
+
lines.push(`Restart reason: ${snapshot.reason}`);
|
|
1675
|
+
if (snapshot.context_pct != null)
|
|
1676
|
+
lines.push(`Previous context usage: ${snapshot.context_pct}%`);
|
|
1677
|
+
if (snapshot.session_id)
|
|
1678
|
+
lines.push(`Previous session id: ${snapshot.session_id}`);
|
|
1679
|
+
lines.push(`Working directory: ${snapshot.working_directory}`);
|
|
1680
|
+
lines.push("");
|
|
1681
|
+
if (snapshot.recent_user_messages && snapshot.recent_user_messages.length > 0) {
|
|
1682
|
+
lines.push("Recent user messages:");
|
|
1683
|
+
for (const msg of snapshot.recent_user_messages) {
|
|
1684
|
+
lines.push(`- ${msg.text}`);
|
|
1685
|
+
}
|
|
1686
|
+
lines.push("");
|
|
1687
|
+
}
|
|
1688
|
+
if (snapshot.recent_events && snapshot.recent_events.length > 0) {
|
|
1689
|
+
lines.push("Recent activity:");
|
|
1690
|
+
for (const ev of snapshot.recent_events) {
|
|
1691
|
+
if (ev.type === "assistant_text") {
|
|
1692
|
+
lines.push(`- Assistant: ${ev.preview}`);
|
|
1693
|
+
}
|
|
1694
|
+
else {
|
|
1695
|
+
lines.push(`- ${ev.name}${ev.preview ? `: ${ev.preview}` : ""}`);
|
|
1696
|
+
}
|
|
1697
|
+
}
|
|
1698
|
+
lines.push("");
|
|
1699
|
+
}
|
|
1700
|
+
lines.push("Instruction:");
|
|
1701
|
+
lines.push("Resume work from this snapshot when relevant. Do not assume anything not stated here.");
|
|
1702
|
+
// Enforce 2000-char budget
|
|
1703
|
+
let result = lines.join("\n");
|
|
1704
|
+
if (result.length > 2000) {
|
|
1705
|
+
result = result.slice(0, 1997) + "...";
|
|
1706
|
+
}
|
|
1707
|
+
return result;
|
|
1708
|
+
}
|
|
1709
|
+
catch {
|
|
1710
|
+
return null;
|
|
1711
|
+
}
|
|
1712
|
+
}
|
|
1713
|
+
}
|
|
1714
|
+
//# sourceMappingURL=daemon.js.map
|