@suzuke/agend 0.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +78 -0
- package/README.zh-TW.md +79 -0
- package/dist/access-path.d.ts +7 -0
- package/dist/access-path.js +12 -0
- package/dist/access-path.js.map +1 -0
- package/dist/backend/claude-code.d.ts +13 -0
- package/dist/backend/claude-code.js +114 -0
- package/dist/backend/claude-code.js.map +1 -0
- package/dist/backend/codex.d.ts +10 -0
- package/dist/backend/codex.js +58 -0
- package/dist/backend/codex.js.map +1 -0
- package/dist/backend/factory.d.ts +2 -0
- package/dist/backend/factory.js +19 -0
- package/dist/backend/factory.js.map +1 -0
- package/dist/backend/gemini-cli.d.ts +10 -0
- package/dist/backend/gemini-cli.js +68 -0
- package/dist/backend/gemini-cli.js.map +1 -0
- package/dist/backend/index.d.ts +6 -0
- package/dist/backend/index.js +6 -0
- package/dist/backend/index.js.map +1 -0
- package/dist/backend/opencode.d.ts +10 -0
- package/dist/backend/opencode.js +63 -0
- package/dist/backend/opencode.js.map +1 -0
- package/dist/backend/types.d.ts +26 -0
- package/dist/backend/types.js +2 -0
- package/dist/backend/types.js.map +1 -0
- package/dist/channel/access-manager.d.ts +18 -0
- package/dist/channel/access-manager.js +149 -0
- package/dist/channel/access-manager.js.map +1 -0
- package/dist/channel/adapters/discord.d.ts +45 -0
- package/dist/channel/adapters/discord.js +366 -0
- package/dist/channel/adapters/discord.js.map +1 -0
- package/dist/channel/adapters/telegram.d.ts +58 -0
- package/dist/channel/adapters/telegram.js +569 -0
- package/dist/channel/adapters/telegram.js.map +1 -0
- package/dist/channel/attachment-handler.d.ts +15 -0
- package/dist/channel/attachment-handler.js +55 -0
- package/dist/channel/attachment-handler.js.map +1 -0
- package/dist/channel/factory.d.ts +12 -0
- package/dist/channel/factory.js +38 -0
- package/dist/channel/factory.js.map +1 -0
- package/dist/channel/ipc-bridge.d.ts +26 -0
- package/dist/channel/ipc-bridge.js +170 -0
- package/dist/channel/ipc-bridge.js.map +1 -0
- package/dist/channel/mcp-server.d.ts +10 -0
- package/dist/channel/mcp-server.js +196 -0
- package/dist/channel/mcp-server.js.map +1 -0
- package/dist/channel/mcp-tools.d.ts +909 -0
- package/dist/channel/mcp-tools.js +346 -0
- package/dist/channel/mcp-tools.js.map +1 -0
- package/dist/channel/message-bus.d.ts +17 -0
- package/dist/channel/message-bus.js +86 -0
- package/dist/channel/message-bus.js.map +1 -0
- package/dist/channel/message-queue.d.ts +39 -0
- package/dist/channel/message-queue.js +248 -0
- package/dist/channel/message-queue.js.map +1 -0
- package/dist/channel/tool-router.d.ts +6 -0
- package/dist/channel/tool-router.js +69 -0
- package/dist/channel/tool-router.js.map +1 -0
- package/dist/channel/tool-tracker.d.ts +13 -0
- package/dist/channel/tool-tracker.js +58 -0
- package/dist/channel/tool-tracker.js.map +1 -0
- package/dist/channel/types.d.ts +116 -0
- package/dist/channel/types.js +2 -0
- package/dist/channel/types.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +782 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +8 -0
- package/dist/config.js +85 -0
- package/dist/config.js.map +1 -0
- package/dist/context-guardian.d.ts +29 -0
- package/dist/context-guardian.js +123 -0
- package/dist/context-guardian.js.map +1 -0
- package/dist/cost-guard.d.ts +21 -0
- package/dist/cost-guard.js +113 -0
- package/dist/cost-guard.js.map +1 -0
- package/dist/daemon-entry.d.ts +1 -0
- package/dist/daemon-entry.js +29 -0
- package/dist/daemon-entry.js.map +1 -0
- package/dist/daemon.d.ts +88 -0
- package/dist/daemon.js +821 -0
- package/dist/daemon.js.map +1 -0
- package/dist/daily-summary.d.ts +13 -0
- package/dist/daily-summary.js +55 -0
- package/dist/daily-summary.js.map +1 -0
- package/dist/event-log.d.ts +22 -0
- package/dist/event-log.js +66 -0
- package/dist/event-log.js.map +1 -0
- package/dist/export-import.d.ts +2 -0
- package/dist/export-import.js +110 -0
- package/dist/export-import.js.map +1 -0
- package/dist/fleet-context.d.ts +36 -0
- package/dist/fleet-context.js +4 -0
- package/dist/fleet-context.js.map +1 -0
- package/dist/fleet-manager.d.ts +115 -0
- package/dist/fleet-manager.js +1739 -0
- package/dist/fleet-manager.js.map +1 -0
- package/dist/fleet-system-prompt.d.ts +11 -0
- package/dist/fleet-system-prompt.js +60 -0
- package/dist/fleet-system-prompt.js.map +1 -0
- package/dist/hang-detector.d.ts +16 -0
- package/dist/hang-detector.js +53 -0
- package/dist/hang-detector.js.map +1 -0
- package/dist/index.d.ts +8 -0
- package/dist/index.js +6 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +3 -0
- package/dist/logger.js +63 -0
- package/dist/logger.js.map +1 -0
- package/dist/plugin/agend/.claude-plugin/plugin.json +5 -0
- package/dist/scheduler/db.d.ts +16 -0
- package/dist/scheduler/db.js +132 -0
- package/dist/scheduler/db.js.map +1 -0
- package/dist/scheduler/db.test.d.ts +1 -0
- package/dist/scheduler/db.test.js +92 -0
- package/dist/scheduler/db.test.js.map +1 -0
- package/dist/scheduler/index.d.ts +4 -0
- package/dist/scheduler/index.js +4 -0
- package/dist/scheduler/index.js.map +1 -0
- package/dist/scheduler/scheduler.d.ts +25 -0
- package/dist/scheduler/scheduler.js +119 -0
- package/dist/scheduler/scheduler.js.map +1 -0
- package/dist/scheduler/scheduler.test.d.ts +1 -0
- package/dist/scheduler/scheduler.test.js +119 -0
- package/dist/scheduler/scheduler.test.js.map +1 -0
- package/dist/scheduler/types.d.ts +47 -0
- package/dist/scheduler/types.js +7 -0
- package/dist/scheduler/types.js.map +1 -0
- package/dist/service-installer.d.ts +14 -0
- package/dist/service-installer.js +91 -0
- package/dist/service-installer.js.map +1 -0
- package/dist/setup-wizard.d.ts +14 -0
- package/dist/setup-wizard.js +517 -0
- package/dist/setup-wizard.js.map +1 -0
- package/dist/stt.d.ts +10 -0
- package/dist/stt.js +33 -0
- package/dist/stt.js.map +1 -0
- package/dist/tmux-manager.d.ts +22 -0
- package/dist/tmux-manager.js +132 -0
- package/dist/tmux-manager.js.map +1 -0
- package/dist/topic-commands.d.ts +22 -0
- package/dist/topic-commands.js +176 -0
- package/dist/topic-commands.js.map +1 -0
- package/dist/transcript-monitor.d.ts +21 -0
- package/dist/transcript-monitor.js +149 -0
- package/dist/transcript-monitor.js.map +1 -0
- package/dist/types.d.ts +153 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/webhook-emitter.d.ts +15 -0
- package/dist/webhook-emitter.js +41 -0
- package/dist/webhook-emitter.js.map +1 -0
- package/package.json +58 -4
- package/templates/launchd.plist.ejs +29 -0
- package/templates/systemd.service.ejs +15 -0
- package/index.js +0 -1
|
@@ -0,0 +1,1739 @@
|
|
|
1
|
+
import { existsSync, readFileSync, mkdirSync, writeFileSync, unlinkSync } from "node:fs";
|
|
2
|
+
import { access } from "node:fs/promises";
|
|
3
|
+
import { createServer } from "node:http";
|
|
4
|
+
import { join, dirname, basename } from "node:path";
|
|
5
|
+
import { homedir } from "node:os";
|
|
6
|
+
import { fileURLToPath } from "node:url";
|
|
7
|
+
import yaml from "js-yaml";
|
|
8
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
9
|
+
const __dirname = dirname(__filename);
|
|
10
|
+
import { isProbeableRouteTarget } from "./fleet-context.js";
|
|
11
|
+
import { loadFleetConfig, DEFAULT_COST_GUARD, DEFAULT_DAILY_SUMMARY, DEFAULT_INSTANCE_CONFIG } from "./config.js";
|
|
12
|
+
import { EventLog } from "./event-log.js";
|
|
13
|
+
import { CostGuard, formatCents } from "./cost-guard.js";
|
|
14
|
+
import { TmuxManager } from "./tmux-manager.js";
|
|
15
|
+
import { AccessManager } from "./channel/access-manager.js";
|
|
16
|
+
import { IpcClient } from "./channel/ipc-bridge.js";
|
|
17
|
+
import { createAdapter } from "./channel/factory.js";
|
|
18
|
+
import { createLogger } from "./logger.js";
|
|
19
|
+
import { processAttachments } from "./channel/attachment-handler.js";
|
|
20
|
+
import { routeToolCall } from "./channel/tool-router.js";
|
|
21
|
+
import { Scheduler } from "./scheduler/index.js";
|
|
22
|
+
import { DEFAULT_SCHEDULER_CONFIG } from "./scheduler/index.js";
|
|
23
|
+
import { TopicCommands, sanitizeInstanceName } from "./topic-commands.js";
|
|
24
|
+
import { DailySummary } from "./daily-summary.js";
|
|
25
|
+
import { WebhookEmitter } from "./webhook-emitter.js";
|
|
26
|
+
// Name of the shared tmux session that hosts every fleet-managed instance window.
const TMUX_SESSION = "agend";
|
|
27
|
+
/**
 * Decide which thread id a reply should be posted to.
 *
 * Priority:
 *  1. A non-empty string thread id supplied by the tool arguments wins.
 *  2. Instances bound to the General Topic reply in the main chat (no thread).
 *  3. Otherwise fall back to the instance's configured topic_id (stringified),
 *     or undefined when no topic is configured.
 *
 * @param {unknown} argsThreadId - thread id from the tool call, if any
 * @param {{ general_topic?: boolean, topic_id?: number|string }|undefined} instanceConfig
 * @returns {string|undefined}
 */
export function resolveReplyThreadId(argsThreadId, instanceConfig) {
    const hasExplicitThread = typeof argsThreadId === "string" && argsThreadId.length > 0;
    if (hasExplicitThread) {
        return argsThreadId;
    }
    if (instanceConfig?.general_topic) {
        // General Topic instances answer in the main chat, not a thread.
        return undefined;
    }
    const topicId = instanceConfig?.topic_id;
    return topicId == null ? undefined : String(topicId);
}
|
|
36
|
+
/**
 * Orchestrates a fleet of agent daemon instances: loads fleet.yaml, starts
 * and stops per-instance daemons inside a shared tmux session, routes
 * channel (topic) messages to instances over IPC, and wires cost-guard,
 * hang-detection, webhook and daily-summary plumbing.
 */
export class FleetManager {
    // Root directory holding all fleet state (instances/, events.db, fleet.pid, …).
    dataDir;
    // NOTE(review): never read or written in the code visible here — possibly legacy. TODO confirm.
    children = new Map();
    // instanceName → Daemon for instances started by this manager.
    daemons = new Map();
    // Parsed fleet.yaml; set by loadConfig(), null until then.
    fleetConfig = null;
    // Shared channel adapter (topic mode); null until startSharedAdapter() runs.
    adapter = null;
    // topicId → RouteTarget ({ kind, name }); rebuilt via buildRoutingTable().
    routingTable = new Map();
    // instanceName → IpcClient connected to that daemon's channel.sock.
    instanceIpcClients = new Map();
    // Schedule runner; created in startAll() when topic mode is active.
    scheduler = null;
    // Path of the config file last passed to startAll().
    configPath = "";
    logger = createLogger("info");
    // Topic/bot-command helper; constructed in the constructor with a back-reference.
    topicCommands;
    // sessionName → instanceName mapping for external sessions
    sessionRegistry = new Map();
    // Append-only event database (events.db); opened in startAll().
    eventLog = null;
    // Daily spend tracking and limits; created in startAll().
    costGuard = null;
    // instanceName → statusline watcher handle (see startStatuslineWatcher).
    statuslineWatchers = new Map();
    // instanceName → rate-limit state consumed by replyIfRateLimited().
    instanceRateLimits = new Map();
    // Daily summary sender; created in startAll().
    dailySummary = null;
    // Optional webhook fan-out; created in startAll() when webhooks are configured.
    webhookEmitter = null;
    // Topic icon + auto-archive state
    topicIcons = {};
    // instanceName → last-activity timestamp; drives the idle-archive poller.
    lastActivity = new Map();
    // Thread ids we archived ourselves, so their topic_closed events are ignored.
    archivedTopics = new Set();
    archiveTimer = null;
    // Idle time after which a topic is auto-archived.
    static ARCHIVE_IDLE_MS = 24 * 60 * 60 * 1000; // 24 hours
    // Model failover state
    failoverActive = new Map(); // instance → current failover model
    // Health endpoint
    healthServer = null;
    // Presumably the fleet start timestamp reported by the health endpoint — TODO confirm.
    startedAt = 0;
|
|
67
|
+
/**
 * @param {string} dataDir - root directory for all fleet state
 *   (per-instance dirs, databases, pid files).
 */
constructor(dataDir) {
    this.dataDir = dataDir;
    // TopicCommands keeps a back-reference to this manager for command handling.
    this.topicCommands = new TopicCommands(this);
}
|
|
71
|
+
/** Load fleet.yaml and build routing table */
|
|
72
|
+
loadConfig(configPath) {
|
|
73
|
+
this.fleetConfig = loadFleetConfig(configPath);
|
|
74
|
+
return this.fleetConfig;
|
|
75
|
+
}
|
|
76
|
+
/** Build topic routing table: { topicId -> RouteTarget } */
|
|
77
|
+
buildRoutingTable() {
|
|
78
|
+
const table = new Map();
|
|
79
|
+
if (!this.fleetConfig)
|
|
80
|
+
return table;
|
|
81
|
+
for (const [name, inst] of Object.entries(this.fleetConfig.instances)) {
|
|
82
|
+
if (inst.topic_id != null) {
|
|
83
|
+
table.set(inst.topic_id, {
|
|
84
|
+
kind: inst.general_topic ? "general" : "instance",
|
|
85
|
+
name,
|
|
86
|
+
});
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
return table;
|
|
90
|
+
}
|
|
91
|
+
getInstanceDir(name) {
|
|
92
|
+
return join(this.dataDir, "instances", name);
|
|
93
|
+
}
|
|
94
|
+
getInstanceStatus(name) {
|
|
95
|
+
const pidPath = join(this.getInstanceDir(name), "daemon.pid");
|
|
96
|
+
if (!existsSync(pidPath))
|
|
97
|
+
return "stopped";
|
|
98
|
+
const pid = parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
|
|
99
|
+
try {
|
|
100
|
+
process.kill(pid, 0);
|
|
101
|
+
return "running";
|
|
102
|
+
}
|
|
103
|
+
catch {
|
|
104
|
+
return "crashed";
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
/**
 * Start one daemon instance and wire its lifecycle events.
 *
 * No-ops when the instance is already running or its working directory is
 * missing. Creates the instance state dir, builds the configured backend,
 * starts the Daemon, and subscribes to restart/hang/crash-loop events so
 * they are logged, surfaced to the instance's topic, and forwarded to
 * webhooks.
 *
 * @param {string} name - instance name
 * @param {object} config - instance config (working_directory, backend, …)
 * @param {boolean} topicMode - whether the fleet runs in channel topic mode
 */
async startInstance(name, config, topicMode) {
    if (this.daemons.has(name)) {
        this.logger.info({ name }, "Instance already running, skipping");
        return;
    }
    if (!existsSync(config.working_directory)) {
        this.logger.error({ name, working_directory: config.working_directory }, "Working directory does not exist — skipping instance");
        return;
    }
    const instanceDir = this.getInstanceDir(name);
    mkdirSync(instanceDir, { recursive: true });
    // Dynamic imports avoid a static circular dependency with daemon.js.
    const { Daemon } = await import("./daemon.js");
    const { createBackend } = await import("./backend/factory.js");
    // Backend resolution: per-instance → fleet default → claude-code.
    const backendName = config.backend ?? this.fleetConfig?.defaults?.backend ?? "claude-code";
    const backend = createBackend(backendName, instanceDir);
    const daemon = new Daemon(name, config, instanceDir, topicMode, backend);
    await daemon.start();
    this.daemons.set(name, daemon);
    // Context rotations are recorded in the event log for the daily summary.
    daemon.on("restart_complete", (data) => {
        this.eventLog?.insert(name, "context_rotation", data);
        this.logger.info({ name, ...data }, "Context restart completed");
    });
    const hangDetector = daemon.getHangDetector();
    if (hangDetector) {
        hangDetector.on("hang", () => {
            this.eventLog?.insert(name, "hang_detected", {});
            this.logger.warn({ name }, "Instance appears hung");
            this.sendHangNotification(name);
            this.webhookEmitter?.emit("hang", name);
        });
    }
    daemon.on("crash_loop", () => {
        this.eventLog?.insert(name, "crash_loop", {});
        this.logger.error({ name }, "Instance in crash loop — respawn paused");
        this.notifyInstanceTopic(name, `🔴 ${name} keeps crashing shortly after launch — respawn paused. Check rate limits or run \`agend fleet restart\`.`);
        this.setTopicIcon(name, "red");
    });
    // Mark the topic healthy and record activity so the idle archiver resets.
    this.setTopicIcon(name, "green");
    this.touchActivity(name);
}
|
|
147
|
+
async stopInstance(name) {
|
|
148
|
+
this.setTopicIcon(name, "remove");
|
|
149
|
+
this.failoverActive.delete(name);
|
|
150
|
+
const daemon = this.daemons.get(name);
|
|
151
|
+
if (daemon) {
|
|
152
|
+
await daemon.stop();
|
|
153
|
+
this.daemons.delete(name);
|
|
154
|
+
}
|
|
155
|
+
else {
|
|
156
|
+
const pidPath = join(this.getInstanceDir(name), "daemon.pid");
|
|
157
|
+
if (existsSync(pidPath)) {
|
|
158
|
+
const pid = parseInt(readFileSync(pidPath, "utf-8").trim(), 10);
|
|
159
|
+
try {
|
|
160
|
+
process.kill(pid, "SIGTERM");
|
|
161
|
+
}
|
|
162
|
+
catch (e) {
|
|
163
|
+
this.logger.debug({ err: e, pid }, "SIGTERM failed for stale process");
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
/** Load .env file from data dir into process.env */
|
|
169
|
+
loadEnvFile() {
|
|
170
|
+
const envPath = join(this.dataDir, ".env");
|
|
171
|
+
if (!existsSync(envPath))
|
|
172
|
+
return;
|
|
173
|
+
const content = readFileSync(envPath, "utf-8");
|
|
174
|
+
for (const line of content.split("\n")) {
|
|
175
|
+
const trimmed = line.trim();
|
|
176
|
+
if (!trimmed || trimmed.startsWith("#"))
|
|
177
|
+
continue;
|
|
178
|
+
const eqIdx = trimmed.indexOf("=");
|
|
179
|
+
if (eqIdx < 0)
|
|
180
|
+
continue;
|
|
181
|
+
const key = trimmed.slice(0, eqIdx);
|
|
182
|
+
const raw = trimmed.slice(eqIdx + 1);
|
|
183
|
+
const value = raw.replace(/^["'](.*)["']$/, '$1');
|
|
184
|
+
if (!process.env[key]) {
|
|
185
|
+
process.env[key] = value;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
}
|
|
189
|
+
/** Start all instances from fleet config */
/**
 * Full fleet boot sequence. In order: load .env and fleet.yaml, ensure the
 * shared tmux session, stop/clean previous daemons and windows, write
 * fleet.pid, open the event log, wire cost guard + webhooks + daily summary,
 * auto-create a "general" instance when none is bound to the General Topic,
 * start every instance, and — in topic mode — bring up the scheduler and
 * shared channel adapter, build routing, and connect IPC. Finally starts the
 * health endpoint and installs SIGHUP/SIGUSR1/SIGUSR2 handlers.
 *
 * @param {string} configPath - path to fleet.yaml
 */
async startAll(configPath) {
    this.configPath = configPath;
    this.loadEnvFile();
    const fleet = this.loadConfig(configPath);
    const topicMode = fleet.channel?.mode === "topic";
    await TmuxManager.ensureSession(TMUX_SESSION);
    // Stop any running daemons first (their health checks would respawn killed windows)
    for (const [name] of this.daemons) {
        await this.stopInstance(name);
    }
    // Then kill all remaining agend instance windows to prevent orphans
    const existingWindows = await TmuxManager.listWindows(TMUX_SESSION);
    for (const w of existingWindows) {
        if (w.name !== "zsh") {
            const tm = new TmuxManager(TMUX_SESSION, w.id);
            await tm.killWindow();
        }
    }
    // Record our own pid so the CLI can signal the fleet process later.
    const pidPath = join(this.dataDir, "fleet.pid");
    writeFileSync(pidPath, String(process.pid), "utf-8");
    this.eventLog = new EventLog(join(this.dataDir, "events.db"));
    // Config-layer merge: built-in defaults overridden by fleet defaults.
    const costGuardConfig = {
        ...DEFAULT_COST_GUARD,
        ...fleet.defaults?.cost_guard ?? {},
    };
    this.costGuard = new CostGuard(costGuardConfig, this.eventLog);
    this.costGuard.startMidnightReset();
    const webhookConfigs = fleet.defaults?.webhooks ?? [];
    if (webhookConfigs.length > 0) {
        this.webhookEmitter = new WebhookEmitter(webhookConfigs, this.logger);
        this.logger.info({ count: webhookConfigs.length }, "Webhook emitter initialized");
    }
    // Cost warnings notify the instance topic; the hard limit pauses the instance.
    this.costGuard.on("warn", (instance, totalCents, limitCents) => {
        this.notifyInstanceTopic(instance, `⚠️ ${instance} cost: ${formatCents(totalCents)} / ${formatCents(limitCents)} (${Math.round(totalCents / limitCents * 100)}%)`);
        this.webhookEmitter?.emit("cost_warning", instance, { cost_cents: totalCents, limit_cents: limitCents });
    });
    this.costGuard.on("limit", (instance, totalCents, limitCents) => {
        this.notifyInstanceTopic(instance, `🛑 ${instance} daily limit ${formatCents(limitCents)} reached — pausing instance.`);
        this.eventLog?.insert(instance, "instance_paused", { reason: "cost_limit", cost_cents: totalCents });
        this.webhookEmitter?.emit("cost_limit", instance, { cost_cents: totalCents, limit_cents: limitCents });
        this.stopInstance(instance).catch(err => this.logger.error({ err, instance }, "Failed to pause instance on cost limit"));
    });
    const summaryConfig = {
        ...DEFAULT_DAILY_SUMMARY,
        ...fleet.defaults?.daily_summary ?? {},
    };
    // DailySummary gets a sender callback (posts to the configured group) and
    // a generator callback (builds the text from event log + per-instance cost).
    this.dailySummary = new DailySummary(summaryConfig, costGuardConfig.timezone, (text) => {
        if (!this.adapter || !this.fleetConfig?.channel?.group_id)
            return;
        this.adapter.sendText(String(this.fleetConfig.channel.group_id), text)
            .catch(e => this.logger.debug({ err: e }, "Failed to send daily summary"));
    }, () => {
        const instances = Object.keys(this.fleetConfig?.instances ?? {});
        const costMap = new Map();
        for (const name of instances) {
            costMap.set(name, this.costGuard?.getDailyCostCents(name) ?? 0);
        }
        return DailySummary.generateText(this.eventLog, instances, costMap, this.costGuard?.getFleetTotalCents() ?? 0);
    });
    this.dailySummary.start();
    // Auto-create general instance if none configured
    const hasGeneralTopic = Object.values(fleet.instances).some(inst => inst.general_topic === true);
    if (!hasGeneralTopic) {
        this.logger.info("Auto-creating general instance for General Topic");
        const generalDir = join(homedir(), ".agend", "general");
        mkdirSync(generalDir, { recursive: true });
        const claudeMdPath = join(generalDir, "CLAUDE.md");
        // Seed a CLAUDE.md system prompt for the general agent (only once).
        if (!existsSync(claudeMdPath)) {
            writeFileSync(claudeMdPath, `# General Assistant

你是這個 AgEnD fleet 的通用入口。

## 行為準則

- 簡單任務(搜尋、翻譯、一般問答):自己處理。
- 屬於特定專案的任務:用 list_instances() 找到對應 agent,需要時用 start_instance() 啟動,再用 send_to_instance() 委派。
- 需要多個 agent 協作的任務:協調各 agent 並行或串行執行,收集結果後彙整。
- 使用者想開新的專案 agent:用 create_instance() 建立。
- 不再需要的 instance(例如功能完成):用 delete_instance() 清除。
- 收到其他 instance 委派的任務時,完成後一定要用 send_to_instance() 回報結果。

## 委派原則

只在有具體理由時才委派:
- 任務需要存取特定專案的檔案
- 任務可以從多 agent 平行執行中受益
- 保留自己的 context 更重要,把不相關的工作交出去
- 絕不把任務回委給委派你的 instance

自己能做的,就自己做。
`, "utf-8");
        }
        const generalConfig = {
            ...DEFAULT_INSTANCE_CONFIG,
            working_directory: generalDir,
            general_topic: true,
        };
        fleet.instances["general"] = generalConfig;
        // Persist the auto-created instance back to fleet.yaml.
        this.saveFleetConfig();
    }
    const instanceEntries = Object.entries(fleet.instances);
    for (const [name, config] of instanceEntries) {
        // Start sequentially; one failing instance must not block the rest.
        await this.startInstance(name, config, topicMode).catch(err => this.logger.error({ err, name }, "Failed to start instance"));
    }
    if (topicMode && fleet.channel) {
        const schedulerConfig = {
            ...DEFAULT_SCHEDULER_CONFIG,
            ...this.fleetConfig?.defaults?.scheduler ?? {},
        };
        // Scheduler triggers route through handleScheduleTrigger; the last
        // callback validates that a schedule's target instance still exists.
        this.scheduler = new Scheduler(join(this.dataDir, "scheduler.db"), (schedule) => this.handleScheduleTrigger(schedule), schedulerConfig, (name) => this.fleetConfig?.instances?.[name] != null);
        this.scheduler.init();
        this.logger.info("Scheduler initialized");
        await this.startSharedAdapter(fleet);
        // Auto-create topics AFTER adapter is ready (needs adapter.createTopic)
        await this.topicCommands.autoCreateTopics();
        this.routingTable = this.buildRoutingTable();
        const routeSummary = [...this.routingTable.entries()].map(([tid, target]) => `#${tid}→${target.name}`).join(", ");
        this.logger.info(`Routes: ${routeSummary}`);
        // Resolve topic icon emoji IDs and start idle archive poller
        await this.resolveTopicIcons();
        this.startArchivePoller();
        // Give freshly started daemons time to create their channel.sock
        // before we attempt IPC connections. TODO confirm 3s is sufficient.
        await new Promise(r => setTimeout(r, 3000));
        await this.connectToInstances(fleet);
        for (const name of Object.keys(fleet.instances)) {
            this.startStatuslineWatcher(name);
        }
    }
    // Health HTTP endpoint
    this.startHealthServer(fleet.health_port ?? 19280);
    // SIGHUP: reload scheduler (use once + re-register to avoid duplicates)
    const onSighup = () => {
        this.logger.info("Received SIGHUP, reloading scheduler...");
        this.scheduler?.reload();
        process.once("SIGHUP", onSighup);
    };
    process.once("SIGHUP", onSighup);
    // SIGUSR2: graceful in-process restart of all instances.
    const onRestart = () => {
        this.logger.info("Received SIGUSR2, initiating graceful restart...");
        this.restartInstances()
            .catch(err => this.logger.error({ err }, "Graceful restart failed"))
            .finally(() => process.once("SIGUSR2", onRestart));
    };
    process.once("SIGUSR2", onRestart);
    // SIGUSR1: full process reload (graceful stop → exit → CLI restarts)
    const onFullRestart = () => {
        this.logger.info("Received SIGUSR1, initiating full restart (process reload)...");
        this.gracefulShutdownForReload()
            .then(() => {
            this.logger.info("Full restart: shutdown complete, exiting for reload");
            process.exit(0);
        })
            .catch(err => {
            this.logger.error({ err }, "Full restart: graceful shutdown failed");
            process.exit(1);
        });
    };
    process.once("SIGUSR1", onFullRestart);
}
|
|
348
|
+
/** Start the shared Telegram adapter for topic mode */
/**
 * Create and start the fleet-wide channel adapter and subscribe its events:
 * inbound messages are routed via handleInboundMessage, "hang:" callback
 * buttons can restart an instance, and topic_closed events unbind topics
 * (unless we archived them ourselves). Also starts the topic-cleanup and
 * stale-session prune pollers. Skips silently when the bot token env var
 * is unset.
 *
 * @param {object} fleet - parsed fleet config (must have fleet.channel)
 */
async startSharedAdapter(fleet) {
    const channelConfig = fleet.channel;
    const botToken = process.env[channelConfig.bot_token_env];
    if (!botToken) {
        this.logger.warn({ env: channelConfig.bot_token_env }, "Bot token env not set, skipping shared adapter");
        return;
    }
    const accessDir = join(this.dataDir, "access");
    mkdirSync(accessDir, { recursive: true });
    const accessManager = new AccessManager(channelConfig.access, join(accessDir, "access.json"));
    // Downloaded attachments land in the inbox directory.
    const inboxDir = join(this.dataDir, "inbox");
    mkdirSync(inboxDir, { recursive: true });
    this.adapter = await createAdapter(channelConfig, {
        id: "fleet",
        botToken,
        accessManager,
        inboxDir,
    });
    this.adapter.on("message", (msg) => {
        // Fire-and-forget: routing errors surface via handler_error/logging.
        this.handleInboundMessage(msg);
    });
    this.adapter.on("callback_query", async (data) => {
        // "hang:<action>:<instance>" buttons come from sendHangNotification.
        if (data.callbackData.startsWith("hang:")) {
            const parts = data.callbackData.split(":");
            const action = parts[1];
            const instanceName = parts[2];
            if (action === "restart") {
                await this.stopInstance(instanceName);
                const config = this.fleetConfig?.instances[instanceName];
                if (config) {
                    const topicMode = this.fleetConfig?.channel?.mode === "topic";
                    await this.startInstance(instanceName, config, topicMode);
                    // Wait for the daemon's channel.sock before reconnecting IPC.
                    await new Promise(r => setTimeout(r, 3000));
                    await this.connectIpcToInstance(instanceName);
                }
                this.adapter?.editMessage(data.chatId, data.messageId, `🔄 ${instanceName} restarted.`).catch(() => { });
            }
            else {
                this.adapter?.editMessage(data.chatId, data.messageId, `⏳ Continuing to wait for ${instanceName}.`).catch(() => { });
            }
            return;
        }
    });
    this.adapter.on("topic_closed", (data) => {
        const tid = parseInt(data.threadId, 10);
        // Skip unbind if we archived this topic ourselves
        if (this.archivedTopics.has(tid))
            return;
        this.topicCommands.handleTopicDeleted(tid);
    });
    await this.topicCommands.registerBotCommands();
    await this.adapter.start();
    if (fleet.channel?.group_id) {
        this.adapter.setChatId(String(fleet.channel.group_id));
    }
    this.adapter.on("started", (username) => {
        this.logger.info(`Telegram bot @${username} polling`);
    });
    this.adapter.on("polling_conflict", ({ attempt, delay }) => {
        // 409 = another process is polling with the same bot token.
        this.logger.warn(`409 Conflict (attempt ${attempt}), retry in ${delay / 1000}s`);
    });
    this.adapter.on("handler_error", (err) => {
        this.logger.warn({ err: err instanceof Error ? err.message : String(err) }, "Telegram handler error");
    });
    this.startTopicCleanupPoller();
    // Prune stale external sessions every 5 minutes
    // NOTE(review): sessionPruneTimer is not declared as a class field above —
    // it is created here dynamically; verify it is cleared on shutdown.
    this.sessionPruneTimer = setInterval(() => {
        this.pruneStaleExternalSessions().catch(err => this.logger.debug({ err }, "Session prune failed"));
    }, 5 * 60 * 1000);
}
|
|
419
|
+
/** Connect IPC clients to each daemon instance's channel.sock */
|
|
420
|
+
async connectToInstances(fleet) {
|
|
421
|
+
for (const name of Object.keys(fleet.instances)) {
|
|
422
|
+
await this.connectIpcToInstance(name);
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
/** Connect IPC to a single instance with all handlers */
/**
 * Open an IpcClient on the instance's channel.sock (no-op when the socket
 * file is missing) and dispatch inbound IPC messages by type:
 *  - mcp_ready / session_disconnected: maintain sessionRegistry for
 *    external sessions whose sessionName differs from the instance name
 *  - fleet_outbound: auto-register the sender session, then forward to
 *    handleOutboundFromInstance
 *  - fleet_tool_status: forward to handleToolStatusFromInstance
 *  - fleet_schedule_*: forward to handleScheduleCrud
 * Connection failures are logged as warnings, not thrown.
 *
 * @param {string} name - instance name
 */
async connectIpcToInstance(name) {
    const sockPath = join(this.getInstanceDir(name), "channel.sock");
    if (!existsSync(sockPath))
        return;
    const ipc = new IpcClient(sockPath);
    try {
        await ipc.connect();
        this.instanceIpcClients.set(name, ipc);
        ipc.on("message", (msg) => {
            if (msg.type === "mcp_ready") {
                // Register external sessions (sessionName differs from instance name)
                const sessionName = msg.sessionName;
                if (sessionName && sessionName !== name) {
                    this.sessionRegistry.set(sessionName, name);
                    this.logger.info({ sessionName, instanceName: name }, "Registered external session");
                }
            }
            else if (msg.type === "session_disconnected") {
                const sessionName = msg.sessionName;
                if (sessionName && this.sessionRegistry.has(sessionName)) {
                    this.sessionRegistry.delete(sessionName);
                    this.logger.info({ sessionName, instanceName: name }, "Unregistered external session");
                }
            }
            else if (msg.type === "fleet_outbound") {
                // Auto-register external session on first outbound message — covers the
                // race where mcp_ready arrived before fleet manager connected and query_sessions
                // fired before the MCP server reconnected.
                const sender = msg.senderSessionName;
                if (sender && sender !== name && !this.sessionRegistry.has(sender)) {
                    this.sessionRegistry.set(sender, name);
                    this.logger.info({ sessionName: sender, instanceName: name }, "Registered external session");
                }
                this.handleOutboundFromInstance(name, msg).catch(err => this.logger.error({ err }, "handleOutboundFromInstance error"));
            }
            else if (msg.type === "fleet_tool_status") {
                this.handleToolStatusFromInstance(name, msg);
            }
            else if (msg.type === "fleet_schedule_create" || msg.type === "fleet_schedule_list" ||
                msg.type === "fleet_schedule_update" || msg.type === "fleet_schedule_delete") {
                this.handleScheduleCrud(name, msg);
            }
        });
        // Ask daemon for any sessions that registered before we connected
        // (fixes race condition where mcp_ready was broadcast before fleet manager connected)
        ipc.send({ type: "query_sessions" });
        this.logger.debug({ name }, "Connected to instance IPC");
        if (!this.statuslineWatchers.has(name)) {
            this.startStatuslineWatcher(name);
        }
    }
    catch (err) {
        this.logger.warn({ name, err }, "Failed to connect to instance IPC");
    }
}
|
|
481
|
+
/** Handle inbound message — transcribe voice if present, then route */
|
|
482
|
+
findGeneralInstance() {
|
|
483
|
+
if (!this.fleetConfig)
|
|
484
|
+
return undefined;
|
|
485
|
+
for (const [name, config] of Object.entries(this.fleetConfig.instances)) {
|
|
486
|
+
if (config.general_topic === true) {
|
|
487
|
+
return this.daemons.has(name) ? name : undefined;
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
return undefined;
|
|
491
|
+
}
|
|
492
|
+
/**
 * Route one inbound channel message.
 *
 * Messages without a thread id belong to the main chat: they are first
 * offered to topicCommands.handleGeneralCommand, then forwarded to the
 * General Topic instance (if one is running). Threaded messages are routed
 * via routingTable to their bound instance; unbound topics go to
 * topicCommands.handleUnboundTopic. Before forwarding, archived topics are
 * reopened, rate limits are enforced, attachments are processed into text
 * + metadata, and a 👀 reaction acknowledges receipt.
 *
 * @param {object} msg - adapter message (chatId, messageId, threadId,
 *   username, userId, timestamp, replyToText, attachments, …)
 */
async handleInboundMessage(msg) {
    const threadId = msg.threadId ? parseInt(msg.threadId, 10) : undefined;
    if (threadId == null) {
        // General topic: check for /status command
        if (await this.topicCommands.handleGeneralCommand(msg))
            return;
        // Forward to General Topic instance if configured
        const generalInstance = this.findGeneralInstance();
        if (generalInstance) {
            if (this.replyIfRateLimited(generalInstance, msg))
                return;
            // Download/describe attachments; returns the text to forward plus
            // extra metadata entries (e.g. file paths).
            const { text, extraMeta } = await processAttachments(msg, this.adapter, this.logger, generalInstance);
            const ipc = this.instanceIpcClients.get(generalInstance);
            if (ipc) {
                // Acknowledge receipt with a reaction (best-effort).
                if (this.adapter && msg.chatId && msg.messageId) {
                    this.adapter.react(msg.chatId, msg.messageId, "👀")
                        .catch(e => this.logger.debug({ err: e.message }, "Auto-react failed"));
                }
                ipc.send({
                    type: "fleet_inbound",
                    content: text,
                    targetSession: generalInstance,
                    meta: {
                        chat_id: msg.chatId,
                        message_id: msg.messageId,
                        user: msg.username,
                        user_id: msg.userId,
                        ts: msg.timestamp.toISOString(),
                        thread_id: "",
                        ...(msg.replyToText ? { reply_to_text: msg.replyToText } : {}),
                        ...extraMeta,
                    },
                });
                this.logger.info(`← ${generalInstance} ${msg.username}: ${(text ?? "").slice(0, 100)}`);
            }
        }
        return;
    }
    const target = this.routingTable.get(threadId);
    if (!target) {
        this.topicCommands.handleUnboundTopic(msg);
        return;
    }
    const instanceName = target.name;
    // Reopen archived topic before routing
    if (this.archivedTopics.has(threadId)) {
        await this.reopenArchivedTopic(threadId, instanceName);
    }
    // Record activity (resets idle archiving) and show the "working" icon.
    this.touchActivity(instanceName);
    this.setTopicIcon(instanceName, "blue");
    if (this.replyIfRateLimited(instanceName, msg))
        return;
    const { text, extraMeta } = await processAttachments(msg, this.adapter, this.logger, instanceName);
    const ipc = this.instanceIpcClients.get(instanceName);
    if (!ipc) {
        this.logger.warn({ instanceName }, "No IPC connection to instance");
        return;
    }
    if (this.adapter && msg.chatId && msg.messageId) {
        this.adapter.react(msg.chatId, msg.messageId, "👀")
            .catch(e => this.logger.debug({ err: e.message }, "Auto-react failed"));
    }
    ipc.send({
        type: "fleet_inbound",
        content: text,
        targetSession: instanceName, // Telegram messages → instance's own session
        meta: {
            chat_id: msg.chatId,
            message_id: msg.messageId,
            user: msg.username,
            user_id: msg.userId,
            ts: msg.timestamp.toISOString(),
            thread_id: msg.threadId ?? "",
            ...(msg.replyToText ? { reply_to_text: msg.replyToText } : {}),
            ...extraMeta,
        },
    });
    this.logger.info(`← ${instanceName} ${msg.username}: ${(text ?? "").slice(0, 100)}`);
}
|
|
571
|
+
/** Check the per-instance weekly rate limit; notify the sender and block delivery when it is exhausted */
|
|
572
|
+
replyIfRateLimited(instanceName, msg) {
|
|
573
|
+
const rl = this.instanceRateLimits.get(instanceName);
|
|
574
|
+
if (!rl || rl.seven_day_pct < 100)
|
|
575
|
+
return false;
|
|
576
|
+
if (this.adapter && msg.chatId) {
|
|
577
|
+
const threadId = msg.threadId ?? undefined;
|
|
578
|
+
this.adapter.sendText(msg.chatId, `⏸ ${instanceName} has hit the weekly usage limit. Your message was not delivered. Limit resets automatically — check /status for details.`, { threadId })
|
|
579
|
+
.catch(e => this.logger.debug({ err: e }, "Failed to send rate limit notice"));
|
|
580
|
+
}
|
|
581
|
+
this.logger.info({ instanceName }, "Blocked inbound message — weekly rate limit at 100%");
|
|
582
|
+
return true;
|
|
583
|
+
}
|
|
584
|
+
/** Handle outbound tool calls from a daemon instance */
|
|
585
|
+
/**
 * Dispatch an outbound tool call received from a daemon instance.
 *
 * Standard channel tools (reply, react, edit_message, download_attachment) are
 * delegated to `routeToolCall`; everything else is handled by the switch below
 * (cross-instance messaging, discovery, lifecycle, worktree-backed creation).
 * Results/errors are always delivered back over IPC via `respond`.
 *
 * @param {string} instanceName - daemon instance that issued the tool call
 * @param {object} msg - IPC envelope: { tool, args, requestId, fleetRequestId, senderSessionName }
 */
async handleOutboundFromInstance(instanceName, msg) {
    if (!this.adapter)
        return;
    this.touchActivity(instanceName);
    this.setTopicIcon(instanceName, "green");
    const tool = msg.tool;
    const args = (msg.args ?? {});
    const requestId = msg.requestId;
    const fleetRequestId = msg.fleetRequestId;
    const senderSessionName = msg.senderSessionName;
    // Reply helper: echoes whichever correlation id the caller used
    // (fleetRequestId takes precedence over the plain requestId).
    const respond = (result, error) => {
        const ipc = this.instanceIpcClients.get(instanceName);
        if (fleetRequestId) {
            ipc?.send({ type: "fleet_outbound_response", fleetRequestId, result, error });
        }
        else {
            ipc?.send({ type: "fleet_outbound_response", requestId, result, error });
        }
    };
    // Resolve threadId from instance → topic_id mapping
    const instanceConfig = this.fleetConfig?.instances[instanceName];
    const threadId = resolveReplyThreadId(args.thread_id, instanceConfig);
    // Route standard channel tools (reply, react, edit_message, download_attachment)
    if (routeToolCall(this.adapter, tool, args, threadId, respond)) {
        if (tool === "reply") {
            this.logger.info(`→ ${instanceName} claude: ${(args.text ?? "").slice(0, 100)}`);
        }
        return;
    }
    // Fleet-specific tools
    switch (tool) {
        case "send_to_instance": {
            const targetName = args.instance_name;
            const message = args.message;
            if (!targetName) {
                respond(null, "send_to_instance: missing required argument 'instance_name'");
                break;
            }
            if (!message) {
                respond(null, "send_to_instance: missing required argument 'message'");
                break;
            }
            const senderLabel = senderSessionName ?? instanceName;
            const isExternalSender = senderSessionName != null && senderSessionName !== instanceName;
            // Resolve target: could be an instance name or an external session name
            let targetIpc = this.instanceIpcClients.get(targetName);
            let targetSession = targetName; // default: target is the instance itself
            let targetInstanceName = targetName;
            if (!targetIpc) {
                // Check if target is an external session
                const hostInstance = this.sessionRegistry.get(targetName);
                if (hostInstance) {
                    targetIpc = this.instanceIpcClients.get(hostInstance);
                    targetSession = targetName; // deliver to the external session
                    targetInstanceName = hostInstance;
                }
            }
            if (!targetIpc) {
                // Check if instance exists in config but is stopped
                const existsInConfig = targetName in (this.fleetConfig?.instances ?? {});
                if (existsInConfig) {
                    respond(null, `Instance '${targetName}' is stopped. Use start_instance('${targetName}') to start it first.`);
                }
                else {
                    respond(null, `Instance or session not found: ${targetName}`);
                }
                break;
            }
            // Build structured metadata (Phase 2)
            // Synthetic chat/message ids mark this as an instance-to-instance message
            // (no real channel message backs it).
            const correlationId = args.correlation_id || `cid-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
            const meta = {
                chat_id: "",
                message_id: `xmsg-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
                user: `instance:${senderLabel}`,
                user_id: `instance:${senderLabel}`,
                ts: new Date().toISOString(),
                thread_id: "",
                from_instance: senderLabel,
                correlation_id: correlationId,
            };
            if (args.request_kind)
                meta.request_kind = args.request_kind;
            if (args.requires_reply != null)
                meta.requires_reply = String(args.requires_reply);
            if (args.task_summary)
                meta.task_summary = args.task_summary;
            if (args.working_directory)
                meta.working_directory = args.working_directory;
            if (args.branch)
                meta.branch = args.branch;
            targetIpc.send({
                type: "fleet_inbound",
                targetSession,
                content: message,
                meta,
            });
            // Post to Telegram topics for visibility
            const groupId = this.fleetConfig?.channel?.group_id;
            if (groupId && this.adapter) {
                const senderTopicId = this.fleetConfig?.instances[instanceName]?.topic_id;
                const targetTopicId = this.fleetConfig?.instances[targetInstanceName]?.topic_id;
                // Post full message to topics — adapter handles 4096-char chunking
                // Only post to sender topic if sender is the instance itself (not external)
                if (senderTopicId && !isExternalSender) {
                    this.adapter.sendText(String(groupId), `→ ${targetName}:\n${message}`, {
                        threadId: String(senderTopicId),
                    }).catch(e => this.logger.debug({ err: e }, "Failed to post cross-instance notification"));
                }
                // Only post to target topic if target is an instance (not external session)
                if (targetTopicId && !this.sessionRegistry.has(targetName)) {
                    this.adapter.sendText(String(groupId), `← ${senderLabel}:\n${message}`, {
                        threadId: String(targetTopicId),
                    }).catch(e => this.logger.debug({ err: e }, "Failed to post cross-instance notification"));
                }
            }
            this.logger.info(`✉ ${senderLabel} → ${targetName}: ${(message ?? "").slice(0, 100)}`);
            respond({ sent: true, target: targetName, correlation_id: correlationId });
            break;
        }
        case "list_instances": {
            // Enumerate all other instances (config-backed) plus external sessions.
            const senderLabel = senderSessionName ?? instanceName;
            const allInstances = Object.entries(this.fleetConfig?.instances ?? {})
                .filter(([name]) => name !== instanceName && name !== senderLabel)
                .map(([name, config]) => ({
                name,
                type: "instance",
                status: this.daemons.has(name) ? "running" : "stopped",
                working_directory: config.working_directory,
                topic_id: config.topic_id ?? null,
                description: config.description ?? null,
                tags: config.tags ?? [],
                last_activity: this.lastActivity.get(name) ? new Date(this.lastActivity.get(name)).toISOString() : null,
            }));
            // Include external sessions (excluding self)
            const externalSessions = [...this.sessionRegistry.entries()]
                .filter(([sessName]) => sessName !== senderLabel)
                .map(([sessName, hostInstance]) => ({
                name: sessName, type: "session", host: hostInstance,
            }));
            respond({ instances: allInstances, external_sessions: externalSessions });
            break;
        }
        // Phase 3: High-level collaboration tools (wrappers around send_to_instance)
        // NOTE(review): these wrappers mutate the incoming `args` object in place
        // before re-dispatch — callers must not reuse `msg.args` afterwards.
        case "request_information": {
            const targetName = args.target_instance;
            const question = args.question;
            const context = args.context;
            const body = context ? `${question}\n\nContext: ${context}` : question;
            // Re-dispatch as send_to_instance with structured metadata
            args.instance_name = targetName;
            args.message = body;
            args.request_kind = "query";
            args.requires_reply = true;
            args.task_summary = question.slice(0, 120);
            // Recursively handle via the same switch (will hit send_to_instance case above)
            return this.handleOutboundFromInstance(instanceName, { tool: "send_to_instance", args, requestId, fleetRequestId, senderSessionName });
        }
        case "delegate_task": {
            const targetName = args.target_instance;
            const task = args.task;
            const criteria = args.success_criteria;
            const context = args.context;
            let body = task;
            if (criteria)
                body += `\n\nSuccess criteria: ${criteria}`;
            if (context)
                body += `\n\nContext: ${context}`;
            args.instance_name = targetName;
            args.message = body;
            args.request_kind = "task";
            args.requires_reply = true;
            args.task_summary = task.slice(0, 120);
            return this.handleOutboundFromInstance(instanceName, { tool: "send_to_instance", args, requestId, fleetRequestId, senderSessionName });
        }
        case "report_result": {
            const targetName = args.target_instance;
            const summary = args.summary;
            const artifacts = args.artifacts;
            if (!args.correlation_id) {
                this.logger.warn({ instanceName, targetName }, "report_result called without correlation_id — recipient cannot match this to an original request");
            }
            let body = summary;
            if (artifacts)
                body += `\n\nArtifacts: ${artifacts}`;
            args.instance_name = targetName;
            args.message = body;
            args.request_kind = "report";
            args.requires_reply = false;
            args.task_summary = summary.slice(0, 120);
            return this.handleOutboundFromInstance(instanceName, { tool: "send_to_instance", args, requestId, fleetRequestId, senderSessionName });
        }
        // Phase 4: Capability discovery
        case "describe_instance": {
            const targetName = args.name;
            const config = this.fleetConfig?.instances[targetName];
            if (config) {
                respond({
                    name: targetName,
                    type: "instance",
                    description: config.description ?? null,
                    tags: config.tags ?? [],
                    working_directory: config.working_directory,
                    status: this.daemons.has(targetName) ? "running" : "stopped",
                    topic_id: config.topic_id ?? null,
                    model: config.model ?? null,
                    last_activity: this.lastActivity.get(targetName) ? new Date(this.lastActivity.get(targetName)).toISOString() : null,
                    worktree_source: config.worktree_source ?? null,
                });
                break;
            }
            // Check if it's a known external session
            const hostInstance = this.sessionRegistry.get(targetName);
            if (hostInstance) {
                respond({
                    name: targetName,
                    type: "session",
                    host: hostInstance,
                    status: "running",
                });
                break;
            }
            respond(null, `Instance or session '${targetName}' not found`);
            break;
        }
        case "start_instance": {
            const targetName = args.name;
            // Already running?
            if (this.daemons.has(targetName)) {
                respond({ success: true, status: "already_running" });
                break;
            }
            // Exists in config?
            const targetConfig = this.fleetConfig?.instances[targetName];
            if (!targetConfig) {
                respond(null, `Instance '${targetName}' not found in fleet config`);
                break;
            }
            try {
                await this.startInstance(targetName, targetConfig, true);
                await this.connectIpcToInstance(targetName);
                respond({ success: true, status: "started" });
            }
            catch (err) {
                respond(null, `Failed to start instance '${targetName}': ${err.message}`);
            }
            break;
        }
        case "create_instance": {
            // NOTE(review): assumes args.directory is always provided — a missing
            // value throws here instead of producing a respond() error; confirm
            // the tool schema enforces it upstream.
            const directory = args.directory.replace(/^~/, process.env.HOME || "~");
            const topicName = args.topic_name || basename(directory);
            const description = args.description;
            const branch = args.branch;
            // Validate directory exists
            try {
                await access(directory);
            }
            catch {
                respond(null, `Directory does not exist: ${directory}`);
                break;
            }
            // Check for duplicate early (before worktree creation) — only when no branch
            if (!branch) {
                const expandHome = (p) => p.replace(/^~/, process.env.HOME || "~");
                const existingInstance = Object.entries(this.fleetConfig?.instances ?? {})
                    .find(([_, config]) => expandHome(config.working_directory) === directory);
                if (existingInstance) {
                    const [eName, eConfig] = existingInstance;
                    respond({
                        success: true,
                        status: "already_exists",
                        name: eName,
                        topic_id: eConfig.topic_id,
                        running: this.daemons.has(eName),
                    });
                    break;
                }
            }
            // If branch specified, create git worktree
            let workDir = directory;
            let worktreePath;
            if (branch) {
                try {
                    const { execFile: execFileCb } = await import("node:child_process");
                    const { promisify } = await import("node:util");
                    const execFileAsync = promisify(execFileCb);
                    // Verify it's a git repo
                    await execFileAsync("git", ["rev-parse", "--git-dir"], { cwd: directory });
                    // Determine worktree path: sibling directory named repo-branch
                    const repoName = basename(directory);
                    const safeBranch = branch.replace(/\//g, "-");
                    worktreePath = join(dirname(directory), `${repoName}-${safeBranch}`);
                    // Check if branch exists
                    let branchExists = false;
                    try {
                        await execFileAsync("git", ["rev-parse", "--verify", branch], { cwd: directory });
                        branchExists = true;
                    }
                    catch { /* branch doesn't exist */ }
                    if (branchExists) {
                        await execFileAsync("git", ["worktree", "add", worktreePath, branch], { cwd: directory });
                    }
                    else {
                        await execFileAsync("git", ["worktree", "add", worktreePath, "-b", branch], { cwd: directory });
                    }
                    this.logger.info({ worktreePath, branch, repo: directory }, "Created git worktree for instance");
                    workDir = worktreePath;
                }
                catch (err) {
                    respond(null, `Failed to create worktree: ${err.message}`);
                    break;
                }
            }
            // Check worktree path for duplicates (branch case only — non-branch already checked above)
            if (worktreePath) {
                const expandHome = (p) => p.replace(/^~/, process.env.HOME || "~");
                const existingInstance = Object.entries(this.fleetConfig?.instances ?? {})
                    .find(([_, config]) => expandHome(config.working_directory) === workDir);
                if (existingInstance) {
                    const [eName, eConfig] = existingInstance;
                    respond({
                        success: true,
                        status: "already_exists",
                        name: eName,
                        topic_id: eConfig.topic_id,
                        running: this.daemons.has(eName),
                    });
                    break;
                }
            }
            // Sequential steps with rollback
            let createdTopicId;
            let newInstanceName;
            try {
                // Step a: Create Telegram topic
                createdTopicId = await this.createForumTopic(topicName);
                // Step b: Register in config
                // Use topicName for worktree instances to avoid long paths (Unix socket limit 104 bytes)
                const nameBase = worktreePath ? topicName : basename(workDir);
                newInstanceName = `${sanitizeInstanceName(nameBase)}-t${createdTopicId}`;
                const instanceConfig = {
                    ...DEFAULT_INSTANCE_CONFIG,
                    ...this.fleetConfig.defaults,
                    working_directory: workDir,
                    topic_id: createdTopicId,
                    ...(description ? { description } : {}),
                    ...(args.model ? { model: args.model } : {}),
                    ...(args.backend ? { backend: args.backend } : {}),
                    ...(worktreePath ? { worktree_source: directory } : {}),
                };
                this.fleetConfig.instances[newInstanceName] = instanceConfig;
                this.routingTable.set(createdTopicId, { kind: "instance", name: newInstanceName });
                this.saveFleetConfig();
                // Step c: Start instance
                await this.startInstance(newInstanceName, instanceConfig, true);
                await this.connectIpcToInstance(newInstanceName);
                respond({
                    success: true,
                    name: newInstanceName,
                    topic_id: createdTopicId,
                    ...(worktreePath ? { worktree_path: worktreePath, branch } : {}),
                });
            }
            catch (err) {
                // Rollback in reverse order
                if (newInstanceName && this.daemons.has(newInstanceName)) {
                    await this.stopInstance(newInstanceName).catch(() => { });
                }
                if (newInstanceName && this.fleetConfig?.instances[newInstanceName]) {
                    delete this.fleetConfig.instances[newInstanceName];
                    if (createdTopicId)
                        this.routingTable.delete(createdTopicId);
                    this.saveFleetConfig();
                }
                if (createdTopicId) {
                    await this.deleteForumTopic(createdTopicId);
                }
                // Rollback worktree
                if (worktreePath) {
                    try {
                        const { execFile: execFileCb } = await import("node:child_process");
                        const { promisify } = await import("node:util");
                        const execFileAsync = promisify(execFileCb);
                        await execFileAsync("git", ["worktree", "remove", "--force", worktreePath], { cwd: directory });
                    }
                    catch { /* best-effort worktree cleanup */ }
                }
                respond(null, `Failed to create instance: ${err.message}`);
            }
            break;
        }
        case "delete_instance": {
            // NOTE(review): this `instanceName` shadows the method parameter of the
            // same name for the rest of the case block; `respond` still closes over
            // the outer (caller) instanceName, so replies go back to the caller.
            const instanceName = args.name;
            const deleteTopic = args.delete_topic ?? false;
            const instanceConfig = this.fleetConfig?.instances[instanceName];
            if (!instanceConfig) {
                respond(null, `Instance not found: ${instanceName}`);
                break;
            }
            if (instanceConfig.general_topic) {
                respond(null, "Cannot delete the General instance");
                break;
            }
            // Delete Telegram topic if requested (before removeInstance clears config)
            if (deleteTopic && instanceConfig.topic_id) {
                await this.deleteForumTopic(instanceConfig.topic_id);
            }
            await this.removeInstance(instanceName);
            respond({ success: true, name: instanceName, topic_deleted: deleteTopic });
            break;
        }
        default:
            respond(null, `Unknown tool: ${tool}`);
    }
}
|
|
999
|
+
/** Handle tool status update from a daemon instance */
|
|
1000
|
+
handleToolStatusFromInstance(instanceName, msg) {
|
|
1001
|
+
if (!this.adapter)
|
|
1002
|
+
return;
|
|
1003
|
+
const text = msg.text;
|
|
1004
|
+
const editMessageId = msg.editMessageId;
|
|
1005
|
+
const instanceConfig = this.fleetConfig?.instances[instanceName];
|
|
1006
|
+
const threadId = instanceConfig?.topic_id ? String(instanceConfig.topic_id) : undefined;
|
|
1007
|
+
const chatId = this.adapter.getChatId();
|
|
1008
|
+
if (!chatId)
|
|
1009
|
+
return;
|
|
1010
|
+
if (editMessageId) {
|
|
1011
|
+
this.adapter.editMessage(chatId, editMessageId, text).catch(e => this.logger.debug({ err: e }, "Failed to edit tool status message"));
|
|
1012
|
+
}
|
|
1013
|
+
else {
|
|
1014
|
+
this.adapter.sendText(chatId, text, { threadId }).then((sent) => {
|
|
1015
|
+
const ipc = this.instanceIpcClients.get(instanceName);
|
|
1016
|
+
ipc?.send({ type: "fleet_tool_status_ack", messageId: sent.messageId });
|
|
1017
|
+
}).catch(e => this.logger.debug({ err: e }, "Failed to send tool status message"));
|
|
1018
|
+
}
|
|
1019
|
+
}
|
|
1020
|
+
// ===================== Scheduler =====================
|
|
1021
|
+
async handleScheduleTrigger(schedule) {
|
|
1022
|
+
const { target, reply_chat_id, reply_thread_id, message, label, id, source } = schedule;
|
|
1023
|
+
const RATE_LIMIT_DEFER_THRESHOLD = 85;
|
|
1024
|
+
const rl = this.instanceRateLimits.get(target);
|
|
1025
|
+
if (rl && rl.five_hour_pct > RATE_LIMIT_DEFER_THRESHOLD) {
|
|
1026
|
+
this.scheduler.recordRun(id, "deferred", `5hr rate limit at ${rl.five_hour_pct}%`);
|
|
1027
|
+
this.eventLog?.insert(target, "schedule_deferred", {
|
|
1028
|
+
schedule_id: id,
|
|
1029
|
+
label,
|
|
1030
|
+
five_hour_pct: rl.five_hour_pct,
|
|
1031
|
+
});
|
|
1032
|
+
this.webhookEmitter?.emit("schedule_deferred", target, { schedule_id: id, label, five_hour_pct: rl.five_hour_pct });
|
|
1033
|
+
this.notifyInstanceTopic(target, `⏳ Schedule "${label ?? id}" deferred — rate limit at ${rl.five_hour_pct}%`);
|
|
1034
|
+
this.logger.info({ target, scheduleId: id, rateLimitPct: rl.five_hour_pct }, "Schedule deferred due to rate limit");
|
|
1035
|
+
return;
|
|
1036
|
+
}
|
|
1037
|
+
const defaults = this.fleetConfig?.defaults;
|
|
1038
|
+
const schedulerDefaults = defaults?.scheduler;
|
|
1039
|
+
const retryCount = schedulerDefaults?.retry_count ?? 3;
|
|
1040
|
+
const retryInterval = schedulerDefaults?.retry_interval_ms ?? 30_000;
|
|
1041
|
+
const deliver = () => {
|
|
1042
|
+
const ipc = this.instanceIpcClients.get(target);
|
|
1043
|
+
if (!ipc?.connected)
|
|
1044
|
+
return false;
|
|
1045
|
+
ipc.send({
|
|
1046
|
+
type: "fleet_schedule_trigger",
|
|
1047
|
+
payload: { schedule_id: id, message: `[排程任務] ${message}`, label },
|
|
1048
|
+
meta: { chat_id: reply_chat_id, thread_id: reply_thread_id, user: "scheduler" },
|
|
1049
|
+
});
|
|
1050
|
+
return true;
|
|
1051
|
+
};
|
|
1052
|
+
if (deliver()) {
|
|
1053
|
+
this.scheduler.recordRun(id, "delivered");
|
|
1054
|
+
if (source !== target)
|
|
1055
|
+
this.notifySourceTopic(schedule);
|
|
1056
|
+
return;
|
|
1057
|
+
}
|
|
1058
|
+
for (let i = 0; i < retryCount; i++) {
|
|
1059
|
+
await new Promise((r) => setTimeout(r, retryInterval));
|
|
1060
|
+
if (deliver()) {
|
|
1061
|
+
this.scheduler.recordRun(id, "delivered");
|
|
1062
|
+
if (source !== target)
|
|
1063
|
+
this.notifySourceTopic(schedule);
|
|
1064
|
+
return;
|
|
1065
|
+
}
|
|
1066
|
+
}
|
|
1067
|
+
this.scheduler.recordRun(id, "instance_offline", `retry ${retryCount}x failed`);
|
|
1068
|
+
this.notifyScheduleFailure(schedule);
|
|
1069
|
+
}
|
|
1070
|
+
notifySourceTopic(schedule) {
|
|
1071
|
+
if (!this.adapter)
|
|
1072
|
+
return;
|
|
1073
|
+
const text = `⏰ 排程「${schedule.label ?? schedule.id}」已觸發,目標實例:${schedule.target}`;
|
|
1074
|
+
this.adapter.sendText(schedule.reply_chat_id, text, {
|
|
1075
|
+
threadId: schedule.reply_thread_id ?? undefined,
|
|
1076
|
+
}).catch((err) => this.logger.error({ err }, "Failed to send cross-instance notification"));
|
|
1077
|
+
}
|
|
1078
|
+
notifyScheduleFailure(schedule) {
|
|
1079
|
+
if (!this.adapter)
|
|
1080
|
+
return;
|
|
1081
|
+
const text = `⏰ 排程「${schedule.label ?? schedule.id}」觸發失敗:實例 ${schedule.target} 未在線。`;
|
|
1082
|
+
this.adapter.sendText(schedule.reply_chat_id, text, {
|
|
1083
|
+
threadId: schedule.reply_thread_id ?? undefined,
|
|
1084
|
+
}).catch((err) => this.logger.error({ err }, "Failed to send schedule failure notification"));
|
|
1085
|
+
}
|
|
1086
|
+
handleScheduleCrud(instanceName, msg) {
|
|
1087
|
+
const fleetRequestId = msg.fleetRequestId;
|
|
1088
|
+
const payload = (msg.payload ?? {});
|
|
1089
|
+
const meta = (msg.meta ?? {});
|
|
1090
|
+
const ipc = this.instanceIpcClients.get(instanceName);
|
|
1091
|
+
if (!ipc)
|
|
1092
|
+
return;
|
|
1093
|
+
try {
|
|
1094
|
+
let result;
|
|
1095
|
+
switch (msg.type) {
|
|
1096
|
+
case "fleet_schedule_create": {
|
|
1097
|
+
const params = {
|
|
1098
|
+
cron: payload.cron,
|
|
1099
|
+
message: payload.message,
|
|
1100
|
+
source: instanceName,
|
|
1101
|
+
target: payload.target || instanceName,
|
|
1102
|
+
reply_chat_id: meta.chat_id,
|
|
1103
|
+
reply_thread_id: meta.thread_id || null,
|
|
1104
|
+
label: payload.label,
|
|
1105
|
+
timezone: payload.timezone,
|
|
1106
|
+
};
|
|
1107
|
+
result = this.scheduler.create(params);
|
|
1108
|
+
break;
|
|
1109
|
+
}
|
|
1110
|
+
case "fleet_schedule_list":
|
|
1111
|
+
result = this.scheduler.list(payload.target);
|
|
1112
|
+
break;
|
|
1113
|
+
case "fleet_schedule_update":
|
|
1114
|
+
result = this.scheduler.update(payload.id, payload);
|
|
1115
|
+
break;
|
|
1116
|
+
case "fleet_schedule_delete":
|
|
1117
|
+
this.scheduler.delete(payload.id);
|
|
1118
|
+
result = "ok";
|
|
1119
|
+
break;
|
|
1120
|
+
}
|
|
1121
|
+
ipc.send({ type: "fleet_schedule_response", fleetRequestId, result });
|
|
1122
|
+
}
|
|
1123
|
+
catch (err) {
|
|
1124
|
+
ipc.send({ type: "fleet_schedule_response", fleetRequestId, error: err.message });
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
// ===================== Topic management =====================
|
|
1128
|
+
/** Create a forum topic via the adapter. Returns the message_thread_id. */
|
|
1129
|
+
async createForumTopic(topicName) {
|
|
1130
|
+
if (!this.adapter?.createTopic) {
|
|
1131
|
+
throw new Error("Adapter does not support topic creation");
|
|
1132
|
+
}
|
|
1133
|
+
return this.adapter.createTopic(topicName);
|
|
1134
|
+
}
|
|
1135
|
+
async deleteForumTopic(topicId) {
|
|
1136
|
+
try {
|
|
1137
|
+
const groupId = this.fleetConfig?.channel?.group_id;
|
|
1138
|
+
const botTokenEnv = this.fleetConfig?.channel?.bot_token_env;
|
|
1139
|
+
if (!groupId || !botTokenEnv)
|
|
1140
|
+
return;
|
|
1141
|
+
const botToken = process.env[botTokenEnv];
|
|
1142
|
+
if (!botToken)
|
|
1143
|
+
return;
|
|
1144
|
+
await fetch(`https://api.telegram.org/bot${botToken}/deleteForumTopic`, {
|
|
1145
|
+
method: "POST",
|
|
1146
|
+
headers: { "Content-Type": "application/json" },
|
|
1147
|
+
body: JSON.stringify({ chat_id: groupId, message_thread_id: topicId }),
|
|
1148
|
+
});
|
|
1149
|
+
}
|
|
1150
|
+
catch (err) {
|
|
1151
|
+
this.logger.warn({ err, topicId }, "Failed to delete forum topic during rollback");
|
|
1152
|
+
}
|
|
1153
|
+
}
|
|
1154
|
+
// Interval handle created by startTopicCleanupPoller(); null until started.
topicCleanupTimer = null;
// Interval handle presumably for pruning stale external sessions — set elsewhere; TODO confirm against the rest of the class.
sessionPruneTimer = null;
|
|
1156
|
+
/** Periodically check if bound topics still exist */
|
|
1157
|
+
startTopicCleanupPoller() {
|
|
1158
|
+
this.topicCleanupTimer = setInterval(async () => {
|
|
1159
|
+
if (!this.fleetConfig?.channel?.group_id || !this.adapter?.topicExists)
|
|
1160
|
+
return;
|
|
1161
|
+
for (const [threadId, target] of this.routingTable) {
|
|
1162
|
+
try {
|
|
1163
|
+
if (!isProbeableRouteTarget(target)) {
|
|
1164
|
+
continue;
|
|
1165
|
+
}
|
|
1166
|
+
const exists = await this.adapter.topicExists(threadId);
|
|
1167
|
+
if (!exists) {
|
|
1168
|
+
await this.topicCommands.handleTopicDeleted(threadId);
|
|
1169
|
+
}
|
|
1170
|
+
}
|
|
1171
|
+
catch (err) {
|
|
1172
|
+
this.logger.debug({ err, threadId }, "Topic existence check failed");
|
|
1173
|
+
}
|
|
1174
|
+
}
|
|
1175
|
+
}, 5 * 60_000);
|
|
1176
|
+
}
|
|
1177
|
+
/** Save fleet config back to fleet.yaml */
|
|
1178
|
+
saveFleetConfig() {
|
|
1179
|
+
if (!this.fleetConfig || !this.configPath)
|
|
1180
|
+
return;
|
|
1181
|
+
const toSave = {};
|
|
1182
|
+
if (this.fleetConfig.project_roots)
|
|
1183
|
+
toSave.project_roots = this.fleetConfig.project_roots;
|
|
1184
|
+
if (this.fleetConfig.channel)
|
|
1185
|
+
toSave.channel = this.fleetConfig.channel;
|
|
1186
|
+
if (this.fleetConfig.health_port)
|
|
1187
|
+
toSave.health_port = this.fleetConfig.health_port;
|
|
1188
|
+
if (Object.keys(this.fleetConfig.defaults).length > 0)
|
|
1189
|
+
toSave.defaults = this.fleetConfig.defaults;
|
|
1190
|
+
toSave.instances = {};
|
|
1191
|
+
for (const [name, inst] of Object.entries(this.fleetConfig.instances)) {
|
|
1192
|
+
const serialized = {
|
|
1193
|
+
working_directory: inst.working_directory,
|
|
1194
|
+
topic_id: inst.topic_id,
|
|
1195
|
+
};
|
|
1196
|
+
// Preserve all optional user-configured fields so saveFleetConfig() never silently drops them
|
|
1197
|
+
if (inst.general_topic)
|
|
1198
|
+
serialized.general_topic = true;
|
|
1199
|
+
if (inst.description)
|
|
1200
|
+
serialized.description = inst.description;
|
|
1201
|
+
if (inst.tags?.length)
|
|
1202
|
+
serialized.tags = inst.tags;
|
|
1203
|
+
if (inst.model)
|
|
1204
|
+
serialized.model = inst.model;
|
|
1205
|
+
if (inst.model_failover?.length)
|
|
1206
|
+
serialized.model_failover = inst.model_failover;
|
|
1207
|
+
if (inst.worktree_source)
|
|
1208
|
+
serialized.worktree_source = inst.worktree_source;
|
|
1209
|
+
if (inst.backend)
|
|
1210
|
+
serialized.backend = inst.backend;
|
|
1211
|
+
if (inst.systemPrompt)
|
|
1212
|
+
serialized.systemPrompt = inst.systemPrompt;
|
|
1213
|
+
if (inst.skipPermissions)
|
|
1214
|
+
serialized.skipPermissions = inst.skipPermissions;
|
|
1215
|
+
if (inst.lightweight)
|
|
1216
|
+
serialized.lightweight = inst.lightweight;
|
|
1217
|
+
if (inst.cost_guard)
|
|
1218
|
+
serialized.cost_guard = inst.cost_guard;
|
|
1219
|
+
toSave.instances[name] = serialized;
|
|
1220
|
+
}
|
|
1221
|
+
writeFileSync(this.configPath, yaml.dump(toSave, { lineWidth: 120 }));
|
|
1222
|
+
this.logger.info({ path: this.configPath }, "Saved fleet config");
|
|
1223
|
+
}
|
|
1224
|
+
async removeInstance(name) {
|
|
1225
|
+
const config = this.fleetConfig?.instances[name];
|
|
1226
|
+
if (!config)
|
|
1227
|
+
return;
|
|
1228
|
+
// Never remove the General instance
|
|
1229
|
+
if (config.general_topic) {
|
|
1230
|
+
this.logger.warn({ name }, "Refusing to remove General instance");
|
|
1231
|
+
return;
|
|
1232
|
+
}
|
|
1233
|
+
// Clean up schedules
|
|
1234
|
+
if (this.scheduler && config.topic_id) {
|
|
1235
|
+
const count = this.scheduler.deleteByInstanceOrThread(name, String(config.topic_id));
|
|
1236
|
+
if (count > 0) {
|
|
1237
|
+
this.logger.info({ name, count }, "Cleaned up schedules for deleted instance");
|
|
1238
|
+
}
|
|
1239
|
+
}
|
|
1240
|
+
// Stop daemon if running
|
|
1241
|
+
if (this.daemons.has(name)) {
|
|
1242
|
+
await this.stopInstance(name);
|
|
1243
|
+
}
|
|
1244
|
+
// Clean up git worktree if applicable
|
|
1245
|
+
if (config.worktree_source && config.working_directory) {
|
|
1246
|
+
const { existsSync } = await import("node:fs");
|
|
1247
|
+
if (!existsSync(config.working_directory)) {
|
|
1248
|
+
this.logger.info({ worktree: config.working_directory }, "Worktree directory already gone, skipping removal");
|
|
1249
|
+
}
|
|
1250
|
+
else {
|
|
1251
|
+
try {
|
|
1252
|
+
const { execFile: execFileCb } = await import("node:child_process");
|
|
1253
|
+
const { promisify } = await import("node:util");
|
|
1254
|
+
const execFileAsync = promisify(execFileCb);
|
|
1255
|
+
await execFileAsync("git", ["worktree", "remove", "--force", config.working_directory], {
|
|
1256
|
+
cwd: config.worktree_source,
|
|
1257
|
+
});
|
|
1258
|
+
this.logger.info({ worktree: config.working_directory }, "Removed git worktree");
|
|
1259
|
+
}
|
|
1260
|
+
catch (err) {
|
|
1261
|
+
this.logger.warn({ err, worktree: config.working_directory }, "Failed to remove git worktree");
|
|
1262
|
+
}
|
|
1263
|
+
}
|
|
1264
|
+
}
|
|
1265
|
+
// Clean up IPC
|
|
1266
|
+
const ipc = this.instanceIpcClients.get(name);
|
|
1267
|
+
if (ipc) {
|
|
1268
|
+
await ipc.close();
|
|
1269
|
+
this.instanceIpcClients.delete(name);
|
|
1270
|
+
}
|
|
1271
|
+
// Remove from routing table
|
|
1272
|
+
if (config.topic_id) {
|
|
1273
|
+
this.routingTable.delete(config.topic_id);
|
|
1274
|
+
}
|
|
1275
|
+
// Remove from fleet config and save
|
|
1276
|
+
delete this.fleetConfig.instances[name];
|
|
1277
|
+
this.saveFleetConfig();
|
|
1278
|
+
this.logger.info({ name }, "Instance removed");
|
|
1279
|
+
}
|
|
1280
|
+
startStatuslineWatcher(name) {
|
|
1281
|
+
const statusFile = join(this.getInstanceDir(name), "statusline.json");
|
|
1282
|
+
const timer = setInterval(() => {
|
|
1283
|
+
try {
|
|
1284
|
+
const data = JSON.parse(readFileSync(statusFile, "utf-8"));
|
|
1285
|
+
this.costGuard?.updateCost(name, data.cost?.total_cost_usd ?? 0);
|
|
1286
|
+
const rl = data.rate_limits;
|
|
1287
|
+
if (rl) {
|
|
1288
|
+
const prev = this.instanceRateLimits.get(name);
|
|
1289
|
+
const newSevenDay = rl.seven_day?.used_percentage ?? 0;
|
|
1290
|
+
if (prev?.seven_day_pct === 100 && newSevenDay < 100) {
|
|
1291
|
+
this.notifyInstanceTopic(name, `✅ ${name} weekly usage limit has reset — instance is available again.`);
|
|
1292
|
+
this.logger.info({ name }, "Weekly rate limit recovered");
|
|
1293
|
+
}
|
|
1294
|
+
this.instanceRateLimits.set(name, {
|
|
1295
|
+
five_hour_pct: rl.five_hour?.used_percentage ?? 0,
|
|
1296
|
+
seven_day_pct: newSevenDay,
|
|
1297
|
+
});
|
|
1298
|
+
this.checkModelFailover(name, rl.five_hour?.used_percentage ?? 0);
|
|
1299
|
+
}
|
|
1300
|
+
}
|
|
1301
|
+
catch { /* file may not exist yet or be mid-write */ }
|
|
1302
|
+
}, 10_000);
|
|
1303
|
+
this.statuslineWatchers.set(name, timer);
|
|
1304
|
+
}
|
|
1305
|
+
// ── Model failover ──────────────────────────────────────────────────────
|
|
1306
|
+
static FAILOVER_TRIGGER_PCT = 90;
|
|
1307
|
+
static FAILOVER_RECOVER_PCT = 50;
|
|
1308
|
+
checkModelFailover(name, fiveHourPct) {
|
|
1309
|
+
const config = this.fleetConfig?.instances[name];
|
|
1310
|
+
if (!config?.model_failover?.length)
|
|
1311
|
+
return;
|
|
1312
|
+
const daemon = this.daemons.get(name);
|
|
1313
|
+
if (!daemon)
|
|
1314
|
+
return;
|
|
1315
|
+
const failoverList = config.model_failover;
|
|
1316
|
+
const primaryModel = failoverList[0];
|
|
1317
|
+
const currentFailover = this.failoverActive.get(name);
|
|
1318
|
+
if (fiveHourPct >= FleetManager.FAILOVER_TRIGGER_PCT && !currentFailover) {
|
|
1319
|
+
// Trigger failover: pick next model in list
|
|
1320
|
+
const fallbackModel = failoverList.length > 1 ? failoverList[1] : undefined;
|
|
1321
|
+
if (!fallbackModel)
|
|
1322
|
+
return;
|
|
1323
|
+
this.failoverActive.set(name, fallbackModel);
|
|
1324
|
+
daemon.setModelOverride(fallbackModel);
|
|
1325
|
+
this.logger.info({ instance: name, from: primaryModel, to: fallbackModel, ratePct: fiveHourPct }, "Model failover triggered");
|
|
1326
|
+
this.eventLog?.insert(name, "model_failover", {
|
|
1327
|
+
from: primaryModel, to: fallbackModel, five_hour_pct: fiveHourPct,
|
|
1328
|
+
});
|
|
1329
|
+
this.webhookEmitter?.emit("model_failover", name, { from: primaryModel, to: fallbackModel, five_hour_pct: fiveHourPct });
|
|
1330
|
+
this.notifyInstanceTopic(name, `⚡ Rate limit ${fiveHourPct}% — next rotation will use ${fallbackModel} (was ${primaryModel})`);
|
|
1331
|
+
}
|
|
1332
|
+
else if (fiveHourPct < FleetManager.FAILOVER_RECOVER_PCT && currentFailover) {
|
|
1333
|
+
// Recover: switch back to primary
|
|
1334
|
+
this.failoverActive.delete(name);
|
|
1335
|
+
daemon.setModelOverride(undefined);
|
|
1336
|
+
this.logger.info({ instance: name, restored: primaryModel, ratePct: fiveHourPct }, "Model failover recovered");
|
|
1337
|
+
this.eventLog?.insert(name, "model_recovered", {
|
|
1338
|
+
restored: primaryModel, five_hour_pct: fiveHourPct,
|
|
1339
|
+
});
|
|
1340
|
+
this.webhookEmitter?.emit("model_recovered", name, { restored: primaryModel, five_hour_pct: fiveHourPct });
|
|
1341
|
+
this.notifyInstanceTopic(name, `✅ Rate limit recovered (${fiveHourPct}%) — next rotation will use ${primaryModel}`);
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
notifyInstanceTopic(instanceName, text) {
|
|
1345
|
+
if (!this.adapter)
|
|
1346
|
+
return;
|
|
1347
|
+
const groupId = this.fleetConfig?.channel?.group_id;
|
|
1348
|
+
if (!groupId)
|
|
1349
|
+
return;
|
|
1350
|
+
const threadId = this.fleetConfig?.instances[instanceName]?.topic_id;
|
|
1351
|
+
this.adapter.sendText(String(groupId), text, {
|
|
1352
|
+
threadId: threadId != null ? String(threadId) : undefined,
|
|
1353
|
+
}).catch(e => this.logger.debug({ err: e }, "Failed to send notification"));
|
|
1354
|
+
}
|
|
1355
|
+
async sendHangNotification(instanceName) {
|
|
1356
|
+
if (!this.adapter)
|
|
1357
|
+
return;
|
|
1358
|
+
const groupId = this.fleetConfig?.channel?.group_id;
|
|
1359
|
+
if (!groupId)
|
|
1360
|
+
return;
|
|
1361
|
+
const threadId = this.fleetConfig?.instances[instanceName]?.topic_id;
|
|
1362
|
+
this.setTopicIcon(instanceName, "red");
|
|
1363
|
+
await this.adapter.notifyAlert(String(groupId), {
|
|
1364
|
+
type: "hang",
|
|
1365
|
+
instanceName,
|
|
1366
|
+
message: `⚠️ ${instanceName} appears hung (no activity for 15+ minutes)`,
|
|
1367
|
+
choices: [
|
|
1368
|
+
{ id: `hang:restart:${instanceName}`, label: "🔄 Force restart" },
|
|
1369
|
+
{ id: `hang:wait:${instanceName}`, label: "⏳ Keep waiting" },
|
|
1370
|
+
],
|
|
1371
|
+
}, {
|
|
1372
|
+
threadId: threadId != null ? String(threadId) : undefined,
|
|
1373
|
+
}).catch(e => this.logger.debug({ err: e }, "Failed to send hang notification"));
|
|
1374
|
+
}
|
|
1375
|
+
// ── Topic icon + auto-archive ─────────────────────────────────────────────
|
|
1376
|
+
/** Fetch forum topic icon stickers and pick emoji IDs for each state */
|
|
1377
|
+
async resolveTopicIcons() {
|
|
1378
|
+
if (!this.adapter?.getTopicIconStickers)
|
|
1379
|
+
return;
|
|
1380
|
+
try {
|
|
1381
|
+
const stickers = await this.adapter.getTopicIconStickers();
|
|
1382
|
+
if (stickers.length === 0)
|
|
1383
|
+
return;
|
|
1384
|
+
// Telegram's getForumTopicIconStickers returns a fixed set.
|
|
1385
|
+
// Try to match by emoji character, fall back to positional.
|
|
1386
|
+
const find = (targets) => stickers.find((s) => targets.some((t) => s.emoji.includes(t)));
|
|
1387
|
+
const green = find(["🟢", "✅", "💚"]);
|
|
1388
|
+
const blue = find(["🔵", "💙", "📘"]);
|
|
1389
|
+
const red = find(["🔴", "❌", "💔"]);
|
|
1390
|
+
this.topicIcons = {
|
|
1391
|
+
green: green?.customEmojiId ?? stickers[0]?.customEmojiId,
|
|
1392
|
+
blue: blue?.customEmojiId ?? stickers[1]?.customEmojiId ?? stickers[0]?.customEmojiId,
|
|
1393
|
+
red: red?.customEmojiId ?? stickers[Math.min(5, stickers.length - 1)]?.customEmojiId,
|
|
1394
|
+
};
|
|
1395
|
+
this.logger.info({ icons: this.topicIcons }, "Resolved topic icon emoji IDs");
|
|
1396
|
+
}
|
|
1397
|
+
catch (err) {
|
|
1398
|
+
this.logger.debug({ err }, "Failed to resolve topic icons (non-fatal)");
|
|
1399
|
+
}
|
|
1400
|
+
}
|
|
1401
|
+
/** Set topic icon based on instance state */
|
|
1402
|
+
setTopicIcon(instanceName, state) {
|
|
1403
|
+
const topicId = this.fleetConfig?.instances[instanceName]?.topic_id;
|
|
1404
|
+
if (topicId == null || !this.adapter?.editForumTopic)
|
|
1405
|
+
return;
|
|
1406
|
+
const emojiId = state === "remove" ? "" : this.topicIcons[state];
|
|
1407
|
+
if (emojiId == null && state !== "remove")
|
|
1408
|
+
return; // no icon resolved
|
|
1409
|
+
this.adapter.editForumTopic(topicId, { iconCustomEmojiId: emojiId })
|
|
1410
|
+
.catch((e) => this.logger.debug({ err: e, instanceName, state }, "Topic icon update failed"));
|
|
1411
|
+
}
|
|
1412
|
+
/** Track activity timestamp for idle detection */
|
|
1413
|
+
touchActivity(instanceName) {
|
|
1414
|
+
this.lastActivity.set(instanceName, Date.now());
|
|
1415
|
+
}
|
|
1416
|
+
/** Start periodic idle archive checker */
|
|
1417
|
+
startArchivePoller() {
|
|
1418
|
+
this.archiveTimer = setInterval(() => {
|
|
1419
|
+
this.archiveIdleTopics().catch((err) => this.logger.debug({ err }, "Archive idle check failed"));
|
|
1420
|
+
}, 30 * 60_000); // check every 30 minutes
|
|
1421
|
+
}
|
|
1422
|
+
/** Close topics that have been idle beyond threshold */
|
|
1423
|
+
async archiveIdleTopics() {
|
|
1424
|
+
if (!this.adapter?.closeForumTopic || !this.fleetConfig)
|
|
1425
|
+
return;
|
|
1426
|
+
const now = Date.now();
|
|
1427
|
+
for (const [name, config] of Object.entries(this.fleetConfig.instances)) {
|
|
1428
|
+
const topicId = config.topic_id;
|
|
1429
|
+
if (topicId == null || config.general_topic)
|
|
1430
|
+
continue;
|
|
1431
|
+
if (this.archivedTopics.has(topicId))
|
|
1432
|
+
continue;
|
|
1433
|
+
const status = this.getInstanceStatus(name);
|
|
1434
|
+
if (status !== "running")
|
|
1435
|
+
continue; // only archive running-but-idle
|
|
1436
|
+
const last = this.lastActivity.get(name) ?? 0;
|
|
1437
|
+
if (last === 0)
|
|
1438
|
+
continue; // never active → skip (just started)
|
|
1439
|
+
if (now - last < FleetManager.ARCHIVE_IDLE_MS)
|
|
1440
|
+
continue;
|
|
1441
|
+
this.logger.info({ name, topicId, idleHours: Math.round((now - last) / 3600000) }, "Archiving idle topic");
|
|
1442
|
+
this.archivedTopics.add(topicId);
|
|
1443
|
+
this.setTopicIcon(name, "remove");
|
|
1444
|
+
await this.adapter.closeForumTopic(topicId);
|
|
1445
|
+
}
|
|
1446
|
+
}
|
|
1447
|
+
/** Reopen an archived topic and restore icon */
|
|
1448
|
+
async reopenArchivedTopic(topicId, instanceName) {
|
|
1449
|
+
if (!this.archivedTopics.has(topicId))
|
|
1450
|
+
return;
|
|
1451
|
+
this.archivedTopics.delete(topicId);
|
|
1452
|
+
if (this.adapter?.reopenForumTopic) {
|
|
1453
|
+
await this.adapter.reopenForumTopic(topicId);
|
|
1454
|
+
}
|
|
1455
|
+
this.setTopicIcon(instanceName, "green");
|
|
1456
|
+
this.touchActivity(instanceName);
|
|
1457
|
+
this.logger.info({ instanceName, topicId }, "Reopened archived topic");
|
|
1458
|
+
}
|
|
1459
|
+
clearStatuslineWatchers() {
|
|
1460
|
+
for (const [, timer] of this.statuslineWatchers)
|
|
1461
|
+
clearInterval(timer);
|
|
1462
|
+
this.statuslineWatchers.clear();
|
|
1463
|
+
this.instanceRateLimits.clear();
|
|
1464
|
+
this.failoverActive.clear();
|
|
1465
|
+
}
|
|
1466
|
+
async stopAll() {
|
|
1467
|
+
this.clearStatuslineWatchers();
|
|
1468
|
+
this.costGuard?.stop();
|
|
1469
|
+
this.dailySummary?.stop();
|
|
1470
|
+
if (this.topicCleanupTimer) {
|
|
1471
|
+
clearInterval(this.topicCleanupTimer);
|
|
1472
|
+
this.topicCleanupTimer = null;
|
|
1473
|
+
}
|
|
1474
|
+
if (this.sessionPruneTimer) {
|
|
1475
|
+
clearInterval(this.sessionPruneTimer);
|
|
1476
|
+
this.sessionPruneTimer = null;
|
|
1477
|
+
}
|
|
1478
|
+
if (this.archiveTimer) {
|
|
1479
|
+
clearInterval(this.archiveTimer);
|
|
1480
|
+
this.archiveTimer = null;
|
|
1481
|
+
}
|
|
1482
|
+
this.scheduler?.shutdown();
|
|
1483
|
+
await Promise.allSettled([...this.daemons.entries()].map(async ([name, daemon]) => {
|
|
1484
|
+
try {
|
|
1485
|
+
await daemon.stop();
|
|
1486
|
+
}
|
|
1487
|
+
catch (err) {
|
|
1488
|
+
this.logger.warn({ name, err }, "Stop failed");
|
|
1489
|
+
}
|
|
1490
|
+
this.daemons.delete(name);
|
|
1491
|
+
}));
|
|
1492
|
+
for (const [, ipc] of this.instanceIpcClients) {
|
|
1493
|
+
await ipc.close();
|
|
1494
|
+
}
|
|
1495
|
+
this.instanceIpcClients.clear();
|
|
1496
|
+
if (this.adapter) {
|
|
1497
|
+
await this.adapter.stop();
|
|
1498
|
+
this.adapter = null;
|
|
1499
|
+
}
|
|
1500
|
+
if (this.healthServer) {
|
|
1501
|
+
this.healthServer.close();
|
|
1502
|
+
this.healthServer = null;
|
|
1503
|
+
}
|
|
1504
|
+
this.eventLog?.close();
|
|
1505
|
+
const pidPath = join(this.dataDir, "fleet.pid");
|
|
1506
|
+
try {
|
|
1507
|
+
unlinkSync(pidPath);
|
|
1508
|
+
}
|
|
1509
|
+
catch (e) {
|
|
1510
|
+
this.logger.debug({ err: e }, "Failed to remove fleet PID file");
|
|
1511
|
+
}
|
|
1512
|
+
}
|
|
1513
|
+
/**
|
|
1514
|
+
* Prune stale external sessions by re-querying each daemon for live sessions.
|
|
1515
|
+
* Sessions in the registry that are no longer reported by any daemon are removed.
|
|
1516
|
+
*/
|
|
1517
|
+
async pruneStaleExternalSessions() {
|
|
1518
|
+
const liveSessions = new Set();
|
|
1519
|
+
// Ask each daemon for its currently connected external sessions
|
|
1520
|
+
const queries = [...this.instanceIpcClients.entries()].map(([name, ipc]) => {
|
|
1521
|
+
if (!ipc.connected)
|
|
1522
|
+
return Promise.resolve();
|
|
1523
|
+
return new Promise((resolve) => {
|
|
1524
|
+
const timeout = setTimeout(resolve, 5000);
|
|
1525
|
+
const handler = (msg) => {
|
|
1526
|
+
if (msg.type !== "query_sessions_response")
|
|
1527
|
+
return;
|
|
1528
|
+
ipc.removeListener("message", handler);
|
|
1529
|
+
clearTimeout(timeout);
|
|
1530
|
+
for (const s of msg.sessions)
|
|
1531
|
+
liveSessions.add(s);
|
|
1532
|
+
resolve();
|
|
1533
|
+
};
|
|
1534
|
+
ipc.on("message", handler);
|
|
1535
|
+
ipc.send({ type: "query_sessions" });
|
|
1536
|
+
});
|
|
1537
|
+
});
|
|
1538
|
+
await Promise.all(queries);
|
|
1539
|
+
// Remove sessions not found in any daemon
|
|
1540
|
+
let pruned = 0;
|
|
1541
|
+
for (const [sessionName] of this.sessionRegistry) {
|
|
1542
|
+
if (!liveSessions.has(sessionName)) {
|
|
1543
|
+
this.sessionRegistry.delete(sessionName);
|
|
1544
|
+
this.logger.info({ sessionName }, "Pruned stale external session");
|
|
1545
|
+
pruned++;
|
|
1546
|
+
}
|
|
1547
|
+
}
|
|
1548
|
+
if (pruned > 0) {
|
|
1549
|
+
this.logger.info({ pruned, remaining: this.sessionRegistry.size }, "Session registry pruned");
|
|
1550
|
+
}
|
|
1551
|
+
return pruned;
|
|
1552
|
+
}
|
|
1553
|
+
/**
|
|
1554
|
+
* Graceful shutdown for full reload: wait for idle, notify, then stop everything.
|
|
1555
|
+
* The caller is expected to exit the process after this resolves.
|
|
1556
|
+
*/
|
|
1557
|
+
async gracefulShutdownForReload() {
|
|
1558
|
+
const instanceNames = [...this.daemons.keys()];
|
|
1559
|
+
if (instanceNames.length === 0) {
|
|
1560
|
+
this.logger.info("No instances to stop");
|
|
1561
|
+
await this.stopAll();
|
|
1562
|
+
return;
|
|
1563
|
+
}
|
|
1564
|
+
this.logger.info(`Full restart: waiting for ${instanceNames.length} instances to idle...`);
|
|
1565
|
+
const groupId = this.fleetConfig?.channel?.group_id;
|
|
1566
|
+
if (groupId && this.adapter) {
|
|
1567
|
+
await this.adapter.sendText(String(groupId), `🔄 Full restart initiated — waiting for all instances to idle, then reloading process...`)
|
|
1568
|
+
.catch(e => this.logger.debug({ err: e }, "Failed to post full restart notification"));
|
|
1569
|
+
}
|
|
1570
|
+
// Wait for idle with 5-minute timeout
|
|
1571
|
+
const IDLE_TIMEOUT_MS = 5 * 60 * 1000;
|
|
1572
|
+
let timeoutHandle;
|
|
1573
|
+
const idleDeadline = new Promise((_, reject) => {
|
|
1574
|
+
timeoutHandle = setTimeout(() => reject(new Error("Idle wait timed out after 5 minutes")), IDLE_TIMEOUT_MS);
|
|
1575
|
+
});
|
|
1576
|
+
try {
|
|
1577
|
+
await Promise.race([
|
|
1578
|
+
Promise.all(instanceNames.map(async (name) => {
|
|
1579
|
+
const daemon = this.daemons.get(name);
|
|
1580
|
+
if (daemon) {
|
|
1581
|
+
this.logger.info(`Waiting for ${name} to idle...`);
|
|
1582
|
+
await daemon.waitForIdle(10_000);
|
|
1583
|
+
this.logger.info(`${name} is idle`);
|
|
1584
|
+
}
|
|
1585
|
+
})),
|
|
1586
|
+
idleDeadline,
|
|
1587
|
+
]);
|
|
1588
|
+
}
|
|
1589
|
+
catch (err) {
|
|
1590
|
+
this.logger.warn({ err }, "Idle wait timed out — force stopping");
|
|
1591
|
+
}
|
|
1592
|
+
finally {
|
|
1593
|
+
clearTimeout(timeoutHandle);
|
|
1594
|
+
}
|
|
1595
|
+
this.logger.info("All instances idle — stopping for reload...");
|
|
1596
|
+
await this.stopAll();
|
|
1597
|
+
}
|
|
1598
|
+
/**
|
|
1599
|
+
* Graceful restart: wait for all instances to be idle, then stop and start them.
|
|
1600
|
+
*/
|
|
1601
|
+
async restartInstances() {
|
|
1602
|
+
if (!this.configPath) {
|
|
1603
|
+
this.logger.error("Cannot restart: no config path (was startAll called?)");
|
|
1604
|
+
return;
|
|
1605
|
+
}
|
|
1606
|
+
const instanceNames = [...this.daemons.keys()];
|
|
1607
|
+
if (instanceNames.length === 0) {
|
|
1608
|
+
this.logger.info("No instances to restart");
|
|
1609
|
+
return;
|
|
1610
|
+
}
|
|
1611
|
+
this.logger.info(`Graceful restart: waiting for ${instanceNames.length} instances to idle...`);
|
|
1612
|
+
const groupId = this.fleetConfig?.channel?.group_id;
|
|
1613
|
+
if (groupId && this.adapter) {
|
|
1614
|
+
await this.adapter.sendText(String(groupId), `🔄 Graceful restart initiated — waiting for all instances to idle...`)
|
|
1615
|
+
.catch(e => this.logger.debug({ err: e }, "Failed to post restart notification"));
|
|
1616
|
+
}
|
|
1617
|
+
const IDLE_TIMEOUT_MS = 5 * 60 * 1000;
|
|
1618
|
+
let timeoutHandle;
|
|
1619
|
+
const idleDeadline = new Promise((_, reject) => {
|
|
1620
|
+
timeoutHandle = setTimeout(() => reject(new Error("Idle wait timed out after 5 minutes")), IDLE_TIMEOUT_MS);
|
|
1621
|
+
});
|
|
1622
|
+
try {
|
|
1623
|
+
await Promise.race([
|
|
1624
|
+
Promise.all(instanceNames.map(async (name) => {
|
|
1625
|
+
const daemon = this.daemons.get(name);
|
|
1626
|
+
if (daemon) {
|
|
1627
|
+
this.logger.info(`Waiting for ${name} to idle...`);
|
|
1628
|
+
await daemon.waitForIdle(10_000);
|
|
1629
|
+
this.logger.info(`${name} is idle`);
|
|
1630
|
+
}
|
|
1631
|
+
})),
|
|
1632
|
+
idleDeadline,
|
|
1633
|
+
]);
|
|
1634
|
+
}
|
|
1635
|
+
catch (err) {
|
|
1636
|
+
this.logger.warn({ err }, "Idle wait timed out — force restarting");
|
|
1637
|
+
}
|
|
1638
|
+
finally {
|
|
1639
|
+
clearTimeout(timeoutHandle);
|
|
1640
|
+
}
|
|
1641
|
+
this.logger.info("All instances idle — restarting...");
|
|
1642
|
+
this.clearStatuslineWatchers();
|
|
1643
|
+
for (const [, ipc] of this.instanceIpcClients) {
|
|
1644
|
+
await ipc.close();
|
|
1645
|
+
}
|
|
1646
|
+
this.instanceIpcClients.clear();
|
|
1647
|
+
await Promise.allSettled(instanceNames.map(name => this.stopInstance(name)));
|
|
1648
|
+
const fleet = this.loadConfig(this.configPath);
|
|
1649
|
+
this.fleetConfig = fleet;
|
|
1650
|
+
const topicMode = fleet.channel?.mode === "topic";
|
|
1651
|
+
for (const [name, config] of Object.entries(fleet.instances)) {
|
|
1652
|
+
await this.startInstance(name, config, topicMode);
|
|
1653
|
+
}
|
|
1654
|
+
if (topicMode) {
|
|
1655
|
+
this.routingTable = this.buildRoutingTable();
|
|
1656
|
+
await new Promise(r => setTimeout(r, 3000));
|
|
1657
|
+
await this.connectToInstances(fleet);
|
|
1658
|
+
for (const name of Object.keys(fleet.instances)) {
|
|
1659
|
+
this.startStatuslineWatcher(name);
|
|
1660
|
+
}
|
|
1661
|
+
}
|
|
1662
|
+
this.logger.info("Graceful restart complete");
|
|
1663
|
+
if (groupId && this.adapter) {
|
|
1664
|
+
await this.adapter.sendText(String(groupId), `✅ Graceful restart complete — ${this.daemons.size} instances running`)
|
|
1665
|
+
.catch(e => this.logger.debug({ err: e }, "Failed to post restart completion notification"));
|
|
1666
|
+
// Notify each instance's channel so Claude resumes work
|
|
1667
|
+
const instances = Object.entries(this.fleetConfig?.instances ?? {});
|
|
1668
|
+
this.logger.info({ count: instances.length }, "Sending restart notification to instances");
|
|
1669
|
+
for (const [name, config] of instances) {
|
|
1670
|
+
const threadId = config.topic_id != null ? String(config.topic_id) : undefined;
|
|
1671
|
+
// Send to Telegram topic so the message appears in the chat
|
|
1672
|
+
if (threadId) {
|
|
1673
|
+
this.adapter.sendText(String(groupId), "Fleet restart complete. Continue from where you left off.", { threadId })
|
|
1674
|
+
.catch(e => this.logger.warn({ err: e, name, threadId }, "Failed to post per-instance restart notification"));
|
|
1675
|
+
}
|
|
1676
|
+
// Push to daemon IPC so the Claude session receives the message
|
|
1677
|
+
const ipc = this.instanceIpcClients.get(name);
|
|
1678
|
+
if (ipc?.connected) {
|
|
1679
|
+
ipc.send({
|
|
1680
|
+
type: "fleet_inbound",
|
|
1681
|
+
content: "Fleet restart complete. Continue from where you left off.",
|
|
1682
|
+
meta: {
|
|
1683
|
+
chat_id: String(groupId),
|
|
1684
|
+
thread_id: threadId ?? "",
|
|
1685
|
+
ts: new Date().toISOString(),
|
|
1686
|
+
},
|
|
1687
|
+
});
|
|
1688
|
+
}
|
|
1689
|
+
}
|
|
1690
|
+
}
|
|
1691
|
+
}
|
|
1692
|
+
// ── Health HTTP endpoint ─────────────────────────────────────────────
|
|
1693
|
+
startHealthServer(port) {
|
|
1694
|
+
this.startedAt = Date.now();
|
|
1695
|
+
this.healthServer = createServer((req, res) => {
|
|
1696
|
+
res.setHeader("Content-Type", "application/json");
|
|
1697
|
+
if (req.method === "GET" && req.url === "/health") {
|
|
1698
|
+
const instanceCount = this.fleetConfig?.instances
|
|
1699
|
+
? Object.keys(this.fleetConfig.instances).length
|
|
1700
|
+
: 0;
|
|
1701
|
+
res.writeHead(200);
|
|
1702
|
+
res.end(JSON.stringify({
|
|
1703
|
+
status: "ok",
|
|
1704
|
+
instances: instanceCount,
|
|
1705
|
+
uptime: Math.floor((Date.now() - this.startedAt) / 1000),
|
|
1706
|
+
}));
|
|
1707
|
+
return;
|
|
1708
|
+
}
|
|
1709
|
+
if (req.method === "GET" && req.url === "/status") {
|
|
1710
|
+
const instances = Object.keys(this.fleetConfig?.instances ?? {}).map(name => {
|
|
1711
|
+
const statusFile = join(this.getInstanceDir(name), "statusline.json");
|
|
1712
|
+
let context_pct = 0;
|
|
1713
|
+
let cost = 0;
|
|
1714
|
+
try {
|
|
1715
|
+
const data = JSON.parse(readFileSync(statusFile, "utf-8"));
|
|
1716
|
+
context_pct = data.context_window?.used_percentage ?? 0;
|
|
1717
|
+
cost = data.cost?.total_cost_usd ?? 0;
|
|
1718
|
+
}
|
|
1719
|
+
catch { /* statusline not yet available */ }
|
|
1720
|
+
return {
|
|
1721
|
+
name,
|
|
1722
|
+
status: this.getInstanceStatus(name),
|
|
1723
|
+
context_pct,
|
|
1724
|
+
cost,
|
|
1725
|
+
};
|
|
1726
|
+
});
|
|
1727
|
+
res.writeHead(200);
|
|
1728
|
+
res.end(JSON.stringify({ instances }));
|
|
1729
|
+
return;
|
|
1730
|
+
}
|
|
1731
|
+
res.writeHead(404);
|
|
1732
|
+
res.end(JSON.stringify({ error: "not found" }));
|
|
1733
|
+
});
|
|
1734
|
+
this.healthServer.listen(port, "127.0.0.1", () => {
|
|
1735
|
+
this.logger.info({ port }, "Health endpoint listening");
|
|
1736
|
+
});
|
|
1737
|
+
}
|
|
1738
|
+
}
|
|
1739
|
+
//# sourceMappingURL=fleet-manager.js.map
|