@songsid/agend 0.0.13 → 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +51 -8
- package/dist/cli.js.map +1 -1
- package/dist/fleet-manager.d.ts +6 -0
- package/dist/fleet-manager.js +96 -0
- package/dist/fleet-manager.js.map +1 -1
- package/dist/instance-lifecycle.d.ts +1 -0
- package/dist/instance-lifecycle.js +2 -0
- package/dist/instance-lifecycle.js.map +1 -1
- package/package.json +1 -1
package/dist/fleet-manager.js
CHANGED
|
@@ -82,6 +82,10 @@ export class FleetManager {
|
|
|
82
82
|
classicChannels = null;
|
|
83
83
|
// Model failover state
|
|
84
84
|
failoverActive = new Map(); // instance → current failover model
|
|
85
|
+
// IPC reconnect: tracks instances being intentionally stopped (skip reconnect)
|
|
86
|
+
ipcStoppingInstances = new Set();
|
|
87
|
+
// Adapter restart: prevents re-entrant restart attempts
|
|
88
|
+
adapterRestarting = new Set();
|
|
85
89
|
// Health endpoint
|
|
86
90
|
healthServer = null;
|
|
87
91
|
startedAt = 0;
|
|
@@ -783,6 +787,10 @@ export class FleetManager {
|
|
|
783
787
|
this.adapter.on("handler_error", safeHandler((err) => {
|
|
784
788
|
this.logger.warn({ err: err instanceof Error ? err.message : String(err) }, "Adapter handler error");
|
|
785
789
|
}, this.logger, "adapter.handler_error"));
|
|
790
|
+
this.adapter.on("error", (err) => {
|
|
791
|
+
this.logger.error({ err }, "Primary adapter fatal error");
|
|
792
|
+
this.restartAdapter(this.adapter, "primary").catch(() => { });
|
|
793
|
+
});
|
|
786
794
|
this.adapter.on("new_group_detected", safeHandler((data) => {
|
|
787
795
|
const adminMsg = `š Bot added to new server:\n⢠Name: ${data.groupTitle}\n⢠ID: ${data.groupId}\n⢠Platform: ${data.source}\n\nTo allow: add \`${data.groupId}\` to classicBot.yaml \`allowed_guilds\``;
|
|
788
796
|
const generalId = this.findGeneralInstance();
|
|
@@ -972,6 +980,10 @@ export class FleetManager {
|
|
|
972
980
|
if (generalId)
|
|
973
981
|
this.notifyInstanceTopic(generalId, adminMsg);
|
|
974
982
|
}, this.logger, `adapter[${adapterId}].new_group_detected`));
|
|
983
|
+
adapter.on("error", (err) => {
|
|
984
|
+
this.logger.error({ err, adapterId }, "Additional adapter fatal error");
|
|
985
|
+
this.restartAdapter(adapter, adapterId).catch(() => { });
|
|
986
|
+
});
|
|
975
987
|
this.logger.info({ adapterId, type: channelConfig.type }, "Additional adapter started");
|
|
976
988
|
}
|
|
977
989
|
/** Connect IPC to a single instance with all handlers */
|
|
@@ -979,6 +991,7 @@ export class FleetManager {
|
|
|
979
991
|
// Close existing client to prevent socket leak on reconnect
|
|
980
992
|
const existing = this.instanceIpcClients.get(name);
|
|
981
993
|
if (existing) {
|
|
994
|
+
this.ipcStoppingInstances.add(name);
|
|
982
995
|
try {
|
|
983
996
|
existing.close();
|
|
984
997
|
}
|
|
@@ -986,6 +999,7 @@ export class FleetManager {
|
|
|
986
999
|
this.logger.debug({ err, name }, "IPC client close failed (likely already closed)");
|
|
987
1000
|
}
|
|
988
1001
|
this.instanceIpcClients.delete(name);
|
|
1002
|
+
this.ipcStoppingInstances.delete(name);
|
|
989
1003
|
}
|
|
990
1004
|
const sockPath = join(this.getInstanceDir(name), "channel.sock");
|
|
991
1005
|
if (!existsSync(sockPath))
|
|
@@ -1049,11 +1063,89 @@ export class FleetManager {
|
|
|
1049
1063
|
if (!this.statuslineWatcher.has(name)) {
|
|
1050
1064
|
this.statuslineWatcher.watch(name);
|
|
1051
1065
|
}
|
|
1066
|
+
// Auto-reconnect on disconnect (unless intentionally stopping)
|
|
1067
|
+
ipc.on("disconnect", () => {
|
|
1068
|
+
this.instanceIpcClients.delete(name);
|
|
1069
|
+
if (this.ipcStoppingInstances.has(name))
|
|
1070
|
+
return;
|
|
1071
|
+
this.ipcReconnect(name).catch(() => { });
|
|
1072
|
+
});
|
|
1052
1073
|
}
|
|
1053
1074
|
catch (err) {
|
|
1054
1075
|
this.logger.warn({ name, err }, "Failed to connect to instance IPC");
|
|
1055
1076
|
}
|
|
1056
1077
|
}
|
|
1078
|
+
/** Attempt IPC reconnection with exponential backoff */
|
|
1079
|
+
async ipcReconnect(name) {
|
|
1080
|
+
for (let attempt = 1;; attempt++) {
|
|
1081
|
+
if (this.ipcStoppingInstances.has(name) || !this.daemons.has(name))
|
|
1082
|
+
return;
|
|
1083
|
+
const delay = attempt <= 3 ? 3000 * Math.pow(2, attempt - 1) : 60_000; // 3s, 6s, 12s, then 60s
|
|
1084
|
+
await new Promise(r => setTimeout(r, delay));
|
|
1085
|
+
if (this.ipcStoppingInstances.has(name) || !this.daemons.has(name))
|
|
1086
|
+
return;
|
|
1087
|
+
try {
|
|
1088
|
+
await this.connectIpcToInstance(name);
|
|
1089
|
+
if (this.instanceIpcClients.has(name)) {
|
|
1090
|
+
this.logger.info({ name, attempt }, "IPC reconnected");
|
|
1091
|
+
return;
|
|
1092
|
+
}
|
|
1093
|
+
}
|
|
1094
|
+
catch { /* retry */ }
|
|
1095
|
+
// Periodic pane health check (every attempt after initial 3)
|
|
1096
|
+
if (attempt >= 3) {
|
|
1097
|
+
const instanceDir = this.getInstanceDir(name);
|
|
1098
|
+
const windowIdPath = join(instanceDir, "window-id");
|
|
1099
|
+
if (existsSync(windowIdPath)) {
|
|
1100
|
+
const windowId = readFileSync(windowIdPath, "utf-8").trim();
|
|
1101
|
+
if (windowId) {
|
|
1102
|
+
try {
|
|
1103
|
+
const { execSync } = await import("node:child_process");
|
|
1104
|
+
execSync(`tmux list-panes -t "${windowId}"`, { stdio: "ignore" });
|
|
1105
|
+
}
|
|
1106
|
+
catch {
|
|
1107
|
+
// Pane dead ā respawn
|
|
1108
|
+
this.logger.info({ name }, "Tmux pane dead after IPC loss ā respawning instance");
|
|
1109
|
+
this.restartSingleInstance(name).catch(err => this.logger.error({ name, err }, "Auto-respawn after IPC loss failed"));
|
|
1110
|
+
return;
|
|
1111
|
+
}
|
|
1112
|
+
}
|
|
1113
|
+
}
|
|
1114
|
+
}
|
|
1115
|
+
if (attempt % 10 === 0) {
|
|
1116
|
+
this.logger.warn({ name, attempt }, "IPC reconnect still failing");
|
|
1117
|
+
}
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
/** Restart a channel adapter after fatal error with infinite retry + 60s cap */
|
|
1121
|
+
async restartAdapter(adapter, id) {
|
|
1122
|
+
if (this.adapterRestarting.has(id))
|
|
1123
|
+
return;
|
|
1124
|
+
this.adapterRestarting.add(id);
|
|
1125
|
+
try {
|
|
1126
|
+
for (let attempt = 1;; attempt++) {
|
|
1127
|
+
if (this.ipcStoppingInstances.has("__fleet_stopping__"))
|
|
1128
|
+
return;
|
|
1129
|
+
const delay = attempt <= 3 ? 5000 * Math.pow(2, attempt - 1) : 60_000; // 5s, 10s, 20s, then 60s
|
|
1130
|
+
await new Promise(r => setTimeout(r, delay));
|
|
1131
|
+
if (this.ipcStoppingInstances.has("__fleet_stopping__"))
|
|
1132
|
+
return;
|
|
1133
|
+
try {
|
|
1134
|
+
await adapter.stop().catch(() => { });
|
|
1135
|
+
await adapter.start();
|
|
1136
|
+
this.logger.info({ id, attempt }, "Adapter restarted successfully");
|
|
1137
|
+
return;
|
|
1138
|
+
}
|
|
1139
|
+
catch { /* retry */ }
|
|
1140
|
+
if (attempt % 10 === 0) {
|
|
1141
|
+
this.logger.warn({ id, attempt }, "Adapter restart still failing");
|
|
1142
|
+
}
|
|
1143
|
+
}
|
|
1144
|
+
}
|
|
1145
|
+
finally {
|
|
1146
|
+
this.adapterRestarting.delete(id);
|
|
1147
|
+
}
|
|
1148
|
+
}
|
|
1057
1149
|
/** Handle inbound message — transcribe voice if present, then route */
|
|
1058
1150
|
findGeneralInstance(adapterId) {
|
|
1059
1151
|
if (!this.fleetConfig)
|
|
@@ -2629,6 +2721,7 @@ When users create specialized instances, suggest these configurations:
|
|
|
2629
2721
|
return `š Agent stopped in this channel.`;
|
|
2630
2722
|
}
|
|
2631
2723
|
async stopAll() {
|
|
2724
|
+
this.ipcStoppingInstances.add("__fleet_stopping__");
|
|
2632
2725
|
this.clearStatuslineWatchers();
|
|
2633
2726
|
this.costGuard?.stop();
|
|
2634
2727
|
this.dailySummary?.stop();
|
|
@@ -2655,6 +2748,8 @@ When users create specialized instances, suggest these configurations:
|
|
|
2655
2748
|
// Concurrency limited to avoid overwhelming the tmux server.
|
|
2656
2749
|
const STOP_CONCURRENCY = 5;
|
|
2657
2750
|
const entries = [...this.daemons.entries()];
|
|
2751
|
+
for (const [name] of entries)
|
|
2752
|
+
this.ipcStoppingInstances.add(name);
|
|
2658
2753
|
for (let i = 0; i < entries.length; i += STOP_CONCURRENCY) {
|
|
2659
2754
|
const batch = entries.slice(i, i + STOP_CONCURRENCY);
|
|
2660
2755
|
await Promise.all(batch.map(async ([name, daemon]) => {
|
|
@@ -2671,6 +2766,7 @@ When users create specialized instances, suggest these configurations:
|
|
|
2671
2766
|
await ipc.close();
|
|
2672
2767
|
}
|
|
2673
2768
|
this.instanceIpcClients.clear();
|
|
2769
|
+
this.ipcStoppingInstances.clear();
|
|
2674
2770
|
for (const [, w] of this.worlds) {
|
|
2675
2771
|
await w.stop().catch(() => { });
|
|
2676
2772
|
}
|