@agent-relay/resiliency 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context-persistence.d.ts +140 -0
- package/dist/context-persistence.d.ts.map +1 -0
- package/dist/context-persistence.js +397 -0
- package/dist/context-persistence.js.map +1 -0
- package/dist/crash-insights.d.ts +156 -0
- package/dist/crash-insights.d.ts.map +1 -0
- package/dist/crash-insights.js +492 -0
- package/dist/crash-insights.js.map +1 -0
- package/dist/gossip-health.d.ts +137 -0
- package/dist/gossip-health.d.ts.map +1 -0
- package/dist/gossip-health.js +241 -0
- package/dist/gossip-health.js.map +1 -0
- package/dist/health-monitor.d.ts +97 -0
- package/dist/health-monitor.d.ts.map +1 -0
- package/dist/health-monitor.js +291 -0
- package/dist/health-monitor.js.map +1 -0
- package/dist/index.d.ts +68 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +68 -0
- package/dist/index.js.map +1 -0
- package/dist/leader-watchdog.d.ts +109 -0
- package/dist/leader-watchdog.d.ts.map +1 -0
- package/dist/leader-watchdog.js +189 -0
- package/dist/leader-watchdog.js.map +1 -0
- package/dist/logger.d.ts +114 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +250 -0
- package/dist/logger.js.map +1 -0
- package/dist/memory-monitor.d.ts +172 -0
- package/dist/memory-monitor.d.ts.map +1 -0
- package/dist/memory-monitor.js +599 -0
- package/dist/memory-monitor.js.map +1 -0
- package/dist/metrics.d.ts +115 -0
- package/dist/metrics.d.ts.map +1 -0
- package/dist/metrics.js +239 -0
- package/dist/metrics.js.map +1 -0
- package/dist/provider-context.d.ts +100 -0
- package/dist/provider-context.d.ts.map +1 -0
- package/dist/provider-context.js +362 -0
- package/dist/provider-context.js.map +1 -0
- package/dist/stateless-lead.d.ts +149 -0
- package/dist/stateless-lead.d.ts.map +1 -0
- package/dist/stateless-lead.js +308 -0
- package/dist/stateless-lead.js.map +1 -0
- package/dist/supervisor.d.ts +147 -0
- package/dist/supervisor.d.ts.map +1 -0
- package/dist/supervisor.js +459 -0
- package/dist/supervisor.js.map +1 -0
- package/package.json +28 -0
|
@@ -0,0 +1,291 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Health Monitor
|
|
3
|
+
*
|
|
4
|
+
* Monitors spawned agent processes and ensures they stay alive.
|
|
5
|
+
* - Periodic health checks (process liveness)
|
|
6
|
+
* - Auto-restart on crash
|
|
7
|
+
* - Death detection and logging
|
|
8
|
+
* - Metrics collection
|
|
9
|
+
*/
|
|
10
|
+
import { EventEmitter } from 'events';
|
|
11
|
+
/**
 * Baseline settings for AgentHealthMonitor.
 *
 * The constructor copies these into a fresh object and overlays the caller's
 * overrides, so the defaults themselves are frozen to guarantee that no
 * instance (or other code in this module) can alter them for everyone else.
 */
const DEFAULT_CONFIG = Object.freeze({
    /** Milliseconds between two health-check cycles. */
    checkIntervalMs: 5000,
    /** Milliseconds to wait for an agent's sendHealthCheck() before counting a failure. */
    responseTimeoutMs: 10000,
    /** Restart attempts allowed per agent before it is marked 'dead'. */
    maxRestarts: 5,
    /** Milliseconds to pause before each restart attempt. */
    restartCooldownMs: 2000,
    /** Failed checks in a row that trigger death handling. */
    maxConsecutiveFailures: 3,
});
|
|
18
|
+
/**
 * Watches registered agent processes and restarts them when they die.
 *
 * Each cycle checks, per agent, that the PID still exists and (when the agent
 * provides `sendHealthCheck`) that it answers within `responseTimeoutMs`.
 *
 * Events emitted: 'registered', 'unregistered', 'healthy', 'unhealthy',
 * 'died', 'restarting', 'restarted', 'restartFailed', 'permanentlyDead'
 * and 'log' (one structured entry per log line).
 */
export class AgentHealthMonitor extends EventEmitter {
    /** Agent handles keyed by agent name. */
    agents = new Map();
    /** Mutable health records keyed by agent name. */
    health = new Map();
    /** Timer handle of the periodic check; undefined while stopped. */
    intervalId;
    /** Effective configuration: defaults overlaid with constructor overrides. */
    config;
    /** True between start() and stop(). */
    isRunning = false;

    /**
     * @param {object} [config] - Partial configuration; missing keys fall back
     *   to DEFAULT_CONFIG.
     */
    constructor(config = {}) {
        super();
        this.config = { ...DEFAULT_CONFIG, ...config };
    }

    /**
     * Register an agent for health monitoring.
     *
     * Registering a name that already exists replaces its handle and resets
     * its health record.
     *
     * @param {object} agent - Handle exposing `name`, `pid`, `restart()` and
     *   optionally `sendHealthCheck()`.
     */
    register(agent) {
        const now = new Date();
        this.agents.set(agent.name, agent);
        this.health.set(agent.name, {
            name: agent.name,
            pid: agent.pid,
            status: 'healthy',
            lastHealthCheck: now,
            lastResponse: new Date(now),
            restartCount: 0,
            consecutiveFailures: 0,
            uptime: 0,
            startedAt: new Date(now),
        });
        this.emit('registered', { name: agent.name, pid: agent.pid });
        this.log('info', `Registered agent for health monitoring: ${agent.name} (PID: ${agent.pid})`);
    }

    /**
     * Unregister an agent from health monitoring.
     *
     * @param {string} name - Name the agent was registered under.
     */
    unregister(name) {
        this.agents.delete(name);
        this.health.delete(name);
        this.emit('unregistered', { name });
        this.log('info', `Unregistered agent: ${name}`);
    }

    /**
     * Start the health monitoring loop. Calling it while already running is a
     * no-op. One cycle runs immediately, then one every `checkIntervalMs`.
     */
    start() {
        if (this.isRunning)
            return;
        this.isRunning = true;
        this.log('info', 'Health monitor started', {
            checkInterval: this.config.checkIntervalMs,
            maxRestarts: this.config.maxRestarts,
        });
        // checkAll() is async; without a rejection handler a throwing event
        // listener would surface as an unhandled rejection.
        const runCycle = () => {
            this.checkAll().catch((error) => {
                this.log('error', 'Health check cycle failed', {
                    error: error instanceof Error ? error.message : String(error),
                });
            });
        };
        this.intervalId = setInterval(runCycle, this.config.checkIntervalMs);
        // Initial check
        runCycle();
    }

    /**
     * Stop the health monitoring loop. Checks already in flight are not
     * cancelled.
     */
    stop() {
        if (this.intervalId) {
            clearInterval(this.intervalId);
            this.intervalId = undefined;
        }
        this.isRunning = false;
        this.log('info', 'Health monitor stopped');
    }

    /**
     * Get health status for all agents.
     *
     * @returns {object[]} Shallow copies of every health record with `uptime`
     *   recomputed as milliseconds since `startedAt`.
     */
    getAll() {
        return Array.from(this.health.values()).map((h) => ({
            ...h,
            uptime: Date.now() - h.startedAt.getTime(),
        }));
    }

    /**
     * Get health status for a specific agent.
     *
     * @param {string} name - Registered agent name.
     * @returns {object|undefined} Shallow copy of the record with a fresh
     *   `uptime`, or undefined when the name is unknown.
     */
    get(name) {
        const health = this.health.get(name);
        if (health) {
            return { ...health, uptime: Date.now() - health.startedAt.getTime() };
        }
        return undefined;
    }

    /**
     * Check health of all registered agents concurrently.
     *
     * @returns {Promise<void>} Settles once every per-agent check has finished.
     */
    async checkAll() {
        const checks = Array.from(this.agents.entries()).map(([name, agent]) => this.checkAgent(name, agent));
        await Promise.all(checks);
    }

    /**
     * Check health of a single agent.
     *
     * Agents whose record is 'restarting' (a restart is already in flight) or
     * 'dead' (restart budget exhausted) are skipped, so overlapping cycles
     * cannot start a second restart or repeat the death events. Re-register an
     * agent to monitor it again after it was marked dead.
     *
     * @param {string} name - Registered agent name.
     * @param {object} agent - The agent handle stored for that name.
     * @returns {Promise<void>}
     */
    async checkAgent(name, agent) {
        const health = this.health.get(name);
        if (!health)
            return;
        if (health.status === 'restarting' || health.status === 'dead')
            return;
        health.lastHealthCheck = new Date();
        const pidAtStart = agent.pid;
        // A result is stale when, while this check was awaiting, the agent was
        // unregistered/re-registered, restarted (new PID) or handed to death
        // handling by an overlapping check.
        const isStale = () => this.health.get(name) !== health
            || agent.pid !== pidAtStart
            || health.status === 'restarting'
            || health.status === 'dead';
        try {
            // First check: Is the process alive?
            if (!this.isProcessAlive(agent.pid)) {
                await this.handleDeath(name, agent, health, 'Process not found');
                return;
            }
            // Second check: Does it respond to health check?
            if (agent.sendHealthCheck) {
                let timer;
                const timedOut = new Promise((resolve) => {
                    timer = setTimeout(() => resolve(false), this.config.responseTimeoutMs);
                });
                let responded;
                try {
                    responded = await Promise.race([agent.sendHealthCheck(), timedOut]);
                }
                finally {
                    // Without this the losing timer stays armed for the full timeout.
                    clearTimeout(timer);
                }
                if (isStale())
                    return;
                if (!responded) {
                    health.consecutiveFailures++;
                    this.log('warn', `Agent unresponsive: ${name}`, {
                        failures: health.consecutiveFailures,
                        max: this.config.maxConsecutiveFailures,
                    });
                    health.status = 'unresponsive';
                    if (health.consecutiveFailures >= this.config.maxConsecutiveFailures) {
                        await this.handleDeath(name, agent, health, 'Unresponsive after multiple health checks');
                    }
                    else {
                        this.emit('unhealthy', { name, health });
                    }
                    return;
                }
            }
            // Get memory/CPU usage if available
            try {
                const usage = await this.getProcessUsage(agent.pid);
                health.memoryUsage = usage.memory;
                health.cpuUsage = usage.cpu;
            }
            catch {
                // Usage figures are best-effort; a failure here must not fail the check.
            }
            if (isStale())
                return;
            // All good
            health.status = 'healthy';
            health.lastResponse = new Date();
            health.consecutiveFailures = 0;
            this.emit('healthy', { name, health });
        }
        catch (error) {
            if (isStale())
                return;
            health.consecutiveFailures++;
            health.lastError = error instanceof Error ? error.message : String(error);
            this.log('error', `Health check error for ${name}`, { error: health.lastError });
            if (health.consecutiveFailures >= this.config.maxConsecutiveFailures) {
                await this.handleDeath(name, agent, health, health.lastError);
            }
        }
    }

    /**
     * Handle agent death - attempt restart or mark as dead.
     *
     * Emits 'died' first. If the restart budget is spent the record becomes
     * 'dead' and 'permanentlyDead' is emitted. Otherwise the record is set to
     * 'restarting', the cooldown elapses and `agent.restart()` is awaited; a
     * failed restart retries recursively until the budget is spent.
     *
     * @param {string} name - Registered agent name.
     * @param {object} agent - Agent handle; `restart()` is expected to leave
     *   the new PID on `agent.pid`.
     * @param {object} health - The agent's mutable health record.
     * @param {string} reason - Human-readable cause, used in logs and events.
     * @returns {Promise<void>}
     */
    async handleDeath(name, agent, health, reason) {
        this.log('error', `Agent died: ${name}`, {
            reason,
            restartCount: health.restartCount,
            maxRestarts: this.config.maxRestarts,
        });
        this.emit('died', { name, reason, restartCount: health.restartCount });
        // Check if we should attempt restart
        if (health.restartCount >= this.config.maxRestarts) {
            health.status = 'dead';
            health.lastError = `Exceeded max restarts (${this.config.maxRestarts}): ${reason}`;
            this.log('error', `Agent permanently dead: ${name}`, { reason: health.lastError });
            this.emit('permanentlyDead', { name, health });
            return;
        }
        // Attempt restart
        health.status = 'restarting';
        try {
            health.restartCount++;
            this.log('info', `Attempting restart ${health.restartCount}/${this.config.maxRestarts}: ${name}`);
            this.emit('restarting', { name, attempt: health.restartCount });
            // Wait cooldown
            await new Promise((resolve) => setTimeout(resolve, this.config.restartCooldownMs));
            // The agent may have been unregistered (or replaced) during the cooldown.
            if (this.health.get(name) !== health) {
                this.log('info', `Restart cancelled, agent no longer registered: ${name}`);
                return;
            }
            try {
                await agent.restart();
                // Update health after successful restart
                health.status = 'healthy';
                health.consecutiveFailures = 0;
                health.startedAt = new Date();
                health.lastResponse = new Date();
                health.pid = agent.pid;
                this.log('info', `Agent restarted successfully: ${name}`, {
                    newPid: agent.pid,
                    attempt: health.restartCount,
                });
                this.emit('restarted', { name, pid: agent.pid, attempt: health.restartCount });
            }
            catch (error) {
                health.lastError = error instanceof Error ? error.message : String(error);
                this.log('error', `Restart failed: ${name}`, { error: health.lastError });
                this.emit('restartFailed', { name, error: health.lastError });
                // Recursively try again if under limit
                if (health.restartCount < this.config.maxRestarts) {
                    await this.handleDeath(name, agent, health, health.lastError);
                }
                else {
                    health.status = 'dead';
                    this.emit('permanentlyDead', { name, health });
                }
            }
        }
        finally {
            // checkAgent() skips 'restarting' records, so an abnormal exit (for
            // example a throwing event listener) must not leave the record
            // parked in that state or the agent would never be checked again.
            if (health.status === 'restarting') {
                health.status = 'unresponsive';
            }
        }
    }

    /**
     * Check if a process is alive by PID.
     *
     * @param {number} pid - Process id to probe.
     * @returns {boolean} True when the process exists, including when it
     *   exists but this process lacks permission to signal it.
     */
    isProcessAlive(pid) {
        try {
            // Sending signal 0 checks if process exists without killing it
            process.kill(pid, 0);
            return true;
        }
        catch (error) {
            // EPERM means the target exists but is owned by another user.
            return error?.code === 'EPERM';
        }
    }

    /**
     * Get memory and CPU usage for a process via `ps` (Linux/macOS).
     *
     * `ps` is invoked without a shell and without blocking the event loop.
     *
     * @param {number} pid - Process id to inspect.
     * @returns {Promise<{memory: number, cpu: number}>} RSS in bytes and CPU
     *   percentage; both 0 when the PID is invalid, `ps` is unavailable, the
     *   process is gone or the output cannot be parsed.
     */
    async getProcessUsage(pid) {
        const unknown = { memory: 0, cpu: 0 };
        if (!Number.isInteger(pid) || pid <= 0) {
            return unknown;
        }
        try {
            const [{ execFile }, { promisify }] = await Promise.all([
                import('child_process'),
                import('util'),
            ]);
            const { stdout } = await promisify(execFile)('ps', ['-o', 'rss=,pcpu=', '-p', String(pid)], { encoding: 'utf8', timeout: this.config.responseTimeoutMs });
            const [rssKb, cpuPct] = stdout.trim().split(/\s+/);
            const memory = Number.parseInt(rssKb, 10) * 1024; // ps reports RSS in KiB
            const cpu = Number.parseFloat(cpuPct);
            return {
                memory: Number.isFinite(memory) ? memory : 0,
                cpu: Number.isFinite(cpu) ? cpu : 0,
            };
        }
        catch {
            return unknown;
        }
    }

    /**
     * Structured logging: emits a 'log' event and mirrors the line to the
     * console. Levels other than 'info', 'warn' and 'error' are emitted but
     * not printed.
     *
     * @param {string} level - 'info', 'warn' or 'error'.
     * @param {string} message - Log message.
     * @param {object} [context] - Extra fields merged into the emitted entry
     *   and printed as JSON.
     */
    log(level, message, context) {
        const entry = {
            timestamp: new Date().toISOString(),
            level,
            component: 'health-monitor',
            message,
            ...context,
        };
        this.emit('log', entry);
        // Also log to console with structure
        const prefix = `[health-monitor]`;
        const details = context ? JSON.stringify(context) : '';
        switch (level) {
            case 'info':
                console.log(prefix, message, details);
                break;
            case 'warn':
                console.warn(prefix, message, details);
                break;
            case 'error':
                console.error(prefix, message, details);
                break;
        }
    }
}
|
|
283
|
+
// Lazily created, module-wide monitor shared by every getHealthMonitor() caller.
let _monitor = null;
/**
 * Return the shared AgentHealthMonitor, creating it on first use.
 *
 * @param {object} [config] - Passed to the AgentHealthMonitor constructor on
 *   the first call only; once the instance exists this argument is ignored.
 * @returns {AgentHealthMonitor} The shared monitor instance.
 */
export function getHealthMonitor(config) {
    _monitor ??= new AgentHealthMonitor(config);
    return _monitor;
}
|
|
291
|
+
//# sourceMappingURL=health-monitor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"health-monitor.js","sourceRoot":"","sources":["../src/health-monitor.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAkCtC,MAAM,cAAc,GAAwB;IAC1C,eAAe,EAAE,IAAI;IACrB,iBAAiB,EAAE,KAAK;IACxB,WAAW,EAAE,CAAC;IACd,iBAAiB,EAAE,IAAI;IACvB,sBAAsB,EAAE,CAAC;CAC1B,CAAC;AAEF,MAAM,OAAO,kBAAmB,SAAQ,YAAY;IAC1C,MAAM,GAAG,IAAI,GAAG,EAAwB,CAAC;IACzC,MAAM,GAAG,IAAI,GAAG,EAAuB,CAAC;IACxC,UAAU,CAAkC;IAC5C,MAAM,CAAsB;IAC5B,SAAS,GAAG,KAAK,CAAC;IAE1B,YAAY,SAAuC,EAAE;QACnD,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,KAAmB;QAC1B,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QACnC,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,EAAE;YAC1B,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,GAAG,EAAE,KAAK,CAAC,GAAG;YACd,MAAM,EAAE,SAAS;YACjB,eAAe,EAAE,IAAI,IAAI,EAAE;YAC3B,YAAY,EAAE,IAAI,IAAI,EAAE;YACxB,YAAY,EAAE,CAAC;YACf,mBAAmB,EAAE,CAAC;YACtB,MAAM,EAAE,CAAC;YACT,SAAS,EAAE,IAAI,IAAI,EAAE;SACtB,CAAC,CAAC;QAEH,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,IAAI,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,CAAC,CAAC;QAC9D,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,2CAA2C,KAAK,CAAC,IAAI,UAAU,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC;IAChG,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,IAAY;QACrB,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACzB,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;QACpC,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,uBAAuB,IAAI,EAAE,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,IAAI,CAAC,SAAS;YAAE,OAAO;QAC3B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAEtB,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,wBAAwB,EAAE;YACzC,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,eAAe;YAC1C,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;SACrC,CAAC,CAAC;QAEH,IAAI,CAAC,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE;YACjC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClB,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QAEhC,gBAAgB;QAChB,IAAI,CAAC,QAAQ,EAAE,CAAC;IAClB,CAAC;IAED;;OAEG;IACH,IAAI;QACF,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,aAAa,CAAC,IAAI,CAAC
,UAAU,CAAC,CAAC;YAC/B,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC9B,CAAC;QACD,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACvB,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,wBAAwB,CAAC,CAAC;IAC7C,CAAC;IAED;;OAEG;IACH,MAAM;QACJ,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAClD,GAAG,CAAC;YACJ,MAAM,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,SAAS,CAAC,OAAO,EAAE;SAC3C,CAAC,CAAC,CAAC;IACN,CAAC;IAED;;OAEG;IACH,GAAG,CAAC,IAAY;QACd,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACrC,IAAI,MAAM,EAAE,CAAC;YACX,OAAO,EAAE,GAAG,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,MAAM,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,CAAC;QACxE,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,QAAQ;QACpB,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,EAAE,CACrE,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,KAAK,CAAC,CAC7B,CAAC;QACF,MAAM,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,UAAU,CAAC,IAAY,EAAE,KAAmB;QACxD,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;QACrC,IAAI,CAAC,MAAM;YAAE,OAAO;QAEpB,MAAM,CAAC,eAAe,GAAG,IAAI,IAAI,EAAE,CAAC;QAEpC,IAAI,CAAC;YACH,qCAAqC;YACrC,MAAM,OAAO,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAE/C,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,mBAAmB,CAAC,CAAC;gBACjE,OAAO;YACT,CAAC;YAED,iDAAiD;YACjD,IAAI,KAAK,CAAC,eAAe,EAAE,CAAC;gBAC1B,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC;oBACnC,KAAK,CAAC,eAAe,EAAE;oBACvB,IAAI,OAAO,CAAQ,CAAC,OAAO,EAAE,EAAE,CAC7B,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAChE;iBACF,CAAC,CAAC;gBAEH,IAAI,CAAC,SAAS,EAAE,CAAC;oBACf,MAAM,CAAC,mBAAmB,EAAE,CAAC;oBAC7B,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,uBAAuB,IAAI,EAAE,EAAE;wBAC9C,QAAQ,EAAE,MAAM,CAAC,mBAAmB;wBACpC,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,sBAAsB;qBACxC,CAAC,CAAC;oBAEH,IAAI,MAAM,CAAC,mBAAmB,IAAI,IAAI,CAAC,MAAM,CAAC,sBAAsB,EAAE,CAAC;wBACrE,MAAM,CAAC,MAAM,GAAG,cAAc,CAAC;wBAC/B,MAAM,IAAI,CAAC,WAAW,
CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,2CAA2C,CAAC,CAAC;oBAC3F,CAAC;yBAAM,CAAC;wBACN,MAAM,CAAC,MAAM,GAAG,cAAc,CAAC;wBAC/B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;oBAC3C,CAAC;oBACD,OAAO;gBACT,CAAC;YACH,CAAC;YAED,oCAAoC;YACpC,IAAI,CAAC;gBACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;gBACpD,MAAM,CAAC,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC;gBAClC,MAAM,CAAC,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC;YAC9B,CAAC;YAAC,MAAM,CAAC;gBACP,sBAAsB;YACxB,CAAC;YAED,WAAW;YACX,MAAM,CAAC,MAAM,GAAG,SAAS,CAAC;YAC1B,MAAM,CAAC,YAAY,GAAG,IAAI,IAAI,EAAE,CAAC;YACjC,MAAM,CAAC,mBAAmB,GAAG,CAAC,CAAC;YAE/B,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,mBAAmB,EAAE,CAAC;YAC7B,MAAM,CAAC,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC1E,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,0BAA0B,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAEjF,IAAI,MAAM,CAAC,mBAAmB,IAAI,IAAI,CAAC,MAAM,CAAC,sBAAsB,EAAE,CAAC;gBACrE,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,WAAW,CACvB,IAAY,EACZ,KAAmB,EACnB,MAAmB,EACnB,MAAc;QAEd,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,eAAe,IAAI,EAAE,EAAE;YACvC,MAAM;YACN,YAAY,EAAE,MAAM,CAAC,YAAY;YACjC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;SACrC,CAAC,CAAC;QAEH,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAEvE,qCAAqC;QACrC,IAAI,MAAM,CAAC,YAAY,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YACnD,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;YACvB,MAAM,CAAC,SAAS,GAAG,0BAA0B,IAAI,CAAC,MAAM,CAAC,WAAW,MAAM,MAAM,EAAE,CAAC;YACnF,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,2BAA2B,IAAI,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YACnF,IAAI,CAAC,IAAI,CAAC,iBAAiB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;YAC/C,OAAO;QACT,CAAC;QAED,kBAAkB;QAClB,MAAM,CAAC,MAAM,GAAG,YAAY,CAAC;QAC7B,MAAM,CAAC,YAAY,EAAE,CAAC;QAEtB,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,sBAAsB,MAAM,CAAC,YAAY,IAAI
,IAAI,CAAC,MAAM,CAAC,WAAW,KAAK,IAAI,EAAE,CAAC,CAAC;QAClG,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QAEhE,gBAAgB;QAChB,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,iBAAiB,CAAC,CAAC,CAAC;QAEnF,IAAI,CAAC;YACH,MAAM,KAAK,CAAC,OAAO,EAAE,CAAC;YAEtB,yCAAyC;YACzC,MAAM,CAAC,MAAM,GAAG,SAAS,CAAC;YAC1B,MAAM,CAAC,mBAAmB,GAAG,CAAC,CAAC;YAC/B,MAAM,CAAC,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC;YAC9B,MAAM,CAAC,YAAY,GAAG,IAAI,IAAI,EAAE,CAAC;YACjC,MAAM,CAAC,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC;YAEvB,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,iCAAiC,IAAI,EAAE,EAAE;gBACxD,MAAM,EAAE,KAAK,CAAC,GAAG;gBACjB,OAAO,EAAE,MAAM,CAAC,YAAY;aAC7B,CAAC,CAAC;YAEH,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,OAAO,EAAE,MAAM,CAAC,YAAY,EAAE,CAAC,CAAC;QACjF,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,SAAS,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAC1E,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,mBAAmB,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAC1E,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;YAE9D,uCAAuC;YACvC,IAAI,MAAM,CAAC,YAAY,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;gBAClD,MAAM,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,CAAC,SAAS,CAAC,CAAC;YAChE,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;gBACvB,IAAI,CAAC,IAAI,CAAC,iBAAiB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;YACjD,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,GAAW;QAChC,IAAI,CAAC;YACH,+DAA+D;YAC/D,OAAO,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YACrB,OAAO,IAAI,CAAC;QACd,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,eAAe,CAAC,GAAW;QACvC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;QAEnD,IAAI,CAAC;YACH,0BAA0B;YAC1B,MAAM,MAAM,GAAG,QAAQ,CAAC,uBAAuB,GAAG,EAAE,EAAE,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACnF,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACvC,OAAO;gBACL,MAAM,EA
AE,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,IAAI,EAAE,eAAe;gBACjD,GAAG,EAAE,UAAU,CAAC,GAAG,CAAC;aACrB,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,EAAE,MAAM,EAAE,CAAC,EAAE,GAAG,EAAE,CAAC,EAAE,CAAC;QAC/B,CAAC;IACH,CAAC;IAED;;OAEG;IACK,GAAG,CACT,KAAgC,EAChC,OAAe,EACf,OAAiC;QAEjC,MAAM,KAAK,GAAG;YACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;YACnC,KAAK;YACL,SAAS,EAAE,gBAAgB;YAC3B,OAAO;YACP,GAAG,OAAO;SACX,CAAC;QAEF,IAAI,CAAC,IAAI,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAExB,qCAAqC;QACrC,MAAM,MAAM,GAAG,kBAAkB,CAAC;QAClC,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,MAAM;gBACT,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACrE,MAAM;YACR,KAAK,MAAM;gBACT,OAAO,CAAC,IAAI,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACtE,MAAM;YACR,KAAK,OAAO;gBACV,OAAO,CAAC,KAAK,CAAC,MAAM,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;gBACvE,MAAM;QACV,CAAC;IACH,CAAC;CACF;AAED,qBAAqB;AACrB,IAAI,QAAQ,GAA8B,IAAI,CAAC;AAE/C,MAAM,UAAU,gBAAgB,CAAC,MAAqC;IACpE,IAAI,CAAC,QAAQ,EAAE,CAAC;QACd,QAAQ,GAAG,IAAI,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAC5C,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Resiliency Module
|
|
3
|
+
*
|
|
4
|
+
* Provides comprehensive health monitoring, auto-restart, logging,
|
|
5
|
+
* metrics, and context persistence for agent-relay agents.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Health monitoring with process liveness checks
|
|
9
|
+
* - Auto-restart on crash with configurable limits
|
|
10
|
+
* - Prometheus-compatible metrics export
|
|
11
|
+
* - Structured JSON logging with rotation
|
|
12
|
+
* - Context persistence across restarts (inspired by Continuous-Claude-v2)
|
|
13
|
+
* - Provider-specific context injection (Claude hooks, Codex config, Gemini instructions)
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
*
|
|
17
|
+
* ```ts
|
|
18
|
+
* import { getSupervisor, metrics, createLogger } from './resiliency';
|
|
19
|
+
*
|
|
20
|
+
* // Start the supervisor with context persistence
|
|
21
|
+
* const supervisor = getSupervisor({
|
|
22
|
+
* autoRestart: true,
|
|
23
|
+
* maxRestarts: 5,
|
|
24
|
+
* contextPersistence: {
|
|
25
|
+
* enabled: true,
|
|
26
|
+
* autoInjectOnRestart: true,
|
|
27
|
+
* },
|
|
28
|
+
* });
|
|
29
|
+
* supervisor.start();
|
|
30
|
+
*
|
|
31
|
+
* // Add an agent to supervision
|
|
32
|
+
* supervisor.supervise(
|
|
33
|
+
* {
|
|
34
|
+
* name: 'worker-1',
|
|
35
|
+
* cli: 'claude',
|
|
36
|
+
* pid: 12345,
|
|
37
|
+
* spawnedAt: new Date(),
|
|
38
|
+
* workingDir: '/path/to/repo',
|
|
39
|
+
* provider: 'claude', // or 'codex', 'gemini'
|
|
40
|
+
* },
|
|
41
|
+
* {
|
|
42
|
+
 *       isAlive: () => { try { return process.kill(12345, 0); } catch { return false; } },
|
|
43
|
+
* kill: (sig) => process.kill(12345, sig),
|
|
44
|
+
* restart: async () => { ... },
|
|
45
|
+
* }
|
|
46
|
+
* );
|
|
47
|
+
*
|
|
48
|
+
* // Get metrics
|
|
49
|
+
* console.log(metrics.toPrometheus());
|
|
50
|
+
* ```
|
|
51
|
+
*
|
|
52
|
+
* Context persistence works differently per provider:
|
|
53
|
+
* - Claude: Uses hooks to inject context into CLAUDE.md
|
|
54
|
+
* - Codex: Uses config for periodic context refresh via system prompt
|
|
55
|
+
* - Gemini: Updates system instruction file
|
|
56
|
+
*/
|
|
57
|
+
export { AgentHealthMonitor, getHealthMonitor, type AgentHealth, type AgentProcess, type HealthMonitorConfig, } from './health-monitor.js';
|
|
58
|
+
export { Logger, createLogger, configure as configureLogging, loggers, type LogLevel, type LogEntry, type LoggerConfig, } from './logger.js';
|
|
59
|
+
export { metrics, type AgentMetrics, type SystemMetrics, type MetricPoint } from './metrics.js';
|
|
60
|
+
export { AgentSupervisor, getSupervisor, type SupervisedAgent, type SupervisorConfig, } from './supervisor.js';
|
|
61
|
+
export { ContextPersistence, getContextPersistence, type AgentState, type Decision, type Artifact, type Handoff, type LedgerEntry, } from './context-persistence.js';
|
|
62
|
+
export { createContextHandler, detectProvider, ClaudeContextHandler, CodexContextHandler, GeminiContextHandler, type ProviderType, type ProviderContextConfig, type ClaudeHooksConfig, type CodexContextConfig, } from './provider-context.js';
|
|
63
|
+
export { AgentMemoryMonitor, getMemoryMonitor, formatBytes, type MemorySnapshot, type AgentMemoryMetrics, type MemoryThresholds, type MemoryMonitorConfig, type MemoryAlert, type CrashMemoryContext, } from './memory-monitor.js';
|
|
64
|
+
export { CrashInsightsService, getCrashInsights, type CrashRecord, type CrashAnalysis, type CrashPattern, type CrashStats, } from './crash-insights.js';
|
|
65
|
+
export { StatelessLeadCoordinator, createStatelessLead, type BeadsTask, type LeadHeartbeat, type StatelessLeadConfig, } from './stateless-lead.js';
|
|
66
|
+
export { LeaderWatchdog, createLeaderWatchdog, type LeaderWatchdogConfig, type ElectionResult, } from './leader-watchdog.js';
|
|
67
|
+
export { GossipHealthMonitor, createGossipHealth, type GossipHeartbeat, type PeerHealth, type GossipHealthConfig, } from './gossip-health.js';
|
|
68
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuDG;AAEH,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,KAAK,WAAW,EAChB,KAAK,YAAY,EACjB,KAAK,mBAAmB,GACzB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,MAAM,EACN,YAAY,EACZ,SAAS,IAAI,gBAAgB,EAC7B,OAAO,EACP,KAAK,QAAQ,EACb,KAAK,QAAQ,EACb,KAAK,YAAY,GAClB,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,OAAO,EAAE,KAAK,YAAY,EAAE,KAAK,aAAa,EAAE,KAAK,WAAW,EAAE,MAAM,cAAc,CAAC;AAEhG,OAAO,EACL,eAAe,EACf,aAAa,EACb,KAAK,eAAe,EACpB,KAAK,gBAAgB,GACtB,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,kBAAkB,EAClB,qBAAqB,EACrB,KAAK,UAAU,EACf,KAAK,QAAQ,EACb,KAAK,QAAQ,EACb,KAAK,OAAO,EACZ,KAAK,WAAW,GACjB,MAAM,0BAA0B,CAAC;AAElC,OAAO,EACL,oBAAoB,EACpB,cAAc,EACd,oBAAoB,EACpB,mBAAmB,EACnB,oBAAoB,EACpB,KAAK,YAAY,EACjB,KAAK,qBAAqB,EAC1B,KAAK,iBAAiB,EACtB,KAAK,kBAAkB,GACxB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,WAAW,EACX,KAAK,cAAc,EACnB,KAAK,kBAAkB,EACvB,KAAK,gBAAgB,EACrB,KAAK,mBAAmB,EACxB,KAAK,WAAW,EAChB,KAAK,kBAAkB,GACxB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,oBAAoB,EACpB,gBAAgB,EAChB,KAAK,WAAW,EAChB,KAAK,aAAa,EAClB,KAAK,YAAY,EACjB,KAAK,UAAU,GAChB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,wBAAwB,EACxB,mBAAmB,EACnB,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,mBAAmB,GACzB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,cAAc,EACd,oBAAoB,EACpB,KAAK,oBAAoB,EACzB,KAAK,cAAc,GACpB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,mBAAmB,EACnB,kBAAkB,EAClB,KAAK,eAAe,EACpB,KAAK,UAAU,EACf,KAAK,kBAAkB,GACxB,MAAM,oBAAoB,CAAC"}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Resiliency Module
|
|
3
|
+
*
|
|
4
|
+
* Provides comprehensive health monitoring, auto-restart, logging,
|
|
5
|
+
* metrics, and context persistence for agent-relay agents.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Health monitoring with process liveness checks
|
|
9
|
+
* - Auto-restart on crash with configurable limits
|
|
10
|
+
* - Prometheus-compatible metrics export
|
|
11
|
+
* - Structured JSON logging with rotation
|
|
12
|
+
* - Context persistence across restarts (inspired by Continuous-Claude-v2)
|
|
13
|
+
* - Provider-specific context injection (Claude hooks, Codex config, Gemini instructions)
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
*
|
|
17
|
+
* ```ts
|
|
18
|
+
* import { getSupervisor, metrics, createLogger } from './resiliency';
|
|
19
|
+
*
|
|
20
|
+
* // Start the supervisor with context persistence
|
|
21
|
+
* const supervisor = getSupervisor({
|
|
22
|
+
* autoRestart: true,
|
|
23
|
+
* maxRestarts: 5,
|
|
24
|
+
* contextPersistence: {
|
|
25
|
+
* enabled: true,
|
|
26
|
+
* autoInjectOnRestart: true,
|
|
27
|
+
* },
|
|
28
|
+
* });
|
|
29
|
+
* supervisor.start();
|
|
30
|
+
*
|
|
31
|
+
* // Add an agent to supervision
|
|
32
|
+
* supervisor.supervise(
|
|
33
|
+
* {
|
|
34
|
+
* name: 'worker-1',
|
|
35
|
+
* cli: 'claude',
|
|
36
|
+
* pid: 12345,
|
|
37
|
+
* spawnedAt: new Date(),
|
|
38
|
+
* workingDir: '/path/to/repo',
|
|
39
|
+
* provider: 'claude', // or 'codex', 'gemini'
|
|
40
|
+
* },
|
|
41
|
+
* {
|
|
42
|
+
 *       isAlive: () => { try { return process.kill(12345, 0); } catch { return false; } },
|
|
43
|
+
* kill: (sig) => process.kill(12345, sig),
|
|
44
|
+
* restart: async () => { ... },
|
|
45
|
+
* }
|
|
46
|
+
* );
|
|
47
|
+
*
|
|
48
|
+
* // Get metrics
|
|
49
|
+
* console.log(metrics.toPrometheus());
|
|
50
|
+
* ```
|
|
51
|
+
*
|
|
52
|
+
* Context persistence works differently per provider:
|
|
53
|
+
* - Claude: Uses hooks to inject context into CLAUDE.md
|
|
54
|
+
* - Codex: Uses config for periodic context refresh via system prompt
|
|
55
|
+
* - Gemini: Updates system instruction file
|
|
56
|
+
*/
|
|
57
|
+
export { AgentHealthMonitor, getHealthMonitor, } from './health-monitor.js';
|
|
58
|
+
export { Logger, createLogger, configure as configureLogging, loggers, } from './logger.js';
|
|
59
|
+
export { metrics } from './metrics.js';
|
|
60
|
+
export { AgentSupervisor, getSupervisor, } from './supervisor.js';
|
|
61
|
+
export { ContextPersistence, getContextPersistence, } from './context-persistence.js';
|
|
62
|
+
export { createContextHandler, detectProvider, ClaudeContextHandler, CodexContextHandler, GeminiContextHandler, } from './provider-context.js';
|
|
63
|
+
export { AgentMemoryMonitor, getMemoryMonitor, formatBytes, } from './memory-monitor.js';
|
|
64
|
+
export { CrashInsightsService, getCrashInsights, } from './crash-insights.js';
|
|
65
|
+
export { StatelessLeadCoordinator, createStatelessLead, } from './stateless-lead.js';
|
|
66
|
+
export { LeaderWatchdog, createLeaderWatchdog, } from './leader-watchdog.js';
|
|
67
|
+
export { GossipHealthMonitor, createGossipHealth, } from './gossip-health.js';
|
|
68
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuDG;AAEH,OAAO,EACL,kBAAkB,EAClB,gBAAgB,GAIjB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,MAAM,EACN,YAAY,EACZ,SAAS,IAAI,gBAAgB,EAC7B,OAAO,GAIR,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,OAAO,EAA2D,MAAM,cAAc,CAAC;AAEhG,OAAO,EACL,eAAe,EACf,aAAa,GAGd,MAAM,iBAAiB,CAAC;AAEzB,OAAO,EACL,kBAAkB,EAClB,qBAAqB,GAMtB,MAAM,0BAA0B,CAAC;AAElC,OAAO,EACL,oBAAoB,EACpB,cAAc,EACd,oBAAoB,EACpB,mBAAmB,EACnB,oBAAoB,GAKrB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EACL,kBAAkB,EAClB,gBAAgB,EAChB,WAAW,GAOZ,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,oBAAoB,EACpB,gBAAgB,GAKjB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,wBAAwB,EACxB,mBAAmB,GAIpB,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,cAAc,EACd,oBAAoB,GAGrB,MAAM,sBAAsB,CAAC;AAE9B,OAAO,EACL,mBAAmB,EACnB,kBAAkB,GAInB,MAAM,oBAAoB,CAAC"}
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Leader Watchdog
|
|
3
|
+
*
|
|
4
|
+
* Implements P3: Monitor lead health, trigger promotion if lead dies.
|
|
5
|
+
* Integrates with AgentSupervisor and heartbeat system.
|
|
6
|
+
*
|
|
7
|
+
* Features:
|
|
8
|
+
* - Monitors leader heartbeat file
|
|
9
|
+
* - Detects stale/missing leader
|
|
10
|
+
* - Triggers leader election or self-promotion
|
|
11
|
+
* - Integrates with supervisor events
|
|
12
|
+
*/
|
|
13
|
+
import { EventEmitter } from 'events';
|
|
14
|
+
import { LeadHeartbeat } from './stateless-lead.js';
|
|
15
|
+
/**
|
|
16
|
+
* Configuration accepted by the LeaderWatchdog constructor: agent identity, timing thresholds, and election callbacks.
|
|
17
|
+
*/
|
|
18
|
+
export interface LeaderWatchdogConfig {
|
|
19
|
+
/** Path to the .beads directory. */
|
|
20
|
+
beadsDir: string;
|
|
21
|
+
/** This agent's name. */
|
|
22
|
+
agentName: string;
|
|
23
|
+
/** This agent's unique ID. */
|
|
24
|
+
agentId: string;
|
|
25
|
+
/** Interval between leader health checks, in milliseconds. */
|
|
26
|
+
checkIntervalMs: number;
|
|
27
|
+
/** Duration, in milliseconds, after which the leader is considered stale. */
|
|
28
|
+
staleThresholdMs: number;
|
|
29
|
+
/** Async callback invoked when this agent should become leader. */
|
|
30
|
+
onBecomeLeader: () => Promise<void>;
|
|
31
|
+
/** Async callback that resolves to all healthy agents, used as input to the election. */
|
|
32
|
+
getHealthyAgents: () => Promise<Array<{
|
|
33
|
+
name: string; // Name of a healthy agent.
|
|
34
|
+
id: string; // ID of a healthy agent.
|
|
35
|
+
spawnedAt: Date; // Spawn time. NOTE(review): presumably what the "oldest healthy agent wins" election compares - confirm in leader-watchdog.js.
|
|
36
|
+
}>>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Result record of a leader election: the winner, the candidates considered, and the method used.
|
|
40
|
+
*/
|
|
41
|
+
export interface ElectionResult {
|
|
42
|
+
winner: string; // NOTE(review): presumably the winning agent's name - confirm in leader-watchdog.js.
|
|
43
|
+
winnerId: string; // NOTE(review): presumably the winning agent's unique ID - confirm.
|
|
44
|
+
candidates: string[]; // NOTE(review): presumably names (or IDs) of the agents considered - confirm which.
|
|
45
|
+
method: 'oldest' | 'self' | 'none'; // How the winner was chosen. NOTE(review): exact meaning of each value is defined in the implementation - confirm.
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* Leader Watchdog
|
|
49
|
+
*
|
|
50
|
+
* Runs on each agent: monitors leader health and triggers an election if needed. Extends EventEmitter.
|
|
51
|
+
*/
|
|
52
|
+
export declare class LeaderWatchdog extends EventEmitter {
|
|
53
|
+
private config; // Type erased in this declaration. NOTE(review): presumably the LeaderWatchdogConfig passed to the constructor - confirm.
|
|
54
|
+
private heartbeatPath; // NOTE(review): presumably the location of the leader heartbeat file - confirm in leader-watchdog.js.
|
|
55
|
+
private checkInterval?; // Optional member. NOTE(review): presumably the handle of the periodic health-check timer - confirm.
|
|
56
|
+
private isRunning; // Private state. NOTE(review): presumably the value reported as getStatus().isRunning - confirm.
|
|
57
|
+
private currentLeader; // Private state. NOTE(review): presumably the value returned by getCurrentLeader() - confirm.
|
|
58
|
+
private isLeader; // Private state. NOTE(review): presumably the value returned by isCurrentLeader() - confirm.
|
|
59
|
+
constructor(config: LeaderWatchdogConfig);
|
|
60
|
+
/**
|
|
61
|
+
* Start watching leader health. Returns nothing.
|
|
62
|
+
*/
|
|
63
|
+
start(): void;
|
|
64
|
+
/**
|
|
65
|
+
* Stop watching. Returns nothing.
|
|
66
|
+
*/
|
|
67
|
+
stop(): void;
|
|
68
|
+
/**
|
|
69
|
+
* Check whether the current leader is healthy (private; signature erased in this declaration).
|
|
70
|
+
*/
|
|
71
|
+
private checkLeaderHealth;
|
|
72
|
+
/**
|
|
73
|
+
* Trigger a leader election (private; signature erased in this declaration).
|
|
74
|
+
*/
|
|
75
|
+
private triggerElection;
|
|
76
|
+
/**
|
|
77
|
+
* Simple leader election: the oldest healthy agent wins (private; signature erased in this declaration).
|
|
78
|
+
*/
|
|
79
|
+
private electLeader;
|
|
80
|
+
/**
|
|
81
|
+
* Check whether this agent is currently the leader. Returns a boolean.
|
|
82
|
+
*/
|
|
83
|
+
isCurrentLeader(): boolean;
|
|
84
|
+
/**
|
|
85
|
+
* Get the current leader's LeadHeartbeat, or null. NOTE(review): presumably null when no leader is known - confirm.
|
|
86
|
+
*/
|
|
87
|
+
getCurrentLeader(): LeadHeartbeat | null;
|
|
88
|
+
/**
|
|
89
|
+
* Get the watchdog status: running flag, leader flag, current leader heartbeat (or null), and an agent name.
|
|
90
|
+
*/
|
|
91
|
+
getStatus(): {
|
|
92
|
+
isRunning: boolean;
|
|
93
|
+
isLeader: boolean;
|
|
94
|
+
currentLeader: LeadHeartbeat | null;
|
|
95
|
+
agentName: string; // NOTE(review): presumably this agent's configured name - confirm.
|
|
96
|
+
};
|
|
97
|
+
}
|
|
98
|
+
/**
|
|
99
|
+
* Create a LeaderWatchdog with defaults from a .beads directory, an agent name and ID, and the two election callbacks. NOTE(review): no check interval or stale threshold is accepted here, so those presumably take default values - confirm them in leader-watchdog.js.
|
|
100
|
+
*/
|
|
101
|
+
export declare function createLeaderWatchdog(beadsDir: string, agentName: string, agentId: string, callbacks: {
|
|
102
|
+
onBecomeLeader: () => Promise<void>; // Async callback for when this agent should become leader.
|
|
103
|
+
getHealthyAgents: () => Promise<Array<{ // Async callback resolving to the healthy agents considered for election.
|
|
104
|
+
name: string;
|
|
105
|
+
id: string;
|
|
106
|
+
spawnedAt: Date;
|
|
107
|
+
}>>;
|
|
108
|
+
}): LeaderWatchdog;
|
|
109
|
+
//# sourceMappingURL=leader-watchdog.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"leader-watchdog.d.ts","sourceRoot":"","sources":["../src/leader-watchdog.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAGH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,EAA4B,aAAa,EAAE,MAAM,qBAAqB,CAAC;AAE9E;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,+BAA+B;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,wBAAwB;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,eAAe,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,gBAAgB,EAAE,MAAM,CAAC;IACzB,oDAAoD;IACpD,cAAc,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,sDAAsD;IACtD,gBAAgB,EAAE,MAAM,OAAO,CAAC,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,IAAI,CAAA;KAAE,CAAC,CAAC,CAAC;CACvF;AAOD;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,EAAE,CAAC;IACrB,MAAM,EAAE,QAAQ,GAAG,MAAM,GAAG,MAAM,CAAC;CACpC;AAED;;;;GAIG;AACH,qBAAa,cAAe,SAAQ,YAAY;IAC9C,OAAO,CAAC,MAAM,CAAuB;IACrC,OAAO,CAAC,aAAa,CAAS;IAC9B,OAAO,CAAC,aAAa,CAAC,CAAiC;IACvD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,aAAa,CAA8B;IACnD,OAAO,CAAC,QAAQ,CAAS;gBAEb,MAAM,EAAE,oBAAoB;IAMxC;;OAEG;IACH,KAAK,IAAI,IAAI;IAuBb;;OAEG;IACH,IAAI,IAAI,IAAI;IAaZ;;OAEG;YACW,iBAAiB;IAoC/B;;OAEG;YACW,eAAe;IAwB7B;;OAEG;YACW,WAAW;IAyBzB;;OAEG;IACH,eAAe,IAAI,OAAO;IAI1B;;OAEG;IACH,gBAAgB,IAAI,aAAa,GAAG,IAAI;IAIxC;;OAEG;IACH,SAAS,IAAI;QACX,SAAS,EAAE,OAAO,CAAC;QACnB,QAAQ,EAAE,OAAO,CAAC;QAClB,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;QACpC,SAAS,EAAE,MAAM,CAAC;KACnB;CAQF;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE;IACT,cAAc,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IACpC,gBAAgB,EAAE,MAAM,OAAO,CAAC,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,EAAE,EAAE,MAAM,CAAC;QAAC,SAAS,EAAE,IAAI,CAAA;KAAE,CAAC,CAAC,CAAC;CACvF,GACA,cAAc,CAShB"}
|