@agent-relay/resiliency 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context-persistence.d.ts +140 -0
- package/dist/context-persistence.d.ts.map +1 -0
- package/dist/context-persistence.js +397 -0
- package/dist/context-persistence.js.map +1 -0
- package/dist/crash-insights.d.ts +156 -0
- package/dist/crash-insights.d.ts.map +1 -0
- package/dist/crash-insights.js +492 -0
- package/dist/crash-insights.js.map +1 -0
- package/dist/gossip-health.d.ts +137 -0
- package/dist/gossip-health.d.ts.map +1 -0
- package/dist/gossip-health.js +241 -0
- package/dist/gossip-health.js.map +1 -0
- package/dist/health-monitor.d.ts +97 -0
- package/dist/health-monitor.d.ts.map +1 -0
- package/dist/health-monitor.js +291 -0
- package/dist/health-monitor.js.map +1 -0
- package/dist/index.d.ts +68 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +68 -0
- package/dist/index.js.map +1 -0
- package/dist/leader-watchdog.d.ts +109 -0
- package/dist/leader-watchdog.d.ts.map +1 -0
- package/dist/leader-watchdog.js +189 -0
- package/dist/leader-watchdog.js.map +1 -0
- package/dist/logger.d.ts +114 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +250 -0
- package/dist/logger.js.map +1 -0
- package/dist/memory-monitor.d.ts +172 -0
- package/dist/memory-monitor.d.ts.map +1 -0
- package/dist/memory-monitor.js +599 -0
- package/dist/memory-monitor.js.map +1 -0
- package/dist/metrics.d.ts +115 -0
- package/dist/metrics.d.ts.map +1 -0
- package/dist/metrics.js +239 -0
- package/dist/metrics.js.map +1 -0
- package/dist/provider-context.d.ts +100 -0
- package/dist/provider-context.d.ts.map +1 -0
- package/dist/provider-context.js +362 -0
- package/dist/provider-context.js.map +1 -0
- package/dist/stateless-lead.d.ts +149 -0
- package/dist/stateless-lead.d.ts.map +1 -0
- package/dist/stateless-lead.js +308 -0
- package/dist/stateless-lead.js.map +1 -0
- package/dist/supervisor.d.ts +147 -0
- package/dist/supervisor.d.ts.map +1 -0
- package/dist/supervisor.js +459 -0
- package/dist/supervisor.js.map +1 -0
- package/package.json +28 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gossip-Based Health Broadcast
|
|
3
|
+
*
|
|
4
|
+
* Implements P4: Agents broadcast heartbeats via relay.
|
|
5
|
+
* Collective failure detection without central monitor.
|
|
6
|
+
*
|
|
7
|
+
* Each agent:
|
|
8
|
+
* - Periodically broadcasts HEARTBEAT message to all agents
|
|
9
|
+
* - Tracks health of all known peers
|
|
10
|
+
* - Detects failures when peer heartbeats go stale
|
|
11
|
+
* - Emits events for peer health changes
|
|
12
|
+
*/
|
|
13
|
+
import { EventEmitter } from 'events';
|
|
14
|
+
/**
 * What this agent currently knows about a single peer, as last recorded
 * from that peer's heartbeats.
 */
export interface PeerHealth {
    /** Agent name of the peer (the `agent` field of its heartbeat). */
    name: string;
    /** Timestamp carried by the most recent heartbeat stored for this peer. */
    lastHeartbeat: number;
    /** Load value the peer reported in its heartbeat. */
    load: number;
    /** Whether the peer is currently regarded as healthy. */
    healthy: boolean;
    /** Whether the peer reported itself as leader. */
    isLeader: boolean;
    /** Task count the peer reported in its heartbeat. */
    taskCount: number;
}
|
|
25
|
+
/**
 * Payload of a heartbeat message broadcast through the relay.
 */
export interface GossipHeartbeat {
    /** Discriminator; always the literal `'HEARTBEAT'`. */
    type: 'HEARTBEAT';
    /** Name of the sending agent. */
    agent: string;
    /** Unique ID of the sending agent. */
    agentId: string;
    /** Time at which the sender built the heartbeat. */
    timestamp: number;
    /** Sender's reported load. */
    load: number;
    /** Sender's self-reported health. */
    healthy: boolean;
    /** Whether the sender reports itself as leader. */
    isLeader: boolean;
    /** Sender's reported task count. */
    taskCount: number;
}
|
|
38
|
+
/**
 * Settings accepted by the gossip health monitor.
 */
export interface GossipHealthConfig {
    /** Name of this agent. */
    agentName: string;
    /** Unique ID of this agent. */
    agentId: string;
    /** Interval between heartbeat broadcasts, in milliseconds. */
    broadcastIntervalMs: number;
    /** A peer is treated as stale once its last heartbeat is older than this (ms). */
    staleThresholdMs: number;
    /** Interval between stale-peer checks, in milliseconds. */
    checkIntervalMs: number;
    /** Sends a message to every agent. */
    broadcast: (message: string) => Promise<void>;
    /** Optional supplier of the current load (0-1). */
    getLoad?: () => number;
    /** Optional supplier of the current task count. */
    getTaskCount?: () => number;
    /** Optional predicate telling whether this agent is the leader. */
    isLeader?: () => boolean;
}
|
|
61
|
+
/**
 * Gossip Health Monitor
 *
 * Sends this agent's heartbeats and keeps a table of peer health built
 * from the heartbeats it is given.
 */
export declare class GossipHealthMonitor extends EventEmitter {
    private config;
    private peers;
    private broadcastInterval?;
    private checkInterval?;
    private isRunning;
    private healthy;
    constructor(config: GossipHealthConfig);
    /**
     * Begin gossip health monitoring.
     */
    start(): void;
    /**
     * End gossip health monitoring.
     */
    stop(): void;
    /**
     * Send a heartbeat to all agents.
     */
    private broadcastHeartbeat;
    /**
     * Handle a heartbeat received from another agent.
     */
    processHeartbeat(heartbeat: GossipHeartbeat): void;
    /**
     * Extract a heartbeat from a relay message.
     *
     * @returns the heartbeat, or `null` when the message is not one.
     */
    static parseHeartbeat(message: string): GossipHeartbeat | null;
    /**
     * Look for peers whose heartbeats have gone stale.
     */
    private checkStalePeers;
    /**
     * Update this agent's own health status.
     */
    setHealthy(healthy: boolean): void;
    /**
     * List every known peer.
     */
    getPeers(): PeerHealth[];
    /**
     * List the peers currently considered healthy.
     */
    getHealthyPeers(): PeerHealth[];
    /**
     * Current leader according to gossip, or `null` when there is none.
     */
    getLeader(): PeerHealth | null;
    /**
     * Look up a peer by name.
     */
    getPeer(name: string): PeerHealth | undefined;
    /**
     * Summary of the monitor's current state.
     */
    getStatus(): {
        isRunning: boolean;
        agentName: string;
        peerCount: number;
        healthyPeerCount: number;
        leader: string | null;
    };
}
|
|
129
|
+
/**
 * Build a gossip health monitor using the default timing settings.
 *
 * @param agentName - Name of this agent.
 * @param agentId - Unique ID of this agent.
 * @param broadcast - Sends a message to every agent.
 * @param options - Optional load / task-count / leadership callbacks.
 */
export declare function createGossipHealth(agentName: string, agentId: string, broadcast: (message: string) => Promise<void>, options?: {
    getLoad?: () => number;
    getTaskCount?: () => number;
    isLeader?: () => boolean;
}): GossipHealthMonitor;
|
|
137
|
+
//# sourceMappingURL=gossip-health.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gossip-health.d.ts","sourceRoot":"","sources":["../src/gossip-health.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAEtC;;GAEG;AACH,MAAM,WAAW,UAAU;IACzB,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,WAAW,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,QAAQ,EAAE,OAAO,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,wBAAwB;IACxB,SAAS,EAAE,MAAM,CAAC;IAClB,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,4CAA4C;IAC5C,mBAAmB,EAAE,MAAM,CAAC;IAC5B,qDAAqD;IACrD,gBAAgB,EAAE,MAAM,CAAC;IACzB,8CAA8C;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,kDAAkD;IAClD,SAAS,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC9C,yCAAyC;IACzC,OAAO,CAAC,EAAE,MAAM,MAAM,CAAC;IACvB,yCAAyC;IACzC,YAAY,CAAC,EAAE,MAAM,MAAM,CAAC;IAC5B,gDAAgD;IAChD,QAAQ,CAAC,EAAE,MAAM,OAAO,CAAC;CAC1B;AAQD;;;;GAIG;AACH,qBAAa,mBAAoB,SAAQ,YAAY;IACnD,OAAO,CAAC,MAAM,CAAqB;IACnC,OAAO,CAAC,KAAK,CAAiC;IAC9C,OAAO,CAAC,iBAAiB,CAAC,CAAiC;IAC3D,OAAO,CAAC,aAAa,CAAC,CAAiC;IACvD,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,OAAO,CAAQ;gBAEX,MAAM,EAAE,kBAAkB;IAKtC;;OAEG;IACH,KAAK,IAAI,IAAI;IA4Bb;;OAEG;IACH,IAAI,IAAI,IAAI;IAkBZ;;OAEG;YACW,kBAAkB;IAgBhC;;OAEG;IACH,gBAAgB,CAAC,SAAS,EAAE,eAAe,GAAG,IAAI;IAoClD;;OAEG;IACH,MAAM,CAAC,cAAc,CAAC,OAAO,EAAE,MAAM,GAAG,eAAe,GAAG,IAAI;IAgB9D;;OAEG;IACH,OAAO,CAAC,eAAe;IAsBvB;;OAEG;IACH,UAAU,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI;IAUlC;;OAEG;IACH,QAAQ,IAAI,UAAU,EAAE;IAIxB;;OAEG;IACH,eAAe,IAAI,UAAU,EAAE;IAI/B;;OAEG;IACH,SAAS,IAAI,UAAU,GAAG,IAAI;IAS9B;;OAEG;IACH,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,SAAS;IAI7C;;OAEG;IACH,SAAS,IAAI;QACX,SAAS,EAAE,OAAO,CAAC;QACnB,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,gBAAgB,EAAE,MAAM,CAAC;QACzB,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;KACvB;CAUF;AAED;;GAEG;AACH,wBAAgB,kBAAk
B,CAChC,SAAS,EAAE,MAAM,EACjB,OAAO,EAAE,MAAM,EACf,SAAS,EAAE,CAAC,OAAO,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,EAC7C,OAAO,CAAC,EAAE;IACR,OAAO,CAAC,EAAE,MAAM,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,MAAM,CAAC;IAC5B,QAAQ,CAAC,EAAE,MAAM,OAAO,CAAC;CAC1B,GACA,mBAAmB,CAUrB"}
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Gossip-Based Health Broadcast
|
|
3
|
+
*
|
|
4
|
+
* Implements P4: Agents broadcast heartbeats via relay.
|
|
5
|
+
* Collective failure detection without central monitor.
|
|
6
|
+
*
|
|
7
|
+
* Each agent:
|
|
8
|
+
* - Periodically broadcasts HEARTBEAT message to all agents
|
|
9
|
+
* - Tracks health of all known peers
|
|
10
|
+
* - Detects failures when peer heartbeats go stale
|
|
11
|
+
* - Emits events for peer health changes
|
|
12
|
+
*/
|
|
13
|
+
import { EventEmitter } from 'events';
|
|
14
|
+
const DEFAULT_CONFIG = {
|
|
15
|
+
broadcastIntervalMs: 10000,
|
|
16
|
+
staleThresholdMs: 30000,
|
|
17
|
+
checkIntervalMs: 5000,
|
|
18
|
+
};
|
|
19
|
+
/**
|
|
20
|
+
* Gossip Health Monitor
|
|
21
|
+
*
|
|
22
|
+
* Broadcasts heartbeats and tracks peer health via gossip protocol.
|
|
23
|
+
*/
|
|
24
|
+
export class GossipHealthMonitor extends EventEmitter {
|
|
25
|
+
config;
|
|
26
|
+
peers = new Map();
|
|
27
|
+
broadcastInterval;
|
|
28
|
+
checkInterval;
|
|
29
|
+
isRunning = false;
|
|
30
|
+
healthy = true;
|
|
31
|
+
constructor(config) {
|
|
32
|
+
super();
|
|
33
|
+
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
34
|
+
}
|
|
35
|
+
/**
|
|
36
|
+
* Start gossip health monitoring
|
|
37
|
+
*/
|
|
38
|
+
start() {
|
|
39
|
+
if (this.isRunning)
|
|
40
|
+
return;
|
|
41
|
+
this.isRunning = true;
|
|
42
|
+
console.log(`[gossip] Started health broadcast for ${this.config.agentName}`);
|
|
43
|
+
// Start broadcasting heartbeats
|
|
44
|
+
this.broadcastInterval = setInterval(async () => {
|
|
45
|
+
try {
|
|
46
|
+
await this.broadcastHeartbeat();
|
|
47
|
+
}
|
|
48
|
+
catch (err) {
|
|
49
|
+
console.error('[gossip] Broadcast error:', err);
|
|
50
|
+
}
|
|
51
|
+
}, this.config.broadcastIntervalMs);
|
|
52
|
+
// Start checking for stale peers
|
|
53
|
+
this.checkInterval = setInterval(() => {
|
|
54
|
+
this.checkStalePeers();
|
|
55
|
+
}, this.config.checkIntervalMs);
|
|
56
|
+
// Initial broadcast
|
|
57
|
+
this.broadcastHeartbeat().catch((err) => {
|
|
58
|
+
console.error('[gossip] Initial broadcast error:', err);
|
|
59
|
+
});
|
|
60
|
+
this.emit('started');
|
|
61
|
+
}
|
|
62
|
+
/**
|
|
63
|
+
* Stop gossip health monitoring
|
|
64
|
+
*/
|
|
65
|
+
stop() {
|
|
66
|
+
if (!this.isRunning)
|
|
67
|
+
return;
|
|
68
|
+
this.isRunning = false;
|
|
69
|
+
if (this.broadcastInterval) {
|
|
70
|
+
clearInterval(this.broadcastInterval);
|
|
71
|
+
this.broadcastInterval = undefined;
|
|
72
|
+
}
|
|
73
|
+
if (this.checkInterval) {
|
|
74
|
+
clearInterval(this.checkInterval);
|
|
75
|
+
this.checkInterval = undefined;
|
|
76
|
+
}
|
|
77
|
+
console.log(`[gossip] Stopped health broadcast for ${this.config.agentName}`);
|
|
78
|
+
this.emit('stopped');
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Broadcast heartbeat to all agents
|
|
82
|
+
*/
|
|
83
|
+
async broadcastHeartbeat() {
|
|
84
|
+
const heartbeat = {
|
|
85
|
+
type: 'HEARTBEAT',
|
|
86
|
+
agent: this.config.agentName,
|
|
87
|
+
agentId: this.config.agentId,
|
|
88
|
+
timestamp: Date.now(),
|
|
89
|
+
load: this.config.getLoad?.() ?? 0,
|
|
90
|
+
healthy: this.healthy,
|
|
91
|
+
isLeader: this.config.isLeader?.() ?? false,
|
|
92
|
+
taskCount: this.config.getTaskCount?.() ?? 0,
|
|
93
|
+
};
|
|
94
|
+
const message = `HEARTBEAT: ${JSON.stringify(heartbeat)}`;
|
|
95
|
+
await this.config.broadcast(message);
|
|
96
|
+
}
|
|
97
|
+
/**
|
|
98
|
+
* Process incoming heartbeat from another agent
|
|
99
|
+
*/
|
|
100
|
+
processHeartbeat(heartbeat) {
|
|
101
|
+
// Ignore our own heartbeats
|
|
102
|
+
if (heartbeat.agentId === this.config.agentId)
|
|
103
|
+
return;
|
|
104
|
+
const existing = this.peers.get(heartbeat.agent);
|
|
105
|
+
const wasHealthy = existing?.healthy ?? true;
|
|
106
|
+
const wasLeader = existing?.isLeader ?? false;
|
|
107
|
+
// Update peer state
|
|
108
|
+
const peer = {
|
|
109
|
+
name: heartbeat.agent,
|
|
110
|
+
lastHeartbeat: heartbeat.timestamp,
|
|
111
|
+
load: heartbeat.load,
|
|
112
|
+
healthy: heartbeat.healthy,
|
|
113
|
+
isLeader: heartbeat.isLeader,
|
|
114
|
+
taskCount: heartbeat.taskCount,
|
|
115
|
+
};
|
|
116
|
+
this.peers.set(heartbeat.agent, peer);
|
|
117
|
+
// Emit events for state changes
|
|
118
|
+
if (!existing) {
|
|
119
|
+
this.emit('peerDiscovered', peer);
|
|
120
|
+
}
|
|
121
|
+
else {
|
|
122
|
+
if (!wasHealthy && heartbeat.healthy) {
|
|
123
|
+
this.emit('peerRecovered', peer);
|
|
124
|
+
}
|
|
125
|
+
if (wasHealthy && !heartbeat.healthy) {
|
|
126
|
+
this.emit('peerUnhealthy', peer);
|
|
127
|
+
}
|
|
128
|
+
if (!wasLeader && heartbeat.isLeader) {
|
|
129
|
+
this.emit('newLeader', peer);
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Parse heartbeat from relay message
|
|
135
|
+
*/
|
|
136
|
+
static parseHeartbeat(message) {
|
|
137
|
+
const match = message.match(/^HEARTBEAT:\s*(.+)$/);
|
|
138
|
+
if (!match)
|
|
139
|
+
return null;
|
|
140
|
+
try {
|
|
141
|
+
const data = JSON.parse(match[1]);
|
|
142
|
+
if (data.type === 'HEARTBEAT') {
|
|
143
|
+
return data;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
catch {
|
|
147
|
+
// Invalid JSON
|
|
148
|
+
}
|
|
149
|
+
return null;
|
|
150
|
+
}
|
|
151
|
+
/**
|
|
152
|
+
* Check for stale peers
|
|
153
|
+
*/
|
|
154
|
+
checkStalePeers() {
|
|
155
|
+
const now = Date.now();
|
|
156
|
+
for (const [name, peer] of this.peers) {
|
|
157
|
+
const age = now - peer.lastHeartbeat;
|
|
158
|
+
if (age > this.config.staleThresholdMs) {
|
|
159
|
+
if (peer.healthy) {
|
|
160
|
+
// Mark as unhealthy
|
|
161
|
+
peer.healthy = false;
|
|
162
|
+
console.log(`[gossip] Peer ${name} is stale (${Math.round(age / 1000)}s since last heartbeat)`);
|
|
163
|
+
this.emit('peerStale', { peer, age });
|
|
164
|
+
// If the stale peer was leader, emit leader lost
|
|
165
|
+
if (peer.isLeader) {
|
|
166
|
+
this.emit('leaderLost', peer);
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
/**
|
|
173
|
+
* Set this agent's health status
|
|
174
|
+
*/
|
|
175
|
+
setHealthy(healthy) {
|
|
176
|
+
if (this.healthy !== healthy) {
|
|
177
|
+
this.healthy = healthy;
|
|
178
|
+
// Broadcast immediately on health change
|
|
179
|
+
this.broadcastHeartbeat().catch((err) => {
|
|
180
|
+
console.error('[gossip] Immediate broadcast error:', err);
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
/**
|
|
185
|
+
* Get all known peers
|
|
186
|
+
*/
|
|
187
|
+
getPeers() {
|
|
188
|
+
return Array.from(this.peers.values());
|
|
189
|
+
}
|
|
190
|
+
/**
|
|
191
|
+
* Get healthy peers
|
|
192
|
+
*/
|
|
193
|
+
getHealthyPeers() {
|
|
194
|
+
return this.getPeers().filter((p) => p.healthy);
|
|
195
|
+
}
|
|
196
|
+
/**
|
|
197
|
+
* Get current leader from gossip
|
|
198
|
+
*/
|
|
199
|
+
getLeader() {
|
|
200
|
+
for (const peer of this.peers.values()) {
|
|
201
|
+
if (peer.isLeader && peer.healthy) {
|
|
202
|
+
return peer;
|
|
203
|
+
}
|
|
204
|
+
}
|
|
205
|
+
return null;
|
|
206
|
+
}
|
|
207
|
+
/**
|
|
208
|
+
* Get peer by name
|
|
209
|
+
*/
|
|
210
|
+
getPeer(name) {
|
|
211
|
+
return this.peers.get(name);
|
|
212
|
+
}
|
|
213
|
+
/**
|
|
214
|
+
* Get status
|
|
215
|
+
*/
|
|
216
|
+
getStatus() {
|
|
217
|
+
const leader = this.getLeader();
|
|
218
|
+
return {
|
|
219
|
+
isRunning: this.isRunning,
|
|
220
|
+
agentName: this.config.agentName,
|
|
221
|
+
peerCount: this.peers.size,
|
|
222
|
+
healthyPeerCount: this.getHealthyPeers().length,
|
|
223
|
+
leader: leader?.name ?? null,
|
|
224
|
+
};
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
/**
 * Convenience factory: builds a GossipHealthMonitor with the standard
 * timings (10s broadcast, 30s stale threshold, 5s check), letting any
 * entries in `options` override them.
 */
export function createGossipHealth(agentName, agentId, broadcast, options) {
    const settings = {
        agentName,
        agentId,
        broadcast,
        broadcastIntervalMs: 10000,
        staleThresholdMs: 30000,
        checkIntervalMs: 5000,
    };
    // Caller-supplied options are applied last so they win; a missing
    // `options` argument is simply skipped.
    Object.assign(settings, options);
    return new GossipHealthMonitor(settings);
}
|
|
241
|
+
//# sourceMappingURL=gossip-health.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gossip-health.js","sourceRoot":"","sources":["../src/gossip-health.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAoDtC,MAAM,cAAc,GAAgC;IAClD,mBAAmB,EAAE,KAAK;IAC1B,gBAAgB,EAAE,KAAK;IACvB,eAAe,EAAE,IAAI;CACtB,CAAC;AAEF;;;;GAIG;AACH,MAAM,OAAO,mBAAoB,SAAQ,YAAY;IAC3C,MAAM,CAAqB;IAC3B,KAAK,GAAG,IAAI,GAAG,EAAsB,CAAC;IACtC,iBAAiB,CAAkC;IACnD,aAAa,CAAkC;IAC/C,SAAS,GAAG,KAAK,CAAC;IAClB,OAAO,GAAG,IAAI,CAAC;IAEvB,YAAY,MAA0B;QACpC,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAwB,CAAC;IACvE,CAAC;IAED;;OAEG;IACH,KAAK;QACH,IAAI,IAAI,CAAC,SAAS;YAAE,OAAO;QAC3B,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAEtB,OAAO,CAAC,GAAG,CAAC,yCAAyC,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QAE9E,gCAAgC;QAChC,IAAI,CAAC,iBAAiB,GAAG,WAAW,CAAC,KAAK,IAAI,EAAE;YAC9C,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,kBAAkB,EAAE,CAAC;YAClC,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,KAAK,CAAC,2BAA2B,EAAE,GAAG,CAAC,CAAC;YAClD,CAAC;QACH,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC;QAEpC,iCAAiC;QACjC,IAAI,CAAC,aAAa,GAAG,WAAW,CAAC,GAAG,EAAE;YACpC,IAAI,CAAC,eAAe,EAAE,CAAC;QACzB,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;QAEhC,oBAAoB;QACpB,IAAI,CAAC,kBAAkB,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;YACtC,OAAO,CAAC,KAAK,CAAC,mCAAmC,EAAE,GAAG,CAAC,CAAC;QAC1D,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACvB,CAAC;IAED;;OAEG;IACH,IAAI;QACF,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO;QAC5B,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QAEvB,IAAI,IAAI,CAAC,iBAAiB,EAAE,CAAC;YAC3B,aAAa,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;YACtC,IAAI,CAAC,iBAAiB,GAAG,SAAS,CAAC;QACrC,CAAC;QAED,IAAI,IAAI,CAAC,aAAa,EAAE,CAAC;YACvB,aAAa,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;YAClC,IAAI,CAAC,aAAa,GAAG,SAAS,CAAC;QACjC,CAAC;QAED,OAAO,CAAC,GAAG,CAAC,yCAAyC,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;QAC9E,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACvB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,kBAAkB;QAC9B,MAAM,SAAS,GAAoB;YACjC,IAAI,EAAE,WAAW;YACjB,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAC5B,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO;YAC5B,SAAS
,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,EAAE,IAAI,CAAC;YAClC,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,EAAE,IAAI,KAAK;YAC3C,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,YAAY,EAAE,EAAE,IAAI,CAAC;SAC7C,CAAC;QAEF,MAAM,OAAO,GAAG,cAAc,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;IACvC,CAAC;IAED;;OAEG;IACH,gBAAgB,CAAC,SAA0B;QACzC,4BAA4B;QAC5B,IAAI,SAAS,CAAC,OAAO,KAAK,IAAI,CAAC,MAAM,CAAC,OAAO;YAAE,OAAO;QAEtD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QACjD,MAAM,UAAU,GAAG,QAAQ,EAAE,OAAO,IAAI,IAAI,CAAC;QAC7C,MAAM,SAAS,GAAG,QAAQ,EAAE,QAAQ,IAAI,KAAK,CAAC;QAE9C,oBAAoB;QACpB,MAAM,IAAI,GAAe;YACvB,IAAI,EAAE,SAAS,CAAC,KAAK;YACrB,aAAa,EAAE,SAAS,CAAC,SAAS;YAClC,IAAI,EAAE,SAAS,CAAC,IAAI;YACpB,OAAO,EAAE,SAAS,CAAC,OAAO;YAC1B,QAAQ,EAAE,SAAS,CAAC,QAAQ;YAC5B,SAAS,EAAE,SAAS,CAAC,SAAS;SAC/B,CAAC;QAEF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;QAEtC,gCAAgC;QAChC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,IAAI,CAAC,IAAI,CAAC,gBAAgB,EAAE,IAAI,CAAC,CAAC;QACpC,CAAC;aAAM,CAAC;YACN,IAAI,CAAC,UAAU,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC;gBACrC,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,CAAC;YACnC,CAAC;YACD,IAAI,UAAU,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,CAAC;gBACrC,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,IAAI,CAAC,CAAC;YACnC,CAAC;YACD,IAAI,CAAC,SAAS,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;gBACrC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;YAC/B,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,cAAc,CAAC,OAAe;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,qBAAqB,CAAC,CAAC;QACnD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAClC,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;gBAC9B,OAAO,IAAuB,CAAC;YACjC,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,eAAe;QACjB,CAAC;QAED,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACK,eAAe;QACrB,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAEvB,KAAK,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,GAAG,GAA
G,IAAI,CAAC,aAAa,CAAC;YAErC,IAAI,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,gBAAgB,EAAE,CAAC;gBACvC,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;oBACjB,oBAAoB;oBACpB,IAAI,CAAC,OAAO,GAAG,KAAK,CAAC;oBACrB,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,cAAc,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,IAAI,CAAC,yBAAyB,CAAC,CAAC;oBAChG,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;oBAEtC,iDAAiD;oBACjD,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;wBAClB,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,CAAC;oBAChC,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,OAAgB;QACzB,IAAI,IAAI,CAAC,OAAO,KAAK,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;YACvB,yCAAyC;YACzC,IAAI,CAAC,kBAAkB,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;gBACtC,OAAO,CAAC,KAAK,CAAC,qCAAqC,EAAE,GAAG,CAAC,CAAC;YAC5D,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,QAAQ,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACH,SAAS;QACP,KAAK,MAAM,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;YACvC,IAAI,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;gBAClC,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,OAAO,CAAC,IAAY;QAClB,OAAO,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IAC9B,CAAC;IAED;;OAEG;IACH,SAAS;QAOP,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QAChC,OAAO;YACL,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;YAChC,SAAS,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI;YAC1B,gBAAgB,EAAE,IAAI,CAAC,eAAe,EAAE,CAAC,MAAM;YAC/C,MAAM,EAAE,MAAM,EAAE,IAAI,IAAI,IAAI;SAC7B,CAAC;IACJ,CAAC;CACF;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAChC,SAAiB,EACjB,OAAe,EACf,SAA6C,EAC7C,OAIC;IAED,OAAO,IAAI,mBAAmB,CAAC;QAC7B,SAAS;QACT,OAAO;QACP,SAAS;QACT,mBAAmB,EAAE,KAAK;QAC1B,gBAAgB,EAAE,KAAK;QACvB,eAAe,EAAE,IAAI;QACrB,GAAG,OAAO;KACX,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent Health Monitor
|
|
3
|
+
*
|
|
4
|
+
* Monitors spawned agent processes and ensures they stay alive.
|
|
5
|
+
* - Periodic health checks (process liveness)
|
|
6
|
+
* - Auto-restart on crash
|
|
7
|
+
* - Death detection and logging
|
|
8
|
+
* - Metrics collection
|
|
9
|
+
*/
|
|
10
|
+
import { EventEmitter } from 'events';
|
|
11
|
+
/**
 * Health record kept for one monitored agent process.
 *
 * NOTE(review): the implementation is not visible alongside this
 * declaration; field notes below restate names and types only.
 */
export interface AgentHealth {
    /** Agent name. */
    name: string;
    /** Process ID of the agent. */
    pid: number;
    /** Current state of the agent. */
    status: 'healthy' | 'unresponsive' | 'dead' | 'restarting';
    /** When the last health check ran. */
    lastHealthCheck: Date;
    /** When the agent last responded. */
    lastResponse: Date;
    /** Number of restarts recorded for the agent. */
    restartCount: number;
    /** Number of failures in a row. */
    consecutiveFailures: number;
    /** Agent uptime (unit not stated here — presumably ms; confirm). */
    uptime: number;
    /** When the agent was started. */
    startedAt: Date;
    /** Memory usage, when available (unit not stated here). */
    memoryUsage?: number;
    /** CPU usage, when available (unit not stated here). */
    cpuUsage?: number;
    /** Most recent error message, if any. */
    lastError?: string;
}
|
|
25
|
+
/**
 * Tunables for the agent health monitor.
 *
 * NOTE(review): exact semantics live in the implementation, which is not
 * visible alongside this declaration — the notes below follow the names.
 */
export interface HealthMonitorConfig {
    /** Interval between health checks, in milliseconds. */
    checkIntervalMs: number;
    /** Response timeout, in milliseconds. */
    responseTimeoutMs: number;
    /** Maximum number of restarts (presumably per agent — confirm). */
    maxRestarts: number;
    /** Cooldown applied around restarts, in milliseconds. */
    restartCooldownMs: number;
    /** Maximum number of consecutive failures tolerated. */
    maxConsecutiveFailures: number;
}
|
|
32
|
+
/**
 * Handle through which the health monitor observes and controls one
 * agent process.
 */
export interface AgentProcess {
    /** Agent name. */
    name: string;
    /** Process ID of the agent. */
    pid: number;
    /** Reports whether the process is alive. */
    isAlive: () => boolean;
    /** Terminates the process, optionally with a specific signal. */
    kill: (signal?: string) => void;
    /** Restarts the process; resolves when the restart call completes. */
    restart: () => Promise<void>;
    /** Optional active health probe resolving to a boolean result. */
    sendHealthCheck?: () => Promise<boolean>;
}
|
|
40
|
+
/**
 * Monitors registered agent processes and exposes their health records.
 */
export declare class AgentHealthMonitor extends EventEmitter {
    private agents;
    private health;
    private intervalId?;
    private config;
    private isRunning;
    constructor(config?: Partial<HealthMonitorConfig>);
    /**
     * Add an agent to health monitoring.
     */
    register(agent: AgentProcess): void;
    /**
     * Remove an agent from health monitoring.
     */
    unregister(name: string): void;
    /**
     * Begin the health monitoring loop.
     */
    start(): void;
    /**
     * End the health monitoring loop.
     */
    stop(): void;
    /**
     * Health status of every agent.
     */
    getAll(): AgentHealth[];
    /**
     * Health status of one agent, or `undefined` when there is none for that name.
     */
    get(name: string): AgentHealth | undefined;
    /**
     * Run a health check over all registered agents.
     */
    private checkAll;
    /**
     * Run a health check on a single agent.
     */
    private checkAgent;
    /**
     * React to an agent's death: attempt a restart or mark it dead.
     */
    private handleDeath;
    /**
     * Determine by PID whether a process is alive.
     */
    private isProcessAlive;
    /**
     * Read memory and CPU usage for a process.
     */
    private getProcessUsage;
    /**
     * Structured logging helper.
     */
    private log;
}
|
|
96
|
+
/**
 * Obtain an AgentHealthMonitor, optionally supplying partial configuration.
 *
 * NOTE(review): the name suggests a shared instance is returned on repeat
 * calls — confirm against the implementation, which is not visible here.
 */
export declare function getHealthMonitor(config?: Partial<HealthMonitorConfig>): AgentHealthMonitor;
|
|
97
|
+
//# sourceMappingURL=health-monitor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"health-monitor.d.ts","sourceRoot":"","sources":["../src/health-monitor.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AAEtC,MAAM,WAAW,WAAW;IAC1B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,SAAS,GAAG,cAAc,GAAG,MAAM,GAAG,YAAY,CAAC;IAC3D,eAAe,EAAE,IAAI,CAAC;IACtB,YAAY,EAAE,IAAI,CAAC;IACnB,YAAY,EAAE,MAAM,CAAC;IACrB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,IAAI,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,mBAAmB;IAClC,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,WAAW,EAAE,MAAM,CAAC;IACpB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,sBAAsB,EAAE,MAAM,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,OAAO,CAAC;IACvB,IAAI,EAAE,CAAC,MAAM,CAAC,EAAE,MAAM,KAAK,IAAI,CAAC;IAChC,OAAO,EAAE,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;IAC7B,eAAe,CAAC,EAAE,MAAM,OAAO,CAAC,OAAO,CAAC,CAAC;CAC1C;AAUD,qBAAa,kBAAmB,SAAQ,YAAY;IAClD,OAAO,CAAC,MAAM,CAAmC;IACjD,OAAO,CAAC,MAAM,CAAkC;IAChD,OAAO,CAAC,UAAU,CAAC,CAAiC;IACpD,OAAO,CAAC,MAAM,CAAsB;IACpC,OAAO,CAAC,SAAS,CAAS;gBAEd,MAAM,GAAE,OAAO,CAAC,mBAAmB,CAAM;IAKrD;;OAEG;IACH,QAAQ,CAAC,KAAK,EAAE,YAAY,GAAG,IAAI;IAkBnC;;OAEG;IACH,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI;IAO9B;;OAEG;IACH,KAAK,IAAI,IAAI;IAiBb;;OAEG;IACH,IAAI,IAAI,IAAI;IASZ;;OAEG;IACH,MAAM,IAAI,WAAW,EAAE;IAOvB;;OAEG;IACH,GAAG,CAAC,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,SAAS;IAQ1C;;OAEG;YACW,QAAQ;IAOtB;;OAEG;YACW,UAAU;IAoExB;;OAEG;YACW,WAAW;IAgEzB;;OAEG;IACH,OAAO,CAAC,cAAc;IAUtB;;OAEG;YACW,eAAe;IAgB7B;;OAEG;IACH,OAAO,CAAC,GAAG;CA6BZ;AAKD,wBAAgB,gBAAgB,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,mBAAmB,CAAC,GAAG,kBAAkB,CAK1F"}
|