@ruvector/edge-net 0.1.7 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ledger.js +663 -0
- package/monitor.js +675 -0
- package/onnx-worker.js +482 -0
- package/package.json +40 -5
- package/qdag.js +582 -0
- package/scheduler.js +764 -0
- package/signaling.js +732 -0
package/monitor.js
ADDED
|
@@ -0,0 +1,675 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @ruvector/edge-net Monitoring and Metrics System
|
|
3
|
+
*
|
|
4
|
+
* Real-time monitoring for distributed compute network:
|
|
5
|
+
* - System metrics collection
|
|
6
|
+
* - Network health monitoring
|
|
7
|
+
* - Performance tracking
|
|
8
|
+
* - Alert system
|
|
9
|
+
* - Metrics aggregation
|
|
10
|
+
*
|
|
11
|
+
* @module @ruvector/edge-net/monitor
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { EventEmitter } from 'events';
|
|
15
|
+
import { randomBytes } from 'crypto';
|
|
16
|
+
import { cpus, totalmem, freemem, loadavg } from 'os';
|
|
17
|
+
|
|
18
|
+
// ============================================
|
|
19
|
+
// METRICS COLLECTOR
|
|
20
|
+
// ============================================
|
|
21
|
+
|
|
22
|
+
/**
 * Time-series metrics storage.
 *
 * Keeps at most `maxPoints` `{ value, timestamp }` samples (oldest dropped
 * first) and answers simple statistics over a trailing time window measured
 * back from `Date.now()`.
 */
class MetricsSeries {
  /**
   * @param {object} [options]
   * @param {string} [options.name] - Label reported by `toJSON()`.
   * @param {number} [options.maxPoints=1000] - Retention limit; a falsy value
   *   falls back to 1000.
   */
  constructor(options = {}) {
    this.name = options.name;
    this.maxPoints = options.maxPoints || 1000;
    this.points = [];
  }

  /**
   * Append a sample and drop the oldest ones beyond `maxPoints`.
   *
   * @param {number} value
   * @param {number} [timestamp=Date.now()] - Epoch milliseconds.
   */
  add(value, timestamp = Date.now()) {
    this.points.push({ value, timestamp });

    // Prune old points
    if (this.points.length > this.maxPoints) {
      this.points = this.points.slice(-this.maxPoints);
    }
  }

  /**
   * Samples whose timestamp lies within the last `duration` milliseconds.
   *
   * @param {number} [duration=60000]
   * @returns {Array<{value: number, timestamp: number}>}
   */
  recentPoints(duration = 60000) {
    const cutoff = Date.now() - duration;
    return this.points.filter((point) => point.timestamp >= cutoff);
  }

  /**
   * @returns {{value: number, timestamp: number}|null} The most recently
   *   added sample, or null when the series is empty.
   */
  latest() {
    return this.points.length > 0 ? this.points[this.points.length - 1] : null;
  }

  /**
   * @param {number} [duration=60000] - Window in milliseconds.
   * @returns {number} Mean value in the window, or 0 when it holds no samples.
   */
  avg(duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length === 0) return 0;
    return recent.reduce((sum, point) => sum + point.value, 0) / recent.length;
  }

  /**
   * @param {number} [duration=60000] - Window in milliseconds.
   * @returns {number} Smallest value in the window, or 0 when it is empty.
   */
  min(duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length === 0) return 0;
    // Fold instead of spreading: Math.min(...values) can exceed the engine's
    // argument limit when maxPoints is configured very large.
    return recent.reduce((lowest, point) => Math.min(lowest, point.value), Infinity);
  }

  /**
   * @param {number} [duration=60000] - Window in milliseconds.
   * @returns {number} Largest value in the window, or 0 when it is empty.
   */
  max(duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length === 0) return 0;
    return recent.reduce((highest, point) => Math.max(highest, point.value), -Infinity);
  }

  /**
   * Change per second between the first and last sample in the window.
   *
   * @param {number} [duration=60000] - Window in milliseconds.
   * @returns {number} `(last.value - first.value) / seconds`, or 0 when the
   *   window holds fewer than two samples or spans no time.
   */
  rate(duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length < 2) return 0;

    const first = recent[0];
    const last = recent[recent.length - 1];
    const timeDiff = (last.timestamp - first.timestamp) / 1000;

    return timeDiff > 0 ? (last.value - first.value) / timeDiff : 0;
  }

  /**
   * Nearest-rank percentile of the values in the window.
   *
   * @param {number} p - Percentile in [0, 100]; values outside the range are
   *   clamped and a non-numeric `p` is treated as 0.
   * @param {number} [duration=60000] - Window in milliseconds.
   * @returns {number} The selected value, or 0 when the window is empty.
   */
  percentile(p, duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length === 0) return 0;

    // Clamp so p > 100 (or NaN) can no longer index past the sorted array.
    const pct = Math.min(100, Math.max(0, Number(p) || 0));
    const sorted = recent.map((point) => point.value).sort((a, b) => a - b);
    const index = Math.ceil((pct / 100) * sorted.length) - 1;
    return sorted[Math.max(0, index)];
  }

  /**
   * @returns {{name: string, count: number, latest: object|null, avg: number,
   *   min: number, max: number}} Summary using the default 60 s window.
   */
  toJSON() {
    return {
      name: this.name,
      count: this.points.length,
      latest: this.latest(),
      avg: this.avg(),
      min: this.min(),
      max: this.max(),
    };
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Counter metric (monotonically increasing).
 *
 * The value only grows via `inc()` and returns to zero via `reset()`.
 */
class Counter {
  /**
   * @param {string} name - Label reported by `toJSON()`.
   */
  constructor(name) {
    this.name = name;
    this.value = 0;
    this.lastReset = Date.now();
  }

  /**
   * Increase the counter.
   *
   * @param {number} [amount=1] - Finite, non-negative increment.
   * @throws {RangeError} When `amount` is negative, NaN, infinite or not a
   *   number; accepting those would break the monotonic contract or leave the
   *   value permanently corrupted.
   */
  inc(amount = 1) {
    if (!Number.isFinite(amount) || amount < 0) {
      throw new RangeError(
        `Counter "${String(this.name)}" can only be incremented by a finite, non-negative number (received ${String(amount)})`,
      );
    }
    this.value += amount;
  }

  /**
   * @returns {number} Current total since construction or the last reset.
   */
  get() {
    return this.value;
  }

  /**
   * Set the counter back to zero and record the reset time.
   */
  reset() {
    this.value = 0;
    this.lastReset = Date.now();
  }

  /**
   * @returns {{name: string, value: number, lastReset: number}}
   */
  toJSON() {
    return {
      name: this.name,
      value: this.value,
      lastReset: this.lastReset,
    };
  }
}
|
|
131
|
+
|
|
132
|
+
/**
 * Gauge metric: a single value that may be set, raised or lowered freely.
 */
class Gauge {
  /**
   * @param {string} name - Label reported by `toJSON()`.
   */
  constructor(name) {
    Object.assign(this, { name, value: 0 });
  }

  /** Overwrite the current value. */
  set(value) {
    this.value = value;
  }

  /** Raise the value by `amount` (1 when omitted). */
  inc(amount = 1) {
    this.value = this.value + amount;
  }

  /** Lower the value by `amount` (1 when omitted). */
  dec(amount = 1) {
    this.value = this.value - amount;
  }

  /** @returns {number} The current value. */
  get() {
    return this.value;
  }

  /** @returns {{name: string, value: number}} */
  toJSON() {
    const { name, value } = this;
    return { name, value };
  }
}
|
|
164
|
+
|
|
165
|
+
/**
 * Histogram metric with cumulative buckets.
 *
 * Each bucket counts every observation less than or equal to its upper bound,
 * and an implicit `Infinity` bucket counts all observations.
 */
class Histogram {
  /**
   * @param {string} name - Label reported by `toJSON()`.
   * @param {number[]} [buckets] - Upper bounds. The array is copied, so the
   *   caller's array is left untouched; duplicates and non-finite bounds are
   *   dropped because the `Infinity` bucket is always added separately.
   */
  constructor(name, buckets = [5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000]) {
    this.name = name;
    // Copy before sorting: Array.prototype.sort works in place and would
    // otherwise reorder the array owned by the caller.
    this.buckets = [...new Set(buckets)]
      .filter((bound) => Number.isFinite(bound))
      .sort((a, b) => a - b);
    this.counts = new Map(this.buckets.map((bound) => [bound, 0]));
    this.counts.set(Infinity, 0);
    this.sum = 0;
    this.count = 0;
  }

  /**
   * Record one observation.
   *
   * @param {number} value
   */
  observe(value) {
    this.sum += value;
    this.count++;

    // Cumulative: every bucket whose bound is >= value is incremented.
    for (const bucket of this.buckets) {
      if (value <= bucket) {
        this.counts.set(bucket, this.counts.get(bucket) + 1);
      }
    }
    this.counts.set(Infinity, this.counts.get(Infinity) + 1);
  }

  /**
   * @returns {number} Mean of all observations, or 0 when there are none.
   */
  avg() {
    return this.count > 0 ? this.sum / this.count : 0;
  }

  /**
   * @returns {{name: string, count: number, sum: number, avg: number,
   *   buckets: Object<string, number>}} `buckets` maps each upper bound
   *   (stringified, including "Infinity") to its cumulative count.
   */
  toJSON() {
    return {
      name: this.name,
      count: this.count,
      sum: this.sum,
      avg: this.avg(),
      buckets: Object.fromEntries(this.counts),
    };
  }
}
|
|
204
|
+
|
|
205
|
+
// ============================================
|
|
206
|
+
// SYSTEM MONITOR
|
|
207
|
+
// ============================================
|
|
208
|
+
|
|
209
|
+
/**
 * System resource monitor.
 *
 * Samples the 1-minute load average and memory usage from the `os` module on
 * a fixed interval, stores them in MetricsSeries instances and emits a
 * `'metrics'` event with a snapshot after every sample.
 */
export class SystemMonitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.interval=5000] - Sampling period in
   *   milliseconds; a falsy value falls back to 5000.
   */
  constructor(options = {}) {
    super();
    this.interval = options.interval || 5000;
    this.timer = null;

    // Metrics
    this.cpu = new MetricsSeries({ name: 'cpu_usage' });
    this.memory = new MetricsSeries({ name: 'memory_usage' });
    this.load = new MetricsSeries({ name: 'load_avg' });
  }

  /**
   * Take an immediate sample and begin periodic sampling.
   * Calling it while already running is a no-op, so the active interval is
   * never overwritten and leaked.
   */
  start() {
    if (this.timer) return;
    this.collect();
    this.timer = setInterval(() => this.collect(), this.interval);
  }

  /**
   * Stop periodic sampling. Safe to call when not running.
   */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Record one sample of CPU, memory and load, then emit `'metrics'`.
   */
  collect() {
    // CPU usage (simplified - percentage of load vs cores)
    const load = loadavg()[0];
    // Guard against an empty cpus() list so the division cannot yield
    // NaN or Infinity.
    const cores = Math.max(1, cpus().length);
    const cpuUsage = Math.min(100, (load / cores) * 100);
    this.cpu.add(cpuUsage);

    // Memory usage
    const total = totalmem();
    const free = freemem();
    const memUsage = ((total - free) / total) * 100;
    this.memory.add(memUsage);

    // Load average
    this.load.add(load);

    this.emit('metrics', this.getMetrics());
  }

  /**
   * @returns {{timestamp: number,
   *   cpu: {usage: number, avg1m: number, avg5m: number},
   *   memory: {usage: number, total: number, free: number},
   *   load: {current: number, avg: number[]},
   *   cores: number}} Snapshot built from the latest samples and live `os`
   *   readings. `usage` values are percentages; they are 0 before the first
   *   sample has been collected.
   */
  getMetrics() {
    return {
      timestamp: Date.now(),
      cpu: {
        usage: this.cpu.latest()?.value || 0,
        avg1m: this.cpu.avg(60000),
        avg5m: this.cpu.avg(300000),
      },
      memory: {
        usage: this.memory.latest()?.value || 0,
        total: totalmem(),
        free: freemem(),
      },
      load: {
        current: this.load.latest()?.value || 0,
        avg: loadavg(),
      },
      cores: cpus().length,
    };
  }
}
|
|
276
|
+
|
|
277
|
+
// ============================================
|
|
278
|
+
// NETWORK MONITOR
|
|
279
|
+
// ============================================
|
|
280
|
+
|
|
281
|
+
/**
 * Network health and performance monitor.
 *
 * Tracks connected peers, message and error totals, peer latency and message
 * sizes, and runs a periodic health check that emits `'alert'` and
 * `'health-check'` events.
 */
export class NetworkMonitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.nodeId] - Identifier stored on the instance.
   * @param {number} [options.checkInterval=30000] - Health-check period in
   *   milliseconds; a falsy value falls back to 30000.
   */
  constructor(options = {}) {
    super();
    this.nodeId = options.nodeId;
    this.checkInterval = options.checkInterval || 30000;
    this.timer = null;

    // Metrics
    this.peers = new Gauge('connected_peers');
    this.messages = new Counter('messages_total');
    this.errors = new Counter('errors_total');
    this.latency = new Histogram('peer_latency_ms');

    // Series
    this.bandwidth = new MetricsSeries({ name: 'bandwidth_bps' });
    this.peerCount = new MetricsSeries({ name: 'peer_count' });

    // Peer tracking
    this.peerLatencies = new Map(); // peerId -> latency ms
    this.peerStatus = new Map(); // peerId -> { status, lastSeen }
  }

  /**
   * Begin periodic health checks. A no-op while already running, so the
   * active interval is never overwritten and leaked.
   */
  start() {
    if (this.timer) return;
    this.timer = setInterval(() => this.check(), this.checkInterval);
  }

  /**
   * Stop periodic health checks. Safe to call when not running.
   */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Record peer connection
   */
  peerConnected(peerId) {
    this.peers.inc();
    this.peerStatus.set(peerId, { status: 'connected', lastSeen: Date.now() });
    this.peerCount.add(this.peers.get());
    this.emit('peer-connected', { peerId });
  }

  /**
   * Record peer disconnection
   */
  peerDisconnected(peerId) {
    this.peers.dec();
    this.peerStatus.set(peerId, { status: 'disconnected', lastSeen: Date.now() });
    this.peerCount.add(this.peers.get());
    this.emit('peer-disconnected', { peerId });
  }

  /**
   * Record one message.
   *
   * @param {*} peerId - When it matches a tracked peer, that peer's
   *   `lastSeen` is refreshed.
   * @param {number} bytes - Message size; a missing or non-finite size is
   *   recorded as 0 so it cannot turn the series statistics into NaN.
   */
  recordMessage(peerId, bytes) {
    this.messages.inc();
    this.bandwidth.add(Number.isFinite(bytes) ? bytes : 0);

    if (peerId && this.peerStatus.has(peerId)) {
      this.peerStatus.get(peerId).lastSeen = Date.now();
    }
  }

  /**
   * Record latency measurement
   */
  recordLatency(peerId, latencyMs) {
    this.latency.observe(latencyMs);
    this.peerLatencies.set(peerId, latencyMs);
  }

  /**
   * Record an error.
   *
   * The `'error'` event is only emitted when a listener is attached:
   * EventEmitter throws on an `'error'` emit that nobody handles, which
   * would turn a bookkeeping call into a crash.
   *
   * @param {string} type
   */
  recordError(type) {
    this.errors.inc();
    if (this.listenerCount('error') > 0) {
      this.emit('error', { type });
    }
  }

  /**
   * Periodic health check
   */
  check() {
    const metrics = this.getMetrics();

    // Check for issues
    if (metrics.peers.current === 0) {
      this.emit('alert', { type: 'no_peers', message: 'No connected peers' });
    }

    if (metrics.latency.avg > 1000) {
      this.emit('alert', { type: 'high_latency', message: 'High network latency', value: metrics.latency.avg });
    }

    this.emit('health-check', metrics);
  }

  /**
   * Estimate a latency quantile from the cumulative histogram buckets.
   *
   * @param {number} q - Quantile in [0, 1], e.g. 0.5 for the median.
   * @returns {number} Upper bound (ms) of the smallest bucket holding at
   *   least `q` of all observations. Returns the largest finite bound when
   *   the quantile lies beyond it, and 0 when nothing has been observed.
   */
  latencyQuantile(q) {
    const { count, buckets } = this.latency.toJSON();
    if (!count) return 0;

    const target = q * count;
    const bounds = Object.keys(buckets).map(Number).sort((a, b) => a - b);
    let largestFinite = 0;
    for (const bound of bounds) {
      if (!Number.isFinite(bound)) break;
      largestFinite = bound;
      if (buckets[bound] >= target) return bound;
    }
    return largestFinite;
  }

  /**
   * @returns {object} Snapshot of network metrics:
   *   - `latency.p50` / `latency.p99` are bucket-resolution latency estimates
   *     in ms (see `latencyQuantile`), not observation counts.
   *   - `bandwidth.current` is the sum of recorded message sizes in the last
   *     60 s divided by 60, i.e. size units per second.
   *   - `bandwidth.avg1m` is the mean recorded message size over the last 60 s.
   */
  getMetrics() {
    const now = Date.now();
    const windowMs = 60000;
    const cutoff = now - windowMs;

    // The series stores one sample per message, so throughput is the total
    // within the window over the window length. rate() would only compare
    // the first and last message sizes.
    let recentBytes = 0;
    for (const point of this.bandwidth.points) {
      if (point.timestamp >= cutoff) recentBytes += point.value;
    }

    return {
      timestamp: now,
      peers: {
        current: this.peers.get(),
        avg1h: this.peerCount.avg(3600000),
      },
      messages: this.messages.get(),
      errors: this.errors.get(),
      latency: {
        avg: this.latency.avg(),
        p50: this.latencyQuantile(0.5),
        p99: this.latencyQuantile(0.99),
      },
      bandwidth: {
        current: recentBytes / (windowMs / 1000),
        avg1m: this.bandwidth.avg(60000),
      },
    };
  }
}
|
|
404
|
+
|
|
405
|
+
// ============================================
|
|
406
|
+
// TASK MONITOR
|
|
407
|
+
// ============================================
|
|
408
|
+
|
|
409
|
+
/**
 * Task execution monitor.
 *
 * Counts task lifecycle transitions (submitted, started, completed, failed,
 * retried) and records wait and execution time distributions.
 */
export class TaskMonitor extends EventEmitter {
  /**
   * @param {object} [options] - Currently unused; accepted for symmetry with
   *   the other monitors.
   */
  constructor(options = {}) {
    super();

    // Counters
    this.submitted = new Counter('tasks_submitted');
    this.completed = new Counter('tasks_completed');
    this.failed = new Counter('tasks_failed');
    this.retried = new Counter('tasks_retried');

    // Gauges
    this.pending = new Gauge('tasks_pending');
    this.running = new Gauge('tasks_running');

    // Histograms
    this.waitTime = new Histogram('task_wait_time_ms');
    this.execTime = new Histogram('task_exec_time_ms');

    // Series
    this.throughput = new MetricsSeries({ name: 'tasks_per_second' });
  }

  /** A task entered the queue. */
  taskSubmitted() {
    this.submitted.inc();
    this.pending.inc();
  }

  /** A queued task began executing. */
  taskStarted() {
    this.pending.dec();
    this.running.inc();
  }

  /**
   * A running task finished successfully.
   *
   * @param {number} waitTimeMs - Time the task spent queued.
   * @param {number} execTimeMs - Time the task spent executing.
   */
  taskCompleted(waitTimeMs, execTimeMs) {
    this.running.dec();
    this.completed.inc();
    this.waitTime.observe(waitTimeMs);
    this.execTime.observe(execTimeMs);
    // Store the running total, not a constant 1: the series' rate() is the
    // difference between the first and last sample over elapsed time, which
    // is always 0 for identical samples.
    this.throughput.add(this.completed.get());
  }

  /** A running task failed. */
  taskFailed() {
    this.running.dec();
    this.failed.inc();
  }

  /** A task was retried. */
  taskRetried() {
    this.retried.inc();
  }

  /**
   * Estimate a quantile from a histogram's cumulative buckets.
   *
   * @param {{toJSON: Function}} histogram - Object whose `toJSON()` yields
   *   `{ count, buckets }` with cumulative counts keyed by upper bound.
   * @param {number} q - Quantile in [0, 1], e.g. 0.5 for the median.
   * @returns {number} Upper bound of the smallest bucket holding at least
   *   `q` of all observations. Returns the largest finite bound when the
   *   quantile lies beyond it, and 0 when nothing has been observed.
   */
  histogramQuantile(histogram, q) {
    const { count, buckets } = histogram.toJSON();
    if (!count) return 0;

    const target = q * count;
    const bounds = Object.keys(buckets).map(Number).sort((a, b) => a - b);
    let largestFinite = 0;
    for (const bound of bounds) {
      if (!Number.isFinite(bound)) break;
      largestFinite = bound;
      if (buckets[bound] >= target) return bound;
    }
    return largestFinite;
  }

  /**
   * @returns {object} Snapshot of task metrics:
   *   - `successRate` is completed / (completed + failed), or 1 when no task
   *     has finished yet.
   *   - `p50` / `p99` are bucket-resolution time estimates in ms (see
   *     `histogramQuantile`), not observation counts.
   *   - `throughput` is completed tasks per second over the last 60 s.
   */
  getMetrics() {
    const completed = this.completed.get();
    const failed = this.failed.get();
    const total = completed + failed;
    const successRate = total > 0 ? completed / total : 1;

    return {
      timestamp: Date.now(),
      submitted: this.submitted.get(),
      completed,
      failed,
      retried: this.retried.get(),
      pending: this.pending.get(),
      running: this.running.get(),
      successRate,
      waitTime: {
        avg: this.waitTime.avg(),
        p50: this.histogramQuantile(this.waitTime, 0.5),
        p99: this.histogramQuantile(this.waitTime, 0.99),
      },
      execTime: {
        avg: this.execTime.avg(),
        p50: this.histogramQuantile(this.execTime, 0.5),
        p99: this.histogramQuantile(this.execTime, 0.99),
      },
      throughput: this.throughput.rate(60000),
    };
  }
}
|
|
488
|
+
|
|
489
|
+
// ============================================
|
|
490
|
+
// MONITORING DASHBOARD
|
|
491
|
+
// ============================================
|
|
492
|
+
|
|
493
|
+
/**
 * Unified monitoring dashboard.
 *
 * Owns one SystemMonitor, one NetworkMonitor and one TaskMonitor, re-emits
 * their events, keeps track of active alerts and periodically emits a
 * combined `'report'`.
 *
 * Events emitted: `'system-metrics'`, `'network-metrics'`, `'network-error'`,
 * `'alert'`, `'alert-cleared'`, `'report'`, `'started'`, `'stopped'`.
 */
export class Monitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.nodeId] - Defaults to `monitor-<random hex>`.
   * @param {object} [options.system] - Passed to SystemMonitor.
   * @param {object} [options.network] - Passed to NetworkMonitor together
   *   with `nodeId`.
   * @param {object} [options.tasks] - Passed to TaskMonitor.
   * @param {number} [options.cpuHigh=90]
   * @param {number} [options.memoryHigh=90]
   * @param {number} [options.latencyHigh=1000]
   * @param {number} [options.errorRateHigh=0.1]
   * @param {object} [options.thresholds] - Overrides any of the four
   *   thresholds above.
   * @param {number} [options.reportInterval=60000] - Report period in
   *   milliseconds; a falsy value falls back to 60000.
   */
  constructor(options = {}) {
    super();
    this.nodeId = options.nodeId || `monitor-${randomBytes(8).toString('hex')}`;

    // Sub-monitors
    this.system = new SystemMonitor(options.system);
    this.network = new NetworkMonitor({ ...options.network, nodeId: this.nodeId });
    this.tasks = new TaskMonitor(options.tasks);

    // Alert thresholds. `??` rather than `||` so an explicit 0 (for example
    // errorRateHigh: 0, meaning "any failure counts") is honoured instead of
    // being replaced by the default.
    this.thresholds = {
      cpuHigh: options.cpuHigh ?? 90,
      memoryHigh: options.memoryHigh ?? 90,
      latencyHigh: options.latencyHigh ?? 1000,
      errorRateHigh: options.errorRateHigh ?? 0.1,
      ...options.thresholds,
    };

    // Alert state
    this.alerts = new Map();
    this.alertHistory = [];

    // Reporting
    this.reportInterval = options.reportInterval || 60000;
    this.reportTimer = null;

    // Forward events
    this.system.on('metrics', (m) => this.emit('system-metrics', m));
    this.network.on('health-check', (m) => this.emit('network-metrics', m));
    this.network.on('alert', (a) => this.handleAlert(a));
    // An 'error' emit with no listener makes EventEmitter throw, so attach
    // one here and re-emit under a name that is safe to leave unhandled.
    this.network.on('error', (e) => this.emit('network-error', e));
  }

  /**
   * Start all monitors and the periodic report.
   * A no-op while already running, so timers are never duplicated.
   */
  start() {
    if (this.reportTimer) return;

    this.system.start();
    this.network.start();

    this.reportTimer = setInterval(() => {
      this.generateReport();
    }, this.reportInterval);

    this.emit('started');
  }

  /**
   * Stop all monitors
   */
  stop() {
    this.system.stop();
    this.network.stop();

    if (this.reportTimer) {
      clearInterval(this.reportTimer);
      this.reportTimer = null;
    }

    this.emit('stopped');
  }

  /**
   * Record an alert and re-emit it.
   *
   * Alerts are de-duplicated by `type`: a repeat bumps `count` and
   * `lastSeen` on the stored entry, while a new type gets an id and is also
   * appended to `alertHistory`.
   *
   * @param {{type: string}} alert
   */
  handleAlert(alert) {
    const key = `${alert.type}`;
    const existing = this.alerts.get(key);

    if (existing) {
      existing.count++;
      existing.lastSeen = Date.now();
    } else {
      const now = Date.now();
      const newAlert = {
        ...alert,
        id: `alert-${randomBytes(4).toString('hex')}`,
        count: 1,
        firstSeen: now,
        lastSeen: now,
      };
      this.alerts.set(key, newAlert);
      this.alertHistory.push(newAlert);
    }

    this.emit('alert', alert);
  }

  /**
   * Remove an active alert and emit `'alert-cleared'`.
   *
   * @param {string} type - Stringified the same way `handleAlert` builds its
   *   key, so non-string types still match.
   */
  clearAlert(type) {
    this.alerts.delete(`${type}`);
    this.emit('alert-cleared', { type });
  }

  /**
   * Build a combined report, emit it as `'report'` and return it.
   *
   * @returns {object} `{ timestamp, nodeId, system, network, tasks, alerts, health }`
   */
  generateReport() {
    const report = {
      timestamp: Date.now(),
      nodeId: this.nodeId,
      system: this.system.getMetrics(),
      network: this.network.getMetrics(),
      tasks: this.tasks.getMetrics(),
      alerts: Array.from(this.alerts.values()),
      health: this.calculateHealth(),
    };

    this.emit('report', report);
    return report;
  }

  /**
   * Calculate overall health score (0-100).
   *
   * Starts at 100 and subtracts a fixed penalty per detected issue:
   * high CPU (20), high memory (20), no peers (30), high latency (15),
   * high task error rate (15).
   *
   * @returns {{score: number, status: 'healthy'|'degraded'|'unhealthy',
   *   issues: string[]}} `status` is 'healthy' at 80 or above, 'degraded' at
   *   50 or above, otherwise 'unhealthy'.
   */
  calculateHealth() {
    let score = 100;
    const issues = [];

    // System health
    const sysMetrics = this.system.getMetrics();
    if (sysMetrics.cpu.usage > this.thresholds.cpuHigh) {
      score -= 20;
      issues.push('high_cpu');
    }
    if (sysMetrics.memory.usage > this.thresholds.memoryHigh) {
      score -= 20;
      issues.push('high_memory');
    }

    // Network health
    const netMetrics = this.network.getMetrics();
    if (netMetrics.peers.current === 0) {
      score -= 30;
      issues.push('no_peers');
    }
    if (netMetrics.latency.avg > this.thresholds.latencyHigh) {
      score -= 15;
      issues.push('high_latency');
    }

    // Task health
    const taskMetrics = this.tasks.getMetrics();
    if (taskMetrics.successRate < (1 - this.thresholds.errorRateHigh)) {
      score -= 15;
      issues.push('high_error_rate');
    }

    return {
      score: Math.max(0, score),
      status: score >= 80 ? 'healthy' : score >= 50 ? 'degraded' : 'unhealthy',
      issues,
    };
  }

  /**
   * Get current metrics summary
   */
  getMetrics() {
    return {
      system: this.system.getMetrics(),
      network: this.network.getMetrics(),
      tasks: this.tasks.getMetrics(),
    };
  }

  /**
   * Get active alerts
   */
  getAlerts() {
    return Array.from(this.alerts.values());
  }
}
|
|
670
|
+
|
|
671
|
+
// ============================================
|
|
672
|
+
// EXPORTS
|
|
673
|
+
// ============================================
|
|
674
|
+
|
|
675
|
+
export default Monitor;
|