@ruvector/edge-net 0.1.7 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/monitor.js ADDED
@@ -0,0 +1,675 @@
1
+ /**
2
+ * @ruvector/edge-net Monitoring and Metrics System
3
+ *
4
+ * Real-time monitoring for distributed compute network:
5
+ * - System metrics collection
6
+ * - Network health monitoring
7
+ * - Performance tracking
8
+ * - Alert system
9
+ * - Metrics aggregation
10
+ *
11
+ * @module @ruvector/edge-net/monitor
12
+ */
13
+
14
+ import { EventEmitter } from 'events';
15
+ import { randomBytes } from 'crypto';
16
+ import { cpus, totalmem, freemem, loadavg } from 'os';
17
+
18
+ // ============================================
19
+ // METRICS COLLECTOR
20
+ // ============================================
21
+
/**
 * Bounded in-memory time series of `{ value, timestamp }` samples.
 *
 * Samples are kept in insertion order and only the newest `maxPoints` are
 * retained. Every aggregate works on a trailing window measured back from
 * `Date.now()` and returns 0 when that window holds no samples.
 */
class MetricsSeries {
  /**
   * @param {object} [options]
   * @param {string} [options.name] - Label reported by `toJSON()`.
   * @param {number} [options.maxPoints=1000] - Maximum number of retained
   *   samples; any falsy value falls back to 1000.
   */
  constructor(options = {}) {
    this.name = options.name;
    this.maxPoints = options.maxPoints || 1000;
    this.points = [];
  }

  /**
   * Append a sample and drop the oldest ones once `maxPoints` is exceeded.
   *
   * @param {number} value - Sample value.
   * @param {number} [timestamp=Date.now()] - Sample time in epoch milliseconds.
   */
  add(value, timestamp = Date.now()) {
    this.points.push({ value, timestamp });

    if (this.points.length > this.maxPoints) {
      this.points = this.points.slice(-this.maxPoints);
    }
  }

  /**
   * @returns {{value: number, timestamp: number}|null} The most recently added
   *   sample, or `null` when the series is empty.
   */
  latest() {
    return this.points.length > 0 ? this.points[this.points.length - 1] : null;
  }

  /**
   * Samples whose timestamp falls inside the trailing window.
   * Shared by all aggregate methods so the window rule lives in one place.
   *
   * @param {number} [duration=60000] - Window length in milliseconds.
   * @returns {Array<{value: number, timestamp: number}>}
   */
  recentPoints(duration = 60000) {
    const cutoff = Date.now() - duration;
    return this.points.filter((point) => point.timestamp >= cutoff);
  }

  /**
   * @param {number} [duration=60000] - Window length in milliseconds.
   * @returns {number} Arithmetic mean of the windowed values, or 0 if none.
   */
  avg(duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length === 0) return 0;
    return recent.reduce((sum, point) => sum + point.value, 0) / recent.length;
  }

  /**
   * @param {number} [duration=60000] - Window length in milliseconds.
   * @returns {number} Smallest windowed value, or 0 if none.
   */
  min(duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length === 0) return 0;
    // Fold instead of Math.min(...values): spreading a very large window can
    // exceed the engine's argument limit and throw a RangeError.
    return recent.reduce((lowest, point) => Math.min(lowest, point.value), Infinity);
  }

  /**
   * @param {number} [duration=60000] - Window length in milliseconds.
   * @returns {number} Largest windowed value, or 0 if none.
   */
  max(duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length === 0) return 0;
    return recent.reduce((highest, point) => Math.max(highest, point.value), -Infinity);
  }

  /**
   * Per-second change between the first and last sample in the window:
   * `(last.value - first.value) / elapsedSeconds`.
   *
   * @param {number} [duration=60000] - Window length in milliseconds.
   * @returns {number} 0 when fewer than two samples are in the window or when
   *   the two samples do not differ in time.
   */
  rate(duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length < 2) return 0;

    const first = recent[0];
    const last = recent[recent.length - 1];
    const elapsedSeconds = (last.timestamp - first.timestamp) / 1000;

    return elapsedSeconds > 0 ? (last.value - first.value) / elapsedSeconds : 0;
  }

  /**
   * Nearest-rank percentile of the windowed values.
   *
   * @param {number} p - Percentile in the 0-100 range; values outside that
   *   range are clamped to the smallest / largest windowed value.
   * @param {number} [duration=60000] - Window length in milliseconds.
   * @returns {number} The selected value, or 0 when the window is empty.
   */
  percentile(p, duration = 60000) {
    const recent = this.recentPoints(duration);
    if (recent.length === 0) return 0;

    const sorted = recent.map((point) => point.value).sort((a, b) => a - b);
    const rank = Math.ceil((p / 100) * sorted.length) - 1;
    // Clamp on both sides: p > 100 used to index past the end and yield undefined.
    const index = Math.min(sorted.length - 1, Math.max(0, rank));
    return sorted[index];
  }

  /**
   * @returns {{name: string, count: number, latest: object|null, avg: number, min: number, max: number}}
   *   Snapshot using the default 60-second window for avg/min/max.
   */
  toJSON() {
    return {
      name: this.name,
      count: this.points.length,
      latest: this.latest(),
      avg: this.avg(),
      min: this.min(),
      max: this.max(),
    };
  }
}
99
+
/**
 * Counter metric: a running total that only moves upward between resets.
 */
class Counter {
  /**
   * @param {string} name - Label reported by `toJSON()`.
   */
  constructor(name) {
    this.name = name;
    this.value = 0;
    this.lastReset = Date.now();
  }

  /**
   * Add to the running total.
   *
   * @param {number} [amount=1] - Finite, non-negative increment.
   * @throws {RangeError} If `amount` is negative, NaN, infinite or not a
   *   number. Accepting such values would break the "only increases"
   *   contract, and a single NaN would poison the total permanently.
   */
  inc(amount = 1) {
    if (!Number.isFinite(amount) || amount < 0) {
      throw new RangeError(
        `Counter "${this.name}" can only be incremented by a finite, non-negative amount (got ${amount})`,
      );
    }
    this.value += amount;
  }

  /**
   * @returns {number} Current total since construction or the last `reset()`.
   */
  get() {
    return this.value;
  }

  /**
   * Set the total back to 0 and record the reset time.
   */
  reset() {
    this.value = 0;
    this.lastReset = Date.now();
  }

  /**
   * @returns {{name: string, value: number, lastReset: number}} Snapshot;
   *   `lastReset` is epoch milliseconds.
   */
  toJSON() {
    return {
      name: this.name,
      value: this.value,
      lastReset: this.lastReset,
    };
  }
}
131
+
/**
 * Gauge metric: a single reading that may be set directly or nudged in
 * either direction.
 */
class Gauge {
  /**
   * @param {string} name - Label reported by `toJSON()`.
   */
  constructor(name) {
    Object.assign(this, { name, value: 0 });
  }

  /** Overwrite the current reading with `value`. */
  set(value) {
    this.value = value;
  }

  /** Raise the reading by `amount` (1 when omitted). */
  inc(amount = 1) {
    this.value = this.value + amount;
  }

  /** Lower the reading by `amount` (1 when omitted). */
  dec(amount = 1) {
    this.value = this.value - amount;
  }

  /** @returns {number} The current reading. */
  get() {
    const { value } = this;
    return value;
  }

  /** @returns {{name: string, value: number}} Snapshot of the gauge. */
  toJSON() {
    const { name, value } = this;
    return { name, value };
  }
}
164
+
/**
 * Histogram metric with cumulative buckets.
 *
 * Each bucket counts every observation less than or equal to its upper
 * bound, so one observation may increment several buckets. An extra
 * `Infinity` bucket counts every observation.
 */
class Histogram {
  /**
   * @param {string} name - Label reported by `toJSON()`.
   * @param {number[]} [buckets] - Bucket upper bounds in any order.
   */
  constructor(name, buckets = [5, 10, 25, 50, 100, 250, 500, 1000, 2500, 5000, 10000]) {
    this.name = name;
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the array the caller passed in.
    this.buckets = [...buckets].sort((a, b) => a - b);
    this.counts = new Map(this.buckets.map((bound) => [bound, 0]));
    this.counts.set(Infinity, 0);
    this.sum = 0;
    this.count = 0;
  }

  /**
   * Record one observation.
   *
   * @param {number} value - Observed value; added to `sum` and counted in
   *   every bucket whose bound is >= `value`.
   */
  observe(value) {
    this.sum += value;
    this.count++;

    for (const bound of this.buckets) {
      if (value <= bound) {
        this.counts.set(bound, this.counts.get(bound) + 1);
      }
    }
    // The catch-all bucket is incremented unconditionally.
    this.counts.set(Infinity, this.counts.get(Infinity) + 1);
  }

  /**
   * @returns {number} Mean of all observations, or 0 when there are none.
   */
  avg() {
    return this.count > 0 ? this.sum / this.count : 0;
  }

  /**
   * @returns {{name: string, count: number, sum: number, avg: number, buckets: Object<string, number>}}
   *   Snapshot; `buckets` maps each bound (as a string key, including
   *   `"Infinity"`) to its cumulative observation count.
   */
  toJSON() {
    return {
      name: this.name,
      count: this.count,
      sum: this.sum,
      avg: this.avg(),
      buckets: Object.fromEntries(this.counts),
    };
  }
}
204
+
205
+ // ============================================
206
+ // SYSTEM MONITOR
207
+ // ============================================
208
+
/**
 * System resource monitor.
 *
 * Samples host load and memory figures from the `os` module on a fixed
 * interval, stores them in three series, and emits a `'metrics'` event with
 * the current snapshot after every sample.
 */
export class SystemMonitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {number} [options.interval=5000] - Sampling period in
   *   milliseconds; falsy values fall back to 5000.
   */
  constructor(options = {}) {
    super();
    this.interval = options.interval || 5000;
    this.timer = null;

    // Metrics
    this.cpu = new MetricsSeries({ name: 'cpu_usage' });
    this.memory = new MetricsSeries({ name: 'memory_usage' });
    this.load = new MetricsSeries({ name: 'load_avg' });
  }

  /**
   * Take an immediate sample and begin periodic sampling.
   * Calling it while already running is a no-op; previously a second call
   * overwrote `this.timer` and left the first interval running forever.
   */
  start() {
    if (this.timer) return;
    this.collect();
    this.timer = setInterval(() => this.collect(), this.interval);
  }

  /**
   * Stop periodic sampling. Safe to call when not running.
   */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Take one sample of CPU, memory and load, then emit `'metrics'`.
   */
  collect() {
    // CPU usage is approximated as 1-minute load average per core, capped at
    // 100. Per the Node.js docs os.loadavg() is always [0, 0, 0] on Windows,
    // so this reads 0 there.
    const load = loadavg()[0];
    // Guard against an empty cpus() list, which would divide by zero and
    // store NaN/Infinity-derived values.
    const cores = Math.max(1, cpus().length);
    const cpuUsage = Math.min(100, (load / cores) * 100);
    this.cpu.add(cpuUsage);

    // Memory usage as a percentage of total memory
    const total = totalmem();
    const free = freemem();
    const memUsage = ((total - free) / total) * 100;
    this.memory.add(memUsage);

    // Raw 1-minute load average
    this.load.add(load);

    this.emit('metrics', this.getMetrics());
  }

  /**
   * @returns {{timestamp: number, cpu: {usage: number, avg1m: number, avg5m: number},
   *   memory: {usage: number, total: number, free: number},
   *   load: {current: number, avg: number[]}, cores: number}}
   *   Latest stored samples plus live `os` readings; stored values default
   *   to 0 before the first sample.
   */
  getMetrics() {
    return {
      timestamp: Date.now(),
      cpu: {
        usage: this.cpu.latest()?.value || 0,
        avg1m: this.cpu.avg(60000),
        avg5m: this.cpu.avg(300000),
      },
      memory: {
        usage: this.memory.latest()?.value || 0,
        total: totalmem(),
        free: freemem(),
      },
      load: {
        current: this.load.latest()?.value || 0,
        avg: loadavg(),
      },
      cores: cpus().length,
    };
  }
}
276
+
277
+ // ============================================
278
+ // NETWORK MONITOR
279
+ // ============================================
280
+
/**
 * Network health and performance monitor.
 *
 * Tracks peer connections, message volume, errors and latency, and runs a
 * periodic health check that emits `'alert'` and `'health-check'` events.
 */
export class NetworkMonitor extends EventEmitter {
  /**
   * Estimate a percentile from a cumulative-bucket histogram.
   *
   * Returns the upper bound of the first bucket whose cumulative count
   * reaches the requested rank, so the result is an upper-bound estimate
   * limited by bucket resolution. When the rank lies beyond every finite
   * bucket, the largest finite bound is returned.
   *
   * @param {{count: number, buckets: number[], counts: Map<number, number>}} histogram
   * @param {number} p - Percentile in the 0-100 range.
   * @returns {number} Estimated value, or 0 when nothing has been observed.
   */
  static percentileFromHistogram(histogram, p) {
    const total = histogram.count;
    if (total === 0) return 0;

    const rank = Math.max(1, Math.ceil((p / 100) * total));
    for (const bound of histogram.buckets) {
      if (histogram.counts.get(bound) >= rank) return bound;
    }
    const bounds = histogram.buckets;
    return bounds.length > 0 ? bounds[bounds.length - 1] : 0;
  }

  /**
   * @param {object} [options]
   * @param {string} [options.nodeId] - Identifier stored on the instance.
   * @param {number} [options.checkInterval=30000] - Health-check period in
   *   milliseconds; falsy values fall back to 30000.
   */
  constructor(options = {}) {
    super();
    this.nodeId = options.nodeId;
    this.checkInterval = options.checkInterval || 30000;
    this.timer = null;

    // Metrics
    this.peers = new Gauge('connected_peers');
    this.messages = new Counter('messages_total');
    this.errors = new Counter('errors_total');
    this.latency = new Histogram('peer_latency_ms');

    // Series
    this.bandwidth = new MetricsSeries({ name: 'bandwidth_bps' });
    this.peerCount = new MetricsSeries({ name: 'peer_count' });

    // Peer tracking
    this.peerLatencies = new Map(); // peerId -> latency ms
    this.peerStatus = new Map(); // peerId -> { status, lastSeen }
  }

  /**
   * Begin periodic health checks. A no-op when already running, so a repeated
   * call cannot orphan the previous interval.
   */
  start() {
    if (this.timer) return;
    this.timer = setInterval(() => this.check(), this.checkInterval);
  }

  /**
   * Stop periodic health checks. Safe to call when not running.
   */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
      this.timer = null;
    }
  }

  /**
   * Record peer connection: bumps the peer gauge, marks the peer as
   * connected, samples the peer count and emits `'peer-connected'`.
   *
   * @param {string} peerId
   */
  peerConnected(peerId) {
    this.peers.inc();
    this.peerStatus.set(peerId, { status: 'connected', lastSeen: Date.now() });
    this.peerCount.add(this.peers.get());
    this.emit('peer-connected', { peerId });
  }

  /**
   * Record peer disconnection: lowers the peer gauge, marks the peer as
   * disconnected, samples the peer count and emits `'peer-disconnected'`.
   *
   * @param {string} peerId
   */
  peerDisconnected(peerId) {
    // Never let a spurious or duplicate disconnect push the count below zero.
    if (this.peers.get() > 0) {
      this.peers.dec();
    }
    this.peerStatus.set(peerId, { status: 'disconnected', lastSeen: Date.now() });
    this.peerCount.add(this.peers.get());
    this.emit('peer-disconnected', { peerId });
  }

  /**
   * Record message: counts it, stores its size, and refreshes `lastSeen`
   * for a peer that is already tracked.
   *
   * @param {string} [peerId] - Sender/receiver; ignored when not tracked.
   * @param {number} bytes - Message size.
   */
  recordMessage(peerId, bytes) {
    this.messages.inc();
    this.bandwidth.add(bytes);

    if (peerId && this.peerStatus.has(peerId)) {
      this.peerStatus.get(peerId).lastSeen = Date.now();
    }
  }

  /**
   * Record latency measurement in the histogram and as the peer's latest value.
   *
   * @param {string} peerId
   * @param {number} latencyMs
   */
  recordLatency(peerId, latencyMs) {
    this.latency.observe(latencyMs);
    this.peerLatencies.set(peerId, latencyMs);
  }

  /**
   * Record error: always counted; the `'error'` event is emitted only when
   * someone listens for it.
   *
   * EventEmitter throws when `'error'` is emitted with no listener, which
   * previously turned every recorded error into an uncaught exception.
   *
   * @param {string} type - Error category forwarded as `{ type }`.
   */
  recordError(type) {
    this.errors.inc();
    if (this.listenerCount('error') > 0) {
      this.emit('error', { type });
    }
  }

  /**
   * Periodic health check: emits `'alert'` for zero peers and for an average
   * latency above 1000 ms, then emits `'health-check'` with the metrics.
   */
  check() {
    const metrics = this.getMetrics();

    // Check for issues
    if (metrics.peers.current === 0) {
      this.emit('alert', { type: 'no_peers', message: 'No connected peers' });
    }

    if (metrics.latency.avg > 1000) {
      this.emit('alert', { type: 'high_latency', message: 'High network latency', value: metrics.latency.avg });
    }

    this.emit('health-check', metrics);
  }

  /**
   * @returns {{timestamp: number, peers: {current: number, avg1h: number},
   *   messages: number, errors: number,
   *   latency: {avg: number, p50: number, p99: number},
   *   bandwidth: {current: number, avg1m: number}}}
   *   - `latency.p50` / `latency.p99`: bucket-resolution latency estimates in
   *     ms (previously these held bucket observation counts).
   *   - `bandwidth.current`: bytes recorded during the last 60 s divided by
   *     60, counting only samples the series still retains (previously
   *     `rate()` over per-message sizes, which is not a throughput).
   *   - `bandwidth.avg1m`: mean recorded message size over the last 60 s.
   */
  getMetrics() {
    const windowMs = 60000;
    const cutoff = Date.now() - windowMs;
    const recentBytes = this.bandwidth.points.reduce(
      (sum, point) => (point.timestamp >= cutoff ? sum + point.value : sum),
      0,
    );

    return {
      timestamp: Date.now(),
      peers: {
        current: this.peers.get(),
        avg1h: this.peerCount.avg(3600000),
      },
      messages: this.messages.get(),
      errors: this.errors.get(),
      latency: {
        avg: this.latency.avg(),
        p50: NetworkMonitor.percentileFromHistogram(this.latency, 50),
        p99: NetworkMonitor.percentileFromHistogram(this.latency, 99),
      },
      bandwidth: {
        current: recentBytes / (windowMs / 1000),
        avg1m: this.bandwidth.avg(60000),
      },
    };
  }
}
404
+
405
+ // ============================================
406
+ // TASK MONITOR
407
+ // ============================================
408
+
/**
 * Task execution monitor.
 *
 * Callers report lifecycle transitions (submitted, started, completed,
 * failed, retried); the monitor keeps counters, gauges and timing
 * histograms and summarizes them in `getMetrics()`.
 */
export class TaskMonitor extends EventEmitter {
  /**
   * Estimate a percentile from a cumulative-bucket histogram.
   *
   * Returns the upper bound of the first bucket whose cumulative count
   * reaches the requested rank, so the result is an upper-bound estimate
   * limited by bucket resolution. When the rank lies beyond every finite
   * bucket, the largest finite bound is returned.
   *
   * @param {{count: number, buckets: number[], counts: Map<number, number>}} histogram
   * @param {number} p - Percentile in the 0-100 range.
   * @returns {number} Estimated value, or 0 when nothing has been observed.
   */
  static percentileFromHistogram(histogram, p) {
    const total = histogram.count;
    if (total === 0) return 0;

    const rank = Math.max(1, Math.ceil((p / 100) * total));
    for (const bound of histogram.buckets) {
      if (histogram.counts.get(bound) >= rank) return bound;
    }
    const bounds = histogram.buckets;
    return bounds.length > 0 ? bounds[bounds.length - 1] : 0;
  }

  /**
   * @param {object} [options] - Accepted for symmetry with the other
   *   monitors; no option is read here.
   */
  constructor(options = {}) {
    super();

    // Counters
    this.submitted = new Counter('tasks_submitted');
    this.completed = new Counter('tasks_completed');
    this.failed = new Counter('tasks_failed');
    this.retried = new Counter('tasks_retried');

    // Gauges
    this.pending = new Gauge('tasks_pending');
    this.running = new Gauge('tasks_running');

    // Histograms
    this.waitTime = new Histogram('task_wait_time_ms');
    this.execTime = new Histogram('task_exec_time_ms');

    // Series: one sample of value 1 per completed task
    this.throughput = new MetricsSeries({ name: 'tasks_per_second' });
  }

  /** A task entered the queue. */
  taskSubmitted() {
    this.submitted.inc();
    this.pending.inc();
  }

  /** A queued task began executing. */
  taskStarted() {
    this.pending.dec();
    this.running.inc();
  }

  /**
   * A running task finished successfully.
   *
   * @param {number} waitTimeMs - Time spent queued, recorded in `waitTime`.
   * @param {number} execTimeMs - Time spent executing, recorded in `execTime`.
   */
  taskCompleted(waitTimeMs, execTimeMs) {
    this.running.dec();
    this.completed.inc();
    this.waitTime.observe(waitTimeMs);
    this.execTime.observe(execTimeMs);
    this.throughput.add(1);
  }

  /** A running task finished with a failure. */
  taskFailed() {
    this.running.dec();
    this.failed.inc();
  }

  /** A task was scheduled for another attempt. */
  taskRetried() {
    this.retried.inc();
  }

  /**
   * @returns {{timestamp: number, submitted: number, completed: number,
   *   failed: number, retried: number, pending: number, running: number,
   *   successRate: number,
   *   waitTime: {avg: number, p50: number, p99: number},
   *   execTime: {avg: number, p50: number, p99: number},
   *   throughput: number}}
   *   - `successRate`: completed / (completed + failed), or 1 before any
   *     task has finished.
   *   - `p50` / `p99`: bucket-resolution duration estimates in ms
   *     (previously these held bucket observation counts).
   *   - `throughput`: completions per second averaged over the last 60 s,
   *     counting only samples the series still retains. Previously this used
   *     `rate()` on a series of constant 1s and was therefore always 0.
   */
  getMetrics() {
    const total = this.completed.get() + this.failed.get();
    const successRate = total > 0 ? this.completed.get() / total : 1;

    const windowMs = 60000;
    const cutoff = Date.now() - windowMs;
    const recentCompletions = this.throughput.points.reduce(
      (sum, point) => (point.timestamp >= cutoff ? sum + point.value : sum),
      0,
    );

    return {
      timestamp: Date.now(),
      submitted: this.submitted.get(),
      completed: this.completed.get(),
      failed: this.failed.get(),
      retried: this.retried.get(),
      pending: this.pending.get(),
      running: this.running.get(),
      successRate,
      waitTime: {
        avg: this.waitTime.avg(),
        p50: TaskMonitor.percentileFromHistogram(this.waitTime, 50),
        p99: TaskMonitor.percentileFromHistogram(this.waitTime, 99),
      },
      execTime: {
        avg: this.execTime.avg(),
        p50: TaskMonitor.percentileFromHistogram(this.execTime, 50),
        p99: TaskMonitor.percentileFromHistogram(this.execTime, 99),
      },
      throughput: recentCompletions / (windowMs / 1000),
    };
  }
}
488
+
489
+ // ============================================
490
+ // MONITORING DASHBOARD
491
+ // ============================================
492
+
/**
 * Unified monitoring dashboard.
 *
 * Owns a system, a network and a task sub-monitor, tracks active alerts,
 * and periodically emits a combined `'report'`.
 *
 * Events emitted: `'system-metrics'`, `'network-metrics'`, `'network-error'`,
 * `'alert'`, `'alert-cleared'`, `'report'`, `'started'`, `'stopped'`.
 */
export class Monitor extends EventEmitter {
  /**
   * @param {object} [options]
   * @param {string} [options.nodeId] - Node identifier; a random
   *   `monitor-<hex>` id is generated when omitted.
   * @param {object} [options.system] - Passed to `SystemMonitor`.
   * @param {object} [options.network] - Passed to `NetworkMonitor` together
   *   with `nodeId`.
   * @param {object} [options.tasks] - Passed to `TaskMonitor`.
   * @param {number} [options.cpuHigh=90] - CPU usage threshold.
   * @param {number} [options.memoryHigh=90] - Memory usage threshold.
   * @param {number} [options.latencyHigh=1000] - Average latency threshold.
   * @param {number} [options.errorRateHigh=0.1] - Tolerated task failure ratio.
   * @param {object} [options.thresholds] - Overrides merged over the above.
   * @param {number} [options.reportInterval=60000] - Report period in ms;
   *   falsy values fall back to 60000.
   */
  constructor(options = {}) {
    super();
    this.nodeId = options.nodeId || `monitor-${randomBytes(8).toString('hex')}`;

    // Sub-monitors
    this.system = new SystemMonitor(options.system);
    this.network = new NetworkMonitor({ ...options.network, nodeId: this.nodeId });
    this.tasks = new TaskMonitor(options.tasks);

    // Alert thresholds. `??` (not `||`) so that an explicit 0 is honored
    // instead of being replaced by the default.
    this.thresholds = {
      cpuHigh: options.cpuHigh ?? 90,
      memoryHigh: options.memoryHigh ?? 90,
      latencyHigh: options.latencyHigh ?? 1000,
      errorRateHigh: options.errorRateHigh ?? 0.1,
      ...options.thresholds,
    };

    // Alert state
    this.alerts = new Map();
    this.alertHistory = [];

    // Reporting
    this.reportInterval = options.reportInterval || 60000;
    this.reportTimer = null;

    // Forward events
    this.system.on('metrics', m => this.emit('system-metrics', m));
    this.network.on('health-check', m => this.emit('network-metrics', m));
    this.network.on('alert', a => this.handleAlert(a));
    // An 'error' event with no listener makes EventEmitter throw. Listening
    // here keeps the network monitor's error reporting from crashing the
    // process, and re-emits under a name that is safe when unhandled.
    this.network.on('error', e => this.emit('network-error', e));
  }

  /**
   * Start all monitors and the periodic report, then emit `'started'`.
   * A no-op when already running, so a repeated call cannot orphan timers.
   */
  start() {
    if (this.reportTimer) return;

    this.system.start();
    this.network.start();

    this.reportTimer = setInterval(() => {
      this.generateReport();
    }, this.reportInterval);

    this.emit('started');
  }

  /**
   * Stop all monitors and the periodic report, then emit `'stopped'`.
   */
  stop() {
    this.system.stop();
    this.network.stop();

    if (this.reportTimer) {
      clearInterval(this.reportTimer);
      this.reportTimer = null;
    }

    this.emit('stopped');
  }

  /**
   * Handle alert: de-duplicates by `alert.type`. A repeat bumps `count` and
   * `lastSeen` on the active entry; a new type creates an entry that is
   * also appended to `alertHistory`. The incoming alert is always
   * re-emitted as `'alert'`.
   *
   * @param {{type: string}} alert - Alert payload; extra fields are copied
   *   onto a newly created entry.
   */
  handleAlert(alert) {
    const key = `${alert.type}`;
    const existing = this.alerts.get(key);

    if (existing) {
      existing.count++;
      existing.lastSeen = Date.now();
    } else {
      const newAlert = {
        ...alert,
        id: `alert-${randomBytes(4).toString('hex')}`,
        count: 1,
        firstSeen: Date.now(),
        lastSeen: Date.now(),
      };
      this.alerts.set(key, newAlert);
      this.alertHistory.push(newAlert);
    }

    this.emit('alert', alert);
  }

  /**
   * Clear alert: removes the active entry for `type` and emits
   * `'alert-cleared'`.
   *
   * @param {string} type - Alert type, stringified the same way
   *   `handleAlert` builds its keys.
   */
  clearAlert(type) {
    this.alerts.delete(`${type}`);
    this.emit('alert-cleared', { type });
  }

  /**
   * Generate comprehensive report, emit it as `'report'` and return it.
   *
   * @returns {{timestamp: number, nodeId: string, system: object,
   *   network: object, tasks: object, alerts: object[], health: object}}
   */
  generateReport() {
    const report = {
      timestamp: Date.now(),
      nodeId: this.nodeId,
      system: this.system.getMetrics(),
      network: this.network.getMetrics(),
      tasks: this.tasks.getMetrics(),
      alerts: Array.from(this.alerts.values()),
      health: this.calculateHealth(),
    };

    this.emit('report', report);
    return report;
  }

  /**
   * Calculate overall health score (0-100).
   *
   * Starts at 100 and subtracts a fixed penalty per detected issue:
   * high CPU 20, high memory 20, no peers 30, high latency 15,
   * high task error rate 15.
   *
   * @returns {{score: number, status: 'healthy'|'degraded'|'unhealthy', issues: string[]}}
   *   `status` is `'healthy'` at 80 or above, `'degraded'` at 50 or above,
   *   otherwise `'unhealthy'`.
   */
  calculateHealth() {
    let score = 100;
    const issues = [];

    // System health
    const sysMetrics = this.system.getMetrics();
    if (sysMetrics.cpu.usage > this.thresholds.cpuHigh) {
      score -= 20;
      issues.push('high_cpu');
    }
    if (sysMetrics.memory.usage > this.thresholds.memoryHigh) {
      score -= 20;
      issues.push('high_memory');
    }

    // Network health
    const netMetrics = this.network.getMetrics();
    if (netMetrics.peers.current === 0) {
      score -= 30;
      issues.push('no_peers');
    }
    if (netMetrics.latency.avg > this.thresholds.latencyHigh) {
      score -= 15;
      issues.push('high_latency');
    }

    // Task health
    const taskMetrics = this.tasks.getMetrics();
    if (taskMetrics.successRate < (1 - this.thresholds.errorRateHigh)) {
      score -= 15;
      issues.push('high_error_rate');
    }

    return {
      score: Math.max(0, score),
      status: score >= 80 ? 'healthy' : score >= 50 ? 'degraded' : 'unhealthy',
      issues,
    };
  }

  /**
   * Get current metrics summary from all three sub-monitors.
   *
   * @returns {{system: object, network: object, tasks: object}}
   */
  getMetrics() {
    return {
      system: this.system.getMetrics(),
      network: this.network.getMetrics(),
      tasks: this.tasks.getMetrics(),
    };
  }

  /**
   * Get active alerts.
   *
   * @returns {object[]} Entries currently held in the alert map.
   */
  getAlerts() {
    return Array.from(this.alerts.values());
  }
}
670
+
671
+ // ============================================
672
+ // EXPORTS
673
+ // ============================================
674
+
675
+ export default Monitor;