pms_md 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +93 -0
- package/node-monitor/ARCHITECTURE.md +341 -0
- package/node-monitor/CHANGELOG.md +105 -0
- package/node-monitor/CONTRIBUTING.md +96 -0
- package/node-monitor/DESIGN_IMPROVEMENTS.md +286 -0
- package/node-monitor/FILTER_BUTTONS_FIX.md +303 -0
- package/node-monitor/GETTING_STARTED.md +416 -0
- package/node-monitor/INSTALLATION.md +470 -0
- package/node-monitor/LICENSE +22 -0
- package/node-monitor/PUBLISHING_GUIDE.md +331 -0
- package/node-monitor/QUICK_REFERENCE.md +252 -0
- package/node-monitor/README.md +458 -0
- package/node-monitor/READY_TO_PUBLISH.md +272 -0
- package/node-monitor/SETUP_GUIDE.md +479 -0
- package/node-monitor/examples/EMAIL_SETUP_GUIDE.md +282 -0
- package/node-monitor/examples/ERROR_LOGGING_GUIDE.md +405 -0
- package/node-monitor/examples/GET_APP_PASSWORD.md +145 -0
- package/node-monitor/examples/LOG_FILES_REFERENCE.md +336 -0
- package/node-monitor/examples/QUICK_START_EMAIL.md +126 -0
- package/node-monitor/examples/express-app.js +499 -0
- package/node-monitor/examples/package-lock.json +1295 -0
- package/node-monitor/examples/package.json +18 -0
- package/node-monitor/examples/public/css/style.css +718 -0
- package/node-monitor/examples/public/js/dashboard.js +207 -0
- package/node-monitor/examples/public/js/health.js +114 -0
- package/node-monitor/examples/public/js/main.js +89 -0
- package/node-monitor/examples/public/js/metrics.js +225 -0
- package/node-monitor/examples/public/js/theme.js +138 -0
- package/node-monitor/examples/views/dashboard.ejs +20 -0
- package/node-monitor/examples/views/error-logs.ejs +1129 -0
- package/node-monitor/examples/views/health.ejs +21 -0
- package/node-monitor/examples/views/home.ejs +341 -0
- package/node-monitor/examples/views/layout.ejs +50 -0
- package/node-monitor/examples/views/metrics.ejs +16 -0
- package/node-monitor/examples/views/partials/footer.ejs +16 -0
- package/node-monitor/examples/views/partials/header.ejs +35 -0
- package/node-monitor/examples/views/partials/nav.ejs +23 -0
- package/node-monitor/examples/views/status.ejs +390 -0
- package/node-monitor/package-lock.json +4300 -0
- package/node-monitor/package.json +76 -0
- package/node-monitor/pre-publish-check.js +200 -0
- package/node-monitor/src/config/monitoringConfig.js +255 -0
- package/node-monitor/src/index.js +300 -0
- package/node-monitor/src/logger/errorLogger.js +297 -0
- package/node-monitor/src/monitors/apiErrorMonitor.js +156 -0
- package/node-monitor/src/monitors/dbConnectionMonitor.js +389 -0
- package/node-monitor/src/monitors/serverHealthMonitor.js +320 -0
- package/node-monitor/src/monitors/systemResourceMonitor.js +357 -0
- package/node-monitor/src/notifiers/emailNotifier.js +248 -0
- package/node-monitor/src/notifiers/notificationManager.js +96 -0
- package/node-monitor/src/notifiers/slackNotifier.js +209 -0
- package/node-monitor/src/views/dashboard.html +530 -0
- package/node-monitor/src/views/health.html +399 -0
- package/node-monitor/src/views/metrics.html +406 -0
- package/package.json +22 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Server Health Monitor
|
|
3
|
+
* Monitors server uptime and provides health check endpoints
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const cron = require('node-cron');
|
|
7
|
+
|
|
8
|
+
/**
 * Tracks process uptime, runs registered health checks (on a cron schedule
 * and on demand), and raises / clears alerts through the notification manager.
 */
class ServerHealthMonitor {
  /**
   * @param {object} config - Monitoring configuration. This class reads
   *   `healthCheck.enabled`, `healthCheck.timeout`, `intervals.health`,
   *   `thresholds.consecutiveFailures` and `app.{name,version,environment}`.
   * @param {object} logger - Must provide `logInfo`, `logWarning` and
   *   `logSystemError`.
   * @param {object} notificationManager - Must provide `sendCritical`,
   *   `sendWarning` and `sendRecovery`; their results are awaited.
   */
  constructor(config, logger, notificationManager) {
    this.config = config;
    this.logger = logger;
    this.notificationManager = notificationManager;
    this.startTime = Date.now();
    this.isHealthy = true;
    this.healthChecks = [];
    this.cronJob = null;
    this.consecutiveFailures = 0;
    this.lastHealthStatus = true;
    // True while a "health check failed" alert is outstanding (sent and not
    // yet followed by a recovery). Kept separate from lastHealthStatus, which
    // flips on the very first failure.
    this.failureAlertSent = false;
  }

  /**
   * Start health monitoring: schedule periodic checks and run one immediately.
   * Does nothing when `config.healthCheck.enabled` is falsy.
   */
  start() {
    if (!this.config.healthCheck.enabled) {
      return;
    }

    // Calling start() twice must not leave two scheduled jobs running.
    if (this.cronJob) {
      this.cronJob.stop();
    }

    this.logger.logInfo('Server health monitoring started');

    const cronExpression = this.getCronExpression(this.config.intervals.health);
    this.cronJob = cron.schedule(cronExpression, () => this.runScheduledCheck());

    // Initial check; runScheduledCheck never rejects, so this is safe to
    // leave un-awaited.
    this.runScheduledCheck();
  }

  /**
   * Stop health monitoring. Safe to call when monitoring was never started.
   */
  stop() {
    if (this.cronJob) {
      this.cronJob.stop();
      this.cronJob = null;
      this.logger.logInfo('Server health monitoring stopped');
    }
  }

  /**
   * Build a 5-field cron expression approximating the given interval.
   *
   * Intervals below one minute (or non-numeric values) run every minute,
   * intervals below one hour use a minute step, intervals below one day use
   * an hour step, and anything longer runs once a day at midnight.
   *
   * @param {number} intervalMs - Desired interval in milliseconds.
   * @returns {string} Cron expression.
   */
  getCronExpression(intervalMs) {
    const minutes = Math.floor(Number(intervalMs) / 60000);

    if (!Number.isFinite(minutes) || minutes < 1) {
      return '*/1 * * * *';
    }
    if (minutes < 60) {
      return `*/${minutes} * * * *`;
    }

    // A minute step of 60 or more is out of range for the minute field, so
    // switch to an hour-based schedule.
    const hours = Math.floor(minutes / 60);
    if (hours < 24) {
      return `0 */${hours} * * *`;
    }
    return '0 0 * * *';
  }

  /**
   * Run a health check on behalf of the scheduler. Never rejects: failures
   * are logged so they cannot surface as unhandled promise rejections.
   *
   * @returns {Promise<object|null>} The check results, or null on error.
   */
  async runScheduledCheck() {
    try {
      return await this.performHealthCheck();
    } catch (error) {
      this.logger.logSystemError('health_check_error', 'Scheduled health check failed', {
        error: error instanceof Error ? error.message : String(error),
        stack: error instanceof Error ? error.stack : undefined
      });
      return null;
    }
  }

  /**
   * Register a custom health check.
   *
   * @param {string} name - Key under which the result is reported.
   * @param {Function} checkFunction - Called with no arguments; may return a
   *   value or a promise. A falsy result counts as a failure. An object
   *   result is merged into the reported entry.
   */
  registerHealthCheck(name, checkFunction) {
    this.healthChecks.push({
      name,
      check: checkFunction
    });
    this.logger.logInfo(`Registered health check: ${name}`);
  }

  /**
   * Run every registered health check, each bounded by
   * `config.healthCheck.timeout` milliseconds, update `isHealthy`, and
   * trigger alert / recovery notifications.
   *
   * @returns {Promise<object>} `{ status, timestamp, uptime, checks }` where
   *   `status` is 'healthy' or 'unhealthy' and `checks` maps each check name
   *   to `{ status: 'pass' | 'fail', ... }`.
   */
  async performHealthCheck() {
    const results = {
      status: 'healthy',
      timestamp: new Date().toISOString(),
      uptime: this.getUptime(),
      checks: {}
    };

    let allHealthy = true;

    for (const healthCheck of this.healthChecks) {
      try {
        const checkResult = await this.runWithTimeout(
          () => healthCheck.check(),
          this.config.healthCheck.timeout
        );

        results.checks[healthCheck.name] = {
          status: checkResult ? 'pass' : 'fail',
          // Object results contribute their own fields (and may override
          // `status`); non-objects spread to nothing.
          ...(typeof checkResult === 'object' && checkResult)
        };

        if (!checkResult) {
          allHealthy = false;
        }
      } catch (error) {
        results.checks[healthCheck.name] = {
          status: 'fail',
          error: error instanceof Error ? error.message : String(error)
        };
        allHealthy = false;
      }
    }

    results.status = allHealthy ? 'healthy' : 'unhealthy';
    this.isHealthy = allHealthy;

    // A notification transport failure must not turn a completed health
    // check into a rejected promise; report it and still return the results.
    try {
      await this.handleHealthStatusChange(allHealthy, results);
    } catch (error) {
      this.logger.logSystemError('health_notification_error', 'Failed to send health notification', {
        error: error instanceof Error ? error.message : String(error)
      });
    }

    return results;
  }

  /**
   * Await `task()` but reject with 'Health check timeout' if it takes longer
   * than `ms` milliseconds. The timer is always cleared so that no pending
   * timer outlives the check.
   *
   * @param {Function} task - Function returning a value or promise.
   * @param {number} ms - Timeout in milliseconds.
   * @returns {Promise<*>} The task's result.
   */
  async runWithTimeout(task, ms) {
    let timer;
    const deadline = new Promise((_, reject) => {
      timer = setTimeout(() => reject(new Error('Health check timeout')), ms);
    });

    try {
      return await Promise.race([task(), deadline]);
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Update failure counters and send notifications on state transitions.
   *
   * A critical alert is sent once when the number of consecutive failures
   * reaches `config.thresholds.consecutiveFailures`; a recovery notification
   * is sent on the first healthy result after such an alert.
   *
   * @param {boolean} isHealthy - Outcome of the latest check run.
   * @param {object} results - Results object from performHealthCheck.
   * @returns {Promise<void>} Rejects if the notification manager rejects.
   */
  async handleHealthStatusChange(isHealthy, results) {
    const threshold = this.config.thresholds.consecutiveFailures;

    if (!isHealthy) {
      this.consecutiveFailures++;
      this.lastHealthStatus = false;

      if (this.consecutiveFailures >= threshold && !this.failureAlertSent) {
        // Set before awaiting so an overlapping check does not send a
        // duplicate; rolled back below if delivery fails so it is retried.
        this.failureAlertSent = true;

        const failedChecks = Object.entries(results.checks)
          .filter(([, check]) => check.status === 'fail')
          .map(([name]) => name)
          .join(', ');

        try {
          await this.notificationManager.sendCritical(
            'Server Health Check Failed',
            `Server health checks have failed ${this.consecutiveFailures} consecutive times`,
            {
              consecutiveFailures: this.consecutiveFailures,
              failedChecks,
              uptime: this.getUptimeFormatted()
            }
          );
        } catch (error) {
          this.failureAlertSent = false;
          throw error;
        }
      }
      return;
    }

    // Healthy: reset state first so a failing recovery notification cannot
    // leave stale counters behind.
    const previousFailures = this.consecutiveFailures;
    const shouldNotifyRecovery = this.failureAlertSent;

    this.consecutiveFailures = 0;
    this.lastHealthStatus = true;
    this.failureAlertSent = false;

    if (shouldNotifyRecovery) {
      await this.notificationManager.sendRecovery(
        'Server Health Restored',
        'All health checks are now passing',
        {
          previousFailures,
          uptime: this.getUptimeFormatted()
        }
      );
    }
  }

  /**
   * Build an Express-style handler that runs all health checks and responds
   * with 200 (healthy) or 503 (unhealthy) plus the JSON results.
   *
   * @returns {Function} `(req, res, next?) => Promise<void>`
   */
  healthCheckEndpoint() {
    return async (req, res, next) => {
      try {
        const healthStatus = await this.performHealthCheck();
        const statusCode = healthStatus.status === 'healthy' ? 200 : 503;

        res.status(statusCode).json(healthStatus);
      } catch (error) {
        // An async handler's rejection would otherwise go unhandled and the
        // request would hang.
        if (typeof next === 'function') {
          next(error);
          return;
        }
        res.status(500).json({
          status: 'error',
          error: error instanceof Error ? error.message : String(error)
        });
      }
    };
  }

  /**
   * Build a handler that reports the last known health state together with
   * application and process information, without running any checks.
   *
   * @returns {Function} `(req, res) => void`
   */
  healthInfoEndpoint() {
    return (req, res) => {
      res.json({
        status: this.isHealthy ? 'healthy' : 'unhealthy',
        uptime: this.getUptimeFormatted(),
        timestamp: new Date().toISOString(),
        application: {
          name: this.config.app.name,
          version: this.config.app.version,
          environment: this.config.app.environment
        },
        process: {
          pid: process.pid,
          nodeVersion: process.version,
          platform: process.platform
        }
      });
    };
  }

  /**
   * @returns {number} Whole seconds elapsed since this monitor was created.
   */
  getUptime() {
    return Math.floor((Date.now() - this.startTime) / 1000);
  }

  /**
   * @returns {string} Uptime such as "1d 2h 3m 4s"; zero-valued leading
   *   units are omitted, seconds are always present.
   */
  getUptimeFormatted() {
    const uptime = this.getUptime();
    const days = Math.floor(uptime / 86400);
    const hours = Math.floor((uptime % 86400) / 3600);
    const minutes = Math.floor((uptime % 3600) / 60);
    const seconds = uptime % 60;

    const parts = [];
    if (days > 0) parts.push(`${days}d`);
    if (hours > 0) parts.push(`${hours}h`);
    if (minutes > 0) parts.push(`${minutes}m`);
    parts.push(`${seconds}s`);

    return parts.join(' ');
  }

  /**
   * Timeout helper: a promise that rejects with 'Health check timeout' after
   * `ms` milliseconds. The timer cannot be cancelled; performHealthCheck
   * uses runWithTimeout instead. Kept for callers that rely on it.
   *
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<never>}
   */
  timeout(ms) {
    return new Promise((_, reject) =>
      setTimeout(() => reject(new Error('Health check timeout')), ms)
    );
  }

  /**
   * @returns {object} Snapshot of the current health state: `isHealthy`,
   *   `uptime` (seconds), `uptimeFormatted`, `consecutiveFailures` and
   *   `registeredChecks` (count).
   */
  getStatus() {
    return {
      isHealthy: this.isHealthy,
      uptime: this.getUptime(),
      uptimeFormatted: this.getUptimeFormatted(),
      consecutiveFailures: this.consecutiveFailures,
      registeredChecks: this.healthChecks.length
    };
  }

  /**
   * Log and announce a shutdown, then stop scheduled checks. Monitoring is
   * stopped even if the notification fails.
   *
   * @param {string} signal - Name of the signal that triggered the shutdown.
   * @returns {Promise<void>} Rejects if the notification manager rejects.
   */
  async handleShutdown(signal) {
    this.logger.logWarning('shutdown', `Received ${signal} signal, shutting down gracefully`);

    try {
      await this.notificationManager.sendWarning(
        'Server Shutdown Initiated',
        `Server received ${signal} signal and is shutting down`,
        {
          signal,
          uptime: this.getUptimeFormatted(),
          timestamp: new Date().toISOString()
        }
      );
    } finally {
      this.stop();
    }
  }

  /**
   * Install process-level handlers: SIGTERM / SIGINT close `server` and exit
   * (forced after 10 seconds), uncaught exceptions are reported and exit
   * with code 1, unhandled rejections are reported.
   *
   * @param {object} server - Object exposing `close(callback)`.
   */
  setupGracefulShutdown(server) {
    const signals = ['SIGTERM', 'SIGINT'];
    let shuttingDown = false;

    signals.forEach((signal) => {
      process.on(signal, async () => {
        // Repeated signals must not trigger duplicate notifications/closes.
        if (shuttingDown) {
          return;
        }
        shuttingDown = true;

        // Arm the force-exit timer before anything is awaited so a hanging
        // notification cannot stall shutdown indefinitely.
        const forceTimer = setTimeout(() => {
          this.logger.logWarning('forced_shutdown', 'Forcing shutdown after timeout');
          process.exit(1);
        }, 10000);
        if (typeof forceTimer.unref === 'function') {
          forceTimer.unref();
        }

        try {
          await this.handleShutdown(signal);
        } catch (error) {
          // Still close the server when the shutdown notification fails.
          this.logger.logSystemError('shutdown_error', 'Error during graceful shutdown', {
            error: error instanceof Error ? error.message : String(error)
          });
        }

        server.close(() => {
          this.logger.logInfo('Server closed');
          process.exit(0);
        });
      });
    });

    // Handle uncaught exceptions
    process.on('uncaughtException', async (error) => {
      const message = error instanceof Error ? error.message : String(error);
      const stack = error instanceof Error ? error.stack : undefined;

      try {
        this.logger.logSystemError('uncaught_exception', 'Uncaught Exception', {
          error: message,
          stack
        });

        await this.notificationManager.sendCritical(
          'Uncaught Exception',
          message,
          {
            stack,
            uptime: this.getUptimeFormatted()
          }
        );
      } finally {
        // Exit even when logging or the notification fails.
        process.exit(1);
      }
    });

    // Handle unhandled promise rejections
    process.on('unhandledRejection', async (reason, promise) => {
      try {
        this.logger.logSystemError('unhandled_rejection', 'Unhandled Promise Rejection', {
          reason: String(reason),
          promise: String(promise)
        });

        await this.notificationManager.sendCritical(
          'Unhandled Promise Rejection',
          String(reason),
          {
            uptime: this.getUptimeFormatted()
          }
        );
      } catch (notifyError) {
        // A rejection escaping this handler would itself be an unhandled
        // rejection and re-enter it, so report on stderr and stop here.
        console.error('Failed to report unhandled rejection:', notifyError);
      }
    });
  }
}
|
|
318
|
+
|
|
319
|
+
module.exports = ServerHealthMonitor;
|
|
320
|
+
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* System Resource Monitor
|
|
3
|
+
* Monitors CPU, memory, and other system resources
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
const os = require('os');
|
|
7
|
+
const cron = require('node-cron');
|
|
8
|
+
|
|
9
|
+
/**
 * Samples CPU, memory and event-loop metrics on a cron schedule, keeps a
 * bounded history, and sends warning / recovery notifications when usage
 * crosses the configured thresholds.
 */
class SystemResourceMonitor {
  /**
   * @param {object} config - Monitoring configuration. This class reads
   *   `system.{enabled,trackCpu,trackMemory,trackEventLoop}`,
   *   `intervals.system`, `thresholds.{cpu,memory,consecutiveFailures}` and
   *   `notifications.cooldown`.
   * @param {object} logger - Must provide `logInfo`.
   * @param {object} notificationManager - Must provide `sendWarning` and
   *   `sendRecovery`; their results are awaited.
   */
  constructor(config, logger, notificationManager) {
    this.config = config;
    this.logger = logger;
    this.notificationManager = notificationManager;
    this.cronJob = null;
    this.metrics = {
      cpu: [],
      memory: []
    };
    this.maxMetricsHistory = 60; // Keep last 60 readings
    this.consecutiveHighCpu = 0;
    this.consecutiveHighMemory = 0;
    this.lastCpuAlert = 0;
    this.lastMemoryAlert = 0;
    // Most recent event-loop lag sample in milliseconds.
    this.lastEventLoopLag = 0;
  }

  /**
   * Start system monitoring: schedule periodic collection and collect once
   * immediately. Does nothing when `config.system.enabled` is falsy.
   */
  start() {
    if (!this.config.system.enabled) {
      return;
    }

    // Calling start() twice must not leave two scheduled jobs running.
    if (this.cronJob) {
      this.cronJob.stop();
    }

    this.logger.logInfo('System resource monitoring started');

    const cronExpression = this.getCronExpression(this.config.intervals.system);
    this.cronJob = cron.schedule(cronExpression, () => this.runScheduledCollection());

    // Initial collection; runScheduledCollection never rejects.
    this.runScheduledCollection();
  }

  /**
   * Stop system monitoring. Safe to call when monitoring was never started.
   */
  stop() {
    if (this.cronJob) {
      this.cronJob.stop();
      this.cronJob = null;
      this.logger.logInfo('System resource monitoring stopped');
    }
  }

  /**
   * Build a 5-field cron expression approximating the given interval.
   *
   * Intervals below one minute (or non-numeric values) run every minute,
   * intervals below one hour use a minute step, intervals below one day use
   * an hour step, and anything longer runs once a day at midnight.
   *
   * @param {number} intervalMs - Desired interval in milliseconds.
   * @returns {string} Cron expression.
   */
  getCronExpression(intervalMs) {
    const minutes = Math.floor(Number(intervalMs) / 60000);

    if (!Number.isFinite(minutes) || minutes < 1) {
      return '*/1 * * * *';
    }
    if (minutes < 60) {
      return `*/${minutes} * * * *`;
    }

    // A minute step of 60 or more is out of range for the minute field.
    const hours = Math.floor(minutes / 60);
    if (hours < 24) {
      return `0 */${hours} * * *`;
    }
    return '0 0 * * *';
  }

  /**
   * Collect metrics on behalf of the scheduler. Never rejects: failures are
   * logged so they cannot surface as unhandled promise rejections.
   *
   * @returns {Promise<object|null>} Collected metrics, or null on error.
   */
  async runScheduledCollection() {
    try {
      return await this.collectMetrics();
    } catch (error) {
      const details = { error: error instanceof Error ? error.message : String(error) };
      // Prefer the error-level logger method when the logger offers one.
      if (typeof this.logger.logSystemError === 'function') {
        this.logger.logSystemError('metrics_collection_error', 'System metrics collection failed', details);
      } else {
        this.logger.logInfo('System metrics collection failed', details);
      }
      return null;
    }
  }

  /**
   * Collect the metrics enabled in `config.system`, record CPU / memory
   * samples in the history, and evaluate thresholds.
   *
   * @returns {Promise<object>} `{ timestamp, cpu, memory, eventLoop }`;
   *   disabled metrics are null.
   */
  async collectMetrics() {
    const metrics = {
      timestamp: new Date().toISOString(),
      cpu: null,
      memory: null,
      eventLoop: null
    };

    if (this.config.system.trackCpu) {
      metrics.cpu = await this.getCpuUsage();
      this.addMetric('cpu', metrics.cpu);
      await this.checkCpuThreshold(metrics.cpu);
    }

    if (this.config.system.trackMemory) {
      metrics.memory = this.getMemoryUsage();
      this.addMetric('memory', metrics.memory);
      await this.checkMemoryThreshold(metrics.memory);
    }

    if (this.config.system.trackEventLoop) {
      metrics.eventLoop = await this.measureEventLoopLag();
    }

    this.logger.logInfo('System metrics collected', metrics);

    return metrics;
  }

  /**
   * Measure CPU usage over a ~100ms window.
   *
   * @returns {Promise<{process: number, system: number, cores: number}>}
   *   `process` is this process's CPU time as a percentage of one core's
   *   wall-clock time (can exceed 100 when several cores are busy);
   *   `system` is the machine-wide busy percentage over the window; both
   *   are rounded to two decimals.
   */
  async getCpuUsage() {
    const startUsage = process.cpuUsage();
    const startCpus = os.cpus();
    const startTime = process.hrtime.bigint();

    // Wait 100ms so there is a window to measure over.
    await new Promise(resolve => setTimeout(resolve, 100));

    const endUsage = process.cpuUsage(startUsage);
    const cpus = os.cpus();
    // hrtime is in nanoseconds; cpuUsage reports microseconds.
    const elapsedTime = Number(process.hrtime.bigint() - startTime) / 1000;

    const totalUsage = endUsage.user + endUsage.system;
    const cpuPercent = elapsedTime > 0 ? (totalUsage / elapsedTime) * 100 : 0;

    // Compare against the snapshot taken before the wait so the figure
    // reflects current load rather than the average since boot.
    const systemCpuUsage = this.getSystemCpuUsage(cpus, startCpus);

    return {
      process: Math.round(cpuPercent * 100) / 100,
      system: Math.round(systemCpuUsage * 100) / 100,
      cores: cpus.length
    };
  }

  /**
   * Compute the system-wide CPU busy percentage.
   *
   * @param {Array<{times: object}>} cpus - Result of `os.cpus()`.
   * @param {Array<{times: object}>} [previousCpus] - Earlier `os.cpus()`
   *   snapshot. When given (and of the same length) usage is computed from
   *   the difference between the snapshots; otherwise from the cumulative
   *   counters in `cpus`.
   * @returns {number} Percentage in [0, 100]; 0 when no data is available.
   */
  getSystemCpuUsage(cpus, previousCpus) {
    if (!Array.isArray(cpus) || cpus.length === 0) {
      return 0;
    }

    const useDelta = Array.isArray(previousCpus) && previousCpus.length === cpus.length;
    let totalIdle = 0;
    let totalTick = 0;

    cpus.forEach((cpu, index) => {
      const before = useDelta ? previousCpus[index].times : null;

      for (const type of Object.keys(cpu.times)) {
        const baseline = before ? (before[type] || 0) : 0;
        const ticks = cpu.times[type] - baseline;

        totalTick += ticks;
        if (type === 'idle') {
          totalIdle += ticks;
        }
      }
    });

    // No elapsed ticks (window shorter than counter resolution): nothing
    // to divide by.
    if (totalTick <= 0) {
      return 0;
    }

    const usage = 100 - (100 * totalIdle) / totalTick;
    return Math.min(100, Math.max(0, usage));
  }

  /**
   * Read system and process memory usage.
   *
   * @returns {object} `{ system: { total, used, free, usagePercent },
   *   process: { rss, heapTotal, heapUsed, external, heapUsagePercent } }`
   *   with sizes as human-readable strings and percentages rounded to two
   *   decimals.
   */
  getMemoryUsage() {
    const totalMemory = os.totalmem();
    const freeMemory = os.freemem();
    const usedMemory = totalMemory - freeMemory;

    const processMemory = process.memoryUsage();

    return {
      system: {
        total: this.formatBytes(totalMemory),
        used: this.formatBytes(usedMemory),
        free: this.formatBytes(freeMemory),
        usagePercent: Math.round((usedMemory / totalMemory) * 100 * 100) / 100
      },
      process: {
        rss: this.formatBytes(processMemory.rss),
        heapTotal: this.formatBytes(processMemory.heapTotal),
        heapUsed: this.formatBytes(processMemory.heapUsed),
        external: this.formatBytes(processMemory.external),
        heapUsagePercent: Math.round((processMemory.heapUsed / processMemory.heapTotal) * 100 * 100) / 100
      }
    };
  }

  /**
   * Measure how long a `setImmediate` callback takes to run and remember
   * the result as the latest event-loop lag sample.
   *
   * @returns {Promise<number>} Lag in milliseconds, rounded to two decimals.
   */
  measureEventLoopLag() {
    const start = process.hrtime.bigint();

    return new Promise((resolve) => {
      setImmediate(() => {
        const lagMs = Number(process.hrtime.bigint() - start) / 1e6;
        this.lastEventLoopLag = Math.round(lagMs * 100) / 100;
        resolve(this.lastEventLoopLag);
      });
    });
  }

  /**
   * Get the most recent event-loop lag sample and schedule a fresh one.
   *
   * The measurement itself is asynchronous, so this synchronous accessor
   * returns the last completed sample (0 before any sample has completed).
   *
   * @returns {number} Lag in milliseconds.
   */
  getEventLoopLag() {
    const latest = this.lastEventLoopLag;
    // Fire-and-forget refresh; the promise only ever resolves.
    void this.measureEventLoopLag();
    return latest;
  }

  /**
   * Format a byte count as a human-readable string, e.g. 1536 -> "1.5 KB".
   * Values beyond the largest unit stay in TB; non-positive or non-finite
   * input yields "0 Bytes".
   *
   * @param {number} bytes
   * @returns {string}
   */
  formatBytes(bytes) {
    const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB'];
    if (!Number.isFinite(bytes) || bytes <= 0) return '0 Bytes';

    let scaled = bytes;
    let unitIndex = 0;
    // Dividing step by step keeps the unit index inside the table.
    while (scaled >= 1024 && unitIndex < sizes.length - 1) {
      scaled /= 1024;
      unitIndex++;
    }

    const value = Math.round(scaled * 100) / 100;

    return `${value} ${sizes[unitIndex]}`;
  }

  /**
   * Append a sample to the history for `type`, discarding the oldest
   * samples beyond `maxMetricsHistory`.
   *
   * @param {string} type - History key ('cpu' or 'memory').
   * @param {*} value - Sample to store.
   */
  addMetric(type, value) {
    if (!Array.isArray(this.metrics[type])) {
      this.metrics[type] = [];
    }

    const history = this.metrics[type];
    history.push({
      timestamp: Date.now(),
      value
    });

    while (history.length > this.maxMetricsHistory) {
      history.shift();
    }
  }

  /**
   * Compare system CPU usage with `config.thresholds.cpu`. After
   * `thresholds.consecutiveFailures` consecutive high readings a warning is
   * sent (at most once per `notifications.cooldown` milliseconds); the
   * first normal reading after such a streak sends a recovery notification.
   *
   * @param {{system: number, process: number, cores: number}} cpuMetrics
   * @returns {Promise<void>}
   */
  async checkCpuThreshold(cpuMetrics) {
    const threshold = this.config.thresholds.cpu;
    const systemUsage = cpuMetrics.system;

    if (systemUsage >= threshold) {
      this.consecutiveHighCpu++;

      if (this.consecutiveHighCpu >= this.config.thresholds.consecutiveFailures) {
        const now = Date.now();
        const cooldown = this.config.notifications.cooldown;

        if (now - this.lastCpuAlert >= cooldown) {
          await this.notificationManager.sendWarning(
            'High CPU Usage Detected',
            `System CPU usage is at ${systemUsage.toFixed(2)}%, exceeding threshold of ${threshold}%`,
            {
              systemCpu: `${systemUsage.toFixed(2)}%`,
              processCpu: `${cpuMetrics.process.toFixed(2)}%`,
              threshold: `${threshold}%`,
              consecutiveOccurrences: this.consecutiveHighCpu,
              cores: cpuMetrics.cores
            }
          );

          this.lastCpuAlert = now;
        }
      }
    } else {
      // Recovery
      if (this.consecutiveHighCpu >= this.config.thresholds.consecutiveFailures) {
        await this.notificationManager.sendRecovery(
          'CPU Usage Normalized',
          `System CPU usage has returned to normal levels: ${systemUsage.toFixed(2)}%`,
          {
            currentCpu: `${systemUsage.toFixed(2)}%`,
            threshold: `${threshold}%`
          }
        );
      }
      this.consecutiveHighCpu = 0;
    }
  }

  /**
   * Compare system memory usage with `config.thresholds.memory`, using the
   * same streak / cooldown / recovery rules as checkCpuThreshold.
   *
   * @param {object} memoryMetrics - Result of getMemoryUsage().
   * @returns {Promise<void>}
   */
  async checkMemoryThreshold(memoryMetrics) {
    const threshold = this.config.thresholds.memory;
    const systemUsage = memoryMetrics.system.usagePercent;

    if (systemUsage >= threshold) {
      this.consecutiveHighMemory++;

      if (this.consecutiveHighMemory >= this.config.thresholds.consecutiveFailures) {
        const now = Date.now();
        const cooldown = this.config.notifications.cooldown;

        if (now - this.lastMemoryAlert >= cooldown) {
          await this.notificationManager.sendWarning(
            'High Memory Usage Detected',
            `System memory usage is at ${systemUsage.toFixed(2)}%, exceeding threshold of ${threshold}%`,
            {
              systemMemory: `${systemUsage.toFixed(2)}%`,
              used: memoryMetrics.system.used,
              total: memoryMetrics.system.total,
              processHeap: memoryMetrics.process.heapUsed,
              threshold: `${threshold}%`,
              consecutiveOccurrences: this.consecutiveHighMemory
            }
          );

          this.lastMemoryAlert = now;
        }
      }
    } else {
      // Recovery
      if (this.consecutiveHighMemory >= this.config.thresholds.consecutiveFailures) {
        await this.notificationManager.sendRecovery(
          'Memory Usage Normalized',
          `System memory usage has returned to normal levels: ${systemUsage.toFixed(2)}%`,
          {
            currentMemory: `${systemUsage.toFixed(2)}%`,
            threshold: `${threshold}%`
          }
        );
      }
      this.consecutiveHighMemory = 0;
    }
  }

  /**
   * @returns {{cpu: object|null, memory: object|null}} The most recent CPU
   *   and memory samples, or null where none has been recorded.
   */
  getCurrentMetrics() {
    return {
      cpu: this.metrics.cpu[this.metrics.cpu.length - 1]?.value || null,
      memory: this.metrics.memory[this.metrics.memory.length - 1]?.value || null
    };
  }

  /**
   * @returns {{cpu: Array, memory: Array}} Copies of the recorded sample
   *   histories (oldest first), so callers cannot alter internal state.
   */
  getMetricsHistory() {
    return {
      cpu: [...this.metrics.cpu],
      memory: [...this.metrics.memory]
    };
  }

  /**
   * @returns {object} Static host information: platform, arch, hostname,
   *   CPU count, total memory, OS uptime and Node.js version.
   */
  getSystemInfo() {
    return {
      platform: os.platform(),
      arch: os.arch(),
      hostname: os.hostname(),
      cpus: os.cpus().length,
      totalMemory: this.formatBytes(os.totalmem()),
      uptime: this.formatUptime(os.uptime()),
      nodeVersion: process.version
    };
  }

  /**
   * Format a duration as days / hours / minutes, e.g. 3700 -> "1h 1m".
   * Durations under one minute yield "0m".
   *
   * @param {number} seconds
   * @returns {string}
   */
  formatUptime(seconds) {
    const days = Math.floor(seconds / 86400);
    const hours = Math.floor((seconds % 86400) / 3600);
    const minutes = Math.floor((seconds % 3600) / 60);

    const parts = [];
    if (days > 0) parts.push(`${days}d`);
    if (hours > 0) parts.push(`${hours}h`);
    if (minutes > 0) parts.push(`${minutes}m`);

    return parts.join(' ') || '0m';
  }
}
|
|
355
|
+
|
|
356
|
+
module.exports = SystemResourceMonitor;
|
|
357
|
+
|