@techalmondsai/nodejs-monitoring 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,494 @@
1
+ # Node.js Monitoring Service
2
+
3
+ A comprehensive, container-aware monitoring service for Node.js applications with built-in health probes, metrics collection, Kubernetes-native endpoints, and graceful shutdown support. A lightweight alternative to New Relic.
4
+
5
+ ## Features
6
+
7
+ - **Easy Integration** - One-line setup for Express applications
8
+ - **Container-Aware** - Reads cgroup v1/v2 for accurate memory and CPU metrics in Docker/Kubernetes
9
+ - **Kubernetes-Native** - Separate `/healthz` (liveness) and `/readyz` (readiness) endpoints
10
+ - **Built-in Metrics** - Memory, CPU, heap, uptime, request tracking
11
+ - **Health Probes** - Memory, CPU, response time, error rate, disk space, uptime
12
+ - **Graceful Shutdown** - SIGTERM handling, interval cleanup, shutdown-aware probes
13
+ - **Performance Tracking** - Request/response time monitoring (only 5xx responses are counted as errors)
14
+ - **Custom Probes** - Add your own health checks with configurable timeouts
15
+ - **REST API** - Built-in endpoints for health, metrics, liveness, and readiness
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ npm install @techalmondsai/nodejs-monitoring
21
+ ```
22
+
23
+ ## Quick Start
24
+
25
+ ### Basic Setup
26
+
27
+ ```typescript
28
+ import express from "express";
29
+ import { setupMonitoring } from "@techalmondsai/nodejs-monitoring";
30
+
31
+ const app = express();
32
+
33
+ // Setup monitoring with one line
34
+ const monitoring = setupMonitoring(app);
35
+
36
+ // Your routes
37
+ app.get("/api/users", (req, res) => {
38
+ res.json({ users: [] });
39
+ });
40
+
41
+ app.listen(3000, () => {
42
+ console.log("Server running on port 3000");
43
+ console.log("Health check: http://localhost:3000/health");
44
+ console.log("Liveness: http://localhost:3000/healthz");
45
+ console.log("Readiness: http://localhost:3000/readyz");
46
+ });
47
+ ```
48
+
49
+ ### Advanced Configuration
50
+
51
+ ```typescript
52
+ import express from "express";
53
+ import { setupMonitoring } from "@techalmondsai/nodejs-monitoring";
54
+
55
+ const app = express();
56
+
57
+ const monitoring = setupMonitoring(app, {
58
+ healthRoutePath: "/api/health",
59
+ metricsInterval: 15000,
60
+ enableRequestTracking: true,
61
+ enableErrorTracking: true,
62
+ livenessPath: "/api/liveness",
63
+ readinessPath: "/api/readiness",
64
+ probeTimeout: 5000,
65
+ alertThresholds: {
66
+ memoryUsage: 85,
67
+ cpuUsage: 90,
68
+ responseTime: 2000,
69
+ errorRate: 5,
70
+ },
71
+ });
72
+
73
+ // Add custom health probe
74
+ monitoring.addProbe({
75
+ name: "database_connection",
76
+ check: async () => {
77
+ try {
78
+ await database.ping();
79
+ return {
80
+ status: "healthy",
81
+ message: "Database connection successful",
82
+ };
83
+ } catch (error) {
84
+ return {
85
+ status: "critical",
86
+ message: `Database connection failed: ${error.message}`,
87
+ };
88
+ }
89
+ },
90
+ interval: 30000,
91
+ });
92
+
93
+ app.listen(3000);
94
+ ```
95
+
96
+ ## API Endpoints
97
+
98
+ Once integrated, your application will have the following endpoints:
99
+
100
+ ### Health Check (Full Report)
101
+
102
+ ```
103
+ GET /health
104
+ ```
105
+
106
+ Returns comprehensive health information including all metrics and probe results. Returns `200` when healthy and `503` when any probe reports `warning` or `critical`.
107
+
108
+ ```json
109
+ {
110
+ "status": "healthy",
111
+ "timestamp": 1640995200000,
112
+ "uptime": 3600,
113
+ "hostname": "my-pod-abc123",
114
+ "metrics": {
115
+ "timestamp": 1640995200000,
116
+ "uptime": 3600,
117
+ "memory": {
118
+ "used": 256,
119
+ "total": 2048,
120
+ "percentage": 12,
121
+ "heap": {
122
+ "used": 80,
123
+ "total": 1584,
124
+ "percentage": 5
125
+ },
126
+ "isContainerAware": true
127
+ },
128
+ "cpu": {
129
+ "usage": 15.5,
130
+ "loadAverage": [0.5, 0.3, 0.2],
131
+ "effectiveCpus": 0.5,
132
+ "isContainerAware": true
133
+ },
134
+ "process": {
135
+ "pid": 1,
136
+ "version": "v18.17.0",
137
+ "platform": "linux",
138
+ "arch": "x64",
139
+ "hostname": "my-pod-abc123"
140
+ },
141
+ "requests": {
142
+ "total": 150,
143
+ "active": 2,
144
+ "averageResponseTime": 120
145
+ },
146
+ "errors": {
147
+ "total": 5,
148
+ "rate": 0.2
149
+ }
150
+ },
151
+ "probes": {
152
+ "memory_usage": {
153
+ "status": "healthy",
154
+ "message": "Memory usage is 12%",
155
+ "value": 12
156
+ },
157
+ "cpu_usage": {
158
+ "status": "healthy",
159
+ "message": "CPU usage is 15.5%",
160
+ "value": 15.5
161
+ },
162
+ "response_time": {
163
+ "status": "healthy",
164
+ "message": "Avg response time is 120ms",
165
+ "value": 120
166
+ },
167
+ "error_rate": {
168
+ "status": "healthy",
169
+ "message": "Error rate is 0.2/min",
170
+ "value": 0.2
171
+ },
172
+ "uptime": {
173
+ "status": "healthy",
174
+ "message": "Application has been running for 1h 0m",
175
+ "value": 3600
176
+ },
177
+ "disk_space": {
178
+ "status": "healthy",
179
+ "message": "Disk usage is 45%",
180
+ "value": 45
181
+ }
182
+ },
183
+ "version": "1.0.0"
184
+ }
185
+ ```
186
+
187
+ ### Kubernetes Liveness Probe
188
+
189
+ ```
190
+ GET /healthz
191
+ ```
192
+
193
+ Returns `200` if the process is alive and not shutting down. It does **not** check downstream dependencies, so a failing dependency does not cause unnecessary pod restarts.
194
+
195
+ ```json
196
+ {
197
+ "status": "alive",
198
+ "uptime": 3600,
199
+ "hostname": "my-pod-abc123"
200
+ }
201
+ ```
202
+
203
+ ### Kubernetes Readiness Probe
204
+
205
+ ```
206
+ GET /readyz
207
+ ```
208
+
209
+ Returns `200` if the service is ready to accept traffic. Returns `503` only when a probe is `critical` — warnings are treated as still ready.
210
+
211
+ ```json
212
+ {
213
+ "status": "ready",
214
+ "hostname": "my-pod-abc123"
215
+ }
216
+ ```
217
+
218
+ ### Metrics History
219
+
220
+ ```
221
+ GET /health/metrics?limit=50
222
+ ```
223
+
224
+ Returns historical metrics data. At most 100 entries are returned; larger `limit` values are clamped:
225
+
226
+ ```json
227
+ {
228
+ "metrics": [],
229
+ "count": 50,
230
+ "latest": { }
231
+ }
232
+ ```
233
+
234
+ ## Kubernetes Configuration
235
+
236
+ ```yaml
237
+ livenessProbe:
238
+ httpGet:
239
+ path: /healthz
240
+ port: 3000
241
+ initialDelaySeconds: 10
242
+ periodSeconds: 15
243
+ failureThreshold: 3
244
+ timeoutSeconds: 5
245
+
246
+ readinessProbe:
247
+ httpGet:
248
+ path: /readyz
249
+ port: 3000
250
+ initialDelaySeconds: 5
251
+ periodSeconds: 10
252
+ failureThreshold: 3
253
+ timeoutSeconds: 3
254
+ ```
255
+
256
+ The paths are configurable via `livenessPath` and `readinessPath` in the config.
257
+
258
+ ## Built-in Probes
259
+
260
+ | Probe | Default Threshold | Description |
261
+ |-------|------------------|-------------|
262
+ | **Memory Usage** | 80% | Container-aware (cgroup v1/v2). Warning at 80% of the threshold, critical above the threshold. |
263
+ | **CPU Usage** | 80% | Normalized against cgroup CPU quota. Warning at 80% of the threshold, critical above the threshold. |
264
+ | **Response Time** | 5000ms | Average response time across the last 100 requests. |
265
+ | **Error Rate** | 10/min | 5xx errors per minute (4xx excluded). Adjusts window for short uptimes. |
266
+ | **Uptime** | - | Application uptime. Always healthy. |
267
+ | **Disk Space** | 85% | Actual disk usage via `statfsSync` or async `df`. Warning at 80% of threshold. |
268
+
269
+ ## Container-Aware Metrics
270
+
271
+ On Kubernetes/Docker, the library automatically detects the container environment and reads real resource limits:
272
+
273
+ - **Memory**: Reads from `/sys/fs/cgroup/memory.max` (v2) or `/sys/fs/cgroup/memory/memory.limit_in_bytes` (v1) instead of `os.totalmem()`
274
+ - **CPU**: Normalizes usage against cgroup CPU quota (`cpu.max` / `cpu.cfs_quota_us`) instead of host CPU count
275
+ - **Heap**: Uses `v8.getHeapStatistics().heap_size_limit` for the real V8 heap ceiling
276
+
277
+ On bare metal/EC2 without containers, it falls back to standard OS metrics.
278
+
279
+ ## Configuration Options
280
+
281
+ ```typescript
282
+ interface MonitoringConfig {
283
+ enableHealthRoute?: boolean; // Enable /health endpoint (default: true)
284
+ healthRoutePath?: string; // Health route path (default: '/health')
285
+ enableMetricsCollection?: boolean; // Enable metrics collection (default: true)
286
+ metricsInterval?: number; // Collection interval in ms (default: 30000)
287
+ enableRequestTracking?: boolean; // Track HTTP requests (default: true)
288
+ enableErrorTracking?: boolean; // Track 5xx errors (default: true)
289
+ customProbes?: CustomProbe[]; // Additional custom probes
290
+ alertThresholds?: AlertThresholds; // Custom alert thresholds
291
+ livenessPath?: string; // K8s liveness path (default: '/healthz')
292
+ readinessPath?: string; // K8s readiness path (default: '/readyz')
293
+ probeTimeout?: number; // Probe execution timeout in ms (default: 10000)
294
+ }
295
+
296
+ interface AlertThresholds {
297
+ memoryUsage?: number; // Memory usage threshold percentage
298
+ cpuUsage?: number; // CPU usage threshold percentage
299
+ responseTime?: number; // Response time threshold in ms
300
+ errorRate?: number; // Error rate threshold per minute
301
+ }
302
+ ```
303
+
304
+ ## Graceful Shutdown
305
+
306
+ The service automatically handles `SIGTERM` signals (sent by Kubernetes during pod termination):
307
+
308
+ 1. Sets the `isShuttingDown` flag — liveness and readiness probes immediately return `503`
309
+ 2. Clears all metric collection and probe execution intervals
310
+ 3. Cleans up system metrics and request tracker resources
311
+
312
+ You can also trigger shutdown manually:
313
+
314
+ ```typescript
315
+ const monitoring = MonitoringService.getInstance();
316
+
317
+ process.on("SIGTERM", async () => {
318
+ await monitoring.shutdown();
319
+ server.close();
320
+ });
321
+ ```
322
+
323
+ ## Custom Probes
324
+
325
+ ```typescript
326
+ import { CustomProbe, ProbeResult } from "@techalmondsai/nodejs-monitoring";
327
+
328
+ const customProbe: CustomProbe = {
329
+ name: "external_api_check",
330
+ check: async (): Promise<ProbeResult> => {
331
+ const start = Date.now();
332
+ try {
333
+ const response = await fetch("https://api.example.com/health");
334
+ return {
335
+ status: response.ok ? "healthy" : "critical",
336
+ message: `External API responded with ${response.status}`,
337
+ value: response.status,
338
+ metadata: {
339
+ responseTime: Date.now() - start,
340
+ url: "https://api.example.com/health",
341
+ },
342
+ };
343
+ } catch (error) {
344
+ return {
345
+ status: "critical",
346
+ message: `External API unreachable: ${error.message}`,
347
+ };
348
+ }
349
+ },
350
+ interval: 60000,
351
+ };
352
+
353
+ monitoring.addProbe(customProbe);
354
+ ```
355
+
356
+ Probes that hang beyond the configured `probeTimeout` (default 10s) are automatically marked as `critical`.
357
+
358
+ ## Integration Examples
359
+
360
+ ### Express.js with TypeScript
361
+
362
+ ```typescript
363
+ import express from "express";
364
+ import { setupMonitoring } from "@techalmondsai/nodejs-monitoring";
365
+
366
+ const app = express();
367
+
368
+ const monitoring = setupMonitoring(app, {
369
+ healthRoutePath: "/api/health",
370
+ alertThresholds: {
371
+ memoryUsage: 85,
372
+ cpuUsage: 90,
373
+ },
374
+ });
375
+
376
+ monitoring.addProbe({
377
+ name: "postgres_connection",
378
+ check: async () => {
379
+ try {
380
+ const client = await pool.connect();
381
+ await client.query("SELECT NOW()");
382
+ client.release();
383
+ return { status: "healthy", message: "PostgreSQL connected" };
384
+ } catch (error) {
385
+ return {
386
+ status: "critical",
387
+ message: `PostgreSQL error: ${error.message}`,
388
+ };
389
+ }
390
+ },
391
+ });
392
+
393
+ export default app;
394
+ ```
395
+
396
+ ### NestJS Integration
397
+
398
+ ```typescript
399
+ import { NestFactory } from "@nestjs/core";
400
+ import { AppModule } from "./app.module";
401
+ import { setupMonitoring } from "@techalmondsai/nodejs-monitoring";
402
+
403
+ async function bootstrap() {
404
+ const app = await NestFactory.create(AppModule);
405
+
406
+ const monitoring = setupMonitoring(app.getHttpAdapter().getInstance(), {
407
+ healthRoutePath: "/health",
408
+ metricsInterval: 20000,
409
+ });
410
+
411
+ await app.listen(3000);
412
+ }
413
+
414
+ bootstrap();
415
+ ```
416
+
417
+ ### Manual Usage (Without Express)
418
+
419
+ ```typescript
420
+ import { MonitoringService } from "@techalmondsai/nodejs-monitoring";
421
+
422
+ const monitoring = MonitoringService.getInstance({
423
+ enableHealthRoute: false,
424
+ metricsInterval: 15000,
425
+ });
426
+
427
+ // Get current metrics
428
+ const metrics = monitoring.getCurrentMetrics();
429
+
430
+ // Get probe results
431
+ const probes = monitoring.getProbeResults();
432
+
433
+ // Add custom probe
434
+ monitoring.addProbe({
435
+ name: "custom_check",
436
+ check: async () => {
437
+ return { status: "healthy", message: "All good!" };
438
+ },
439
+ });
440
+ ```
441
+
442
+ ## API Reference
443
+
444
+ ### MonitoringService
445
+
446
+ | Method | Description |
447
+ |--------|-------------|
448
+ | `static getInstance(config?)` | Get or create the singleton instance |
449
+ | `addProbe(probe)` | Add a custom health probe |
450
+ | `getCurrentMetrics()` | Get current system metrics |
451
+ | `getProbeResults()` | Get all probe results |
452
+ | `requestTrackingMiddleware()` | Express middleware for request tracking |
453
+ | `healthCheckHandler()` | Express handler for `/health` |
454
+ | `livenessHandler()` | Express handler for `/healthz` |
455
+ | `readinessHandler()` | Express handler for `/readyz` |
456
+ | `metricsHistoryHandler()` | Express handler for `/health/metrics` |
457
+ | `shutdown()` | Graceful shutdown (clears intervals, sets shutting down flag) |
458
+ | `static reset()` | Reset all singletons (for testing) |
459
+
460
+ ### Helper Functions
461
+
462
+ | Function | Description |
463
+ |----------|-------------|
464
+ | `setupMonitoring(app, config?)` | Quick setup for Express apps — registers all endpoints and middleware |
465
+
466
+ ### Exports
467
+
468
+ | Export | Description |
469
+ |--------|-------------|
470
+ | `MonitoringService` | Core monitoring singleton |
471
+ | `BuiltInProbes` | Factory methods for built-in probes |
472
+ | `ContainerDetector` | Container/cgroup detection utility |
473
+ | `HealthMetrics` | TypeScript interface for metrics |
474
+ | `MonitoringConfig` | TypeScript interface for config |
475
+ | `CustomProbe` | TypeScript interface for custom probes |
476
+ | `ProbeResult` | TypeScript interface for probe results |
477
+ | `AlertThresholds` | TypeScript interface for thresholds |
478
+
479
+ ## Testing
480
+
481
+ ```bash
482
+ npm test
483
+ npm run test:watch
484
+ npm test -- --coverage
485
+ ```
486
+
487
+ ## License
488
+
489
+ MIT License - see LICENSE file for details.
490
+
491
+ ## Support
492
+
493
+ - GitHub Issues: [Report bugs and request features](https://github.com/techalmondsai/nodejs-monitoring/issues)
494
+ - Documentation: [Full API documentation](https://github.com/techalmondsai/nodejs-monitoring/wiki)
@@ -0,0 +1,40 @@
1
+ import { Request, Response, NextFunction } from "express";
2
+ import { HealthMetrics, MonitoringConfig, CustomProbe, ProbeResult } from "./types";
3
+ export declare class MonitoringService {
4
+ private static instance;
5
+ private config;
6
+ private systemMetrics;
7
+ private requestTracker;
8
+ private customProbes;
9
+ private probeResults;
10
+ private metricsRingBuffer;
11
+ private metricsWriteIndex;
12
+ private metricsCount;
13
+ private startTime;
14
+ private intervals;
15
+ private isShuttingDown;
16
+ private sigTermHandler;
17
+ private constructor();
18
+ static getInstance(config?: MonitoringConfig): MonitoringService;
19
+ private registerSigTermHandler;
20
+ private initializeBuiltInProbes;
21
+ private startMetricsCollection;
22
+ private pushMetrics;
23
+ private getMetricsHistory;
24
+ private startProbeExecution;
25
+ private startProbe;
26
+ private executeWithTimeout;
27
+ private collectMetrics;
28
+ requestTrackingMiddleware(): (_req: Request, _res: Response, next: NextFunction) => void;
29
+ healthCheckHandler(): (_req: Request, res: Response) => void;
30
+ livenessHandler(): (_req: Request, res: Response) => Response<any, Record<string, any>>;
31
+ readinessHandler(): (_req: Request, res: Response) => Response<any, Record<string, any>>;
32
+ metricsHistoryHandler(): (req: Request, res: Response) => void;
33
+ private getOverallStatus;
34
+ addProbe(probe: CustomProbe): void;
35
+ getCurrentMetrics(): HealthMetrics;
36
+ getProbeResults(): Record<string, ProbeResult>;
37
+ shutdown(): Promise<void>;
38
+ static reset(): void;
39
+ }
40
+ //# sourceMappingURL=MonitoringService.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"MonitoringService.d.ts","sourceRoot":"","sources":["../src/MonitoringService.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAC1D,OAAO,EACL,aAAa,EACb,gBAAgB,EAChB,WAAW,EACX,WAAW,EACZ,MAAM,SAAS,CAAC;AAQjB,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAoB;IAC3C,OAAO,CAAC,MAAM,CAAmB;IACjC,OAAO,CAAC,aAAa,CAAgB;IACrC,OAAO,CAAC,cAAc,CAAiB;IACvC,OAAO,CAAC,YAAY,CAAqB;IACzC,OAAO,CAAC,YAAY,CAAuC;IAC3D,OAAO,CAAC,iBAAiB,CAAuB;IAChD,OAAO,CAAC,iBAAiB,CAAK;IAC9B,OAAO,CAAC,YAAY,CAAK;IACzB,OAAO,CAAC,SAAS,CAAS;IAC1B,OAAO,CAAC,SAAS,CAAwC;IACzD,OAAO,CAAC,cAAc,CAAS;IAC/B,OAAO,CAAC,cAAc,CAA6B;IAEnD,OAAO;IAqBP,MAAM,CAAC,WAAW,CAAC,MAAM,CAAC,EAAE,gBAAgB,GAAG,iBAAiB;IAYhE,OAAO,CAAC,sBAAsB;IAQ9B,OAAO,CAAC,uBAAuB;IAiB/B,OAAO,CAAC,sBAAsB;IAY9B,OAAO,CAAC,WAAW;IAWnB,OAAO,CAAC,iBAAiB;IAsBzB,OAAO,CAAC,mBAAmB;IAM3B,OAAO,CAAC,UAAU;IA6BlB,OAAO,CAAC,kBAAkB;IAsB1B,OAAO,CAAC,cAAc;IAuBtB,yBAAyB,WAEP,OAAO,QAAQ,QAAQ,QAAQ,YAAY;IAkC7D,kBAAkB,KACR,MAAM,OAAO,EAAE,KAAK,QAAQ;IAoBtC,eAAe,KACL,MAAM,OAAO,EAAE,KAAK,QAAQ;IAmBtC,gBAAgB,KACN,MAAM,OAAO,EAAE,KAAK,QAAQ;IA6BtC,qBAAqB,KACX,KAAK,OAAO,EAAE,KAAK,QAAQ;IAgBrC,OAAO,CAAC,gBAAgB;IAWxB,QAAQ,CAAC,KAAK,EAAE,WAAW,GAAG,IAAI;IAMlC,iBAAiB,IAAI,aAAa;IAKlC,eAAe,IAAI,MAAM,CAAC,MAAM,EAAE,WAAW,CAAC;IAKxC,QAAQ,IAAI,OAAO,CAAC,IAAI,CAAC;IAsB/B,MAAM,CAAC,KAAK,IAAI,IAAI;CAQrB"}