@gravito/monitor 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +85 -220
- package/README.zh-TW.md +170 -0
- package/dist/index.cjs +72 -6
- package/dist/index.d.cts +40 -0
- package/dist/index.d.ts +40 -0
- package/dist/index.js +72 -6
- package/package.json +6 -4
package/README.md
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
|
-
# @gravito/monitor
|
|
1
|
+
# @gravito/monitor 🛰️
|
|
2
2
|
|
|
3
|
-
Lightweight observability module for Gravito - Health Checks, Metrics, and Tracing.
|
|
3
|
+
Lightweight observability module for Gravito - Health Checks, Metrics, and Tracing. Built on top of the **Galaxy Architecture**, this Orbit provides essential infrastructure for monitoring your planet's health.
|
|
4
4
|
|
|
5
|
-
## Features
|
|
5
|
+
## ๐ Features
|
|
6
6
|
|
|
7
|
-
- 🏥 **Health Checks** - Kubernetes-ready `/health`, `/ready`, `/live` endpoints
|
|
8
|
-
- ๐ **Metrics** - Prometheus-compatible `/metrics` endpoint
|
|
9
|
-
- ๐ **Tracing** - OpenTelemetry OTLP support
|
|
7
|
+
- 🏥 **Health Checks** - Kubernetes-ready `/health`, `/ready`, `/live` endpoints with custom check support.
|
|
8
|
+
- ๐ **Metrics** - Prometheus-compatible `/metrics` endpoint with built-in Node.js runtime and HTTP metrics.
|
|
9
|
+
- ๐ **Tracing** - OpenTelemetry OTLP support for distributed tracing across services.
|
|
10
|
+
- 🛡️ **Kubernetes Native** - Seamless integration with probe configurations and Prometheus ServiceMonitors.
|
|
10
11
|
|
|
11
|
-
## Installation
|
|
12
|
+
## 📦 Installation
|
|
12
13
|
|
|
13
14
|
```bash
|
|
14
15
|
bun add @gravito/monitor
|
|
@@ -20,7 +21,9 @@ For OpenTelemetry tracing (optional):
|
|
|
20
21
|
bun add @opentelemetry/sdk-node @opentelemetry/exporter-trace-otlp-http
|
|
21
22
|
```
|
|
22
23
|
|
|
23
|
-
## Quick Start
|
|
24
|
+
## ๐ Quick Start
|
|
25
|
+
|
|
26
|
+
Enable observability by adding the `MonitorOrbit` to your `PlanetCore`.
|
|
24
27
|
|
|
25
28
|
```typescript
|
|
26
29
|
import { PlanetCore } from '@gravito/core'
|
|
@@ -32,274 +35,136 @@ core.orbit(new MonitorOrbit({
|
|
|
32
35
|
health: {
|
|
33
36
|
enabled: true,
|
|
34
37
|
path: '/health',
|
|
35
|
-
readyPath: '/ready',
|
|
36
|
-
livePath: '/live',
|
|
37
38
|
},
|
|
38
39
|
metrics: {
|
|
39
40
|
enabled: true,
|
|
40
|
-
path: '/metrics',
|
|
41
41
|
prefix: 'myapp_',
|
|
42
42
|
},
|
|
43
43
|
tracing: {
|
|
44
|
-
enabled:
|
|
45
|
-
serviceName: '
|
|
46
|
-
endpoint: 'http://localhost:4318/v1/traces',
|
|
44
|
+
enabled: process.env.NODE_ENV === 'production',
|
|
45
|
+
serviceName: 'order-service',
|
|
47
46
|
},
|
|
48
47
|
}))
|
|
49
48
|
|
|
50
49
|
await core.liftoff()
|
|
51
50
|
```
|
|
52
51
|
|
|
53
|
-
## Health Checks
|
|
52
|
+
## 🏥 Health Checks
|
|
54
53
|
|
|
55
|
-
|
|
54
|
+
The health system provides three distinct probes following Kubernetes best practices:
|
|
56
55
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
| `GET /ready` | Kubernetes readiness probe |
|
|
61
|
-
| `GET /live` | Kubernetes liveness probe |
|
|
56
|
+
- **Liveness (`/live`)**: Indicates if the process is running.
|
|
57
|
+
- **Readiness (`/ready`)**: Indicates if the app is ready to serve traffic (waits for all checks to pass).
|
|
58
|
+
- **Health (`/health`)**: Full aggregated report of all registered checks.
|
|
62
59
|
|
|
63
60
|
### Registering Custom Checks
|
|
64
61
|
|
|
62
|
+
You can register custom health checks via the `monitor` service.
|
|
63
|
+
|
|
65
64
|
```typescript
|
|
66
65
|
const monitor = core.services.get('monitor')
|
|
67
66
|
|
|
68
|
-
//
|
|
67
|
+
// Simple check
|
|
69
68
|
monitor.health.register('database', async () => {
|
|
70
|
-
const
|
|
71
|
-
return
|
|
72
|
-
? { status: 'healthy' }
|
|
73
|
-
: { status: 'unhealthy', message: 'Database disconnected' }
|
|
74
|
-
})
|
|
75
|
-
|
|
76
|
-
// Register a Redis check
|
|
77
|
-
monitor.health.register('redis', async () => {
|
|
78
|
-
const result = await redis.ping()
|
|
79
|
-
return { status: result === 'PONG' ? 'healthy' : 'unhealthy' }
|
|
69
|
+
const isOk = await db.ping()
|
|
70
|
+
return isOk ? { status: 'healthy' } : { status: 'unhealthy', message: 'DB down' }
|
|
80
71
|
})
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
### Built-in Check Factories
|
|
84
|
-
|
|
85
|
-
```typescript
|
|
86
|
-
import {
|
|
87
|
-
createDatabaseCheck,
|
|
88
|
-
createRedisCheck,
|
|
89
|
-
createMemoryCheck,
|
|
90
|
-
createHttpCheck
|
|
91
|
-
} from '@gravito/monitor'
|
|
92
72
|
|
|
93
|
-
//
|
|
94
|
-
monitor.health.register('
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
// External service check
|
|
100
|
-
monitor.health.register('api', createHttpCheck('https://api.example.com/health'))
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
### Health Response Format
|
|
104
|
-
|
|
105
|
-
```json
|
|
106
|
-
{
|
|
107
|
-
"status": "healthy",
|
|
108
|
-
"timestamp": "2024-12-25T12:00:00Z",
|
|
109
|
-
"uptime": 3600,
|
|
110
|
-
"checks": {
|
|
111
|
-
"database": { "status": "healthy", "latency": 5 },
|
|
112
|
-
"redis": { "status": "healthy", "latency": 2 },
|
|
113
|
-
"memory": {
|
|
114
|
-
"status": "healthy",
|
|
115
|
-
"details": { "heapUsedPercent": "45.2" }
|
|
116
|
-
}
|
|
73
|
+
// Detailed check
|
|
74
|
+
monitor.health.register('disk_space', () => {
|
|
75
|
+
const usage = getDiskUsage()
|
|
76
|
+
return {
|
|
77
|
+
status: usage < 90 ? 'healthy' : 'degraded',
|
|
78
|
+
details: { usage: `${usage}%` }
|
|
117
79
|
}
|
|
118
|
-
}
|
|
80
|
+
})
|
|
119
81
|
```
|
|
120
82
|
|
|
121
|
-
## Metrics
|
|
122
|
-
|
|
123
|
-
### Prometheus Endpoint
|
|
83
|
+
## ๐ Metrics
|
|
124
84
|
|
|
125
|
-
|
|
85
|
+
Metrics are exposed in Prometheus text format at `/metrics`.
|
|
126
86
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
myapp_http_requests_total{method="GET",path="/api/users",status="200"} 150
|
|
131
|
-
|
|
132
|
-
# HELP myapp_http_request_duration_seconds HTTP request duration
|
|
133
|
-
# TYPE myapp_http_request_duration_seconds histogram
|
|
134
|
-
myapp_http_request_duration_seconds_bucket{le="0.01"} 50
|
|
135
|
-
myapp_http_request_duration_seconds_bucket{le="0.1"} 120
|
|
136
|
-
myapp_http_request_duration_seconds_sum 12.5
|
|
137
|
-
myapp_http_request_duration_seconds_count 150
|
|
138
|
-
```
|
|
87
|
+
### Built-in Metrics
|
|
88
|
+
- **Runtime**: Heap usage, uptime, active handles.
|
|
89
|
+
- **HTTP**: Request total (`http_requests_total`), duration histogram (`http_request_duration_seconds`).
|
|
139
90
|
|
|
140
91
|
### Custom Metrics
|
|
141
92
|
|
|
142
93
|
```typescript
|
|
143
94
|
const monitor = core.services.get('monitor')
|
|
144
95
|
|
|
145
|
-
// Counter
|
|
146
|
-
const
|
|
147
|
-
name: '
|
|
148
|
-
help: 'Total
|
|
149
|
-
labels: ['
|
|
96
|
+
// 1. Counter (Monotonically increasing)
|
|
97
|
+
const orders = monitor.metrics.counter({
|
|
98
|
+
name: 'orders_total',
|
|
99
|
+
help: 'Total orders processed',
|
|
100
|
+
labels: ['status']
|
|
150
101
|
})
|
|
151
|
-
|
|
102
|
+
orders.inc({ status: 'completed' })
|
|
152
103
|
|
|
153
|
-
// Gauge
|
|
154
|
-
const
|
|
155
|
-
name: '
|
|
156
|
-
help: 'Current active
|
|
104
|
+
// 2. Gauge (Can go up and down)
|
|
105
|
+
const activeUsers = monitor.metrics.gauge({
|
|
106
|
+
name: 'active_users',
|
|
107
|
+
help: 'Current active users'
|
|
157
108
|
})
|
|
158
|
-
|
|
159
|
-
activeConnections.inc()
|
|
160
|
-
activeConnections.dec()
|
|
161
|
-
|
|
162
|
-
// Histogram
|
|
163
|
-
const responseTime = monitor.metrics.histogram({
|
|
164
|
-
name: 'response_time_seconds',
|
|
165
|
-
help: 'Response time in seconds',
|
|
166
|
-
labels: ['endpoint'],
|
|
167
|
-
buckets: [0.01, 0.05, 0.1, 0.5, 1],
|
|
168
|
-
})
|
|
169
|
-
responseTime.observe(0.125, { endpoint: '/users' })
|
|
109
|
+
activeUsers.set(42)
|
|
170
110
|
|
|
171
|
-
//
|
|
172
|
-
const
|
|
173
|
-
|
|
174
|
-
|
|
111
|
+
// 3. Histogram (Value distribution)
|
|
112
|
+
const processTime = monitor.metrics.histogram({
|
|
113
|
+
name: 'order_processing_seconds',
|
|
114
|
+
help: 'Time to process orders',
|
|
115
|
+
buckets: [0.1, 0.5, 1, 2, 5]
|
|
116
|
+
})
|
|
117
|
+
const stop = processTime.startTimer()
|
|
118
|
+
// ... logic ...
|
|
119
|
+
stop()
|
|
175
120
|
```
|
|
176
121
|
|
|
177
|
-
## Tracing
|
|
178
|
-
|
|
179
|
-
### OpenTelemetry Integration
|
|
122
|
+
## ๐ Tracing
|
|
180
123
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
| Backend | Method |
|
|
184
|
-
|---------|--------|
|
|
185
|
-
| **Jaeger** | OTLP Collector → Jaeger |
|
|
186
|
-
| **Zipkin** | OTLP Collector → Zipkin |
|
|
187
|
-
| **AWS X-Ray** | AWS ADOT Collector |
|
|
188
|
-
| **Google Cloud Trace** | GCP OTLP Collector |
|
|
189
|
-
| **Datadog** | Datadog Agent (OTLP) |
|
|
124
|
+
Distributed tracing is powered by OpenTelemetry (OTLP). It automatically propagates trace context via W3C `traceparent` headers.
|
|
190
125
|
|
|
191
126
|
### Configuration
|
|
192
|
-
|
|
193
127
|
```typescript
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
},
|
|
204
|
-
},
|
|
205
|
-
})
|
|
128
|
+
tracing: {
|
|
129
|
+
enabled: true,
|
|
130
|
+
serviceName: 'gateway',
|
|
131
|
+
endpoint: 'http://otel-collector:4318/v1/traces',
|
|
132
|
+
sampleRate: 0.1, // Sample 10% of requests
|
|
133
|
+
resourceAttributes: {
|
|
134
|
+
env: 'production'
|
|
135
|
+
}
|
|
136
|
+
}
|
|
206
137
|
```
|
|
207
138
|
|
|
208
139
|
### Manual Spans
|
|
209
|
-
|
|
210
140
|
```typescript
|
|
211
141
|
const tracer = core.services.get('tracing')
|
|
212
142
|
|
|
213
|
-
|
|
214
|
-
const span = tracer.startSpan('process-order', {
|
|
215
|
-
attributes: { 'order.id': '12345' },
|
|
216
|
-
})
|
|
217
|
-
|
|
143
|
+
const span = tracer.startSpan('compute_heavy_logic')
|
|
218
144
|
try {
|
|
219
|
-
//
|
|
220
|
-
tracer.
|
|
221
|
-
tracer.setAttribute(span, 'order.total', 99.99)
|
|
145
|
+
// ... work ...
|
|
146
|
+
tracer.setAttribute(span, 'items_count', 100)
|
|
222
147
|
tracer.endSpan(span, 'ok')
|
|
223
|
-
} catch (
|
|
148
|
+
} catch (e) {
|
|
224
149
|
tracer.endSpan(span, 'error')
|
|
225
|
-
throw error
|
|
226
150
|
}
|
|
227
151
|
```
|
|
228
152
|
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
The tracing middleware automatically:
|
|
232
|
-
- Extracts `traceparent` header from incoming requests
|
|
233
|
-
- Injects trace context into outgoing requests
|
|
234
|
-
- Records HTTP method, path, status code
|
|
235
|
-
|
|
236
|
-
## Kubernetes Integration
|
|
237
|
-
|
|
238
|
-
### Deployment Example
|
|
239
|
-
|
|
240
|
-
```yaml
|
|
241
|
-
apiVersion: apps/v1
|
|
242
|
-
kind: Deployment
|
|
243
|
-
metadata:
|
|
244
|
-
name: my-gravito-app
|
|
245
|
-
spec:
|
|
246
|
-
template:
|
|
247
|
-
spec:
|
|
248
|
-
containers:
|
|
249
|
-
- name: app
|
|
250
|
-
image: my-app:latest
|
|
251
|
-
ports:
|
|
252
|
-
- containerPort: 3000
|
|
253
|
-
livenessProbe:
|
|
254
|
-
httpGet:
|
|
255
|
-
path: /live
|
|
256
|
-
port: 3000
|
|
257
|
-
initialDelaySeconds: 5
|
|
258
|
-
periodSeconds: 10
|
|
259
|
-
readinessProbe:
|
|
260
|
-
httpGet:
|
|
261
|
-
path: /ready
|
|
262
|
-
port: 3000
|
|
263
|
-
initialDelaySeconds: 5
|
|
264
|
-
periodSeconds: 5
|
|
265
|
-
```
|
|
266
|
-
|
|
267
|
-
### ServiceMonitor for Prometheus
|
|
268
|
-
|
|
269
|
-
```yaml
|
|
270
|
-
apiVersion: monitoring.coreos.com/v1
|
|
271
|
-
kind: ServiceMonitor
|
|
272
|
-
metadata:
|
|
273
|
-
name: my-gravito-app
|
|
274
|
-
spec:
|
|
275
|
-
selector:
|
|
276
|
-
matchLabels:
|
|
277
|
-
app: my-gravito-app
|
|
278
|
-
endpoints:
|
|
279
|
-
- port: http
|
|
280
|
-
path: /metrics
|
|
281
|
-
interval: 15s
|
|
282
|
-
```
|
|
283
|
-
|
|
284
|
-
## Configuration Reference
|
|
153
|
+
## ⚙️ Configuration Reference
|
|
285
154
|
|
|
286
155
|
| Option | Type | Default | Description |
|
|
287
156
|
|--------|------|---------|-------------|
|
|
288
|
-
| `health.enabled` | boolean | `true` | Enable health endpoints |
|
|
289
|
-
| `health.path` | string | `/health` |
|
|
290
|
-
| `health.
|
|
291
|
-
| `health.
|
|
292
|
-
| `
|
|
293
|
-
| `
|
|
294
|
-
| `metrics.
|
|
295
|
-
| `
|
|
296
|
-
| `
|
|
297
|
-
| `
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
## License
|
|
304
|
-
|
|
305
|
-
MIT
|
|
157
|
+
| `health.enabled` | `boolean` | `true` | Enable health endpoints |
|
|
158
|
+
| `health.path` | `string` | `/health` | Path for aggregated health check |
|
|
159
|
+
| `health.timeout` | `number` | `5000` | Timeout for checks in ms |
|
|
160
|
+
| `health.cacheTtl` | `number` | `0` | Cache results in ms (0 = disabled) |
|
|
161
|
+
| `metrics.enabled` | `boolean` | `true` | Enable Prometheus endpoint |
|
|
162
|
+
| `metrics.prefix` | `string` | `gravito_` | Metric name prefix |
|
|
163
|
+
| `metrics.defaultMetrics` | `boolean` | `true` | Collect Node.js runtime metrics |
|
|
164
|
+
| `tracing.enabled` | `boolean` | `false` | Enable OpenTelemetry tracing |
|
|
165
|
+
| `tracing.endpoint` | `string` | `http://localhost:4318/v1/traces` | OTLP Collector URL |
|
|
166
|
+
| `tracing.sampleRate` | `number` | `1.0` | Probability sampling (0.0 - 1.0) |
|
|
167
|
+
|
|
168
|
+
## ๐ License
|
|
169
|
+
|
|
170
|
+
MIT © Carl Lee
|
package/README.zh-TW.md
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# @gravito/monitor 🛰️
|
|
2
|
+
|
|
3
|
+
輕量級的 Gravito 可觀測性模組 (Observability) - 包含健康檢查 (Health Checks)、指標監控 (Metrics) 與鏈路追蹤 (Tracing)。基於 **Galaxy Architecture** 設計，此 Orbit 為您的星球提供必備的監控基礎設施。
|
|
4
|
+
|
|
5
|
+
## ๐ ๆ ธๅฟ็นๆง
|
|
6
|
+
|
|
7
|
+
- 🏥 **健康檢查 (Health Checks)** - 支援 Kubernetes 標準的 `/health`、`/ready`、`/live` 端點，並可自定義檢查項。
|
|
8
|
+
- ๐ **ๆๆจ็ฃๆง (Metrics)** - ็ธๅฎน Prometheus ๆ ผๅผ็ `/metrics` ็ซฏ้ป๏ผๅ
งๅปบ Node.js ๅท่ก้ๆฎต่ HTTP ็ฃๆงใ
|
|
9
|
+
- ๐ **้่ทฏ่ฟฝ่นค (Tracing)** - ๆฏๆด OpenTelemetry OTLP ๆจๆบ๏ผๅฏฆ็พ่ทจๆๅ็ๅๆฃๅผ่ฟฝ่นคใ
|
|
10
|
+
- ๐ก๏ธ **้ฒๅ็ๆดๅ** - ๅฎ็พ้ฉ้
Kubernetes Probe ่จญๅฎ่ Prometheus ServiceMonitorใ
|
|
11
|
+
|
|
12
|
+
## 📦 安裝
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
bun add @gravito/monitor
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
如需使用 OpenTelemetry 追蹤功能（選用）：
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
bun add @opentelemetry/sdk-node @opentelemetry/exporter-trace-otlp-http
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## ๐ ๅฟซ้ไธๆ
|
|
25
|
+
|
|
26
|
+
ๅช้ๅฐ `MonitorOrbit` ๅ ๅ
ฅๆจ็ `PlanetCore` ๅณๅฏๅ็จใ
|
|
27
|
+
|
|
28
|
+
```typescript
|
|
29
|
+
import { PlanetCore } from '@gravito/core'
|
|
30
|
+
import { MonitorOrbit } from '@gravito/monitor'
|
|
31
|
+
|
|
32
|
+
const core = new PlanetCore()
|
|
33
|
+
|
|
34
|
+
core.orbit(new MonitorOrbit({
|
|
35
|
+
health: {
|
|
36
|
+
enabled: true,
|
|
37
|
+
path: '/health',
|
|
38
|
+
},
|
|
39
|
+
metrics: {
|
|
40
|
+
enabled: true,
|
|
41
|
+
prefix: 'myapp_',
|
|
42
|
+
},
|
|
43
|
+
tracing: {
|
|
44
|
+
enabled: process.env.NODE_ENV === 'production',
|
|
45
|
+
serviceName: 'order-service',
|
|
46
|
+
},
|
|
47
|
+
}))
|
|
48
|
+
|
|
49
|
+
await core.liftoff()
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## 🏥 健康檢查 (Health Checks)
|
|
53
|
+
|
|
54
|
+
ๅฅๅบทๆชขๆฅ็ณป็ตฑๆไพไธๅ็ฌฆๅ Kubernetes ๆไฝณๅฏฆ่ธ็็ซฏ้ป๏ผ
|
|
55
|
+
|
|
56
|
+
- **Liveness (`/live`)**: ๆ็คบ็จๅบๆฏๅฆๆญฃๅธธ้่กใ
|
|
57
|
+
- **Readiness (`/ready`)**: ๆ็คบๆ็จๆฏๅฆๆบๅๅฅฝๆฅๆถๆต้๏ผ้้้ๆๆ่จปๅ็ๆชขๆฅ้
๏ผใ
|
|
58
|
+
- **Health (`/health`)**: ๅฎๆด็ๅฅๅบทๅ ฑๅ๏ผๅ
ๅซๆๆ่จปๅ็็ดฐ็ฏใ
|
|
59
|
+
|
|
60
|
+
### ่จปๅ่ชๅฎ็พฉๆชขๆฅ้
|
|
61
|
+
|
|
62
|
+
ๆจๅฏไปฅ้้ `monitor` ๆๅ่จปๅ่ชๅฎ็พฉๆชขๆฅ้่ผฏใ
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
const monitor = core.services.get('monitor')
|
|
66
|
+
|
|
67
|
+
// ็ฐกๅฎๆชขๆฅ
|
|
68
|
+
monitor.health.register('database', async () => {
|
|
69
|
+
const isOk = await db.ping()
|
|
70
|
+
return isOk ? { status: 'healthy' } : { status: 'unhealthy', message: '่ณๆๅบซ้ฃ็ทไธญๆท' }
|
|
71
|
+
})
|
|
72
|
+
|
|
73
|
+
// ่ฉณ็ดฐๅ ฑๅ
|
|
74
|
+
monitor.health.register('disk_space', () => {
|
|
75
|
+
const usage = getDiskUsage()
|
|
76
|
+
return {
|
|
77
|
+
status: usage < 90 ? 'healthy' : 'degraded',
|
|
78
|
+
details: { usage: `${usage}%` }
|
|
79
|
+
}
|
|
80
|
+
})
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## ๐ ๆๆจ็ฃๆง (Metrics)
|
|
84
|
+
|
|
85
|
+
ๆๆจๆไปฅ Prometheus ๆๅญๆ ผๅผๆด้ฒๆผ `/metrics` ็ซฏ้ปใ
|
|
86
|
+
|
|
87
|
+
### ๅ
งๅปบๆๆจ
|
|
88
|
+
- **ๅท่ก้ๆฎต (Runtime)**: ๅ ็ฉ่จๆถ้ซ (Heap usage)ใ้่กๆ้ (Uptime)ใๆดป่บๆงๅถไปฃ็ขผ (Active handles)ใ
|
|
89
|
+
- **HTTP**: ่ซๆฑ็ธฝๆธ (`http_requests_total`)ใ่ซๆฑ่ๆๅไฝ (`http_request_duration_seconds`)ใ
|
|
90
|
+
|
|
91
|
+
### ่ชๅฎ็พฉๆๆจ
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
const monitor = core.services.get('monitor')
|
|
95
|
+
|
|
96
|
+
// 1. Counter (ๅฎ่ชฟ้ๅข็่จๆธๅจ)
|
|
97
|
+
const orders = monitor.metrics.counter({
|
|
98
|
+
name: 'orders_total',
|
|
99
|
+
help: '่็็่จๅฎ็ธฝๆธ',
|
|
100
|
+
labels: ['status']
|
|
101
|
+
})
|
|
102
|
+
orders.inc({ status: 'completed' })
|
|
103
|
+
|
|
104
|
+
// 2. Gauge (ๅฏๅขๅฏๆธ็้่กจ)
|
|
105
|
+
const activeUsers = monitor.metrics.gauge({
|
|
106
|
+
name: 'active_users',
|
|
107
|
+
help: '็ถๅๆดป่บไฝฟ็จ่
ๆธ'
|
|
108
|
+
})
|
|
109
|
+
activeUsers.set(42)
|
|
110
|
+
|
|
111
|
+
// 3. Histogram (ๆธๅผๅไฝ็ตฑ่จ)
|
|
112
|
+
const processTime = monitor.metrics.histogram({
|
|
113
|
+
name: 'order_processing_seconds',
|
|
114
|
+
help: '่จๅฎ่็่ๆ',
|
|
115
|
+
buckets: [0.1, 0.5, 1, 2, 5]
|
|
116
|
+
})
|
|
117
|
+
const stop = processTime.startTimer()
|
|
118
|
+
// ... ๅท่ก้่ผฏ ...
|
|
119
|
+
stop()
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## ๐ ้่ทฏ่ฟฝ่นค (Tracing)
|
|
123
|
+
|
|
124
|
+
้่ทฏ่ฟฝ่นค็ฑ OpenTelemetry (OTLP) ้ฉ
ๅ๏ผๆ่ชๅ้้ W3C `traceparent` Header ๅณ้่ฟฝ่นคไธไธๆใ
|
|
125
|
+
|
|
126
|
+
### ้
็ฝฎ็ฏไพ
|
|
127
|
+
```typescript
|
|
128
|
+
tracing: {
|
|
129
|
+
enabled: true,
|
|
130
|
+
serviceName: 'gateway',
|
|
131
|
+
endpoint: 'http://otel-collector:4318/v1/traces',
|
|
132
|
+
sampleRate: 0.1, // ๅๆจฃ 10% ็่ซๆฑ
|
|
133
|
+
resourceAttributes: {
|
|
134
|
+
env: 'production'
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### ๆๅๅปบ็ซ Span
|
|
140
|
+
```typescript
|
|
141
|
+
const tracer = core.services.get('tracing')
|
|
142
|
+
|
|
143
|
+
const span = tracer.startSpan('compute_heavy_logic')
|
|
144
|
+
try {
|
|
145
|
+
// ... ๅท่ก่ค้้็ฎ ...
|
|
146
|
+
tracer.setAttribute(span, 'items_count', 100)
|
|
147
|
+
tracer.endSpan(span, 'ok')
|
|
148
|
+
} catch (e) {
|
|
149
|
+
tracer.endSpan(span, 'error')
|
|
150
|
+
}
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
## โ๏ธ ้
็ฝฎๅๆธๅ่
|
|
154
|
+
|
|
155
|
+
| 參數 | 類型 | 預設值 | 說明 |
|
|
156
|
+
|--------|------|---------|-------------|
|
|
157
|
+
| `health.enabled` | `boolean` | `true` | ๆฏๅฆๅ็จๅฅๅบทๆชขๆฅ็ซฏ้ป |
|
|
158
|
+
| `health.path` | `string` | `/health` | ๅฎๆดๅฅๅบทๅ ฑๅ่ทฏๅพ |
|
|
159
|
+
| `health.timeout` | `number` | `5000` | ๆชขๆฅ้
้พๆๆ้ (ms) |
|
|
160
|
+
| `health.cacheTtl` | `number` | `0` | ็ตๆๅฟซๅๆ้ (ms, 0 ไปฃ่กจไธๅฟซๅ) |
|
|
161
|
+
| `metrics.enabled` | `boolean` | `true` | ๆฏๅฆๅ็จ Prometheus ๆๆจ็ซฏ้ป |
|
|
162
|
+
| `metrics.prefix` | `string` | `gravito_` | ๆๆจๅ็จฑๅ็ถด |
|
|
163
|
+
| `metrics.defaultMetrics` | `boolean` | `true` | ๆฏๅฆๆถ้ Node.js ๅท่ก้ๆฎตๆๆจ |
|
|
164
|
+
| `tracing.enabled` | `boolean` | `false` | ๆฏๅฆๅ็จ OpenTelemetry ่ฟฝ่นค |
|
|
165
|
+
| `tracing.endpoint` | `string` | `http://localhost:4318/v1/traces` | OTLP Collector ่ทฏๅพ |
|
|
166
|
+
| `tracing.sampleRate` | `number` | `1.0` | ๅๆจฃ็ (0.0 - 1.0) |
|
|
167
|
+
|
|
168
|
+
## ๐ ๆๆฌๅ่ญฐ
|
|
169
|
+
|
|
170
|
+
MIT © Carl Lee
|
package/dist/index.cjs
CHANGED
|
@@ -65,8 +65,15 @@ var HealthController = class {
|
|
|
65
65
|
*/
|
|
66
66
|
async health(c) {
|
|
67
67
|
const report = await this.registry.check();
|
|
68
|
+
const cacheStats = this.registry.getCacheStats();
|
|
68
69
|
const status = report.status === "healthy" ? 200 : report.status === "degraded" ? 200 : 503;
|
|
69
|
-
return c.json(
|
|
70
|
+
return c.json(
|
|
71
|
+
{
|
|
72
|
+
...report,
|
|
73
|
+
cache: cacheStats
|
|
74
|
+
},
|
|
75
|
+
status
|
|
76
|
+
);
|
|
70
77
|
}
|
|
71
78
|
/**
|
|
72
79
|
* GET /ready - Kubernetes readiness probe
|
|
@@ -101,6 +108,8 @@ var HealthRegistry = class {
|
|
|
101
108
|
cacheExpiry = 0;
|
|
102
109
|
timeout;
|
|
103
110
|
cacheTtl;
|
|
111
|
+
cacheHits = 0;
|
|
112
|
+
cacheMisses = 0;
|
|
104
113
|
constructor(config = {}) {
|
|
105
114
|
this.timeout = config.timeout ?? DEFAULTS.timeout;
|
|
106
115
|
this.cacheTtl = config.cacheTtl ?? DEFAULTS.cacheTtl;
|
|
@@ -158,8 +167,10 @@ var HealthRegistry = class {
|
|
|
158
167
|
*/
|
|
159
168
|
async check() {
|
|
160
169
|
if (this.cacheTtl > 0 && this.cachedReport && Date.now() < this.cacheExpiry) {
|
|
170
|
+
this.cacheHits++;
|
|
161
171
|
return this.cachedReport;
|
|
162
172
|
}
|
|
173
|
+
this.cacheMisses++;
|
|
163
174
|
const results = await Promise.all(
|
|
164
175
|
Array.from(this.checks.entries()).map(([name, check]) => this.executeCheck(name, check))
|
|
165
176
|
);
|
|
@@ -209,6 +220,19 @@ var HealthRegistry = class {
|
|
|
209
220
|
}
|
|
210
221
|
return { status: "healthy" };
|
|
211
222
|
}
|
|
223
|
+
/**
|
|
224
|
+
* Get cache statistics
|
|
225
|
+
*
|
|
226
|
+
* Useful for monitoring cache effectiveness and tuning cacheTtl
|
|
227
|
+
*/
|
|
228
|
+
getCacheStats() {
|
|
229
|
+
const total = this.cacheHits + this.cacheMisses;
|
|
230
|
+
return {
|
|
231
|
+
hits: this.cacheHits,
|
|
232
|
+
misses: this.cacheMisses,
|
|
233
|
+
hitRate: total > 0 ? this.cacheHits / total : 0
|
|
234
|
+
};
|
|
235
|
+
}
|
|
212
236
|
};
|
|
213
237
|
|
|
214
238
|
// src/health/index.ts
|
|
@@ -342,6 +366,7 @@ var MetricsController = class {
|
|
|
342
366
|
* GET /metrics - Prometheus metrics endpoint
|
|
343
367
|
*/
|
|
344
368
|
async metrics(_c) {
|
|
369
|
+
this.updateHealthCacheMetrics();
|
|
345
370
|
const prometheusFormat = this.registry.toPrometheus();
|
|
346
371
|
return new Response(prometheusFormat, {
|
|
347
372
|
status: 200,
|
|
@@ -350,6 +375,19 @@ var MetricsController = class {
|
|
|
350
375
|
}
|
|
351
376
|
});
|
|
352
377
|
}
|
|
378
|
+
/**
|
|
379
|
+
* 更新 health cache metrics
|
|
380
|
+
*
|
|
381
|
+
* 從 HealthRegistry 讀取最新的 cache 統計並更新 gauges
|
|
382
|
+
*/
|
|
383
|
+
updateHealthCacheMetrics() {
|
|
384
|
+
const healthMetrics = this.registry._healthCacheMetrics;
|
|
385
|
+
if (!healthMetrics) return;
|
|
386
|
+
const stats = healthMetrics.registry.getCacheStats();
|
|
387
|
+
healthMetrics.hits.set(stats.hits);
|
|
388
|
+
healthMetrics.misses.set(stats.misses);
|
|
389
|
+
healthMetrics.hitRate.set(stats.hitRate);
|
|
390
|
+
}
|
|
353
391
|
};
|
|
354
392
|
|
|
355
393
|
// src/metrics/MetricsRegistry.ts
|
|
@@ -705,7 +743,7 @@ function createHttpMetricsMiddleware(registry) {
|
|
|
705
743
|
});
|
|
706
744
|
return async (c, next) => {
|
|
707
745
|
const method = c.req.method;
|
|
708
|
-
const path = normalizePath(c.req.path);
|
|
746
|
+
const path = c.req.routePattern ?? normalizePath(c.req.path);
|
|
709
747
|
const start = performance.now();
|
|
710
748
|
await next();
|
|
711
749
|
const duration = (performance.now() - start) / 1e3;
|
|
@@ -986,10 +1024,10 @@ var MonitorOrbit = class {
|
|
|
986
1024
|
metrics: this.metricsRegistry,
|
|
987
1025
|
tracing: this.tracingManager
|
|
988
1026
|
};
|
|
989
|
-
core.
|
|
990
|
-
core.
|
|
991
|
-
core.
|
|
992
|
-
core.
|
|
1027
|
+
core.container.instance("monitor", monitorService);
|
|
1028
|
+
core.container.instance("health", this.healthRegistry);
|
|
1029
|
+
core.container.instance("metrics", this.metricsRegistry);
|
|
1030
|
+
core.container.instance("tracing", this.tracingManager);
|
|
993
1031
|
const router = core.router;
|
|
994
1032
|
if (healthEnabled && this.healthRegistry) {
|
|
995
1033
|
const healthController = new HealthController(this.healthRegistry);
|
|
@@ -1002,9 +1040,37 @@ var MonitorOrbit = class {
|
|
|
1002
1040
|
const metricsController = new MetricsController(this.metricsRegistry);
|
|
1003
1041
|
router.get(metricsPath, (c) => metricsController.metrics(c));
|
|
1004
1042
|
console.log(`[Monitor] Metrics endpoint: ${metricsPath}`);
|
|
1043
|
+
if (healthEnabled && this.healthRegistry) {
|
|
1044
|
+
this.registerHealthCacheMetrics(this.metricsRegistry, this.healthRegistry);
|
|
1045
|
+
}
|
|
1005
1046
|
}
|
|
1006
1047
|
console.log("[Monitor] Observability services initialized");
|
|
1007
1048
|
}
|
|
1049
|
+
/**
|
|
1050
|
+
* 註冊 health cache metrics
|
|
1051
|
+
*
|
|
1052
|
+
* 建立 metrics 來追蹤 health check cache 的效能
|
|
1053
|
+
*/
|
|
1054
|
+
registerHealthCacheMetrics(metricsRegistry, healthRegistry) {
|
|
1055
|
+
const cacheHitsGauge = metricsRegistry.gauge({
|
|
1056
|
+
name: "health_cache_hits_total",
|
|
1057
|
+
help: "Total number of health check cache hits"
|
|
1058
|
+
});
|
|
1059
|
+
const cacheMissesGauge = metricsRegistry.gauge({
|
|
1060
|
+
name: "health_cache_misses_total",
|
|
1061
|
+
help: "Total number of health check cache misses"
|
|
1062
|
+
});
|
|
1063
|
+
const cacheHitRateGauge = metricsRegistry.gauge({
|
|
1064
|
+
name: "health_cache_hit_rate",
|
|
1065
|
+
help: "Health check cache hit rate (0.0 to 1.0)"
|
|
1066
|
+
});
|
|
1067
|
+
metricsRegistry._healthCacheMetrics = {
|
|
1068
|
+
hits: cacheHitsGauge,
|
|
1069
|
+
misses: cacheMissesGauge,
|
|
1070
|
+
hitRate: cacheHitRateGauge,
|
|
1071
|
+
registry: healthRegistry
|
|
1072
|
+
};
|
|
1073
|
+
}
|
|
1008
1074
|
/**
|
|
1009
1075
|
* Shutdown hook
|
|
1010
1076
|
*/
|
package/dist/index.d.cts
CHANGED
|
@@ -101,6 +101,10 @@ declare function defineMonitorConfig(config: MonitorConfig): MonitorConfig;
|
|
|
101
101
|
* Manages health check registrations and executions
|
|
102
102
|
*/
|
|
103
103
|
|
|
104
|
+
/**
|
|
105
|
+
* Aggregated health check report.
|
|
106
|
+
* @public
|
|
107
|
+
*/
|
|
104
108
|
interface HealthReport {
|
|
105
109
|
status: 'healthy' | 'unhealthy' | 'degraded';
|
|
106
110
|
timestamp: string;
|
|
@@ -109,6 +113,14 @@ interface HealthReport {
|
|
|
109
113
|
name: string;
|
|
110
114
|
}>;
|
|
111
115
|
}
|
|
116
|
+
/**
|
|
117
|
+
* Cache statistics
|
|
118
|
+
*/
|
|
119
|
+
interface CacheStats {
|
|
120
|
+
hits: number;
|
|
121
|
+
misses: number;
|
|
122
|
+
hitRate: number;
|
|
123
|
+
}
|
|
112
124
|
/**
|
|
113
125
|
* HealthRegistry manages all health checks
|
|
114
126
|
*/
|
|
@@ -119,6 +131,8 @@ declare class HealthRegistry {
|
|
|
119
131
|
private cacheExpiry;
|
|
120
132
|
private timeout;
|
|
121
133
|
private cacheTtl;
|
|
134
|
+
private cacheHits;
|
|
135
|
+
private cacheMisses;
|
|
122
136
|
constructor(config?: HealthConfig);
|
|
123
137
|
/**
|
|
124
138
|
* Register a health check
|
|
@@ -154,6 +168,12 @@ declare class HealthRegistry {
|
|
|
154
168
|
status: 'healthy' | 'unhealthy';
|
|
155
169
|
reason?: string;
|
|
156
170
|
}>;
|
|
171
|
+
/**
|
|
172
|
+
* Get cache statistics
|
|
173
|
+
*
|
|
174
|
+
* Useful for monitoring cache effectiveness and tuning cacheTtl
|
|
175
|
+
*/
|
|
176
|
+
getCacheStats(): CacheStats;
|
|
157
177
|
}
|
|
158
178
|
|
|
159
179
|
/**
|
|
@@ -224,6 +244,10 @@ declare function createDiskCheck(options?: {
|
|
|
224
244
|
* Manages metric collection and Prometheus exposition
|
|
225
245
|
*/
|
|
226
246
|
|
|
247
|
+
/**
|
|
248
|
+
* Represents a single metric data point with labels.
|
|
249
|
+
* @public
|
|
250
|
+
*/
|
|
227
251
|
interface MetricValue {
|
|
228
252
|
value: number;
|
|
229
253
|
labels: Record<string, string>;
|
|
@@ -365,6 +389,12 @@ declare class MetricsController {
|
|
|
365
389
|
* GET /metrics - Prometheus metrics endpoint
|
|
366
390
|
*/
|
|
367
391
|
metrics(_c: GravitoContext): Promise<Response>;
|
|
392
|
+
/**
|
|
393
|
+
* 更新 health cache metrics
|
|
394
|
+
*
|
|
395
|
+
* 從 HealthRegistry 讀取最新的 cache 統計並更新 gauges
|
|
396
|
+
*/
|
|
397
|
+
private updateHealthCacheMetrics;
|
|
368
398
|
}
|
|
369
399
|
|
|
370
400
|
/**
|
|
@@ -396,6 +426,10 @@ interface Span {
|
|
|
396
426
|
status: 'ok' | 'error' | 'unset';
|
|
397
427
|
events: SpanEvent[];
|
|
398
428
|
}
|
|
429
|
+
/**
|
|
430
|
+
* Event recorded within a span.
|
|
431
|
+
* @public
|
|
432
|
+
*/
|
|
399
433
|
interface SpanEvent {
|
|
400
434
|
name: string;
|
|
401
435
|
timestamp: number;
|
|
@@ -507,6 +541,12 @@ declare class MonitorOrbit implements GravitoOrbit {
|
|
|
507
541
|
* Install the orbit (required by GravitoOrbit interface)
|
|
508
542
|
*/
|
|
509
543
|
install(core: PlanetCore): Promise<void>;
|
|
544
|
+
/**
|
|
545
|
+
* 註冊 health cache metrics
|
|
546
|
+
*
|
|
547
|
+
* 建立 metrics 來追蹤 health check cache 的效能
|
|
548
|
+
*/
|
|
549
|
+
private registerHealthCacheMetrics;
|
|
510
550
|
/**
|
|
511
551
|
* Shutdown hook
|
|
512
552
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -101,6 +101,10 @@ declare function defineMonitorConfig(config: MonitorConfig): MonitorConfig;
|
|
|
101
101
|
* Manages health check registrations and executions
|
|
102
102
|
*/
|
|
103
103
|
|
|
104
|
+
/**
|
|
105
|
+
* Aggregated health check report.
|
|
106
|
+
* @public
|
|
107
|
+
*/
|
|
104
108
|
interface HealthReport {
|
|
105
109
|
status: 'healthy' | 'unhealthy' | 'degraded';
|
|
106
110
|
timestamp: string;
|
|
@@ -109,6 +113,14 @@ interface HealthReport {
|
|
|
109
113
|
name: string;
|
|
110
114
|
}>;
|
|
111
115
|
}
|
|
116
|
+
/**
|
|
117
|
+
* Cache statistics
|
|
118
|
+
*/
|
|
119
|
+
interface CacheStats {
|
|
120
|
+
hits: number;
|
|
121
|
+
misses: number;
|
|
122
|
+
hitRate: number;
|
|
123
|
+
}
|
|
112
124
|
/**
|
|
113
125
|
* HealthRegistry manages all health checks
|
|
114
126
|
*/
|
|
@@ -119,6 +131,8 @@ declare class HealthRegistry {
|
|
|
119
131
|
private cacheExpiry;
|
|
120
132
|
private timeout;
|
|
121
133
|
private cacheTtl;
|
|
134
|
+
private cacheHits;
|
|
135
|
+
private cacheMisses;
|
|
122
136
|
constructor(config?: HealthConfig);
|
|
123
137
|
/**
|
|
124
138
|
* Register a health check
|
|
@@ -154,6 +168,12 @@ declare class HealthRegistry {
|
|
|
154
168
|
status: 'healthy' | 'unhealthy';
|
|
155
169
|
reason?: string;
|
|
156
170
|
}>;
|
|
171
|
+
/**
|
|
172
|
+
* Get cache statistics
|
|
173
|
+
*
|
|
174
|
+
* Useful for monitoring cache effectiveness and tuning cacheTtl
|
|
175
|
+
*/
|
|
176
|
+
getCacheStats(): CacheStats;
|
|
157
177
|
}
|
|
158
178
|
|
|
159
179
|
/**
|
|
@@ -224,6 +244,10 @@ declare function createDiskCheck(options?: {
|
|
|
224
244
|
* Manages metric collection and Prometheus exposition
|
|
225
245
|
*/
|
|
226
246
|
|
|
247
|
+
/**
|
|
248
|
+
* Represents a single metric data point with labels.
|
|
249
|
+
* @public
|
|
250
|
+
*/
|
|
227
251
|
interface MetricValue {
|
|
228
252
|
value: number;
|
|
229
253
|
labels: Record<string, string>;
|
|
@@ -365,6 +389,12 @@ declare class MetricsController {
|
|
|
365
389
|
* GET /metrics - Prometheus metrics endpoint
|
|
366
390
|
*/
|
|
367
391
|
metrics(_c: GravitoContext): Promise<Response>;
|
|
392
|
+
/**
|
|
393
|
+
* 更新 health cache metrics
|
|
394
|
+
*
|
|
395
|
+
* 從 HealthRegistry 讀取最新的 cache 統計並更新 gauges
|
|
396
|
+
*/
|
|
397
|
+
private updateHealthCacheMetrics;
|
|
368
398
|
}
|
|
369
399
|
|
|
370
400
|
/**
|
|
@@ -396,6 +426,10 @@ interface Span {
|
|
|
396
426
|
status: 'ok' | 'error' | 'unset';
|
|
397
427
|
events: SpanEvent[];
|
|
398
428
|
}
|
|
429
|
+
/**
|
|
430
|
+
* Event recorded within a span.
|
|
431
|
+
* @public
|
|
432
|
+
*/
|
|
399
433
|
interface SpanEvent {
|
|
400
434
|
name: string;
|
|
401
435
|
timestamp: number;
|
|
@@ -507,6 +541,12 @@ declare class MonitorOrbit implements GravitoOrbit {
|
|
|
507
541
|
* Install the orbit (required by GravitoOrbit interface)
|
|
508
542
|
*/
|
|
509
543
|
install(core: PlanetCore): Promise<void>;
|
|
544
|
+
/**
|
|
545
|
+
* 註冊 health cache metrics
|
|
546
|
+
*
|
|
547
|
+
* 建立 metrics 來追蹤 health check cache 的效能
|
|
548
|
+
*/
|
|
549
|
+
private registerHealthCacheMetrics;
|
|
510
550
|
/**
|
|
511
551
|
* Shutdown hook
|
|
512
552
|
*/
|
package/dist/index.js
CHANGED
|
@@ -13,8 +13,15 @@ var HealthController = class {
|
|
|
13
13
|
*/
|
|
14
14
|
async health(c) {
|
|
15
15
|
const report = await this.registry.check();
|
|
16
|
+
const cacheStats = this.registry.getCacheStats();
|
|
16
17
|
const status = report.status === "healthy" ? 200 : report.status === "degraded" ? 200 : 503;
|
|
17
|
-
return c.json(
|
|
18
|
+
return c.json(
|
|
19
|
+
{
|
|
20
|
+
...report,
|
|
21
|
+
cache: cacheStats
|
|
22
|
+
},
|
|
23
|
+
status
|
|
24
|
+
);
|
|
18
25
|
}
|
|
19
26
|
/**
|
|
20
27
|
* GET /ready - Kubernetes readiness probe
|
|
@@ -49,6 +56,8 @@ var HealthRegistry = class {
|
|
|
49
56
|
cacheExpiry = 0;
|
|
50
57
|
timeout;
|
|
51
58
|
cacheTtl;
|
|
59
|
+
cacheHits = 0;
|
|
60
|
+
cacheMisses = 0;
|
|
52
61
|
constructor(config = {}) {
|
|
53
62
|
this.timeout = config.timeout ?? DEFAULTS.timeout;
|
|
54
63
|
this.cacheTtl = config.cacheTtl ?? DEFAULTS.cacheTtl;
|
|
@@ -106,8 +115,10 @@ var HealthRegistry = class {
|
|
|
106
115
|
*/
|
|
107
116
|
async check() {
|
|
108
117
|
if (this.cacheTtl > 0 && this.cachedReport && Date.now() < this.cacheExpiry) {
|
|
118
|
+
this.cacheHits++;
|
|
109
119
|
return this.cachedReport;
|
|
110
120
|
}
|
|
121
|
+
this.cacheMisses++;
|
|
111
122
|
const results = await Promise.all(
|
|
112
123
|
Array.from(this.checks.entries()).map(([name, check]) => this.executeCheck(name, check))
|
|
113
124
|
);
|
|
@@ -157,6 +168,19 @@ var HealthRegistry = class {
|
|
|
157
168
|
}
|
|
158
169
|
return { status: "healthy" };
|
|
159
170
|
}
|
|
171
|
+
/**
|
|
172
|
+
* Get cache statistics
|
|
173
|
+
*
|
|
174
|
+
* Useful for monitoring cache effectiveness and tuning cacheTtl
|
|
175
|
+
*/
|
|
176
|
+
getCacheStats() {
|
|
177
|
+
const total = this.cacheHits + this.cacheMisses;
|
|
178
|
+
return {
|
|
179
|
+
hits: this.cacheHits,
|
|
180
|
+
misses: this.cacheMisses,
|
|
181
|
+
hitRate: total > 0 ? this.cacheHits / total : 0
|
|
182
|
+
};
|
|
183
|
+
}
|
|
160
184
|
};
|
|
161
185
|
|
|
162
186
|
// src/health/index.ts
|
|
@@ -290,6 +314,7 @@ var MetricsController = class {
|
|
|
290
314
|
* GET /metrics - Prometheus metrics endpoint
|
|
291
315
|
*/
|
|
292
316
|
async metrics(_c) {
|
|
317
|
+
this.updateHealthCacheMetrics();
|
|
293
318
|
const prometheusFormat = this.registry.toPrometheus();
|
|
294
319
|
return new Response(prometheusFormat, {
|
|
295
320
|
status: 200,
|
|
@@ -298,6 +323,19 @@ var MetricsController = class {
|
|
|
298
323
|
}
|
|
299
324
|
});
|
|
300
325
|
}
|
|
326
|
+
/**
|
|
327
|
+
* 更新 health cache metrics
|
|
328
|
+
*
|
|
329
|
+
* 從 HealthRegistry 讀取最新的 cache 統計並更新 gauges
|
|
330
|
+
*/
|
|
331
|
+
updateHealthCacheMetrics() {
|
|
332
|
+
const healthMetrics = this.registry._healthCacheMetrics;
|
|
333
|
+
if (!healthMetrics) return;
|
|
334
|
+
const stats = healthMetrics.registry.getCacheStats();
|
|
335
|
+
healthMetrics.hits.set(stats.hits);
|
|
336
|
+
healthMetrics.misses.set(stats.misses);
|
|
337
|
+
healthMetrics.hitRate.set(stats.hitRate);
|
|
338
|
+
}
|
|
301
339
|
};
|
|
302
340
|
|
|
303
341
|
// src/metrics/MetricsRegistry.ts
|
|
@@ -653,7 +691,7 @@ function createHttpMetricsMiddleware(registry) {
|
|
|
653
691
|
});
|
|
654
692
|
return async (c, next) => {
|
|
655
693
|
const method = c.req.method;
|
|
656
|
-
const path = normalizePath(c.req.path);
|
|
694
|
+
const path = c.req.routePattern ?? normalizePath(c.req.path);
|
|
657
695
|
const start = performance.now();
|
|
658
696
|
await next();
|
|
659
697
|
const duration = (performance.now() - start) / 1e3;
|
|
@@ -934,10 +972,10 @@ var MonitorOrbit = class {
|
|
|
934
972
|
metrics: this.metricsRegistry,
|
|
935
973
|
tracing: this.tracingManager
|
|
936
974
|
};
|
|
937
|
-
core.
|
|
938
|
-
core.
|
|
939
|
-
core.
|
|
940
|
-
core.
|
|
975
|
+
core.container.instance("monitor", monitorService);
|
|
976
|
+
core.container.instance("health", this.healthRegistry);
|
|
977
|
+
core.container.instance("metrics", this.metricsRegistry);
|
|
978
|
+
core.container.instance("tracing", this.tracingManager);
|
|
941
979
|
const router = core.router;
|
|
942
980
|
if (healthEnabled && this.healthRegistry) {
|
|
943
981
|
const healthController = new HealthController(this.healthRegistry);
|
|
@@ -950,9 +988,37 @@ var MonitorOrbit = class {
|
|
|
950
988
|
const metricsController = new MetricsController(this.metricsRegistry);
|
|
951
989
|
router.get(metricsPath, (c) => metricsController.metrics(c));
|
|
952
990
|
console.log(`[Monitor] Metrics endpoint: ${metricsPath}`);
|
|
991
|
+
if (healthEnabled && this.healthRegistry) {
|
|
992
|
+
this.registerHealthCacheMetrics(this.metricsRegistry, this.healthRegistry);
|
|
993
|
+
}
|
|
953
994
|
}
|
|
954
995
|
console.log("[Monitor] Observability services initialized");
|
|
955
996
|
}
|
|
997
|
+
/**
|
|
998
|
+
* 註冊 health cache metrics
|
|
999
|
+
*
|
|
1000
|
+
* 建立 metrics 來追蹤 health check cache 的效能
|
|
1001
|
+
*/
|
|
1002
|
+
registerHealthCacheMetrics(metricsRegistry, healthRegistry) {
|
|
1003
|
+
const cacheHitsGauge = metricsRegistry.gauge({
|
|
1004
|
+
name: "health_cache_hits_total",
|
|
1005
|
+
help: "Total number of health check cache hits"
|
|
1006
|
+
});
|
|
1007
|
+
const cacheMissesGauge = metricsRegistry.gauge({
|
|
1008
|
+
name: "health_cache_misses_total",
|
|
1009
|
+
help: "Total number of health check cache misses"
|
|
1010
|
+
});
|
|
1011
|
+
const cacheHitRateGauge = metricsRegistry.gauge({
|
|
1012
|
+
name: "health_cache_hit_rate",
|
|
1013
|
+
help: "Health check cache hit rate (0.0 to 1.0)"
|
|
1014
|
+
});
|
|
1015
|
+
metricsRegistry._healthCacheMetrics = {
|
|
1016
|
+
hits: cacheHitsGauge,
|
|
1017
|
+
misses: cacheMissesGauge,
|
|
1018
|
+
hitRate: cacheHitRateGauge,
|
|
1019
|
+
registry: healthRegistry
|
|
1020
|
+
};
|
|
1021
|
+
}
|
|
956
1022
|
/**
|
|
957
1023
|
* Shutdown hook
|
|
958
1024
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gravito/monitor",
|
|
3
|
-
"version": "3.0.0",
|
|
3
|
+
"version": "3.1.0",
|
|
4
4
|
"description": "Observability module for Gravito - Health checks, Metrics, and Tracing",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -25,9 +25,11 @@
|
|
|
25
25
|
"scripts": {
|
|
26
26
|
"build": "bun run build.ts",
|
|
27
27
|
"typecheck": "bun tsc -p tsconfig.json --noEmit --skipLibCheck",
|
|
28
|
-
"test": "bun test",
|
|
29
|
-
"test:coverage": "bun test --coverage --coverage-
|
|
30
|
-
"test:ci": "bun test --coverage --coverage-
|
|
28
|
+
"test": "bun test --timeout=10000",
|
|
29
|
+
"test:coverage": "bun test --timeout=10000 --coverage --coverage-reporter=lcov --coverage-dir coverage && bun run --bun scripts/check-coverage.ts",
|
|
30
|
+
"test:ci": "bun test --timeout=10000 --coverage --coverage-reporter=lcov --coverage-dir coverage && bun run --bun scripts/check-coverage.ts",
|
|
31
|
+
"test:unit": "bun test tests/ --timeout=10000",
|
|
32
|
+
"test:integration": "test $(find tests -name '*.integration.test.ts' 2>/dev/null | wc -l) -gt 0 && find tests -name '*.integration.test.ts' -print0 | xargs -0 bun test --timeout=10000 || echo 'No integration tests found'"
|
|
31
33
|
},
|
|
32
34
|
"peerDependencies": {
|
|
33
35
|
"@gravito/core": "workspace:*",
|