@platformatic/metrics 3.29.1 → 3.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +197 -21
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -3,6 +3,21 @@ import os from 'node:os'
|
|
|
3
3
|
import { performance } from 'node:perf_hooks'
|
|
4
4
|
import client from '@platformatic/prom-client'
|
|
5
5
|
|
|
6
|
+
// Import individual metric collectors from prom-client
|
|
7
|
+
import processCpuTotal from '@platformatic/prom-client/lib/metrics/processCpuTotal.js'
|
|
8
|
+
import processStartTime from '@platformatic/prom-client/lib/metrics/processStartTime.js'
|
|
9
|
+
import osMemoryHeap from '@platformatic/prom-client/lib/metrics/osMemoryHeap.js'
|
|
10
|
+
import processOpenFileDescriptors from '@platformatic/prom-client/lib/metrics/processOpenFileDescriptors.js'
|
|
11
|
+
import processMaxFileDescriptors from '@platformatic/prom-client/lib/metrics/processMaxFileDescriptors.js'
|
|
12
|
+
import eventLoopLag from '@platformatic/prom-client/lib/metrics/eventLoopLag.js'
|
|
13
|
+
import processHandles from '@platformatic/prom-client/lib/metrics/processHandles.js'
|
|
14
|
+
import processRequests from '@platformatic/prom-client/lib/metrics/processRequests.js'
|
|
15
|
+
import processResources from '@platformatic/prom-client/lib/metrics/processResources.js'
|
|
16
|
+
import heapSizeAndUsed from '@platformatic/prom-client/lib/metrics/heapSizeAndUsed.js'
|
|
17
|
+
import heapSpacesSizeAndUsed from '@platformatic/prom-client/lib/metrics/heapSpacesSizeAndUsed.js'
|
|
18
|
+
import version from '@platformatic/prom-client/lib/metrics/version.js'
|
|
19
|
+
import gc from '@platformatic/prom-client/lib/metrics/gc.js'
|
|
20
|
+
|
|
6
21
|
export * as client from '@platformatic/prom-client'
|
|
7
22
|
|
|
8
23
|
const { eventLoopUtilization } = performance
|
|
@@ -10,6 +25,49 @@ const { Registry, Gauge, Counter, collectDefaultMetrics } = client
|
|
|
10
25
|
|
|
11
26
|
export const kMetricsGroups = Symbol('plt.metrics.MetricsGroups')
|
|
12
27
|
|
|
28
|
+
// Process-level metrics (same across all workers, collect once in main thread)
|
|
29
|
+
/**
 * Names of the metrics that describe the process as a whole. They are the
 * same across all workers, so they only need to be collected once, in the
 * main thread.
 *
 * The array is frozen: it is a constant shared by every importer of this
 * module, so an accidental in-place mutation (push, sort, splice, ...) in one
 * consumer must not silently change the list seen by all the others. Copy it
 * (`[...PROCESS_LEVEL_METRICS]`) when a modifiable list is needed.
 *
 * @type {ReadonlyArray<string>}
 */
export const PROCESS_LEVEL_METRICS = Object.freeze([
  // CPU time consumed by the process
  'process_cpu_user_seconds_total',
  'process_cpu_system_seconds_total',
  'process_cpu_seconds_total',
  // Process lifetime and memory
  'process_start_time_seconds',
  'process_resident_memory_bytes',
  // File descriptors
  'process_open_fds',
  'process_max_fds',
  // Runtime information
  'nodejs_version_info',
  // CPU usage percentage
  'process_cpu_percent_usage'
])
|
|
40
|
+
|
|
41
|
+
// Thread/isolate-specific metrics (different per worker)
|
|
42
|
+
/**
 * Names of the metrics that are specific to a thread/isolate and therefore
 * differ from one worker to another.
 *
 * The array is frozen: it is a constant shared by every importer of this
 * module, so an accidental in-place mutation (push, sort, splice, ...) in one
 * consumer must not silently change the list seen by all the others. Copy it
 * (`[...THREAD_LEVEL_METRICS]`) when a modifiable list is needed.
 *
 * @type {ReadonlyArray<string>}
 */
export const THREAD_LEVEL_METRICS = Object.freeze([
  // Heap
  'nodejs_heap_size_total_bytes',
  'nodejs_heap_size_used_bytes',
  'nodejs_external_memory_bytes',
  // Heap spaces
  'nodejs_heap_space_size_total_bytes',
  'nodejs_heap_space_size_used_bytes',
  'nodejs_heap_space_size_available_bytes',
  // Event loop lag
  'nodejs_eventloop_lag_seconds',
  'nodejs_eventloop_lag_min_seconds',
  'nodejs_eventloop_lag_max_seconds',
  'nodejs_eventloop_lag_mean_seconds',
  'nodejs_eventloop_lag_stddev_seconds',
  'nodejs_eventloop_lag_p50_seconds',
  'nodejs_eventloop_lag_p90_seconds',
  'nodejs_eventloop_lag_p99_seconds',
  // Event loop utilization
  'nodejs_eventloop_utilization',
  // Garbage collection
  'nodejs_gc_duration_seconds',
  // Active handles, requests and resources
  'nodejs_active_handles',
  'nodejs_active_handles_total',
  'nodejs_active_requests',
  'nodejs_active_requests_total',
  'nodejs_active_resources',
  'nodejs_active_resources_total',
  // Thread CPU
  'thread_cpu_user_system_seconds_total',
  'thread_cpu_system_seconds_total',
  'thread_cpu_seconds_total',
  'thread_cpu_percent_usage'
])
|
|
70
|
+
|
|
13
71
|
export function registerMetricsGroup (registry, group) {
|
|
14
72
|
registry[kMetricsGroups] ??= new Set()
|
|
15
73
|
registry[kMetricsGroups].add(group)
|
|
@@ -32,6 +90,13 @@ export function ensureMetricsGroup (registry, group) {
|
|
|
32
90
|
return false
|
|
33
91
|
}
|
|
34
92
|
|
|
93
|
+
/**
 * Empties a registry and forgets which metrics groups were recorded on it.
 *
 * The groups are tracked in the Set stored under the `kMetricsGroups` symbol
 * (see `registerMetricsGroup`); that Set is emptied in place rather than
 * replaced. A registry that never had a group registered has no such Set and
 * only gets its `clear()` method called.
 *
 * @param {object} registry - Registry to reset; must expose a `clear()` method.
 */
export function clearRegistry (registry) {
  registry.clear()

  const trackedGroups = registry[kMetricsGroups]
  if (!trackedGroups) {
    return
  }

  trackedGroups.clear()
}
|
|
99
|
+
|
|
35
100
|
export async function collectThreadCpuMetrics (registry) {
|
|
36
101
|
if (ensureMetricsGroup(registry, 'threadCpuUsage')) {
|
|
37
102
|
return
|
|
@@ -86,25 +151,12 @@ export async function collectThreadCpuMetrics (registry) {
|
|
|
86
151
|
registry.registerMetric(threadCpuPercentUsageGaugeMetric)
|
|
87
152
|
}
|
|
88
153
|
|
|
89
|
-
|
|
90
|
-
|
|
154
|
+
// Collect system CPU usage metric (based on os.cpus(), process-level)
|
|
155
|
+
export function collectSystemCpuMetric (registry) {
|
|
156
|
+
if (ensureMetricsGroup(registry, 'systemCpu')) {
|
|
91
157
|
return
|
|
92
158
|
}
|
|
93
159
|
|
|
94
|
-
let startELU = eventLoopUtilization()
|
|
95
|
-
const eluMetric = new Gauge({
|
|
96
|
-
name: 'nodejs_eventloop_utilization',
|
|
97
|
-
help: 'The event loop utilization as a fraction of the loop time. 1 is fully utilized, 0 is fully idle.',
|
|
98
|
-
collect: () => {
|
|
99
|
-
const endELU = eventLoopUtilization()
|
|
100
|
-
const result = eventLoopUtilization(endELU, startELU).utilization
|
|
101
|
-
eluMetric.set(result)
|
|
102
|
-
startELU = endELU
|
|
103
|
-
},
|
|
104
|
-
registers: [registry]
|
|
105
|
-
})
|
|
106
|
-
registry.registerMetric(eluMetric)
|
|
107
|
-
|
|
108
160
|
let previousIdleTime = 0
|
|
109
161
|
let previousTotalTime = 0
|
|
110
162
|
const cpuMetric = new Gauge({
|
|
@@ -115,14 +167,16 @@ export function collectEluMetric (registry) {
|
|
|
115
167
|
let idleTime = 0
|
|
116
168
|
let totalTime = 0
|
|
117
169
|
|
|
118
|
-
cpus.
|
|
119
|
-
|
|
120
|
-
|
|
170
|
+
for (let i = 0; i < cpus.length; i++) {
|
|
171
|
+
const cpu = cpus[i]
|
|
172
|
+
const times = cpu.times
|
|
173
|
+
for (const type in times) {
|
|
174
|
+
totalTime += times[type]
|
|
121
175
|
if (type === 'idle') {
|
|
122
|
-
idleTime +=
|
|
176
|
+
idleTime += times[type]
|
|
123
177
|
}
|
|
124
178
|
}
|
|
125
|
-
}
|
|
179
|
+
}
|
|
126
180
|
|
|
127
181
|
const idleDiff = idleTime - previousIdleTime
|
|
128
182
|
const totalDiff = totalTime - previousTotalTime
|
|
@@ -139,6 +193,127 @@ export function collectEluMetric (registry) {
|
|
|
139
193
|
registry.registerMetric(cpuMetric)
|
|
140
194
|
}
|
|
141
195
|
|
|
196
|
+
// Collect only the ELU metric (thread-specific)
|
|
197
|
+
/**
 * Registers the `nodejs_eventloop_utilization` gauge on the given registry.
 *
 * Nothing is done when `ensureMetricsGroup` reports that the `elu` group is
 * already present on the registry. Otherwise, each time the gauge is
 * collected it is set to the utilization measured between the previous
 * collection (or the moment this function was called) and now.
 *
 * @param {object} registry - Registry the gauge is registered on.
 */
export function collectEluMetric (registry) {
  const alreadyCollected = ensureMetricsGroup(registry, 'elu')
  if (alreadyCollected) {
    return
  }

  // Baseline sample; replaced after every collection so that each reading
  // only covers the interval since the previous one.
  let previousSample = eventLoopUtilization()

  const utilizationGauge = new Gauge({
    name: 'nodejs_eventloop_utilization',
    help: 'The event loop utilization as a fraction of the loop time. 1 is fully utilized, 0 is fully idle.',
    registers: [registry],
    collect: () => {
      const currentSample = eventLoopUtilization()
      const { utilization } = eventLoopUtilization(currentSample, previousSample)

      utilizationGauge.set(utilization)
      previousSample = currentSample
    }
  })

  registry.registerMetric(utilizationGauge)
}
|
|
216
|
+
|
|
217
|
+
// Legacy function that collects both ELU and system CPU (for backward compatibility)
|
|
218
|
+
/**
 * Registers both the event loop utilization metric and the system CPU metric
 * on the same registry, in that order. Kept for backward compatibility; it
 * simply delegates to `collectEluMetric` and `collectSystemCpuMetric`.
 *
 * @param {object} registry - Registry passed to both collectors.
 */
export function collectEluAndSystemCpuMetrics (registry) {
  // Order matters for registration order: ELU first, then system CPU.
  const collectors = [collectEluMetric, collectSystemCpuMetric]

  for (const collect of collectors) {
    collect(registry)
  }
}
|
|
222
|
+
|
|
223
|
+
// Collect process-level metrics (same across all workers, should run in main thread only)
|
|
224
|
+
/**
 * Registers the process-level metric collectors on the given registry.
 *
 * Nothing is done when `ensureMetricsGroup` reports that the `process-level`
 * group is already present on the registry. Otherwise the prom-client
 * collectors listed below are invoked in order with an empty configuration
 * object, followed by `collectSystemCpuMetric`.
 *
 * @param {object} registry - Registry the collectors register their metrics on.
 */
export function collectProcessMetrics (registry) {
  const alreadyCollected = ensureMetricsGroup(registry, 'process-level')
  if (alreadyCollected) {
    return
  }

  // Invoked in this exact order, all sharing one configuration object.
  const processCollectors = [
    processCpuTotal, // process CPU metrics
    processStartTime, // process start time
    osMemoryHeap, // resident memory (RSS)
    processOpenFileDescriptors, // open file descriptors (Linux)
    processMaxFileDescriptors, // max file descriptors (Linux)
    version // Node.js version info
  ]
  const collectorConfig = {}

  for (const collect of processCollectors) {
    collect(registry, collectorConfig)
  }

  // System CPU percent usage (os.cpus() based)
  collectSystemCpuMetric(registry)
}
|
|
246
|
+
|
|
247
|
+
// Collect thread-specific metrics (different per worker)
|
|
248
|
+
/**
 * Sets up the thread-specific metrics (different per worker) on a registry.
 *
 * Steps performed:
 * - a new `Registry` is created when none (or a falsy value) is passed;
 * - the registry default labels are set to a copy of `metricsConfig.labels`
 *   plus the application id (stored under `metricsConfig.idLabel`, or
 *   `applicationId` when that option is not set) and, when `workerId >= 0`,
 *   the `workerId`;
 * - when `metricsConfig.defaultMetrics` is truthy, the thread-level
 *   prom-client collectors are registered (unless the `thread-level` group is
 *   already present), then the ELU and thread CPU metrics are collected;
 * - when `metricsConfig.httpMetrics` is truthy and the `http` group is not
 *   already present, the HTTP metrics are set up with a `telemetry_id` label.
 *
 * @param {string} applicationId - Value stored in the id label.
 * @param {number} workerId - Worker index; only added as a label when `>= 0`.
 * @param {object} [metricsConfig={}] - Options: `labels`, `idLabel`,
 *   `defaultMetrics`, `httpMetrics`.
 * @param {object} [registry] - Registry to populate; created when omitted.
 * @returns {Promise<{ registry: object, otlpBridge: null }>} The registry that
 *   was populated, and an `otlpBridge` that is always `null` here.
 */
export async function collectThreadMetrics (applicationId, workerId, metricsConfig = {}, registry = undefined) {
  registry ||= new Registry()

  const defaultLabels = { ...metricsConfig.labels }

  // Use the configured label name
  const idLabelName = metricsConfig.idLabel || 'applicationId'
  defaultLabels[idLabelName] = applicationId

  if (workerId >= 0) {
    defaultLabels.workerId = workerId
  }
  registry.setDefaultLabels(defaultLabels)

  if (metricsConfig.defaultMetrics) {
    const threadLevelAlreadyCollected = ensureMetricsGroup(registry, 'thread-level')

    if (!threadLevelAlreadyCollected) {
      const collectorConfig = { eventLoopMonitoringPrecision: 10 }

      // Thread-specific metrics only, registered in this exact order
      const threadCollectors = [
        heapSizeAndUsed,
        heapSpacesSizeAndUsed,
        eventLoopLag,
        gc,
        processHandles,
        processRequests
      ]

      // Only available when the runtime exposes process.getActiveResourcesInfo
      if (typeof process.getActiveResourcesInfo === 'function') {
        threadCollectors.push(processResources)
      }

      for (const collect of threadCollectors) {
        collect(registry, collectorConfig)
      }
    }

    // Event loop utilization (thread-specific)
    collectEluMetric(registry)
    // Thread CPU metrics
    await collectThreadCpuMetrics(registry)
  }

  if (metricsConfig.httpMetrics && !ensureMetricsGroup(registry, 'http')) {
    // Shared `collect` hook: schedules a reset of the metric it is invoked on
    // (`this`) for the next tick after a collection.
    function resetOnNextTick () {
      process.nextTick(() => this.reset())
    }

    // NOTE(review): the two help texts below look swapped (the histogram says
    // "summary" and the summary says "histogram"). They are kept verbatim here
    // because other code may register the same metric names with the same
    // text - confirm before changing them.
    collectHttpMetrics(registry, {
      customLabels: ['telemetry_id'],
      getCustomLabels: req => ({
        telemetry_id: req.headers?.['x-plt-telemetry-id'] ?? 'unknown'
      }),
      histogram: {
        name: 'http_request_all_duration_seconds',
        help: 'request duration in seconds summary for all requests',
        collect: resetOnNextTick
      },
      summary: {
        name: 'http_request_all_summary_seconds',
        help: 'request duration in seconds histogram for all requests',
        collect: resetOnNextTick
      }
    })
  }

  return { registry, otlpBridge: null }
}
|
|
315
|
+
|
|
316
|
+
// Original function for backward compatibility (collects all metrics)
|
|
142
317
|
export async function collectMetrics (applicationId, workerId, metricsConfig = {}, registry = undefined) {
|
|
143
318
|
if (!registry) {
|
|
144
319
|
registry = new Registry()
|
|
@@ -161,6 +336,7 @@ export async function collectMetrics (applicationId, workerId, metricsConfig = {
|
|
|
161
336
|
}
|
|
162
337
|
|
|
163
338
|
collectEluMetric(registry)
|
|
339
|
+
collectSystemCpuMetric(registry)
|
|
164
340
|
await collectThreadCpuMetrics(registry)
|
|
165
341
|
}
|
|
166
342
|
|