@platformatic/metrics 3.30.0 → 3.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +190 -21
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -3,6 +3,21 @@ import os from 'node:os'
|
|
|
3
3
|
import { performance } from 'node:perf_hooks'
|
|
4
4
|
import client from '@platformatic/prom-client'
|
|
5
5
|
|
|
6
|
+
// Import individual metric collectors from prom-client
|
|
7
|
+
import processCpuTotal from '@platformatic/prom-client/lib/metrics/processCpuTotal.js'
|
|
8
|
+
import processStartTime from '@platformatic/prom-client/lib/metrics/processStartTime.js'
|
|
9
|
+
import osMemoryHeap from '@platformatic/prom-client/lib/metrics/osMemoryHeap.js'
|
|
10
|
+
import processOpenFileDescriptors from '@platformatic/prom-client/lib/metrics/processOpenFileDescriptors.js'
|
|
11
|
+
import processMaxFileDescriptors from '@platformatic/prom-client/lib/metrics/processMaxFileDescriptors.js'
|
|
12
|
+
import eventLoopLag from '@platformatic/prom-client/lib/metrics/eventLoopLag.js'
|
|
13
|
+
import processHandles from '@platformatic/prom-client/lib/metrics/processHandles.js'
|
|
14
|
+
import processRequests from '@platformatic/prom-client/lib/metrics/processRequests.js'
|
|
15
|
+
import processResources from '@platformatic/prom-client/lib/metrics/processResources.js'
|
|
16
|
+
import heapSizeAndUsed from '@platformatic/prom-client/lib/metrics/heapSizeAndUsed.js'
|
|
17
|
+
import heapSpacesSizeAndUsed from '@platformatic/prom-client/lib/metrics/heapSpacesSizeAndUsed.js'
|
|
18
|
+
import version from '@platformatic/prom-client/lib/metrics/version.js'
|
|
19
|
+
import gc from '@platformatic/prom-client/lib/metrics/gc.js'
|
|
20
|
+
|
|
6
21
|
export * as client from '@platformatic/prom-client'
|
|
7
22
|
|
|
8
23
|
const { eventLoopUtilization } = performance
|
|
@@ -10,6 +25,49 @@ const { Registry, Gauge, Counter, collectDefaultMetrics } = client
|
|
|
10
25
|
|
|
11
26
|
export const kMetricsGroups = Symbol('plt.metrics.MetricsGroups')
|
|
12
27
|
|
|
28
|
+
/**
 * Names of the metrics classified as process-level.
 *
 * Process-level metrics are the same across all workers, so they should be
 * collected once, in the main thread.
 */
export const PROCESS_LEVEL_METRICS = [
  'process_cpu_user_seconds_total',
  'process_cpu_system_seconds_total',
  'process_cpu_seconds_total',
  'process_start_time_seconds',
  'process_resident_memory_bytes',
  'process_open_fds',
  'process_max_fds',
  'nodejs_version_info',
  'process_cpu_percent_usage'
]
|
|
40
|
+
|
|
41
|
+
/**
 * Names of the metrics classified as thread/isolate-specific.
 *
 * These differ per worker, so each worker reports its own values.
 *
 * NOTE(review): 'thread_cpu_user_system_seconds_total' looks like it might be
 * meant to read 'thread_cpu_user_seconds_total'. It is kept verbatim because
 * it has to match the name the thread CPU metric is registered under, which
 * is not visible from here - confirm before renaming.
 */
export const THREAD_LEVEL_METRICS = [
  // V8 heap
  'nodejs_heap_size_total_bytes',
  'nodejs_heap_size_used_bytes',
  'nodejs_external_memory_bytes',
  'nodejs_heap_space_size_total_bytes',
  'nodejs_heap_space_size_used_bytes',
  'nodejs_heap_space_size_available_bytes',
  // Event loop lag
  'nodejs_eventloop_lag_seconds',
  'nodejs_eventloop_lag_min_seconds',
  'nodejs_eventloop_lag_max_seconds',
  'nodejs_eventloop_lag_mean_seconds',
  'nodejs_eventloop_lag_stddev_seconds',
  'nodejs_eventloop_lag_p50_seconds',
  'nodejs_eventloop_lag_p90_seconds',
  'nodejs_eventloop_lag_p99_seconds',
  // Event loop utilization and garbage collection
  'nodejs_eventloop_utilization',
  'nodejs_gc_duration_seconds',
  // Active handles, requests and resources
  'nodejs_active_handles',
  'nodejs_active_handles_total',
  'nodejs_active_requests',
  'nodejs_active_requests_total',
  'nodejs_active_resources',
  'nodejs_active_resources_total',
  // Thread CPU
  'thread_cpu_user_system_seconds_total',
  'thread_cpu_system_seconds_total',
  'thread_cpu_seconds_total',
  'thread_cpu_percent_usage'
]
|
|
70
|
+
|
|
13
71
|
export function registerMetricsGroup (registry, group) {
|
|
14
72
|
registry[kMetricsGroups] ??= new Set()
|
|
15
73
|
registry[kMetricsGroups].add(group)
|
|
@@ -93,25 +151,12 @@ export async function collectThreadCpuMetrics (registry) {
|
|
|
93
151
|
registry.registerMetric(threadCpuPercentUsageGaugeMetric)
|
|
94
152
|
}
|
|
95
153
|
|
|
96
|
-
|
|
97
|
-
|
|
154
|
+
// Collect system CPU usage metric (based on os.cpus(), process-level)
|
|
155
|
+
export function collectSystemCpuMetric (registry) {
|
|
156
|
+
if (ensureMetricsGroup(registry, 'systemCpu')) {
|
|
98
157
|
return
|
|
99
158
|
}
|
|
100
159
|
|
|
101
|
-
let startELU = eventLoopUtilization()
|
|
102
|
-
const eluMetric = new Gauge({
|
|
103
|
-
name: 'nodejs_eventloop_utilization',
|
|
104
|
-
help: 'The event loop utilization as a fraction of the loop time. 1 is fully utilized, 0 is fully idle.',
|
|
105
|
-
collect: () => {
|
|
106
|
-
const endELU = eventLoopUtilization()
|
|
107
|
-
const result = eventLoopUtilization(endELU, startELU).utilization
|
|
108
|
-
eluMetric.set(result)
|
|
109
|
-
startELU = endELU
|
|
110
|
-
},
|
|
111
|
-
registers: [registry]
|
|
112
|
-
})
|
|
113
|
-
registry.registerMetric(eluMetric)
|
|
114
|
-
|
|
115
160
|
let previousIdleTime = 0
|
|
116
161
|
let previousTotalTime = 0
|
|
117
162
|
const cpuMetric = new Gauge({
|
|
@@ -122,14 +167,16 @@ export function collectEluMetric (registry) {
|
|
|
122
167
|
let idleTime = 0
|
|
123
168
|
let totalTime = 0
|
|
124
169
|
|
|
125
|
-
cpus.
|
|
126
|
-
|
|
127
|
-
|
|
170
|
+
for (let i = 0; i < cpus.length; i++) {
|
|
171
|
+
const cpu = cpus[i]
|
|
172
|
+
const times = cpu.times
|
|
173
|
+
for (const type in times) {
|
|
174
|
+
totalTime += times[type]
|
|
128
175
|
if (type === 'idle') {
|
|
129
|
-
idleTime +=
|
|
176
|
+
idleTime += times[type]
|
|
130
177
|
}
|
|
131
178
|
}
|
|
132
|
-
}
|
|
179
|
+
}
|
|
133
180
|
|
|
134
181
|
const idleDiff = idleTime - previousIdleTime
|
|
135
182
|
const totalDiff = totalTime - previousTotalTime
|
|
@@ -146,6 +193,127 @@ export function collectEluMetric (registry) {
|
|
|
146
193
|
registry.registerMetric(cpuMetric)
|
|
147
194
|
}
|
|
148
195
|
|
|
196
|
+
/**
 * Collect only the ELU metric (thread-specific).
 *
 * Creates the `nodejs_eventloop_utilization` gauge and registers it on the
 * given registry. Returns early, without creating anything, when
 * `ensureMetricsGroup(registry, 'elu')` is truthy (presumably meaning the
 * group has already been set up on this registry).
 *
 * @param {object} registry - Registry the gauge is attached to.
 * @returns {void}
 */
export function collectEluMetric (registry) {
  if (ensureMetricsGroup(registry, 'elu')) {
    return
  }

  // Baseline sample. It is replaced at the end of every collection, so each
  // reported value covers only the interval since the previous collection.
  let startELU = eventLoopUtilization()
  const eluMetric = new Gauge({
    name: 'nodejs_eventloop_utilization',
    help: 'The event loop utilization as a fraction of the loop time. 1 is fully utilized, 0 is fully idle.',
    collect: () => {
      const endELU = eventLoopUtilization()
      // Passing both samples yields the utilization between them
      const result = eventLoopUtilization(endELU, startELU).utilization
      eluMetric.set(result)
      startELU = endELU
    },
    registers: [registry]
  })
  registry.registerMetric(eluMetric)
}
|
|
216
|
+
|
|
217
|
+
/**
 * Legacy function that collects both ELU and system CPU (for backward
 * compatibility).
 *
 * Delegates to `collectEluMetric` and then `collectSystemCpuMetric` on the
 * same registry.
 *
 * @param {object} registry - Registry both metrics are attached to.
 * @returns {void}
 */
export function collectEluAndSystemCpuMetrics (registry) {
  collectEluMetric(registry)
  collectSystemCpuMetric(registry)
}
|
|
222
|
+
|
|
223
|
+
/**
 * Collect process-level metrics (same across all workers, should run in the
 * main thread only).
 *
 * Invokes the individual prom-client collectors for the process-level metrics
 * and then `collectSystemCpuMetric`. Returns early, without registering
 * anything, when `ensureMetricsGroup(registry, 'process-level')` is truthy
 * (presumably meaning the group has already been set up on this registry).
 *
 * @param {object} registry - Registry the metrics are attached to.
 * @returns {void}
 */
export function collectProcessMetrics (registry) {
  if (ensureMetricsGroup(registry, 'process-level')) {
    return
  }

  // Every collector receives the same empty options object
  const config = {}

  // Process CPU metrics
  processCpuTotal(registry, config)
  // Process start time
  processStartTime(registry, config)
  // Resident memory (RSS)
  osMemoryHeap(registry, config)
  // Open file descriptors (Linux)
  processOpenFileDescriptors(registry, config)
  // Max file descriptors (Linux)
  processMaxFileDescriptors(registry, config)
  // Node.js version info
  version(registry, config)
  // System CPU percent usage (os.cpus() based)
  collectSystemCpuMetric(registry)
}
|
|
246
|
+
|
|
247
|
+
/**
 * Collect thread-specific metrics (different per worker).
 *
 * Sets the registry default labels, then - depending on `metricsConfig` -
 * registers the thread-level default metrics and the HTTP metrics.
 *
 * @param {string} applicationId - Value stored under the id label.
 * @param {number} workerId - Added as the `workerId` label only when `>= 0`.
 * @param {object} [metricsConfig={}] - Options read by this function:
 *   `labels` (extra default labels), `idLabel` (name of the id label,
 *   defaults to 'applicationId'), `defaultMetrics` and `httpMetrics`
 *   (truthy to enable the respective groups).
 * @param {object} [registry] - Registry to populate; a new `Registry` is
 *   created when omitted.
 * @returns {Promise<{ registry: object, otlpBridge: null }>} The populated
 *   registry; `otlpBridge` is always `null` here.
 */
export async function collectThreadMetrics (applicationId, workerId, metricsConfig = {}, registry = undefined) {
  if (!registry) {
    registry = new Registry()
  }

  // Copy so the caller's labels object is never mutated
  const labels = { ...metricsConfig.labels }

  // Use the configured label name
  const labelName = metricsConfig.idLabel || 'applicationId'
  labels[labelName] = applicationId

  if (workerId >= 0) {
    labels.workerId = workerId
  }
  registry.setDefaultLabels(labels)

  if (metricsConfig.defaultMetrics) {
    if (!ensureMetricsGroup(registry, 'thread-level')) {
      const config = { eventLoopMonitoringPrecision: 10 }

      // Thread-specific metrics only
      heapSizeAndUsed(registry, config)
      heapSpacesSizeAndUsed(registry, config)
      eventLoopLag(registry, config)
      gc(registry, config)
      processHandles(registry, config)
      processRequests(registry, config)
      // Only register the resources collector when the runtime exposes the API
      if (typeof process.getActiveResourcesInfo === 'function') {
        processResources(registry, config)
      }
    }

    // Event loop utilization (thread-specific)
    collectEluMetric(registry)
    // Thread CPU metrics
    await collectThreadCpuMetrics(registry)
  }

  if (metricsConfig.httpMetrics && !ensureMetricsGroup(registry, 'http')) {
    collectHttpMetrics(registry, {
      customLabels: ['telemetry_id'],
      getCustomLabels: req => {
        const telemetryId = req.headers?.['x-plt-telemetry-id'] ?? 'unknown'
        return { telemetry_id: telemetryId }
      },
      histogram: {
        name: 'http_request_all_duration_seconds',
        // Help text previously said "summary" here and "histogram" on the
        // summary below; the two descriptions were swapped.
        help: 'request duration in seconds histogram for all requests',
        // Regular function (not an arrow) so `this` is whatever the hook is
        // invoked on - presumably the metric instance. The reset is deferred
        // to the next tick, i.e. after the current collection has run.
        collect: function () {
          process.nextTick(() => this.reset())
        }
      },
      summary: {
        name: 'http_request_all_summary_seconds',
        help: 'request duration in seconds summary for all requests',
        collect: function () {
          process.nextTick(() => this.reset())
        }
      }
    })
  }

  return {
    registry,
    otlpBridge: null
  }
}
|
|
315
|
+
|
|
316
|
+
// Original function for backward compatibility (collects all metrics)
|
|
149
317
|
export async function collectMetrics (applicationId, workerId, metricsConfig = {}, registry = undefined) {
|
|
150
318
|
if (!registry) {
|
|
151
319
|
registry = new Registry()
|
|
@@ -168,6 +336,7 @@ export async function collectMetrics (applicationId, workerId, metricsConfig = {
|
|
|
168
336
|
}
|
|
169
337
|
|
|
170
338
|
collectEluMetric(registry)
|
|
339
|
+
collectSystemCpuMetric(registry)
|
|
171
340
|
await collectThreadCpuMetrics(registry)
|
|
172
341
|
}
|
|
173
342
|
|