@platformatic/metrics 3.31.0 → 3.32.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +21 -190
- package/package.json +1 -1
package/index.js
CHANGED
|
@@ -3,21 +3,6 @@ import os from 'node:os'
|
|
|
3
3
|
import { performance } from 'node:perf_hooks'
|
|
4
4
|
import client from '@platformatic/prom-client'
|
|
5
5
|
|
|
6
|
-
// Import individual metric collectors from prom-client
|
|
7
|
-
import processCpuTotal from '@platformatic/prom-client/lib/metrics/processCpuTotal.js'
|
|
8
|
-
import processStartTime from '@platformatic/prom-client/lib/metrics/processStartTime.js'
|
|
9
|
-
import osMemoryHeap from '@platformatic/prom-client/lib/metrics/osMemoryHeap.js'
|
|
10
|
-
import processOpenFileDescriptors from '@platformatic/prom-client/lib/metrics/processOpenFileDescriptors.js'
|
|
11
|
-
import processMaxFileDescriptors from '@platformatic/prom-client/lib/metrics/processMaxFileDescriptors.js'
|
|
12
|
-
import eventLoopLag from '@platformatic/prom-client/lib/metrics/eventLoopLag.js'
|
|
13
|
-
import processHandles from '@platformatic/prom-client/lib/metrics/processHandles.js'
|
|
14
|
-
import processRequests from '@platformatic/prom-client/lib/metrics/processRequests.js'
|
|
15
|
-
import processResources from '@platformatic/prom-client/lib/metrics/processResources.js'
|
|
16
|
-
import heapSizeAndUsed from '@platformatic/prom-client/lib/metrics/heapSizeAndUsed.js'
|
|
17
|
-
import heapSpacesSizeAndUsed from '@platformatic/prom-client/lib/metrics/heapSpacesSizeAndUsed.js'
|
|
18
|
-
import version from '@platformatic/prom-client/lib/metrics/version.js'
|
|
19
|
-
import gc from '@platformatic/prom-client/lib/metrics/gc.js'
|
|
20
|
-
|
|
21
6
|
export * as client from '@platformatic/prom-client'
|
|
22
7
|
|
|
23
8
|
const { eventLoopUtilization } = performance
|
|
@@ -25,49 +10,6 @@ const { Registry, Gauge, Counter, collectDefaultMetrics } = client
|
|
|
25
10
|
|
|
26
11
|
export const kMetricsGroups = Symbol('plt.metrics.MetricsGroups')
|
|
27
12
|
|
|
28
|
-
// Process-level metrics (same across all workers, collect once in main thread)
|
|
29
|
-
export const PROCESS_LEVEL_METRICS = [
|
|
30
|
-
'process_cpu_user_seconds_total',
|
|
31
|
-
'process_cpu_system_seconds_total',
|
|
32
|
-
'process_cpu_seconds_total',
|
|
33
|
-
'process_start_time_seconds',
|
|
34
|
-
'process_resident_memory_bytes',
|
|
35
|
-
'process_open_fds',
|
|
36
|
-
'process_max_fds',
|
|
37
|
-
'nodejs_version_info',
|
|
38
|
-
'process_cpu_percent_usage'
|
|
39
|
-
]
|
|
40
|
-
|
|
41
|
-
// Thread/isolate-specific metrics (different per worker)
|
|
42
|
-
export const THREAD_LEVEL_METRICS = [
|
|
43
|
-
'nodejs_heap_size_total_bytes',
|
|
44
|
-
'nodejs_heap_size_used_bytes',
|
|
45
|
-
'nodejs_external_memory_bytes',
|
|
46
|
-
'nodejs_heap_space_size_total_bytes',
|
|
47
|
-
'nodejs_heap_space_size_used_bytes',
|
|
48
|
-
'nodejs_heap_space_size_available_bytes',
|
|
49
|
-
'nodejs_eventloop_lag_seconds',
|
|
50
|
-
'nodejs_eventloop_lag_min_seconds',
|
|
51
|
-
'nodejs_eventloop_lag_max_seconds',
|
|
52
|
-
'nodejs_eventloop_lag_mean_seconds',
|
|
53
|
-
'nodejs_eventloop_lag_stddev_seconds',
|
|
54
|
-
'nodejs_eventloop_lag_p50_seconds',
|
|
55
|
-
'nodejs_eventloop_lag_p90_seconds',
|
|
56
|
-
'nodejs_eventloop_lag_p99_seconds',
|
|
57
|
-
'nodejs_eventloop_utilization',
|
|
58
|
-
'nodejs_gc_duration_seconds',
|
|
59
|
-
'nodejs_active_handles',
|
|
60
|
-
'nodejs_active_handles_total',
|
|
61
|
-
'nodejs_active_requests',
|
|
62
|
-
'nodejs_active_requests_total',
|
|
63
|
-
'nodejs_active_resources',
|
|
64
|
-
'nodejs_active_resources_total',
|
|
65
|
-
'thread_cpu_user_system_seconds_total',
|
|
66
|
-
'thread_cpu_system_seconds_total',
|
|
67
|
-
'thread_cpu_seconds_total',
|
|
68
|
-
'thread_cpu_percent_usage'
|
|
69
|
-
]
|
|
70
|
-
|
|
71
13
|
export function registerMetricsGroup (registry, group) {
|
|
72
14
|
registry[kMetricsGroups] ??= new Set()
|
|
73
15
|
registry[kMetricsGroups].add(group)
|
|
@@ -151,12 +93,25 @@ export async function collectThreadCpuMetrics (registry) {
|
|
|
151
93
|
registry.registerMetric(threadCpuPercentUsageGaugeMetric)
|
|
152
94
|
}
|
|
153
95
|
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
if (ensureMetricsGroup(registry, 'systemCpu')) {
|
|
96
|
+
export function collectEluMetric (registry) {
|
|
97
|
+
if (ensureMetricsGroup(registry, 'elu')) {
|
|
157
98
|
return
|
|
158
99
|
}
|
|
159
100
|
|
|
101
|
+
let startELU = eventLoopUtilization()
|
|
102
|
+
const eluMetric = new Gauge({
|
|
103
|
+
name: 'nodejs_eventloop_utilization',
|
|
104
|
+
help: 'The event loop utilization as a fraction of the loop time. 1 is fully utilized, 0 is fully idle.',
|
|
105
|
+
collect: () => {
|
|
106
|
+
const endELU = eventLoopUtilization()
|
|
107
|
+
const result = eventLoopUtilization(endELU, startELU).utilization
|
|
108
|
+
eluMetric.set(result)
|
|
109
|
+
startELU = endELU
|
|
110
|
+
},
|
|
111
|
+
registers: [registry]
|
|
112
|
+
})
|
|
113
|
+
registry.registerMetric(eluMetric)
|
|
114
|
+
|
|
160
115
|
let previousIdleTime = 0
|
|
161
116
|
let previousTotalTime = 0
|
|
162
117
|
const cpuMetric = new Gauge({
|
|
@@ -167,16 +122,14 @@ export function collectSystemCpuMetric (registry) {
|
|
|
167
122
|
let idleTime = 0
|
|
168
123
|
let totalTime = 0
|
|
169
124
|
|
|
170
|
-
|
|
171
|
-
const cpu
|
|
172
|
-
|
|
173
|
-
for (const type in times) {
|
|
174
|
-
totalTime += times[type]
|
|
125
|
+
cpus.forEach(cpu => {
|
|
126
|
+
for (const type in cpu.times) {
|
|
127
|
+
totalTime += cpu.times[type]
|
|
175
128
|
if (type === 'idle') {
|
|
176
|
-
idleTime += times[type]
|
|
129
|
+
idleTime += cpu.times[type]
|
|
177
130
|
}
|
|
178
131
|
}
|
|
179
|
-
}
|
|
132
|
+
})
|
|
180
133
|
|
|
181
134
|
const idleDiff = idleTime - previousIdleTime
|
|
182
135
|
const totalDiff = totalTime - previousTotalTime
|
|
@@ -193,127 +146,6 @@ export function collectSystemCpuMetric (registry) {
|
|
|
193
146
|
registry.registerMetric(cpuMetric)
|
|
194
147
|
}
|
|
195
148
|
|
|
196
|
-
// Collect only the ELU metric (thread-specific)
|
|
197
|
-
export function collectEluMetric (registry) {
|
|
198
|
-
if (ensureMetricsGroup(registry, 'elu')) {
|
|
199
|
-
return
|
|
200
|
-
}
|
|
201
|
-
|
|
202
|
-
let startELU = eventLoopUtilization()
|
|
203
|
-
const eluMetric = new Gauge({
|
|
204
|
-
name: 'nodejs_eventloop_utilization',
|
|
205
|
-
help: 'The event loop utilization as a fraction of the loop time. 1 is fully utilized, 0 is fully idle.',
|
|
206
|
-
collect: () => {
|
|
207
|
-
const endELU = eventLoopUtilization()
|
|
208
|
-
const result = eventLoopUtilization(endELU, startELU).utilization
|
|
209
|
-
eluMetric.set(result)
|
|
210
|
-
startELU = endELU
|
|
211
|
-
},
|
|
212
|
-
registers: [registry]
|
|
213
|
-
})
|
|
214
|
-
registry.registerMetric(eluMetric)
|
|
215
|
-
}
|
|
216
|
-
|
|
217
|
-
// Legacy function that collects both ELU and system CPU (for backward compatibility)
|
|
218
|
-
export function collectEluAndSystemCpuMetrics (registry) {
|
|
219
|
-
collectEluMetric(registry)
|
|
220
|
-
collectSystemCpuMetric(registry)
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
// Collect process-level metrics (same across all workers, should run in main thread only)
|
|
224
|
-
export function collectProcessMetrics (registry) {
|
|
225
|
-
if (ensureMetricsGroup(registry, 'process-level')) {
|
|
226
|
-
return
|
|
227
|
-
}
|
|
228
|
-
|
|
229
|
-
const config = {}
|
|
230
|
-
|
|
231
|
-
// Process CPU metrics
|
|
232
|
-
processCpuTotal(registry, config)
|
|
233
|
-
// Process start time
|
|
234
|
-
processStartTime(registry, config)
|
|
235
|
-
// Resident memory (RSS)
|
|
236
|
-
osMemoryHeap(registry, config)
|
|
237
|
-
// Open file descriptors (Linux)
|
|
238
|
-
processOpenFileDescriptors(registry, config)
|
|
239
|
-
// Max file descriptors (Linux)
|
|
240
|
-
processMaxFileDescriptors(registry, config)
|
|
241
|
-
// Node.js version info
|
|
242
|
-
version(registry, config)
|
|
243
|
-
// System CPU percent usage (os.cpus() based)
|
|
244
|
-
collectSystemCpuMetric(registry)
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
// Collect thread-specific metrics (different per worker)
|
|
248
|
-
export async function collectThreadMetrics (applicationId, workerId, metricsConfig = {}, registry = undefined) {
|
|
249
|
-
if (!registry) {
|
|
250
|
-
registry = new Registry()
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
const labels = { ...metricsConfig.labels }
|
|
254
|
-
|
|
255
|
-
// Use the configured label name
|
|
256
|
-
const labelName = metricsConfig.idLabel || 'applicationId'
|
|
257
|
-
labels[labelName] = applicationId
|
|
258
|
-
|
|
259
|
-
if (workerId >= 0) {
|
|
260
|
-
labels.workerId = workerId
|
|
261
|
-
}
|
|
262
|
-
registry.setDefaultLabels(labels)
|
|
263
|
-
|
|
264
|
-
if (metricsConfig.defaultMetrics) {
|
|
265
|
-
if (!ensureMetricsGroup(registry, 'thread-level')) {
|
|
266
|
-
const config = { eventLoopMonitoringPrecision: 10 }
|
|
267
|
-
|
|
268
|
-
// Thread-specific metrics only
|
|
269
|
-
heapSizeAndUsed(registry, config)
|
|
270
|
-
heapSpacesSizeAndUsed(registry, config)
|
|
271
|
-
eventLoopLag(registry, config)
|
|
272
|
-
gc(registry, config)
|
|
273
|
-
processHandles(registry, config)
|
|
274
|
-
processRequests(registry, config)
|
|
275
|
-
if (typeof process.getActiveResourcesInfo === 'function') {
|
|
276
|
-
processResources(registry, config)
|
|
277
|
-
}
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
// Event loop utilization (thread-specific)
|
|
281
|
-
collectEluMetric(registry)
|
|
282
|
-
// Thread CPU metrics
|
|
283
|
-
await collectThreadCpuMetrics(registry)
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
if (metricsConfig.httpMetrics && !ensureMetricsGroup(registry, 'http')) {
|
|
287
|
-
collectHttpMetrics(registry, {
|
|
288
|
-
customLabels: ['telemetry_id'],
|
|
289
|
-
getCustomLabels: req => {
|
|
290
|
-
const telemetryId = req.headers?.['x-plt-telemetry-id'] ?? 'unknown'
|
|
291
|
-
return { telemetry_id: telemetryId }
|
|
292
|
-
},
|
|
293
|
-
histogram: {
|
|
294
|
-
name: 'http_request_all_duration_seconds',
|
|
295
|
-
help: 'request duration in seconds summary for all requests',
|
|
296
|
-
collect: function () {
|
|
297
|
-
process.nextTick(() => this.reset())
|
|
298
|
-
}
|
|
299
|
-
},
|
|
300
|
-
summary: {
|
|
301
|
-
name: 'http_request_all_summary_seconds',
|
|
302
|
-
help: 'request duration in seconds histogram for all requests',
|
|
303
|
-
collect: function () {
|
|
304
|
-
process.nextTick(() => this.reset())
|
|
305
|
-
}
|
|
306
|
-
}
|
|
307
|
-
})
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
return {
|
|
311
|
-
registry,
|
|
312
|
-
otlpBridge: null
|
|
313
|
-
}
|
|
314
|
-
}
|
|
315
|
-
|
|
316
|
-
// Original function for backward compatibility (collects all metrics)
|
|
317
149
|
export async function collectMetrics (applicationId, workerId, metricsConfig = {}, registry = undefined) {
|
|
318
150
|
if (!registry) {
|
|
319
151
|
registry = new Registry()
|
|
@@ -336,7 +168,6 @@ export async function collectMetrics (applicationId, workerId, metricsConfig = {
|
|
|
336
168
|
}
|
|
337
169
|
|
|
338
170
|
collectEluMetric(registry)
|
|
339
|
-
collectSystemCpuMetric(registry)
|
|
340
171
|
await collectThreadCpuMetrics(registry)
|
|
341
172
|
}
|
|
342
173
|
|