@platformatic/metrics 3.29.1 → 3.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/index.js +197 -21
  2. package/package.json +1 -1
package/index.js CHANGED
@@ -3,6 +3,21 @@ import os from 'node:os'
3
3
  import { performance } from 'node:perf_hooks'
4
4
  import client from '@platformatic/prom-client'
5
5
 
6
+ // Import individual metric collectors from prom-client
7
+ import processCpuTotal from '@platformatic/prom-client/lib/metrics/processCpuTotal.js'
8
+ import processStartTime from '@platformatic/prom-client/lib/metrics/processStartTime.js'
9
+ import osMemoryHeap from '@platformatic/prom-client/lib/metrics/osMemoryHeap.js'
10
+ import processOpenFileDescriptors from '@platformatic/prom-client/lib/metrics/processOpenFileDescriptors.js'
11
+ import processMaxFileDescriptors from '@platformatic/prom-client/lib/metrics/processMaxFileDescriptors.js'
12
+ import eventLoopLag from '@platformatic/prom-client/lib/metrics/eventLoopLag.js'
13
+ import processHandles from '@platformatic/prom-client/lib/metrics/processHandles.js'
14
+ import processRequests from '@platformatic/prom-client/lib/metrics/processRequests.js'
15
+ import processResources from '@platformatic/prom-client/lib/metrics/processResources.js'
16
+ import heapSizeAndUsed from '@platformatic/prom-client/lib/metrics/heapSizeAndUsed.js'
17
+ import heapSpacesSizeAndUsed from '@platformatic/prom-client/lib/metrics/heapSpacesSizeAndUsed.js'
18
+ import version from '@platformatic/prom-client/lib/metrics/version.js'
19
+ import gc from '@platformatic/prom-client/lib/metrics/gc.js'
20
+
6
21
  export * as client from '@platformatic/prom-client'
7
22
 
8
23
  const { eventLoopUtilization } = performance
@@ -10,6 +25,49 @@ const { Registry, Gauge, Counter, collectDefaultMetrics } = client
10
25
 
11
26
  export const kMetricsGroups = Symbol('plt.metrics.MetricsGroups')
12
27
 
28
+ // Names of the process-level metrics: per the original note they are the same across all workers, so they are meant to be collected once, in the main thread. NOTE(review): presumably these are the names emitted by the collectors called from collectProcessMetrics - confirm against the prom-client collectors.
29
+ export const PROCESS_LEVEL_METRICS = [
30
+ 'process_cpu_user_seconds_total',
31
+ 'process_cpu_system_seconds_total',
32
+ 'process_cpu_seconds_total',
33
+ 'process_start_time_seconds',
34
+ 'process_resident_memory_bytes',
35
+ 'process_open_fds',
36
+ 'process_max_fds',
37
+ 'nodejs_version_info',
38
+ 'process_cpu_percent_usage'
39
+ ]
40
+
41
+ // Names of the thread/isolate-specific metrics (per the original note, different per worker). NOTE(review): 'thread_cpu_user_system_seconds_total' breaks the naming pattern of its siblings - confirm it matches the name actually registered by collectThreadCpuMetrics before relying on this list for filtering.
42
+ export const THREAD_LEVEL_METRICS = [
43
+ 'nodejs_heap_size_total_bytes',
44
+ 'nodejs_heap_size_used_bytes',
45
+ 'nodejs_external_memory_bytes',
46
+ 'nodejs_heap_space_size_total_bytes',
47
+ 'nodejs_heap_space_size_used_bytes',
48
+ 'nodejs_heap_space_size_available_bytes',
49
+ 'nodejs_eventloop_lag_seconds',
50
+ 'nodejs_eventloop_lag_min_seconds',
51
+ 'nodejs_eventloop_lag_max_seconds',
52
+ 'nodejs_eventloop_lag_mean_seconds',
53
+ 'nodejs_eventloop_lag_stddev_seconds',
54
+ 'nodejs_eventloop_lag_p50_seconds',
55
+ 'nodejs_eventloop_lag_p90_seconds',
56
+ 'nodejs_eventloop_lag_p99_seconds',
57
+ 'nodejs_eventloop_utilization',
58
+ 'nodejs_gc_duration_seconds',
59
+ 'nodejs_active_handles',
60
+ 'nodejs_active_handles_total',
61
+ 'nodejs_active_requests',
62
+ 'nodejs_active_requests_total',
63
+ 'nodejs_active_resources',
64
+ 'nodejs_active_resources_total',
65
+ 'thread_cpu_user_system_seconds_total',
66
+ 'thread_cpu_system_seconds_total',
67
+ 'thread_cpu_seconds_total',
68
+ 'thread_cpu_percent_usage'
69
+ ]
70
+
13
71
  export function registerMetricsGroup (registry, group) {
14
72
  registry[kMetricsGroups] ??= new Set()
15
73
  registry[kMetricsGroups].add(group)
@@ -32,6 +90,13 @@ export function ensureMetricsGroup (registry, group) {
32
90
  return false
33
91
  }
34
92
 
93
// Resets a registry: calls `registry.clear()` and, when the registry carries a
// groups collection under `kMetricsGroups` (the set that registerMetricsGroup
// adds group names to), empties that collection as well.
export function clearRegistry (registry) {
  registry.clear()

  const registeredGroups = registry[kMetricsGroups]
  if (!registeredGroups) {
    // No group was ever registered on this registry; nothing else to reset.
    return
  }

  registeredGroups.clear()
}
99
+
35
100
  export async function collectThreadCpuMetrics (registry) {
36
101
  if (ensureMetricsGroup(registry, 'threadCpuUsage')) {
37
102
  return
@@ -86,25 +151,12 @@ export async function collectThreadCpuMetrics (registry) {
86
151
  registry.registerMetric(threadCpuPercentUsageGaugeMetric)
87
152
  }
88
153
 
89
- export function collectEluMetric (registry) {
90
- if (ensureMetricsGroup(registry, 'elu')) {
154
+ // Collect system CPU usage metric (based on os.cpus(), process-level)
155
+ export function collectSystemCpuMetric (registry) {
156
+ if (ensureMetricsGroup(registry, 'systemCpu')) {
91
157
  return
92
158
  }
93
159
 
94
- let startELU = eventLoopUtilization()
95
- const eluMetric = new Gauge({
96
- name: 'nodejs_eventloop_utilization',
97
- help: 'The event loop utilization as a fraction of the loop time. 1 is fully utilized, 0 is fully idle.',
98
- collect: () => {
99
- const endELU = eventLoopUtilization()
100
- const result = eventLoopUtilization(endELU, startELU).utilization
101
- eluMetric.set(result)
102
- startELU = endELU
103
- },
104
- registers: [registry]
105
- })
106
- registry.registerMetric(eluMetric)
107
-
108
160
  let previousIdleTime = 0
109
161
  let previousTotalTime = 0
110
162
  const cpuMetric = new Gauge({
@@ -115,14 +167,16 @@ export function collectEluMetric (registry) {
115
167
  let idleTime = 0
116
168
  let totalTime = 0
117
169
 
118
- cpus.forEach(cpu => {
119
- for (const type in cpu.times) {
120
- totalTime += cpu.times[type]
170
+ for (let i = 0; i < cpus.length; i++) {
171
+ const cpu = cpus[i]
172
+ const times = cpu.times
173
+ for (const type in times) {
174
+ totalTime += times[type]
121
175
  if (type === 'idle') {
122
- idleTime += cpu.times[type]
176
+ idleTime += times[type]
123
177
  }
124
178
  }
125
- })
179
+ }
126
180
 
127
181
  const idleDiff = idleTime - previousIdleTime
128
182
  const totalDiff = totalTime - previousTotalTime
@@ -139,6 +193,127 @@ export function collectEluMetric (registry) {
139
193
  registry.registerMetric(cpuMetric)
140
194
  }
141
195
 
196
// Registers the `nodejs_eventloop_utilization` gauge on `registry`.
//
// Returns early, registering nothing, when ensureMetricsGroup(registry, 'elu')
// returns a truthy value. Otherwise every collection of the gauge reports the
// utilization measured between the previous reading and the current one; the
// first collection is measured against the reading taken at registration time.
export function collectEluMetric (registry) {
  const groupAlreadyPresent = ensureMetricsGroup(registry, 'elu')
  if (groupAlreadyPresent) {
    return
  }

  // Baseline reading; it is replaced after each collection so that every
  // sample only covers the interval since the previous one.
  let previousReading = eventLoopUtilization()

  const gauge = new Gauge({
    name: 'nodejs_eventloop_utilization',
    help: 'The event loop utilization as a fraction of the loop time. 1 is fully utilized, 0 is fully idle.',
    collect: () => {
      const currentReading = eventLoopUtilization()
      const { utilization } = eventLoopUtilization(currentReading, previousReading)

      gauge.set(utilization)
      previousReading = currentReading
    },
    registers: [registry]
  })

  registry.registerMetric(gauge)
}
216
+
217
// Backward-compatibility helper: registers both the event loop utilization
// metric and the system CPU metric on `registry` by delegating, in this order,
// to collectEluMetric and collectSystemCpuMetric.
export function collectEluAndSystemCpuMetrics (registry) {
  const collectors = [collectEluMetric, collectSystemCpuMetric]

  for (const collect of collectors) {
    collect(registry)
  }
}
222
+
223
// Registers the process-level metrics on `registry`. These are described by the
// original author as identical across all workers and meant to be collected in
// the main thread only.
//
// Returns early, registering nothing, when
// ensureMetricsGroup(registry, 'process-level') returns a truthy value.
export function collectProcessMetrics (registry) {
  const groupAlreadyPresent = ensureMetricsGroup(registry, 'process-level')
  if (groupAlreadyPresent) {
    return
  }

  // One options object is shared by every prom-client collector below.
  const collectorOptions = {}

  // Order matters only in that it mirrors the registration order callers
  // have always observed.
  const promClientCollectors = [
    processCpuTotal, // process CPU metrics
    processStartTime, // process start time
    osMemoryHeap, // resident memory (RSS)
    processOpenFileDescriptors, // open file descriptors (Linux)
    processMaxFileDescriptors, // max file descriptors (Linux)
    version // Node.js version info
  ]

  for (const collector of promClientCollectors) {
    collector(registry, collectorOptions)
  }

  // System CPU percent usage (os.cpus() based); takes no options object.
  collectSystemCpuMetric(registry)
}
246
+
247
// Registers the thread-specific metrics (described by the original author as
// different per worker) and returns the registry they were registered on.
//
// Parameters:
// - applicationId: value stored in the default label named by
//   `metricsConfig.idLabel` ('applicationId' when that option is falsy).
// - workerId: added as the `workerId` default label when `workerId >= 0`;
//   `undefined` and negative values fail that comparison and add no label.
// - metricsConfig: options object. This function reads `labels`, `idLabel`,
//   `defaultMetrics` and `httpMetrics`.
// - registry: registry to populate; a new `Registry` is created when falsy.
//
// Returns a promise for `{ registry, otlpBridge }`, where `otlpBridge` is
// always `null` here.
export async function collectThreadMetrics (applicationId, workerId, metricsConfig = {}, registry = undefined) {
  if (!registry) {
    registry = new Registry()
  }

  // Copy the configured labels so the caller's object is never mutated.
  const labels = { ...metricsConfig.labels }

  // Use the configured label name
  const labelName = metricsConfig.idLabel || 'applicationId'
  labels[labelName] = applicationId

  if (workerId >= 0) {
    labels.workerId = workerId
  }
  registry.setDefaultLabels(labels)

  if (metricsConfig.defaultMetrics) {
    if (!ensureMetricsGroup(registry, 'thread-level')) {
      const config = { eventLoopMonitoringPrecision: 10 }

      // Thread-specific metrics only
      heapSizeAndUsed(registry, config)
      heapSpacesSizeAndUsed(registry, config)
      eventLoopLag(registry, config)
      gc(registry, config)
      processHandles(registry, config)
      processRequests(registry, config)
      // Only register the resources collector when the running Node.js
      // exposes process.getActiveResourcesInfo.
      if (typeof process.getActiveResourcesInfo === 'function') {
        processResources(registry, config)
      }
    }

    // Event loop utilization (thread-specific); the callee is invoked on
    // every call and does its own ensureMetricsGroup check.
    collectEluMetric(registry)
    // Thread CPU metrics; likewise guarded inside the callee.
    await collectThreadCpuMetrics(registry)
  }

  if (metricsConfig.httpMetrics && !ensureMetricsGroup(registry, 'http')) {
    collectHttpMetrics(registry, {
      customLabels: ['telemetry_id'],
      getCustomLabels: req => {
        const telemetryId = req.headers?.['x-plt-telemetry-id'] ?? 'unknown'
        return { telemetry_id: telemetryId }
      },
      // The help texts used to be swapped (the histogram was described as a
      // summary and vice versa); each now describes its own metric type.
      // NOTE(review): if another function in this file registers the same
      // metric names, give it the same help texts.
      histogram: {
        name: 'http_request_all_duration_seconds',
        help: 'request duration in seconds histogram for all requests',
        // Schedules a reset on the next tick every time the metric is
        // collected. `function` (not an arrow) is required: `this` must be
        // the metric instance.
        collect: function () {
          process.nextTick(() => this.reset())
        }
      },
      summary: {
        name: 'http_request_all_summary_seconds',
        help: 'request duration in seconds summary for all requests',
        collect: function () {
          process.nextTick(() => this.reset())
        }
      }
    })
  }

  return {
    registry,
    otlpBridge: null
  }
}
315
+
316
+ // Original function for backward compatibility (collects all metrics)
142
317
  export async function collectMetrics (applicationId, workerId, metricsConfig = {}, registry = undefined) {
143
318
  if (!registry) {
144
319
  registry = new Registry()
@@ -161,6 +336,7 @@ export async function collectMetrics (applicationId, workerId, metricsConfig = {
161
336
  }
162
337
 
163
338
  collectEluMetric(registry)
339
+ collectSystemCpuMetric(registry)
164
340
  await collectThreadCpuMetrics(registry)
165
341
  }
166
342
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platformatic/metrics",
3
- "version": "3.29.1",
3
+ "version": "3.31.0",
4
4
  "description": "Platformatic Capability Metrics",
5
5
  "main": "index.js",
6
6
  "type": "module",