@platformatic/watt-extra 1.3.2 → 1.4.0-alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/plugins/flamegraphs.js +46 -11
- package/test/trigger-flamegraphs.test.js +165 -3
package/package.json
CHANGED
package/plugins/flamegraphs.js
CHANGED
|
@@ -13,6 +13,23 @@ async function flamegraphs (app, _opts) {
|
|
|
13
13
|
const eluThreshold = parseFloat(flamegraphsELUThreshold)
|
|
14
14
|
const gracePeriod = parseInt(flamegraphsGracePeriod)
|
|
15
15
|
|
|
16
|
+
let workerStartedListener = null
|
|
17
|
+
|
|
18
|
+
const startProfilingOnWorker = async (runtime, workerFullId, logContext = {}) => {
|
|
19
|
+
await sleep(gracePeriod)
|
|
20
|
+
|
|
21
|
+
try {
|
|
22
|
+
await runtime.sendCommandToApplication(
|
|
23
|
+
workerFullId,
|
|
24
|
+
'startProfiling',
|
|
25
|
+
{ durationMillis, eluThreshold }
|
|
26
|
+
)
|
|
27
|
+
} catch (err) {
|
|
28
|
+
app.log.error({ err, ...logContext }, 'Failed to start profiling')
|
|
29
|
+
throw err
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
|
|
16
33
|
app.setupFlamegraphs = async () => {
|
|
17
34
|
if (isFlamegraphsDisabled) {
|
|
18
35
|
app.log.info('PLT_DISABLE_FLAMEGRAPHS is set, skipping profiling')
|
|
@@ -21,19 +38,15 @@ async function flamegraphs (app, _opts) {
|
|
|
21
38
|
|
|
22
39
|
app.log.info('Start profiling services')
|
|
23
40
|
|
|
24
|
-
await sleep(gracePeriod)
|
|
25
|
-
|
|
26
41
|
const runtime = app.watt.runtime
|
|
27
|
-
const
|
|
42
|
+
const workers = await runtime.getWorkers()
|
|
28
43
|
|
|
29
44
|
const promises = []
|
|
30
|
-
for (const
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
)
|
|
36
|
-
promises.push(promise)
|
|
45
|
+
for (const [workerFullId, workerInfo] of Object.entries(workers)) {
|
|
46
|
+
if (workerInfo.status === 'started') {
|
|
47
|
+
const promise = startProfilingOnWorker(runtime, workerFullId, { workerFullId })
|
|
48
|
+
promises.push(promise)
|
|
49
|
+
}
|
|
37
50
|
}
|
|
38
51
|
|
|
39
52
|
const results = await Promise.allSettled(promises)
|
|
@@ -42,6 +55,28 @@ async function flamegraphs (app, _opts) {
|
|
|
42
55
|
app.log.error({ result }, 'Failed to start profiling')
|
|
43
56
|
}
|
|
44
57
|
}
|
|
58
|
+
|
|
59
|
+
// Listen for new workers starting and start profiling on them
|
|
60
|
+
workerStartedListener = ({ application, worker }) => {
|
|
61
|
+
if (isFlamegraphsDisabled) {
|
|
62
|
+
return
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
const workerFullId = [application, worker].join(':')
|
|
66
|
+
app.log.info({ application, worker }, 'Starting profiling on new worker')
|
|
67
|
+
|
|
68
|
+
startProfilingOnWorker(runtime, workerFullId, { application, worker }).catch(() => {
|
|
69
|
+
// Error already logged in startProfilingOnWorker
|
|
70
|
+
})
|
|
71
|
+
}
|
|
72
|
+
runtime.on('application:worker:started', workerStartedListener)
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
app.cleanupFlamegraphs = () => {
|
|
76
|
+
if (workerStartedListener && app.watt?.runtime) {
|
|
77
|
+
app.watt.runtime.removeListener('application:worker:started', workerStartedListener)
|
|
78
|
+
workerStartedListener = null
|
|
79
|
+
}
|
|
45
80
|
}
|
|
46
81
|
|
|
47
82
|
app.sendFlamegraphs = async (options = {}) => {
|
|
@@ -70,7 +105,7 @@ async function flamegraphs (app, _opts) {
|
|
|
70
105
|
|
|
71
106
|
const uploadPromises = serviceIds.map(async (serviceId) => {
|
|
72
107
|
try {
|
|
73
|
-
const profile = await runtime.sendCommandToApplication(serviceId, 'getLastProfile')
|
|
108
|
+
const profile = await runtime.sendCommandToApplication(serviceId, 'getLastProfile', { type: profileType })
|
|
74
109
|
if (!profile || !(profile instanceof Uint8Array)) {
|
|
75
110
|
app.log.error({ serviceId }, 'Failed to get profile from service')
|
|
76
111
|
return
|
|
package/test/trigger-flamegraphs.test.js
CHANGED
|
@@ -36,11 +36,38 @@ function setupMockIccServer (wss, receivedMessages, validateAuth = false) {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
function createMockApp (port, includeScalerUrl = true) {
|
|
39
|
+
const eventListeners = new Map()
|
|
40
|
+
|
|
39
41
|
const mockWatt = {
|
|
40
42
|
runtime: {
|
|
41
|
-
|
|
43
|
+
getWorkers: async () => ({
|
|
44
|
+
'service-1:0': { application: 'service-1', worker: 0, status: 'started' },
|
|
45
|
+
'service-2:0': { application: 'service-2', worker: 0, status: 'started' }
|
|
46
|
+
}),
|
|
47
|
+
getApplications: async () => ({
|
|
42
48
|
applications: [{ id: 'service-1' }, { id: 'service-2' }]
|
|
43
|
-
})
|
|
49
|
+
}),
|
|
50
|
+
on: (event, listener) => {
|
|
51
|
+
if (!eventListeners.has(event)) {
|
|
52
|
+
eventListeners.set(event, [])
|
|
53
|
+
}
|
|
54
|
+
eventListeners.get(event).push(listener)
|
|
55
|
+
},
|
|
56
|
+
removeListener: (event, listener) => {
|
|
57
|
+
const listeners = eventListeners.get(event)
|
|
58
|
+
if (listeners) {
|
|
59
|
+
const index = listeners.indexOf(listener)
|
|
60
|
+
if (index !== -1) {
|
|
61
|
+
listeners.splice(index, 1)
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
emit: (event, ...args) => {
|
|
66
|
+
const listeners = eventListeners.get(event) || []
|
|
67
|
+
for (const listener of listeners) {
|
|
68
|
+
listener(...args)
|
|
69
|
+
}
|
|
70
|
+
}
|
|
44
71
|
}
|
|
45
72
|
}
|
|
46
73
|
|
|
@@ -108,6 +135,9 @@ test('should handle trigger-flamegraph command and upload flamegraphs from servi
|
|
|
108
135
|
serviceId,
|
|
109
136
|
command
|
|
110
137
|
) => {
|
|
138
|
+
if (command === 'startProfiling') {
|
|
139
|
+
return { success: true }
|
|
140
|
+
}
|
|
111
141
|
if (command === 'getLastProfile') {
|
|
112
142
|
getFlamegraphReqs.push({ serviceId })
|
|
113
143
|
if (getFlamegraphReqs.length === 2) {
|
|
@@ -146,6 +176,7 @@ test('should handle trigger-flamegraph command and upload flamegraphs from servi
|
|
|
146
176
|
equal(service1Req.serviceId, 'service-1')
|
|
147
177
|
equal(service2Req.serviceId, 'service-2')
|
|
148
178
|
|
|
179
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
149
180
|
await app.closeUpdates()
|
|
150
181
|
})
|
|
151
182
|
|
|
@@ -178,6 +209,7 @@ test('should handle trigger-flamegraph when no runtime is available', async (t)
|
|
|
178
209
|
|
|
179
210
|
await sleep(100)
|
|
180
211
|
|
|
212
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
181
213
|
await app.closeUpdates()
|
|
182
214
|
})
|
|
183
215
|
|
|
@@ -202,6 +234,9 @@ test('should handle trigger-flamegraph when flamegraph upload fails', async (t)
|
|
|
202
234
|
command,
|
|
203
235
|
options
|
|
204
236
|
) => {
|
|
237
|
+
if (command === 'startProfiling') {
|
|
238
|
+
return { success: true }
|
|
239
|
+
}
|
|
205
240
|
if (command === 'sendFlamegraph' && options.url && options.headers) {
|
|
206
241
|
throw new Error('Flamegraph upload failed')
|
|
207
242
|
}
|
|
@@ -224,6 +259,7 @@ test('should handle trigger-flamegraph when flamegraph upload fails', async (t)
|
|
|
224
259
|
|
|
225
260
|
await sleep(100)
|
|
226
261
|
|
|
262
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
227
263
|
await app.closeUpdates()
|
|
228
264
|
})
|
|
229
265
|
|
|
@@ -250,8 +286,15 @@ test('should handle trigger-heapprofile command and upload heap profiles from se
|
|
|
250
286
|
|
|
251
287
|
app.watt.runtime.sendCommandToApplication = async (
|
|
252
288
|
serviceId,
|
|
253
|
-
command
|
|
289
|
+
command,
|
|
290
|
+
options
|
|
254
291
|
) => {
|
|
292
|
+
if (options && options.type) {
|
|
293
|
+
equal(options.type, 'heap')
|
|
294
|
+
}
|
|
295
|
+
if (command === 'startProfiling') {
|
|
296
|
+
return { success: true }
|
|
297
|
+
}
|
|
255
298
|
if (command === 'getLastProfile') {
|
|
256
299
|
getHeapProfileReqs.push({ serviceId })
|
|
257
300
|
if (getHeapProfileReqs.length === 2) {
|
|
@@ -290,6 +333,7 @@ test('should handle trigger-heapprofile command and upload heap profiles from se
|
|
|
290
333
|
equal(service1Req.serviceId, 'service-1')
|
|
291
334
|
equal(service2Req.serviceId, 'service-2')
|
|
292
335
|
|
|
336
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
293
337
|
await app.closeUpdates()
|
|
294
338
|
})
|
|
295
339
|
|
|
@@ -330,6 +374,9 @@ test('should handle PLT_PPROF_NO_PROFILE_AVAILABLE error with info log', async (
|
|
|
330
374
|
serviceId,
|
|
331
375
|
command
|
|
332
376
|
) => {
|
|
377
|
+
if (command === 'startProfiling') {
|
|
378
|
+
return { success: true }
|
|
379
|
+
}
|
|
333
380
|
if (command === 'getLastProfile') {
|
|
334
381
|
const error = new Error('No profile available - wait for profiling to complete or trigger manual capture')
|
|
335
382
|
error.code = 'PLT_PPROF_NO_PROFILE_AVAILABLE'
|
|
@@ -359,6 +406,7 @@ test('should handle PLT_PPROF_NO_PROFILE_AVAILABLE error with info log', async (
|
|
|
359
406
|
equal(infoLogs[0][0].podId, 'test-pod-123')
|
|
360
407
|
equal(infoLogs[0][1], 'No profile available for the service')
|
|
361
408
|
|
|
409
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
362
410
|
await app.closeUpdates()
|
|
363
411
|
})
|
|
364
412
|
|
|
@@ -399,6 +447,9 @@ test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) =>
|
|
|
399
447
|
serviceId,
|
|
400
448
|
command
|
|
401
449
|
) => {
|
|
450
|
+
if (command === 'startProfiling') {
|
|
451
|
+
return { success: true }
|
|
452
|
+
}
|
|
402
453
|
if (command === 'getLastProfile') {
|
|
403
454
|
const error = new Error('No profile available - event loop utilization has been below threshold for too long')
|
|
404
455
|
error.code = 'PLT_PPROF_NOT_ENOUGH_ELU'
|
|
@@ -428,5 +479,116 @@ test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) =>
|
|
|
428
479
|
equal(infoLogs[0][0].podId, 'test-pod-123')
|
|
429
480
|
equal(infoLogs[0][1], 'ELU low, CPU profiling not active')
|
|
430
481
|
|
|
482
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
483
|
+
await app.closeUpdates()
|
|
484
|
+
})
|
|
485
|
+
|
|
486
|
+
test('should start profiling on new workers that start after initial setup', async (t) => {
|
|
487
|
+
setUpEnvironment()
|
|
488
|
+
|
|
489
|
+
const receivedMessages = []
|
|
490
|
+
const startProfilingCalls = []
|
|
491
|
+
|
|
492
|
+
const wss = new WebSocketServer({ port: port + 6 })
|
|
493
|
+
t.after(async () => wss.close())
|
|
494
|
+
|
|
495
|
+
const { waitForClientSubscription } = setupMockIccServer(
|
|
496
|
+
wss,
|
|
497
|
+
receivedMessages,
|
|
498
|
+
false
|
|
499
|
+
)
|
|
500
|
+
|
|
501
|
+
const app = createMockApp(port + 6)
|
|
502
|
+
|
|
503
|
+
app.watt.runtime.sendCommandToApplication = async (
|
|
504
|
+
serviceId,
|
|
505
|
+
command,
|
|
506
|
+
options
|
|
507
|
+
) => {
|
|
508
|
+
if (command === 'startProfiling') {
|
|
509
|
+
startProfilingCalls.push({ serviceId, options })
|
|
510
|
+
}
|
|
511
|
+
return { success: true }
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
await updatePlugin(app)
|
|
515
|
+
await flamegraphsPlugin(app)
|
|
516
|
+
|
|
517
|
+
await app.connectToUpdates()
|
|
518
|
+
await app.setupFlamegraphs()
|
|
519
|
+
|
|
520
|
+
await waitForClientSubscription
|
|
521
|
+
|
|
522
|
+
equal(startProfilingCalls.length, 2)
|
|
523
|
+
equal(startProfilingCalls[0].serviceId, 'service-1:0')
|
|
524
|
+
equal(startProfilingCalls[1].serviceId, 'service-2:0')
|
|
525
|
+
|
|
526
|
+
app.watt.runtime.emit('application:worker:started', {
|
|
527
|
+
application: 'service-1',
|
|
528
|
+
worker: 1,
|
|
529
|
+
workersCount: 2
|
|
530
|
+
})
|
|
531
|
+
|
|
532
|
+
await sleep(10)
|
|
533
|
+
|
|
534
|
+
equal(startProfilingCalls.length, 3)
|
|
535
|
+
equal(startProfilingCalls[2].serviceId, 'service-1:1')
|
|
536
|
+
equal(startProfilingCalls[2].options.durationMillis, 1000)
|
|
537
|
+
equal(startProfilingCalls[2].options.eluThreshold, 0)
|
|
538
|
+
|
|
539
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
540
|
+
await app.closeUpdates()
|
|
541
|
+
})
|
|
542
|
+
|
|
543
|
+
test('should not start profiling on new workers when flamegraphs are disabled', async (t) => {
|
|
544
|
+
setUpEnvironment()
|
|
545
|
+
|
|
546
|
+
const receivedMessages = []
|
|
547
|
+
const startProfilingCalls = []
|
|
548
|
+
|
|
549
|
+
const wss = new WebSocketServer({ port: port + 7 })
|
|
550
|
+
t.after(async () => wss.close())
|
|
551
|
+
|
|
552
|
+
const { waitForClientSubscription } = setupMockIccServer(
|
|
553
|
+
wss,
|
|
554
|
+
receivedMessages,
|
|
555
|
+
false
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
const app = createMockApp(port + 7)
|
|
559
|
+
app.env.PLT_DISABLE_FLAMEGRAPHS = true
|
|
560
|
+
|
|
561
|
+
app.watt.runtime.sendCommandToApplication = async (
|
|
562
|
+
serviceId,
|
|
563
|
+
command,
|
|
564
|
+
options
|
|
565
|
+
) => {
|
|
566
|
+
if (command === 'startProfiling') {
|
|
567
|
+
startProfilingCalls.push({ serviceId, options })
|
|
568
|
+
}
|
|
569
|
+
return { success: true }
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
await updatePlugin(app)
|
|
573
|
+
await flamegraphsPlugin(app)
|
|
574
|
+
|
|
575
|
+
await app.connectToUpdates()
|
|
576
|
+
await app.setupFlamegraphs()
|
|
577
|
+
|
|
578
|
+
await waitForClientSubscription
|
|
579
|
+
|
|
580
|
+
equal(startProfilingCalls.length, 0)
|
|
581
|
+
|
|
582
|
+
app.watt.runtime.emit('application:worker:started', {
|
|
583
|
+
application: 'service-1',
|
|
584
|
+
worker: 1,
|
|
585
|
+
workersCount: 2
|
|
586
|
+
})
|
|
587
|
+
|
|
588
|
+
await sleep(10)
|
|
589
|
+
|
|
590
|
+
equal(startProfilingCalls.length, 0)
|
|
591
|
+
|
|
592
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
431
593
|
await app.closeUpdates()
|
|
432
594
|
})
|