@platformatic/watt-extra 1.3.2 → 1.4.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +11 -0
- package/package.json +1 -1
- package/plugins/flamegraphs.js +45 -10
- package/test/trigger-flamegraphs.test.js +160 -2
package/package.json
CHANGED
package/plugins/flamegraphs.js
CHANGED
|
@@ -13,6 +13,23 @@ async function flamegraphs (app, _opts) {
|
|
|
13
13
|
const eluThreshold = parseFloat(flamegraphsELUThreshold)
|
|
14
14
|
const gracePeriod = parseInt(flamegraphsGracePeriod)
|
|
15
15
|
|
|
16
|
+
let workerStartedListener = null
|
|
17
|
+
|
|
18
|
+
/**
 * Waits for the grace period, then sends the `startProfiling` command to a
 * single worker.
 *
 * @param {object} runtime - Object exposing `sendCommandToApplication`.
 * @param {string} workerFullId - Identifier passed as the command target.
 * @param {object} [logContext={}] - Extra fields merged into the error log
 *   entry when the command fails.
 * @returns {Promise<void>} Settles once the command call has completed.
 * @throws Rethrows the error raised by `sendCommandToApplication` after
 *   logging it, so callers can decide whether to ignore it.
 */
const startProfilingOnWorker = async (runtime, workerFullId, logContext = {}) => {
  // The grace period is awaited on every call, i.e. once per worker.
  await sleep(gracePeriod)

  try {
    // durationMillis and eluThreshold are read from the enclosing scope.
    const profilingOptions = { durationMillis, eluThreshold }
    await runtime.sendCommandToApplication(workerFullId, 'startProfiling', profilingOptions)
  } catch (err) {
    const logFields = { err, ...logContext }
    app.log.error(logFields, 'Failed to start profiling')
    throw err
  }
}
|
|
32
|
+
|
|
16
33
|
app.setupFlamegraphs = async () => {
|
|
17
34
|
if (isFlamegraphsDisabled) {
|
|
18
35
|
app.log.info('PLT_DISABLE_FLAMEGRAPHS is set, skipping profiling')
|
|
@@ -21,19 +38,15 @@ async function flamegraphs (app, _opts) {
|
|
|
21
38
|
|
|
22
39
|
app.log.info('Start profiling services')
|
|
23
40
|
|
|
24
|
-
await sleep(gracePeriod)
|
|
25
|
-
|
|
26
41
|
const runtime = app.watt.runtime
|
|
27
|
-
const
|
|
42
|
+
const workers = await runtime.getWorkers()
|
|
28
43
|
|
|
29
44
|
const promises = []
|
|
30
|
-
for (const
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
)
|
|
36
|
-
promises.push(promise)
|
|
45
|
+
for (const [workerFullId, workerInfo] of Object.entries(workers)) {
|
|
46
|
+
if (workerInfo.status === 'started') {
|
|
47
|
+
const promise = startProfilingOnWorker(runtime, workerFullId, { workerFullId })
|
|
48
|
+
promises.push(promise)
|
|
49
|
+
}
|
|
37
50
|
}
|
|
38
51
|
|
|
39
52
|
const results = await Promise.allSettled(promises)
|
|
@@ -42,6 +55,28 @@ async function flamegraphs (app, _opts) {
|
|
|
42
55
|
app.log.error({ result }, 'Failed to start profiling')
|
|
43
56
|
}
|
|
44
57
|
}
|
|
58
|
+
|
|
59
|
+
// Listen for new workers starting and start profiling on them
|
|
60
|
+
workerStartedListener = ({ application, worker }) => {
|
|
61
|
+
if (isFlamegraphsDisabled) {
|
|
62
|
+
return
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
const workerFullId = [application, worker].join(':')
|
|
66
|
+
app.log.info({ application, worker }, 'Starting profiling on new worker')
|
|
67
|
+
|
|
68
|
+
startProfilingOnWorker(runtime, workerFullId, { application, worker }).catch(() => {
|
|
69
|
+
// Error already logged in startProfilingOnWorker
|
|
70
|
+
})
|
|
71
|
+
}
|
|
72
|
+
runtime.on('application:worker:started', workerStartedListener)
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/**
 * Detaches the `application:worker:started` listener from the runtime and
 * forgets it. Does nothing when no listener is stored or when
 * `app.watt.runtime` is not available.
 */
app.cleanupFlamegraphs = () => {
  if (!workerStartedListener) {
    return
  }

  const currentRuntime = app.watt?.runtime
  if (!currentRuntime) {
    // The stored listener is intentionally kept when there is no runtime.
    return
  }

  currentRuntime.removeListener('application:worker:started', workerStartedListener)
  workerStartedListener = null
}
|
|
46
81
|
|
|
47
82
|
app.sendFlamegraphs = async (options = {}) => {
|
|
@@ -36,11 +36,38 @@ function setupMockIccServer (wss, receivedMessages, validateAuth = false) {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
function createMockApp (port, includeScalerUrl = true) {
|
|
39
|
+
const eventListeners = new Map()
|
|
40
|
+
|
|
39
41
|
const mockWatt = {
|
|
40
42
|
runtime: {
|
|
41
|
-
|
|
43
|
+
getWorkers: async () => ({
|
|
44
|
+
'service-1:0': { application: 'service-1', worker: 0, status: 'started' },
|
|
45
|
+
'service-2:0': { application: 'service-2', worker: 0, status: 'started' }
|
|
46
|
+
}),
|
|
47
|
+
getApplications: async () => ({
|
|
42
48
|
applications: [{ id: 'service-1' }, { id: 'service-2' }]
|
|
43
|
-
})
|
|
49
|
+
}),
|
|
50
|
+
on: (event, listener) => {
|
|
51
|
+
if (!eventListeners.has(event)) {
|
|
52
|
+
eventListeners.set(event, [])
|
|
53
|
+
}
|
|
54
|
+
eventListeners.get(event).push(listener)
|
|
55
|
+
},
|
|
56
|
+
removeListener: (event, listener) => {
|
|
57
|
+
const listeners = eventListeners.get(event)
|
|
58
|
+
if (listeners) {
|
|
59
|
+
const index = listeners.indexOf(listener)
|
|
60
|
+
if (index !== -1) {
|
|
61
|
+
listeners.splice(index, 1)
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
emit: (event, ...args) => {
|
|
66
|
+
const listeners = eventListeners.get(event) || []
|
|
67
|
+
for (const listener of listeners) {
|
|
68
|
+
listener(...args)
|
|
69
|
+
}
|
|
70
|
+
}
|
|
44
71
|
}
|
|
45
72
|
}
|
|
46
73
|
|
|
@@ -108,6 +135,9 @@ test('should handle trigger-flamegraph command and upload flamegraphs from servi
|
|
|
108
135
|
serviceId,
|
|
109
136
|
command
|
|
110
137
|
) => {
|
|
138
|
+
if (command === 'startProfiling') {
|
|
139
|
+
return { success: true }
|
|
140
|
+
}
|
|
111
141
|
if (command === 'getLastProfile') {
|
|
112
142
|
getFlamegraphReqs.push({ serviceId })
|
|
113
143
|
if (getFlamegraphReqs.length === 2) {
|
|
@@ -146,6 +176,7 @@ test('should handle trigger-flamegraph command and upload flamegraphs from servi
|
|
|
146
176
|
equal(service1Req.serviceId, 'service-1')
|
|
147
177
|
equal(service2Req.serviceId, 'service-2')
|
|
148
178
|
|
|
179
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
149
180
|
await app.closeUpdates()
|
|
150
181
|
})
|
|
151
182
|
|
|
@@ -178,6 +209,7 @@ test('should handle trigger-flamegraph when no runtime is available', async (t)
|
|
|
178
209
|
|
|
179
210
|
await sleep(100)
|
|
180
211
|
|
|
212
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
181
213
|
await app.closeUpdates()
|
|
182
214
|
})
|
|
183
215
|
|
|
@@ -202,6 +234,9 @@ test('should handle trigger-flamegraph when flamegraph upload fails', async (t)
|
|
|
202
234
|
command,
|
|
203
235
|
options
|
|
204
236
|
) => {
|
|
237
|
+
if (command === 'startProfiling') {
|
|
238
|
+
return { success: true }
|
|
239
|
+
}
|
|
205
240
|
if (command === 'sendFlamegraph' && options.url && options.headers) {
|
|
206
241
|
throw new Error('Flamegraph upload failed')
|
|
207
242
|
}
|
|
@@ -224,6 +259,7 @@ test('should handle trigger-flamegraph when flamegraph upload fails', async (t)
|
|
|
224
259
|
|
|
225
260
|
await sleep(100)
|
|
226
261
|
|
|
262
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
227
263
|
await app.closeUpdates()
|
|
228
264
|
})
|
|
229
265
|
|
|
@@ -252,6 +288,9 @@ test('should handle trigger-heapprofile command and upload heap profiles from se
|
|
|
252
288
|
serviceId,
|
|
253
289
|
command
|
|
254
290
|
) => {
|
|
291
|
+
if (command === 'startProfiling') {
|
|
292
|
+
return { success: true }
|
|
293
|
+
}
|
|
255
294
|
if (command === 'getLastProfile') {
|
|
256
295
|
getHeapProfileReqs.push({ serviceId })
|
|
257
296
|
if (getHeapProfileReqs.length === 2) {
|
|
@@ -290,6 +329,7 @@ test('should handle trigger-heapprofile command and upload heap profiles from se
|
|
|
290
329
|
equal(service1Req.serviceId, 'service-1')
|
|
291
330
|
equal(service2Req.serviceId, 'service-2')
|
|
292
331
|
|
|
332
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
293
333
|
await app.closeUpdates()
|
|
294
334
|
})
|
|
295
335
|
|
|
@@ -330,6 +370,9 @@ test('should handle PLT_PPROF_NO_PROFILE_AVAILABLE error with info log', async (
|
|
|
330
370
|
serviceId,
|
|
331
371
|
command
|
|
332
372
|
) => {
|
|
373
|
+
if (command === 'startProfiling') {
|
|
374
|
+
return { success: true }
|
|
375
|
+
}
|
|
333
376
|
if (command === 'getLastProfile') {
|
|
334
377
|
const error = new Error('No profile available - wait for profiling to complete or trigger manual capture')
|
|
335
378
|
error.code = 'PLT_PPROF_NO_PROFILE_AVAILABLE'
|
|
@@ -359,6 +402,7 @@ test('should handle PLT_PPROF_NO_PROFILE_AVAILABLE error with info log', async (
|
|
|
359
402
|
equal(infoLogs[0][0].podId, 'test-pod-123')
|
|
360
403
|
equal(infoLogs[0][1], 'No profile available for the service')
|
|
361
404
|
|
|
405
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
362
406
|
await app.closeUpdates()
|
|
363
407
|
})
|
|
364
408
|
|
|
@@ -399,6 +443,9 @@ test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) =>
|
|
|
399
443
|
serviceId,
|
|
400
444
|
command
|
|
401
445
|
) => {
|
|
446
|
+
if (command === 'startProfiling') {
|
|
447
|
+
return { success: true }
|
|
448
|
+
}
|
|
402
449
|
if (command === 'getLastProfile') {
|
|
403
450
|
const error = new Error('No profile available - event loop utilization has been below threshold for too long')
|
|
404
451
|
error.code = 'PLT_PPROF_NOT_ENOUGH_ELU'
|
|
@@ -428,5 +475,116 @@ test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) =>
|
|
|
428
475
|
equal(infoLogs[0][0].podId, 'test-pod-123')
|
|
429
476
|
equal(infoLogs[0][1], 'ELU low, CPU profiling not active')
|
|
430
477
|
|
|
478
|
+
if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
|
|
479
|
+
await app.closeUpdates()
|
|
480
|
+
})
|
|
481
|
+
|
|
482
|
+
// Verifies that a worker announced through 'application:worker:started'
// after setupFlamegraphs() also receives a 'startProfiling' command.
test('should start profiling on new workers that start after initial setup', async (t) => {
  setUpEnvironment()

  const testPort = port + 6
  const iccMessages = []
  const profilingRequests = []

  const wss = new WebSocketServer({ port: testPort })
  t.after(async () => wss.close())

  const { waitForClientSubscription } = setupMockIccServer(wss, iccMessages, false)

  const app = createMockApp(testPort)

  // Record every startProfiling command; all commands report success.
  app.watt.runtime.sendCommandToApplication = async (serviceId, command, options) => {
    if (command === 'startProfiling') {
      profilingRequests.push({ serviceId, options })
    }
    return { success: true }
  }

  await updatePlugin(app)
  await flamegraphsPlugin(app)

  await app.connectToUpdates()
  await app.setupFlamegraphs()

  await waitForClientSubscription

  // The mock runtime reports two started workers, one per service.
  equal(profilingRequests.length, 2)
  const [firstRequest, secondRequest] = profilingRequests
  equal(firstRequest.serviceId, 'service-1:0')
  equal(secondRequest.serviceId, 'service-2:0')

  // Announce a second worker for service-1.
  const workerStartedEvent = {
    application: 'service-1',
    worker: 1,
    workersCount: 2
  }
  app.watt.runtime.emit('application:worker:started', workerStartedEvent)

  // Presumably long enough for the configured grace period to elapse —
  // TODO confirm against setUpEnvironment.
  await sleep(10)

  equal(profilingRequests.length, 3)
  const lateRequest = profilingRequests[2]
  equal(lateRequest.serviceId, 'service-1:1')
  equal(lateRequest.options.durationMillis, 1000)
  equal(lateRequest.options.eluThreshold, 0)

  if (app.cleanupFlamegraphs) {
    app.cleanupFlamegraphs()
  }
  await app.closeUpdates()
})
|
|
538
|
+
|
|
539
|
+
// Verifies that with PLT_DISABLE_FLAMEGRAPHS set no 'startProfiling' command
// is sent, neither during setup nor when a new worker is announced.
test('should not start profiling on new workers when flamegraphs are disabled', async (t) => {
  setUpEnvironment()

  const testPort = port + 7
  const iccMessages = []
  const profilingRequests = []

  const wss = new WebSocketServer({ port: testPort })
  t.after(async () => wss.close())

  const { waitForClientSubscription } = setupMockIccServer(wss, iccMessages, false)

  const app = createMockApp(testPort)
  // Must be set before the plugin is loaded below.
  app.env.PLT_DISABLE_FLAMEGRAPHS = true

  // Record every startProfiling command; all commands report success.
  app.watt.runtime.sendCommandToApplication = async (serviceId, command, options) => {
    if (command === 'startProfiling') {
      profilingRequests.push({ serviceId, options })
    }
    return { success: true }
  }

  await updatePlugin(app)
  await flamegraphsPlugin(app)

  await app.connectToUpdates()
  await app.setupFlamegraphs()

  await waitForClientSubscription

  // Nothing may have been profiled during the initial setup.
  equal(profilingRequests.length, 0)

  const workerStartedEvent = {
    application: 'service-1',
    worker: 1,
    workersCount: 2
  }
  app.watt.runtime.emit('application:worker:started', workerStartedEvent)

  // Same wait as the enabled case, so a late command would be observed.
  await sleep(10)

  equal(profilingRequests.length, 0)

  if (app.cleanupFlamegraphs) {
    app.cleanupFlamegraphs()
  }
  await app.closeUpdates()
})
|