@platformatic/watt-extra 1.3.2 → 1.4.0-alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@platformatic/watt-extra",
3
- "version": "1.3.2",
3
+ "version": "1.4.0-alpha.1",
4
4
  "description": "The Platformatic runtime manager",
5
5
  "type": "module",
6
6
  "scripts": {
@@ -13,6 +13,23 @@ async function flamegraphs (app, _opts) {
13
13
  const eluThreshold = parseFloat(flamegraphsELUThreshold)
14
14
  const gracePeriod = parseInt(flamegraphsGracePeriod)
15
15
 
16
+ let workerStartedListener = null
17
+
18
+ const startProfilingOnWorker = async (runtime, workerFullId, logContext = {}) => {
19
+ await sleep(gracePeriod)
20
+
21
+ try {
22
+ await runtime.sendCommandToApplication(
23
+ workerFullId,
24
+ 'startProfiling',
25
+ { durationMillis, eluThreshold }
26
+ )
27
+ } catch (err) {
28
+ app.log.error({ err, ...logContext }, 'Failed to start profiling')
29
+ throw err
30
+ }
31
+ }
32
+
16
33
  app.setupFlamegraphs = async () => {
17
34
  if (isFlamegraphsDisabled) {
18
35
  app.log.info('PLT_DISABLE_FLAMEGRAPHS is set, skipping profiling')
@@ -21,19 +38,15 @@ async function flamegraphs (app, _opts) {
21
38
 
22
39
  app.log.info('Start profiling services')
23
40
 
24
- await sleep(gracePeriod)
25
-
26
41
  const runtime = app.watt.runtime
27
- const { applications } = await runtime.getApplications()
42
+ const workers = await runtime.getWorkers()
28
43
 
29
44
  const promises = []
30
- for (const application of applications) {
31
- const promise = runtime.sendCommandToApplication(
32
- application.id,
33
- 'startProfiling',
34
- { durationMillis, eluThreshold }
35
- )
36
- promises.push(promise)
45
+ for (const [workerFullId, workerInfo] of Object.entries(workers)) {
46
+ if (workerInfo.status === 'started') {
47
+ const promise = startProfilingOnWorker(runtime, workerFullId, { workerFullId })
48
+ promises.push(promise)
49
+ }
37
50
  }
38
51
 
39
52
  const results = await Promise.allSettled(promises)
@@ -42,6 +55,28 @@ async function flamegraphs (app, _opts) {
42
55
  app.log.error({ result }, 'Failed to start profiling')
43
56
  }
44
57
  }
58
+
59
+ // Listen for new workers starting and start profiling on them
60
+ workerStartedListener = ({ application, worker }) => {
61
+ if (isFlamegraphsDisabled) {
62
+ return
63
+ }
64
+
65
+ const workerFullId = [application, worker].join(':')
66
+ app.log.info({ application, worker }, 'Starting profiling on new worker')
67
+
68
+ startProfilingOnWorker(runtime, workerFullId, { application, worker }).catch(() => {
69
+ // Error already logged in startProfilingOnWorker
70
+ })
71
+ }
72
+ runtime.on('application:worker:started', workerStartedListener)
73
+ }
74
+
75
+ app.cleanupFlamegraphs = () => {
76
+ if (workerStartedListener && app.watt?.runtime) {
77
+ app.watt.runtime.removeListener('application:worker:started', workerStartedListener)
78
+ workerStartedListener = null
79
+ }
45
80
  }
46
81
 
47
82
  app.sendFlamegraphs = async (options = {}) => {
@@ -70,7 +105,7 @@ async function flamegraphs (app, _opts) {
70
105
 
71
106
  const uploadPromises = serviceIds.map(async (serviceId) => {
72
107
  try {
73
- const profile = await runtime.sendCommandToApplication(serviceId, 'getLastProfile')
108
+ const profile = await runtime.sendCommandToApplication(serviceId, 'getLastProfile', { type: profileType })
74
109
  if (!profile || !(profile instanceof Uint8Array)) {
75
110
  app.log.error({ serviceId }, 'Failed to get profile from service')
76
111
  return
@@ -36,11 +36,38 @@ function setupMockIccServer (wss, receivedMessages, validateAuth = false) {
36
36
  }
37
37
 
38
38
  function createMockApp (port, includeScalerUrl = true) {
39
+ const eventListeners = new Map()
40
+
39
41
  const mockWatt = {
40
42
  runtime: {
41
- getApplications: () => ({
43
+ getWorkers: async () => ({
44
+ 'service-1:0': { application: 'service-1', worker: 0, status: 'started' },
45
+ 'service-2:0': { application: 'service-2', worker: 0, status: 'started' }
46
+ }),
47
+ getApplications: async () => ({
42
48
  applications: [{ id: 'service-1' }, { id: 'service-2' }]
43
- })
49
+ }),
50
+ on: (event, listener) => {
51
+ if (!eventListeners.has(event)) {
52
+ eventListeners.set(event, [])
53
+ }
54
+ eventListeners.get(event).push(listener)
55
+ },
56
+ removeListener: (event, listener) => {
57
+ const listeners = eventListeners.get(event)
58
+ if (listeners) {
59
+ const index = listeners.indexOf(listener)
60
+ if (index !== -1) {
61
+ listeners.splice(index, 1)
62
+ }
63
+ }
64
+ },
65
+ emit: (event, ...args) => {
66
+ const listeners = eventListeners.get(event) || []
67
+ for (const listener of listeners) {
68
+ listener(...args)
69
+ }
70
+ }
44
71
  }
45
72
  }
46
73
 
@@ -108,6 +135,9 @@ test('should handle trigger-flamegraph command and upload flamegraphs from servi
108
135
  serviceId,
109
136
  command
110
137
  ) => {
138
+ if (command === 'startProfiling') {
139
+ return { success: true }
140
+ }
111
141
  if (command === 'getLastProfile') {
112
142
  getFlamegraphReqs.push({ serviceId })
113
143
  if (getFlamegraphReqs.length === 2) {
@@ -146,6 +176,7 @@ test('should handle trigger-flamegraph command and upload flamegraphs from servi
146
176
  equal(service1Req.serviceId, 'service-1')
147
177
  equal(service2Req.serviceId, 'service-2')
148
178
 
179
+ if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
149
180
  await app.closeUpdates()
150
181
  })
151
182
 
@@ -178,6 +209,7 @@ test('should handle trigger-flamegraph when no runtime is available', async (t)
178
209
 
179
210
  await sleep(100)
180
211
 
212
+ if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
181
213
  await app.closeUpdates()
182
214
  })
183
215
 
@@ -202,6 +234,9 @@ test('should handle trigger-flamegraph when flamegraph upload fails', async (t)
202
234
  command,
203
235
  options
204
236
  ) => {
237
+ if (command === 'startProfiling') {
238
+ return { success: true }
239
+ }
205
240
  if (command === 'sendFlamegraph' && options.url && options.headers) {
206
241
  throw new Error('Flamegraph upload failed')
207
242
  }
@@ -224,6 +259,7 @@ test('should handle trigger-flamegraph when flamegraph upload fails', async (t)
224
259
 
225
260
  await sleep(100)
226
261
 
262
+ if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
227
263
  await app.closeUpdates()
228
264
  })
229
265
 
@@ -250,8 +286,15 @@ test('should handle trigger-heapprofile command and upload heap profiles from se
250
286
 
251
287
  app.watt.runtime.sendCommandToApplication = async (
252
288
  serviceId,
253
- command
289
+ command,
290
+ options
254
291
  ) => {
292
+ if (options && options.type) {
293
+ equal(options.type, 'heap')
294
+ }
295
+ if (command === 'startProfiling') {
296
+ return { success: true }
297
+ }
255
298
  if (command === 'getLastProfile') {
256
299
  getHeapProfileReqs.push({ serviceId })
257
300
  if (getHeapProfileReqs.length === 2) {
@@ -290,6 +333,7 @@ test('should handle trigger-heapprofile command and upload heap profiles from se
290
333
  equal(service1Req.serviceId, 'service-1')
291
334
  equal(service2Req.serviceId, 'service-2')
292
335
 
336
+ if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
293
337
  await app.closeUpdates()
294
338
  })
295
339
 
@@ -330,6 +374,9 @@ test('should handle PLT_PPROF_NO_PROFILE_AVAILABLE error with info log', async (
330
374
  serviceId,
331
375
  command
332
376
  ) => {
377
+ if (command === 'startProfiling') {
378
+ return { success: true }
379
+ }
333
380
  if (command === 'getLastProfile') {
334
381
  const error = new Error('No profile available - wait for profiling to complete or trigger manual capture')
335
382
  error.code = 'PLT_PPROF_NO_PROFILE_AVAILABLE'
@@ -359,6 +406,7 @@ test('should handle PLT_PPROF_NO_PROFILE_AVAILABLE error with info log', async (
359
406
  equal(infoLogs[0][0].podId, 'test-pod-123')
360
407
  equal(infoLogs[0][1], 'No profile available for the service')
361
408
 
409
+ if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
362
410
  await app.closeUpdates()
363
411
  })
364
412
 
@@ -399,6 +447,9 @@ test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) =>
399
447
  serviceId,
400
448
  command
401
449
  ) => {
450
+ if (command === 'startProfiling') {
451
+ return { success: true }
452
+ }
402
453
  if (command === 'getLastProfile') {
403
454
  const error = new Error('No profile available - event loop utilization has been below threshold for too long')
404
455
  error.code = 'PLT_PPROF_NOT_ENOUGH_ELU'
@@ -428,5 +479,116 @@ test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) =>
428
479
  equal(infoLogs[0][0].podId, 'test-pod-123')
429
480
  equal(infoLogs[0][1], 'ELU low, CPU profiling not active')
430
481
 
482
+ if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
483
+ await app.closeUpdates()
484
+ })
485
+
486
+ test('should start profiling on new workers that start after initial setup', async (t) => {
487
+ setUpEnvironment()
488
+
489
+ const receivedMessages = []
490
+ const startProfilingCalls = []
491
+
492
+ const wss = new WebSocketServer({ port: port + 6 })
493
+ t.after(async () => wss.close())
494
+
495
+ const { waitForClientSubscription } = setupMockIccServer(
496
+ wss,
497
+ receivedMessages,
498
+ false
499
+ )
500
+
501
+ const app = createMockApp(port + 6)
502
+
503
+ app.watt.runtime.sendCommandToApplication = async (
504
+ serviceId,
505
+ command,
506
+ options
507
+ ) => {
508
+ if (command === 'startProfiling') {
509
+ startProfilingCalls.push({ serviceId, options })
510
+ }
511
+ return { success: true }
512
+ }
513
+
514
+ await updatePlugin(app)
515
+ await flamegraphsPlugin(app)
516
+
517
+ await app.connectToUpdates()
518
+ await app.setupFlamegraphs()
519
+
520
+ await waitForClientSubscription
521
+
522
+ equal(startProfilingCalls.length, 2)
523
+ equal(startProfilingCalls[0].serviceId, 'service-1:0')
524
+ equal(startProfilingCalls[1].serviceId, 'service-2:0')
525
+
526
+ app.watt.runtime.emit('application:worker:started', {
527
+ application: 'service-1',
528
+ worker: 1,
529
+ workersCount: 2
530
+ })
531
+
532
+ await sleep(10)
533
+
534
+ equal(startProfilingCalls.length, 3)
535
+ equal(startProfilingCalls[2].serviceId, 'service-1:1')
536
+ equal(startProfilingCalls[2].options.durationMillis, 1000)
537
+ equal(startProfilingCalls[2].options.eluThreshold, 0)
538
+
539
+ if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
540
+ await app.closeUpdates()
541
+ })
542
+
543
+ test('should not start profiling on new workers when flamegraphs are disabled', async (t) => {
544
+ setUpEnvironment()
545
+
546
+ const receivedMessages = []
547
+ const startProfilingCalls = []
548
+
549
+ const wss = new WebSocketServer({ port: port + 7 })
550
+ t.after(async () => wss.close())
551
+
552
+ const { waitForClientSubscription } = setupMockIccServer(
553
+ wss,
554
+ receivedMessages,
555
+ false
556
+ )
557
+
558
+ const app = createMockApp(port + 7)
559
+ app.env.PLT_DISABLE_FLAMEGRAPHS = true
560
+
561
+ app.watt.runtime.sendCommandToApplication = async (
562
+ serviceId,
563
+ command,
564
+ options
565
+ ) => {
566
+ if (command === 'startProfiling') {
567
+ startProfilingCalls.push({ serviceId, options })
568
+ }
569
+ return { success: true }
570
+ }
571
+
572
+ await updatePlugin(app)
573
+ await flamegraphsPlugin(app)
574
+
575
+ await app.connectToUpdates()
576
+ await app.setupFlamegraphs()
577
+
578
+ await waitForClientSubscription
579
+
580
+ equal(startProfilingCalls.length, 0)
581
+
582
+ app.watt.runtime.emit('application:worker:started', {
583
+ application: 'service-1',
584
+ worker: 1,
585
+ workersCount: 2
586
+ })
587
+
588
+ await sleep(10)
589
+
590
+ equal(startProfilingCalls.length, 0)
591
+
592
+ if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
431
593
  await app.closeUpdates()
432
594
  })