@platformatic/watt-extra 1.6.2 → 1.6.3-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +9 -9
- package/plugins/alerts.js +6 -7
- package/plugins/env.js +1 -0
- package/plugins/flamegraphs.js +76 -46
- package/plugins/health-signals.js +11 -8
- package/plugins/update.js +2 -2
- package/test/trigger-flamegraphs.test.js +143 -51
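The substantive change in this release is the retry loop around flamegraph collection in plugins/flamegraphs.js: getLastProfile is now requested per worker and retried until a profile is available, with the retry budget driven by the new PLT_FLAMEGRAPHS_ATTEMPT_TIMEOUT setting added in plugins/env.js. A minimal sketch of how that budget is computed, using only the default values visible in this diff:

// Illustration only: retry budget used by getServiceFlamegraph in plugins/flamegraphs.js,
// computed from the defaults introduced in this diff.
const durationMillis = 60 * 1000                        // PLT_FLAMEGRAPHS_INTERVAL_SEC (60) in ms
const attemptTimeout = Math.min(10000, durationMillis)  // PLT_FLAMEGRAPHS_ATTEMPT_TIMEOUT, capped at the interval
const maxAttempts = Math.ceil(durationMillis / attemptTimeout) + 1

console.log({ attemptTimeout, maxAttempts })            // { attemptTimeout: 10000, maxAttempts: 7 }

With the values used in the test suite (interval 10 s, attempt timeout 1 s) the same formula yields the 11 attempts asserted in test/trigger-flamegraphs.test.js.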
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@platformatic/watt-extra",
-  "version": "1.6.2",
+  "version": "1.6.3-alpha.0",
   "description": "The Platformatic runtime manager",
   "type": "module",
   "scripts": {
@@ -19,10 +19,10 @@
   },
   "devDependencies": {
     "@fastify/websocket": "^11.1.0",
-    "@platformatic/composer": "^3.
-    "@platformatic/next": "^3.
-    "@platformatic/node": "^3.
-    "@platformatic/service": "^3.
+    "@platformatic/composer": "^3.23.0",
+    "@platformatic/next": "^3.23.0",
+    "@platformatic/node": "^3.23.0",
+    "@platformatic/service": "^3.23.0",
     "atomic-sleep": "^1.0.0",
     "borp": "^0.21.0",
     "eslint": "9",
@@ -30,16 +30,16 @@
     "fastify-plugin": "^5.0.1",
     "neostandard": "^0.12.0",
     "next": "^16.0.0",
-    "platformatic": "^3.
+    "platformatic": "^3.23.0",
     "pprof-format": "^2.1.0",
     "why-is-node-running": "^2.3.0"
   },
   "dependencies": {
     "@datadog/pprof": "^5.9.0",
     "@fastify/error": "^4.2.0",
-    "@platformatic/foundation": "^3.
-    "@platformatic/runtime": "^3.
-    "@platformatic/wattpm-pprof-capture": "^3.
+    "@platformatic/foundation": "^3.23.0",
+    "@platformatic/runtime": "^3.23.0",
+    "@platformatic/wattpm-pprof-capture": "^3.23.0",
     "avvio": "^9.1.0",
     "chalk": "^4.1.2",
     "commist": "^3.2.0",
package/plugins/alerts.js
CHANGED
@@ -40,6 +40,7 @@ async function alerts (app, _opts) {
   }
 
   const timestamp = Date.now()
+  const workerId = healthInfo.id
   const serviceId = healthInfo.application
   const healthWithTimestamp = { ...healthInfo, timestamp, service: serviceId }
   delete healthWithTimestamp.healthConfig // we don't need to store this
@@ -111,14 +112,12 @@
 
       const alert = await body.json()
 
-
-
-
-
-      })
-    } catch (err) {
+      app.sendFlamegraphs({
+        workerIds: [workerId],
+        alertId: alert.id
+      }).catch(err => {
         app.log.error({ err }, 'Failed to send a flamegraph')
-    }
+      })
     }
   }
 
package/plugins/env.js
CHANGED
@@ -21,6 +21,7 @@ const schema = {
     PLT_FLAMEGRAPHS_INTERVAL_SEC: { type: 'number', default: 60 },
     PLT_FLAMEGRAPHS_ELU_THRESHOLD: { type: 'number', default: 0.4 },
     PLT_FLAMEGRAPHS_GRACE_PERIOD: { type: 'number', default: 3000 },
+    PLT_FLAMEGRAPHS_ATTEMPT_TIMEOUT: { type: 'number', default: 10000 },
     PLT_JWT_EXPIRATION_OFFSET_SEC: { type: 'number', default: 60 },
     PLT_UPDATES_RECONNECT_INTERVAL_SEC: { type: 'number', default: 1 },
     PLT_ELU_HEALTH_SIGNAL_THRESHOLD: { type: 'number', default: 0.8 },
package/plugins/flamegraphs.js
CHANGED
@@ -8,10 +8,13 @@ async function flamegraphs (app, _opts) {
   const flamegraphsIntervalSec = app.env.PLT_FLAMEGRAPHS_INTERVAL_SEC
   const flamegraphsELUThreshold = app.env.PLT_FLAMEGRAPHS_ELU_THRESHOLD
   const flamegraphsGracePeriod = app.env.PLT_FLAMEGRAPHS_GRACE_PERIOD
+  const flamegraphsAttemptTimeout = app.env.PLT_FLAMEGRAPHS_ATTEMPT_TIMEOUT
 
   const durationMillis = parseInt(flamegraphsIntervalSec) * 1000
   const eluThreshold = parseFloat(flamegraphsELUThreshold)
   const gracePeriod = parseInt(flamegraphsGracePeriod)
+  const attemptTimeout = Math.min(parseInt(flamegraphsAttemptTimeout), durationMillis)
+  const maxAttempts = Math.ceil(durationMillis / attemptTimeout) + 1
 
   let workerStartedListener = null
 
@@ -125,13 +128,75 @@ async function flamegraphs (app, _opts) {
     }
   }
 
+  async function getServiceFlamegraph (workerId, profileType, attempt = 1) {
+    const runtime = app.watt.runtime
+
+    try {
+      const profile = await runtime.sendCommandToApplication(workerId, 'getLastProfile', { type: profileType })
+      return profile
+    } catch (err) {
+      if (err.code === 'PLT_PPROF_NO_PROFILE_AVAILABLE') {
+        app.log.info(
+          { workerId, attempt, maxAttempts, attemptTimeout },
+          'No profile available for the service. Waiting for profiling to complete.'
+        )
+        if (attempt <= maxAttempts) {
+          await sleep(attemptTimeout)
+          return getServiceFlamegraph(workerId, profileType, attempt + 1)
+        }
+      } else if (err.code === 'PLT_PPROF_NOT_ENOUGH_ELU') {
+        app.log.info({ workerId }, 'ELU low, CPU profiling not active')
+      } else {
+        app.log.warn({ err, workerId }, 'Failed to get profile from a worker')
+
+        const [serviceId, workerIndex] = workerId.split(':')
+        if (workerIndex) {
+          app.log.warn('Worker not available, trying to get profile from another worker')
+          return getServiceFlamegraph(serviceId, profileType)
+        }
+      }
+    }
+  }
+
+  async function sendServiceFlamegraph (scalerUrl, serviceId, profile, profileType, alertId) {
+    const podId = app.instanceId
+    const url = `${scalerUrl}/pods/${podId}/services/${serviceId}/flamegraph`
+    app.log.info({ serviceId, podId, profileType }, 'Sending flamegraph')
+
+    const query = { profileType }
+    if (alertId) {
+      query.alertId = alertId
+    }
+
+    try {
+      const authHeaders = await app.getAuthorizationHeader()
+      const { statusCode, body } = await request(url, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/octet-stream',
+          ...authHeaders
+        },
+        query,
+        body: profile
+      })
+
+      if (statusCode !== 200) {
+        const error = await body.text()
+        app.log.error({ error }, 'Failed to send flamegraph')
+        throw new Error(`Failed to send flamegraph: ${error}`)
+      }
+    } catch (err) {
+      app.log.warn({ err, serviceId, podId }, 'Failed to send flamegraph from service')
+    }
+  }
+
   app.sendFlamegraphs = async (options = {}) => {
     if (isFlamegraphsDisabled) {
       app.log.info('PLT_DISABLE_FLAMEGRAPHS is set, flamegraphs are disabled')
       return
     }
 
-    let {
+    let { workerIds, alertId, profileType = 'cpu' } = options
 
     const scalerUrl = app.instanceConfig?.iccServices?.scaler?.url
     if (!scalerUrl) {
@@ -139,57 +204,22 @@
       throw new Error('No scaler URL found in ICC services, cannot send flamegraph')
     }
 
-    const podId = app.instanceId
     const runtime = app.watt.runtime
 
-    if (!
+    if (!workerIds) {
       const { applications } = await runtime.getApplications()
-
+      workerIds = applications.map(app => `${app.id}:0`)
     }
 
-    const
-
-
-
-
-      if (!profile || !(profile instanceof Uint8Array)) {
-        app.log.error({ serviceId }, 'Failed to get profile from service')
-        return
-      }
-
-      const url = `${scalerUrl}/pods/${podId}/services/${serviceId}/flamegraph`
-
-      app.log.info({ serviceId, podId, profileType }, 'Sending flamegraph')
-
-      const query = { profileType }
-      if (alertId) {
-        query.alertId = alertId
-      }
-
-      const { statusCode, body } = await request(url, {
-        method: 'POST',
-        headers: {
-          'Content-Type': 'application/octet-stream',
-          ...authHeaders
-        },
-        query,
-        body: profile
-      })
-
-      if (statusCode !== 200) {
-        const error = await body.text()
-        app.log.error({ error }, 'Failed to send flamegraph')
-        throw new Error(`Failed to send flamegraph: ${error}`)
-      }
-    } catch (err) {
-      if (err.code === 'PLT_PPROF_NO_PROFILE_AVAILABLE') {
-        app.log.info({ serviceId, podId }, 'No profile available for the service')
-      } else if (err.code === 'PLT_PPROF_NOT_ENOUGH_ELU') {
-        app.log.info({ serviceId, podId }, 'ELU low, CPU profiling not active')
-      } else {
-        app.log.warn({ err, serviceId, podId }, 'Failed to send flamegraph from service')
-      }
+    const uploadPromises = workerIds.map(async (workerId) => {
+      const profile = await getServiceFlamegraph(workerId, profileType)
+      if (!profile || !(profile instanceof Uint8Array)) {
+        app.log.error({ workerId }, 'Failed to get profile from service')
+        return
       }
+
+      const serviceId = workerId.split(':')[0]
+      await sendServiceFlamegraph(scalerUrl, serviceId, profile, profileType, alertId)
     })
 
     await Promise.all(uploadPromises)
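A minimal sketch of the reworked app.sendFlamegraphs call shape used by the callers in plugins/alerts.js, plugins/health-signals.js and plugins/update.js (the decorated `app` instance and the alert id are assumptions for illustration):

// Sketch only: `app` is assumed to be a Fastify instance decorated by this plugin.
app.sendFlamegraphs({
  workerIds: ['service-1:0'],   // '<serviceId>:<workerIndex>'; omitted => worker 0 of every application
  profileType: 'cpu',           // 'cpu' (default) or 'heap'
  alertId: 'alert-123'          // optional, hypothetical id; forwarded as a query parameter on the upload
}).catch(err => {
  app.log.error({ err }, 'Failed to send a flamegraph')
})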
package/plugins/health-signals.js
CHANGED
@@ -80,6 +80,7 @@ async function healthSignals (app, _opts) {
     }
 
     const {
+      id: workerId,
       application: serviceId,
       currentHealth,
       healthSignals
@@ -125,13 +126,13 @@
       }
 
       if (healthSignals.length > 0) {
-        await sendHealthSignalsWithTimeout(serviceId, healthSignals)
+        await sendHealthSignalsWithTimeout(serviceId, workerId, healthSignals)
       }
     })
   }
   app.setupHealthSignals = setupHealthSignals
 
-  async function sendHealthSignalsWithTimeout (serviceId, signals) {
+  async function sendHealthSignalsWithTimeout (serviceId, workerId, signals) {
     signalsCaches[serviceId] ??= new HealthSignalsCache()
     servicesSendingStatuses[serviceId] ??= false
 
@@ -148,7 +149,7 @@ async function healthSignals (app, _opts) {
 
     try {
       const signals = signalsCache.getAll()
-      await sendHealthSignals(serviceId, signals, metrics)
+      await sendHealthSignals(serviceId, workerId, signals, metrics)
     } catch (err) {
      app.log.error({ err }, 'Failed to send health signals to scaler')
     }
@@ -156,7 +157,7 @@ async function healthSignals (app, _opts) {
     }
   }
 
-  async function sendHealthSignals (serviceId, signals, metrics) {
+  async function sendHealthSignals (serviceId, workerId, signals, metrics) {
     const scalerUrl = app.instanceConfig?.iccServices?.scaler?.url
     const applicationId = app.instanceConfig?.applicationId
     const authHeaders = await app.getAuthorizationHeader()
@@ -184,11 +185,13 @@
 
     const alert = await body.json()
 
-
-
-
+    app.sendFlamegraphs({
+      serviceIds: [serviceId],
+      workerIds: [workerId],
+      alertId: alert.id
+    }).catch(err => {
       app.log.error({ err }, 'Failed to send a flamegraph')
-    }
+    })
   }
 }
 
package/plugins/update.js
CHANGED
@@ -23,14 +23,14 @@ async function updatePlugin (app) {
     // Handle trigger-flamegraph command from ICC
     if (command === 'trigger-flamegraph') {
       app.log.info({ command }, 'Received trigger-flamegraph command from ICC')
-
+      app.sendFlamegraphs({ profileType: 'cpu' })
       return
     }
 
     // Handle trigger-heapprofile command from ICC
     if (command === 'trigger-heapprofile') {
       app.log.info({ command }, 'Received trigger-heapprofile command from ICC')
-
+      app.sendFlamegraphs({ profileType: 'heap' })
       return
     }
 
package/test/trigger-flamegraphs.test.js
CHANGED
@@ -35,7 +35,7 @@ function setupMockIccServer (wss, receivedMessages, validateAuth = false) {
   return { waitForClientSubscription, getWs: () => ws }
 }
 
-function createMockApp (port, includeScalerUrl = true) {
+function createMockApp (port, includeScalerUrl = true, env = {}) {
   const eventListeners = new Map()
 
   const mockWatt = {
@@ -100,7 +100,9 @@ function createMockApp (port, includeScalerUrl = true) {
       PLT_DISABLE_FLAMEGRAPHS: false,
       PLT_FLAMEGRAPHS_INTERVAL_SEC: 1,
       PLT_FLAMEGRAPHS_ELU_THRESHOLD: 0,
-      PLT_FLAMEGRAPHS_GRACE_PERIOD: 0
+      PLT_FLAMEGRAPHS_GRACE_PERIOD: 0,
+      PLT_FLAMEGRAPHS_ATTEMPT_TIMEOUT: 1000,
+      ...env
     },
     watt: mockWatt
   }
@@ -333,15 +335,15 @@ test('sendFlamegraphs should handle missing profile data', async (t) => {
   equal(errors.length, 2, 'Should log errors for both services with missing profiles')
 })
 
-test('sendFlamegraphs should filter by serviceIds when provided', async (t) => {
+test('sendFlamegraphs should filter by workerIds when provided', async (t) => {
   setUpEnvironment()
 
   const app = createMockApp(port + 12)
   const getProfileCalls = []
 
-  app.watt.runtime.sendCommandToApplication = async (
+  app.watt.runtime.sendCommandToApplication = async (workerId, command) => {
     if (command === 'getLastProfile') {
-      getProfileCalls.push(
+      getProfileCalls.push(workerId)
       return new Uint8Array([1, 2, 3])
     }
     return { success: false }
@@ -362,10 +364,49 @@ test('sendFlamegraphs should filter by serviceIds when provided', async (t) => {
   t.after(() => server.close())
 
   await flamegraphsPlugin(app)
-  await app.sendFlamegraphs({
+  await app.sendFlamegraphs({ workerIds: ['service-1:0'] })
 
   equal(getProfileCalls.length, 1, 'Should only request profile for specified service')
-  equal(getProfileCalls[0], 'service-1', 'Should request profile for service-1')
+  equal(getProfileCalls[0], 'service-1:0', 'Should request profile for service-1')
+})
+
+test('sendFlamegraphs should try to get the profile from a service if worker is not available', async (t) => {
+  setUpEnvironment()
+
+  const app = createMockApp(port + 12)
+  const getProfileCalls = []
+
+  app.watt.runtime.sendCommandToApplication = async (workerId, command) => {
+    if (command === 'getLastProfile') {
+      getProfileCalls.push(workerId)
+      if (workerId === 'service-1:2') {
+        throw new Error('Worker not available')
+      }
+      return new Uint8Array([1, 2, 3])
+    }
+    return { success: false }
+  }
+
+  // Mock HTTP server
+  const { createServer } = await import('node:http')
+  const server = createServer((req, res) => {
+    const body = []
+    req.on('data', chunk => body.push(chunk))
+    req.on('end', () => {
+      res.writeHead(200)
+      res.end()
+    })
+  })
+
+  await new Promise(resolve => server.listen(port + 12, resolve))
+  t.after(() => server.close())
+
+  await flamegraphsPlugin(app)
+  await app.sendFlamegraphs({ workerIds: ['service-1:2'] })
+
+  equal(getProfileCalls.length, 2)
+  equal(getProfileCalls[0], 'service-1:2')
+  equal(getProfileCalls[1], 'service-1')
 })
 
 test('sendFlamegraphs should skip when PLT_DISABLE_FLAMEGRAPHS is set', async (t) => {
@@ -376,9 +417,9 @@ test('sendFlamegraphs should skip when PLT_DISABLE_FLAMEGRAPHS is set', async (t
 
   const getProfileCalls = []
 
-  app.watt.runtime.sendCommandToApplication = async (
+  app.watt.runtime.sendCommandToApplication = async (workerId, command) => {
     if (command === 'getLastProfile') {
-      getProfileCalls.push(
+      getProfileCalls.push(workerId)
       return new Uint8Array([1, 2, 3])
     }
     return { success: false }
@@ -465,14 +506,14 @@ test('should handle trigger-flamegraph command and upload flamegraphs from servi
   equal(getFlamegraphReqs.length, 2)
 
   const service1Req = getFlamegraphReqs.find(
-    (f) => f.serviceId === 'service-1'
+    (f) => f.serviceId === 'service-1:0'
   )
   const service2Req = getFlamegraphReqs.find(
-    (f) => f.serviceId === 'service-2'
+    (f) => f.serviceId === 'service-2:0'
   )
 
-  equal(service1Req.serviceId, 'service-1')
-  equal(service2Req.serviceId, 'service-2')
+  equal(service1Req.serviceId, 'service-1:0')
+  equal(service2Req.serviceId, 'service-2:0')
 
   if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
   await app.closeUpdates()
@@ -622,14 +663,14 @@ test('should handle trigger-heapprofile command and upload heap profiles from se
   equal(getHeapProfileReqs.length, 2)
 
   const service1Req = getHeapProfileReqs.find(
-    (f) => f.serviceId === 'service-1'
+    (f) => f.serviceId === 'service-1:0'
   )
   const service2Req = getHeapProfileReqs.find(
-    (f) => f.serviceId === 'service-2'
+    (f) => f.serviceId === 'service-2:0'
   )
 
-  equal(service1Req.serviceId, 'service-1')
-  equal(service2Req.serviceId, 'service-2')
+  equal(service1Req.serviceId, 'service-1:0')
+  equal(service2Req.serviceId, 'service-2:0')
 
   if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
   await app.closeUpdates()
@@ -640,11 +681,6 @@ test('should handle PLT_PPROF_NO_PROFILE_AVAILABLE error with info log', async (
 
   const receivedMessages = []
   const infoLogs = []
-  let errorCount = 0
-  let uploadResolve
-  const allUploadsComplete = new Promise((resolve) => {
-    uploadResolve = resolve
-  })
 
   const wss = new WebSocketServer({ port: port + 4 })
   t.after(async () => wss.close())
@@ -655,19 +691,21 @@
     true
   )
 
-  const app = createMockApp(port + 4
+  const app = createMockApp(port + 4, true, {
+    PLT_FLAMEGRAPHS_INTERVAL_SEC: 10,
+    PLT_FLAMEGRAPHS_ATTEMPT_TIMEOUT: 1000
+  })
+
   const originalInfo = app.log.info
   app.log.info = (...args) => {
     originalInfo(...args)
-
-    infoLogs.push(args)
-    errorCount++
-    if (errorCount === 2) {
-      uploadResolve()
-    }
-  }
+    infoLogs.push(args)
   }
 
+  // Profile will be generated in 10s
+  const profileGenerationDate = Date.now() + 10000
+  const mockProfile = new Uint8Array([1, 2, 3, 4, 5])
+
   app.watt.runtime.sendCommandToApplication = async (
     serviceId,
     command
@@ -676,9 +714,13 @@
       return { success: true }
     }
     if (command === 'getLastProfile') {
-      const
-
-
+      const now = Date.now()
+      if (now < profileGenerationDate) {
+        const error = new Error('No profile available - wait for profiling to complete or trigger manual capture')
+        error.code = 'PLT_PPROF_NO_PROFILE_AVAILABLE'
+        throw error
+      }
+      return mockProfile
     }
     return { success: false }
   }
@@ -689,6 +731,13 @@
   await app.connectToUpdates()
   await app.setupFlamegraphs()
 
+  t.after(async () => {
+    if (app.cleanupFlamegraphs) {
+      app.cleanupFlamegraphs()
+    }
+    await app.closeUpdates()
+  })
+
   await waitForClientSubscription
 
   const triggerFlamegraphMessage = {
@@ -697,15 +746,47 @@
 
   getWs().send(JSON.stringify(triggerFlamegraphMessage))
 
-  await
+  await sleep(15000)
 
-
-
-
-
+  const service1AttemptLogs = []
+  const service2AttemptLogs = []
+  const service1SuccessLogs = []
+  const service2SuccessLogs = []
 
-
-
+  for (const infoLog of infoLogs) {
+    if (infoLog.length !== 2) continue
+    const [options, message] = infoLog
+
+    if (message.includes('No profile available for the service')) {
+      const { workerId, attempt, maxAttempts, attemptTimeout } = options
+
+      equal(maxAttempts, 11)
+      equal(attemptTimeout, 1000)
+
+      if (workerId === 'service-1:0') {
+        service1AttemptLogs.push(infoLog)
+        equal(attempt, service1AttemptLogs.length)
+      }
+      if (workerId === 'service-2:0') {
+        service2AttemptLogs.push(infoLog)
+        equal(attempt, service2AttemptLogs.length)
+      }
+      continue
+    }
+
+    if (message.includes('Sending flamegraph')) {
+      if (options.serviceId === 'service-1') {
+        service1SuccessLogs.push(infoLog)
+      } else if (options.serviceId === 'service-2') {
+        service2SuccessLogs.push(infoLog)
+      }
+    }
+  }
+
+  equal(service1AttemptLogs.length, 10)
+  equal(service2AttemptLogs.length, 10)
+  equal(service1SuccessLogs.length, 1)
+  equal(service2SuccessLogs.length, 1)
 })
 
 test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) => {
@@ -762,6 +843,13 @@ test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) =>
   await app.connectToUpdates()
   await app.setupFlamegraphs()
 
+  t.after(async () => {
+    if (app.cleanupFlamegraphs) {
+      app.cleanupFlamegraphs()
+    }
+    await app.closeUpdates()
+  })
+
   await waitForClientSubscription
 
   const triggerFlamegraphMessage = {
@@ -773,12 +861,8 @@ test('should handle PLT_PPROF_NOT_ENOUGH_ELU error with info log', async (t) =>
   await allUploadsComplete
 
   equal(infoLogs.length, 2)
-  equal(infoLogs[0][0].
-  equal(infoLogs[0][0].podId, 'test-pod-123')
+  equal(infoLogs[0][0].workerId, 'service-1:0')
   equal(infoLogs[0][1], 'ELU low, CPU profiling not active')
-
-  if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
-  await app.closeUpdates()
 })
 
 test('should start profiling on new workers that start after initial setup', async (t) => {
@@ -815,6 +899,13 @@ test('should start profiling on new workers that start after initial setup', asy
   await app.connectToUpdates()
   await app.setupFlamegraphs()
 
+  t.after(async () => {
+    if (app.cleanupFlamegraphs) {
+      app.cleanupFlamegraphs()
+    }
+    await app.closeUpdates()
+  })
+
   await waitForClientSubscription
 
   equal(startProfilingCalls.length, 4)
@@ -844,9 +935,6 @@ test('should start profiling on new workers that start after initial setup', asy
   equal(startProfilingCalls[5].options.durationMillis, 1000)
   equal(startProfilingCalls[5].options.eluThreshold, 0)
   equal(startProfilingCalls[5].options.type, 'heap')
-
-  if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
-  await app.closeUpdates()
 })
 
 test('should not start profiling on new workers when flamegraphs are disabled', async (t) => {
@@ -884,6 +972,13 @@ test('should not start profiling on new workers when flamegraphs are disabled',
   await app.connectToUpdates()
   await app.setupFlamegraphs()
 
+  t.after(async () => {
+    if (app.cleanupFlamegraphs) {
+      app.cleanupFlamegraphs()
+    }
+    await app.closeUpdates()
+  })
+
   await waitForClientSubscription
 
   equal(startProfilingCalls.length, 0)
@@ -897,9 +992,6 @@ test('should not start profiling on new workers when flamegraphs are disabled',
   await sleep(10)
 
   equal(startProfilingCalls.length, 0)
-
-  if (app.cleanupFlamegraphs) app.cleanupFlamegraphs()
-  await app.closeUpdates()
 })
 
 test('sendFlamegraphs should include alertId in query params when provided', async (t) => {