@platformatic/watt-extra 1.6.3-alpha.2 → 1.6.3-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +10 -0
- package/package.json +1 -1
- package/plugins/alerts.js +1 -1
- package/plugins/flamegraphs.js +227 -207
- package/plugins/health-signals.js +1 -1
- package/plugins/update.js +2 -2
- package/test/alerts.test.js +7 -17
- package/test/health-signals.test.js +2 -5
- package/test/trigger-flamegraphs.test.js +187 -439
package/package.json
CHANGED
package/plugins/alerts.js
CHANGED
package/plugins/flamegraphs.js
CHANGED
|
@@ -1,146 +1,200 @@
|
|
|
1
1
|
'use strict'
|
|
2
2
|
|
|
3
|
-
import { setTimeout as sleep } from 'node:timers/promises'
|
|
4
3
|
import { request } from 'undici'
|
|
5
4
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
5
|
+
/**
 * Drives on-demand profiling of a single runtime worker.
 *
 * Callers queue requests with `requestProfile()`. The first request sends
 * the `startProfiling` command to the worker and installs a polling
 * interval of `duration` milliseconds. On every tick the latest profile is
 * fetched and handed to `onProfile` together with the requests whose
 * timestamp is not newer than the profile. When no requests are left,
 * profiling is stopped `duration / 2` milliseconds later unless a new
 * request arrives first.
 */
export class Profiler {
  #workerId
  #type
  #duration
  #profileOptions
  #runtime
  #log
  #requests
  #isProfiling
  #onProfile
  #getProfileInterval
  #stopProfileTimeout

  /**
   * @param {object} options
   * @param {'cpu'|'heap'} options.type - Profile type.
   * @param {number} options.duration - Profiling window in milliseconds;
   *   also used as the polling interval.
   * @param {string} options.workerId - Worker id containing a `:` separator
   *   (service id and worker index).
   * @param {boolean} [options.sourceMaps=false] - Forwarded to the worker
   *   in the `startProfiling` / `stopProfiling` options.
   * @param {object} options.app - Must expose `watt.runtime` and `log.child()`.
   * @param {(err: Error|null, profile: object|null, requests: object[]) => any} options.onProfile
   *   Called with every generated profile, or with the error that prevented it.
   * @throws {Error} When an option fails validation.
   */
  constructor (options = {}) {
    const { type, duration, workerId, sourceMaps, app, onProfile } = options

    if (type !== 'cpu' && type !== 'heap') {
      throw new Error('Invalid Profiler type. Must be either "cpu" or "heap"')
    }
    if (typeof duration !== 'number') {
      throw new Error('Invalid Profiler duration. Must be a number')
    }
    if (typeof workerId !== 'string') {
      throw new Error('Invalid Worker ID. Must be a string')
    }
    if (!workerId.includes(':')) {
      throw new Error('Worker ID must include the service ID and worker index')
    }
    if (typeof onProfile !== 'function') {
      throw new Error('Invalid onProfile handler. Must be a function')
    }

    this.#type = type
    this.#duration = duration
    this.#workerId = workerId
    this.#onProfile = onProfile

    this.#profileOptions = {
      type,
      durationMillis: duration,
      sourceMaps: sourceMaps ?? false
    }

    this.#requests = []
    this.#isProfiling = false
    this.#getProfileInterval = null
    this.#stopProfileTimeout = null

    this.#runtime = app.watt.runtime
    this.#log = app.log.child({
      workerId: this.#workerId,
      profilerType: this.#type
    })
  }

  /**
   * Queues a profile request and starts profiling if it is not running.
   *
   * The passed object is stored as-is and gets a `timestamp` (ms since
   * epoch) assigned when it has none. The start is fire-and-forget: this
   * method resolves before the worker has acknowledged `startProfiling`.
   *
   * @param {object} [request] - Arbitrary request payload.
   * @returns {Promise<void>}
   */
  async requestProfile (request = {}) {
    request.timestamp ??= Date.now()
    this.#requests.push(request)
    this.#unscheduleStopProfiling()

    if (!this.#isProfiling) {
      // Intentionally not awaited; #startProfilingLoop never rejects.
      this.#startProfilingLoop()
    }
  }

  /**
   * Cancels the polling interval and any pending auto-stop timer, then
   * sends `stopProfiling` to the worker if profiling is active.
   *
   * @returns {Promise<void>} Rejects if the `stopProfiling` command fails.
   */
  async stop () {
    if (this.#getProfileInterval) {
      clearInterval(this.#getProfileInterval)
      this.#getProfileInterval = null
    }
    this.#unscheduleStopProfiling()

    if (this.#isProfiling) {
      await this.#stopProfiling()
    }
  }

  // Starts profiling on the worker and installs the polling interval.
  // Failures are reported through the handler; this method never rejects.
  async #startProfilingLoop () {
    try {
      await this.#startProfiling()
    } catch (err) {
      // Reset the flag so the next requestProfile() retries the start;
      // otherwise the profiler stays stuck in a "profiling" state forever.
      this.#isProfiling = false
      this.#log.error({ err }, 'Failed to start profiling')
      const requests = this.#getProfileRequests(Date.now())
      this.#notify(err, null, requests)
      return
    }

    // stop() may have run while the start command was in flight, or a
    // concurrent start may already have installed the interval.
    if (!this.#isProfiling || this.#getProfileInterval) {
      return
    }

    this.#getProfileInterval = setInterval(
      () => this.#processProfile(),
      this.#duration
    ).unref()
  }

  // Fetches the latest profile and dispatches it with the requests it covers.
  async #processProfile () {
    let profile = null
    let failure = null

    try {
      profile = await this.#getProfile()
    } catch (err) {
      failure = err
      this.#log.error({ err }, 'Failed to generate a profile')
    }

    const cutoff = failure ? Date.now() : profile.timestamp
    this.#notify(failure, profile, this.#getProfileRequests(cutoff))

    // Only arm the auto-stop while profiling is still active.
    if (this.#isProfiling && this.#requests.length === 0) {
      this.#scheduleStopProfiling()
    }
  }

  // Invokes the handler without letting a synchronous throw or a rejected
  // promise escape as an unhandled rejection.
  #notify (err, profile, requests) {
    const report = (handlerErr) => {
      this.#log.error({ err: handlerErr }, 'Profile handler failed')
    }

    try {
      const result = this.#onProfile(err, profile, requests)
      if (typeof result?.then === 'function') {
        result.then(undefined, report)
      }
    } catch (handlerErr) {
      report(handlerErr)
    }
  }

  #scheduleStopProfiling () {
    // Never keep two auto-stop timers alive at once.
    this.#unscheduleStopProfiling()

    // Stop profiling after the duration/2 if there are no more requests
    this.#stopProfileTimeout = setTimeout(() => {
      this.#stopProfileTimeout = null
      this.stop().catch((err) => {
        this.#log.error({ err }, 'Failed to stop profiling')
      })
    }, this.#duration / 2).unref()
  }

  #unscheduleStopProfiling () {
    if (this.#stopProfileTimeout) {
      clearTimeout(this.#stopProfileTimeout)
      this.#stopProfileTimeout = null
    }
  }

  async #startProfiling () {
    // Set synchronously so concurrent requestProfile() calls do not start twice.
    this.#isProfiling = true
    this.#log.info('Starting profiling')

    await this.#runtime.sendCommandToApplication(
      this.#workerId, 'startProfiling', this.#profileOptions
    )
  }

  async #stopProfiling () {
    this.#isProfiling = false
    this.#log.info('Stopping profiling')

    await this.#runtime.sendCommandToApplication(
      this.#workerId, 'stopProfiling', this.#profileOptions
    )
  }

  // Resolves to `{ data, timestamp }` built from the worker's last profile
  // and the `latestProfileTimestamp` of its profiling state.
  async #getProfile () {
    this.#log.info('Getting profile from worker')

    const [state, profile] = await Promise.all([
      this.#runtime.sendCommandToApplication(this.#workerId, 'getProfilingState', { type: this.#type }),
      this.#runtime.sendCommandToApplication(this.#workerId, 'getLastProfile', { type: this.#type })
    ])
    return { data: profile, timestamp: state.latestProfileTimestamp }
  }

  // Removes and returns the queued requests up to (and including) the last
  // one whose timestamp is not newer than `profileTimestamp`.
  #getProfileRequests (profileTimestamp) {
    let processedIndex = 0
    for (let i = 0; i < this.#requests.length; i++) {
      if (this.#requests[i].timestamp <= profileTimestamp) {
        processedIndex = i + 1
      }
    }
    return this.#requests.splice(0, processedIndex)
  }
}
|
|
167
|
+
|
|
168
|
+
async function flamegraphs (app, _opts) {
|
|
169
|
+
const isFlamegraphsDisabled = app.env.PLT_DISABLE_FLAMEGRAPHS
|
|
170
|
+
const flamegraphsIntervalSec = app.env.PLT_FLAMEGRAPHS_INTERVAL_SEC
|
|
171
|
+
|
|
172
|
+
const durationMillis = parseInt(flamegraphsIntervalSec) * 1000
|
|
134
173
|
|
|
135
|
-
const
|
|
174
|
+
const profilers = {}
|
|
175
|
+
const profilersConfigs = {}
|
|
136
176
|
|
|
137
|
-
app.
|
|
177
|
+
app.setupFlamegraphs = async () => {
|
|
138
178
|
if (isFlamegraphsDisabled) {
|
|
139
|
-
app.log.info('PLT_DISABLE_FLAMEGRAPHS is set,
|
|
179
|
+
app.log.info('PLT_DISABLE_FLAMEGRAPHS is set, skipping profiling')
|
|
140
180
|
return
|
|
141
181
|
}
|
|
142
182
|
|
|
143
|
-
|
|
183
|
+
const runtime = app.watt.runtime
|
|
184
|
+
const { applications } = await runtime.getApplications()
|
|
185
|
+
|
|
186
|
+
for (const application of applications) {
|
|
187
|
+
const appDetails = await runtime.getApplicationDetails(application.id)
|
|
188
|
+
const sourceMaps = appDetails.sourceMaps ?? false
|
|
189
|
+
profilersConfigs[application.id] = { durationMillis, sourceMaps }
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
app.requestFlamegraphs = async (options = {}) => {
|
|
194
|
+
if (isFlamegraphsDisabled) {
|
|
195
|
+
app.log.info('PLT_DISABLE_FLAMEGRAPHS is set, flamegraphs are disabled')
|
|
196
|
+
return
|
|
197
|
+
}
|
|
144
198
|
|
|
145
199
|
const scalerUrl = app.instanceConfig?.iccServices?.scaler?.url
|
|
146
200
|
if (!scalerUrl) {
|
|
@@ -150,118 +204,89 @@ async function flamegraphs (app, _opts) {
|
|
|
150
204
|
|
|
151
205
|
const runtime = app.watt.runtime
|
|
152
206
|
|
|
153
|
-
|
|
154
|
-
const { applications } = await runtime.getApplications()
|
|
155
|
-
workerIds = applications.map(app => app.id)
|
|
156
|
-
}
|
|
207
|
+
let { workerIds, alertId, profileType = 'cpu' } = options
|
|
157
208
|
|
|
158
|
-
|
|
209
|
+
const servicesWorkers = {}
|
|
210
|
+
const workers = await runtime.getWorkers()
|
|
211
|
+
for (const workerId in workers) {
|
|
212
|
+
const workerInfo = workers[workerId]
|
|
213
|
+
const serviceId = workerInfo.application
|
|
159
214
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
215
|
+
servicesWorkers[serviceId] ??= []
|
|
216
|
+
servicesWorkers[serviceId].push(workerId)
|
|
217
|
+
}
|
|
163
218
|
|
|
164
|
-
|
|
165
|
-
if (profile !== undefined) {
|
|
166
|
-
if (alertId) {
|
|
167
|
-
app.log.info(
|
|
168
|
-
{ workerId, alertId }, 'Flamegraph will be attached to the alert'
|
|
169
|
-
)
|
|
170
|
-
profile.waitingAlerts.push(alertId)
|
|
171
|
-
}
|
|
219
|
+
workerIds ??= Object.keys(servicesWorkers)
|
|
172
220
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
221
|
+
for (let workerId of workerIds) {
|
|
222
|
+
const [serviceId, workerIndex] = workerId.split(':')
|
|
223
|
+
if (workerIndex === undefined) {
|
|
224
|
+
workerId = servicesWorkers[serviceId][0]
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
if (workerId === undefined) {
|
|
228
|
+
app.log.error({ serviceId }, 'No worker found for an application')
|
|
229
|
+
continue
|
|
177
230
|
}
|
|
178
231
|
|
|
179
|
-
|
|
180
|
-
|
|
232
|
+
const profileKey = `${workerId}:${profileType}`
|
|
233
|
+
|
|
234
|
+
let profiler = profilers[profileKey]
|
|
235
|
+
if (!profiler) {
|
|
236
|
+
const config = profilersConfigs[serviceId]
|
|
237
|
+
profiler = new Profiler({
|
|
238
|
+
app,
|
|
239
|
+
workerId,
|
|
181
240
|
type: profileType,
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
241
|
+
duration: config.durationMillis,
|
|
242
|
+
sourceMaps: config.sourceMaps,
|
|
243
|
+
onProfile: createProfileHandler(scalerUrl, workerId, profileType)
|
|
244
|
+
})
|
|
245
|
+
profilers[profileKey] = profiler
|
|
246
|
+
}
|
|
188
247
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
248
|
+
profiler.requestProfile({ alertId })
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
function createProfileHandler (scalerUrl, workerId, profileType) {
|
|
253
|
+
const serviceId = workerId.split(':')[0]
|
|
195
254
|
|
|
196
|
-
|
|
197
|
-
|
|
255
|
+
return async (err, profile, requests) => {
|
|
256
|
+
if (err) {
|
|
257
|
+
app.log.error({ err }, 'Failed to generate a profile')
|
|
258
|
+
return
|
|
198
259
|
}
|
|
199
260
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
serviceId,
|
|
205
|
-
profile.data,
|
|
206
|
-
profileType,
|
|
207
|
-
alertId
|
|
208
|
-
)
|
|
209
|
-
profile.flamegraphId = flamegraph.id
|
|
210
|
-
} catch (err) {
|
|
211
|
-
app.log.error({ err, workerId, alertId, profileType }, 'Failed to send flamegraph')
|
|
212
|
-
delete profilesByWorkerId[profileKey]
|
|
213
|
-
return
|
|
261
|
+
const alertIds = []
|
|
262
|
+
for (const request of requests) {
|
|
263
|
+
if (request.alertId) {
|
|
264
|
+
alertIds.push(request.alertId)
|
|
214
265
|
}
|
|
215
266
|
}
|
|
216
267
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
await _attachFlamegraphToAlerts(
|
|
268
|
+
try {
|
|
269
|
+
const alertId = alertIds.shift()
|
|
270
|
+
const flamegraph = await sendServiceFlamegraph(
|
|
221
271
|
scalerUrl,
|
|
222
272
|
serviceId,
|
|
223
|
-
profile.flamegraphId,
|
|
224
273
|
profile.data,
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
)
|
|
228
|
-
}
|
|
229
|
-
})
|
|
230
|
-
|
|
231
|
-
await Promise.all(uploadPromises)
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
async function getServiceFlamegraph (workerId, profileType, attempt = 1) {
|
|
235
|
-
const runtime = app.watt.runtime
|
|
236
|
-
|
|
237
|
-
app.log.info({ workerId, attempt, maxAttempts, attemptTimeout }, 'Getting profile from worker')
|
|
238
|
-
|
|
239
|
-
try {
|
|
240
|
-
const [state, profile] = await Promise.all([
|
|
241
|
-
runtime.sendCommandToApplication(workerId, 'getProfilingState', { type: profileType }),
|
|
242
|
-
runtime.sendCommandToApplication(workerId, 'getLastProfile', { type: profileType })
|
|
243
|
-
])
|
|
244
|
-
return { data: profile, timestamp: state.latestProfileTimestamp }
|
|
245
|
-
} catch (err) {
|
|
246
|
-
if (err.code === 'PLT_PPROF_NO_PROFILE_AVAILABLE') {
|
|
247
|
-
app.log.info(
|
|
248
|
-
{ workerId, attempt, maxAttempts, attemptTimeout },
|
|
249
|
-
'No profile available for the service. Waiting for profiling to complete.'
|
|
274
|
+
profileType,
|
|
275
|
+
alertId
|
|
250
276
|
)
|
|
251
|
-
if (attempt <= maxAttempts) {
|
|
252
|
-
await sleep(attemptTimeout)
|
|
253
|
-
return getServiceFlamegraph(workerId, profileType, attempt + 1)
|
|
254
|
-
}
|
|
255
|
-
} else if (err.code === 'PLT_PPROF_NOT_ENOUGH_ELU') {
|
|
256
|
-
app.log.info({ workerId }, 'ELU low, CPU profiling not active')
|
|
257
|
-
} else {
|
|
258
|
-
app.log.warn({ err, workerId }, 'Failed to get profile from a worker')
|
|
259
277
|
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
278
|
+
if (alertIds.length > 0) {
|
|
279
|
+
await _attachFlamegraphToAlerts(
|
|
280
|
+
scalerUrl,
|
|
281
|
+
serviceId,
|
|
282
|
+
flamegraph.id,
|
|
283
|
+
profile.data,
|
|
284
|
+
profileType,
|
|
285
|
+
alertIds
|
|
286
|
+
)
|
|
264
287
|
}
|
|
288
|
+
} catch (err) {
|
|
289
|
+
app.log.error({ err, workerId }, 'Failed to send flamegraph')
|
|
265
290
|
}
|
|
266
291
|
}
|
|
267
292
|
}
|
|
@@ -367,15 +392,10 @@ async function flamegraphs (app, _opts) {
|
|
|
367
392
|
}
|
|
368
393
|
}
|
|
369
394
|
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
const timestamp = profilesByWorkerId[profileKey]?.timestamp
|
|
375
|
-
if (timestamp && now - timestamp > durationMillis) {
|
|
376
|
-
delete profilesByWorkerId[profileKey]
|
|
377
|
-
}
|
|
378
|
-
}
|
|
395
|
+
app.cleanupFlamegraphs = async () => {
|
|
396
|
+
// Stop all tracked profilers in parallel
|
|
397
|
+
const stopPromises = Object.values(profilers).map(profiler => profiler.stop())
|
|
398
|
+
await Promise.all(stopPromises)
|
|
379
399
|
}
|
|
380
400
|
}
|
|
381
401
|
|
package/plugins/update.js
CHANGED
|
@@ -23,14 +23,14 @@ async function updatePlugin (app) {
|
|
|
23
23
|
// Handle trigger-flamegraph command from ICC
|
|
24
24
|
if (command === 'trigger-flamegraph') {
|
|
25
25
|
app.log.info({ command }, 'Received trigger-flamegraph command from ICC')
|
|
26
|
-
app.
|
|
26
|
+
app.requestFlamegraphs({ profileType: 'cpu' })
|
|
27
27
|
return
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
// Handle trigger-heapprofile command from ICC
|
|
31
31
|
if (command === 'trigger-heapprofile') {
|
|
32
32
|
app.log.info({ command }, 'Received trigger-heapprofile command from ICC')
|
|
33
|
-
app.
|
|
33
|
+
app.requestFlamegraphs({ profileType: 'heap' })
|
|
34
34
|
return
|
|
35
35
|
}
|
|
36
36
|
|
package/test/alerts.test.js
CHANGED
|
@@ -90,9 +90,6 @@ test('should send alert when service becomes unhealthy', async (t) => {
|
|
|
90
90
|
await icc.close()
|
|
91
91
|
})
|
|
92
92
|
|
|
93
|
-
// Wait for the first flamegraph to be generated
|
|
94
|
-
await sleep(5000)
|
|
95
|
-
|
|
96
93
|
// Manually trigger health event with unhealthy state
|
|
97
94
|
const healthInfo = {
|
|
98
95
|
id: 'main:0',
|
|
@@ -133,6 +130,9 @@ test('should send alert when service becomes unhealthy', async (t) => {
|
|
|
133
130
|
assert.strictEqual(alertReceived.healthHistory[0].application, 'main')
|
|
134
131
|
assert.strictEqual(alertReceived.healthHistory[0].service, 'main')
|
|
135
132
|
|
|
133
|
+
// Wait for flamegraph to be generated (duration is 2 seconds)
|
|
134
|
+
await sleep(2500)
|
|
135
|
+
|
|
136
136
|
assert.ok(flamegraphReceived, 'Flamegraph should have been received')
|
|
137
137
|
|
|
138
138
|
const profile = Profile.decode(flamegraphReceived)
|
|
@@ -526,8 +526,6 @@ test('should send alert when flamegraphs are disabled', async (t) => {
|
|
|
526
526
|
await icc.close()
|
|
527
527
|
})
|
|
528
528
|
|
|
529
|
-
await sleep(5000)
|
|
530
|
-
|
|
531
529
|
// Manually trigger health event with unhealthy state
|
|
532
530
|
const healthInfo = {
|
|
533
531
|
id: 'main:0',
|
|
@@ -611,8 +609,6 @@ test('should send alert when failed to send a flamegraph', async (t) => {
|
|
|
611
609
|
await icc.close()
|
|
612
610
|
})
|
|
613
611
|
|
|
614
|
-
await sleep(5000)
|
|
615
|
-
|
|
616
612
|
// Manually trigger health event with unhealthy state
|
|
617
613
|
const healthInfo = {
|
|
618
614
|
id: 'main:0',
|
|
@@ -799,9 +795,6 @@ test('should attach one flamegraph to multiple alerts', async (t) => {
|
|
|
799
795
|
await icc.close()
|
|
800
796
|
})
|
|
801
797
|
|
|
802
|
-
// Wait for the first flamegraph to be generated
|
|
803
|
-
await sleep(5000)
|
|
804
|
-
|
|
805
798
|
// Manually trigger health event with unhealthy state
|
|
806
799
|
const healthInfo = {
|
|
807
800
|
id: 'main:0',
|
|
@@ -827,8 +820,8 @@ test('should attach one flamegraph to multiple alerts', async (t) => {
|
|
|
827
820
|
await sleep(1000)
|
|
828
821
|
emitHealthEvent(app, healthInfo)
|
|
829
822
|
|
|
830
|
-
// Wait for
|
|
831
|
-
await sleep(
|
|
823
|
+
// Wait for flamegraph to be generated (duration is 5 seconds) and sent
|
|
824
|
+
await sleep(5500)
|
|
832
825
|
|
|
833
826
|
assert.strictEqual(receivedAlerts.length, 2)
|
|
834
827
|
const alert1 = receivedAlerts[0]
|
|
@@ -902,9 +895,6 @@ test('should send flamegraphs if attaching fails', async (t) => {
|
|
|
902
895
|
await icc.close()
|
|
903
896
|
})
|
|
904
897
|
|
|
905
|
-
// Wait for the first flamegraph to be generated
|
|
906
|
-
await sleep(5000)
|
|
907
|
-
|
|
908
898
|
// Manually trigger health event with unhealthy state
|
|
909
899
|
const healthInfo = {
|
|
910
900
|
id: 'main:0',
|
|
@@ -930,8 +920,8 @@ test('should send flamegraphs if attaching fails', async (t) => {
|
|
|
930
920
|
await sleep(1000)
|
|
931
921
|
emitHealthEvent(app, healthInfo)
|
|
932
922
|
|
|
933
|
-
// Wait for
|
|
934
|
-
await sleep(
|
|
923
|
+
// Wait for flamegraph to be generated (duration is 5 seconds) and sent
|
|
924
|
+
await sleep(5500)
|
|
935
925
|
|
|
936
926
|
assert.strictEqual(receivedAlerts.length, 2)
|
|
937
927
|
const alert1 = receivedAlerts[0]
|
|
@@ -58,9 +58,6 @@ test('should send health signals when service becomes unhealthy', async (t) => {
|
|
|
58
58
|
await icc.close()
|
|
59
59
|
})
|
|
60
60
|
|
|
61
|
-
// Wait for the first flamegraph to be generated
|
|
62
|
-
await sleep(5000)
|
|
63
|
-
|
|
64
61
|
{
|
|
65
62
|
const { statusCode } = await request('http://127.0.0.1:3042/custom-health-signal', {
|
|
66
63
|
method: 'POST',
|
|
@@ -119,8 +116,8 @@ test('should send health signals when service becomes unhealthy', async (t) => {
|
|
|
119
116
|
assert.ok(receivedSignal.timestamp > 0)
|
|
120
117
|
}
|
|
121
118
|
|
|
122
|
-
// Wait for
|
|
123
|
-
await sleep(
|
|
119
|
+
// Wait for flamegraph to be generated (duration is 2 seconds)
|
|
120
|
+
await sleep(2500)
|
|
124
121
|
|
|
125
122
|
// assert.strictEqual(receivedFlamegraphReqs.length, 1)
|
|
126
123
|
|