@platformatic/watt-extra 0.1.8-alpha.0 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/plugins/alerts.js +15 -1
- package/test/alerts.test.js +88 -0
- package/pnpm-workspace.yaml +0 -2
package/package.json
CHANGED
package/plugins/alerts.js
CHANGED
|
@@ -4,6 +4,10 @@ async function alerts (app, _opts) {
|
|
|
4
4
|
const healthCache = [] // It's OK to have this in memory, this is per-pod.
|
|
5
5
|
const podHealthWindow =
|
|
6
6
|
app.instanceConfig?.config?.scaler?.podHealthWindow || 60 * 1000
|
|
7
|
+
const alertRetentionWindow =
|
|
8
|
+
app.instanceConfig?.config?.scaler?.alertRetentionWindow || 10 * 1000
|
|
9
|
+
|
|
10
|
+
const lastServicesAlertTime = {}
|
|
7
11
|
|
|
8
12
|
async function setupAlerts () {
|
|
9
13
|
// Skip alerts setup if ICC is not configured
|
|
@@ -62,6 +66,17 @@ async function alerts (app, _opts) {
|
|
|
62
66
|
// }
|
|
63
67
|
|
|
64
68
|
if (healthInfo.unhealthy) {
|
|
69
|
+
const currentTime = Date.now()
|
|
70
|
+
|
|
71
|
+
const serviceId = healthInfo.id
|
|
72
|
+
const lastAlertTime = lastServicesAlertTime[serviceId]
|
|
73
|
+
|
|
74
|
+
if (lastAlertTime && currentTime - lastAlertTime < alertRetentionWindow) {
|
|
75
|
+
app.log.debug('Skipping alert, within retention window')
|
|
76
|
+
return
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
lastServicesAlertTime[serviceId] = currentTime
|
|
65
80
|
delete healthInfo.healthConfig
|
|
66
81
|
|
|
67
82
|
const authHeaders = await app.getAuthorizationHeader()
|
|
@@ -86,7 +101,6 @@ async function alerts (app, _opts) {
|
|
|
86
101
|
}
|
|
87
102
|
|
|
88
103
|
const alert = await body.json()
|
|
89
|
-
const serviceId = healthInfo.id
|
|
90
104
|
|
|
91
105
|
try {
|
|
92
106
|
await app.sendFlamegraphs({
|
package/test/alerts.test.js
CHANGED
|
@@ -327,6 +327,94 @@ test('should not fail when health info is missing', async (t) => {
|
|
|
327
327
|
assert.strictEqual(alertReceived, null, 'No alert should have been received')
|
|
328
328
|
})
|
|
329
329
|
|
|
330
|
+
test('should respect alert retention window', async (t) => {
|
|
331
|
+
const applicationName = 'test-app'
|
|
332
|
+
const applicationId = randomUUID()
|
|
333
|
+
const applicationPath = join(__dirname, 'fixtures', 'service-1')
|
|
334
|
+
|
|
335
|
+
const alertsReceived = []
|
|
336
|
+
|
|
337
|
+
const getAuthorizationHeader = async (headers) => {
|
|
338
|
+
return { ...headers, authorization: 'Bearer test-token' }
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
const icc = await startICC(t, {
|
|
342
|
+
applicationId,
|
|
343
|
+
applicationName,
|
|
344
|
+
iccConfig: {
|
|
345
|
+
scaler: {
|
|
346
|
+
alertRetentionWindow: 500
|
|
347
|
+
}
|
|
348
|
+
},
|
|
349
|
+
processAlerts: (req) => {
|
|
350
|
+
const alert = req.body
|
|
351
|
+
assert.equal(req.headers.authorization, 'Bearer test-token')
|
|
352
|
+
alertsReceived.push(alert)
|
|
353
|
+
return { id: 'test-alert-id', ...alert }
|
|
354
|
+
}
|
|
355
|
+
})
|
|
356
|
+
|
|
357
|
+
setUpEnvironment({
|
|
358
|
+
PLT_APP_NAME: applicationName,
|
|
359
|
+
PLT_APP_DIR: applicationPath,
|
|
360
|
+
PLT_ICC_URL: 'http://127.0.0.1:3000'
|
|
361
|
+
})
|
|
362
|
+
|
|
363
|
+
const app = await start()
|
|
364
|
+
|
|
365
|
+
app.getAuthorizationHeader = getAuthorizationHeader
|
|
366
|
+
|
|
367
|
+
await app.setupAlerts()
|
|
368
|
+
|
|
369
|
+
t.after(async () => {
|
|
370
|
+
await app.close()
|
|
371
|
+
await icc.close()
|
|
372
|
+
})
|
|
373
|
+
|
|
374
|
+
// Create a health info template
|
|
375
|
+
const createHealthInfo = (serviceId, unhealthy = true) => ({
|
|
376
|
+
id: serviceId,
|
|
377
|
+
service: serviceId,
|
|
378
|
+
currentHealth: {
|
|
379
|
+
elu: unhealthy ? 0.95 : 0.5,
|
|
380
|
+
heapUsed: 76798040,
|
|
381
|
+
heapTotal: 99721216
|
|
382
|
+
},
|
|
383
|
+
unhealthy,
|
|
384
|
+
healthConfig: {
|
|
385
|
+
enabled: true,
|
|
386
|
+
interval: 1000,
|
|
387
|
+
gracePeriod: 1000,
|
|
388
|
+
maxUnhealthyChecks: 10,
|
|
389
|
+
maxELU: 0.99,
|
|
390
|
+
maxHeapUsed: 0.99,
|
|
391
|
+
maxHeapTotal: 4294967296
|
|
392
|
+
}
|
|
393
|
+
})
|
|
394
|
+
|
|
395
|
+
// Send first unhealthy event - should trigger alert
|
|
396
|
+
app.watt.runtime.emit('application:worker:health', createHealthInfo('service-1', true))
|
|
397
|
+
await sleep(50)
|
|
398
|
+
|
|
399
|
+
// Send an unhealthy event for a different service (service-2) immediately - should trigger an alert, since retention is tracked per service
|
|
400
|
+
app.watt.runtime.emit('application:worker:health', createHealthInfo('service-2', true))
|
|
401
|
+
await sleep(50)
|
|
402
|
+
|
|
403
|
+
// Send a second unhealthy event for service-1 immediately - should be ignored due to retention window
|
|
404
|
+
app.watt.runtime.emit('application:worker:health', createHealthInfo('service-1', true))
|
|
405
|
+
await sleep(100)
|
|
406
|
+
|
|
407
|
+
assert.strictEqual(alertsReceived.length, 2, 'Only one alert should be sent within retention window')
|
|
408
|
+
|
|
409
|
+
await sleep(500)
|
|
410
|
+
|
|
411
|
+
// Send another unhealthy event for service-1 after the retention window has elapsed - should trigger a second alert for service-1 (third overall)
|
|
412
|
+
app.watt.runtime.emit('application:worker:health', createHealthInfo('service-1', true))
|
|
413
|
+
await sleep(100)
|
|
414
|
+
|
|
415
|
+
assert.strictEqual(alertsReceived.length, 3, 'Second alert should be sent after retention window expires')
|
|
416
|
+
})
|
|
417
|
+
|
|
330
418
|
test('should not set up alerts when scaler URL is missing', async (t) => {
|
|
331
419
|
const applicationName = 'test-app'
|
|
332
420
|
const applicationId = randomUUID()
|
package/pnpm-workspace.yaml
DELETED