@platformatic/watt-extra 1.5.3 → 1.6.0-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +5 -1
- package/lib/watt.js +23 -5
- package/package.json +9 -9
- package/plugins/alerts.js +40 -2
- package/plugins/flamegraphs.js +8 -2
- package/test/alerts.test.js +151 -19
- package/test/fixtures/runtime-health-custom/package.json +20 -0
- package/test/fixtures/runtime-health-custom/platformatic.json +21 -0
- package/test/fixtures/runtime-health-custom/services/service-1/package.json +17 -0
- package/test/fixtures/runtime-health-custom/services/service-1/platformatic.json +16 -0
- package/test/fixtures/runtime-health-custom/services/service-1/plugins/example.js +6 -0
- package/test/fixtures/runtime-health-custom/services/service-1/routes/root.cjs +8 -0
- package/test/fixtures/runtime-health-custom/services/service-2/package.json +17 -0
- package/test/fixtures/runtime-health-custom/services/service-2/platformatic.json +16 -0
- package/test/fixtures/runtime-health-custom/services/service-2/plugins/example.js +6 -0
- package/test/fixtures/runtime-health-custom/services/service-2/routes/root.cjs +8 -0
- package/test/fixtures/runtime-health-disabled/package.json +20 -0
- package/test/fixtures/runtime-health-disabled/platformatic.json +20 -0
- package/test/fixtures/runtime-health-disabled/services/service-1/package.json +17 -0
- package/test/fixtures/runtime-health-disabled/services/service-1/platformatic.json +16 -0
- package/test/fixtures/runtime-health-disabled/services/service-1/plugins/example.js +6 -0
- package/test/fixtures/runtime-health-disabled/services/service-1/routes/root.cjs +8 -0
- package/test/fixtures/runtime-health-disabled/services/service-2/package.json +17 -0
- package/test/fixtures/runtime-health-disabled/services/service-2/platformatic.json +16 -0
- package/test/fixtures/runtime-health-disabled/services/service-2/plugins/example.js +6 -0
- package/test/fixtures/runtime-health-disabled/services/service-2/routes/root.cjs +8 -0
- package/test/health.test.js +85 -2
- package/test/patch-config.test.js +117 -2
- package/test/trigger-flamegraphs.test.js +431 -9
|
@@ -3,7 +3,11 @@
|
|
|
3
3
|
"allow": [
|
|
4
4
|
"Read(//work/workspaces/workspace-platformatic/platformatic/**)",
|
|
5
5
|
"Bash(npx borp:*)",
|
|
6
|
-
"Bash(timeout 30 npx borp -c 1 --timeout=20000 ./test/trigger-flamegraphs.test.js)"
|
|
6
|
+
"Bash(timeout 30 npx borp -c 1 --timeout=20000 ./test/trigger-flamegraphs.test.js)",
|
|
7
|
+
"Bash(xargs cat:*)",
|
|
8
|
+
"Bash(pnpm install)",
|
|
9
|
+
"Bash(find:*)",
|
|
10
|
+
"Bash(cat:*)"
|
|
7
11
|
],
|
|
8
12
|
"deny": [],
|
|
9
13
|
"ask": []
|
package/lib/watt.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readFile } from 'node:fs/promises'
|
|
2
2
|
import { join, resolve } from 'node:path'
|
|
3
3
|
import { createRequire } from 'node:module'
|
|
4
|
+
import semver from 'semver'
|
|
4
5
|
|
|
5
6
|
const require = createRequire(import.meta.url)
|
|
6
7
|
|
|
@@ -103,6 +104,11 @@ class Watt {
|
|
|
103
104
|
return version
|
|
104
105
|
}
|
|
105
106
|
|
|
107
|
+
runtimeSupportsNewHealthMetrics () {
|
|
108
|
+
const runtimeVersion = this.getRuntimeVersion()
|
|
109
|
+
return semver.gte(runtimeVersion, '3.18.0')
|
|
110
|
+
}
|
|
111
|
+
|
|
106
112
|
async #createRuntime () {
|
|
107
113
|
this.#logger.info('Creating runtime')
|
|
108
114
|
const { create, transform } = this.#require('@platformatic/runtime')
|
|
@@ -391,14 +397,26 @@ class Watt {
|
|
|
391
397
|
}
|
|
392
398
|
|
|
393
399
|
#configureHealth (config) {
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
400
|
+
if (this.runtimeSupportsNewHealthMetrics()) {
|
|
401
|
+
// New behavior: just force enabled to true, inherit everything else from app config
|
|
402
|
+
config.health = {
|
|
403
|
+
...config.health,
|
|
404
|
+
enabled: true
|
|
405
|
+
}
|
|
406
|
+
} else {
|
|
407
|
+
config.health = {
|
|
408
|
+
...config.health,
|
|
409
|
+
enabled: true,
|
|
410
|
+
interval: 1000,
|
|
411
|
+
maxUnhealthyChecks: 30
|
|
412
|
+
}
|
|
399
413
|
}
|
|
400
414
|
}
|
|
401
415
|
|
|
416
|
+
getHealthConfig () {
|
|
417
|
+
return this.#config?.health
|
|
418
|
+
}
|
|
419
|
+
|
|
402
420
|
#configureScheduler (config) {
|
|
403
421
|
// Disable all watt schedules. We do that because
|
|
404
422
|
// we will create/update them in ICC, not on watt in memory
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@platformatic/watt-extra",
|
|
3
|
-
"version": "1.5.3",
|
|
3
|
+
"version": "1.6.0-alpha.0",
|
|
4
4
|
"description": "The Platformatic runtime manager",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"scripts": {
|
|
@@ -19,10 +19,10 @@
|
|
|
19
19
|
},
|
|
20
20
|
"devDependencies": {
|
|
21
21
|
"@fastify/websocket": "^11.1.0",
|
|
22
|
-
"@platformatic/composer": "^3.
|
|
23
|
-
"@platformatic/next": "^3.
|
|
24
|
-
"@platformatic/node": "^3.
|
|
25
|
-
"@platformatic/service": "^3.
|
|
22
|
+
"@platformatic/composer": "^3.18.0",
|
|
23
|
+
"@platformatic/next": "^3.18.0",
|
|
24
|
+
"@platformatic/node": "^3.18.0",
|
|
25
|
+
"@platformatic/service": "^3.18.0",
|
|
26
26
|
"atomic-sleep": "^1.0.0",
|
|
27
27
|
"borp": "^0.21.0",
|
|
28
28
|
"eslint": "9",
|
|
@@ -30,16 +30,16 @@
|
|
|
30
30
|
"fastify-plugin": "^5.0.1",
|
|
31
31
|
"neostandard": "^0.12.0",
|
|
32
32
|
"next": "^16.0.0",
|
|
33
|
-
"platformatic": "^3.
|
|
33
|
+
"platformatic": "^3.18.0",
|
|
34
34
|
"pprof-format": "^2.1.0",
|
|
35
35
|
"why-is-node-running": "^2.3.0"
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
38
|
"@datadog/pprof": "^5.9.0",
|
|
39
39
|
"@fastify/error": "^4.2.0",
|
|
40
|
-
"@platformatic/foundation": "^3.
|
|
41
|
-
"@platformatic/runtime": "^3.
|
|
42
|
-
"@platformatic/wattpm-pprof-capture": "^3.
|
|
40
|
+
"@platformatic/foundation": "^3.18.0",
|
|
41
|
+
"@platformatic/runtime": "^3.18.0",
|
|
42
|
+
"@platformatic/wattpm-pprof-capture": "^3.18.0",
|
|
43
43
|
"avvio": "^9.1.0",
|
|
44
44
|
"chalk": "^4.1.2",
|
|
45
45
|
"commist": "^3.2.0",
|
package/plugins/alerts.js
CHANGED
|
@@ -33,7 +33,7 @@ async function alerts (app, _opts) {
|
|
|
33
33
|
return
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
const processHealthInfo = async (healthInfo) => {
|
|
37
37
|
if (!healthInfo) {
|
|
38
38
|
app.log.error('No health info received')
|
|
39
39
|
return
|
|
@@ -120,7 +120,45 @@ async function alerts (app, _opts) {
|
|
|
120
120
|
app.log.error({ err }, 'Failed to send a flamegraph')
|
|
121
121
|
}
|
|
122
122
|
}
|
|
123
|
-
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
if (app.watt.runtimeSupportsNewHealthMetrics()) {
|
|
126
|
+
// Runtime >= 3.18.0: Listen to health:metrics
|
|
127
|
+
runtime.on('application:worker:health:metrics', async (health) => {
|
|
128
|
+
if (!health) {
|
|
129
|
+
app.log.error('No health info received')
|
|
130
|
+
return
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
const {
|
|
134
|
+
id,
|
|
135
|
+
application: serviceId,
|
|
136
|
+
currentHealth
|
|
137
|
+
} = health
|
|
138
|
+
|
|
139
|
+
const { elu, heapUsed, heapTotal } = currentHealth
|
|
140
|
+
const healthConfig = app.watt.getHealthConfig()
|
|
141
|
+
|
|
142
|
+
const maxELU = healthConfig?.maxELU ?? 0.99
|
|
143
|
+
const maxHeapUsed = healthConfig?.maxHeapUsed ?? 0.99
|
|
144
|
+
|
|
145
|
+
const memoryUsage = heapUsed / heapTotal
|
|
146
|
+
const unhealthy = elu > maxELU || memoryUsage > maxHeapUsed
|
|
147
|
+
|
|
148
|
+
const healthInfo = {
|
|
149
|
+
id,
|
|
150
|
+
application: serviceId,
|
|
151
|
+
currentHealth,
|
|
152
|
+
unhealthy,
|
|
153
|
+
healthConfig: healthConfig || {}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
await processHealthInfo(healthInfo)
|
|
157
|
+
})
|
|
158
|
+
} else {
|
|
159
|
+
// Runtime < 3.18.0:
|
|
160
|
+
runtime.on('application:worker:health', processHealthInfo)
|
|
161
|
+
}
|
|
124
162
|
}
|
|
125
163
|
app.setupAlerts = setupAlerts
|
|
126
164
|
}
|
package/plugins/flamegraphs.js
CHANGED
|
@@ -18,19 +18,25 @@ async function flamegraphs (app, _opts) {
|
|
|
18
18
|
const startProfilingOnWorker = async (runtime, workerFullId, logContext = {}) => {
|
|
19
19
|
await sleep(gracePeriod)
|
|
20
20
|
|
|
21
|
+
const runtimeConfig = await runtime.getRuntimeConfig()
|
|
22
|
+
// Get application details to read service-level sourceMaps setting
|
|
23
|
+
const appDetails = await runtime.getApplicationDetails(workerFullId)
|
|
24
|
+
// Resolve sourceMaps: service-level overrides runtime-level (same logic as runtime.js:1440)
|
|
25
|
+
const sourceMaps = appDetails.config?.sourceMaps ?? runtimeConfig.sourceMaps
|
|
26
|
+
|
|
21
27
|
try {
|
|
22
28
|
// Start CPU profiling
|
|
23
29
|
await runtime.sendCommandToApplication(
|
|
24
30
|
workerFullId,
|
|
25
31
|
'startProfiling',
|
|
26
|
-
{ durationMillis, eluThreshold, type: 'cpu' }
|
|
32
|
+
{ durationMillis, eluThreshold, type: 'cpu', sourceMaps }
|
|
27
33
|
)
|
|
28
34
|
|
|
29
35
|
// Start HEAP profiling
|
|
30
36
|
await runtime.sendCommandToApplication(
|
|
31
37
|
workerFullId,
|
|
32
38
|
'startProfiling',
|
|
33
|
-
{ durationMillis, eluThreshold, type: 'heap' }
|
|
39
|
+
{ durationMillis, eluThreshold, type: 'heap', sourceMaps }
|
|
34
40
|
)
|
|
35
41
|
} catch (err) {
|
|
36
42
|
app.log.error({ err, ...logContext }, 'Failed to start profiling')
|
package/test/alerts.test.js
CHANGED
|
@@ -11,6 +11,39 @@ import { start } from '../index.js'
|
|
|
11
11
|
const __filename = fileURLToPath(import.meta.url)
|
|
12
12
|
const __dirname = dirname(__filename)
|
|
13
13
|
|
|
14
|
+
function emitHealthEvent (app, healthInfo) {
|
|
15
|
+
if (app.watt.runtimeSupportsNewHealthMetrics()) {
|
|
16
|
+
if (!healthInfo) {
|
|
17
|
+
// Emit null for testing error handling
|
|
18
|
+
app.watt.runtime.emit('application:worker:health:metrics', null)
|
|
19
|
+
return
|
|
20
|
+
}
|
|
21
|
+
// Runtime >= 3.18.0: emit health:metrics event with real event shape
|
|
22
|
+
const { id, application, currentHealth } = healthInfo
|
|
23
|
+
|
|
24
|
+
// Add currentELU to match real event shape
|
|
25
|
+
const enrichedCurrentHealth = {
|
|
26
|
+
...currentHealth,
|
|
27
|
+
currentELU: {
|
|
28
|
+
idle: 1000,
|
|
29
|
+
active: currentHealth.elu * 1000,
|
|
30
|
+
utilization: currentHealth.elu
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
app.watt.runtime.emit('application:worker:health:metrics', {
|
|
35
|
+
id,
|
|
36
|
+
application,
|
|
37
|
+
worker: 0,
|
|
38
|
+
currentHealth: enrichedCurrentHealth,
|
|
39
|
+
healthSignals: []
|
|
40
|
+
})
|
|
41
|
+
} else {
|
|
42
|
+
// Runtime < 3.18.0: emit health event with full healthInfo
|
|
43
|
+
app.watt.runtime.emit('application:worker:health', healthInfo)
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
14
47
|
test('should send alert when service becomes unhealthy', async (t) => {
|
|
15
48
|
const applicationName = 'test-app'
|
|
16
49
|
const applicationId = randomUUID()
|
|
@@ -65,7 +98,7 @@ test('should send alert when service becomes unhealthy', async (t) => {
|
|
|
65
98
|
id: 'main:0',
|
|
66
99
|
application: 'main',
|
|
67
100
|
currentHealth: {
|
|
68
|
-
elu: 0.
|
|
101
|
+
elu: 0.995,
|
|
69
102
|
heapUsed: 76798040,
|
|
70
103
|
heapTotal: 99721216
|
|
71
104
|
},
|
|
@@ -81,15 +114,20 @@ test('should send alert when service becomes unhealthy', async (t) => {
|
|
|
81
114
|
}
|
|
82
115
|
}
|
|
83
116
|
|
|
84
|
-
app
|
|
117
|
+
emitHealthEvent(app, healthInfo)
|
|
85
118
|
|
|
86
119
|
await sleep(200)
|
|
87
120
|
|
|
88
121
|
assert.ok(alertReceived, 'Alert should have been received')
|
|
89
122
|
assert.strictEqual(alertReceived.applicationId, applicationId)
|
|
90
|
-
assert.
|
|
123
|
+
assert.strictEqual(alertReceived.alert.id, healthInfo.id)
|
|
91
124
|
assert.strictEqual(alertReceived.alert.application, 'main')
|
|
92
125
|
assert.strictEqual(alertReceived.alert.service, 'main')
|
|
126
|
+
assert.strictEqual(alertReceived.alert.unhealthy, true)
|
|
127
|
+
assert.strictEqual(alertReceived.alert.currentHealth.elu, healthInfo.currentHealth.elu)
|
|
128
|
+
assert.strictEqual(alertReceived.alert.currentHealth.heapUsed, healthInfo.currentHealth.heapUsed)
|
|
129
|
+
assert.strictEqual(alertReceived.alert.currentHealth.heapTotal, healthInfo.currentHealth.heapTotal)
|
|
130
|
+
assert.strictEqual(alertReceived.alert.healthConfig, undefined, 'healthConfig should be deleted from alert')
|
|
93
131
|
assert.ok(Array.isArray(alertReceived.healthHistory), 'Health history should be an array')
|
|
94
132
|
assert.ok(alertReceived.healthHistory.length > 0, 'Health history should not be empty')
|
|
95
133
|
assert.strictEqual(alertReceived.healthHistory[0].application, 'main')
|
|
@@ -158,7 +196,7 @@ test('should not send alert when application is healthy', async (t) => {
|
|
|
158
196
|
}
|
|
159
197
|
}
|
|
160
198
|
|
|
161
|
-
app
|
|
199
|
+
emitHealthEvent(app, healthInfo)
|
|
162
200
|
|
|
163
201
|
await sleep(200)
|
|
164
202
|
|
|
@@ -225,7 +263,7 @@ test('should cache health data and include it in alerts', async (t) => {
|
|
|
225
263
|
}
|
|
226
264
|
}
|
|
227
265
|
|
|
228
|
-
app
|
|
266
|
+
emitHealthEvent(app, healthyInfo)
|
|
229
267
|
await sleep(100) // Small delay between events
|
|
230
268
|
}
|
|
231
269
|
|
|
@@ -234,7 +272,7 @@ test('should cache health data and include it in alerts', async (t) => {
|
|
|
234
272
|
id: 'service-1',
|
|
235
273
|
application: 'service-1',
|
|
236
274
|
currentHealth: {
|
|
237
|
-
elu: 0.
|
|
275
|
+
elu: 0.995,
|
|
238
276
|
heapUsed: 76798040,
|
|
239
277
|
heapTotal: 99721216
|
|
240
278
|
},
|
|
@@ -250,7 +288,7 @@ test('should cache health data and include it in alerts', async (t) => {
|
|
|
250
288
|
}
|
|
251
289
|
}
|
|
252
290
|
|
|
253
|
-
app
|
|
291
|
+
emitHealthEvent(app, unhealthyInfo)
|
|
254
292
|
await sleep(200)
|
|
255
293
|
|
|
256
294
|
assert.ok(alertReceived, 'Alert should have been received')
|
|
@@ -319,7 +357,7 @@ test('should not fail when health info is missing', async (t) => {
|
|
|
319
357
|
await icc.close()
|
|
320
358
|
})
|
|
321
359
|
|
|
322
|
-
app
|
|
360
|
+
emitHealthEvent(app, null)
|
|
323
361
|
|
|
324
362
|
await sleep(200)
|
|
325
363
|
|
|
@@ -373,7 +411,7 @@ test('should respect alert retention window', async (t) => {
|
|
|
373
411
|
id: applicationId,
|
|
374
412
|
application: applicationId,
|
|
375
413
|
currentHealth: {
|
|
376
|
-
elu: unhealthy ? 0.
|
|
414
|
+
elu: unhealthy ? 0.995 : 0.5,
|
|
377
415
|
heapUsed: 76798040,
|
|
378
416
|
heapTotal: 99721216
|
|
379
417
|
},
|
|
@@ -390,15 +428,15 @@ test('should respect alert retention window', async (t) => {
|
|
|
390
428
|
})
|
|
391
429
|
|
|
392
430
|
// Send first unhealthy event - should trigger alert
|
|
393
|
-
app
|
|
431
|
+
emitHealthEvent(app, createHealthInfo('service-1', true))
|
|
394
432
|
await sleep(50)
|
|
395
433
|
|
|
396
434
|
// Send second unhealthy event immediately - should trigger alert
|
|
397
|
-
app
|
|
435
|
+
emitHealthEvent(app, createHealthInfo('service-2', true))
|
|
398
436
|
await sleep(50)
|
|
399
437
|
|
|
400
438
|
// Send second unhealthy event immediately - should be ignored due to retention window
|
|
401
|
-
app
|
|
439
|
+
emitHealthEvent(app, createHealthInfo('service-1', true))
|
|
402
440
|
await sleep(100)
|
|
403
441
|
|
|
404
442
|
assert.strictEqual(alertsReceived.length, 2, 'Only one alert should be sent within retention window')
|
|
@@ -406,7 +444,7 @@ test('should respect alert retention window', async (t) => {
|
|
|
406
444
|
await sleep(500)
|
|
407
445
|
|
|
408
446
|
// Send third unhealthy event - should trigger second alert
|
|
409
|
-
app
|
|
447
|
+
emitHealthEvent(app, createHealthInfo('service-1', true))
|
|
410
448
|
await sleep(100)
|
|
411
449
|
|
|
412
450
|
assert.strictEqual(alertsReceived.length, 3, 'Second alert should be sent after retention window expires')
|
|
@@ -497,7 +535,7 @@ test('should send alert when flamegraphs are disabled', async (t) => {
|
|
|
497
535
|
id: 'main:0',
|
|
498
536
|
application: 'main',
|
|
499
537
|
currentHealth: {
|
|
500
|
-
elu: 0.
|
|
538
|
+
elu: 0.995,
|
|
501
539
|
heapUsed: 76798040,
|
|
502
540
|
heapTotal: 99721216
|
|
503
541
|
},
|
|
@@ -513,15 +551,20 @@ test('should send alert when flamegraphs are disabled', async (t) => {
|
|
|
513
551
|
}
|
|
514
552
|
}
|
|
515
553
|
|
|
516
|
-
app
|
|
554
|
+
emitHealthEvent(app, healthInfo)
|
|
517
555
|
|
|
518
556
|
await sleep(200)
|
|
519
557
|
|
|
520
558
|
assert.ok(alertReceived, 'Alert should have been received')
|
|
521
559
|
assert.strictEqual(alertReceived.applicationId, applicationId)
|
|
522
|
-
assert.
|
|
560
|
+
assert.strictEqual(alertReceived.alert.id, healthInfo.id)
|
|
523
561
|
assert.strictEqual(alertReceived.alert.application, 'main')
|
|
524
562
|
assert.strictEqual(alertReceived.alert.service, 'main')
|
|
563
|
+
assert.strictEqual(alertReceived.alert.unhealthy, true)
|
|
564
|
+
assert.strictEqual(alertReceived.alert.currentHealth.elu, healthInfo.currentHealth.elu)
|
|
565
|
+
assert.strictEqual(alertReceived.alert.currentHealth.heapUsed, healthInfo.currentHealth.heapUsed)
|
|
566
|
+
assert.strictEqual(alertReceived.alert.currentHealth.heapTotal, healthInfo.currentHealth.heapTotal)
|
|
567
|
+
assert.strictEqual(alertReceived.alert.healthConfig, undefined, 'healthConfig should be deleted from alert')
|
|
525
568
|
assert.ok(Array.isArray(alertReceived.healthHistory), 'Health history should be an array')
|
|
526
569
|
assert.ok(alertReceived.healthHistory.length > 0, 'Health history should not be empty')
|
|
527
570
|
assert.strictEqual(alertReceived.healthHistory[0].application, 'main')
|
|
@@ -577,7 +620,7 @@ test('should send alert when failed to send a flamegraph', async (t) => {
|
|
|
577
620
|
id: 'main:0',
|
|
578
621
|
application: 'main',
|
|
579
622
|
currentHealth: {
|
|
580
|
-
elu: 0.
|
|
623
|
+
elu: 0.995,
|
|
581
624
|
heapUsed: 76798040,
|
|
582
625
|
heapTotal: 99721216
|
|
583
626
|
},
|
|
@@ -593,18 +636,107 @@ test('should send alert when failed to send a flamegraph', async (t) => {
|
|
|
593
636
|
}
|
|
594
637
|
}
|
|
595
638
|
|
|
596
|
-
app
|
|
639
|
+
emitHealthEvent(app, healthInfo)
|
|
597
640
|
|
|
598
641
|
await sleep(200)
|
|
599
642
|
|
|
600
643
|
assert.ok(alertReceived, 'Alert should have been received')
|
|
601
644
|
assert.strictEqual(alertReceived.applicationId, applicationId)
|
|
602
|
-
assert.
|
|
645
|
+
assert.strictEqual(alertReceived.alert.id, healthInfo.id)
|
|
603
646
|
assert.strictEqual(alertReceived.alert.application, 'main')
|
|
604
647
|
assert.strictEqual(alertReceived.alert.service, 'main')
|
|
648
|
+
assert.strictEqual(alertReceived.alert.unhealthy, true)
|
|
649
|
+
assert.strictEqual(alertReceived.alert.currentHealth.elu, healthInfo.currentHealth.elu)
|
|
650
|
+
assert.strictEqual(alertReceived.alert.currentHealth.heapUsed, healthInfo.currentHealth.heapUsed)
|
|
651
|
+
assert.strictEqual(alertReceived.alert.currentHealth.heapTotal, healthInfo.currentHealth.heapTotal)
|
|
652
|
+
assert.strictEqual(alertReceived.alert.healthConfig, undefined, 'healthConfig should be deleted from alert')
|
|
605
653
|
assert.ok(Array.isArray(alertReceived.healthHistory), 'Health history should be an array')
|
|
606
654
|
assert.ok(alertReceived.healthHistory.length > 0, 'Health history should not be empty')
|
|
607
655
|
assert.strictEqual(alertReceived.healthHistory[0].application, 'main')
|
|
608
656
|
assert.strictEqual(alertReceived.healthHistory[0].service, 'main')
|
|
609
657
|
assert.equal(alertReceived.flamegraph, null, 'Flamegraph should be null')
|
|
610
658
|
})
|
|
659
|
+
|
|
660
|
+
test('should handle old runtime (< 3.18.0) health events', async (t) => {
|
|
661
|
+
const applicationName = 'test-app'
|
|
662
|
+
const applicationId = randomUUID()
|
|
663
|
+
const applicationPath = join(__dirname, 'fixtures', 'service-1')
|
|
664
|
+
|
|
665
|
+
let alertReceived = null
|
|
666
|
+
|
|
667
|
+
const getAuthorizationHeader = async (headers) => {
|
|
668
|
+
return { ...headers, authorization: 'Bearer test-token' }
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
const icc = await startICC(t, {
|
|
672
|
+
applicationId,
|
|
673
|
+
applicationName,
|
|
674
|
+
processAlerts: (req) => {
|
|
675
|
+
const alert = req.body
|
|
676
|
+
assert.equal(req.headers.authorization, 'Bearer test-token')
|
|
677
|
+
alertReceived = alert
|
|
678
|
+
return { id: 'test-alert-id', ...alert }
|
|
679
|
+
}
|
|
680
|
+
})
|
|
681
|
+
|
|
682
|
+
setUpEnvironment({
|
|
683
|
+
PLT_APP_NAME: applicationName,
|
|
684
|
+
PLT_APP_DIR: applicationPath,
|
|
685
|
+
PLT_ICC_URL: 'http://127.0.0.1:3000'
|
|
686
|
+
})
|
|
687
|
+
|
|
688
|
+
const app = await start()
|
|
689
|
+
app.getAuthorizationHeader = getAuthorizationHeader
|
|
690
|
+
|
|
691
|
+
// Mock the runtime version check to simulate old runtime
|
|
692
|
+
const originalFn = app.watt.runtimeSupportsNewHealthMetrics
|
|
693
|
+
app.watt.runtimeSupportsNewHealthMetrics = () => false
|
|
694
|
+
|
|
695
|
+
// Remove all existing event listeners
|
|
696
|
+
app.watt.runtime.removeAllListeners('application:worker:health:metrics')
|
|
697
|
+
app.watt.runtime.removeAllListeners('application:worker:health')
|
|
698
|
+
|
|
699
|
+
// Re-setup alerts with the mocked function (will use old path)
|
|
700
|
+
await app.setupAlerts()
|
|
701
|
+
|
|
702
|
+
t.after(async () => {
|
|
703
|
+
app.watt.runtimeSupportsNewHealthMetrics = originalFn
|
|
704
|
+
await app.close()
|
|
705
|
+
await icc.close()
|
|
706
|
+
})
|
|
707
|
+
|
|
708
|
+
// Manually trigger health event with unhealthy state using old event format
|
|
709
|
+
const healthInfo = {
|
|
710
|
+
id: 'main:0',
|
|
711
|
+
application: 'main',
|
|
712
|
+
currentHealth: {
|
|
713
|
+
elu: 0.995,
|
|
714
|
+
heapUsed: 76798040,
|
|
715
|
+
heapTotal: 99721216
|
|
716
|
+
},
|
|
717
|
+
unhealthy: true,
|
|
718
|
+
healthConfig: {
|
|
719
|
+
enabled: true,
|
|
720
|
+
interval: 1000,
|
|
721
|
+
gracePeriod: 1000,
|
|
722
|
+
maxUnhealthyChecks: 10,
|
|
723
|
+
maxELU: 0.99,
|
|
724
|
+
maxHeapUsed: 0.99,
|
|
725
|
+
maxHeapTotal: 4294967296
|
|
726
|
+
}
|
|
727
|
+
}
|
|
728
|
+
|
|
729
|
+
// Emit using old event format (application:worker:health)
|
|
730
|
+
app.watt.runtime.emit('application:worker:health', healthInfo)
|
|
731
|
+
|
|
732
|
+
await sleep(200)
|
|
733
|
+
|
|
734
|
+
assert.ok(alertReceived, 'Alert should have been received')
|
|
735
|
+
assert.strictEqual(alertReceived.applicationId, applicationId)
|
|
736
|
+
assert.strictEqual(alertReceived.alert.id, healthInfo.id)
|
|
737
|
+
assert.strictEqual(alertReceived.alert.application, 'main')
|
|
738
|
+
assert.strictEqual(alertReceived.alert.service, 'main')
|
|
739
|
+
assert.strictEqual(alertReceived.alert.unhealthy, true)
|
|
740
|
+
assert.deepStrictEqual(alertReceived.alert.currentHealth, healthInfo.currentHealth)
|
|
741
|
+
assert.strictEqual(alertReceived.alert.healthConfig, undefined, 'healthConfig should be deleted from alert')
|
|
742
|
+
})
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "runtime-health",
|
|
3
|
+
"workspaces": [
|
|
4
|
+
"services/*"
|
|
5
|
+
],
|
|
6
|
+
"scripts": {
|
|
7
|
+
"start": "platformatic start"
|
|
8
|
+
},
|
|
9
|
+
"devDependencies": {
|
|
10
|
+
"fastify": "^5.0.0",
|
|
11
|
+
"borp": "^0.19.0"
|
|
12
|
+
},
|
|
13
|
+
"dependencies": {
|
|
14
|
+
"@platformatic/runtime": "workspace:*",
|
|
15
|
+
"platformatic": "workspace:*"
|
|
16
|
+
},
|
|
17
|
+
"engines": {
|
|
18
|
+
"node": "^18.8.0 || >=20.6.0"
|
|
19
|
+
}
|
|
20
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://schemas.platformatic.dev/@platformatic/runtime/2.62.1.json",
|
|
3
|
+
"entrypoint": "service-1",
|
|
4
|
+
"watch": true,
|
|
5
|
+
"autoload": {
|
|
6
|
+
"path": "services",
|
|
7
|
+
"exclude": ["docs"]
|
|
8
|
+
},
|
|
9
|
+
"logger": {
|
|
10
|
+
"level": "info"
|
|
11
|
+
},
|
|
12
|
+
"server": {
|
|
13
|
+
"hostname": "127.0.0.1",
|
|
14
|
+
"port": "3042"
|
|
15
|
+
},
|
|
16
|
+
"health": {
|
|
17
|
+
"enabled": true,
|
|
18
|
+
"interval": 2500,
|
|
19
|
+
"maxUnhealthyChecks": 50
|
|
20
|
+
}
|
|
21
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "service-1",
|
|
3
|
+
"scripts": {
|
|
4
|
+
"start": "platformatic start",
|
|
5
|
+
"test": "borp"
|
|
6
|
+
},
|
|
7
|
+
"devDependencies": {
|
|
8
|
+
"fastify": "^5.0.0",
|
|
9
|
+
"borp": "^0.19.0"
|
|
10
|
+
},
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@platformatic/service": "^2.70.0"
|
|
13
|
+
},
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": "^18.8.0 || >=20.6.0"
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://schemas.platformatic.dev/@platformatic/service/2.62.1.json",
|
|
3
|
+
"service": {
|
|
4
|
+
"openapi": true
|
|
5
|
+
},
|
|
6
|
+
"watch": true,
|
|
7
|
+
"plugins": {
|
|
8
|
+
"paths": [
|
|
9
|
+
{
|
|
10
|
+
"path": "./plugins",
|
|
11
|
+
"encapsulate": false
|
|
12
|
+
},
|
|
13
|
+
"./routes"
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "service-2",
|
|
3
|
+
"scripts": {
|
|
4
|
+
"start": "platformatic start",
|
|
5
|
+
"test": "borp"
|
|
6
|
+
},
|
|
7
|
+
"devDependencies": {
|
|
8
|
+
"fastify": "^5.0.0",
|
|
9
|
+
"borp": "^0.19.0"
|
|
10
|
+
},
|
|
11
|
+
"dependencies": {
|
|
12
|
+
"@platformatic/service": "^2.70.0"
|
|
13
|
+
},
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": "^18.8.0 || >=20.6.0"
|
|
16
|
+
}
|
|
17
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://schemas.platformatic.dev/@platformatic/service/2.62.1.json",
|
|
3
|
+
"service": {
|
|
4
|
+
"openapi": true
|
|
5
|
+
},
|
|
6
|
+
"watch": true,
|
|
7
|
+
"plugins": {
|
|
8
|
+
"paths": [
|
|
9
|
+
{
|
|
10
|
+
"path": "./plugins",
|
|
11
|
+
"encapsulate": false
|
|
12
|
+
},
|
|
13
|
+
"./routes"
|
|
14
|
+
]
|
|
15
|
+
}
|
|
16
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "runtime-health",
|
|
3
|
+
"workspaces": [
|
|
4
|
+
"services/*"
|
|
5
|
+
],
|
|
6
|
+
"scripts": {
|
|
7
|
+
"start": "platformatic start"
|
|
8
|
+
},
|
|
9
|
+
"devDependencies": {
|
|
10
|
+
"fastify": "^5.0.0",
|
|
11
|
+
"borp": "^0.19.0"
|
|
12
|
+
},
|
|
13
|
+
"dependencies": {
|
|
14
|
+
"@platformatic/runtime": "workspace:*",
|
|
15
|
+
"platformatic": "workspace:*"
|
|
16
|
+
},
|
|
17
|
+
"engines": {
|
|
18
|
+
"node": "^18.8.0 || >=20.6.0"
|
|
19
|
+
}
|
|
20
|
+
}
|