@platformatic/watt-extra 1.6.3-alpha.5 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +8 -5
- package/package.json +1 -1
- package/plugins/alerts.js +25 -1
- package/plugins/env.js +2 -1
- package/plugins/flamegraphs.js +210 -244
- package/plugins/health-signals.js +3 -5
- package/plugins/update.js +2 -2
- package/test/alerts.test.js +179 -7
- package/test/health-signals.test.js +5 -2
- package/test/helper.js +1 -0
- package/test/trigger-flamegraphs.test.js +439 -187
- package/test/profiler.test.js +0 -443
package/plugins/update.js
CHANGED
|
@@ -23,14 +23,14 @@ async function updatePlugin (app) {
|
|
|
23
23
|
// Handle trigger-flamegraph command from ICC
|
|
24
24
|
if (command === 'trigger-flamegraph') {
|
|
25
25
|
app.log.info({ command }, 'Received trigger-flamegraph command from ICC')
|
|
26
|
-
app.
|
|
26
|
+
app.sendFlamegraphs({ profileType: 'cpu' })
|
|
27
27
|
return
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
// Handle trigger-heapprofile command from ICC
|
|
31
31
|
if (command === 'trigger-heapprofile') {
|
|
32
32
|
app.log.info({ command }, 'Received trigger-heapprofile command from ICC')
|
|
33
|
-
app.
|
|
33
|
+
app.sendFlamegraphs({ profileType: 'heap' })
|
|
34
34
|
return
|
|
35
35
|
}
|
|
36
36
|
|
package/test/alerts.test.js
CHANGED
|
@@ -90,6 +90,9 @@ test('should send alert when service becomes unhealthy', async (t) => {
|
|
|
90
90
|
await icc.close()
|
|
91
91
|
})
|
|
92
92
|
|
|
93
|
+
// Wait for the first flamegraph to be generated
|
|
94
|
+
await sleep(5000)
|
|
95
|
+
|
|
93
96
|
// Manually trigger health event with unhealthy state
|
|
94
97
|
const healthInfo = {
|
|
95
98
|
id: 'main:0',
|
|
@@ -130,9 +133,6 @@ test('should send alert when service becomes unhealthy', async (t) => {
|
|
|
130
133
|
assert.strictEqual(alertReceived.healthHistory[0].application, 'main')
|
|
131
134
|
assert.strictEqual(alertReceived.healthHistory[0].service, 'main')
|
|
132
135
|
|
|
133
|
-
// Wait for flamegraph to be generated (duration is 2 seconds)
|
|
134
|
-
await sleep(2500)
|
|
135
|
-
|
|
136
136
|
assert.ok(flamegraphReceived, 'Flamegraph should have been received')
|
|
137
137
|
|
|
138
138
|
const profile = Profile.decode(flamegraphReceived)
|
|
@@ -526,6 +526,8 @@ test('should send alert when flamegraphs are disabled', async (t) => {
|
|
|
526
526
|
await icc.close()
|
|
527
527
|
})
|
|
528
528
|
|
|
529
|
+
await sleep(5000)
|
|
530
|
+
|
|
529
531
|
// Manually trigger health event with unhealthy state
|
|
530
532
|
const healthInfo = {
|
|
531
533
|
id: 'main:0',
|
|
@@ -609,6 +611,8 @@ test('should send alert when failed to send a flamegraph', async (t) => {
|
|
|
609
611
|
await icc.close()
|
|
610
612
|
})
|
|
611
613
|
|
|
614
|
+
await sleep(5000)
|
|
615
|
+
|
|
612
616
|
// Manually trigger health event with unhealthy state
|
|
613
617
|
const healthInfo = {
|
|
614
618
|
id: 'main:0',
|
|
@@ -795,6 +799,9 @@ test('should attach one flamegraph to multiple alerts', async (t) => {
|
|
|
795
799
|
await icc.close()
|
|
796
800
|
})
|
|
797
801
|
|
|
802
|
+
// Wait for the first flamegraph to be generated
|
|
803
|
+
await sleep(5000)
|
|
804
|
+
|
|
798
805
|
// Manually trigger health event with unhealthy state
|
|
799
806
|
const healthInfo = {
|
|
800
807
|
id: 'main:0',
|
|
@@ -820,8 +827,8 @@ test('should attach one flamegraph to multiple alerts', async (t) => {
|
|
|
820
827
|
await sleep(1000)
|
|
821
828
|
emitHealthEvent(app, healthInfo)
|
|
822
829
|
|
|
823
|
-
// Wait for
|
|
824
|
-
await sleep(
|
|
830
|
+
// Wait for flamegraphs to be sent
|
|
831
|
+
await sleep(1000)
|
|
825
832
|
|
|
826
833
|
assert.strictEqual(receivedAlerts.length, 2)
|
|
827
834
|
const alert1 = receivedAlerts[0]
|
|
@@ -895,6 +902,9 @@ test('should send flamegraphs if attaching fails', async (t) => {
|
|
|
895
902
|
await icc.close()
|
|
896
903
|
})
|
|
897
904
|
|
|
905
|
+
// Wait for the first flamegraph to be generated
|
|
906
|
+
await sleep(5000)
|
|
907
|
+
|
|
898
908
|
// Manually trigger health event with unhealthy state
|
|
899
909
|
const healthInfo = {
|
|
900
910
|
id: 'main:0',
|
|
@@ -920,8 +930,8 @@ test('should send flamegraphs if attaching fails', async (t) => {
|
|
|
920
930
|
await sleep(1000)
|
|
921
931
|
emitHealthEvent(app, healthInfo)
|
|
922
932
|
|
|
923
|
-
// Wait for
|
|
924
|
-
await sleep(
|
|
933
|
+
// Wait for flamegraphs to be sent
|
|
934
|
+
await sleep(1000)
|
|
925
935
|
|
|
926
936
|
assert.strictEqual(receivedAlerts.length, 2)
|
|
927
937
|
const alert1 = receivedAlerts[0]
|
|
@@ -938,3 +948,165 @@ test('should send flamegraphs if attaching fails', async (t) => {
|
|
|
938
948
|
assert.strictEqual(flamegraph2.id, 'flamegraph-2')
|
|
939
949
|
assert.strictEqual(flamegraph2.alertId, 'alert-2')
|
|
940
950
|
})
|
|
951
|
+
|
|
952
|
+
test('should skip alerts during grace period but still cache health data', async (t) => {
|
|
953
|
+
const applicationName = 'test-app'
|
|
954
|
+
const applicationId = randomUUID()
|
|
955
|
+
const applicationPath = join(__dirname, 'fixtures', 'service-1')
|
|
956
|
+
|
|
957
|
+
let alertReceived = null
|
|
958
|
+
|
|
959
|
+
const getAuthorizationHeader = async (headers) => {
|
|
960
|
+
return { ...headers, authorization: 'Bearer test-token' }
|
|
961
|
+
}
|
|
962
|
+
|
|
963
|
+
const icc = await startICC(t, {
|
|
964
|
+
applicationId,
|
|
965
|
+
applicationName,
|
|
966
|
+
processAlerts: (req) => {
|
|
967
|
+
const alert = req.body
|
|
968
|
+
alertReceived = alert
|
|
969
|
+
return { id: 'test-alert-id', ...alert }
|
|
970
|
+
}
|
|
971
|
+
})
|
|
972
|
+
|
|
973
|
+
// Set grace period to 2 seconds for this test
|
|
974
|
+
setUpEnvironment({
|
|
975
|
+
PLT_APP_NAME: applicationName,
|
|
976
|
+
PLT_APP_DIR: applicationPath,
|
|
977
|
+
PLT_ICC_URL: 'http://127.0.0.1:3000',
|
|
978
|
+
PLT_ALERTS_GRACE_PERIOD_SEC: '2'
|
|
979
|
+
})
|
|
980
|
+
|
|
981
|
+
const app = await start()
|
|
982
|
+
app.getAuthorizationHeader = getAuthorizationHeader
|
|
983
|
+
|
|
984
|
+
t.after(async () => {
|
|
985
|
+
await app.close()
|
|
986
|
+
await icc.close()
|
|
987
|
+
})
|
|
988
|
+
|
|
989
|
+
// Manually trigger unhealthy event during grace period
|
|
990
|
+
const healthInfo = {
|
|
991
|
+
id: 'main:0',
|
|
992
|
+
application: 'main',
|
|
993
|
+
currentHealth: {
|
|
994
|
+
elu: 0.995,
|
|
995
|
+
heapUsed: 76798040,
|
|
996
|
+
heapTotal: 99721216
|
|
997
|
+
},
|
|
998
|
+
unhealthy: true,
|
|
999
|
+
healthConfig: {
|
|
1000
|
+
enabled: true,
|
|
1001
|
+
interval: 1000,
|
|
1002
|
+
gracePeriod: 1000,
|
|
1003
|
+
maxUnhealthyChecks: 10,
|
|
1004
|
+
maxELU: 0.99,
|
|
1005
|
+
maxHeapUsed: 0.99,
|
|
1006
|
+
maxHeapTotal: 4294967296
|
|
1007
|
+
}
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
emitHealthEvent(app, healthInfo)
|
|
1011
|
+
await sleep(200)
|
|
1012
|
+
|
|
1013
|
+
// Alert should NOT have been received during grace period
|
|
1014
|
+
assert.strictEqual(alertReceived, null, 'No alert should be sent during grace period')
|
|
1015
|
+
|
|
1016
|
+
// Wait for grace period to expire
|
|
1017
|
+
await sleep(2500)
|
|
1018
|
+
|
|
1019
|
+
// Now trigger another unhealthy event after grace period
|
|
1020
|
+
emitHealthEvent(app, healthInfo)
|
|
1021
|
+
await sleep(200)
|
|
1022
|
+
|
|
1023
|
+
// Alert should now be received
|
|
1024
|
+
assert.ok(alertReceived, 'Alert should be sent after grace period expires')
|
|
1025
|
+
assert.strictEqual(alertReceived.applicationId, applicationId)
|
|
1026
|
+
assert.strictEqual(alertReceived.alert.application, 'main')
|
|
1027
|
+
})
|
|
1028
|
+
|
|
1029
|
+
test('should reset grace period when worker restarts', async (t) => {
|
|
1030
|
+
const applicationName = 'test-app'
|
|
1031
|
+
const applicationId = randomUUID()
|
|
1032
|
+
const applicationPath = join(__dirname, 'fixtures', 'service-1')
|
|
1033
|
+
|
|
1034
|
+
const alertsReceived = []
|
|
1035
|
+
|
|
1036
|
+
const getAuthorizationHeader = async (headers) => {
|
|
1037
|
+
return { ...headers, authorization: 'Bearer test-token' }
|
|
1038
|
+
}
|
|
1039
|
+
|
|
1040
|
+
const icc = await startICC(t, {
|
|
1041
|
+
applicationId,
|
|
1042
|
+
applicationName,
|
|
1043
|
+
scaler: {
|
|
1044
|
+
alertRetentionWindow: 1 // Very short retention window
|
|
1045
|
+
},
|
|
1046
|
+
processAlerts: (req) => {
|
|
1047
|
+
const alert = req.body
|
|
1048
|
+
alertsReceived.push(alert)
|
|
1049
|
+
return { id: `alert-${alertsReceived.length}`, ...alert }
|
|
1050
|
+
}
|
|
1051
|
+
})
|
|
1052
|
+
|
|
1053
|
+
// Set grace period to 1 second for faster testing
|
|
1054
|
+
setUpEnvironment({
|
|
1055
|
+
PLT_APP_NAME: applicationName,
|
|
1056
|
+
PLT_APP_DIR: applicationPath,
|
|
1057
|
+
PLT_ICC_URL: 'http://127.0.0.1:3000',
|
|
1058
|
+
PLT_ALERTS_GRACE_PERIOD_SEC: '1'
|
|
1059
|
+
})
|
|
1060
|
+
|
|
1061
|
+
const app = await start()
|
|
1062
|
+
app.getAuthorizationHeader = getAuthorizationHeader
|
|
1063
|
+
|
|
1064
|
+
t.after(async () => {
|
|
1065
|
+
await app.close()
|
|
1066
|
+
await icc.close()
|
|
1067
|
+
})
|
|
1068
|
+
|
|
1069
|
+
const healthInfo = {
|
|
1070
|
+
id: 'main:0',
|
|
1071
|
+
application: 'main',
|
|
1072
|
+
currentHealth: {
|
|
1073
|
+
elu: 0.995,
|
|
1074
|
+
heapUsed: 76798040,
|
|
1075
|
+
heapTotal: 99721216
|
|
1076
|
+
},
|
|
1077
|
+
unhealthy: true,
|
|
1078
|
+
healthConfig: {
|
|
1079
|
+
enabled: true,
|
|
1080
|
+
interval: 1000,
|
|
1081
|
+
gracePeriod: 1000,
|
|
1082
|
+
maxUnhealthyChecks: 10,
|
|
1083
|
+
maxELU: 0.99,
|
|
1084
|
+
maxHeapUsed: 0.99,
|
|
1085
|
+
maxHeapTotal: 4294967296
|
|
1086
|
+
}
|
|
1087
|
+
}
|
|
1088
|
+
|
|
1089
|
+
// Wait for initial grace period to expire (uses plugin start time as default)
|
|
1090
|
+
await sleep(1500)
|
|
1091
|
+
|
|
1092
|
+
// Emit unhealthy event - should trigger alert
|
|
1093
|
+
emitHealthEvent(app, healthInfo)
|
|
1094
|
+
await sleep(200)
|
|
1095
|
+
assert.strictEqual(alertsReceived.length, 1, 'First alert should be sent after grace period')
|
|
1096
|
+
|
|
1097
|
+
// Simulate worker restart by emitting worker started event
|
|
1098
|
+
app.watt.runtime.emit('application:worker:started', { id: 'main:0' })
|
|
1099
|
+
|
|
1100
|
+
// Emit unhealthy event immediately after restart - should be skipped (new grace period)
|
|
1101
|
+
emitHealthEvent(app, healthInfo)
|
|
1102
|
+
await sleep(200)
|
|
1103
|
+
assert.strictEqual(alertsReceived.length, 1, 'Alert should be skipped during new grace period after restart')
|
|
1104
|
+
|
|
1105
|
+
// Wait for the new grace period to expire
|
|
1106
|
+
await sleep(1500)
|
|
1107
|
+
|
|
1108
|
+
// Emit unhealthy event again - should trigger alert
|
|
1109
|
+
emitHealthEvent(app, healthInfo)
|
|
1110
|
+
await sleep(200)
|
|
1111
|
+
assert.strictEqual(alertsReceived.length, 2, 'Second alert should be sent after restart grace period expires')
|
|
1112
|
+
})
|
|
package/test/health-signals.test.js
CHANGED
|
@@ -58,6 +58,9 @@ test('should send health signals when service becomes unhealthy', async (t) => {
|
|
|
58
58
|
await icc.close()
|
|
59
59
|
})
|
|
60
60
|
|
|
61
|
+
// Wait for the first flamegraph to be generated
|
|
62
|
+
await sleep(5000)
|
|
63
|
+
|
|
61
64
|
{
|
|
62
65
|
const { statusCode } = await request('http://127.0.0.1:3042/custom-health-signal', {
|
|
63
66
|
method: 'POST',
|
|
@@ -116,8 +119,8 @@ test('should send health signals when service becomes unhealthy', async (t) => {
|
|
|
116
119
|
assert.ok(receivedSignal.timestamp > 0)
|
|
117
120
|
}
|
|
118
121
|
|
|
119
|
-
// Wait for flamegraph to be generated
|
|
120
|
-
await sleep(
|
|
122
|
+
// Wait for the second flamegraph to be generated
|
|
123
|
+
await sleep(2000)
|
|
121
124
|
|
|
122
125
|
// assert.strictEqual(receivedFlamegraphReqs.length, 1)
|
|
123
126
|
|
package/test/helper.js
CHANGED
|
@@ -25,6 +25,7 @@ function setUpEnvironment (env = {}) {
|
|
|
25
25
|
PLT_DISABLE_COMPLIANCE_CHECK: 'true',
|
|
26
26
|
PLT_DISABLE_FLAMEGRAPHS: 'true',
|
|
27
27
|
PLT_THROW_ON_COMPLIANCE_FAILURE: 'false',
|
|
28
|
+
PLT_ALERTS_GRACE_PERIOD_SEC: '0', // Disable grace period for tests
|
|
28
29
|
PLT_TEST_TOKEN: createJwtToken(3600)
|
|
29
30
|
}
|
|
30
31
|
Object.assign(process.env, defaultEnv, env)
|