pm2-perfmonitor 2.1.6 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/alert.js +29 -0
- package/lib/app.js +44 -3
- package/lib/defaults.js +17 -0
- package/lib/execa-helper.js +17 -0
- package/lib/perf-sampler.js +1 -14
- package/lib/utils.js +14 -0
- package/package.json +3 -2
package/lib/alert.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
const { getExeca } = require('./execa-helper')
|
|
2
|
+
|
|
3
|
+
/**
 * Send an alert by executing an external alert script.
 *
 * The script is called with four positional arguments, in this order:
 * env, level, title, content.
 *
 * @param { object } options
 * @param { string } [options.cmd] - path of the alert script (default: /var/job/alert.sh)
 * @param { string } [options.env] - environment name (default: prod)
 * @param { string } [options.level] - alert severity level (default: Sev-2)
 * @param { string } options.title - alert title
 * @param { string } options.content - alert body
 * @returns { Promise<boolean> } true when the script ran without error, false otherwise
 */
const sendAlert = async (options) => {
  try {
    // Apply the documented defaults so a missing config value does not turn
    // into a failed alert. The destructuring stays inside the try block so an
    // invalid `options` value still resolves to false instead of throwing.
    const {
      cmd = '/var/job/alert.sh',
      env = 'prod',
      level = 'Sev-2',
      title,
      content,
    } = options

    const execa = await getExeca()

    await execa(cmd, [env, level, title, content])

    return true
  } catch (err) {
    // Alerting is best-effort: report the failure but never throw to the caller.
    console.error('[Send Alert Error]:', err)
    return false
  }
}
|
|
26
|
+
|
|
27
|
+
module.exports = {
|
|
28
|
+
sendAlert,
|
|
29
|
+
}
|
package/lib/app.js
CHANGED
|
@@ -6,10 +6,12 @@ const {
|
|
|
6
6
|
parseParamToNumber,
|
|
7
7
|
parseBool,
|
|
8
8
|
sleepAsync,
|
|
9
|
+
getSysCpuUsageByPid,
|
|
9
10
|
} = require('./utils')
|
|
10
11
|
const { defaultOptions } = require('./defaults')
|
|
11
12
|
const { sendMessage } = require('./message')
|
|
12
13
|
const { performPerfSampling } = require('./perf-sampler')
|
|
14
|
+
const { sendAlert } = require('./alert')
|
|
13
15
|
|
|
14
16
|
const conf = pmx.initModule({}, (err, incomingConf) => {
|
|
15
17
|
if (err) {
|
|
@@ -50,6 +52,10 @@ const enableNodeInspectorCollection = parseBool(
|
|
|
50
52
|
const nodeInspectorSampleDuration = parseParamToNumber(
|
|
51
53
|
conf.nodeInspectorSampleDuration,
|
|
52
54
|
)
|
|
55
|
+
const enableAlert = parseBool(conf.enableAlert)
|
|
56
|
+
const alertCmdPath = conf.alertCmdPath
|
|
57
|
+
const alertEnv = conf.alertEnv
|
|
58
|
+
const alertLevel = conf.alertLevel
|
|
53
59
|
|
|
54
60
|
// 存储每个进程的 CPU 采样历史(pm_id -> [cpu1, cpu2, ...])
|
|
55
61
|
const zombieCpuHistory = new Map()
|
|
@@ -60,6 +66,8 @@ const cpuOverloadHistory = new Map()
|
|
|
60
66
|
const cpuOverloadRestartHistory = new Map()
|
|
61
67
|
const cpuOverloadRestartFailedHistory = new Map()
|
|
62
68
|
|
|
69
|
+
let isProcessCheckerRunning = false
|
|
70
|
+
|
|
63
71
|
/**
|
|
64
72
|
* perf 样本是否采集中
|
|
65
73
|
* @type { Map<number,boolean> }
|
|
@@ -158,7 +166,22 @@ const setCpuOverloadRestartFailedHistory = (pm_id) => {
|
|
|
158
166
|
}
|
|
159
167
|
}
|
|
160
168
|
|
|
161
|
-
|
|
169
|
+
/**
 * Send a restart alert, but only when alerting is enabled.
 * @param {string} title - short description; prefixed with the module name
 * @param {string} content - alert body
 * @returns {Promise<boolean|undefined>} result of sendAlert, or undefined when alerting is disabled
 */
const sendRestartAlert = async (title, content) => {
  if (enableAlert) {
    const alertOptions = {
      cmd: alertCmdPath,
      env: alertEnv,
      level: alertLevel,
      title: `[${MODULE_NAME}] Alert: ${title}`,
      content,
    }

    return await sendAlert(alertOptions)
  }
}
|
|
162
185
|
|
|
163
186
|
/**
|
|
164
187
|
* check process
|
|
@@ -172,10 +195,12 @@ const processChecker = async () => {
|
|
|
172
195
|
const apps = await listAppsAsync()
|
|
173
196
|
|
|
174
197
|
for (const app of apps) {
|
|
175
|
-
const { name, pm_id, monit, pm2_env
|
|
198
|
+
const { name, pid, pm_id, monit, pm2_env } = app
|
|
199
|
+
|
|
200
|
+
const sysCpuUsage = await getSysCpuUsageByPid(pid)
|
|
176
201
|
|
|
177
202
|
const appStatus = pm2_env?.status
|
|
178
|
-
const appCpuUsage =
|
|
203
|
+
const appCpuUsage = sysCpuUsage
|
|
179
204
|
|
|
180
205
|
// 非目标应用,跳过
|
|
181
206
|
if (
|
|
@@ -240,6 +265,14 @@ const processChecker = async () => {
|
|
|
240
265
|
`[ZOMBIE] Restarted ${name} (pm_id: ${pm_id}) successfully!!! Restarted ${zombieRestartHistory.get(pm_id)} times`,
|
|
241
266
|
)
|
|
242
267
|
|
|
268
|
+
await sendRestartAlert(
|
|
269
|
+
`The zombie process has been restarted!`,
|
|
270
|
+
`appName: ${name}, \n
|
|
271
|
+
pid: ${pid}, \n
|
|
272
|
+
pm_id: ${pm_id}, \n
|
|
273
|
+
restarted: ${zombieRestartHistory.get(pm_id)} times`,
|
|
274
|
+
)
|
|
275
|
+
|
|
243
276
|
// 重启后清除该进程的历史记录,避免刚重启又被判定为僵尸
|
|
244
277
|
zombieCpuHistory.delete(pm_id)
|
|
245
278
|
} catch (restartErr) {
|
|
@@ -294,6 +327,14 @@ const processChecker = async () => {
|
|
|
294
327
|
`[CPU OVERLOAD] Restarted ${name} (pm_id: ${pm_id}) successfully!!! Restarted ${cpuOverloadRestartHistory.get(pm_id)} times`,
|
|
295
328
|
)
|
|
296
329
|
|
|
330
|
+
await sendRestartAlert(
|
|
331
|
+
`CPU overload process restarted!`,
|
|
332
|
+
`appName: ${name}, \n
|
|
333
|
+
pid: ${pid}, \n
|
|
334
|
+
pm_id: ${pm_id}, \n
|
|
335
|
+
restarted: ${cpuOverloadRestartHistory.get(pm_id)} times`,
|
|
336
|
+
)
|
|
337
|
+
|
|
297
338
|
cpuOverloadHistory.delete(pm_id)
|
|
298
339
|
} catch (restartErr) {
|
|
299
340
|
logger(
|
package/lib/defaults.js
CHANGED
|
@@ -78,6 +78,23 @@ const defaultOptions = {
|
|
|
78
78
|
* node:inspector 性能采集持续时间 (s)
|
|
79
79
|
*/
|
|
80
80
|
nodeInspectorSampleDuration: 10,
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* 指定是否开启报警
|
|
84
|
+
*/
|
|
85
|
+
enableAlert: false,
|
|
86
|
+
/**
|
|
87
|
+
* 指定报警执行 bash 脚本位置
|
|
88
|
+
*/
|
|
89
|
+
alertCmdPath: '/var/job/alert.sh',
|
|
90
|
+
/**
|
|
91
|
+
* 指定报警环境类别
|
|
92
|
+
*/
|
|
93
|
+
alertEnv: 'prod',
|
|
94
|
+
/**
|
|
95
|
+
* 指定报警级别
|
|
96
|
+
*/
|
|
97
|
+
alertLevel: 'Sev-2',
|
|
81
98
|
}
|
|
82
99
|
|
|
83
100
|
module.exports = {
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// Holds the execa function once it has been loaded by getExeca
let execaCommandCache

/**
 * Get the execa function, loading the module on first use and caching it.
 * @returns { Promise<import('execa')['execa']> }
 */
const getExeca = async () => {
  if (execaCommandCache) return execaCommandCache

  // execa is loaded through dynamic import() rather than require()
  const { execa } = await import('execa')
  execaCommandCache = execa

  return execaCommandCache
}
|
|
14
|
+
|
|
15
|
+
module.exports = {
|
|
16
|
+
getExeca,
|
|
17
|
+
}
|
package/lib/perf-sampler.js
CHANGED
|
@@ -1,19 +1,6 @@
|
|
|
1
1
|
const fs = require('fs-extra')
|
|
2
2
|
const path = require('path')
|
|
3
|
-
|
|
4
|
-
let execaCommandCache
|
|
5
|
-
|
|
6
|
-
/**
|
|
7
|
-
* 获取 execa 函数(缓存)
|
|
8
|
-
* @returns { import('execa')['execa'] }
|
|
9
|
-
*/
|
|
10
|
-
const getExeca = async () => {
|
|
11
|
-
if (!execaCommandCache) {
|
|
12
|
-
const execaModule = await import('execa')
|
|
13
|
-
execaCommandCache = execaModule.execa
|
|
14
|
-
}
|
|
15
|
-
return execaCommandCache
|
|
16
|
-
}
|
|
3
|
+
const { getExeca } = require('./execa-helper')
|
|
17
4
|
|
|
18
5
|
/**
|
|
19
6
|
* 执行命令(不通过 shell,直接使用参数数组)
|
package/lib/utils.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
const pidusage = require('pidusage')
|
|
2
|
+
|
|
1
3
|
const parseParamToArray = (value, defaultVal = []) => {
|
|
2
4
|
if (Array.isArray(value)) return value
|
|
3
5
|
|
|
@@ -37,9 +39,21 @@ const sleepAsync = (duration = 0) => {
|
|
|
37
39
|
})
|
|
38
40
|
}
|
|
39
41
|
|
|
42
|
+
/**
 * Get the CPU usage of the given process as reported by pidusage.
 *
 * When the pid is missing or not a positive integer, 0 is returned without
 * calling pidusage, so that a single app without a live process does not
 * reject and break the caller's loop.
 *
 * @param {string | number} pid
 * @returns { Promise<number> } CPU usage, or 0 when there is no valid pid
 */
const getSysCpuUsageByPid = async (pid) => {
  const parsedPid = Number.parseInt(pid, 10)

  // Also covers NaN (undefined / null / non-numeric input)
  if (!(parsedPid > 0)) return 0

  const stats = await pidusage(pid)

  return stats.cpu
}
|
|
52
|
+
|
|
40
53
|
module.exports = {
|
|
41
54
|
parseParamToArray,
|
|
42
55
|
parseParamToNumber,
|
|
43
56
|
parseBool,
|
|
44
57
|
sleepAsync,
|
|
58
|
+
getSysCpuUsageByPid,
|
|
45
59
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pm2-perfmonitor",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.3.0",
|
|
4
4
|
"description": "A pm2 module for performance monitoring. Automatically detect zombie processes and restart it",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "elenh",
|
|
@@ -44,6 +44,7 @@
|
|
|
44
44
|
"dependencies": {
|
|
45
45
|
"execa": "^9.6.1",
|
|
46
46
|
"fs-extra": "^11.3.4",
|
|
47
|
+
"pidusage": "^4.0.1",
|
|
47
48
|
"pm2": "latest",
|
|
48
49
|
"pmx": "latest"
|
|
49
50
|
},
|
|
@@ -52,4 +53,4 @@
|
|
|
52
53
|
"cz-conventional-changelog": "^3.3.0",
|
|
53
54
|
"minimist": "^1.2.8"
|
|
54
55
|
}
|
|
55
|
-
}
|
|
56
|
+
}
|