pm2-perfmonitor 2.3.1 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/app.js +16 -17
- package/lib/defaults.js +2 -0
- package/lib/job-conf.js +39 -0
- package/lib/utils.js +6 -3
- package/package.json +1 -1
package/lib/app.js
CHANGED
|
@@ -12,6 +12,7 @@ const { defaultOptions } = require('./defaults')
|
|
|
12
12
|
const { sendMessage } = require('./message')
|
|
13
13
|
const { performPerfSampling } = require('./perf-sampler')
|
|
14
14
|
const { sendAlert } = require('./alert')
|
|
15
|
+
const { getJobConfHostName } = require('./job-conf')
|
|
15
16
|
|
|
16
17
|
const conf = pmx.initModule({}, (err, incomingConf) => {
|
|
17
18
|
if (err) {
|
|
@@ -56,6 +57,7 @@ const enableAlert = parseBool(conf.enableAlert)
|
|
|
56
57
|
const alertCmdPath = conf.alertCmdPath
|
|
57
58
|
const alertEnv = conf.alertEnv
|
|
58
59
|
const alertLevel = conf.alertLevel
|
|
60
|
+
const jobHostNameConfPath = conf.jobHostNameConfPath
|
|
59
61
|
|
|
60
62
|
// 存储每个进程的 CPU 采样历史(pm_id -> [cpu1, cpu2, ...])
|
|
61
63
|
const zombieCpuHistory = new Map()
|
|
@@ -169,17 +171,20 @@ const setCpuOverloadRestartFailedHistory = (pm_id) => {
|
|
|
169
171
|
/**
 * Send an alert announcing that a monitored process was restarted.
 *
 * Does nothing when alerting is disabled. Otherwise the alert title carries
 * the module name and the current local date/time, and the alert content is
 * prefixed with the host name obtained from the job configuration path.
 *
 * @param {string} title - Short headline of the alert.
 * @param {string} message - Detail text placed after the host name prefix.
 * @returns {Promise<*>} Whatever `sendAlert` resolves with, or `undefined`
 *   when alerting is disabled.
 */
const sendRestartAlert = async (title, message) => {
  if (enableAlert) {
    const timestamp = new Date().toLocaleString()
    const hostName = getJobConfHostName(jobHostNameConfPath)

    const payload = {
      cmd: alertCmdPath,
      env: alertEnv,
      level: alertLevel,
      title: `[${MODULE_NAME}] [${timestamp}] Alert: ${title}`,
      content: `[${hostName}] - ${message}`,
    }

    return await sendAlert(payload)
  }
}
|
|
185
190
|
|
|
@@ -197,12 +202,7 @@ const processChecker = async () => {
|
|
|
197
202
|
for (const app of apps) {
|
|
198
203
|
const { name, pid, pm_id, monit, pm2_env } = app
|
|
199
204
|
|
|
200
|
-
const sysCpuUsage = await getSysCpuUsageByPid(pid)
|
|
201
|
-
const pm2CpuUsage = monit?.cpu
|
|
202
|
-
|
|
203
205
|
const appStatus = pm2_env?.status
|
|
204
|
-
const appCpuUsage =
|
|
205
|
-
typeof sysCpuUsage === 'number' ? sysCpuUsage : pm2CpuUsage
|
|
206
206
|
|
|
207
207
|
// 非目标应用,跳过
|
|
208
208
|
if (
|
|
@@ -222,6 +222,11 @@ const processChecker = async () => {
|
|
|
222
222
|
continue
|
|
223
223
|
}
|
|
224
224
|
|
|
225
|
+
const pm2CpuUsage = monit?.cpu
|
|
226
|
+
const sysCpuUsage = await getSysCpuUsageByPid(pid)
|
|
227
|
+
const appCpuUsage =
|
|
228
|
+
typeof sysCpuUsage === 'number' ? sysCpuUsage : pm2CpuUsage
|
|
229
|
+
|
|
225
230
|
const history = setZombieCpuHistory(pm_id, appCpuUsage)
|
|
226
231
|
const history2 = setCpuOverloadHistory(pm_id, appCpuUsage)
|
|
227
232
|
|
|
@@ -269,10 +274,7 @@ const processChecker = async () => {
|
|
|
269
274
|
|
|
270
275
|
await sendRestartAlert(
|
|
271
276
|
`The zombie process has been restarted!`,
|
|
272
|
-
`appName: ${name},
|
|
273
|
-
pid: ${pid}, \n
|
|
274
|
-
pm_id: ${pm_id}, \n
|
|
275
|
-
restarted: ${zombieRestartHistory.get(pm_id)} times`,
|
|
277
|
+
`appName: ${name}, pid: ${pid}, pm_id: ${pm_id}, restarted: ${zombieRestartHistory.get(pm_id)} times`,
|
|
276
278
|
)
|
|
277
279
|
|
|
278
280
|
// 重启后清除该进程的历史记录,避免刚重启又被判定为僵尸
|
|
@@ -331,10 +333,7 @@ const processChecker = async () => {
|
|
|
331
333
|
|
|
332
334
|
await sendRestartAlert(
|
|
333
335
|
`CPU overload process restarted!`,
|
|
334
|
-
`appName: ${name},
|
|
335
|
-
pid: ${pid}, \n
|
|
336
|
-
pm_id: ${pm_id}, \n
|
|
337
|
-
restarted: ${cpuOverloadRestartHistory.get(pm_id)} times`,
|
|
336
|
+
`appName: ${name}, pid: ${pid}, pm_id: ${pm_id}, restarted: ${cpuOverloadRestartHistory.get(pm_id)} times`,
|
|
338
337
|
)
|
|
339
338
|
|
|
340
339
|
cpuOverloadHistory.delete(pm_id)
|
package/lib/defaults.js
CHANGED
package/lib/job-conf.js
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
// CommonJS requires: this module exports through module.exports and is loaded
// with require(), so ESM `import` statements cannot be used here.
const fs = require('node:fs')
const path = require('node:path')
|
|
3
|
+
|
|
4
|
+
/**
 * Synchronously read a file as UTF-8 text without ever throwing.
 *
 * The given path is resolved to an absolute path before reading. Any error
 * raised while resolving or reading is reported through the result object
 * instead of being propagated.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {{ success: boolean, data: string | null }} `success: true` with
 *   the file text on success, otherwise `success: false` with `data: null`.
 */
const readFileContent = (filePath) => {
  // Start from the failure outcome; it is only replaced after a successful read
  let outcome = { success: false, data: null }

  try {
    const text = fs.readFileSync(path.resolve(filePath), 'utf8')
    outcome = { success: true, data: text }
  } catch {
    // Deliberately ignored: the failure outcome above already describes it
  }

  return outcome
}
|
|
20
|
+
|
|
21
|
+
/**
 * Read the host name from a `key=value` style job configuration file.
 *
 * The value is the text between the first and second `=` of the file content,
 * cut at the first line break and trimmed, so a trailing newline or following
 * lines in the file do not leak into the result.
 *
 * @param {string} path - Location of the configuration file.
 * @returns {string} The host name, or 'unknown' when the file cannot be read,
 *   is empty, contains no `=`, or has an empty value.
 */
const getJobConfHostName = (path) => {
  const result = readFileContent(path)

  if (!result.success || !result.data) {
    return 'unknown'
  }

  // The segment after the first '='; undefined when the content has no '='
  const [, rawValue] = result.data.split('=')

  // Keep only the first line of the value and strip surrounding whitespace
  const hostName = rawValue?.split(/\r?\n/)[0].trim()

  // Covers both a missing '=' (undefined) and an empty value ('')
  return hostName || 'unknown'
}
|
|
35
|
+
|
|
36
|
+
module.exports = {
|
|
37
|
+
readFileContent,
|
|
38
|
+
getJobConfHostName,
|
|
39
|
+
}
|
package/lib/utils.js
CHANGED
|
@@ -45,9 +45,12 @@ const sleepAsync = (duration = 0) => {
|
|
|
45
45
|
* @returns { Promise<number> } CPU 使用率
|
|
46
46
|
*/
|
|
47
47
|
const getSysCpuUsageByPid = async (pid) => {
  // Stays undefined when the lookup fails, so the caller can detect a missing value
  let usage

  try {
    const processStats = await pidusage(pid)
    usage = processStats.cpu
  } catch (error) {
    // Best effort: report the failure and fall through without rethrowing
    console.error('Call pidusage error:', error)
  }

  return usage
}
|
|
52
55
|
|
|
53
56
|
module.exports = {
|