pm2-perfmonitor 1.0.3 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -1
- package/lib/app.js +241 -0
- package/lib/utils.js +35 -0
- package/package.json +14 -5
- package/.czrc +0 -3
- package/CHANGELOG.md +0 -31
package/README.md
CHANGED
|
@@ -1,2 +1,37 @@
|
|
|
1
1
|
# pm2-perfmonitor
|
|
2
|
-
|
|
2
|
+
|
|
3
|
+
A pm2 module for performance monitoring.
|
|
4
|
+
|
|
5
|
+
# Features
|
|
6
|
+
|
|
7
|
+
- Automatically detect **zombie** processes and restart them.
|
|
8
|
+
- Monitor the number of zombie process restarts (pm2 monit).
|
|
9
|
+
|
|
10
|
+
# Installation
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
$ pm2 install pm2-perfmonitor
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
> NOTE: the command is `pm2 install` NOT `npm install`
|
|
17
|
+
|
|
18
|
+
# Configure
|
|
19
|
+
|
|
20
|
+
| Property | Default Value | Description |
|
|
21
|
+
| :-----------------------------: | :-----------: | :----------------------------------------------------------------------: |
|
|
22
|
+
| `enabled` | `true` | Specify whether to enable this module |
|
|
23
|
+
| `excludeApps` | - | Comma-separated application names to exclude from monitoring |
|
|
24
|
+
| `includeApps` | - | Comma-separated application names to monitor (all apps when unset) |
|
|
25
|
+
| `workerInterval` | `60000` | Timed task execution interval (ms) |
|
|
26
|
+
| `zombieDetection` | `true` | Specify whether to enable zombie process protection |
|
|
27
|
+
| `zombieMaxHits` | `10` | Number of consecutive 0% CPU samples after which a process is considered a zombie |
|
|
28
|
+
| `autoRestartWhenZombieDetected` | `true` | Specify whether to automatically restart zombie processes |
|
|
29
|
+
|
|
30
|
+
# How to set these values ?
|
|
31
|
+
|
|
32
|
+
After installing the module, set a value with: `pm2 set pm2-perfmonitor:<param> <value>`
|
|
33
|
+
|
|
34
|
+
**e.g:**
|
|
35
|
+
|
|
36
|
+
- `pm2 set pm2-perfmonitor:includeApps myNuxtApp1,myNextApp2` (Only detect applications named `myNuxtApp1` and `myNextApp2`; do not put a space after the comma, or the shell splits the value into two arguments)
|
|
37
|
+
- `pm2 set pm2-perfmonitor:workerInterval 120000` (Check every `2` minutes)
|
package/lib/app.js
CHANGED
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
const pmx = require('pmx')
|
|
2
|
+
const pm2 = require('pm2')
|
|
3
|
+
const { parseParamToArray, parseParamToNumber, parseBool } = require('./utils')
|
|
4
|
+
|
|
5
|
+
const defaultOptions = {
|
|
6
|
+
enabled: true,
|
|
7
|
+
/**
|
|
8
|
+
* Names of the apps to exclude
|
|
9
|
+
*/
|
|
10
|
+
excludeApps: [],
|
|
11
|
+
/**
|
|
12
|
+
* Names of the apps to include
|
|
13
|
+
*/
|
|
14
|
+
includeApps: [],
|
|
15
|
+
/**
|
|
16
|
+
* Interval between scheduled checks (ms)
|
|
17
|
+
*/
|
|
18
|
+
workerInterval: 60000,
|
|
19
|
+
/**
|
|
20
|
+
* Whether zombie-process protection is enabled
|
|
21
|
+
*/
|
|
22
|
+
zombieDetection: true,
|
|
23
|
+
/**
|
|
24
|
+
* Maximum number of times the zombie state may be observed
|
|
25
|
+
*/
|
|
26
|
+
zombieMaxHits: 10,
|
|
27
|
+
/**
|
|
28
|
+
* Whether to restart a zombie process automatically once the zombie state reaches the maximum tolerance
|
|
29
|
+
*/
|
|
30
|
+
autoRestartWhenZombieDetected: true,
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
/**
 * Module configuration: the options handed over by pmx, layered on top of
 * `defaultOptions` (incoming values win over defaults).
 *
 * NOTE(review): this relies on `pmx.initModule` returning whatever the
 * callback returns — confirm against the installed pmx version.
 */
const conf = pmx.initModule({}, (err, incomingConf) => {
  if (err) {
    // `incomingConf` may be missing when initialisation failed, so do not
    // dereference it blindly: a TypeError here would hide the real error.
    const moduleName = incomingConf?.module_name ?? 'pm2-perfmonitor'

    console.error(`[${moduleName}] init module error:`, err)
    process.exit(2)
  }

  return {
    ...defaultOptions,
    ...incomingConf,
  }
})
|
|
44
|
+
|
|
45
|
+
const Probe = pmx.probe()
|
|
46
|
+
const MODULE_NAME = conf.module_name
|
|
47
|
+
const MODULE_ENABLED = parseBool(conf.enabled)
|
|
48
|
+
const WORKER_INTERVAL = parseParamToNumber(conf.workerInterval)
|
|
49
|
+
const INCLUDE_APPS = parseParamToArray(conf.includeApps)
|
|
50
|
+
const EXCLUDE_APPS = parseParamToArray(conf.excludeApps)
|
|
51
|
+
const ZOMBIE_DETECTION = parseBool(conf.zombieDetection)
|
|
52
|
+
const AUTO_RESTART_WHEN_ZOMBIE_DETECTED = parseBool(
|
|
53
|
+
conf.autoRestartWhenZombieDetected,
|
|
54
|
+
)
|
|
55
|
+
const ZOMBIE_MAX_HITS = parseParamToNumber(conf.zombieMaxHits)
|
|
56
|
+
|
|
57
|
+
// CPU sample history of each process (pm_id -> [cpu1, cpu2, ...])
|
|
58
|
+
const cpuHistory = new Map()
|
|
59
|
+
const zombieRestartHistory = new Map()
|
|
60
|
+
const restartFailedHistory = new Map()
|
|
61
|
+
|
|
62
|
+
/**
 * Write a message to the console, prefixed with `[MODULE_NAME]`.
 *
 * @param {'log' | 'info' | 'error' | 'warn'} type console method to use
 * @param {...any} args values forwarded to the console method
 * @returns {*} whatever the console method returns
 */
const logger = (type, ...args) => {
  // An unknown level must not make the logger itself throw; fall back to
  // console.log so the message is still emitted.
  const level = typeof console[type] === 'function' ? type : 'log'

  return console[level](`[${MODULE_NAME}]`, ...args)
}
|
|
69
|
+
|
|
70
|
+
/**
 * Decide whether a CPU sample history describes a zombie process: at least
 * ZOMBIE_MAX_HITS samples have been collected and every one of them is 0%.
 *
 * @param {number[]} history CPU usage samples of one process
 * @returns {boolean} true when the process should be treated as a zombie
 */
const isZombie = (history) => {
  // A threshold of 0 (e.g. a blank or zero `zombieMaxHits` setting) would make
  // an empty history qualify, flagging every online process on every check.
  if (!(ZOMBIE_MAX_HITS > 0)) return false

  return history.length >= ZOMBIE_MAX_HITS && history.every((v) => v === 0)
}
|
|
76
|
+
|
|
77
|
+
/**
 * Sample the CPU usage of the monitored pm2 apps and deal with zombies.
 *
 * Does nothing when ZOMBIE_DETECTION is off. Otherwise every app returned by
 * `pm2.list` that passes the include/exclude filters and is `online` gets its
 * current CPU reading appended to its entry in `cpuHistory`. When `isZombie`
 * accepts that history the app is reported and, if
 * AUTO_RESTART_WHEN_ZOMBIE_DETECTED is set, restarted through `pm2.restart`.
 * Successful restarts are counted in `zombieRestartHistory`, failed ones in
 * `restartFailedHistory`.
 */
const zombieProcessChecker = () => {
  if (!ZOMBIE_DETECTION) return

  pm2.list((err, apps) => {
    // A failed listing provides no usable `apps`; log it and let the next
    // scheduled run try again instead of crashing on `apps.forEach`.
    if (err || !Array.isArray(apps)) {
      logger('error', 'Failed to list PM2 processes:', err)
      return
    }

    apps.forEach((app) => {
      const { name, pm_id, monit, pm2_env } = app

      const appStatus = pm2_env?.status
      const appCpuUsage = monit?.cpu || 0

      // Skip this module itself and anything filtered out by configuration
      const isSelf = MODULE_NAME === name
      const notIncluded =
        INCLUDE_APPS.length > 0 && !INCLUDE_APPS.includes(name)
      const excluded = EXCLUDE_APPS.length > 0 && EXCLUDE_APPS.includes(name)

      if (isSelf || notIncluded || excluded) return

      // Only online processes are sampled; drop the history of any other
      // state so stale samples cannot interfere later.
      if (appStatus !== 'online') {
        cpuHistory.delete(pm_id)
        return
      }

      if (!cpuHistory.has(pm_id)) {
        cpuHistory.set(pm_id, [])
      }

      const history = cpuHistory.get(pm_id)

      history.push(appCpuUsage)

      // Keep only the most recent ZOMBIE_MAX_HITS samples
      if (history.length > ZOMBIE_MAX_HITS) {
        history.shift()
      }

      if (!isZombie(history)) return

      logger('info', `Zombie detected: ${name} (pm_id: ${pm_id})`)

      if (!AUTO_RESTART_WHEN_ZOMBIE_DETECTED) return

      logger('info', 'restarting...')

      pm2.restart(pm_id, (restartErr) => {
        if (restartErr) {
          logger(
            'error',
            `Restart failed for ${name} (pm_id: ${pm_id}):`,
            restartErr,
          )

          restartFailedHistory.set(
            pm_id,
            (restartFailedHistory.get(pm_id) ?? 0) + 1,
          )

          return
        }

        const restartCount = (zombieRestartHistory.get(pm_id) ?? 0) + 1

        zombieRestartHistory.set(pm_id, restartCount)

        logger(
          'info',
          `Restarted ${name} (pm_id: ${pm_id}) successfully!!! Restarted ${restartCount} times`,
        )

        // Clear the samples after a restart so the fresh process is not
        // immediately judged a zombie again.
        cpuHistory.delete(pm_id)
      })
    })
  })
}
|
|
167
|
+
|
|
168
|
+
/**
 * Render a `pm_id -> count` map as `[id]:count ; [id]:count`, listing only
 * positive counts; returns 'N/A' when there is nothing to list.
 */
const formatRestartCounters = (counters) => {
  const parts = []

  for (const [pmId, count] of counters) {
    if (count > 0) {
      parts.push(`[${pmId}]:${count}`)
    }
  }

  return parts.length ? parts.join(' ; ') : 'N/A'
}

/**
 * Module entry point: when the module is enabled, connect to the local pm2,
 * run the zombie check once, schedule it every WORKER_INTERVAL ms, and
 * register the three pmx metrics.
 */
const runModule = () => {
  if (!MODULE_ENABLED) return

  // connect to local pm2
  pm2.connect((err) => {
    if (err) {
      logger('error', `PM2 connection error:`, err)

      process.exit(1)
      return
    }

    logger('info', 'Connected to PM2, starting monitor...')

    zombieProcessChecker()

    // setInterval treats NaN / non-positive delays as ~1 ms, which would
    // hammer pm2; fall back to the documented default of 60000 ms instead.
    const intervalMs =
      Number.isFinite(WORKER_INTERVAL) && WORKER_INTERVAL > 0
        ? WORKER_INTERVAL
        : 60000

    setInterval(() => {
      zombieProcessChecker()
    }, intervalMs)
  })

  /** PROB PMX **/
  Probe.metric({
    name: 'Zombie Restarts',
    value: () => formatRestartCounters(zombieRestartHistory),
  })

  Probe.metric({
    name: 'Zombie Restarts (failed)',
    value: () => formatRestartCounters(restartFailedHistory),
  })

  Probe.metric({
    name: 'Zombie Processes',
    value: () => {
      // pm_ids whose current sample history qualifies as a zombie
      const zombieIds = []

      for (const [pmId, samples] of cpuHistory) {
        if (isZombie(samples)) {
          zombieIds.push(pmId)
        }
      }

      return zombieIds.length ? zombieIds.join(',') : 'N/A'
    },
  })
}
|
|
240
|
+
|
|
241
|
+
runModule()
|
package/lib/utils.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
 * Normalize an option value into an array.
 *
 * @param {*} value an array (returned as-is) or a comma-separated string
 * @param {Array} [defaultVal=[]] returned when `value` is neither
 * @returns {Array} trimmed, non-empty entries of the string, the array
 *   itself, or `defaultVal`
 */
const parseParamToArray = (value, defaultVal = []) => {
  if (Array.isArray(value)) return value

  if (typeof value === 'string') {
    // Drop blank entries: '' or 'a, ,b,' must not produce '' items, because a
    // non-empty list containing only '' is not the same as "no list".
    return value
      .split(',')
      .map((item) => item.trim())
      .filter((item) => item !== '')
  }

  return defaultVal
}
|
|
10
|
+
|
|
11
|
+
/**
 * Normalize an option value into a number.
 *
 * @param {*} value a number (returned as-is) or a numeric string
 * @param {number} [defaultVal=0] returned for blank, unparsable, or
 *   non-string/non-number input
 * @returns {number} the parsed number or `defaultVal`
 */
const parseParamToNumber = (value, defaultVal = 0) => {
  if (typeof value === 'number') return value
  if (typeof value !== 'string') return defaultVal

  const text = value.trim()

  if (text === '') return defaultVal

  const parsed = Number(text)

  // Number('abc') is NaN; never hand NaN to callers that use the result as a
  // timer delay or a threshold.
  return Number.isNaN(parsed) ? defaultVal : parsed
}
|
|
21
|
+
|
|
22
|
+
/**
 * Normalize an option value into a boolean.
 *
 * @param {*} value a boolean (returned as-is) or the string 'true' / 'false'
 *   (surrounding whitespace and letter case are ignored)
 * @param {boolean} [defaultVal=false] returned for any other input
 * @returns {boolean} the parsed boolean or `defaultVal`
 */
const parseBool = (value, defaultVal = false) => {
  if (typeof value === 'boolean') return value

  if (typeof value === 'string') {
    // Values set from the command line arrive as strings; tolerate 'True' or
    // ' false ' instead of silently falling back to the default.
    const text = value.trim().toLowerCase()

    if (text === 'true') return true
    if (text === 'false') return false
  }

  return defaultVal
}
|
|
30
|
+
|
|
31
|
+
module.exports = {
|
|
32
|
+
parseParamToArray,
|
|
33
|
+
parseParamToNumber,
|
|
34
|
+
parseBool,
|
|
35
|
+
}
|
package/package.json
CHANGED
|
@@ -1,10 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "pm2-perfmonitor",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "A pm2 module for performance monitoring",
|
|
5
|
-
"author":
|
|
3
|
+
"version": "1.1.2",
|
|
4
|
+
"description": "A pm2 module for performance monitoring. Automatically detect zombie processes and restart it",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "elenh",
|
|
7
|
+
"email": "yisiwings@163.com"
|
|
8
|
+
},
|
|
6
9
|
"license": "MIT",
|
|
7
10
|
"main": "lib/app.js",
|
|
11
|
+
"files": [
|
|
12
|
+
"lib"
|
|
13
|
+
],
|
|
8
14
|
"repository": {
|
|
9
15
|
"type": "git",
|
|
10
16
|
"url": "https://github.com/yisibell/pm2-perfmonitor.git"
|
|
@@ -14,12 +20,14 @@
|
|
|
14
20
|
},
|
|
15
21
|
"homepage": "https://github.com/yisibell/pm2-perfmonitor",
|
|
16
22
|
"scripts": {
|
|
17
|
-
"
|
|
23
|
+
"start": "pm2 delete app1 || true && pm2 start ecosystem.app.config.cjs",
|
|
24
|
+
"dev": "pm2 start ecosystem.dev.config.cjs",
|
|
18
25
|
"release": "changelogen --release && npm publish --access=public && git push --follow-tags"
|
|
19
26
|
},
|
|
20
27
|
"keywords": [
|
|
21
28
|
"pm2",
|
|
22
|
-
"monitor"
|
|
29
|
+
"monitor",
|
|
30
|
+
"zombie process"
|
|
23
31
|
],
|
|
24
32
|
"apps": [
|
|
25
33
|
{
|
|
@@ -28,6 +36,7 @@
|
|
|
28
36
|
"max_memory_restart": "500M"
|
|
29
37
|
}
|
|
30
38
|
],
|
|
39
|
+
"config": {},
|
|
31
40
|
"dependencies": {
|
|
32
41
|
"pm2": "latest",
|
|
33
42
|
"pmx": "latest"
|
package/.czrc
DELETED
package/CHANGELOG.md
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
# Changelog
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
## v1.0.3
|
|
5
|
-
|
|
6
|
-
[compare changes](https://github.com/yisibell/pm2-perfmonitor/compare/v1.0.2...v1.0.3)
|
|
7
|
-
|
|
8
|
-
## v1.0.2
|
|
9
|
-
|
|
10
|
-
[compare changes](https://github.com/yisibell/pm2-perfmonitor/compare/v1.0.1...v1.0.2)
|
|
11
|
-
|
|
12
|
-
### 🏡 Chore
|
|
13
|
-
|
|
14
|
-
- Update author name ([8fbec40](https://github.com/yisibell/pm2-perfmonitor/commit/8fbec40))
|
|
15
|
-
|
|
16
|
-
### ❤️ Contributors
|
|
17
|
-
|
|
18
|
-
- Wenqing <wenqing@kerrylan.com>
|
|
19
|
-
|
|
20
|
-
## v1.0.1
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
### 🏡 Chore
|
|
24
|
-
|
|
25
|
-
- Init ([297fff0](https://github.com/yisibell/pm2-perfmonitor/commit/297fff0))
|
|
26
|
-
- Init release ([72dc521](https://github.com/yisibell/pm2-perfmonitor/commit/72dc521))
|
|
27
|
-
|
|
28
|
-
### ❤️ Contributors
|
|
29
|
-
|
|
30
|
-
- Wenqing <wenqing@kerrylan.com>
|
|
31
|
-
|