pm2-perfmonitor 2.5.2 → 2.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +59 -59
- package/lib/alert.js +29 -29
- package/lib/app.js +581 -516
- package/lib/defaults.js +123 -123
- package/lib/execa-helper.js +17 -17
- package/lib/job-conf.js +39 -39
- package/lib/message.js +35 -35
- package/lib/perf-sampler.js +241 -241
- package/lib/pm2-extra.js +54 -54
- package/lib/utils.js +77 -62
- package/lib/zombie-check.js +65 -65
- package/package.json +2 -2
package/lib/defaults.js
CHANGED
|
@@ -1,123 +1,123 @@
|
|
|
1
|
-
const defaultOptions = {
|
|
2
|
-
enabled: true,
|
|
3
|
-
/**
|
|
4
|
-
* 排除的 app 名
|
|
5
|
-
*/
|
|
6
|
-
excludeApps: [],
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* 包含的 app 名
|
|
10
|
-
*/
|
|
11
|
-
includeApps: [],
|
|
12
|
-
|
|
13
|
-
/**
|
|
14
|
-
* 定时检测间隔(ms)
|
|
15
|
-
*/
|
|
16
|
-
workerInterval: 60000,
|
|
17
|
-
|
|
18
|
-
/**
|
|
19
|
-
* 是否开启僵尸进程守护
|
|
20
|
-
*/
|
|
21
|
-
zombieDetection: true,
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* 僵尸进程检测策略
|
|
25
|
-
* 1. 进程状态标记为 Z(僵尸)
|
|
26
|
-
* 2. CPU 占用率连续多次采样为 0
|
|
27
|
-
* 3. 状态为 Z 且 CPU 持续为零
|
|
28
|
-
* 4. 状态为 Z 或 CPU 持续为零
|
|
29
|
-
* @type { 'zombie-state' | 'zero-cpu-consecutive' | 'zombie-state-and-zero-cpu' | 'zombie-state-or-zero-cpu'}
|
|
30
|
-
*/
|
|
31
|
-
zombieProcessDetectionStrategy: 'zombie-state-and-zero-cpu',
|
|
32
|
-
|
|
33
|
-
/**
|
|
34
|
-
* 僵尸状态最大出现次数
|
|
35
|
-
*/
|
|
36
|
-
zombieMaxHits: 10,
|
|
37
|
-
|
|
38
|
-
/**
|
|
39
|
-
* 僵尸状态达到最大容忍度时,是否自动重启僵尸进程
|
|
40
|
-
*/
|
|
41
|
-
autoRestartWhenZombieDetected: true,
|
|
42
|
-
|
|
43
|
-
/**
|
|
44
|
-
* 僵尸进程最大重启次数,设置为0表示不限制
|
|
45
|
-
*/
|
|
46
|
-
zombieMaxRestarts: 0,
|
|
47
|
-
|
|
48
|
-
/**
|
|
49
|
-
* 是否开启 CPU 过载守护
|
|
50
|
-
*/
|
|
51
|
-
cpuOverloadDetection: false,
|
|
52
|
-
|
|
53
|
-
/**
|
|
54
|
-
* 判定 CPU 过载阈值
|
|
55
|
-
*/
|
|
56
|
-
cpuOverloadThreshold:
|
|
57
|
-
|
|
58
|
-
/**
|
|
59
|
-
* 允许 CPU 过载最大连续出现次数,达到时自动重启
|
|
60
|
-
*/
|
|
61
|
-
cpuOverloadMaxHits: 5,
|
|
62
|
-
|
|
63
|
-
/**
|
|
64
|
-
* 是否开启 perf 性能采集
|
|
65
|
-
*/
|
|
66
|
-
enablePerfCollection: false,
|
|
67
|
-
|
|
68
|
-
/**
|
|
69
|
-
* 性能报告生成目录
|
|
70
|
-
*/
|
|
71
|
-
perfReportGenerationDir: '/var/log/pm2/perf',
|
|
72
|
-
|
|
73
|
-
/**
|
|
74
|
-
* flamegraph 火焰图生成工具目录
|
|
75
|
-
*/
|
|
76
|
-
flamegraphDir: '/opt/FlameGraph',
|
|
77
|
-
|
|
78
|
-
/**
|
|
79
|
-
* perf 采样持续时间 (s)
|
|
80
|
-
*/
|
|
81
|
-
perfSampleDuration: 10,
|
|
82
|
-
|
|
83
|
-
/**
|
|
84
|
-
* perf 采样频率 (Hz)
|
|
85
|
-
*/
|
|
86
|
-
perfSampleFrequency: 99,
|
|
87
|
-
|
|
88
|
-
/**
|
|
89
|
-
* 是否开启 node:inspector 性能采集
|
|
90
|
-
*/
|
|
91
|
-
enableNodeInspectorCollection: false,
|
|
92
|
-
|
|
93
|
-
/**
|
|
94
|
-
* node:inspector 性能采集持续时间 (s)
|
|
95
|
-
*/
|
|
96
|
-
nodeInspectorSampleDuration: 10,
|
|
97
|
-
|
|
98
|
-
/**
|
|
99
|
-
* 指定是否开启报警
|
|
100
|
-
*/
|
|
101
|
-
enableAlert: false,
|
|
102
|
-
|
|
103
|
-
/**
|
|
104
|
-
* 指定报警执行 bash 脚本位置
|
|
105
|
-
*/
|
|
106
|
-
alertCmdPath: '/var/job/alert.sh',
|
|
107
|
-
|
|
108
|
-
/**
|
|
109
|
-
* 指定报警环境类别
|
|
110
|
-
*/
|
|
111
|
-
alertEnv: 'prod',
|
|
112
|
-
|
|
113
|
-
/**
|
|
114
|
-
* 指定报警级别
|
|
115
|
-
*/
|
|
116
|
-
alertLevel: 'Sev-2',
|
|
117
|
-
|
|
118
|
-
jobHostNameConfPath: '/var/job/host_name.conf',
|
|
119
|
-
}
|
|
120
|
-
|
|
121
|
-
module.exports = {
|
|
122
|
-
defaultOptions,
|
|
123
|
-
}
|
|
1
|
+
// Default settings for pm2-perfmonitor, grouped by feature area.
const defaultOptions = {
  enabled: true,

  // --- App selection -------------------------------------------------------

  /** Names of apps to exclude. */
  excludeApps: [],

  /** Names of apps to include. */
  includeApps: [],

  /** Interval between scheduled checks (ms). */
  workerInterval: 60000,

  // --- Zombie-process guarding ---------------------------------------------

  /** Whether zombie-process guarding is enabled. */
  zombieDetection: true,

  /**
   * Zombie-process detection strategy:
   *   - 'zombie-state': the process state is marked Z (zombie)
   *   - 'zero-cpu-consecutive': CPU usage is sampled as 0 several times in a row
   *   - 'zombie-state-and-zero-cpu': state is Z AND CPU stays at zero
   *   - 'zombie-state-or-zero-cpu': state is Z OR CPU stays at zero
   * @type { 'zombie-state' | 'zero-cpu-consecutive' | 'zombie-state-and-zero-cpu' | 'zombie-state-or-zero-cpu'}
   */
  zombieProcessDetectionStrategy: 'zombie-state-and-zero-cpu',

  /** Maximum number of times the zombie state may be observed. */
  zombieMaxHits: 10,

  /** Whether to restart the zombie process automatically once the maximum tolerance is reached. */
  autoRestartWhenZombieDetected: true,

  /** Maximum number of restarts for a zombie process; 0 means unlimited. */
  zombieMaxRestarts: 0,

  // --- CPU-overload guarding -----------------------------------------------

  /** Whether CPU-overload guarding is enabled. */
  cpuOverloadDetection: false,

  /** Threshold at which the CPU is judged overloaded (presumably a percentage - TODO confirm). */
  cpuOverloadThreshold: 99,

  /** Maximum consecutive CPU-overload occurrences allowed; the process is restarted automatically when reached. */
  cpuOverloadMaxHits: 5,

  // --- perf collection -----------------------------------------------------

  /** Whether perf performance collection is enabled. */
  enablePerfCollection: false,

  /** Directory in which performance reports are generated. */
  perfReportGenerationDir: '/var/log/pm2/perf',

  /** Directory of the FlameGraph generation tool. */
  flamegraphDir: '/opt/FlameGraph',

  /** perf sampling duration (s). */
  perfSampleDuration: 10,

  /** perf sampling frequency (Hz). */
  perfSampleFrequency: 99,

  // --- node:inspector collection -------------------------------------------

  /** Whether node:inspector performance collection is enabled. */
  enableNodeInspectorCollection: false,

  /** node:inspector collection duration (s). */
  nodeInspectorSampleDuration: 10,

  // --- Alerting ------------------------------------------------------------

  /** Whether alerting is enabled. */
  enableAlert: false,

  /** Location of the bash script executed to raise an alert. */
  alertCmdPath: '/var/job/alert.sh',

  /** Environment category reported with alerts. */
  alertEnv: 'prod',

  /** Severity level reported with alerts. */
  alertLevel: 'Sev-2',

  // NOTE(review): presumably the file the host name is read from - confirm against the caller.
  jobHostNameConfPath: '/var/job/host_name.conf',
}
|
|
120
|
+
|
|
121
|
+
module.exports = {
|
|
122
|
+
defaultOptions,
|
|
123
|
+
}
|
package/lib/execa-helper.js
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
|
-
let execaCommandCache
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* 获取 execa 函数(缓存)
|
|
5
|
-
* @returns { import('execa')['execa'] }
|
|
6
|
-
*/
|
|
7
|
-
const getExeca = async () => {
|
|
8
|
-
if (!execaCommandCache) {
|
|
9
|
-
const execaModule = await import('execa')
|
|
10
|
-
execaCommandCache = execaModule.execa
|
|
11
|
-
}
|
|
12
|
-
return execaCommandCache
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
module.exports = {
|
|
16
|
-
getExeca,
|
|
17
|
-
}
|
|
1
|
+
// Module-level memo holding the `execa` function once it has been imported.
let execaCommandCache

/**
 * Returns the `execa` function, loading it through a dynamic `import()` on
 * first use and serving the memoized reference afterwards.
 *
 * @returns { Promise<import('execa')['execa']> } resolves with the `execa` export
 */
const getExeca = async () => {
  // Fast path: the module has already been imported and memoized.
  if (execaCommandCache) {
    return execaCommandCache
  }

  const { execa } = await import('execa')
  execaCommandCache = execa

  return execaCommandCache
}
|
|
14
|
+
|
|
15
|
+
module.exports = {
|
|
16
|
+
getExeca,
|
|
17
|
+
}
|
package/lib/job-conf.js
CHANGED
|
@@ -1,39 +1,39 @@
|
|
|
1
|
-
const fs = require('node:fs')
|
|
2
|
-
const path = require('node:path')
|
|
3
|
-
|
|
4
|
-
/**
|
|
5
|
-
* @param {string} filePath
|
|
6
|
-
* @returns { { success: boolean, data: string | null }}
|
|
7
|
-
*/
|
|
8
|
-
const readFileContent = (filePath) => {
|
|
9
|
-
try {
|
|
10
|
-
const absolutePath = path.resolve(filePath)
|
|
11
|
-
const data = fs.readFileSync(absolutePath, 'utf8')
|
|
12
|
-
return { success: true, data }
|
|
13
|
-
} catch {
|
|
14
|
-
return {
|
|
15
|
-
success: false,
|
|
16
|
-
data: null,
|
|
17
|
-
}
|
|
18
|
-
}
|
|
19
|
-
}
|
|
20
|
-
|
|
21
|
-
/**
|
|
22
|
-
* @param {string} path
|
|
23
|
-
*/
|
|
24
|
-
const getJobConfHostName = (path) => {
|
|
25
|
-
const result = readFileContent(path)
|
|
26
|
-
|
|
27
|
-
if (result.success && result.data) {
|
|
28
|
-
const arr = result.data.split('=')
|
|
29
|
-
|
|
30
|
-
return arr[1]
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
return 'unknown'
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
module.exports = {
|
|
37
|
-
readFileContent,
|
|
38
|
-
getJobConfHostName,
|
|
39
|
-
}
|
|
1
|
+
const fs = require('node:fs')
|
|
2
|
+
const path = require('node:path')
|
|
3
|
+
|
|
4
|
+
/**
 * Synchronously reads a file as UTF-8 text without ever throwing.
 *
 * @param {string} filePath - path of the file to read; resolved to an absolute path first
 * @returns { { success: boolean, data: string | null, error?: Error }}
 *   `{ success: true, data }` when the read succeeds; otherwise
 *   `{ success: false, data: null, error }`, where `error` is the failure that
 *   was caught (e.g. a missing file or a non-string `filePath`).
 */
const readFileContent = (filePath) => {
  try {
    const absolutePath = path.resolve(filePath)
    const data = fs.readFileSync(absolutePath, 'utf8')
    return { success: true, data }
  } catch (error) {
    // Best-effort read: keep the never-throw contract, but hand the cause back
    // so callers can log or inspect it instead of losing it silently.
    return {
      success: false,
      data: null,
      error,
    }
  }
}
|
|
20
|
+
|
|
21
|
+
/**
 * Extracts the host name from a `KEY=value` style config file.
 *
 * The value is everything after the first `=` up to the end of that line,
 * with surrounding whitespace (including the trailing newline) removed.
 *
 * @param {string} path - location of the host-name config file
 * @returns {string} the host name, or `'unknown'` when the file cannot be
 *   read, is empty, contains no `=`, or has an empty value
 */
const getJobConfHostName = (path) => {
  const result = readFileContent(path)

  if (!result.success || !result.data) {
    return 'unknown'
  }

  const separatorIndex = result.data.indexOf('=')

  // No `=` at all: the file is not in KEY=value form.
  if (separatorIndex === -1) {
    return 'unknown'
  }

  // Keep only the remainder of the line holding the first `=`, so a trailing
  // newline or any following lines never leak into the host name.
  const [rawValue = ''] = result.data.slice(separatorIndex + 1).split(/\r?\n/)
  const hostName = rawValue.trim()

  // An empty value is treated the same as a missing one.
  return hostName || 'unknown'
}
|
|
35
|
+
|
|
36
|
+
module.exports = {
|
|
37
|
+
readFileContent,
|
|
38
|
+
getJobConfHostName,
|
|
39
|
+
}
|
package/lib/message.js
CHANGED
|
@@ -1,35 +1,35 @@
|
|
|
1
|
-
const pm2 = require('pm2')
|
|
2
|
-
|
|
3
|
-
/**
|
|
4
|
-
* @param { number } pm_id - pm2 应用id
|
|
5
|
-
* @param { string } eventName - 事件名
|
|
6
|
-
* @param { object } [data] - 发送的数据
|
|
7
|
-
* @returns { Promise<void> }
|
|
8
|
-
*/
|
|
9
|
-
const sendMessage = (pm_id, eventName, data) => {
|
|
10
|
-
return new Promise((resolve, reject) => {
|
|
11
|
-
pm2.sendDataToProcessId(
|
|
12
|
-
pm_id,
|
|
13
|
-
{
|
|
14
|
-
id: pm_id,
|
|
15
|
-
type: 'process:msg',
|
|
16
|
-
topic: true,
|
|
17
|
-
data: {
|
|
18
|
-
event: `pm2-perfmonitor:${eventName}`,
|
|
19
|
-
data,
|
|
20
|
-
},
|
|
21
|
-
},
|
|
22
|
-
(err) => {
|
|
23
|
-
if (err) {
|
|
24
|
-
return reject(err)
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
resolve()
|
|
28
|
-
},
|
|
29
|
-
)
|
|
30
|
-
})
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
module.exports = {
|
|
34
|
-
sendMessage,
|
|
35
|
-
}
|
|
1
|
+
const pm2 = require('pm2')
|
|
2
|
+
|
|
3
|
+
/**
 * Delivers a `process:msg` packet to a pm2-managed process. The event name is
 * namespaced as `pm2-perfmonitor:<eventName>`.
 *
 * @param { number } pm_id - id of the target pm2 process
 * @param { string } eventName - event name (sent with the `pm2-perfmonitor:` prefix)
 * @param { object } [data] - payload to send along with the event
 * @returns { Promise<void> } resolves when pm2 calls back without an error,
 *   rejects with the error pm2 reports otherwise
 */
const sendMessage = (pm_id, eventName, data) => {
  return new Promise((resolve, reject) => {
    const packet = {
      id: pm_id,
      type: 'process:msg',
      topic: true,
      data: {
        event: `pm2-perfmonitor:${eventName}`,
        data,
      },
    }

    // Settle the promise from pm2's node-style callback.
    const onSent = (err) => {
      err ? reject(err) : resolve()
    }

    pm2.sendDataToProcessId(pm_id, packet, onSent)
  })
}
|
|
32
|
+
|
|
33
|
+
module.exports = {
|
|
34
|
+
sendMessage,
|
|
35
|
+
}
|