processwatch 3.0.8 → 3.0.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
@@ -0,0 +1,9 @@
1
+ License: (MIT) Copyright (C) 2013 Process Watch Author Phil Chen.
2
+
3
+ 3.0.9 Aug 18 2013
4
+ ====
5
+ * Added system utilization statistics for when a process issue is detected prior to correction
6
+
7
+ 3.0.8 July 19 2013
8
+ ====
9
+ * Converted Process Watch to a Ruby Gem
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Processwatch
1
+ # Process Watch
2
2
 
3
3
  License: (MIT) Copyright (C) 2013 Phil Chen.
4
4
 
@@ -14,6 +14,8 @@ This is useful for systems issues, automating troubleshooting, provisioning, sca
14
14
 
15
15
  Detecting if a process is a runaway and, if so, executing some combination of notifying and killing it
16
16
 
17
+ Emailing system utilization statistics captured just before a runaway process is corrected or a dead one is restarted
18
+
17
19
  ## PREREQUISITES:
18
20
 
19
21
  Linux, Ruby, Cron (Or some other scheduler)
@@ -43,7 +45,8 @@ ruby setup.rb
43
45
  Configure the configuration files located in /usr/local/processwatch/conf/
44
46
 
45
47
  Note you can have multiple services monitored for restart.
46
- Sample restart configuration file begins with prefix restart_ naming convention looks like restart_uniqueappname
48
+ See example restart configuration file restart_ssh
49
+ You can add more restart processes with the naming convention restart_uniqueappname (note prefix restart_)
47
50
 
48
51
  ## USING PROCESSWATCH:
49
52
 
@@ -1,3 +1,4 @@
1
1
  require 'processwatch/version'
2
2
  require 'processwatch/pw_setup'
3
+ require 'processwatch/pw_diagnostic.rb'
3
4
  require 'processwatch/pw_linux'
@@ -0,0 +1,312 @@
1
+ #License: (MIT), Copyright (C) 2013 Process Watch Author Phil Chen.
2
# Collects point-in-time Linux system utilization figures (disk, CPU, memory,
# sockets, load, network and disk I/O) by shelling out to `df`/`ps` and by
# reading files under /proc.
#
# Every public method keeps its original zero-argument call form. The optional
# arguments (paths, sampling interval, pre-captured command output) default to
# the values the methods always used and exist so the parsers can be pointed at
# alternative inputs.
module Usagewatch
  # Show the amount of total disk used in Gigabytes.
  #
  # @param df_output [String, nil] output of `df -Pk`; the command is run when nil
  # @return [Float] sum of the "Used" column (1K blocks) of every listed
  #   filesystem, converted to gigabytes and rounded to two decimals
  def self.uw_diskused(df_output = nil)
    # -P keeps every filesystem on a single line, -k forces 1K blocks.
    df_output ||= `df -Pk`
    # Skip the header; column 2 is "Used". Blank/short lines contribute 0.
    used_kb = df_output.each_line.drop(1).inject(0) do |sum, line|
      sum + line.split[2].to_i
    end
    # Float division: integer division would truncate to whole gigabytes.
    (used_kb / 1024.0 / 1024.0).round(2)
  end

  # Show the percentage of disk used.
  #
  # @param df_output [String, nil] output of `df --total`; the command is run when nil
  # @return [Float] the percentage on the "total" line, 0.0 when it is not found
  def self.uw_diskused_perc(df_output = nil)
    df_output ||= `df --total`
    rows = df_output.each_line.map { |line| line.split }
    total_row = rows.reverse.find { |fields| fields.first == "total" }
    return 0.0 unless total_row

    # Newer coreutils append a "-" mount column after the percentage, so pick
    # the field that actually ends in "%" rather than the last one.
    percent = total_row.find { |field| field.end_with?("%") }
    percent.to_f.round(2)
  end

  # Show the percentage of CPU used.
  #
  # Samples the aggregate "cpu" line of /proc/stat twice and compares the
  # user + nice + system time against user + nice + system + idle.
  #
  # @param interval [Numeric] seconds to sleep between the two samples
  # @return [Float] percentage rounded to two decimals; 0.0 when the counters
  #   did not move between the samples
  def self.uw_cpuused(interval = 1)
    before = cpu_counters
    sleep interval
    after = cpu_counters

    deltas = after.zip(before).map { |now, prev| now - prev }
    total = deltas.inject(0) { |sum, delta| sum + delta }
    # Avoid 0.0 / 0.0 (NaN) when no ticks elapsed.
    return 0.0 if total.zero?

    busy = deltas[0] + deltas[1] + deltas[2]
    (100.0 * busy / total).round(2)
  end

  # Return the top ten processes by CPU consumption.
  #
  # @return [Array<Array<String>>] pairs of [command, %CPU], both strings,
  #   e.g. [["apache2", "12.0"], ["passenger", "13.2"]]
  def self.uw_cputop
    top_processes(3)
  end

  # Show the number of TCP connections used (IPv4 + IPv6 "inuse" counters).
  #
  # @param v4_path [String] sockstat file for IPv4
  # @param v6_path [String] sockstat file for IPv6
  # @return [Integer] a missing file or missing entry counts as 0
  def self.uw_tcpused(v4_path = "/proc/net/sockstat", v6_path = "/proc/net/sockstat6")
    sockstat_inuse(v4_path, "TCP:") + sockstat_inuse(v6_path, "TCP6:")
  end

  # Show the number of UDP connections used (IPv4 + IPv6 "inuse" counters).
  #
  # @param v4_path [String] sockstat file for IPv4
  # @param v6_path [String] sockstat file for IPv6
  # @return [Integer] a missing file or missing entry counts as 0
  def self.uw_udpused(v4_path = "/proc/net/sockstat", v6_path = "/proc/net/sockstat6")
    sockstat_inuse(v4_path, "UDP:") + sockstat_inuse(v6_path, "UDP6:")
  end

  # Show the percentage of Active Memory used (Active / MemTotal).
  #
  # Entries are looked up by name because their line positions differ between
  # kernel versions.
  #
  # @param path [String] meminfo file to read
  # @return [Integer, nil] rounded percentage, or nil when the file is missing
  #   or reports no MemTotal
  def self.uw_memused(path = "/proc/meminfo")
    return nil unless File.exist?(path)

    meminfo = {}
    File.foreach(path) do |line|
      key, value = line.split(":", 2)
      meminfo[key.strip] = value.to_i if value
    end

    total = meminfo["MemTotal"].to_i
    return nil if total.zero?

    (meminfo["Active"].to_f * 100 / total).round
  end

  # Return the top ten processes by memory consumption.
  #
  # @return [Array<Array<String>>] pairs of [command, %MEM], both strings,
  #   e.g. [["apache2", "12.0"], ["passenger", "13.2"]]
  def self.uw_memtop
    top_processes(4)
  end

  # Show the average system load of the past minute.
  #
  # @param path [String] loadavg file to read
  # @return [Float, nil] first field of the file, or nil when the file is missing
  def self.uw_load(path = "/proc/loadavg")
    return nil unless File.exist?(path)

    File.read(path).split(/ /).first.to_f
  end

  # Bandwidth Received Method.
  #
  # @param path [String] network device statistics file
  # @return [Array<Integer>] [received bytes, transmitted bytes] summed over
  #   every interface whose name matches /eth|lo/; [0, 0] when nothing matches
  def self.bandrx(path = "/proc/net/dev")
    net_byte_totals(path)
  end

  # Current Bandwidth Received Calculation in Mbit/s.
  #
  # @param interval [Numeric] seconds to sleep between the two samples
  # @return [Float] received bits during the interval divided by 1024 * 1024,
  #   rounded to three decimals
  def self.uw_bandrx(interval = 1)
    before = bandrx
    sleep interval
    after = bandrx

    ((after[0] - before[0]) * 8 / 1024.0 / 1024.0).round(3)
  end

  # Bandwidth Transmitted Method. Returns the same pair as bandrx.
  #
  # @param path [String] network device statistics file
  # @return [Array<Integer>] [received bytes, transmitted bytes]
  def self.bandtx(path = "/proc/net/dev")
    net_byte_totals(path)
  end

  # Current Bandwidth Transmitted in Mbit/s.
  #
  # @param interval [Numeric] seconds to sleep between the two samples
  # @return [Float] transmitted bits during the interval divided by
  #   1024 * 1024, rounded to three decimals
  def self.uw_bandtx(interval = 1)
    before = bandtx
    sleep interval
    after = bandtx

    ((after[1] - before[1]) * 8 / 1024.0 / 1024.0).round(3)
  end

  # Disk Usage Method.
  #
  # @param path [String] disk statistics file
  # @return [Array<Integer>] [reads completed, writes completed] summed over
  #   every row of the file; [0, 0] when the file is missing or empty
  def self.diskio(path = "/proc/diskstats")
    totals = [0, 0]
    return totals unless File.exist?(path)

    File.foreach(path) do |line|
      fields = line.split
      totals[0] += fields[3].to_i # reads completed
      totals[1] += fields[7].to_i # writes completed
    end
    totals
  end

  # Current Disk Reads Completed.
  #
  # @param interval [Numeric] seconds to sleep between the two samples
  # @return [Integer] reads completed during the interval
  def self.uw_diskioreads(interval = 1)
    before = diskio
    sleep interval
    after = diskio

    after[0] - before[0]
  end

  # Current Disk Writes Completed.
  #
  # @param interval [Numeric] seconds to sleep between the two samples
  # @return [Integer] writes completed during the interval
  def self.uw_diskiowrites(interval = 1)
    before = diskio
    sleep interval
    after = diskio

    after[1] - before[1]
  end

  # Reads user, nice, system and idle ticks from the aggregate cpu line.
  def self.cpu_counters
    File.readlines('/proc/stat').grep(/^cpu /).first.split(" ")[1, 4].map { |v| v.to_i }
  end

  # Runs ps and returns the ten highest rows of the given `ps aux` column.
  def self.top_processes(column)
    command = "ps aux | awk '{print $11, $" + column.to_i.to_s + "}' | sort -k2nr | head -n 10"
    `#{command}`.each_line.map do |line|
      fields = line.chomp.split(" ")
      # Kernel threads are shown as [name]; drop the brackets.
      [fields.first.gsub(/[\[\]]/, ""), fields.last]
    end
  end

  # Returns the "inuse" value on the sockstat line starting with label.
  def self.sockstat_inuse(path, label)
    return 0 unless File.exist?(path)

    File.foreach(path) do |line|
      fields = line.split
      next unless fields[0] == label

      position = fields.index("inuse")
      return position ? fields[position + 1].to_i : 0
    end
    0
  end

  # Sums received/transmitted bytes over the eth*/lo rows of a net/dev file.
  def self.net_byte_totals(path)
    totals = [0, 0]
    return totals unless File.exist?(path)

    File.foreach(path) do |line|
      # Split on the colon first: some kernels print "eth0:12345" with no
      # space, which would otherwise shift every column by one.
      name, counters = line.split(":", 2)
      next unless counters && name =~ /eth|lo/

      fields = counters.split
      totals[0] += fields[0].to_i # receive bytes
      totals[1] += fields[8].to_i # transmit bytes
    end
    totals
  end

  private_class_method :cpu_counters, :top_processes, :sockstat_inuse, :net_byte_totals
end
@@ -15,6 +15,25 @@ module Processwatch
15
15
 
16
16
  list = ps_list.split(/\n/)
17
17
 
18
+ occurances = list.grep(/.*#$restart_process.*/)
19
+ if occurances.length == 0 && $restart_mail == "yes" && $restart_start == "yes"
20
+ usw = Usagewatch
21
+ diagnostic_msgstr = <<END_OF_MESSAGE
22
+ #{usw.uw_diskused} Total Gigabytes Disk Used
23
+ #{usw.uw_diskused_perc} Percentage of Gigabytes Used
24
+ #{usw.uw_cpuused}% CPU Used
25
+ #{usw.uw_tcpused} TCP Connections Used
26
+ #{usw.uw_udpused} UDP Connections Used
27
+ #{usw.uw_memused}% Active Memory Used
28
+ #{usw.uw_load} Average System Load Of The Past Minute
29
+ #{usw.uw_bandrx} Mbit/s Current Bandwidth Received
30
+ #{usw.uw_bandtx} Mbit/s Current Bandwidth Transmitted
31
+ #{usw.uw_diskioreads}/s Current Disk Reads Completed
32
+ #{usw.uw_diskiowrites}/s Current Disk Writes Completed
33
+ Top Ten Processes By CPU Consumption: #{usw.uw_cputop}
34
+ Top Ten Processes By Memory Consumption: #{usw.uw_memtop}
35
+ END_OF_MESSAGE
36
+
18
37
  restart_msgstr = <<END_OF_MESSAGE
19
38
  From: #$restart_from <#$restart_from_email>
20
39
  To: #$restart_to <#$restart_to_email>
@@ -24,8 +43,34 @@ Process Watch has detected #{$restart_process} has died.
24
43
 
25
44
  We have started #{$restart_process} again.
26
45
 
46
+ Below Is a Snapshot Of System Statistics Before The Process Was Started Again
47
+
48
+ #{diagnostic_msgstr}
49
+
27
50
  -Process Watch
28
51
 
52
+ END_OF_MESSAGE
53
+ system $restart_action
54
+ require 'net/smtp'
55
+ Net::SMTP.start("#$restart_smtp_host", "#$restart_smtp_port") do |smtp|
56
+ smtp.send_message(restart_msgstr, "#$restart_from_email", "#$restart_to_email")
57
+ end
58
+ elsif occurances.length == 0 && $restart_mail == "yes" && $restart_start == "no"
59
+ usw = Usagewatch
60
+ diagnostic_msgstr = <<END_OF_MESSAGE
61
+ #{usw.uw_diskused} Total Gigabytes Disk Used
62
+ #{usw.uw_diskused_perc} Percentage of Gigabytes Used
63
+ #{usw.uw_cpuused}% CPU Used
64
+ #{usw.uw_tcpused} TCP Connections Used
65
+ #{usw.uw_udpused} UDP Connections Used
66
+ #{usw.uw_memused}% Active Memory Used
67
+ #{usw.uw_load} Average System Load Of The Past Minute
68
+ #{usw.uw_bandrx} Mbit/s Current Bandwidth Received
69
+ #{usw.uw_bandtx} Mbit/s Current Bandwidth Transmitted
70
+ #{usw.uw_diskioreads}/s Current Disk Reads Completed
71
+ #{usw.uw_diskiowrites}/s Current Disk Writes Completed
72
+ Top Ten Processes By CPU Consumption: #{usw.uw_cputop}
73
+ Top Ten Processes By Memory Consumption: #{usw.uw_memtop}
29
74
  END_OF_MESSAGE
30
75
 
31
76
  dead_process_msgstr = <<END_OF_MESSAGE
@@ -35,18 +80,13 @@ Subject: Dead Process Detected On #{host}
35
80
 
36
81
  Process Watch has detected #{$restart_process} has died.
37
82
 
83
+ Below Is A Snapshot Of Current System Statistics
84
+
85
+ #{diagnostic_msgstr}
86
+
38
87
  -Process Watch
39
88
 
40
89
  END_OF_MESSAGE
41
-
42
- occurances = list.grep(/.*#$restart_process.*/)
43
- if occurances.length == 0 && $restart_mail == "yes" && $restart_start == "yes"
44
- system $restart_action
45
- require 'net/smtp'
46
- Net::SMTP.start("#$restart_smtp_host", "#$restart_smtp_port") do |smtp|
47
- smtp.send_message(restart_msgstr, "#$restart_from_email", "#$restart_to_email")
48
- end
49
- elsif occurances.length == 0 && $restart_mail == "yes" && $restart_start == "no"
50
90
  require 'net/smtp'
51
91
  Net::SMTP.start("#$restart_smtp_host", "#$restart_smtp_port") do |smtp|
52
92
  smtp.send_message(dead_process_msgstr, "#$restart_from_email", "#$restart_to_email")
@@ -74,26 +114,80 @@ end
74
114
  cpu_time = ($1.to_i*3600.to_i + $2.to_i*60.to_i + $3.to_i)
75
115
  next if cpu_time.to_i < $runaway_max_time.to_i
76
116
  begin
117
+ if $runaway_mail == "yes" && $runaway_kill == "yes"
118
+ usw = Usagewatch
119
+ diagnostic_msgstr = <<END_OF_MESSAGE
120
+ #{usw.uw_diskused} Total Gigabytes Disk Used
121
+ #{usw.uw_diskused_perc} Percentage of Gigabytes Used
122
+ #{usw.uw_cpuused}% CPU Used
123
+ #{usw.uw_tcpused} TCP Connections Used
124
+ #{usw.uw_udpused} UDP Connections Used
125
+ #{usw.uw_memused}% Active Memory Used
126
+ #{usw.uw_load} Average System Load Of The Past Minute
127
+ #{usw.uw_bandrx} Mbit/s Current Bandwidth Received
128
+ #{usw.uw_bandtx} Mbit/s Current Bandwidth Transmitted
129
+ #{usw.uw_diskioreads}/s Current Disk Reads Completed
130
+ #{usw.uw_diskiowrites}/s Current Disk Writes Completed
131
+ Top Ten Processes By CPU Consumption: #{usw.uw_cputop}
132
+ Top Ten Processes By Memory Consumption: #{usw.uw_memtop}
133
+ END_OF_MESSAGE
77
134
 
78
- msgstr = <<END_OF_MESSAGE
135
+ kill_msgstr = <<END_OF_MESSAGE
79
136
  From: #$runaway_from <#$runaway_from_email>
80
137
  To: #$runaway_to <#$runaway_to_email>
81
138
  Subject: Runaway Process Detected
82
139
 
140
+ The Below Runaway Process Has Been Detected
141
+
83
142
  #{process[4]}
84
143
 
144
+ It has been killed!
145
+
146
+ Below Is A Snapshot Of The System Statistics Before It Was Killed
147
+
148
+ #{diagnostic_msgstr}
85
149
  END_OF_MESSAGE
86
150
 
87
- if $runaway_mail == "yes" && $runaway_kill == "yes"
88
151
  require 'net/smtp'
89
152
  Net::SMTP.start("#$runaway_smtp_host", "#$runaway_smtp_port") do |smtp|
90
- smtp.send_message(msgstr, "#$runaway_from_email", "#$runaway_to_email")
153
+ smtp.send_message(kill_msgstr, "#$runaway_from_email", "#$runaway_to_email")
91
154
  system("/bin/kill" + " " + "-9" + " " + "#{process[2]}")
92
155
  end
93
156
  elsif $runaway_mail == "yes" && $runaway_kill == "no"
157
+ usw = Usagewatch
158
+ diagnostic_msgstr = <<END_OF_MESSAGE
159
+ #{usw.uw_diskused} Total Gigabytes Disk Used
160
+ #{usw.uw_diskused_perc} Percentage of Gigabytes Used
161
+ #{usw.uw_cpuused}% CPU Used
162
+ #{usw.uw_tcpused} TCP Connections Used
163
+ #{usw.uw_udpused} UDP Connections Used
164
+ #{usw.uw_memused}% Active Memory Used
165
+ #{usw.uw_load} Average System Load Of The Past Minute
166
+ #{usw.uw_bandrx} Mbit/s Current Bandwidth Received
167
+ #{usw.uw_bandtx} Mbit/s Current Bandwidth Transmitted
168
+ #{usw.uw_diskioreads}/s Current Disk Reads Completed
169
+ #{usw.uw_diskiowrites}/s Current Disk Writes Completed
170
+ Top Ten Processes By CPU Consumption: #{usw.uw_cputop}
171
+ Top Ten Processes By Memory Consumption: #{usw.uw_memtop}
172
+ END_OF_MESSAGE
173
+
174
+ no_kill_msgstr = <<END_OF_MESSAGE
175
+ From: #$runaway_from <#$runaway_from_email>
176
+ To: #$runaway_to <#$runaway_to_email>
177
+ Subject: Runaway Process Detected
178
+
179
+ The Below Runaway Process Has Been Detected
180
+
181
+ #{process[4]}
182
+
183
+ Below Is A Snapshot Of Current System Statistics
184
+
185
+ #{diagnostic_msgstr}
186
+ END_OF_MESSAGE
187
+
94
188
  require 'net/smtp'
95
189
  Net::SMTP.start("#$runaway_smtp_host", "#$runaway_smtp_port") do |smtp|
96
- smtp.send_message(msgstr, "#$runaway_from_email", "#$runaway_to_email")
190
+ smtp.send_message(no_kill_msgstr, "#$runaway_from_email", "#$runaway_to_email")
97
191
  end
98
192
  elsif $runaway_mail == "no" && $runaway_kill == "yes"
99
193
  system("/bin/kill" + " " + "-9" + " " + "#{process[2]}")
@@ -1,3 +1,3 @@
1
1
  module Processwatch
2
- VERSION = "3.0.8"
2
+ VERSION = "3.0.9"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: processwatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.8
4
+ version: 3.0.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-20 00:00:00.000000000 Z
12
+ date: 2013-08-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -51,11 +51,14 @@ executables: []
51
51
  extensions: []
52
52
  extra_rdoc_files: []
53
53
  files:
54
+ - .gitignore
55
+ - CHANGELOG
54
56
  - Gemfile
55
57
  - LICENSE.txt
56
58
  - README.md
57
59
  - Rakefile
58
60
  - lib/processwatch.rb
61
+ - lib/processwatch/pw_diagnostic.rb
59
62
  - lib/processwatch/pw_linux.rb
60
63
  - lib/processwatch/pw_setup.rb
61
64
  - lib/processwatch/version.rb