processwatch 3.0.8 → 3.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
@@ -0,0 +1,9 @@
1
+ License: (MIT) Copyright (C) 2013 Process Watch Author Phil Chen.
2
+
3
+ 3.0.9 Aug 18 2013
4
+ ====
5
+ * Added system utilization statistics for when a process issue is detected prior to correction
6
+
7
+ 3.0.8 July 19 2013
8
+ ====
9
+ * Converted Process Watch to a Ruby Gem
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # Processwatch
1
+ # Process Watch
2
2
 
3
3
  License: (MIT) Copyright (C) 2013 Phil Chen.
4
4
 
@@ -14,6 +14,8 @@ This is useful for systems issues, automating troubleshooting, provisioning, sca
14
14
 
15
15
  Detecting if a process is a runaway and, if so, executing some combination of notifying and killing it
16
16
 
17
+ Email system utilization statistics in the state prior to correcting the runaway process or starting a dead one
18
+
17
19
  ## PREREQUISITES:
18
20
 
19
21
  Linux, Ruby, Cron (Or some other scheduler)
@@ -43,7 +45,8 @@ ruby setup.rb
43
45
  Configure the configuration files located in /usr/local/processwatch/conf/
44
46
 
45
47
  Note you can have multiple services monitored for restart.
46
- Sample restart configuration file begins with prefix restart_ naming convention looks like restart_uniqueappname
48
+ See example restart configuration file restart_ssh
49
+ You can add more restart processes with the naming convention restart_uniqueappname (note prefix restart_)
47
50
 
48
51
  ## USING PROCESSWATCH:
49
52
 
@@ -1,3 +1,4 @@
1
1
  require 'processwatch/version'
2
2
  require 'processwatch/pw_setup'
3
+ require 'processwatch/pw_diagnostic.rb'
3
4
  require 'processwatch/pw_linux'
@@ -0,0 +1,312 @@
1
+ #License: (MIT), Copyright (C) 2013 Process Watch Author Phil Chen.
2
+ module Usagewatch
3
+ # Show the amount of total disk used in Gigabytes
4
+ def self.uw_diskused
5
+ @df = `df`
6
+ @parts = @df.split(" ").map { |s| s.to_i }
7
+ @sum = 0
8
+ for i in (9..@parts.size - 1).step(6) do
9
+ @sum += @parts[i]
10
+ end
11
+ @round = @sum.round(2)
12
+ @totaldiskused = ((@round/1024)/1024).round(2)
13
+ end
14
+
15
+ # Show the percentage of disk used.
16
+ def self.uw_diskused_perc
17
+ df = `df --total`
18
+ df.split(" ").last.to_f.round(2)
19
+ end
20
+
21
+ # Show the percentage of CPU used
22
+ def self.uw_cpuused
23
+ @proc0 = File.readlines('/proc/stat').grep(/^cpu /).first.split(" ")
24
+ sleep 1
25
+ @proc1 = File.readlines('/proc/stat').grep(/^cpu /).first.split(" ")
26
+
27
+ @proc0usagesum = @proc0[1].to_i + @proc0[2].to_i + @proc0[3].to_i
28
+ @proc1usagesum = @proc1[1].to_i + @proc1[2].to_i + @proc1[3].to_i
29
+ @procusage = @proc1usagesum - @proc0usagesum
30
+
31
+ @proc0total = 0
32
+ for i in (1..4) do
33
+ @proc0total += @proc0[i].to_i
34
+ end
35
+ @proc1total = 0
36
+ for i in (1..4) do
37
+ @proc1total += @proc1[i].to_i
38
+ end
39
+ @proctotal = (@proc1total - @proc0total)
40
+
41
+ @cpuusage = (@procusage.to_f / @proctotal.to_f)
42
+ @cpuusagepercentage = (100 * @cpuusage).to_f.round(2)
43
+ end
44
+
45
+ # Return an array of the top ten processes by CPU consumption
46
+ # example [["apache2", 12.0], ["passenger", 13.2]]
47
+ def self.uw_cputop
48
+ ps = `ps aux | awk '{print $11, $3}' | sort -k2nr | head -n 10`
49
+ array = []
50
+ ps.each_line do |line|
51
+ line = line.chomp.split(" ")
52
+ array << [line.first.gsub(/[\[\]]/, ""), line.last]
53
+ end
54
+ array
55
+ end
56
+
57
+ # Show the number of TCP connections used
58
+ def self.uw_tcpused
59
+ if File.exists?("/proc/net/sockstat")
60
+ File.open("/proc/net/sockstat", "r") do |ipv4|
61
+ @sockstat = ipv4.read
62
+ end
63
+
64
+ @tcp4data = @sockstat.split
65
+ @tcp4count = @tcp4data[5]
66
+ end
67
+
68
+ if File.exists?("/proc/net/sockstat6")
69
+ File.open("/proc/net/sockstat6", "r") do |ipv6|
70
+ @sockstat6 = ipv6.read
71
+
72
+ end
73
+
74
+ @tcp6data = @sockstat6.split
75
+ @tcp6count = @tcp6data[2]
76
+ end
77
+
78
+ @totaltcpused = @tcp4count.to_i + @tcp6count.to_i
79
+ end
80
+
81
+ # Show the number of UDP connections used
82
+ def self.uw_udpused
83
+ if File.exists?("/proc/net/sockstat")
84
+ File.open("/proc/net/sockstat", "r") do |ipv4|
85
+ @sockstat = ipv4.read
86
+ end
87
+
88
+ @udp4data = @sockstat.split
89
+ @udp4count = @udp4data[16]
90
+ end
91
+
92
+ if File.exists?("/proc/net/sockstat6")
93
+ File.open("/proc/net/sockstat6", "r") do |ipv6|
94
+ @sockstat6 = ipv6.read
95
+ end
96
+
97
+ @udp6data = @sockstat6.split
98
+ @udp6count = @udp6data[5]
99
+ end
100
+
101
+ @totaludpused = @udp4count.to_i + @udp6count.to_i
102
+ end
103
+
104
+ # Show the percentage of Active Memory used
105
+ def self.uw_memused
106
+ if File.exists?("/proc/meminfo")
107
+ File.open("/proc/meminfo", "r") do |file|
108
+ @result = file.read
109
+ end
110
+ end
111
+
112
+ @memstat = @result.split("\n").collect{|x| x.strip}
113
+ @memtotal = @memstat[0].gsub(/[^0-9]/, "")
114
+ @memactive = @memstat[5].gsub(/[^0-9]/, "")
115
+ @memactivecalc = (@memactive.to_f * 100) / @memtotal.to_f
116
+ @memusagepercentage = @memactivecalc.round
117
+ end
118
+
119
+ # Return an array of the top ten processes by memory consumption
120
+ # example [["apache2", 12.0], ["passenger", 13.2]]
121
+ def self.uw_memtop
122
+ ps = `ps aux | awk '{print $11, $4}' | sort -k2nr | head -n 10`
123
+ array = []
124
+ ps.each_line do |line|
125
+ line = line.chomp.split(" ")
126
+ array << [line.first.gsub(/[\[\]]/, ""), line.last]
127
+ end
128
+ array
129
+ end
130
+
131
+ # Show the average system load of the past minute
132
+ def self.uw_load
133
+ if File.exists?("/proc/loadavg")
134
+ File.open("/proc/loadavg", "r") do |file|
135
+ @loaddata = file.read
136
+ end
137
+
138
+ @load = @loaddata.split(/ /).first.to_f
139
+ end
140
+ end
141
+
142
+ # Bandwidth Received Method
143
+ def self.bandrx
144
+
145
+ if File.exists?("/proc/net/dev")
146
+ File.open("/proc/net/dev", "r") do |file|
147
+ @result = file.read
148
+ end
149
+ end
150
+
151
+ @arrRows = @result.split("\n")
152
+
153
+ @arrEthLoRows = @arrRows.grep(/eth|lo/)
154
+
155
+ rowcount = (@arrEthLoRows.count - 1)
156
+
157
+ for i in (0..rowcount)
158
+ @arrEthLoRows[i] = @arrEthLoRows[i].gsub(/\s+/m, ' ').strip.split(" ")
159
+ end
160
+
161
+ @arrColumns = Array.new
162
+ for l in (0..rowcount)
163
+ @temp = Array.new
164
+ @temp[0] = @arrEthLoRows[l][1]
165
+ @temp[1] = @arrEthLoRows[l][9]
166
+ @arrColumns << @temp
167
+ end
168
+
169
+ columncount = (@arrColumns[0].count - 1)
170
+
171
+ @arrTotal = Array.new
172
+ for p in (0..columncount)
173
+ @arrTotal[p] = 0
174
+ end
175
+
176
+ for j in (0..columncount)
177
+ for k in (0..rowcount)
178
+ @arrTotal[j] = @arrColumns[k][j].to_i + @arrTotal[j]
179
+ end
180
+ end
181
+
182
+ @bandrxtx = @arrTotal
183
+ end
184
+
185
+ # Current Bandwidth Received Calculation in Mbit/s
186
+ def self.uw_bandrx
187
+
188
+ @new0 = self.bandrx
189
+ sleep 1
190
+ @new1 = self.bandrx
191
+
192
+ @bytesreceived = @new1[0].to_i - @new0[0].to_i
193
+ @bitsreceived = (@bytesreceived * 8)
194
+ @megabitsreceived = (@bitsreceived.to_f / 1024 / 1024).round(3)
195
+ end
196
+
197
+ # Bandwidth Transmitted Method
198
+ def self.bandtx
199
+
200
+ if File.exists?("/proc/net/dev")
201
+ File.open("/proc/net/dev", "r") do |file|
202
+ @result = file.read
203
+ end
204
+ end
205
+
206
+ @arrRows = @result.split("\n")
207
+
208
+ @arrEthLoRows = @arrRows.grep(/eth|lo/)
209
+
210
+ rowcount = (@arrEthLoRows.count - 1)
211
+
212
+ for i in (0..rowcount)
213
+ @arrEthLoRows[i] = @arrEthLoRows[i].gsub(/\s+/m, ' ').strip.split(" ")
214
+ end
215
+
216
+ @arrColumns = Array.new
217
+ for l in (0..rowcount)
218
+ @temp = Array.new
219
+ @temp[0] = @arrEthLoRows[l][1]
220
+ @temp[1] = @arrEthLoRows[l][9]
221
+ @arrColumns << @temp
222
+ end
223
+
224
+ columncount = (@arrColumns[0].count - 1)
225
+
226
+ @arrTotal = Array.new
227
+ for p in (0..columncount)
228
+ @arrTotal[p] = 0
229
+ end
230
+
231
+ for j in (0..columncount)
232
+ for k in (0..rowcount)
233
+ @arrTotal[j] = @arrColumns[k][j].to_i + @arrTotal[j]
234
+ end
235
+ end
236
+
237
+ @bandrxtx = @arrTotal
238
+ end
239
+
240
+ # Current Bandwidth Transmitted in Mbit/s
241
+ def self.uw_bandtx
242
+
243
+ @new0 = self.bandtx
244
+ sleep 1
245
+ @new1 = self.bandtx
246
+
247
+ @bytestransmitted = @new1[1].to_i - @new0[1].to_i
248
+ @bitstransmitted = (@bytestransmitted * 8)
249
+ @megabitstransmitted = (@bitstransmitted.to_f / 1024 / 1024).round(3)
250
+ end
251
+
252
+ # Disk I/O Method (sums reads and writes completed across all /proc/diskstats rows)
253
+ def self.diskio
254
+
255
+ if File.exists?("/proc/diskstats")
256
+ File.open("/proc/diskstats", "r") do |file|
257
+ @result = file.read
258
+ end
259
+ end
260
+
261
+ @arrRows = @result.split("\n")
262
+
263
+ rowcount = (@arrRows.count - 1)
264
+
265
+ for i in (0..rowcount)
266
+ @arrRows[i] = @arrRows[i].gsub(/\s+/m, ' ').strip.split(" ")
267
+ end
268
+
269
+ @arrColumns = Array.new
270
+ for l in (0..rowcount)
271
+ @temp = Array.new
272
+ @temp[0] = @arrRows[l][3]
273
+ @temp[1] = @arrRows[l][7]
274
+ @arrColumns << @temp
275
+ end
276
+
277
+ columncount = (@arrColumns[0].count - 1)
278
+
279
+ @arrTotal = Array.new
280
+ for p in (0..columncount)
281
+ @arrTotal[p] = 0
282
+ end
283
+
284
+ for j in (0..columncount)
285
+ for k in (0..rowcount)
286
+ @arrTotal[j] = @arrColumns[k][j].to_i + @arrTotal[j]
287
+ end
288
+ end
289
+
290
+ @diskiorw= @arrTotal
291
+ end
292
+
293
+ # Current Disk Reads Completed
294
+ def self.uw_diskioreads
295
+
296
+ @new0 = self.diskio
297
+ sleep 1
298
+ @new1 = self.diskio
299
+
300
+ @diskreads = @new1[0].to_i - @new0[0].to_i
301
+ end
302
+
303
+ # Current Disk Writes Completed
304
+ def self.uw_diskiowrites
305
+
306
+ @new0 = self.diskio
307
+ sleep 1
308
+ @new1 = self.diskio
309
+
310
+ @diskwrites = @new1[1].to_i - @new0[1].to_i
311
+ end
312
+ end
@@ -15,6 +15,25 @@ module Processwatch
15
15
 
16
16
  list = ps_list.split(/\n/)
17
17
 
18
+ occurances = list.grep(/.*#$restart_process.*/)
19
+ if occurances.length == 0 && $restart_mail == "yes" && $restart_start == "yes"
20
+ usw = Usagewatch
21
+ diagnostic_msgstr = <<END_OF_MESSAGE
22
+ #{usw.uw_diskused} Total Gigabytes Disk Used
23
+ #{usw.uw_diskused_perc} Percentage of Gigabytes Used
24
+ #{usw.uw_cpuused}% CPU Used
25
+ #{usw.uw_tcpused} TCP Connections Used
26
+ #{usw.uw_udpused} UDP Connections Used
27
+ #{usw.uw_memused}% Active Memory Used
28
+ #{usw.uw_load} Average System Load Of The Past Minute
29
+ #{usw.uw_bandrx} Mbit/s Current Bandwidth Received
30
+ #{usw.uw_bandtx} Mbit/s Current Bandwidth Transmitted
31
+ #{usw.uw_diskioreads}/s Current Disk Reads Completed
32
+ #{usw.uw_diskiowrites}/s Current Disk Writes Completed
33
+ Top Ten Processes By CPU Consumption: #{usw.uw_cputop}
34
+ Top Ten Processes By Memory Consumption: #{usw.uw_memtop}
35
+ END_OF_MESSAGE
36
+
18
37
  restart_msgstr = <<END_OF_MESSAGE
19
38
  From: #$restart_from <#$restart_from_email>
20
39
  To: #$restart_to <#$restart_to_email>
@@ -24,8 +43,34 @@ Process Watch has detected #{$restart_process} has died.
24
43
 
25
44
  We have started #{$restart_process} again.
26
45
 
46
+ Below Is a Snapshot Of System Statistics Before The Process Was Started Again
47
+
48
+ #{diagnostic_msgstr}
49
+
27
50
  -Process Watch
28
51
 
52
+ END_OF_MESSAGE
53
+ system $restart_action
54
+ require 'net/smtp'
55
+ Net::SMTP.start("#$restart_smtp_host", "#$restart_smtp_port") do |smtp|
56
+ smtp.send_message(restart_msgstr, "#$restart_from_email", "#$restart_to_email")
57
+ end
58
+ elsif occurances.length == 0 && $restart_mail == "yes" && $restart_start == "no"
59
+ usw = Usagewatch
60
+ diagnostic_msgstr = <<END_OF_MESSAGE
61
+ #{usw.uw_diskused} Total Gigabytes Disk Used
62
+ #{usw.uw_diskused_perc} Percentage of Gigabytes Used
63
+ #{usw.uw_cpuused}% CPU Used
64
+ #{usw.uw_tcpused} TCP Connections Used
65
+ #{usw.uw_udpused} UDP Connections Used
66
+ #{usw.uw_memused}% Active Memory Used
67
+ #{usw.uw_load} Average System Load Of The Past Minute
68
+ #{usw.uw_bandrx} Mbit/s Current Bandwidth Received
69
+ #{usw.uw_bandtx} Mbit/s Current Bandwidth Transmitted
70
+ #{usw.uw_diskioreads}/s Current Disk Reads Completed
71
+ #{usw.uw_diskiowrites}/s Current Disk Writes Completed
72
+ Top Ten Processes By CPU Consumption: #{usw.uw_cputop}
73
+ Top Ten Processes By Memory Consumption: #{usw.uw_memtop}
29
74
  END_OF_MESSAGE
30
75
 
31
76
  dead_process_msgstr = <<END_OF_MESSAGE
@@ -35,18 +80,13 @@ Subject: Dead Process Detected On #{host}
35
80
 
36
81
  Process Watch has detected #{$restart_process} has died.
37
82
 
83
+ Below Is A Snapshot Of Current System Statistics
84
+
85
+ #{diagnostic_msgstr}
86
+
38
87
  -Process Watch
39
88
 
40
89
  END_OF_MESSAGE
41
-
42
- occurances = list.grep(/.*#$restart_process.*/)
43
- if occurances.length == 0 && $restart_mail == "yes" && $restart_start == "yes"
44
- system $restart_action
45
- require 'net/smtp'
46
- Net::SMTP.start("#$restart_smtp_host", "#$restart_smtp_port") do |smtp|
47
- smtp.send_message(restart_msgstr, "#$restart_from_email", "#$restart_to_email")
48
- end
49
- elsif occurances.length == 0 && $restart_mail == "yes" && $restart_start == "no"
50
90
  require 'net/smtp'
51
91
  Net::SMTP.start("#$restart_smtp_host", "#$restart_smtp_port") do |smtp|
52
92
  smtp.send_message(dead_process_msgstr, "#$restart_from_email", "#$restart_to_email")
@@ -74,26 +114,80 @@ end
74
114
  cpu_time = ($1.to_i*3600.to_i + $2.to_i*60.to_i + $3.to_i)
75
115
  next if cpu_time.to_i < $runaway_max_time.to_i
76
116
  begin
117
+ if $runaway_mail == "yes" && $runaway_kill == "yes"
118
+ usw = Usagewatch
119
+ diagnostic_msgstr = <<END_OF_MESSAGE
120
+ #{usw.uw_diskused} Total Gigabytes Disk Used
121
+ #{usw.uw_diskused_perc} Percentage of Gigabytes Used
122
+ #{usw.uw_cpuused}% CPU Used
123
+ #{usw.uw_tcpused} TCP Connections Used
124
+ #{usw.uw_udpused} UDP Connections Used
125
+ #{usw.uw_memused}% Active Memory Used
126
+ #{usw.uw_load} Average System Load Of The Past Minute
127
+ #{usw.uw_bandrx} Mbit/s Current Bandwidth Received
128
+ #{usw.uw_bandtx} Mbit/s Current Bandwidth Transmitted
129
+ #{usw.uw_diskioreads}/s Current Disk Reads Completed
130
+ #{usw.uw_diskiowrites}/s Current Disk Writes Completed
131
+ Top Ten Processes By CPU Consumption: #{usw.uw_cputop}
132
+ Top Ten Processes By Memory Consumption: #{usw.uw_memtop}
133
+ END_OF_MESSAGE
77
134
 
78
- msgstr = <<END_OF_MESSAGE
135
+ kill_msgstr = <<END_OF_MESSAGE
79
136
  From: #$runaway_from <#$runaway_from_email>
80
137
  To: #$runaway_to <#$runaway_to_email>
81
138
  Subject: Runaway Process Detected
82
139
 
140
+ The Below Runaway Process Has Been Detected
141
+
83
142
  #{process[4]}
84
143
 
144
+ It has been killed!
145
+
146
+ Below Is A Snapshot Of The System Statistics Before It Was Killed
147
+
148
+ #{diagnostic_msgstr}
85
149
  END_OF_MESSAGE
86
150
 
87
- if $runaway_mail == "yes" && $runaway_kill == "yes"
88
151
  require 'net/smtp'
89
152
  Net::SMTP.start("#$runaway_smtp_host", "#$runaway_smtp_port") do |smtp|
90
- smtp.send_message(msgstr, "#$runaway_from_email", "#$runaway_to_email")
153
+ smtp.send_message(kill_msgstr, "#$runaway_from_email", "#$runaway_to_email")
91
154
  system("/bin/kill" + " " + "-9" + " " + "#{process[2]}")
92
155
  end
93
156
  elsif $runaway_mail == "yes" && $runaway_kill == "no"
157
+ usw = Usagewatch
158
+ diagnostic_msgstr = <<END_OF_MESSAGE
159
+ #{usw.uw_diskused} Total Gigabytes Disk Used
160
+ #{usw.uw_diskused_perc} Percentage of Gigabytes Used
161
+ #{usw.uw_cpuused}% CPU Used
162
+ #{usw.uw_tcpused} TCP Connections Used
163
+ #{usw.uw_udpused} UDP Connections Used
164
+ #{usw.uw_memused}% Active Memory Used
165
+ #{usw.uw_load} Average System Load Of The Past Minute
166
+ #{usw.uw_bandrx} Mbit/s Current Bandwidth Received
167
+ #{usw.uw_bandtx} Mbit/s Current Bandwidth Transmitted
168
+ #{usw.uw_diskioreads}/s Current Disk Reads Completed
169
+ #{usw.uw_diskiowrites}/s Current Disk Writes Completed
170
+ Top Ten Processes By CPU Consumption: #{usw.uw_cputop}
171
+ Top Ten Processes By Memory Consumption: #{usw.uw_memtop}
172
+ END_OF_MESSAGE
173
+
174
+ no_kill_msgstr = <<END_OF_MESSAGE
175
+ From: #$runaway_from <#$runaway_from_email>
176
+ To: #$runaway_to <#$runaway_to_email>
177
+ Subject: Runaway Process Detected
178
+
179
+ The Below Runaway Process Has Been Detected
180
+
181
+ #{process[4]}
182
+
183
+ Below Is A Snapshot Of Current System Statistics
184
+
185
+ #{diagnostic_msgstr}
186
+ END_OF_MESSAGE
187
+
94
188
  require 'net/smtp'
95
189
  Net::SMTP.start("#$runaway_smtp_host", "#$runaway_smtp_port") do |smtp|
96
- smtp.send_message(msgstr, "#$runaway_from_email", "#$runaway_to_email")
190
+ smtp.send_message(no_kill_msgstr, "#$runaway_from_email", "#$runaway_to_email")
97
191
  end
98
192
  elsif $runaway_mail == "no" && $runaway_kill == "yes"
99
193
  system("/bin/kill" + " " + "-9" + " " + "#{process[2]}")
@@ -1,3 +1,3 @@
1
1
  module Processwatch
2
- VERSION = "3.0.8"
2
+ VERSION = "3.0.9"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: processwatch
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.8
4
+ version: 3.0.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-07-20 00:00:00.000000000 Z
12
+ date: 2013-08-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -51,11 +51,14 @@ executables: []
51
51
  extensions: []
52
52
  extra_rdoc_files: []
53
53
  files:
54
+ - .gitignore
55
+ - CHANGELOG
54
56
  - Gemfile
55
57
  - LICENSE.txt
56
58
  - README.md
57
59
  - Rakefile
58
60
  - lib/processwatch.rb
61
+ - lib/processwatch/pw_diagnostic.rb
59
62
  - lib/processwatch/pw_linux.rb
60
63
  - lib/processwatch/pw_setup.rb
61
64
  - lib/processwatch/version.rb