watchmonkey_cli 1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/Rakefile +1 -0
- data/VERSION +1 -0
- data/bin/watchmonkey +8 -0
- data/bin/watchmonkey.sh +14 -0
- data/lib/watchmonkey_cli/application/colorize.rb +22 -0
- data/lib/watchmonkey_cli/application/configuration.rb +55 -0
- data/lib/watchmonkey_cli/application/configuration.tpl +142 -0
- data/lib/watchmonkey_cli/application/dispatch.rb +86 -0
- data/lib/watchmonkey_cli/application.rb +255 -0
- data/lib/watchmonkey_cli/checker.rb +228 -0
- data/lib/watchmonkey_cli/checkers/ftp_availability.rb +22 -0
- data/lib/watchmonkey_cli/checkers/mysql_replication.rb +57 -0
- data/lib/watchmonkey_cli/checkers/ssl_expiration.rb +83 -0
- data/lib/watchmonkey_cli/checkers/unix_defaults.rb +40 -0
- data/lib/watchmonkey_cli/checkers/unix_df.rb +41 -0
- data/lib/watchmonkey_cli/checkers/unix_file_exists.rb +25 -0
- data/lib/watchmonkey_cli/checkers/unix_load.rb +30 -0
- data/lib/watchmonkey_cli/checkers/unix_mdadm.rb +60 -0
- data/lib/watchmonkey_cli/checkers/unix_memory.rb +37 -0
- data/lib/watchmonkey_cli/checkers/www_availability.rb +54 -0
- data/lib/watchmonkey_cli/helpers.rb +19 -0
- data/lib/watchmonkey_cli/hooks/platypus.rb +38 -0
- data/lib/watchmonkey_cli/hooks/requeue.rb +106 -0
- data/lib/watchmonkey_cli/loopback_connection.rb +36 -0
- data/lib/watchmonkey_cli/ssh_connection.rb +50 -0
- data/lib/watchmonkey_cli/version.rb +4 -0
- data/lib/watchmonkey_cli.rb +41 -0
- data/watchmonkey_cli.gemspec +27 -0
- metadata +163 -0
@@ -0,0 +1,228 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
class Checker
|
3
|
+
# Registry of the classes recorded by the `inherited` hook.
#
# @return [Array<Class>] the memoized, mutable list (created on first access)
def self.descendants
  return @descendants if @descendants

  @descendants = []
end
|
7
|
+
|
8
|
+
# Ruby's subclassing hook: appends the new subclass to `descendants`.
#
# @param descendant [Class] the class that was just defined as a subclass
# @return [Array<Class>] the updated descendants list
def self.inherited(descendant)
  descendants.push(descendant)
end
|
11
|
+
|
12
|
+
# Name under which this checker is registered.
#
# @return [String] the explicitly assigned name, or the class name when none was set
def self.checker_name
  @checker_name ? @checker_name : self.name
end
|
15
|
+
|
16
|
+
# Assigns the name under which this checker class is registered.
#
# @param name [String] registry key for the checker
def self.checker_name=(name)
  instance_variable_set(:@checker_name, name)
end
|
19
|
+
|
20
|
+
# Mixin for the application object: manages the lifecycle of one instance
# per registered checker class. The including object must provide `debug`.
module AppHelper
  # Instantiates every known checker class (duplicates removed) and stores the
  # instances in @checkers, keyed by the class' checker_name.
  def init_checkers!
    @checkers = {}
    checker_classes = WatchmonkeyCli::Checker.descendants.uniq
    checker_classes.each do |checker_class|
      debug "[SETUP] Initializing checker `#{checker_class.name}'"
      @checkers.store(checker_class.checker_name, checker_class.new(self))
    end
  end

  # Calls #start on every checker instance, in registration order.
  def start_checkers!
    @checkers.each_pair do |name, checker|
      debug "[SETUP] Starting checker `#{name}' (#{checker.class.name})"
      checker.start
    end
  end

  # Calls #stop on every checker instance, in registration order.
  def stop_checkers!
    @checkers.each_pair do |name, checker|
      debug "[SETUP] Stopping checker `#{name}' (#{checker.class.name})"
      checker.stop
    end
  end
end
|
43
|
+
|
44
|
+
# Outcome of a single checker run.
#
# Holds the raw command/result/data set by the checker and spools messages
# per severity until #dump! forwards them to the checker's own
# info/debug/error methods. All spool access is guarded by a reentrant Monitor.
class Result
  attr_reader :checker, :type, :args
  attr_accessor :result, :command, :data

  # @param checker [Object] checker instance that produced this result
  # @param args [Array] arguments the check was enqueued with (used in the descriptor)
  def initialize checker, *args
    @checker = checker
    @args = args
    @mutex = Monitor.new
    @type = :info
    # insertion order defines the flush order in #dump!: errors first
    @spool = { error: [], info: [], debug: [] }
  end

  # Runs the block while holding this result's (reentrant) lock.
  def sync &block
    @mutex.synchronize(&block)
  end

  # @return [String] "[checker_name | arg1 | arg2 ...]"
  def descriptor
    "[#{@checker.class.checker_name} | #{args.join(" | ")}]"
  end

  def str_safe
    "#{descriptor}\n\t"
  end

  def str_running
    "Running checker #{@checker.class.checker_name} with [#{args.join(" | ")}]"
  end

  # Prefix put in front of every message flushed by #dump!.
  def str_descriptor
    "#{descriptor}\n\t"
  end

  # All currently spooled messages (errors, then infos, then debugs).
  # Uses plain Hash#values instead of ActiveSupport's Array#second and takes
  # the lock so a concurrent writer cannot interleave.
  #
  # @return [Array<String>]
  def messages
    sync { @spool.values.flatten }
  end

  # Drains the spool, sending each message (prefixed with the descriptor) to
  # the checker method named after its severity.
  def dump!
    sync do
      @spool.each do |t, messages|
        while messages.any?
          @checker.send(t, "#{str_descriptor}#{messages.shift}", self)
        end
      end
    end
  end

  # For each severity this defines:
  #   #info(msg)   - spool a message
  #   #info!(msg)  - optionally spool a message AND mark the result with this severity
  #   #info?       - whether the result is currently marked with this severity
  [:info, :debug, :error].each do |meth|
    define_method meth do |msg|
      sync { @spool[meth] << msg }
    end
    define_method :"#{meth}!" do |msg = nil|
      sync do
        @spool[meth] << msg if msg
        @type = meth
      end
    end
    define_method :"#{meth}?" do
      sync { @type == meth }
    end
  end
end
|
105
|
+
|
106
|
+
# -------------------
|
107
|
+
|
108
|
+
attr_reader :app
|
109
|
+
|
110
|
+
# @param app [Object] the owning application; exposed through the `app` reader
# Invokes the #init hook when the instance publicly responds to it.
def initialize(app)
  @app = app
  init if respond_to?(:init)
end
|
114
|
+
|
115
|
+
# Reports an informational message.
#
# Fires the :on_info and the generic :on_message hooks (the latter was missing
# here although #debug and #error both fire it), then - unless the app runs
# with opts[:quiet] - logs the message and prints it in blue.
#
# @param msg [String] message text
# @param robj [Object, nil] result object passed through to the hooks
def info msg, robj = nil
  app.fire(:on_info, msg, robj)
  app.fire(:on_message, msg, robj)
  return if app.opts[:quiet]
  _tolog(msg, :info)
  app.sync do
    puts app.c(msg, :blue)
  end
end
|
123
|
+
|
124
|
+
# Reports a debug message: fires :on_debug and :on_message, then - unless the
# app runs with opts[:quiet] or opts[:silent] - logs it and prints it in black.
#
# @param msg [String] message text
# @param robj [Object, nil] result object passed through to the hooks
def debug msg, robj = nil
  [:on_debug, :on_message].each { |event| app.fire(event, msg, robj) }
  muted = app.opts[:quiet] || app.opts[:silent]
  return if muted
  _tolog(msg, :debug)
  app.sync { puts app.c(msg, :black) }
end
|
133
|
+
|
134
|
+
# Reports an error: fires :on_error and :on_message, logs the message and
# hands it to the application's own error output while holding the app lock.
# Not affected by the quiet/silent options.
#
# @param msg [String] message text
# @param robj [Object, nil] result object passed through to the hooks
def error msg, robj = nil
  [:on_error, :on_message].each { |event| app.fire(event, msg, robj) }
  _tolog(msg, :error)
  app.sync do
    app.error(msg)
  end
end
|
140
|
+
|
141
|
+
# Forwards a message to the application's logger, but only when the app was
# configured with opts[:logfile].
#
# @param msg [String] message text
# @param meth [Symbol] logger method to call (e.g. :info, :debug, :error)
def _tolog msg, meth = :log
  app.logger.public_send(meth, msg) if app.opts[:logfile]
end
|
145
|
+
|
146
|
+
# Enqueues a check on another registered checker.
#
# The checker is looked up by `which` (previously the lookup was hard-coded to
# "ssl_expiration", so e.g. unix_defaults enqueued its sub-checks on the wrong
# checker).
#
# @param which [String, Symbol] checker_name of the target checker
# @param args [Array] arguments forwarded to the target's #enqueue
# @return [false] when the app is already running
# @return [nil] when no checker is registered under that name
# @return [Object] otherwise whatever the target's #enqueue returns
def spawn_sub which, *args
  # NOTE(review): nothing is spawned once the app reports running? - confirm
  # this is intended for callers that spawn from within #check!.
  return false if app.running?
  sub_checker = app.checkers[which.to_s]
  sub_checker.enqueue(*args) if sub_checker
end
|
152
|
+
|
153
|
+
# def to_s
|
154
|
+
# string = "#<#{self.class.name}:#{self.object_id} "
|
155
|
+
# fields = self.class.inspector_fields.map{|field| "#{field}: #{self.send(field)}"}
|
156
|
+
# string << fields.join(", ") << ">"
|
157
|
+
# end
|
158
|
+
|
159
|
+
# Shortcut for the application's loopback connection.
#
# @return [Object] whatever the app's fetch_connection(:loopback, :local) returns
def local
  loopback = [:loopback, :local]
  @app.fetch_connection(*loopback)
end
|
162
|
+
|
163
|
+
# Runs the block, retrying on StandardError.
#
# After each of the first three failures the error and its backtrace are
# reported through #error / #debug, the method sleeps one second and runs the
# block again. A fourth failure is reported as "retries exceeded".
#
# @param descriptor [String, nil] prefix for the reported messages
# @return [Object] the block's value on success, otherwise the return value of #error
def safe descriptor = nil, &block
  attempt = 0
  begin
    attempt += 1
    block.call
  rescue StandardError => failure
    if attempt <= 3
      app.sync do
        error "#{descriptor}retry #{attempt} reason is `#{failure.class}: #{failure.message}'"
        failure.backtrace.each { |line| debug "\t\t#{line}" }
      end
      sleep 1
      retry
    end
    error "#{descriptor}retries exceeded"
  end
end
|
180
|
+
|
181
|
+
# Result-aware variant of #safe: runs the block, retrying on StandardError and
# reporting through the given result object instead of the checker itself.
#
# After each of the first three failures the error and backtrace are spooled
# on the result and flushed, the method sleeps one second and retries. A
# fourth failure marks the result with "retries exceeded".
#
# The final message used to interpolate an undefined `descriptor`, which
# raised NameError exactly when retries were exhausted; the prefix is not
# needed because the message is flushed through the result's #dump!.
#
# @param resultobj [#sync, #error!, #debug, #dump!] result to report on
# @return [Object] the block's value on success
def rsafe resultobj, &block
  tries = 0
  begin
    tries += 1
    block.call
  rescue StandardError => e
    unless tries > 3
      resultobj.sync do
        resultobj.error! "retry #{tries} reason is `#{e.class}: #{e.message}'"
        e.backtrace.each { |l| resultobj.debug "\t\t#{l}" }
        resultobj.dump!
      end
      sleep 1
      retry
    end
    resultobj.error! "retries exceeded"
    resultobj.dump!
  end
end
|
200
|
+
|
201
|
+
|
202
|
+
# =================
|
203
|
+
# = API =
|
204
|
+
# =================
|
205
|
+
|
206
|
+
# Hook: called while the checker is being initialized.
# Intentionally empty here; subclasses override it.
def init
end
|
209
|
+
|
210
|
+
# Hook: called after all checkers were initialized and configs + hosts are
# loaded. Can/should be used for starting connections, etc.
# Intentionally empty here; subclasses override it.
def start
end
|
214
|
+
|
215
|
+
# Hook: called on application shutdown; a checker's own connections should be
# closed here.
#
# DO NOT CLOSE CONNECTIONS HANDLED BY THE APP!
# Checkers run concurrently, so shared resources might still be in use.
# Intentionally empty here; subclasses override it.
def stop
end
|
223
|
+
|
224
|
+
# Abstract entry point performing the actual check; every concrete checker
# has to override it.
#
# @raise [NotImplementedError] always, naming the offending class
def check! *a
  complaint = "a checker (#{self.class.name}) must implement `#check!' method!"
  raise NotImplementedError, complaint
end
|
227
|
+
end
|
228
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class FtpAvailability < Checker
|
4
|
+
self.checker_name = "ftp_availability"
|
5
|
+
|
6
|
+
# Queues an FTP availability check for the given host.
#
# @param host [String] FTP host name
# @param opts [Hash] check options, merged over the default { threshold: 1.months }
def enqueue host, opts = {}
  defaults = { threshold: 1.months }
  app.enqueue(self, host, defaults.merge(opts))
end
|
10
|
+
|
11
|
+
def check! result, host, opts = {}
|
12
|
+
Net::FTP.open(host) do |ftp|
|
13
|
+
ftp.login(opts[:user], opts[:password])
|
14
|
+
end
|
15
|
+
rescue Net::FTPPermError
|
16
|
+
result.error "Invalid credentials!"
|
17
|
+
rescue SocketError => e
|
18
|
+
result.error "#{e.class}: #{e.message}"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class MysqlReplication < Checker
|
4
|
+
self.checker_name = "mysql_replication"
|
5
|
+
|
6
|
+
# Queues a MySQL replication check.
#
# @param host [nil, false, Symbol, Object] nil/false/:local use the loopback
#   connection, any other Symbol is resolved to an SSH connection, everything
#   else is passed on as-is
# @param opts [Hash] merged over { host: "127.0.0.1", user: "root", sbm_threshold: 60 }
def enqueue host, opts = {}
  settings = { host: "127.0.0.1", user: "root", sbm_threshold: 60 }.merge(opts)
  target = host
  target = app.fetch_connection(:loopback, :local) unless target && target != :local
  target = app.fetch_connection(:ssh, target) if Symbol === target
  app.enqueue(self, target, settings)
end
|
12
|
+
|
13
|
+
# Runs `SHOW SLAVE STATUS` through the mysql CLI on the given connection and
# reports replication problems on the result.
#
# Fixes:
# * the raw output appended when no status rows were parsed referenced an
#   undefined local (`res`) and raised NameError; it now uses result.result
# * a non-numeric Seconds_Behind_Master is reported as an error instead of
#   crashing the comparison
#
# NOTE(review): option values are interpolated into the shell command
# unescaped, and the password is passed on the command line.
#
# @param result [Result] result object to report on
# @param host [#exec] connection used to run the command
# @param opts [Hash] :user, :password, :host, :port, :sbm_threshold
def check! result, host, opts = {}
  cmd = ["mysql"]
  cmd << "-u#{opts[:user]}" if opts[:user]
  cmd << "-p#{opts[:password]}" if opts[:password]
  cmd << "-h#{opts[:host]}" if opts[:host]
  cmd << "-P#{opts[:port]}" if opts[:port]
  cmd << %{-e "SHOW SLAVE STATUS\\G"}
  result.command = cmd.join(" ")
  result.result = host.exec(result.command)
  result.data = _parse_response(result.result)

  io = result.data["Slave_IO_Running"]
  sql = result.data["Slave_SQL_Running"]
  sbm = result.data["Seconds_Behind_Master"]
  # when neither flag was found at all, append the raw output for diagnosis
  pres = io.nil? && sql.nil? ? "\n\t#{result.result}" : ""

  if !io && !sql
    result.error! "MySQL replication is offline (IO=#{io},SQL=#{sql})#{pres}"
  elsif !io || !sql
    result.error! "MySQL replication is BROKEN (IO=#{io},SQL=#{sql})#{pres}"
  elsif !sbm.is_a?(Numeric)
    result.error! "MySQL replication lag is unknown (Seconds_Behind_Master=#{sbm.inspect},IO=#{io},SQL=#{sql})#{pres}"
  elsif opts[:sbm_threshold] && sbm > opts[:sbm_threshold]
    result.error! "MySQL replication is #{sbm} SECONDS BEHIND master (IO=#{io},SQL=#{sql})#{pres}"
  end
end
|
37
|
+
|
38
|
+
# Parses the vertical (`\G`) output of SHOW SLAVE STATUS into a Hash.
#
# Row separator lines (starting with "***") and empty lines are skipped. Each
# remaining line is split at its first colon into key and value; the values
# yes/Yes and no/No become booleans and all-digit values become Integers.
#
# Fixes: the digit test was applied to true/false via `=~`, which raises
# NoMethodError on Ruby >= 3.2 (Object#=~ was removed); a line consisting only
# of ":" no longer crashes; no ActiveSupport (`blank?`) needed.
#
# @param res [String] raw command output
# @return [Hash{String => String, Integer, Boolean}]
def _parse_response res
  res.split("\n").map(&:strip).reject(&:empty?).each_with_object({}) do |line, r|
    next if line.start_with?("***")
    key, _colon, raw = line.partition(":")
    val = raw.strip

    # value cast
    r[key.strip] =
      case val
      when "no", "No" then false
      when "yes", "Yes" then true
      when /\A\d+\z/ then val.to_i
      else val
      end
  end
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class SslExpiration < Checker
|
4
|
+
self.checker_name = "ssl_expiration"
|
5
|
+
|
6
|
+
# Queues a certificate expiration check for the given URL.
#
# @param page [String] URL whose certificate should be inspected
# @param opts [Hash] merged over the default { threshold: 1.months }
def enqueue page, opts = {}
  defaults = { threshold: 1.months }
  app.enqueue(self, page, defaults.merge(opts))
end
|
10
|
+
|
11
|
+
# Fetches the peer certificate of `page` and reports on its validity window.
#
# Reports an error when the certificate is not yet valid, already expired, or
# expires within opts[:threshold] seconds; otherwise reports the remaining
# lifetime as info.
#
# Fix: the "EXPIRED (since ...)" message formatted `not_after - now`, a
# negative duration (rendered like "-86400s"); it now formats the elapsed time.
# The current time is also captured once so all comparisons and messages agree.
#
# @param result [Result] result object to report on
# @param page [String] URL; only host and port are used for the connection
# @param opts [Hash] :threshold - warning window in seconds
def check! result, page, opts = {}
  uri = URI.parse(page)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  # NOTE(review): verification is disabled - presumably so that expired or
  # otherwise invalid certificates can still be fetched and inspected.
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  cert = nil
  http.start do |h|
    cert = h.peer_cert
  end

  now = Time.current
  if cert.not_before > now
    result.error! "Certificate is not yet valid (will in #{fseconds(cert.not_before - now)}, #{cert.not_before})!"
  elsif cert.not_after <= now
    result.error! "Certificate is EXPIRED (since #{fseconds(now - cert.not_after)}, #{cert.not_after})!"
  elsif cert.not_after <= now + opts[:threshold]
    result.error! "Certificate is about to expire within threshold (in #{fseconds(cert.not_after - now)}, #{cert.not_after})!"
  else
    result.info! "Certificate for `#{page}' expires in #{fseconds(cert.not_after - now)} (#{cert.not_after})!"
  end
end
|
38
|
+
|
39
|
+
# Formats a duration in seconds as a compact string such as "1w 2d 0h 5m".
#
# Years (12 * 30 days), months (30 days) and weeks only appear when the value
# reaches them. Days, hours and minutes also appear (possibly as 0) once any
# larger unit was emitted. Seconds are only shown when no day part was emitted.
# Note that months and minutes share the suffix "m".
#
# @param secs [Numeric] duration; truncated with #to_i
# @return [String]
def fseconds secs
  remaining = secs.to_i
  minute = 60
  hour = minute * 60
  day = hour * 24
  # [suffix, length in seconds, emit even when zero once a larger unit exists]
  units = [
    ["y", day * 30 * 12, false],
    ["m", day * 30, false],
    ["w", day * 7, false],
    ["d", day, true],
    ["h", hour, true],
    ["m", minute, true]
  ]

  parts = []
  units.each do |suffix, span, sticky|
    next unless remaining >= span || (sticky && !parts.empty?)
    count, remaining = remaining.divmod(span)
    parts << "#{count}#{suffix}"
  end
  parts << "#{remaining}s" unless parts.any? { |part| part.end_with?("d") }
  parts.join(" ")
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixDefaults < Checker
|
4
|
+
self.checker_name = "unix_defaults"
|
5
|
+
|
6
|
+
# Queues the "defaults" bundle (load, memory, df, mdadm) for a host.
#
# Does nothing while the app is already running - this checker only exists to
# spawn its sub-checkers once.
#
# Shortcut options are folded into the per-checker option hashes and then
# removed: :load => unix_load[:limits], :memory_min => unix_memory[:min_percent],
# :df_min => unix_df[:min_percent]. Passing `false` for a shortcut (or for
# :mdadm) replaces the corresponding sub-checker options with `false`.
#
# @param host [nil, false, Symbol, Object] nil/false/:local use the loopback
#   connection, any other Symbol is resolved to an SSH connection
# @param opts [Hash] see above
def enqueue host, opts = {}
  return if app.running? # we only ever want to run this once to spawn sub checkers
  opts = { unix_load: {}, unix_memory: {}, unix_df: {}, unix_mdadm: {} }.merge(opts)

  host = app.fetch_connection(:loopback, :local) unless host && host != :local
  host = app.fetch_connection(:ssh, host) if Symbol === host

  # shortcut => [sub-checker key, option it maps to (nil: can only disable)]
  shortcuts = {
    load: [:unix_load, :limits],
    memory_min: [:unix_memory, :min_percent],
    df_min: [:unix_df, :min_percent],
    mdadm: [:unix_mdadm, nil]
  }
  shortcuts.each do |shortcut, (target, option)|
    value = opts.delete(shortcut)
    opts[target][option] = value if option && value
    opts[target] = false if value == false
  end

  app.enqueue(self, host, opts)
end
|
28
|
+
|
29
|
+
# Spawns the individual unix sub-checkers for the host.
#
# Fix: a sub-checker whose options are `false` (which is how #enqueue marks it
# as disabled) is now skipped; previously it was still spawned with `{}`.
#
# @param result [Result] unused here; the sub-checkers create their own results
# @param host [Object] connection passed on to each sub-checker
# @param opts [Hash] per sub-checker options, keyed by checker name
def check! result, host, opts = {}
  [:unix_load, :unix_memory, :unix_df, :unix_mdadm].each do |which|
    sub_opts = opts[which]
    next if sub_opts == false # explicitly disabled
    spawn_sub(which, host, sub_opts.is_a?(Hash) ? sub_opts : {})
  end
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixDf < Checker
|
4
|
+
self.checker_name = "unix_df"
|
5
|
+
|
6
|
+
# Queues a disk space check.
#
# @param host [nil, false, Symbol, Object] nil/false/:local use the loopback
#   connection, any other Symbol is resolved to an SSH connection
# @param opts [Hash] merged over the default { min_percent: 25 }
def enqueue host, opts = {}
  settings = { min_percent: 25 }.merge(opts)
  target = host
  target = app.fetch_connection(:loopback, :local) unless target && target != :local
  target = app.fetch_connection(:ssh, target) if Symbol === target
  app.enqueue(self, target, settings)
end
|
12
|
+
|
13
|
+
# Runs `df` on the connection and reports every filesystem whose free
# percentage is below opts[:min_percent]. Without :min_percent the output is
# only collected.
#
# @param result [Result] receives command, raw output, parsed data and errors
# @param host [#exec] connection used to run the command
# @param opts [Hash] :min_percent - minimum free space in percent
def check! result, host, opts = {}
  result.command = "df"
  result.result = host.exec(result.command)
  result.data = _parse_response(result.result)

  return unless opts[:min_percent]

  result.data.each do |fs|
    next unless fs[:free] < opts[:min_percent]
    result.error! "disk space on `#{fs[:mountpoint] || "unmounted"}' (#{fs[:filesystem]}) is low (limit is min. #{opts[:min_percent]}%, got #{fs[:free]}%)"
  end
end
|
22
|
+
|
23
|
+
# Parses `df` output (first line is treated as the header) into one Hash per
# filesystem.
#
# Fixes: empty output no longer crashes (`[][1..-1]` is nil); blank lines are
# skipped; the mountpoint keeps embedded spaces because each line is split
# into at most six columns.
#
# @param res [String] raw `df` output
# @return [Array<Hash>] entries with :filesystem, :size, :used, :available,
#   :use (percent used), :free (100 - use) and :mountpoint
def _parse_response res
  res.strip.split("\n").drop(1).each_with_object([]) do |device, r|
    chunks = device.strip.split(" ", 6)
    next if chunks.empty?
    used_percent = chunks[4].to_i # "42%".to_i => 42
    r << {
      filesystem: chunks[0],
      size: chunks[1].to_i,
      used: chunks[2].to_i,
      available: chunks[3].to_i,
      use: used_percent,
      free: 100 - used_percent,
      mountpoint: chunks[5],
    }
  end
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class FileExists < Checker
|
4
|
+
self.checker_name = "unix_file_exists"
|
5
|
+
|
6
|
+
# Queues a file existence check.
#
# @param host [nil, false, Symbol, Object] nil/false/:local use the loopback
#   connection, any other Symbol is resolved to an SSH connection
# @param file [String] path to test
# @param opts [Hash] merged over the default :message ("File <file> does not exist!")
def enqueue host, file, opts = {}
  settings = { message: "File #{file} does not exist!" }.merge(opts)
  target = host
  target = app.fetch_connection(:loopback, :local) unless target && target != :local
  target = app.fetch_connection(:ssh, target) if Symbol === target
  app.enqueue(self, target, file, settings)
end
|
12
|
+
|
13
|
+
# Verifies that `file` exists.
#
# On a loopback connection the local filesystem is queried directly
# (File.exist?); otherwise `test -f ... && echo exists` is run remotely and
# anything other than the output "exists" counts as missing.
#
# @param result [Result] receives command/output (remote only) and errors
# @param host [Object] loopback connection or an object responding to #exec
# @param file [String] path to test
# @param opts [Hash] :message - error text to report
def check! result, host, file, opts = {}
  descriptor = "[#{self.class.checker_name} | #{host} | #{file} | #{opts}]\n\t"

  case host
  when WatchmonkeyCli::LoopbackConnection
    missing = !File.exist?(file)
    result.error! "#{descriptor}#{opts[:message]} (ENOENT)" if missing
  else
    result.command = "test -f #{Shellwords.escape(file)} && echo exists"
    result.result = host.exec(result.command)
    unless result.result == "exists"
      result.error! "#{opts[:message]} (#{result.result.presence || "ENOENT"})"
    end
  end
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixLoad < Checker
|
4
|
+
self.checker_name = "unix_load"
|
5
|
+
|
6
|
+
# Queues a system load check.
#
# @param host [nil, false, Symbol, Object] nil/false/:local use the loopback
#   connection, any other Symbol is resolved to an SSH connection
# @param opts [Hash] merged over the default { limits: [4, 2, 1.5] }
#   (limits for the 1, 5 and 15 minute averages)
def enqueue host, opts = {}
  settings = { limits: [4, 2, 1.5] }.merge(opts)
  target = host
  target = app.fetch_connection(:loopback, :local) unless target && target != :local
  target = app.fetch_connection(:ssh, target) if Symbol === target
  app.enqueue(self, target, settings)
end
|
12
|
+
|
13
|
+
# Runs `uptime` and compares the 1/5/15 minute load averages with the
# corresponding entries of opts[:limits]. All exceeded limits are reported in
# a single error message.
#
# @param result [Result] receives command, raw output, parsed loads and errors
# @param host [#exec] connection used to run the command
# @param opts [Hash] :limits - [limit1, limit5, limit15]
def check! result, host, opts = {}
  result.command = "uptime"
  result.result = host.exec(result.command)
  loads = result.data = _parse_response(result.result)

  problems = []
  %w[1 5 15].each_with_index do |span, idx|
    next unless loads[idx] > opts[:limits][idx]
    problems << "load#{span} is to high (limit#{span} is #{opts[:limits][idx]}, load#{span} is #{loads[idx]})"
  end
  result.error!(problems.join("\n\t")) if problems.any?
end
|
24
|
+
|
25
|
+
# Extracts the three load averages from `uptime` output.
#
# The pattern has two alternatives: "load average(s): a, b, c" with dot
# decimals, or three space-separated numbers written with comma decimals.
# Commas are converted to dots before the float conversion.
#
# @param res [String] raw `uptime` output
# @return [Array<Float>] load averages in the order they appear
def _parse_response res
  pattern = /load average(?:s)?: (?:([\d\.]+), ([\d\.]+), ([\d\.]+))|(?:([\d,]+) ([\d\,]+) ([\d\,]+))/i
  groups = res.match(pattern)[1..-1]
  # only the groups of the alternative that matched are set
  groups.compact.map { |number| number.tr(",", ".").to_f }
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixMdadm < Checker
|
4
|
+
self.checker_name = "unix_mdadm"
|
5
|
+
|
6
|
+
# Queues a software RAID (mdadm) check.
#
# @param host [nil, false, Symbol, Object] nil/false/:local use the loopback
#   connection, any other Symbol is resolved to an SSH connection
# @param opts [Hash] merged over the default { log_checking: true }
def enqueue host, opts = {}
  settings = { log_checking: true }.merge(opts)
  target = host
  target = app.fetch_connection(:loopback, :local) unless target && target != :local
  target = app.fetch_connection(:ssh, target) if Symbol === target
  app.enqueue(self, target, settings)
end
|
12
|
+
|
13
|
+
# Reads /proc/mdstat and reports every md device whose member status is not
# all "U". Optionally logs the progress of a running check/resync.
#
# Fixes:
# * a device whose status line ends in "chunks" is skipped with `next`; the
#   previous `return` silently aborted the check for all following devices
# * _parse_response returns a String on parse failure; that message is now
#   reported instead of being indexed like a Hash (TypeError)
# * empty device sections and devices without a status line no longer crash
#
# @param result [Result] receives command, raw output, parsed data and errors
# @param host [#exec] connection used to run the command
# @param opts [Hash] :log_checking - log progress lines via #info
def check! result, host, opts = {}
  result.command = "cat /proc/mdstat"
  result.result = host.exec(result.command)
  result.data = _parse_response(result.result)

  unless result.data.is_a?(Hash)
    # false => mdstat not available (report raw output); String => parser error
    result.error!(result.data || result.result)
    return
  end

  result.data[:devices].each do |rdev|
    next if rdev.empty?
    dev = rdev[0].split(" ").first
    status = rdev[1].to_s.split(" ").last
    progress = rdev[2].to_s
    next if status == "chunks" # no [UU]-style member status on this line
    result.error! "#{dev} seems broken (expected U+, got `#{status}')" if status !~ /\[U+\]/
    if opts[:log_checking] && m = progress.match(/\[[=>\.]+\]\s+([^\s]+)\s+=\s+([^\s]+)\s+\(([^\/]+)\/([^\)]+)\)\s+finish=([^\s]+)\s+speed=([^\s]+)/i)
      info "#{dev} on is checking (status:#{m[1]}|done:#{m[2]}|eta:#{m[5]}|speed:#{m[6]}|blocks_done:#{m[3]}/#{m[4]})"
    end
  end
end
|
33
|
+
|
34
|
+
# Parses the content of /proc/mdstat.
#
# @param res [String] raw output
# @return [false] when the output contains "no such file" (case-insensitive)
# @return [String] an error description when parsing raised a StandardError
# @return [Hash] otherwise: :devices (one Array of lines per blank-line
#   separated section), :personalities (names without their surrounding
#   brackets) and :unused_devices (text after "unused devices:")
def _parse_response res
  return false if res.downcase["no such file"]

  parsed = { devices: [] }
  lines = res.strip.split("\n").map(&:strip)
  lines.reject! { |line| line =~ /\Awarning:/i }

  # personalities
  header = lines.delete_at(lines.index { |line| line =~ /^personalities/i })
  names = header.match(/^personalities(?:\s?): (.*)$/i)[1].split(" ")
  parsed[:personalities] = names.map { |name| name[1..-2] }

  # unused devices
  footer = lines.delete_at(lines.index { |line| line =~ /^unused devices/i })
  parsed[:unused_devices] = footer.match(/^unused devices\s?: (.*)$/i)[1]

  # device output: whatever is left, grouped by blank lines
  lines.join("\n").split("\n\n").each do |section|
    parsed[:devices] << section.split("\n")
  end

  parsed
rescue StandardError => e
  return "failed to parse mdadm output - #{e.class}: #{e.message}"
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixMemory < Checker
|
4
|
+
self.checker_name = "unix_memory"
|
5
|
+
|
6
|
+
# Queues a memory check.
#
# @param host [nil, false, Symbol, Object] nil/false/:local use the loopback
#   connection, any other Symbol is resolved to an SSH connection
# @param opts [Hash] merged over the default { min_percent: 25 }
def enqueue host, opts = {}
  settings = { min_percent: 25 }.merge(opts)
  target = host
  target = app.fetch_connection(:loopback, :local) unless target && target != :local
  target = app.fetch_connection(:ssh, target) if Symbol === target
  app.enqueue(self, target, settings)
end
|
12
|
+
|
13
|
+
# Reads /proc/meminfo and reports an error when the computed free percentage
# is below opts[:min_percent]. If the file could not be read, the raw output
# is reported as the error.
#
# @param result [Result] receives command, raw output, parsed data and errors
# @param host [#exec] connection used to run the command
# @param opts [Hash] :min_percent - minimum free memory in percent
def check! result, host, opts = {}
  result.command = "cat /proc/meminfo"
  result.result = host.exec(result.command)
  stats = result.data = _parse_response(result.result)

  return result.error!(result.result) unless stats

  limit = opts[:min_percent]
  return unless limit && stats["free"] < limit
  result.error! "memory is low (limit is min. #{limit}%, got #{stats["free"]}%)"
end
|
24
|
+
|
25
|
+
# Parses /proc/meminfo style "Key: value" lines into integers and adds a
# "free" entry: (MemFree + Buffers + Cached) as a percentage of MemTotal,
# rounded to two decimals.
#
# @param res [String] raw output
# @return [false] when the output contains "no such file" (case-insensitive)
# @return [Hash{String => Integer, Float}] otherwise
def _parse_response res
  return false if res.downcase["no such file"]

  stats = {}
  res.strip.split("\n").each do |line|
    name, amount = line.split(":").map(&:strip)
    stats[name] = amount.to_i # "1024 kB".to_i => 1024
  end

  reclaimable = %w[MemFree Buffers Cached].sum { |key| stats[key].to_i }
  stats["free"] = (reclaimable / stats["MemTotal"].to_f * 100).round(2)
  stats
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class WwwAvailability < Checker
|
4
|
+
self.checker_name = "www_availability"
|
5
|
+
|
6
|
+
# Queues an availability check for `page` and, for https:// URLs, also spawns
# an ssl_expiration check unless opts[:ssl_expiration] is false.
#
# @param page [String] URL to fetch
# @param opts [Hash] check options; :ssl_expiration may be false (disable) or
#   a Hash of options for the spawned check and is not passed to this check
def enqueue page, opts = {}
  app.enqueue(self, page, opts.except(:ssl_expiration))

  # if available enable ssl_expiration support
  ssl_opts = opts[:ssl_expiration]
  return unless page.start_with?("https://") && ssl_opts != false
  spawn_sub("ssl_expiration", page, ssl_opts.is_a?(Hash) ? ssl_opts : {})
end
|
14
|
+
|
15
|
+
# Fetches `page` (without following redirects, without certificate
# verification) and validates the response against the given expectations.
#
# Fix: a Regexp expectation for a header that is absent from the response
# called `nil.match` and raised NoMethodError; it is now reported as a
# regular header mismatch.
#
# @param result [Result] receives the response and errors
# @param page [String] URL to fetch
# @param opts [Hash]
#   :status  - Integer or Array of accepted status codes
#   :body    - String that must be included / Regexp that must match
#   :headers - Hash of header name => expected String or Regexp
def check! result, page, opts = {}
  begin
    resp = HTTParty.get(page, no_follow: true, verify: false)
    result.result = resp
  rescue HTTParty::RedirectionTooDeep => e
    # with no_follow a redirect raises; the exception carries the response
    result.result = e.response
    original_response = true
  rescue Errno::ECONNREFUSED => e
    result.error! "Failed to fetch #{page} (#{e.class}: #{e.message})"
    return
  end

  # status
  if opts[:status]
    stati = [*opts[:status]]
    result.error! "#{result.result.code} is not in #{stati}!" if !stati.include?(result.result.code.to_i)
  end

  # body
  if rx = opts[:body]
    if rx.is_a?(String)
      result.error! "body does not include #{rx}!" if !result.result.body.include?(rx)
    elsif rx.is_a?(Regexp)
      result.error! "body does not match #{rx}!" if !result.result.body.match(rx)
    end
  end

  # headers
  if opts[:headers]
    # the response attached to the redirect exception is indexed directly
    hdata = original_response ? result.result : result.result.headers
    opts[:headers].each do |k, v|
      actual = hdata[k]
      matches = v.is_a?(Regexp) ? (!actual.nil? && actual.match(v)) : actual == v
      if !matches
        result.error! "header #{k} mismatches (expected `#{v}' got `#{actual || "nil"}')"
      end
    end
  end
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|