watchmonkey_cli 1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +45 -0
- data/Rakefile +1 -0
- data/VERSION +1 -0
- data/bin/watchmonkey +8 -0
- data/bin/watchmonkey.sh +14 -0
- data/lib/watchmonkey_cli/application/colorize.rb +22 -0
- data/lib/watchmonkey_cli/application/configuration.rb +55 -0
- data/lib/watchmonkey_cli/application/configuration.tpl +142 -0
- data/lib/watchmonkey_cli/application/dispatch.rb +86 -0
- data/lib/watchmonkey_cli/application.rb +255 -0
- data/lib/watchmonkey_cli/checker.rb +228 -0
- data/lib/watchmonkey_cli/checkers/ftp_availability.rb +22 -0
- data/lib/watchmonkey_cli/checkers/mysql_replication.rb +57 -0
- data/lib/watchmonkey_cli/checkers/ssl_expiration.rb +83 -0
- data/lib/watchmonkey_cli/checkers/unix_defaults.rb +40 -0
- data/lib/watchmonkey_cli/checkers/unix_df.rb +41 -0
- data/lib/watchmonkey_cli/checkers/unix_file_exists.rb +25 -0
- data/lib/watchmonkey_cli/checkers/unix_load.rb +30 -0
- data/lib/watchmonkey_cli/checkers/unix_mdadm.rb +60 -0
- data/lib/watchmonkey_cli/checkers/unix_memory.rb +37 -0
- data/lib/watchmonkey_cli/checkers/www_availability.rb +54 -0
- data/lib/watchmonkey_cli/helpers.rb +19 -0
- data/lib/watchmonkey_cli/hooks/platypus.rb +38 -0
- data/lib/watchmonkey_cli/hooks/requeue.rb +106 -0
- data/lib/watchmonkey_cli/loopback_connection.rb +36 -0
- data/lib/watchmonkey_cli/ssh_connection.rb +50 -0
- data/lib/watchmonkey_cli/version.rb +4 -0
- data/lib/watchmonkey_cli.rb +41 -0
- data/watchmonkey_cli.gemspec +27 -0
- metadata +163 -0
@@ -0,0 +1,228 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
class Checker
|
3
|
+
# Descendant tracking for inherited classes.
|
4
|
+
# Registry of classes that inherited from this class.
# The array is created lazily on first access and the same array
# instance is returned on every subsequent call.
def self.descendants
  @descendants = [] unless @descendants
  @descendants
end
|
7
|
+
|
8
|
+
# Ruby inheritance hook: records every new subclass in the
# descendants registry.
def self.inherited(descendant)
  descendants.push(descendant)
end
|
11
|
+
|
12
|
+
# Name under which this checker is registered.
# Returns the explicitly assigned name, falling back to the class name.
def self.checker_name
  return @checker_name if @checker_name

  name
end
|
15
|
+
|
16
|
+
# Assigns the name under which this checker is registered.
def self.checker_name=(name)
  instance_variable_set(:@checker_name, name)
end
|
19
|
+
|
20
|
+
# Mixin for the application object: builds and drives one instance of
# every registered checker class. The including object must provide
# a `debug` method.
module AppHelper
  # Instantiates each known checker class once (duplicates in the
  # registry are ignored) and stores it in @checkers under its checker_name.
  def init_checkers!
    @checkers = {}
    WatchmonkeyCli::Checker.descendants.uniq.each do |checker_class|
      debug "[SETUP] Initializing checker `#{checker_class.name}'"
      @checkers.store(checker_class.checker_name, checker_class.new(self))
    end
  end

  # Invokes the `start` hook of every checker instance.
  def start_checkers!
    @checkers.each_pair do |name, checker|
      debug "[SETUP] Starting checker `#{name}' (#{checker.class.name})"
      checker.start
    end
  end

  # Invokes the `stop` hook of every checker instance.
  def stop_checkers!
    @checkers.each_pair do |name, checker|
      debug "[SETUP] Stopping checker `#{name}' (#{checker.class.name})"
      checker.stop
    end
  end
end
|
43
|
+
|
44
|
+
# Outcome of a single checker run.
#
# Messages are spooled per level (:error, :info, :debug) and handed to the
# owning checker when #dump! is called. Access to the spool and to the
# result type is serialized through a reentrant Monitor.
class Result
  attr_reader :checker, :type, :args
  attr_accessor :result, :command, :data

  # checker - the checker instance that produced this result
  # args    - the arguments the check was enqueued with (used in descriptors)
  def initialize checker, *args
    @checker = checker
    @args = args
    @mutex = Monitor.new
    @type = :info
    @spool = { error: [], info: [], debug: [] }
  end

  # Runs the block while holding this result's monitor.
  def sync &block
    @mutex.synchronize(&block)
  end

  # Short identification of the check: checker name plus its arguments.
  def descriptor
    "[#{@checker.class.checker_name} | #{args.join(" | ")}]"
  end

  def str_safe
    "#{descriptor}\n\t"
  end

  def str_running
    "Running checker #{@checker.class.checker_name} with [#{args.join(" | ")}]"
  end

  # Prefix put in front of every dumped message.
  def str_descriptor
    "#{descriptor}\n\t"
  end

  # All spooled messages (errors first, then info, then debug).
  # Uses Hash#values instead of ActiveSupport's Array#second so it works
  # with the core library alone.
  def messages
    @spool.values.flatten
  end

  # Drains the spool, sending each message (prefixed with the descriptor)
  # to the checker method named after its level.
  def dump!
    sync do
      @spool.each do |level, pending|
        @checker.send(level, "#{str_descriptor}#{pending.shift}", self) while pending.any?
      end
    end
  end

  # For each level this defines:
  #   level(msg)   - spool a message
  #   level!(msg)  - spool an optional message and set the result type
  #   level?       - whether the result type equals the level
  [:info, :debug, :error].each do |meth|
    define_method meth do |msg|
      sync { @spool[meth] << msg }
    end

    define_method :"#{meth}!" do |msg = nil|
      sync do
        @spool[meth] << msg if msg
        @type = meth
      end
    end

    define_method :"#{meth}?" do
      sync { @type == meth }
    end
  end
end
|
105
|
+
|
106
|
+
# -------------------
|
107
|
+
|
108
|
+
attr_reader :app
|
109
|
+
|
110
|
+
# Stores the owning application and runs the optional public `init` hook.
def initialize(app)
  @app = app
  init if respond_to?(:init)
end
|
114
|
+
|
115
|
+
# Reports an informational message: fires the :on_info event and, unless
# the app runs with :quiet, logs it and prints it in blue.
# NOTE(review): unlike debug/error this does not fire :on_message - confirm intent.
def info(msg, robj = nil)
  app.fire(:on_info, msg, robj)
  unless app.opts[:quiet]
    _tolog(msg, :info)
    app.sync { puts app.c(msg, :blue) }
  end
end
|
123
|
+
|
124
|
+
# Reports a debug message: fires :on_debug and :on_message and, unless the
# app runs with :quiet or :silent, logs it and prints it in black.
def debug(msg, robj = nil)
  %i[on_debug on_message].each { |event| app.fire(event, msg, robj) }
  muted = app.opts[:quiet] || app.opts[:silent]
  unless muted
    _tolog(msg, :debug)
    app.sync { puts app.c(msg, :black) }
  end
end
|
133
|
+
|
134
|
+
# Reports an error: fires :on_error and :on_message, logs the message and
# forwards it to the application's own error output (regardless of :quiet).
def error(msg, robj = nil)
  %i[on_error on_message].each { |event| app.fire(event, msg, robj) }
  _tolog(msg, :error)
  app.sync { app.error(msg) }
end
|
140
|
+
|
141
|
+
# Forwards msg to the application's logger by calling the logger method
# named `meth`, but only when the :logfile option is set.
def _tolog(msg, meth = :log)
  app.logger.public_send(meth, msg) if app.opts[:logfile]
end
|
145
|
+
|
146
|
+
# Enqueues a job on another registered checker.
#
# which - name of the target checker (String or Symbol), looked up in
#         app.checkers by its string form
# args  - passed on to the target checker's #enqueue
#
# Returns false without doing anything while the app reports running?,
# nil when no checker is registered under that name, otherwise the value
# of the target's #enqueue.
#
# Previously the lookup was hard-coded to "ssl_expiration", so every
# request was routed to that checker no matter what `which` said.
def spawn_sub which, *args
  return false if app.running?

  target = app.checkers[which.to_s]
  target.enqueue(*args) if target
end
|
152
|
+
|
153
|
+
# def to_s
|
154
|
+
# string = "#<#{self.class.name}:#{self.object_id} "
|
155
|
+
# fields = self.class.inspector_fields.map{|field| "#{field}: #{self.send(field)}"}
|
156
|
+
# string << fields.join(", ") << ">"
|
157
|
+
# end
|
158
|
+
|
159
|
+
# Shortcut for the application's loopback connection named :local.
def local
  @app.fetch_connection(*%i[loopback local])
end
|
162
|
+
|
163
|
+
# Runs the block, retrying when it raises a StandardError.
#
# Each of the first three failures is reported via `error` (with the
# backtrace sent to `debug`), followed by a one second pause and another
# attempt. The fourth failure is reported as "retries exceeded" and the
# method gives up. `descriptor` is an optional prefix for the messages.
# Returns the block's value on success.
def safe(descriptor = nil, &block)
  attempts = 0
  begin
    attempts += 1
    block.call
  rescue StandardError => ex
    if attempts > 3
      error "#{descriptor}retries exceeded"
    else
      app.sync do
        error "#{descriptor}retry #{attempts} reason is `#{ex.class}: #{ex.message}'"
        ex.backtrace.each { |line| debug "\t\t#{line}" }
      end
      sleep 1
      retry
    end
  end
end
|
180
|
+
|
181
|
+
# Runs the block, retrying when it raises a StandardError, and reports
# failures on the given result object instead of printing them directly.
#
# Each of the first three failures marks the result as an error, spools
# the backtrace as debug messages, dumps the result, sleeps one second and
# retries. The fourth failure records "retries exceeded" and gives up.
# Returns the block's value on success.
#
# The final message used to interpolate `descriptor`, which is not defined
# in this method, so exhausting the retries raised a NameError instead of
# being reported. No prefix is needed: the result's dump! already puts the
# descriptor in front of every message.
def rsafe resultobj, &block
  tries = 0
  begin
    tries += 1
    block.call
  rescue StandardError => e
    unless tries > 3
      resultobj.sync do
        resultobj.error! "retry #{tries} reason is `#{e.class}: #{e.message}'"
        Array(e.backtrace).each { |l| resultobj.debug "\t\t#{l}" }
        resultobj.dump!
      end
      sleep 1
      retry
    end
    resultobj.error! "retries exceeded"
    resultobj.dump!
  end
end
|
200
|
+
|
201
|
+
|
202
|
+
# =================
|
203
|
+
# = API =
|
204
|
+
# =================
|
205
|
+
|
206
|
+
# Hook: invoked from the constructor while the checker is being initialized.
# Does nothing by default; subclasses may override it.
def init
end
|
209
|
+
|
210
|
+
# Hook: invoked after all checkers were initialized and configs + hosts
# are loaded. Can/should be used for starting connections, etc.
# Does nothing by default.
def start
end
|
214
|
+
|
215
|
+
# Hook: invoked on application shutdown; connections opened by the checker
# should be closed here. Does nothing by default.
#
# DO NOT CLOSE CONNECTIONS HANDLED BY THE APP!
# Checkers run concurrently, so shared resources might still be in use.
def stop
end
|
223
|
+
|
224
|
+
# Abstract entry point of a checker; concrete checkers must override it.
# Always raises NotImplementedError naming the offending class.
def check!(*a)
  complaint = "a checker (#{self.class.name}) must implement `#check!' method!"
  raise NotImplementedError, complaint
end
|
227
|
+
end
|
228
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class FtpAvailability < Checker
|
4
|
+
self.checker_name = "ftp_availability"
|
5
|
+
|
6
|
+
# Queues an FTP availability check for host.
# Caller options are layered over the defaults before being handed to the app.
# NOTE(review): :threshold is not read by this checker's check! - presumably
# copied from the SSL checker; confirm before removing.
def enqueue(host, opts = {})
  defaults = { threshold: 1.months }
  app.enqueue(self, host, defaults.merge(opts))
end
|
10
|
+
|
11
|
+
def check! result, host, opts = {}
|
12
|
+
Net::FTP.open(host) do |ftp|
|
13
|
+
ftp.login(opts[:user], opts[:password])
|
14
|
+
end
|
15
|
+
rescue Net::FTPPermError
|
16
|
+
result.error "Invalid credentials!"
|
17
|
+
rescue SocketError => e
|
18
|
+
result.error "#{e.class}: #{e.message}"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class MysqlReplication < Checker
|
4
|
+
self.checker_name = "mysql_replication"
|
5
|
+
|
6
|
+
# Queues a MySQL replication check.
#
# host - nil/false/:local selects the loopback connection, any other
#        Symbol is resolved to an SSH connection, everything else is
#        passed through untouched
# opts - merged over the defaults (host 127.0.0.1, user root, 60s lag limit)
def enqueue(host, opts = {})
  settings = { host: "127.0.0.1", user: "root", sbm_threshold: 60 }.merge(opts)
  host = app.fetch_connection(:loopback, :local) unless host && host != :local
  host = app.fetch_connection(:ssh, host) if Symbol === host
  app.enqueue(self, host, settings)
end
|
12
|
+
|
13
|
+
# Runs `SHOW SLAVE STATUS` through the mysql command line client on host
# and flags the result when replication is offline, broken or lagging.
#
# opts - :user, :password, :host, :port are turned into client switches;
#        :sbm_threshold is the tolerated Seconds_Behind_Master
#
# Fixes over the previous version:
# * the raw output appended when neither thread status could be parsed
#   referenced an undefined local (`res`) and raised a NameError
# * the lag comparison is skipped when either side is not usable
#   (non-numeric Seconds_Behind_Master or no threshold) instead of raising
#
# NOTE(review): option values are interpolated into the shell command
# unescaped; values containing shell metacharacters will break it.
def check! result, host, opts = {}
  cmd = ["mysql"]
  cmd << "-u#{opts[:user]}" if opts[:user]
  cmd << "-p#{opts[:password]}" if opts[:password]
  cmd << "-h#{opts[:host]}" if opts[:host]
  cmd << "-P#{opts[:port]}" if opts[:port]
  cmd << %{-e "SHOW SLAVE STATUS\\G"}
  result.command = cmd.join(" ")
  result.result = host.exec(result.command)
  result.data = _parse_response(result.result)

  io = result.data["Slave_IO_Running"]
  sql = result.data["Slave_SQL_Running"]
  sbm = result.data["Seconds_Behind_Master"]
  threshold = opts[:sbm_threshold]
  # show the raw client output when no status line was found at all
  pres = io.nil? && sql.nil? ? "\n\t#{result.result}" : ""

  if !io && !sql
    result.error! "MySQL replication is offline (IO=#{io},SQL=#{sql})#{pres}"
  elsif !io || !sql
    result.error! "MySQL replication is BROKEN (IO=#{io},SQL=#{sql})#{pres}"
  elsif sbm.is_a?(Numeric) && threshold && sbm > threshold
    result.error! "MySQL replication is #{sbm} SECONDS BEHIND master (IO=#{io},SQL=#{sql})#{pres}"
  end
end
|
37
|
+
|
38
|
+
# Turns the vertical (`\G`) output of the mysql client into a Hash.
#
# Every "Key: value" line becomes one entry; row separator lines
# ("*** 1. row ***") and empty lines are skipped. Values are cast:
# "Yes"/"yes" => true, "No"/"no" => false, all-digit strings => Integer,
# anything else stays a String. Only the first colon separates key and
# value, so values such as "db1:3306" are kept intact.
#
# The previous version matched the value against a regexp after it may
# already have been replaced by true/false; TrueClass/FalseClass no longer
# respond to =~ since Ruby 3.2, which made every Yes/No line raise. It
# also relied on ActiveSupport's blank?.
def _parse_response res
  res.to_s.split("\n").each_with_object({}) do |raw, parsed|
    line = raw.strip
    next if line.empty? || line.start_with?("***")

    key, _, val = line.partition(":")
    val = val.strip
    parsed[key.strip] =
      case val
      when "no", "No" then false
      when "yes", "Yes" then true
      when /\A\d+\z/ then val.to_i
      else val
      end
  end
end
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class SslExpiration < Checker
|
4
|
+
self.checker_name = "ssl_expiration"
|
5
|
+
|
6
|
+
# Queues a certificate expiration check for page.
# The warning threshold defaults to one month unless the caller overrides it.
def enqueue(page, opts = {})
  with_defaults = { threshold: 1.months }.merge(opts)
  app.enqueue(self, page, with_defaults)
end
|
10
|
+
|
11
|
+
# Fetches the TLS certificate presented for page and classifies it as
# not yet valid, expired, expiring within opts[:threshold], or fine.
#
# Certificate verification is disabled on purpose so that invalid or
# expired certificates can still be inspected.
#
# Fixes over the previous version:
# * the "EXPIRED (since ...)" duration was computed as
#   not_after - now, i.e. negative, and rendered as e.g. "-3600s"
# * the current time is read once so all comparisons and messages agree
# * a missing :threshold (check! called without going through enqueue)
#   falls back to the enqueue default instead of raising
def check! result, page, opts = {}
  uri = URI.parse(page)
  http = Net::HTTP.new(uri.host, uri.port)
  http.use_ssl = true
  http.verify_mode = OpenSSL::SSL::VERIFY_NONE
  cert = nil
  http.start { |h| cert = h.peer_cert }

  now = Time.current
  threshold = opts[:threshold] || 1.months

  if cert.not_before > now
    result.error! "Certificate is not yet valid (will in #{fseconds(cert.not_before - now)}, #{cert.not_before})!"
    return
  end

  if cert.not_after <= now
    result.error! "Certificate is EXPIRED (since #{fseconds(now - cert.not_after)}, #{cert.not_after})!"
    return
  end

  if cert.not_after <= now + threshold
    result.error! "Certificate is about to expire within threshold (in #{fseconds(cert.not_after - now)}, #{cert.not_after})!"
    return
  else
    result.info! "Certificate for `#{page}' expires in #{fseconds(cert.not_after - now)} (#{cert.not_after})!"
  end
end
|
38
|
+
|
39
|
+
# Formats a duration given in seconds as a compact string such as
# "1y 0d 0h 0m" or "2h 3m 5s".
#
# Units are years (12 * 30 days), months (30 days), weeks, days, hours and
# minutes. Years, months and weeks only appear when the remainder reaches
# them; days, hours and minutes additionally appear whenever a larger unit
# was already emitted. Seconds are left out as soon as a day part exists.
# Note that both months and minutes use the suffix "m".
def fseconds secs
  remaining = secs.to_i
  minute = 60
  hour = minute * 60
  day = hour * 24
  week = day * 7
  month = day * 30
  year = month * 12

  # [unit length in seconds, suffix, emit even when zero once output has started]
  units = [
    [year, "y", false],
    [month, "m", false],
    [week, "w", false],
    [day, "d", true],
    [hour, "h", true],
    [minute, "m", true]
  ]

  parts = []
  units.each do |length, suffix, sticky|
    next unless remaining >= length || (sticky && !parts.empty?)

    count, remaining = remaining.divmod(length)
    parts << "#{count}#{suffix}"
  end
  parts << "#{remaining}s" unless parts.any? { |part| part.include?("d") }
  parts.join(" ")
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixDefaults < Checker
|
4
|
+
self.checker_name = "unix_defaults"
|
5
|
+
|
6
|
+
# Queues the umbrella job that later spawns the default unix checkers
# (load, memory, df, mdadm) for host.
#
# Does nothing while the app reports running?, because this job is only
# meant to run once to spawn the sub checkers.
#
# Option shortcuts (removed from the options before queueing):
#   load:       value -> unix_load[:limits],        false disables unix_load
#   memory_min: value -> unix_memory[:min_percent], false disables unix_memory
#   df_min:     value -> unix_df[:min_percent],     false disables unix_df
#   mdadm:      false disables unix_mdadm
def enqueue host, opts = {}
  return if app.running?

  opts = { unix_load: {}, unix_memory: {}, unix_df: {}, unix_mdadm: {} }.merge(opts)

  host = app.fetch_connection(:loopback, :local) unless host && host != :local
  host = app.fetch_connection(:ssh, host) if Symbol === host

  # shortcut => [target sub checker, option to set (nil: on/off switch only)]
  shortcuts = {
    load: [:unix_load, :limits],
    memory_min: [:unix_memory, :min_percent],
    df_min: [:unix_df, :min_percent],
    mdadm: [:unix_mdadm, nil]
  }
  shortcuts.each do |shortcut, (target, setting)|
    value = opts.delete(shortcut)
    opts[target][setting] = value if setting && value
    opts[target] = false if value == false
  end

  app.enqueue(self, host, opts)
end
|
28
|
+
|
29
|
+
# Spawns the individual unix sub checkers (load, memory, df, mdadm) for
# host, passing each its own option hash (or an empty hash when none was
# given).
#
# A sub checker whose option is explicitly `false` is skipped. enqueue
# sets that value for the load/memory_min/df_min/mdadm shortcuts; the
# previous version spawned the sub checker anyway, which made it
# impossible to switch one off.
def check! result, host, opts = {}
  [:unix_load, :unix_memory, :unix_df, :unix_mdadm].each do |which|
    sub_opts = opts[which]
    next if sub_opts == false

    spawn_sub(which, host, sub_opts.is_a?(Hash) ? sub_opts : {})
  end
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixDf < Checker
|
4
|
+
self.checker_name = "unix_df"
|
5
|
+
|
6
|
+
# Queues a disk space check.
#
# host - nil/false/:local selects the loopback connection, any other
#        Symbol is resolved to an SSH connection, everything else is
#        passed through untouched
# opts - :min_percent (default 25) is the minimum free space per filesystem
def enqueue(host, opts = {})
  target =
    if !host || host == :local
      app.fetch_connection(:loopback, :local)
    else
      host
    end
  target = app.fetch_connection(:ssh, target) if target.is_a?(Symbol)
  app.enqueue(self, target, { min_percent: 25 }.merge(opts))
end
|
12
|
+
|
13
|
+
# Runs `df` on host, stores the raw and parsed output on the result and
# flags every filesystem whose free percentage is below opts[:min_percent].
# Without a :min_percent option nothing is compared.
def check! result, host, opts = {}
  result.command = "df"
  result.result = host.exec(result.command)
  result.data = _parse_response(result.result)

  minimum = opts[:min_percent]
  return unless minimum

  result.data.each do |fs|
    next unless fs[:free] < minimum

    mount = fs[:mountpoint] || "unmounted"
    result.error! "disk space on `#{mount}' (#{fs[:filesystem]}) is low (limit is min. #{minimum}%, got #{fs[:free]}%)"
  end
end
|
22
|
+
|
23
|
+
# Parses `df` output into an Array of Hashes, one per line after the
# header line:
#
#   filesystem, size, used, available - columns 1-4 (numbers as Integer)
#   use        - column 5 (percent used) as Integer
#   free       - 100 - use
#   mountpoint - column 6 (nil when missing)
#
# Empty output yields an empty Array; the previous version sliced the
# line list with [1..-1], which is nil for empty output and raised on
# the following `each`.
def _parse_response res
  res.to_s.strip.split("\n").drop(1).map do |device|
    chunks = device.split(" ")
    used_percent = chunks[4].to_i
    {
      filesystem: chunks[0],
      size: chunks[1].to_i,
      used: chunks[2].to_i,
      available: chunks[3].to_i,
      use: used_percent,
      free: 100 - used_percent,
      mountpoint: chunks[5],
    }
  end
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class FileExists < Checker
|
4
|
+
self.checker_name = "unix_file_exists"
|
5
|
+
|
6
|
+
# Queues a file existence check for file on host.
#
# host - nil/false/:local selects the loopback connection, any other
#        Symbol is resolved to an SSH connection, everything else is
#        passed through untouched
# opts - :message overrides the default failure message
def enqueue(host, file, opts = {})
  settings = { message: "File #{file} does not exist!" }.merge(opts)
  host = app.fetch_connection(:loopback, :local) unless host && host != :local
  host = app.fetch_connection(:ssh, host) if Symbol === host
  app.enqueue(self, host, file, settings)
end
|
12
|
+
|
13
|
+
# Flags the result when file does not exist on host.
#
# On a loopback connection the file is tested directly in this process;
# on any other connection `test -f` is executed remotely and expected to
# print "exists".
# NOTE(review): the local branch uses File.exist? (true for directories),
# the remote branch `test -f` (regular files only) - confirm which is wanted.
#
# Changes over the previous version:
# * the local branch no longer prepends its own descriptor to the
#   message; the remote branch never did, and Result#dump! already puts a
#   descriptor in front of every message
# * the remote output is compared after stripping whitespace, so a
#   trailing newline from the remote shell is not reported as a failure
# * no dependency on ActiveSupport's `presence`
def check! result, host, file, opts = {}
  if host.is_a?(WatchmonkeyCli::LoopbackConnection)
    result.error! "#{opts[:message]} (ENOENT)" unless File.exist?(file)
  else
    result.command = "test -f #{Shellwords.escape(file)} && echo exists"
    result.result = host.exec(result.command)
    output = result.result.to_s.strip
    if output != "exists"
      reason = output.empty? ? "ENOENT" : output
      result.error! "#{opts[:message]} (#{reason})"
    end
  end
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixLoad < Checker
|
4
|
+
self.checker_name = "unix_load"
|
5
|
+
|
6
|
+
# Queues a system load check.
#
# host - nil/false/:local selects the loopback connection, any other
#        Symbol is resolved to an SSH connection, everything else is
#        passed through untouched
# opts - :limits holds the limits for load1, load5 and load15
#        (default [4, 2, 1.5])
def enqueue(host, opts = {})
  connection = (!host || host == :local) ? app.fetch_connection(:loopback, :local) : host
  connection = app.fetch_connection(:ssh, connection) if connection.is_a?(Symbol)
  app.enqueue(self, connection, { limits: [4, 2, 1.5] }.merge(opts))
end
|
12
|
+
|
13
|
+
# Runs `uptime` on host, stores the raw and parsed output on the result and
# flags the result when any of the three load averages exceeds its limit
# from opts[:limits]. All violations are reported in a single message.
def check! result, host, opts = {}
  result.command = "uptime"
  result.result = host.exec(result.command)
  loads = result.data = _parse_response(result.result)

  violations = []
  %w[1 5 15].each_with_index do |span, idx|
    next unless loads[idx] > opts[:limits][idx]

    violations << "load#{span} is to high (limit#{span} is #{opts[:limits][idx]}, load#{span} is #{loads[idx]})"
  end
  result.error!(violations.join("\n\t")) if violations.any?
end
|
24
|
+
|
25
|
+
# Extracts the three load averages (1, 5 and 15 minutes) from `uptime`
# output and returns them as an Array of Floats.
#
# Accepted after "load average:" / "load averages:" are three numbers
# separated by whitespace and/or commas, each written with either a dot
# or a comma as decimal mark, e.g.
#   "load average: 0.10, 0.20, 0.30"
#   "load average: 0,10, 0,20, 0,30"
#   "load averages: 1.84 1.90 1.93"
#
# The previous pattern used an unanchored alternation that did not match
# the space separated dot format and failed with a NoMethodError on nil
# for unparsable input. Unparsable input now raises an ArgumentError that
# names the offending output.
def _parse_response res
  number = /\d+(?:[.,]\d+)?/
  found = res.to_s.match(/load averages?:\s*(#{number}),?\s+(#{number}),?\s+(#{number})/i)
  raise ArgumentError, "unable to parse load averages from `#{res}'" unless found

  found.captures.map { |value| value.tr(",", ".").to_f }
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixMdadm < Checker
|
4
|
+
self.checker_name = "unix_mdadm"
|
5
|
+
|
6
|
+
# Queues a software RAID (mdadm) check.
#
# host - nil/false/:local selects the loopback connection, any other
#        Symbol is resolved to an SSH connection, everything else is
#        passed through untouched
# opts - :log_checking (default true) enables progress messages for
#        arrays that report a running operation
def enqueue(host, opts = {})
  merged = { log_checking: true }.merge(opts)
  host = app.fetch_connection(:loopback, :local) unless host && host != :local
  host = app.fetch_connection(:ssh, host) if Symbol === host
  app.enqueue(self, host, merged)
end
|
12
|
+
|
13
|
+
# Reads /proc/mdstat on host and flags every array whose member status
# is not all "U" (e.g. "[U_]"). With opts[:log_checking] a progress line
# (check/resync/recovery) is additionally reported through `info`.
#
# Fixes over the previous version:
# * when _parse_response returns its "failed to parse ..." String the
#   message is reported; before, the String was indexed with :devices,
#   which raised a TypeError
# * an array whose status line ends in "chunks" (no [UU] block) is
#   skipped with `next`; the former `return` silently ended the whole
#   check, so arrays listed after it were never inspected
# * device blocks with fewer lines than expected no longer raise
def check! result, host, opts = {}
  result.command = "cat /proc/mdstat"
  result.result = host.exec(result.command)
  result.data = _parse_response(result.result)

  return result.error!(result.result) unless result.data
  return result.error!(result.data) if result.data.is_a?(String)

  result.data[:devices].each do |rdev|
    dev = rdev[0].to_s.split(" ").first
    status = rdev[1].to_s.split(" ").last
    progress = rdev[2].to_s
    next if status == "chunks"

    result.error! "#{dev} seems broken (expected U+, got `#{status}')" if status !~ /\[U+\]/
    next unless opts[:log_checking]

    m = progress.match(/\[[=>\.]+\]\s+([^\s]+)\s+=\s+([^\s]+)\s+\(([^\/]+)\/([^\)]+)\)\s+finish=([^\s]+)\s+speed=([^\s]+)/i)
    info "#{dev} on is checking (status:#{m[1]}|done:#{m[2]}|eta:#{m[5]}|speed:#{m[6]}|blocks_done:#{m[3]}/#{m[4]})" if m
  end
end
|
33
|
+
|
34
|
+
# Parses the content of /proc/mdstat.
#
# Returns false when the output mentions "no such file", otherwise a Hash:
#   :devices        - one Array of (stripped) lines per array block
#   :personalities  - names from the "Personalities" line without brackets
#   :unused_devices - text after "unused devices:"
# Lines starting with "warning:" are ignored. If anything goes wrong a
# String describing the failure is returned instead of raising.
def _parse_response res
  return false if res.downcase.include?("no such file")

  lines = res.strip.split("\n").map(&:strip).reject { |line| line =~ /\Awarning:/i }

  # personalities
  personality_line = lines.delete_at(lines.index { |line| line =~ /^personalities/i })
  personalities = personality_line.match(/^personalities(?:\s?): (.*)$/i)[1].split(" ").map { |name| name[1..-2] }

  # unused devices
  unused_line = lines.delete_at(lines.index { |line| line =~ /^unused devices/i })
  unused_devices = unused_line.match(/^unused devices\s?: (.*)$/i)[1]

  # device output: blocks are separated by an empty line
  devices = lines.join("\n").split("\n\n").map { |block| block.split("\n") }

  { devices: devices, personalities: personalities, unused_devices: unused_devices }
rescue StandardError => e
  "failed to parse mdadm output - #{e.class}: #{e.message}"
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class UnixMemory < Checker
|
4
|
+
self.checker_name = "unix_memory"
|
5
|
+
|
6
|
+
# Queues a memory check.
#
# host - nil/false/:local selects the loopback connection, any other
#        Symbol is resolved to an SSH connection, everything else is
#        passed through untouched
# opts - :min_percent (default 25) is the minimum percentage of free memory
def enqueue(host, opts = {})
  with_defaults = { min_percent: 25 }.merge(opts)
  connection = host
  connection = app.fetch_connection(:loopback, :local) if !connection || connection == :local
  connection = app.fetch_connection(:ssh, connection) if Symbol === connection
  app.enqueue(self, connection, with_defaults)
end
|
12
|
+
|
13
|
+
# Reads /proc/meminfo on host, stores the raw and parsed output on the
# result and flags the result when the free percentage is below
# opts[:min_percent]. When the output could not be parsed the raw output
# itself is reported as the error.
def check! result, host, opts = {}
  result.command = "cat /proc/meminfo"
  result.result = host.exec(result.command)
  result.data = _parse_response(result.result)

  return result.error!(result.result) unless result.data

  minimum = opts[:min_percent]
  result.error! "memory is low (limit is min. #{minimum}%, got #{result.data["free"]}%)" if minimum && result.data["free"] < minimum
end
|
24
|
+
|
25
|
+
# Parses the content of /proc/meminfo.
#
# Returns false when the output mentions "no such file", otherwise a Hash
# mapping each field name to its integer value plus a computed "free"
# entry: (MemFree + Buffers + Cached) as a percentage of MemTotal, rounded
# to two decimals.
def _parse_response res
  return false if res.downcase.include?("no such file")

  stats = res.strip.split("\n").each_with_object({}) do |line, acc|
    name, amount = line.split(":").map(&:strip)
    acc[name] = amount.to_i
  end
  reclaimable = stats["MemFree"].to_i + stats["Buffers"].to_i + stats["Cached"].to_i
  stats["free"] = (reclaimable / stats["MemTotal"].to_f * 100).round(2)
  stats
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module WatchmonkeyCli
|
2
|
+
module Checkers
|
3
|
+
class WwwAvailability < Checker
|
4
|
+
self.checker_name = "www_availability"
|
5
|
+
|
6
|
+
# Queues an availability check for page. The :ssl_expiration option is
# not passed on to the check itself.
#
# For https pages an additional ssl_expiration job is spawned, unless
# :ssl_expiration is false. A Hash given as :ssl_expiration becomes the
# options of that job.
def enqueue page, opts = {}
  ssl_opts = opts[:ssl_expiration]
  app.enqueue(self, page, opts.reject { |key, _| key == :ssl_expiration })

  # if available enable ssl_expiration support
  return unless page.start_with?("https://") && ssl_opts != false

  spawn_sub("ssl_expiration", page, ssl_opts.is_a?(Hash) ? ssl_opts : {})
end
|
14
|
+
|
15
|
+
# Fetches page (without following redirects and without verifying the
# certificate) and compares the response with the expectations in opts:
#
#   :status  - allowed status code or Array of codes
#   :body    - String the body must include, or Regexp it must match
#   :headers - Hash of header name => expected String or Regexp
#
# A refused connection is reported on the result and ends the check;
# every failed expectation is reported as a separate error.
#
# Fix over the previous version: a Regexp expectation for a header that
# is absent from the response called `match` on nil and raised instead of
# being reported as a mismatch.
def check! result, page, opts = {}
  begin
    resp = HTTParty.get(page, no_follow: true, verify: false)
    result.result = resp
  rescue HTTParty::RedirectionTooDeep => e
    # with no_follow a redirect surfaces as this exception; keep its response
    result.result = e.response
    original_response = true
  rescue Errno::ECONNREFUSED => e
    result.error! "Failed to fetch #{page} (#{e.class}: #{e.message})"
    return
  end

  # status
  if opts[:status]
    stati = [*opts[:status]]
    result.error! "#{result.result.code} is not in #{stati}!" if !stati.include?(result.result.code.to_i)
  end

  # body
  if rx = opts[:body]
    if rx.is_a?(String)
      result.error! "body does not include #{rx}!" if !result.result.body.include?(rx)
    elsif rx.is_a?(Regexp)
      result.error! "body does not match #{rx}!" if !result.result.body.match(rx)
    end
  end

  # headers
  if opts[:headers]
    # the response taken from the redirect exception is indexed directly
    hdata = original_response ? result.result : result.result.headers
    opts[:headers].each do |k, v|
      actual = hdata[k]
      matched = v.is_a?(Regexp) ? (!actual.nil? && actual.to_s.match(v)) : actual == v
      result.error! "header #{k} mismatches (expected `#{v}' got `#{actual || "nil"}')" unless matched
    end
  end
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|