remon 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +3 -0
- data/FEATURES.md +39 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +41 -0
- data/Rakefile +27 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/dev_exe/remon +4 -0
- data/exe/remon +101 -0
- data/lib/remon.rb +11 -0
- data/lib/remon/check.rb +145 -0
- data/lib/remon/check_dsl.rb +92 -0
- data/lib/remon/check_runner.rb +53 -0
- data/lib/remon/checks/consul.rb +41 -0
- data/lib/remon/checks/disk.rb +36 -0
- data/lib/remon/checks/http.rb +53 -0
- data/lib/remon/checks/oom.rb +26 -0
- data/lib/remon/checks/redis.rb +23 -0
- data/lib/remon/checks/salt.rb +27 -0
- data/lib/remon/checks/system.rb +96 -0
- data/lib/remon/checks/yum.rb +30 -0
- data/lib/remon/config.rb +101 -0
- data/lib/remon/custom_logger.rb +6 -0
- data/lib/remon/deduped_queue.rb +38 -0
- data/lib/remon/error.rb +4 -0
- data/lib/remon/event_processor.rb +33 -0
- data/lib/remon/ext/num_ext.rb +23 -0
- data/lib/remon/helper.rb +41 -0
- data/lib/remon/logger.rb +17 -0
- data/lib/remon/metrics/consul.rb +32 -0
- data/lib/remon/metrics/disk.rb +24 -0
- data/lib/remon/metrics/http.rb +40 -0
- data/lib/remon/metrics/oom.rb +32 -0
- data/lib/remon/metrics/salt.rb +18 -0
- data/lib/remon/metrics/system.rb +63 -0
- data/lib/remon/metrics/yum.rb +20 -0
- data/lib/remon/proc_check.rb +26 -0
- data/lib/remon/scheduler.rb +106 -0
- data/lib/remon/scripts/salt-status +24 -0
- data/lib/remon/scripts/yum-status +12 -0
- data/lib/remon/sysinfo.rb +69 -0
- data/lib/remon/version.rb +3 -0
- data/remon.gemspec +26 -0
- data/test_config.rb +44 -0
- metadata +146 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'remon/metrics/yum'
|
2
|
+
|
3
|
+
# Check definition registered under the name :yum.
# Reports whether the yum metrics collector sees pending package updates.
defcheck :yum do

  # Creates the metrics collector this check queries on each run.
  def init
    @yum = Metrics::Yum.new
  end

  # Entry point of the check; delegates to the single probe below.
  def run
    updates_available
  end

  private

  # Emits one "yum updates" event: state "warning" with metric 1 when at
  # least one update is pending, state "ok" with metric 0 otherwise.
  # Any StandardError is logged and replaced by a warning_event for the
  # same service name.
  def updates_available
    service = "yum updates"
    pending = @yum.updates_available
    outdated = pending > 0
    payload = {
      service: service,
      description: "#{pending} updates available",
      state: outdated ? "warning" : "ok",
      metric: outdated ? 1 : 0
    }
    event(payload)
  rescue => e
    logger.error "#{e.class}: #{e.message}"
    warning_event service
  end
end
|
data/lib/remon/config.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'set'
|
2
|
+
require_relative 'error'
|
3
|
+
require_relative 'ext/num_ext'
|
4
|
+
require_relative 'check_dsl'
|
5
|
+
require 'forwardable'
|
6
|
+
|
7
|
+
module Remon
  # Evaluates remon configuration files (plain Ruby, run with instance_eval
  # against this object) and collects the resulting schedule and settings.
  #
  # The private methods below form the DSL available inside a config file:
  # host, every, scheduler_offset, process_event, workers, run_check /
  # schedule_check, plus check, defcheck and proc_check which are forwarded
  # to the CheckDsl instance.
  class Config
    using NumExt

    # Directories always searched for check definitions.
    LOAD_PATHS = ["#{__dir__}/checks"]

    extend Forwardable

    # @param config_file [String, nil] single config file to evaluate
    # @param config_dir [String, nil] directory whose *.rb files are evaluated
    # @param load_paths [Enumerable<String>] extra check load paths, merged
    #   with LOAD_PATHS (duplicates removed)
    def initialize(config_file: nil, config_dir: nil, load_paths: [])
      @config_file = config_file
      @config_dir = config_dir
      @schedule = {}
      @scheduler_offset = 0
      @workers = 1
      # Task group used for checks scheduled outside of any `every` block.
      @task_group = { interval: 0, offset: 15, randomize: false }
      load_paths = Set.new(load_paths).merge(LOAD_PATHS)
      @dsl = CheckDsl.new load_paths.to_a
    end

    # Reads the configuration on first call (later calls reuse the already
    # populated state) and returns the collected settings.
    #
    # @return [Hash] with keys :schedule, :scheduler_offset, :process_proc
    #   and :workers
    # @raise [Error] if a config file is unreadable or uses an unknown option
    def config
      @config_read ||= begin
        read_config
        true
      end
      {
        schedule: @schedule,
        scheduler_offset: @scheduler_offset,
        process_proc: @process_proc,
        workers: @workers
      }
    end

    private

    def_delegators :@dsl, :check, :defcheck, :proc_check

    # DSL: sets the host name on the Remon module.
    def host(host)
      Remon.host = host
    end

    # DSL: runs the given block with a task group of the given interval, so
    # that checks scheduled inside the block land in that group. The
    # previous task group is restored afterwards, even if the block raises.
    #
    # @raise [Error] if offset is greater than secs
    def every(secs, randomize: false, offset: 0, &block)
      # Validate before touching any state: raising inside the protected
      # region would make the ensure clause reset @task_group to nil.
      # NOTE(review): the message says "less than" but an offset equal to
      # the interval is accepted; confirm which one is intended.
      raise Error, "offset must be less than interval" if offset > secs
      previous = @task_group
      begin
        @task_group = { interval: secs, offset: offset, randomize: randomize }
        yield
      ensure
        @task_group = previous
      end
    end

    # DSL: sets the scheduler offset reported by #config.
    def scheduler_offset(offset)
      @scheduler_offset = offset
    end

    # DSL: stores the block reported by #config as :process_proc.
    def process_event(&block)
      @process_proc = block
    end

    # DSL: sets the worker count reported by #config.
    def workers(workers)
      @workers = workers
    end

    # DSL: adds a check to the current task group.
    #
    # When `check` is not already a Check instance it is treated as a check
    # name: the class is looked up through the DSL and instantiated with
    # args/kwargs, with :ttl defaulting to three times the group interval.
    #
    # @param check [Check, Object] a check instance or a check name
    # @param args [Array] positional arguments for the check class
    # @param kwargs [Hash] keyword arguments for the check class
    def schedule_check(check, args = [], kwargs = {})
      @schedule[@task_group] ||= Set.new
      unless check.is_a? Check
        # Build a new hash instead of writing :ttl into the caller's hash.
        unless kwargs[:ttl]
          kwargs = kwargs.merge(ttl: default_ttl(@task_group[:interval]))
        end
        klass = self.check(check)
        check = klass.new(*args, **kwargs)
      end
      @schedule[@task_group] << check
    end

    # Default ttl for a check: three times its scheduling interval.
    def default_ttl(interval)
      3 * interval
    end

    # Evaluates the single config file (if given) followed by every *.rb
    # file in the config directory (if given), in sorted file-name order so
    # the load order does not depend on the filesystem.
    def read_config
      read_config_file @config_file if @config_file
      return unless @config_dir
      Dir.glob("#{@config_dir}/*.rb").sort.each { |f| read_config_file f }
    end

    # Evaluates one config file in the context of this object.
    #
    # @raise [Error] if the file is not readable, or if evaluating it raises
    #   NoMethodError (reported as an invalid option)
    def read_config_file(file)
      raise Error, "config #{file} not readable" unless File.readable? file
      # Passing the file name makes backtraces and __FILE__ inside the
      # config point at the real file instead of "(eval)".
      instance_eval(File.read(file), file)
    rescue NoMethodError => e
      raise Error, "invalid option used in config: #{e.name}"
    end

    alias_method :run_check, :schedule_check

  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require_relative 'logger'
|
2
|
+
|
3
|
+
module Remon
  # Queue wrapper that drops a task when an equal task is already waiting.
  # Membership is tracked in a hash guarded by a mutex; the tasks themselves
  # travel through a regular Queue.
  class DedupedQueue

    include Logger

    def initialize
      @hash = {}
      @mutex = Mutex.new
      @queue = Queue.new
    end

    # Adds the task unless it is already marked as waiting.
    # Returns nil for a duplicate, otherwise the result of pushing onto the
    # underlying queue.
    def enqueue(task)
      already_waiting = @mutex.synchronize do
        waiting = @hash[task]
        @hash[task] = true unless waiting
        waiting
      end

      if already_waiting
        logger.debug "duplicate item #{task}" if logger.debug?
        return
      end

      @queue << task
    end

    # Pops the next task and clears its "waiting" mark so that it can be
    # enqueued again. `non_block` is handed straight to Queue#pop.
    def dequeue(non_block = false)
      @queue.pop(non_block).tap do |task|
        @mutex.synchronize { @hash.delete task }
      end
    end

    alias_method :'<<', :enqueue
    alias_method :pop, :dequeue
    alias_method :shift, :dequeue

  end
end
|
data/lib/remon/error.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
require_relative 'logger'
|
2
|
+
|
3
|
+
module Remon
  # Runs a background thread that pops events from a queue and hands each
  # one to the callable supplied at construction time.
  class EventProcessor

    include Logger
    attr_reader :queue

    # @param pr [#call] invoked with every event popped from the queue
    def initialize(pr)
      @proc = pr
      @queue = Queue.new
    end

    # Starts the processing thread on first call; later calls return the
    # thread that was already created.
    def start
      return @thread if @thread

      @thread = Thread.new do
        logger.debug { "starting event processor" }
        loop { process_event }
      end
    end

    # Handles one event: pops it from the queue (Queue#pop with no argument)
    # and calls the processor with it. A StandardError is logged as a
    # warning instead of propagating.
    def process_event
      @proc.call(@queue.pop)
    rescue => e
      logger.warn "warn error #{e.message}"
    end

    # Kills the processing thread if one was started.
    def stop
      @thread.kill if @thread
    end

  end
end
|
data/lib/remon/helper.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
require_relative 'logger'
|
2
|
+
module Remon
  # Mixin with helpers for running external commands.
  module Helper

    include Logger

    # Runs an external command and raises unless it exits with status 0.
    #
    # @param command [String, Array<String>] command line, or program plus
    #   arguments. A String is split on whitespace when shell is false, so
    #   shell-style quoting is not honoured in that mode.
    # @param error_msg [String, nil] message for the raised Error; a default
    #   naming the command is used when nil
    # @param return_output [Boolean] when true the command's stdout is
    #   captured and returned; when false the command runs through
    #   Kernel#system with stderr redirected to stdout
    # @param env [Hash] environment variables passed to the child process
    # @param shell [Boolean] when true the command string is handed over as
    #   a single command line instead of an argument vector
    # @return [String, Boolean, nil] captured output, or the return value of
    #   Kernel#system when return_output is false
    # @raise [Error] if the exit status is not 0
    def cmd(command, error_msg: nil, return_output: true, env: {}, shell: false)
      if command.is_a? Array
        command_arr = command
        command_str = command.join(" ")
      else
        command_arr = command.split
        command_str = command
      end
      logger.debug command_str

      output = if return_output
        IO.popen(env, shell ? command_str : command_arr) { |f| f.read }
      elsif shell
        system(env, command_str, 2 => 1)
      else
        # Kernel#system reads an Array in first position as a
        # [program, argv0] pair, so the argument vector has to be splatted.
        # Passing the program in that pair form also keeps a single-word
        # command from being routed through the shell.
        program, *arguments = command_arr
        system(env, [program, program], *arguments, 2 => 1)
      end
      exitstatus = $?.exitstatus

      if exitstatus != 0
        error_msg ||= "non zero exit for \"#{command_str}\""
        raise Error, error_msg
      end
      output
    end

    # Same as #cmd but never raises a StandardError.
    #
    # @return [Array] exit status and output on success; [-1, nil] when
    #   #cmd raised (the error message is logged at debug level)
    def safe_cmd(*args, **kwargs)
      output = cmd(*args, **kwargs)
      return $?.exitstatus, output
    rescue => e
      logger.debug e.message
      return -1, nil
    end

  end
end
|
data/lib/remon/logger.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module Remon
  module Metrics
    # Queries a Consul agent's HTTP API for nodes whose serf health check is
    # in the critical state.
    class Consul

      # @param host [String] host of the Consul HTTP API
      # @param port [Integer] port of the Consul HTTP API
      def initialize(host: "127.0.0.1", port: 8500)
        @host = host
        @port = port
      end

      # @return [Hash{String => String}] node name => check output for every
      #   critical check whose CheckID is "serfHealth"
      def failed_nodes
        critical_checks.each_with_object({}) do |check, nodes|
          nodes[check["Node"]] = check["Output"] if check["CheckID"] == "serfHealth"
        end
      end

      private

      # Builds an absolute URL for the given API path.
      def consul_url(path)
        "http://#{@host}:#{@port}#{path}"
      end

      # Fetches and parses the list of critical checks.
      def critical_checks
        url = consul_url("/v1/health/state/critical")
        # Open through the URI object: Kernel#open no longer accepts URLs on
        # Ruby 3.0+. The block form closes the response once it is read.
        JSON.parse(URI.parse(url).open(&:read))
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Remon
  module Metrics
    # Collects per-filesystem usage by parsing the output of `df`.
    class Disk

      # Runs `df -h -P` and returns one entry per listed filesystem whose
      # device column contains a slash.
      #
      # @return [Array<Hash>] hashes with :mount (mount point), :percent
      #   (used fraction, e.g. 0.42 for "42%") and :size (size column as
      #   printed by df)
      def disks_usage
        disks = []
        # -P selects the POSIX output format: exactly one line per
        # filesystem, so long device names are not wrapped onto a second
        # line.
        IO.popen(['df', '-h', '-P']) do |io|
          io.each_line do |l|
            f = l.split
            next if f[0] == 'Filesystem'
            next unless f[0] =~ /\// # device column needs at least one slash
            next if f.size < 6 # not a complete df row

            disks << {
              # Everything from the sixth column on is the mount point, which
              # may itself contain spaces.
              mount: f[5..-1].join(' '),
              percent: (f[4].to_f / 100).round(2),
              size: f[1]
            }
          end
        end
        disks
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'benchmark'
|
3
|
+
|
4
|
+
module Remon
  module Metrics
    # Probes a URL and reports a numeric status together with the time the
    # probe took.
    class Http

      # @param url [String] URL to probe; parsed once with URI.parse
      def initialize(url)
        @uri = URI.parse(url)
      end

      # Performs one request and measures it with Benchmark.realtime.
      #
      # @param read_timeout [Numeric] passed through to the request
      # @param open_timeout [Numeric] passed through to the request
      # @return [Array] elapsed seconds followed by the status code
      def status(read_timeout: 1, open_timeout: 1)
        code = nil
        elapsed = Benchmark.realtime { code = get_status(read_timeout, open_timeout) }
        [elapsed, code]
      end

      private

      # Opens the URI without following redirects and returns the response
      # status as an Integer. Selected failures are mapped to fixed codes:
      # EOFError => 444, connection refused => 502, open/read timeout => 504,
      # redirect => 301.
      def get_status(read_timeout, open_timeout)
        @uri.open(read_timeout: read_timeout, open_timeout: open_timeout, redirect: false) do |response|
          response.status.first.to_i
        end
      rescue EOFError
        444
      rescue Errno::ECONNREFUSED
        502
      rescue Net::OpenTimeout, Net::ReadTimeout
        504
      rescue OpenURI::HTTPRedirect
        301
      end

    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Remon
  module Metrics
    # Counts "invoked oom-killer" entries in a log file, grouped by the
    # first two whitespace-separated fields of each matching line.
    # NOTE(review): this presumably expects syslog-style lines starting
    # with "Mon DD" - confirm against the log files used in production.
    class Oom

      # @param log_file [String] path of the log file to scan
      # @raise [Error] if the file is not readable
      def initialize(log_file)
        @log_file = log_file
        raise Error, "#{log_file} not readable" if not File.readable? log_file
      end

      # @return [Hash] :today => matches whose key equals today's
      #   month/day, :total => all matches; both are 0 when nothing matches
      def stats
        counts = oom_counts
        # Keys are the first two fields joined without a separator ("Jan5"),
        # so the day must be formatted without padding (%-d, not %e).
        today_key = Time.now.strftime("%b%-d")
        { today: counts[today_key], total: counts.values.reduce(0, :+) }
      end

      private

      # Tallies matching lines per "<field1><field2>" key. The file is read
      # directly, so the path never passes through a shell.
      def oom_counts
        counts = Hash.new(0)
        File.foreach(@log_file) do |raw|
          line = raw.scrub # tolerate invalid byte sequences in the log
          next unless line.include?('invoked oom-killer')

          month, day = line.split
          counts["#{month}#{day}"] += 1
        end
        counts
      end

    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Remon
  module Metrics
    # Obtains a status summary by running the bundled `salt-status` script
    # in a separate ruby process.
    class Salt

      # @param timeout [Integer] passed to the script as its only argument
      def initialize(timeout: 240)
        @timeout = timeout
      end

      # Runs the script and parses its colon-separated output.
      #
      # @return [Hash] :state (first field, nil when the script printed
      #   nothing), :ok and :total (second and third fields as integers,
      #   0 when missing)
      def status
        script = File.expand_path("#{__dir__}/../scripts/salt-status")
        # Argument-vector form: the script path and timeout are never parsed
        # by a shell, so install paths containing spaces keep working.
        # stderr is discarded.
        out = IO.popen(["ruby", script, @timeout.to_s], err: File::NULL, &:read)
        output = out.chomp.split(":")
        { state: output[0], ok: output[1].to_i, total: output[2].to_i }
      end

    end
  end
end
|