remon 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.rspec +3 -0
  4. data/FEATURES.md +39 -0
  5. data/Gemfile +4 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +41 -0
  8. data/Rakefile +27 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/dev_exe/remon +4 -0
  12. data/exe/remon +101 -0
  13. data/lib/remon.rb +11 -0
  14. data/lib/remon/check.rb +145 -0
  15. data/lib/remon/check_dsl.rb +92 -0
  16. data/lib/remon/check_runner.rb +53 -0
  17. data/lib/remon/checks/consul.rb +41 -0
  18. data/lib/remon/checks/disk.rb +36 -0
  19. data/lib/remon/checks/http.rb +53 -0
  20. data/lib/remon/checks/oom.rb +26 -0
  21. data/lib/remon/checks/redis.rb +23 -0
  22. data/lib/remon/checks/salt.rb +27 -0
  23. data/lib/remon/checks/system.rb +96 -0
  24. data/lib/remon/checks/yum.rb +30 -0
  25. data/lib/remon/config.rb +101 -0
  26. data/lib/remon/custom_logger.rb +6 -0
  27. data/lib/remon/deduped_queue.rb +38 -0
  28. data/lib/remon/error.rb +4 -0
  29. data/lib/remon/event_processor.rb +33 -0
  30. data/lib/remon/ext/num_ext.rb +23 -0
  31. data/lib/remon/helper.rb +41 -0
  32. data/lib/remon/logger.rb +17 -0
  33. data/lib/remon/metrics/consul.rb +32 -0
  34. data/lib/remon/metrics/disk.rb +24 -0
  35. data/lib/remon/metrics/http.rb +40 -0
  36. data/lib/remon/metrics/oom.rb +32 -0
  37. data/lib/remon/metrics/salt.rb +18 -0
  38. data/lib/remon/metrics/system.rb +63 -0
  39. data/lib/remon/metrics/yum.rb +20 -0
  40. data/lib/remon/proc_check.rb +26 -0
  41. data/lib/remon/scheduler.rb +106 -0
  42. data/lib/remon/scripts/salt-status +24 -0
  43. data/lib/remon/scripts/yum-status +12 -0
  44. data/lib/remon/sysinfo.rb +69 -0
  45. data/lib/remon/version.rb +3 -0
  46. data/remon.gemspec +26 -0
  47. data/test_config.rb +44 -0
  48. metadata +146 -0
@@ -0,0 +1,30 @@
1
+ require 'remon/metrics/yum'
2
+
3
# Check that reports whether yum has pending package updates.
defcheck :yum do

  def init
    @yum = Metrics::Yum.new
  end

  def run
    updates_available
  end

  private

  # Emits a single "yum updates" event: state "warning" with metric 1 when the
  # reported update count is above zero, state "ok" with metric 0 otherwise.
  # Any StandardError raised along the way is logged and replaced by a
  # warning event for the same service.
  def updates_available
    service = "yum updates"
    pending = @yum.updates_available
    if pending > 0
      state = "warning"
      metric = 1
    else
      state = "ok"
      metric = 0
    end
    payload = {
      service: service,
      description: "#{pending} updates available",
      state: state,
      metric: metric
    }
    event(payload)
  rescue => e
    logger.error "#{e.class}: #{e.message}"
    warning_event service
  end
end
@@ -0,0 +1,101 @@
1
+ require 'set'
2
+ require_relative 'error'
3
+ require_relative 'ext/num_ext'
4
+ require_relative 'check_dsl'
5
+ require 'forwardable'
6
+
7
module Remon
  # Loads remon configuration. Config files are plain Ruby evaluated against a
  # Config instance (see #read_config_file), so the private methods below
  # (host, every, workers, process_event, ...) are what a config file can call.
  class Config
    using NumExt

    LOAD_PATHS = ["#{__dir__}/checks"]

    extend Forwardable

    # config_file: path of a single config file to evaluate (optional).
    # config_dir:  directory whose *.rb files are evaluated (optional).
    # load_paths:  additional paths handed to CheckDsl together with LOAD_PATHS.
    def initialize(config_file: nil, config_dir: nil, load_paths: [])
      @config_file = config_file
      @config_dir = config_dir
      @schedule = {}
      @scheduler_offset = 0
      @workers = 1
      @task_group = { interval: 0, offset: 15, randomize: false }
      load_paths = Set.new(load_paths).merge(LOAD_PATHS)
      @dsl = CheckDsl.new load_paths.to_a
    end

    # Reads the configuration on first call (later calls reuse the result) and
    # returns a hash with :schedule, :scheduler_offset, :process_proc and
    # :workers.
    def config
      @config_read ||= begin
        read_config
        true
      end
      {
        schedule: @schedule,
        scheduler_offset: @scheduler_offset,
        process_proc: @process_proc,
        workers: @workers
      }
    end

    private

    def_delegators :@dsl, :check, :defcheck, :proc_check

    def host(host)
      Remon.host = host
    end

    # Runs the block with a task group of the given interval/offset/randomize;
    # the previous task group is restored afterwards, even if the block raises.
    #
    # NOTE(review): the message says "less than", but offset == secs is
    # accepted by the comparison below -- confirm which one is intended.
    def every(secs, randomize: false, offset: 0, &block)
      raise Error, "offset must be less than interval" if offset > secs
      before = @task_group
      @task_group = { interval: secs, offset: offset, randomize: randomize }
      yield
    ensure
      @task_group = before
    end

    def scheduler_offset(offset)
      @scheduler_offset = offset
    end

    def process_event(&block)
      @process_proc = block
    end

    def workers(workers)
      @workers = workers
    end

    # Adds a check to the current task group. +check+ is either a Check
    # instance (used as is) or something the DSL's #check resolves to a class,
    # which is then instantiated with +args+/+kwargs+ and a default :ttl of
    # three times the group's interval.
    def schedule_check(check, args = [], kwargs = {})
      @schedule[@task_group] ||= Set.new
      if not check.is_a? Check
        # Work on a copy so the caller's options hash is never modified.
        kwargs = kwargs.dup
        kwargs[:ttl] ||= default_ttl(@task_group[:interval])
        klass = self.check(check)
        check = klass.new(*args, **kwargs)
      end
      @schedule[@task_group] << check
    end

    def default_ttl(interval)
      3 * interval
    end

    # Evaluates the single config file (if any), then every *.rb file of the
    # config directory (if any).
    def read_config
      read_config_file @config_file if @config_file
      if @config_dir
        # Sorted so the load order is deterministic; Dir.glob only guarantees
        # a sorted result from Ruby 3.0 on.
        Dir.glob("#{@config_dir}/*.rb").sort.each { |f| read_config_file f }
      end
    end

    # Evaluates +file+ in the context of this object.
    # Raises Error when the file is not readable or when evaluating it raises
    # NoMethodError.
    def read_config_file(file)
      if not File.readable? file
        raise Error, "config #{file} not readable"
      end
      # Passing the file name makes backtraces and syntax errors point at the
      # config file instead of "(eval)".
      instance_eval(File.read(file), file)
    rescue NoMethodError => e
      raise Error, "invalid option used in config: #{e.name}"
    end

    alias_method :run_check, :schedule_check

  end
end
@@ -0,0 +1,6 @@
1
+ require 'logger'
2
+
3
module Remon
  # Placeholder class; it defines no behavior of its own.
  class CustomLogger; end
end
@@ -0,0 +1,38 @@
1
+ require_relative 'logger'
2
+
3
module Remon
  # Queue wrapper that drops a task when an equal task has been enqueued and
  # not yet dequeued.
  class DedupedQueue

    include Logger

    def initialize
      @queue = Queue.new
      @mutex = Mutex.new
      @hash = {}
    end

    # Pushes +task+ unless it is already marked as pending. A duplicate is
    # reported at debug level and nil is returned.
    def enqueue(task)
      duplicate = @mutex.synchronize do
        pending = @hash[task]
        @hash[task] = true unless pending
        pending
      end
      if duplicate
        logger.debug "duplicate item #{task}" if logger.debug?
        return
      end
      @queue << task
    end

    # Pops the next task (+non_block+ is passed to Queue#pop), clears its
    # pending mark and returns it.
    def dequeue(non_block = false)
      @queue.pop(non_block).tap do |task|
        @mutex.synchronize { @hash.delete task }
      end
    end

    alias_method :'<<', :enqueue
    alias_method :pop, :dequeue
    alias_method :shift, :dequeue

  end
end
@@ -0,0 +1,4 @@
1
module Remon
  # Error class of the Remon namespace; a plain StandardError subclass.
  class Error < StandardError; end
end
@@ -0,0 +1,33 @@
1
+ require_relative 'logger'
2
+
3
module Remon
  # Consumes events from #queue on a background thread and hands each one to
  # the callable given at construction.
  class EventProcessor

    include Logger
    attr_reader :queue

    # pr: object responding to #call; invoked with every event popped from
    # the queue.
    def initialize(pr)
      @proc = pr
      @queue = Queue.new
    end

    # Starts the processing thread unless one is already present and returns
    # that thread.
    def start
      @thread ||= Thread.new do
        logger.debug { "starting event processor" }
        loop { process_event }
      end
    end

    # Blocks until an event is available, then processes it. A StandardError
    # raised by the callable is logged as a warning and not propagated.
    def process_event
      event = @queue.pop
      @proc.call event
    rescue => e
      logger.warn "warn error #{e.message}"
    end

    # Kills the processing thread, if any, and returns it (nil otherwise).
    # The reference is cleared so that a later #start creates a fresh thread
    # instead of handing back the dead one.
    def stop
      thread = @thread
      @thread = nil
      thread.kill if thread
    end

  end
end
@@ -0,0 +1,23 @@
1
module Remon
  # Refinement adding duration helpers to Integer. Each helper returns the
  # receiver expressed as a number of seconds.
  module NumExt
    refine Integer do

      # n.minutes / n.minute => n * 60
      def minutes
        60 * self
      end

      def minute
        60 * self
      end

      # n.seconds / n.second => n
      def seconds
        itself
      end

      def second
        itself
      end

    end
  end
end
@@ -0,0 +1,41 @@
1
+ require_relative 'logger'
2
module Remon
  # Helpers for running external commands.
  module Helper

    include Logger

    # Runs +command+ and raises Error when it exits with a non-zero status.
    #
    # command:       String or Array. A String is split on whitespace into an
    #                argument vector unless +shell+ is true.
    # error_msg:     message for the raised Error (a default naming the
    #                command is used when nil).
    # return_output: when true, returns what the command wrote to stdout;
    #                otherwise returns the result of Kernel#system, with the
    #                command's stderr redirected to its stdout.
    # env:           environment hash passed to the child process.
    # shell:         when true, the command string is run as a single command
    #                line instead of an argument vector.
    def cmd(command, error_msg: nil, return_output: true, env: {}, shell: false)
      if command.is_a? Array
        command_arr = command
        command_str = command.join(" ")
      else
        command_arr = command.split
        command_str = command
      end
      logger.debug command_str

      output = if return_output
        IO.popen(env, shell ? command_str : command_arr) { |f| f.read }
      elsif shell
        system(env, command_str, 2 => 1)
      else
        # Kernel#system treats an Array in command position as a
        # [program, argv0] pair, so the vector cannot be passed as one
        # argument. The explicit pair form also keeps a one-element vector
        # from being interpreted as a shell command line.
        program = command_arr[0]
        system(env, [program, program], *command_arr[1..-1], 2 => 1)
      end
      exitstatus = $?.exitstatus

      if exitstatus != 0
        error_msg ||= "non zero exit for \"#{command_str}\""
        raise Error, error_msg
      end
      return output
    end

    # Same as #cmd but never raises a StandardError: returns
    # [exit status, output] on success and [-1, nil] when #cmd failed (the
    # failure is logged at debug level).
    def safe_cmd(*args, **kwargs)
      output = cmd(*args, **kwargs)
      return $?.exitstatus, output
    rescue => e
      logger.debug e.message
      return -1, nil
    end

  end
end
@@ -0,0 +1,17 @@
1
+ require 'logger'
2
module Remon
  # Mixin that gives including classes a #logger backed by one shared
  # ::Logger instance.
  module Logger

    # The shared logger: a ::Logger on STDOUT at WARN level, created on first
    # use and reused afterwards.
    def self.logger
      @logger ||= ::Logger.new(STDOUT).tap { |log| log.level = ::Logger::WARN }
    end

    # Instance-level accessor for the shared logger.
    def logger
      ::Remon::Logger.logger
    end
  end
end
@@ -0,0 +1,32 @@
1
+ require 'open-uri'
2
+ require 'json'
3
+
4
module Remon
  module Metrics
    # Queries a Consul agent's HTTP API for health information.
    class Consul

      # host/port: address of the Consul HTTP API.
      def initialize(host: "127.0.0.1", port: 8500)
        @host = host
        @port = port
      end

      # Returns a hash mapping node name => check output for every critical
      # check whose CheckID is "serfHealth".
      def failed_nodes
        critical_checks.each_with_object({}) do |check, nodes|
          next unless check["CheckID"] == "serfHealth"
          nodes[check["Node"]] = check["Output"]
        end
      end

      private

      def consul_url(path)
        "http://#{@host}:#{@port}#{path}"
      end

      # Fetches and parses the list of checks in critical state.
      def critical_checks
        url = consul_url("/v1/health/state/critical")
        # Kernel#open no longer accepts URLs on Ruby 3.0+; reading through the
        # URI object works with open-uri on old and new Rubies alike.
        JSON.parse(URI.parse(url).read)
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Remon
  module Metrics
    # Reads per-filesystem usage from the output of `df -h`.
    class Disk

      # Returns an array with one hash per `df -h` row whose first column
      # contains a slash (the header row is skipped):
      #   :mount   - 6th column
      #   :percent - 5th column divided by 100, rounded to two decimals
      #   :size    - 2nd column
      def disks_usage
        rows = []
        IO.popen(['df', '-h']) do |io|
          io.each_line { |line| rows << line.split(/\s+/) }
        end

        rows.each_with_object([]) do |cols, disks|
          device = cols[0]
          next if device == 'Filesystem'
          next unless device =~ /\//

          disks << {
            mount: cols[5],
            percent: (cols[4].to_f / 100).round(2),
            size: cols[1]
          }
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'open-uri'
2
+ require 'benchmark'
3
+
4
module Remon
  module Metrics
    # Measures the HTTP status and response time of a URL.
    class Http

      def initialize(url)
        @uri = URI.parse(url)
      end

      # Performs one request and returns [elapsed seconds, status code].
      def status(read_timeout: 1, open_timeout: 1)
        code = nil
        elapsed = Benchmark.realtime { code = get_status(read_timeout, open_timeout) }
        [elapsed, code]
      end

      private

      # Returns the numeric status of the response. Redirects are not
      # followed. Some failures are mapped onto status codes:
      #   EOFError              -> 444
      #   connection refused    -> 502
      #   open or read timeout  -> 504
      #   redirect response     -> 301
      def get_status(read_timeout, open_timeout)
        @uri.open(
          read_timeout: read_timeout,
          open_timeout: open_timeout,
          redirect: false
        ) { |response| response.status[0].to_i }
      rescue EOFError
        444
      rescue Errno::ECONNREFUSED
        502
      rescue Net::OpenTimeout, Net::ReadTimeout
        504
      rescue OpenURI::HTTPRedirect
        301
      end

    end
  end
end
@@ -0,0 +1,32 @@
1
module Remon
  module Metrics
    # Counts "invoked oom-killer" entries in a log file.
    class Oom

      # log_file: path of the log to scan. Raises Error when it is not
      # readable.
      def initialize(log_file)
        @log_file = log_file
        raise Error, "#{log_file} not readable" if not File.readable? log_file
      end

      # Returns { today: Integer, total: Integer }. Both values are 0 when
      # the log holds no oom-killer entries.
      def stats
        counts = oom_counts
        # %-d is the unpadded day of month; the keys built by #oom_counts
        # contain no padding, so the space-padded %e would never match on
        # days 1-9.
        today_key = Time.now.strftime("%b%-d")
        { today: counts[today_key], total: counts.values.sum }
      end

      private

      # Returns a hash (default 0) mapping "<field1><field2>" of every
      # matching line to the number of matching lines carrying that key.
      # Assumes the first two whitespace-separated fields of a log line are
      # month and day, e.g. "Jan  5" -> "Jan5" -- TODO confirm for the log
      # format in use.
      #
      # The file is scanned in Ruby so the path is never interpolated into a
      # shell command line.
      def oom_counts
        counts = Hash.new(0)
        File.foreach(@log_file) do |line|
          line = line.scrub
          next unless line.include?('invoked oom-killer')
          month, day = line.split
          counts["#{month}#{day}"] += 1
        end
        counts
      end

    end
  end
end
@@ -0,0 +1,18 @@
1
module Remon
  module Metrics
    # Obtains salt status by running the bundled scripts/salt-status script.
    class Salt

      # timeout: value passed to the script as its only argument.
      def initialize(timeout: 240)
        @timeout = timeout
      end

      # Runs the script and parses its colon-separated stdout
      # ("state:ok:total") into { state:, ok:, total: }. Missing fields yield
      # a nil state and zero counts.
      def status
        script = File.expand_path("#{__dir__}/../scripts/salt-status")
        state, ok, total = run_script(script).chomp.split(":")
        { state: state, ok: ok.to_i, total: total.to_i }
      end

      private

      # Returns the script's stdout with stderr discarded. The command is
      # passed as an argument vector, so a script path containing spaces or
      # shell metacharacters is handled correctly. A missing `ruby`
      # executable yields empty output, as it did when the command went
      # through a shell.
      def run_script(script)
        IO.popen(['ruby', script, @timeout.to_s], err: File::NULL) { |io| io.read }
      rescue Errno::ENOENT
        ""
      end

    end
  end
end