riemann-tools 0.2.13 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. checksums.yaml +5 -5
  2. data/.docker/Dockerfile +7 -0
  3. data/.docker/publish.sh +35 -0
  4. data/.github/dependabot.yml +11 -0
  5. data/.github/workflows/ci.yml +42 -0
  6. data/.github/workflows/codeql-analysis.yml +72 -0
  7. data/.gitignore +6 -0
  8. data/.rspec +2 -0
  9. data/.rubocop.yml +32 -0
  10. data/.travis.yml +31 -0
  11. data/CHANGELOG.md +422 -0
  12. data/Gemfile +6 -0
  13. data/ISSUE_TEMPLATE.md +15 -0
  14. data/README.markdown +14 -15
  15. data/Rakefile +23 -0
  16. data/SECURITY.md +42 -0
  17. data/bin/riemann-apache-status +92 -77
  18. data/bin/riemann-bench +54 -48
  19. data/bin/riemann-cloudant +44 -39
  20. data/bin/riemann-consul +82 -75
  21. data/bin/riemann-dir-files-count +53 -46
  22. data/bin/riemann-dir-space +53 -46
  23. data/bin/riemann-diskstats +78 -74
  24. data/bin/riemann-fd +68 -47
  25. data/bin/riemann-freeswitch +108 -102
  26. data/bin/riemann-haproxy +46 -39
  27. data/bin/riemann-health +4 -335
  28. data/bin/riemann-kvminstance +18 -12
  29. data/bin/riemann-memcached +35 -28
  30. data/bin/riemann-net +4 -103
  31. data/bin/riemann-nginx-status +74 -66
  32. data/bin/riemann-ntp +4 -32
  33. data/bin/riemann-portcheck +40 -30
  34. data/bin/riemann-proc +96 -89
  35. data/bin/riemann-varnish +51 -44
  36. data/bin/riemann-zookeeper +38 -33
  37. data/lib/riemann/tools/health.rb +347 -0
  38. data/lib/riemann/tools/net.rb +104 -0
  39. data/lib/riemann/tools/ntp.rb +41 -0
  40. data/lib/riemann/tools/utils.rb +17 -0
  41. data/lib/riemann/tools/version.rb +7 -0
  42. data/lib/riemann/tools.rb +40 -33
  43. data/riemann-tools.gemspec +42 -0
  44. data/tools/riemann-aws/LICENSE +21 -0
  45. data/tools/riemann-aws/README.md +54 -0
  46. data/tools/riemann-aws/Rakefile +37 -0
  47. data/tools/riemann-aws/bin/riemann-aws-billing +93 -0
  48. data/tools/riemann-aws/bin/riemann-aws-rds-status +68 -0
  49. data/tools/riemann-aws/bin/riemann-aws-sqs-status +50 -0
  50. data/tools/riemann-aws/bin/riemann-aws-status +83 -0
  51. data/tools/riemann-aws/bin/riemann-elb-metrics +168 -0
  52. data/tools/riemann-aws/bin/riemann-s3-list +87 -0
  53. data/tools/riemann-aws/bin/riemann-s3-status +102 -0
  54. data/tools/riemann-chronos/LICENSE +21 -0
  55. data/tools/riemann-chronos/README.md +10 -0
  56. data/tools/riemann-chronos/Rakefile +37 -0
  57. data/tools/riemann-chronos/bin/riemann-chronos +161 -0
  58. data/tools/riemann-docker/LICENSE +21 -0
  59. data/tools/riemann-docker/README.md +10 -0
  60. data/tools/riemann-docker/Rakefile +36 -0
  61. data/tools/riemann-docker/bin/riemann-docker +206 -0
  62. data/tools/riemann-elasticsearch/LICENSE +21 -0
  63. data/tools/riemann-elasticsearch/README.md +10 -0
  64. data/tools/riemann-elasticsearch/Rakefile +37 -0
  65. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +174 -0
  66. data/tools/riemann-marathon/LICENSE +21 -0
  67. data/tools/riemann-marathon/README.md +10 -0
  68. data/tools/riemann-marathon/Rakefile +37 -0
  69. data/tools/riemann-marathon/bin/riemann-marathon +163 -0
  70. data/tools/riemann-mesos/LICENSE +21 -0
  71. data/tools/riemann-mesos/README.md +10 -0
  72. data/tools/riemann-mesos/Rakefile +37 -0
  73. data/tools/riemann-mesos/bin/riemann-mesos +146 -0
  74. data/tools/riemann-munin/LICENSE +21 -0
  75. data/tools/riemann-munin/README.md +10 -0
  76. data/tools/riemann-munin/Rakefile +36 -0
  77. data/tools/riemann-munin/bin/riemann-munin +43 -0
  78. data/tools/riemann-rabbitmq/LICENSE +21 -0
  79. data/tools/riemann-rabbitmq/README.md +10 -0
  80. data/tools/riemann-rabbitmq/Rakefile +37 -0
  81. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +273 -0
  82. data/tools/riemann-riak/LICENSE +21 -0
  83. data/tools/riemann-riak/README.md +10 -0
  84. data/tools/riemann-riak/Rakefile +36 -0
  85. data/tools/riemann-riak/bin/riemann-riak +323 -0
  86. data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
  87. data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
  88. data/tools/riemann-riak/riak_status/key_count.erl +13 -0
  89. data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
  90. data/tools/riemann-riak/riak_status/ringready.erl +9 -0
  91. metadata +195 -34
@@ -1,118 +1,124 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
- require File.expand_path('../../lib/riemann/tools', __FILE__)
4
+ require 'English'
4
5
 
5
- class Riemann::Tools::FreeSWITCH
6
- include Riemann::Tools
6
+ Process.setproctitle($PROGRAM_NAME)
7
7
 
8
- opt :calls_warning, "Calls warning threshold", :default => 100
9
- opt :calls_critical, "Calls critical threshold", :default => 300
10
- opt :pid_file, "FreeSWITCH daemon pidfile", :type => String, :default => "/var/run/freeswitch/freeswitch.pid"
8
+ require File.expand_path('../lib/riemann/tools', __dir__)
11
9
 
12
- def initialize
13
- @limits = {
14
- :calls => {:critical => opts[:calls_critical], :warning => opts[:calls_warning]}
15
- }
16
- end
10
+ module Riemann
11
+ module Tools
12
+ class FreeSWITCH
13
+ include Riemann::Tools
17
14
 
18
- def dead_proc?(pid)
19
- begin
20
- Process.getpgid(pid)
21
- false
22
- rescue Errno::ESRCH
23
- true
24
- end
25
- end
15
+ opt :calls_warning, 'Calls warning threshold', default: 100
16
+ opt :calls_critical, 'Calls critical threshold', default: 300
17
+ opt :pid_file, 'FreeSWITCH daemon pidfile', type: String, default: '/var/run/freeswitch/freeswitch.pid'
26
18
 
27
- def alert(service, state, metric, description)
28
- report(
29
- :service => service.to_s,
30
- :state => state.to_s,
31
- :metric => metric.to_f,
32
- :description => description
33
- )
34
- end
35
-
36
- def exec_with_timeout(cmd, timeout)
37
- pid = Process.spawn(cmd, {[:err,:out] => :close, :pgroup => true})
38
- begin
39
- Timeout.timeout(timeout) do
40
- Process.waitpid(pid, 0)
41
- $?.exitstatus == 0
19
+ def initialize
20
+ @limits = {
21
+ calls: { critical: opts[:calls_critical], warning: opts[:calls_warning] },
22
+ }
42
23
  end
43
- rescue Timeout::Error
44
- Process.kill(15, -Process.getpgid(pid))
45
- puts "Killed pid: #{pid}"
46
- false
47
- end
48
- end
49
-
50
- def tick
51
- # Determine how many current calls I have according to FreeSWITCH
52
- fs_calls = %x[fs_cli -x "show calls count"| grep -Po '^\\d+'].to_i
53
-
54
- # Determine how many current channels I have according to FreeSWITCH
55
- fs_channels = %x[fs_cli -x "show channels count"| grep -Po '^\\d+'].to_i
56
-
57
- # Determine how many conferences I have according to FreeSWITCH
58
- fs_conferences = %x[fs_cli -x "conference list"| grep -Pco '^Conference'].to_i
59
-
60
- # Try to read pidfile. If it fails use Devil's dummy PID
61
- begin
62
- fs_pid = File.read(opts[:pid_file]).to_i
63
- rescue
64
- puts "Couldn't read pidfile: #{opts[:pid_file]}"
65
- fs_pid = -666
66
- end
67
-
68
- fs_threads = fs_pid > 0 ? %x[ps huH p #{fs_pid} | wc -l].to_i : 0
69
24
 
70
- # Submit calls to riemann
71
- if fs_calls > @limits[:calls][:critical]
72
- alert "FreeSWITCH current calls", :critical, fs_calls, "Number of calls are #{fs_calls}"
73
- elsif fs_calls > @limits[:calls][:warning]
74
- alert "FreeSWITCH current calls", :warning, fs_calls, "Number of calls are #{fs_calls}"
75
- else
76
- alert "FreeSWITCH current calls", :ok, fs_calls, "Number of calls are #{fs_calls}"
77
- end
78
-
79
- # Submit channels to riemann
80
- if fs_channels > @limits[:calls][:critical]
81
- alert "FreeSWITCH current channels", :critical, fs_channels, "Number of channels are #{fs_channels}"
82
- elsif fs_channels > @limits[:calls][:warning]
83
- alert "FreeSWITCH current channels", :warning, fs_channels, "Number of channels are #{fs_channels}"
84
- else
85
- alert "FreeSWITCH current channels", :ok, fs_channels, "Number of channels are #{fs_channels}"
86
- end
87
-
88
- # Submit conferences to riemann
89
- if fs_conferences > @limits[:calls][:critical]
90
- alert "FreeSWITCH current conferences", :critical, fs_conferences, "Number of conferences are #{fs_conferences}"
91
- elsif fs_conferences > @limits[:calls][:warning]
92
- alert "FreeSWITCH current conferences", :warning, fs_conferences, "Number of conferences are #{fs_conferences}"
93
- else
94
- alert "FreeSWITCH current conferences", :ok, fs_conferences, "Number of conferences are #{fs_conferences}"
95
- end
25
+ def dead_proc?(pid)
26
+ Process.getpgid(pid)
27
+ false
28
+ rescue Errno::ESRCH
29
+ true
30
+ end
96
31
 
97
- # Submit threads to riemann
98
- if fs_threads
99
- alert "FreeSWITCH current threads", :ok, fs_threads, "Number of threads are #{fs_threads}"
100
- end
32
+ def alert(service, state, metric, description)
33
+ report(
34
+ service: service.to_s,
35
+ state: state.to_s,
36
+ metric: metric.to_f,
37
+ description: description,
38
+ )
39
+ end
101
40
 
102
- # Submit status to riemann
103
- if dead_proc?(fs_pid)
104
- alert "FreeSWITCH status", :critical, -1, "FreeSWITCH service status: not running"
105
- else
106
- alert "FreeSWITCH status", :ok, nil, "FreeSWITCH service status: running"
107
- end
41
+ def exec_with_timeout(cmd, timeout)
42
+ pid = Process.spawn(cmd, { %i[err out] => :close, :pgroup => true })
43
+ begin
44
+ Timeout.timeout(timeout) do
45
+ Process.waitpid(pid, 0)
46
+ $CHILD_STATUS.exitstatus.zero?
47
+ end
48
+ rescue Timeout::Error
49
+ Process.kill(15, -Process.getpgid(pid))
50
+ puts "Killed pid: #{pid}"
51
+ false
52
+ end
53
+ end
108
54
 
109
- # Submit CLI status to riemann using timeout in case it's unresponsive
110
- if exec_with_timeout("fs_cli -x status", 2)
111
- alert "FreeSWITCH CLI status", :ok, nil, "FreeSWITCH CLI status: responsive"
112
- else
113
- alert "FreeSWITCH CLI status", :critical, -1, "FreeSWITCH CLI status: not responding"
55
+ def tick
56
+ # Determine how many current calls I have according to FreeSWITCH
57
+ fs_calls = `fs_cli -x "show calls count"| grep -Po '^\\d+'`.to_i
58
+
59
+ # Determine how many current channels I have according to FreeSWITCH
60
+ fs_channels = `fs_cli -x "show channels count"| grep -Po '^\\d+'`.to_i
61
+
62
+ # Determine how many conferences I have according to FreeSWITCH
63
+ fs_conferences = `fs_cli -x "conference list"| grep -Pco '^Conference'`.to_i
64
+
65
+ # Try to read pidfile. If it fails use Devil's dummy PID
66
+ begin
67
+ fs_pid = File.read(opts[:pid_file]).to_i
68
+ rescue StandardError
69
+ puts "Couldn't read pidfile: #{opts[:pid_file]}"
70
+ fs_pid = -666
71
+ end
72
+
73
+ fs_threads = fs_pid.positive? ? `ps huH p #{fs_pid} | wc -l`.to_i : 0
74
+
75
+ # Submit calls to riemann
76
+ if fs_calls > @limits[:calls][:critical]
77
+ alert 'FreeSWITCH current calls', :critical, fs_calls, "Number of calls are #{fs_calls}"
78
+ elsif fs_calls > @limits[:calls][:warning]
79
+ alert 'FreeSWITCH current calls', :warning, fs_calls, "Number of calls are #{fs_calls}"
80
+ else
81
+ alert 'FreeSWITCH current calls', :ok, fs_calls, "Number of calls are #{fs_calls}"
82
+ end
83
+
84
+ # Submit channels to riemann
85
+ if fs_channels > @limits[:calls][:critical]
86
+ alert 'FreeSWITCH current channels', :critical, fs_channels, "Number of channels are #{fs_channels}"
87
+ elsif fs_channels > @limits[:calls][:warning]
88
+ alert 'FreeSWITCH current channels', :warning, fs_channels, "Number of channels are #{fs_channels}"
89
+ else
90
+ alert 'FreeSWITCH current channels', :ok, fs_channels, "Number of channels are #{fs_channels}"
91
+ end
92
+
93
+ # Submit conferences to riemann
94
+ if fs_conferences > @limits[:calls][:critical]
95
+ alert 'FreeSWITCH current conferences', :critical, fs_conferences,
96
+ "Number of conferences are #{fs_conferences}"
97
+ elsif fs_conferences > @limits[:calls][:warning]
98
+ alert 'FreeSWITCH current conferences', :warning, fs_conferences,
99
+ "Number of conferences are #{fs_conferences}"
100
+ else
101
+ alert 'FreeSWITCH current conferences', :ok, fs_conferences, "Number of conferences are #{fs_conferences}"
102
+ end
103
+
104
+ # Submit threads to riemann
105
+ alert 'FreeSWITCH current threads', :ok, fs_threads, "Number of threads are #{fs_threads}" if fs_threads
106
+
107
+ # Submit status to riemann
108
+ if dead_proc?(fs_pid)
109
+ alert 'FreeSWITCH status', :critical, -1, 'FreeSWITCH service status: not running'
110
+ else
111
+ alert 'FreeSWITCH status', :ok, nil, 'FreeSWITCH service status: running'
112
+ end
113
+
114
+ # Submit CLI status to riemann using timeout in case it's unresponsive
115
+ if exec_with_timeout('fs_cli -x status', 2)
116
+ alert 'FreeSWITCH CLI status', :ok, nil, 'FreeSWITCH CLI status: responsive'
117
+ else
118
+ alert 'FreeSWITCH CLI status', :critical, -1, 'FreeSWITCH CLI status: not responding'
119
+ end
120
+ end
114
121
  end
115
-
116
122
  end
117
123
  end
118
124
 
data/bin/riemann-haproxy CHANGED
@@ -1,58 +1,65 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
2
5
 
3
6
  # Gathers haproxy CSV statistics and submits them to Riemann.
4
7
 
5
- require File.expand_path('../../lib/riemann/tools', __FILE__)
8
+ require File.expand_path('../lib/riemann/tools', __dir__)
6
9
 
7
- class Riemann::Tools::Haproxy
8
- include Riemann::Tools
9
- require 'net/http'
10
- require 'csv'
10
+ module Riemann
11
+ module Tools
12
+ class Haproxy
13
+ include Riemann::Tools
14
+ require 'net/http'
15
+ require 'csv'
11
16
 
12
- opt :stats_url, "Full url to haproxy stats (eg: https://user:password@host.com:9999/stats)", :required => true, :type => :string
17
+ opt :stats_url, 'Full url to haproxy stats (eg: https://user:password@host.com:9999/stats)', required: true,
18
+ type: :string
13
19
 
14
- def initialize
15
- @uri = URI(opts[:stats_url]+';csv')
16
- end
20
+ def initialize
21
+ @uri = URI("#{opts[:stats_url]};csv")
22
+ end
23
+
24
+ def tick
25
+ csv.each do |row|
26
+ row = row.to_hash
27
+ ns = "haproxy #{row['pxname']} #{row['svname']}"
28
+ row.each do |property, metric|
29
+ next if property.nil? || property == 'pxname' || property == 'svname'
30
+
31
+ report(
32
+ host: @uri.host,
33
+ service: "#{ns} #{property}",
34
+ metric: metric.to_f,
35
+ tags: ['haproxy'],
36
+ )
37
+ end
17
38
 
18
- def tick
19
- csv = CSV.parse(get_csv.body.split("# ")[1], { :headers => true })
20
- csv.each do |row|
21
- row = row.to_hash
22
- ns = "haproxy #{row['pxname']} #{row['svname']}"
23
- row.each do |property, metric|
24
- unless (property.nil? || property == 'pxname' || property == 'svname')
25
39
  report(
26
- :host => @uri.host,
27
- :service => "#{ns} #{property}",
28
- :metric => metric.to_f,
29
- :tags => ['haproxy']
40
+ host: @uri.host,
41
+ service: "#{ns} state",
42
+ state: (%w[UP OPEN].include?(row['status']) ? 'ok' : 'critical'),
43
+ tags: ['haproxy'],
30
44
  )
31
45
  end
32
46
  end
33
47
 
34
- report(
35
- :host => @uri.host,
36
- :service => "#{ns} state",
37
- :state => (['UP', 'OPEN'].include?(row['status']) ? 'ok' : 'critical'),
38
- :tags => ['haproxy']
39
- )
40
- end
41
- end
42
-
43
- def get_csv
44
- http = Net::HTTP.new(@uri.host, @uri.port)
45
- http.use_ssl = true if @uri.scheme == 'https'
46
- http.start do |h|
47
- get = Net::HTTP::Get.new(@uri.request_uri)
48
- unless @uri.userinfo.nil?
49
- userinfo = @uri.userinfo.split(":")
50
- get.basic_auth userinfo[0], userinfo[1]
48
+ def csv
49
+ http = Net::HTTP.new(@uri.host, @uri.port)
50
+ http.use_ssl = true if @uri.scheme == 'https'
51
+ http.start do |h|
52
+ get = Net::HTTP::Get.new(@uri.request_uri)
53
+ unless @uri.userinfo.nil?
54
+ userinfo = @uri.userinfo.split(':')
55
+ get.basic_auth userinfo[0], userinfo[1]
56
+ end
57
+ h.request get
58
+ end
59
+ CSV.parse(http.body.split('# ')[1], { headers: true })
51
60
  end
52
- h.request get
53
61
  end
54
62
  end
55
-
56
63
  end
57
64
 
58
65
  Riemann::Tools::Haproxy.run
data/bin/riemann-health CHANGED
@@ -1,341 +1,10 @@
1
1
  #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
2
3
 
3
- # Reports current CPU, disk, load average, and memory use to riemann.
4
-
5
- require File.expand_path('../../lib/riemann/tools', __FILE__)
6
-
7
- class Riemann::Tools::Health
8
- include Riemann::Tools
9
-
10
- opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
11
- opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
12
- opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
13
- opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
14
- opt :load_warning, "Load warning threshold (load average / core)", :default => 3
15
- opt :load_critical, "Load critical threshold (load average / core)", :default => 8
16
- opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
17
- opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
18
- opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'load', 'memory', 'disk']
19
-
20
- def initialize
21
- @limits = {
22
- :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
23
- :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
24
- :load => {:critical => opts[:load_critical], :warning => opts[:load_warning]},
25
- :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
26
- }
27
- case (@ostype = `uname -s`.chomp.downcase)
28
- when 'darwin'
29
- @cores = `sysctl -n hw.ncpu`.to_i
30
- @cpu = method :darwin_cpu
31
- @disk = method :disk
32
- @load = method :darwin_load
33
- @memory = method :darwin_memory
34
- darwin_top
35
- when 'freebsd'
36
- @cores = `sysctl -n hw.ncpu`.to_i
37
- @cpu = method :freebsd_cpu
38
- @disk = method :disk
39
- @load = method :bsd_load
40
- @memory = method :freebsd_memory
41
- when 'openbsd'
42
- @cores = `sysctl -n hw.ncpu`.to_i
43
- @cpu = method :openbsd_cpu
44
- @disk = method :disk
45
- @load = method :bsd_load
46
- @memory = method :openbsd_memory
47
- when 'sunos'
48
- @cores = `mpstat -a 2>/dev/null`.split[33].to_i
49
- @cpu = method :sunos_cpu
50
- @disk = method :disk
51
- @load = method :bsd_load
52
- @memory = method :sunos_memory
53
- else
54
- @cores = `nproc`.to_i
55
- puts "WARNING: OS '#{@ostype}' not explicitly supported. Falling back to Linux" unless @ostype == "linux"
56
- @cpu = method :linux_cpu
57
- @disk = method :disk
58
- @load = method :linux_load
59
- @memory = method :linux_memory
60
- end
61
-
62
- opts[:checks].each do |check|
63
- case check
64
- when "disk"
65
- @disk_enabled = true
66
- when "load"
67
- @load_enabled = true
68
- when "cpu"
69
- @cpu_enabled = true
70
- when "memory"
71
- @memory_enabled = true
72
- end
73
- end
74
- end
75
-
76
- def alert(service, state, metric, description)
77
- report(
78
- :service => service.to_s,
79
- :state => state.to_s,
80
- :metric => metric.to_f,
81
- :description => description
82
- )
83
- end
84
-
85
- def report_pct(service, fraction, report)
86
- if fraction
87
- if fraction > @limits[service][:critical]
88
- alert service, :critical, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
89
- elsif fraction > @limits[service][:warning]
90
- alert service, :warning, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
91
- else
92
- alert service, :ok, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
93
- end
94
- end
95
- end
96
-
97
- def linux_cpu
98
- new = File.read('/proc/stat')
99
- unless new[/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
100
- alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
101
- return false
102
- end
103
- u2, n2, s2, i2 = [$1, $2, $3, $4].map { |e| e.to_i }
104
-
105
- if @old_cpu
106
- u1, n1, s1, i1 = @old_cpu
107
-
108
- used = (u2+n2+s2) - (u1+n1+s1)
109
- total = used + i2-i1
110
- fraction = used.to_f / total
111
-
112
- report_pct :cpu, fraction, "user+nice+system\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
113
- end
114
-
115
- @old_cpu = [u2, n2, s2, i2]
116
- end
117
-
118
- def linux_load
119
- load = File.read('/proc/loadavg').split(/\s+/)[0].to_f / @cores
120
- if load > @limits[:load][:critical]
121
- alert "load", :critical, load, "1-minute load average/core is #{load}"
122
- elsif load > @limits[:load][:warning]
123
- alert "load", :warning, load, "1-minute load average/core is #{load}"
124
- else
125
- alert "load", :ok, load, "1-minute load average/core is #{load}"
126
- end
127
- end
128
-
129
- def linux_memory
130
- m = File.read('/proc/meminfo').split(/\n/).inject({}) { |info, line|
131
- x = line.split(/:?\s+/)
132
- # Assume kB...
133
- info[x[0]] = x[1].to_i
134
- info
135
- }
136
-
137
- free = m['MemFree'].to_i + m['Buffers'].to_i + m['Cached'].to_i
138
- total = m['MemTotal'].to_i
139
- fraction = 1 - (free.to_f / total)
140
-
141
- report_pct :memory, fraction, "used\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
142
- end
143
-
144
- def freebsd_cpu
145
- u2, n2, s2, t2, i2 = `sysctl -n kern.cp_time 2>/dev/null`.split.map{ |e| e.to_i } #FreeBSD has 5 cpu stats
146
-
147
- if @old_cpu
148
- u1, n1, s1, t1, i1 = @old_cpu
149
-
150
- used = (u2+n2+s2+t2) - (u1+n1+s1+t1)
151
- total = used + i2-i1
152
- fraction = used.to_f / total
153
-
154
- report_pct :cpu, fraction, "user+nice+sytem+interrupt\n\n#{`ps -axo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
155
- end
156
-
157
- @old_cpu = [u2, n2, s2, t2, i2]
158
- end
159
-
160
- def openbsd_cpu
161
- u2, n2, s2, t2, i2 = `sysctl -n kern.cp_time 2>/dev/null`.split(',').map{ |e| e.to_i } #OpenBSD separates with ,
162
-
163
- if @old_cpu
164
- u1, n1, s1, t1, i1 = @old_cpu
4
+ Process.setproctitle($PROGRAM_NAME)
165
5
 
166
- used = (u2+n2+s2+t2) - (u1+n1+s1+t1)
167
- total = used + i2-i1
168
- fraction = used.to_f / total
169
-
170
- report_pct :cpu, fraction, "user+nice+sytem+interrupt\n\n#{`ps -axo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
171
- end
172
-
173
- @old_cpu = [u2, n2, s2, t2, i2]
174
- end
175
-
176
- def sunos_cpu
177
- mpstats = `mpstat -a 2>/dev/null`.split
178
- u2 = mpstats[29].to_i
179
- s2 = mpstats[30].to_i
180
- t2 = mpstats[31].to_i
181
- i2 = mpstats[32].to_i
182
-
183
- if @old_cpu
184
- u1, s1, t1, i1 = @old_cpu
185
-
186
- used = (u2+s2+t2) - (u1+s1+t1)
187
- total = used + i2-i1
188
- if i2 == i1 && used == 0 #If the system is <1% used in both samples then total will be 0 + (99 - 99), avoid a div by 0
189
- fraction = 0
190
- else
191
- fraction = used.to_f / total
192
- end
193
-
194
- report_pct :cpu, fraction, "user+sytem+interrupt\n\n#{`ps -ao pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
195
- end
196
-
197
- @old_cpu = [u2, s2, t2, i2]
198
- end
199
-
200
- def bsd_load
201
- m = `uptime`.split(':')[-1].chomp.gsub(/\s+/,'').split(',')
202
- load = m[0].to_f / @cores
203
- if load > @limits[:load][:critical]
204
- alert "load", :critical, load, "1-minute load average/core is #{load}"
205
- elsif load > @limits[:load][:warning]
206
- alert "load", :warning, load, "1-minute load average/core is #{load}"
207
- else
208
- alert "load", :ok, load, "1-minute load average/core is #{load}"
209
- end
210
- end
211
-
212
- def freebsd_memory
213
- meminfo = `sysctl -n vm.stats.vm.v_page_count vm.stats.vm.v_wire_count vm.stats.vm.v_active_count 2>/dev/null`.chomp.split
214
- fraction = (meminfo[1].to_f + meminfo[2].to_f) / meminfo[0].to_f
215
-
216
- report_pct :memory, fraction, "used\n\n#{`ps -axo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
217
- end
218
-
219
- def openbsd_memory
220
- meminfo = `vmstat 2>/dev/null`.chomp.split
221
- fraction = meminfo[28].to_f / meminfo[29].to_f #The ratio of active to free memory unlike the others :(
222
-
223
- report_pct :memory, fraction, "used\n\n#{`ps -axo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
224
- end
225
-
226
- def sunos_memory
227
- meminfo = `vmstat 2>/dev/null`.chomp.split
228
- total_mem = `prtconf | grep Memory`.split[2].to_f * 1024 # reports in GB but vmstat is in MB
229
- fraction = ( total_mem - meminfo[32].to_f ) / total_mem
230
-
231
- report_pct :memory, fraction, "used\n\n#{`ps -ao pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
232
- end
233
-
234
- def darwin_top
235
- raw = `top -l 1 | grep -i "^\\(cpu\\|physmem\\|load\\)"`.chomp
236
- @topdata = {:stamp => Time.now.to_i }
237
- raw.each_line do |ln|
238
- if ln.match(/Load Avg: [0-9.]+, [0-9.]+, ([0-9.])+/i)
239
- @topdata[:load] = $1.to_f
240
- elsif ln.match(/CPU usage: [0-9.]+% user, [0-9.]+% sys, ([0-9.]+)% idle/i)
241
- @topdata[:cpu] = 1 - ($1.to_f / 100)
242
- elsif mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) wired, ([0-9]+)([BKMGT]) active, ([0-9]+)([BKMGT]) inactive, ([0-9]+)([BKMGT]) used, ([0-9]+)([BKMGT]) free/i)
243
- wired = mdat[1].to_i * (1024 ** "BKMGT".index(mdat[2]))
244
- active = mdat[3].to_i * (1024 ** "BKMGT".index(mdat[4]))
245
- inactive = mdat[5].to_i * (1024 ** "BKMGT".index(mdat[6]))
246
- used = mdat[7].to_i * (1024 ** "BKMGT".index(mdat[8]))
247
- free = mdat[9].to_i * (1024 ** "BKMGT".index(mdat[10]))
248
- @topdata[:memory] = (wired + active + used).to_f / (wired + active + used + inactive + free)
249
- # This is for OSX Mavericks which
250
- # uses a different format for top
251
- # Example: PhysMem: 4662M used (1328M wired), 2782M unused.
252
- elsif mdat = ln.match(/PhysMem: ([0-9]+)([BKMGT]) used \(([0-9]+)([BKMGT]) wired\), ([0-9]+)([BKMGT]) unused/i)
253
- used = mdat[1].to_i * (1024 ** "BKMGT".index(mdat[2]))
254
- wired = mdat[3].to_i * (1024 ** "BKMGT".index(mdat[4]))
255
- unused = mdat[5].to_i * (1024 ** "BKMGT".index(mdat[6]))
256
- @topdata[:memory] = (used).to_f / (used + unused)
257
- end
258
- end
259
- end
260
-
261
- def darwin_cpu
262
- darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
263
- unless @topdata[:cpu]
264
- alert 'cpu', :unknown, nil, "unable to get CPU stats from top"
265
- return false
266
- end
267
- report_pct :cpu, @topdata[:cpu], "usage\n\n#{`ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp}"
268
- end
269
-
270
- def darwin_load
271
- darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
272
- unless @topdata[:load]
273
- alert 'load', :unknown, nil, "unable to get load ave from top"
274
- return false
275
- end
276
- metric = @topdata[:load] / @cores
277
- if metric > @limits[:load][:critical]
278
- alert "load", :critical, metric, "1-minute load average per core is #{metric}"
279
- elsif metric > @limits[:load][:warning]
280
- alert "load", :warning, metric, "1-minute load average per core is #{metric}"
281
- else
282
- alert "load", :ok, metric, "1-minute load average per core is #{metric}"
283
- end
284
- end
285
-
286
- def darwin_memory
287
- darwin_top unless (Time.now.to_i - @topdata[:stamp]) < opts[:interval]
288
- unless @topdata[:memory]
289
- alert 'memory', :unknown, nil, "unable to get memory data from top"
290
- return false
291
- end
292
- report_pct :memory, @topdata[:memory], "usage\n\n#{`ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp}"
293
- end
294
-
295
- def df
296
- case @ostype
297
- when 'darwin', 'freebsd', 'openbsd'
298
- `df -P -t noiso9660`
299
- when 'sunos'
300
- `df -P` # Is there a good way to exlude iso9660 here?
301
- else
302
- `df -P --exclude-type=iso9660`
303
- end
304
- end
305
-
306
- def disk
307
- df.split(/\n/).each do |r|
308
- f = r.split(/\s+/)
309
- next if f[0] == 'Filesystem'
310
- next unless f[0] =~ /\// # Needs at least one slash in the mount path
311
-
312
- # Calculate capacity
313
- x = f[4].to_f/100
314
-
315
- if x > @limits[:disk][:critical]
316
- alert "disk #{f[5]}", :critical, x, "#{f[4]} used"
317
- elsif x > @limits[:disk][:warning]
318
- alert "disk #{f[5]}", :warning, x, "#{f[4]} used"
319
- else
320
- alert "disk #{f[5]}", :ok, x, "#{f[4]} used"
321
- end
322
- end
323
- end
6
+ # Reports current CPU, disk, load average, and memory use to riemann.
324
7
 
325
- def tick
326
- if @cpu_enabled
327
- @cpu.call
328
- end
329
- if @memory_enabled
330
- @memory.call
331
- end
332
- if @disk_enabled
333
- @disk.call
334
- end
335
- if @load_enabled
336
- @load.call
337
- end
338
- end
339
- end
8
+ require 'riemann/tools/health'
340
9
 
341
10
  Riemann::Tools::Health.run