riemann-tools 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,18 @@
1
+ Riemann Tools
2
+ =============
3
+
4
+ Tiny programs to submit events to Riemann.
5
+
6
+ Riemann-health, for example, submits events about the current CPU, load,
7
+ memory, and disk use. Bench submits randomly distributed metrics for load
8
+ testing. I've got a whole bunch of these internally for monitoring Redis, Riak,
9
+ queues, etc. Most have internal configuration dependencies, so it'll be a while
10
+ before I can extract them for re-use.
11
+
12
+ Get started
13
+ ==========
14
+
15
+ ``` bash
16
+ gem install riemann-tools
17
+ riemann-health --host my.riemann.server
18
+ ```
data/bin/riemann-bench ADDED
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Connects to a server (first arg) and populates it with a constant stream of
4
+ # events for testing.
5
+
6
+ require 'riemann/client'
7
+ require 'pp'
8
+
9
# Load generator: submits an endless stream of randomly evolving events to a
# Riemann server so the server can be exercised under load.
class Riemann::Bench
  attr_accessor :client, :hosts, :services, :states

  # Sets up the host and service lists to generate events for and creates a
  # client for the server named by the first command-line argument
  # (falls back to 'localhost').
  def initialize
    @hosts = ['test']
    @services = %w(per)
    @states = {}
    @client = Riemann::Client.new(host: (ARGV.first || 'localhost'))
  end

  # Produces the next event for a host/service pair.
  #
  # state - the previous event Hash; only its :host and :service are reused.
  #
  # Returns a new event Hash whose metric is a fresh random number in [0, 1)
  # and whose state is derived from that number: below 0.75 is 'ok', below
  # 0.9 is 'warning', anything higher is 'critical'.
  def evolve(state)
    m = rand
    s = case m
        when 0...0.75
          'ok'
        when 0.75...0.9
          'warning'
        when 0.9..1.0
          'critical'
        end

    {
      metric_f: m,
      state: s,
      host: state[:host],
      service: state[:service],
      description: "at #{Time.now}"
    }
  end

  # Sends one freshly evolved event per host/service pair and remembers it as
  # that pair's current state. Expects #start to have seeded the states.
  def tick
    hosts.product(services).each do |id|
      client << (states[id] = evolve(states[id]))
    end
  end

  # Seeds the initial states, then ticks forever with no pause in between.
  def run
    start
    loop do
      tick
    end
  end

  # Stores an initial 'ok' event for every host/service pair, keyed by
  # [host, service].
  def start
    hosts.product(services).each do |host, service|
      states[[host, service]] = {
        metric_f: 0.5,
        state: 'ok',
        description: "Starting up",
        host: host,
        service: service
      }
    end
  end
end
66
+
67
# Script entry point: build the benchmark driver and let it run until killed.
bench = Riemann::Bench.new
bench.run
data/bin/riemann-health ADDED
@@ -0,0 +1,165 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # Reports current CPU, disk, load average, and memory use to riemann.
4
+
5
+ require 'trollop'
6
+ require 'riemann/client'
7
+
8
# Periodically samples local CPU, disk, load-average and memory figures and
# submits one Riemann event per check, with a state of ok, warning or
# critical chosen by comparing the figure against configured thresholds.
class Riemann::Health
  # opts - Hash of settings: :host, :port, :interval (seconds between
  #        rounds) and a :<check>_warning / :<check>_critical threshold pair
  #        for each of cpu, disk, load and memory.
  def initialize(opts)
    @host = opts[:host]
    @port = opts[:port]
    @interval = opts[:interval]
    @limits = {
      cpu: {critical: opts[:cpu_critical], warning: opts[:cpu_warning]},
      disk: {critical: opts[:disk_critical], warning: opts[:disk_warning]},
      load: {critical: opts[:load_critical], warning: opts[:load_warning]},
      memory: {critical: opts[:memory_critical], warning: opts[:memory_warning]}
    }

    @client = Riemann::Client.new(:host => @host, :port => @port)
  end

  # Submits a single event.
  #
  # service     - String service name.
  # state       - state name (anything responding to to_s).
  # metric      - numeric reading; coerced with to_f, so nil becomes 0.0.
  # description - String with human-readable detail.
  def alert(service, state, metric, description)
    @client << {
      service: service,
      state: state.to_s,
      metric: metric.to_f,
      description: description
    }
  end

  # Counts distinct cores listed in /proc/cpuinfo. Stanzas are keyed by
  # "physical id:core id" so duplicate entries for the same core collapse
  # into one; a stanza with no physical id gets its own running number and
  # therefore always counts separately.
  #
  # Returns the Integer number of distinct keys.
  def cores
    unnumbered = 0
    File.read("/proc/cpuinfo").split(/\n\n/).each_with_object({}) do |stanza, seen|
      physical_id = stanza[/physical id\s+:\s+(\d+)/, 1]
      core_id = stanza[/core id\s+:\s+(\d+)/, 1]
      key = if physical_id && core_id
              "#{physical_id}:#{core_id}"
            elsif physical_id
              "#{physical_id}:"
            else
              unnumbered += 1
            end
      seen[key] = true
    end.size
  end

  # Reports the fraction of CPU time spent in user+nice+system since the
  # previous call, based on the first four counters of the aggregate "cpu"
  # line in /proc/stat. The first call only records a baseline and sends
  # nothing.
  #
  # Returns false (after sending an :unknown event) when /proc/stat has no
  # matching cpu line, otherwise the counters that were just recorded.
  def cpu
    match = File.read('/proc/stat').match(/cpu\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)
    unless match
      alert 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
      return false
    end
    u2, n2, s2, i2 = match.captures.map { |e| e.to_i }

    if @old_cpu
      u1, n1, s1, i1 = @old_cpu

      used = (u2 + n2 + s2) - (u1 + n1 + s1)
      total = used + i2 - i1
      # No counter moved between samples: report 0 rather than dividing by
      # zero, which would yield a NaN metric.
      fraction = total.zero? ? 0.0 : used.to_f / total

      alert "cpu", state_for(:cpu, fraction), fraction, "#{sprintf("%.2f", fraction * 100)}% user+nice+system\n\n#{cpu_report}"
    end

    @old_cpu = [u2, n2, s2, i2]
  end

  # Returns the ten processes with the highest %CPU as reported by ps.
  def cpu_report
    `ps -eo pcpu,pid,args | sort -nrb -k1 | head -10`.chomp
  end

  # Sends one event per filesystem whose device name starts with "/", using
  # the use-percentage column as the metric (as a fraction) and the mount
  # point column in the service name.
  def disk
    # -P selects the POSIX output format, which keeps every filesystem on a
    # single line so the column positions below stay valid even for long
    # device names. The header line is skipped by the leading-slash test.
    `df -P`.split(/\n/).each do |row|
      f = row.split(/\s+/)
      next unless f[0] =~ /^\//
      used = f[4].to_f / 100

      alert "disk #{f[5]}", state_for(:disk, used), used, "#{f[4]} used"
    end
  end

  # Reports the third field of /proc/loadavg (the 15-minute load average)
  # divided by the number of cores.
  def load
    per_core = File.read('/proc/loadavg').split(/\s+/)[2].to_f / cores

    alert "load", state_for(:load, per_core), per_core, "15-minute load average/core is #{per_core}"
  end

  # Reports the fraction of memory in use, treating MemFree, Buffers and
  # Cached from /proc/meminfo as available.
  def memory
    m = File.read('/proc/meminfo').split(/\n/).each_with_object({}) do |line, info|
      x = line.split(/:?\s+/)
      # Assume kB...
      info[x[0]] = x[1].to_i
    end

    free = m['MemFree'] + m['Buffers'] + m['Cached']
    total = m['MemTotal']
    fraction = 1 - (free.to_f / total)

    alert "memory", state_for(:memory, fraction), fraction, "#{sprintf("%.2f", fraction * 100)}% used\n\n#{memory_report}"
  end

  # Returns the ten processes with the highest %MEM as reported by ps.
  def memory_report
    `ps -eo pmem,pid,args | sort -nrb -k1 | head -10`.chomp
  end

  # Runs every check once. The first StandardError aborts the rest of the
  # round; it is printed to stderr with its backtrace, followed by a
  # 10-second pause.
  def tick
    cpu
    memory
    load
    disk
  rescue => e
    $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
    sleep 10
  end

  # Runs checks forever, sleeping @interval seconds between rounds.
  def run
    loop do
      tick
      sleep @interval
    end
  end

  private

  # Maps a reading to :critical, :warning or :ok. The reading must be
  # strictly greater than a threshold to trip it; critical is tested first.
  #
  # check - Symbol key into @limits (:cpu, :disk, :load or :memory).
  # value - numeric reading to classify.
  def state_for(check, value)
    limits = @limits[check]
    if value > limits[:critical]
      :critical
    elsif value > limits[:warning]
      :warning
    else
      :ok
    end
  end
end
152
+
153
# Script entry point: parse the command line, then report health forever.
#
# The load thresholds default to Floats so that Trollop treats them as
# floating-point options; with Integer defaults it would only accept whole
# numbers and reject a value such as --load-warning 2.5.
options = Trollop.options do
  opt :host, "Host", :default => '127.0.0.1'
  opt :port, "Port", :default => 5555
  opt :interval, "Seconds between updates", :default => 5
  opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
  opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
  opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
  opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
  opt :load_warning, "Load warning threshold (load average / core)", :default => 3.0
  opt :load_critical, "Load critical threshold (load average / core)", :default => 8.0
  opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
  opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
end

Riemann::Health.new(options).run
data/lib/riemann/tools.rb ADDED
@@ -0,0 +1,4 @@
1
module Riemann
  # Empty namespace reserved for the riemann-tools gem; it defines no
  # methods or constants of its own.
  module Tools; end
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: riemann-tools
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Kyle Kingsbury
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-02-24 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: riemann-client
16
+ requirement: &8635900 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.0.4
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *8635900
25
+ - !ruby/object:Gem::Dependency
26
+ name: trollop
27
+ requirement: &8635300 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 1.16.2
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *8635300
36
+ description:
37
+ email: aphyr@aphyr.com
38
+ executables:
39
+ - riemann-bench
40
+ - riemann-health
41
+ extensions: []
42
+ extra_rdoc_files: []
43
+ files:
44
+ - lib/riemann/tools.rb
45
+ - bin/riemann-bench
46
+ - bin/riemann-health
47
+ - LICENSE
48
+ - README.markdown
49
+ homepage: https://github.com/aphyr/riemann-tools
50
+ licenses: []
51
+ post_install_message:
52
+ rdoc_options: []
53
+ require_paths:
54
+ - lib
55
+ required_ruby_version: !ruby/object:Gem::Requirement
56
+ none: false
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: 1.9.1
61
+ required_rubygems_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ requirements: []
68
+ rubyforge_project: riemann-tools
69
+ rubygems_version: 1.8.10
70
+ signing_key:
71
+ specification_version: 3
72
+ summary: Tiny programs to submit events to Riemann.
73
+ test_files: []