riemann-smith 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/bin/riemann-smith +105 -76
  3. metadata +21 -35
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 599a249192095c3c61053316cb92d3768a1d49df
4
- data.tar.gz: 6daf55d263d32f3f222a6d397e9ab7155ed45e0c
3
+ metadata.gz: abe727427108fb93a1597b67f52c52a41150aa9c
4
+ data.tar.gz: 0ef96af51a6fcd3a78703e42a9aac5edbbde8251
5
5
  SHA512:
6
- metadata.gz: 3e7049aa2f7d3c41240ff7c482aaff2515ee4611d6666a813f0f9ab2ef56c2bd26af15eb4d5bbacf006fb220b3f68da723c6452b7dc34adf3915a966664d978a
7
- data.tar.gz: 53bbeea669d614a7eede325e48fe923272f58c74ffb9c4427517b1c24847616d7b2324488bea931dafb035a0dc4b2e489cfbe682fa5e97a42bcdc71cbe8dd479
6
+ metadata.gz: 28f120246ee14e7e4e72b694a87159b94e81239daee8370179273df96d6bc123688f888d8d9fad8f75c91d878866052fa8c791f64a072c73c1708383bf979042
7
+ data.tar.gz: 5c9fc151c4aced5fe8c9cdee03dcb13d25a2ecf7a562d0b706a5de1739e6d5eb80aa897612cb144ced2357004f807b13ef88b89525b083f5a6422e2d430b4c2a
data/bin/riemann-smith CHANGED
@@ -1,89 +1,67 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'riemann/tools'
3
+ require 'pp'
4
+ require 'riemann'
4
5
  require 'sys/proctable'
6
+
5
7
  require 'smith'
6
8
  require 'smith/commands/common'
7
9
 
10
+
8
11
  class Smith::Riemann
9
- include Riemann::Tools
12
+
10
13
  include Smith::Commands::Common
14
+ include Smith::Logger
11
15
 
12
- opt :agency_timeout, "Max time (in seconds) to wait for the agency to respond", type: :int, default: 60
13
- opt :service_name, "The name to associate with this health check", type: :string, default: "smith"
14
- opt :agents, "Agents to monitor", type: :strings
15
- opt :agent_group, "The name of an 'agent group' to monitor", type: :strings
16
+ RIEMANN_SERVICE_NAME = "smith"
16
17
 
17
18
  def initialize
18
- options[:agents] ||= []
19
+ log_level :info
20
+ @riemann = Riemann::Client.new(:host => options[:host], :port => options[:port])
21
+ end
19
22
 
20
- if options[:agent_group]
21
- options[:agents] += options[:agent_group].map { |g| agent_group(g) }.flatten
22
- options[:agents].uniq!
23
- end
23
+ def run
24
+ Smith::Messaging::Sender.new(Smith::QueueDefinitions::Agency_control) do |queue|
25
+ on_check { |check| @riemann << check }
24
26
 
25
- Smith.compile_acls
27
+ EM.add_periodic_timer(options[:interval]) { run_check(queue) }
28
+ end
26
29
  end
27
30
 
28
- def tick
29
- Smith.start do
30
- Smith::Messaging::Sender.new(Smith::QueueDefinitions::Agency_control) do |queue|
31
- queue.on_timeout(options[:agency_timeout]) do |message_id|
32
- report(
33
- :host => options[:event_host],
34
- :service => "#{options[:service_name]}.agency",
35
- :state => "critical",
36
- :description => "Failed to respond in a timely fashion"
37
- )
38
- Smith.stop
39
- return
40
- end
31
+ def run_check(queue)
32
+ queue.on_timeout(options[:timeout]) do |message_id|
33
+ @on_check.call(alert(:agency, :timeout, "Agency timeout."))
34
+ end
41
35
 
42
- # TODO remove this when smith randomises the reply queue name (probably > 0.7.2).
43
- reply_queue = "riemann.#{Digest::SHA1.hexdigest(Time.now.to_s + $$.to_s)}"
44
-
45
- queue.on_reply(:auto_ack => true, :reply_queue_name => reply_queue) do |reply_payload, r|
46
- results = parse_response(reply_payload.response)
47
-
48
- options[:agents].each do |agent|
49
- if results.has_key?(agent) && results[agent].any? { |a| a[:state] == "running" && Sys::ProcTable.ps(a[:pid].to_i) }
50
- $stderr.puts "#{agent}: ok"
51
- report(
52
- :host => options[:event_host],
53
- :service => "#{options[:service_name]}.#{agent}",
54
- :state => "ok",
55
- :description => "Agent is running"
56
- )
57
- else
58
- $stderr.puts "#{agent}: critical"
59
- report(
60
- :host => options[:event_host],
61
- :service => "#{options[:service_name]}.#{agent}",
62
- :state => "critical",
63
- :description => "Agent is not running"
64
- )
65
- end
66
- end
36
+ queue.on_reply(:auto_ack => true, :reply_queue_name => "riemann.#{Digest::SHA1.hexdigest($$.to_s)}") do |reply_payload, r|
37
+ check(reply_payload.response).each do |agent|
38
+ @on_check.call(agent)
39
+ end
40
+ end
67
41
 
68
- Smith.stop
69
- return
70
- end
42
+ queue.consumer_count do |count|
43
+ if count > 0
44
+ queue.publish(Smith::ACL::AgencyCommand.new(:command => 'list', :args => ['-a', '-l']))
45
+ else
46
+ @on_check.call(alert(:agency, :critical, "Agency not running."))
47
+ end
48
+ end
49
+ end
71
50
 
72
- queue.consumer_count do |count|
73
- if count == 0
74
- report(
75
- :host => options[:event_host],
76
- :service => "#{options[:service_name]}.agency",
77
- :state => "critical",
78
- :description => "Not running"
79
- )
80
-
81
- Smith.stop
82
- return
83
- else
84
- queue.publish(Smith::ACL::AgencyCommand.new(:command => 'list', :args => ['-a', '-l']))
85
- end
86
- end
51
+ def on_check(&blk)
52
+ @on_check = blk
53
+ end
54
+
55
+ # Checks that the list of agents returned by the list command matches
56
+ # the list of known agents and composes an appropriate alert.
57
+ def check(response)
58
+ agents = parse_response(response)
59
+
60
+ options[:agents].map do |agent_name|
61
+ if agents.has_key?(agent_name) && agents[agent_name].any? { |a| a[:state] == "running" && running?(a[:pid]) }
62
+ alert(agent_name, :running, "Agent running")
63
+ else
64
+ alert(agent_name, :critical, "Agent not running")
87
65
  end
88
66
  end
89
67
  end
@@ -91,15 +69,66 @@ class Smith::Riemann
91
69
  # Returns a nested hash representing the running agents.
92
70
  def parse_response(response)
93
71
  split_response = response.split(/\n/).map(&:strip)
94
- ((/^total/.match(split_response.first)) ? split_response[1..-1] : []).each_with_object(Hash.new { |h,k| h[k] = []}) do |entry, h|
95
- fields = entry.split(/\s +/)
96
- h[fields.last] << {
97
- :state => fields[0],
98
- :uuid => fields[1],
99
- :pid => fields[2]
100
- }
72
+ ((/^total/.match(split_response.first)) ? split_response[1..-1] : []).each_with_object(Hash.new { |h,k| h[k] = []}) do |e, acc|
73
+ fields = e.split(/\s +/)
74
+ acc[fields.last] << { :state => fields[0], :uuid => fields[1], :pid => fields[2], :description => fields.last }
75
+ end
76
+ end
77
+
78
+ # Construct an alert message
79
+ def alert(service, state, description)
80
+ {:tags => options[:tags], :service => service(service), :state => state(state), :description => description}.tap { |a| puts "#{a[:service]}: #{state(state)}" }
81
+ end
82
+
83
+ # Return true if the given pid exists in the process table.
84
+ def running?(pid)
85
+ !(pid.nil? || Sys::ProcTable.ps(pid.to_i).nil?)
86
+ end
87
+
88
+ # Return the service name
89
+ def service(name)
90
+ "#{RIEMANN_SERVICE_NAME}:#{name}"
91
+ end
92
+
93
+ def state(state)
94
+ {:timeout => "timeout", :running => "ok", :critical => "critical"}[state]
95
+ end
96
+
97
+ def options
98
+ @options ||= begin
99
+ OptionParser.accept(Pathname) {|p,| Pathname.new(p) if p}
100
+
101
+ defaults = {:interval => 30, :timeout => 10, :agents => [], :tags => [], :host => 'localhost', :port => 5555, :agency_timeout => 60}
102
+ defaults.tap do |options|
103
+ parser = OptionParser.new do |opts|
104
+ opts.separator "\n"
105
+ opts.set_summary_indent " "
106
+
107
+ opts.banner = "\nUsage: #{opts.program_name} OPTIONS"
108
+ opts.on_head "\n Periodically lists the running agents and sends the result to riemann."
109
+
110
+ opts.on("--interval <i>", Integer, "Polling interval in seconds (default #{options[:interval]})") { |t| options[:interval] = t }
111
+ opts.on("--timeout <i>", Integer, "Agency timeout (default #{options[:timeout]})") { |t| options[:interval] = t }
112
+ opts.on("--host <s>", String, "Riemann host (default #{options[:host]})") { |v| options[:host] = v }
113
+ opts.on("--port <i>", Integer, "Riemann port (default #{options[:port]})") { |v| options[:port] = v }
114
+ opts.on("--tags <tag1,tag1,...>", Array, "Tags to add to the alert") { |t| options[:tags] = t }
115
+ opts.on("--agents <agent1,agent2,...>", Array, "Agents to monitor") { |v| options[:agents] + v }
116
+ opts.on("--agent-group <group name>>", Array, "The name of an 'agent group' to monitor") do |g|
117
+ options[:agents] += g.map { |g| agent_group(g) }.flatten
118
+ end
119
+
120
+ opts.separator "\n"
121
+ end
122
+
123
+ parser.parse!
124
+ end
101
125
  end
102
126
  end
103
127
  end
104
128
 
105
- Smith::Riemann.run
129
+ checker = Smith::Riemann.new
130
+
131
+ Smith.compile_acls
132
+ Smith.start do
133
+ checker.run
134
+ end
metadata CHANGED
@@ -1,100 +1,86 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: riemann-smith
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Heycock
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-17 00:00:00.000000000 Z
11
+ date: 2015-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: riemann-tools
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: smith
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
17
  - - "~>"
32
18
  - !ruby/object:Gem::Version
33
- version: '0.6'
19
+ version: '0.7'
34
20
  type: :runtime
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
24
  - - "~>"
39
25
  - !ruby/object:Gem::Version
40
- version: '0.6'
26
+ version: '0.7'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: sys-proctable
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
- - - ">="
31
+ - - "~>"
46
32
  - !ruby/object:Gem::Version
47
- version: '0'
33
+ version: '0.9'
48
34
  type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - ">="
38
+ - - "~>"
53
39
  - !ruby/object:Gem::Version
54
- version: '0'
40
+ version: '0.9'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: yard
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - ">="
45
+ - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: 0.8.7
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - ">="
52
+ - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: 0.8.7
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: bundler
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - ">="
59
+ - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '0'
61
+ version: '1'
76
62
  type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - ">="
66
+ - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '0'
68
+ version: '1'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: rake
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
- - - ">="
73
+ - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '0'
75
+ version: '10'
90
76
  type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
- - - ">="
80
+ - - "~>"
95
81
  - !ruby/object:Gem::Version
96
- version: '0'
97
- description:
82
+ version: '10'
83
+ description: Integrate smith agent monitoring with Riemann
98
84
  email: rgh@digivizer.com
99
85
  executables:
100
86
  - queue-lengths