riemann-smith 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/bin/riemann-smith +105 -76
  3. metadata +21 -35
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 599a249192095c3c61053316cb92d3768a1d49df
4
- data.tar.gz: 6daf55d263d32f3f222a6d397e9ab7155ed45e0c
3
+ metadata.gz: abe727427108fb93a1597b67f52c52a41150aa9c
4
+ data.tar.gz: 0ef96af51a6fcd3a78703e42a9aac5edbbde8251
5
5
  SHA512:
6
- metadata.gz: 3e7049aa2f7d3c41240ff7c482aaff2515ee4611d6666a813f0f9ab2ef56c2bd26af15eb4d5bbacf006fb220b3f68da723c6452b7dc34adf3915a966664d978a
7
- data.tar.gz: 53bbeea669d614a7eede325e48fe923272f58c74ffb9c4427517b1c24847616d7b2324488bea931dafb035a0dc4b2e489cfbe682fa5e97a42bcdc71cbe8dd479
6
+ metadata.gz: 28f120246ee14e7e4e72b694a87159b94e81239daee8370179273df96d6bc123688f888d8d9fad8f75c91d878866052fa8c791f64a072c73c1708383bf979042
7
+ data.tar.gz: 5c9fc151c4aced5fe8c9cdee03dcb13d25a2ecf7a562d0b706a5de1739e6d5eb80aa897612cb144ced2357004f807b13ef88b89525b083f5a6422e2d430b4c2a
data/bin/riemann-smith CHANGED
@@ -1,89 +1,67 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'riemann/tools'
3
+ require 'pp'
4
+ require 'riemann'
4
5
  require 'sys/proctable'
6
+
5
7
  require 'smith'
6
8
  require 'smith/commands/common'
7
9
 
10
+
8
11
  class Smith::Riemann
9
- include Riemann::Tools
12
+
10
13
  include Smith::Commands::Common
14
+ include Smith::Logger
11
15
 
12
- opt :agency_timeout, "Max time (in seconds) to wait for the agency to respond", type: :int, default: 60
13
- opt :service_name, "The name to associate with this health check", type: :string, default: "smith"
14
- opt :agents, "Agents to monitor", type: :strings
15
- opt :agent_group, "The name of an 'agent group' to monitor", type: :strings
16
+ RIEMANN_SERVICE_NAME = "smith"
16
17
 
17
18
  def initialize
18
- options[:agents] ||= []
19
+ log_level :info
20
+ @riemann = Riemann::Client.new(:host => options[:host], :port => options[:port])
21
+ end
19
22
 
20
- if options[:agent_group]
21
- options[:agents] += options[:agent_group].map { |g| agent_group(g) }.flatten
22
- options[:agents].uniq!
23
- end
23
+ def run
24
+ Smith::Messaging::Sender.new(Smith::QueueDefinitions::Agency_control) do |queue|
25
+ on_check { |check| @riemann << check }
24
26
 
25
- Smith.compile_acls
27
+ EM.add_periodic_timer(options[:interval]) { run_check(queue) }
28
+ end
26
29
  end
27
30
 
28
- def tick
29
- Smith.start do
30
- Smith::Messaging::Sender.new(Smith::QueueDefinitions::Agency_control) do |queue|
31
- queue.on_timeout(options[:agency_timeout]) do |message_id|
32
- report(
33
- :host => options[:event_host],
34
- :service => "#{options[:service_name]}.agency",
35
- :state => "critical",
36
- :description => "Failed to respond in a timely fashion"
37
- )
38
- Smith.stop
39
- return
40
- end
31
+ def run_check(queue)
32
+ queue.on_timeout(options[:timeout]) do |message_id|
33
+ @on_check.call(alert(:agency, :timeout, "Agency timeout."))
34
+ end
41
35
 
42
- # TODO remove this when smith randomises the reply queue name (probably > 0.7.2).
43
- reply_queue = "riemann.#{Digest::SHA1.hexdigest(Time.now.to_s + $$.to_s)}"
44
-
45
- queue.on_reply(:auto_ack => true, :reply_queue_name => reply_queue) do |reply_payload, r|
46
- results = parse_response(reply_payload.response)
47
-
48
- options[:agents].each do |agent|
49
- if results.has_key?(agent) && results[agent].any? { |a| a[:state] == "running" && Sys::ProcTable.ps(a[:pid].to_i) }
50
- $stderr.puts "#{agent}: ok"
51
- report(
52
- :host => options[:event_host],
53
- :service => "#{options[:service_name]}.#{agent}",
54
- :state => "ok",
55
- :description => "Agent is running"
56
- )
57
- else
58
- $stderr.puts "#{agent}: critical"
59
- report(
60
- :host => options[:event_host],
61
- :service => "#{options[:service_name]}.#{agent}",
62
- :state => "critical",
63
- :description => "Agent is not running"
64
- )
65
- end
66
- end
36
+ queue.on_reply(:auto_ack => true, :reply_queue_name => "riemann.#{Digest::SHA1.hexdigest($$.to_s)}") do |reply_payload, r|
37
+ check(reply_payload.response).each do |agent|
38
+ @on_check.call(agent)
39
+ end
40
+ end
67
41
 
68
- Smith.stop
69
- return
70
- end
42
+ queue.consumer_count do |count|
43
+ if count > 0
44
+ queue.publish(Smith::ACL::AgencyCommand.new(:command => 'list', :args => ['-a', '-l']))
45
+ else
46
+ @on_check.call(alert(:agency, :critical, "Agency not running."))
47
+ end
48
+ end
49
+ end
71
50
 
72
- queue.consumer_count do |count|
73
- if count == 0
74
- report(
75
- :host => options[:event_host],
76
- :service => "#{options[:service_name]}.agency",
77
- :state => "critical",
78
- :description => "Not running"
79
- )
80
-
81
- Smith.stop
82
- return
83
- else
84
- queue.publish(Smith::ACL::AgencyCommand.new(:command => 'list', :args => ['-a', '-l']))
85
- end
86
- end
51
+ def on_check(&blk)
52
+ @on_check = blk
53
+ end
54
+
55
+ # Checks that the list of agents returned from the list command matches
56
+ # the list of known agents and composes an appropriate alert.
57
+ def check(response)
58
+ agents = parse_response(response)
59
+
60
+ options[:agents].map do |agent_name|
61
+ if agents.has_key?(agent_name) && agents[agent_name].any? { |a| a[:state] == "running" && running?(a[:pid]) }
62
+ alert(agent_name, :running, "Agent running")
63
+ else
64
+ alert(agent_name, :critical, "Agent not running")
87
65
  end
88
66
  end
89
67
  end
@@ -91,15 +69,66 @@ class Smith::Riemann
91
69
  # Returns a Nested hash representing the running agents.
92
70
  def parse_response(response)
93
71
  split_response = response.split(/\n/).map(&:strip)
94
- ((/^total/.match(split_response.first)) ? split_response[1..-1] : []).each_with_object(Hash.new { |h,k| h[k] = []}) do |entry, h|
95
- fields = entry.split(/\s +/)
96
- h[fields.last] << {
97
- :state => fields[0],
98
- :uuid => fields[1],
99
- :pid => fields[2]
100
- }
72
+ ((/^total/.match(split_response.first)) ? split_response[1..-1] : []).each_with_object(Hash.new { |h,k| h[k] = []}) do |e, acc|
73
+ fields = e.split(/\s +/)
74
+ acc[fields.last] << { :state => fields[0], :uuid => fields[1], :pid => fields[2], :description => fields.last }
75
+ end
76
+ end
77
+
78
+ # Construct an alert message
79
+ def alert(service, state, description)
80
+ {:tags => options[:tags], :service => service(service), :state => state(state), :description => description}.tap { |a| puts "#{a[:service]}: #{state(state)}" }
81
+ end
82
+
83
+ # Return true if the given pid exists in the process table.
84
+ def running?(pid)
85
+ !(pid.nil? || Sys::ProcTable.ps(pid.to_i).nil?)
86
+ end
87
+
88
+ # Return the service name
89
+ def service(name)
90
+ "#{RIEMANN_SERVICE_NAME}:#{name}"
91
+ end
92
+
93
+ def state(state)
94
+ {:timeout => "timeout", :running => "ok", :critical => "critical"}[state]
95
+ end
96
+
97
+ def options
98
+ @options ||= begin
99
+ OptionParser.accept(Pathname) {|p,| Pathname.new(p) if p}
100
+
101
+ defaults = {:interval => 30, :timeout => 10, :agents => [], :tags => [], :host => 'localhost', :port => 5555, :agency_timeout => 60}
102
+ defaults.tap do |options|
103
+ parser = OptionParser.new do |opts|
104
+ opts.separator "\n"
105
+ opts.set_summary_indent " "
106
+
107
+ opts.banner = "\nUsage: #{opts.program_name} OPTIONS"
108
+ opts.on_head "\n Periodically lists the running agents and sends the result to riemann."
109
+
110
+ opts.on("--interval <i>", Integer, "Polling interval in seconds (default #{options[:interval]})") { |t| options[:interval] = t }
111
+ opts.on("--timeout <i>", Integer, "Agency timeout (default #{options[:timeout]})") { |t| options[:interval] = t }
112
+ opts.on("--host <s>", String, "Riemann host (default #{options[:host]})") { |v| options[:host] = v }
113
+ opts.on("--port <i>", Integer, "Riemann port (default #{options[:port]})") { |v| options[:port] = v }
114
+ opts.on("--tags <tag1,tag1,...>", Array, "Tags to add to the alert") { |t| options[:tags] = t }
115
+ opts.on("--agents <agent1,agent2,...>", Array, "Agents to monitor") { |v| options[:agents] + v }
116
+ opts.on("--agent-group <group name>>", Array, "The name of an 'agent group' to monitor") do |g|
117
+ options[:agents] += g.map { |g| agent_group(g) }.flatten
118
+ end
119
+
120
+ opts.separator "\n"
121
+ end
122
+
123
+ parser.parse!
124
+ end
101
125
  end
102
126
  end
103
127
  end
104
128
 
105
- Smith::Riemann.run
129
+ checker = Smith::Riemann.new
130
+
131
+ Smith.compile_acls
132
+ Smith.start do
133
+ checker.run
134
+ end
metadata CHANGED
@@ -1,100 +1,86 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: riemann-smith
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Heycock
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-17 00:00:00.000000000 Z
11
+ date: 2015-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: riemann-tools
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - ">="
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - ">="
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: smith
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
17
  - - "~>"
32
18
  - !ruby/object:Gem::Version
33
- version: '0.6'
19
+ version: '0.7'
34
20
  type: :runtime
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
24
  - - "~>"
39
25
  - !ruby/object:Gem::Version
40
- version: '0.6'
26
+ version: '0.7'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: sys-proctable
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
- - - ">="
31
+ - - "~>"
46
32
  - !ruby/object:Gem::Version
47
- version: '0'
33
+ version: '0.9'
48
34
  type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - ">="
38
+ - - "~>"
53
39
  - !ruby/object:Gem::Version
54
- version: '0'
40
+ version: '0.9'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: yard
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - ">="
45
+ - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: 0.8.7
62
48
  type: :development
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - ">="
52
+ - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: 0.8.7
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: bundler
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - ">="
59
+ - - "~>"
74
60
  - !ruby/object:Gem::Version
75
- version: '0'
61
+ version: '1'
76
62
  type: :development
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - ">="
66
+ - - "~>"
81
67
  - !ruby/object:Gem::Version
82
- version: '0'
68
+ version: '1'
83
69
  - !ruby/object:Gem::Dependency
84
70
  name: rake
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
- - - ">="
73
+ - - "~>"
88
74
  - !ruby/object:Gem::Version
89
- version: '0'
75
+ version: '10'
90
76
  type: :development
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
- - - ">="
80
+ - - "~>"
95
81
  - !ruby/object:Gem::Version
96
- version: '0'
97
- description:
82
+ version: '10'
83
+ description: Integrate smith agent monitoring with Riemann
98
84
  email: rgh@digivizer.com
99
85
  executables:
100
86
  - queue-lengths