riemann-smith 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/riemann-smith +105 -76
- metadata +21 -35
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: abe727427108fb93a1597b67f52c52a41150aa9c
|
4
|
+
data.tar.gz: 0ef96af51a6fcd3a78703e42a9aac5edbbde8251
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28f120246ee14e7e4e72b694a87159b94e81239daee8370179273df96d6bc123688f888d8d9fad8f75c91d878866052fa8c791f64a072c73c1708383bf979042
|
7
|
+
data.tar.gz: 5c9fc151c4aced5fe8c9cdee03dcb13d25a2ecf7a562d0b706a5de1739e6d5eb80aa897612cb144ced2357004f807b13ef88b89525b083f5a6422e2d430b4c2a
|
data/bin/riemann-smith
CHANGED
@@ -1,89 +1,67 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require '
|
3
|
+
require 'pp'
|
4
|
+
require 'riemann'
|
4
5
|
require 'sys/proctable'
|
6
|
+
|
5
7
|
require 'smith'
|
6
8
|
require 'smith/commands/common'
|
7
9
|
|
10
|
+
|
8
11
|
class Smith::Riemann
|
9
|
-
|
12
|
+
|
10
13
|
include Smith::Commands::Common
|
14
|
+
include Smith::Logger
|
11
15
|
|
12
|
-
|
13
|
-
opt :service_name, "The name to associate with this health check", type: :string, default: "smith"
|
14
|
-
opt :agents, "Agents to monitor", type: :strings
|
15
|
-
opt :agent_group, "The name of an 'agent group' to monitor", type: :strings
|
16
|
+
RIEMANN_SERVICE_NAME = "smith"
|
16
17
|
|
17
18
|
def initialize
|
18
|
-
|
19
|
+
log_level :info
|
20
|
+
@riemann = Riemann::Client.new(:host => options[:host], :port => options[:port])
|
21
|
+
end
|
19
22
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
23
|
+
def run
|
24
|
+
Smith::Messaging::Sender.new(Smith::QueueDefinitions::Agency_control) do |queue|
|
25
|
+
on_check { |check| @riemann << check }
|
24
26
|
|
25
|
-
|
27
|
+
EM.add_periodic_timer(options[:interval]) { run_check(queue) }
|
28
|
+
end
|
26
29
|
end
|
27
30
|
|
28
|
-
def
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
report(
|
33
|
-
:host => options[:event_host],
|
34
|
-
:service => "#{options[:service_name]}.agency",
|
35
|
-
:state => "critical",
|
36
|
-
:description => "Failed to respond in a timely fashion"
|
37
|
-
)
|
38
|
-
Smith.stop
|
39
|
-
return
|
40
|
-
end
|
31
|
+
def run_check(queue)
|
32
|
+
queue.on_timeout(options[:timeout]) do |message_id|
|
33
|
+
@on_check.call(alert(:agency, :timeout, "Agency timeout."))
|
34
|
+
end
|
41
35
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
options[:agents].each do |agent|
|
49
|
-
if results.has_key?(agent) && results[agent].any? { |a| a[:state] == "running" && Sys::ProcTable.ps(a[:pid].to_i) }
|
50
|
-
$stderr.puts "#{agent}: ok"
|
51
|
-
report(
|
52
|
-
:host => options[:event_host],
|
53
|
-
:service => "#{options[:service_name]}.#{agent}",
|
54
|
-
:state => "ok",
|
55
|
-
:description => "Agent is running"
|
56
|
-
)
|
57
|
-
else
|
58
|
-
$stderr.puts "#{agent}: critical"
|
59
|
-
report(
|
60
|
-
:host => options[:event_host],
|
61
|
-
:service => "#{options[:service_name]}.#{agent}",
|
62
|
-
:state => "critical",
|
63
|
-
:description => "Agent is not running"
|
64
|
-
)
|
65
|
-
end
|
66
|
-
end
|
36
|
+
queue.on_reply(:auto_ack => true, :reply_queue_name => "riemann.#{Digest::SHA1.hexdigest($$.to_s)}") do |reply_payload, r|
|
37
|
+
check(reply_payload.response).each do |agent|
|
38
|
+
@on_check.call(agent)
|
39
|
+
end
|
40
|
+
end
|
67
41
|
|
68
|
-
|
69
|
-
|
70
|
-
|
42
|
+
queue.consumer_count do |count|
|
43
|
+
if count > 0
|
44
|
+
queue.publish(Smith::ACL::AgencyCommand.new(:command => 'list', :args => ['-a', '-l']))
|
45
|
+
else
|
46
|
+
@on_check.call(alert(:agency, :critical, "Agency not running."))
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
71
50
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
end
|
51
|
+
def on_check(&blk)
|
52
|
+
@on_check = blk
|
53
|
+
end
|
54
|
+
|
55
|
+
# Checks that the list of agents returned by the list command matches
|
56
|
+
# the list of known agents and composes an appropriate alert.
|
57
|
+
def check(response)
|
58
|
+
agents = parse_response(response)
|
59
|
+
|
60
|
+
options[:agents].map do |agent_name|
|
61
|
+
if agents.has_key?(agent_name) && agents[agent_name].any? { |a| a[:state] == "running" && running?(a[:pid]) }
|
62
|
+
alert(agent_name, :running, "Agent running")
|
63
|
+
else
|
64
|
+
alert(agent_name, :critical, "Agent not running")
|
87
65
|
end
|
88
66
|
end
|
89
67
|
end
|
@@ -91,15 +69,66 @@ class Smith::Riemann
|
|
91
69
|
# Returns a Nested hash representing the running agents.
|
92
70
|
def parse_response(response)
|
93
71
|
split_response = response.split(/\n/).map(&:strip)
|
94
|
-
((/^total/.match(split_response.first)) ? split_response[1..-1] : []).each_with_object(Hash.new { |h,k| h[k] = []}) do |
|
95
|
-
fields =
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
72
|
+
((/^total/.match(split_response.first)) ? split_response[1..-1] : []).each_with_object(Hash.new { |h,k| h[k] = []}) do |e, acc|
|
73
|
+
fields = e.split(/\s +/)
|
74
|
+
acc[fields.last] << { :state => fields[0], :uuid => fields[1], :pid => fields[2], :description => fields.last }
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
# Construct an alert message.
|
79
|
+
def alert(service, state, description)
|
80
|
+
{:tags => options[:tags], :service => service(service), :state => state(state), :description => description}.tap { |a| puts "#{a[:service]}: #{state(state)}" }
|
81
|
+
end
|
82
|
+
|
83
|
+
# Return true if the given pid exists in the process table.
|
84
|
+
def running?(pid)
|
85
|
+
!(pid.nil? || Sys::ProcTable.ps(pid.to_i).nil?)
|
86
|
+
end
|
87
|
+
|
88
|
+
# Return the service name
|
89
|
+
def service(name)
|
90
|
+
"#{RIEMANN_SERVICE_NAME}:#{name}"
|
91
|
+
end
|
92
|
+
|
93
|
+
def state(state)
|
94
|
+
{:timeout => "timeout", :running => "ok", :critical => "critical"}[state]
|
95
|
+
end
|
96
|
+
|
97
|
+
def options
|
98
|
+
@options ||= begin
|
99
|
+
OptionParser.accept(Pathname) {|p,| Pathname.new(p) if p}
|
100
|
+
|
101
|
+
defaults = {:interval => 30, :timeout => 10, :agents => [], :tags => [], :host => 'localhost', :port => 5555, :agency_timeout => 60}
|
102
|
+
defaults.tap do |options|
|
103
|
+
parser = OptionParser.new do |opts|
|
104
|
+
opts.separator "\n"
|
105
|
+
opts.set_summary_indent " "
|
106
|
+
|
107
|
+
opts.banner = "\nUsage: #{opts.program_name} OPTIONS"
|
108
|
+
opts.on_head "\n Periodically lists the running agents and sends the result to riemann."
|
109
|
+
|
110
|
+
opts.on("--interval <i>", Integer, "Polling interval in seconds (default #{options[:interval]})") { |t| options[:interval] = t }
|
111
|
+
opts.on("--timeout <i>", Integer, "Agency timeout (default #{options[:timeout]})") { |t| options[:timeout] = t } # store under :timeout (read by run_check), not :interval
|
112
|
+
opts.on("--host <s>", String, "Riemann host (default #{options[:host]})") { |v| options[:host] = v }
|
113
|
+
opts.on("--port <i>", Integer, "Riemann port (default #{options[:port]})") { |v| options[:port] = v }
|
114
|
+
opts.on("--tags <tag1,tag2,...>", Array, "Tags to add to the alert") { |t| options[:tags] = t } # comma-separated list, parsed into an Array by OptionParser
|
115
|
+
opts.on("--agents <agent1,agent2,...>", Array, "Agents to monitor") { |v| options[:agents] += v } # += keeps the result; a bare + built a new array and discarded it
|
116
|
+
opts.on("--agent-group <group name>>", Array, "The name of an 'agent group' to monitor") do |g|
|
117
|
+
options[:agents] += g.map { |g| agent_group(g) }.flatten
|
118
|
+
end
|
119
|
+
|
120
|
+
opts.separator "\n"
|
121
|
+
end
|
122
|
+
|
123
|
+
parser.parse!
|
124
|
+
end
|
101
125
|
end
|
102
126
|
end
|
103
127
|
end
|
104
128
|
|
105
|
-
Smith::Riemann.
|
129
|
+
checker = Smith::Riemann.new
|
130
|
+
|
131
|
+
Smith.compile_acls
|
132
|
+
Smith.start do
|
133
|
+
checker.run
|
134
|
+
end
|
metadata
CHANGED
@@ -1,100 +1,86 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: riemann-smith
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Heycock
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-07-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: riemann-tools
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - ">="
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - ">="
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: smith
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
30
16
|
requirements:
|
31
17
|
- - "~>"
|
32
18
|
- !ruby/object:Gem::Version
|
33
|
-
version: '0.
|
19
|
+
version: '0.7'
|
34
20
|
type: :runtime
|
35
21
|
prerelease: false
|
36
22
|
version_requirements: !ruby/object:Gem::Requirement
|
37
23
|
requirements:
|
38
24
|
- - "~>"
|
39
25
|
- !ruby/object:Gem::Version
|
40
|
-
version: '0.
|
26
|
+
version: '0.7'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: sys-proctable
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
44
30
|
requirements:
|
45
|
-
- - "
|
31
|
+
- - "~>"
|
46
32
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
33
|
+
version: '0.9'
|
48
34
|
type: :runtime
|
49
35
|
prerelease: false
|
50
36
|
version_requirements: !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
52
|
-
- - "
|
38
|
+
- - "~>"
|
53
39
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
40
|
+
version: '0.9'
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
42
|
name: yard
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
58
44
|
requirements:
|
59
|
-
- - "
|
45
|
+
- - "~>"
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
47
|
+
version: 0.8.7
|
62
48
|
type: :development
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
|
-
- - "
|
52
|
+
- - "~>"
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
54
|
+
version: 0.8.7
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
56
|
name: bundler
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
|
-
- - "
|
59
|
+
- - "~>"
|
74
60
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
61
|
+
version: '1'
|
76
62
|
type: :development
|
77
63
|
prerelease: false
|
78
64
|
version_requirements: !ruby/object:Gem::Requirement
|
79
65
|
requirements:
|
80
|
-
- - "
|
66
|
+
- - "~>"
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
68
|
+
version: '1'
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
70
|
name: rake
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
86
72
|
requirements:
|
87
|
-
- - "
|
73
|
+
- - "~>"
|
88
74
|
- !ruby/object:Gem::Version
|
89
|
-
version: '
|
75
|
+
version: '10'
|
90
76
|
type: :development
|
91
77
|
prerelease: false
|
92
78
|
version_requirements: !ruby/object:Gem::Requirement
|
93
79
|
requirements:
|
94
|
-
- - "
|
80
|
+
- - "~>"
|
95
81
|
- !ruby/object:Gem::Version
|
96
|
-
version: '
|
97
|
-
description:
|
82
|
+
version: '10'
|
83
|
+
description: Integrate smith agent monitoring with Riemann
|
98
84
|
email: rgh@digivizer.com
|
99
85
|
executables:
|
100
86
|
- queue-lengths
|