rubix 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +262 -0
- data/VERSION +1 -0
- data/bin/zabbix_api +60 -0
- data/bin/zabbix_pipe +77 -0
- data/lib/rubix.rb +42 -0
- data/lib/rubix/connection.rb +111 -0
- data/lib/rubix/examples/es_monitor.rb +130 -0
- data/lib/rubix/examples/hbase_monitor.rb +87 -0
- data/lib/rubix/examples/mongo_monitor.rb +125 -0
- data/lib/rubix/log.rb +70 -0
- data/lib/rubix/model.rb +56 -0
- data/lib/rubix/models/application.rb +76 -0
- data/lib/rubix/models/host.rb +127 -0
- data/lib/rubix/models/host_group.rb +74 -0
- data/lib/rubix/models/item.rb +122 -0
- data/lib/rubix/models/template.rb +81 -0
- data/lib/rubix/monitor.rb +167 -0
- data/lib/rubix/monitors/chef_monitor.rb +82 -0
- data/lib/rubix/monitors/cluster_monitor.rb +84 -0
- data/lib/rubix/response.rb +124 -0
- data/lib/rubix/sender.rb +301 -0
- data/spec/rubix/connection_spec.rb +43 -0
- data/spec/rubix/models/host_group_spec.rb +56 -0
- data/spec/rubix/monitor_spec.rb +81 -0
- data/spec/rubix/monitors/chef_monitor_spec.rb +11 -0
- data/spec/rubix/monitors/cluster_monitor_spec.rb +11 -0
- data/spec/rubix/response_spec.rb +35 -0
- data/spec/rubix/sender_spec.rb +9 -0
- data/spec/spec_helper.rb +14 -0
- data/spec/support/response_helper.rb +17 -0
- metadata +140 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
module Rubix
|
2
|
+
|
3
|
+
# A Zabbix template, identified by name and/or ID.
#
# Talks to the Zabbix API through the 'template.get', 'template.create'
# and 'template.delete' methods.  The helpers +request+, +info+,
# +error+, +id+ and +exists?+ are not defined here; presumably they
# are inherited from Model -- TODO confirm.
class Template < Model

  # The template's name and the IDs of the hosts reported for it by #load.
  attr_accessor :name, :host_ids

  # Build a template from a properties hash.  Only the :name key is read
  # here; the whole hash is also handed to the superclass initializer.
  def initialize(properties = {})
    super(properties)
    @name = properties[:name]
  end

  # Label identifying this template: its name if set, otherwise its ID.
  def log_name
    "TEMPLATE #{name || id}"
  end

  # Update the template if it already exists, otherwise create it.
  def register
    exists? ? update : create
  end

  # Destroy the template, but only if it exists.
  def unregister
    destroy if exists?
  end

  # Fetch the template via 'template.get', filtering on both ID and name.
  #
  # * Data returned: record the ID, name and host IDs, and mark the
  #   template as loaded and existing.
  # * Success without data: mark the template as loaded but not existing.
  # * Anything else: report the API's error message through +error+.
  def load
    response = request('template.get', 'filter' => {'templateid' => id, 'name' => name}, 'select_hosts' => 'refer', 'output' => 'extend')
    if response.has_data?
      template  = response.first
      @id       = template['templateid'].to_i
      @name     = template['name']
      # Tolerate a record without a 'hosts' entry rather than crashing on nil.
      @host_ids = (template['hosts'] || []).map { |host_info| host_info['hostid'].to_i }
      @loaded   = true
      @exists   = true
    elsif response.success?
      @exists = false
      @loaded = true
    else
      error("Could not load: #{response.error_message}")
    end
  end

  # Create the template via 'template.create' and remember the new ID.
  def create
    response = request('template.create', [{'name' => name}])
    if response.has_data?
      @id     = response['templateids'].first.to_i
      @exists = true
      info("Created")
    else
      error("Could not create: #{response.error_message}.")
    end
  end

  # Nothing is sent to the API on update; only a log line is emitted.
  def update
    # noop
    info("Updated")
  end

  # Delete the template via 'template.delete'.
  #
  # A Zabbix error whose message says the template "does not exist" is
  # treated as success: there was nothing to delete.
  def destroy
    response = request('template.delete', [{'templateid' => id}])
    if response.has_data? && response['templateids'].first.to_i == id
      info("Deleted")
    elsif response.zabbix_error? && response.error_message =~ /does not exist/i
      # was never there...
    else
      error("Could not delete: #{response.error_message}")
    end
  end

  # Is +host+ (anything responding to #id) among this template's hosts?
  # Returns nil when the template does not exist.
  def contains?(host)
    return unless exists?
    # host_ids is only populated by #load; treat "never loaded" as "no hosts".
    (host_ids || []).include?(host.id)
  end

  # Return a template with the given name, creating it when it does not
  # already exist.
  def self.find_or_create_by_name(name)
    new(:name => name).tap do |template|
      template.create unless template.exists?
    end
  end

end
|
81
|
+
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require 'configliere'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module Rubix
|
5
|
+
|
6
|
+
# A generic monitor class for constructing Zabbix monitors.
|
7
|
+
#
|
8
|
+
# This class handles the low-level logic of sleeping, waking up, and
|
9
|
+
# sending data to Zabbix.
|
10
|
+
#
|
11
|
+
# It's up to a subclass to determine how to make a measurement.
|
12
|
+
#
|
13
|
+
# Here's an example of a script which measures the uptime of the
|
14
|
+
# current machine.
|
15
|
+
#
|
16
|
+
# #!/usr/bin/env ruby
|
17
|
+
# # in uptime_monitor
|
18
|
+
# class UptimeMonitor < Rubix::Monitor
|
19
|
+
#
|
20
|
+
# def measure
|
21
|
+
# return unless `uptime`.chomp =~ /(\d+) days/
|
22
|
+
# write do |data|
|
23
|
+
# data << ['uptime', $1.to_i]
|
24
|
+
# end
|
25
|
+
# end
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# UptimeMonitor.run if $0 == __FILE__
|
29
|
+
#
|
30
|
+
# See what the script measures by running it directly.
|
31
|
+
#
|
32
|
+
# $ ./uptime_monitor
|
33
|
+
#
|
34
|
+
# Or have it send its output to another file or FIFO
|
35
|
+
#
|
36
|
+
# $ ./uptime_monitor /path/to/some/file
|
37
|
+
#
|
38
|
+
# Or have it loop every 30 seconds
|
39
|
+
#
|
40
|
+
# $ ./uptime_monitor --loop=30 /path/to/some/file &
|
41
|
+
# Base class for monitors: runs #measure once or on a fixed period and
# writes each batch of measurements as one line of JSON to stdout, a
# regular file or a FIFO.
class Monitor

  #
  # Class-level settings and a function to run a monitor.
  #

  # Build the default command-line settings: a single optional integer
  # :loop option.  Subclasses extend this to add their own options.
  def self.default_settings
    Configliere::Param.new.tap do |s|
      s.use :commandline

      s.define :loop, :description => "Run every this many seconds", :required => false, :type => Integer
    end
  end

  # Resolve the settings, then build a monitor and run it.  If resolving
  # raises, the message is printed and the process exits with status 1.
  def self.run
    settings = default_settings
    begin
      settings.resolve!
    rescue => e
      puts e.message
      exit(1)
    end
    new(settings).run
  end

  #
  # Instance-level settings that provide logic for running once or
  # looping.
  #

  attr_reader :settings

  # +settings+ must answer #[] (for :loop) and #rest (the remaining
  # arguments, the first of which is used as the output path).
  def initialize(settings)
    @settings = settings
  end

  # True when a positive loop period was configured.
  def loop?
    loop_period > 0
  end

  # Seconds to sleep between measurements; 0 when :loop is unset.
  def loop_period
    settings[:loop].to_i
  end

  # Measure once, or forever every #loop_period seconds when looping.
  # The output is always closed on the way out, even if #measure raises.
  def run
    if loop?
      loop do
        measure
        output.flush if output
        sleep loop_period
      end
    else
      measure
    end
  ensure
    close
  end

  # Take a measurement.  Subclasses must override this.
  def measure
    raise NotImplementedError, "Override the 'measure' method in a subclass to conduct a measurement."
  end

  #
  # Methods for writing data to Zabbix.
  #

  # Yield an empty array to the block, which should append
  # [key, value] pairs to it, then emit one line of JSON of the form
  #
  #   {"data": [{"key": ..., "value": ...}, ...]}
  #
  # merged with +options+.  Does nothing when no output is available
  # (see #output).
  def write(options = {}, &block)
    return unless output
    data = []
    block.call(data) if block_given?
    measurements = data.map do |measurement|
      key, value = measurement
      { :key => key, :value => value }
    end
    text = { :data => measurements }.merge(options).to_json

    begin
      output.puts(text)
    rescue Errno::ENXIO
      # FIFO's reader isn't alive...
    end
  end

  # Path to write to: the first non-option argument, or nil if none.
  def output_path
    settings.rest.first
  end

  # True when no output path was given.
  def stdout?
    output_path.nil?
  end

  # True when a path was given and it is either a regular file or does
  # not exist yet.
  def file?
    !stdout? && (!File.exist?(output_path) || File.ftype(output_path) == 'file')
  end

  # True when a path was given and it is an existing FIFO.
  def fifo?
    !stdout? && File.exist?(output_path) && File.ftype(output_path) == 'fifo'
  end

  # The IO to write to, opened lazily and memoized.
  #
  # A FIFO is opened write-only and non-blocking; if that raises
  # Errno::ENXIO (handled below as "reader isn't alive") nil is returned
  # and the open is retried on the next call.  Any other path is opened
  # in append mode.
  def output
    return @output if @output
    @output =
      if stdout?
        $stdout
      elsif fifo?
        open_fifo
      else
        File.open(output_path, 'a')
      end
  end

  # Flush and release the output, if one was ever opened.
  #
  # Works on the memoized handle rather than calling #output, so closing
  # a monitor that never wrote anything does not open (or create) the
  # output path.  $stdout is flushed but never closed.  The handle is
  # forgotten afterwards so a later #write reopens it.
  def close
    return unless @output
    @output.flush
    @output.close unless stdout?
    @output = nil
  end

  private

  # Open the FIFO at #output_path for non-blocking writes; nil if the
  # open raises Errno::ENXIO.
  def open_fifo
    File.open(output_path, File::WRONLY | File::NONBLOCK)
  rescue Errno::ENXIO
    # FIFO's reader isn't alive...
    nil
  end

end
|
167
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Rubix
|
2
|
+
# A generic monitor class for constructing Zabbix monitors that need
|
3
|
+
# to talk to Chef servers.
|
4
|
+
#
|
5
|
+
# This class handles the low-level logic of connecting to Chef and
|
6
|
+
# parsing results from searches.
|
7
|
+
#
|
8
|
+
# It's still up to a subclass to determine how to make a measurement.
|
9
|
+
#
|
10
|
+
# Here's an example of a script which checks the availability of a web
|
11
|
+
# server at the EC2 public hostname of the Chef node 'webserver'.
|
12
|
+
#
|
13
|
+
# #!/usr/bin/env ruby
|
14
|
+
# # in webserver_monitor
|
15
|
+
#
|
16
|
+
# require 'net/http'
|
17
|
+
#
|
18
|
+
# class WebserverMonitor < Rubix::ChefMonitor
|
19
|
+
#
|
20
|
+
# def measure
|
21
|
+
# webserver = chef_node_from_node_name('webserver')
|
22
|
+
# begin
|
23
|
+
# if Net::HTTP.get_response(URI.parse("http://#{webserver['ec2']['public_hostname']}")).code.to_i == 200
|
24
|
+
# write do |data|
|
25
|
+
# data << ['webserver.available', 1]
|
26
|
+
# end
|
27
|
+
# return
|
28
|
+
# end
|
29
|
+
# rescue => e
|
30
|
+
# end
|
31
|
+
# write do |data|
|
32
|
+
# data << ['webserver.available', 0]
|
33
|
+
# end
|
34
|
+
# end
|
35
|
+
# end
|
36
|
+
#
|
37
|
+
# WebserverMonitor.run if $0 == __FILE__
|
38
|
+
#
|
39
|
+
# See documentation for Rubix::Monitor to understand how to run this
|
40
|
+
# script.
|
41
|
+
# A Monitor that is configured to talk to a Chef server and offers a
# few node-search helpers on top of Chef::Search::Query.
class ChefMonitor < Monitor

  # The parent's default settings plus three required Chef options:
  # the server URL, the node name to identify as, and the client key path.
  def self.default_settings
    settings = super()
    settings.define :chef_server_url, :description => "Chef server URL" , :required => true
    settings.define :chef_node_name, :description => "Node name to identify to Chef server", :required => true
    settings.define :chef_client_key, :description => "Path to Chef client private key", :required => true
    settings
  end

  # Store the settings, then push the Chef credentials into Chef::Config.
  def initialize(settings)
    super(settings)
    set_chef_credentials
  end

  # Load the 'chef' library on first use and copy the three Chef
  # settings into Chef::Config.
  def set_chef_credentials
    require 'chef'
    Chef::Config[:chef_server_url] = settings[:chef_server_url]
    Chef::Config[:node_name]       = settings[:chef_node_name]
    Chef::Config[:client_key]      = settings[:chef_client_key]
  end

  # Run a search against the 'node' index, forwarding all arguments to
  # Chef::Search::Query#search.
  def search_nodes(*args)
    query = Chef::Search::Query.new
    query.search('node', *args)
  end

  # First node whose name matches +node_name+, or nil when the name is
  # blank or the first element of the search results is empty.
  def chef_node_from_node_name(node_name)
    return if node_name.nil? || node_name.empty?
    matches = search_nodes("name:#{node_name}").first
    matches.first if matches.size > 0
  end

  # The 'node_name' entry of the first node whose ipaddress or fqdn
  # matches +ip+, or nil when +ip+ is blank or nothing matched.
  def chef_node_name_from_ip(ip)
    return if ip.nil? || ip.empty?
    matches = search_nodes("ipaddress:#{ip} OR fqdn:#{ip}").first
    matches.first['node_name'] if matches.size > 0
  end

end
|
82
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Rubix
|
2
|
+
|
3
|
+
# A generic monitor class for constructing Zabbix monitors that
|
4
|
+
# monitor whole clusters.
|
5
|
+
#
|
6
|
+
# This class handles the low-level logic of finding a set of nodes and
|
7
|
+
# then grouping them by cluster.
|
8
|
+
#
|
9
|
+
# It's still up to a subclass to determine how to make a measurement
|
10
|
+
# on the cluster.
|
11
|
+
#
|
12
|
+
# Here's an example of a script which finds the average uptime of
|
13
|
+
# nodes matching the query 'role:nginx', grouped by cluster.
|
14
|
+
#
|
15
|
+
# #!/usr/bin/env ruby
|
16
|
+
# # in cluster_uptime_monitor
|
17
|
+
#
|
18
|
+
# class ClusterUptimeMonitor < Rubix::ClusterMonitor
|
19
|
+
#
|
20
|
+
# def node_query
|
21
|
+
# 'role:nginx'
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# def measure_cluster cluster_name
|
25
|
+
# total_seconds = nodes_by_cluster[cluster_name].inject(0.0) do |sum, node|
|
26
|
+
# sum += node['uptime_seconds']
|
27
|
+
# end
|
28
|
+
# average_uptime = total_seconds.to_f / nodes_by_cluster[cluster_name].size.to_f
|
29
|
+
# write(:hostname => cluster_name) do |data|
|
30
|
+
# data << ['uptime.average', average_uptime]
|
31
|
+
# end
|
32
|
+
# end
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
# ClusterUptimeMonitor.run if $0 == __FILE__
|
36
|
+
#
|
37
|
+
# See documentation for Rubix::Monitor to understand how to run this
|
38
|
+
# script.
|
39
|
+
# A ChefMonitor that searches for nodes once, at construction time,
# groups them by their 'cluster_name', and then measures each cluster
# in turn.
class ClusterMonitor < ChefMonitor

  # Hashes keyed by cluster name: each node's 'ipaddress', and the
  # nodes themselves.
  attr_reader :private_ips_by_cluster, :nodes_by_cluster

  # Set up the Chef connection via the parent, then run the node search
  # and build the per-cluster groupings.
  def initialize(settings)
    super(settings)
    group_nodes_by_cluster
  end

  # Search query selecting the nodes to monitor.  Empty by default;
  # subclasses override it.
  def node_query
    ''
  end

  # Raw search results for #node_query.
  def matching_chef_nodes
    search_nodes(node_query)
  end

  # Rebuild both groupings from scratch, using the first element of the
  # search results as the list of nodes.
  def group_nodes_by_cluster
    @private_ips_by_cluster = {}
    @nodes_by_cluster       = {}
    matching_chef_nodes.first.each do |node|
      cluster = node['cluster_name']
      (@nodes_by_cluster[cluster] ||= []) << node
      (@private_ips_by_cluster[cluster] ||= []) << node['ipaddress']
    end
  end

  # Names of every cluster that had at least one matching node.
  def clusters
    private_ips_by_cluster.keys
  end

  # Measure each known cluster, one after another.
  def measure
    clusters.each { |cluster_name| measure_cluster(cluster_name) }
  end

  # Measure a single cluster.  Subclasses must override this.
  def measure_cluster(cluster_name)
    raise NotImplementedError, "Override the 'measure_cluster' method to make measurements of a given cluster."
  end

end
|
83
|
+
|
84
|
+
end
|
@@ -0,0 +1,124 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Rubix
|
4
|
+
|
5
|
+
# Wraps an HTTP response from the Zabbix API and exposes its parsed
# JSON body, error details and 'result' payload.
class Response

  attr_reader :http_response, :code, :body

  # +http_response+ must answer #body and #code; the code is stored as
  # an Integer.
  def initialize(http_response)
    @http_response = http_response
    @body          = http_response.body
    @code          = http_response.code.to_i
  end

  #
  # Parsing
  #

  # The body parsed as JSON, memoized.  A non-200 response or a body
  # that cannot be parsed yields an empty Hash.
  def parsed
    return @parsed if defined?(@parsed)
    @parsed = non_200? ? {} : parse_body
  end

  #
  # Error Handling
  #

  # Was the HTTP status anything other than 200?
  def non_200?
    code != 200
  end

  # Truthy when the HTTP status was not 200 or the parsed body is a Hash
  # carrying an 'error' entry.
  def error?
    non_200? || (parsed.is_a?(Hash) && parsed['error'])
  end

  # Truthy for an error reported inside a 200 response.
  def zabbix_error?
    code == 200 && error?
  end

  # The HTTP status for non-200 responses, otherwise the integer
  # 'code' of the error (0 if it cannot be read).  nil when there is no
  # error.
  def error_code
    return unless error?
    return code if non_200?
    begin
      parsed['error']['code'].to_i
    rescue StandardError
      0
    end
  end

  # "Non-200 Error" for non-200 responses, otherwise the error's
  # 'message' ('Unknown Error' if it cannot be read).  nil when there is
  # no error.
  def error_type
    return unless error?
    return "Non-200 Error" if non_200?
    begin
      parsed['error']['message']
    rescue StandardError
      'Unknown Error'
    end
  end

  # Human-readable description of the error, or nil when there is none.
  #
  # For a Zabbix error this is the 'message' (trailing period removed)
  # and the 'data' (leading "[...] " tag removed), joined by ", ".
  def error_message
    return unless error?
    begin
      if non_200?
        "Could not get a 200 response from the Zabbix API.  Further details are unavailable."
      else
        stripped_message = (parsed['error']['message'] || '').gsub(/\.$/, '')
        stripped_data    = (parsed['error']['data']    || '').gsub(/^\[.*?\] /, '')
        [stripped_message, stripped_data].map(&:strip).reject(&:empty?).join(', ')
      end
    rescue StandardError
      "No details available."
    end
  end

  # The opposite of #error?, as a strict boolean.
  def success?
    !error?
  end

  #
  # Inspecting contents
  #

  # The 'result' entry of the parsed body, or nil when the body did not
  # parse to a Hash.
  def result
    parsed.is_a?(Hash) ? parsed['result'] : nil
  end

  # Look up +key+ in the result.  nil on error or when the result does
  # not support #[] (for example nil or a boolean).
  def [](key)
    return if error?
    return unless result.respond_to?(:[])
    result[key]
  end

  # First element of the result.  nil on error or when the result does
  # not support #first.
  def first
    return if error?
    return unless result.respond_to?(:first)
    result.first
  end

  # True when there is no result at all or the result is an empty
  # collection/string.  Results without a notion of emptiness (booleans,
  # numbers) count as non-empty.
  def empty?
    payload = result
    return true if payload.nil?
    payload.respond_to?(:empty?) && payload.empty?
  end

  # True when the request succeeded and returned a non-empty result.
  def has_data?
    success? && !empty?
  end

  # Truthy when the result is a non-empty Hash whose first value is
  # truthy.
  def hash?
    return false if error?
    result.is_a?(Hash) && result.size > 0 && result.first.last
  end

  # Truthy when the result is a non-empty Array whose first element is
  # truthy.
  def array?
    return false if error?
    result.is_a?(Array) && result.size > 0 && result.first
  end

  # True when the result is a non-empty String.
  def string?
    return false if error?
    result.is_a?(String) && result.size > 0
  end

  # True when the result is exactly true or false.
  def boolean?
    return false if error?
    result == true || result == false
  end

  private

  # Parse the raw body; an unparseable or missing body becomes {}.
  def parse_body
    JSON.parse(@body)
  rescue JSON::ParserError, TypeError
    {}
  end

end
|
124
|
+
end
|