rubix 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/rubix/examples/simple_chef_monitor.rb +37 -0
- data/lib/rubix/examples/simple_cluster_monitor.rb +42 -0
- data/lib/rubix/examples/{uptime_monitor.rb → simple_uptime_monitor.rb} +0 -0
- data/lib/rubix/examples/simple_zabbix_monitor.rb +39 -0
- data/lib/rubix/models/model.rb +15 -0
- data/lib/rubix/monitors.rb +1 -0
- data/lib/rubix/monitors/chef_monitor.rb +18 -14
- data/lib/rubix/monitors/cluster_monitor.rb +54 -62
- data/lib/rubix/monitors/monitor.rb +5 -4
- data/lib/rubix/monitors/zabbix_monitor.rb +71 -0
- data/spec/requests/host_request_spec.rb +16 -1
- data/spec/rubix/monitors/chef_monitor_spec.rb +51 -2
- data/spec/rubix/monitors/cluster_monitor_spec.rb +25 -23
- data/spec/rubix/monitors/zabbix_monitor_spec.rb +46 -0
- data/spec/support/integration_helper.rb +7 -3
- metadata +10 -8
- data/lib/rubix/examples/es_monitor.rb +0 -134
- data/lib/rubix/examples/hbase_monitor.rb +0 -94
- data/lib/rubix/examples/mongo_monitor.rb +0 -130
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.4.0
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
RUBIX_ROOT = File.expand_path('../../../../lib', __FILE__)
|
4
|
+
$: << RUBIX_ROOT unless $:.include?(RUBIX_ROOT)
|
5
|
+
|
6
|
+
require 'rubix'
|
7
|
+
require 'net/http'
|
8
|
+
require 'timeout'
|
9
|
+
|
10
|
+
class HttpAvailabilityMonitor < Rubix::Monitor
|
11
|
+
|
12
|
+
include Rubix::ChefMonitor
|
13
|
+
|
14
|
+
def chef_node
|
15
|
+
begin
|
16
|
+
@chef_node ||= chef_node_from_node_name(Chef::Config[:node_name])
|
17
|
+
rescue => e
|
18
|
+
puts "Could not find a Chef node named #{Chef::Config[:node_name]} -- are you sure your Chef settings are correct?"
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
def measure
|
23
|
+
begin
|
24
|
+
timeout(1) do
|
25
|
+
if Net::HTTP.get_response(URI.parse("http://#{chef_node['fqdn']}/")).code.to_i == 200
|
26
|
+
write [host.name, 'webserver.available', 1]
|
27
|
+
return
|
28
|
+
end
|
29
|
+
end
|
30
|
+
rescue => e
|
31
|
+
puts e.message
|
32
|
+
end
|
33
|
+
write [host.name, 'webserver.available', 0]
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
HttpAvailabilityMonitor.run if $0 == __FILE__
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
RUBIX_ROOT = File.expand_path('../../../../lib', __FILE__)
|
4
|
+
$: << RUBIX_ROOT unless $:.include?(RUBIX_ROOT)
|
5
|
+
|
6
|
+
require 'rubix'
|
7
|
+
require 'net/http'
|
8
|
+
require 'timeout'
|
9
|
+
|
10
|
+
class HttpAvailabilityMonitor < Rubix::Monitor
|
11
|
+
|
12
|
+
include Rubix::ZabbixMonitor
|
13
|
+
include Rubix::ClusterMonitor
|
14
|
+
|
15
|
+
def host_group_name
|
16
|
+
'Zabbix servers'
|
17
|
+
end
|
18
|
+
|
19
|
+
def measure_cluster cluster_name
|
20
|
+
hosts_by_cluster[cluster_name].each do |host|
|
21
|
+
measure_host(host)
|
22
|
+
end
|
23
|
+
write [cluster_name, 'something', 1]
|
24
|
+
end
|
25
|
+
|
26
|
+
def measure_host host
|
27
|
+
begin
|
28
|
+
timeout(1) do
|
29
|
+
if Net::HTTP.get_response(URI.parse("http://#{host.ip}/")).code.to_i == 200
|
30
|
+
write [host.name, 'webserver.available', 1]
|
31
|
+
return
|
32
|
+
end
|
33
|
+
end
|
34
|
+
rescue => e
|
35
|
+
puts e.message
|
36
|
+
end
|
37
|
+
write [host.name, 'webserver.available', 0]
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
HttpAvailabilityMonitor.run if $0 == __FILE__
|
File without changes
|
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
RUBIX_ROOT = File.expand_path('../../../../lib', __FILE__)
|
4
|
+
$: << RUBIX_ROOT unless $:.include?(RUBIX_ROOT)
|
5
|
+
|
6
|
+
require 'rubix'
|
7
|
+
require 'net/http'
|
8
|
+
require 'timeout'
|
9
|
+
|
10
|
+
class HttpAvailabilityMonitor < Rubix::Monitor
|
11
|
+
|
12
|
+
include Rubix::ZabbixMonitor
|
13
|
+
|
14
|
+
def host_group_name
|
15
|
+
'Zabbix servers'
|
16
|
+
end
|
17
|
+
|
18
|
+
def measure
|
19
|
+
hosts.each do |host|
|
20
|
+
measure_host(host)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def measure_host host
|
25
|
+
begin
|
26
|
+
timeout(1) do
|
27
|
+
if Net::HTTP.get_response(URI.parse("http://#{host.ip}/")).code.to_i == 200
|
28
|
+
write [host.name, 'webserver.available', 1]
|
29
|
+
return
|
30
|
+
end
|
31
|
+
end
|
32
|
+
rescue => e
|
33
|
+
puts e.message
|
34
|
+
end
|
35
|
+
write [host.name, 'webserver.available', 0]
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
HttpAvailabilityMonitor.run if $0 == __FILE__
|
data/lib/rubix/models/model.rb
CHANGED
@@ -362,5 +362,20 @@ module Rubix
|
|
362
362
|
end
|
363
363
|
end
|
364
364
|
|
365
|
+
def self.list ids
|
366
|
+
return [] if ids.nil? || ids.empty?
|
367
|
+
response = request("#{zabbix_name}.get", get_params.merge((id_field + 's') => ids))
|
368
|
+
case
|
369
|
+
when response.has_data?
|
370
|
+
response.result.map do |obj|
|
371
|
+
build(obj)
|
372
|
+
end
|
373
|
+
when response.success?
|
374
|
+
[]
|
375
|
+
else
|
376
|
+
error("Error listing Zabbix #{resource_name}s: #{response.error_message}")
|
377
|
+
end
|
378
|
+
end
|
379
|
+
|
365
380
|
end
|
366
381
|
end
|
data/lib/rubix/monitors.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module Rubix
|
2
|
-
|
3
|
-
#
|
2
|
+
|
3
|
+
# A module that lets monitors talk to Chef servers.
|
4
4
|
#
|
5
5
|
# This module handles the low-level logic of connecting to Chef and
|
6
6
|
# parsing results from searches.
|
@@ -15,8 +15,10 @@ module Rubix
|
|
15
15
|
#
|
16
16
|
# require 'net/http'
|
17
17
|
#
|
18
|
-
# class WebserverMonitor < Rubix::
|
18
|
+
# class WebserverMonitor < Rubix::Monitor
|
19
19
|
#
|
20
|
+
# include Rubix::ChefMonitor
|
21
|
+
#
|
20
22
|
# def measure
|
21
23
|
# webserver = chef_node_from_node_name('webserver')
|
22
24
|
# begin
|
@@ -31,16 +33,13 @@ module Rubix
|
|
31
33
|
# end
|
32
34
|
#
|
33
35
|
# WebserverMonitor.run if $0 == __FILE__
|
34
|
-
|
35
|
-
# See documentation for Rubix::Monitor to understand how to run this
|
36
|
-
# script.
|
37
|
-
class ChefMonitor < Monitor
|
36
|
+
module ChefMonitor
|
38
37
|
|
39
|
-
def self.
|
40
|
-
|
41
|
-
s.define :chef_server_url, :description => "Chef server URL" , :required => true
|
42
|
-
s.define :chef_node_name, :description => "Node name to identify to Chef server", :required => true
|
43
|
-
s.define :chef_client_key, :description => "Path to Chef client private key", :required => true
|
38
|
+
def self.included klass
|
39
|
+
klass.default_settings.tap do |s|
|
40
|
+
s.define :chef_server_url, :description => "Chef server URL" , :required => true, :default => 'http://localhost'
|
41
|
+
s.define :chef_node_name, :description => "Node name to identify to Chef server", :required => true, :default => ENV["HOSTNAME"]
|
42
|
+
s.define :chef_client_key, :description => "Path to Chef client private key", :required => true, :default => '/etc/chef/client.pem'
|
44
43
|
end
|
45
44
|
end
|
46
45
|
|
@@ -67,11 +66,16 @@ module Rubix
|
|
67
66
|
results.first.first
|
68
67
|
end
|
69
68
|
|
70
|
-
def
|
69
|
+
def chef_node_from_ip ip
|
71
70
|
return if ip.nil? || ip.empty?
|
72
71
|
results = search_nodes("ipaddress:#{ip} OR fqdn:#{ip}")
|
73
72
|
return unless results.first.size > 0
|
74
|
-
results.first.first
|
73
|
+
results.first.first
|
74
|
+
end
|
75
|
+
|
76
|
+
def chef_node_name_from_ip ip
|
77
|
+
node = chef_node_from_ip(ip)
|
78
|
+
return node['node_name'] if node
|
75
79
|
end
|
76
80
|
|
77
81
|
end
|
@@ -1,93 +1,85 @@
|
|
1
1
|
module Rubix
|
2
2
|
|
3
|
-
# A
|
4
|
-
#
|
3
|
+
# A module for building monitors which measure items for several
|
4
|
+
# hosts in a cluster as well as items for the cluster itself.
|
5
5
|
#
|
6
|
-
# This
|
7
|
-
#
|
6
|
+
# This module assumes that an existing +hosts+ method returns an
|
7
|
+
# Array of Zabbix hosts that can be grouped into clusters.
|
8
8
|
#
|
9
|
-
#
|
10
|
-
# on the cluster.
|
11
|
-
#
|
12
|
-
# Here's an example of a script which finds the average uptime of
|
13
|
-
# nodes a value of 'bar' set for property 'foo', grouped by cluster.
|
9
|
+
# Here's an example:
|
14
10
|
#
|
15
11
|
# #!/usr/bin/env ruby
|
16
|
-
# # in cluster_uptime_monitor
|
17
|
-
#
|
18
|
-
# class ClusterUptimeMonitor < Rubix::ClusterMonitor
|
19
12
|
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
#
|
23
|
-
#
|
13
|
+
# class ClusterPingMonitor < Rubix::Monitor
|
14
|
+
#
|
15
|
+
# include Rubix::ClusterMonitor
|
16
|
+
#
|
24
17
|
# def measure_cluster cluster_name
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
#
|
29
|
-
#
|
30
|
-
# data << ['uptime.average', average_uptime]
|
18
|
+
# total_ping = 0.0
|
19
|
+
# num_hosts = 0
|
20
|
+
# hosts_by_cluster[cluster_name].each do |host|
|
21
|
+
# total_ping += measure_host(host)
|
22
|
+
# num_hosts += 1
|
31
23
|
# end
|
24
|
+
# write [cluster_name, 'average_ping', total_ping / num_hosts] unless num_hosts == 0
|
25
|
+
# end
|
26
|
+
#
|
27
|
+
# def measure_host host
|
28
|
+
# ping = measure_ping_to(host.ip)
|
29
|
+
# write [host.name, 'ping', ping]
|
30
|
+
# ping # return this so the measure_cluster method can use it
|
32
31
|
# end
|
33
32
|
# end
|
34
33
|
#
|
35
|
-
#
|
34
|
+
# ClusterPingMonitor.run if $0 == __FILE__
|
36
35
|
#
|
37
|
-
#
|
38
|
-
#
|
39
|
-
|
36
|
+
# You may want to override the +cluster_name_from_host+ method. By
|
37
|
+
# default it assumes that hosts in Zabbix are named
|
38
|
+
# 'cluster-facet-index', a la Ironfan.
|
39
|
+
module ClusterMonitor
|
40
|
+
|
41
|
+
# The name of the default cluster.
|
42
|
+
DEFAULT_CLUSTER = 'All Hosts'
|
43
|
+
|
44
|
+
attr_reader :hosts_by_cluster
|
40
45
|
|
41
|
-
|
46
|
+
def default_cluster
|
47
|
+
::Rubix::ClusterMonitor::DEFAULT_CLUSTER
|
48
|
+
end
|
42
49
|
|
43
50
|
def initialize settings
|
44
51
|
super(settings)
|
45
|
-
|
52
|
+
@hosts_by_cluster = {}
|
53
|
+
group_hosts_by_cluster
|
46
54
|
end
|
47
55
|
|
48
|
-
def
|
49
|
-
|
56
|
+
def measure
|
57
|
+
clusters.each do |cluster_name|
|
58
|
+
measure_cluster(cluster_name)
|
59
|
+
end
|
50
60
|
end
|
51
61
|
|
52
|
-
def
|
53
|
-
|
62
|
+
def group_hosts_by_cluster
|
63
|
+
hosts.each do |host|
|
64
|
+
cluster_name = cluster_name_from_host(host)
|
65
|
+
@hosts_by_cluster[cluster_name] ||= []
|
66
|
+
@hosts_by_cluster[cluster_name] << host
|
67
|
+
end
|
54
68
|
end
|
55
69
|
|
56
|
-
def
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
@nodes_by_cluster[node['cluster_name']] ||= []
|
64
|
-
|
65
|
-
@all_nodes_by_cluster[node['cluster_name']] << node
|
66
|
-
@nodes_by_cluster[node['cluster_name']] << node unless %w[stopped].include?(node['state'])
|
67
|
-
|
68
|
-
|
69
|
-
@all_private_ips_by_cluster[node['cluster_name']] ||= []
|
70
|
-
@private_ips_by_cluster[node['cluster_name']] ||= []
|
71
|
-
|
72
|
-
@all_private_ips_by_cluster[node['cluster_name']] << node['ipaddress']
|
73
|
-
@private_ips_by_cluster[node['cluster_name']] << node['ipaddress'] unless %w[stopped].include?(node['state'])
|
70
|
+
def cluster_name_from_host host
|
71
|
+
return default_cluster if host.name.nil? || host.name.empty?
|
72
|
+
parts = host.name.split("-")
|
73
|
+
if parts.size == 3
|
74
|
+
parts.first
|
75
|
+
else
|
76
|
+
default_cluster
|
74
77
|
end
|
75
78
|
end
|
76
79
|
|
77
80
|
def clusters
|
78
|
-
|
79
|
-
end
|
80
|
-
|
81
|
-
def measure
|
82
|
-
clusters.each do |cluster_name|
|
83
|
-
measure_cluster(cluster_name)
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
def measure_cluster cluster_name
|
88
|
-
raise NotImplementedError.new("Override the 'measure_cluster' method to make measurements of a given cluster.")
|
81
|
+
@hosts_by_cluster.keys
|
89
82
|
end
|
90
83
|
|
91
84
|
end
|
92
|
-
|
93
85
|
end
|
@@ -55,7 +55,8 @@ module Rubix
|
|
55
55
|
#
|
56
56
|
|
57
57
|
def self.default_settings
|
58
|
-
Configliere::Param.new.tap do |s|
|
58
|
+
@default_settings ||= Configliere::Param.new.tap do |s|
|
59
|
+
|
59
60
|
s.use :commandline
|
60
61
|
|
61
62
|
s.define :loop, :description => "Run every this many seconds", :required => false, :type => Integer
|
@@ -63,10 +64,10 @@ module Rubix
|
|
63
64
|
# The following options are only used when sending directly
|
64
65
|
# with <tt>zabbix_sender</tt>
|
65
66
|
s.define :server, :description => "IP of a Zabbix server", :required => false, :default => 'localhost'
|
66
|
-
s.define :port, :description => "Port of a Zabbix server", :required => false, :
|
67
|
-
s.define :host, :description => "Name of a Zabbix host", :required => false
|
67
|
+
s.define :port, :description => "Port of a Zabbix server", :required => false, :default => 10051, :type => Integer
|
68
|
+
s.define :host, :description => "Name of a Zabbix host", :required => false, :default => ENV["HOSTNAME"]
|
68
69
|
s.define :config, :description => "Local Zabbix agentd configuration file", :required => false, :default => "/etc/zabbix/zabbix_agentd.conf"
|
69
|
-
s.define :send, :description => "Send data directlyt to Zabbix using 'zabbix_sender'", :required => false, :
|
70
|
+
s.define :send, :description => "Send data directlyt to Zabbix using 'zabbix_sender'", :required => false, :default => false, :type => :boolean
|
70
71
|
end
|
71
72
|
end
|
72
73
|
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module Rubix
|
2
|
+
|
3
|
+
# A module for finding hosts for a monitor from Zabbix templates or
|
4
|
+
# host groups.
|
5
|
+
#
|
6
|
+
# Here's an example of a monitor which makes a measurement of all
|
7
|
+
# hosts with +Template_Foo+ by making a web request to the physical
|
8
|
+
# host.
|
9
|
+
#
|
10
|
+
# #!/usr/bin/env ruby
|
11
|
+
# # in foo_monitor
|
12
|
+
#
|
13
|
+
# class FooMonitor < Rubix::Monitor
|
14
|
+
#
|
15
|
+
# include Rubix::ZabbixMonitor
|
16
|
+
#
|
17
|
+
# # Define either 'template_name' or 'host_group_name' to select hosts (or
|
18
|
+
# # define 'hosts').
|
19
|
+
# def template_name
|
20
|
+
# 'Template_Foo'
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# def measure
|
24
|
+
# self.hosts.each do |host|
|
25
|
+
# measure_host(host)
|
26
|
+
# end
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# def measure_host host
|
30
|
+
# ...
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# FooMonitor.run if $0 == __FILE__
|
34
|
+
module ZabbixMonitor
|
35
|
+
|
36
|
+
attr_accessor :template, :host_group, :hosts
|
37
|
+
|
38
|
+
def self.included klass
|
39
|
+
klass.default_settings.tap do |s|
|
40
|
+
s.define :zabbix_api_url, :description => "Zabbix API URL" , :required => true, :default => 'http://localhost/api_jsonrpc.php'
|
41
|
+
s.define :username, :description => "Username for Zabbix API", :required => true, :default => 'admin'
|
42
|
+
s.define :password, :description => "Password for Zabbix API", :required => true, :default => 'zabbix'
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def initialize settings
|
47
|
+
super(settings)
|
48
|
+
Rubix.connect(settings[:zabbix_api_url], settings[:username], settings[:password])
|
49
|
+
find_hosts
|
50
|
+
end
|
51
|
+
|
52
|
+
def template_name
|
53
|
+
end
|
54
|
+
|
55
|
+
def host_group_name
|
56
|
+
end
|
57
|
+
|
58
|
+
def find_hosts
|
59
|
+
case
|
60
|
+
when template_name
|
61
|
+
self.template = Rubix::Template.find(:name => template_name)
|
62
|
+
self.hosts = Rubix::Host.list(self.template.host_ids).find_all(&:monitored)
|
63
|
+
when host_group_name
|
64
|
+
self.host_group = Rubix::HostGroup.find(:name => host_group_name)
|
65
|
+
self.hosts = Rubix::Host.list(self.host_group.host_ids).find_all(&:monitored)
|
66
|
+
else
|
67
|
+
raise Rubix::Error.new("Must define either a 'template_name' or a 'host_group_name' property for a Zabbix monitor.")
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -5,7 +5,6 @@ describe "Hosts" do
|
|
5
5
|
before do
|
6
6
|
integration_test
|
7
7
|
@host_group_1 = ensure_save(Rubix::HostGroup.new(:name => 'rubix_spec_host_group_1'))
|
8
|
-
|
9
8
|
end
|
10
9
|
|
11
10
|
after do
|
@@ -18,6 +17,14 @@ describe "Hosts" do
|
|
18
17
|
Rubix::Host.find(:name => 'rubix_spec_host_1').should be_nil
|
19
18
|
end
|
20
19
|
|
20
|
+
it "returns an empty array when listing without IDs" do
|
21
|
+
Rubix::Host.list([1,2,3]).should == []
|
22
|
+
end
|
23
|
+
|
24
|
+
it "returns an empty array when listing with IDs" do
|
25
|
+
Rubix::Host.list([1,2,3]).should == []
|
26
|
+
end
|
27
|
+
|
21
28
|
it "can be created" do
|
22
29
|
host = Rubix::Host.new(:name => 'rubix_spec_host_1', :host_groups => [@host_group_1])
|
23
30
|
host.save.should be_true
|
@@ -34,6 +41,13 @@ describe "Hosts" do
|
|
34
41
|
@template_2 = ensure_save(Rubix::Template.new(:name => 'rubix_spec_template_2', :host_groups => [@host_group_2]))
|
35
42
|
end
|
36
43
|
|
44
|
+
it "can be listed by ID" do
|
45
|
+
hosts = Rubix::Host.list([@host.id])
|
46
|
+
hosts.should_not be_nil
|
47
|
+
hosts.should_not be_empty
|
48
|
+
hosts.first.name.should == @host.name
|
49
|
+
end
|
50
|
+
|
37
51
|
it "can have its name changed" do
|
38
52
|
@host.name = 'rubix_spec_host_2'
|
39
53
|
@host.save
|
@@ -78,4 +92,5 @@ describe "Hosts" do
|
|
78
92
|
end
|
79
93
|
|
80
94
|
end
|
95
|
+
|
81
96
|
end
|
@@ -2,10 +2,59 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Rubix::ChefMonitor do
|
4
4
|
|
5
|
-
|
6
|
-
|
5
|
+
def mock_query query, nodes=[]
|
6
|
+
require 'chef'
|
7
|
+
chef_query = mock("Chef::Search::Query")
|
8
|
+
::Chef::Search::Query.should_receive(:new).and_return(chef_query)
|
9
|
+
chef_query.should_receive(:search).with('node', query).and_return([nodes, nodes.length])
|
7
10
|
end
|
8
11
|
|
12
|
+
before do
|
13
|
+
@wrapper = Class.new(Rubix::Monitor)
|
14
|
+
@wrapper.send(:include, Rubix::ChefMonitor)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "has options for talking to Chef" do
|
18
|
+
@wrapper.default_settings.should include(:chef_server_url)
|
19
|
+
@wrapper.default_settings.should include(:chef_node_name)
|
20
|
+
@wrapper.default_settings.should include(:chef_client_key)
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "finding nodes in Chef" do
|
9
24
|
|
25
|
+
describe 'when a node exists' do
|
26
|
+
before do
|
27
|
+
@node = { 'node_name' => 'foobar', 'ipaddress' => '123', 'fdqn' => '456' }
|
28
|
+
end
|
29
|
+
|
30
|
+
it "can find it based on its node name" do
|
31
|
+
mock_query('name:foobar', [@node])
|
32
|
+
@wrapper.new(@wrapper.default_settings).chef_node_from_node_name('foobar').should == @node
|
33
|
+
end
|
34
|
+
|
35
|
+
it "can find it based on its IP" do
|
36
|
+
mock_query('ipaddress:123 OR fqdn:123', [@node])
|
37
|
+
@wrapper.new(@wrapper.default_settings).chef_node_from_ip('123').should == @node
|
38
|
+
end
|
39
|
+
|
40
|
+
it "can find it based on its FQDN" do
|
41
|
+
mock_query('ipaddress:456 OR fqdn:456', [@node])
|
42
|
+
@wrapper.new(@wrapper.default_settings).chef_node_from_ip('456').should == @node
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "when a node doesn't exist" do
|
47
|
+
|
48
|
+
it "returns nil when searching by node name" do
|
49
|
+
mock_query('name:foobar')
|
50
|
+
@wrapper.new(@wrapper.default_settings).chef_node_from_node_name('foobar').should be_nil
|
51
|
+
end
|
52
|
+
|
53
|
+
it "returns nil when searching by IP" do
|
54
|
+
mock_query('ipaddress:123 OR fqdn:123')
|
55
|
+
@wrapper.new(@wrapper.default_settings).chef_node_from_ip('123').should be_nil
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
10
59
|
end
|
11
60
|
|
@@ -2,32 +2,34 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Rubix::ClusterMonitor do
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
])
|
19
|
-
cm = Rubix::ClusterMonitor.new({})
|
20
|
-
cm.clusters.should include('foo', 'bar')
|
21
|
-
cm.private_ips_by_cluster.should == { 'foo' => ['123'], 'bar' => ['789'] }
|
22
|
-
cm.all_private_ips_by_cluster.should == { 'foo' => ['123', '456'], 'bar' => ['789', '321'] }
|
5
|
+
before do
|
6
|
+
@wrapper = Class.new(Rubix::Monitor)
|
7
|
+
@wrapper.class_eval do
|
8
|
+
include Rubix::ClusterMonitor
|
9
|
+
def hosts
|
10
|
+
[
|
11
|
+
Rubix::Host.new(:name => 'cluster1-facet1-host1'),
|
12
|
+
Rubix::Host.new(:name => 'cluster1-facet1-host2'),
|
13
|
+
Rubix::Host.new(:name => 'cluster1-facet2-host3'),
|
14
|
+
|
15
|
+
Rubix::Host.new(:name => 'cluster2-facet1-host1'),
|
16
|
+
Rubix::Host.new(:name => 'cluster2-facet1-host2'),
|
17
|
+
Rubix::Host.new(:name => 'cluster2-facet2-host3'),
|
23
18
|
|
24
|
-
|
25
|
-
|
19
|
+
Rubix::Host.new(:name => 'malformed')
|
20
|
+
]
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
26
24
|
|
27
|
-
|
28
|
-
|
29
|
-
|
25
|
+
it "should be able to filter hosts into clusters" do
|
26
|
+
monitor = @wrapper.new(@wrapper.default_settings)
|
27
|
+
monitor.clusters.should include('cluster1', 'cluster2', Rubix::ClusterMonitor::DEFAULT_CLUSTER)
|
28
|
+
monitor.hosts_by_cluster['cluster1'].map(&:name).should include('cluster1-facet1-host1', 'cluster1-facet1-host2', 'cluster1-facet2-host3')
|
29
|
+
monitor.hosts_by_cluster['cluster2'].map(&:name).should include('cluster2-facet1-host1', 'cluster2-facet1-host2', 'cluster2-facet2-host3')
|
30
|
+
monitor.hosts_by_cluster[Rubix::ClusterMonitor::DEFAULT_CLUSTER].map(&:name).should include('malformed')
|
30
31
|
end
|
32
|
+
|
31
33
|
|
32
34
|
|
33
35
|
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Rubix::ZabbixMonitor do
|
4
|
+
|
5
|
+
before do
|
6
|
+
@wrapper = Class.new(Rubix::Monitor)
|
7
|
+
@wrapper.class_eval do
|
8
|
+
include Rubix::ZabbixMonitor
|
9
|
+
end
|
10
|
+
@hosts = [Rubix::Host.new(:name => 'host1'), Rubix::Host.new(:name => 'host2'), Rubix::Host.new(:name => 'host3', :monitored => false)]
|
11
|
+
end
|
12
|
+
|
13
|
+
it "will raise an error when no template name or host group is defined" do
|
14
|
+
lambda { @wrapper.new(@wrapper.default_settings) }.should raise_error(Rubix::Error)
|
15
|
+
end
|
16
|
+
|
17
|
+
it "can find hosts based on a template" do
|
18
|
+
@wrapper.class_eval do
|
19
|
+
def template_name
|
20
|
+
'Template_Foo'
|
21
|
+
end
|
22
|
+
end
|
23
|
+
@template = Rubix::Template.new(:name => 'Template_Foo')
|
24
|
+
Rubix::Template.should_receive(:find).with(:name => 'Template_Foo').and_return(@template)
|
25
|
+
@template.should_receive(:host_ids).and_return([1,2,3])
|
26
|
+
Rubix::Host.should_receive(:list).with([1,2,3]).and_return(@hosts)
|
27
|
+
@wrapper.new(@wrapper.default_settings).hosts.should == @hosts[0..1]
|
28
|
+
end
|
29
|
+
|
30
|
+
it "can find hosts based on a host group" do
|
31
|
+
@wrapper.class_eval do
|
32
|
+
def host_group_name
|
33
|
+
'Foos'
|
34
|
+
end
|
35
|
+
end
|
36
|
+
@host_group = Rubix::HostGroup.new(:name => 'Foos')
|
37
|
+
Rubix::HostGroup.should_receive(:find).with(:name => 'Foos').and_return(@host_group)
|
38
|
+
@host_group.should_receive(:host_ids).and_return([1,2,3])
|
39
|
+
Rubix::Host.should_receive(:list).with([1,2,3]).and_return(@hosts)
|
40
|
+
@wrapper.new(@wrapper.default_settings).hosts.should == @hosts[0..1]
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
|
46
|
+
|
@@ -1,8 +1,12 @@
|
|
1
1
|
module Rubix
|
2
2
|
module IntegrationHelper
|
3
|
-
|
3
|
+
|
4
4
|
def integration_test
|
5
|
-
|
5
|
+
if $RUBIX_INTEGRATION_TEST
|
6
|
+
Rubix.connect($RUBIX_INTEGRATION_TEST['url'], $RUBIX_INTEGRATION_TEST['username'], $RUBIX_INTEGRATION_TEST['password'])
|
7
|
+
else
|
8
|
+
pending("A live Zabbix API to test against")
|
9
|
+
end
|
6
10
|
end
|
7
11
|
|
8
12
|
def ensure_save(obj)
|
@@ -61,7 +65,7 @@ module Rubix
|
|
61
65
|
|
62
66
|
truncate_all_tables
|
63
67
|
|
64
|
-
$RUBIX_INTEGRATION_TEST =
|
68
|
+
$RUBIX_INTEGRATION_TEST = api_connection
|
65
69
|
end
|
66
70
|
|
67
71
|
RUBIX_TABLES_TO_TRUNCATE = %w[applications groups hostmacro hosts hosts_groups hosts_profiles hosts_profiles_ext hosts_templates items items_applications profiles triggers trigger_depends]
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version: 0.
|
7
|
+
- 4
|
8
|
+
- 0
|
9
|
+
version: 0.4.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Dhruv Bansal
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2012-02-
|
17
|
+
date: 2012-02-13 00:00:00 -06:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -96,10 +96,10 @@ files:
|
|
96
96
|
- lib/rubix/models/model.rb
|
97
97
|
- lib/rubix/models/time_series.rb
|
98
98
|
- lib/rubix/models/application.rb
|
99
|
-
- lib/rubix/examples/
|
100
|
-
- lib/rubix/examples/
|
101
|
-
- lib/rubix/examples/
|
102
|
-
- lib/rubix/examples/
|
99
|
+
- lib/rubix/examples/simple_zabbix_monitor.rb
|
100
|
+
- lib/rubix/examples/simple_uptime_monitor.rb
|
101
|
+
- lib/rubix/examples/simple_cluster_monitor.rb
|
102
|
+
- lib/rubix/examples/simple_chef_monitor.rb
|
103
103
|
- lib/rubix/auto_sender.rb
|
104
104
|
- lib/rubix/sender.rb
|
105
105
|
- lib/rubix/log.rb
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- lib/rubix/associations/has_many_user_macros.rb
|
118
118
|
- lib/rubix/monitors/chef_monitor.rb
|
119
119
|
- lib/rubix/monitors/cluster_monitor.rb
|
120
|
+
- lib/rubix/monitors/zabbix_monitor.rb
|
120
121
|
- lib/rubix/monitors/monitor.rb
|
121
122
|
- lib/rubix/connection.rb
|
122
123
|
- lib/rubix/associations.rb
|
@@ -124,6 +125,7 @@ files:
|
|
124
125
|
- spec/rubix/auto_sender_spec.rb
|
125
126
|
- spec/rubix/monitors/monitor_spec.rb
|
126
127
|
- spec/rubix/monitors/chef_monitor_spec.rb
|
128
|
+
- spec/rubix/monitors/zabbix_monitor_spec.rb
|
127
129
|
- spec/rubix/monitors/cluster_monitor_spec.rb
|
128
130
|
- spec/rubix/response_spec.rb
|
129
131
|
- spec/rubix/sender_spec.rb
|
@@ -1,134 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
RUBIX_ROOT = File.expand_path('../../../../lib', __FILE__)
|
4
|
-
$: << RUBIX_ROOT unless $:.include?(RUBIX_ROOT)
|
5
|
-
|
6
|
-
require 'rubix'
|
7
|
-
require 'open-uri'
|
8
|
-
|
9
|
-
class ESMonitor < Rubix::ClusterMonitor
|
10
|
-
|
11
|
-
# Hostgroup for any hosts that needs to be created.
|
12
|
-
CLUSTER_HOSTGROUPS = 'Elasticsearch clusters'
|
13
|
-
|
14
|
-
# Templates for any hosts that need to be created.
|
15
|
-
CLUSTER_TEMPLATES = 'Template_Elasticsearch_Cluster'
|
16
|
-
NODE_TEMPLATES = 'Template_Elasticsearch_Node'
|
17
|
-
|
18
|
-
# Applications for new items
|
19
|
-
CLUSTER_APPLICATIONS = '_cluster'
|
20
|
-
NODE_APPLICATIONS = 'Elasticsearch'
|
21
|
-
|
22
|
-
def node_query
|
23
|
-
'provides_service:*-elasticsearch'
|
24
|
-
end
|
25
|
-
|
26
|
-
def es_url private_ip, *args
|
27
|
-
"http://" + File.join(private_ip + ":9200", *args)
|
28
|
-
end
|
29
|
-
|
30
|
-
def measure_cluster cluster_name
|
31
|
-
measured_cluster_health = false
|
32
|
-
measured_cluster_indices = false
|
33
|
-
measured_cluster_nodes = false
|
34
|
-
private_ips_by_cluster[cluster_name].each do |private_ip|
|
35
|
-
measured_cluster_health = measure_cluster_health(cluster_name, private_ip) unless measured_cluster_health
|
36
|
-
measured_cluster_indices = measure_cluster_indices(cluster_name, private_ip) unless measured_cluster_indices
|
37
|
-
measured_cluster_nodes = measure_cluster_nodes(cluster_name, private_ip) unless measured_cluster_nodes
|
38
|
-
break if measured_cluster_health && measured_cluster_indices && measured_cluster_nodes
|
39
|
-
end
|
40
|
-
end
|
41
|
-
|
42
|
-
# Measure the cluster health metrics -- /_cluster/health
|
43
|
-
def measure_cluster_health cluster_name, private_ip
|
44
|
-
begin
|
45
|
-
cluster_health = JSON.parse(open(es_url(private_ip, '_cluster', 'health')).read)
|
46
|
-
rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
|
47
|
-
# This node may not be running a webnode...
|
48
|
-
return false
|
49
|
-
end
|
50
|
-
write({
|
51
|
-
:hostname => "#{cluster_name}-elasticsearch",
|
52
|
-
:hostgroup => self.class::CLUSTER_HOSTGROUPS,
|
53
|
-
:templates => self.class::CLUSTER_TEMPLATES,
|
54
|
-
:application => self.class::CLUSTER_APPLICATIONS
|
55
|
-
}) do |d|
|
56
|
-
d << ['status', cluster_health['status'] ]
|
57
|
-
d << ['nodes.total', cluster_health['number_of_nodes'] ]
|
58
|
-
d << ['nodes.data', cluster_health['number_of_data_nodes'] ]
|
59
|
-
d << ['shards.active', cluster_health['active_shards'] ]
|
60
|
-
d << ['shards.relocating', cluster_health['relocating_shards'] ]
|
61
|
-
d << ['shards.unassigned', cluster_health['unassigned_shards'] ]
|
62
|
-
d << ['shards.initializing', cluster_health['initializing_shards'] ]
|
63
|
-
end
|
64
|
-
true
|
65
|
-
end
|
66
|
-
|
67
|
-
def measure_cluster_indices cluster_name, private_ip
|
68
|
-
begin
|
69
|
-
index_data = JSON.parse(open(es_url(private_ip, '_status')).read)
|
70
|
-
rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
|
71
|
-
# This node may not be running a webnode...
|
72
|
-
return false
|
73
|
-
end
|
74
|
-
index_data['indices'].each_pair do |index_name, index_data|
|
75
|
-
write({
|
76
|
-
:hostname => "#{cluster_name}-elasticsearch",
|
77
|
-
:hostgroup => self.class::CLUSTER_HOSTGROUPS,
|
78
|
-
:templates => self.class::CLUSTER_TEMPLATES,
|
79
|
-
:appliation => index_name
|
80
|
-
}) do |d|
|
81
|
-
d << ["#{index_name}.size", index_data["index"]["size_in_bytes"] ]
|
82
|
-
d << ["#{index_name}.docs.num", index_data["docs"]["num_docs"] ]
|
83
|
-
d << ["#{index_name}.docs.max", index_data["docs"]["max_doc"] ]
|
84
|
-
d << ["#{index_name}.docs.deleted", index_data["docs"]["deleted_docs"] ]
|
85
|
-
d << ["#{index_name}.operations", index_data["translog"]["operations"] ]
|
86
|
-
d << ["#{index_name}.merges.total", index_data["merges"]["total"] ]
|
87
|
-
d << ["#{index_name}.merges.current", index_data["merges"]["current"] ]
|
88
|
-
end
|
89
|
-
end
|
90
|
-
true
|
91
|
-
end
|
92
|
-
|
93
|
-
def measure_cluster_nodes cluster_name, private_ip
|
94
|
-
begin
|
95
|
-
nodes_data = JSON.parse(open(es_url(private_ip, '_cluster', 'nodes')).read)
|
96
|
-
nodes_stats_data = JSON.parse(open(es_url(private_ip, '_cluster', 'nodes', 'stats')).read)
|
97
|
-
rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
|
98
|
-
# This node may not be running a webnode...
|
99
|
-
return false
|
100
|
-
end
|
101
|
-
|
102
|
-
nodes_stats_data['nodes'].each_pair do |id, stats|
|
103
|
-
|
104
|
-
ip = nodes_data['nodes'][id]['network']['primary_interface']['address']
|
105
|
-
node_name = chef_node_name_from_ip(ip)
|
106
|
-
next unless node_name
|
107
|
-
write({
|
108
|
-
:hostname => node_name,
|
109
|
-
:templates => self.class::NODE_TEMPLATES,
|
110
|
-
:application => self.class::NODE_APPLICATIONS
|
111
|
-
}) do |d|
|
112
|
-
# concurrency
|
113
|
-
d << ['es.jvm.threads.count', stats['jvm']['threads']['count'] ]
|
114
|
-
|
115
|
-
# garbage collection
|
116
|
-
d << ['es.jvm.gc.coll_time', stats['jvm']['gc']['collection_time_in_millis'] ]
|
117
|
-
d << ['es.jvm.gc.coll_count', stats['jvm']['gc']['collection_count'] ]
|
118
|
-
|
119
|
-
# memory
|
120
|
-
d << ['es.jvm.mem.heap_used', stats['jvm']['mem']['heap_used_in_bytes'] ]
|
121
|
-
d << ['es.jvm.mem.non_heap_used', stats['jvm']['mem']['non_heap_used_in_bytes'] ]
|
122
|
-
d << ['es.jvm.mem.heap_comm', stats['jvm']['mem']['heap_committed_in_bytes'] ]
|
123
|
-
d << ['es.jvm.mem.non_heap_comm', stats['jvm']['mem']['non_heap_committed_in_bytes'] ]
|
124
|
-
|
125
|
-
# indices
|
126
|
-
d << ['es.indices.size', stats['indices']['size_in_bytes'] ]
|
127
|
-
end
|
128
|
-
end
|
129
|
-
true
|
130
|
-
end
|
131
|
-
|
132
|
-
end
|
133
|
-
|
134
|
-
ESMonitor.run if $0 == __FILE__
|
@@ -1,94 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
RUBIX_ROOT = File.expand_path('../../../../lib', __FILE__)
|
4
|
-
$: << RUBIX_ROOT unless $:.include?(RUBIX_ROOT)
|
5
|
-
|
6
|
-
require 'rubix'
|
7
|
-
require 'net/http'
|
8
|
-
require 'crack'
|
9
|
-
|
10
|
-
# Monitors HBase clusters by querying the <tt>/status/cluster</tt> resource of
# an HBase REST ("stargate") endpoint and reporting cluster-wide and per-node
# metrics through +write+.
#
# Chef is searched for nodes providing an +hbase-stargate+ service. For each
# cluster the private IPs are tried in order until one of them yields a
# successful measurement.
class HBaseMonitor < Rubix::ClusterMonitor

  # Hostgroups for clusters & hosts that need to be created.
  CLUSTER_HOSTGROUPS = 'HBase clusters'

  # Templates for any hosts that need to be created.
  CLUSTER_TEMPLATES = 'Template_HBase_Cluster'
  NODE_TEMPLATES    = 'Template_HBase_Node'

  # Applications for items that are written
  CLUSTER_APPLICATIONS = '_cluster'
  NODE_APPLICATIONS    = "Hbase"

  # TCP port the REST endpoint is queried on.  Subclasses may override this
  # constant; it is looked up via <tt>self.class::STARGATE_PORT</tt>.
  STARGATE_PORT = 8080

  # Chef search used to discover the nodes that belong to HBase clusters.
  def matching_chef_nodes
    Chef::Search::Query.new.search('node', 'provides_service:*hbase-stargate AND facet_name:alpha')
  end

  # Measure one cluster, stopping at the first private IP whose status
  # request succeeds.
  #
  # +private_ips_by_cluster+ is not defined in this class (presumably it is
  # inherited from Rubix::ClusterMonitor).
  def measure_cluster(cluster_name)
    private_ips_by_cluster[cluster_name].each do |private_ip|
      break if measure_cluster_status(cluster_name, private_ip)
    end
  end

  # Measure the cluster health metrics -- /status/cluster
  #
  # Returns +true+ when the status document was fetched, parsed and written;
  # +false+ when the request fails, answers with a non-200 code, or the
  # response does not have the expected structure.
  def measure_cluster_status(cluster_name, private_ip)
    begin
      connection = Net::HTTP.new(private_ip, self.class::STARGATE_PORT)
      request    = Net::HTTP::Get.new('/status/cluster', 'Accept' => 'text/xml')
      response   = connection.request(request)
      return false unless response.code.to_i == 200

      data           = Crack::XML.parse(response.body)
      cluster_status = data['ClusterStatus']

      # Crack only returns an Array when an element is repeated; a single
      # <Node> comes back as a bare Hash/String and a missing one as nil.
      # Normalise so that +size+ and +each+ below always act on a list.
      dead_nodes = as_list(cluster_status['DeadNodes'] ? cluster_status['DeadNodes']['Node'] : nil)
      live_nodes = as_list(cluster_status['LiveNodes']['Node'])
    rescue NoMethodError, SocketError, REXML::ParseException, Errno::ECONNREFUSED
      # Unreachable endpoint or unexpected document shape: let the caller
      # try the next IP.
      return false
    end

    write({
      :hostname    => "#{cluster_name}-hbase",
      :hostgroup   => self.class::CLUSTER_HOSTGROUPS,
      :application => self.class::CLUSTER_APPLICATIONS,
      :templates   => self.class::CLUSTER_TEMPLATES
    }) do |d|
      d << ['requests',    cluster_status['requests']]
      d << ['regions',     cluster_status['regions']]
      d << ['load',        cluster_status['averageLoad']]
      d << ['nodes.dead',  dead_nodes.size]
      d << ['nodes.alive', live_nodes.size]
    end
    measure_cluster_tables(cluster_name, data)
    measure_cluster_nodes(cluster_name, live_nodes)
    true
  end

  # Placeholder: no per-table metrics are collected yet.
  def measure_cluster_tables(cluster_name, data)
    # FIXME...not sure how best to get information about "tables" in HBase...
  end

  # Write per-node metrics for every live node that can be mapped back to a
  # Chef node name.
  #
  # +live_nodes+ is the (possibly single, possibly nil) <tt>LiveNodes/Node</tt>
  # value from the parsed status document.
  def measure_cluster_nodes(cluster_name, live_nodes)
    as_list(live_nodes).each do |live_node|
      next unless live_node
      # The node's 'name' attribute is "host:port"; only the host part is used.
      ip        = (live_node['name'] || '').split(':').first
      node_name = chef_node_name_from_ip(ip)
      next unless node_name
      write({
        :hostname    => node_name,
        :application => self.class::NODE_APPLICATIONS,
        :templates   => self.class::NODE_TEMPLATES
      }) do |d|
        # A node hosting exactly one region yields a Hash, not an Array.
        d << ['hbase.regions',   as_list(live_node['Region']).size]
        d << ['hbase.heap_size', live_node['heapSizeMB']]
        d << ['hbase.requests',  live_node['requests']]
      end
    end
  end

  private

  # Coerce a parsed XML child value into an Array: nil becomes [], an Array
  # is returned unchanged, anything else is wrapped.  Kernel#Array is not
  # used because it would explode a Hash into key/value pairs.
  def as_list(value)
    case value
    when nil   then []
    when Array then value
    else            [value]
    end
  end

end
|
93
|
-
|
94
|
-
# Start the monitor only when this file is executed directly, not when it is required.
HBaseMonitor.run if __FILE__ == $PROGRAM_NAME
|
@@ -1,130 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
RUBIX_ROOT = File.expand_path('../../../../lib', __FILE__)
|
4
|
-
$: << RUBIX_ROOT unless $:.include?(RUBIX_ROOT)
|
5
|
-
|
6
|
-
require 'rubix'
|
7
|
-
require 'open-uri'
|
8
|
-
require 'set'
|
9
|
-
require 'mongo'
|
10
|
-
|
11
|
-
# Monitors MongoDB clusters: server-wide metrics from the +serverStatus+
# command and per-database statistics, both reported through +write+ under
# the host "<cluster_name>-mongodb".
class MongoMonitor < Rubix::ClusterMonitor

  # Hostgroup for any hosts that needs to be created.
  CLUSTER_HOSTGROUPS = 'MongoDB clusters'

  # Templates for any hosts that need to be created.
  CLUSTER_TEMPLATES = 'Template_MongoDB'

  # Applications
  CLUSTER_APPLICATIONS = '_cluster'

  # Names of database to ignore when we find them.
  IGNORED_DATABASES = %w[db test admin local].to_set

  # Chef search used to discover the nodes that run a MongoDB server.
  def matching_chef_nodes
    Chef::Search::Query.new.search('node', 'provides_service:*-mongodb-server')
  end

  # Measure one cluster.  Private IPs are tried in order; each measurement
  # (server, databases) is taken from the first connection on which it
  # succeeds, and iteration stops once both have been taken.
  #
  # +private_ips_by_cluster+ is not defined in this class (presumably it is
  # inherited from Rubix::ClusterMonitor).
  def measure_cluster(cluster_name)
    measured_mongo_server    = false
    measured_mongo_databases = false
    private_ips_by_cluster[cluster_name].each do |private_ip|
      begin
        connection = Mongo::Connection.new(private_ip)
      rescue Mongo::ConnectionFailure
        next
      end

      begin
        measured_mongo_server    ||= measure_mongo_server(cluster_name, connection)
        measured_mongo_databases ||= measure_mongo_databases(cluster_name, connection)
      ensure
        # Always release the connection, otherwise one is leaked per IP on
        # every measurement pass.
        connection.close
      end

      break if measured_mongo_server && measured_mongo_databases
    end
  end

  # Take two +serverStatus+ samples one second apart and write rates and
  # gauges derived from them.
  #
  # Returns +true+ when metrics were written, +false+ when a sample is
  # missing or the interval between the samples is not positive.
  def measure_mongo_server(cluster_name, connection)
    db      = connection.db('system')     # the name of this db doesn't matter?
    command = {:serverStatus => true}     # the value of the 'serverStatus' key doesn't matter?

    # gather metrics with a 1.0 second gap
    initial = db.command(command)
    sleep 1.0
    final   = db.command(command)
    return false unless initial && final

    # Interval between the samples as reported by the server itself.
    dt = final['localTime'].to_f - initial['localTime'].to_f
    # A zero or negative interval would turn every rate into Infinity/NaN.
    return false unless dt > 0

    write({
      :hostname    => "#{cluster_name}-mongodb",
      :hostgroup   => self.class::CLUSTER_HOSTGROUPS,
      :templates   => self.class::CLUSTER_TEMPLATES,
      :application => self.class::CLUSTER_APPLICATIONS
    }) do |d|

      # operations
      d << ['inserts',  per_second(initial, final, dt, 'opcounters', 'insert')]
      d << ['queries',  per_second(initial, final, dt, 'opcounters', 'query')]
      d << ['updates',  per_second(initial, final, dt, 'opcounters', 'update')]
      d << ['deletes',  per_second(initial, final, dt, 'opcounters', 'delete')]
      d << ['getmores', per_second(initial, final, dt, 'opcounters', 'getmore')]
      d << ['commands', per_second(initial, final, dt, 'opcounters', 'command')]

      # memory
      d << ['mem.resident', final['mem']['resident']]
      d << ['mem.virtual',  final['mem']['virtual']]
      d << ['mem.mapped',   final['mem']['mapped']]

      # disk
      d << ['flushes',    per_second(initial, final, dt, 'backgroundFlushing', 'flushes')]
      # NOTE(review): flush_time is a raw delta, not divided by dt like the
      # other counters -- kept as in the original; confirm this is intended.
      d << ['flush_time', (final['backgroundFlushing']['total_ms'] - initial['backgroundFlushing']['total_ms'])]
      d << ['faults',     per_second(initial, final, dt, 'extra_info', 'page_faults')]

      # index
      d << ['accesses', per_second(initial, final, dt, 'indexCounters', 'btree', 'accesses')]
      d << ['hits',     per_second(initial, final, dt, 'indexCounters', 'btree', 'hits')]
      d << ['misses',   per_second(initial, final, dt, 'indexCounters', 'btree', 'misses')]
      d << ['resets',   per_second(initial, final, dt, 'indexCounters', 'btree', 'resets')]

      # read/write load
      d << ['queue.total',   final['globalLock']['currentQueue']['total']]
      d << ['queue.read',    final['globalLock']['currentQueue']['readers']]
      d << ['queue.write',   final['globalLock']['currentQueue']['writers']]
      d << ['clients.total', final['globalLock']['activeClients']['total']]
      d << ['clients.read',  final['globalLock']['activeClients']['readers']]
      d << ['clients.write', final['globalLock']['activeClients']['writers']]

      # network
      d << ['net.in',      per_second(initial, final, dt, 'network', 'bytesIn')]
      d << ['net.out',     per_second(initial, final, dt, 'network', 'bytesOut')]
      d << ['requests',    per_second(initial, final, dt, 'network', 'numRequests')]
      d << ['connections', final['connections']['current']]
    end
    true
  end

  # Write statistics for every database on the connection whose name is not
  # listed in IGNORED_DATABASES.  Each database is reported under its own
  # application name.
  #
  # Always returns +true+ (also when there are no databases at all).
  def measure_mongo_databases(cluster_name, connection)
    dbs = connection.database_names
    return true if dbs.empty? # nothing to do here

    dbs.each do |database_name|
      next if self.class::IGNORED_DATABASES.include?(database_name.downcase)
      stats = connection.db(database_name).stats

      write({
        :hostname    => "#{cluster_name}-mongodb",
        :hostgroup   => self.class::CLUSTER_HOSTGROUPS,
        :templates   => self.class::CLUSTER_TEMPLATES,
        :application => database_name
      }) do |d|
        d << ["#{database_name}.collections",      stats["collections"]]
        d << ["#{database_name}.objects.count",    stats["objects"]]
        d << ["#{database_name}.objects.avg_size", stats["avgObjSize"]]
        d << ["#{database_name}.size.data",        stats["dataSize"]]
        d << ["#{database_name}.size.disk",        stats["storageSize"]]
        d << ["#{database_name}.size.indexes",     stats["indexSize"]]
        d << ["#{database_name}.size.file",        stats["fileSize"]]
        d << ["#{database_name}.extents",          stats["numExtents"]]
        d << ["#{database_name}.indexes",          stats["indexes"]]
      end
    end
    true
  end

  private

  # Rate of change of the counter found at the nested key +path+ between the
  # +initial+ and +final+ status documents, over +dt+ seconds.
  def per_second(initial, final, dt, *path)
    after  = path.inject(final)   { |doc, key| doc[key] }
    before = path.inject(initial) { |doc, key| doc[key] }
    (after - before) / dt
  end

end
|
129
|
-
|
130
|
-
# Start the monitor only when this file is executed directly, not when it is required.
MongoMonitor.run if __FILE__ == $PROGRAM_NAME
|