rubix 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,111 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+ require 'json'
4
+
5
+ require 'rubix/log'
6
+
7
module Rubix

  # A connection to the Zabbix web API.
  #
  # Wraps a Net::HTTP server, authenticates via the API's 'user.login'
  # method, tags each request with an increasing ID, and retries
  # requests that fail with server (5xx) errors.
  class Connection

    include Logs

    # uri        -- the URI of the Zabbix API
    # server     -- the Net::HTTP instance used to talk to the API
    # auth       -- the authentication token returned by 'user.login'
    # request_id -- the ID of the most recently sent request
    attr_reader :uri, :server, :auth, :request_id, :username, :password

    # Create a new connection to a Zabbix API.
    #
    # @param [URI, String] uri_or_string the address of the Zabbix API
    # @param [String, nil] username defaults to any user embedded in the URI
    # @param [String, nil] password defaults to any password embedded in the URI
    def initialize uri_or_string, username=nil, password=nil
      self.uri    = uri_or_string
      @username   = username || uri.user
      @password   = password || uri.password
      @request_id = 0
    end

    # Point this connection at a new address, rebuilding the HTTP
    # server used to talk to it.  Bare strings without a scheme are
    # assumed to be HTTP.
    def uri= uri_or_string
      if uri_or_string.respond_to?(:host)
        @uri = uri_or_string
      else
        string = uri_or_string =~ /^http/ ? uri_or_string : 'http://' + uri_or_string.to_s
        @uri = URI.parse(string)
      end
      @server = Net::HTTP.new(uri.host, uri.port)
    end

    # Has this connection already authenticated and received a token?
    def authorized?
      !auth.nil?
    end

    # The JSON-RPC parameters used to authenticate with the API.
    def authorization_params
      {
        :jsonrpc => "2.0",
        :id      => request_id,
        :method  => "user.login",
        :params  => {
          :user     => username,
          :password => password
        }
      }
    end

    # Log in, caching the authentication token for later requests.
    #
    # @raise [AuthenticationError] if the API rejects the credentials
    #   or returns a response whose result is not a token string
    def authorize!
      response = till_response { send_raw_request(authorization_params) }
      raise AuthenticationError.new("Could not authenticate with Zabbix API at #{uri}: #{response.error_message}") if response.error?
      raise AuthenticationError.new("Malformed response from Zabbix API: #{response.body}") unless response.string?
      @auth = response.result
    end

    # Call +block+ until it returns a non-5xx response, sleeping one
    # second between attempts.
    #
    # @raise [ConnectionError] after +max_attempts+ consecutive 5xx responses
    # @return [Rubix::Response]
    def till_response attempt=1, max_attempts=5, &block
      response = block.call
      Rubix.logger.log(Logger::DEBUG, response.body, 'RECV') if Rubix.logger
      case
      when response.code.to_i >= 500 && attempt <= max_attempts
        sleep 1 # FIXME make the sleep time configurable...
        till_response(attempt + 1, max_attempts, &block)
      when response.code.to_i >= 500
        raise ConnectionError.new("Too many consecutive failed requests (#{max_attempts}) to the Zabbix API at (#{uri}).")
      else
        Response.new(response)
      end
    end

    # Send an authenticated JSON-RPC request, logging in first if
    # necessary.
    #
    # @param [String] method the Zabbix API method name, e.g. 'host.get'
    # @param [Hash] params the parameters for the method
    # @return [Rubix::Response]
    def request method, params
      authorize! unless authorized?
      till_response do
        raw_params = {
          :jsonrpc => "2.0",
          :id      => request_id,
          :method  => method,
          :params  => params,
          :auth    => auth
        }
        send_raw_request(raw_params)
      end
    end

    # Build the HTTP POST request carrying +raw_params+ serialized as
    # its JSON body.
    def raw_post_request raw_params
      json_body = raw_params.to_json
      Rubix.logger.log(Logger::DEBUG, json_body, 'SEND') if Rubix.logger
      # URI.parse('http://example.com').path is the empty string (and
      # the uri= setter builds exactly such URIs from bare hostnames),
      # but Net::HTTP requires a non-empty request path -- default to '/'.
      request_path = (uri.path.nil? || uri.path.empty?) ? '/' : uri.path
      Net::HTTP::Post.new(request_path).tap do |req|
        req['Content-Type'] = 'application/json-rpc'
        req.body = json_body
      end
    end

    # The API's host, with the port appended unless it is the default
    # HTTP port.  Used in error messages.
    def host_with_port
      if uri.port.nil? || uri.port.to_i == 80
        uri.host
      else
        "#{uri.host}:#{uri.port}"
      end
    end

    # Send +raw_params+ to the server, incrementing the request ID.
    #
    # @raise [RequestError] if the server cannot be reached
    # @return [Net::HTTPResponse] the raw, unparsed response
    def send_raw_request raw_params
      @request_id += 1
      begin
        raw_response = server.request(raw_post_request(raw_params))
      rescue NoMethodError, SocketError => e
        raise RequestError.new("Could not connect to Zabbix server at #{host_with_port}")
      end
      raw_response
    end

  end
end
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ RUBIX_ROOT = File.expand_path('../../../../lib', __FILE__)
4
+ $: << RUBIX_ROOT unless $:.include?(RUBIX_ROOT)
5
+
6
+ require 'rubix'
7
+ require 'open-uri'
8
+
9
# Zabbix monitor that polls the Elasticsearch HTTP API of each cluster
# and writes cluster-, index-, and node-level metrics.
class ESMonitor < Rubix::ClusterMonitor

  # Hostgroup for any hosts that need to be created.
  HOSTGROUP = 'Elasticsearch clusters'

  # Templates for any hosts that need to be created.
  CLUSTER_TEMPLATES = 'Template_Elasticsearch_Cluster'
  NODE_TEMPLATES    = 'Template_Elasticsearch_Node'

  # Chef search query used to find Elasticsearch nodes.
  def node_query
    'provides_service:*-elasticsearch'
  end

  # URL for the Elasticsearch HTTP API (port 9200) on the node at
  # +private_ip+, with +args+ joined on as path segments.
  def es_url private_ip, *args
    "http://" + File.join(private_ip + ":9200", *args)
  end

  # Take each cluster-wide measurement exactly once, trying each node
  # of the cluster in turn until every measurement has succeeded.
  def measure_cluster cluster_name
    measured_cluster_health  = false
    measured_cluster_indices = false
    measured_cluster_nodes   = false
    private_ips_by_cluster[cluster_name].each do |private_ip|
      measured_cluster_health  = measure_cluster_health(cluster_name, private_ip)  unless measured_cluster_health
      measured_cluster_indices = measure_cluster_indices(cluster_name, private_ip) unless measured_cluster_indices
      measured_cluster_nodes   = measure_cluster_nodes(cluster_name, private_ip)   unless measured_cluster_nodes
      break if measured_cluster_health && measured_cluster_indices && measured_cluster_nodes
    end
  end

  # Measure the cluster health metrics -- /_cluster/health
  #
  # Returns false if the node at +private_ip+ could not be queried.
  def measure_cluster_health cluster_name, private_ip
    begin
      cluster_health = JSON.parse(open(es_url(private_ip, '_cluster', 'health')).read)
    rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
      # This node may not be running a webnode...
      return false
    end
    write({
      :hostname    => "#{cluster_name}-elasticsearch",
      :application => "_cluster",
      :templates   => self.class::CLUSTER_TEMPLATES,
      :hostgroup   => self.class::HOSTGROUP
    }) do |d|
      d << ['status',              cluster_health['status']              ]
      d << ['nodes.total',         cluster_health['number_of_nodes']     ]
      d << ['nodes.data',          cluster_health['number_of_data_nodes']]
      d << ['shards.active',       cluster_health['active_shards']       ]
      d << ['shards.relocating',   cluster_health['relocating_shards']   ]
      d << ['shards.unassigned',   cluster_health['unassigned_shards']   ]
      d << ['shards.initializing', cluster_health['initializing_shards'] ]
    end
    true
  end

  # Measure per-index metrics -- /_status
  #
  # Returns false if the node at +private_ip+ could not be queried.
  def measure_cluster_indices cluster_name, private_ip
    begin
      # Named 'status' so the each_pair block parameter below no
      # longer shadows the response hash.
      status = JSON.parse(open(es_url(private_ip, '_status')).read)
    rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
      # This node may not be running a webnode...
      return false
    end
    status['indices'].each_pair do |index_name, index_data|
      write({
        # FIX: key was misspelled :appliation, so the application
        # name was never picked up.
        :hostname    => "#{cluster_name}-elasticsearch",
        :application => index_name,
        :templates   => self.class::CLUSTER_TEMPLATES,
        :hostgroup   => self.class::HOSTGROUP
      }) do |d|
        d << ["#{index_name}.size",           index_data["index"]["size_in_bytes"]]
        d << ["#{index_name}.docs.num",       index_data["docs"]["num_docs"]      ]
        d << ["#{index_name}.docs.max",       index_data["docs"]["max_doc"]       ]
        d << ["#{index_name}.docs.deleted",   index_data["docs"]["deleted_docs"]  ]
        d << ["#{index_name}.operations",     index_data["translog"]["operations"]]
        d << ["#{index_name}.merges.total",   index_data["merges"]["total"]       ]
        d << ["#{index_name}.merges.current", index_data["merges"]["current"]     ]
      end
    end
    true
  end

  # Measure per-node JVM metrics -- /_cluster/nodes and
  # /_cluster/nodes/stats
  #
  # Returns false if the node at +private_ip+ could not be queried.
  def measure_cluster_nodes cluster_name, private_ip
    begin
      nodes_data       = JSON.parse(open(es_url(private_ip, '_cluster', 'nodes')).read)
      nodes_stats_data = JSON.parse(open(es_url(private_ip, '_cluster', 'nodes', 'stats')).read)
    rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
      # This node may not be running a webnode...
      return false
    end

    nodes_stats_data['nodes'].each_pair do |id, stats|
      # Map the ES node's primary IP back to a Chef node name; skip
      # nodes we don't know about.
      ip        = nodes_data['nodes'][id]['network']['primary_interface']['address']
      node_name = chef_node_name_from_ip(ip)
      next unless node_name
      write({
        :hostname    => node_name,
        :application => "Elasticsearch",
        :templates   => self.class::NODE_TEMPLATES
      }) do |d|
        # concurrency
        d << ['es.jvm.threads.count', stats['jvm']['threads']['count']]

        # garbage collection
        d << ['es.jvm.gc.coll_time',  stats['jvm']['gc']['collection_time_in_millis']]
        d << ['es.jvm.gc.coll_count', stats['jvm']['gc']['collection_count']]

        # memory
        d << ['es.jvm.mem.heap_used',     stats['jvm']['mem']['heap_used_in_bytes']]
        d << ['es.jvm.mem.non_heap_used', stats['jvm']['mem']['non_heap_used_in_bytes']]
        d << ['es.jvm.mem.heap_comm',     stats['jvm']['mem']['heap_committed_in_bytes']]
        d << ['es.jvm.mem.non_heap_comm', stats['jvm']['mem']['non_heap_committed_in_bytes']]

        # indices
        d << ['es.indices.size', stats['indices']['size_in_bytes']]
      end
    end
    true
  end

end

ESMonitor.run if $0 == __FILE__
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'zabbix_cluster_monitor'
4
+ require 'net/http'
5
+ require 'crack'
6
+
7
# Zabbix monitor that polls the HBase Stargate REST API of each cluster
# and writes cluster- and region-server-level metrics.
class HBaseMonitor < ZabbixClusterMonitor

  # Hostgroups for clusters & hosts that need to be created.
  CLUSTER_HOSTGROUP = 'HBase clusters'

  # Templates for any hosts that need to be created.
  CLUSTER_TEMPLATES = 'Template_HBase_Cluster'
  NODE_TEMPLATES    = 'Template_HBase_Node'

  # Chef search used to find HBase Stargate nodes.
  def matching_chef_nodes
    Chef::Search::Query.new.search('node', 'provides_service:*hbase-stargate AND facet_name:alpha')
  end

  # Take the cluster-wide measurement once, trying each node of the
  # cluster in turn until it succeeds.
  def measure_cluster cluster_name
    measured_cluster_status = false
    private_ips_by_cluster[cluster_name].each do |private_ip|
      measured_cluster_status = measure_cluster_status(cluster_name, private_ip) unless measured_cluster_status
      break if measured_cluster_status
    end
  end

  # Measure the cluster health metrics -- /status/cluster
  #
  # Returns false if the Stargate server at +private_ip+ could not be
  # queried or its response could not be parsed.
  def measure_cluster_status cluster_name, private_ip
    begin
      connection = Net::HTTP.new(private_ip, 8080) # FIXME port
      request    = Net::HTTP::Get.new('/status/cluster', 'Accept' => 'text/xml')
      response   = connection.request(request)
      return false unless response.code.to_i == 200

      data           = Crack::XML.parse(response.body)
      cluster_status = data['ClusterStatus']
      dead_nodes     = cluster_status['DeadNodes'] ? cluster_status['DeadNodes']['Node'] : []
      live_nodes     = cluster_status['LiveNodes']['Node']
      # Crack parses a single <Node> element as a Hash rather than a
      # one-element Array -- normalize so .size and .each behave.
      dead_nodes = [dead_nodes] unless dead_nodes.is_a?(Array)
      live_nodes = [live_nodes] unless live_nodes.is_a?(Array)
    rescue NoMethodError, SocketError, REXML::ParseException, Errno::ECONNREFUSED => e
      # puts "#{e.class} -- #{e.message}"
      # puts e.backtrace
      return false
    end

    write({
      :hostname    => "#{cluster_name}-hbase",
      :application => '_cluster',
      # FIX: was a hard-coded 'HBase Clusters', which silently
      # disagreed with the CLUSTER_HOSTGROUP constant above.
      :hostgroup   => self.class::CLUSTER_HOSTGROUP,
      :templates   => self.class::CLUSTER_TEMPLATES
    }) do |d|
      d << ['requests',    cluster_status['requests']]
      d << ['regions',     cluster_status['regions']]
      d << ['load',        cluster_status['averageLoad']]
      d << ['nodes.dead',  dead_nodes.size]
      d << ['nodes.alive', live_nodes.size]
    end
    measure_cluster_tables(cluster_name, data)
    measure_cluster_nodes(cluster_name, live_nodes)
    true
  end

  # Write per-table metrics for the cluster.
  def measure_cluster_tables cluster_name, data
    # FIXME...not sure how best to get information about "tables" in HBase...
  end

  # Write per-region-server metrics for each live node, mapping each
  # node's IP back to its Chef node name.
  def measure_cluster_nodes cluster_name, live_nodes
    live_nodes.each do |live_node|
      next unless live_node
      ip        = (live_node['name'] || '').split(':').first
      node_name = chef_node_name_from_ip(ip)
      next unless node_name
      write({
        :hostname    => node_name,
        :application => "HBase",
        :templates   => self.class::NODE_TEMPLATES
      }) do |d|
        regions = live_node['Region'] || []
        # Crack: a single <Region> element parses as a Hash, not an Array.
        regions = [regions] unless regions.is_a?(Array)
        d << ['hbase.regions',   regions.size]
        d << ['hbase.heap_size', live_node['heapSizeMB']]
        d << ['hbase.requests',  live_node['requests']]
      end
    end
  end

end

HBaseMonitor.run if $0 == __FILE__
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'zabbix_cluster_monitor'
4
+ require 'open-uri'
5
+ require 'set'
6
+ require 'mongo'
7
+
8
# Zabbix monitor that polls each MongoDB cluster's serverStatus and
# per-database stats and writes the resulting metrics.
class MongoMonitor < ZabbixClusterMonitor

  # Hostgroup for any hosts that need to be created.
  HOSTGROUP = 'MongoDB clusters'

  # Templates for any hosts that need to be created.
  TEMPLATES = 'Template_MongoDB'

  # Names of databases to ignore when we find them.
  IGNORED_DATABASES = %w[db test admin local].to_set

  # Chef search used to find MongoDB servers.
  def matching_chef_nodes
    Chef::Search::Query.new.search('node', 'provides_service:*-mongodb-server')
  end

  # Take each cluster-wide measurement once, trying each node of the
  # cluster in turn until every measurement has succeeded.
  def measure_cluster cluster_name
    measured_mongo_server    = false
    measured_mongo_databases = false
    private_ips_by_cluster[cluster_name].each do |private_ip|
      begin
        connection = Mongo::Connection.new(private_ip)
      rescue Mongo::ConnectionFailure => e
        next
      end
      measured_mongo_server    = measure_mongo_server(cluster_name, connection)    unless measured_mongo_server
      measured_mongo_databases = measure_mongo_databases(cluster_name, connection) unless measured_mongo_databases
      break if measured_mongo_server && measured_mongo_databases
    end
  end

  # Write server-wide metrics derived from two serverStatus samples
  # taken one second apart; counter metrics are reported as per-second
  # rates over that interval.
  def measure_mongo_server cluster_name, connection
    # FIX: was `initial = nil, final = nil`, a parallel assignment
    # that set `initial` to the Array [nil, nil].
    initial = final = nil
    db      = connection.db('system')  # the name of this db doesn't matter?
    command = {:serverStatus => true}  # the value of the 'serverStatus' key doesn't matter?

    # gather metrics with a 1.0 second gap
    initial = db.command(command) ; sleep 1.0 ; final = db.command(command)
    return false unless initial && final
    dt = final['localTime'].to_f - initial['localTime'].to_f

    write({
      :hostname    => "#{cluster_name}-mongodb",
      :application => '_cluster',
      :hostgroup   => self.class::HOSTGROUP,
      :templates   => self.class::TEMPLATES
    }) do |d|

      # operations
      d << ['inserts',  (final['opcounters']['insert']  - initial['opcounters']['insert'])  / dt]
      d << ['queries',  (final['opcounters']['query']   - initial['opcounters']['query'])   / dt]
      d << ['updates',  (final['opcounters']['update']  - initial['opcounters']['update'])  / dt]
      d << ['deletes',  (final['opcounters']['delete']  - initial['opcounters']['delete'])  / dt]
      d << ['getmores', (final['opcounters']['getmore'] - initial['opcounters']['getmore']) / dt]
      d << ['commands', (final['opcounters']['command'] - initial['opcounters']['command']) / dt]

      # memory
      d << ['mem.resident', final['mem']['resident']]
      d << ['mem.virtual',  final['mem']['virtual']]
      d << ['mem.mapped',   final['mem']['mapped']]

      # disk
      d << ['flushes',    (final['backgroundFlushing']['flushes']  - initial['backgroundFlushing']['flushes'])  / dt]
      d << ['flush_time', (final['backgroundFlushing']['total_ms'] - initial['backgroundFlushing']['total_ms']) ]
      d << ['faults',     (final['extra_info']['page_faults']      - initial['extra_info']['page_faults'])      / dt]

      # index
      d << ['accesses', (final['indexCounters']['btree']['accesses'] - initial['indexCounters']['btree']['accesses']) / dt]
      d << ['hits',     (final['indexCounters']['btree']['hits']     - initial['indexCounters']['btree']['hits'])     / dt]
      d << ['misses',   (final['indexCounters']['btree']['misses']   - initial['indexCounters']['btree']['misses'])   / dt]
      d << ['resets',   (final['indexCounters']['btree']['resets']   - initial['indexCounters']['btree']['resets'])   / dt]

      # read/write load
      d << ['queue.total',   final['globalLock']['currentQueue']['total']]
      d << ['queue.read',    final['globalLock']['currentQueue']['readers']]
      d << ['queue.write',   final['globalLock']['currentQueue']['writers']]
      d << ['clients.total', final['globalLock']['activeClients']['total']]
      d << ['clients.read',  final['globalLock']['activeClients']['readers']]
      d << ['clients.write', final['globalLock']['activeClients']['writers']]

      # network
      d << ['net.in',      (final['network']['bytesIn']     - initial['network']['bytesIn'])     / dt]
      d << ['net.out',     (final['network']['bytesOut']    - initial['network']['bytesOut'])    / dt]
      d << ['requests',    (final['network']['numRequests'] - initial['network']['numRequests']) / dt]
      d << ['connections', final['connections']['current']]
    end
    true
  end

  # Write per-database size/object metrics from each database's
  # stats(), skipping IGNORED_DATABASES.
  def measure_mongo_databases cluster_name, connection
    dbs = connection.database_names
    return true if dbs.size == 0 # nothing to do here

    dbs.each do |database_name|
      next if self.class::IGNORED_DATABASES.include?(database_name.downcase)
      stats = connection.db(database_name).stats()

      write({
        :hostname    => "#{cluster_name}-mongodb",
        :application => database_name,
        :hostgroup   => self.class::HOSTGROUP,
        :templates   => self.class::TEMPLATES
      }) do |d|
        d << ["#{database_name}.collections",      stats["collections"]]
        d << ["#{database_name}.objects.count",    stats["objects"]    ]
        d << ["#{database_name}.objects.avg_size", stats["avgObjSize"] ]
        d << ["#{database_name}.size.data",        stats["dataSize"]   ]
        d << ["#{database_name}.size.disk",        stats["storageSize"]]
        d << ["#{database_name}.size.indexes",     stats["indexSize"]  ]
        d << ["#{database_name}.size.file",        stats["fileSize"]   ]
        d << ["#{database_name}.extents",          stats["numExtents"] ]
        d << ["#{database_name}.indexes",          stats["indexes"]    ]
      end
    end
    true
  end
end

MongoMonitor.run if $0 == __FILE__