rubix 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+ require 'json'
4
+
5
+ require 'rubix/log'
6
+
7
module Rubix

  # Wraps an HTTP connection to a Zabbix API endpoint, handling
  # JSON-RPC request framing, authentication (user.login), and
  # retry-on-5xx behavior.
  class Connection

    include Logs

    # uri        -- [URI] the parsed API endpoint
    # server     -- [Net::HTTP] the underlying HTTP client
    # auth       -- [String, nil] auth token returned by user.login
    # request_id -- [Integer] monotonically increasing JSON-RPC id
    # username / password -- credentials used for user.login
    attr_reader :uri, :server, :auth, :request_id, :username, :password

    # Create a new connection.
    #
    # @param uri_or_string [URI, String] API endpoint; a bare host is
    #   assumed to be plain HTTP
    # @param username [String, nil] falls back to the URI's userinfo
    # @param password [String, nil] falls back to the URI's password
    def initialize uri_or_string, username = nil, password = nil
      self.uri    = uri_or_string
      @username   = username || uri.user
      @password   = password || uri.password
      @request_id = 0
    end

    # Set the endpoint from a URI-like object or a string, and
    # (re)build the Net::HTTP client for it.
    def uri= uri_or_string
      @uri =
        if uri_or_string.respond_to?(:host)
          uri_or_string
        else
          string = uri_or_string.to_s
          # \A anchors the whole string; the original /^http/ would also
          # match an "http" at the start of any later line.
          string = 'http://' + string unless string =~ /\Ahttp/
          URI.parse(string)
        end
      @server = Net::HTTP.new(uri.host, uri.port)
    end

    # True once user.login has succeeded and we hold an auth token.
    def authorized?
      !auth.nil?
    end

    # JSON-RPC parameters for the user.login call.
    def authorization_params
      {
        :jsonrpc => "2.0",
        :id      => request_id,
        :method  => "user.login",
        :params  => {
          :user     => username,
          :password => password
        }
      }
    end

    # Log in to the Zabbix API and store the auth token for later
    # requests.
    #
    # @raise [AuthenticationError] if the API rejects the credentials
    #   or returns a malformed (non-string) result
    def authorize!
      response = till_response { send_raw_request(authorization_params) }
      raise AuthenticationError.new("Could not authenticate with Zabbix API at #{uri}: #{response.error_message}") if response.error?
      raise AuthenticationError.new("Malformed response from Zabbix API: #{response.body}") unless response.string?
      @auth = response.result
    end

    # Run the given request block, retrying (with a 1s pause) on 5xx
    # responses up to +max_attempts+ times.
    #
    # @raise [ConnectionError] after too many consecutive 5xx responses
    # @return [Response] the wrapped successful response
    def till_response attempt = 1, max_attempts = 5, &block
      response = block.call
      Rubix.logger.log(Logger::DEBUG, response.body, 'RECV') if Rubix.logger
      case
      when response.code.to_i >= 500 && attempt <= max_attempts
        sleep 1                     # FIXME make the sleep time configurable...
        till_response(attempt + 1, max_attempts, &block)
      when response.code.to_i >= 500
        raise ConnectionError.new("Too many consecutive failed requests (#{max_attempts}) to the Zabbix API at (#{uri}).")
      else
        Response.new(response)
      end
    end

    # Perform an authenticated JSON-RPC request, logging in first if
    # necessary.
    def request method, params
      authorize! unless authorized?
      till_response do
        send_raw_request({
                           :jsonrpc => "2.0",
                           :id      => request_id,
                           :method  => method,
                           :params  => params,
                           :auth    => auth
                         })
      end
    end

    # Build the Net::HTTP::Post carrying the given JSON-RPC parameters.
    def raw_post_request raw_params
      json_body = raw_params.to_json
      Rubix.logger.log(Logger::DEBUG, json_body, 'SEND') if Rubix.logger
      # A URI like "http://host" parses with an empty path, and
      # Net::HTTP::Post.new('') raises ArgumentError -- default to '/'.
      path = uri.path.empty? ? '/' : uri.path
      Net::HTTP::Post.new(path).tap do |req|
        req['Content-Type'] = 'application/json-rpc'
        req.body            = json_body
      end
    end

    # Host (plus port, unless it's the default 80) for error messages.
    def host_with_port
      if uri.port.nil? || uri.port.to_i == 80
        uri.host
      else
        "#{uri.host}:#{uri.port}"
      end
    end

    # Send a raw request, incrementing the JSON-RPC request id.
    #
    # @raise [RequestError] if the server cannot be reached
    def send_raw_request raw_params
      @request_id += 1
      begin
        server.request(raw_post_request(raw_params))
      rescue NoMethodError, SocketError => e
        # NOTE(review): rescuing NoMethodError here looks deliberate
        # (guards an unset/odd server object) but is fragile -- confirm
        # before narrowing it.
        raise RequestError.new("Could not connect to Zabbix server at #{host_with_port}")
      end
    end

  end
end
@@ -0,0 +1,130 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ RUBIX_ROOT = File.expand_path('../../../../lib', __FILE__)
4
+ $: << RUBIX_ROOT unless $:.include?(RUBIX_ROOT)
5
+
6
+ require 'rubix'
7
+ require 'open-uri'
8
+
9
# Zabbix monitor that polls the Elasticsearch HTTP API (port 9200) on
# each cluster's nodes and writes cluster-, index-, and node-level
# metrics.
class ESMonitor < Rubix::ClusterMonitor

  # Hostgroup for any hosts that need to be created.
  HOSTGROUP = 'Elasticsearch clusters'

  # Templates for any hosts that need to be created.
  CLUSTER_TEMPLATES = 'Template_Elasticsearch_Cluster'
  NODE_TEMPLATES    = 'Template_Elasticsearch_Node'

  # Chef search used to discover Elasticsearch nodes.
  def node_query
    'provides_service:*-elasticsearch'
  end

  # URL for an Elasticsearch API path on the given node.
  def es_url private_ip, *args
    "http://" + File.join(private_ip + ":9200", *args)
  end

  # Measure one cluster: try each node in turn until each metric group
  # (health, indices, nodes) has been collected once.
  def measure_cluster cluster_name
    measured_health = measured_indices = measured_nodes = false
    private_ips_by_cluster[cluster_name].each do |private_ip|
      measured_health  ||= measure_cluster_health(cluster_name, private_ip)
      measured_indices ||= measure_cluster_indices(cluster_name, private_ip)
      measured_nodes   ||= measure_cluster_nodes(cluster_name, private_ip)
      break if measured_health && measured_indices && measured_nodes
    end
  end

  # Measure the cluster health metrics -- /_cluster/health
  #
  # @return [true, false] whether metrics were successfully collected
  def measure_cluster_health cluster_name, private_ip
    begin
      cluster_health = JSON.parse(open(es_url(private_ip, '_cluster', 'health')).read)
    rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
      # This node may not be running a webnode...
      return false
    end
    write({
            :hostname    => "#{cluster_name}-elasticsearch",
            :application => "_cluster",
            :templates   => self.class::CLUSTER_TEMPLATES,
            :hostgroup   => self.class::HOSTGROUP
          }) do |d|
      d << ['status',              cluster_health['status']              ]
      d << ['nodes.total',         cluster_health['number_of_nodes']     ]
      d << ['nodes.data',          cluster_health['number_of_data_nodes']]
      d << ['shards.active',       cluster_health['active_shards']       ]
      d << ['shards.relocating',   cluster_health['relocating_shards']   ]
      d << ['shards.unassigned',   cluster_health['unassigned_shards']   ]
      d << ['shards.initializing', cluster_health['initializing_shards'] ]
    end
    true
  end

  # Measure per-index metrics -- /_status
  #
  # @return [true, false] whether metrics were successfully collected
  def measure_cluster_indices cluster_name, private_ip
    begin
      status = JSON.parse(open(es_url(private_ip, '_status')).read)
    rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
      # This node may not be running a webnode...
      return false
    end
    # Block param renamed (was `index_data`, shadowing the outer local).
    status['indices'].each_pair do |index_name, index_stats|
      write({
              :hostname    => "#{cluster_name}-elasticsearch",
              # FIX: key was misspelled :appliation, so the application
              # was never set for per-index metrics.
              :application => index_name,
              :templates   => self.class::CLUSTER_TEMPLATES,
              :hostgroup   => self.class::HOSTGROUP
            }) do |d|
        d << ["#{index_name}.size",           index_stats["index"]["size_in_bytes"]  ]
        d << ["#{index_name}.docs.num",       index_stats["docs"]["num_docs"]        ]
        d << ["#{index_name}.docs.max",       index_stats["docs"]["max_doc"]         ]
        d << ["#{index_name}.docs.deleted",   index_stats["docs"]["deleted_docs"]    ]
        d << ["#{index_name}.operations",     index_stats["translog"]["operations"]  ]
        d << ["#{index_name}.merges.total",   index_stats["merges"]["total"]         ]
        d << ["#{index_name}.merges.current", index_stats["merges"]["current"]       ]
      end
    end
    true
  end

  # Measure per-node JVM/index metrics -- /_cluster/nodes and
  # /_cluster/nodes/stats
  #
  # @return [true, false] whether metrics were successfully collected
  def measure_cluster_nodes cluster_name, private_ip
    begin
      nodes_data       = JSON.parse(open(es_url(private_ip, '_cluster', 'nodes')).read)
      nodes_stats_data = JSON.parse(open(es_url(private_ip, '_cluster', 'nodes', 'stats')).read)
    rescue SocketError, OpenURI::HTTPError, JSON::ParserError, Errno::ECONNREFUSED => e
      # This node may not be running a webnode...
      return false
    end

    nodes_stats_data['nodes'].each_pair do |id, stats|
      # Map the ES node id back to a Chef node via its primary IP.
      ip        = nodes_data['nodes'][id]['network']['primary_interface']['address']
      node_name = chef_node_name_from_ip(ip)
      next unless node_name
      write({
              :hostname    => node_name,
              :application => "Elasticsearch",
              :templates   => self.class::NODE_TEMPLATES
            }) do |d|
        # concurrency
        d << ['es.jvm.threads.count',     stats['jvm']['threads']['count']                  ]

        # garbage collection
        d << ['es.jvm.gc.coll_time',      stats['jvm']['gc']['collection_time_in_millis']   ]
        d << ['es.jvm.gc.coll_count',     stats['jvm']['gc']['collection_count']            ]

        # memory
        d << ['es.jvm.mem.heap_used',     stats['jvm']['mem']['heap_used_in_bytes']         ]
        d << ['es.jvm.mem.non_heap_used', stats['jvm']['mem']['non_heap_used_in_bytes']     ]
        d << ['es.jvm.mem.heap_comm',     stats['jvm']['mem']['heap_committed_in_bytes']    ]
        d << ['es.jvm.mem.non_heap_comm', stats['jvm']['mem']['non_heap_committed_in_bytes']]

        # indices
        d << ['es.indices.size',          stats['indices']['size_in_bytes']                 ]
      end
    end
    true
  end

end

ESMonitor.run if $0 == __FILE__
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'zabbix_cluster_monitor'
4
+ require 'net/http'
5
+ require 'crack'
6
+
7
# Zabbix monitor that polls the HBase Stargate REST API
# (/status/cluster) and writes cluster- and node-level metrics.
class HBaseMonitor < ZabbixClusterMonitor

  # Hostgroups for clusters & hosts that need to be created.
  CLUSTER_HOSTGROUP = 'HBase clusters'

  # Templates for any hosts that need to be created.
  CLUSTER_TEMPLATES = 'Template_HBase_Cluster'
  NODE_TEMPLATES    = 'Template_HBase_Node'

  # Chef search used to discover HBase Stargate nodes.
  def matching_chef_nodes
    Chef::Search::Query.new.search('node', 'provides_service:*hbase-stargate AND facet_name:alpha')
  end

  # Measure one cluster: try each node in turn until the cluster status
  # has been collected once.
  def measure_cluster cluster_name
    measured_cluster_status = false
    private_ips_by_cluster[cluster_name].each do |private_ip|
      measured_cluster_status ||= measure_cluster_status(cluster_name, private_ip)
      break if measured_cluster_status
    end
  end

  # Measure the cluster health metrics -- /status/cluster
  #
  # @return [true, false] whether metrics were successfully collected
  def measure_cluster_status cluster_name, private_ip
    begin
      connection = Net::HTTP.new(private_ip, 8080) # FIXME port
      request    = Net::HTTP::Get.new('/status/cluster', 'Accept' => 'text/xml')
      response   = connection.request(request)
      return false unless response.code.to_i == 200

      data           = Crack::XML.parse(response.body)
      cluster_status = data['ClusterStatus']
      dead_nodes     = cluster_status['DeadNodes'] ? cluster_status['DeadNodes']['Node'] : []
      live_nodes     = cluster_status['LiveNodes']['Node']
    rescue NoMethodError, SocketError, REXML::ParseException, Errno::ECONNREFUSED => e
      # puts "#{e.class} -- #{e.message}"
      # puts e.backtrace
      return false
    end

    write({
            :hostname    => "#{cluster_name}-hbase",
            :application => '_cluster',
            # FIX: was the literal 'HBase Clusters', which disagrees in
            # case with CLUSTER_HOSTGROUP ('HBase clusters') and would
            # scatter hosts across two hostgroups.
            :hostgroup   => self.class::CLUSTER_HOSTGROUP,
            :templates   => self.class::CLUSTER_TEMPLATES
          }) do |d|
      d << ['requests',    cluster_status['requests']   ]
      d << ['regions',     cluster_status['regions']    ]
      d << ['load',        cluster_status['averageLoad']]
      d << ['nodes.dead',  dead_nodes.size              ]
      d << ['nodes.alive', live_nodes.size              ]
    end
    measure_cluster_tables(cluster_name, data)
    measure_cluster_nodes(cluster_name, live_nodes)
    true
  end

  # Per-table metrics (not yet implemented).
  def measure_cluster_tables cluster_name, data
    # FIXME...not sure how best to get information about "tables" in HBase...
  end

  # Per-regionserver metrics, mapped back to Chef nodes by IP.
  def measure_cluster_nodes cluster_name, live_nodes
    live_nodes.each do |live_node|
      next unless live_node
      # Node names look like "host:port" -- the host part is the IP.
      ip        = (live_node['name'] || '').split(':').first
      node_name = chef_node_name_from_ip(ip)
      next unless node_name
      write({
              :hostname    => node_name,
              :application => "HBase",
              :templates   => self.class::NODE_TEMPLATES
            }) do |d|
        d << ['hbase.regions',   (live_node['Region'] || []).size]
        d << ['hbase.heap_size', live_node['heapSizeMB']         ]
        d << ['hbase.requests',  live_node['requests']           ]
      end
    end
  end

end

HBaseMonitor.run if $0 == __FILE__
@@ -0,0 +1,125 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'zabbix_cluster_monitor'
4
+ require 'open-uri'
5
+ require 'set'
6
+ require 'mongo'
7
+
8
# Zabbix monitor that connects to MongoDB servers, samples serverStatus
# twice to compute per-second rates, and writes server- and
# database-level metrics.
class MongoMonitor < ZabbixClusterMonitor

  # Hostgroup for any hosts that need to be created.
  HOSTGROUP = 'MongoDB clusters'

  # Templates for any hosts that need to be created.
  TEMPLATES = 'Template_MongoDB'

  # Names of databases to ignore when we find them.
  IGNORED_DATABASES = %w[db test admin local].to_set

  # Chef search used to discover MongoDB servers.
  def matching_chef_nodes
    Chef::Search::Query.new.search('node', 'provides_service:*-mongodb-server')
  end

  # Measure one cluster: try each node in turn until server and
  # database metrics have each been collected once.
  def measure_cluster cluster_name
    measured_server    = false
    measured_databases = false
    private_ips_by_cluster[cluster_name].each do |private_ip|
      begin
        connection = Mongo::Connection.new(private_ip)
      rescue Mongo::ConnectionFailure => e
        next
      end
      measured_server    ||= measure_mongo_server(cluster_name, connection)
      measured_databases ||= measure_mongo_databases(cluster_name, connection)
      break if measured_server && measured_databases
    end
  end

  # Sample serverStatus twice, one second apart, and write rate and
  # gauge metrics for the server.
  #
  # @return [true, false] whether metrics were successfully collected
  def measure_mongo_server cluster_name, connection
    # FIX: was `initial = nil, final = nil`, which parses as a multiple
    # assignment (initial == [nil, nil]) rather than two nil locals.
    initial = final = nil
    db      = connection.db('system')    # the name of this db doesn't matter?
    command = {:serverStatus => true}    # the value of the 'serverStatus' key doesn't matter?

    # gather metrics with a 1.0 second gap
    initial = db.command(command) ; sleep 1.0 ; final = db.command(command)
    return false unless initial && final
    dt = final['localTime'].to_f - initial['localTime'].to_f

    write({
            :hostname    => "#{cluster_name}-mongodb",
            :application => '_cluster',
            :hostgroup   => self.class::HOSTGROUP,
            :templates   => self.class::TEMPLATES
          }) do |d|

      # operations (per-second rates over the sample window)
      d << ['inserts',  (final['opcounters']['insert']  - initial['opcounters']['insert'])  / dt]
      d << ['queries',  (final['opcounters']['query']   - initial['opcounters']['query'])   / dt]
      d << ['updates',  (final['opcounters']['update']  - initial['opcounters']['update'])  / dt]
      d << ['deletes',  (final['opcounters']['delete']  - initial['opcounters']['delete'])  / dt]
      d << ['getmores', (final['opcounters']['getmore'] - initial['opcounters']['getmore']) / dt]
      d << ['commands', (final['opcounters']['command'] - initial['opcounters']['command']) / dt]

      # memory
      d << ['mem.resident', final['mem']['resident']]
      d << ['mem.virtual',  final['mem']['virtual'] ]
      d << ['mem.mapped',   final['mem']['mapped']  ]

      # disk
      d << ['flushes',    (final['backgroundFlushing']['flushes']  - initial['backgroundFlushing']['flushes'])  / dt]
      d << ['flush_time', (final['backgroundFlushing']['total_ms'] - initial['backgroundFlushing']['total_ms'])     ]
      d << ['faults',     (final['extra_info']['page_faults']      - initial['extra_info']['page_faults'])      / dt]

      # index
      d << ['accesses', (final['indexCounters']['btree']['accesses'] - initial['indexCounters']['btree']['accesses']) / dt]
      d << ['hits',     (final['indexCounters']['btree']['hits']     - initial['indexCounters']['btree']['hits'])     / dt]
      d << ['misses',   (final['indexCounters']['btree']['misses']   - initial['indexCounters']['btree']['misses'])   / dt]
      d << ['resets',   (final['indexCounters']['btree']['resets']   - initial['indexCounters']['btree']['resets'])   / dt]

      # read/write load
      d << ['queue.total',   final['globalLock']['currentQueue']['total']   ]
      d << ['queue.read',    final['globalLock']['currentQueue']['readers'] ]
      d << ['queue.write',   final['globalLock']['currentQueue']['writers'] ]
      d << ['clients.total', final['globalLock']['activeClients']['total']  ]
      d << ['clients.read',  final['globalLock']['activeClients']['readers']]
      d << ['clients.write', final['globalLock']['activeClients']['writers']]

      # network
      d << ['net.in',      (final['network']['bytesIn']     - initial['network']['bytesIn'])     / dt]
      d << ['net.out',     (final['network']['bytesOut']    - initial['network']['bytesOut'])    / dt]
      d << ['requests',    (final['network']['numRequests'] - initial['network']['numRequests']) / dt]
      d << ['connections', final['connections']['current']]
    end
    true
  end

  # Write size/count metrics for each (non-ignored) database on the
  # server.
  #
  # @return [true] always true once reached (a server with no databases
  #   counts as measured)
  def measure_mongo_databases cluster_name, connection
    dbs = connection.database_names
    return true if dbs.size == 0 # nothing to do here

    dbs.each do |database_name|
      next if self.class::IGNORED_DATABASES.include?(database_name.downcase)
      stats = connection.db(database_name).stats

      write({
              :hostname    => "#{cluster_name}-mongodb",
              :application => database_name,
              :hostgroup   => self.class::HOSTGROUP,
              :templates   => self.class::TEMPLATES
            }) do |d|
        d << ["#{database_name}.collections",      stats["collections"]]
        d << ["#{database_name}.objects.count",    stats["objects"]    ]
        d << ["#{database_name}.objects.avg_size", stats["avgObjSize"] ]
        d << ["#{database_name}.size.data",        stats["dataSize"]   ]
        d << ["#{database_name}.size.disk",        stats["storageSize"]]
        d << ["#{database_name}.size.indexes",     stats["indexSize"]  ]
        d << ["#{database_name}.size.file",        stats["fileSize"]   ]
        d << ["#{database_name}.extents",          stats["numExtents"] ]
        d << ["#{database_name}.indexes",          stats["indexes"]    ]
      end
    end
    true
  end
end

MongoMonitor.run if $0 == __FILE__