librato-storm-kafka 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
+ *.gem
2
+ .bundle
3
+ pkg/*
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Specify your gem's dependencies in librato-storm-kafka.gemspec
gemspec
@@ -0,0 +1,43 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ librato-storm-kafka (0.0.1)
5
+ faraday_middleware
6
+ kafka-rb
7
+ librato-metrics
8
+ multi_xml
9
+ trollop
10
+ yajl-ruby
11
+ zookeeper
12
+
13
+ GEM
14
+ remote: http://rubygems.org/
15
+ specs:
16
+ aggregate (0.2.2)
17
+ faraday (0.8.4)
18
+ multipart-post (~> 1.1)
19
+ faraday_middleware (0.9.0)
20
+ faraday (>= 0.7.4, < 0.9)
21
+ kafka-rb (0.0.11)
22
+ librato-metrics (0.7.4)
23
+ aggregate (~> 0.2.2)
24
+ faraday (~> 0.7)
25
+ multi_json
26
+ little-plugger (1.1.3)
27
+ logging (1.7.2)
28
+ little-plugger (>= 1.1.3)
29
+ multi_json (1.3.7)
30
+ multi_xml (0.5.1)
31
+ multipart-post (1.1.5)
32
+ rake (10.0.2)
33
+ trollop (2.0)
34
+ yajl-ruby (1.1.0)
35
+ zookeeper (1.3.0)
36
+ logging (~> 1.7.2)
37
+
38
+ PLATFORMS
39
+ ruby
40
+
41
+ DEPENDENCIES
42
+ librato-storm-kafka!
43
+ rake
@@ -0,0 +1,72 @@
1
+ # librato-storm-kafka
2
+
3
+ Automatically detects Kafka spouts from a Storm topology from
4
+ ZooKeeper, determines the Spout consumer offsets, connects to the
5
+ Kafka hosts/partitions and reads current JMX/offset stats, and
6
+ publishes topic metrics to [Librato
7
+ Metrics](https://metrics.librato.com). The metrics published from this
8
+ gem allow the user to graph consumer lag.
9
+
10
+ Intended to be run periodically, for example, from cron.
11
+
12
+ ### Install
13
+
14
+ ```
15
+ $ gem install librato-storm-kafka
16
+ ```
17
+
18
+ ### Requirements
19
+
20
+ * Storm 0.8.1+ topology
21
+ * storm-contrib dynamic-brokers branch
22
+ * Kafka 0.7.1+ with MX4J
23
+
24
+ Firewall requirements:
25
+
26
+ * Access to ZK servers
27
+ * Access to MX4J server on Kafka
28
+ * Access to Kafka on regular port (9092)
29
+
30
+ ### Run
31
+
32
+ Run with minimal options:
33
+
34
+ ```
35
+ $ librato-storm-kafka --email <librato email>
36
+ --token <librato token>
37
+ --zk-servers <comma separated list of ZK servers>
38
+ --zk-prefix <Storm prefix in ZK>
39
+ ```
40
+
41
+ ### Options
42
+
43
+ * `email`: Email associated with your Librato Metrics account.
44
+ * `token`: API token for your Librato Metrics account.
45
+ * `prefix`: Prefix to use for metric names. Defaults to
46
+ **kafkastorm.**.
47
+ * `mx4j-port`: Port to connect to Kafka's MX4J port on (default 8082)
48
+ * `zk-servers`: Comma-separated list of ZK Servers.
49
+ * `zk-port`: ZK port (default 2181)
50
+ * `zk-prefix`: Prefix of Storm spout configs in ZK (What you pass to `SpoutConfig`)
51
+ * `floor-in-secs`: By default all measurements are posted with
52
+ the current time as the measure_time. This option,
53
+ specified as a value in seconds, will floor the
54
+ time by this value. For example, 300 would floor
55
+ all measure times to the 5 minute mark.
56
+
57
+
58
+ ## Contributing to librato-storm-kafka
59
+
60
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
61
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
62
+ * Fork the project.
63
+ * Start a feature/bugfix branch.
64
+ * Commit and push until you are happy with your contribution.
65
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
66
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
67
+
68
+ ## Copyright
69
+
70
+ Copyright (c) 2012 Mike Heffner. See LICENSE.txt for
71
+ further details.
72
+
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,247 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'trollop'
4
+ require 'zookeeper'
5
+ require 'yajl/json_gem'
6
+ require 'faraday_middleware'
7
+ require 'kafka'
8
+
9
+ require 'librato/metrics'
10
+
11
+ $:.unshift File.join(File.dirname(__FILE__), '../lib')
12
+
13
+ require 'librato-storm-kafka'
14
+
15
# Command-line definition for the collector. Each `opt` declares one flag;
# Trollop derives the long flag name from the symbol.
# NOTE(review): :mx4j_port is declared here, but the `http` helper in this
# script builds its URL with a literal 8082 -- confirm the option is honored.
parser = Trollop::Parser.new do
  version "librato-storm-kafka version %s" % [Librato::Storm::Kafka::VERSION]

  # Librato Metrics connection settings.
  opt :api, "Change the API endpoint",
      :type => :string, :default => "metrics-api.librato.com"
  opt :email, "Librato Metrics Email",
      :type => :string, :default => ENV['LIBRATO_METRICS_EMAIL']
  opt :token, "Librato Metrics API Token",
      :type => :string, :default => ENV['LIBRATO_METRICS_TOKEN']

  # Metric naming and timestamp handling.
  opt :prefix, "Metric name prefix",
      :type => :string, :default => "kafkastorm."
  opt :floor_in_secs, "floor() measure times to this interval",
      :type => :int

  # Kafka MX4J and ZooKeeper endpoints.
  opt :mx4j_port, "Port to connect to MX4J on",
      :default => 8082
  opt :zk_servers, "ZooKeeper servers (comma separated)",
      :type => :string
  opt :zk_port, "Port to connect to ZK on",
      :default => 2181
  opt :zk_prefix, "Prefix path in ZK to find consumers",
      :default => "/kafkastorm"
end
55
+
56
# Parse ARGV into `opts`, showing the help text when no arguments are given or
# when one of the mandatory options (email, token, zk_servers) is missing.
opts = Trollop::with_standard_exception_handling(parser) do
  raise Trollop::HelpNeeded if ARGV.empty?

  parsed = parser.parse ARGV
  %w{ email token zk_servers }.each do |f|
    next if parsed[f.to_sym]

    # Option symbols use underscores but the command-line flags use dashes,
    # so report the flag exactly as the user has to type it.
    $stderr.puts "Error: Must specify option --%s." % [f.tr('_', '-')]
    puts
    raise Trollop::HelpNeeded
  end
  parsed
end
71
+
72
# Timestamp (epoch seconds) later passed as :measure_time to the submit queue.
$mt = Time.now.tv_sec
floor_secs = opts[:floor_in_secs]
if floor_secs
  # A zero interval would raise ZeroDivisionError and a negative one would
  # round the timestamp the wrong way, so reject both up front.
  unless floor_secs > 0
    $stderr.puts "Error: --floor-in-secs must be a positive number of seconds."
    exit 1
  end
  # Integer division truncates to the start of the enclosing interval.
  $mt = ($mt / floor_secs) * floor_secs
end

# Librato Metrics client shared by every queue created below.
endpoint = "https://#{opts[:api]}"
$client = Librato::Metrics::Client.new
$client.api_endpoint = endpoint
$client.authenticate opts[:email], opts[:token]

# Build "host:port,host:port" from the comma separated server list, ignoring
# stray whitespace and empty entries (e.g. "zk1, zk2," from a shell script).
zk_hosts = opts[:zk_servers].split(",").
  map { |s| s.strip }.
  reject { |s| s.empty? }
if zk_hosts.empty?
  $stderr.puts "Error: --zk-servers does not name any ZooKeeper server."
  exit 1
end
zk_connect_str = zk_hosts.map { |s| "#{s}:#{opts[:zk_port]}" }.join(",")

$z = Zookeeper.new(zk_connect_str)
86
+
87
# Submit the queue +q+ and return the result of its #submit call.
# Any StandardError raised while submitting is reported on stderr and
# terminates the process with exit status 1.
def submit(q)
  q.submit
rescue StandardError => failure
  $stderr.puts(format("Failed to submit stats to Librato Metrics: %s", failure.message))
  exit(1)
end
96
+
97
# Yield every child of the ZooKeeper node at +path+ as (name, full_path).
#
# path - ZooKeeper path to list. Trailing slashes are tolerated and "/" lists
#        the root, so child paths never contain "//".
# blk  - block called once per child with the child name and its full path.
#
# Reads the connection from the global $z. Prints an error and exits with
# status 1 when the listing call reports a non-zero return code.
# Returns the list of child names.
def with_children(path, &blk)
  # Strip trailing slashes: the root becomes "" so that "#{base}/#{k}" is
  # always a well-formed absolute path.
  base = path.to_s.sub(%r{/+\z}, '')
  lookup = base.empty? ? '/' : base

  kids = $z.get_children(:path => lookup)
  if kids[:rc] != 0
    $stderr.puts "Unable to list children at path: #{path}"
    exit 1
  end

  (kids[:children] || []).each { |k| yield(k, "#{base}/#{k}") }
end
105
+
106
# Port used for MX4J requests when a caller does not supply one.
DEFAULT_MX4J_PORT = 8082

# Build a Faraday connection to the MX4J HTTP endpoint on +host+.
#
# host - broker host name or address.
# port - MX4J port; defaults to DEFAULT_MX4J_PORT so existing one-argument
#        callers keep their behavior.
#
# Responses are parsed with the :xml response middleware.
def http(host, port = DEFAULT_MX4J_PORT)
  Faraday.new("http://#{host}:#{port}") do |conn|
    conn.response :xml

    conn.adapter Faraday.default_adapter
  end
end

# Fetch the MBean named +beanname+ from the MX4J endpoint on +host+.
#
# host     - broker host name or address.
# beanname - JMX object name, sent as the `objectname` query parameter.
# port     - MX4J port (optional, see #http).
#
# Returns the Faraday response.
# Raises Errno::ETIMEDOUT on a Faraday timeout and Errno::EHOSTUNREACH when
# the connection fails, so callers only need to rescue the Errno classes.
def bean_lookup(host, beanname, port = DEFAULT_MX4J_PORT)
  http(host, port).get("/mbean") do |req|
    req.params = { :objectname => beanname, :template => 'identity' }
    req.options[:timeout] = 5
    req.options[:open_timeout] = 3
  end
rescue Faraday::Error::TimeoutError
  raise Errno::ETIMEDOUT
rescue Faraday::Error::ConnectionFailed
  raise Errno::EHOSTUNREACH
end

# Fetch the log MBean of one topic partition
# ("kafka:type=kafka.logs.<partname>-<partidx>") from +host+.
# +port+ is optional and forwarded to #bean_lookup.
def partition_bean_lookup(host, partname, partidx, port = DEFAULT_MX4J_PORT)
  bean_lookup(host, "kafka:type=kafka.logs.#{partname}-#{partidx}", port)
end
131
+
132
# Convert a parsed MX4J MBean response into a name => value Hash.
#
# jmxinfo - object whose #body is the parsed XML as nested Hashes, with the
#           attributes under body['MBean']['Attribute'].
#
# Attributes whose 'isnull' field is anything other than 'false' are skipped.
# 'long' and 'int' attributes become Integers, CompositeData attributes become
# a Hash of their `contents={...}` fields (values stay Strings), and every
# other attribute keeps its raw value.
#
# Returns a Hash; empty when the MBean has no attributes.
# Raises ArgumentError when the response has no MBean element.
def jmxinfo_to_attrs(jmxinfo)
  body = jmxinfo.body
  mbean = body.is_a?(Hash) ? body['MBean'] : nil
  unless mbean.is_a?(Hash)
    raise ArgumentError, "MX4J response does not contain an MBean element"
  end

  # An XML parser may collapse a single child element into a Hash instead of a
  # one-element Array, so normalize both shapes (and nil) to an Array.
  raw = mbean['Attribute']
  entries = raw.is_a?(Hash) ? [raw] : Array(raw)

  attrs = {}
  entries.each do |attr|
    next unless attr.is_a?(Hash)
    next if attr['isnull'] != 'false'

    if ['long', 'int'].include?(attr['type'])
      attrs[attr['name']] = Integer(attr['value'])
    elsif attr['type'].to_s =~ /CompositeData/
      # XXX: Hack out the contents of the composite type. This should
      # really be expanded in the XML.
      fields = attr['value'].gsub(/^.*,contents=\{/, '').gsub(/\}\)$/, '')
        .gsub(/[=,]/, '@').split("@").map { |u| u.strip }
      # Pair keys with values, dropping a dangling key with no value rather
      # than failing on an odd number of fields.
      pairs = fields.each_slice(2).select { |pair| pair.size == 2 }
      attrs[attr['name']] = Hash[pairs]
    else
      attrs[attr['name']] = attr['value']
    end
  end
  attrs
end
150
+
151
# Collect metrics for one spout partition and merge them into +submit_queue+.
#
# submit_queue - queue that receives this partition's measurements.
# hosts        - Hash updated in place; the broker host is recorded as a key
#                once its MX4J lookup has succeeded.
# opts         - parsed options; only opts[:prefix] is read here.
# part         - partition state Hash with 'broker' => {'host'}, 'topic',
#                'partition' and 'offset' (the spout's consumed offset).
#
# Adds bytes_vol and pending_bytes, plus bytes_per_msg, pending_msgs and
# msg_vol when the broker reports a positive NumAppendedMessages.
# Errors raised by partition_bean_lookup propagate to the caller.
def monitor_partition(submit_queue, hosts, opts, part)
  host = part['broker']['host']
  topic = part['topic']
  partition = part['partition']

  jmxinfo = partition_bean_lookup(host, topic, partition)

  # Only hosts that answered the MX4J lookup are tracked for host-level stats.
  hosts[host] = 1

  attrs = jmxinfo_to_attrs(jmxinfo)

  consumer = Kafka::Consumer.new({ :host => host,
                                   :topic => topic,
                                   :partition => partition })
  begin
    endoffset = consumer.fetch_latest_offset
  ensure
    # Release the broker connection instead of holding one per partition
    # until the process exits.
    consumer.disconnect if consumer.respond_to?(:disconnect)
  end

  q = $client.new_queue({ :prefix => "#{opts[:prefix]}topics.#{topic}",
                          :source => "#{host}_#{partition}" })

  q.add :bytes_vol => {:value => endoffset, :type => :counter}
  # The spout offset can be ahead of the broker's latest offset; never report
  # a negative backlog.
  diff = [endoffset - part['offset'], 0].max
  q.add :pending_bytes => {:value => diff}

  # The attribute is absent when the bean reports it as null; nil.to_i is 0,
  # which simply skips the per-message metrics instead of raising.
  appended = attrs['NumAppendedMessages'].to_i
  if appended > 0
    bytes_per_msg = Float(endoffset) / Float(appended)
    q.add :bytes_per_msg => {:value => bytes_per_msg}
    q.add :pending_msgs => {:value => Float(diff) / bytes_per_msg} if bytes_per_msg > 0.0
    q.add :msg_vol => {:value => appended, :type => :counter}
  end

  submit_queue.merge!(q)
end
186
+
187
# --- Partition-level collection ---------------------------------------------
# Walk <zk_prefix>/<spout>/<partition> in ZooKeeper, collect metrics for each
# partition node and count the partitions that could be monitored.
partitions = 0
hosts = {}
submit_queue = $client.new_queue(:measure_time => $mt)
with_children(opts[:zk_prefix]) do |_spout, fullsp|
  with_children(fullsp) do |part, fullpart|
    # The 'user' child is not a partition node.
    next if part == 'user'

    partinfo = $z.get(:path => fullpart)
    if partinfo[:rc] != 0
      $stderr.puts "Failed to lookup partition: #{fullpart}"
      exit 1
    end

    body = JSON.parse(partinfo[:data])
    begin
      monitor_partition(submit_queue, hosts, opts, body)
      partitions += 1
    rescue Errno::ETIMEDOUT, Errno::EHOSTUNREACH
      # Best effort: a broker that cannot be reached is left out of the
      # active partition count rather than failing the whole run.
    end
  end
end

submit_queue.add "#{opts[:prefix]}active_partitions" => {:value => partitions}

# --- Host-level collection --------------------------------------------------
# Query each broker recorded by monitor_partition for broker-wide and JVM
# heap statistics.
active_hosts = 0
hosts.each_pair do |host, _v|
  q = $client.new_queue({ :prefix => "#{opts[:prefix]}hosts",
                          :source => "#{host}" })
  begin
    jmxinfo = bean_lookup(host, "kafka:type=kafka.BrokerAllTopicStat")
    attrs = jmxinfo_to_attrs(jmxinfo)
    q.add :bytes_in => {:value => attrs['BytesIn'], :type => :counter}
    q.add :bytes_out => {:value => attrs['BytesOut'], :type => :counter}
    q.add :failed_fetch_request => {:value => attrs['FailedFetchRequest'], :type => :counter}
    q.add :failed_produce_request => {:value => attrs['FailedProduceRequest'], :type => :counter}
    q.add :messages_in => {:value => attrs['MessagesIn'], :type => :counter}

    jmxinfo = bean_lookup(host, "java.lang:type=Memory")
    attrs = jmxinfo_to_attrs(jmxinfo)
    attrs.each_pair do |name, values|
      # Only composite heap attributes are expanded; skip a heap attribute
      # that was not parsed into a Hash instead of crashing on each_pair.
      next unless name =~ /HeapMemory/ && values.respond_to?(:each_pair)
      values.each_pair do |attrn, attrv|
        q.add "memory.#{name.downcase}.#{attrn.downcase}" => {:value => Integer(attrv)}
      end
    end

    active_hosts += 1
  rescue Errno::ETIMEDOUT, Errno::EHOSTUNREACH
    # Drop the partially filled queue for a host that stopped responding.
    next
  end
  submit_queue.merge!(q)
end

submit_queue.add "#{opts[:prefix]}active_hosts" => {:value => active_hosts}

# Go through the submit helper so a failed post is reported as a one-line
# error with exit status 1 instead of an unhandled exception.
submit(submit_queue)
244
+
245
+ # Local Variables:
246
+ # mode: ruby
247
+ # End:
@@ -0,0 +1,8 @@
1
+ require "librato-storm-kafka/version"
2
+
3
# Top-level namespace shared by this gem's constants.
module Librato
  module Storm
    # Namespace for the storm-kafka collector. It is intentionally empty here;
    # the version constant is defined in librato-storm-kafka/version.
    module Kafka; end
  end
end
@@ -0,0 +1,7 @@
1
# Version constant for the librato-storm-kafka gem, read by the gemspec and
# by the command-line tool's version banner.
module Librato
  module Storm
    module Kafka
      # Current release of the gem.
      VERSION = '0.0.1'
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "librato-storm-kafka/version"
4
+
5
# Gem packaging metadata. File lists are taken from git, so the gem must be
# built from a git checkout.
Gem::Specification.new do |s|
  # Identity
  s.name = "librato-storm-kafka"
  s.version = Librato::Storm::Kafka::VERSION
  s.authors = ["Mike Heffner"]
  s.email = ["mike@librato.com"]
  s.homepage = ""
  s.summary = %q{Monitor storm-kafka topics}
  s.description = %q{Collects metrics from a storm-kafka setup and publishes them to Librato}

  s.rubyforge_project = "librato-storm-kafka"

  # Packaged files
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  bin_paths = `git ls-files -- bin/*`.split("\n")
  s.executables = bin_paths.map { |path| File.basename(path) }
  s.require_paths = ["lib"]

  # Runtime dependencies, declared in the same order as before.
  %w[
    zookeeper
    librato-metrics
    trollop
    yajl-ruby
    faraday_middleware
    multi_xml
    kafka-rb
  ].each { |dep| s.add_runtime_dependency dep }

  # Development-only dependencies.
  s.add_development_dependency "rake"
end
metadata ADDED
@@ -0,0 +1,189 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: librato-storm-kafka
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Mike Heffner
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-11-22 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: zookeeper
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: librato-metrics
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: trollop
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: yajl-ruby
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: faraday_middleware
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: multi_xml
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :runtime
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: kafka-rb
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: rake
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ description: Collects metrics from a storm-kafka setup and publishes them to Librato
143
+ email:
144
+ - mike@librato.com
145
+ executables:
146
+ - librato-storm-kafka
147
+ extensions: []
148
+ extra_rdoc_files: []
149
+ files:
150
+ - .gitignore
151
+ - Gemfile
152
+ - Gemfile.lock
153
+ - README.md
154
+ - Rakefile
155
+ - bin/librato-storm-kafka
156
+ - lib/librato-storm-kafka.rb
157
+ - lib/librato-storm-kafka/version.rb
158
+ - librato-storm-kafka.gemspec
159
+ homepage: ''
160
+ licenses: []
161
+ post_install_message:
162
+ rdoc_options: []
163
+ require_paths:
164
+ - lib
165
+ required_ruby_version: !ruby/object:Gem::Requirement
166
+ none: false
167
+ requirements:
168
+ - - ! '>='
169
+ - !ruby/object:Gem::Version
170
+ version: '0'
171
+ segments:
172
+ - 0
173
+ hash: -917739231179684100
174
+ required_rubygems_version: !ruby/object:Gem::Requirement
175
+ none: false
176
+ requirements:
177
+ - - ! '>='
178
+ - !ruby/object:Gem::Version
179
+ version: '0'
180
+ segments:
181
+ - 0
182
+ hash: -917739231179684100
183
+ requirements: []
184
+ rubyforge_project: librato-storm-kafka
185
+ rubygems_version: 1.8.24
186
+ signing_key:
187
+ specification_version: 3
188
+ summary: Monitor storm-kafka topics
189
+ test_files: []