kafka-consumer 0.0.1

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: ad31d35f34d283a73bc8865fc90e1748ecc48ffd
+   data.tar.gz: 58fe93f719be96c3276604973b12887f42eb00f5
+ SHA512:
+   metadata.gz: 1ac246b560fe2f8ed24f71d9f32a0323f7d79dbbd1643a52e17fb2e524c027166f87dc12ad136ddcee0343c012fe27932a82ed26513a7184bb030a8fc5d4a64b
+   data.tar.gz: 415dc58012fc48dba46a9d5ba34f6ea955fb78a194e3666c9d78f6e2767fdac76fd5decff989d7b4b52b1e31e054b3cca5b49abe337138ed9230ce3f82a92bdc
data/.gitignore ADDED
@@ -0,0 +1,14 @@
+ /.bundle/
+ /.yardoc
+ /Gemfile.lock
+ /_yardoc/
+ /coverage/
+ /doc/
+ /pkg/
+ /spec/reports/
+ /tmp/
+ *.bundle
+ *.so
+ *.o
+ *.a
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+ gemspec
+
+ gem 'snappy'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2015 Willem van Bergen
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
+ # Kafka::Consumer
+
+ High-level Kafka consumer for Ruby. Uses Zookeeper for load balancing, failover, and offset management.
+
+ A consumer group consists of multiple instances of the same consumer. Every instance registers itself in
+ Zookeeper. Based on the number of instances that are registered, an instance will start consuming some or
+ all of the partitions of the topics it wants to consume.
+
+ The distribution algorithm makes sure that every partition is only consumed by one consumer
+ instance at a time. It uses Zookeeper watches to be notified of new consumer instances coming
+ online or going offline, which triggers a redistribution of all the partitions that are consumed.
+
+ Periodically, it will commit the last processed offset of every partition to Zookeeper. Whenever a
+ new consumer starts, it will resume consuming every partition at the last committed offset. This implements
+ an **at-least-once guarantee**, so it is possible that you end up consuming the same message more than once.
+ It's your responsibility to deal with this if that is a problem for you, e.g. by using idempotent operations.
+
+ ## Usage
+
+ First, add `kafka-consumer` to your **Gemfile**, and run `bundle install`.
+ If your messages are snappy-compressed, add the `snappy` gem as well.
+
+ ``` ruby
+ zookeeper = "zk1:2181,zk2:2181,zk3:2181"
+ name = "access-log-processor"
+ topics = ["access_log"]
+
+ consumer = Kafka::Consumer.new(name, topics, zookeeper: zookeeper)
+
+ Signal.trap("INT") { consumer.interrupt }
+
+ consumer.each do |message|
+   # process message
+ end
+ ```
+
+ ## Contributing
+
+ 1. Fork it ( https://github.com/wvanbergen/kafka-consumer/fork )
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
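The at-least-once note in the README above pairs naturally with an idempotent write pattern. The sketch below is illustrative only: `Store.upsert` is a hypothetical persistence call, and keying the write on topic/partition/offset means a redelivered message simply overwrites the same row.

``` ruby
require "json"

# Hypothetical idempotent handler. Store.upsert stands in for your own storage
# layer; re-processing a duplicate message rewrites the same key, so the
# at-least-once delivery guarantee does no harm.
consumer.each do |message|
  Store.upsert(
    key:     [message.topic, message.partition, message.offset].join(":"),
    payload: JSON.parse(message.value)
  )
end
```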
data/Rakefile ADDED
@@ -0,0 +1,43 @@
+ require "bundler/gem_tasks"
+ require "kafka/consumer"
+ require "rake/testtask"
+ require "benchmark"
+
+ Rake::TestTask.new do |t|
+   t.libs = ["lib", "test"]
+   t.test_files = FileList['test/*_test.rb']
+ end
+
+ namespace :kafka do
+   task :consumer do
+     zookeeper = ENV["ZOOKEEPER"] or raise "Specify the ZOOKEEPER connection string."
+     name = ENV["NAME"] or raise "Specify NAME to name the consumergroup."
+     topics = ENV["TOPICS"] or raise "Specify the TOPICS you want to consume. Use comma as separator."
+
+     consumer = Kafka::Consumer.new(name, topics.split(','), zookeeper: zookeeper, initial_offset: :earliest_offset)
+
+     Signal.trap("TERM") { puts "TERM received"; consumer.interrupt }
+     Signal.trap("INT") { puts "INT received"; consumer.interrupt }
+
+     counter = 0
+     duration = Benchmark.realtime do
+       consumer.each do |event|
+         counter += 1
+         print "Consumed #{counter} messages.\n" if counter % 1000 == 0
+       end
+     end
+
+     puts
+     puts "%d messages consumed in %0.3fs (%0.3f msg/s)" % [counter, duration, counter.to_f / duration]
+   end
+
+   namespace :consumer do
+     task :reset do
+       zookeeper = ENV["ZOOKEEPER"] or raise "Specify the ZOOKEEPER connection string."
+       name = ENV["NAME"] or raise "Specify NAME to name the consumergroup."
+
+       consumer = Kafka::Consumer.new(name, [], zookeeper: zookeeper)
+       consumer.group.reset_offsets
+     end
+   end
+ end
data/bin/kazoo ADDED
@@ -0,0 +1,14 @@
+ #!/usr/bin/env ruby
+ $LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
+ require 'kazoo/cli'
+
+ begin
+   ENV["THOR_DEBUG"] = "1"
+   Kazoo::CLI.start(ARGV)
+ rescue Thor::UndefinedCommandError, Thor::UnknownArgumentError, Thor::AmbiguousCommandError, Thor::InvocationError => e
+   $stderr.puts(e.message)
+   exit(64)
+ rescue Thor::Error => e
+   $stderr.puts(e.message)
+   exit(1)
+ end
data/kafka-consumer.gemspec ADDED
@@ -0,0 +1,28 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'kafka/consumer/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "kafka-consumer"
+   spec.version       = Kafka::Consumer::VERSION
+   spec.authors       = ["Willem van Bergen"]
+   spec.email         = ["willem@vanbergen.org"]
+   spec.summary       = %q{High-level consumer for Kafka}
+   spec.description   = %q{High-level consumer for Kafka. Implements the Zookeeper-backed consumer implementation that offers offset management, load balancing and automatic failovers.}
+   spec.homepage      = "https://github.com/wvanbergen/kafka-consumer"
+   spec.license       = "MIT"
+
+   spec.files         = `git ls-files -z`.split("\x0")
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.7"
+   spec.add_development_dependency "rake", "~> 10.0"
+   spec.add_development_dependency "minitest", "~> 5.0"
+   spec.add_development_dependency "mocha", "~> 1.0"
+
+   spec.add_runtime_dependency "poseidon", "~> 0.0.5"
+   spec.add_runtime_dependency "zookeeper", "~> 1.4"
+ end
data/lib/kafka/consumer/message.rb ADDED
@@ -0,0 +1,12 @@
+ module Kafka
+   class Consumer
+     class Message
+       attr_reader :topic, :partition, :offset, :key, :value
+
+       def initialize(topic, partition, fetched_message)
+         @topic, @partition = topic, partition
+         @key, @value, @offset = fetched_message.key, fetched_message.value, fetched_message.offset
+       end
+     end
+   end
+ end
data/lib/kafka/consumer/partition_consumer.rb ADDED
@@ -0,0 +1,146 @@
+ module Kafka
+   class Consumer
+     class PartitionConsumer
+
+       attr_reader :consumer, :partition, :handler, :max_wait_ms, :initial_offset,
+                   :commit_interval, :last_processed_offset, :last_committed_offset
+
+       def initialize(consumer, partition, handler: nil, max_wait_ms: 100, initial_offset: :latest_offset, commit_interval: 5.0)
+         @consumer, @partition, @handler = consumer, partition, handler
+         @initial_offset, @max_wait_ms, @commit_interval = initial_offset, max_wait_ms, commit_interval
+
+         @commit_mutex = Mutex.new
+
+         @consumer_thread = Thread.new do
+           Thread.current.abort_on_exception = true
+           manage_partition_consumer
+         end
+
+         Thread.new do
+           Thread.current.abort_on_exception = true
+           background_committer
+         end
+       end
+
+       def wait
+         @consumer_thread.join if @consumer_thread.alive?
+       end
+
+       def interrupt
+         @consumer_thread[:interrupted] = true
+         continue
+       end
+
+       def interrupted?
+         @consumer_thread[:interrupted]
+       end
+
+       def stop
+         interrupt
+         wait
+         consumer.logger.info "Consumer for #{partition.topic.name}/#{partition.id} stopped."
+       end
+
+       def continue
+         @consumer_thread.run if @consumer_thread.status == 'sleep'
+       end
+
+       def claim_partition
+         consumer.logger.info "Claiming partition #{partition.topic.name}/#{partition.id}..."
+         begin
+           other_instance, change = consumer.group.watch_partition_claim(partition) { continue }
+           if other_instance.nil?
+             consumer.instance.claim_partition(partition)
+           elsif other_instance == consumer.instance
+             raise Kazoo::Error, "Already claimed this partition myself. That should not happen"
+           else
+             consumer.logger.warn "Partition #{partition.topic.name}/#{partition.id} is still claimed by instance #{other_instance.id}. Waiting for the claim to be released..."
+             Thread.stop unless change.completed?
+
+             return false if interrupted?
+             raise Kazoo::PartitionAlreadyClaimed
+           end
+         rescue Kazoo::PartitionAlreadyClaimed
+           retry unless interrupted?
+         end
+
+         true
+       end
+
+       def commit_last_offset
+         @commit_mutex.synchronize do
+           if last_processed_offset && (last_committed_offset.nil? || last_committed_offset < last_processed_offset)
+             consumer.group.commit_offset(partition, last_processed_offset)
+             @last_committed_offset = last_processed_offset + 1
+           end
+         end
+       end
+
+       def background_committer
+         until interrupted?
+           commit_last_offset
+           sleep(commit_interval)
+         end
+       end
+
+       def manage_partition_consumer
+         # First, we will try to claim the partition in Zookeeper to ensure there's
+         # only one consumer for it simultaneously.
+         if claim_partition
+           @last_committed_offset = consumer.group.retrieve_offset(partition)
+           case start_offset = last_committed_offset || initial_offset
+           when :earliest_offset, -2
+             consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} at the earliest available offset..."
+           when :latest_offset, -1
+             consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} for new messages..."
+           else
+             consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} at offset #{start_offset}..."
+           end
+
+           begin
+             pc = Poseidon::PartitionConsumer.consumer_for_partition(
+               consumer.group.name,
+               consumer.cluster.brokers.values.map(&:addr),
+               partition.topic.name,
+               partition.id,
+               start_offset
+             )
+
+             until interrupted?
+               pc.fetch(max_wait_ms: max_wait_ms).each do |message|
+                 message = Message.new(partition.topic.name, partition.id, message)
+                 handler.call(message)
+                 @last_processed_offset = message.offset
+               end
+             end
+
+           rescue Poseidon::Errors::OffsetOutOfRange
+             pc.close
+
+             consumer.logger.warn "Offset #{start_offset} is no longer available for #{partition.topic.name}/#{partition.id}!"
+             case initial_offset
+             when :earliest_offset, -2
+               consumer.logger.warn "Instead, start consuming #{partition.topic.name}/#{partition.id} at the earliest available offset."
+             when :latest_offset, -1
+               consumer.logger.warn "Instead, start consuming #{partition.topic.name}/#{partition.id} for new messages only."
+             end
+
+             start_offset = initial_offset
+             retry
+
+           ensure
+             consumer.logger.debug "Stopping consumer for #{partition.topic.name}/#{partition.id}..."
+             pc.close
+           end
+
+
+           commit_last_offset
+           consumer.logger.info "Committed offset #{last_committed_offset - 1} for #{partition.topic.name}/#{partition.id}..." if last_committed_offset
+
+           consumer.instance.release_partition(partition)
+           consumer.logger.debug "Released claim for partition #{partition.topic.name}/#{partition.id}."
+         end
+       end
+     end
+   end
+ end
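As `manage_partition_consumer` above shows, a partition consumer resumes from the offset committed in Zookeeper when one exists; `initial_offset` only applies when no offset has been committed yet, or when the committed offset has expired and an `OffsetOutOfRange` error forces a reset. A minimal sketch of opting into the earliest offset, mirroring the Rakefile task (connection string and group name are illustrative):

``` ruby
# Partitions with a committed offset still resume from that offset;
# :earliest_offset only affects partitions without one.
consumer = Kafka::Consumer.new("access-log-processor", ["access_log"],
  zookeeper:      "zk1:2181,zk2:2181,zk3:2181",
  initial_offset: :earliest_offset)
```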
data/lib/kafka/consumer/version.rb ADDED
@@ -0,0 +1,5 @@
+ module Kafka
+   class Consumer
+     VERSION = "0.0.1"
+   end
+ end
data/lib/kafka/consumer.rb ADDED
@@ -0,0 +1,184 @@
+ require "kazoo"
+ require "poseidon"
+ require "thread"
+ require "logger"
+
+ require "kafka/consumer/message"
+ require "kafka/consumer/partition_consumer"
+ require "kafka/consumer/version"
+
+ module Kafka
+   class Consumer
+     BACKPRESSURE_MESSAGE_LIMIT = 1000
+
+     include Enumerable
+
+     attr_reader :subscription,
+                 :cluster, :group, :instance,
+                 :max_wait_ms, :initial_offset,
+                 :logger
+
+     def initialize(name, subscription, zookeeper: [], chroot: '', max_wait_ms: 200, initial_offset: :latest_offset, logger: nil)
+       @name, @subscription = name, subscription
+       @max_wait_ms, @initial_offset = max_wait_ms, initial_offset
+       @logger = logger || Logger.new($stdout)
+
+       @cluster = Kazoo::Cluster.new(zookeeper, chroot: chroot)
+       @group = Kazoo::Consumergroup.new(@cluster, name)
+       @group.create unless @group.exists?
+
+       @instance = @group.instantiate
+       @instance.register(topics)
+     end
+
+     def name
+       group.name
+     end
+
+     def id
+       instance.id
+     end
+
+     def topics
+       @topics ||= begin
+         topic_names = Array(subscription)
+         topic_names.map { |topic_name| cluster.topics.fetch(topic_name) }
+       end
+     end
+
+     def partitions
+       topics.flat_map(&:partitions).sort_by { |partition| [partition.leader.id, partition.topic.name, partition.id] }
+     end
+
+     def interrupt
+       Thread.new do
+         Thread.current.abort_on_exception = true
+
+         logger.info "Stopping partition consumers..."
+         @consumer_manager[:interrupted] = true
+
+         # Make sure to wake up the manager thread, so it can shut down
+         continue
+       end
+     end
+
+     def interrupted?
+       @consumer_manager[:interrupted]
+     end
+
+     def stop
+       interrupt
+       wait
+     end
+
+     def wait
+       @consumer_manager.join if @consumer_manager.alive?
+     end
+
+     def dead?
+       @consumer_manager.status == false
+     end
+
+     def each(&block)
+       mutex = Mutex.new
+
+       handler = lambda do |message|
+         mutex.synchronize do
+           block.call(message)
+         end
+       end
+
+       @consumer_manager = Thread.new do
+         Thread.current.abort_on_exception = true
+         manage_partition_consumers(handler)
+       end
+
+       wait
+     end
+
+     def self.distribute_partitions(instances, partitions)
+       return {} if instances.empty?
+       partitions_per_instance = partitions.length.to_f / instances.length.to_f
+
+       partitions.group_by.with_index do |partition, index|
+         instance_index = index.fdiv(partitions_per_instance).floor
+         instances[instance_index]
+       end
+     end
+
+     private
+
+     def continue
+       @consumer_manager.run if @consumer_manager.status == 'sleep'
+     end
+
+     def manage_partition_consumers(handler)
+       logger.info "Registered for #{group.name} as #{instance.id}"
+
+       @partition_consumers = {}
+
+       until interrupted?
+         running_instances, change = group.watch_instances { continue }
+         logger.info "#{running_instances.length} instances have been registered: #{running_instances.map(&:id).join(', ')}."
+
+         # Distribute the partitions over the running instances. Afterwards, we can see
+         # what partitions are assigned to this particular instance. Because all instances
+         # run the same algorithm on the same sorted lists of instances and partitions,
+         # all instances should be in agreement about the distribution.
+         distributed_partitions = self.class.distribute_partitions(running_instances, partitions)
+         my_partitions = distributed_partitions[@instance]
+
+         logger.info "Claiming #{my_partitions.length} out of #{partitions.length} partitions."
+
+         # Based on what partitions we should be consuming and the partitions
+         # we are already consuming, figure out which partition consumers
+         # to stop and start.
+         partitions_to_stop = @partition_consumers.keys - my_partitions
+         partitions_to_start = my_partitions - @partition_consumers.keys
+
+         # Stop, in parallel, the partition consumers we should no longer be running.
+         if partitions_to_stop.length > 0
+           logger.info "Stopping #{partitions_to_stop.length} out of #{@partition_consumers.length} partition consumers."
+
+           threads = []
+           partitions_to_stop.each do |partition|
+             partition_consumer = @partition_consumers.delete(partition)
+             threads << Thread.new { partition_consumer.stop }
+           end
+           threads.each(&:join)
+         end
+
+         # Start all the partition consumers we are missing.
+         if partitions_to_start.length > 0
+           logger.info "Starting #{partitions_to_start.length} new partition consumers."
+
+           partitions_to_start.each do |partition|
+             @partition_consumers[partition] = PartitionConsumer.new(self, partition,
+               max_wait_ms: max_wait_ms, initial_offset: initial_offset, handler: handler)
+           end
+         end
+
+         unless change.completed?
+           logger.debug "Suspended consumer manager thread."
+           Thread.stop
+           logger.debug "Consumer manager thread woke up..."
+         end
+       end
+
+       logger.debug "Consumer interrupted."
+
+       # Stop all running partition consumers
+       threads = []
+       @partition_consumers.each_value do |partition_consumer|
+         threads << Thread.new { partition_consumer.stop }
+       end
+       threads.each(&:join)
+
+       # Deregister the instance. This should trigger a rebalance in all the remaining instances.
+       @instance.deregister
+       logger.debug "Consumer group instance #{instance.id} was deregistered"
+
+       cluster.close
+     end
+   end
+ end
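`Kafka::Consumer.distribute_partitions` is a pure function of the two arrays it receives, which makes the distribution easy to check in isolation. A quick example with symbols and integers standing in for Kazoo instance and partition objects:

``` ruby
instances  = [:a, :b]
partitions = [0, 1, 2, 3, 4]

# 5 partitions / 2 instances = 2.5 partitions per instance, so partition
# indices 0..2 map to the first instance and 3..4 to the second.
Kafka::Consumer.distribute_partitions(instances, partitions)
# => {:a => [0, 1, 2], :b => [3, 4]}
```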
data/lib/kazoo/broker.rb ADDED
@@ -0,0 +1,68 @@
+ module Kazoo
+   class Broker
+     attr_reader :cluster, :id, :host, :port, :jmx_port
+
+     def initialize(cluster, id, host, port, jmx_port: nil)
+       @cluster = cluster
+       @id, @host, @port = id, host, port
+       @jmx_port = jmx_port
+     end
+
+     def led_partitions
+       result, threads, mutex = [], ThreadGroup.new, Mutex.new
+       cluster.partitions.each do |partition|
+         t = Thread.new do
+           select = partition.leader == self
+           mutex.synchronize { result << partition } if select
+         end
+         threads.add(t)
+       end
+       threads.list.each(&:join)
+       result
+     end
+
+     def replicated_partitions
+       result, threads, mutex = [], ThreadGroup.new, Mutex.new
+       cluster.partitions.each do |partition|
+         t = Thread.new do
+           select = partition.replicas.include?(self)
+           mutex.synchronize { result << partition } if select
+         end
+         threads.add(t)
+       end
+       threads.list.each(&:join)
+       result
+     end
+
+     def critical?(replicas: 1)
+       result, threads, mutex = false, ThreadGroup.new, Mutex.new
+       replicated_partitions.each do |partition|
+         t = Thread.new do
+           isr = partition.isr.reject { |r| r == self }
+           mutex.synchronize { result = true if isr.length < replicas }
+         end
+         threads.add(t)
+       end
+       threads.list.each(&:join)
+       result
+     end
+
+     def addr
+       "#{host}:#{port}"
+     end
+
+     def eql?(other)
+       other.is_a?(Kazoo::Broker) && other.cluster == self.cluster && other.id == self.id
+     end
+
+     alias_method :==, :eql?
+
+     def hash
+       [self.cluster, self.id].hash
+     end
+
+     def self.from_json(cluster, id, json)
+       new(cluster, id.to_i, json.fetch('host'), json.fetch('port'), jmx_port: json.fetch('jmx_port', nil))
+     end
+   end
+ end
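These threaded helpers back the `kazoo` CLI commands defined below. A minimal sketch of using them directly, assuming a reachable Zookeeper ensemble (the connection string is illustrative):

``` ruby
require "kazoo"

cluster = Kazoo::Cluster.new("zk1:2181,zk2:2181,zk3:2181", chroot: "")
broker  = cluster.brokers.values.first

puts broker.addr                     # "host:port" as registered in Zookeeper
puts broker.led_partitions.length    # partitions this broker currently leads
puts broker.critical?(replicas: 1)   # true if stopping it would leave a partition under-replicated
cluster.close
```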
data/lib/kazoo/cli.rb ADDED
@@ -0,0 +1,79 @@
+ require 'kazoo'
+ require 'thor'
+
+ module Kazoo
+   class CLI < Thor
+     class_option :zookeeper, :type => :string, :default => ENV['ZOOKEEPER_PEERS']
+     class_option :chroot, :type => :string, :default => ""
+
+     desc "cluster", "Describes the Kafka cluster as registered in Zookeeper"
+     def cluster
+       validate_class_options!
+
+       kafka_cluster.brokers.values.sort_by(&:id).each do |broker|
+         $stdout.puts "#{broker.id}:\t#{broker.addr}\t(hosts #{broker.replicated_partitions.length} partitions, leads #{broker.led_partitions.length})"
+       end
+     end
+
+     desc "topics", "Lists all topics in the cluster"
+     def topics
+       validate_class_options!
+
+       kafka_cluster.topics.values.sort_by(&:name).each do |topic|
+         $stdout.puts topic.name
+       end
+     end
+
+     option :topic, :type => :string
+     desc "partitions", "Lists partitions"
+     def partitions
+       validate_class_options!
+
+       topics = kafka_cluster.topics.values
+       topics.select! { |t| t.name == options[:topic] } if options[:topic]
+       topics.sort_by!(&:name)
+
+       topics.each do |topic|
+         topic.partitions.each do |partition|
+           $stdout.puts "#{partition.topic.name}/#{partition.id}\tReplicas: #{partition.replicas.map(&:id).join(",")}"
+         end
+       end
+     end
+
+     option :replicas, :type => :numeric, :default => 1
+     desc "critical <broker>", "Determine whether a broker is critical"
+     def critical(broker_name)
+       validate_class_options!
+
+       if broker(broker_name).critical?(replicas: options[:replicas])
+         raise Thor::Error, "WARNING: broker #{broker_name} is critical and cannot be stopped safely!"
+       else
+         $stdout.puts "Broker #{broker_name} is non-critical and can be stopped safely."
+       end
+     end
+
+
+     private
+
+     def validate_class_options!
+       if options[:zookeeper].nil? || options[:zookeeper] == ''
+         raise Thor::InvocationError, "Please supply --zookeeper argument, or set the ZOOKEEPER_PEERS environment variable"
+       end
+     end
+
+     def broker(name_or_id)
+       broker = if name_or_id =~ /\A\d+\z/
+         kafka_cluster.brokers[name_or_id.to_i]
+       else
+         kafka_cluster.brokers.values.detect { |b| b.addr == name_or_id } || kafka_cluster.brokers.values.detect { |b| b.host == name_or_id }
+       end
+
+       raise Thor::InvocationError, "Broker #{name_or_id.inspect} not found!" if broker.nil?
+       broker
+     end
+
+     def kafka_cluster
+       @kafka_cluster ||= Kazoo::Cluster.new(options[:zookeeper], chroot: options[:chroot])
+     end
+   end
+ end