kafka-consumer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+ metadata.gz: ad31d35f34d283a73bc8865fc90e1748ecc48ffd
+ data.tar.gz: 58fe93f719be96c3276604973b12887f42eb00f5
+ SHA512:
+ metadata.gz: 1ac246b560fe2f8ed24f71d9f32a0323f7d79dbbd1643a52e17fb2e524c027166f87dc12ad136ddcee0343c012fe27932a82ed26513a7184bb030a8fc5d4a64b
+ data.tar.gz: 415dc58012fc48dba46a9d5ba34f6ea955fb78a194e3666c9d78f6e2767fdac76fd5decff989d7b4b52b1e31e054b3cca5b49abe337138ed9230ce3f82a92bdc
data/.gitignore ADDED
@@ -0,0 +1,14 @@
+ /.bundle/
+ /.yardoc
+ /Gemfile.lock
+ /_yardoc/
+ /coverage/
+ /doc/
+ /pkg/
+ /spec/reports/
+ /tmp/
+ *.bundle
+ *.so
+ *.o
+ *.a
+ mkmf.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
+ source 'https://rubygems.org'
+ gemspec
+
+ gem 'snappy'
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
+ Copyright (c) 2015 Willem van Bergen
+
+ MIT License
+
+ Permission is hereby granted, free of charge, to any person obtaining
+ a copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sublicense, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,43 @@
+ # Kafka::Consumer
+
+ High-level Kafka consumer for Ruby. Uses Zookeeper to manage load balancing, failover, and offset management.
+
+ A consumer group consists of multiple instances of the same consumer. Every instance registers itself in
+ Zookeeper. Based on the number of instances that are registered, an instance will start consuming some or
+ all of the partitions of the topics it wants to consume.
+
+ The distribution algorithm will make sure that every partition is only consumed by one consumer
+ instance at a time. It uses Zookeeper watches to be notified of new consumer instances coming
+ online or going offline, which will trigger a redistribution of all the partitions that are consumed.
+
+ Periodically, it will commit the last processed offset of every partition to Zookeeper. Whenever a
+ new consumer starts, it will resume consuming every partition at the last committed offset. This implements
+ an **at least once guarantee**, so it is possible that you end up consuming the same message more than once.
+ It's your responsibility to deal with this if that is a problem for you, e.g. by using idempotent operations.
+
+ ## Usage
+
+ First, add `kafka-consumer` to your **Gemfile**, and run `bundle install`.
+ If your messages are snappy-compressed, add the `snappy` gem as well.
+
+ ``` ruby
+ zookeeper = "zk1:2181,zk2:2181,zk3:2181"
+ name = "access-log-processor"
+ topics = ["access_log"]
+
+ consumer = Kafka::Consumer.new(name, topics, zookeeper: zookeeper)
+
+ Signal.trap("INT") { consumer.interrupt }
+
+ consumer.each do |message|
+   # process message
+ end
+ ```
+
+ ## Contributing
+
+ 1. Fork it ( https://github.com/wvanbergen/kafka-consumer/fork )
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
+ 4. Push to the branch (`git push origin my-new-feature`)
+ 5. Create a new Pull Request
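The README above describes an at-least-once guarantee: after a failover, a partition is resumed from the last committed offset, so any messages processed after that commit are delivered again. A minimal sketch of one way to deal with duplicates on the application side; `already_processed?`, `mark_processed`, and `process` are hypothetical helpers backed by whatever deduplication store the application uses:

```ruby
# Sketch only: skip messages that were already handled before a restart/failover.
# `already_processed?`, `mark_processed`, and `process` are hypothetical helpers,
# e.g. backed by a database table keyed on [topic, partition, offset].
zookeeper = "zk1:2181,zk2:2181,zk3:2181"
consumer  = Kafka::Consumer.new("access-log-processor", ["access_log"], zookeeper: zookeeper)

consumer.each do |message|
  key = [message.topic, message.partition, message.offset]
  next if already_processed?(key)   # duplicate from a replayed offset range

  process(message.value)            # the actual work; ideally idempotent itself
  mark_processed(key)
end
```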
data/Rakefile ADDED
@@ -0,0 +1,43 @@
+ require "bundler/gem_tasks"
+ require "kafka/consumer"
+ require "rake/testtask"
+ require "benchmark"
+
+ Rake::TestTask.new do |t|
+   t.libs = ["lib", "test"]
+   t.test_files = FileList['test/*_test.rb']
+ end
+
+ namespace :kafka do
+   task :consumer do
+     zookeeper = ENV["ZOOKEEPER"] or raise "Specify the ZOOKEEPER connection string."
+     name = ENV["NAME"] or raise "Specify NAME to name the consumergroup."
+     topics = ENV["TOPICS"] or raise "Specify the TOPICS you want to consume. Use comma as separator."
+
+     consumer = Kafka::Consumer.new(name, topics.split(','), zookeeper: zookeeper, initial_offset: :earliest_offset)
+
+     Signal.trap("TERM") { puts "TERM received"; consumer.interrupt }
+     Signal.trap("INT") { puts "INT received"; consumer.interrupt }
+
+     counter = 0
+     duration = Benchmark.realtime do
+       consumer.each do |event|
+         counter += 1
+         print "Consumed #{counter} messages.\n" if counter % 1000 == 0
+       end
+     end
+
+     puts
+     puts "%d messages consumed in %0.3fs (%0.3f msg/s)" % [counter, duration, counter.to_f / duration]
+   end
+
+   namespace :consumer do
+     task :reset do
+       zookeeper = ENV["ZOOKEEPER"] or raise "Specify the ZOOKEEPER connection string."
+       name = ENV["NAME"] or raise "Specify NAME to name the consumergroup."
+
+       consumer = Kafka::Consumer.new(name, [], zookeeper: zookeeper)
+       consumer.group.reset_offsets
+     end
+   end
+ end
data/bin/kazoo ADDED
@@ -0,0 +1,14 @@
+ #!/usr/bin/env ruby
+ $LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
+ require 'kazoo/cli'
+
+ begin
+   ENV["THOR_DEBUG"] = "1"
+   Kazoo::CLI.start(ARGV)
+ rescue Thor::UndefinedCommandError, Thor::UnknownArgumentError, Thor::AmbiguousCommandError, Thor::InvocationError => e
+   $stderr.puts(e.message)
+   exit(64)
+ rescue Thor::Error => e
+   $stderr.puts(e.message)
+   exit(1)
+ end
data/kafka-consumer.gemspec ADDED
@@ -0,0 +1,28 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'kafka/consumer/version'
+
+ Gem::Specification.new do |spec|
+   spec.name          = "kafka-consumer"
+   spec.version       = Kafka::Consumer::VERSION
+   spec.authors       = ["Willem van Bergen"]
+   spec.email         = ["willem@vanbergen.org"]
+   spec.summary       = %q{High-level consumer for Kafka}
+   spec.description   = %q{High-level consumer for Kafka. Implements the Zookeeper-backed consumer implementation that offers offset management, load balancing and automatic failovers.}
+   spec.homepage      = "https://github.com/wvanbergen/kafka-consumer"
+   spec.license       = "MIT"
+
+   spec.files         = `git ls-files -z`.split("\x0")
+   spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+   spec.require_paths = ["lib"]
+
+   spec.add_development_dependency "bundler", "~> 1.7"
+   spec.add_development_dependency "rake", "~> 10.0"
+   spec.add_development_dependency "minitest", "~> 5.0"
+   spec.add_development_dependency "mocha", "~> 1.0"
+
+   spec.add_runtime_dependency "poseidon", "~> 0.0.5"
+   spec.add_runtime_dependency "zookeeper", "~> 1.4"
+ end
data/lib/kafka/consumer/message.rb ADDED
@@ -0,0 +1,12 @@
+ module Kafka
+   class Consumer
+     class Message
+       attr_reader :topic, :partition, :offset, :key, :value
+
+       def initialize(topic, partition, fetched_message)
+         @topic, @partition = topic, partition
+         @key, @value, @offset = fetched_message.key, fetched_message.value, fetched_message.offset
+       end
+     end
+   end
+ end
data/lib/kafka/consumer/partition_consumer.rb ADDED
@@ -0,0 +1,146 @@
+ module Kafka
+   class Consumer
+     class PartitionConsumer
+
+       attr_reader :consumer, :partition, :handler, :max_wait_ms, :initial_offset,
+                   :commit_interval, :last_processed_offset, :last_committed_offset
+
+       def initialize(consumer, partition, handler: nil, max_wait_ms: 100, initial_offset: :latest_offset, commit_interval: 5.0)
+         @consumer, @partition, @handler = consumer, partition, handler
+         @initial_offset, @max_wait_ms, @commit_interval = initial_offset, max_wait_ms, commit_interval
+
+         @commit_mutex = Mutex.new
+
+         @consumer_thread = Thread.new do
+           Thread.current.abort_on_exception = true
+           manage_partition_consumer
+         end
+
+         Thread.new do
+           Thread.current.abort_on_exception = true
+           background_committer
+         end
+       end
+
+       def wait
+         @consumer_thread.join if @consumer_thread.alive?
+       end
+
+       def interrupt
+         @consumer_thread[:interrupted] = true
+         continue
+       end
+
+       def interrupted?
+         @consumer_thread[:interrupted]
+       end
+
+       def stop
+         interrupt
+         wait
+         consumer.logger.info "Consumer for #{partition.topic.name}/#{partition.id} stopped."
+       end
+
+       def continue
+         @consumer_thread.run if @consumer_thread.status == 'sleep'
+       end
+
+       def claim_partition
+         consumer.logger.info "Claiming partition #{partition.topic.name}/#{partition.id}..."
+         begin
+           other_instance, change = consumer.group.watch_partition_claim(partition) { continue }
+           if other_instance.nil?
+             consumer.instance.claim_partition(partition)
+           elsif other_instance == consumer.instance
+             raise Kazoo::Error, "Already claimed this partition myself. That should not happen"
+           else
+             consumer.logger.warn "Partition #{partition.topic.name}/#{partition.id} is still claimed by instance #{other_instance.id}. Waiting for the claim to be released..."
+             Thread.stop unless change.completed?
+
+             return false if interrupted?
+             raise Kazoo::PartitionAlreadyClaimed
+           end
+         rescue Kazoo::PartitionAlreadyClaimed
+           retry unless interrupted?
+         end
+
+         true
+       end
+
+       def commit_last_offset
+         @commit_mutex.synchronize do
+           if last_processed_offset && (last_committed_offset.nil? || last_committed_offset < last_processed_offset)
+             consumer.group.commit_offset(partition, last_processed_offset)
+             @last_committed_offset = last_processed_offset + 1
+           end
+         end
+       end
+
+       def background_committer
+         until interrupted?
+           commit_last_offset
+           sleep(commit_interval)
+         end
+       end
+
+       def manage_partition_consumer
+         # First, we will try to claim the partition in Zookeeper to ensure there's
+         # only one consumer for it simultaneously.
+         if claim_partition
+           @last_committed_offset = consumer.group.retrieve_offset(partition)
+           case start_offset = last_committed_offset || initial_offset
+           when :earliest_offset, -2
+             consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} at the earliest available offset..."
+           when :latest_offset, -1
+             consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} for new messages..."
+           else
+             consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} at offset #{start_offset}..."
+           end
+
+           begin
+             pc = Poseidon::PartitionConsumer.consumer_for_partition(
+               consumer.group.name,
+               consumer.cluster.brokers.values.map(&:addr),
+               partition.topic.name,
+               partition.id,
+               start_offset
+             )
+
+             until interrupted?
+               pc.fetch(max_wait_ms: max_wait_ms).each do |message|
+                 message = Message.new(partition.topic.name, partition.id, message)
+                 handler.call(message)
+                 @last_processed_offset = message.offset
+               end
+             end
+
+           rescue Poseidon::Errors::OffsetOutOfRange
+             pc.close
+
+             consumer.logger.warn "Offset #{start_offset} is no longer available for #{partition.topic.name}/#{partition.id}!"
+             case initial_offset
+             when :earliest_offset, -2
+               consumer.logger.warn "Instead, start consuming #{partition.topic.name}/#{partition.id} at the earliest available offset."
+             when :latest_offset, -1
+               consumer.logger.warn "Instead, start consuming #{partition.topic.name}/#{partition.id} for new messages only."
+             end
+
+             start_offset = initial_offset
+             retry
+
+           ensure
+             consumer.logger.debug "Stopping consumer for #{partition.topic.name}/#{partition.id}..."
+             pc.close
+           end
+
+
+           commit_last_offset
+           consumer.logger.info "Committed offset #{last_committed_offset - 1} for #{partition.topic.name}/#{partition.id}..." if last_committed_offset
+
+           consumer.instance.release_partition(partition)
+           consumer.logger.debug "Released claim for partition #{partition.topic.name}/#{partition.id}."
+         end
+       end
+     end
+   end
+ end
data/lib/kafka/consumer/version.rb ADDED
@@ -0,0 +1,5 @@
+ module Kafka
+   class Consumer
+     VERSION = "0.0.1"
+   end
+ end
data/lib/kafka/consumer.rb ADDED
@@ -0,0 +1,184 @@
+ require "kazoo"
+ require "poseidon"
+ require "thread"
+ require "logger"
+
+ require "kafka/consumer/message"
+ require "kafka/consumer/partition_consumer"
+ require "kafka/consumer/version"
+
+ module Kafka
+   class Consumer
+     BACKPRESSURE_MESSAGE_LIMIT = 1000
+
+     include Enumerable
+
+     attr_reader :subscription,
+                 :cluster, :group, :instance,
+                 :max_wait_ms, :initial_offset,
+                 :logger
+
+     def initialize(name, subscription, zookeeper: [], chroot: '', max_wait_ms: 200, initial_offset: :latest_offset, logger: nil)
+       @name, @subscription = name, subscription
+       @max_wait_ms, @initial_offset = max_wait_ms, initial_offset
+       @logger = logger || Logger.new($stdout)
+
+       @cluster = Kazoo::Cluster.new(zookeeper, chroot: chroot)
+       @group = Kazoo::Consumergroup.new(@cluster, name)
+       @group.create unless @group.exists?
+
+       @instance = @group.instantiate
+       @instance.register(topics)
+     end
+
+     def name
+       group.name
+     end
+
+     def id
+       instance.id
+     end
+
+     def topics
+       @topics ||= begin
+         topic_names = Array(subscription)
+         topic_names.map { |topic_name| cluster.topics.fetch(topic_name) }
+       end
+     end
+
+     def partitions
+       topics.flat_map(&:partitions).sort_by { |partition| [partition.leader.id, partition.topic.name, partition.id] }
+     end
+
+     def interrupt
+       Thread.new do
+         Thread.current.abort_on_exception = true
+
+         logger.info "Stopping partition consumers..."
+         @consumer_manager[:interrupted] = true
+
+         # Make sure to wake up the manager thread, so it can shut down
+         continue
+       end
+     end
+
+     def interrupted?
+       @consumer_manager[:interrupted]
+     end
+
+     def stop
+       interrupt
+       wait
+     end
+
+     def wait
+       @consumer_manager.join if @consumer_manager.alive?
+     end
+
+     def dead?
+       @consumer_manager.status == false
+     end
+
+     def each(&block)
+       mutex = Mutex.new
+
+       handler = lambda do |message|
+         mutex.synchronize do
+           block.call(message)
+         end
+       end
+
+       @consumer_manager = Thread.new do
+         Thread.current.abort_on_exception = true
+         manage_partition_consumers(handler)
+       end
+
+       wait
+     end
+
+     def self.distribute_partitions(instances, partitions)
+       return {} if instances.empty?
+       partitions_per_instance = partitions.length.to_f / instances.length.to_f
+
+       partitions.group_by.with_index do |partition, index|
+         instance_index = index.fdiv(partitions_per_instance).floor
+         instances[instance_index]
+       end
+     end
+
+     private
+
+     def continue
+       @consumer_manager.run if @consumer_manager.status == 'sleep'
+     end
+
+     def manage_partition_consumers(handler)
+       logger.info "Registered for #{group.name} as #{instance.id}"
+
+       @partition_consumers = {}
+
+       until interrupted?
+         running_instances, change = group.watch_instances { continue }
+         logger.info "#{running_instances.length} instances have been registered: #{running_instances.map(&:id).join(', ')}."
+
+         # Distribute the partitions over the running instances. Afterwards, we can see
+         # what partitions are assigned to this particular instance. Because all instances
+         # run the same algorithm on the same sorted lists of instances and partitions,
+         # all instances should be in agreement about the distribution.
+         distributed_partitions = self.class.distribute_partitions(running_instances, partitions)
+         my_partitions = distributed_partitions[@instance]
+
+         logger.info "Claiming #{my_partitions.length} out of #{partitions.length} partitions."
+
+         # Based on what partitions we should be consuming and the partitions
+         # that we already are consuming, figure out what partition consumers
+         # to stop and start.
+         partitions_to_stop = @partition_consumers.keys - my_partitions
+         partitions_to_start = my_partitions - @partition_consumers.keys
+
+         # Stop the partition consumers we should no longer be running in parallel
+         if partitions_to_stop.length > 0
+           logger.info "Stopping #{partitions_to_stop.length} out of #{@partition_consumers.length} partition consumers."
+
+           threads = []
+           partitions_to_stop.each do |partition|
+             partition_consumer = @partition_consumers.delete(partition)
+             threads << Thread.new { partition_consumer.stop }
+           end
+           threads.each(&:join)
+         end
+
+         # Start all the partition consumers we are missing.
+         if partitions_to_start.length > 0
+           logger.info "Starting #{partitions_to_start.length} new partition consumers."
+
+           partitions_to_start.each do |partition|
+             @partition_consumers[partition] = PartitionConsumer.new(self, partition,
+                 max_wait_ms: max_wait_ms, initial_offset: initial_offset, handler: handler)
+           end
+         end
+
+         unless change.completed?
+           logger.debug "Suspended consumer manager thread."
+           Thread.stop
+           logger.debug "Consumer manager thread woke up..."
+         end
+       end
+
+       logger.debug "Consumer interrupted."
+
+       # Stop all running partition consumers
+       threads = []
+       @partition_consumers.each_value do |partition_consumer|
+         threads << Thread.new { partition_consumer.stop }
+       end
+       threads.each(&:join)
+
+       # Deregister the instance. This should trigger a rebalance in all the remaining instances.
+       @instance.deregister
+       logger.debug "Consumer group instance #{instance.id} was deregistered"
+
+       cluster.close
+     end
+   end
+ end
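To illustrate how the `distribute_partitions` class method above splits the sorted partition list over the registered instances, here is a standalone sketch of the same index-based grouping, using plain strings in place of the Kazoo instance and partition objects (those stand-ins are an assumption for illustration only):

```ruby
# Same grouping logic as Kafka::Consumer.distribute_partitions, with plain strings
# instead of Kazoo instance/partition objects.
instances  = ["instance-a", "instance-b", "instance-c"]
partitions = (0...8).map { |i| "access_log/#{i}" }

per_instance = partitions.length.to_f / instances.length.to_f   # => 2.666...

assignment = partitions.group_by.with_index do |_partition, index|
  instances[index.fdiv(per_instance).floor]
end

# => {"instance-a"=>["access_log/0", "access_log/1", "access_log/2"],
#     "instance-b"=>["access_log/3", "access_log/4", "access_log/5"],
#     "instance-c"=>["access_log/6", "access_log/7"]}
```

Because every instance computes this mapping from the same sorted inputs, they all agree on who owns which partition without any coordination beyond the Zookeeper registrations.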
data/lib/kazoo/broker.rb ADDED
@@ -0,0 +1,68 @@
+ module Kazoo
+   class Broker
+     attr_reader :cluster, :id, :host, :port, :jmx_port
+
+     def initialize(cluster, id, host, port, jmx_port: nil)
+       @cluster = cluster
+       @id, @host, @port = id, host, port
+       @jmx_port = jmx_port
+     end
+
+     def led_partitions
+       result, threads, mutex = [], ThreadGroup.new, Mutex.new
+       cluster.partitions.each do |partition|
+         t = Thread.new do
+           select = partition.leader == self
+           mutex.synchronize { result << partition } if select
+         end
+         threads.add(t)
+       end
+       threads.list.each(&:join)
+       result
+     end
+
+     def replicated_partitions
+       result, threads, mutex = [], ThreadGroup.new, Mutex.new
+       cluster.partitions.each do |partition|
+         t = Thread.new do
+           select = partition.replicas.include?(self)
+           mutex.synchronize { result << partition } if select
+         end
+         threads.add(t)
+       end
+       threads.list.each(&:join)
+       result
+     end
+
+     def critical?(replicas: 1)
+       result, threads, mutex = false, ThreadGroup.new, Mutex.new
+       replicated_partitions.each do |partition|
+         t = Thread.new do
+           isr = partition.isr.reject { |r| r == self }
+           mutex.synchronize { result = true if isr.length < replicas }
+         end
+         threads.add(t)
+       end
+       threads.list.each(&:join)
+       result
+     end
+
+     def addr
+       "#{host}:#{port}"
+     end
+
+     def eql?(other)
+       other.is_a?(Kazoo::Broker) && other.cluster == self.cluster && other.id == self.id
+     end
+
+     alias_method :==, :eql?
+
+     def hash
+       [self.cluster, self.id].hash
+     end
+
+     def self.from_json(cluster, id, json)
+       new(cluster, id.to_i, json.fetch('host'), json.fetch('port'), jmx_port: json.fetch('jmx_port', nil))
+     end
+   end
+ end
data/lib/kazoo/cli.rb ADDED
@@ -0,0 +1,79 @@
+ require 'kazoo'
+ require 'thor'
+
+ module Kazoo
+   class CLI < Thor
+     class_option :zookeeper, :type => :string, :default => ENV['ZOOKEEPER_PEERS']
+     class_option :chroot, :type => :string, :default => ""
+
+     desc "cluster", "Describes the Kafka cluster as registered in Zookeeper"
+     def cluster
+       validate_class_options!
+
+       kafka_cluster.brokers.values.sort_by(&:id).each do |broker|
+         $stdout.puts "#{broker.id}:\t#{broker.addr}\t(hosts #{broker.replicated_partitions.length} partitions, leads #{broker.led_partitions.length})"
+       end
+     end
+
+     desc "topics", "Lists all topics in the cluster"
+     def topics
+       validate_class_options!
+
+       kafka_cluster.topics.values.sort_by(&:name).each do |topic|
+         $stdout.puts topic.name
+       end
+     end
+
+     option :topic, :type => :string
+     desc "partitions", "Lists partitions"
+     def partitions
+       validate_class_options!
+
+       topics = kafka_cluster.topics.values
+       topics.select! { |t| t.name == options[:topic] } if options[:topic]
+       topics.sort_by!(&:name)
+
+       topics.each do |topic|
+         topic.partitions.each do |partition|
+           $stdout.puts "#{partition.topic.name}/#{partition.id}\tReplicas: #{partition.replicas.map(&:id).join(",")}"
+         end
+       end
+     end
+
+     option :replicas, :type => :numeric, :default => 1
+     desc "critical <broker>", "Determine whether a broker is critical"
+     def critical(broker_name)
+       validate_class_options!
+
+       if broker(broker_name).critical?(replicas: options[:replicas])
+         raise Thor::Error, "WARNING: broker #{broker_name} is critical and cannot be stopped safely!"
+       else
+         $stdout.puts "Broker #{broker_name} is non-critical and can be stopped safely."
+       end
+     end
+
+
+     private
+
+     def validate_class_options!
+       if options[:zookeeper].nil? || options[:zookeeper] == ''
+         raise Thor::InvocationError, "Please supply --zookeeper argument, or set the ZOOKEEPER_PEERS environment variable"
+       end
+     end
+
+     def broker(name_or_id)
+       broker = if name_or_id =~ /\A\d+\z/
+         kafka_cluster.brokers[name_or_id.to_i]
+       else
+         kafka_cluster.brokers.values.detect { |b| b.addr == name_or_id } || kafka_cluster.brokers.values.detect { |b| b.host == name_or_id }
+       end
+
+       raise Thor::InvocationError, "Broker #{name_or_id.inspect} not found!" if broker.nil?
+       broker
+     end
+
+     def kafka_cluster
+       @kafka_cluster ||= Kazoo::Cluster.new(options[:zookeeper], chroot: options[:chroot])
+     end
+   end
+ end
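The `bin/kazoo` executable shown earlier simply hands ARGV to `Kazoo::CLI.start`, so the same Thor commands can also be driven from Ruby. A minimal sketch, assuming a reachable Zookeeper ensemble; the `zk1:2181,zk2:2181` address is a placeholder:

```ruby
require 'kazoo/cli'

# Equivalent to running: bin/kazoo cluster --zookeeper zk1:2181,zk2:2181
# Thor parses the array exactly as it would parse ARGV.
Kazoo::CLI.start(%w[cluster --zookeeper zk1:2181,zk2:2181])

# List the partitions of a single topic, as the `partitions` command does.
Kazoo::CLI.start(%w[partitions --topic access_log --zookeeper zk1:2181,zk2:2181])
```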