kafka-consumer 0.0.1
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +43 -0
- data/Rakefile +43 -0
- data/bin/kazoo +14 -0
- data/kafka-consumer.gemspec +28 -0
- data/lib/kafka/consumer/message.rb +12 -0
- data/lib/kafka/consumer/partition_consumer.rb +146 -0
- data/lib/kafka/consumer/version.rb +5 -0
- data/lib/kafka/consumer.rb +184 -0
- data/lib/kazoo/broker.rb +68 -0
- data/lib/kazoo/cli.rb +79 -0
- data/lib/kazoo/cluster.rb +82 -0
- data/lib/kazoo/consumergroup.rb +229 -0
- data/lib/kazoo/partition.rb +62 -0
- data/lib/kazoo/topic.rb +46 -0
- data/lib/kazoo/version.rb +3 -0
- data/lib/kazoo.rb +19 -0
- data/test/broker_test.rb +45 -0
- data/test/cluster_test.rb +16 -0
- data/test/partition_distribution_test.rb +117 -0
- data/test/partition_test.rb +25 -0
- data/test/test_helper.rb +48 -0
- data/test/topic_test.rb +40 -0
- metadata +161 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA1:
  metadata.gz: ad31d35f34d283a73bc8865fc90e1748ecc48ffd
  data.tar.gz: 58fe93f719be96c3276604973b12887f42eb00f5
SHA512:
  metadata.gz: 1ac246b560fe2f8ed24f71d9f32a0323f7d79dbbd1643a52e17fb2e524c027166f87dc12ad136ddcee0343c012fe27932a82ed26513a7184bb030a8fc5d4a64b
  data.tar.gz: 415dc58012fc48dba46a9d5ba34f6ea955fb78a194e3666c9d78f6e2767fdac76fd5decff989d7b4b52b1e31e054b3cca5b49abe337138ed9230ce3f82a92bdc
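If you want to check these digests yourself: a `.gem` file is a plain tar archive containing `metadata.gz` and `data.tar.gz`. A minimal sketch, assuming you have unpacked the archive into the current directory (e.g. with `tar -xf kafka-consumer-0.0.1.gem`):

``` ruby
require "digest"

# Compare against the SHA1/SHA512 values published above.
puts Digest::SHA1.file("metadata.gz").hexdigest
puts Digest::SHA512.file("data.tar.gz").hexdigest
```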
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
Copyright (c) 2015 Willem van Bergen

MIT License

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,43 @@
# Kafka::Consumer

High-level Kafka consumer for Ruby. Uses Zookeeper for load balancing, failover, and offset management.

A consumer group consists of multiple instances of the same consumer. Every instance registers itself in
Zookeeper. Based on the number of instances that are registered, an instance will start consuming some or
all of the partitions of the topics it wants to consume.

The distribution algorithm will make sure that every partition is only consumed by one consumer
instance at a time. It uses Zookeeper watches to be notified of new consumer instances coming
online or going offline, which will trigger a redistribution of all the partitions that are consumed.

Periodically, it will commit the last processed offset of every partition to Zookeeper. Whenever a
new consumer starts, it will resume consuming every partition at the last committed offset. This implements
an **at least once guarantee**, so it is possible that you end up consuming the same message more than once.
It's your responsibility to deal with this if that is a problem for you, e.g. by using idempotent operations.

## Usage

First, add `kafka-consumer` to your **Gemfile**, and run `bundle install`.
If your messages are snappy-compressed, add the `snappy` gem as well.

``` ruby
zookeeper = "zk1:2181,zk2:2181,zk3:2181"
name      = "access-log-processor"
topics    = ["access_log"]

consumer = Kafka::Consumer.new(name, topics, zookeeper: zookeeper)

Signal.trap("INT") { consumer.interrupt }

consumer.each do |message|
  # process message
end
```

## Contributing

1. Fork it ( https://github.com/wvanbergen/kafka-consumer/fork )
2. Create your feature branch (`git checkout -b my-new-feature`)
3. Commit your changes (`git commit -am 'Add some feature'`)
4. Push to the branch (`git push origin my-new-feature`)
5. Create a new Pull Request
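The README's **at least once guarantee** means a consumer may see the same message twice after a rebalance or restart. A minimal sketch of one way to deduplicate, keeping processed `(topic, partition, offset)` keys in an in-memory `Set` — a real application would persist these keys instead, and `consumer` is assumed to be the instance from the usage example above:

``` ruby
require "set"

seen = Set.new

consumer.each do |message|
  key = [message.topic, message.partition, message.offset]
  next if seen.include?(key) # duplicate delivery; already handled

  # ... process the message here ...
  seen << key
end
```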
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
require "bundler/gem_tasks"
require "kafka/consumer"
require "rake/testtask"
require "benchmark"

Rake::TestTask.new do |t|
  t.libs = ["lib", "test"]
  t.test_files = FileList['test/*_test.rb']
end

namespace :kafka do
  task :consumer do
    zookeeper = ENV["ZOOKEEPER"] or raise "Specify the ZOOKEEPER connection string."
    name      = ENV["NAME"] or raise "Specify NAME to name the consumergroup."
    topics    = ENV["TOPICS"] or raise "Specify the TOPICS you want to consume. Use comma as separator."

    consumer = Kafka::Consumer.new(name, topics.split(','), zookeeper: zookeeper, initial_offset: :earliest_offset)

    Signal.trap("TERM") { puts "TERM received"; consumer.interrupt }
    Signal.trap("INT")  { puts "INT received"; consumer.interrupt }

    counter = 0
    duration = Benchmark.realtime do
      consumer.each do |event|
        counter += 1
        print "Consumed #{counter} messages.\n" if counter % 1000 == 0
      end
    end

    puts
    puts "%d messages consumed in %0.3fs (%0.3f msg/s)" % [counter, duration, counter.to_f / duration]
  end

  namespace :consumer do
    task :reset do
      zookeeper = ENV["ZOOKEEPER"] or raise "Specify the ZOOKEEPER connection string."
      name      = ENV["NAME"] or raise "Specify NAME to name the consumergroup."

      consumer = Kafka::Consumer.new(name, [], zookeeper: zookeeper)
      consumer.group.reset_offsets
    end
  end
end
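For reference, these tasks are configured entirely through environment variables. A typical invocation (all values are placeholders) would look like `ZOOKEEPER=zk1:2181,zk2:2181 NAME=access-log-processor TOPICS=access_log rake kafka:consumer`, and `ZOOKEEPER=... NAME=... rake kafka:consumer:reset` wipes the group's committed offsets so the next run starts from `initial_offset`.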
data/bin/kazoo
ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
require 'kazoo/cli'

begin
  ENV["THOR_DEBUG"] = "1"
  Kazoo::CLI.start(ARGV)
rescue Thor::UndefinedCommandError, Thor::UnknownArgumentError, Thor::AmbiguousCommandError, Thor::InvocationError => e
  $stderr.puts(e.message)
  exit(64)
rescue Thor::Error => e
  $stderr.puts(e.message)
  exit(1)
end
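Setting `THOR_DEBUG=1` makes Thor raise command errors instead of handling them internally, so the script above can map them to exit statuses itself: 64 (the conventional sysexits `EX_USAGE` value) for usage errors, and 1 for all other CLI failures.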
data/kafka-consumer.gemspec
ADDED
@@ -0,0 +1,28 @@
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'kafka/consumer/version'

Gem::Specification.new do |spec|
  spec.name          = "kafka-consumer"
  spec.version       = Kafka::Consumer::VERSION
  spec.authors       = ["Willem van Bergen"]
  spec.email         = ["willem@vanbergen.org"]
  spec.summary       = %q{High-level consumer for Kafka}
  spec.description   = %q{High-level consumer for Kafka. Implements the Zookeeper-backed consumer implementation that offers offset management, load balancing and automatic failovers.}
  spec.homepage      = "https://github.com/wvanbergen/kafka-consumer"
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "mocha", "~> 1.0"

  spec.add_runtime_dependency "poseidon", "~> 0.0.5"
  spec.add_runtime_dependency "zookeeper", "~> 1.4"
end
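For completeness, a minimal application Gemfile matching these dependencies; per the README, `snappy` is only needed when your topics carry snappy-compressed messages:

``` ruby
source "https://rubygems.org"

gem "kafka-consumer"
gem "snappy" # optional: only for snappy-compressed messages
```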
data/lib/kafka/consumer/message.rb
ADDED
@@ -0,0 +1,12 @@
module Kafka
  class Consumer
    class Message
      attr_reader :topic, :partition, :offset, :key, :value

      def initialize(topic, partition, fetched_message)
        @topic, @partition = topic, partition
        @key, @value, @offset = fetched_message.key, fetched_message.value, fetched_message.offset
      end
    end
  end
end
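The wrapper simply copies `key`, `value`, and `offset` off the fetched message and tags them with the topic and partition. A quick illustration, using a `Struct` as a stand-in for a Poseidon fetched message (only this one file needs to be loadable):

``` ruby
require "kafka/consumer/message"

# Stand-in for Poseidon's fetched message, which responds to key/value/offset.
FetchedMessage = Struct.new(:key, :value, :offset)

message = Kafka::Consumer::Message.new("access_log", 0, FetchedMessage.new("user-1", "GET /", 42))
message.topic     # => "access_log"
message.partition # => 0
message.offset    # => 42
```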
data/lib/kafka/consumer/partition_consumer.rb
ADDED
@@ -0,0 +1,146 @@
module Kafka
  class Consumer
    class PartitionConsumer

      attr_reader :consumer, :partition, :handler, :max_wait_ms, :initial_offset,
                  :commit_interval, :last_processed_offset, :last_committed_offset

      def initialize(consumer, partition, handler: nil, max_wait_ms: 100, initial_offset: :latest_offset, commit_interval: 5.0)
        @consumer, @partition, @handler = consumer, partition, handler
        @initial_offset, @max_wait_ms, @commit_interval = initial_offset, max_wait_ms, commit_interval

        @commit_mutex = Mutex.new

        @consumer_thread = Thread.new do
          Thread.current.abort_on_exception = true
          manage_partition_consumer
        end

        Thread.new do
          Thread.current.abort_on_exception = true
          background_committer
        end
      end

      def wait
        @consumer_thread.join if @consumer_thread.alive?
      end

      def interrupt
        @consumer_thread[:interrupted] = true
        continue
      end

      def interrupted?
        @consumer_thread[:interrupted]
      end

      def stop
        interrupt
        wait
        consumer.logger.info "Consumer for #{partition.topic.name}/#{partition.id} stopped."
      end

      def continue
        @consumer_thread.run if @consumer_thread.status == 'sleep'
      end

      def claim_partition
        consumer.logger.info "Claiming partition #{partition.topic.name}/#{partition.id}..."
        begin
          other_instance, change = consumer.group.watch_partition_claim(partition) { continue }
          if other_instance.nil?
            consumer.instance.claim_partition(partition)
          elsif other_instance == consumer.instance
            raise Kazoo::Error, "Already claimed this partition myself. That should not happen"
          else
            consumer.logger.warn "Partition #{partition.topic.name}/#{partition.id} is still claimed by instance #{other_instance.id}. Waiting for the claim to be released..."
            Thread.stop unless change.completed?

            return false if interrupted?
            raise Kazoo::PartitionAlreadyClaimed
          end
        rescue Kazoo::PartitionAlreadyClaimed
          retry unless interrupted?
        end

        true
      end

      def commit_last_offset
        @commit_mutex.synchronize do
          if last_processed_offset && (last_committed_offset.nil? || last_committed_offset < last_processed_offset)
            consumer.group.commit_offset(partition, last_processed_offset)
            @last_committed_offset = last_processed_offset + 1
          end
        end
      end

      def background_committer
        until interrupted?
          commit_last_offset
          sleep(commit_interval)
        end
      end

      def manage_partition_consumer
        # First, we will try to claim the partition in Zookeeper to ensure there's
        # only one consumer for it simultaneously.
        if claim_partition
          @last_committed_offset = consumer.group.retrieve_offset(partition)
          case start_offset = last_committed_offset || initial_offset
          when :earliest_offset, -2
            consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} at the earliest available offset..."
          when :latest_offset, -1
            consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} for new messages..."
          else
            consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} at offset #{start_offset}..."
          end

          begin
            pc = Poseidon::PartitionConsumer.consumer_for_partition(
              consumer.group.name,
              consumer.cluster.brokers.values.map(&:addr),
              partition.topic.name,
              partition.id,
              start_offset
            )

            until interrupted?
              pc.fetch(max_wait_ms: max_wait_ms).each do |message|
                message = Message.new(partition.topic.name, partition.id, message)
                handler.call(message)
                @last_processed_offset = message.offset
              end
            end

          rescue Poseidon::Errors::OffsetOutOfRange
            pc.close

            consumer.logger.warn "Offset #{start_offset} is no longer available for #{partition.topic.name}/#{partition.id}!"
            case initial_offset
            when :earliest_offset, -2
              consumer.logger.warn "Instead, start consuming #{partition.topic.name}/#{partition.id} at the earliest available offset."
            when :latest_offset, -1
              consumer.logger.warn "Instead, start consuming #{partition.topic.name}/#{partition.id} for new messages only."
            end

            start_offset = initial_offset
            retry

          ensure
            consumer.logger.debug "Stopping consumer for #{partition.topic.name}/#{partition.id}..."
            pc.close
          end

          commit_last_offset
          consumer.logger.info "Committed offset #{last_committed_offset - 1} for #{partition.topic.name}/#{partition.id}..." if last_committed_offset

          consumer.instance.release_partition(partition)
          consumer.logger.debug "Released claim for partition #{partition.topic.name}/#{partition.id}."
        end
      end
    end
  end
end
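Note how `commit_last_offset` only writes to Zookeeper when something new has been processed: after a commit, the in-memory marker is advanced to `last_processed_offset + 1`, so an idle partition never triggers redundant writes. The guard condition in isolation, with plain integers in place of the Zookeeper calls:

``` ruby
last_processed_offset = 41
last_committed_offset = nil

3.times do
  if last_processed_offset && (last_committed_offset.nil? || last_committed_offset < last_processed_offset)
    puts "committing offset #{last_processed_offset}"
    last_committed_offset = last_processed_offset + 1
  else
    puts "nothing new to commit"
  end
end
# Commits once, then prints "nothing new to commit" twice.
```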
data/lib/kafka/consumer.rb
ADDED
@@ -0,0 +1,184 @@
require "kazoo"
require "poseidon"
require "thread"
require "logger"

require "kafka/consumer/message"
require "kafka/consumer/partition_consumer"
require "kafka/consumer/version"

module Kafka
  class Consumer
    BACKPRESSURE_MESSAGE_LIMIT = 1000

    include Enumerable

    attr_reader :subscription,
                :cluster, :group, :instance,
                :max_wait_ms, :initial_offset,
                :logger

    def initialize(name, subscription, zookeeper: [], chroot: '', max_wait_ms: 200, initial_offset: :latest_offset, logger: nil)
      @name, @subscription = name, subscription
      @max_wait_ms, @initial_offset = max_wait_ms, initial_offset
      @logger = logger || Logger.new($stdout)

      @cluster = Kazoo::Cluster.new(zookeeper, chroot: chroot)
      @group = Kazoo::Consumergroup.new(@cluster, name)
      @group.create unless @group.exists?

      @instance = @group.instantiate
      @instance.register(topics)
    end

    def name
      group.name
    end

    def id
      instance.id
    end

    def topics
      @topics ||= begin
        topic_names = Array(subscription)
        topic_names.map { |topic_name| cluster.topics.fetch(topic_name) }
      end
    end

    def partitions
      topics.flat_map(&:partitions).sort_by { |partition| [partition.leader.id, partition.topic.name, partition.id] }
    end

    def interrupt
      Thread.new do
        Thread.current.abort_on_exception = true

        logger.info "Stopping partition consumers..."
        @consumer_manager[:interrupted] = true

        # Make sure to wake up the manager thread, so it can shut down
        continue
      end
    end

    def interrupted?
      @consumer_manager[:interrupted]
    end

    def stop
      interrupt
      wait
    end

    def wait
      @consumer_manager.join if @consumer_manager.alive?
    end

    def dead?
      @consumer_manager.status == false
    end

    def each(&block)
      mutex = Mutex.new

      handler = lambda do |message|
        mutex.synchronize do
          block.call(message)
        end
      end

      @consumer_manager = Thread.new do
        Thread.current.abort_on_exception = true
        manage_partition_consumers(handler)
      end

      wait
    end

    def self.distribute_partitions(instances, partitions)
      return {} if instances.empty?
      partitions_per_instance = partitions.length.to_f / instances.length.to_f

      partitions.group_by.with_index do |partition, index|
        instance_index = index.fdiv(partitions_per_instance).floor
        instances[instance_index]
      end
    end

    private

    def continue
      @consumer_manager.run if @consumer_manager.status == 'sleep'
    end

    def manage_partition_consumers(handler)
      logger.info "Registered for #{group.name} as #{instance.id}"

      @partition_consumers = {}

      until interrupted?
        running_instances, change = group.watch_instances { continue }
        logger.info "#{running_instances.length} instances have been registered: #{running_instances.map(&:id).join(', ')}."

        # Distribute the partitions over the running instances. Afterwards, we can see
        # what partitions are assigned to this particular instance. Because all instances
        # run the same algorithm on the same sorted lists of instances and partitions,
        # all instances should be in agreement on the distribution.
        distributed_partitions = self.class.distribute_partitions(running_instances, partitions)
        my_partitions = distributed_partitions[@instance]

        logger.info "Claiming #{my_partitions.length} out of #{partitions.length} partitions."

        # Based on what partitions we should be consuming and the partitions
        # that we already are consuming, figure out what partition consumers
        # to stop and start.
        partitions_to_stop  = @partition_consumers.keys - my_partitions
        partitions_to_start = my_partitions - @partition_consumers.keys

        # Stop the partition consumers we should no longer be running in parallel
        if partitions_to_stop.length > 0
          logger.info "Stopping #{partitions_to_stop.length} out of #{@partition_consumers.length} partition consumers."

          threads = []
          partitions_to_stop.each do |partition|
            partition_consumer = @partition_consumers.delete(partition)
            threads << Thread.new { partition_consumer.stop }
          end
          threads.each(&:join)
        end

        # Start all the partition consumers we are missing.
        if partitions_to_start.length > 0
          logger.info "Starting #{partitions_to_start.length} new partition consumers."

          partitions_to_start.each do |partition|
            @partition_consumers[partition] = PartitionConsumer.new(self, partition,
                max_wait_ms: max_wait_ms, initial_offset: initial_offset, handler: handler)
          end
        end

        unless change.completed?
          logger.debug "Suspended consumer manager thread."
          Thread.stop
          logger.debug "Consumer manager thread woke up..."
        end
      end

      logger.debug "Consumer interrupted."

      # Stop all running partition consumers
      threads = []
      @partition_consumers.each_value do |partition_consumer|
        threads << Thread.new { partition_consumer.stop }
      end
      threads.each(&:join)

      # Deregister the instance. This should trigger a rebalance in all the remaining instances.
      @instance.deregister
      logger.debug "Consumer group instance #{instance.id} was deregistered"

      cluster.close
    end
  end
end
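`distribute_partitions` deserves a closer look: `group_by.with_index` buckets partition `index` into instance slot `floor(index / partitions_per_instance)`, giving each instance a contiguous, near-equal slice. A self-contained illustration with plain strings and integers in place of real instance and partition objects:

``` ruby
instances  = ["instance-a", "instance-b", "instance-c"]
partitions = (0...8).to_a

per_instance = partitions.length.to_f / instances.length.to_f # ~2.67

distribution = partitions.group_by.with_index do |_partition, index|
  instances[index.fdiv(per_instance).floor]
end

p distribution
# => {"instance-a"=>[0, 1, 2], "instance-b"=>[3, 4, 5], "instance-c"=>[6, 7]}
```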
data/lib/kazoo/broker.rb
ADDED
@@ -0,0 +1,68 @@
module Kazoo
  class Broker
    attr_reader :cluster, :id, :host, :port, :jmx_port

    def initialize(cluster, id, host, port, jmx_port: nil)
      @cluster = cluster
      @id, @host, @port = id, host, port
      @jmx_port = jmx_port
    end

    def led_partitions
      result, threads, mutex = [], ThreadGroup.new, Mutex.new
      cluster.partitions.each do |partition|
        t = Thread.new do
          select = partition.leader == self
          mutex.synchronize { result << partition } if select
        end
        threads.add(t)
      end
      threads.list.each(&:join)
      result
    end

    def replicated_partitions
      result, threads, mutex = [], ThreadGroup.new, Mutex.new
      cluster.partitions.each do |partition|
        t = Thread.new do
          select = partition.replicas.include?(self)
          mutex.synchronize { result << partition } if select
        end
        threads.add(t)
      end
      threads.list.each(&:join)
      result
    end

    def critical?(replicas: 1)
      result, threads, mutex = false, ThreadGroup.new, Mutex.new
      replicated_partitions.each do |partition|
        t = Thread.new do
          isr = partition.isr.reject { |r| r == self }
          mutex.synchronize { result = true if isr.length < replicas }
        end
        threads.add(t)
      end
      threads.list.each(&:join)
      result
    end

    def addr
      "#{host}:#{port}"
    end

    def eql?(other)
      other.is_a?(Kazoo::Broker) && other.cluster == self.cluster && other.id == self.id
    end

    alias_method :==, :eql?

    def hash
      [self.cluster, self.id].hash
    end

    def self.from_json(cluster, id, json)
      new(cluster, id.to_i, json.fetch('host'), json.fetch('port'), jmx_port: json.fetch('jmx_port', nil))
    end
  end
end
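A hedged usage sketch for `critical?`: before stopping a broker for maintenance, check that no partition it replicates would drop below the required number of other in-sync replicas. The connection string and broker id are placeholders, and this assumes a reachable Zookeeper ensemble and that `Kazoo::Cluster.new` defaults its chroot:

``` ruby
require "kazoo"

cluster = Kazoo::Cluster.new("zk1:2181,zk2:2181,zk3:2181")
broker  = cluster.brokers.fetch(1) # brokers are keyed by integer id

if broker.critical?(replicas: 1)
  warn "Do not stop #{broker.addr}: a partition would lose its last other in-sync replica."
else
  puts "#{broker.addr} can be stopped safely."
end
```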
data/lib/kazoo/cli.rb
ADDED
@@ -0,0 +1,79 @@
require 'kazoo'
require 'thor'

module Kazoo
  class CLI < Thor
    class_option :zookeeper, :type => :string, :default => ENV['ZOOKEEPER_PEERS']
    class_option :chroot, :type => :string, :default => ""

    desc "cluster", "Describes the Kafka cluster as registered in Zookeeper"
    def cluster
      validate_class_options!

      kafka_cluster.brokers.values.sort_by(&:id).each do |broker|
        $stdout.puts "#{broker.id}:\t#{broker.addr}\t(hosts #{broker.replicated_partitions.length} partitions, leads #{broker.led_partitions.length})"
      end
    end

    desc "topics", "Lists all topics in the cluster"
    def topics
      validate_class_options!

      kafka_cluster.topics.values.sort_by(&:name).each do |topic|
        $stdout.puts topic.name
      end
    end

    option :topic, :type => :string
    desc "partitions", "Lists partitions"
    def partitions
      validate_class_options!

      topics = kafka_cluster.topics.values
      topics.select! { |t| t.name == options[:topic] } if options[:topic]
      topics.sort_by!(&:name)

      topics.each do |topic|
        topic.partitions.each do |partition|
          $stdout.puts "#{partition.topic.name}/#{partition.id}\tReplicas: #{partition.replicas.map(&:id).join(",")}"
        end
      end
    end

    option :replicas, :type => :numeric, :default => 1
    desc "critical <broker>", "Determine whether a broker is critical"
    def critical(broker_name)
      validate_class_options!

      if broker(broker_name).critical?(replicas: options[:replicas])
        raise Thor::Error, "WARNING: broker #{broker_name} is critical and cannot be stopped safely!"
      else
        $stdout.puts "Broker #{broker_name} is non-critical and can be stopped safely."
      end
    end


    private

    def validate_class_options!
      if options[:zookeeper].nil? || options[:zookeeper] == ''
        raise Thor::InvocationError, "Please supply --zookeeper argument, or set the ZOOKEEPER_PEERS environment variable"
      end
    end

    def broker(name_or_id)
      broker = if name_or_id =~ /\A\d+\z/
        kafka_cluster.brokers[name_or_id.to_i]
      else
        # Fall back from address to hostname lookup. (The original code called
        # `cluster` here, which is the Thor command above, not the cluster.)
        kafka_cluster.brokers.values.detect { |b| b.addr == name_or_id } || kafka_cluster.brokers.values.detect { |b| b.host == name_or_id }
      end

      raise Thor::InvocationError, "Broker #{name_or_id.inspect} not found!" if broker.nil?
      broker
    end

    def kafka_cluster
      @kafka_cluster ||= Kazoo::Cluster.new(options[:zookeeper], chroot: options[:chroot])
    end
  end
end
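Since `Kazoo::CLI` is a Thor class, it can also be driven programmatically, which is handy in tests. This mirrors running `bin/kazoo cluster --zookeeper zk1:2181` from a shell (the connection string is a placeholder):

``` ruby
require 'kazoo/cli'

Kazoo::CLI.start(%w[cluster --zookeeper zk1:2181])
```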