kafka-consumer 0.0.1
- checksums.yaml +7 -0
- data/.gitignore +14 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +43 -0
- data/Rakefile +43 -0
- data/bin/kazoo +14 -0
- data/kafka-consumer.gemspec +28 -0
- data/lib/kafka/consumer/message.rb +12 -0
- data/lib/kafka/consumer/partition_consumer.rb +146 -0
- data/lib/kafka/consumer/version.rb +5 -0
- data/lib/kafka/consumer.rb +184 -0
- data/lib/kazoo/broker.rb +68 -0
- data/lib/kazoo/cli.rb +79 -0
- data/lib/kazoo/cluster.rb +82 -0
- data/lib/kazoo/consumergroup.rb +229 -0
- data/lib/kazoo/partition.rb +62 -0
- data/lib/kazoo/topic.rb +46 -0
- data/lib/kazoo/version.rb +3 -0
- data/lib/kazoo.rb +19 -0
- data/test/broker_test.rb +45 -0
- data/test/cluster_test.rb +16 -0
- data/test/partition_distribution_test.rb +117 -0
- data/test/partition_test.rb +25 -0
- data/test/test_helper.rb +48 -0
- data/test/topic_test.rb +40 -0
- metadata +161 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: ad31d35f34d283a73bc8865fc90e1748ecc48ffd
+  data.tar.gz: 58fe93f719be96c3276604973b12887f42eb00f5
+SHA512:
+  metadata.gz: 1ac246b560fe2f8ed24f71d9f32a0323f7d79dbbd1643a52e17fb2e524c027166f87dc12ad136ddcee0343c012fe27932a82ed26513a7184bb030a8fc5d4a64b
+  data.tar.gz: 415dc58012fc48dba46a9d5ba34f6ea955fb78a194e3666c9d78f6e2767fdac76fd5decff989d7b4b52b1e31e054b3cca5b49abe337138ed9230ce3f82a92bdc
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2015 Willem van Bergen
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,43 @@
+# Kafka::Consumer
+
+High-level Kafka consumer for Ruby. Uses Zookeeper to manage load balancing, failover, and offset management.
+
+A consumer group consists of multiple instances of the same consumer. Every instance registers itself in
+Zookeeper. Based on the number of instances that are registered, an instance will start consuming some or
+all of the partitions of the topics it wants to consume.
+
+The distribution algorithm will make sure that every partition is only consumed by one consumer
+instance at a time. It uses Zookeeper watches to be notified of new consumer instances coming
+online or going offline, which will trigger a redistribution of all the partitions that are consumed.
+
+Periodically, it will commit the last processed offset of every partition to Zookeeper. Whenever a
+new consumer starts, it will resume consuming every partition at the last committed offset. This implements
+an **at least once guarantee**, so it is possible that you end up consuming the same message more than once.
+It's your responsibility to deal with this if that is a problem for you, e.g. by using idempotent operations.
+
+## Usage
+
+First, add `kafka-consumer` to your **Gemfile**, and run `bundle install`.
+If your messages are snappy-compressed, add the `snappy` gem as well.
+
+``` ruby
+zookeeper = "zk1:2181,zk2:2181,zk3:2181"
+name = "access-log-processor"
+topics = ["access_log"]
+
+consumer = Kafka::Consumer.new(name, topics, zookeeper: zookeeper)
+
+Signal.trap("INT") { consumer.interrupt }
+
+consumer.each do |message|
+  # process message
+end
+```
+
+## Contributing
+
+1. Fork it ( https://github.com/wvanbergen/kafka-consumer/fork )
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Add some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create a new Pull Request
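The at-least-once guarantee described in the README means a message can be delivered again after a restart or rebalance. A minimal deduplication sketch, assuming the `consumer` from the usage example above; the in-memory `Set` is only a stand-in for a store that would need to survive restarts:

``` ruby
require "set"

# Stand-in for a durable record of handled offsets; in practice this would
# live in a database or cache that outlives the consumer process.
handled = Set.new

consumer.each do |message|
  key = [message.topic, message.partition, message.offset]
  next if handled.include?(key) # already processed, skip the duplicate

  # ... process message.value ...

  handled.add(key)
end
```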
data/Rakefile
ADDED
@@ -0,0 +1,43 @@
+require "bundler/gem_tasks"
+require "kafka/consumer"
+require "rake/testtask"
+require "benchmark"
+
+Rake::TestTask.new do |t|
+  t.libs = ["lib", "test"]
+  t.test_files = FileList['test/*_test.rb']
+end
+
+namespace :kafka do
+  task :consumer do
+    zookeeper = ENV["ZOOKEEPER"] or raise "Specify the ZOOKEEPER connection string."
+    name = ENV["NAME"] or raise "Specify NAME to name the consumergroup."
+    topics = ENV["TOPICS"] or raise "Specify the TOPICS you want to consume. Use comma as separator."
+
+    consumer = Kafka::Consumer.new(name, topics.split(','), zookeeper: zookeeper, initial_offset: :earliest_offset)
+
+    Signal.trap("TERM") { puts "TERM received"; consumer.interrupt }
+    Signal.trap("INT") { puts "INT received"; consumer.interrupt }
+
+    counter = 0
+    duration = Benchmark.realtime do
+      consumer.each do |event|
+        counter += 1
+        print "Consumed #{counter} messages.\n" if counter % 1000 == 0
+      end
+    end
+
+    puts
+    puts "%d messages consumed in %0.3fs (%0.3f msg/s)" % [counter, duration, counter.to_f / duration]
+  end
+
+  namespace :consumer do
+    task :reset do
+      zookeeper = ENV["ZOOKEEPER"] or raise "Specify the ZOOKEEPER connection string."
+      name = ENV["NAME"] or raise "Specify NAME to name the consumergroup."
+
+      consumer = Kafka::Consumer.new(name, [], zookeeper: zookeeper)
+      consumer.group.reset_offsets
+    end
+  end
+end
data/bin/kazoo
ADDED
@@ -0,0 +1,14 @@
+#!/usr/bin/env ruby
+$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
+require 'kazoo/cli'
+
+begin
+  ENV["THOR_DEBUG"] = "1"
+  Kazoo::CLI.start(ARGV)
+rescue Thor::UndefinedCommandError, Thor::UnknownArgumentError, Thor::AmbiguousCommandError, Thor::InvocationError => e
+  $stderr.puts(e.message)
+  exit(64)
+rescue Thor::Error => e
+  $stderr.puts(e.message)
+  exit(1)
+end
data/kafka-consumer.gemspec
ADDED
@@ -0,0 +1,28 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'kafka/consumer/version'
+
+Gem::Specification.new do |spec|
+  spec.name = "kafka-consumer"
+  spec.version = Kafka::Consumer::VERSION
+  spec.authors = ["Willem van Bergen"]
+  spec.email = ["willem@vanbergen.org"]
+  spec.summary = %q{High-level consumer for Kafka}
+  spec.description = %q{High-level consumer for Kafka. Implements the Zookeeper-backed consumer implementation that offers offset management, load balancing and automatic failovers.}
+  spec.homepage = "https://github.com/wvanbergen/kafka-consumer"
+  spec.license = "MIT"
+
+  spec.files = `git ls-files -z`.split("\x0")
+  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+
+  spec.add_development_dependency "bundler", "~> 1.7"
+  spec.add_development_dependency "rake", "~> 10.0"
+  spec.add_development_dependency "minitest", "~> 5.0"
+  spec.add_development_dependency "mocha", "~> 1.0"
+
+  spec.add_runtime_dependency "poseidon", "~> 0.0.5"
+  spec.add_runtime_dependency "zookeeper", "~> 1.4"
+end
data/lib/kafka/consumer/message.rb
ADDED
@@ -0,0 +1,12 @@
+module Kafka
+  class Consumer
+    class Message
+      attr_reader :topic, :partition, :offset, :key, :value
+
+      def initialize(topic, partition, fetched_message)
+        @topic, @partition = topic, partition
+        @key, @value, @offset = fetched_message.key, fetched_message.value, fetched_message.offset
+      end
+    end
+  end
+end
data/lib/kafka/consumer/partition_consumer.rb
ADDED
@@ -0,0 +1,146 @@
+module Kafka
+  class Consumer
+    class PartitionConsumer
+
+      attr_reader :consumer, :partition, :handler, :max_wait_ms, :initial_offset,
+          :commit_interval, :last_processed_offset, :last_committed_offset
+
+      def initialize(consumer, partition, handler: nil, max_wait_ms: 100, initial_offset: :latest_offset, commit_interval: 5.0)
+        @consumer, @partition, @handler = consumer, partition, handler
+        @initial_offset, @max_wait_ms, @commit_interval = initial_offset, max_wait_ms, commit_interval
+
+        @commit_mutex = Mutex.new
+
+        @consumer_thread = Thread.new do
+          Thread.current.abort_on_exception = true
+          manage_partition_consumer
+        end
+
+        Thread.new do
+          Thread.current.abort_on_exception = true
+          background_committer
+        end
+      end
+
+      def wait
+        @consumer_thread.join if @consumer_thread.alive?
+      end
+
+      def interrupt
+        @consumer_thread[:interrupted] = true
+        continue
+      end
+
+      def interrupted?
+        @consumer_thread[:interrupted]
+      end
+
+      def stop
+        interrupt
+        wait
+        consumer.logger.info "Consumer for #{partition.topic.name}/#{partition.id} stopped."
+      end
+
+      def continue
+        @consumer_thread.run if @consumer_thread.status == 'sleep'
+      end
+
+      def claim_partition
+        consumer.logger.info "Claiming partition #{partition.topic.name}/#{partition.id}..."
+        begin
+          other_instance, change = consumer.group.watch_partition_claim(partition) { continue }
+          if other_instance.nil?
+            consumer.instance.claim_partition(partition)
+          elsif other_instance == consumer.instance
+            raise Kazoo::Error, "Already claimed this partition myself. That should not happen"
+          else
+            consumer.logger.warn "Partition #{partition.topic.name}/#{partition.id} is still claimed by instance #{other_instance.id}. Waiting for the claim to be released..."
+            Thread.stop unless change.completed?
+
+            return false if interrupted?
+            raise Kazoo::PartitionAlreadyClaimed
+          end
+        rescue Kazoo::PartitionAlreadyClaimed
+          retry unless interrupted?
+        end
+
+        true
+      end
+
+      def commit_last_offset
+        @commit_mutex.synchronize do
+          if last_processed_offset && (last_committed_offset.nil? || last_committed_offset < last_processed_offset)
+            consumer.group.commit_offset(partition, last_processed_offset)
+            @last_committed_offset = last_processed_offset + 1
+          end
+        end
+      end
+
+      def background_committer
+        until interrupted?
+          commit_last_offset
+          sleep(commit_interval)
+        end
+      end
+
+      def manage_partition_consumer
+        # First, we will try to claim the partition in Zookeeper to ensure there's
+        # only one consumer for it simultaneously.
+        if claim_partition
+          @last_committed_offset = consumer.group.retrieve_offset(partition)
+          case start_offset = last_committed_offset || initial_offset
+          when :earliest_offset, -2
+            consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} at the earliest available offset..."
+          when :latest_offset, -1
+            consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} for new messages..."
+          else
+            consumer.logger.info "Starting consumer for #{partition.topic.name}/#{partition.id} at offset #{start_offset}..."
+          end
+
+          begin
+            pc = Poseidon::PartitionConsumer.consumer_for_partition(
+                  consumer.group.name,
+                  consumer.cluster.brokers.values.map(&:addr),
+                  partition.topic.name,
+                  partition.id,
+                  start_offset
+            )
+
+            until interrupted?
+              pc.fetch(max_wait_ms: max_wait_ms).each do |message|
+                message = Message.new(partition.topic.name, partition.id, message)
+                handler.call(message)
+                @last_processed_offset = message.offset
+              end
+            end
+
+          rescue Poseidon::Errors::OffsetOutOfRange
+            pc.close
+
+            consumer.logger.warn "Offset #{start_offset} is no longer available for #{partition.topic.name}/#{partition.id}!"
+            case initial_offset
+            when :earliest_offset, -2
+              consumer.logger.warn "Instead, start consuming #{partition.topic.name}/#{partition.id} at the earliest available offset."
+            when :latest_offset, -1
+              consumer.logger.warn "Instead, start consuming #{partition.topic.name}/#{partition.id} for new messages only."
+            end
+
+            start_offset = initial_offset
+            retry
+
+          ensure
+            consumer.logger.debug "Stopping consumer for #{partition.topic.name}/#{partition.id}..."
+            pc.close
+          end
+
+
+          commit_last_offset
+          consumer.logger.info "Committed offset #{last_committed_offset - 1} for #{partition.topic.name}/#{partition.id}..." if last_committed_offset
+
+          consumer.instance.release_partition(partition)
+          consumer.logger.debug "Released claim for partition #{partition.topic.name}/#{partition.id}."
+        end
+      end
+    end
+  end
+end
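As a quick illustration of the start-offset resolution in `manage_partition_consumer` above: an offset previously committed to Zookeeper always wins, and only when nothing has been committed yet does the configured `initial_offset` apply. A sketch with made-up values:

``` ruby
# No committed offset yet, so the configured symbol is used; Poseidon maps
# :earliest_offset and :latest_offset to Kafka's -2 and -1 sentinels.
last_committed_offset = nil
initial_offset = :earliest_offset
start_offset = last_committed_offset || initial_offset # => :earliest_offset

# Once an offset has been committed, it takes precedence over initial_offset.
last_committed_offset = 1524
start_offset = last_committed_offset || initial_offset # => 1524
```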
data/lib/kafka/consumer.rb
ADDED
@@ -0,0 +1,184 @@
+require "kazoo"
+require "poseidon"
+require "thread"
+require "logger"
+
+require "kafka/consumer/message"
+require "kafka/consumer/partition_consumer"
+require "kafka/consumer/version"
+
+module Kafka
+  class Consumer
+    BACKPRESSURE_MESSAGE_LIMIT = 1000
+
+    include Enumerable
+
+    attr_reader :subscription,
+        :cluster, :group, :instance,
+        :max_wait_ms, :initial_offset,
+        :logger
+
+    def initialize(name, subscription, zookeeper: [], chroot: '', max_wait_ms: 200, initial_offset: :latest_offset, logger: nil)
+      @name, @subscription = name, subscription
+      @max_wait_ms, @initial_offset = max_wait_ms, initial_offset
+      @logger = logger || Logger.new($stdout)
+
+      @cluster = Kazoo::Cluster.new(zookeeper, chroot: chroot)
+      @group = Kazoo::Consumergroup.new(@cluster, name)
+      @group.create unless @group.exists?
+
+      @instance = @group.instantiate
+      @instance.register(topics)
+    end
+
+    def name
+      group.name
+    end
+
+    def id
+      instance.id
+    end
+
+    def topics
+      @topics ||= begin
+        topic_names = Array(subscription)
+        topic_names.map { |topic_name| cluster.topics.fetch(topic_name) }
+      end
+    end
+
+    def partitions
+      topics.flat_map(&:partitions).sort_by { |partition| [partition.leader.id, partition.topic.name, partition.id] }
+    end
+
+    def interrupt
+      Thread.new do
+        Thread.current.abort_on_exception = true
+
+        logger.info "Stopping partition consumers..."
+        @consumer_manager[:interrupted] = true
+
+        # Make sure to wake up the manager thread, so it can shut down
+        continue
+      end
+    end
+
+    def interrupted?
+      @consumer_manager[:interrupted]
+    end
+
+    def stop
+      interrupt
+      wait
+    end
+
+    def wait
+      @consumer_manager.join if @consumer_manager.alive?
+    end
+
+    def dead?
+      @consumer_manager.status == false
+    end
+
+    def each(&block)
+      mutex = Mutex.new
+
+      handler = lambda do |message|
+        mutex.synchronize do
+          block.call(message)
+        end
+      end
+
+      @consumer_manager = Thread.new do
+        Thread.current.abort_on_exception = true
+        manage_partition_consumers(handler)
+      end
+
+      wait
+    end
+
+    def self.distribute_partitions(instances, partitions)
+      return {} if instances.empty?
+      partitions_per_instance = partitions.length.to_f / instances.length.to_f
+
+      partitions.group_by.with_index do |partition, index|
+        instance_index = index.fdiv(partitions_per_instance).floor
+        instances[instance_index]
+      end
+    end
+
+    private
+
+    def continue
+      @consumer_manager.run if @consumer_manager.status == 'sleep'
+    end
+
+    def manage_partition_consumers(handler)
+      logger.info "Registered for #{group.name} as #{instance.id}"
+
+      @partition_consumers = {}
+
+      until interrupted?
+        running_instances, change = group.watch_instances { continue }
+        logger.info "#{running_instances.length} instances have been registered: #{running_instances.map(&:id).join(', ')}."
+
+        # Distribute the partitions over the running instances. Afterwards, we can see
+        # what partitions are assigned to this particular instance. Because all instances
+        # run the same algorithm on the same sorted lists of instances and partitions,
+        # all instances should be in agreement of the distribution.
+        distributed_partitions = self.class.distribute_partitions(running_instances, partitions)
+        my_partitions = distributed_partitions[@instance]
+
+        logger.info "Claiming #{my_partitions.length} out of #{partitions.length} partitions."
+
+        # Based on what partitions we should be consuming and the partitions
+        # that we already are consuming, figure out what partition consumers
+        # to stop and start
+        partitions_to_stop = @partition_consumers.keys - my_partitions
+        partitions_to_start = my_partitions - @partition_consumers.keys
+
+        # Stop the partition consumers we should no longer be running in parallel
+        if partitions_to_stop.length > 0
+          logger.info "Stopping #{partitions_to_stop.length} out of #{@partition_consumers.length} partition consumers."
+
+          threads = []
+          partitions_to_stop.each do |partition|
+            partition_consumer = @partition_consumers.delete(partition)
+            threads << Thread.new { partition_consumer.stop }
+          end
+          threads.each(&:join)
+        end
+
+        # Start all the partition consumers we are missing.
+        if partitions_to_start.length > 0
+          logger.info "Starting #{partitions_to_start.length} new partition consumers."
+
+          partitions_to_start.each do |partition|
+            @partition_consumers[partition] = PartitionConsumer.new(self, partition,
+                max_wait_ms: max_wait_ms, initial_offset: initial_offset, handler: handler)
+          end
+        end
+
+        unless change.completed?
+          logger.debug "Suspended consumer manager thread."
+          Thread.stop
+          logger.debug "Consumer manager thread woke up..."
+        end
+      end
+
+      logger.debug "Consumer interrupted."
+
+      # Stop all running partition consumers
+      threads = []
+      @partition_consumers.each_value do |partition_consumer|
+        threads << Thread.new { partition_consumer.stop }
+      end
+      threads.each(&:join)
+
+      # Deregister the instance. This should trigger a rebalance in all the remaining instances.
+      @instance.deregister
+      logger.debug "Consumer group instance #{instance.id} was deregistered"
+
+      cluster.close
+    end
+  end
+end
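The comments in `manage_partition_consumers` above rely on `distribute_partitions` being deterministic across instances. A worked sketch with plain values (the real keys and entries are Kazoo instance and partition objects, not strings and integers):

``` ruby
instances = ["instance-a", "instance-b", "instance-c"]
partitions = (0...8).to_a

# partitions_per_instance = 8 / 3.0 ≈ 2.67, so indexes 0..2 map to the first
# instance, 3..5 to the second, and 6..7 to the third.
Kafka::Consumer.distribute_partitions(instances, partitions)
# => {"instance-a"=>[0, 1, 2], "instance-b"=>[3, 4, 5], "instance-c"=>[6, 7]}
```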
data/lib/kazoo/broker.rb
ADDED
@@ -0,0 +1,68 @@
+module Kazoo
+  class Broker
+    attr_reader :cluster, :id, :host, :port, :jmx_port
+
+    def initialize(cluster, id, host, port, jmx_port: nil)
+      @cluster = cluster
+      @id, @host, @port = id, host, port
+      @jmx_port = jmx_port
+    end
+
+    def led_partitions
+      result, threads, mutex = [], ThreadGroup.new, Mutex.new
+      cluster.partitions.each do |partition|
+        t = Thread.new do
+          select = partition.leader == self
+          mutex.synchronize { result << partition } if select
+        end
+        threads.add(t)
+      end
+      threads.list.each(&:join)
+      result
+    end
+
+    def replicated_partitions
+      result, threads, mutex = [], ThreadGroup.new, Mutex.new
+      cluster.partitions.each do |partition|
+        t = Thread.new do
+          select = partition.replicas.include?(self)
+          mutex.synchronize { result << partition } if select
+        end
+        threads.add(t)
+      end
+      threads.list.each(&:join)
+      result
+    end
+
+    def critical?(replicas: 1)
+      result, threads, mutex = false, ThreadGroup.new, Mutex.new
+      replicated_partitions.each do |partition|
+        t = Thread.new do
+          isr = partition.isr.reject { |r| r == self }
+          mutex.synchronize { result = true if isr.length < replicas }
+        end
+        threads.add(t)
+      end
+      threads.list.each(&:join)
+      result
+    end
+
+    def addr
+      "#{host}:#{port}"
+    end
+
+    def eql?(other)
+      other.is_a?(Kazoo::Broker) && other.cluster == self.cluster && other.id == self.id
+    end
+
+    alias_method :==, :eql?
+
+    def hash
+      [self.cluster, self.id].hash
+    end
+
+    def self.from_json(cluster, id, json)
+      new(cluster, id.to_i, json.fetch('host'), json.fetch('port'), jmx_port: json.fetch('jmx_port', nil))
+    end
+  end
+end
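A hedged usage sketch of `Kazoo::Broker#critical?`; the Zookeeper connection string is a placeholder and a reachable cluster is assumed:

``` ruby
require 'kazoo'

cluster = Kazoo::Cluster.new("zk1:2181,zk2:2181,zk3:2181", chroot: "")
broker = cluster.brokers.values.first

# true if stopping this broker would leave any partition it replicates with
# fewer than `replicas` other in-sync replicas.
broker.critical?(replicas: 1)
```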
data/lib/kazoo/cli.rb
ADDED
@@ -0,0 +1,79 @@
+require 'kazoo'
+require 'thor'
+
+module Kazoo
+  class CLI < Thor
+    class_option :zookeeper, :type => :string, :default => ENV['ZOOKEEPER_PEERS']
+    class_option :chroot, :type => :string, :default => ""
+
+    desc "cluster", "Describes the Kafka cluster as registered in Zookeeper"
+    def cluster
+      validate_class_options!
+
+      kafka_cluster.brokers.values.sort_by(&:id).each do |broker|
+        $stdout.puts "#{broker.id}:\t#{broker.addr}\t(hosts #{broker.replicated_partitions.length} partitions, leads #{broker.led_partitions.length})"
+      end
+    end
+
+    desc "topics", "Lists all topics in the cluster"
+    def topics
+      validate_class_options!
+
+      kafka_cluster.topics.values.sort_by(&:name).each do |topic|
+        $stdout.puts topic.name
+      end
+    end
+
+    option :topic, :type => :string
+    desc "partitions", "Lists partitions"
+    def partitions
+      validate_class_options!
+
+      topics = kafka_cluster.topics.values
+      topics.select! { |t| t.name == options[:topic] } if options[:topic]
+      topics.sort_by!(&:name)
+
+      topics.each do |topic|
+        topic.partitions.each do |partition|
+          $stdout.puts "#{partition.topic.name}/#{partition.id}\tReplicas: #{partition.replicas.map(&:id).join(",")}"
+        end
+      end
+    end
+
+    option :replicas, :type => :numeric, :default => 1
+    desc "critical <broker>", "Determine whether a broker is critical"
+    def critical(broker_name)
+      validate_class_options!
+
+      if broker(broker_name).critical?(replicas: options[:replicas])
+        raise Thor::Error, "WARNING: broker #{broker_name} is critical and cannot be stopped safely!"
+      else
+        $stdout.puts "Broker #{broker_name} is non-critical and can be stopped safely."
+      end
+    end
+
+
+    private
+
+    def validate_class_options!
+      if options[:zookeeper].nil? || options[:zookeeper] == ''
+        raise Thor::InvocationError, "Please supply --zookeeper argument, or set the ZOOKEEPER_PEERS environment variable"
+      end
+    end
+
+    def broker(name_or_id)
+      broker = if name_or_id =~ /\A\d+\z/
+        kafka_cluster.brokers[name_or_id.to_i]
+      else
+        kafka_cluster.brokers.values.detect { |b| b.addr == name_or_id } || cluster.brokers.values.detect { |b| b.host == name_or_id }
+      end
+
+      raise Thor::InvocationError, "Broker #{name_or_id.inspect} not found!" if broker.nil?
+      broker
+    end
+
+    def kafka_cluster
+      @kafka_cluster ||= Kazoo::Cluster.new(options[:zookeeper], chroot: options[:chroot])
+    end
+  end
+end