ruby-kafka 0.3.2 → 0.3.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +2 -2
- data/circle.yml +1 -1
- data/examples/firehose-consumer.rb +3 -0
- data/lib/kafka.rb +9 -0
- data/lib/kafka/broker.rb +0 -5
- data/lib/kafka/broker_pool.rb +4 -12
- data/lib/kafka/client.rb +24 -3
- data/lib/kafka/compressor.rb +3 -2
- data/lib/kafka/connection.rb +3 -3
- data/lib/kafka/connection_builder.rb +25 -0
- data/lib/kafka/consumer.rb +94 -104
- data/lib/kafka/consumer_group.rb +6 -0
- data/lib/kafka/fetched_batch.rb +13 -1
- data/lib/kafka/heartbeat.rb +16 -0
- data/lib/kafka/instrumenter.rb +25 -0
- data/lib/kafka/pending_message_queue.rb +7 -3
- data/lib/kafka/produce_operation.rb +22 -5
- data/lib/kafka/producer.rb +22 -8
- data/lib/kafka/round_robin_assignment_strategy.rb +3 -1
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +1 -1
- metadata +9 -7
- data/lib/kafka/instrumentation.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bf114586f8ec65d1db3ade1d082fc82de140f241
+  data.tar.gz: a202f5dd06b0339a8e3895e48824acba6d266ba5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4788eabf90baf70adb425860c98073058dd601247f1dd447aeb3d154025a5466c19130c7a8d00984ff0e9a5771f980a39a33ffec8bc4384695388999ceda2a13
+  data.tar.gz: 8f5572d8e9dcf46737236ac6a6467fc5d1bd60e091e889ffe056131c084bd47699c59bd404e11b0ce8ec3c5b5c800d8d07c318c6d3abd2e75585d4168079c70c
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,12 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## v0.3.3
+
+- Allow clearing a producer's buffer (Martin Nowak).
+- Improved Consumer API.
+- Instrument producer errors.
+
 ## v0.3.2
 
 - Experimental batch consumer API.
data/README.md
CHANGED
@@ -6,7 +6,7 @@ The Producer API is currently beta level and used in production. There's an alph
 
 Although parts of this library work with Kafka 0.8 – specifically, the Producer API – it's being tested and developed against Kafka 0.9. The Consumer API will be 0.9 only.
 
-
+## Table of Contents
 
 1. [Installation](#installation)
 2. [Usage](#usage)
@@ -244,7 +244,7 @@ producer = kafka.producer
 
 producer.produce("hello", topic: "greetings")
 
-# If this line fails with Kafka::DeliveryFailed we *may* have succeeded in
+# If this line fails with Kafka::DeliveryFailed we *may* have succeeded in delivering
 # the message to Kafka but won't know for sure.
 producer.deliver_messages
 
data/circle.yml
CHANGED
data/lib/kafka.rb
CHANGED
@@ -119,6 +119,15 @@ module Kafka
   class DeliveryFailed < Error
   end
 
+  class HeartbeatError < Error
+  end
+
+  class OffsetCommitError < Error
+  end
+
+  class FetchError < Error
+  end
+
   # Initializes a new Kafka client.
   #
   # @see Client#initialize
data/lib/kafka/broker.rb
CHANGED
@@ -4,11 +4,6 @@ require "kafka/protocol"
 
 module Kafka
   class Broker
-    def self.connect(node_id: nil, logger:, **options)
-      connection = Connection.new(logger: logger, **options)
-      new(connection: connection, node_id: node_id, logger: logger)
-    end
-
     def initialize(connection:, node_id: nil, logger:)
       @connection = connection
       @node_id = node_id
data/lib/kafka/broker_pool.rb
CHANGED
@@ -2,27 +2,19 @@ require "kafka/broker"
 
 module Kafka
   class BrokerPool
-    def initialize(
-      @client_id = client_id
-      @connect_timeout = connect_timeout
-      @socket_timeout = socket_timeout
+    def initialize(connection_builder:, logger:)
       @logger = logger
+      @connection_builder = connection_builder
       @brokers = {}
-      @ssl_context = ssl_context
     end
 
     def connect(host, port, node_id: nil)
       return @brokers.fetch(node_id) if @brokers.key?(node_id)
 
-      broker = Broker.
-
-        port: port,
+      broker = Broker.new(
+        connection: @connection_builder.build_connection(host, port),
         node_id: node_id,
-        client_id: @client_id,
-        connect_timeout: @connect_timeout,
-        socket_timeout: @socket_timeout,
         logger: @logger,
-        ssl_context: @ssl_context,
       )
 
       @brokers[node_id] = broker unless node_id.nil?
data/lib/kafka/client.rb
CHANGED
@@ -3,9 +3,12 @@ require "openssl"
 require "kafka/cluster"
 require "kafka/producer"
 require "kafka/consumer"
+require "kafka/heartbeat"
 require "kafka/async_producer"
 require "kafka/fetched_message"
 require "kafka/fetch_operation"
+require "kafka/connection_builder"
+require "kafka/instrumenter"
 
 module Kafka
   class Client
@@ -37,15 +40,22 @@ module Kafka
     # @return [Client]
     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil)
       @logger = logger || Logger.new(nil)
+      @instrumenter = Instrumenter.new(client_id: client_id)
 
       ssl_context = build_ssl_context(ssl_ca_cert, ssl_client_cert, ssl_client_cert_key)
 
-
+      connection_builder = ConnectionBuilder.new(
         client_id: client_id,
         connect_timeout: connect_timeout,
         socket_timeout: socket_timeout,
-        logger: @logger,
         ssl_context: ssl_context,
+        logger: @logger,
+        instrumenter: @instrumenter,
+      )
+
+      broker_pool = BrokerPool.new(
+        connection_builder: connection_builder,
+        logger: @logger,
       )
 
       @cluster = Cluster.new(
@@ -89,11 +99,13 @@ module Kafka
       compressor = Compressor.new(
         codec_name: compression_codec,
         threshold: compression_threshold,
+        instrumenter: @instrumenter,
       )
 
       Producer.new(
         cluster: @cluster,
         logger: @logger,
+        instrumenter: @instrumenter,
         compressor: compressor,
         ack_timeout: ack_timeout,
         required_acks: required_acks,
@@ -139,8 +151,10 @@ module Kafka
     # @param offset_commit_threshold [Integer] the number of messages that can be
     #   processed before their offsets are committed. If zero, offset commits are
     #   not triggered by message processing.
+    # @param heartbeat_interval [Integer] the interval between heartbeats; must be less
+    #   than the session window.
     # @return [Consumer]
-    def consumer(group_id:, session_timeout: 30, offset_commit_interval: 10, offset_commit_threshold: 0)
+    def consumer(group_id:, session_timeout: 30, offset_commit_interval: 10, offset_commit_threshold: 0, heartbeat_interval: 10)
       group = ConsumerGroup.new(
         cluster: @cluster,
         logger: @logger,
@@ -155,12 +169,19 @@ module Kafka
         commit_threshold: offset_commit_threshold,
       )
 
+      heartbeat = Heartbeat.new(
+        group: group,
+        interval: heartbeat_interval,
+      )
+
       Consumer.new(
         cluster: @cluster,
         logger: @logger,
+        instrumenter: @instrumenter,
         group: group,
         offset_manager: offset_manager,
         session_timeout: session_timeout,
+        heartbeat: heartbeat,
       )
     end
 
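In practice the new option is passed straight through Kafka::Client#consumer. A minimal sketch, assuming a local broker and a hypothetical "greeting-consumers" group — the interval just needs to stay well below the session timeout:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["localhost:9092"], client_id: "my-app")

    consumer = kafka.consumer(
      group_id: "greeting-consumers",
      session_timeout: 30,    # seconds before the coordinator considers the member dead
      heartbeat_interval: 10, # send a heartbeat at most every 10 seconds
    )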
data/lib/kafka/compressor.rb
CHANGED
@@ -20,9 +20,10 @@ module Kafka
     # @param codec_name [Symbol, nil]
     # @param threshold [Integer] the minimum number of messages in a message set
     #   that will trigger compression.
-    def initialize(codec_name:, threshold:)
+    def initialize(codec_name:, threshold:, instrumenter:)
       @codec = Compression.find_codec(codec_name)
       @threshold = threshold
+      @instrumenter = instrumenter
     end
 
     # @param message_set [Protocol::MessageSet]
@@ -45,7 +46,7 @@ module Kafka
     def compress_data(message_set)
       data = Protocol::Encoder.encode_with(message_set)
 
-
+      @instrumenter.instrument("compress.compressor") do |notification|
         compressed_data = @codec.compress(data)
 
         notification[:message_count] = message_set.size
data/lib/kafka/connection.rb
CHANGED
@@ -1,7 +1,6 @@
 require "stringio"
 require "kafka/socket_with_timeout"
 require "kafka/ssl_socket_with_timeout"
-require "kafka/instrumentation"
 require "kafka/protocol/request_message"
 require "kafka/protocol/encoder"
 require "kafka/protocol/decoder"
@@ -43,9 +42,10 @@ module Kafka
     #   broker. Default is 10 seconds.
     #
     # @return [Connection] a new connection.
-    def initialize(host:, port:, client_id:, logger:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
+    def initialize(host:, port:, client_id:, logger:, instrumenter:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
       @host, @port, @client_id = host, port, client_id
       @logger = logger
+      @instrumenter = instrumenter
 
       @connect_timeout = connect_timeout || CONNECT_TIMEOUT
       @socket_timeout = socket_timeout || SOCKET_TIMEOUT
@@ -82,7 +82,7 @@ module Kafka
         response_size: 0,
       }
 
-
+      @instrumenter.instrument("request.connection", notification) do
         open unless open?
 
         @correlation_id += 1
data/lib/kafka/connection_builder.rb
ADDED
@@ -0,0 +1,25 @@
+module Kafka
+  class ConnectionBuilder
+    def initialize(client_id:, logger:, instrumenter:, connect_timeout:, socket_timeout:, ssl_context:)
+      @client_id = client_id
+      @logger = logger
+      @instrumenter = instrumenter
+      @connect_timeout = connect_timeout
+      @socket_timeout = socket_timeout
+      @ssl_context = ssl_context
+    end
+
+    def build_connection(host, port)
+      Connection.new(
+        host: host,
+        port: port,
+        client_id: @client_id,
+        connect_timeout: @connect_timeout,
+        socket_timeout: @socket_timeout,
+        logger: @logger,
+        instrumenter: @instrumenter,
+        ssl_context: @ssl_context,
+      )
+    end
+  end
+end
data/lib/kafka/consumer.rb
CHANGED
@@ -43,15 +43,14 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, group:, offset_manager:, session_timeout:)
+    def initialize(cluster:, logger:, instrumenter:, group:, offset_manager:, session_timeout:, heartbeat:)
       @cluster = cluster
       @logger = logger
+      @instrumenter = instrumenter
       @group = group
       @offset_manager = offset_manager
       @session_timeout = session_timeout
-
-      # Send two heartbeats in each session window, just to be sure.
-      @heartbeat_interval = @session_timeout / 2
+      @heartbeat = heartbeat
 
       # Whether or not the consumer is currently consuming messages.
       @running = false
@@ -75,6 +74,10 @@ module Kafka
       nil
     end
 
+    def stop
+      @running = false
+    end
+
     # Fetches and enumerates the messages in the topics that the consumer group
     # subscribes to.
     #
@@ -86,96 +89,99 @@ module Kafka
     # that is tasked with taking over processing of these partitions will resume
     # at the last committed offsets.
     #
+    # @param min_bytes [Integer] the minimum number of bytes to read before
+    #   returning messages from the server; if `max_wait_time` is reached, this
+    #   is ignored.
+    # @param max_wait_time [Integer] the maximum duration of time to wait before
+    #   returning messages from the server, in seconds.
     # @yieldparam message [Kafka::FetchedMessage] a message fetched from Kafka.
     # @return [nil]
-    def each_message
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          yield message
-        end
-
-        @offset_manager.commit_offsets_if_necessary
-
-        send_heartbeat_if_necessary
-        mark_message_as_processed(message)
-
-        break if !@running
+    def each_message(min_bytes: 1, max_wait_time: 5)
+      consumer_loop do
+        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+
+        batches.each do |batch|
+          batch.messages.each do |message|
+            @instrumenter.instrument("process_message.consumer") do |notification|
+              notification.update(
+                topic: message.topic,
+                partition: message.partition,
+                offset: message.offset,
+                offset_lag: batch.highwater_mark_offset - message.offset,
+                key: message.key,
+                value: message.value,
+              )
+
+              yield message
             end
+
+            @offset_manager.commit_offsets_if_necessary
+
+            @heartbeat.send_if_necessary
+            mark_message_as_processed(message)
+
+            return if !@running
           end
-    rescue ConnectionError => e
-      @logger.error "Connection error while sending heartbeat; rejoining"
-      join_group
-    rescue UnknownMemberId
-      @logger.error "Kicked out of group; rejoining"
-      join_group
-    rescue RebalanceInProgress
-      @logger.error "Group is rebalancing; rejoining"
-      join_group
-    rescue IllegalGeneration
-      @logger.error "Group has transitioned to a new generation; rejoining"
-      join_group
         end
       end
-    ensure
-      # In order to quickly have the consumer group re-balance itself, it's
-      # important that members explicitly tell Kafka when they're leaving.
-      @offset_manager.commit_offsets
-      @group.leave
-      @running = false
     end
 
-
-
+    # Fetches and enumerates the messages in the topics that the consumer group
+    # subscribes to.
+    #
+    # Each batch of messages is yielded to the provided block. If the block returns
+    # without raising an exception, the batch will be considered successfully
+    # processed. At regular intervals the offset of the most recent successfully
+    # processed message batch in each partition will be committed to the Kafka
+    # offset store. If the consumer crashes or leaves the group, the group member
+    # that is tasked with taking over processing of these partitions will resume
+    # at the last committed offsets.
+    #
+    # @param min_bytes [Integer] the minimum number of bytes to read before
+    #   returning messages from the server; if `max_wait_time` is reached, this
+    #   is ignored.
+    # @param max_wait_time [Integer] the maximum duration of time to wait before
+    #   returning messages from the server, in seconds.
+    # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
+    # @return [nil]
+    def each_batch(min_bytes: 1, max_wait_time: 5)
+      consumer_loop do
+        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+
+        batches.each do |batch|
+          unless batch.empty?
+            @instrumenter.instrument("process_batch.consumer") do |notification|
+              notification.update(
+                topic: batch.topic,
+                partition: batch.partition,
+                highwater_mark_offset: batch.highwater_mark_offset,
+                message_count: batch.messages.count,
+              )
+
+              yield batch
+            end
+
+            mark_message_as_processed(batch.messages.last)
+          end
+
+          @offset_manager.commit_offsets_if_necessary
+
+          @heartbeat.send_if_necessary
+
+          return if !@running
+        end
+      end
     end
 
-
-    loop do
-      begin
-        fetch_batches.each do |batch|
-          unless batch.empty?
-            Instrumentation.instrument("process_batch.consumer.kafka") do |notification|
-              notification.update(
-                topic: batch.topic,
-                partition: batch.partition,
-                highwater_mark_offset: batch.highwater_mark_offset,
-                message_count: batch.messages.count,
-              )
-
-              yield batch
-            end
-
-            mark_message_as_processed(batch.messages.last)
-          end
+    private
 
-
+    def consumer_loop
+      @running = true
 
-
-
-
-
-        join_group
-      rescue UnknownMemberId
-        @logger.error "Kicked out of group; rejoining"
-        join_group
-      rescue RebalanceInProgress
-        @logger.error "Group is rebalancing; rejoining"
-        join_group
-      rescue IllegalGeneration
-        @logger.error "Group has transitioned to a new generation; rejoining"
+      while @running
+        begin
+          yield
+        rescue HeartbeatError, OffsetCommitError, FetchError
           join_group
         end
       end
@@ -184,29 +190,28 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       @offset_manager.commit_offsets
       @group.leave
+      @running = false
     end
 
-    private
-
     def join_group
       @offset_manager.clear_offsets
       @group.join
     end
 
-    def fetch_batches
+    def fetch_batches(min_bytes:, max_wait_time:)
      join_group unless @group.member?
 
       assigned_partitions = @group.assigned_partitions
 
-
+      @heartbeat.send_if_necessary
 
       raise "No partitions assigned!" if assigned_partitions.empty?
 
       operation = FetchOperation.new(
         cluster: @cluster,
         logger: @logger,
-        min_bytes:
-        max_wait_time:
+        min_bytes: min_bytes,
+        max_wait_time: max_wait_time,
       )
 
       assigned_partitions.each do |topic, partitions|
@@ -223,22 +228,7 @@ module Kafka
     rescue ConnectionError => e
       @logger.error "Connection error while fetching messages: #{e}"
 
-
-    end
-
-    # Sends a heartbeat if it would be necessary in order to avoid getting
-    # kicked out of the consumer group.
-    #
-    # Each consumer needs to send a heartbeat with a frequency defined by
-    # `session_timeout`.
-    #
-    def send_heartbeat_if_necessary
-      @last_heartbeat ||= Time.now
-
-      if Time.now > @last_heartbeat + @heartbeat_interval
-        @group.heartbeat
-        @last_heartbeat = Time.now
-      end
+      raise FetchError, e
     end
 
     def mark_message_as_processed(message)
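The reworked loop in use — a sketch that continues the consumer built above, assuming it has been subscribed to a hypothetical "greetings" topic; `min_bytes` and `max_wait_time` are now per-call fetch tuning knobs, and `stop` ends the loop cleanly:

    consumer.subscribe("greetings")

    # Stream messages one at a time; each fetch waits up to 5 seconds or until
    # roughly 10 kB has accumulated on the brokers, whichever comes first.
    consumer.each_message(min_bytes: 10_240, max_wait_time: 5) do |message|
      puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"

      # Flips the internal flag; the loop returns after the current message and
      # the consumer commits offsets and leaves the group.
      consumer.stop if message.value == "goodbye"
    end

    # Or process a whole partition batch at a time:
    consumer.each_batch do |batch|
      puts "#{batch.topic}/#{batch.partition}: #{batch.messages.count} messages, " \
           "highwater mark at #{batch.highwater_mark_offset}"
    end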
data/lib/kafka/consumer_group.rb
CHANGED
@@ -71,6 +71,9 @@ module Kafka
           Protocol.handle_error(error_code)
         end
       end
+    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+      @logger.error "Error committing offsets: #{e}"
+      raise OffsetCommitError, e
     end
 
     def heartbeat
@@ -83,6 +86,9 @@ module Kafka
       )
 
       Protocol.handle_error(response.error_code)
+    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+      @logger.error "Error sending heartbeat: #{e}"
+      raise HeartbeatError, e
     end
 
     private
data/lib/kafka/fetched_batch.rb
CHANGED
@@ -1,6 +1,18 @@
 module Kafka
+
+  # An ordered sequence of messages fetched from a Kafka partition.
   class FetchedBatch
-
+    # @return [String]
+    attr_reader :topic
+
+    # @return [Integer]
+    attr_reader :partition
+
+    # @return [Integer] the offset of the most recent message in the partition.
+    attr_reader :highwater_mark_offset
+
+    # @return [Array<Kafka::FetchedMessage>]
+    attr_reader :messages
 
     def initialize(topic:, partition:, highwater_mark_offset:, messages:)
       @topic = topic
data/lib/kafka/heartbeat.rb
ADDED
@@ -0,0 +1,16 @@
+module Kafka
+  class Heartbeat
+    def initialize(group:, interval:)
+      @group = group
+      @interval = interval
+      @last_heartbeat = Time.now
+    end
+
+    def send_if_necessary
+      if Time.now > @last_heartbeat + @interval
+        @group.heartbeat
+        @last_heartbeat = Time.now
+      end
+    end
+  end
+end
data/lib/kafka/instrumenter.rb
ADDED
@@ -0,0 +1,25 @@
+module Kafka
+  class Instrumenter
+    NAMESPACE = "kafka"
+
+    def initialize(default_payload = {})
+      @default_payload = default_payload
+
+      if defined?(ActiveSupport::Notifications)
+        @backend = ActiveSupport::Notifications
+      else
+        @backend = nil
+      end
+    end
+
+    def instrument(event_name, payload = {}, &block)
+      if @backend
+        payload.update(@default_payload)
+
+        @backend.instrument("#{event_name}.#{NAMESPACE}", payload, &block)
+      else
+        block.call(payload) if block
+      end
+    end
+  end
+end
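Event names are namespaced under "kafka" and the default payload carries the client ID, so an ActiveSupport::Notifications subscriber picks the events up without extra wiring. A sketch of a subscriber for the consumer's per-message event:

    require "active_support/notifications"

    ActiveSupport::Notifications.subscribe("process_message.consumer.kafka") do |name, start, finish, id, payload|
      elapsed_ms = (finish - start) * 1000
      puts "#{payload[:client_id]} processed #{payload[:topic]}/#{payload[:partition]}" \
           "@#{payload[:offset]} in #{elapsed_ms.round(2)} ms (lag: #{payload[:offset_lag]})"
    end

When ActiveSupport isn't loaded, the instrumenter simply invokes the block with the payload, so the library behaves the same either way.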
data/lib/kafka/pending_message_queue.rb
CHANGED
@@ -7,9 +7,7 @@ module Kafka
     attr_reader :size, :bytesize
 
     def initialize
-
-      @size = 0
-      @bytesize = 0
+      clear
     end
 
     def write(message)
@@ -22,6 +20,12 @@ module Kafka
       @messages.empty?
     end
 
+    def clear
+      @messages = []
+      @size = 0
+      @bytesize = 0
+    end
+
     # Yields each message in the queue to the provided block, removing the
     # message after the block has processed it. If the block raises an
     # exception, the message will be retained in the queue.
data/lib/kafka/produce_operation.rb
CHANGED
@@ -25,17 +25,18 @@ module Kafka
   # * `sent_message_count` – the number of messages that were successfully sent.
   #
   class ProduceOperation
-    def initialize(cluster:, buffer:, compressor:, required_acks:, ack_timeout:, logger:)
+    def initialize(cluster:, buffer:, compressor:, required_acks:, ack_timeout:, logger:, instrumenter:)
       @cluster = cluster
       @buffer = buffer
       @required_acks = required_acks
       @ack_timeout = ack_timeout
       @compressor = compressor
       @logger = logger
+      @instrumenter = instrumenter
     end
 
     def execute
-
+      @instrumenter.instrument("send_messages.producer") do |notification|
         message_count = @buffer.size
 
         notification[:message_count] = message_count
@@ -62,7 +63,13 @@ module Kafka
         messages_for_broker[broker] ||= MessageBuffer.new
         messages_for_broker[broker].concat(messages, topic: topic, partition: partition)
       rescue Kafka::Error => e
-        @logger.error "Could not connect to leader for partition #{topic}/#{partition}: #{e}"
+        @logger.error "Could not connect to leader for partition #{topic}/#{partition}: #{e.message}"
+
+        @instrumenter.instrument("partition_error.producer", {
+          topic: topic,
+          partition: partition,
+          exception: [e.class.to_s, e.message],
+        })
 
         # We can't send the messages right now, so we'll just keep them in the buffer.
         # We'll mark the cluster as stale in order to force a metadata refresh.
@@ -108,10 +115,20 @@ module Kafka
         ack_time = Time.now
 
         begin
-
+          begin
+            Protocol.handle_error(partition_info.error_code)
+          rescue ProtocolError => e
+            @instrumenter.instrument("partition_error.producer", {
+              topic: topic,
+              partition: partition,
+              exception: [e.class.to_s, e.message],
+            })
+
+            raise e
+          end
 
           messages.each do |message|
-
+            @instrumenter.instrument("ack_message.producer", {
              key: message.key,
              value: message.value,
              topic: topic,
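The new `partition_error.producer` event fires both when a partition leader can't be reached and when a broker returns an error code for a partition, with the exception encoded ActiveSupport-style as a `[class_name, message]` pair. A sketch of a subscriber:

    ActiveSupport::Notifications.subscribe("partition_error.producer.kafka") do |*, payload|
      exception_class, exception_message = payload[:exception]
      warn "Failed to deliver to #{payload[:topic]}/#{payload[:partition]}: " \
           "#{exception_class}: #{exception_message}"
    end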
data/lib/kafka/producer.rb
CHANGED
@@ -130,9 +130,10 @@ module Kafka
   #
   class Producer
 
-    def initialize(cluster:, logger:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+    def initialize(cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
       @cluster = cluster
       @logger = logger
+      @instrumenter = instrumenter
       @required_acks = required_acks
       @ack_timeout = ack_timeout
       @max_retries = max_retries
@@ -203,7 +204,7 @@ module Kafka
       @target_topics.add(topic)
       @pending_message_queue.write(message)
 
-
+      @instrumenter.instrument("produce_message.producer", {
         value: value,
         key: key,
         topic: topic,
@@ -228,7 +229,7 @@ module Kafka
       # There's no need to do anything if the buffer is empty.
       return if buffer_size == 0
 
-
+      @instrumenter.instrument("deliver_messages.producer") do |notification|
         message_count = buffer_size
 
         notification[:message_count] = message_count
@@ -253,6 +254,14 @@ module Kafka
       @pending_message_queue.bytesize + @buffer.bytesize
     end
 
+    # Deletes all buffered messages.
+    #
+    # @return [nil]
+    def clear_buffer
+      @buffer.clear
+      @pending_message_queue.clear
+    end
+
     # Closes all connections to the brokers.
     #
     # @return [nil]
@@ -274,6 +283,7 @@ module Kafka
         ack_timeout: @ack_timeout,
         compressor: @compressor,
         logger: @logger,
+        instrumenter: @instrumenter,
       )
 
       loop do
@@ -286,6 +296,13 @@ module Kafka
         assign_partitions!
         operation.execute
 
+        if @required_acks.zero?
+          # No response is returned by the brokers, so we can't know which messages
+          # have been successfully written. Our only option is to assume that they all
+          # have.
+          @buffer.clear
+        end
+
         if buffer_size.zero?
           break
         elsif attempt <= @max_retries
@@ -298,11 +315,8 @@ module Kafka
         end
       end
 
-
-
-      # have been successfully written. Our only option is to assume that they all
-      # have.
-      @buffer.clear
+      unless @pending_message_queue.empty?
+        raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
       end
 
       unless @buffer.empty?
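Together with `buffer_size`, the new `clear_buffer` method lets an application bail out instead of retrying forever once delivery fails. A sketch, assuming the producer from the README example:

    begin
      producer.deliver_messages
    rescue Kafka::DeliveryFailed => e
      # Retries are exhausted; discard what's left rather than letting the
      # buffer grow without bound across subsequent produce calls.
      warn "Delivery failed (#{e.message}); dropping #{producer.buffer_size} buffered messages"
      producer.clear_buffer
    end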
data/lib/kafka/round_robin_assignment_strategy.rb
CHANGED
@@ -30,7 +30,9 @@ module Kafka
       }.values
 
       members.zip(partitions_per_member).each do |member_id, member_partitions|
-
+        unless member_partitions.nil?
+          group_assignment[member_id].assign(topic, member_partitions)
+        end
       end
     end
 
data/lib/kafka/version.rb
CHANGED
data/ruby-kafka.gemspec
CHANGED
@@ -27,7 +27,7 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
 
-  spec.add_development_dependency "bundler", "
+  spec.add_development_dependency "bundler", ">= 1.9.5"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec"
   spec.add_development_dependency "pry"
metadata
CHANGED
@@ -1,29 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.3.
+  version: 0.3.3
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-
+date: 2016-04-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 1.9.5
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version:
+        version: 1.9.5
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -177,13 +177,15 @@ files:
 - lib/kafka/compression.rb
 - lib/kafka/compressor.rb
 - lib/kafka/connection.rb
+- lib/kafka/connection_builder.rb
 - lib/kafka/consumer.rb
 - lib/kafka/consumer_group.rb
 - lib/kafka/fetch_operation.rb
 - lib/kafka/fetched_batch.rb
 - lib/kafka/fetched_message.rb
 - lib/kafka/gzip_codec.rb
-- lib/kafka/
+- lib/kafka/heartbeat.rb
+- lib/kafka/instrumenter.rb
 - lib/kafka/message_buffer.rb
 - lib/kafka/offset_manager.rb
 - lib/kafka/partitioner.rb
data/lib/kafka/instrumentation.rb
DELETED
@@ -1,13 +0,0 @@
-module Kafka
-  class NullInstrumentation
-    def self.instrument(name, payload = {})
-      yield payload if block_given?
-    end
-  end
-
-  if defined?(ActiveSupport::Notifications)
-    Instrumentation = ActiveSupport::Notifications
-  else
-    Instrumentation = NullInstrumentation
-  end
-end