ruby-kafka 0.3.2 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +2 -2
- data/circle.yml +1 -1
- data/examples/firehose-consumer.rb +3 -0
- data/lib/kafka.rb +9 -0
- data/lib/kafka/broker.rb +0 -5
- data/lib/kafka/broker_pool.rb +4 -12
- data/lib/kafka/client.rb +24 -3
- data/lib/kafka/compressor.rb +3 -2
- data/lib/kafka/connection.rb +3 -3
- data/lib/kafka/connection_builder.rb +25 -0
- data/lib/kafka/consumer.rb +94 -104
- data/lib/kafka/consumer_group.rb +6 -0
- data/lib/kafka/fetched_batch.rb +13 -1
- data/lib/kafka/heartbeat.rb +16 -0
- data/lib/kafka/instrumenter.rb +25 -0
- data/lib/kafka/pending_message_queue.rb +7 -3
- data/lib/kafka/produce_operation.rb +22 -5
- data/lib/kafka/producer.rb +22 -8
- data/lib/kafka/round_robin_assignment_strategy.rb +3 -1
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +1 -1
- metadata +9 -7
- data/lib/kafka/instrumentation.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bf114586f8ec65d1db3ade1d082fc82de140f241
+  data.tar.gz: a202f5dd06b0339a8e3895e48824acba6d266ba5
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4788eabf90baf70adb425860c98073058dd601247f1dd447aeb3d154025a5466c19130c7a8d00984ff0e9a5771f980a39a33ffec8bc4384695388999ceda2a13
+  data.tar.gz: 8f5572d8e9dcf46737236ac6a6467fc5d1bd60e091e889ffe056131c084bd47699c59bd404e11b0ce8ec3c5b5c800d8d07c318c6d3abd2e75585d4168079c70c
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,12 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## v0.3.3
+
+- Allow clearing a producer's buffer (Martin Nowak).
+- Improved Consumer API.
+- Instrument producer errors.
+
 ## v0.3.2
 
 - Experimental batch consumer API.
data/README.md
CHANGED
@@ -6,7 +6,7 @@ The Producer API is currently beta level and used in production. There's an alph
 
 Although parts of this library work with Kafka 0.8 – specifically, the Producer API – it's being tested and developed against Kafka 0.9. The Consumer API will be 0.9 only.
 
-
+## Table of Contents
 
 1. [Installation](#installation)
 2. [Usage](#usage)
@@ -244,7 +244,7 @@ producer = kafka.producer
 
 producer.produce("hello", topic: "greetings")
 
-# If this line fails with Kafka::DeliveryFailed we *may* have succeeded in
+# If this line fails with Kafka::DeliveryFailed we *may* have succeeded in delivering
 # the message to Kafka but won't know for sure.
 producer.deliver_messages
 
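The comment above points out that `Kafka::DeliveryFailed` is ambiguous: some buffered messages may already have been written. A minimal sketch of one way calling code can respond, assuming a `kafka` client and a `greetings` topic already exist; messages that were not acknowledged stay in the buffer, so retrying `deliver_messages` only resends what is still pending (at the cost of possible duplicates for messages that did get through):

```ruby
producer = kafka.producer

producer.produce("hello", topic: "greetings")

attempts = 0

begin
  producer.deliver_messages
rescue Kafka::DeliveryFailed
  # Unacknowledged messages remain buffered; a retry may duplicate messages
  # that *were* written before the failure.
  attempts += 1
  retry if attempts < 3
  raise
end
```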
data/circle.yml
CHANGED
data/lib/kafka.rb
CHANGED
@@ -119,6 +119,15 @@ module Kafka
   class DeliveryFailed < Error
   end
 
+  class HeartbeatError < Error
+  end
+
+  class OffsetCommitError < Error
+  end
+
+  class FetchError < Error
+  end
+
   # Initializes a new Kafka client.
   #
   # @see Client#initialize
data/lib/kafka/broker.rb
CHANGED
@@ -4,11 +4,6 @@ require "kafka/protocol"
 
 module Kafka
   class Broker
-    def self.connect(node_id: nil, logger:, **options)
-      connection = Connection.new(logger: logger, **options)
-      new(connection: connection, node_id: node_id, logger: logger)
-    end
-
     def initialize(connection:, node_id: nil, logger:)
       @connection = connection
       @node_id = node_id
data/lib/kafka/broker_pool.rb
CHANGED
@@ -2,27 +2,19 @@ require "kafka/broker"
 
 module Kafka
   class BrokerPool
-    def initialize(
-      @client_id = client_id
-      @connect_timeout = connect_timeout
-      @socket_timeout = socket_timeout
+    def initialize(connection_builder:, logger:)
       @logger = logger
+      @connection_builder = connection_builder
       @brokers = {}
-      @ssl_context = ssl_context
     end
 
     def connect(host, port, node_id: nil)
       return @brokers.fetch(node_id) if @brokers.key?(node_id)
 
-      broker = Broker.
-
-        port: port,
+      broker = Broker.new(
+        connection: @connection_builder.build_connection(host, port),
         node_id: node_id,
-        client_id: @client_id,
-        connect_timeout: @connect_timeout,
-        socket_timeout: @socket_timeout,
         logger: @logger,
-        ssl_context: @ssl_context,
       )
 
       @brokers[node_id] = broker unless node_id.nil?
data/lib/kafka/client.rb
CHANGED
@@ -3,9 +3,12 @@ require "openssl"
 require "kafka/cluster"
 require "kafka/producer"
 require "kafka/consumer"
+require "kafka/heartbeat"
 require "kafka/async_producer"
 require "kafka/fetched_message"
 require "kafka/fetch_operation"
+require "kafka/connection_builder"
+require "kafka/instrumenter"
 
 module Kafka
   class Client
@@ -37,15 +40,22 @@ module Kafka
     # @return [Client]
     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil)
       @logger = logger || Logger.new(nil)
+      @instrumenter = Instrumenter.new(client_id: client_id)
 
       ssl_context = build_ssl_context(ssl_ca_cert, ssl_client_cert, ssl_client_cert_key)
 
-
+      connection_builder = ConnectionBuilder.new(
         client_id: client_id,
         connect_timeout: connect_timeout,
         socket_timeout: socket_timeout,
-        logger: @logger,
         ssl_context: ssl_context,
+        logger: @logger,
+        instrumenter: @instrumenter,
+      )
+
+      broker_pool = BrokerPool.new(
+        connection_builder: connection_builder,
+        logger: @logger,
       )
 
       @cluster = Cluster.new(
@@ -89,11 +99,13 @@ module Kafka
       compressor = Compressor.new(
         codec_name: compression_codec,
         threshold: compression_threshold,
+        instrumenter: @instrumenter,
       )
 
       Producer.new(
         cluster: @cluster,
         logger: @logger,
+        instrumenter: @instrumenter,
         compressor: compressor,
         ack_timeout: ack_timeout,
         required_acks: required_acks,
@@ -139,8 +151,10 @@ module Kafka
     # @param offset_commit_threshold [Integer] the number of messages that can be
     #   processed before their offsets are committed. If zero, offset commits are
     #   not triggered by message processing.
+    # @param heartbeat_interval [Integer] the interval between heartbeats; must be less
+    #   than the session window.
     # @return [Consumer]
-    def consumer(group_id:, session_timeout: 30, offset_commit_interval: 10, offset_commit_threshold: 0)
+    def consumer(group_id:, session_timeout: 30, offset_commit_interval: 10, offset_commit_threshold: 0, heartbeat_interval: 10)
       group = ConsumerGroup.new(
         cluster: @cluster,
         logger: @logger,
@@ -155,12 +169,19 @@ module Kafka
         commit_threshold: offset_commit_threshold,
       )
 
+      heartbeat = Heartbeat.new(
+        group: group,
+        interval: heartbeat_interval,
+      )
+
       Consumer.new(
         cluster: @cluster,
         logger: @logger,
+        instrumenter: @instrumenter,
         group: group,
         offset_manager: offset_manager,
         session_timeout: session_timeout,
+        heartbeat: heartbeat,
       )
     end
 
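The new `heartbeat_interval:` option on `Client#consumer` replaces the old "half the session timeout" heuristic. A minimal usage sketch, assuming a broker reachable at `kafka1:9092` and a consumer group named `my-group` (both placeholders); the interval just needs to stay well below `session_timeout`:

```ruby
require "kafka"

kafka = Kafka.new(seed_brokers: ["kafka1:9092"], client_id: "my-app")

consumer = kafka.consumer(
  group_id: "my-group",
  session_timeout: 30,     # seconds before the group coordinator considers the member dead
  heartbeat_interval: 10,  # new in 0.3.3; must be less than the session window
)
```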
data/lib/kafka/compressor.rb
CHANGED
@@ -20,9 +20,10 @@ module Kafka
     # @param codec_name [Symbol, nil]
     # @param threshold [Integer] the minimum number of messages in a message set
     #   that will trigger compression.
-    def initialize(codec_name:, threshold:)
+    def initialize(codec_name:, threshold:, instrumenter:)
       @codec = Compression.find_codec(codec_name)
       @threshold = threshold
+      @instrumenter = instrumenter
     end
 
     # @param message_set [Protocol::MessageSet]
@@ -45,7 +46,7 @@ module Kafka
     def compress_data(message_set)
       data = Protocol::Encoder.encode_with(message_set)
 
-
+      @instrumenter.instrument("compress.compressor") do |notification|
         compressed_data = @codec.compress(data)
 
         notification[:message_count] = message_set.size
data/lib/kafka/connection.rb
CHANGED
@@ -1,7 +1,6 @@
 require "stringio"
 require "kafka/socket_with_timeout"
 require "kafka/ssl_socket_with_timeout"
-require "kafka/instrumentation"
 require "kafka/protocol/request_message"
 require "kafka/protocol/encoder"
 require "kafka/protocol/decoder"
@@ -43,9 +42,10 @@ module Kafka
     #   broker. Default is 10 seconds.
     #
     # @return [Connection] a new connection.
-    def initialize(host:, port:, client_id:, logger:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
+    def initialize(host:, port:, client_id:, logger:, instrumenter:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
       @host, @port, @client_id = host, port, client_id
       @logger = logger
+      @instrumenter = instrumenter
 
       @connect_timeout = connect_timeout || CONNECT_TIMEOUT
       @socket_timeout = socket_timeout || SOCKET_TIMEOUT
@@ -82,7 +82,7 @@ module Kafka
         response_size: 0,
       }
 
-
+      @instrumenter.instrument("request.connection", notification) do
         open unless open?
 
         @correlation_id += 1
data/lib/kafka/connection_builder.rb
ADDED
@@ -0,0 +1,25 @@
+module Kafka
+  class ConnectionBuilder
+    def initialize(client_id:, logger:, instrumenter:, connect_timeout:, socket_timeout:, ssl_context:)
+      @client_id = client_id
+      @logger = logger
+      @instrumenter = instrumenter
+      @connect_timeout = connect_timeout
+      @socket_timeout = socket_timeout
+      @ssl_context = ssl_context
+    end
+
+    def build_connection(host, port)
+      Connection.new(
+        host: host,
+        port: port,
+        client_id: @client_id,
+        connect_timeout: @connect_timeout,
+        socket_timeout: @socket_timeout,
+        logger: @logger,
+        instrumenter: @instrumenter,
+        ssl_context: @ssl_context,
+      )
+    end
+  end
+end
data/lib/kafka/consumer.rb
CHANGED
@@ -43,15 +43,14 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, group:, offset_manager:, session_timeout:)
+    def initialize(cluster:, logger:, instrumenter:, group:, offset_manager:, session_timeout:, heartbeat:)
       @cluster = cluster
       @logger = logger
+      @instrumenter = instrumenter
       @group = group
       @offset_manager = offset_manager
       @session_timeout = session_timeout
-
-      # Send two heartbeats in each session window, just to be sure.
-      @heartbeat_interval = @session_timeout / 2
+      @heartbeat = heartbeat
 
       # Whether or not the consumer is currently consuming messages.
       @running = false
@@ -75,6 +74,10 @@ module Kafka
       nil
     end
 
+    def stop
+      @running = false
+    end
+
     # Fetches and enumerates the messages in the topics that the consumer group
     # subscribes to.
     #
@@ -86,96 +89,99 @@ module Kafka
     #   that is tasked with taking over processing of these partitions will resume
     #   at the last committed offsets.
     #
+    # @param min_bytes [Integer] the minimum number of bytes to read before
+    #   returning messages from the server; if `max_wait_time` is reached, this
+    #   is ignored.
+    # @param max_wait_time [Integer] the maximum duration of time to wait before
+    #   returning messages from the server, in seconds.
     # @yieldparam message [Kafka::FetchedMessage] a message fetched from Kafka.
     # @return [nil]
-    def each_message
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          yield message
-        end
-
-        @offset_manager.commit_offsets_if_necessary
-
-        send_heartbeat_if_necessary
-        mark_message_as_processed(message)
-
-        break if !@running
+    def each_message(min_bytes: 1, max_wait_time: 5)
+      consumer_loop do
+        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+
+        batches.each do |batch|
+          batch.messages.each do |message|
+            @instrumenter.instrument("process_message.consumer") do |notification|
+              notification.update(
+                topic: message.topic,
+                partition: message.partition,
+                offset: message.offset,
+                offset_lag: batch.highwater_mark_offset - message.offset,
+                key: message.key,
+                value: message.value,
+              )
+
+              yield message
             end
+
+            @offset_manager.commit_offsets_if_necessary
+
+            @heartbeat.send_if_necessary
+            mark_message_as_processed(message)
+
+            return if !@running
           end
-      rescue ConnectionError => e
-        @logger.error "Connection error while sending heartbeat; rejoining"
-        join_group
-      rescue UnknownMemberId
-        @logger.error "Kicked out of group; rejoining"
-        join_group
-      rescue RebalanceInProgress
-        @logger.error "Group is rebalancing; rejoining"
-        join_group
-      rescue IllegalGeneration
-        @logger.error "Group has transitioned to a new generation; rejoining"
-        join_group
         end
       end
-    ensure
-      # In order to quickly have the consumer group re-balance itself, it's
-      # important that members explicitly tell Kafka when they're leaving.
-      @offset_manager.commit_offsets
-      @group.leave
-      @running = false
     end
 
-
-
+    # Fetches and enumerates the messages in the topics that the consumer group
+    # subscribes to.
+    #
+    # Each batch of messages is yielded to the provided block. If the block returns
+    # without raising an exception, the batch will be considered successfully
+    # processed. At regular intervals the offset of the most recent successfully
+    # processed message batch in each partition will be committed to the Kafka
+    # offset store. If the consumer crashes or leaves the group, the group member
+    # that is tasked with taking over processing of these partitions will resume
+    # at the last committed offsets.
+    #
+    # @param min_bytes [Integer] the minimum number of bytes to read before
+    #   returning messages from the server; if `max_wait_time` is reached, this
+    #   is ignored.
+    # @param max_wait_time [Integer] the maximum duration of time to wait before
+    #   returning messages from the server, in seconds.
+    # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
+    # @return [nil]
+    def each_batch(min_bytes: 1, max_wait_time: 5)
+      consumer_loop do
+        batches = fetch_batches(min_bytes: min_bytes, max_wait_time: max_wait_time)
+
+        batches.each do |batch|
+          unless batch.empty?
+            @instrumenter.instrument("process_batch.consumer") do |notification|
+              notification.update(
+                topic: batch.topic,
+                partition: batch.partition,
+                highwater_mark_offset: batch.highwater_mark_offset,
+                message_count: batch.messages.count,
+              )
+
+              yield batch
+            end
+
+            mark_message_as_processed(batch.messages.last)
+          end
+
+          @offset_manager.commit_offsets_if_necessary
+
+          @heartbeat.send_if_necessary
+
+          return if !@running
+        end
+      end
     end
 
-
-      loop do
-        begin
-          fetch_batches.each do |batch|
-            unless batch.empty?
-              Instrumentation.instrument("process_batch.consumer.kafka") do |notification|
-                notification.update(
-                  topic: batch.topic,
-                  partition: batch.partition,
-                  highwater_mark_offset: batch.highwater_mark_offset,
-                  message_count: batch.messages.count,
-                )
-
-                yield batch
-              end
-
-              mark_message_as_processed(batch.messages.last)
-            end
+    private
 
-
+    def consumer_loop
+      @running = true
 
-
-
-
-
-          join_group
-        rescue UnknownMemberId
-          @logger.error "Kicked out of group; rejoining"
-          join_group
-        rescue RebalanceInProgress
-          @logger.error "Group is rebalancing; rejoining"
-          join_group
-        rescue IllegalGeneration
-          @logger.error "Group has transitioned to a new generation; rejoining"
+      while @running
+        begin
+          yield
+        rescue HeartbeatError, OffsetCommitError, FetchError
           join_group
         end
       end
@@ -184,29 +190,28 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       @offset_manager.commit_offsets
       @group.leave
+      @running = false
     end
 
-    private
-
     def join_group
       @offset_manager.clear_offsets
       @group.join
     end
 
-    def fetch_batches
+    def fetch_batches(min_bytes:, max_wait_time:)
      join_group unless @group.member?
 
      assigned_partitions = @group.assigned_partitions
 
-
+      @heartbeat.send_if_necessary
 
      raise "No partitions assigned!" if assigned_partitions.empty?
 
      operation = FetchOperation.new(
        cluster: @cluster,
        logger: @logger,
-        min_bytes:
-        max_wait_time:
+        min_bytes: min_bytes,
+        max_wait_time: max_wait_time,
      )
 
      assigned_partitions.each do |topic, partitions|
@@ -223,22 +228,7 @@ module Kafka
    rescue ConnectionError => e
      @logger.error "Connection error while fetching messages: #{e}"
 
-
-      end
-
-    # Sends a heartbeat if it would be necessary in order to avoid getting
-    # kicked out of the consumer group.
-    #
-    # Each consumer needs to send a heartbeat with a frequency defined by
-    # `session_timeout`.
-    #
-    def send_heartbeat_if_necessary
-      @last_heartbeat ||= Time.now
-
-      if Time.now > @last_heartbeat + @heartbeat_interval
-        @group.heartbeat
-        @last_heartbeat = Time.now
-      end
+      raise FetchError, e
    end
 
    def mark_message_as_processed(message)
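The reworked Consumer API moves the fetch tuning onto `each_message`/`each_batch` and adds `Consumer#stop` for clean shutdown. A minimal sketch of the batch interface, assuming the consumer built above is subscribed to a `greetings` topic and that a signal trap is an acceptable place to request shutdown in the host application:

```ruby
consumer.subscribe("greetings")

# Ask the consumer loop to exit after the current iteration, e.g. on SIGTERM.
trap("TERM") { consumer.stop }

consumer.each_batch(min_bytes: 1, max_wait_time: 5) do |batch|
  batch.messages.each do |message|
    puts "#{batch.topic}/#{batch.partition}@#{message.offset}: #{message.value}"
  end
end
```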
data/lib/kafka/consumer_group.rb
CHANGED
@@ -71,6 +71,9 @@ module Kafka
           Protocol.handle_error(error_code)
         end
       end
+    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+      @logger.error "Error committing offsets: #{e}"
+      raise OffsetCommitError, e
     end
 
     def heartbeat
@@ -83,6 +86,9 @@ module Kafka
       )
 
       Protocol.handle_error(response.error_code)
+    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+      @logger.error "Error sending heartbeat: #{e}"
+      raise HeartbeatError, e
     end
 
     private
data/lib/kafka/fetched_batch.rb
CHANGED
@@ -1,6 +1,18 @@
 module Kafka
+
+  # An ordered sequence of messages fetched from a Kafka partition.
   class FetchedBatch
-
+    # @return [String]
+    attr_reader :topic
+
+    # @return [Integer]
+    attr_reader :partition
+
+    # @return [Integer] the offset of the most recent message in the partition.
+    attr_reader :highwater_mark_offset
+
+    # @return [Array<Kafka::FetchedMessage>]
+    attr_reader :messages
 
     def initialize(topic:, partition:, highwater_mark_offset:, messages:)
       @topic = topic
data/lib/kafka/heartbeat.rb
ADDED
@@ -0,0 +1,16 @@
+module Kafka
+  class Heartbeat
+    def initialize(group:, interval:)
+      @group = group
+      @interval = interval
+      @last_heartbeat = Time.now
+    end
+
+    def send_if_necessary
+      if Time.now > @last_heartbeat + @interval
+        @group.heartbeat
+        @last_heartbeat = Time.now
+      end
+    end
+  end
+end
data/lib/kafka/instrumenter.rb
ADDED
@@ -0,0 +1,25 @@
+module Kafka
+  class Instrumenter
+    NAMESPACE = "kafka"
+
+    def initialize(default_payload = {})
+      @default_payload = default_payload
+
+      if defined?(ActiveSupport::Notifications)
+        @backend = ActiveSupport::Notifications
+      else
+        @backend = nil
+      end
+    end
+
+    def instrument(event_name, payload = {}, &block)
+      if @backend
+        payload.update(@default_payload)
+
+        @backend.instrument("#{event_name}.#{NAMESPACE}", payload, &block)
+      else
+        block.call(payload) if block
+      end
+    end
+  end
+end
|
|
7
7
|
attr_reader :size, :bytesize
|
8
8
|
|
9
9
|
def initialize
|
10
|
-
|
11
|
-
@size = 0
|
12
|
-
@bytesize = 0
|
10
|
+
clear
|
13
11
|
end
|
14
12
|
|
15
13
|
def write(message)
|
@@ -22,6 +20,12 @@ module Kafka
|
|
22
20
|
@messages.empty?
|
23
21
|
end
|
24
22
|
|
23
|
+
def clear
|
24
|
+
@messages = []
|
25
|
+
@size = 0
|
26
|
+
@bytesize = 0
|
27
|
+
end
|
28
|
+
|
25
29
|
# Yields each message in the queue to the provided block, removing the
|
26
30
|
# message after the block has processed it. If the block raises an
|
27
31
|
# exception, the message will be retained in the queue.
|
data/lib/kafka/produce_operation.rb
CHANGED
@@ -25,17 +25,18 @@ module Kafka
   #   * `sent_message_count` – the number of messages that were successfully sent.
   #
   class ProduceOperation
-    def initialize(cluster:, buffer:, compressor:, required_acks:, ack_timeout:, logger:)
+    def initialize(cluster:, buffer:, compressor:, required_acks:, ack_timeout:, logger:, instrumenter:)
       @cluster = cluster
       @buffer = buffer
       @required_acks = required_acks
       @ack_timeout = ack_timeout
       @compressor = compressor
       @logger = logger
+      @instrumenter = instrumenter
     end
 
     def execute
-
+      @instrumenter.instrument("send_messages.producer") do |notification|
         message_count = @buffer.size
 
         notification[:message_count] = message_count
@@ -62,7 +63,13 @@ module Kafka
         messages_for_broker[broker] ||= MessageBuffer.new
         messages_for_broker[broker].concat(messages, topic: topic, partition: partition)
       rescue Kafka::Error => e
-        @logger.error "Could not connect to leader for partition #{topic}/#{partition}: #{e}"
+        @logger.error "Could not connect to leader for partition #{topic}/#{partition}: #{e.message}"
+
+        @instrumenter.instrument("partition_error.producer", {
+          topic: topic,
+          partition: partition,
+          exception: [e.class.to_s, e.message],
+        })
 
         # We can't send the messages right now, so we'll just keep them in the buffer.
         # We'll mark the cluster as stale in order to force a metadata refresh.
@@ -108,10 +115,20 @@ module Kafka
           ack_time = Time.now
 
           begin
-
+            begin
+              Protocol.handle_error(partition_info.error_code)
+            rescue ProtocolError => e
+              @instrumenter.instrument("partition_error.producer", {
+                topic: topic,
+                partition: partition,
+                exception: [e.class.to_s, e.message],
+              })
+
+              raise e
+            end
 
             messages.each do |message|
-
+              @instrumenter.instrument("ack_message.producer", {
                 key: message.key,
                 value: message.value,
                 topic: topic,
data/lib/kafka/producer.rb
CHANGED
@@ -130,9 +130,10 @@ module Kafka
   #
   class Producer
 
-    def initialize(cluster:, logger:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
+    def initialize(cluster:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
       @cluster = cluster
       @logger = logger
+      @instrumenter = instrumenter
       @required_acks = required_acks
       @ack_timeout = ack_timeout
       @max_retries = max_retries
@@ -203,7 +204,7 @@ module Kafka
       @target_topics.add(topic)
       @pending_message_queue.write(message)
 
-
+      @instrumenter.instrument("produce_message.producer", {
         value: value,
         key: key,
         topic: topic,
@@ -228,7 +229,7 @@ module Kafka
       # There's no need to do anything if the buffer is empty.
       return if buffer_size == 0
 
-
+      @instrumenter.instrument("deliver_messages.producer") do |notification|
         message_count = buffer_size
 
         notification[:message_count] = message_count
@@ -253,6 +254,14 @@ module Kafka
       @pending_message_queue.bytesize + @buffer.bytesize
     end
 
+    # Deletes all buffered messages.
+    #
+    # @return [nil]
+    def clear_buffer
+      @buffer.clear
+      @pending_message_queue.clear
+    end
+
     # Closes all connections to the brokers.
     #
     # @return [nil]
@@ -274,6 +283,7 @@ module Kafka
         ack_timeout: @ack_timeout,
         compressor: @compressor,
         logger: @logger,
+        instrumenter: @instrumenter,
       )
 
       loop do
@@ -286,6 +296,13 @@ module Kafka
         assign_partitions!
         operation.execute
 
+        if @required_acks.zero?
+          # No response is returned by the brokers, so we can't know which messages
+          # have been successfully written. Our only option is to assume that they all
+          # have.
+          @buffer.clear
+        end
+
         if buffer_size.zero?
           break
         elsif attempt <= @max_retries
@@ -298,11 +315,8 @@ module Kafka
         end
       end
 
-
-
-      # have been successfully written. Our only option is to assume that they all
-      # have.
-      @buffer.clear
+      unless @pending_message_queue.empty?
+        raise DeliveryFailed, "Failed to assign partitions to #{@pending_message_queue.size} messages"
       end
 
       unless @buffer.empty?
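The new `Producer#clear_buffer` discards both buffered and pending messages, which is useful when a delivery failure should not poison subsequent `deliver_messages` calls. A minimal sketch, assuming a `kafka` client already exists and that dropping the undelivered messages is acceptable for the application:

```ruby
producer = kafka.producer

producer.produce("hello", topic: "greetings")

begin
  producer.deliver_messages
rescue Kafka::DeliveryFailed
  # New in 0.3.3: drop everything still sitting in the buffer instead of
  # letting it be retried on the next deliver_messages call.
  producer.clear_buffer
end
```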
data/lib/kafka/round_robin_assignment_strategy.rb
CHANGED
@@ -30,7 +30,9 @@ module Kafka
       }.values
 
       members.zip(partitions_per_member).each do |member_id, member_partitions|
-        group_assignment[member_id].assign(topic, member_partitions)
+        unless member_partitions.nil?
+          group_assignment[member_id].assign(topic, member_partitions)
+        end
       end
     end
 
data/lib/kafka/version.rb
CHANGED
data/ruby-kafka.gemspec
CHANGED
@@ -27,7 +27,7 @@ Gem::Specification.new do |spec|
   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
   spec.require_paths = ["lib"]
 
-  spec.add_development_dependency "bundler", "
+  spec.add_development_dependency "bundler", ">= 1.9.5"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec"
   spec.add_development_dependency "pry"
metadata
CHANGED
@@ -1,29 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 0.3.2
+  version: 0.3.3
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-
+date: 2016-04-12 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
     - !ruby/object:Gem::Version
-      version:
+      version: 1.9.5
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
     - !ruby/object:Gem::Version
-      version:
+      version: 1.9.5
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -177,13 +177,15 @@ files:
 - lib/kafka/compression.rb
 - lib/kafka/compressor.rb
 - lib/kafka/connection.rb
+- lib/kafka/connection_builder.rb
 - lib/kafka/consumer.rb
 - lib/kafka/consumer_group.rb
 - lib/kafka/fetch_operation.rb
 - lib/kafka/fetched_batch.rb
 - lib/kafka/fetched_message.rb
 - lib/kafka/gzip_codec.rb
-- lib/kafka/instrumentation.rb
+- lib/kafka/heartbeat.rb
+- lib/kafka/instrumenter.rb
 - lib/kafka/message_buffer.rb
 - lib/kafka/offset_manager.rb
 - lib/kafka/partitioner.rb
data/lib/kafka/instrumentation.rb
REMOVED
@@ -1,13 +0,0 @@
-module Kafka
-  class NullInstrumentation
-    def self.instrument(name, payload = {})
-      yield payload if block_given?
-    end
-  end
-
-  if defined?(ActiveSupport::Notifications)
-    Instrumentation = ActiveSupport::Notifications
-  else
-    Instrumentation = NullInstrumentation
-  end
-end