ruby-kafka 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -3
- data/Rakefile +1 -1
- data/examples/simple-consumer.rb +48 -0
- data/examples/simple-producer.rb +7 -1
- data/lib/kafka.rb +1 -1
- data/lib/kafka/broker.rb +32 -0
- data/lib/kafka/broker_pool.rb +16 -102
- data/lib/kafka/client.rb +91 -9
- data/lib/kafka/cluster.rb +130 -0
- data/lib/kafka/connection.rb +1 -3
- data/lib/kafka/fetch_operation.rb +127 -0
- data/lib/kafka/fetched_message.rb +27 -0
- data/lib/kafka/instrumentation.rb +1 -1
- data/lib/kafka/message_buffer.rb +8 -1
- data/lib/kafka/partitioner.rb +13 -8
- data/lib/kafka/pending_message.rb +13 -0
- data/lib/kafka/produce_operation.rb +116 -0
- data/lib/kafka/producer.rb +64 -30
- data/lib/kafka/protocol.rb +9 -0
- data/lib/kafka/protocol/decoder.rb +7 -0
- data/lib/kafka/protocol/fetch_request.rb +53 -0
- data/lib/kafka/protocol/fetch_response.rb +75 -0
- data/lib/kafka/protocol/list_offset_request.rb +41 -0
- data/lib/kafka/protocol/list_offset_response.rb +82 -0
- data/lib/kafka/protocol/message.rb +15 -0
- data/lib/kafka/protocol/message_set.rb +25 -0
- data/lib/kafka/protocol/metadata_response.rb +3 -1
- data/lib/kafka/protocol/produce_request.rb +3 -0
- data/lib/kafka/protocol/topic_metadata_request.rb +4 -0
- data/lib/kafka/socket_with_timeout.rb +3 -3
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +3 -1
- metadata +14 -4
- data/lib/kafka/transmission.rb +0 -76
data/lib/kafka/producer.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "kafka/partitioner"
|
2
2
|
require "kafka/message_buffer"
|
3
|
-
require "kafka/protocol/message"
|
4
|
-
require "kafka/transmission"
|
3
|
+
require "kafka/produce_operation"
|
4
|
+
require "kafka/pending_message"
|
5
5
|
|
6
6
|
module Kafka
|
7
7
|
|
@@ -17,7 +17,7 @@ module Kafka
|
|
17
17
|
# producer = kafka.get_producer
|
18
18
|
#
|
19
19
|
# This is done in order to share a logger as well as a pool of broker connections across
|
20
|
-
# different producers. This also means that you don't need to pass the `broker_pool` and
|
20
|
+
# different producers. This also means that you don't need to pass the `cluster` and
|
21
21
|
# `logger` options to `#get_producer`. See {#initialize} for the list of other options
|
22
22
|
# you can pass in.
|
23
23
|
#
|
@@ -86,8 +86,7 @@ module Kafka
|
|
86
86
|
|
87
87
|
# Initializes a new Producer.
|
88
88
|
#
|
89
|
-
# @param broker_pool [BrokerPool] the broker pool used for fetching cluster metadata.
|
90
|
-
# Typically passed in for you.
|
89
|
+
# @param cluster [Cluster] the cluster client. Typically passed in for you.
|
91
90
|
#
|
92
91
|
# @param logger [Logger] the logger that should be used. Typically passed
|
93
92
|
# in for you.
|
@@ -107,15 +106,20 @@ module Kafka
|
|
107
106
|
# @param max_buffer_size [Integer] the number of messages allowed in the buffer
|
108
107
|
# before new writes will raise BufferOverflow exceptions.
|
109
108
|
#
|
110
|
-
def initialize(broker_pool:, logger:, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
|
111
|
-
@broker_pool = broker_pool
|
109
|
+
def initialize(cluster:, logger:, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
|
110
|
+
@cluster = cluster
|
112
111
|
@logger = logger
|
113
112
|
@required_acks = required_acks
|
114
113
|
@ack_timeout = ack_timeout
|
115
114
|
@max_retries = max_retries
|
116
115
|
@retry_backoff = retry_backoff
|
117
116
|
@max_buffer_size = max_buffer_size
|
117
|
+
|
118
|
+
# A buffer organized by topic/partition.
|
118
119
|
@buffer = MessageBuffer.new
|
120
|
+
|
121
|
+
# Messages added by `#produce` but not yet assigned a partition.
|
122
|
+
@pending_messages = []
|
119
123
|
end
|
120
124
|
|
121
125
|
# Produces a message to the specified topic. Note that messages are buffered in
|
@@ -152,18 +156,15 @@ module Kafka
|
|
152
156
|
raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
|
153
157
|
end
|
154
158
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
partition
|
160
|
-
|
161
|
-
|
162
|
-
message = Protocol::Message.new(key: key, value: value)
|
163
|
-
|
164
|
-
@buffer.write(message, topic: topic, partition: partition)
|
159
|
+
@pending_messages << PendingMessage.new(
|
160
|
+
value: value,
|
161
|
+
key: key,
|
162
|
+
topic: topic,
|
163
|
+
partition: partition,
|
164
|
+
partition_key: partition_key,
|
165
|
+
)
|
165
166
|
|
166
|
-
|
167
|
+
nil
|
167
168
|
end
|
168
169
|
|
169
170
|
# Sends all buffered messages to the Kafka brokers.
|
@@ -178,8 +179,12 @@ module Kafka
|
|
178
179
|
def send_messages
|
179
180
|
attempt = 0
|
180
181
|
|
181
|
-
|
182
|
-
|
182
|
+
# Make sure we get metadata for this topic.
|
183
|
+
target_topics = @pending_messages.map(&:topic).uniq
|
184
|
+
@cluster.add_target_topics(target_topics)
|
185
|
+
|
186
|
+
operation = ProduceOperation.new(
|
187
|
+
cluster: @cluster,
|
183
188
|
buffer: @buffer,
|
184
189
|
required_acks: @required_acks,
|
185
190
|
ack_timeout: @ack_timeout,
|
@@ -187,21 +192,21 @@ module Kafka
|
|
187
192
|
)
|
188
193
|
|
189
194
|
loop do
|
190
|
-
@logger.info "Sending #{@buffer.size} messages"
|
191
|
-
|
192
195
|
attempt += 1
|
193
|
-
transmission.send_messages
|
194
196
|
|
195
|
-
|
196
|
-
|
197
|
+
@cluster.refresh_metadata_if_necessary!
|
198
|
+
|
199
|
+
assign_partitions!
|
200
|
+
operation.execute
|
201
|
+
|
202
|
+
if @pending_messages.empty? && @buffer.empty?
|
197
203
|
break
|
198
204
|
elsif attempt <= @max_retries
|
199
|
-
@logger.warn "Failed to
|
200
|
-
@logger.info "Waiting #{@retry_backoff}s before retrying"
|
205
|
+
@logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
|
201
206
|
|
202
207
|
sleep @retry_backoff
|
203
208
|
else
|
204
|
-
@logger.error "Failed to
|
209
|
+
@logger.error "Failed to send all messages; keeping remaining messages in buffer"
|
205
210
|
break
|
206
211
|
end
|
207
212
|
end
|
@@ -224,14 +229,43 @@ module Kafka
|
|
224
229
|
#
|
225
230
|
# @return [Integer] buffer size.
|
226
231
|
def buffer_size
|
227
|
-
@buffer.size
|
232
|
+
@pending_messages.size + @buffer.size
|
228
233
|
end
|
229
234
|
|
230
235
|
# Closes all connections to the brokers.
|
231
236
|
#
|
232
237
|
# @return [nil]
|
233
238
|
def shutdown
|
234
|
-
@broker_pool.shutdown
|
239
|
+
@cluster.disconnect
|
240
|
+
end
|
241
|
+
|
242
|
+
private
|
243
|
+
|
244
|
+
def assign_partitions!
|
245
|
+
until @pending_messages.empty?
|
246
|
+
# We want to keep the message in the first-stage buffer in case there's an error.
|
247
|
+
message = @pending_messages.first
|
248
|
+
|
249
|
+
partition = message.partition
|
250
|
+
|
251
|
+
if partition.nil?
|
252
|
+
partition_count = @cluster.partitions_for(message.topic).count
|
253
|
+
partition = Partitioner.partition_for_key(partition_count, message)
|
254
|
+
end
|
255
|
+
|
256
|
+
@buffer.write(
|
257
|
+
value: message.value,
|
258
|
+
key: message.key,
|
259
|
+
topic: message.topic,
|
260
|
+
partition: partition,
|
261
|
+
)
|
262
|
+
|
263
|
+
# Now it's safe to remove the message from the first-stage buffer.
|
264
|
+
@pending_messages.shift
|
265
|
+
end
|
266
|
+
rescue Kafka::Error => e
|
267
|
+
@logger.error "Failed to assign pending message to a partition: #{e}"
|
268
|
+
@cluster.mark_as_stale!
|
235
269
|
end
|
236
270
|
end
|
237
271
|
end
|
data/lib/kafka/protocol.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
module Kafka
|
2
2
|
module Protocol
|
3
|
+
# The replica id of non-brokers is always -1.
|
4
|
+
REPLICA_ID = -1
|
5
|
+
|
3
6
|
APIS = {
|
4
7
|
0 => :produce,
|
8
|
+
1 => :fetch,
|
9
|
+
2 => :list_offset,
|
5
10
|
3 => :topic_metadata,
|
6
11
|
}
|
7
12
|
|
@@ -45,3 +50,7 @@ require "kafka/protocol/topic_metadata_request"
|
|
45
50
|
require "kafka/protocol/metadata_response"
|
46
51
|
require "kafka/protocol/produce_request"
|
47
52
|
require "kafka/protocol/produce_response"
|
53
|
+
require "kafka/protocol/fetch_request"
|
54
|
+
require "kafka/protocol/fetch_response"
|
55
|
+
require "kafka/protocol/list_offset_request"
|
56
|
+
require "kafka/protocol/list_offset_response"
|
@@ -5,6 +5,9 @@ module Kafka
|
|
5
5
|
# from it. The Kafka protocol is not self-describing, so a client must call
|
6
6
|
# these methods in just the right order for things to work.
|
7
7
|
class Decoder
|
8
|
+
def self.from_string(str)
|
9
|
+
new(StringIO.new(str))
|
10
|
+
end
|
8
11
|
|
9
12
|
# Initializes a new decoder.
|
10
13
|
#
|
@@ -13,6 +16,10 @@ module Kafka
|
|
13
16
|
@io = io
|
14
17
|
end
|
15
18
|
|
19
|
+
def eof?
|
20
|
+
@io.eof?
|
21
|
+
end
|
22
|
+
|
16
23
|
# Decodes an 8-bit integer from the IO object.
|
17
24
|
#
|
18
25
|
# @return [Integer]
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module Kafka
|
2
|
+
module Protocol
|
3
|
+
|
4
|
+
# A request to fetch messages from a given partition.
|
5
|
+
#
|
6
|
+
# ## API Specification
|
7
|
+
#
|
8
|
+
# FetchRequest => ReplicaId MaxWaitTime MinBytes [TopicName [Partition FetchOffset MaxBytes]]
|
9
|
+
# ReplicaId => int32
|
10
|
+
# MaxWaitTime => int32
|
11
|
+
# MinBytes => int32
|
12
|
+
# TopicName => string
|
13
|
+
# Partition => int32
|
14
|
+
# FetchOffset => int64
|
15
|
+
# MaxBytes => int32
|
16
|
+
#
|
17
|
+
class FetchRequest
|
18
|
+
|
19
|
+
# @param max_wait_time [Integer]
|
20
|
+
# @param min_bytes [Integer]
|
21
|
+
# @param topics [Hash]
|
22
|
+
def initialize(max_wait_time:, min_bytes:, topics:)
|
23
|
+
@replica_id = REPLICA_ID
|
24
|
+
@max_wait_time = max_wait_time
|
25
|
+
@min_bytes = min_bytes
|
26
|
+
@topics = topics
|
27
|
+
end
|
28
|
+
|
29
|
+
def api_key
|
30
|
+
1
|
31
|
+
end
|
32
|
+
|
33
|
+
def encode(encoder)
|
34
|
+
encoder.write_int32(@replica_id)
|
35
|
+
encoder.write_int32(@max_wait_time)
|
36
|
+
encoder.write_int32(@min_bytes)
|
37
|
+
|
38
|
+
encoder.write_array(@topics) do |topic, partitions|
|
39
|
+
encoder.write_string(topic)
|
40
|
+
|
41
|
+
encoder.write_array(partitions) do |partition, config|
|
42
|
+
fetch_offset = config.fetch(:fetch_offset)
|
43
|
+
max_bytes = config.fetch(:max_bytes)
|
44
|
+
|
45
|
+
encoder.write_int32(partition)
|
46
|
+
encoder.write_int64(fetch_offset)
|
47
|
+
encoder.write_int32(max_bytes)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
require "kafka/protocol/message_set"
|
2
|
+
|
3
|
+
module Kafka
|
4
|
+
module Protocol
|
5
|
+
|
6
|
+
# A response to a fetch request.
|
7
|
+
#
|
8
|
+
# ## API Specification
|
9
|
+
#
|
10
|
+
# FetchResponse => [TopicName [Partition ErrorCode HighwaterMarkOffset MessageSetSize MessageSet]]
|
11
|
+
# TopicName => string
|
12
|
+
# Partition => int32
|
13
|
+
# ErrorCode => int16
|
14
|
+
# HighwaterMarkOffset => int64
|
15
|
+
# MessageSetSize => int32
|
16
|
+
#
|
17
|
+
class FetchResponse
|
18
|
+
class FetchedPartition
|
19
|
+
attr_reader :partition, :error_code
|
20
|
+
attr_reader :highwater_mark_offset, :messages
|
21
|
+
|
22
|
+
def initialize(partition:, error_code:, highwater_mark_offset:, messages:)
|
23
|
+
@partition = partition
|
24
|
+
@error_code = error_code
|
25
|
+
@highwater_mark_offset = highwater_mark_offset
|
26
|
+
@messages = messages
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
class FetchedTopic
|
31
|
+
attr_reader :name, :partitions
|
32
|
+
|
33
|
+
def initialize(name:, partitions:)
|
34
|
+
@name = name
|
35
|
+
@partitions = partitions
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
attr_reader :topics
|
40
|
+
|
41
|
+
def initialize(topics: [])
|
42
|
+
@topics = topics
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.decode(decoder)
|
46
|
+
topics = decoder.array do
|
47
|
+
topic_name = decoder.string
|
48
|
+
|
49
|
+
partitions = decoder.array do
|
50
|
+
partition = decoder.int32
|
51
|
+
error_code = decoder.int16
|
52
|
+
highwater_mark_offset = decoder.int64
|
53
|
+
|
54
|
+
message_set_decoder = Decoder.from_string(decoder.bytes)
|
55
|
+
message_set = MessageSet.decode(message_set_decoder)
|
56
|
+
|
57
|
+
FetchedPartition.new(
|
58
|
+
partition: partition,
|
59
|
+
error_code: error_code,
|
60
|
+
highwater_mark_offset: highwater_mark_offset,
|
61
|
+
messages: message_set.messages,
|
62
|
+
)
|
63
|
+
end
|
64
|
+
|
65
|
+
FetchedTopic.new(
|
66
|
+
name: topic_name,
|
67
|
+
partitions: partitions,
|
68
|
+
)
|
69
|
+
end
|
70
|
+
|
71
|
+
new(topics: topics)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Kafka
|
2
|
+
module Protocol
|
3
|
+
# A request to list the available offsets for a set of topics/partitions.
|
4
|
+
#
|
5
|
+
# ## API Specification
|
6
|
+
#
|
7
|
+
# OffsetRequest => ReplicaId [TopicName [Partition Time MaxNumberOfOffsets]]
|
8
|
+
# ReplicaId => int32
|
9
|
+
# TopicName => string
|
10
|
+
# Partition => int32
|
11
|
+
# Time => int64
|
12
|
+
# MaxNumberOfOffsets => int32
|
13
|
+
#
|
14
|
+
class ListOffsetRequest
|
15
|
+
|
16
|
+
# @param topics [Hash]
|
17
|
+
def initialize(topics:)
|
18
|
+
@replica_id = REPLICA_ID
|
19
|
+
@topics = topics
|
20
|
+
end
|
21
|
+
|
22
|
+
def api_key
|
23
|
+
2
|
24
|
+
end
|
25
|
+
|
26
|
+
def encode(encoder)
|
27
|
+
encoder.write_int32(@replica_id)
|
28
|
+
|
29
|
+
encoder.write_array(@topics) do |topic, partitions|
|
30
|
+
encoder.write_string(topic)
|
31
|
+
|
32
|
+
encoder.write_array(partitions) do |partition|
|
33
|
+
encoder.write_int32(partition.fetch(:partition))
|
34
|
+
encoder.write_int64(partition.fetch(:time))
|
35
|
+
encoder.write_int32(partition.fetch(:max_offsets))
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module Kafka
|
2
|
+
module Protocol
|
3
|
+
|
4
|
+
# A response to a list offset request.
|
5
|
+
#
|
6
|
+
# ## API Specification
|
7
|
+
#
|
8
|
+
# OffsetResponse => [TopicName [PartitionOffsets]]
|
9
|
+
# PartitionOffsets => Partition ErrorCode [Offset]
|
10
|
+
# Partition => int32
|
11
|
+
# ErrorCode => int16
|
12
|
+
# Offset => int64
|
13
|
+
#
|
14
|
+
class ListOffsetResponse
|
15
|
+
class TopicOffsetInfo
|
16
|
+
attr_reader :name, :partition_offsets
|
17
|
+
|
18
|
+
def initialize(name:, partition_offsets:)
|
19
|
+
@name = name
|
20
|
+
@partition_offsets = partition_offsets
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
class PartitionOffsetInfo
|
25
|
+
attr_reader :partition, :error_code, :offsets
|
26
|
+
|
27
|
+
def initialize(partition:, error_code:, offsets:)
|
28
|
+
@partition = partition
|
29
|
+
@error_code = error_code
|
30
|
+
@offsets = offsets
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
attr_reader :topics
|
35
|
+
|
36
|
+
def initialize(topics:)
|
37
|
+
@topics = topics
|
38
|
+
end
|
39
|
+
|
40
|
+
def offset_for(topic, partition)
|
41
|
+
topic_info = @topics.find {|t| t.name == topic }
|
42
|
+
|
43
|
+
if topic_info.nil?
|
44
|
+
raise UnknownTopicOrPartition, "Unknown topic #{topic}"
|
45
|
+
end
|
46
|
+
|
47
|
+
partition_info = topic_info
|
48
|
+
.partition_offsets
|
49
|
+
.find {|p| p.partition == partition }
|
50
|
+
|
51
|
+
if partition_info.nil?
|
52
|
+
raise UnknownTopicOrPartition, "Unknown partition #{topic}/#{partition}"
|
53
|
+
end
|
54
|
+
|
55
|
+
Protocol.handle_error(partition_info.error_code)
|
56
|
+
|
57
|
+
partition_info.offsets.first
|
58
|
+
end
|
59
|
+
|
60
|
+
def self.decode(decoder)
|
61
|
+
topics = decoder.array do
|
62
|
+
name = decoder.string
|
63
|
+
|
64
|
+
partition_offsets = decoder.array do
|
65
|
+
PartitionOffsetInfo.new(
|
66
|
+
partition: decoder.int32,
|
67
|
+
error_code: decoder.int16,
|
68
|
+
offsets: decoder.array { decoder.int64 },
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
TopicOffsetInfo.new(
|
73
|
+
name: name,
|
74
|
+
partition_offsets: partition_offsets
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
new(topics: topics)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|