ruby-kafka 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -3
- data/Rakefile +1 -1
- data/examples/simple-consumer.rb +48 -0
- data/examples/simple-producer.rb +7 -1
- data/lib/kafka.rb +1 -1
- data/lib/kafka/broker.rb +32 -0
- data/lib/kafka/broker_pool.rb +16 -102
- data/lib/kafka/client.rb +91 -9
- data/lib/kafka/cluster.rb +130 -0
- data/lib/kafka/connection.rb +1 -3
- data/lib/kafka/fetch_operation.rb +127 -0
- data/lib/kafka/fetched_message.rb +27 -0
- data/lib/kafka/instrumentation.rb +1 -1
- data/lib/kafka/message_buffer.rb +8 -1
- data/lib/kafka/partitioner.rb +13 -8
- data/lib/kafka/pending_message.rb +13 -0
- data/lib/kafka/produce_operation.rb +116 -0
- data/lib/kafka/producer.rb +64 -30
- data/lib/kafka/protocol.rb +9 -0
- data/lib/kafka/protocol/decoder.rb +7 -0
- data/lib/kafka/protocol/fetch_request.rb +53 -0
- data/lib/kafka/protocol/fetch_response.rb +75 -0
- data/lib/kafka/protocol/list_offset_request.rb +41 -0
- data/lib/kafka/protocol/list_offset_response.rb +82 -0
- data/lib/kafka/protocol/message.rb +15 -0
- data/lib/kafka/protocol/message_set.rb +25 -0
- data/lib/kafka/protocol/metadata_response.rb +3 -1
- data/lib/kafka/protocol/produce_request.rb +3 -0
- data/lib/kafka/protocol/topic_metadata_request.rb +4 -0
- data/lib/kafka/socket_with_timeout.rb +3 -3
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +3 -1
- metadata +14 -4
- data/lib/kafka/transmission.rb +0 -76
data/lib/kafka/producer.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "kafka/partitioner"
|
2
2
|
require "kafka/message_buffer"
|
3
|
-
require "kafka/
|
4
|
-
require "kafka/
|
3
|
+
require "kafka/produce_operation"
|
4
|
+
require "kafka/pending_message"
|
5
5
|
|
6
6
|
module Kafka
|
7
7
|
|
@@ -17,7 +17,7 @@ module Kafka
|
|
17
17
|
# producer = kafka.get_producer
|
18
18
|
#
|
19
19
|
# This is done in order to share a logger as well as a pool of broker connections across
|
20
|
-
# different producers. This also means that you don't need to pass the `
|
20
|
+
# different producers. This also means that you don't need to pass the `cluster` and
|
21
21
|
# `logger` options to `#get_producer`. See {#initialize} for the list of other options
|
22
22
|
# you can pass in.
|
23
23
|
#
|
@@ -86,8 +86,7 @@ module Kafka
|
|
86
86
|
|
87
87
|
# Initializes a new Producer.
|
88
88
|
#
|
89
|
-
# @param
|
90
|
-
# Typically passed in for you.
|
89
|
+
# @param cluster [Cluster] the cluster client. Typically passed in for you.
|
91
90
|
#
|
92
91
|
# @param logger [Logger] the logger that should be used. Typically passed
|
93
92
|
# in for you.
|
@@ -107,15 +106,20 @@ module Kafka
|
|
107
106
|
# @param max_buffer_size [Integer] the number of messages allowed in the buffer
|
108
107
|
# before new writes will raise BufferOverflow exceptions.
|
109
108
|
#
|
110
|
-
def initialize(
|
111
|
-
@
|
109
|
+
def initialize(cluster:, logger:, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
|
110
|
+
@cluster = cluster
|
112
111
|
@logger = logger
|
113
112
|
@required_acks = required_acks
|
114
113
|
@ack_timeout = ack_timeout
|
115
114
|
@max_retries = max_retries
|
116
115
|
@retry_backoff = retry_backoff
|
117
116
|
@max_buffer_size = max_buffer_size
|
117
|
+
|
118
|
+
# A buffer organized by topic/partition.
|
118
119
|
@buffer = MessageBuffer.new
|
120
|
+
|
121
|
+
# Messages added by `#produce` but not yet assigned a partition.
|
122
|
+
@pending_messages = []
|
119
123
|
end
|
120
124
|
|
121
125
|
# Produces a message to the specified topic. Note that messages are buffered in
|
@@ -152,18 +156,15 @@ module Kafka
|
|
152
156
|
raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
|
153
157
|
end
|
154
158
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
partition
|
160
|
-
|
161
|
-
|
162
|
-
message = Protocol::Message.new(key: key, value: value)
|
163
|
-
|
164
|
-
@buffer.write(message, topic: topic, partition: partition)
|
159
|
+
@pending_messages << PendingMessage.new(
|
160
|
+
value: value,
|
161
|
+
key: key,
|
162
|
+
topic: topic,
|
163
|
+
partition: partition,
|
164
|
+
partition_key: partition_key,
|
165
|
+
)
|
165
166
|
|
166
|
-
|
167
|
+
nil
|
167
168
|
end
|
168
169
|
|
169
170
|
# Sends all buffered messages to the Kafka brokers.
|
@@ -178,8 +179,12 @@ module Kafka
|
|
178
179
|
def send_messages
|
179
180
|
attempt = 0
|
180
181
|
|
181
|
-
|
182
|
-
|
182
|
+
# Make sure we get metadata for this topic.
|
183
|
+
target_topics = @pending_messages.map(&:topic).uniq
|
184
|
+
@cluster.add_target_topics(target_topics)
|
185
|
+
|
186
|
+
operation = ProduceOperation.new(
|
187
|
+
cluster: @cluster,
|
183
188
|
buffer: @buffer,
|
184
189
|
required_acks: @required_acks,
|
185
190
|
ack_timeout: @ack_timeout,
|
@@ -187,21 +192,21 @@ module Kafka
|
|
187
192
|
)
|
188
193
|
|
189
194
|
loop do
|
190
|
-
@logger.info "Sending #{@buffer.size} messages"
|
191
|
-
|
192
195
|
attempt += 1
|
193
|
-
transmission.send_messages
|
194
196
|
|
195
|
-
|
196
|
-
|
197
|
+
@cluster.refresh_metadata_if_necessary!
|
198
|
+
|
199
|
+
assign_partitions!
|
200
|
+
operation.execute
|
201
|
+
|
202
|
+
if @pending_messages.empty? && @buffer.empty?
|
197
203
|
break
|
198
204
|
elsif attempt <= @max_retries
|
199
|
-
@logger.warn "Failed to
|
200
|
-
@logger.info "Waiting #{@retry_backoff}s before retrying"
|
205
|
+
@logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
|
201
206
|
|
202
207
|
sleep @retry_backoff
|
203
208
|
else
|
204
|
-
@logger.error "Failed to
|
209
|
+
@logger.error "Failed to send all messages; keeping remaining messages in buffer"
|
205
210
|
break
|
206
211
|
end
|
207
212
|
end
|
@@ -224,14 +229,43 @@ module Kafka
|
|
224
229
|
#
|
225
230
|
# @return [Integer] buffer size.
|
226
231
|
def buffer_size
|
227
|
-
@buffer.size
|
232
|
+
@pending_messages.size + @buffer.size
|
228
233
|
end
|
229
234
|
|
230
235
|
# Closes all connections to the brokers.
|
231
236
|
#
|
232
237
|
# @return [nil]
|
233
238
|
def shutdown
|
234
|
-
@
|
239
|
+
@cluster.disconnect
|
240
|
+
end
|
241
|
+
|
242
|
+
private
|
243
|
+
|
244
|
+
def assign_partitions!
|
245
|
+
until @pending_messages.empty?
|
246
|
+
# We want to keep the message in the first-stage buffer in case there's an error.
|
247
|
+
message = @pending_messages.first
|
248
|
+
|
249
|
+
partition = message.partition
|
250
|
+
|
251
|
+
if partition.nil?
|
252
|
+
partition_count = @cluster.partitions_for(message.topic).count
|
253
|
+
partition = Partitioner.partition_for_key(partition_count, message)
|
254
|
+
end
|
255
|
+
|
256
|
+
@buffer.write(
|
257
|
+
value: message.value,
|
258
|
+
key: message.key,
|
259
|
+
topic: message.topic,
|
260
|
+
partition: partition,
|
261
|
+
)
|
262
|
+
|
263
|
+
# Now it's safe to remove the message from the first-stage buffer.
|
264
|
+
@pending_messages.shift
|
265
|
+
end
|
266
|
+
rescue Kafka::Error => e
|
267
|
+
@logger.error "Failed to assign pending message to a partition: #{e}"
|
268
|
+
@cluster.mark_as_stale!
|
235
269
|
end
|
236
270
|
end
|
237
271
|
end
|
data/lib/kafka/protocol.rb
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
module Kafka
|
2
2
|
module Protocol
|
3
|
+
# The replica id of non-brokers is always -1.
|
4
|
+
REPLICA_ID = -1
|
5
|
+
|
3
6
|
APIS = {
|
4
7
|
0 => :produce,
|
8
|
+
1 => :fetch,
|
9
|
+
2 => :list_offset,
|
5
10
|
3 => :topic_metadata,
|
6
11
|
}
|
7
12
|
|
@@ -45,3 +50,7 @@ require "kafka/protocol/topic_metadata_request"
|
|
45
50
|
require "kafka/protocol/metadata_response"
|
46
51
|
require "kafka/protocol/produce_request"
|
47
52
|
require "kafka/protocol/produce_response"
|
53
|
+
require "kafka/protocol/fetch_request"
|
54
|
+
require "kafka/protocol/fetch_response"
|
55
|
+
require "kafka/protocol/list_offset_request"
|
56
|
+
require "kafka/protocol/list_offset_response"
|
data/lib/kafka/protocol/decoder.rb
CHANGED
@@ -5,6 +5,9 @@ module Kafka
|
|
5
5
|
# from it. The Kafka protocol is not self-describing, so a client must call
|
6
6
|
# these methods in just the right order for things to work.
|
7
7
|
class Decoder
|
8
|
+
def self.from_string(str)
|
9
|
+
new(StringIO.new(str))
|
10
|
+
end
|
8
11
|
|
9
12
|
# Initializes a new decoder.
|
10
13
|
#
|
@@ -13,6 +16,10 @@ module Kafka
|
|
13
16
|
@io = io
|
14
17
|
end
|
15
18
|
|
19
|
+
def eof?
|
20
|
+
@io.eof?
|
21
|
+
end
|
22
|
+
|
16
23
|
# Decodes an 8-bit integer from the IO object.
|
17
24
|
#
|
18
25
|
# @return [Integer]
|
data/lib/kafka/protocol/fetch_request.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
module Kafka
|
2
|
+
module Protocol
|
3
|
+
|
4
|
+
# A request to fetch messages from a given partition.
|
5
|
+
#
|
6
|
+
# ## API Specification
|
7
|
+
#
|
8
|
+
# FetchRequest => ReplicaId MaxWaitTime MinBytes [TopicName [Partition FetchOffset MaxBytes]]
|
9
|
+
# ReplicaId => int32
|
10
|
+
# MaxWaitTime => int32
|
11
|
+
# MinBytes => int32
|
12
|
+
# TopicName => string
|
13
|
+
# Partition => int32
|
14
|
+
# FetchOffset => int64
|
15
|
+
# MaxBytes => int32
|
16
|
+
#
|
17
|
+
class FetchRequest
|
18
|
+
|
19
|
+
# @param max_wait_time [Integer]
|
20
|
+
# @param min_bytes [Integer]
|
21
|
+
# @param topics [Hash]
|
22
|
+
def initialize(max_wait_time:, min_bytes:, topics:)
|
23
|
+
@replica_id = REPLICA_ID
|
24
|
+
@max_wait_time = max_wait_time
|
25
|
+
@min_bytes = min_bytes
|
26
|
+
@topics = topics
|
27
|
+
end
|
28
|
+
|
29
|
+
def api_key
|
30
|
+
1
|
31
|
+
end
|
32
|
+
|
33
|
+
def encode(encoder)
|
34
|
+
encoder.write_int32(@replica_id)
|
35
|
+
encoder.write_int32(@max_wait_time)
|
36
|
+
encoder.write_int32(@min_bytes)
|
37
|
+
|
38
|
+
encoder.write_array(@topics) do |topic, partitions|
|
39
|
+
encoder.write_string(topic)
|
40
|
+
|
41
|
+
encoder.write_array(partitions) do |partition, config|
|
42
|
+
fetch_offset = config.fetch(:fetch_offset)
|
43
|
+
max_bytes = config.fetch(:max_bytes)
|
44
|
+
|
45
|
+
encoder.write_int32(partition)
|
46
|
+
encoder.write_int64(fetch_offset)
|
47
|
+
encoder.write_int32(max_bytes)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
data/lib/kafka/protocol/fetch_response.rb
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
require "kafka/protocol/message_set"
|
2
|
+
|
3
|
+
module Kafka
|
4
|
+
module Protocol
|
5
|
+
|
6
|
+
# A response to a fetch request.
|
7
|
+
#
|
8
|
+
# ## API Specification
|
9
|
+
#
|
10
|
+
# FetchResponse => [TopicName [Partition ErrorCode HighwaterMarkOffset MessageSetSize MessageSet]]
|
11
|
+
# TopicName => string
|
12
|
+
# Partition => int32
|
13
|
+
# ErrorCode => int16
|
14
|
+
# HighwaterMarkOffset => int64
|
15
|
+
# MessageSetSize => int32
|
16
|
+
#
|
17
|
+
class FetchResponse
|
18
|
+
class FetchedPartition
|
19
|
+
attr_reader :partition, :error_code
|
20
|
+
attr_reader :highwater_mark_offset, :messages
|
21
|
+
|
22
|
+
def initialize(partition:, error_code:, highwater_mark_offset:, messages:)
|
23
|
+
@partition = partition
|
24
|
+
@error_code = error_code
|
25
|
+
@highwater_mark_offset = highwater_mark_offset
|
26
|
+
@messages = messages
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
class FetchedTopic
|
31
|
+
attr_reader :name, :partitions
|
32
|
+
|
33
|
+
def initialize(name:, partitions:)
|
34
|
+
@name = name
|
35
|
+
@partitions = partitions
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
attr_reader :topics
|
40
|
+
|
41
|
+
def initialize(topics: [])
|
42
|
+
@topics = topics
|
43
|
+
end
|
44
|
+
|
45
|
+
def self.decode(decoder)
|
46
|
+
topics = decoder.array do
|
47
|
+
topic_name = decoder.string
|
48
|
+
|
49
|
+
partitions = decoder.array do
|
50
|
+
partition = decoder.int32
|
51
|
+
error_code = decoder.int16
|
52
|
+
highwater_mark_offset = decoder.int64
|
53
|
+
|
54
|
+
message_set_decoder = Decoder.from_string(decoder.bytes)
|
55
|
+
message_set = MessageSet.decode(message_set_decoder)
|
56
|
+
|
57
|
+
FetchedPartition.new(
|
58
|
+
partition: partition,
|
59
|
+
error_code: error_code,
|
60
|
+
highwater_mark_offset: highwater_mark_offset,
|
61
|
+
messages: message_set.messages,
|
62
|
+
)
|
63
|
+
end
|
64
|
+
|
65
|
+
FetchedTopic.new(
|
66
|
+
name: topic_name,
|
67
|
+
partitions: partitions,
|
68
|
+
)
|
69
|
+
end
|
70
|
+
|
71
|
+
new(topics: topics)
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
data/lib/kafka/protocol/list_offset_request.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
module Kafka
|
2
|
+
module Protocol
|
3
|
+
# A request to list the available offsets for a set of topics/partitions.
|
4
|
+
#
|
5
|
+
# ## API Specification
|
6
|
+
#
|
7
|
+
# OffsetRequest => ReplicaId [TopicName [Partition Time MaxNumberOfOffsets]]
|
8
|
+
# ReplicaId => int32
|
9
|
+
# TopicName => string
|
10
|
+
# Partition => int32
|
11
|
+
# Time => int64
|
12
|
+
# MaxNumberOfOffsets => int32
|
13
|
+
#
|
14
|
+
class ListOffsetRequest
|
15
|
+
|
16
|
+
# @param topics [Hash]
|
17
|
+
def initialize(topics:)
|
18
|
+
@replica_id = REPLICA_ID
|
19
|
+
@topics = topics
|
20
|
+
end
|
21
|
+
|
22
|
+
def api_key
|
23
|
+
2
|
24
|
+
end
|
25
|
+
|
26
|
+
def encode(encoder)
|
27
|
+
encoder.write_int32(@replica_id)
|
28
|
+
|
29
|
+
encoder.write_array(@topics) do |topic, partitions|
|
30
|
+
encoder.write_string(topic)
|
31
|
+
|
32
|
+
encoder.write_array(partitions) do |partition|
|
33
|
+
encoder.write_int32(partition.fetch(:partition))
|
34
|
+
encoder.write_int64(partition.fetch(:time))
|
35
|
+
encoder.write_int32(partition.fetch(:max_offsets))
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/kafka/protocol/list_offset_response.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
module Kafka
|
2
|
+
module Protocol
|
3
|
+
|
4
|
+
# A response to a list offset request.
|
5
|
+
#
|
6
|
+
# ## API Specification
|
7
|
+
#
|
8
|
+
# OffsetResponse => [TopicName [PartitionOffsets]]
|
9
|
+
# PartitionOffsets => Partition ErrorCode [Offset]
|
10
|
+
# Partition => int32
|
11
|
+
# ErrorCode => int16
|
12
|
+
# Offset => int64
|
13
|
+
#
|
14
|
+
class ListOffsetResponse
|
15
|
+
class TopicOffsetInfo
|
16
|
+
attr_reader :name, :partition_offsets
|
17
|
+
|
18
|
+
def initialize(name:, partition_offsets:)
|
19
|
+
@name = name
|
20
|
+
@partition_offsets = partition_offsets
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
class PartitionOffsetInfo
|
25
|
+
attr_reader :partition, :error_code, :offsets
|
26
|
+
|
27
|
+
def initialize(partition:, error_code:, offsets:)
|
28
|
+
@partition = partition
|
29
|
+
@error_code = error_code
|
30
|
+
@offsets = offsets
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
attr_reader :topics
|
35
|
+
|
36
|
+
def initialize(topics:)
|
37
|
+
@topics = topics
|
38
|
+
end
|
39
|
+
|
40
|
+
def offset_for(topic, partition)
|
41
|
+
topic_info = @topics.find {|t| t.name == topic }
|
42
|
+
|
43
|
+
if topic_info.nil?
|
44
|
+
raise UnknownTopicOrPartition, "Unknown topic #{topic}"
|
45
|
+
end
|
46
|
+
|
47
|
+
partition_info = topic_info
|
48
|
+
.partition_offsets
|
49
|
+
.find {|p| p.partition == partition }
|
50
|
+
|
51
|
+
if partition_info.nil?
|
52
|
+
raise UnknownTopicOrPartition, "Unknown partition #{topic}/#{partition}"
|
53
|
+
end
|
54
|
+
|
55
|
+
Protocol.handle_error(partition_info.error_code)
|
56
|
+
|
57
|
+
partition_info.offsets.first
|
58
|
+
end
|
59
|
+
|
60
|
+
def self.decode(decoder)
|
61
|
+
topics = decoder.array do
|
62
|
+
name = decoder.string
|
63
|
+
|
64
|
+
partition_offsets = decoder.array do
|
65
|
+
PartitionOffsetInfo.new(
|
66
|
+
partition: decoder.int32,
|
67
|
+
error_code: decoder.int16,
|
68
|
+
offsets: decoder.array { decoder.int64 },
|
69
|
+
)
|
70
|
+
end
|
71
|
+
|
72
|
+
TopicOffsetInfo.new(
|
73
|
+
name: name,
|
74
|
+
partition_offsets: partition_offsets
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
new(topics: topics)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|