ruby-kafka 0.1.3 → 0.1.4

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.

--- a/lib/kafka/producer.rb
+++ b/lib/kafka/producer.rb
@@ -1,7 +1,7 @@
 require "kafka/partitioner"
 require "kafka/message_buffer"
-require "kafka/protocol/message"
-require "kafka/transmission"
+require "kafka/produce_operation"
+require "kafka/pending_message"
 
 module Kafka
 
@@ -17,7 +17,7 @@ module Kafka
   #     producer = kafka.get_producer
   #
   # This is done in order to share a logger as well as a pool of broker connections across
-  # different producers. This also means that you don't need to pass the `broker_pool` and
+  # different producers. This also means that you don't need to pass the `cluster` and
   # `logger` options to `#get_producer`. See {#initialize} for the list of other options
   # you can pass in.
   #
@@ -86,8 +86,7 @@ module Kafka
 
     # Initializes a new Producer.
     #
-    # @param broker_pool [BrokerPool] the broker pool representing the cluster.
-    #   Typically passed in for you.
+    # @param cluster [Cluster] the cluster client. Typically passed in for you.
     #
     # @param logger [Logger] the logger that should be used. Typically passed
     #   in for you.
@@ -107,15 +106,20 @@ module Kafka
     # @param max_buffer_size [Integer] the number of messages allowed in the buffer
     #   before new writes will raise BufferOverflow exceptions.
     #
-    def initialize(broker_pool:, logger:, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
-      @broker_pool = broker_pool
+    def initialize(cluster:, logger:, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
+      @cluster = cluster
       @logger = logger
       @required_acks = required_acks
       @ack_timeout = ack_timeout
       @max_retries = max_retries
       @retry_backoff = retry_backoff
       @max_buffer_size = max_buffer_size
+
+      # A buffer organized by topic/partition.
       @buffer = MessageBuffer.new
+
+      # Messages added by `#produce` but not yet assigned a partition.
+      @pending_messages = []
     end
 
     # Produces a message to the specified topic. Note that messages are buffered in
@@ -152,18 +156,15 @@ module Kafka
         raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
       end
 
-      if partition.nil?
-        # If no explicit partition key is specified we use the message key instead.
-        partition_key ||= key
-        partitioner = Partitioner.new(@broker_pool.partitions_for(topic))
-        partition = partitioner.partition_for_key(partition_key)
-      end
-
-      message = Protocol::Message.new(key: key, value: value)
-
-      @buffer.write(message, topic: topic, partition: partition)
+      @pending_messages << PendingMessage.new(
+        value: value,
+        key: key,
+        topic: topic,
+        partition: partition,
+        partition_key: partition_key,
+      )
 
-      partition
+      nil
     end
 
     # Sends all buffered messages to the Kafka brokers.
@@ -178,8 +179,12 @@ module Kafka
     def send_messages
       attempt = 0
 
-      transmission = Transmission.new(
-        broker_pool: @broker_pool,
+      # Make sure we get metadata for this topic.
+      target_topics = @pending_messages.map(&:topic).uniq
+      @cluster.add_target_topics(target_topics)
+
+      operation = ProduceOperation.new(
+        cluster: @cluster,
         buffer: @buffer,
         required_acks: @required_acks,
         ack_timeout: @ack_timeout,
@@ -187,21 +192,21 @@ module Kafka
       )
 
       loop do
-        @logger.info "Sending #{@buffer.size} messages"
-
         attempt += 1
-        transmission.send_messages
 
-        if @buffer.empty?
-          @logger.info "Successfully transmitted all messages"
+        @cluster.refresh_metadata_if_necessary!
+
+        assign_partitions!
+        operation.execute
+
+        if @pending_messages.empty? && @buffer.empty?
           break
         elsif attempt <= @max_retries
-          @logger.warn "Failed to transmit all messages, retry #{attempt} of #{@max_retries}"
-          @logger.info "Waiting #{@retry_backoff}s before retrying"
+          @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"
 
           sleep @retry_backoff
         else
-          @logger.error "Failed to transmit all messages; keeping remaining messages in buffer"
+          @logger.error "Failed to send all messages; keeping remaining messages in buffer"
           break
         end
       end
@@ -224,14 +229,43 @@ module Kafka
     #
     # @return [Integer] buffer size.
     def buffer_size
-      @buffer.size
+      @pending_messages.size + @buffer.size
    end
 
     # Closes all connections to the brokers.
     #
     # @return [nil]
     def shutdown
-      @broker_pool.shutdown
+      @cluster.disconnect
+    end
+
+    private
+
+    def assign_partitions!
+      until @pending_messages.empty?
+        # We want to keep the message in the first-stage buffer in case there's an error.
+        message = @pending_messages.first
+
+        partition = message.partition
+
+        if partition.nil?
+          partition_count = @cluster.partitions_for(message.topic).count
+          partition = Partitioner.partition_for_key(partition_count, message)
+        end
+
+        @buffer.write(
+          value: message.value,
+          key: message.key,
+          topic: message.topic,
+          partition: partition,
+        )
+
+        # Now it's safe to remove the message from the first-stage buffer.
+        @pending_messages.shift
+      end
+    rescue Kafka::Error => e
+      @logger.error "Failed to assign pending message to a partition: #{e}"
+      @cluster.mark_as_stale!
     end
   end
 end
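
Taken together, these changes split producing into two stages: `#produce` now only appends a `PendingMessage` to a first-stage buffer and returns `nil` (it previously returned the assigned partition), while `#send_messages` fetches metadata for the target topics, assigns partitions, and flushes with retries. A minimal sketch of the resulting caller flow, assuming a `kafka` client object created elsewhere (topic and key names are hypothetical):

    producer = kafka.get_producer

    # No partition is chosen here anymore; the message is only staged.
    producer.produce("hello", topic: "greetings", partition_key: "user-1")

    # Metadata is refreshed if necessary, partitions are assigned, and the
    # buffer is flushed, retrying up to `max_retries` times.
    producer.send_messages

    producer.shutdown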

--- a/lib/kafka/protocol.rb
+++ b/lib/kafka/protocol.rb
@@ -1,7 +1,12 @@
 module Kafka
   module Protocol
+    # The replica id of non-brokers is always -1.
+    REPLICA_ID = -1
+
     APIS = {
       0 => :produce,
+      1 => :fetch,
+      2 => :list_offset,
       3 => :topic_metadata,
     }
 
@@ -45,3 +50,7 @@ require "kafka/protocol/topic_metadata_request"
 require "kafka/protocol/metadata_response"
 require "kafka/protocol/produce_request"
 require "kafka/protocol/produce_response"
+require "kafka/protocol/fetch_request"
+require "kafka/protocol/fetch_response"
+require "kafka/protocol/list_offset_request"
+require "kafka/protocol/list_offset_response"
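
The new `REPLICA_ID` constant captures the Kafka protocol rule that ordinary clients identify themselves with a replica id of -1 (real replica ids are reserved for brokers), and `APIS` gains entries for the fetch (1) and list offset (2) APIs implemented below. A quick illustration of how the pieces line up:

    # Illustrative only; an empty topics hash wouldn't make a useful request.
    request = Kafka::Protocol::FetchRequest.new(max_wait_time: 100, min_bytes: 1, topics: {})
    Kafka::Protocol::APIS.fetch(request.api_key) # => :fetch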

--- a/lib/kafka/protocol/decoder.rb
+++ b/lib/kafka/protocol/decoder.rb
@@ -5,6 +5,9 @@ module Kafka
    # from it. The Kafka protocol is not self-describing, so a client must call
    # these methods in just the right order for things to work.
    class Decoder
+      def self.from_string(str)
+        new(StringIO.new(str))
+      end
 
      # Initializes a new decoder.
      #
@@ -13,6 +16,10 @@ module Kafka
        @io = io
      end
 
+      def eof?
+        @io.eof?
+      end
+
      # Decodes an 8-bit integer from the IO object.
      #
      # @return [Integer]
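
`Decoder.from_string` wraps an already-read byte string in a `StringIO`, so nested structures (such as the message sets embedded in fetch responses) can be decoded with the same API, and `eof?` lets callers loop until the input is exhausted. A small sketch, using a hand-encoded big-endian int32:

    decoder = Kafka::Protocol::Decoder.from_string("\x00\x00\x00\x2a")
    decoder.int32 # => 42
    decoder.eof?  # => true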

--- /dev/null
+++ b/lib/kafka/protocol/fetch_request.rb
@@ -0,0 +1,53 @@
+module Kafka
+  module Protocol
+
+    # A request to fetch messages from a given partition.
+    #
+    # ## API Specification
+    #
+    #     FetchRequest => ReplicaId MaxWaitTime MinBytes [TopicName [Partition FetchOffset MaxBytes]]
+    #       ReplicaId => int32
+    #       MaxWaitTime => int32
+    #       MinBytes => int32
+    #       TopicName => string
+    #       Partition => int32
+    #       FetchOffset => int64
+    #       MaxBytes => int32
+    #
+    class FetchRequest
+
+      # @param max_wait_time [Integer]
+      # @param min_bytes [Integer]
+      # @param topics [Hash]
+      def initialize(max_wait_time:, min_bytes:, topics:)
+        @replica_id = REPLICA_ID
+        @max_wait_time = max_wait_time
+        @min_bytes = min_bytes
+        @topics = topics
+      end
+
+      def api_key
+        1
+      end
+
+      def encode(encoder)
+        encoder.write_int32(@replica_id)
+        encoder.write_int32(@max_wait_time)
+        encoder.write_int32(@min_bytes)
+
+        encoder.write_array(@topics) do |topic, partitions|
+          encoder.write_string(topic)
+
+          encoder.write_array(partitions) do |partition, config|
+            fetch_offset = config.fetch(:fetch_offset)
+            max_bytes = config.fetch(:max_bytes)
+
+            encoder.write_int32(partition)
+            encoder.write_int64(fetch_offset)
+            encoder.write_int32(max_bytes)
+          end
+        end
+      end
+    end
+  end
+end
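
Judging by the nested `write_array` blocks, `topics` is expected to be a hash mapping each topic name to a hash of partition ids and per-partition options. A hypothetical request fetching from the beginning of a single partition (topic name assumed):

    request = Kafka::Protocol::FetchRequest.new(
      max_wait_time: 5_000, # milliseconds
      min_bytes: 1,
      topics: {
        "greetings" => {
          0 => { fetch_offset: 0, max_bytes: 10_240 },
        },
      },
    )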

--- /dev/null
+++ b/lib/kafka/protocol/fetch_response.rb
@@ -0,0 +1,75 @@
+require "kafka/protocol/message_set"
+
+module Kafka
+  module Protocol
+
+    # A response to a fetch request.
+    #
+    # ## API Specification
+    #
+    #     FetchResponse => [TopicName [Partition ErrorCode HighwaterMarkOffset MessageSetSize MessageSet]]
+    #       TopicName => string
+    #       Partition => int32
+    #       ErrorCode => int16
+    #       HighwaterMarkOffset => int64
+    #       MessageSetSize => int32
+    #
+    class FetchResponse
+      class FetchedPartition
+        attr_reader :partition, :error_code
+        attr_reader :highwater_mark_offset, :messages
+
+        def initialize(partition:, error_code:, highwater_mark_offset:, messages:)
+          @partition = partition
+          @error_code = error_code
+          @highwater_mark_offset = highwater_mark_offset
+          @messages = messages
+        end
+      end
+
+      class FetchedTopic
+        attr_reader :name, :partitions
+
+        def initialize(name:, partitions:)
+          @name = name
+          @partitions = partitions
+        end
+      end
+
+      attr_reader :topics
+
+      def initialize(topics: [])
+        @topics = topics
+      end
+
+      def self.decode(decoder)
+        topics = decoder.array do
+          topic_name = decoder.string
+
+          partitions = decoder.array do
+            partition = decoder.int32
+            error_code = decoder.int16
+            highwater_mark_offset = decoder.int64
+
+            message_set_decoder = Decoder.from_string(decoder.bytes)
+            message_set = MessageSet.decode(message_set_decoder)
+
+            FetchedPartition.new(
+              partition: partition,
+              error_code: error_code,
+              highwater_mark_offset: highwater_mark_offset,
+              messages: message_set.messages,
+            )
+          end
+
+          FetchedTopic.new(
+            name: topic_name,
+            partitions: partitions,
+          )
+        end
+
+        new(topics: topics)
+      end
+    end
+  end
+end
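
Each partition's message set arrives length-prefixed, which is why `decode` reads it with `decoder.bytes` and hands the raw bytes to a fresh `Decoder.from_string`. Consuming a decoded response might look like the following sketch (the `value` reader on the decoded messages is assumed from `Protocol::Message`):

    # Assuming `response` is a FetchResponse decoded from a broker reply.
    response.topics.each do |topic|
      topic.partitions.each do |partition|
        Kafka::Protocol.handle_error(partition.error_code)

        partition.messages.each do |message|
          puts message.value
        end
      end
    end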

--- /dev/null
+++ b/lib/kafka/protocol/list_offset_request.rb
@@ -0,0 +1,41 @@
+module Kafka
+  module Protocol
+    # A request to list the available offsets for a set of topics/partitions.
+    #
+    # ## API Specification
+    #
+    #     OffsetRequest => ReplicaId [TopicName [Partition Time MaxNumberOfOffsets]]
+    #       ReplicaId => int32
+    #       TopicName => string
+    #       Partition => int32
+    #       Time => int64
+    #       MaxNumberOfOffsets => int32
+    #
+    class ListOffsetRequest
+
+      # @param topics [Hash]
+      def initialize(topics:)
+        @replica_id = REPLICA_ID
+        @topics = topics
+      end
+
+      def api_key
+        2
+      end
+
+      def encode(encoder)
+        encoder.write_int32(@replica_id)
+
+        encoder.write_array(@topics) do |topic, partitions|
+          encoder.write_string(topic)
+
+          encoder.write_array(partitions) do |partition|
+            encoder.write_int32(partition.fetch(:partition))
+            encoder.write_int64(partition.fetch(:time))
+            encoder.write_int32(partition.fetch(:max_offsets))
+          end
+        end
+      end
+    end
+  end
+end
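
Here `topics` maps each topic name to an array of partition hashes. Per the Kafka protocol, a `time` of -1 requests the latest offset and -2 the earliest; `max_offsets` caps how many offsets are returned per partition. A hypothetical request for the latest offset of one partition (topic name assumed):

    request = Kafka::Protocol::ListOffsetRequest.new(
      topics: {
        "greetings" => [
          { partition: 0, time: -1, max_offsets: 1 },
        ],
      },
    )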

--- /dev/null
+++ b/lib/kafka/protocol/list_offset_response.rb
@@ -0,0 +1,82 @@
+module Kafka
+  module Protocol
+
+    # A response to a list offset request.
+    #
+    # ## API Specification
+    #
+    #     OffsetResponse => [TopicName [PartitionOffsets]]
+    #       PartitionOffsets => Partition ErrorCode [Offset]
+    #       Partition => int32
+    #       ErrorCode => int16
+    #       Offset => int64
+    #
+    class ListOffsetResponse
+      class TopicOffsetInfo
+        attr_reader :name, :partition_offsets
+
+        def initialize(name:, partition_offsets:)
+          @name = name
+          @partition_offsets = partition_offsets
+        end
+      end
+
+      class PartitionOffsetInfo
+        attr_reader :partition, :error_code, :offsets
+
+        def initialize(partition:, error_code:, offsets:)
+          @partition = partition
+          @error_code = error_code
+          @offsets = offsets
+        end
+      end
+
+      attr_reader :topics
+
+      def initialize(topics:)
+        @topics = topics
+      end
+
+      def offset_for(topic, partition)
+        topic_info = @topics.find {|t| t.name == topic }
+
+        if topic_info.nil?
+          raise UnknownTopicOrPartition, "Unknown topic #{topic}"
+        end
+
+        partition_info = topic_info
+          .partition_offsets
+          .find {|p| p.partition == partition }
+
+        if partition_info.nil?
+          raise UnknownTopicOrPartition, "Unknown partition #{topic}/#{partition}"
+        end
+
+        Protocol.handle_error(partition_info.error_code)
+
+        partition_info.offsets.first
+      end
+
+      def self.decode(decoder)
+        topics = decoder.array do
+          name = decoder.string
+
+          partition_offsets = decoder.array do
+            PartitionOffsetInfo.new(
+              partition: decoder.int32,
+              error_code: decoder.int16,
+              offsets: decoder.array { decoder.int64 },
+            )
+          end
+
+          TopicOffsetInfo.new(
+            name: name,
+            partition_offsets: partition_offsets
+          )
+        end
+
+        new(topics: topics)
+      end
+    end
+  end
+end
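
`#offset_for` raises `UnknownTopicOrPartition` when the topic or partition is missing from the response, raises the error mapped from a non-zero `error_code` via `Protocol.handle_error`, and otherwise returns the first offset in the partition's list. Assuming `response` is a decoded `ListOffsetResponse` for the request sketched above:

    offset = response.offset_for("greetings", 0)
    # => the latest offset for partition 0 of "greetings"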