ruby-kafka 0.1.3 → 0.1.4

--- a/lib/kafka/producer.rb
+++ b/lib/kafka/producer.rb
@@ -1,7 +1,7 @@
  require "kafka/partitioner"
  require "kafka/message_buffer"
- require "kafka/protocol/message"
- require "kafka/transmission"
+ require "kafka/produce_operation"
+ require "kafka/pending_message"

  module Kafka

@@ -17,7 +17,7 @@ module Kafka
    #     producer = kafka.get_producer
    #
    # This is done in order to share a logger as well as a pool of broker connections across
-   # different producers. This also means that you don't need to pass the `broker_pool` and
+   # different producers. This also means that you don't need to pass the `cluster` and
    # `logger` options to `#get_producer`. See {#initialize} for the list of other options
    # you can pass in.
    #
@@ -86,8 +86,7 @@ module Kafka

      # Initializes a new Producer.
      #
-     # @param broker_pool [BrokerPool] the broker pool representing the cluster.
-     #   Typically passed in for you.
+     # @param cluster [Cluster] the cluster client. Typically passed in for you.
      #
      # @param logger [Logger] the logger that should be used. Typically passed
      #   in for you.
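
In practice a producer is obtained through `kafka.get_producer`, which supplies `cluster` and `logger` for you and forwards the remaining options to `#initialize`. A minimal sketch (the broker address and option values are illustrative, and the `Kafka.new` keywords are an assumption about this release's client API):

```ruby
require "kafka"
require "logger"

kafka = Kafka.new(seed_brokers: ["kafka1:9092"], logger: Logger.new($stderr))

# Everything except cluster: and logger: is forwarded to Producer#initialize.
producer = kafka.get_producer(required_acks: 1, max_retries: 2, max_buffer_size: 1000)
```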
@@ -107,15 +106,20 @@ module Kafka
      # @param max_buffer_size [Integer] the number of messages allowed in the buffer
      #   before new writes will raise BufferOverflow exceptions.
      #
-     def initialize(broker_pool:, logger:, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
-       @broker_pool = broker_pool
+     def initialize(cluster:, logger:, ack_timeout: 5, required_acks: 1, max_retries: 2, retry_backoff: 1, max_buffer_size: 1000)
+       @cluster = cluster
        @logger = logger
        @required_acks = required_acks
        @ack_timeout = ack_timeout
        @max_retries = max_retries
        @retry_backoff = retry_backoff
        @max_buffer_size = max_buffer_size
+
+       # A buffer organized by topic/partition.
        @buffer = MessageBuffer.new
+
+       # Messages added by `#produce` but not yet assigned a partition.
+       @pending_messages = []
      end

      # Produces a message to the specified topic. Note that messages are buffered in
@@ -152,18 +156,15 @@ module Kafka
          raise BufferOverflow, "Max buffer size #{@max_buffer_size} exceeded"
        end

-       if partition.nil?
-         # If no explicit partition key is specified we use the message key instead.
-         partition_key ||= key
-         partitioner = Partitioner.new(@broker_pool.partitions_for(topic))
-         partition = partitioner.partition_for_key(partition_key)
-       end
-
-       message = Protocol::Message.new(key: key, value: value)
-
-       @buffer.write(message, topic: topic, partition: partition)
+       @pending_messages << PendingMessage.new(
+         value: value,
+         key: key,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+       )

-       partition
+       nil
      end

      # Sends all buffered messages to the Kafka brokers.
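
With partition assignment deferred to `#send_messages`, `#produce` can no longer return a partition: it now appends a `PendingMessage` and returns `nil`. A sketch of a call site (the topic and values are illustrative):

```ruby
producer.produce("hello", key: "greeting", topic: "greetings")

# An explicit partition or partition key is simply recorded on the
# PendingMessage and honored once partitions are assigned:
producer.produce("hullo", topic: "greetings", partition_key: "greeting")
```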
@@ -178,8 +179,12 @@ module Kafka
      def send_messages
        attempt = 0

-       transmission = Transmission.new(
-         broker_pool: @broker_pool,
+       # Make sure we get metadata for this topic.
+       target_topics = @pending_messages.map(&:topic).uniq
+       @cluster.add_target_topics(target_topics)
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
          buffer: @buffer,
          required_acks: @required_acks,
          ack_timeout: @ack_timeout,
@@ -187,21 +192,21 @@ module Kafka
        )

        loop do
-         @logger.info "Sending #{@buffer.size} messages"
-
          attempt += 1
-         transmission.send_messages

-         if @buffer.empty?
-           @logger.info "Successfully transmitted all messages"
+         @cluster.refresh_metadata_if_necessary!
+
+         assign_partitions!
+         operation.execute
+
+         if @pending_messages.empty? && @buffer.empty?
            break
          elsif attempt <= @max_retries
-           @logger.warn "Failed to transmit all messages, retry #{attempt} of #{@max_retries}"
-           @logger.info "Waiting #{@retry_backoff}s before retrying"
+           @logger.warn "Failed to send all messages; attempting retry #{attempt} of #{@max_retries} after #{@retry_backoff}s"

            sleep @retry_backoff
          else
-           @logger.error "Failed to transmit all messages; keeping remaining messages in buffer"
+           @logger.error "Failed to send all messages; keeping remaining messages in buffer"
            break
          end
        end
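
Each pass through the loop now refreshes cluster metadata if it has been marked stale, assigns partitions to pending messages, and then executes the produce operation, so a retry can recover from stale metadata as well as from failed deliveries. The loop makes at most `1 + max_retries` attempts; anything still undelivered stays buffered. A sketch of the resulting behavior:

```ruby
producer = kafka.get_producer(max_retries: 5, retry_backoff: 2)

producer.produce("hello", topic: "greetings")

# Up to 6 attempts (the first try plus 5 retries), sleeping 2 seconds
# between attempts; messages that cannot be delivered remain in the buffer.
producer.send_messages
```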
@@ -224,14 +229,43 @@ module Kafka
      #
      # @return [Integer] buffer size.
      def buffer_size
-       @buffer.size
+       @pending_messages.size + @buffer.size
      end

      # Closes all connections to the brokers.
      #
      # @return [nil]
      def shutdown
-       @broker_pool.shutdown
+       @cluster.disconnect
+     end
+
+     private
+
+     def assign_partitions!
+       until @pending_messages.empty?
+         # We want to keep the message in the first-stage buffer in case there's an error.
+         message = @pending_messages.first
+
+         partition = message.partition
+
+         if partition.nil?
+           partition_count = @cluster.partitions_for(message.topic).count
+           partition = Partitioner.partition_for_key(partition_count, message)
+         end
+
+         @buffer.write(
+           value: message.value,
+           key: message.key,
+           topic: message.topic,
+           partition: partition,
+         )
+
+         # Now it's safe to remove the message from the first-stage buffer.
+         @pending_messages.shift
+       end
+     rescue Kafka::Error => e
+       @logger.error "Failed to assign pending message to a partition: #{e}"
+       @cluster.mark_as_stale!
      end
    end
  end
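
`#buffer_size` now spans both stages, so a message is counted from the moment `#produce` accepts it, and a failed partition assignment leaves the message in `@pending_messages` while marking the cluster metadata stale for the next attempt. For example:

```ruby
producer.produce("hello", topic: "greetings")
producer.buffer_size # => 1, even before a partition has been assigned

producer.send_messages
producer.buffer_size # => 0 once both stages have drained
```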
--- a/lib/kafka/protocol.rb
+++ b/lib/kafka/protocol.rb
@@ -1,7 +1,12 @@
  module Kafka
    module Protocol
+     # The replica id of non-brokers is always -1.
+     REPLICA_ID = -1
+
      APIS = {
        0 => :produce,
+       1 => :fetch,
+       2 => :list_offset,
        3 => :topic_metadata,
      }
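
The new entries register the protocol-level API keys for the fetch and list offset APIs, matching the `api_key` methods of the request classes added below:

```ruby
Kafka::Protocol::APIS.fetch(1) # => :fetch
Kafka::Protocol::APIS.fetch(2) # => :list_offset
```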
 
@@ -45,3 +50,7 @@ require "kafka/protocol/topic_metadata_request"
  require "kafka/protocol/metadata_response"
  require "kafka/protocol/produce_request"
  require "kafka/protocol/produce_response"
+ require "kafka/protocol/fetch_request"
+ require "kafka/protocol/fetch_response"
+ require "kafka/protocol/list_offset_request"
+ require "kafka/protocol/list_offset_response"
--- a/lib/kafka/protocol/decoder.rb
+++ b/lib/kafka/protocol/decoder.rb
@@ -5,6 +5,9 @@ module Kafka
      # from it. The Kafka protocol is not self-describing, so a client must call
      # these methods in just the right order for things to work.
      class Decoder
+       def self.from_string(str)
+         new(StringIO.new(str))
+       end

        # Initializes a new decoder.
        #
@@ -13,6 +16,10 @@ module Kafka
          @io = io
        end

+       def eof?
+         @io.eof?
+       end
+
        # Decodes an 8-bit integer from the IO object.
        #
        # @return [Integer]
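
`.from_string` wraps a raw byte string in a `StringIO`-backed decoder, which `FetchResponse` below uses to decode an embedded message set, and `#eof?` lets callers drain a decoder when the element count isn't known up front. A sketch, assuming the 8-bit decode method documented below is named `int8`:

```ruby
decoder = Kafka::Protocol::Decoder.from_string("\x01\x02\x03")

values = []
values << decoder.int8 until decoder.eof?
values # => [1, 2, 3]
```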
--- /dev/null
+++ b/lib/kafka/protocol/fetch_request.rb
@@ -0,0 +1,53 @@
+ module Kafka
+   module Protocol
+
+     # A request to fetch messages from a given partition.
+     #
+     # ## API Specification
+     #
+     #     FetchRequest => ReplicaId MaxWaitTime MinBytes [TopicName [Partition FetchOffset MaxBytes]]
+     #       ReplicaId => int32
+     #       MaxWaitTime => int32
+     #       MinBytes => int32
+     #       TopicName => string
+     #       Partition => int32
+     #       FetchOffset => int64
+     #       MaxBytes => int32
+     #
+     class FetchRequest
+
+       # @param max_wait_time [Integer]
+       # @param min_bytes [Integer]
+       # @param topics [Hash]
+       def initialize(max_wait_time:, min_bytes:, topics:)
+         @replica_id = REPLICA_ID
+         @max_wait_time = max_wait_time
+         @min_bytes = min_bytes
+         @topics = topics
+       end
+
+       def api_key
+         1
+       end
+
+       def encode(encoder)
+         encoder.write_int32(@replica_id)
+         encoder.write_int32(@max_wait_time)
+         encoder.write_int32(@min_bytes)
+
+         encoder.write_array(@topics) do |topic, partitions|
+           encoder.write_string(topic)
+
+           encoder.write_array(partitions) do |partition, config|
+             fetch_offset = config.fetch(:fetch_offset)
+             max_bytes = config.fetch(:max_bytes)
+
+             encoder.write_int32(partition)
+             encoder.write_int64(fetch_offset)
+             encoder.write_int32(max_bytes)
+           end
+         end
+       end
+     end
+   end
+ end
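
`#encode` walks `@topics` as a nested mapping of topic name to a hash of partition to options, where each options hash must supply `:fetch_offset` and `:max_bytes`. A construction sketch under that assumption (the topic name and sizes are illustrative; in the Kafka protocol `MaxWaitTime` is given in milliseconds):

```ruby
request = Kafka::Protocol::FetchRequest.new(
  max_wait_time: 100, # ms to wait for min_bytes to accumulate
  min_bytes: 1,       # respond as soon as any data is available
  topics: {
    "greetings" => {
      0 => { fetch_offset: 0, max_bytes: 1024 * 1024 },
    },
  }
)
```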
--- /dev/null
+++ b/lib/kafka/protocol/fetch_response.rb
@@ -0,0 +1,75 @@
+ require "kafka/protocol/message_set"
+
+ module Kafka
+   module Protocol
+
+     # A response to a fetch request.
+     #
+     # ## API Specification
+     #
+     #     FetchResponse => [TopicName [Partition ErrorCode HighwaterMarkOffset MessageSetSize MessageSet]]
+     #       TopicName => string
+     #       Partition => int32
+     #       ErrorCode => int16
+     #       HighwaterMarkOffset => int64
+     #       MessageSetSize => int32
+     #
+     class FetchResponse
+       class FetchedPartition
+         attr_reader :partition, :error_code
+         attr_reader :highwater_mark_offset, :messages
+
+         def initialize(partition:, error_code:, highwater_mark_offset:, messages:)
+           @partition = partition
+           @error_code = error_code
+           @highwater_mark_offset = highwater_mark_offset
+           @messages = messages
+         end
+       end
+
+       class FetchedTopic
+         attr_reader :name, :partitions
+
+         def initialize(name:, partitions:)
+           @name = name
+           @partitions = partitions
+         end
+       end
+
+       attr_reader :topics
+
+       def initialize(topics: [])
+         @topics = topics
+       end
+
+       def self.decode(decoder)
+         topics = decoder.array do
+           topic_name = decoder.string
+
+           partitions = decoder.array do
+             partition = decoder.int32
+             error_code = decoder.int16
+             highwater_mark_offset = decoder.int64
+
+             message_set_decoder = Decoder.from_string(decoder.bytes)
+             message_set = MessageSet.decode(message_set_decoder)
+
+             FetchedPartition.new(
+               partition: partition,
+               error_code: error_code,
+               highwater_mark_offset: highwater_mark_offset,
+               messages: message_set.messages,
+             )
+           end
+
+           FetchedTopic.new(
+             name: topic_name,
+             partitions: partitions,
+           )
+         end
+
+         new(topics: topics)
+       end
+     end
+   end
+ end
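
The decoded value is a tree of small value objects, so a caller can walk topics and partitions directly. A sketch, assuming `response` came from `FetchResponse.decode` and that per-partition error codes are checked with the same `Protocol.handle_error` helper the list offset response uses:

```ruby
response.topics.each do |topic|
  topic.partitions.each do |partition|
    # Each partition carries its own error code.
    Kafka::Protocol.handle_error(partition.error_code)

    partition.messages.each do |message|
      puts "#{topic.name}/#{partition.partition}: #{message.value}"
    end
  end
end
```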
--- /dev/null
+++ b/lib/kafka/protocol/list_offset_request.rb
@@ -0,0 +1,41 @@
+ module Kafka
+   module Protocol
+     # A request to list the available offsets for a set of topics/partitions.
+     #
+     # ## API Specification
+     #
+     #     OffsetRequest => ReplicaId [TopicName [Partition Time MaxNumberOfOffsets]]
+     #       ReplicaId => int32
+     #       TopicName => string
+     #       Partition => int32
+     #       Time => int64
+     #       MaxNumberOfOffsets => int32
+     #
+     class ListOffsetRequest
+
+       # @param topics [Hash]
+       def initialize(topics:)
+         @replica_id = REPLICA_ID
+         @topics = topics
+       end
+
+       def api_key
+         2
+       end
+
+       def encode(encoder)
+         encoder.write_int32(@replica_id)
+
+         encoder.write_array(@topics) do |topic, partitions|
+           encoder.write_string(topic)
+
+           encoder.write_array(partitions) do |partition|
+             encoder.write_int32(partition.fetch(:partition))
+             encoder.write_int64(partition.fetch(:time))
+             encoder.write_int32(partition.fetch(:max_offsets))
+           end
+         end
+       end
+     end
+   end
+ end
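
Here each partition entry is a hash supplying `:partition`, `:time`, and `:max_offsets`, per the `fetch` calls in `#encode`. In the Kafka protocol the time field also accepts the sentinel values -1 (latest offset) and -2 (earliest offset). A sketch:

```ruby
request = Kafka::Protocol::ListOffsetRequest.new(
  topics: {
    "greetings" => [
      { partition: 0, time: -1, max_offsets: 1 }, # latest offset
      { partition: 1, time: -2, max_offsets: 1 }, # earliest offset
    ],
  }
)
```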
--- /dev/null
+++ b/lib/kafka/protocol/list_offset_response.rb
@@ -0,0 +1,82 @@
+ module Kafka
+   module Protocol
+
+     # A response to a list offset request.
+     #
+     # ## API Specification
+     #
+     #     OffsetResponse => [TopicName [PartitionOffsets]]
+     #       PartitionOffsets => Partition ErrorCode [Offset]
+     #       Partition => int32
+     #       ErrorCode => int16
+     #       Offset => int64
+     #
+     class ListOffsetResponse
+       class TopicOffsetInfo
+         attr_reader :name, :partition_offsets
+
+         def initialize(name:, partition_offsets:)
+           @name = name
+           @partition_offsets = partition_offsets
+         end
+       end
+
+       class PartitionOffsetInfo
+         attr_reader :partition, :error_code, :offsets
+
+         def initialize(partition:, error_code:, offsets:)
+           @partition = partition
+           @error_code = error_code
+           @offsets = offsets
+         end
+       end
+
+       attr_reader :topics
+
+       def initialize(topics:)
+         @topics = topics
+       end
+
+       def offset_for(topic, partition)
+         topic_info = @topics.find {|t| t.name == topic }
+
+         if topic_info.nil?
+           raise UnknownTopicOrPartition, "Unknown topic #{topic}"
+         end
+
+         partition_info = topic_info
+           .partition_offsets
+           .find {|p| p.partition == partition }
+
+         if partition_info.nil?
+           raise UnknownTopicOrPartition, "Unknown partition #{topic}/#{partition}"
+         end
+
+         Protocol.handle_error(partition_info.error_code)
+
+         partition_info.offsets.first
+       end
+
+       def self.decode(decoder)
+         topics = decoder.array do
+           name = decoder.string
+
+           partition_offsets = decoder.array do
+             PartitionOffsetInfo.new(
+               partition: decoder.int32,
+               error_code: decoder.int16,
+               offsets: decoder.array { decoder.int64 },
+             )
+           end
+
+           TopicOffsetInfo.new(
+             name: name,
+             partition_offsets: partition_offsets
+           )
+         end
+
+         new(topics: topics)
+       end
+     end
+   end
+ end
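
`#offset_for` digs out a single offset, raising `UnknownTopicOrPartition` for unknown coordinates and passing broker error codes through `Protocol.handle_error`. A usage sketch, assuming `response` came from `ListOffsetResponse.decode`:

```ruby
offset = response.offset_for("greetings", 0)

# Unknown coordinates raise rather than returning nil:
response.offset_for("nonexistent", 0) # raises Kafka::UnknownTopicOrPartition
```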