ruby-kafka 0.5.0 → 0.5.1.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/kafka/cluster.rb CHANGED
@@ -1,5 +1,5 @@
-require "set"
 require "kafka/broker_pool"
+require "set"
 
 module Kafka
 
@@ -49,6 +49,21 @@ module Kafka
       end
     end
 
+    def api_info(api_key)
+      apis.find {|api| api.api_key == api_key }
+    end
+
+    def apis
+      @apis ||=
+        begin
+          response = random_broker.api_versions
+
+          Protocol.handle_error(response.error_code)
+
+          response.apis
+        end
+    end
+
     # Clears the list of target topics.
     #
     # @see #add_target_topics
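
The new Cluster#apis call asks a randomly chosen broker which API versions it supports and memoizes the answer; Cluster#api_info then looks up the version range for a single API key, returning nil when the broker does not know the API. A minimal usage sketch, assuming you already hold a Kafka::Cluster instance (an internal class; the cluster variable below is illustrative):

    # Illustrative only: Cluster is ruby-kafka's internal metadata object.
    info = cluster.api_info(Kafka::Protocol::CREATE_TOPICS_API)

    if info.nil?
      puts "Broker does not support the CreateTopics API"
    else
      puts "Supported versions: #{info}"  # e.g. "create_topics=0..0"
    end
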
@@ -130,6 +145,41 @@ module Kafka
       raise
     end
 
+    def create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30)
+      options = {
+        topics: {
+          name => {
+            num_partitions: num_partitions,
+            replication_factor: replication_factor,
+          }
+        },
+        timeout: timeout,
+      }
+
+      broker = controller_broker
+
+      @logger.info "Creating topic `#{name}` using controller broker #{broker}"
+
+      response = broker.create_topics(**options)
+
+      response.errors.each do |topic, error_code|
+        Protocol.handle_error(error_code)
+      end
+
+      begin
+        partitions_for(name).each do |info|
+          Protocol.handle_error(info.partition_error_code)
+        end
+      rescue Kafka::LeaderNotAvailable
+        @logger.warn "Leader not yet available for `#{name}`, waiting 1s..."
+        sleep 1
+
+        retry
+      end
+
+      @logger.info "Topic `#{name}` was created"
+    end
+
     def resolve_offsets(topic, partitions, offset)
       add_target_topics([topic])
       refresh_metadata_if_necessary!
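
create_topic sends the CreateTopics request to the cluster's controller broker (only the controller accepts topic creation) and then polls partition metadata, sleeping one second between attempts, until every partition has a leader. A hedged usage sketch against this internal API (the cluster variable is assumed to be a Kafka::Cluster instance; a public client-level entry point is not part of this diff):

    # Illustrative only: create a 3-partition, replication-factor-2 topic,
    # giving the controller up to 30 seconds to complete the request.
    cluster.create_topic("greetings", num_partitions: 3, replication_factor: 2, timeout: 30)
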
@@ -178,6 +228,7 @@ module Kafka
     end
 
     def topics
+      refresh_metadata_if_necessary!
       cluster_info.topics.map(&:topic_name)
     end
 
@@ -213,11 +264,15 @@ module Kafka
         broker = @broker_pool.connect(node.hostname, node.port)
         cluster_info = broker.fetch_metadata(topics: @target_topics)
 
-        @stale = false
+        if cluster_info.brokers.empty?
+          @logger.error "No brokers in cluster"
+        else
+          @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
 
-        @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
+          @stale = false
 
-        return cluster_info
+          return cluster_info
+        end
       rescue Error => e
         @logger.error "Failed to fetch metadata from #{node}: #{e}"
         errors << [node, e]
@@ -231,10 +286,19 @@ module Kafka
       raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
     end
 
+    def random_broker
+      node_id = cluster_info.brokers.sample.node_id
+      connect_to_broker(node_id)
+    end
+
     def connect_to_broker(broker_id)
       info = cluster_info.find_broker(broker_id)
 
       @broker_pool.connect(info.host, info.port, node_id: info.node_id)
     end
+
+    def controller_broker
+      connect_to_broker(cluster_info.controller_id)
+    end
   end
 end
data/lib/kafka/connection.rb CHANGED
@@ -48,7 +48,7 @@ module Kafka
     #   broker. Default is 10 seconds.
     #
     # @return [Connection] a new connection.
-    def initialize(host:, port:, client_id:, logger:, instrumenter:, sasl_authenticator:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
+    def initialize(host:, port:, client_id:, logger:, instrumenter:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
       @host, @port, @client_id = host, port, client_id
       @logger = logger
       @instrumenter = instrumenter
@@ -56,11 +56,6 @@ module Kafka
       @connect_timeout = connect_timeout || CONNECT_TIMEOUT
       @socket_timeout = socket_timeout || SOCKET_TIMEOUT
       @ssl_context = ssl_context
-      @sasl_authenticator = sasl_authenticator
-    end
-
-    def address_match?(host, port)
-      @host == host && @port == port
     end
 
     def to_s
@@ -75,8 +70,6 @@ module Kafka
       @logger.debug "Closing socket to #{to_s}"
 
       @socket.close if @socket
-
-      @socket = nil
     end
 
     # Sends a request over the connection.
@@ -86,20 +79,25 @@ module Kafka
     #
     # @return [Object] the response.
     def send_request(request)
+      api_name = Protocol.api_name(request.api_key)
+
       # Default notification payload.
       notification = {
         broker_host: @host,
-        api: Protocol.api_name(request.api_key),
+        api: api_name,
         request_size: 0,
         response_size: 0,
       }
 
       @instrumenter.instrument("request.connection", notification) do
         open unless open?
-        reopen if idle?
+
+        raise IdleConnection if idle?
 
         @correlation_id += 1
 
+        @logger.debug "Sending #{api_name} API request #{@correlation_id} to #{to_s}"
+
         write_request(request, notification)
 
         response_class = request.response_class
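
Where an idle connection used to be silently closed and reopened inside send_request, it now raises IdleConnection and leaves the recovery policy to the caller. A sketch of the retry a caller might implement (hypothetical code; how ruby-kafka's own broker layer reacts to IdleConnection is outside this diff, and build_fresh_connection is an invented helper name):

    attempts = 0

    begin
      response = connection.send_request(request)
    rescue Kafka::IdleConnection
      raise if (attempts += 1) > 1

      # The socket sat unused past IDLE_TIMEOUT; replace the connection
      # and retry once.
      connection = build_fresh_connection
      retry
    end
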
@@ -133,7 +131,6 @@ module Kafka
       @correlation_id = 0
 
       @last_request = nil
-      @sasl_authenticator.authenticate!(self)
     rescue Errno::ETIMEDOUT => e
       @logger.error "Timed out while trying to connect to #{self}: #{e}"
       raise ConnectionError, e
@@ -142,11 +139,6 @@ module Kafka
       raise ConnectionError, e
     end
 
-    def reopen
-      close
-      open
-    end
-
     def idle?
       @last_request && @last_request < Time.now - IDLE_TIMEOUT
     end
@@ -157,8 +149,6 @@ module Kafka
     #
     # @return [nil]
     def write_request(request, notification)
-      @logger.debug "Sending request #{@correlation_id} to #{to_s}"
-
       message = Kafka::Protocol::RequestMessage.new(
         api_key: request.api_key,
         api_version: request.respond_to?(:api_version) ? request.api_version : 0,
data/lib/kafka/connection_builder.rb CHANGED
@@ -20,9 +20,10 @@ module Kafka
         logger: @logger,
         instrumenter: @instrumenter,
         ssl_context: @ssl_context,
-        sasl_authenticator: @sasl_authenticator
       )
 
+      @sasl_authenticator.authenticate!(connection)
+
       connection
     end
 
data/lib/kafka/consumer.rb CHANGED
@@ -179,6 +179,8 @@ module Kafka
     # @param min_bytes [Integer] the minimum number of bytes to read before
     #   returning messages from each broker; if `max_wait_time` is reached, this
     #   is ignored.
+    # @param max_bytes [Integer] the maximum number of bytes to read before
+    #   returning messages from each broker.
     # @param max_wait_time [Integer, Float] the maximum duration of time to wait before
     #   returning messages from each broker, in seconds.
     # @param automatically_mark_as_processed [Boolean] whether to automatically
@@ -190,10 +192,11 @@ module Kafka
     #   The original exception will be returned by calling `#cause` on the
     #   {Kafka::ProcessingError} instance.
     # @return [nil]
-    def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
+    def each_message(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
       consumer_loop do
         batches = fetch_batches(
           min_bytes: min_bytes,
+          max_bytes: max_bytes,
           max_wait_time: max_wait_time,
           automatically_mark_as_processed: automatically_mark_as_processed
         )
@@ -253,6 +256,8 @@ module Kafka
     # @param min_bytes [Integer] the minimum number of bytes to read before
     #   returning messages from each broker; if `max_wait_time` is reached, this
     #   is ignored.
+    # @param max_bytes [Integer] the maximum number of bytes to read before
+    #   returning messages from each broker.
     # @param max_wait_time [Integer, Float] the maximum duration of time to wait before
     #   returning messages from each broker, in seconds.
     # @param automatically_mark_as_processed [Boolean] whether to automatically
@@ -261,10 +266,11 @@ module Kafka
     #   messages can be committed to Kafka.
     # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
     # @return [nil]
-    def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
+    def each_batch(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
       consumer_loop do
         batches = fetch_batches(
           min_bytes: min_bytes,
+          max_bytes: max_bytes,
           max_wait_time: max_wait_time,
           automatically_mark_as_processed: automatically_mark_as_processed
         )
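
Both each_message and each_batch now accept max_bytes, defaulting to 10485760 (10 MiB), which caps how much data a single fetch returns from each broker and complements the existing min_bytes floor. A usage sketch with the public consumer API (broker address, group ID, and topic name are placeholders):

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["kafka1:9092"], client_id: "my-app")

    consumer = kafka.consumer(group_id: "my-group")
    consumer.subscribe("greetings")

    # Cap each fetch at 1 MiB instead of the 10 MiB default.
    consumer.each_message(max_bytes: 1_048_576) do |message|
      puts message.value
    end
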
@@ -349,6 +355,9 @@ module Kafka
         yield
       rescue HeartbeatError, OffsetCommitError
         join_group
+      rescue RebalanceInProgress
+        @logger.warn "Group rebalance in progress, re-joining..."
+        join_group
       rescue FetchError, NotLeaderForPartition, UnknownTopicOrPartition
         @cluster.mark_as_stale!
       rescue LeaderNotAvailable => e
@@ -370,7 +379,7 @@ module Kafka
 
     def make_final_offsets_commit!(attempts = 3)
       @offset_manager.commit_offsets
-    rescue ConnectionError, EOFError
+    rescue ConnectionError, OffsetCommitError, EOFError
       # It's important to make sure final offsets commit is done
       # As otherwise messages that have been processed after last auto-commit
       # will be processed again and that may be huge amount of messages
@@ -379,6 +388,8 @@ module Kafka
       @logger.error "Retrying to make final offsets commit (#{attempts} attempts left)"
       sleep(0.1)
       make_final_offsets_commit!(attempts - 1)
+    rescue Kafka::Error => e
+      @logger.error "Encountered error while shutting down; #{e.class}: #{e.message}"
     end
 
     def join_group
@@ -400,7 +411,7 @@ module Kafka
       end
     end
 
-    def fetch_batches(min_bytes:, max_wait_time:, automatically_mark_as_processed:)
+    def fetch_batches(min_bytes:, max_bytes:, max_wait_time:, automatically_mark_as_processed:)
       join_group unless @group.member?
 
       subscribed_partitions = @group.subscribed_partitions
@@ -411,6 +422,7 @@ module Kafka
         cluster: @cluster,
         logger: @logger,
         min_bytes: min_bytes,
+        max_bytes: max_bytes,
         max_wait_time: max_wait_time,
       )
 
data/lib/kafka/fetch_operation.rb CHANGED
@@ -18,10 +18,11 @@ module Kafka
   #   operation.execute
   #
   class FetchOperation
-    def initialize(cluster:, logger:, min_bytes: 1, max_wait_time: 5)
+    def initialize(cluster:, logger:, min_bytes: 1, max_bytes: 10485760, max_wait_time: 5)
       @cluster = cluster
       @logger = logger
       @min_bytes = min_bytes
+      @max_bytes = max_bytes
       @max_wait_time = max_wait_time
       @topics = {}
     end
@@ -66,6 +67,7 @@ module Kafka
       options = {
         max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
         min_bytes: @min_bytes,
+        max_bytes: @max_bytes,
         topics: topics,
       }
 
data/lib/kafka/protocol.rb CHANGED
@@ -24,6 +24,8 @@ module Kafka
     LEAVE_GROUP_API = 13
     SYNC_GROUP_API = 14
     SASL_HANDSHAKE_API = 17
+    API_VERSIONS_API = 18
+    CREATE_TOPICS_API = 19
 
     # A mapping from numeric API keys to symbolic API names.
     APIS = {
@@ -39,6 +41,8 @@ module Kafka
       LEAVE_GROUP_API => :leave_group,
       SYNC_GROUP_API => :sync_group,
       SASL_HANDSHAKE_API => :sasl_handshake,
+      API_VERSIONS_API => :api_versions,
+      CREATE_TOPICS_API => :create_topics,
     }
 
     # A mapping from numeric error codes to exception classes.
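
The APIS map backs Protocol.api_name, which translates a numeric API key into a symbolic name for logging and instrumentation; the two new constants make ApiVersions and CreateTopics traffic show up readably. A quick illustration:

    Kafka::Protocol.api_name(18)  #=> :api_versions
    Kafka::Protocol.api_name(19)  #=> :create_topics
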
@@ -131,5 +135,9 @@ require "kafka/protocol/offset_fetch_request"
 require "kafka/protocol/offset_fetch_response"
 require "kafka/protocol/offset_commit_request"
 require "kafka/protocol/offset_commit_response"
+require "kafka/protocol/api_versions_request"
+require "kafka/protocol/api_versions_response"
 require "kafka/protocol/sasl_handshake_request"
 require "kafka/protocol/sasl_handshake_response"
+require "kafka/protocol/create_topics_request"
+require "kafka/protocol/create_topics_response"
data/lib/kafka/protocol/api_versions_request.rb ADDED
@@ -0,0 +1,19 @@
+module Kafka
+  module Protocol
+
+    class ApiVersionsRequest
+      def api_key
+        API_VERSIONS_API
+      end
+
+      def encode(encoder)
+        # Nothing to do.
+      end
+
+      def response_class
+        Protocol::ApiVersionsResponse
+      end
+    end
+
+  end
+end
data/lib/kafka/protocol/api_versions_response.rb ADDED
@@ -0,0 +1,47 @@
+module Kafka
+  module Protocol
+
+    class ApiVersionsResponse
+      class ApiInfo
+        attr_reader :api_key, :min_version, :max_version
+
+        def initialize(api_key:, min_version:, max_version:)
+          @api_key, @min_version, @max_version = api_key, min_version, max_version
+        end
+
+        def api_name
+          Protocol.api_name(api_key)
+        end
+
+        def to_s
+          "#{api_name}=#{min_version}..#{max_version}"
+        end
+
+        def inspect
+          "#<Kafka api version #{to_s}>"
+        end
+      end
+
+      attr_reader :error_code, :apis
+
+      def initialize(error_code:, apis:)
+        @error_code = error_code
+        @apis = apis
+      end
+
+      def self.decode(decoder)
+        error_code = decoder.int16
+
+        apis = decoder.array do
+          ApiInfo.new(
+            api_key: decoder.int16,
+            min_version: decoder.int16,
+            max_version: decoder.int16,
+          )
+        end
+
+        new(error_code: error_code, apis: apis)
+      end
+    end
+
+  end
+end
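
On the wire, an ApiVersions v0 response is an int16 error code followed by an int32-length-prefixed array of (api_key, min_version, max_version) int16 triples, which is exactly the shape decode consumes. A minimal decoding sketch (assuming Kafka::Protocol::Decoder wraps any IO-like object, as it does elsewhere in the gem):

    require "stringio"

    # One entry: error_code 0; api_key 18 supported at versions 0..0.
    raw = [0, 1, 18, 0, 0].pack("s>l>s>s>s>")

    decoder  = Kafka::Protocol::Decoder.new(StringIO.new(raw))
    response = Kafka::Protocol::ApiVersionsResponse.decode(decoder)

    response.error_code       #=> 0
    response.apis.first.to_s  #=> "api_versions=0..0"
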
data/lib/kafka/protocol/create_topics_request.rb ADDED
@@ -0,0 +1,40 @@
+module Kafka
+  module Protocol
+
+    class CreateTopicsRequest
+      def initialize(topics:, timeout:)
+        @topics, @timeout = topics, timeout
+      end
+
+      def api_key
+        CREATE_TOPICS_API
+      end
+
+      def api_version
+        0
+      end
+
+      def response_class
+        Protocol::CreateTopicsResponse
+      end
+
+      def encode(encoder)
+        encoder.write_array(@topics) do |topic, config|
+          encoder.write_string(topic)
+          encoder.write_int32(config.fetch(:num_partitions))
+          encoder.write_int16(config.fetch(:replication_factor))
+
+          # Replica assignments. We don't care.
+          encoder.write_array([])
+
+          # Config entries. We don't care.
+          encoder.write_array([])
+        end
+
+        # Timeout is in ms.
+        encoder.write_int32(@timeout * 1000)
+      end
+    end
+
+  end
+end
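
encode emits the CreateTopics v0 body: an array of topic entries, each carrying the topic name, partition count, replication factor, and empty replica-assignment and config arrays, followed by the timeout converted from seconds to milliseconds. A small sketch that serializes a request body to bytes (assuming Kafka::Protocol::Encoder wraps any IO, matching the rest of the protocol layer):

    require "stringio"

    request = Kafka::Protocol::CreateTopicsRequest.new(
      topics: { "greetings" => { num_partitions: 3, replication_factor: 2 } },
      timeout: 30,  # seconds; written as 30_000 ms
    )

    buffer = StringIO.new
    request.encode(Kafka::Protocol::Encoder.new(buffer))

    buffer.string.bytesize  #=> size of the encoded request body in bytes
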