ruby-kafka 0.5.0 → 0.5.1.beta1

data/lib/kafka/cluster.rb CHANGED
@@ -1,5 +1,5 @@
- require "set"
  require "kafka/broker_pool"
+ require "set"

  module Kafka

@@ -49,6 +49,21 @@ module Kafka
      end
    end

+   def api_info(api_key)
+     apis.find {|api| api.api_key == api_key }
+   end
+
+   def apis
+     @apis ||=
+       begin
+         response = random_broker.api_versions
+
+         Protocol.handle_error(response.error_code)
+
+         response.apis
+       end
+   end
+
    # Clears the list of target topics.
    #
    # @see #add_target_topics
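The memoized `apis` list is fetched once, from a random broker, and then cached on the Cluster instance. A minimal usage sketch, assuming access to an internal `Kafka::Cluster` instance (here called `cluster`):

    # `cluster` is assumed to be an internal Kafka::Cluster instance.
    info = cluster.api_info(Kafka::Protocol::CREATE_TOPICS_API)

    if info
      puts "CreateTopics supported, versions #{info.min_version}..#{info.max_version}"
    else
      puts "broker does not advertise the CreateTopics API"
    end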
@@ -130,6 +145,41 @@ module Kafka
      raise
    end

+   def create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30)
+     options = {
+       topics: {
+         name => {
+           num_partitions: num_partitions,
+           replication_factor: replication_factor,
+         }
+       },
+       timeout: timeout,
+     }
+
+     broker = controller_broker
+
+     @logger.info "Creating topic `#{name}` using controller broker #{broker}"
+
+     response = broker.create_topics(**options)
+
+     response.errors.each do |topic, error_code|
+       Protocol.handle_error(error_code)
+     end
+
+     begin
+       partitions_for(name).each do |info|
+         Protocol.handle_error(info.partition_error_code)
+       end
+     rescue Kafka::LeaderNotAvailable
+       @logger.warn "Leader not yet available for `#{name}`, waiting 1s..."
+       sleep 1
+
+       retry
+     end
+
+     @logger.info "Topic `#{name}` was created"
+   end
+
    def resolve_offsets(topic, partitions, offset)
      add_target_topics([topic])
      refresh_metadata_if_necessary!
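Note that the leader wait loop retries indefinitely, sleeping one second per attempt. A usage sketch for the new method, assuming access to an internal `Kafka::Cluster` instance (whether this release also adds a public client-level wrapper is not shown in this diff):

    # Assumed call site; `cluster` is an internal Kafka::Cluster instance.
    cluster.create_topic(
      "greetings",
      num_partitions: 3,
      replication_factor: 2,
      timeout: 30, # seconds, forwarded to the CreateTopics request
    )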
@@ -178,6 +228,7 @@ module Kafka
    end

    def topics
+     refresh_metadata_if_necessary!
      cluster_info.topics.map(&:topic_name)
    end

@@ -213,11 +264,15 @@ module Kafka
        broker = @broker_pool.connect(node.hostname, node.port)
        cluster_info = broker.fetch_metadata(topics: @target_topics)

-       @stale = false
+       if cluster_info.brokers.empty?
+         @logger.error "No brokers in cluster"
+       else
+         @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"

-       @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
+         @stale = false

-       return cluster_info
+         return cluster_info
+       end
      rescue Error => e
        @logger.error "Failed to fetch metadata from #{node}: #{e}"
        errors << [node, e]
@@ -231,10 +286,19 @@ module Kafka
      raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
    end

+   def random_broker
+     node_id = cluster_info.brokers.sample.node_id
+     connect_to_broker(node_id)
+   end
+
    def connect_to_broker(broker_id)
      info = cluster_info.find_broker(broker_id)

      @broker_pool.connect(info.host, info.port, node_id: info.node_id)
    end
+
+   def controller_broker
+     connect_to_broker(cluster_info.controller_id)
+   end
  end
end

data/lib/kafka/connection.rb CHANGED
@@ -48,7 +48,7 @@ module Kafka
    # broker. Default is 10 seconds.
    #
    # @return [Connection] a new connection.
-   def initialize(host:, port:, client_id:, logger:, instrumenter:, sasl_authenticator:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
+   def initialize(host:, port:, client_id:, logger:, instrumenter:, connect_timeout: nil, socket_timeout: nil, ssl_context: nil)
      @host, @port, @client_id = host, port, client_id
      @logger = logger
      @instrumenter = instrumenter
@@ -56,11 +56,6 @@ module Kafka
      @connect_timeout = connect_timeout || CONNECT_TIMEOUT
      @socket_timeout = socket_timeout || SOCKET_TIMEOUT
      @ssl_context = ssl_context
-     @sasl_authenticator = sasl_authenticator
-   end
-
-   def address_match?(host, port)
-     @host == host && @port == port
    end

    def to_s
@@ -75,8 +70,6 @@ module Kafka
      @logger.debug "Closing socket to #{to_s}"

      @socket.close if @socket
-
-     @socket = nil
    end

    # Sends a request over the connection.
@@ -86,20 +79,25 @@ module Kafka
    #
    # @return [Object] the response.
    def send_request(request)
+     api_name = Protocol.api_name(request.api_key)
+
      # Default notification payload.
      notification = {
        broker_host: @host,
-       api: Protocol.api_name(request.api_key),
+       api: api_name,
        request_size: 0,
        response_size: 0,
      }

      @instrumenter.instrument("request.connection", notification) do
        open unless open?
-       reopen if idle?
+
+       raise IdleConnection if idle?

        @correlation_id += 1

+       @logger.debug "Sending #{api_name} API request #{@correlation_id} to #{to_s}"
+
        write_request(request, notification)

        response_class = request.response_class
@@ -133,7 +131,6 @@ module Kafka
      @correlation_id = 0

      @last_request = nil
-     @sasl_authenticator.authenticate!(self)
    rescue Errno::ETIMEDOUT => e
      @logger.error "Timed out while trying to connect to #{self}: #{e}"
      raise ConnectionError, e
@@ -142,11 +139,6 @@ module Kafka
      raise ConnectionError, e
    end

-   def reopen
-     close
-     open
-   end
-
    def idle?
      @last_request && @last_request < Time.now - IDLE_TIMEOUT
    end
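With `reopen` removed, an idle connection now raises inside `send_request` instead of being silently recycled. This diff does not show who rescues `IdleConnection`, so the handling below is a hypothetical sketch only:

    # Hypothetical caller-side handling; assumes Kafka::IdleConnection is the
    # error class raised above and that send_request re-opens after a close.
    begin
      connection.send_request(request)
    rescue Kafka::IdleConnection
      connection.close
      retry
    end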
@@ -157,8 +149,6 @@ module Kafka
    #
    # @return [nil]
    def write_request(request, notification)
-     @logger.debug "Sending request #{@correlation_id} to #{to_s}"
-
      message = Kafka::Protocol::RequestMessage.new(
        api_key: request.api_key,
        api_version: request.respond_to?(:api_version) ? request.api_version : 0,

data/lib/kafka/connection_builder.rb CHANGED
@@ -20,9 +20,10 @@ module Kafka
        logger: @logger,
        instrumenter: @instrumenter,
        ssl_context: @ssl_context,
-       sasl_authenticator: @sasl_authenticator
      )

+     @sasl_authenticator.authenticate!(connection)
+
      connection
    end

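The net effect of the Connection and builder hunks: SASL authentication moves out of `Connection#open` and runs exactly once, right after the builder constructs the connection. A minimal sketch of the new flow, with the collaborators assumed to be already configured:

    # Sketch only; logger, instrumenter and sasl_authenticator stand in for
    # the same collaborators the builder holds.
    connection = Kafka::Connection.new(
      host: "kafka1.example.com",
      port: 9092,
      client_id: "my-app",
      logger: logger,
      instrumenter: instrumenter,
    )

    sasl_authenticator.authenticate!(connection)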

data/lib/kafka/consumer.rb CHANGED
@@ -179,6 +179,8 @@ module Kafka
    # @param min_bytes [Integer] the minimum number of bytes to read before
    #   returning messages from each broker; if `max_wait_time` is reached, this
    #   is ignored.
+   # @param max_bytes [Integer] the maximum number of bytes to read before
+   #   returning messages from each broker.
    # @param max_wait_time [Integer, Float] the maximum duration of time to wait before
    #   returning messages from each broker, in seconds.
    # @param automatically_mark_as_processed [Boolean] whether to automatically
@@ -190,10 +192,11 @@ module Kafka
    #   The original exception will be returned by calling `#cause` on the
    #   {Kafka::ProcessingError} instance.
    # @return [nil]
-   def each_message(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
+   def each_message(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
      consumer_loop do
        batches = fetch_batches(
          min_bytes: min_bytes,
+         max_bytes: max_bytes,
          max_wait_time: max_wait_time,
          automatically_mark_as_processed: automatically_mark_as_processed
        )
@@ -253,6 +256,8 @@ module Kafka
    # @param min_bytes [Integer] the minimum number of bytes to read before
    #   returning messages from each broker; if `max_wait_time` is reached, this
    #   is ignored.
+   # @param max_bytes [Integer] the maximum number of bytes to read before
+   #   returning messages from each broker.
    # @param max_wait_time [Integer, Float] the maximum duration of time to wait before
    #   returning messages from each broker, in seconds.
    # @param automatically_mark_as_processed [Boolean] whether to automatically
@@ -261,10 +266,11 @@ module Kafka
    #   messages can be committed to Kafka.
    # @yieldparam batch [Kafka::FetchedBatch] a message batch fetched from Kafka.
    # @return [nil]
-   def each_batch(min_bytes: 1, max_wait_time: 1, automatically_mark_as_processed: true)
+   def each_batch(min_bytes: 1, max_bytes: 10485760, max_wait_time: 1, automatically_mark_as_processed: true)
      consumer_loop do
        batches = fetch_batches(
          min_bytes: min_bytes,
+         max_bytes: max_bytes,
          max_wait_time: max_wait_time,
          automatically_mark_as_processed: automatically_mark_as_processed
        )
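Both consumer entry points now accept `max_bytes`, defaulting to 10485760 bytes (10 MiB), and pass it through `fetch_batches` to the fetch operation. A usage sketch with an explicit 1 MiB cap:

    require "kafka"

    kafka = Kafka.new(seed_brokers: ["kafka1:9092"], client_id: "my-app")
    consumer = kafka.consumer(group_id: "greeters")
    consumer.subscribe("greetings")

    # Cap each fetch at 1 MiB instead of the 10 MiB default.
    consumer.each_batch(min_bytes: 1, max_bytes: 1024 * 1024, max_wait_time: 5) do |batch|
      puts "#{batch.topic}/#{batch.partition}: #{batch.messages.size} messages"
    end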
@@ -349,6 +355,9 @@ module Kafka
      yield
    rescue HeartbeatError, OffsetCommitError
      join_group
+   rescue RebalanceInProgress
+     @logger.warn "Group rebalance in progress, re-joining..."
+     join_group
    rescue FetchError, NotLeaderForPartition, UnknownTopicOrPartition
      @cluster.mark_as_stale!
    rescue LeaderNotAvailable => e
@@ -370,7 +379,7 @@ module Kafka

    def make_final_offsets_commit!(attempts = 3)
      @offset_manager.commit_offsets
-   rescue ConnectionError, EOFError
+   rescue ConnectionError, OffsetCommitError, EOFError
      # It's important to make sure final offsets commit is done
      # As otherwise messages that have been processed after last auto-commit
      # will be processed again and that may be huge amount of messages
@@ -379,6 +388,8 @@ module Kafka
      @logger.error "Retrying to make final offsets commit (#{attempts} attempts left)"
      sleep(0.1)
      make_final_offsets_commit!(attempts - 1)
+   rescue Kafka::Error => e
+     @logger.error "Encountered error while shutting down; #{e.class}: #{e.message}"
    end

    def join_group
@@ -400,7 +411,7 @@ module Kafka
      end
    end

-   def fetch_batches(min_bytes:, max_wait_time:, automatically_mark_as_processed:)
+   def fetch_batches(min_bytes:, max_bytes:, max_wait_time:, automatically_mark_as_processed:)
      join_group unless @group.member?

      subscribed_partitions = @group.subscribed_partitions
@@ -411,6 +422,7 @@ module Kafka
        cluster: @cluster,
        logger: @logger,
        min_bytes: min_bytes,
+       max_bytes: max_bytes,
        max_wait_time: max_wait_time,
      )


data/lib/kafka/fetch_operation.rb CHANGED
@@ -18,10 +18,11 @@ module Kafka
  #     operation.execute
  #
  class FetchOperation
-   def initialize(cluster:, logger:, min_bytes: 1, max_wait_time: 5)
+   def initialize(cluster:, logger:, min_bytes: 1, max_bytes: 10485760, max_wait_time: 5)
      @cluster = cluster
      @logger = logger
      @min_bytes = min_bytes
+     @max_bytes = max_bytes
      @max_wait_time = max_wait_time
      @topics = {}
    end
@@ -66,6 +67,7 @@ module Kafka
      options = {
        max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
        min_bytes: @min_bytes,
+       max_bytes: @max_bytes,
        topics: topics,
      }

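Per the class comment excerpted in the first hunk's context, the intended internal usage, extended with the new parameter, is roughly:

    # Internal usage sketch; cluster and logger come from the consumer
    # that builds the operation.
    operation = Kafka::FetchOperation.new(
      cluster: cluster,
      logger: logger,
      min_bytes: 1,
      max_bytes: 10 * 1024 * 1024, # new: request-level cap
      max_wait_time: 5,
    )

    operation.fetch_from_partition("greetings", 42, offset: :latest)
    operation.execute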

data/lib/kafka/protocol.rb CHANGED
@@ -24,6 +24,8 @@ module Kafka
    LEAVE_GROUP_API = 13
    SYNC_GROUP_API = 14
    SASL_HANDSHAKE_API = 17
+   API_VERSIONS_API = 18
+   CREATE_TOPICS_API = 19

    # A mapping from numeric API keys to symbolic API names.
    APIS = {
@@ -39,6 +41,8 @@ module Kafka
      LEAVE_GROUP_API => :leave_group,
      SYNC_GROUP_API => :sync_group,
      SASL_HANDSHAKE_API => :sasl_handshake,
+     API_VERSIONS_API => :api_versions,
+     CREATE_TOPICS_API => :create_topics,
    }

    # A mapping from numeric error codes to exception classes.
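These two additions feed `Protocol.api_name`, which `Connection#send_request` now uses in its debug log line:

    Kafka::Protocol.api_name(Kafka::Protocol::API_VERSIONS_API)  # => :api_versions
    Kafka::Protocol.api_name(Kafka::Protocol::CREATE_TOPICS_API) # => :create_topics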
@@ -131,5 +135,9 @@ require "kafka/protocol/offset_fetch_request"
  require "kafka/protocol/offset_fetch_response"
  require "kafka/protocol/offset_commit_request"
  require "kafka/protocol/offset_commit_response"
+ require "kafka/protocol/api_versions_request"
+ require "kafka/protocol/api_versions_response"
  require "kafka/protocol/sasl_handshake_request"
  require "kafka/protocol/sasl_handshake_response"
+ require "kafka/protocol/create_topics_request"
+ require "kafka/protocol/create_topics_response"

data/lib/kafka/protocol/api_versions_request.rb ADDED
@@ -0,0 +1,19 @@
+ module Kafka
+   module Protocol
+
+     class ApiVersionsRequest
+       def api_key
+         API_VERSIONS_API
+       end
+
+       def encode(encoder)
+         # Nothing to do.
+       end
+
+       def response_class
+         Protocol::ApiVersionsResponse
+       end
+     end
+
+   end
+ end
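The request carries no body and defines no `api_version`, so `Connection#write_request` (above) falls back to encoding version 0:

    request = Kafka::Protocol::ApiVersionsRequest.new

    request.api_key                   # => 18
    request.respond_to?(:api_version) # => false, so version 0 is encoded
    request.response_class            # => Kafka::Protocol::ApiVersionsResponse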

data/lib/kafka/protocol/api_versions_response.rb ADDED
@@ -0,0 +1,47 @@
+ module Kafka
+   module Protocol
+
+     class ApiVersionsResponse
+       class ApiInfo
+         attr_reader :api_key, :min_version, :max_version
+
+         def initialize(api_key:, min_version:, max_version:)
+           @api_key, @min_version, @max_version = api_key, min_version, max_version
+         end
+
+         def api_name
+           Protocol.api_name(api_key)
+         end
+
+         def to_s
+           "#{api_name}=#{min_version}..#{max_version}"
+         end
+
+         def inspect
+           "#<Kafka api version #{to_s}>"
+         end
+       end
+
+       attr_reader :error_code, :apis
+
+       def initialize(error_code:, apis:)
+         @error_code = error_code
+         @apis = apis
+       end
+
+       def self.decode(decoder)
+         error_code = decoder.int16
+
+         apis = decoder.array do
+           ApiInfo.new(
+             api_key: decoder.int16,
+             min_version: decoder.int16,
+             max_version: decoder.int16,
+           )
+         end
+
+         new(error_code: error_code, apis: apis)
+       end
+     end
+
+   end
+ end
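A decoding sketch against a hand-packed payload: an int16 error code followed by an array (int32 length prefix) of int16 triples. `Decoder.from_string` is assumed from the gem's other protocol classes:

    payload = [
      0,        # error_code: none
      1,        # one ApiInfo entry follows
      18, 0, 0, # api_key 18 (api_versions), supported versions 0..0
    ].pack("s>l>s>s>s>")

    decoder  = Kafka::Protocol::Decoder.from_string(payload)
    response = Kafka::Protocol::ApiVersionsResponse.decode(decoder)

    response.apis.first.to_s # => "api_versions=0..0"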

data/lib/kafka/protocol/create_topics_request.rb ADDED
@@ -0,0 +1,40 @@
+ module Kafka
+   module Protocol
+
+     class CreateTopicsRequest
+       def initialize(topics:, timeout:)
+         @topics, @timeout = topics, timeout
+       end
+
+       def api_key
+         CREATE_TOPICS_API
+       end
+
+       def api_version
+         0
+       end
+
+       def response_class
+         Protocol::CreateTopicsResponse
+       end
+
+       def encode(encoder)
+         encoder.write_array(@topics) do |topic, config|
+           encoder.write_string(topic)
+           encoder.write_int32(config.fetch(:num_partitions))
+           encoder.write_int16(config.fetch(:replication_factor))
+
+           # Replica assignments. We don't care.
+           encoder.write_array([])
+
+           # Config entries. We don't care.
+           encoder.write_array([])
+         end
+
+         # Timeout is in ms.
+         encoder.write_int32(@timeout * 1000)
+       end
+     end
+
+   end
+ end
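Tying this back to `Cluster#create_topic` above: the `topics:` hash keys become topic names, the values supply the per-topic settings, and the seconds-based timeout is converted to milliseconds at encode time:

    request = Kafka::Protocol::CreateTopicsRequest.new(
      topics: {
        "greetings" => { num_partitions: 3, replication_factor: 2 },
      },
      timeout: 30, # seconds; written to the wire as 30_000 ms
    )

    request.api_key # => 19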