ruby-kafka 1.1.0.beta1 → 1.4.0

This diff compares the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
data/lib/kafka/client.rb CHANGED
@@ -1,3 +1,4 @@
+# coding: utf-8
 # frozen_string_literal: true
 
 require "kafka/ssl_context"
@@ -38,8 +39,8 @@ module Kafka
     # @param ssl_ca_cert [String, Array<String>, nil] a PEM encoded CA cert, or an Array of
     #   PEM encoded CA certs, to use with an SSL connection.
     #
-    # @param ssl_ca_cert_file_path [String, nil] a path on the filesystem to a PEM encoded CA cert
-    #   to use with an SSL connection.
+    # @param ssl_ca_cert_file_path [String, Array<String>, nil] a path on the filesystem, or an
+    #   Array of paths, to PEM encoded CA cert(s) to use with an SSL connection.
     #
     # @param ssl_client_cert [String, nil] a PEM encoded client cert to use with an
     #   SSL connection. Must be used in combination with ssl_client_cert_key.
@@ -62,19 +63,34 @@ module Kafka
     #
     # @param sasl_over_ssl [Boolean] whether to enforce SSL with SASL
     #
+    # @param ssl_ca_certs_from_system [Boolean] whether to use the CA certs from the
+    #   system's default certificate store.
+    #
+    # @param partitioner [Partitioner, nil] the partitioner that should be used by the client.
+    #
     # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
     #   implements method token. See {Sasl::OAuth#initialize}
     #
+    # @param ssl_verify_hostname [Boolean, true] whether to verify that the host serving
+    #   the SSL certificate and the signing chain of the certificate have the correct domains
+    #   based on the CA certificate
+    #
+    # @param resolve_seed_brokers [Boolean] whether to resolve each hostname of the seed brokers.
+    #   If a broker is resolved to multiple IP addresses, the client tries to connect to each
+    #   of the addresses until it can connect.
+    #
     # @return [Client]
     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
                    ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
                    ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
                    sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
                    sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
-                   sasl_over_ssl: true, ssl_ca_certs_from_system: false, sasl_oauth_token_provider: nil, ssl_verify_hostname: true)
+                   sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true,
+                   resolve_seed_brokers: false)
       @logger = TaggedLogger.new(logger)
       @instrumenter = Instrumenter.new(client_id: client_id)
       @seed_brokers = normalize_seed_brokers(seed_brokers)
+      @resolve_seed_brokers = resolve_seed_brokers
 
       ssl_context = SslContext.build(
         ca_cert_file_path: ssl_ca_cert_file_path,
@@ -115,6 +131,7 @@ module Kafka
       )
 
       @cluster = initialize_cluster
+      @partitioner = partitioner || Partitioner.new
     end
 
     # Delivers a single message to the Kafka cluster.
@@ -153,7 +170,7 @@ module Kafka
 
       if partition.nil?
         partition_count = @cluster.partitions_for(topic).count
-        partition = Partitioner.partition_for_key(partition_count, message)
+        partition = @partitioner.call(partition_count, message)
       end
 
       buffer = MessageBuffer.new
@@ -194,6 +211,8 @@ module Kafka
       attempt = 1
 
       begin
+        @cluster.refresh_metadata_if_necessary!
+
        operation.execute
 
        unless buffer.empty?
@@ -244,6 +263,9 @@ module Kafka
     #   be in a message set before it should be compressed. Note that message sets
     #   are per-partition rather than per-topic or per-producer.
     #
+    # @param interceptors [Array<Object>] a list of producer interceptors that implement
+    #   `call(Kafka::PendingMessage)`.
+    #
     # @return [Kafka::Producer] the Kafka producer.
     def producer(
       compression_codec: nil,
@@ -257,7 +279,8 @@ module Kafka
       idempotent: false,
       transactional: false,
       transactional_id: nil,
-      transactional_timeout: 60
+      transactional_timeout: 60,
+      interceptors: []
     )
       cluster = initialize_cluster
       compressor = Compressor.new(
@@ -287,6 +310,8 @@ module Kafka
         retry_backoff: retry_backoff,
         max_buffer_size: max_buffer_size,
         max_buffer_bytesize: max_buffer_bytesize,
+        partitioner: @partitioner,
+        interceptors: interceptors
       )
     end
 
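Producers built from the client now receive the client-level partitioner plus a new `interceptors:` list. An interceptor is any object responding to `call(Kafka::PendingMessage)` that returns the (possibly modified) message, and it runs before messages are buffered. A minimal sketch, assuming a broker address and a hypothetical HeaderStampInterceptor (neither is part of the library):

    require "kafka"
    require "time"

    # Hypothetical producer interceptor: stamps each outgoing message.
    # PendingMessage#headers is assumed to be a plain, mutable Hash here.
    class HeaderStampInterceptor
      def call(message)
        message.headers["produced_at"] = Time.now.utc.iso8601
        message  # an interceptor must return a message
      end
    end

    kafka = Kafka.new(["kafka1.example.com:9092"], client_id: "my-app")
    producer = kafka.producer(interceptors: [HeaderStampInterceptor.new])
    producer.produce("hello", topic: "greetings")
    producer.deliver_messages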
@@ -339,6 +364,10 @@ module Kafka
     # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
     #   If it is 0, the topic list won't be refreshed (default)
     #   If it is n (n > 0), the topic list will be refreshed every n seconds
+    # @param interceptors [Array<Object>] a list of consumer interceptors that implement
+    #   `call(Kafka::FetchedBatch)`.
+    # @param assignment_strategy [Object] a partition assignment strategy that implements
+    #   `call(cluster:, members:, partitions:)`; `protocol_name()` and `user_data()` are optional
     # @return [Consumer]
     def consumer(
       group_id:,
@@ -349,7 +378,9 @@ module Kafka
       heartbeat_interval: 10,
       offset_retention_time: nil,
       fetcher_max_queue_size: 100,
-      refresh_topic_interval: 0
+      refresh_topic_interval: 0,
+      interceptors: [],
+      assignment_strategy: nil
     )
       cluster = initialize_cluster
 
@@ -368,6 +399,7 @@ module Kafka
         rebalance_timeout: rebalance_timeout,
         retention_time: retention_time,
         instrumenter: instrumenter,
+        assignment_strategy: assignment_strategy
       )
 
       fetcher = Fetcher.new(
@@ -403,7 +435,8 @@ module Kafka
         fetcher: fetcher,
         session_timeout: session_timeout,
         heartbeat: heartbeat,
-        refresh_topic_interval: refresh_topic_interval
+        refresh_topic_interval: refresh_topic_interval,
+        interceptors: interceptors
       )
     end
 
@@ -785,6 +818,7 @@ module Kafka
         seed_brokers: @seed_brokers,
         broker_pool: broker_pool,
         logger: @logger,
+        resolve_seed_brokers: @resolve_seed_brokers,
       )
     end
 
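That closes out client.rb: `initialize` gains `partitioner:` and `resolve_seed_brokers:`, `deliver_message` and `producer` share the configured partitioner, and the resolution flag is forwarded to the cluster. A sketch of how the new constructor options fit together (the addresses and the CRC-based partitioner are illustrative assumptions, not library code):

    require "kafka"
    require "zlib"

    # Hypothetical partitioner: anything responding to call(partition_count, message).
    class Crc32Partitioner
      def call(partition_count, message)
        key = message.partition_key || message.key || message.value
        Zlib.crc32(key.to_s) % partition_count
      end
    end

    kafka = Kafka.new(
      ["kafka1.example.com:9092", "kafka2.example.com:9092"],  # assumed addresses
      client_id: "my-app",
      resolve_seed_brokers: true,  # try every IP behind each seed hostname
      partitioner: Crc32Partitioner.new
    )
    kafka.deliver_message("hi", topic: "greetings")  # partition chosen by Crc32Partitioner#call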
data/lib/kafka/cluster.rb CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/broker_pool"
+require "resolv"
 require "set"
 
 module Kafka
@@ -18,7 +19,8 @@ module Kafka
     # @param seed_brokers [Array<URI>]
     # @param broker_pool [Kafka::BrokerPool]
     # @param logger [Logger]
-    def initialize(seed_brokers:, broker_pool:, logger:)
+    # @param resolve_seed_brokers [Boolean] See {Kafka::Client#initialize}
+    def initialize(seed_brokers:, broker_pool:, logger:, resolve_seed_brokers: false)
       if seed_brokers.empty?
         raise ArgumentError, "At least one seed broker must be configured"
       end
@@ -26,6 +28,7 @@ module Kafka
       @logger = TaggedLogger.new(logger)
       @seed_brokers = seed_brokers
       @broker_pool = broker_pool
+      @resolve_seed_brokers = resolve_seed_brokers
       @cluster_info = nil
       @stale = true
 
@@ -117,7 +120,7 @@ module Kafka
 
     # Finds the broker acting as the coordinator of the given group.
     #
-    # @param group_id: [String]
+    # @param group_id [String]
     # @return [Broker] the broker that's currently coordinator.
     def get_group_coordinator(group_id:)
       @logger.debug "Getting group coordinator for `#{group_id}`"
@@ -127,7 +130,7 @@ module Kafka
 
     # Finds the broker acting as the coordinator of the given transaction.
     #
-    # @param transactional_id: [String]
+    # @param transactional_id [String]
     # @return [Broker] the broker that's currently coordinator.
     def get_transaction_coordinator(transactional_id:)
       @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
@@ -418,32 +421,35 @@ module Kafka
     # @return [Protocol::MetadataResponse] the cluster metadata.
     def fetch_cluster_info
       errors = []
-
       @seed_brokers.shuffle.each do |node|
-        @logger.info "Fetching cluster metadata from #{node}"
-
-        begin
-          broker = @broker_pool.connect(node.hostname, node.port)
-          cluster_info = broker.fetch_metadata(topics: @target_topics)
-
-          if cluster_info.brokers.empty?
-            @logger.error "No brokers in cluster"
-          else
-            @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
-
-            @stale = false
-
-            return cluster_info
+        (@resolve_seed_brokers ? Resolv.getaddresses(node.hostname).shuffle : [node.hostname]).each do |hostname_or_ip|
+          node_info = node.to_s
+          node_info << " (#{hostname_or_ip})" if node.hostname != hostname_or_ip
+          @logger.info "Fetching cluster metadata from #{node_info}"
+
+          begin
+            broker = @broker_pool.connect(hostname_or_ip, node.port)
+            cluster_info = broker.fetch_metadata(topics: @target_topics)
+
+            if cluster_info.brokers.empty?
+              @logger.error "No brokers in cluster"
+            else
+              @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
+
+              @stale = false
+
+              return cluster_info
+            end
+          rescue Error => e
+            @logger.error "Failed to fetch metadata from #{node_info}: #{e}"
+            errors << [node_info, e]
+          ensure
+            broker.disconnect unless broker.nil?
           end
-        rescue Error => e
-          @logger.error "Failed to fetch metadata from #{node}: #{e}"
-          errors << [node, e]
-        ensure
-          broker.disconnect unless broker.nil?
         end
       end
 
-      error_description = errors.map {|node, exception| "- #{node}: #{exception}" }.join("\n")
+      error_description = errors.map {|node_info, exception| "- #{node_info}: #{exception}" }.join("\n")
 
       raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
     end
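The rewritten `fetch_cluster_info` wraps the old connect-and-fetch body in one extra loop: with `resolve_seed_brokers` enabled, every address behind a seed hostname is tried in random order before moving on to the next seed, and error reports name the specific address that failed. The lookup itself is plain stdlib; a quick illustration of the primitive it uses (hostname and output are hypothetical):

    require "resolv"

    # For a round-robin DNS name this returns every A/AAAA record,
    # e.g. ["10.0.0.5", "10.0.0.6"]; the cluster shuffles and tries each.
    Resolv.getaddresses("kafka.example.com").shuffle.each do |ip|
      puts "would attempt a metadata fetch against #{ip}:9092"
    end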
data/lib/kafka/consumer.rb CHANGED
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/consumer_group"
+require "kafka/interceptors"
 require "kafka/offset_manager"
 require "kafka/fetcher"
 require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:, refresh_topic_interval: 0)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                   session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -54,6 +56,7 @@ module Kafka
       @fetcher = fetcher
       @heartbeat = heartbeat
       @refresh_topic_interval = refresh_topic_interval
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
 
       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -76,7 +79,7 @@ module Kafka
       @current_offsets = Hash.new { |h, k| h[k] = {} }
 
       # Map storing subscribed topics with their configuration
-      @subscribed_topics = Concurrent::Map.new
+      @subscribed_topics = Hash.new
 
       # Set storing topics that matched topics in @subscribed_topics
       @matched_topics = Set.new
@@ -220,6 +223,7 @@ module Kafka
         batches = fetch_batches
 
         batches.each do |batch|
+          batch = @interceptors.call(batch)
           batch.messages.each do |message|
             notification = {
               topic: message.topic,
@@ -311,12 +315,13 @@ module Kafka
         unless batch.empty?
           raw_messages = batch.messages
           batch.messages = raw_messages.reject(&:is_control_record)
+          batch = @interceptors.call(batch)
 
           notification = {
             topic: batch.topic,
             partition: batch.partition,
             last_offset: batch.last_offset,
-            last_create_time: batch.messages.last.try(:create_time),
+            last_create_time: batch.messages.last && batch.messages.last.create_time,
             offset_lag: batch.offset_lag,
             highwater_mark_offset: batch.highwater_mark_offset,
             message_count: batch.messages.count,
data/lib/kafka/consumer_group/assignor.rb ADDED
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+require "kafka/protocol/member_assignment"
+
+module Kafka
+  class ConsumerGroup
+
+    # A consumer group partition assignor
+    class Assignor
+      Partition = Struct.new(:topic, :partition_id)
+
+      # @param cluster [Kafka::Cluster]
+      # @param strategy [Object] an object that implements #call and, optionally,
+      #   #protocol_name and #user_data.
+      def initialize(cluster:, strategy:)
+        @cluster = cluster
+        @strategy = strategy
+      end
+
+      def protocol_name
+        @strategy.respond_to?(:protocol_name) ? @strategy.protocol_name : @strategy.class.to_s
+      end
+
+      def user_data
+        @strategy.user_data if @strategy.respond_to?(:user_data)
+      end
+
+      # Assign the topic partitions to the group members.
+      #
+      # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+      #   mapping member ids to metadata.
+      # @param topics [Array<String>] topics
+      # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
+      #   ids to assignments.
+      def assign(members:, topics:)
+        topic_partitions = topics.flat_map do |topic|
+          begin
+            partition_ids = @cluster.partitions_for(topic).map(&:partition_id)
+          rescue UnknownTopicOrPartition
+            raise UnknownTopicOrPartition, "unknown topic #{topic}"
+          end
+          partition_ids.map {|partition_id| Partition.new(topic, partition_id) }
+        end
+
+        group_assignment = {}
+
+        members.each_key do |member_id|
+          group_assignment[member_id] = Protocol::MemberAssignment.new
+        end
+        @strategy.call(cluster: @cluster, members: members, partitions: topic_partitions).each do |member_id, partitions|
+          Array(partitions).each do |partition|
+            group_assignment[member_id].assign(partition.topic, [partition.partition_id])
+          end
+        end
+
+        group_assignment
+      rescue Kafka::LeaderNotAvailable
+        sleep 1
+        retry
+      end
+    end
+  end
+end
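The Assignor normalizes whatever the strategy returns: `members` maps member ids to join-group metadata, `partitions` is a flat list of topic/partition structs, and the strategy's `call` must return a hash of member id to partitions. A toy strategy satisfying that contract (the class and its behavior are illustrative only):

    require "kafka"

    # Hypothetical strategy: hands every partition to the lexicographically
    # first member, purely to show the shape of the contract.
    class FirstMemberStrategy
      def protocol_name
        "first_member"  # optional; the Assignor falls back to the class name
      end

      # members:    { member_id => Kafka::Protocol::JoinGroupResponse::Metadata }
      # partitions: objects responding to #topic and #partition_id
      # returns:    { member_id => [partition, ...] }
      def call(cluster:, members:, partitions:)
        { members.keys.min => partitions }
      end
    end

    kafka = Kafka.new(["kafka1.example.com:9092"], client_id: "my-app")
    kafka.consumer(group_id: "my-group", assignment_strategy: FirstMemberStrategy.new)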
data/lib/kafka/consumer_group.rb CHANGED
@@ -1,13 +1,14 @@
 # frozen_string_literal: true
 
 require "set"
+require "kafka/consumer_group/assignor"
 require "kafka/round_robin_assignment_strategy"
 
 module Kafka
   class ConsumerGroup
     attr_reader :assigned_partitions, :generation_id, :group_id
 
-    def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:)
+    def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @group_id = group_id
@@ -19,7 +20,10 @@ module Kafka
       @members = {}
       @topics = Set.new
       @assigned_partitions = {}
-      @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
+      @assignor = Assignor.new(
+        cluster: cluster,
+        strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
+      )
       @retention_time = retention_time
     end
 
@@ -113,9 +117,12 @@ module Kafka
 
         Protocol.handle_error(response.error_code)
       end
-    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+    rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
       @logger.error "Error sending heartbeat: #{e}"
       raise HeartbeatError, e
+    rescue RebalanceInProgress => e
+      @logger.warn "Error sending heartbeat: #{e}"
+      raise HeartbeatError, e
     rescue NotCoordinatorForGroup
       @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
       sleep 1
@@ -144,6 +151,8 @@ module Kafka
         rebalance_timeout: @rebalance_timeout,
         member_id: @member_id,
         topics: @topics,
+        protocol_name: @assignor.protocol_name,
+        user_data: @assignor.user_data,
       )
 
       Protocol.handle_error(response.error_code)
@@ -161,6 +170,12 @@ module Kafka
       @member_id = ""
       sleep 1
 
+      retry
+    rescue CoordinatorLoadInProgress
+      @logger.error "Coordinator broker still loading, retrying in 1s..."
+
+      sleep 1
+
       retry
     end
 
@@ -174,9 +189,14 @@ module Kafka
       if group_leader?
         @logger.info "Chosen as leader of group `#{@group_id}`"
 
-        group_assignment = @assignment_strategy.assign(
-          members: @members.keys,
-          topics: @topics,
+        topics = Set.new
+        @members.each do |_member, metadata|
+          metadata.topics.each { |t| topics.add(t) }
+        end
+
+        group_assignment = @assignor.assign(
+          members: @members,
+          topics: topics,
         )
       end