ruby-kafka 0.7.10 → 1.3.0

--- a/lib/kafka/cluster.rb
+++ b/lib/kafka/cluster.rb
@@ -45,6 +45,10 @@ module Kafka
         new_topics = topics - @target_topics
 
         unless new_topics.empty?
+          if new_topics.any? { |topic| topic.nil? or topic.empty? }
+            raise ArgumentError, "Topic must not be nil or empty"
+          end
+
           @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"
 
           @target_topics.merge(new_topics)
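
The new guard fails fast when a nil or empty topic reaches the cluster's target list. A minimal sketch of the effect, assuming the standard client entry points that funnel into add_target_topics (the broker address is a placeholder):

    require "kafka"

    kafka = Kafka.new(["localhost:9092"], client_id: "example")

    # Any code path that adds a target topic (producing, fetching partition
    # metadata, subscribing) now surfaces the problem immediately instead of
    # failing later with a confusing metadata error.
    kafka.partitions_for("")   # => raises ArgumentError, "Topic must not be nil or empty"
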
@@ -139,6 +143,40 @@ module Kafka
       end
     end
 
+    def describe_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.describe_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      response.resources.first.configs
+    end
+
+    def alter_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.alter_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      nil
+    end
+
     def partitions_for(topic)
       add_target_topics([topic])
       refresh_metadata_if_necessary!
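
Both new methods target broker-level (cluster resource) configs: they look up the broker by id, connect to it directly, and raise on any per-resource error. A hedged usage sketch, assuming matching delegators on Kafka::Client and name/value pairs for alter_configs (argument shapes inferred from the diff, not confirmed here):

    require "kafka"

    kafka = Kafka.new(["localhost:9092"], client_id: "admin")

    # Read selected broker configs from broker 0; an empty list would
    # request all of them.
    entries = kafka.describe_configs(0, ["log.retention.hours"])
    entries.each { |entry| puts "#{entry.name} = #{entry.value}" }

    # Write a broker config on broker 0; returns nil on success, raises
    # if the broker reports a per-resource error.
    kafka.alter_configs(0, [["log.retention.hours", "72"]])
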
@@ -252,6 +290,20 @@ module Kafka
       group
     end
 
+    def fetch_group_offsets(group_id)
+      topics = get_group_coordinator(group_id: group_id)
+        .fetch_offsets(group_id: group_id, topics: nil)
+        .topics
+
+      topics.each do |_, partitions|
+        partitions.each do |_, response|
+          Protocol.handle_error(response.error_code)
+        end
+      end
+
+      topics
+    end
+
     def create_partitions_for(name, num_partitions:, timeout:)
       options = {
         topics: [[name, num_partitions, nil]],
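
fetch_group_offsets asks the group's coordinator for all committed offsets (topics: nil requests every topic) and error-checks each partition response before returning the raw topics hash. A hedged sketch, assuming a matching delegator on Kafka::Client and the protocol's per-partition offset accessor:

    kafka = Kafka.new(["localhost:9092"], client_id: "example")

    offsets = kafka.fetch_group_offsets("my-consumer-group")

    offsets.each do |topic, partitions|
      partitions.each do |partition, info|
        puts "#{topic}/#{partition}: committed offset #{info.offset}"
      end
    end
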
--- a/lib/kafka/connection.rb
+++ b/lib/kafka/connection.rb
@@ -58,6 +58,9 @@ module Kafka
       @connect_timeout = connect_timeout || CONNECT_TIMEOUT
       @socket_timeout = socket_timeout || SOCKET_TIMEOUT
       @ssl_context = ssl_context
+
+      @socket = nil
+      @last_request = nil
     end
 
     def to_s
--- a/lib/kafka/consumer.rb
+++ b/lib/kafka/consumer.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true
 
 require "kafka/consumer_group"
+require "kafka/interceptors"
 require "kafka/offset_manager"
 require "kafka/fetcher"
 require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                   session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -53,6 +55,8 @@ module Kafka
       @session_timeout = session_timeout
       @fetcher = fetcher
       @heartbeat = heartbeat
+      @refresh_topic_interval = refresh_topic_interval
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
 
       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
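
Two new consumer options appear here: refresh_topic_interval (seconds between re-scans of the subscribed topic list; 0 disables re-scanning) and interceptors (objects whose call(batch) runs on every fetched batch before processing). A hedged sketch, assuming Kafka::Client#consumer forwards both options:

    # A hypothetical interceptor; the only interface the diff relies on
    # is #call(batch) returning a (possibly modified) batch.
    class LoggingInterceptor
      def call(batch)
        puts "intercepted #{batch.messages.size} message(s) from #{batch.topic}"
        batch
      end
    end

    consumer = kafka.consumer(
      group_id: "my-group",
      refresh_topic_interval: 30,             # re-scan topics every 30 seconds
      interceptors: [LoggingInterceptor.new]
    )
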
@@ -73,6 +77,15 @@ module Kafka
       # when user commits message other than last in a batch, this would make ruby-kafka refetch
       # some already consumed messages
       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Map storing subscribed topics with their configuration
+      @subscribed_topics = Hash.new
+
+      # Set storing topics that matched topics in @subscribed_topics
+      @matched_topics = Set.new
+
+      # Whether join_group must be executed again because new topics are added
+      @join_group_for_new_topics = false
     end
 
     # Subscribes the consumer to a topic.
@@ -97,13 +110,12 @@ module Kafka
     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
       default_offset ||= start_from_beginning ? :earliest : :latest
 
-      if topic_or_regex.is_a?(Regexp)
-        cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-          subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-        end
-      else
-        subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-      end
+      @subscribed_topics[topic_or_regex] = {
+        default_offset: default_offset,
+        start_from_beginning: start_from_beginning,
+        max_bytes_per_partition: max_bytes_per_partition
+      }
+      scan_for_subscribing
 
       nil
     end
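
Instead of resolving a regex against the cluster's topic list once at subscribe time, the subscription (topic or regex plus its options) is now recorded in @subscribed_topics and resolved by scan_for_subscribing, which also runs periodically when refresh_topic_interval is set. Combined with the option above, a regex subscription can pick up topics created after the consumer started; a hedged sketch, continuing from the consumer built earlier:

    consumer.subscribe(/^events\./, start_from_beginning: false)

    # With refresh_topic_interval > 0, a topic such as "events.signups"
    # created later is matched on a subsequent scan; the consumer then
    # rejoins the group so the new topic's partitions get assigned.
    consumer.each_message do |message|
      puts "#{message.topic}: #{message.value}"
    end
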
@@ -116,7 +128,6 @@ module Kafka
     def stop
       @running = false
       @fetcher.stop
-      @cluster.disconnect
     end
 
     # Pause processing of a specific topic partition.
@@ -212,6 +223,7 @@ module Kafka
         batches = fetch_batches
 
         batches.each do |batch|
+          batch = @interceptors.call(batch)
          batch.messages.each do |message|
            notification = {
              topic: message.topic,
@@ -303,11 +315,13 @@ module Kafka
           unless batch.empty?
             raw_messages = batch.messages
             batch.messages = raw_messages.reject(&:is_control_record)
+            batch = @interceptors.call(batch)
 
             notification = {
               topic: batch.topic,
               partition: batch.partition,
               last_offset: batch.last_offset,
+              last_create_time: batch.messages.last && batch.messages.last.create_time,
               offset_lag: batch.offset_lag,
               highwater_mark_offset: batch.highwater_mark_offset,
               message_count: batch.messages.count,
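
The process_batch notification payload now carries last_create_time, the create time of the batch's last message, which the Datadog reporter later turns into a time-lag metric (see the datadog.rb hunks below). A hedged sketch of reading it from a custom subscriber, assuming ActiveSupport::Notifications is the configured instrumentation backend and the event name follows ruby-kafka's "<event>.consumer.kafka" convention:

    require "active_support/notifications"

    ActiveSupport::Notifications.subscribe("process_batch.consumer.kafka") do |_name, _start, _finish, _id, payload|
      if (create_time = payload[:last_create_time])
        lag_ms = ((Time.now - create_time) * 1000).to_i
        puts "#{payload[:topic]}/#{payload[:partition]} time lag: #{lag_ms}ms"
      end
    end
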
@@ -401,6 +415,7 @@ module Kafka
       while running?
         begin
           @instrumenter.instrument("loop.consumer") do
+            refresh_topic_list_if_enabled
             yield
           end
         rescue HeartbeatError
@@ -432,6 +447,7 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
       @group.leave rescue nil
+      @cluster.disconnect
       @running = false
       @logger.pop_tags
     end
@@ -452,6 +468,8 @@ module Kafka
     end
 
     def join_group
+      @join_group_for_new_topics = false
+
       old_generation_id = @group.generation_id
 
       @group.join
@@ -513,11 +531,19 @@ module Kafka
       end
     end
 
+    def refresh_topic_list_if_enabled
+      return if @refresh_topic_interval <= 0
+      return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+      scan_for_subscribing
+      @refreshed_at = Time.now
+    end
+
     def fetch_batches
       # Return early if the consumer has been stopped.
       return [] if shutting_down?
 
-      join_group unless @group.member?
+      join_group if !@group.member? || @join_group_for_new_topics
 
       trigger_heartbeat
 
@@ -525,7 +551,7 @@ module Kafka
 
       if !@fetcher.data?
         @logger.debug "No batches to process"
-        sleep 2
+        sleep(@fetcher.max_wait_time || 2)
         []
       else
         tag, message = @fetcher.poll
@@ -571,10 +597,34 @@ module Kafka
       end
     end
 
+    def scan_for_subscribing
+      @subscribed_topics.each do |topic_or_regex, config|
+        default_offset = config.fetch(:default_offset)
+        start_from_beginning = config.fetch(:start_from_beginning)
+        max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+        if topic_or_regex.is_a?(Regexp)
+          subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        else
+          subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        end
+      end
+    end
+
+    def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+      cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+        subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      end
+    end
+
     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      return if @matched_topics.include?(topic)
+      @matched_topics.add(topic)
+      @join_group_for_new_topics = true
+
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+      @cluster.mark_as_stale!
     end
 
     def cluster_topics
--- a/lib/kafka/consumer_group.rb
+++ b/lib/kafka/consumer_group.rb
@@ -1,24 +1,29 @@
 # frozen_string_literal: true
 
 require "set"
+require "kafka/consumer_group/assignor"
 require "kafka/round_robin_assignment_strategy"
 
 module Kafka
   class ConsumerGroup
     attr_reader :assigned_partitions, :generation_id, :group_id
 
-    def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
+    def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @group_id = group_id
       @session_timeout = session_timeout
+      @rebalance_timeout = rebalance_timeout
       @instrumenter = instrumenter
       @member_id = ""
       @generation_id = nil
       @members = {}
       @topics = Set.new
       @assigned_partitions = {}
-      @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
+      @assignor = Assignor.new(
+        cluster: cluster,
+        strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
+      )
       @retention_time = retention_time
     end
 
@@ -112,9 +117,12 @@ module Kafka
 
         Protocol.handle_error(response.error_code)
       end
-    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+    rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
       @logger.error "Error sending heartbeat: #{e}"
       raise HeartbeatError, e
+    rescue RebalanceInProgress => e
+      @logger.warn "Error sending heartbeat: #{e}"
+      raise HeartbeatError, e
     rescue NotCoordinatorForGroup
       @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
       sleep 1
@@ -140,7 +148,11 @@ module Kafka
       response = coordinator.join_group(
         group_id: @group_id,
         session_timeout: @session_timeout,
+        rebalance_timeout: @rebalance_timeout,
         member_id: @member_id,
+        topics: @topics,
+        protocol_name: @assignor.protocol_name,
+        user_data: @assignor.user_data,
       )
 
       Protocol.handle_error(response.error_code)
@@ -158,6 +170,12 @@ module Kafka
       @member_id = ""
       sleep 1
 
+      retry
+    rescue CoordinatorLoadInProgress
+      @logger.error "Coordinator broker still loading, retrying in 1s..."
+
+      sleep 1
+
       retry
     end
 
@@ -171,8 +189,8 @@ module Kafka
       if group_leader?
         @logger.info "Chosen as leader of group `#{@group_id}`"
 
-        group_assignment = @assignment_strategy.assign(
-          members: @members.keys,
+        group_assignment = @assignor.assign(
+          members: @members,
           topics: @topics,
         )
       end
--- /dev/null
+++ b/lib/kafka/consumer_group/assignor.rb
@@ -0,0 +1,63 @@
+# frozen_string_literal: true
+
+require "kafka/protocol/member_assignment"
+
+module Kafka
+  class ConsumerGroup
+
+    # A consumer group partition assignor
+    class Assignor
+      Partition = Struct.new(:topic, :partition_id)
+
+      # @param cluster [Kafka::Cluster]
+      # @param strategy [Object] an object that implements #protocol_type,
+      #   #user_data, and #assign.
+      def initialize(cluster:, strategy:)
+        @cluster = cluster
+        @strategy = strategy
+      end
+
+      def protocol_name
+        @strategy.respond_to?(:protocol_name) ? @strategy.protocol_name : @strategy.class.to_s
+      end
+
+      def user_data
+        @strategy.user_data if @strategy.respond_to?(:user_data)
+      end
+
+      # Assign the topic partitions to the group members.
+      #
+      # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
+      #   mapping member ids to metadata.
+      # @param topics [Array<String>] topics
+      # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
+      #   ids to assignments.
+      def assign(members:, topics:)
+        topic_partitions = topics.flat_map do |topic|
+          begin
+            partition_ids = @cluster.partitions_for(topic).map(&:partition_id)
+          rescue UnknownTopicOrPartition
+            raise UnknownTopicOrPartition, "unknown topic #{topic}"
+          end
+          partition_ids.map {|partition_id| Partition.new(topic, partition_id) }
+        end
+
+        group_assignment = {}
+
+        members.each_key do |member_id|
+          group_assignment[member_id] = Protocol::MemberAssignment.new
+        end
+        @strategy.call(cluster: @cluster, members: members, partitions: topic_partitions).each do |member_id, partitions|
+          Array(partitions).each do |partition|
+            group_assignment[member_id].assign(partition.topic, [partition.partition_id])
+          end
+        end
+
+        group_assignment
+      rescue Kafka::LeaderNotAvailable
+        sleep 1
+        retry
+      end
+    end
+  end
+end
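
The new Assignor decouples group assignment from the built-in round-robin strategy: any object responding to call(cluster:, members:, partitions:), plus optional protocol_name and user_data, can be plugged in via the new assignment_strategy: argument. A hedged sketch of a custom strategy, assuming Kafka::Client#consumer forwards the option through to ConsumerGroup:

    # Deals partitions out to members in order; functionally similar to
    # round-robin, shown only to illustrate the expected interface.
    class DealingStrategy
      def protocol_name
        "dealing"
      end

      def user_data
        nil                       # optional opaque bytes sent with JoinGroup
      end

      # members:    hash of member id => join-group metadata
      # partitions: array of structs responding to #topic and #partition_id
      # Returns a hash of member id => array of those partition structs.
      def call(cluster:, members:, partitions:)
        member_ids = members.keys
        partitions.each_with_index.with_object({}) do |(partition, i), assignment|
          (assignment[member_ids[i % member_ids.size]] ||= []) << partition
        end
      end
    end

    kafka.consumer(group_id: "my-group", assignment_strategy: DealingStrategy.new)
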
--- a/lib/kafka/datadog.rb
+++ b/lib/kafka/datadog.rb
@@ -31,7 +31,7 @@ module Kafka
 
     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end
 
       def statsd=(statsd)
@@ -40,7 +40,7 @@ module Kafka
       end
 
       def host
-        @host ||= default_host
+        @host
       end
 
       def host=(host)
@@ -49,7 +49,7 @@ module Kafka
       end
 
       def port
-        @port ||= default_port
+        @port
       end
 
       def port=(port)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end
 
+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
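
With host and port no longer defaulted inside ruby-kafka, dogstatsd-ruby resolves its own defaults, and the new socket_path setter allows routing metrics over a Unix domain socket instead of UDP (the socket path below is a placeholder):

    require "kafka/datadog"

    # UDP (previous behavior, now resolved by dogstatsd-ruby itself):
    # Kafka::Datadog.host = "127.0.0.1"
    # Kafka::Datadog.port = 8125

    # Unix domain socket (new):
    Kafka::Datadog.socket_path = "/var/run/datadog/dsd.socket"
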
@@ -77,14 +86,6 @@ module Kafka
 
       private
 
-      def default_host
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
-      end
-
-      def default_port
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
-      end
-
       def clear
         @statsd && @statsd.close
         @statsd = nil
@@ -95,8 +96,8 @@ module Kafka
      private
 
      %w[increment histogram count timing gauge].each do |type|
-        define_method(type) do |*args|
-          emit(type, *args)
+        define_method(type) do |*args, **kwargs|
+          emit(type, *args, **kwargs)
        end
      end
 
@@ -168,6 +169,8 @@ module Kafka
      def process_batch(event)
        offset = event.payload.fetch(:last_offset)
        messages = event.payload.fetch(:message_count)
+        create_time = event.payload.fetch(:last_create_time)
+        time_lag = create_time && ((Time.now - create_time) * 1000).to_i
 
        tags = {
          client: event.payload.fetch(:client_id),
@@ -184,6 +187,10 @@ module Kafka
        end
 
        gauge("consumer.offset", offset, tags: tags)
+
+        if time_lag
+          gauge("consumer.time_lag", time_lag, tags: tags)
+        end
      end
 
      def fetch_batch(event)
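
Together with the last_create_time payload added in consumer.rb, this emits a consumer.time_lag gauge, the millisecond gap between now and the create time of the batch's last message, alongside the existing consumer.offset gauge:

    # Hypothetical resulting metric under the default "ruby_kafka"
    # namespace (STATSD_NAMESPACE):
    #
    #   ruby_kafka.consumer.time_lag   (gauge, milliseconds)
    #
    # tagged at least with the client id shown in the hunk above, and
    # emitted only when the batch contained at least one message.
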