ruby-kafka 0.7.9 → 1.2.0

--- a/lib/kafka/cluster.rb
+++ b/lib/kafka/cluster.rb
@@ -45,6 +45,10 @@ module Kafka
       new_topics = topics - @target_topics

       unless new_topics.empty?
+        if new_topics.any? { |topic| topic.nil? or topic.empty? }
+          raise ArgumentError, "Topic must not be nil or empty"
+        end
+
         @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"

         @target_topics.merge(new_topics)
@@ -139,6 +143,40 @@ module Kafka
       end
     end

+    def describe_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.describe_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      response.resources.first.configs
+    end
+
+    def alter_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.alter_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      nil
+    end
+
     def partitions_for(topic)
       add_target_topics([topic])
       refresh_metadata_if_necessary!
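
Both new admin methods connect directly to the broker being inspected and raise if any resource in the response carries an error. A minimal usage sketch, assuming these are exposed through Kafka::Client as describe_configs/alter_configs and that response entries respond to name/value; the broker id, config names, and the [name, value] pair format for alter_configs are illustrative, inferred from the resources tuple built above:

    require "kafka"

    kafka = Kafka.new(["localhost:9092"], client_id: "admin-tool")

    # Read two broker-level settings from broker 1.
    entries = kafka.describe_configs(1, ["log.retention.hours", "num.io.threads"])
    entries.each { |entry| puts "#{entry.name} = #{entry.value}" }

    # Change a broker-level setting; returns nil, raises on protocol errors.
    kafka.alter_configs(1, [["log.cleaner.threads", "2"]])
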
@@ -252,6 +290,20 @@ module Kafka
       group
     end

+    def fetch_group_offsets(group_id)
+      topics = get_group_coordinator(group_id: group_id)
+        .fetch_offsets(group_id: group_id, topics: nil)
+        .topics
+
+      topics.each do |_, partitions|
+        partitions.each do |_, response|
+          Protocol.handle_error(response.error_code)
+        end
+      end
+
+      topics
+    end
+
     def create_partitions_for(name, num_partitions:, timeout:)
       options = {
         topics: [[name, num_partitions, nil]],
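
fetch_group_offsets asks the group's coordinator for the committed offsets of every topic in the group (topics: nil means "all topics"), raising on any per-partition error. A sketch, assuming Kafka::Client#fetch_group_offsets delegates here:

    offsets = kafka.fetch_group_offsets("my-consumer-group")

    # Result shape: topic => partition => partition offset response.
    offsets.each do |topic, partitions|
      partitions.each do |partition, response|
        puts "#{topic}/#{partition}: committed offset #{response.offset}"
      end
    end
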
--- a/lib/kafka/connection.rb
+++ b/lib/kafka/connection.rb
@@ -58,6 +58,9 @@ module Kafka
       @connect_timeout = connect_timeout || CONNECT_TIMEOUT
       @socket_timeout = socket_timeout || SOCKET_TIMEOUT
       @ssl_context = ssl_context
+
+      @socket = nil
+      @last_request = nil
     end

     def to_s
--- a/lib/kafka/consumer.rb
+++ b/lib/kafka/consumer.rb
@@ -1,6 +1,7 @@
 # frozen_string_literal: true

 require "kafka/consumer_group"
+require "kafka/interceptors"
 require "kafka/offset_manager"
 require "kafka/fetcher"
 require "kafka/pause"
@@ -44,7 +45,8 @@ module Kafka
   #
   class Consumer

-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:,
+                   session_timeout:, heartbeat:, refresh_topic_interval: 0, interceptors: [])
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -53,6 +55,8 @@ module Kafka
       @session_timeout = session_timeout
       @fetcher = fetcher
       @heartbeat = heartbeat
+      @refresh_topic_interval = refresh_topic_interval
+      @interceptors = Interceptors.new(interceptors: interceptors, logger: logger)

       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -73,6 +77,15 @@ module Kafka
       # when user commits message other than last in a batch, this would make ruby-kafka refetch
       # some already consumed messages
       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Map storing subscribed topics with their configuration
+      @subscribed_topics = Hash.new
+
+      # Set storing topics that matched topics in @subscribed_topics
+      @matched_topics = Set.new
+
+      # Whether join_group must be executed again because new topics are added
+      @join_group_for_new_topics = false
     end

     # Subscribes the consumer to a topic.
@@ -97,13 +110,12 @@ module Kafka
     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
       default_offset ||= start_from_beginning ? :earliest : :latest

-      if topic_or_regex.is_a?(Regexp)
-        cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-          subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-        end
-      else
-        subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-      end
+      @subscribed_topics[topic_or_regex] = {
+        default_offset: default_offset,
+        start_from_beginning: start_from_beginning,
+        max_bytes_per_partition: max_bytes_per_partition
+      }
+      scan_for_subscribing

       nil
     end
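
Subscriptions are now recorded with their options and re-evaluated by scan_for_subscribing, which is what lets a regex subscription pick up topics created after startup when periodic refresh is enabled. A sketch, assuming Kafka::Client#consumer forwards the new refresh_topic_interval: option (broker address and topic names are illustrative):

    kafka = Kafka.new(["localhost:9092"], client_id: "my-app")

    consumer = kafka.consumer(
      group_id: "metrics-consumers",
      refresh_topic_interval: 30 # seconds; 0 (the default) disables refreshing
    )

    # Matches existing "metrics.*" topics now, and newly created ones on refresh.
    consumer.subscribe(/^metrics\./)

    consumer.each_message do |message|
      puts "#{message.topic}/#{message.partition} @ #{message.offset}"
    end
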
@@ -116,7 +128,6 @@ module Kafka
     def stop
       @running = false
       @fetcher.stop
-      @cluster.disconnect
     end

     # Pause processing of a specific topic partition.
@@ -212,6 +223,7 @@ module Kafka
       batches = fetch_batches

       batches.each do |batch|
+        batch = @interceptors.call(batch)
         batch.messages.each do |message|
           notification = {
             topic: message.topic,
@@ -303,11 +315,13 @@ module Kafka
       unless batch.empty?
         raw_messages = batch.messages
         batch.messages = raw_messages.reject(&:is_control_record)
+        batch = @interceptors.call(batch)

         notification = {
           topic: batch.topic,
           partition: batch.partition,
           last_offset: batch.last_offset,
+          last_create_time: batch.messages.last && batch.messages.last.create_time,
           offset_lag: batch.offset_lag,
           highwater_mark_offset: batch.highwater_mark_offset,
           message_count: batch.messages.count,
@@ -401,6 +415,7 @@ module Kafka
       while running?
         begin
           @instrumenter.instrument("loop.consumer") do
+            refresh_topic_list_if_enabled
             yield
           end
         rescue HeartbeatError
@@ -432,6 +447,7 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
       @group.leave rescue nil
+      @cluster.disconnect
       @running = false
       @logger.pop_tags
     end
@@ -452,6 +468,8 @@ module Kafka
     end

     def join_group
+      @join_group_for_new_topics = false
+
       old_generation_id = @group.generation_id

       @group.join
@@ -513,11 +531,19 @@ module Kafka
       end
     end

+    def refresh_topic_list_if_enabled
+      return if @refresh_topic_interval <= 0
+      return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+      scan_for_subscribing
+      @refreshed_at = Time.now
+    end
+
     def fetch_batches
       # Return early if the consumer has been stopped.
       return [] if shutting_down?

-      join_group unless @group.member?
+      join_group if !@group.member? || @join_group_for_new_topics

       trigger_heartbeat

@@ -525,7 +551,7 @@ module Kafka

       if !@fetcher.data?
         @logger.debug "No batches to process"
-        sleep 2
+        sleep(@fetcher.max_wait_time || 2)
         []
       else
         tag, message = @fetcher.poll
@@ -571,10 +597,34 @@ module Kafka
       end
     end

+    def scan_for_subscribing
+      @subscribed_topics.each do |topic_or_regex, config|
+        default_offset = config.fetch(:default_offset)
+        start_from_beginning = config.fetch(:start_from_beginning)
+        max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+        if topic_or_regex.is_a?(Regexp)
+          subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        else
+          subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        end
+      end
+    end
+
+    def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+      cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+        subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      end
+    end
+
     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      return if @matched_topics.include?(topic)
+      @matched_topics.add(topic)
+      @join_group_for_new_topics = true
+
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+      @cluster.mark_as_stale!
     end

     def cluster_topics
--- a/lib/kafka/consumer_group.rb
+++ b/lib/kafka/consumer_group.rb
@@ -7,11 +7,12 @@ module Kafka
   class ConsumerGroup
     attr_reader :assigned_partitions, :generation_id, :group_id

-    def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
+    def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @group_id = group_id
       @session_timeout = session_timeout
+      @rebalance_timeout = rebalance_timeout
       @instrumenter = instrumenter
       @member_id = ""
       @generation_id = nil
@@ -140,7 +141,9 @@ module Kafka
       response = coordinator.join_group(
         group_id: @group_id,
         session_timeout: @session_timeout,
+        rebalance_timeout: @rebalance_timeout,
         member_id: @member_id,
+        topics: @topics,
       )

       Protocol.handle_error(response.error_code)
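
Sending rebalance_timeout with the JoinGroup request lets the broker give members longer than the session timeout to re-join during a rebalance. A sketch, assuming Kafka::Client#consumer accepts a matching rebalance_timeout: option in seconds:

    consumer = kafka.consumer(
      group_id: "slow-workers",
      session_timeout: 30,    # evict a member that goes silent for 30s
      rebalance_timeout: 120  # but allow up to 120s to re-join on rebalance
    )
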
@@ -158,6 +161,12 @@ module Kafka
       @member_id = ""
       sleep 1

+      retry
+    rescue CoordinatorLoadInProgress
+      @logger.error "Coordinator broker still loading, retrying in 1s..."
+
+      sleep 1
+
       retry
     end

--- a/lib/kafka/datadog.rb
+++ b/lib/kafka/datadog.rb
@@ -31,7 +31,7 @@ module Kafka

     class << self
       def statsd
-        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
       end

       def statsd=(statsd)
@@ -40,7 +40,7 @@ module Kafka
       end

       def host
-        @host ||= default_host
+        @host
       end

       def host=(host)
@@ -49,7 +49,7 @@ module Kafka
       end

       def port
-        @port ||= default_port
+        @port
       end

       def port=(port)
@@ -57,6 +57,15 @@ module Kafka
         clear
       end

+      def socket_path
+        @socket_path
+      end
+
+      def socket_path=(socket_path)
+        @socket_path = socket_path
+        clear
+      end
+
       def namespace
         @namespace ||= STATSD_NAMESPACE
       end
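
The Datadog reporter can now emit over a Unix domain socket instead of UDP. Note that host and port no longer fall back to library defaults in this module; passing nil through to Datadog::Statsd.new presumably lets dogstatsd-ruby apply its own defaults. A sketch (the socket path is illustrative):

    require "kafka/datadog"

    # Either UDP...
    Kafka::Datadog.host = "127.0.0.1"
    Kafka::Datadog.port = 8125

    # ...or a dogstatsd Unix domain socket, leaving host/port unset:
    Kafka::Datadog.socket_path = "/var/run/datadog/dsd.socket"
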
@@ -77,14 +86,6 @@ module Kafka

       private

-      def default_host
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
-      end
-
-      def default_port
-        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
-      end
-
       def clear
         @statsd && @statsd.close
         @statsd = nil
@@ -95,8 +96,8 @@ module Kafka
       private

       %w[increment histogram count timing gauge].each do |type|
-        define_method(type) do |*args|
-          emit(type, *args)
+        define_method(type) do |*args, **kwargs|
+          emit(type, *args, **kwargs)
         end
       end

@@ -168,6 +169,8 @@ module Kafka
     def process_batch(event)
       offset = event.payload.fetch(:last_offset)
       messages = event.payload.fetch(:message_count)
+      create_time = event.payload.fetch(:last_create_time)
+      time_lag = create_time && ((Time.now - create_time) * 1000).to_i

       tags = {
         client: event.payload.fetch(:client_id),
@@ -184,6 +187,10 @@ module Kafka
       end

       gauge("consumer.offset", offset, tags: tags)
+
+      if time_lag
+        gauge("consumer.time_lag", time_lag, tags: tags)
+      end
     end

     def fetch_batch(event)
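
The new consumer.time_lag gauge turns the last_create_time payload key into a wall-clock lag in milliseconds, i.e. how far the consumer trails the producer. A worked example with hypothetical timestamps:

    require "time"

    create_time = Time.parse("2020-01-01 12:00:00.000")
    now         = Time.parse("2020-01-01 12:00:01.500")
    ((now - create_time) * 1000).to_i # => 1500 ms behind the producer
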
--- a/lib/kafka/fetcher.rb
+++ b/lib/kafka/fetcher.rb
@@ -4,7 +4,7 @@ require "kafka/fetch_operation"

 module Kafka
   class Fetcher
-    attr_reader :queue
+    attr_reader :queue, :max_wait_time

     def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
       @cluster = cluster
@@ -17,6 +17,9 @@ module Kafka
       @commands = Queue.new
       @next_offsets = Hash.new { |h, k| h[k] = {} }

+      # We are only running when someone calls start.
+      @running = false
+
       # Long poll until at least this many bytes can be fetched.
       @min_bytes = 1

@@ -110,7 +113,7 @@ module Kafka
       elsif @queue.size < @max_queue_size
         step
       else
-        @logger.warn "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
+        @logger.info "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
         sleep 1
       end
     ensure
--- /dev/null
+++ b/lib/kafka/interceptors.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Kafka
+  # Holds a list of interceptors that implement `call`
+  # and wraps calls to a chain of custom interceptors.
+  class Interceptors
+    def initialize(interceptors:, logger:)
+      @interceptors = interceptors || []
+      @logger = TaggedLogger.new(logger)
+    end
+
+    # This method is called when the client produces a message or once the batches are fetched.
+    # The message returned from the first call is passed to the second interceptor call, and so on in an
+    # interceptor chain. This method does not throw exceptions.
+    #
+    # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+    #   fetched batch.
+    #
+    # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+    #   returned by the last interceptor.
+    def call(intercepted)
+      @interceptors.each do |interceptor|
+        begin
+          intercepted = interceptor.call(intercepted)
+        rescue Exception => e
+          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+        end
+      end
+
+      intercepted
+    end
+  end
+end
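
Any object responding to call can serve as an interceptor, and exceptions are logged rather than propagated, so a faulty interceptor cannot break consumption. A hypothetical consumer-side interceptor:

    # Counts messages in each fetched batch; must return the (possibly
    # transformed) batch so the next interceptor in the chain receives it.
    class CountingInterceptor
      attr_reader :message_count

      def initialize
        @message_count = 0
      end

      def call(batch)
        @message_count += batch.messages.size
        batch
      end
    end

    # Assuming Kafka::Client#consumer forwards interceptors: to Consumer#initialize:
    consumer = kafka.consumer(group_id: "my-group", interceptors: [CountingInterceptor.new])
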
--- a/lib/kafka/offset_manager.rb
+++ b/lib/kafka/offset_manager.rb
@@ -50,9 +50,20 @@ module Kafka
     # @param offset [Integer] the offset of the message that should be marked as processed.
     # @return [nil]
     def mark_as_processed(topic, partition, offset)
-      @uncommitted_offsets += 1
+      unless @group.assigned_to?(topic, partition)
+        @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+        return
+      end
       @processed_offsets[topic] ||= {}

+      last_processed_offset = @processed_offsets[topic][partition] || -1
+      if last_processed_offset > offset + 1
+        @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+        return
+      end
+
+      @uncommitted_offsets += 1
+
       # The committed offset should always be the offset of the next message that the
       # application will read, thus adding one to the last message processed.
       @processed_offsets[topic][partition] = offset + 1
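
Because the stored value is always the offset of the next message to read (offset + 1), the guard compares against offset + 1: marking an older message as processed after a newer one is now a no-op. A worked example with hypothetical offsets, calling the manager directly:

    offset_manager.mark_as_processed("events", 0, 10) # stores 11 as the next offset to read
    offset_manager.mark_as_processed("events", 0, 5)  # 11 > 5 + 1, so the older offset is ignored
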