ruby-kafka 1.0.0 → 1.1.0.beta1

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8ebe10cb2c93b4387a14d069c4291751808abed924cbee6f4ff2f00770aaa1c2
-  data.tar.gz: 1e392b4f2b5137cd83bd191b5c629fd98b41697d389fb5c68c3100d03c3ff218
+  metadata.gz: 1e5516c0741a1532bd320d8202e4cd23d18af5ddaf9581c6a39c69230b4c59e1
+  data.tar.gz: 66b47c941718a9a3a6e9f0523ec60dc6dd8b96ef2c82fda3571f7d150464cfeb
 SHA512:
-  metadata.gz: 037a8fe1a7495f5e3d723e047392f68a1373e91f8a00337beb7ae6e053d031d6ab1b8597767a1b8b43399850e23493295180a24250e0c2f09b4f626316cd6915
-  data.tar.gz: 108d7160593e452f27c1e828438f4cd8c2c6c009f081a0ff949ca330f9487b948aad73cf23ba39e12adb0ed9f773c667cfeebaad1625b33770057b7b69b3dcb2
+  metadata.gz: cb040bc7a76c13f131b59e2a15d9274542a4768595de221198b4e574aeb36ff5095ac14b0737b6a988cd4be7863f4bd44cccc7d2ad48ae2ee3f72f81256b08ca
+  data.tar.gz: 012474c8b6390eb2348906f320caa3759c12d8e9205713cd173f98a1463a432caa8c1c5cd700a2bea059e6601c5482b158d72af1fb199d3f9751f3cb1a3b5451
CHANGELOG.md CHANGED
@@ -4,6 +4,14 @@ Changes and additions to the library will be listed here.
 
 ## Unreleased
 
+## 1.1.0
+
+- Instrument `create_time` from last message in batch (#811).
+- Add client function for fetching topic replica count (#822).
+- Allow consumers to refresh the topic lists (#818).
+- Disconnect after leaving a group (#817).
+- Use `max_wait_time` as the sleep instead of hardcoded 2 seconds (#825).
+
 ## 1.0.0
 
 - Add client methods to manage configs (#759)
lib/kafka/client.rb CHANGED
@@ -336,6 +336,9 @@ module Kafka
     # @param fetcher_max_queue_size [Integer] max number of items in the fetch queue that
     #   are stored for further processing. Note, that each item in the queue represents a
     #   response from a single broker.
+    # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
+    #   If it is 0, the topic list won't be refreshed (default).
+    #   If it is n (n > 0), the topic list will be refreshed every n seconds.
     # @return [Consumer]
     def consumer(
       group_id:,
@@ -345,7 +348,8 @@ module Kafka
       offset_commit_threshold: 0,
       heartbeat_interval: 10,
       offset_retention_time: nil,
-      fetcher_max_queue_size: 100
+      fetcher_max_queue_size: 100,
+      refresh_topic_interval: 0
     )
       cluster = initialize_cluster
 
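The new `refresh_topic_interval` option is plumbed from `Kafka::Client#consumer` through to the `Consumer` instance. A minimal usage sketch (broker address, client id, and group name are illustrative):

```ruby
require "kafka"

kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")

# Re-scan the topic list every 60 seconds; 0 (the default) keeps the
# pre-1.1.0 behavior of never refreshing.
consumer = kafka.consumer(group_id: "my-group", refresh_topic_interval: 60)
```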
@@ -399,6 +403,7 @@ module Kafka
         fetcher: fetcher,
         session_timeout: session_timeout,
         heartbeat: heartbeat,
+        refresh_topic_interval: refresh_topic_interval
       )
     end
 
@@ -694,6 +699,14 @@ module Kafka
       @cluster.partitions_for(topic).count
     end
 
+    # Counts the number of replicas for a topic's partition
+    #
+    # @param topic [String]
+    # @return [Integer] the number of replica nodes for the topic's partition
+    def replica_count_for(topic)
+      @cluster.partitions_for(topic).first.replicas.count
+    end
+
     # Retrieve the offset of the last message in a partition. If there are no
     # messages in the partition -1 is returned.
     #
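`replica_count_for` reads the replica list of the topic's first partition only, which suffices because Kafka assigns the same replication factor to every partition of a topic. A hypothetical call (topic name is illustrative):

```ruby
# => 3 for a topic created with --replication-factor 3
kafka.replica_count_for("page-visits")
```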
lib/kafka/consumer.rb CHANGED
@@ -44,7 +44,7 @@ module Kafka
   #
   class Consumer
 
-    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:)
+    def initialize(cluster:, logger:, instrumenter:, group:, fetcher:, offset_manager:, session_timeout:, heartbeat:, refresh_topic_interval: 0)
       @cluster = cluster
       @logger = TaggedLogger.new(logger)
       @instrumenter = instrumenter
@@ -53,6 +53,7 @@ module Kafka
       @session_timeout = session_timeout
       @fetcher = fetcher
       @heartbeat = heartbeat
+      @refresh_topic_interval = refresh_topic_interval
 
       @pauses = Hash.new {|h, k|
         h[k] = Hash.new {|h2, k2|
@@ -73,6 +74,15 @@ module Kafka
       # when user commits message other than last in a batch, this would make ruby-kafka refetch
       # some already consumed messages
       @current_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # Map storing subscribed topics with their configuration
+      @subscribed_topics = Concurrent::Map.new
+
+      # Set storing topics that matched topics in @subscribed_topics
+      @matched_topics = Set.new
+
+      # Whether join_group must be executed again because new topics are added
+      @join_group_for_new_topics = false
     end
 
     # Subscribes the consumer to a topic.
@@ -97,13 +107,12 @@ module Kafka
     def subscribe(topic_or_regex, default_offset: nil, start_from_beginning: true, max_bytes_per_partition: 1048576)
       default_offset ||= start_from_beginning ? :earliest : :latest
 
-      if topic_or_regex.is_a?(Regexp)
-        cluster_topics.select { |topic| topic =~ topic_or_regex }.each do |topic|
-          subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
-        end
-      else
-        subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
-      end
+      @subscribed_topics[topic_or_regex] = {
+        default_offset: default_offset,
+        start_from_beginning: start_from_beginning,
+        max_bytes_per_partition: max_bytes_per_partition
+      }
+      scan_for_subscribing
 
       nil
     end
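Because subscriptions are now stored as patterns and re-evaluated by `scan_for_subscribing`, a regex subscription combined with `refresh_topic_interval` can pick up topics created after the consumer started. A sketch of that flow (topic pattern and group name are illustrative):

```ruby
consumer = kafka.consumer(group_id: "auditor", refresh_topic_interval: 30)

# Topics matching the pattern that are created later are noticed on the
# next refresh; @join_group_for_new_topics then forces a rebalance.
consumer.subscribe(/^audit-.+/)

consumer.each_message do |message|
  puts "#{message.topic}/#{message.partition}: #{message.value}"
end
```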
@@ -116,7 +125,6 @@ module Kafka
     def stop
       @running = false
       @fetcher.stop
-      @cluster.disconnect
     end
 
     # Pause processing of a specific topic partition.
@@ -308,6 +316,7 @@ module Kafka
           topic: batch.topic,
           partition: batch.partition,
           last_offset: batch.last_offset,
+          last_create_time: batch.messages.last.try(:create_time),
           offset_lag: batch.offset_lag,
           highwater_mark_offset: batch.highwater_mark_offset,
           message_count: batch.messages.count,
@@ -401,6 +410,7 @@ module Kafka
       while running?
         begin
           @instrumenter.instrument("loop.consumer") do
+            refresh_topic_list_if_enabled
             yield
           end
         rescue HeartbeatError
@@ -432,6 +442,7 @@ module Kafka
       # important that members explicitly tell Kafka when they're leaving.
       make_final_offsets_commit!
       @group.leave rescue nil
+      @cluster.disconnect
       @running = false
       @logger.pop_tags
     end
@@ -452,6 +463,8 @@ module Kafka
     end
 
     def join_group
+      @join_group_for_new_topics = false
+
       old_generation_id = @group.generation_id
 
       @group.join
@@ -513,11 +526,19 @@
       end
     end
 
+    def refresh_topic_list_if_enabled
+      return if @refresh_topic_interval <= 0
+      return if @refreshed_at && @refreshed_at + @refresh_topic_interval > Time.now
+
+      scan_for_subscribing
+      @refreshed_at = Time.now
+    end
+
     def fetch_batches
       # Return early if the consumer has been stopped.
       return [] if shutting_down?
 
-      join_group unless @group.member?
+      join_group if !@group.member? || @join_group_for_new_topics
 
       trigger_heartbeat
 
@@ -525,7 +546,7 @@
 
       if !@fetcher.data?
         @logger.debug "No batches to process"
-        sleep 2
+        sleep(@fetcher.max_wait_time || 2)
         []
       else
         tag, message = @fetcher.poll
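With this change, an idle consumer sleeps for the fetcher's configured `max_wait_time` instead of a hardcoded 2 seconds, so the poll cadence follows the value passed to the processing loop. For example:

```ruby
# The sleep between empty polls now matches max_wait_time (5 seconds here)
# rather than always being 2 seconds.
consumer.each_batch(max_wait_time: 5) do |batch|
  batch.messages.each { |message| puts message.value }
end
```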
@@ -571,10 +592,34 @@
       end
     end
 
+    def scan_for_subscribing
+      @subscribed_topics.each do |topic_or_regex, config|
+        default_offset = config.fetch(:default_offset)
+        start_from_beginning = config.fetch(:start_from_beginning)
+        max_bytes_per_partition = config.fetch(:max_bytes_per_partition)
+        if topic_or_regex.is_a?(Regexp)
+          subscribe_to_regex(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        else
+          subscribe_to_topic(topic_or_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+        end
+      end
+    end
+
+    def subscribe_to_regex(topic_regex, default_offset, start_from_beginning, max_bytes_per_partition)
+      cluster_topics.select { |topic| topic =~ topic_regex }.each do |topic|
+        subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      end
+    end
+
     def subscribe_to_topic(topic, default_offset, start_from_beginning, max_bytes_per_partition)
+      return if @matched_topics.include?(topic)
+      @matched_topics.add(topic)
+      @join_group_for_new_topics = true
+
       @group.subscribe(topic)
       @offset_manager.set_default_offset(topic, default_offset)
       @fetcher.subscribe(topic, max_bytes_per_partition: max_bytes_per_partition)
+      @cluster.mark_as_stale!
     end
 
     def cluster_topics
lib/kafka/datadog.rb CHANGED
@@ -160,6 +160,8 @@ module Kafka
     def process_batch(event)
       offset = event.payload.fetch(:last_offset)
       messages = event.payload.fetch(:message_count)
+      create_time = event.payload.fetch(:last_create_time)
+      time_lag = create_time && ((Time.now - create_time) * 1000).to_i
 
       tags = {
         client: event.payload.fetch(:client_id),
@@ -176,6 +178,10 @@ module Kafka
       end
 
       gauge("consumer.offset", offset, tags: tags)
+
+      if time_lag
+        gauge("consumer.time_lag", time_lag, tags: tags)
+      end
     end
 
     def fetch_batch(event)
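The new `consumer.time_lag` gauge reports, in milliseconds, how far behind real time the last message of the batch was when it was processed. A worked example of the arithmetic above:

```ruby
create_time = Time.now - 2.5                         # produced 2.5 s ago
time_lag = create_time && ((Time.now - create_time) * 1000).to_i
# => 2500 (milliseconds)
```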
lib/kafka/fetcher.rb CHANGED
@@ -4,7 +4,7 @@ require "kafka/fetch_operation"
 
 module Kafka
   class Fetcher
-    attr_reader :queue
+    attr_reader :queue, :max_wait_time
 
     def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
      @cluster = cluster
lib/kafka/protocol/metadata_response.rb CHANGED
@@ -34,7 +34,7 @@ module Kafka
   #
   class MetadataResponse
     class PartitionMetadata
-      attr_reader :partition_id, :leader
+      attr_reader :partition_id, :leader, :replicas
 
       attr_reader :partition_error_code
 
lib/kafka/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Kafka
-  VERSION = "1.0.0"
+  VERSION = "1.1.0.beta1"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ruby-kafka
 version: !ruby/object:Gem::Version
-  version: 1.0.0
+  version: 1.1.0.beta1
 platform: ruby
 authors:
 - Daniel Schierbeck
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-02-25 00:00:00.000000000 Z
+date: 2020-05-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: digest-crc
@@ -489,9 +489,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: 2.1.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">="
+  - - ">"
     - !ruby/object:Gem::Version
-      version: '0'
+      version: 1.3.1
 requirements: []
 rubyforge_project:
 rubygems_version: 2.7.6