ruby-kafka-temp-fork 0.0.1

Files changed (144)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +310 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1342 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka.rb +373 -0
  28. data/lib/kafka/async_producer.rb +291 -0
  29. data/lib/kafka/broker.rb +217 -0
  30. data/lib/kafka/broker_info.rb +16 -0
  31. data/lib/kafka/broker_pool.rb +41 -0
  32. data/lib/kafka/broker_uri.rb +43 -0
  33. data/lib/kafka/client.rb +833 -0
  34. data/lib/kafka/cluster.rb +513 -0
  35. data/lib/kafka/compression.rb +45 -0
  36. data/lib/kafka/compressor.rb +86 -0
  37. data/lib/kafka/connection.rb +223 -0
  38. data/lib/kafka/connection_builder.rb +33 -0
  39. data/lib/kafka/consumer.rb +642 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/consumer_group/assignor.rb +63 -0
  42. data/lib/kafka/crc32_hash.rb +15 -0
  43. data/lib/kafka/datadog.rb +420 -0
  44. data/lib/kafka/digest.rb +22 -0
  45. data/lib/kafka/fetch_operation.rb +115 -0
  46. data/lib/kafka/fetched_batch.rb +58 -0
  47. data/lib/kafka/fetched_batch_generator.rb +120 -0
  48. data/lib/kafka/fetched_message.rb +48 -0
  49. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  50. data/lib/kafka/fetcher.rb +224 -0
  51. data/lib/kafka/gzip_codec.rb +34 -0
  52. data/lib/kafka/heartbeat.rb +25 -0
  53. data/lib/kafka/instrumenter.rb +38 -0
  54. data/lib/kafka/interceptors.rb +33 -0
  55. data/lib/kafka/lz4_codec.rb +27 -0
  56. data/lib/kafka/message_buffer.rb +87 -0
  57. data/lib/kafka/murmur2_hash.rb +17 -0
  58. data/lib/kafka/offset_manager.rb +259 -0
  59. data/lib/kafka/partitioner.rb +40 -0
  60. data/lib/kafka/pause.rb +92 -0
  61. data/lib/kafka/pending_message.rb +29 -0
  62. data/lib/kafka/pending_message_queue.rb +41 -0
  63. data/lib/kafka/produce_operation.rb +205 -0
  64. data/lib/kafka/producer.rb +528 -0
  65. data/lib/kafka/prometheus.rb +316 -0
  66. data/lib/kafka/protocol.rb +225 -0
  67. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  68. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  69. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  70. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  71. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  72. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  73. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  74. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  75. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  76. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  77. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  78. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  79. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  80. data/lib/kafka/protocol/decoder.rb +175 -0
  81. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  82. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  83. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  84. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  85. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  86. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  87. data/lib/kafka/protocol/encoder.rb +184 -0
  88. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  89. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  90. data/lib/kafka/protocol/fetch_request.rb +70 -0
  91. data/lib/kafka/protocol/fetch_response.rb +136 -0
  92. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  93. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  94. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  95. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  96. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  97. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  98. data/lib/kafka/protocol/join_group_request.rb +47 -0
  99. data/lib/kafka/protocol/join_group_response.rb +41 -0
  100. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  101. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  102. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  103. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  104. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  105. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  106. data/lib/kafka/protocol/member_assignment.rb +42 -0
  107. data/lib/kafka/protocol/message.rb +172 -0
  108. data/lib/kafka/protocol/message_set.rb +55 -0
  109. data/lib/kafka/protocol/metadata_request.rb +31 -0
  110. data/lib/kafka/protocol/metadata_response.rb +185 -0
  111. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  112. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  113. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  114. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  115. data/lib/kafka/protocol/produce_request.rb +94 -0
  116. data/lib/kafka/protocol/produce_response.rb +63 -0
  117. data/lib/kafka/protocol/record.rb +88 -0
  118. data/lib/kafka/protocol/record_batch.rb +223 -0
  119. data/lib/kafka/protocol/request_message.rb +26 -0
  120. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  121. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  122. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  123. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  124. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  125. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  126. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +61 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +188 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/ruby-kafka-temp-fork.rb +5 -0
  143. data/ruby-kafka-temp-fork.gemspec +54 -0
  144. metadata +520 -0
data/lib/kafka/digest.rb
@@ -0,0 +1,22 @@
+ # frozen_string_literal: true
+
+ require "kafka/crc32_hash"
+ require "kafka/murmur2_hash"
+
+ module Kafka
+   module Digest
+     FUNCTIONS_BY_NAME = {
+       :crc32 => Crc32Hash.new,
+       :murmur2 => Murmur2Hash.new
+     }.freeze
+
+     def self.find_digest(name)
+       digest = FUNCTIONS_BY_NAME.fetch(name) do
+         raise LoadError, "Unknown hash function #{name}"
+       end
+
+       digest.load
+       digest
+     end
+   end
+ end
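
Worth noting: `find_digest` both looks up and lazily loads the hashing backend, so a missing optional dependency (e.g. the murmur2 digest gem) surfaces as an error at lookup time rather than at partitioning time. A minimal usage sketch, assuming the returned object exposes a `#hash(value)` method as the partitioner relies on:

    require "kafka/digest"

    # Look up a hashing function by name; unknown names raise LoadError,
    # and #load raises if the backend's dependency is not installed.
    digest = Kafka::Digest.find_digest(:crc32)

    # Assumption: the returned object responds to #hash(value), which
    # Kafka::Partitioner uses to map a partition key to a partition number.
    bucket = digest.hash("some-partition-key") % 32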
data/lib/kafka/fetch_operation.rb
@@ -0,0 +1,115 @@
+ # frozen_string_literal: true
+
+ require "kafka/fetched_offset_resolver"
+ require "kafka/fetched_batch_generator"
+
+ module Kafka
+
+   # Fetches messages from one or more partitions.
+   #
+   #     operation = Kafka::FetchOperation.new(
+   #       cluster: cluster,
+   #       logger: logger,
+   #       min_bytes: 1,
+   #       max_wait_time: 10,
+   #     )
+   #
+   #     # These calls will schedule fetches from the specified topics/partitions.
+   #     operation.fetch_from_partition("greetings", 42, offset: :latest, max_bytes: 100000)
+   #     operation.fetch_from_partition("goodbyes", 13, offset: :latest, max_bytes: 100000)
+   #
+   #     operation.execute
+   #
+   class FetchOperation
+     def initialize(cluster:, logger:, min_bytes: 1, max_bytes: 10485760, max_wait_time: 5)
+       @cluster = cluster
+       @logger = TaggedLogger.new(logger)
+       @min_bytes = min_bytes
+       @max_bytes = max_bytes
+       @max_wait_time = max_wait_time
+       @topics = {}
+
+       @offset_resolver = Kafka::FetchedOffsetResolver.new(
+         logger: logger
+       )
+     end
+
+     def fetch_from_partition(topic, partition, offset: :latest, max_bytes: 1048576)
+       if offset == :earliest
+         offset = -2
+       elsif offset == :latest
+         offset = -1
+       end
+
+       @topics[topic] ||= {}
+       @topics[topic][partition] = {
+         fetch_offset: offset,
+         max_bytes: max_bytes,
+       }
+     end
+
+     def execute
+       @cluster.add_target_topics(@topics.keys)
+       @cluster.refresh_metadata_if_necessary!
+
+       topics_by_broker = {}
+
+       if @topics.none? {|topic, partitions| partitions.any? }
+         raise NoPartitionsToFetchFrom
+       end
+
+       @topics.each do |topic, partitions|
+         partitions.each do |partition, options|
+           broker = @cluster.get_leader(topic, partition)
+
+           topics_by_broker[broker] ||= {}
+           topics_by_broker[broker][topic] ||= {}
+           topics_by_broker[broker][topic][partition] = options
+         end
+       end
+
+       topics_by_broker.flat_map do |broker, topics|
+         @offset_resolver.resolve!(broker, topics)
+
+         options = {
+           max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
+           min_bytes: @min_bytes,
+           max_bytes: @max_bytes,
+           topics: topics,
+         }
+
+         response = broker.fetch_messages(**options)
+
+         response.topics.flat_map do |fetched_topic|
+           fetched_topic.partitions.map do |fetched_partition|
+             begin
+               Protocol.handle_error(fetched_partition.error_code)
+             rescue Kafka::OffsetOutOfRange => e
+               e.topic = fetched_topic.name
+               e.partition = fetched_partition.partition
+               e.offset = topics.fetch(e.topic).fetch(e.partition).fetch(:fetch_offset)
+
+               raise e
+             rescue Kafka::Error => e
+               topic = fetched_topic.name
+               partition = fetched_partition.partition
+               @logger.error "Failed to fetch from #{topic}/#{partition}: #{e.message}"
+               raise e
+             end
+
+             Kafka::FetchedBatchGenerator.new(
+               fetched_topic.name,
+               fetched_partition,
+               topics.fetch(fetched_topic.name).fetch(fetched_partition.partition).fetch(:fetch_offset),
+               logger: @logger
+             ).generate
+           end
+         end
+       end
+     rescue Kafka::ConnectionError, Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
+       @cluster.mark_as_stale!
+
+       raise
+     end
+   end
+ end
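
The symbolic `:earliest`/`:latest` offsets are translated into the standard Kafka ListOffsets sentinels (-2 and -1) before being resolved against each partition's leader. A usage sketch, assuming `cluster` and `logger` are already constructed elsewhere:

    # -2 requests the earliest available offset, -1 the latest; concrete
    # integer offsets pass through unchanged.
    operation = Kafka::FetchOperation.new(cluster: cluster, logger: logger)

    operation.fetch_from_partition("greetings", 0, offset: :earliest) # stored as -2
    operation.fetch_from_partition("greetings", 1, offset: 1234)      # used as-is

    batches = operation.execute # => Array of Kafka::FetchedBatch, one fetch per leader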
data/lib/kafka/fetched_batch.rb
@@ -0,0 +1,58 @@
+ # frozen_string_literal: true
+
+ module Kafka
+
+   # An ordered sequence of messages fetched from a Kafka partition.
+   class FetchedBatch
+     # @return [String]
+     attr_reader :topic
+
+     # @return [Integer]
+     attr_reader :partition
+
+     # @return [Integer]
+     attr_reader :last_offset
+
+     # @return [Integer]
+     attr_reader :leader_epoch
+
+     # @return [Integer] the offset of the most recent message in the partition.
+     attr_reader :highwater_mark_offset
+
+     # @return [Array<Kafka::FetchedMessage>]
+     attr_accessor :messages
+
+     def initialize(topic:, partition:, highwater_mark_offset:, messages:, last_offset: nil, leader_epoch: nil)
+       @topic = topic
+       @partition = partition
+       @highwater_mark_offset = highwater_mark_offset
+       @messages = messages
+       @last_offset = last_offset
+       @leader_epoch = leader_epoch
+     end
+
+     def empty?
+       @messages.empty?
+     end
+
+     def unknown_last_offset?
+       @last_offset.nil?
+     end
+
+     def first_offset
+       if empty?
+         nil
+       else
+         messages.first.offset
+       end
+     end
+
+     def offset_lag
+       if empty?
+         0
+       else
+         (highwater_mark_offset - 1) - last_offset
+       end
+     end
+   end
+ end
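
The `offset_lag` arithmetic subtracts one from the high-water mark because the mark points at the next offset to be written, not at the newest message. A worked sketch, using a hypothetical stand-in for the message objects:

    require "kafka/fetched_batch"

    # Hypothetical stand-in for Kafka::FetchedMessage; only #offset matters here.
    StubMessage = Struct.new(:offset)

    batch = Kafka::FetchedBatch.new(
      topic: "greetings",
      partition: 0,
      highwater_mark_offset: 100, # next offset to be written; newest message is at 99
      last_offset: 95,
      messages: [StubMessage.new(90)]
    )

    batch.offset_lag # => (100 - 1) - 95 = 4 messages behind the head of the partition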
data/lib/kafka/fetched_batch_generator.rb
@@ -0,0 +1,120 @@
+ # frozen_string_literal: true
+
+ require "kafka/fetched_batch"
+
+ module Kafka
+   class FetchedBatchGenerator
+     COMMITTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x01".freeze
+     ABORTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x00".freeze
+
+     def initialize(topic, fetched_partition, offset, logger:)
+       @topic = topic
+       @fetched_partition = fetched_partition
+       @logger = TaggedLogger.new(logger)
+       @offset = offset
+     end
+
+     def generate
+       if @fetched_partition.messages.empty?
+         empty_fetched_batch
+       elsif @fetched_partition.messages.first.is_a?(Kafka::Protocol::MessageSet)
+         extract_messages
+       else
+         extract_records
+       end
+     end
+
+     private
+
+     def empty_fetched_batch
+       FetchedBatch.new(
+         topic: @topic,
+         partition: @fetched_partition.partition,
+         last_offset: nil,
+         highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+         messages: []
+       )
+     end
+
+     def extract_messages
+       last_offset = nil
+       messages = @fetched_partition.messages.flat_map do |message_set|
+         message_set.messages.map do |message|
+           last_offset = message.offset if last_offset.nil? || last_offset < message.offset
+           if message.offset >= @offset
+             FetchedMessage.new(
+               message: message,
+               topic: @topic,
+               partition: @fetched_partition.partition
+             )
+           end
+         end.compact
+       end
+       FetchedBatch.new(
+         topic: @topic,
+         partition: @fetched_partition.partition,
+         last_offset: last_offset,
+         highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+         messages: messages
+       )
+     end
+
+     def extract_records
+       records = []
+       last_offset = nil
+       leader_epoch = nil
+       aborted_transactions = @fetched_partition.aborted_transactions.sort_by(&:first_offset)
+       aborted_producer_ids = {}
+
+       @fetched_partition.messages.each do |record_batch|
+         last_offset = record_batch.last_offset if last_offset.nil? || last_offset < record_batch.last_offset
+         leader_epoch = record_batch.partition_leader_epoch if leader_epoch.nil? || leader_epoch < record_batch.partition_leader_epoch
+         # Find the list of aborted producer IDs less than current offset
+         unless aborted_transactions.empty?
+           if aborted_transactions.first.first_offset <= record_batch.last_offset
+             aborted_transaction = aborted_transactions.shift
+             aborted_producer_ids[aborted_transaction.producer_id] = aborted_transaction.first_offset
+           end
+         end
+
+         if abort_marker?(record_batch)
+           # Abort marker, remove the producer from the aborted list
+           aborted_producer_ids.delete(record_batch.producer_id)
+         elsif aborted_producer_ids.key?(record_batch.producer_id) && record_batch.in_transaction
+           # Reject aborted record batch
+           @logger.info("Reject #{record_batch.records.size} aborted records of topic '#{@topic}', partition #{@fetched_partition.partition}, from offset #{record_batch.first_offset}")
+           next
+         end
+
+         record_batch.records.each do |record|
+           if !record.is_control_record && record.offset >= @offset
+             records << FetchedMessage.new(
+               message: record,
+               topic: @topic,
+               partition: @fetched_partition.partition
+             )
+           end
+         end
+       end
+
+       FetchedBatch.new(
+         topic: @topic,
+         partition: @fetched_partition.partition,
+         last_offset: last_offset,
+         leader_epoch: leader_epoch,
+         highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+         messages: records
+       )
+     end
+
+     def abort_marker?(record_batch)
+       return false unless record_batch.is_control_batch
+
+       if record_batch.records.empty?
+         raise "Invalid control record batch at topic '#{@topic}', partition #{@fetched_partition.partition}"
+       end
+
+       record_batch.records.first.key == ABORTED_TRANSACTION_SIGNAL
+     end
+   end
+ end
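
The two transaction signals are serialized control-record keys: a two-byte version (0) followed by a two-byte type, where type 0 marks an abort and type 1 a commit. A sketch of the comparison performed by the private `abort_marker?`, using hypothetical stand-ins for the protocol structs:

    # Hypothetical stand-ins; the real objects are Kafka::Protocol::RecordBatch
    # and Kafka::Protocol::Record.
    ControlRecord = Struct.new(:key)
    ControlBatch  = Struct.new(:is_control_batch, :records)

    marker = ControlBatch.new(true, [ControlRecord.new("\x00\x00\x00\x00")])

    marker.records.first.key == Kafka::FetchedBatchGenerator::ABORTED_TRANSACTION_SIGNAL
    # => true, so records from that producer's aborted transaction are dropped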
data/lib/kafka/fetched_message.rb
@@ -0,0 +1,48 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class FetchedMessage
+     # @return [String] the name of the topic that the message was written to.
+     attr_reader :topic
+
+     # @return [Integer] the partition number that the message was written to.
+     attr_reader :partition
+
+     def initialize(message:, topic:, partition:)
+       @message = message
+       @topic = topic
+       @partition = partition
+     end
+
+     # @return [String] the value of the message.
+     def value
+       @message.value
+     end
+
+     # @return [String] the key of the message.
+     def key
+       @message.key
+     end
+
+     # @return [Integer] the offset of the message in the partition.
+     def offset
+       @message.offset
+     end
+
+     # @return [Time] the timestamp of the message.
+     def create_time
+       @message.create_time
+     end
+
+     # @return [Hash<String, String>] the headers of the message.
+     def headers
+       @message.headers
+     end
+
+     # @return [Boolean] whether this record is a control record
+     def is_control_record
+       @message.is_control_record
+     end
+
+   end
+ end
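
`FetchedMessage` is a thin wrapper that delegates to the underlying protocol record while pinning down which topic and partition it came from. A sketch of consumer-side access:

    message = Kafka::FetchedMessage.new(
      message: record,   # hypothetical: a decoded protocol record
      topic: "greetings",
      partition: 0
    )

    message.offset  # => the record's position within the partition
    message.key     # => partitioning key, or nil
    message.value   # => the payload
    message.headers # => Hash of header names to values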
data/lib/kafka/fetched_offset_resolver.rb
@@ -0,0 +1,48 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class FetchedOffsetResolver
+     def initialize(logger:)
+       @logger = TaggedLogger.new(logger)
+     end
+
+     def resolve!(broker, topics)
+       pending_topics = filter_pending_topics(topics)
+       return topics if pending_topics.empty?
+
+       response = broker.list_offsets(topics: pending_topics)
+
+       pending_topics.each do |topic, partitions|
+         partitions.each do |options|
+           partition = options.fetch(:partition)
+           resolved_offset = response.offset_for(topic, partition)
+
+           @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
+
+           topics[topic][partition][:fetch_offset] = resolved_offset || 0
+         end
+       end
+     end
+
+     private
+
+     def filter_pending_topics(topics)
+       pending_topics = {}
+       topics.each do |topic, partitions|
+         partitions.each do |partition, options|
+           offset = options.fetch(:fetch_offset)
+           next if offset >= 0
+
+           @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
+
+           pending_topics[topic] ||= []
+           pending_topics[topic] << {
+             partition: partition,
+             time: offset
+           }
+         end
+       end
+       pending_topics
+     end
+   end
+ end
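
Only sentinel offsets (negative values, per the ListOffsets convention) trigger a broker round trip; concrete offsets pass through untouched. A sketch with hypothetical data, assuming `broker` is a connected broker object responding to `#list_offsets`:

    require "logger"

    topics = {
      "greetings" => {
        0 => { fetch_offset: -1 },   # :latest — must be resolved by the broker
        1 => { fetch_offset: 1234 }, # already concrete — skipped
      }
    }

    resolver = Kafka::FetchedOffsetResolver.new(logger: Logger.new($stdout))
    resolver.resolve!(broker, topics)

    topics["greetings"][0][:fetch_offset] # now a real offset (or 0 if none was returned)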
data/lib/kafka/fetcher.rb
@@ -0,0 +1,224 @@
+ # frozen_string_literal: true
+
+ require "kafka/fetch_operation"
+
+ module Kafka
+   class Fetcher
+     attr_reader :queue, :max_wait_time
+
+     def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
+       @cluster = cluster
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = instrumenter
+       @max_queue_size = max_queue_size
+       @group = group
+
+       @queue = Queue.new
+       @commands = Queue.new
+       @next_offsets = Hash.new { |h, k| h[k] = {} }
+
+       # We are only running when someone calls start.
+       @running = false
+
+       # Long poll until at least this many bytes can be fetched.
+       @min_bytes = 1
+
+       # Long poll at most this number of seconds.
+       @max_wait_time = 1
+
+       # The maximum number of bytes to fetch for any given fetch request.
+       @max_bytes = 10485760
+
+       # The maximum number of bytes to fetch per partition, by topic.
+       @max_bytes_per_partition = {}
+
+       # An incrementing counter used to synchronize resets between the
+       # foreground and background thread.
+       @current_reset_counter = 0
+     end
+
+     def subscribe(topic, max_bytes_per_partition:)
+       @commands << [:subscribe, [topic, max_bytes_per_partition]]
+     end
+
+     def seek(topic, partition, offset)
+       @commands << [:seek, [topic, partition, offset]]
+     end
+
+     def configure(min_bytes:, max_bytes:, max_wait_time:)
+       @commands << [:configure, [min_bytes, max_bytes, max_wait_time]]
+     end
+
+     def start
+       return if @running
+
+       @running = true
+
+       @thread = Thread.new do
+         while @running
+           loop
+         end
+         @logger.info "#{@group} Fetcher thread exited."
+       end
+       @thread.abort_on_exception = true
+     end
+
+     def stop
+       return unless @running
+       @commands << [:stop, []]
+       @thread.join
+     end
+
+     def reset
+       @current_reset_counter = current_reset_counter + 1
+       @commands << [:reset]
+     end
+
+     def data?
+       !@queue.empty?
+     end
+
+     def poll
+       tag, message, reset_counter = @queue.deq
+
+       # Batches are tagged with the current reset counter value. If the batch
+       # has a reset_counter < current_reset_counter, we know it was fetched
+       # prior to the most recent reset and should be discarded.
+       if tag == :batches && message.any? && current_reset_counter > reset_counter
+         @logger.warn "Skipping stale messages buffered prior to reset"
+         return tag, []
+       end
+
+       return [tag, message]
+     end
+
+     private
+
+     attr_reader :current_reset_counter
+
+     def loop
+       @logger.push_tags(@group.to_s)
+       @instrumenter.instrument("loop.fetcher", {
+         queue_size: @queue.size,
+       })
+
+       return unless @running
+
+       if !@commands.empty?
+         cmd, args = @commands.deq
+
+         @logger.debug "Handling fetcher command: #{cmd}"
+
+         send("handle_#{cmd}", *args)
+       elsif @queue.size < @max_queue_size
+         step
+       else
+         @logger.info "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
+         sleep 1
+       end
+     ensure
+       @logger.pop_tags
+     end
+
+     def handle_configure(min_bytes, max_bytes, max_wait_time)
+       @min_bytes = min_bytes
+       @max_bytes = max_bytes
+       @max_wait_time = max_wait_time
+     end
+
+     def handle_reset
+       @next_offsets.clear
+       @queue.clear
+     end
+
+     def handle_stop(*)
+       @running = false
+       @commands.clear
+
+       # After stopping, we need to reconfigure the topics and partitions to fetch
+       # from. Otherwise we'd keep fetching from a bunch of partitions we may no
+       # longer be assigned.
+       handle_reset
+     end
+
+     def handle_subscribe(topic, max_bytes_per_partition)
+       @logger.info "Will fetch at most #{max_bytes_per_partition} bytes at a time per partition from #{topic}"
+       @max_bytes_per_partition[topic] = max_bytes_per_partition
+     end
+
+     def handle_seek(topic, partition, offset)
+       @instrumenter.instrument('seek.consumer',
+                                group_id: @group.group_id,
+                                topic: topic,
+                                partition: partition,
+                                offset: offset)
+       @logger.info "Seeking #{topic}/#{partition} to offset #{offset}"
+       @next_offsets[topic][partition] = offset
+     end
+
+     def step
+       batches = fetch_batches
+
+       batches.each do |batch|
+         unless batch.empty?
+           @instrumenter.instrument("fetch_batch.consumer", {
+             topic: batch.topic,
+             partition: batch.partition,
+             offset_lag: batch.offset_lag,
+             highwater_mark_offset: batch.highwater_mark_offset,
+             message_count: batch.messages.count,
+           })
+         end
+
+         @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
+       end
+
+       @queue << [:batches, batches, current_reset_counter]
+     rescue Kafka::NoPartitionsToFetchFrom
+       @logger.warn "No partitions to fetch from, sleeping for 1s"
+       sleep 1
+     rescue Kafka::Error => e
+       @queue << [:exception, e]
+     end
+
+     def fetch_batches
+       @logger.debug "Fetching batches"
+
+       operation = FetchOperation.new(
+         cluster: @cluster,
+         logger: @logger,
+         min_bytes: @min_bytes,
+         max_bytes: @max_bytes,
+         max_wait_time: @max_wait_time,
+       )
+
+       @next_offsets.each do |topic, partitions|
+         # Fetch at most this many bytes from any single partition.
+         max_bytes = @max_bytes_per_partition[topic]
+
+         partitions.each do |partition, offset|
+           operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
+         end
+       end
+
+       operation.execute
+     rescue UnknownTopicOrPartition
+       @logger.error "Failed to fetch from some partitions. Maybe a rebalance has happened? Refreshing cluster info."
+
+       # Our cluster information has become stale, we need to refresh it.
+       @cluster.refresh_metadata!
+
+       # Don't overwhelm the brokers in case this keeps happening.
+       sleep 10
+
+       retry
+     rescue NoPartitionsToFetchFrom
+       backoff = @max_wait_time > 0 ? @max_wait_time : 1
+
+       @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
+       sleep backoff
+
+       []
+     end
+   end
+ end
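
All mutation of fetcher state funnels through the `@commands` queue and is applied on the background thread, which is what makes `subscribe`, `seek`, and `configure` safe to call from the consumer thread. A usage sketch, assuming `cluster`, `logger`, `instrumenter`, and `group` are already built elsewhere:

    fetcher = Kafka::Fetcher.new(
      cluster: cluster,
      logger: logger,
      instrumenter: instrumenter,
      max_queue_size: 100, # background thread pauses once this many batches are buffered
      group: group,
    )

    fetcher.subscribe("greetings", max_bytes_per_partition: 1_048_576)
    fetcher.configure(min_bytes: 1, max_bytes: 10_485_760, max_wait_time: 5)
    fetcher.start

    # Blocks until the background thread enqueues something.
    tag, payload = fetcher.poll # => [:batches, [...]] or [:exception, error]

    fetcher.stop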