ruby-kafka-aws-iam 1.4.1

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
data/lib/kafka/fetch_operation.rb
@@ -0,0 +1,115 @@
+# frozen_string_literal: true
+
+require "kafka/fetched_offset_resolver"
+require "kafka/fetched_batch_generator"
+
+module Kafka
+
+  # Fetches messages from one or more partitions.
+  #
+  #   operation = Kafka::FetchOperation.new(
+  #     cluster: cluster,
+  #     logger: logger,
+  #     min_bytes: 1,
+  #     max_wait_time: 10,
+  #   )
+  #
+  #   # These calls will schedule fetches from the specified topics/partitions.
+  #   operation.fetch_from_partition("greetings", 42, offset: :latest, max_bytes: 100000)
+  #   operation.fetch_from_partition("goodbyes", 13, offset: :latest, max_bytes: 100000)
+  #
+  #   operation.execute
+  #
+  class FetchOperation
+    def initialize(cluster:, logger:, min_bytes: 1, max_bytes: 10485760, max_wait_time: 5)
+      @cluster = cluster
+      @logger = TaggedLogger.new(logger)
+      @min_bytes = min_bytes
+      @max_bytes = max_bytes
+      @max_wait_time = max_wait_time
+      @topics = {}
+
+      @offset_resolver = Kafka::FetchedOffsetResolver.new(
+        logger: logger
+      )
+    end
+
+    def fetch_from_partition(topic, partition, offset: :latest, max_bytes: 1048576)
+      if offset == :earliest
+        offset = -2
+      elsif offset == :latest
+        offset = -1
+      end
+
+      @topics[topic] ||= {}
+      @topics[topic][partition] = {
+        fetch_offset: offset,
+        max_bytes: max_bytes,
+      }
+    end
+
+    def execute
+      @cluster.add_target_topics(@topics.keys)
+      @cluster.refresh_metadata_if_necessary!
+
+      topics_by_broker = {}
+
+      if @topics.none? {|topic, partitions| partitions.any? }
+        raise NoPartitionsToFetchFrom
+      end
+
+      @topics.each do |topic, partitions|
+        partitions.each do |partition, options|
+          broker = @cluster.get_leader(topic, partition)
+
+          topics_by_broker[broker] ||= {}
+          topics_by_broker[broker][topic] ||= {}
+          topics_by_broker[broker][topic][partition] = options
+        end
+      end
+
+      topics_by_broker.flat_map do |broker, topics|
+        @offset_resolver.resolve!(broker, topics)
+
+        options = {
+          max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
+          min_bytes: @min_bytes,
+          max_bytes: @max_bytes,
+          topics: topics,
+        }
+
+        response = broker.fetch_messages(**options)
+
+        response.topics.flat_map do |fetched_topic|
+          fetched_topic.partitions.map do |fetched_partition|
+            begin
+              Protocol.handle_error(fetched_partition.error_code)
+            rescue Kafka::OffsetOutOfRange => e
+              e.topic = fetched_topic.name
+              e.partition = fetched_partition.partition
+              e.offset = topics.fetch(e.topic).fetch(e.partition).fetch(:fetch_offset)
+
+              raise e
+            rescue Kafka::Error => e
+              topic = fetched_topic.name
+              partition = fetched_partition.partition
+              @logger.error "Failed to fetch from #{topic}/#{partition}: #{e.message}"
+              raise e
+            end
+
+            Kafka::FetchedBatchGenerator.new(
+              fetched_topic.name,
+              fetched_partition,
+              topics.fetch(fetched_topic.name).fetch(fetched_partition.partition).fetch(:fetch_offset),
+              logger: @logger
+            ).generate
+          end
+        end
+      end
+    rescue Kafka::ConnectionError, Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
+      @cluster.mark_as_stale!
+
+      raise
+    end
+  end
+end
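Note on fetch_operation.rb: fetch_from_partition maps the symbolic offsets to the sentinel values Kafka's ListOffsets API expects (-2 for earliest, -1 for latest) before FetchedOffsetResolver resolves them. A minimal standalone sketch of that mapping (the normalize_offset helper is illustrative, not part of the gem):

    # Illustrative helper -- mirrors the offset normalization in
    # Kafka::FetchOperation#fetch_from_partition.
    def normalize_offset(offset)
      case offset
      when :earliest then -2 # Kafka sentinel: oldest available offset
      when :latest   then -1 # Kafka sentinel: next offset to be written
      else offset            # concrete offsets pass through unchanged
      end
    end

    normalize_offset(:earliest) # => -2
    normalize_offset(:latest)   # => -1
    normalize_offset(42)        # => 42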
data/lib/kafka/fetched_batch.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+module Kafka
+
+  # An ordered sequence of messages fetched from a Kafka partition.
+  class FetchedBatch
+    # @return [String]
+    attr_reader :topic
+
+    # @return [Integer]
+    attr_reader :partition
+
+    # @return [Integer]
+    attr_reader :last_offset
+
+    # @return [Integer]
+    attr_reader :leader_epoch
+
+    # @return [Integer] the offset of the most recent message in the partition.
+    attr_reader :highwater_mark_offset
+
+    # @return [Array<Kafka::FetchedMessage>]
+    attr_accessor :messages
+
+    def initialize(topic:, partition:, highwater_mark_offset:, messages:, last_offset: nil, leader_epoch: nil)
+      @topic = topic
+      @partition = partition
+      @highwater_mark_offset = highwater_mark_offset
+      @messages = messages
+      @last_offset = last_offset
+      @leader_epoch = leader_epoch
+    end
+
+    def empty?
+      @messages.empty?
+    end
+
+    def unknown_last_offset?
+      @last_offset.nil?
+    end
+
+    def first_offset
+      if empty?
+        nil
+      else
+        messages.first.offset
+      end
+    end
+
+    def offset_lag
+      if empty?
+        0
+      else
+        (highwater_mark_offset - 1) - last_offset
+      end
+    end
+  end
+end
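Note on fetched_batch.rb: the high-water mark is the offset that will be assigned to the next message, so the newest existing message sits at highwater_mark_offset - 1. A quick check of the offset_lag arithmetic, assuming ruby-kafka is loaded (OpenStruct stands in for a real fetched message):

    require "ostruct"

    batch = Kafka::FetchedBatch.new(
      topic: "greetings",
      partition: 0,
      highwater_mark_offset: 100, # next offset to be written
      last_offset: 95,            # newest offset in this batch
      messages: [OpenStruct.new(offset: 90)]
    )

    batch.offset_lag   # => (100 - 1) - 95 = 4
    batch.first_offset # => 90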
data/lib/kafka/fetched_batch_generator.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+require "kafka/fetched_batch"
+
+module Kafka
+  class FetchedBatchGenerator
+    COMMITTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x01".freeze
+    ABORTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x00".freeze
+
+    def initialize(topic, fetched_partition, offset, logger:)
+      @topic = topic
+      @fetched_partition = fetched_partition
+      @logger = TaggedLogger.new(logger)
+      @offset = offset
+    end
+
+    def generate
+      if @fetched_partition.messages.empty?
+        empty_fetched_batch
+      elsif @fetched_partition.messages.first.is_a?(Kafka::Protocol::MessageSet)
+        extract_messages
+      else
+        extract_records
+      end
+    end
+
+    private
+
+    def empty_fetched_batch
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: nil,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: []
+      )
+    end
+
+    def extract_messages
+      last_offset = nil
+      messages = @fetched_partition.messages.flat_map do |message_set|
+        message_set.messages.map do |message|
+          last_offset = message.offset if last_offset.nil? || last_offset < message.offset
+          if message.offset >= @offset
+            FetchedMessage.new(
+              message: message,
+              topic: @topic,
+              partition: @fetched_partition.partition
+            )
+          end
+        end.compact
+      end
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: last_offset,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: messages
+      )
+    end
+
+    def extract_records
+      records = []
+      last_offset = nil
+      leader_epoch = nil
+      aborted_transactions = @fetched_partition.aborted_transactions.sort_by(&:first_offset)
+      aborted_producer_ids = {}
+
+      @fetched_partition.messages.each do |record_batch|
+        last_offset = record_batch.last_offset if last_offset.nil? || last_offset < record_batch.last_offset
+        leader_epoch = record_batch.partition_leader_epoch if leader_epoch.nil? || leader_epoch < record_batch.partition_leader_epoch
+        # Find the list of aborted producer IDs less than current offset
+        unless aborted_transactions.empty?
+          if aborted_transactions.first.first_offset <= record_batch.last_offset
+            aborted_transaction = aborted_transactions.shift
+            aborted_producer_ids[aborted_transaction.producer_id] = aborted_transaction.first_offset
+          end
+        end
+
+        if abort_marker?(record_batch)
+          # Abort marker, remove the producer from the aborted list
+          aborted_producer_ids.delete(record_batch.producer_id)
+        elsif aborted_producer_ids.key?(record_batch.producer_id) && record_batch.in_transaction
+          # Reject aborted record batch
+          @logger.info("Reject #{record_batch.records.size} aborted records of topic '#{@topic}', partition #{@fetched_partition.partition}, from offset #{record_batch.first_offset}")
+          next
+        end
+
+        record_batch.records.each do |record|
+          if !record.is_control_record && record.offset >= @offset
+            records << FetchedMessage.new(
+              message: record,
+              topic: @topic,
+              partition: @fetched_partition.partition
+            )
+          end
+        end
+      end
+
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: last_offset,
+        leader_epoch: leader_epoch,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: records
+      )
+    end
+
+    def abort_marker?(record_batch)
+      return false unless record_batch.is_control_batch
+
+      if record_batch.records.empty?
+        raise "Invalid control record batch at topic '#{@topic}', partition #{@fetched_partition}"
+      end
+
+      record_batch.records.first.key == ABORTED_TRANSACTION_SIGNAL
+    end
+  end
+end
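Note on fetched_batch_generator.rb: the transactional filtering hinges on the key of the control record that closes a batch; the 4-byte big-endian value 0 marks an abort and 1 a commit. A hedged standalone sketch of just that check (abort_marker_key? is illustrative, not the gem's API):

    # Illustrative only -- the control-record keys used by abort_marker? above.
    ABORTED_TRANSACTION_SIGNAL   = "\x00\x00\x00\x00"
    COMMITTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x01"

    def abort_marker_key?(control_record_key)
      control_record_key == ABORTED_TRANSACTION_SIGNAL
    end

    abort_marker_key?("\x00\x00\x00\x00") # => true: the transaction was aborted
    abort_marker_key?("\x00\x00\x00\x01") # => false: the transaction committed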
data/lib/kafka/fetched_message.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module Kafka
+  class FetchedMessage
+    # @return [String] the name of the topic that the message was written to.
+    attr_reader :topic
+
+    # @return [Integer] the partition number that the message was written to.
+    attr_reader :partition
+
+    def initialize(message:, topic:, partition:)
+      @message = message
+      @topic = topic
+      @partition = partition
+    end
+
+    # @return [String] the value of the message.
+    def value
+      @message.value
+    end
+
+    # @return [String] the key of the message.
+    def key
+      @message.key
+    end
+
+    # @return [Integer] the offset of the message in the partition.
+    def offset
+      @message.offset
+    end
+
+    # @return [Time] the timestamp of the message.
+    def create_time
+      @message.create_time
+    end
+
+    # @return [Hash<String, String>] the headers of the message.
+    def headers
+      @message.headers
+    end
+
+    # @return [Boolean] whether this record is a control record
+    def is_control_record
+      @message.is_control_record
+    end
+
+  end
+end
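Note on fetched_message.rb: the class simply decorates a protocol-level record with its topic and partition. A sketch, assuming ruby-kafka is loaded (the Struct stands in for a real protocol record):

    # A stand-in for a protocol-level record, illustrative only.
    RawMessage = Struct.new(:value, :key, :offset, :create_time, :headers,
                            :is_control_record, keyword_init: true)

    raw = RawMessage.new(value: "hello", key: "greeting", offset: 7,
                         create_time: Time.now, headers: {}, is_control_record: false)
    msg = Kafka::FetchedMessage.new(message: raw, topic: "greetings", partition: 3)

    msg.value     # => "hello"
    msg.offset    # => 7
    msg.partition # => 3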
data/lib/kafka/fetched_offset_resolver.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module Kafka
+  class FetchedOffsetResolver
+    def initialize(logger:)
+      @logger = TaggedLogger.new(logger)
+    end
+
+    def resolve!(broker, topics)
+      pending_topics = filter_pending_topics(topics)
+      return topics if pending_topics.empty?
+
+      response = broker.list_offsets(topics: pending_topics)
+
+      pending_topics.each do |topic, partitions|
+        partitions.each do |options|
+          partition = options.fetch(:partition)
+          resolved_offset = response.offset_for(topic, partition)
+
+          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
+
+          topics[topic][partition][:fetch_offset] = resolved_offset || 0
+        end
+      end
+    end
+
+    private
+
+    def filter_pending_topics(topics)
+      pending_topics = {}
+      topics.each do |topic, partitions|
+        partitions.each do |partition, options|
+          offset = options.fetch(:fetch_offset)
+          next if offset >= 0
+
+          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
+
+          pending_topics[topic] ||= []
+          pending_topics[topic] << {
+            partition: partition,
+            time: offset
+          }
+        end
+      end
+      pending_topics
+    end
+  end
+end
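Note on fetched_offset_resolver.rb: only partitions whose fetch_offset is negative (the -1/-2 sentinels) are sent to the ListOffsets API; concrete offsets are skipped. A standalone sketch of that filter (filter_pending is illustrative, not part of the gem):

    # Illustrative only -- mirrors FetchedOffsetResolver#filter_pending_topics.
    def filter_pending(topics)
      pending = {}
      topics.each do |topic, partitions|
        partitions.each do |partition, options|
          offset = options.fetch(:fetch_offset)
          next if offset >= 0 # concrete offsets need no resolution

          (pending[topic] ||= []) << { partition: partition, time: offset }
        end
      end
      pending
    end

    topics = {
      "greetings" => {
        0 => { fetch_offset: 42 }, # concrete -- skipped
        1 => { fetch_offset: -1 }, # :latest -- needs resolution
      }
    }

    filter_pending(topics) # => {"greetings"=>[{:partition=>1, :time=>-1}]}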
data/lib/kafka/fetcher.rb
@@ -0,0 +1,224 @@
+# frozen_string_literal: true
+
+require "kafka/fetch_operation"
+
+module Kafka
+  class Fetcher
+    attr_reader :queue, :max_wait_time
+
+    def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
+      @cluster = cluster
+      @logger = TaggedLogger.new(logger)
+      @instrumenter = instrumenter
+      @max_queue_size = max_queue_size
+      @group = group
+
+      @queue = Queue.new
+      @commands = Queue.new
+      @next_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # We are only running when someone calls start.
+      @running = false
+
+      # Long poll until at least this many bytes can be fetched.
+      @min_bytes = 1
+
+      # Long poll at most this number of seconds.
+      @max_wait_time = 1
+
+      # The maximum number of bytes to fetch for any given fetch request.
+      @max_bytes = 10485760
+
+      # The maximum number of bytes to fetch per partition, by topic.
+      @max_bytes_per_partition = {}
+
+      # An incrementing counter used to synchronize resets between the
+      # foreground and background thread.
+      @current_reset_counter = 0
+    end
+
+    def subscribe(topic, max_bytes_per_partition:)
+      @commands << [:subscribe, [topic, max_bytes_per_partition]]
+    end
+
+    def seek(topic, partition, offset)
+      @commands << [:seek, [topic, partition, offset]]
+    end
+
+    def configure(min_bytes:, max_bytes:, max_wait_time:)
+      @commands << [:configure, [min_bytes, max_bytes, max_wait_time]]
+    end
+
+    def start
+      return if @running
+
+      @running = true
+
+      @thread = Thread.new do
+        while @running
+          loop
+        end
+        @logger.info "#{@group} Fetcher thread exited."
+      end
+      @thread.abort_on_exception = true
+    end
+
+    def stop
+      return unless @running
+      @commands << [:stop, []]
+      @thread.join
+    end
+
+    def reset
+      @current_reset_counter = current_reset_counter + 1
+      @commands << [:reset]
+    end
+
+    def data?
+      !@queue.empty?
+    end
+
+    def poll
+      tag, message, reset_counter = @queue.deq
+
+      # Batches are tagged with the current reset counter value. If the batch
+      # has a reset_counter < current_reset_counter, we know it was fetched
+      # prior to the most recent reset and should be discarded.
+      if tag == :batches && message.any? && current_reset_counter > reset_counter
+        @logger.warn "Skipping stale messages buffered prior to reset"
+        return tag, []
+      end
+
+      return [tag, message]
+    end
+
+    private
+
+    attr_reader :current_reset_counter
+
+    def loop
+      @logger.push_tags(@group.to_s)
+      @instrumenter.instrument("loop.fetcher", {
+        queue_size: @queue.size,
+      })
+
+      return unless @running
+
+      if !@commands.empty?
+        cmd, args = @commands.deq
+
+        @logger.debug "Handling fetcher command: #{cmd}"
+
+        send("handle_#{cmd}", *args)
+      elsif @queue.size < @max_queue_size
+        step
+      else
+        @logger.info "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
+        sleep 1
+      end
+    ensure
+      @logger.pop_tags
+    end
+
+    def handle_configure(min_bytes, max_bytes, max_wait_time)
+      @min_bytes = min_bytes
+      @max_bytes = max_bytes
+      @max_wait_time = max_wait_time
+    end
+
+    def handle_reset
+      @next_offsets.clear
+      @queue.clear
+    end
+
+    def handle_stop(*)
+      @running = false
+      @commands.clear
+
+      # After stopping, we need to reconfigure the topics and partitions to fetch
+      # from. Otherwise we'd keep fetching from a bunch of partitions we may no
+      # longer be assigned.
+      handle_reset
+    end
+
+    def handle_subscribe(topic, max_bytes_per_partition)
+      @logger.info "Will fetch at most #{max_bytes_per_partition} bytes at a time per partition from #{topic}"
+      @max_bytes_per_partition[topic] = max_bytes_per_partition
+    end
+
+    def handle_seek(topic, partition, offset)
+      @instrumenter.instrument('seek.consumer',
+        group_id: @group.group_id,
+        topic: topic,
+        partition: partition,
+        offset: offset)
+      @logger.info "Seeking #{topic}/#{partition} to offset #{offset}"
+      @next_offsets[topic][partition] = offset
+    end
+
+    def step
+      batches = fetch_batches
+
+      batches.each do |batch|
+        unless batch.empty?
+          @instrumenter.instrument("fetch_batch.consumer", {
+            topic: batch.topic,
+            partition: batch.partition,
+            offset_lag: batch.offset_lag,
+            highwater_mark_offset: batch.highwater_mark_offset,
+            message_count: batch.messages.count,
+          })
+        end
+
+        @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
+      end
+
+      @queue << [:batches, batches, current_reset_counter]
+    rescue Kafka::NoPartitionsToFetchFrom
+      @logger.warn "No partitions to fetch from, sleeping for 1s"
+      sleep 1
+    rescue Kafka::Error => e
+      @queue << [:exception, e]
+    end
+
+    def fetch_batches
+      @logger.debug "Fetching batches"
+
+      operation = FetchOperation.new(
+        cluster: @cluster,
+        logger: @logger,
+        min_bytes: @min_bytes,
+        max_bytes: @max_bytes,
+        max_wait_time: @max_wait_time,
+      )
+
+      @next_offsets.each do |topic, partitions|
+        # Fetch at most this many bytes from any single partition.
+        max_bytes = @max_bytes_per_partition[topic]
+
+        partitions.each do |partition, offset|
+          operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
+        end
+      end
+
+      operation.execute
+    rescue UnknownTopicOrPartition
+      @logger.error "Failed to fetch from some partitions. Maybe a rebalance has happened? Refreshing cluster info."
+
+      # Our cluster information has become stale, we need to refresh it.
+      @cluster.refresh_metadata!
+
+      # Don't overwhelm the brokers in case this keeps happening.
+      sleep 10
+
+      retry
+    rescue NoPartitionsToFetchFrom
+      backoff = @max_wait_time > 0 ? @max_wait_time : 1
+
+      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
+      sleep backoff
+
+      []
+    end
+  end
+end
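Note on fetcher.rb: all mutation of fetch state goes through the @commands queue, so only the background thread touches @next_offsets, and the reset counter lets poll discard batches fetched before the latest reset. A self-contained sketch of the same command-queue pattern (illustrative, not the gem's API):

    # Illustrative only: a command queue drained by one background thread.
    commands = Queue.new
    results  = Queue.new

    worker = Thread.new do
      loop do
        cmd, args = commands.deq
        break if cmd == :stop

        results << [cmd, args] # a real fetcher would fetch batches here
      end
    end

    commands << [:subscribe, ["greetings", 1_048_576]]
    commands << [:stop, []]
    worker.join

    results.deq # => [:subscribe, ["greetings", 1048576]]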
data/lib/kafka/gzip_codec.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Kafka
+  class GzipCodec
+    def codec_id
+      1
+    end
+
+    def produce_api_min_version
+      0
+    end
+
+    def load
+      require "zlib"
+    end
+
+    def compress(data)
+      buffer = StringIO.new
+      buffer.set_encoding(Encoding::BINARY)
+
+      writer = Zlib::GzipWriter.new(buffer, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
+      writer.write(data)
+      writer.close
+
+      buffer.string
+    end
+
+    def decompress(data)
+      buffer = StringIO.new(data)
+      reader = Zlib::GzipReader.new(buffer)
+      reader.read
+    end
+  end
+end
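Note on gzip_codec.rb: a quick round trip through the codec, assuming ruby-kafka is loaded (Zlib and StringIO ship with Ruby):

    require "stringio"

    codec = Kafka::GzipCodec.new
    codec.load # requires "zlib"

    input = "hello " * 100 # 600 bytes of repetitive data
    compressed = codec.compress(input)

    codec.decompress(compressed) == input # => true
    compressed.bytesize < input.bytesize  # => true: repetitive input compresses well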
data/lib/kafka/heartbeat.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Heartbeat
+    def initialize(group:, interval:, instrumenter:)
+      @group = group
+      @interval = interval
+      @last_heartbeat = Time.now
+      @instrumenter = instrumenter
+    end
+
+    def trigger!
+      @instrumenter.instrument('heartbeat.consumer',
+                               group_id: @group.group_id,
+                               topic_partitions: @group.assigned_partitions) do
+        @group.heartbeat
+        @last_heartbeat = Time.now
+      end
+    end
+
+    def trigger
+      trigger! if Time.now > @last_heartbeat + @interval
+    end
+  end
+end
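Note on heartbeat.rb: trigger is throttled (it fires only once interval seconds have passed since the last heartbeat) while trigger! always fires. A standalone sketch of the throttle rule, not using the gem:

    # Illustrative only -- the throttling rule behind Heartbeat#trigger.
    class Throttle
      def initialize(interval)
        @interval = interval
        @last_fired = Time.now
      end

      def call
        return false unless Time.now > @last_fired + @interval

        @last_fired = Time.now # in Heartbeat, @group.heartbeat runs here
        true
      end
    end

    t = Throttle.new(0.1)
    t.call # => false (too soon after initialization)
    sleep 0.2
    t.call # => true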