ruby-kafka-aws-iam 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +393 -0
- data/.github/workflows/stale.yml +19 -0
- data/.gitignore +13 -0
- data/.readygo +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +44 -0
- data/.ruby-version +1 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +314 -0
- data/Gemfile +5 -0
- data/ISSUE_TEMPLATE.md +23 -0
- data/LICENSE.txt +176 -0
- data/Procfile +2 -0
- data/README.md +1356 -0
- data/Rakefile +8 -0
- data/benchmarks/message_encoding.rb +23 -0
- data/bin/console +8 -0
- data/bin/setup +5 -0
- data/docker-compose.yml +39 -0
- data/examples/consumer-group.rb +35 -0
- data/examples/firehose-consumer.rb +64 -0
- data/examples/firehose-producer.rb +54 -0
- data/examples/simple-consumer.rb +34 -0
- data/examples/simple-producer.rb +42 -0
- data/examples/ssl-producer.rb +44 -0
- data/lib/kafka/async_producer.rb +297 -0
- data/lib/kafka/broker.rb +217 -0
- data/lib/kafka/broker_info.rb +16 -0
- data/lib/kafka/broker_pool.rb +41 -0
- data/lib/kafka/broker_uri.rb +43 -0
- data/lib/kafka/client.rb +838 -0
- data/lib/kafka/cluster.rb +513 -0
- data/lib/kafka/compression.rb +45 -0
- data/lib/kafka/compressor.rb +86 -0
- data/lib/kafka/connection.rb +228 -0
- data/lib/kafka/connection_builder.rb +33 -0
- data/lib/kafka/consumer.rb +642 -0
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/consumer_group.rb +231 -0
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +420 -0
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/fetch_operation.rb +115 -0
- data/lib/kafka/fetched_batch.rb +58 -0
- data/lib/kafka/fetched_batch_generator.rb +120 -0
- data/lib/kafka/fetched_message.rb +48 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +224 -0
- data/lib/kafka/gzip_codec.rb +34 -0
- data/lib/kafka/heartbeat.rb +25 -0
- data/lib/kafka/instrumenter.rb +38 -0
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/lz4_codec.rb +27 -0
- data/lib/kafka/message_buffer.rb +87 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +259 -0
- data/lib/kafka/partitioner.rb +40 -0
- data/lib/kafka/pause.rb +92 -0
- data/lib/kafka/pending_message.rb +29 -0
- data/lib/kafka/pending_message_queue.rb +41 -0
- data/lib/kafka/produce_operation.rb +205 -0
- data/lib/kafka/producer.rb +528 -0
- data/lib/kafka/prometheus.rb +316 -0
- data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/alter_configs_request.rb +44 -0
- data/lib/kafka/protocol/alter_configs_response.rb +49 -0
- data/lib/kafka/protocol/api_versions_request.rb +21 -0
- data/lib/kafka/protocol/api_versions_response.rb +53 -0
- data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
- data/lib/kafka/protocol/create_partitions_request.rb +42 -0
- data/lib/kafka/protocol/create_partitions_response.rb +28 -0
- data/lib/kafka/protocol/create_topics_request.rb +45 -0
- data/lib/kafka/protocol/create_topics_response.rb +26 -0
- data/lib/kafka/protocol/decoder.rb +175 -0
- data/lib/kafka/protocol/delete_topics_request.rb +33 -0
- data/lib/kafka/protocol/delete_topics_response.rb +26 -0
- data/lib/kafka/protocol/describe_configs_request.rb +35 -0
- data/lib/kafka/protocol/describe_configs_response.rb +73 -0
- data/lib/kafka/protocol/describe_groups_request.rb +27 -0
- data/lib/kafka/protocol/describe_groups_response.rb +73 -0
- data/lib/kafka/protocol/encoder.rb +184 -0
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +70 -0
- data/lib/kafka/protocol/fetch_response.rb +136 -0
- data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
- data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
- data/lib/kafka/protocol/heartbeat_request.rb +27 -0
- data/lib/kafka/protocol/heartbeat_response.rb +17 -0
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/join_group_request.rb +47 -0
- data/lib/kafka/protocol/join_group_response.rb +41 -0
- data/lib/kafka/protocol/leave_group_request.rb +25 -0
- data/lib/kafka/protocol/leave_group_response.rb +17 -0
- data/lib/kafka/protocol/list_groups_request.rb +23 -0
- data/lib/kafka/protocol/list_groups_response.rb +35 -0
- data/lib/kafka/protocol/list_offset_request.rb +53 -0
- data/lib/kafka/protocol/list_offset_response.rb +89 -0
- data/lib/kafka/protocol/member_assignment.rb +42 -0
- data/lib/kafka/protocol/message.rb +172 -0
- data/lib/kafka/protocol/message_set.rb +55 -0
- data/lib/kafka/protocol/metadata_request.rb +31 -0
- data/lib/kafka/protocol/metadata_response.rb +185 -0
- data/lib/kafka/protocol/offset_commit_request.rb +47 -0
- data/lib/kafka/protocol/offset_commit_response.rb +29 -0
- data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
- data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
- data/lib/kafka/protocol/produce_request.rb +94 -0
- data/lib/kafka/protocol/produce_response.rb +63 -0
- data/lib/kafka/protocol/record.rb +88 -0
- data/lib/kafka/protocol/record_batch.rb +223 -0
- data/lib/kafka/protocol/request_message.rb +26 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
- data/lib/kafka/protocol/sync_group_request.rb +33 -0
- data/lib/kafka/protocol/sync_group_response.rb +26 -0
- data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
- data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
- data/lib/kafka/protocol.rb +225 -0
- data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
- data/lib/kafka/sasl/awsmskiam.rb +128 -0
- data/lib/kafka/sasl/gssapi.rb +76 -0
- data/lib/kafka/sasl/oauth.rb +64 -0
- data/lib/kafka/sasl/plain.rb +39 -0
- data/lib/kafka/sasl/scram.rb +180 -0
- data/lib/kafka/sasl_authenticator.rb +73 -0
- data/lib/kafka/snappy_codec.rb +29 -0
- data/lib/kafka/socket_with_timeout.rb +96 -0
- data/lib/kafka/ssl_context.rb +66 -0
- data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
- data/lib/kafka/statsd.rb +296 -0
- data/lib/kafka/tagged_logger.rb +77 -0
- data/lib/kafka/transaction_manager.rb +306 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +5 -0
- data/lib/kafka/zstd_codec.rb +27 -0
- data/lib/kafka.rb +373 -0
- data/lib/ruby-kafka.rb +5 -0
- data/ruby-kafka.gemspec +54 -0
- metadata +520 -0

data/lib/kafka/fetch_operation.rb
@@ -0,0 +1,115 @@
+# frozen_string_literal: true
+
+require "kafka/fetched_offset_resolver"
+require "kafka/fetched_batch_generator"
+
+module Kafka
+
+  # Fetches messages from one or more partitions.
+  #
+  #     operation = Kafka::FetchOperation.new(
+  #       cluster: cluster,
+  #       logger: logger,
+  #       min_bytes: 1,
+  #       max_wait_time: 10,
+  #     )
+  #
+  #     # These calls will schedule fetches from the specified topics/partitions.
+  #     operation.fetch_from_partition("greetings", 42, offset: :latest, max_bytes: 100000)
+  #     operation.fetch_from_partition("goodbyes", 13, offset: :latest, max_bytes: 100000)
+  #
+  #     operation.execute
+  #
+  class FetchOperation
+    def initialize(cluster:, logger:, min_bytes: 1, max_bytes: 10485760, max_wait_time: 5)
+      @cluster = cluster
+      @logger = TaggedLogger.new(logger)
+      @min_bytes = min_bytes
+      @max_bytes = max_bytes
+      @max_wait_time = max_wait_time
+      @topics = {}
+
+      @offset_resolver = Kafka::FetchedOffsetResolver.new(
+        logger: logger
+      )
+    end
+
+    def fetch_from_partition(topic, partition, offset: :latest, max_bytes: 1048576)
+      if offset == :earliest
+        offset = -2
+      elsif offset == :latest
+        offset = -1
+      end
+
+      @topics[topic] ||= {}
+      @topics[topic][partition] = {
+        fetch_offset: offset,
+        max_bytes: max_bytes,
+      }
+    end
+
+    def execute
+      @cluster.add_target_topics(@topics.keys)
+      @cluster.refresh_metadata_if_necessary!
+
+      topics_by_broker = {}
+
+      if @topics.none? {|topic, partitions| partitions.any? }
+        raise NoPartitionsToFetchFrom
+      end
+
+      @topics.each do |topic, partitions|
+        partitions.each do |partition, options|
+          broker = @cluster.get_leader(topic, partition)
+
+          topics_by_broker[broker] ||= {}
+          topics_by_broker[broker][topic] ||= {}
+          topics_by_broker[broker][topic][partition] = options
+        end
+      end
+
+      topics_by_broker.flat_map do |broker, topics|
+        @offset_resolver.resolve!(broker, topics)
+
+        options = {
+          max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
+          min_bytes: @min_bytes,
+          max_bytes: @max_bytes,
+          topics: topics,
+        }
+
+        response = broker.fetch_messages(**options)
+
+        response.topics.flat_map do |fetched_topic|
+          fetched_topic.partitions.map do |fetched_partition|
+            begin
+              Protocol.handle_error(fetched_partition.error_code)
+            rescue Kafka::OffsetOutOfRange => e
+              e.topic = fetched_topic.name
+              e.partition = fetched_partition.partition
+              e.offset = topics.fetch(e.topic).fetch(e.partition).fetch(:fetch_offset)
+
+              raise e
+            rescue Kafka::Error => e
+              topic = fetched_topic.name
+              partition = fetched_partition.partition
+              @logger.error "Failed to fetch from #{topic}/#{partition}: #{e.message}"
+              raise e
+            end
+
+            Kafka::FetchedBatchGenerator.new(
+              fetched_topic.name,
+              fetched_partition,
+              topics.fetch(fetched_topic.name).fetch(fetched_partition.partition).fetch(:fetch_offset),
+              logger: @logger
+            ).generate
+          end
+        end
+      end
+    rescue Kafka::ConnectionError, Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
+      @cluster.mark_as_stale!
+
+      raise
+    end
+  end
+end

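A note on the code above: `fetch_from_partition` translates the symbolic offsets `:earliest`/`:latest` into the Kafka sentinel values -2/-1, and `execute` buckets the requested partitions by their leader broker so that each broker receives a single fetch request. The following standalone Ruby sketch reproduces just the grouping step with plain hashes; `leader_for` is a made-up stand-in for `Cluster#get_leader`, and nothing here requires the gem.

```ruby
# Standalone sketch of the grouping done in FetchOperation#execute: requested
# partitions are bucketed by their leader broker so each broker receives a
# single fetch request. `leader_for` is a hypothetical stand-in for
# Cluster#get_leader; all data is made up.
requested = {
  "greetings" => { 42 => { fetch_offset: -1, max_bytes: 100_000 } }, # :latest
  "goodbyes"  => { 13 => { fetch_offset: -2, max_bytes: 100_000 } }, # :earliest
}

def leader_for(_topic, partition)
  # Pretend partitions alternate between two brokers.
  partition.even? ? "broker-1" : "broker-2"
end

topics_by_broker = {}

requested.each do |topic, partitions|
  partitions.each do |partition, options|
    broker = leader_for(topic, partition)

    topics_by_broker[broker] ||= {}
    topics_by_broker[broker][topic] ||= {}
    topics_by_broker[broker][topic][partition] = options
  end
end

p topics_by_broker
# partition 42 (even) ends up under "broker-1", partition 13 (odd) under "broker-2"
```
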
data/lib/kafka/fetched_batch.rb
@@ -0,0 +1,58 @@
+# frozen_string_literal: true
+
+module Kafka
+
+  # An ordered sequence of messages fetched from a Kafka partition.
+  class FetchedBatch
+    # @return [String]
+    attr_reader :topic
+
+    # @return [Integer]
+    attr_reader :partition
+
+    # @return [Integer]
+    attr_reader :last_offset
+
+    # @return [Integer]
+    attr_reader :leader_epoch
+
+    # @return [Integer] the offset of the most recent message in the partition.
+    attr_reader :highwater_mark_offset
+
+    # @return [Array<Kafka::FetchedMessage>]
+    attr_accessor :messages
+
+    def initialize(topic:, partition:, highwater_mark_offset:, messages:, last_offset: nil, leader_epoch: nil)
+      @topic = topic
+      @partition = partition
+      @highwater_mark_offset = highwater_mark_offset
+      @messages = messages
+      @last_offset = last_offset
+      @leader_epoch = leader_epoch
+    end
+
+    def empty?
+      @messages.empty?
+    end
+
+    def unknown_last_offset?
+      @last_offset.nil?
+    end
+
+    def first_offset
+      if empty?
+        nil
+      else
+        messages.first.offset
+      end
+    end
+
+    def offset_lag
+      if empty?
+        0
+      else
+        (highwater_mark_offset - 1) - last_offset
+      end
+    end
+  end
+end

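`offset_lag` above reports how far the batch trails the partition's high-water mark (the next offset the broker will assign). A minimal sketch of the same arithmetic, with made-up numbers:

```ruby
# Minimal sketch of FetchedBatch#offset_lag: the lag is the distance between
# the newest offset in the partition (highwater_mark_offset - 1) and the last
# offset contained in this batch. Values are made up for illustration.
highwater_mark_offset = 1_000 # next offset the broker will assign
last_offset           = 990   # last offset present in the fetched batch

offset_lag = (highwater_mark_offset - 1) - last_offset
puts offset_lag # => 9
```
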
data/lib/kafka/fetched_batch_generator.rb
@@ -0,0 +1,120 @@
+# frozen_string_literal: true
+
+require "kafka/fetched_batch"
+
+module Kafka
+  class FetchedBatchGenerator
+    COMMITTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x01".freeze
+    ABORTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x00".freeze
+
+    def initialize(topic, fetched_partition, offset, logger:)
+      @topic = topic
+      @fetched_partition = fetched_partition
+      @logger = TaggedLogger.new(logger)
+      @offset = offset
+    end
+
+    def generate
+      if @fetched_partition.messages.empty?
+        empty_fetched_batch
+      elsif @fetched_partition.messages.first.is_a?(Kafka::Protocol::MessageSet)
+        extract_messages
+      else
+        extract_records
+      end
+    end
+
+    private
+
+    def empty_fetched_batch
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: nil,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: []
+      )
+    end
+
+    def extract_messages
+      last_offset = nil
+      messages = @fetched_partition.messages.flat_map do |message_set|
+        message_set.messages.map do |message|
+          last_offset = message.offset if last_offset.nil? || last_offset < message.offset
+          if message.offset >= @offset
+            FetchedMessage.new(
+              message: message,
+              topic: @topic,
+              partition: @fetched_partition.partition
+            )
+          end
+        end.compact
+      end
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: last_offset,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: messages
+      )
+    end
+
+    def extract_records
+      records = []
+      last_offset = nil
+      leader_epoch = nil
+      aborted_transactions = @fetched_partition.aborted_transactions.sort_by(&:first_offset)
+      aborted_producer_ids = {}
+
+      @fetched_partition.messages.each do |record_batch|
+        last_offset = record_batch.last_offset if last_offset.nil? || last_offset < record_batch.last_offset
+        leader_epoch = record_batch.partition_leader_epoch if leader_epoch.nil? || leader_epoch < record_batch.partition_leader_epoch
+        # Find the list of aborted producer IDs less than current offset
+        unless aborted_transactions.empty?
+          if aborted_transactions.first.first_offset <= record_batch.last_offset
+            aborted_transaction = aborted_transactions.shift
+            aborted_producer_ids[aborted_transaction.producer_id] = aborted_transaction.first_offset
+          end
+        end
+
+        if abort_marker?(record_batch)
+          # Abort marker, remove the producer from the aborted list
+          aborted_producer_ids.delete(record_batch.producer_id)
+        elsif aborted_producer_ids.key?(record_batch.producer_id) && record_batch.in_transaction
+          # Reject aborted record batch
+          @logger.info("Reject #{record_batch.records.size} aborted records of topic '#{@topic}', partition #{@fetched_partition.partition}, from offset #{record_batch.first_offset}")
+          next
+        end
+
+        record_batch.records.each do |record|
+          if !record.is_control_record && record.offset >= @offset
+            records << FetchedMessage.new(
+              message: record,
+              topic: @topic,
+              partition: @fetched_partition.partition
+            )
+          end
+        end
+      end
+
+      FetchedBatch.new(
+        topic: @topic,
+        partition: @fetched_partition.partition,
+        last_offset: last_offset,
+        leader_epoch: leader_epoch,
+        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
+        messages: records
+      )
+    end
+
+    def abort_marker?(record_batch)
+      return false unless record_batch.is_control_batch
+
+      if record_batch.records.empty?
+        raise "Invalid control record batch at topic '#{@topic}', partition #{@fetched_partition}"
+      end
+
+      record_batch.records.first.key == ABORTED_TRANSACTION_SIGNAL
+    end
+  end
+end

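The trickiest part of `extract_records` above is the aborted-transaction filtering: aborted transactions (sorted by first offset) are consumed while scanning record batches, their producer IDs are remembered, and transactional batches from those producers are dropped until the corresponding abort marker is seen. The sketch below reproduces that bookkeeping with plain Structs and made-up offsets rather than the gem's protocol classes, and it simplifies control batches to a boolean flag.

```ruby
# Standalone sketch of the aborted-transaction filtering in
# FetchedBatchGenerator#extract_records. All names and data are made up.
AbortedTxn  = Struct.new(:producer_id, :first_offset)
RecordBatch = Struct.new(:producer_id, :last_offset, :in_transaction, :control, keyword_init: true)

aborted_transactions = [AbortedTxn.new(7, 100)].sort_by(&:first_offset)
aborted_producer_ids = {}

batches = [
  RecordBatch.new(producer_id: 7, last_offset: 105, in_transaction: true,  control: false), # aborted data
  RecordBatch.new(producer_id: 7, last_offset: 106, in_transaction: true,  control: true),  # abort marker
  RecordBatch.new(producer_id: 9, last_offset: 110, in_transaction: false, control: false), # normal data
]

kept = []

batches.each do |batch|
  # Remember producers whose transactions were aborted at or before this offset.
  unless aborted_transactions.empty?
    if aborted_transactions.first.first_offset <= batch.last_offset
      txn = aborted_transactions.shift
      aborted_producer_ids[txn.producer_id] = txn.first_offset
    end
  end

  if batch.control
    # Abort marker: this producer's aborted range has now been fully skipped.
    aborted_producer_ids.delete(batch.producer_id)
    next
  end

  # Drop transactional batches from producers whose transaction was aborted.
  next if aborted_producer_ids.key?(batch.producer_id) && batch.in_transaction

  kept << batch
end

p kept.map(&:last_offset) # => [110] — the aborted data batch at offset 105 is dropped
```
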
data/lib/kafka/fetched_message.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module Kafka
+  class FetchedMessage
+    # @return [String] the name of the topic that the message was written to.
+    attr_reader :topic
+
+    # @return [Integer] the partition number that the message was written to.
+    attr_reader :partition
+
+    def initialize(message:, topic:, partition:)
+      @message = message
+      @topic = topic
+      @partition = partition
+    end
+
+    # @return [String] the value of the message.
+    def value
+      @message.value
+    end
+
+    # @return [String] the key of the message.
+    def key
+      @message.key
+    end
+
+    # @return [Integer] the offset of the message in the partition.
+    def offset
+      @message.offset
+    end
+
+    # @return [Time] the timestamp of the message.
+    def create_time
+      @message.create_time
+    end
+
+    # @return [Hash<String, String>] the headers of the message.
+    def headers
+      @message.headers
+    end
+
+    # @return [Boolean] whether this record is a control record
+    def is_control_record
+      @message.is_control_record
+    end
+
+  end
+end

data/lib/kafka/fetched_offset_resolver.rb
@@ -0,0 +1,48 @@
+# frozen_string_literal: true
+
+module Kafka
+  class FetchedOffsetResolver
+    def initialize(logger:)
+      @logger = TaggedLogger.new(logger)
+    end
+
+    def resolve!(broker, topics)
+      pending_topics = filter_pending_topics(topics)
+      return topics if pending_topics.empty?
+
+      response = broker.list_offsets(topics: pending_topics)
+
+      pending_topics.each do |topic, partitions|
+        partitions.each do |options|
+          partition = options.fetch(:partition)
+          resolved_offset = response.offset_for(topic, partition)
+
+          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"
+
+          topics[topic][partition][:fetch_offset] = resolved_offset || 0
+        end
+      end
+    end
+
+    private
+
+    def filter_pending_topics(topics)
+      pending_topics = {}
+      topics.each do |topic, partitions|
+        partitions.each do |partition, options|
+          offset = options.fetch(:fetch_offset)
+          next if offset >= 0
+
+          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."
+
+          pending_topics[topic] ||= []
+          pending_topics[topic] << {
+            partition: partition,
+            time: offset
+          }
+        end
+      end
+      pending_topics
+    end
+  end
+end

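The resolver above only asks the broker about offsets that are still symbolic: `filter_pending_topics` picks out partitions whose `fetch_offset` is a negative sentinel (-1 for `:latest`, -2 for `:earliest`) and builds the ListOffsets payload from them. A standalone sketch of that filtering step with made-up data and no broker round trip:

```ruby
# Standalone sketch of FetchedOffsetResolver#filter_pending_topics: only
# partitions whose fetch_offset is still a negative sentinel need a
# ListOffsets request. Data is made up.
topics = {
  "greetings" => {
    0 => { fetch_offset: 1234, max_bytes: 1_048_576 }, # already resolved
    1 => { fetch_offset: -1,   max_bytes: 1_048_576 }, # :latest
  },
  "goodbyes" => {
    0 => { fetch_offset: -2,   max_bytes: 1_048_576 }, # :earliest
  },
}

pending_topics = {}

topics.each do |topic, partitions|
  partitions.each do |partition, options|
    offset = options.fetch(:fetch_offset)
    next if offset >= 0

    pending_topics[topic] ||= []
    pending_topics[topic] << { partition: partition, time: offset }
  end
end

p pending_topics
# only greetings/1 and goodbyes/0 still need resolving; greetings/0 is skipped
```
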
data/lib/kafka/fetcher.rb
@@ -0,0 +1,224 @@
+# frozen_string_literal: true
+
+require "kafka/fetch_operation"
+
+module Kafka
+  class Fetcher
+    attr_reader :queue, :max_wait_time
+
+    def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
+      @cluster = cluster
+      @logger = TaggedLogger.new(logger)
+      @instrumenter = instrumenter
+      @max_queue_size = max_queue_size
+      @group = group
+
+      @queue = Queue.new
+      @commands = Queue.new
+      @next_offsets = Hash.new { |h, k| h[k] = {} }
+
+      # We are only running when someone calls start.
+      @running = false
+
+      # Long poll until at least this many bytes can be fetched.
+      @min_bytes = 1
+
+      # Long poll at most this number of seconds.
+      @max_wait_time = 1
+
+      # The maximum number of bytes to fetch for any given fetch request.
+      @max_bytes = 10485760
+
+      # The maximum number of bytes to fetch per partition, by topic.
+      @max_bytes_per_partition = {}
+
+      # An incrementing counter used to synchronize resets between the
+      # foreground and background thread.
+      @current_reset_counter = 0
+    end
+
+    def subscribe(topic, max_bytes_per_partition:)
+      @commands << [:subscribe, [topic, max_bytes_per_partition]]
+    end
+
+    def seek(topic, partition, offset)
+      @commands << [:seek, [topic, partition, offset]]
+    end
+
+    def configure(min_bytes:, max_bytes:, max_wait_time:)
+      @commands << [:configure, [min_bytes, max_bytes, max_wait_time]]
+    end
+
+    def start
+      return if @running
+
+      @running = true
+
+      @thread = Thread.new do
+        while @running
+          loop
+        end
+        @logger.info "#{@group} Fetcher thread exited."
+      end
+      @thread.abort_on_exception = true
+    end
+
+    def stop
+      return unless @running
+      @commands << [:stop, []]
+      @thread.join
+    end
+
+    def reset
+      @current_reset_counter = current_reset_counter + 1
+      @commands << [:reset]
+    end
+
+    def data?
+      !@queue.empty?
+    end
+
+    def poll
+      tag, message, reset_counter = @queue.deq
+
+      # Batches are tagged with the current reset counter value. If the batch
+      # has a reset_counter < current_reset_counter, we know it was fetched
+      # prior to the most recent reset and should be discarded.
+      if tag == :batches && message.any? && current_reset_counter > reset_counter
+        @logger.warn "Skipping stale messages buffered prior to reset"
+        return tag, []
+      end
+
+      return [tag, message]
+    end
+
+    private
+
+    attr_reader :current_reset_counter
+
+    def loop
+      @logger.push_tags(@group.to_s)
+      @instrumenter.instrument("loop.fetcher", {
+        queue_size: @queue.size,
+      })
+
+      return unless @running
+
+      if !@commands.empty?
+        cmd, args = @commands.deq
+
+        @logger.debug "Handling fetcher command: #{cmd}"
+
+        send("handle_#{cmd}", *args)
+      elsif @queue.size < @max_queue_size
+        step
+      else
+        @logger.info "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
+        sleep 1
+      end
+    ensure
+      @logger.pop_tags
+    end
+
+    def handle_configure(min_bytes, max_bytes, max_wait_time)
+      @min_bytes = min_bytes
+      @max_bytes = max_bytes
+      @max_wait_time = max_wait_time
+    end
+
+    def handle_reset
+      @next_offsets.clear
+      @queue.clear
+    end
+
+    def handle_stop(*)
+      @running = false
+      @commands.clear
+
+      # After stopping, we need to reconfigure the topics and partitions to fetch
+      # from. Otherwise we'd keep fetching from a bunch of partitions we may no
+      # longer be assigned.
+      handle_reset
+    end
+
+    def handle_subscribe(topic, max_bytes_per_partition)
+      @logger.info "Will fetch at most #{max_bytes_per_partition} bytes at a time per partition from #{topic}"
+      @max_bytes_per_partition[topic] = max_bytes_per_partition
+    end
+
+    def handle_seek(topic, partition, offset)
+      @instrumenter.instrument('seek.consumer',
+                               group_id: @group.group_id,
+                               topic: topic,
+                               partition: partition,
+                               offset: offset)
+      @logger.info "Seeking #{topic}/#{partition} to offset #{offset}"
+      @next_offsets[topic][partition] = offset
+    end
+
+    def step
+      batches = fetch_batches
+
+      batches.each do |batch|
+        unless batch.empty?
+          @instrumenter.instrument("fetch_batch.consumer", {
+            topic: batch.topic,
+            partition: batch.partition,
+            offset_lag: batch.offset_lag,
+            highwater_mark_offset: batch.highwater_mark_offset,
+            message_count: batch.messages.count,
+          })
+        end
+
+        @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
+      end
+
+      @queue << [:batches, batches, current_reset_counter]
+    rescue Kafka::NoPartitionsToFetchFrom
+      @logger.warn "No partitions to fetch from, sleeping for 1s"
+      sleep 1
+    rescue Kafka::Error => e
+      @queue << [:exception, e]
+    end
+
+    def fetch_batches
+      @logger.debug "Fetching batches"
+
+      operation = FetchOperation.new(
+        cluster: @cluster,
+        logger: @logger,
+        min_bytes: @min_bytes,
+        max_bytes: @max_bytes,
+        max_wait_time: @max_wait_time,
+      )
+
+      @next_offsets.each do |topic, partitions|
+        # Fetch at most this many bytes from any single partition.
+        max_bytes = @max_bytes_per_partition[topic]
+
+        partitions.each do |partition, offset|
+          operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
+        end
+      end
+
+      operation.execute
+    rescue UnknownTopicOrPartition
+      @logger.error "Failed to fetch from some partitions. Maybe a rebalance has happened? Refreshing cluster info."
+
+      # Our cluster information has become stale, we need to refresh it.
+      @cluster.refresh_metadata!
+
+      # Don't overwhelm the brokers in case this keeps happening.
+      sleep 10
+
+      retry
+    rescue NoPartitionsToFetchFrom
+      backoff = @max_wait_time > 0 ? @max_wait_time : 1
+
+      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
+      sleep backoff
+
+      []
+    end
+  end
+end

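One detail worth calling out in `Fetcher#poll` above: every `[:batches, ...]` entry carries the reset counter that was current when it was fetched, so entries queued before a `reset` can be recognized and dropped by the foreground thread. A standalone sketch of that check using Ruby's core `Queue`, with made-up payloads:

```ruby
# Standalone sketch of the reset-counter check in Fetcher#poll: each queued
# entry carries the counter value that was current when it was produced, so
# entries from before a reset can be detected and discarded.
queue = Queue.new
current_reset_counter = 0

queue << [:batches, ["stale batch"], current_reset_counter]
current_reset_counter += 1 # a reset happened in between
queue << [:batches, ["fresh batch"], current_reset_counter]

until queue.empty?
  tag, message, reset_counter = queue.deq

  if tag == :batches && message.any? && current_reset_counter > reset_counter
    puts "discarding stale entry: #{message.inspect}"
    next
  end

  puts "delivering: #{message.inspect}"
end
```
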
data/lib/kafka/gzip_codec.rb
@@ -0,0 +1,34 @@
+# frozen_string_literal: true
+
+module Kafka
+  class GzipCodec
+    def codec_id
+      1
+    end
+
+    def produce_api_min_version
+      0
+    end
+
+    def load
+      require "zlib"
+    end
+
+    def compress(data)
+      buffer = StringIO.new
+      buffer.set_encoding(Encoding::BINARY)
+
+      writer = Zlib::GzipWriter.new(buffer, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
+      writer.write(data)
+      writer.close
+
+      buffer.string
+    end
+
+    def decompress(data)
+      buffer = StringIO.new(data)
+      reader = Zlib::GzipReader.new(buffer)
+      reader.read
+    end
+  end
+end

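`GzipCodec` above is a thin wrapper over the standard library: `codec_id` 1 identifies gzip in Kafka's message attributes, and compress/decompress are a plain Zlib round trip. A runnable round-trip sketch using only `zlib` and `stringio`:

```ruby
# Round-trip sketch of the same Zlib calls GzipCodec uses; only the Ruby
# standard library is needed.
require "stringio"
require "zlib"

data = "hello, kafka " * 100

# Compress into a binary StringIO buffer.
buffer = StringIO.new
buffer.set_encoding(Encoding::BINARY)
writer = Zlib::GzipWriter.new(buffer, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
writer.write(data)
writer.close
compressed = buffer.string

# Decompress and verify the round trip.
decompressed = Zlib::GzipReader.new(StringIO.new(compressed)).read

puts compressed.bytesize < data.bytesize # => true
puts decompressed == data                # => true
```
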
data/lib/kafka/heartbeat.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Heartbeat
+    def initialize(group:, interval:, instrumenter:)
+      @group = group
+      @interval = interval
+      @last_heartbeat = Time.now
+      @instrumenter = instrumenter
+    end
+
+    def trigger!
+      @instrumenter.instrument('heartbeat.consumer',
+                               group_id: @group.group_id,
+                               topic_partitions: @group.assigned_partitions) do
+        @group.heartbeat
+        @last_heartbeat = Time.now
+      end
+    end
+
+    def trigger
+      trigger! if Time.now > @last_heartbeat + @interval
+    end
+  end
+end
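
`Heartbeat#trigger` above only sends a heartbeat once `interval` seconds have elapsed since the last one, while `trigger!` forces it. A standalone sketch of the timing check with the group and instrumentation stubbed out and made-up values:

```ruby
# Standalone sketch of the interval check in Heartbeat#trigger: a heartbeat is
# only sent when more than `interval` seconds have passed since the last one.
interval       = 10 # seconds
last_heartbeat = Time.now - 12

if Time.now > last_heartbeat + interval
  puts "sending heartbeat" # in the gem this calls @group.heartbeat
  last_heartbeat = Time.now
else
  puts "skipping, heartbeat sent #{(Time.now - last_heartbeat).round}s ago"
end
```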