ruby-kafka-custom 0.7.7.26
- checksums.yaml +7 -0
- data/lib/kafka/async_producer.rb +279 -0
- data/lib/kafka/broker.rb +205 -0
- data/lib/kafka/broker_info.rb +16 -0
- data/lib/kafka/broker_pool.rb +41 -0
- data/lib/kafka/broker_uri.rb +43 -0
- data/lib/kafka/client.rb +754 -0
- data/lib/kafka/cluster.rb +455 -0
- data/lib/kafka/compression.rb +43 -0
- data/lib/kafka/compressor.rb +85 -0
- data/lib/kafka/connection.rb +220 -0
- data/lib/kafka/connection_builder.rb +33 -0
- data/lib/kafka/consumer.rb +592 -0
- data/lib/kafka/consumer_group.rb +208 -0
- data/lib/kafka/datadog.rb +413 -0
- data/lib/kafka/fetch_operation.rb +115 -0
- data/lib/kafka/fetched_batch.rb +54 -0
- data/lib/kafka/fetched_batch_generator.rb +117 -0
- data/lib/kafka/fetched_message.rb +47 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +221 -0
- data/lib/kafka/gzip_codec.rb +30 -0
- data/lib/kafka/heartbeat.rb +25 -0
- data/lib/kafka/instrumenter.rb +38 -0
- data/lib/kafka/lz4_codec.rb +23 -0
- data/lib/kafka/message_buffer.rb +87 -0
- data/lib/kafka/offset_manager.rb +248 -0
- data/lib/kafka/partitioner.rb +35 -0
- data/lib/kafka/pause.rb +92 -0
- data/lib/kafka/pending_message.rb +29 -0
- data/lib/kafka/pending_message_queue.rb +41 -0
- data/lib/kafka/produce_operation.rb +205 -0
- data/lib/kafka/producer.rb +504 -0
- data/lib/kafka/protocol.rb +217 -0
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/alter_configs_request.rb +44 -0
- data/lib/kafka/protocol/alter_configs_response.rb +49 -0
- data/lib/kafka/protocol/api_versions_request.rb +21 -0
- data/lib/kafka/protocol/api_versions_response.rb +53 -0
- data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
- data/lib/kafka/protocol/create_partitions_request.rb +42 -0
- data/lib/kafka/protocol/create_partitions_response.rb +28 -0
- data/lib/kafka/protocol/create_topics_request.rb +45 -0
- data/lib/kafka/protocol/create_topics_response.rb +26 -0
- data/lib/kafka/protocol/decoder.rb +175 -0
- data/lib/kafka/protocol/delete_topics_request.rb +33 -0
- data/lib/kafka/protocol/delete_topics_response.rb +26 -0
- data/lib/kafka/protocol/describe_configs_request.rb +35 -0
- data/lib/kafka/protocol/describe_configs_response.rb +73 -0
- data/lib/kafka/protocol/describe_groups_request.rb +27 -0
- data/lib/kafka/protocol/describe_groups_response.rb +73 -0
- data/lib/kafka/protocol/encoder.rb +184 -0
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +70 -0
- data/lib/kafka/protocol/fetch_response.rb +136 -0
- data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
- data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
- data/lib/kafka/protocol/heartbeat_request.rb +27 -0
- data/lib/kafka/protocol/heartbeat_response.rb +17 -0
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/join_group_request.rb +41 -0
- data/lib/kafka/protocol/join_group_response.rb +33 -0
- data/lib/kafka/protocol/leave_group_request.rb +25 -0
- data/lib/kafka/protocol/leave_group_response.rb +17 -0
- data/lib/kafka/protocol/list_groups_request.rb +23 -0
- data/lib/kafka/protocol/list_groups_response.rb +35 -0
- data/lib/kafka/protocol/list_offset_request.rb +53 -0
- data/lib/kafka/protocol/list_offset_response.rb +89 -0
- data/lib/kafka/protocol/member_assignment.rb +42 -0
- data/lib/kafka/protocol/message.rb +172 -0
- data/lib/kafka/protocol/message_set.rb +55 -0
- data/lib/kafka/protocol/metadata_request.rb +31 -0
- data/lib/kafka/protocol/metadata_response.rb +185 -0
- data/lib/kafka/protocol/offset_commit_request.rb +47 -0
- data/lib/kafka/protocol/offset_commit_response.rb +29 -0
- data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
- data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
- data/lib/kafka/protocol/produce_request.rb +92 -0
- data/lib/kafka/protocol/produce_response.rb +63 -0
- data/lib/kafka/protocol/record.rb +88 -0
- data/lib/kafka/protocol/record_batch.rb +222 -0
- data/lib/kafka/protocol/request_message.rb +26 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
- data/lib/kafka/protocol/sync_group_request.rb +33 -0
- data/lib/kafka/protocol/sync_group_response.rb +23 -0
- data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
- data/lib/kafka/sasl/gssapi.rb +76 -0
- data/lib/kafka/sasl/oauth.rb +64 -0
- data/lib/kafka/sasl/plain.rb +39 -0
- data/lib/kafka/sasl/scram.rb +177 -0
- data/lib/kafka/sasl_authenticator.rb +61 -0
- data/lib/kafka/snappy_codec.rb +25 -0
- data/lib/kafka/socket_with_timeout.rb +96 -0
- data/lib/kafka/ssl_context.rb +66 -0
- data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
- data/lib/kafka/statsd.rb +296 -0
- data/lib/kafka/tagged_logger.rb +72 -0
- data/lib/kafka/transaction_manager.rb +261 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +5 -0
- metadata +461 -0

data/lib/kafka/fetch_operation.rb
@@ -0,0 +1,115 @@
# frozen_string_literal: true

require "kafka/fetched_offset_resolver"
require "kafka/fetched_batch_generator"

module Kafka

  # Fetches messages from one or more partitions.
  #
  #     operation = Kafka::FetchOperation.new(
  #       cluster: cluster,
  #       logger: logger,
  #       min_bytes: 1,
  #       max_wait_time: 10,
  #     )
  #
  #     # These calls will schedule fetches from the specified topics/partitions.
  #     operation.fetch_from_partition("greetings", 42, offset: :latest, max_bytes: 100000)
  #     operation.fetch_from_partition("goodbyes", 13, offset: :latest, max_bytes: 100000)
  #
  #     operation.execute
  #
  class FetchOperation
    def initialize(cluster:, logger:, min_bytes: 1, max_bytes: 10485760, max_wait_time: 5)
      @cluster = cluster
      @logger = TaggedLogger.new(logger)
      @min_bytes = min_bytes
      @max_bytes = max_bytes
      @max_wait_time = max_wait_time
      @topics = {}

      @offset_resolver = Kafka::FetchedOffsetResolver.new(
        logger: logger
      )
    end

    def fetch_from_partition(topic, partition, offset: :latest, max_bytes: 1048576)
      if offset == :earliest
        offset = -2
      elsif offset == :latest
        offset = -1
      end

      @topics[topic] ||= {}
      @topics[topic][partition] = {
        fetch_offset: offset,
        max_bytes: max_bytes,
      }
    end

    def execute
      @cluster.add_target_topics(@topics.keys)
      @cluster.refresh_metadata_if_necessary!

      topics_by_broker = {}

      if @topics.none? {|topic, partitions| partitions.any? }
        raise NoPartitionsToFetchFrom
      end

      @topics.each do |topic, partitions|
        partitions.each do |partition, options|
          broker = @cluster.get_leader(topic, partition)

          topics_by_broker[broker] ||= {}
          topics_by_broker[broker][topic] ||= {}
          topics_by_broker[broker][topic][partition] = options
        end
      end

      topics_by_broker.flat_map do |broker, topics|
        @offset_resolver.resolve!(broker, topics)

        options = {
          max_wait_time: @max_wait_time * 1000, # Kafka expects ms, not secs
          min_bytes: @min_bytes,
          max_bytes: @max_bytes,
          topics: topics,
        }

        response = broker.fetch_messages(**options)

        response.topics.flat_map do |fetched_topic|
          fetched_topic.partitions.map do |fetched_partition|
            begin
              Protocol.handle_error(fetched_partition.error_code)
            rescue Kafka::OffsetOutOfRange => e
              e.topic = fetched_topic.name
              e.partition = fetched_partition.partition
              e.offset = topics.fetch(e.topic).fetch(e.partition).fetch(:fetch_offset)

              raise e
            rescue Kafka::Error => e
              topic = fetched_topic.name
              partition = fetched_partition.partition
              @logger.error "Failed to fetch from #{topic}/#{partition}: #{e.message}"
              raise e
            end

            Kafka::FetchedBatchGenerator.new(
              fetched_topic.name,
              fetched_partition,
              topics.fetch(fetched_topic.name).fetch(fetched_partition.partition).fetch(:fetch_offset),
              logger: @logger
            ).generate
          end
        end
      end
    rescue Kafka::ConnectionError, Kafka::LeaderNotAvailable, Kafka::NotLeaderForPartition
      @cluster.mark_as_stale!

      raise
    end
  end
end
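
Note on the offsets above: the symbolic offsets :earliest and :latest are translated to the Kafka sentinel values -2 and -1, which FetchedOffsetResolver later replaces with concrete offsets via a ListOffsets request. A minimal sketch of the bookkeeping fetch_from_partition performs (cluster and logger are placeholder objects):

    operation = Kafka::FetchOperation.new(cluster: cluster, logger: logger)

    operation.fetch_from_partition("greetings", 0, offset: :earliest)
    operation.fetch_from_partition("greetings", 1, offset: 1234)

    # The internal @topics hash now looks like:
    #
    #   {
    #     "greetings" => {
    #       0 => { fetch_offset: -2,   max_bytes: 1048576 }, # :earliest sentinel
    #       1 => { fetch_offset: 1234, max_bytes: 1048576 },
    #     }
    #   }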

data/lib/kafka/fetched_batch.rb
@@ -0,0 +1,54 @@
# frozen_string_literal: true

module Kafka

  # An ordered sequence of messages fetched from a Kafka partition.
  class FetchedBatch
    # @return [String]
    attr_reader :topic

    # @return [Integer]
    attr_reader :partition

    # @return [Integer]
    attr_reader :last_offset

    # @return [Integer] the offset of the most recent message in the partition.
    attr_reader :highwater_mark_offset

    # @return [Array<Kafka::FetchedMessage>]
    attr_accessor :messages

    def initialize(topic:, partition:, highwater_mark_offset:, messages:, last_offset: nil)
      @topic = topic
      @partition = partition
      @highwater_mark_offset = highwater_mark_offset
      @messages = messages
      @last_offset = last_offset
    end

    def empty?
      @messages.empty?
    end

    def unknown_last_offset?
      @last_offset.nil?
    end

    def first_offset
      if empty?
        nil
      else
        messages.first.offset
      end
    end

    def offset_lag
      if empty?
        0
      else
        (highwater_mark_offset - 1) - last_offset
      end
    end
  end
end
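
For a non-empty batch, offset_lag measures how far the batch's newest message trails the partition's high watermark: (highwater_mark_offset - 1) - last_offset. A small worked example with made-up numbers:

    batch = Kafka::FetchedBatch.new(
      topic: "greetings",
      partition: 0,
      highwater_mark_offset: 100,
      last_offset: 89,
      messages: messages, # a non-empty array of Kafka::FetchedMessage
    )

    batch.offset_lag           # => (100 - 1) - 89 = 10
    batch.unknown_last_offset? # => false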

data/lib/kafka/fetched_batch_generator.rb
@@ -0,0 +1,117 @@
# frozen_string_literal: true

require "kafka/fetched_batch"

module Kafka
  class FetchedBatchGenerator
    COMMITTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x01".freeze
    ABORTED_TRANSACTION_SIGNAL = "\x00\x00\x00\x00".freeze

    def initialize(topic, fetched_partition, offset, logger:)
      @topic = topic
      @fetched_partition = fetched_partition
      @logger = TaggedLogger.new(logger)
      @offset = offset
    end

    def generate
      if @fetched_partition.messages.empty?
        empty_fetched_batch
      elsif @fetched_partition.messages.first.is_a?(Kafka::Protocol::MessageSet)
        extract_messages
      else
        extract_records
      end
    end

    private

    def empty_fetched_batch
      FetchedBatch.new(
        topic: @topic,
        partition: @fetched_partition.partition,
        last_offset: nil,
        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
        messages: []
      )
    end

    def extract_messages
      last_offset = nil
      messages = @fetched_partition.messages.flat_map do |message_set|
        message_set.messages.map do |message|
          last_offset = message.offset if last_offset.nil? || last_offset < message.offset
          if message.offset >= @offset
            FetchedMessage.new(
              message: message,
              topic: @topic,
              partition: @fetched_partition.partition
            )
          end
        end
      end
      FetchedBatch.new(
        topic: @topic,
        partition: @fetched_partition.partition,
        last_offset: last_offset,
        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
        messages: messages
      )
    end

    def extract_records
      records = []
      last_offset = nil
      aborted_transactions = @fetched_partition.aborted_transactions.sort_by(&:first_offset)
      aborted_producer_ids = {}

      @fetched_partition.messages.each do |record_batch|
        last_offset = record_batch.last_offset if last_offset.nil? || last_offset < record_batch.last_offset
        # Find the list of aborted producer IDs less than current offset
        unless aborted_transactions.empty?
          if aborted_transactions.first.first_offset <= record_batch.last_offset
            aborted_transaction = aborted_transactions.shift
            aborted_producer_ids[aborted_transaction.producer_id] = aborted_transaction.first_offset
          end
        end

        if abort_marker?(record_batch)
          # Abort marker, remove the producer from the aborted list
          aborted_producer_ids.delete(record_batch.producer_id)
        elsif aborted_producer_ids.key?(record_batch.producer_id) && record_batch.in_transaction
          # Reject aborted record batch
          @logger.info("Reject #{record_batch.records.size} aborted records of topic '#{@topic}', partition #{@fetched_partition.partition}, from offset #{record_batch.first_offset}")
          next
        end

        record_batch.records.each do |record|
          if !record.is_control_record && record.offset >= @offset
            records << FetchedMessage.new(
              message: record,
              topic: @topic,
              partition: @fetched_partition.partition
            )
          end
        end
      end

      FetchedBatch.new(
        topic: @topic,
        partition: @fetched_partition.partition,
        last_offset: last_offset,
        highwater_mark_offset: @fetched_partition.highwater_mark_offset,
        messages: records
      )
    end

    def abort_marker?(record_batch)
      return false unless record_batch.is_control_batch

      if record_batch.records.empty?
        raise "Invalid control record batch at topic '#{@topic}', partition #{@fetched_partition.partition}"
      end

      record_batch.records.first.key == ABORTED_TRANSACTION_SIGNAL
    end
  end
end
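
The generator picks a decode path based on what the fetch response contained: legacy MessageSets go through extract_messages, while v2 record batches go through extract_records, which also drops control records and data from aborted transactions. A hedged sketch of how it is invoked (fetched_partition stands for a partition entry from a fetch response, as passed in by FetchOperation):

    batch = Kafka::FetchedBatchGenerator.new(
      "greetings",        # topic name
      fetched_partition,  # partition data from the fetch response
      1234,               # records below this offset are filtered out
      logger: logger
    ).generate

    batch # => a Kafka::FetchedBatch holding the surviving messages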

data/lib/kafka/fetched_message.rb
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module Kafka
  class FetchedMessage
    # @return [String] the name of the topic that the message was written to.
    attr_reader :topic

    # @return [Integer] the partition number that the message was written to.
    attr_reader :partition

    def initialize(message:, topic:, partition:)
      @message = message
      @topic = topic
      @partition = partition
    end

    # @return [String] the value of the message.
    def value
      @message.value
    end

    # @return [String] the key of the message.
    def key
      @message.key
    end

    # @return [Integer] the offset of the message in the partition.
    def offset
      @message.offset
    end

    # @return [Time] the timestamp of the message.
    def create_time
      @message.create_time
    end

    # @return [Hash<String, String>] the headers of the message.
    def headers
      @message.headers
    end

    # @return [Boolean] whether this record is a control record
    def is_control_record
      @message.is_control_record
    end
  end
end

data/lib/kafka/fetched_offset_resolver.rb
@@ -0,0 +1,48 @@
# frozen_string_literal: true

module Kafka
  class FetchedOffsetResolver
    def initialize(logger:)
      @logger = TaggedLogger.new(logger)
    end

    def resolve!(broker, topics)
      pending_topics = filter_pending_topics(topics)
      return topics if pending_topics.empty?

      response = broker.list_offsets(topics: pending_topics)

      pending_topics.each do |topic, partitions|
        partitions.each do |options|
          partition = options.fetch(:partition)
          resolved_offset = response.offset_for(topic, partition)

          @logger.debug "Offset for #{topic}/#{partition} is #{resolved_offset.inspect}"

          topics[topic][partition][:fetch_offset] = resolved_offset || 0
        end
      end
    end

    private

    def filter_pending_topics(topics)
      pending_topics = {}
      topics.each do |topic, partitions|
        partitions.each do |partition, options|
          offset = options.fetch(:fetch_offset)
          next if offset >= 0

          @logger.debug "Resolving offset `#{offset}` for #{topic}/#{partition}..."

          pending_topics[topic] ||= []
          pending_topics[topic] << {
            partition: partition,
            time: offset
          }
        end
      end
      pending_topics
    end
  end
end
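
resolve! mutates the topics hash in place: only partitions whose fetch_offset is a negative sentinel are sent to the broker's ListOffsets API, and their entries are overwritten with the resolved offset (or 0 if none was returned). A sketch, assuming broker responds to #list_offsets as Kafka::Broker does:

    topics = {
      "greetings" => {
        0 => { fetch_offset: -1,  max_bytes: 1048576 }, # :latest, needs resolving
        1 => { fetch_offset: 500, max_bytes: 1048576 }, # concrete, left untouched
      }
    }

    resolver = Kafka::FetchedOffsetResolver.new(logger: logger)
    resolver.resolve!(broker, topics)

    # topics["greetings"][0][:fetch_offset] now holds the broker's latest offset
    # for that partition, or 0 if the broker returned none.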

data/lib/kafka/fetcher.rb
@@ -0,0 +1,221 @@
# frozen_string_literal: true

require "kafka/fetch_operation"

module Kafka
  class Fetcher
    attr_reader :queue

    def initialize(cluster:, logger:, instrumenter:, max_queue_size:, group:)
      @cluster = cluster
      @logger = TaggedLogger.new(logger)
      @instrumenter = instrumenter
      @max_queue_size = max_queue_size
      @group = group

      @queue = Queue.new
      @commands = Queue.new
      @next_offsets = Hash.new { |h, k| h[k] = {} }

      # Long poll until at least this many bytes can be fetched.
      @min_bytes = 1

      # Long poll at most this number of seconds.
      @max_wait_time = 1

      # The maximum number of bytes to fetch for any given fetch request.
      @max_bytes = 10485760

      # The maximum number of bytes to fetch per partition, by topic.
      @max_bytes_per_partition = {}

      # An incrementing counter used to synchronize resets between the
      # foreground and background thread.
      @current_reset_counter = 0
    end

    def subscribe(topic, max_bytes_per_partition:)
      @commands << [:subscribe, [topic, max_bytes_per_partition]]
    end

    def seek(topic, partition, offset)
      @commands << [:seek, [topic, partition, offset]]
    end

    def configure(min_bytes:, max_bytes:, max_wait_time:)
      @commands << [:configure, [min_bytes, max_bytes, max_wait_time]]
    end

    def start
      return if @running

      @running = true

      @thread = Thread.new do
        while @running
          loop
        end
        @logger.info "#{@group} Fetcher thread exited."
      end
      @thread.abort_on_exception = true
    end

    def stop
      return unless @running
      @commands << [:stop, []]
      @thread.join
    end

    def reset
      @current_reset_counter = current_reset_counter + 1
      @commands << [:reset]
    end

    def data?
      !@queue.empty?
    end

    def poll
      tag, message, reset_counter = @queue.deq

      # Batches are tagged with the current reset counter value. If the batch
      # has a reset_counter < current_reset_counter, we know it was fetched
      # prior to the most recent reset and should be discarded.
      if tag == :batches && message.any? && current_reset_counter > reset_counter
        @logger.warn "Skipping stale messages buffered prior to reset"
        return tag, []
      end

      return [tag, message]
    end

    private

    attr_reader :current_reset_counter

    def loop
      @logger.push_tags(@group.to_s)
      @instrumenter.instrument("loop.fetcher", {
        queue_size: @queue.size,
      })

      return unless @running

      if !@commands.empty?
        cmd, args = @commands.deq

        @logger.debug "Handling fetcher command: #{cmd}"

        send("handle_#{cmd}", *args)
      elsif @queue.size < @max_queue_size
        step
      else
        @logger.warn "Reached max fetcher queue size (#{@max_queue_size}), sleeping 1s"
        sleep 1
      end
    ensure
      @logger.pop_tags
    end

    def handle_configure(min_bytes, max_bytes, max_wait_time)
      @min_bytes = min_bytes
      @max_bytes = max_bytes
      @max_wait_time = max_wait_time
    end

    def handle_reset
      @next_offsets.clear
      @queue.clear
    end

    def handle_stop(*)
      @running = false
      @commands.clear

      # After stopping, we need to reconfigure the topics and partitions to fetch
      # from. Otherwise we'd keep fetching from a bunch of partitions we may no
      # longer be assigned.
      handle_reset
    end

    def handle_subscribe(topic, max_bytes_per_partition)
      @logger.info "Will fetch at most #{max_bytes_per_partition} bytes at a time per partition from #{topic}"
      @max_bytes_per_partition[topic] = max_bytes_per_partition
    end

    def handle_seek(topic, partition, offset)
      @instrumenter.instrument('seek.consumer',
        group_id: @group.group_id,
        topic: topic,
        partition: partition,
        offset: offset)
      @logger.info "Seeking #{topic}/#{partition} to offset #{offset}"
      @next_offsets[topic][partition] = offset
    end

    def step
      batches = fetch_batches

      batches.each do |batch|
        unless batch.empty?
          @instrumenter.instrument("fetch_batch.consumer", {
            topic: batch.topic,
            partition: batch.partition,
            offset_lag: batch.offset_lag,
            highwater_mark_offset: batch.highwater_mark_offset,
            message_count: batch.messages.count,
          })
        end

        @next_offsets[batch.topic][batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
      end

      @queue << [:batches, batches, current_reset_counter]
    rescue Kafka::NoPartitionsToFetchFrom
      @logger.warn "No partitions to fetch from, sleeping for 1s"
      sleep 1
    rescue Kafka::Error => e
      @queue << [:exception, e]
    end

    def fetch_batches
      @logger.debug "Fetching batches"

      operation = FetchOperation.new(
        cluster: @cluster,
        logger: @logger,
        min_bytes: @min_bytes,
        max_bytes: @max_bytes,
        max_wait_time: @max_wait_time,
      )

      @next_offsets.each do |topic, partitions|
        # Fetch at most this many bytes from any single partition.
        max_bytes = @max_bytes_per_partition[topic]

        partitions.each do |partition, offset|
          operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
        end
      end

      operation.execute
    rescue UnknownTopicOrPartition
      @logger.error "Failed to fetch from some partitions. Maybe a rebalance has happened? Refreshing cluster info."

      # Our cluster information has become stale, we need to refresh it.
      @cluster.refresh_metadata!

      # Don't overwhelm the brokers in case this keeps happening.
      sleep 10

      retry
    rescue NoPartitionsToFetchFrom
      backoff = @max_wait_time > 0 ? @max_wait_time : 1

      @logger.info "There are no partitions to fetch from, sleeping for #{backoff}s"
      sleep backoff

      []
    end
  end
end
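
The fetcher is driven entirely through its two queues: the foreground (consumer) thread enqueues commands such as :subscribe, :seek, and :configure, while the background thread loops, handles commands, and pushes fetched batches, each tagged with the reset counter so that data fetched before a reset can be discarded in poll. A hedged sketch of how a consumer-side loop might drain the queue (in ruby-kafka this is normally done by Kafka::Consumer; process is a placeholder):

    fetcher.subscribe("greetings", max_bytes_per_partition: 1048576)
    fetcher.seek("greetings", 0, 0)   # start partition 0 at offset 0
    fetcher.start

    loop do
      tag, payload = fetcher.poll

      case tag
      when :batches
        payload.each { |batch| process(batch) } # placeholder for real handling
      when :exception
        raise payload
      end
    end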