ruby-kafka-temp-fork 0.0.1
- checksums.yaml +7 -0
- data/.circleci/config.yml +393 -0
- data/.github/workflows/stale.yml +19 -0
- data/.gitignore +13 -0
- data/.readygo +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +44 -0
- data/.ruby-version +1 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +310 -0
- data/Gemfile +5 -0
- data/ISSUE_TEMPLATE.md +23 -0
- data/LICENSE.txt +176 -0
- data/Procfile +2 -0
- data/README.md +1342 -0
- data/Rakefile +8 -0
- data/benchmarks/message_encoding.rb +23 -0
- data/bin/console +8 -0
- data/bin/setup +5 -0
- data/docker-compose.yml +39 -0
- data/examples/consumer-group.rb +35 -0
- data/examples/firehose-consumer.rb +64 -0
- data/examples/firehose-producer.rb +54 -0
- data/examples/simple-consumer.rb +34 -0
- data/examples/simple-producer.rb +42 -0
- data/examples/ssl-producer.rb +44 -0
- data/lib/kafka.rb +373 -0
- data/lib/kafka/async_producer.rb +291 -0
- data/lib/kafka/broker.rb +217 -0
- data/lib/kafka/broker_info.rb +16 -0
- data/lib/kafka/broker_pool.rb +41 -0
- data/lib/kafka/broker_uri.rb +43 -0
- data/lib/kafka/client.rb +833 -0
- data/lib/kafka/cluster.rb +513 -0
- data/lib/kafka/compression.rb +45 -0
- data/lib/kafka/compressor.rb +86 -0
- data/lib/kafka/connection.rb +223 -0
- data/lib/kafka/connection_builder.rb +33 -0
- data/lib/kafka/consumer.rb +642 -0
- data/lib/kafka/consumer_group.rb +231 -0
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +420 -0
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/fetch_operation.rb +115 -0
- data/lib/kafka/fetched_batch.rb +58 -0
- data/lib/kafka/fetched_batch_generator.rb +120 -0
- data/lib/kafka/fetched_message.rb +48 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +224 -0
- data/lib/kafka/gzip_codec.rb +34 -0
- data/lib/kafka/heartbeat.rb +25 -0
- data/lib/kafka/instrumenter.rb +38 -0
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/lz4_codec.rb +27 -0
- data/lib/kafka/message_buffer.rb +87 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +259 -0
- data/lib/kafka/partitioner.rb +40 -0
- data/lib/kafka/pause.rb +92 -0
- data/lib/kafka/pending_message.rb +29 -0
- data/lib/kafka/pending_message_queue.rb +41 -0
- data/lib/kafka/produce_operation.rb +205 -0
- data/lib/kafka/producer.rb +528 -0
- data/lib/kafka/prometheus.rb +316 -0
- data/lib/kafka/protocol.rb +225 -0
- data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/alter_configs_request.rb +44 -0
- data/lib/kafka/protocol/alter_configs_response.rb +49 -0
- data/lib/kafka/protocol/api_versions_request.rb +21 -0
- data/lib/kafka/protocol/api_versions_response.rb +53 -0
- data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
- data/lib/kafka/protocol/create_partitions_request.rb +42 -0
- data/lib/kafka/protocol/create_partitions_response.rb +28 -0
- data/lib/kafka/protocol/create_topics_request.rb +45 -0
- data/lib/kafka/protocol/create_topics_response.rb +26 -0
- data/lib/kafka/protocol/decoder.rb +175 -0
- data/lib/kafka/protocol/delete_topics_request.rb +33 -0
- data/lib/kafka/protocol/delete_topics_response.rb +26 -0
- data/lib/kafka/protocol/describe_configs_request.rb +35 -0
- data/lib/kafka/protocol/describe_configs_response.rb +73 -0
- data/lib/kafka/protocol/describe_groups_request.rb +27 -0
- data/lib/kafka/protocol/describe_groups_response.rb +73 -0
- data/lib/kafka/protocol/encoder.rb +184 -0
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +70 -0
- data/lib/kafka/protocol/fetch_response.rb +136 -0
- data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
- data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
- data/lib/kafka/protocol/heartbeat_request.rb +27 -0
- data/lib/kafka/protocol/heartbeat_response.rb +17 -0
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/join_group_request.rb +47 -0
- data/lib/kafka/protocol/join_group_response.rb +41 -0
- data/lib/kafka/protocol/leave_group_request.rb +25 -0
- data/lib/kafka/protocol/leave_group_response.rb +17 -0
- data/lib/kafka/protocol/list_groups_request.rb +23 -0
- data/lib/kafka/protocol/list_groups_response.rb +35 -0
- data/lib/kafka/protocol/list_offset_request.rb +53 -0
- data/lib/kafka/protocol/list_offset_response.rb +89 -0
- data/lib/kafka/protocol/member_assignment.rb +42 -0
- data/lib/kafka/protocol/message.rb +172 -0
- data/lib/kafka/protocol/message_set.rb +55 -0
- data/lib/kafka/protocol/metadata_request.rb +31 -0
- data/lib/kafka/protocol/metadata_response.rb +185 -0
- data/lib/kafka/protocol/offset_commit_request.rb +47 -0
- data/lib/kafka/protocol/offset_commit_response.rb +29 -0
- data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
- data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
- data/lib/kafka/protocol/produce_request.rb +94 -0
- data/lib/kafka/protocol/produce_response.rb +63 -0
- data/lib/kafka/protocol/record.rb +88 -0
- data/lib/kafka/protocol/record_batch.rb +223 -0
- data/lib/kafka/protocol/request_message.rb +26 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
- data/lib/kafka/protocol/sync_group_request.rb +33 -0
- data/lib/kafka/protocol/sync_group_response.rb +26 -0
- data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
- data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
- data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
- data/lib/kafka/sasl/gssapi.rb +76 -0
- data/lib/kafka/sasl/oauth.rb +64 -0
- data/lib/kafka/sasl/plain.rb +39 -0
- data/lib/kafka/sasl/scram.rb +180 -0
- data/lib/kafka/sasl_authenticator.rb +61 -0
- data/lib/kafka/snappy_codec.rb +29 -0
- data/lib/kafka/socket_with_timeout.rb +96 -0
- data/lib/kafka/ssl_context.rb +66 -0
- data/lib/kafka/ssl_socket_with_timeout.rb +188 -0
- data/lib/kafka/statsd.rb +296 -0
- data/lib/kafka/tagged_logger.rb +77 -0
- data/lib/kafka/transaction_manager.rb +306 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +5 -0
- data/lib/kafka/zstd_codec.rb +27 -0
- data/lib/ruby-kafka-temp-fork.rb +5 -0
- data/ruby-kafka-temp-fork.gemspec +54 -0
- metadata +520 -0
data/lib/kafka/gzip_codec.rb
@@ -0,0 +1,34 @@
# frozen_string_literal: true

module Kafka
  class GzipCodec
    def codec_id
      1
    end

    def produce_api_min_version
      0
    end

    def load
      require "zlib"
    end

    def compress(data)
      buffer = StringIO.new
      buffer.set_encoding(Encoding::BINARY)

      writer = Zlib::GzipWriter.new(buffer, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
      writer.write(data)
      writer.close

      buffer.string
    end

    def decompress(data)
      buffer = StringIO.new(data)
      reader = Zlib::GzipReader.new(buffer)
      reader.read
    end
  end
end
data/lib/kafka/heartbeat.rb
@@ -0,0 +1,25 @@
# frozen_string_literal: true

module Kafka
  class Heartbeat
    def initialize(group:, interval:, instrumenter:)
      @group = group
      @interval = interval
      @last_heartbeat = Time.now
      @instrumenter = instrumenter
    end

    def trigger!
      @instrumenter.instrument('heartbeat.consumer',
                               group_id: @group.group_id,
                               topic_partitions: @group.assigned_partitions) do
        @group.heartbeat
        @last_heartbeat = Time.now
      end
    end

    def trigger
      trigger! if Time.now > @last_heartbeat + @interval
    end
  end
end
data/lib/kafka/instrumenter.rb
@@ -0,0 +1,38 @@
# frozen_string_literal: true

module Kafka
  class Instrumenter
    NAMESPACE = "kafka"

    def initialize(default_payload = {})
      @default_payload = default_payload

      if defined?(ActiveSupport::Notifications)
        @backend = ActiveSupport::Notifications
      else
        @backend = nil
      end
    end

    def instrument(event_name, payload = {}, &block)
      if @backend
        payload.update(@default_payload)

        @backend.instrument("#{event_name}.#{NAMESPACE}", payload, &block)
      else
        block.call(payload) if block
      end
    end
  end

  class DecoratingInstrumenter
    def initialize(backend, extra_payload = {})
      @backend = backend
      @extra_payload = extra_payload
    end

    def instrument(event_name, payload = {}, &block)
      @backend.instrument(event_name, @extra_payload.merge(payload), &block)
    end
  end
end
data/lib/kafka/interceptors.rb
@@ -0,0 +1,33 @@
# frozen_string_literal: true

module Kafka
  # Holds a list of interceptors that implement `call`
  # and wraps calls to a chain of custom interceptors.
  class Interceptors
    def initialize(interceptors:, logger:)
      @interceptors = interceptors || []
      @logger = TaggedLogger.new(logger)
    end

    # This method is called when the client produces a message or once the batches are fetched.
    # The message returned from the first call is passed to the second interceptor call, and so on in an
    # interceptor chain. This method does not throw exceptions.
    #
    # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
    #   fetched batch.
    #
    # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
    #   returned by the last interceptor.
    def call(intercepted)
      @interceptors.each do |interceptor|
        begin
          intercepted = interceptor.call(intercepted)
        rescue Exception => e
          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
        end
      end

      intercepted
    end
  end
end
data/lib/kafka/lz4_codec.rb
@@ -0,0 +1,27 @@
# frozen_string_literal: true

module Kafka
  class LZ4Codec
    def codec_id
      3
    end

    def produce_api_min_version
      0
    end

    def load
      require "extlz4"
    rescue LoadError
      raise LoadError, "using lz4 compression requires adding a dependency on the `extlz4` gem to your Gemfile."
    end

    def compress(data)
      LZ4.encode(data)
    end

    def decompress(data)
      LZ4.decode(data)
    end
  end
end
data/lib/kafka/message_buffer.rb
@@ -0,0 +1,87 @@
# frozen_string_literal: true

require "kafka/protocol/message"

module Kafka

  # Buffers messages for specific topics/partitions.
  class MessageBuffer
    include Enumerable

    attr_reader :size, :bytesize

    def initialize
      @buffer = {}
      @size = 0
      @bytesize = 0
    end

    def write(value:, key:, topic:, partition:, create_time: Time.now, headers: {})
      message = Protocol::Record.new(key: key, value: value, create_time: create_time, headers: headers)

      buffer_for(topic, partition) << message

      @size += 1
      @bytesize += message.bytesize
    end

    def concat(messages, topic:, partition:)
      buffer_for(topic, partition).concat(messages)

      @size += messages.count
      @bytesize += messages.map(&:bytesize).reduce(0, :+)
    end

    def to_h
      @buffer
    end

    def empty?
      @buffer.empty?
    end

    def each
      @buffer.each do |topic, messages_for_topic|
        messages_for_topic.each do |partition, messages_for_partition|
          yield topic, partition, messages_for_partition
        end
      end
    end

    # Clears buffered messages for the given topic and partition.
    #
    # @param topic [String] the name of the topic.
    # @param partition [Integer] the partition id.
    #
    # @return [nil]
    def clear_messages(topic:, partition:)
      return unless @buffer.key?(topic) && @buffer[topic].key?(partition)

      @size -= @buffer[topic][partition].count
      @bytesize -= @buffer[topic][partition].map(&:bytesize).reduce(0, :+)

      @buffer[topic].delete(partition)
      @buffer.delete(topic) if @buffer[topic].empty?
    end

    def messages_for(topic:, partition:)
      buffer_for(topic, partition)
    end

    # Clears messages across all topics and partitions.
    #
    # @return [nil]
    def clear
      @buffer = {}
      @size = 0
      @bytesize = 0
    end

    private

    def buffer_for(topic, partition)
      @buffer[topic] ||= {}
      @buffer[topic][partition] ||= []
    end
  end
end
data/lib/kafka/murmur2_hash.rb
@@ -0,0 +1,17 @@
# frozen_string_literal: true

module Kafka
  class Murmur2Hash
    SEED = [0x9747b28c].pack('L')

    def load
      require 'digest/murmurhash'
    rescue LoadError
      raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
    end

    def hash(value)
      ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
    end
  end
end
data/lib/kafka/offset_manager.rb
@@ -0,0 +1,259 @@
# frozen_string_literal: true

module Kafka

  # Manages a consumer's position in partitions, figures out where to resume processing
  # from, etc.
  class OffsetManager

    # The default broker setting for offsets.retention.minutes is 1440.
    DEFAULT_RETENTION_TIME = 1440 * 60

    def initialize(cluster:, group:, fetcher:, logger:, commit_interval:, commit_threshold:, offset_retention_time:)
      @cluster = cluster
      @group = group
      @fetcher = fetcher
      @logger = TaggedLogger.new(logger)
      @commit_interval = commit_interval
      @commit_threshold = commit_threshold

      @uncommitted_offsets = 0
      @processed_offsets = {}
      @default_offsets = {}
      @committed_offsets = nil
      @resolved_offsets = {}
      @last_commit = Time.now
      @last_recommit = nil
      @recommit_interval = (offset_retention_time || DEFAULT_RETENTION_TIME) / 2
    end

    # Set the default offset for a topic.
    #
    # When the consumer is started for the first time, or in cases where it gets stuck and
    # has to reset its position, it must start either with the earliest messages or with
    # the latest, skipping to the very end of each partition.
    #
    # @param topic [String] the name of the topic.
    # @param default_offset [Symbol] either `:earliest` or `:latest`.
    # @return [nil]
    def set_default_offset(topic, default_offset)
      @default_offsets[topic] = default_offset
    end

    # Mark a message as having been processed.
    #
    # When offsets are committed, the message's offset will be stored in Kafka so
    # that we can resume from this point at a later time.
    #
    # @param topic [String] the name of the topic.
    # @param partition [Integer] the partition number.
    # @param offset [Integer] the offset of the message that should be marked as processed.
    # @return [nil]
    def mark_as_processed(topic, partition, offset)
      unless @group.assigned_to?(topic, partition)
        @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
        return
      end
      @processed_offsets[topic] ||= {}

      last_processed_offset = @processed_offsets[topic][partition] || -1
      if last_processed_offset > offset + 1
        @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
        return
      end

      @uncommitted_offsets += 1

      # The committed offset should always be the offset of the next message that the
      # application will read, thus adding one to the last message processed.
      @processed_offsets[topic][partition] = offset + 1
      @logger.debug "Marking #{topic}/#{partition}:#{offset} as processed"
    end

    # Move the consumer's position in the partition back to the configured default
    # offset, either the first or latest in the partition.
    #
    # @param topic [String] the name of the topic.
    # @param partition [Integer] the partition number.
    # @return [nil]
    def seek_to_default(topic, partition)
      # Remove any cached offset, in case things have changed broker-side.
      clear_resolved_offset(topic)

      offset = resolve_offset(topic, partition)

      seek_to(topic, partition, offset)
    end

    # Move the consumer's position in the partition to the specified offset.
    #
    # @param topic [String] the name of the topic.
    # @param partition [Integer] the partition number.
    # @param offset [Integer] the offset that the consumer position should be moved to.
    # @return [nil]
    def seek_to(topic, partition, offset)
      @processed_offsets[topic] ||= {}
      @processed_offsets[topic][partition] = offset

      @fetcher.seek(topic, partition, offset)
    end

    # Return the next offset that should be fetched for the specified partition.
    #
    # @param topic [String] the name of the topic.
    # @param partition [Integer] the partition number.
    # @return [Integer] the next offset that should be fetched.
    def next_offset_for(topic, partition)
      offset = @processed_offsets.fetch(topic, {}).fetch(partition) {
        committed_offset_for(topic, partition)
      }

      # A negative offset means that no offset has been committed, so we need to
      # resolve the default offset for the topic.
      if offset < 0
        resolve_offset(topic, partition)
      else
        # The next offset is the last offset.
        offset
      end
    end

    # Commit offsets of messages that have been marked as processed.
    #
    # If `recommit` is set to true, we will also commit the existing positions
    # even if no messages have been processed on a partition. This is done
    # in order to avoid the offset information expiring in cases where messages
    # are very rare -- it's essentially a keep-alive.
    #
    # @param recommit [Boolean] whether to recommit offsets that have already been
    #   committed.
    # @return [nil]
    def commit_offsets(recommit = false)
      offsets = offsets_to_commit(recommit)
      unless offsets.empty?
        @logger.debug "Committing offsets#{recommit ? ' with recommit' : ''}: #{prettify_offsets(offsets)}"

        @group.commit_offsets(offsets)

        @last_commit = Time.now
        @last_recommit = Time.now if recommit

        @uncommitted_offsets = 0
        @committed_offsets = nil
      end
    end

    # Commit offsets if necessary, according to the offset commit policy specified
    # when initializing the class.
    #
    # @return [nil]
    def commit_offsets_if_necessary
      recommit = recommit_timeout_reached?
      if recommit || commit_timeout_reached? || commit_threshold_reached?
        commit_offsets(recommit)
      end
    end

    # Clear all stored offset information.
    #
    # @return [nil]
    def clear_offsets
      @processed_offsets.clear
      @resolved_offsets.clear

      # Clear the cached commits from the brokers.
      @committed_offsets = nil
    end

    # Clear stored offset information for all partitions except those specified
    # in `excluded`.
    #
    #     offset_manager.clear_offsets_excluding("my-topic" => [1, 2, 3])
    #
    # @return [nil]
    def clear_offsets_excluding(excluded)
      # Clear all offsets that aren't in `excluded`.
      @processed_offsets.each do |topic, partitions|
        partitions.keep_if do |partition, _|
          excluded.fetch(topic, []).include?(partition)
        end
      end

      # Clear the cached commits from the brokers.
      @committed_offsets = nil
      @resolved_offsets.clear
    end

    private

    def clear_resolved_offset(topic)
      @resolved_offsets.delete(topic)
    end

    def resolve_offset(topic, partition)
      @resolved_offsets[topic] ||= fetch_resolved_offsets(topic)
      @resolved_offsets[topic].fetch(partition)
    end

    def fetch_resolved_offsets(topic)
      default_offset = @default_offsets.fetch(topic)
      partitions = @group.assigned_partitions.fetch(topic)

      @cluster.resolve_offsets(topic, partitions, default_offset)
    end

    def seconds_since(time)
      Time.now - time
    end

    def seconds_since_last_commit
      seconds_since(@last_commit)
    end

    def committed_offsets
      @committed_offsets ||= @group.fetch_offsets
    end

    def committed_offset_for(topic, partition)
      committed_offsets.offset_for(topic, partition)
    end

    def offsets_to_commit(recommit = false)
      if recommit
        offsets_to_recommit.merge!(@processed_offsets) do |_topic, committed, processed|
          committed.merge!(processed)
        end
      else
        @processed_offsets
      end
    end

    def offsets_to_recommit
      committed_offsets.topics.each_with_object({}) do |(topic, partition_info), offsets|
        topic_offsets = partition_info.keys.each_with_object({}) do |partition, partition_map|
          offset = committed_offsets.offset_for(topic, partition)
          partition_map[partition] = offset unless offset == -1
        end
        offsets[topic] = topic_offsets unless topic_offsets.empty?
      end
    end

    def recommit_timeout_reached?
      @last_recommit.nil? || seconds_since(@last_recommit) >= @recommit_interval
    end

    def commit_timeout_reached?
      @commit_interval != 0 && seconds_since_last_commit >= @commit_interval
    end

    def commit_threshold_reached?
      @commit_threshold != 0 && @uncommitted_offsets >= @commit_threshold
    end

    def prettify_offsets(offsets)
      offsets.flat_map do |topic, partitions|
        partitions.map { |partition, offset| "#{topic}/#{partition}:#{offset}" }
      end.join(', ')
    end
  end
end