ruby-kafka-aws-iam 1.4.1
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +393 -0
- data/.github/workflows/stale.yml +19 -0
- data/.gitignore +13 -0
- data/.readygo +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +44 -0
- data/.ruby-version +1 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +314 -0
- data/Gemfile +5 -0
- data/ISSUE_TEMPLATE.md +23 -0
- data/LICENSE.txt +176 -0
- data/Procfile +2 -0
- data/README.md +1356 -0
- data/Rakefile +8 -0
- data/benchmarks/message_encoding.rb +23 -0
- data/bin/console +8 -0
- data/bin/setup +5 -0
- data/docker-compose.yml +39 -0
- data/examples/consumer-group.rb +35 -0
- data/examples/firehose-consumer.rb +64 -0
- data/examples/firehose-producer.rb +54 -0
- data/examples/simple-consumer.rb +34 -0
- data/examples/simple-producer.rb +42 -0
- data/examples/ssl-producer.rb +44 -0
- data/lib/kafka/async_producer.rb +297 -0
- data/lib/kafka/broker.rb +217 -0
- data/lib/kafka/broker_info.rb +16 -0
- data/lib/kafka/broker_pool.rb +41 -0
- data/lib/kafka/broker_uri.rb +43 -0
- data/lib/kafka/client.rb +838 -0
- data/lib/kafka/cluster.rb +513 -0
- data/lib/kafka/compression.rb +45 -0
- data/lib/kafka/compressor.rb +86 -0
- data/lib/kafka/connection.rb +228 -0
- data/lib/kafka/connection_builder.rb +33 -0
- data/lib/kafka/consumer.rb +642 -0
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/consumer_group.rb +231 -0
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +420 -0
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/fetch_operation.rb +115 -0
- data/lib/kafka/fetched_batch.rb +58 -0
- data/lib/kafka/fetched_batch_generator.rb +120 -0
- data/lib/kafka/fetched_message.rb +48 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +224 -0
- data/lib/kafka/gzip_codec.rb +34 -0
- data/lib/kafka/heartbeat.rb +25 -0
- data/lib/kafka/instrumenter.rb +38 -0
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/lz4_codec.rb +27 -0
- data/lib/kafka/message_buffer.rb +87 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +259 -0
- data/lib/kafka/partitioner.rb +40 -0
- data/lib/kafka/pause.rb +92 -0
- data/lib/kafka/pending_message.rb +29 -0
- data/lib/kafka/pending_message_queue.rb +41 -0
- data/lib/kafka/produce_operation.rb +205 -0
- data/lib/kafka/producer.rb +528 -0
- data/lib/kafka/prometheus.rb +316 -0
- data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/alter_configs_request.rb +44 -0
- data/lib/kafka/protocol/alter_configs_response.rb +49 -0
- data/lib/kafka/protocol/api_versions_request.rb +21 -0
- data/lib/kafka/protocol/api_versions_response.rb +53 -0
- data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
- data/lib/kafka/protocol/create_partitions_request.rb +42 -0
- data/lib/kafka/protocol/create_partitions_response.rb +28 -0
- data/lib/kafka/protocol/create_topics_request.rb +45 -0
- data/lib/kafka/protocol/create_topics_response.rb +26 -0
- data/lib/kafka/protocol/decoder.rb +175 -0
- data/lib/kafka/protocol/delete_topics_request.rb +33 -0
- data/lib/kafka/protocol/delete_topics_response.rb +26 -0
- data/lib/kafka/protocol/describe_configs_request.rb +35 -0
- data/lib/kafka/protocol/describe_configs_response.rb +73 -0
- data/lib/kafka/protocol/describe_groups_request.rb +27 -0
- data/lib/kafka/protocol/describe_groups_response.rb +73 -0
- data/lib/kafka/protocol/encoder.rb +184 -0
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +70 -0
- data/lib/kafka/protocol/fetch_response.rb +136 -0
- data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
- data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
- data/lib/kafka/protocol/heartbeat_request.rb +27 -0
- data/lib/kafka/protocol/heartbeat_response.rb +17 -0
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/join_group_request.rb +47 -0
- data/lib/kafka/protocol/join_group_response.rb +41 -0
- data/lib/kafka/protocol/leave_group_request.rb +25 -0
- data/lib/kafka/protocol/leave_group_response.rb +17 -0
- data/lib/kafka/protocol/list_groups_request.rb +23 -0
- data/lib/kafka/protocol/list_groups_response.rb +35 -0
- data/lib/kafka/protocol/list_offset_request.rb +53 -0
- data/lib/kafka/protocol/list_offset_response.rb +89 -0
- data/lib/kafka/protocol/member_assignment.rb +42 -0
- data/lib/kafka/protocol/message.rb +172 -0
- data/lib/kafka/protocol/message_set.rb +55 -0
- data/lib/kafka/protocol/metadata_request.rb +31 -0
- data/lib/kafka/protocol/metadata_response.rb +185 -0
- data/lib/kafka/protocol/offset_commit_request.rb +47 -0
- data/lib/kafka/protocol/offset_commit_response.rb +29 -0
- data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
- data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
- data/lib/kafka/protocol/produce_request.rb +94 -0
- data/lib/kafka/protocol/produce_response.rb +63 -0
- data/lib/kafka/protocol/record.rb +88 -0
- data/lib/kafka/protocol/record_batch.rb +223 -0
- data/lib/kafka/protocol/request_message.rb +26 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
- data/lib/kafka/protocol/sync_group_request.rb +33 -0
- data/lib/kafka/protocol/sync_group_response.rb +26 -0
- data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
- data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
- data/lib/kafka/protocol.rb +225 -0
- data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
- data/lib/kafka/sasl/awsmskiam.rb +128 -0
- data/lib/kafka/sasl/gssapi.rb +76 -0
- data/lib/kafka/sasl/oauth.rb +64 -0
- data/lib/kafka/sasl/plain.rb +39 -0
- data/lib/kafka/sasl/scram.rb +180 -0
- data/lib/kafka/sasl_authenticator.rb +73 -0
- data/lib/kafka/snappy_codec.rb +29 -0
- data/lib/kafka/socket_with_timeout.rb +96 -0
- data/lib/kafka/ssl_context.rb +66 -0
- data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
- data/lib/kafka/statsd.rb +296 -0
- data/lib/kafka/tagged_logger.rb +77 -0
- data/lib/kafka/transaction_manager.rb +306 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +5 -0
- data/lib/kafka/zstd_codec.rb +27 -0
- data/lib/kafka.rb +373 -0
- data/lib/ruby-kafka.rb +5 -0
- data/ruby-kafka.gemspec +54 -0
- metadata +520 -0
data/lib/kafka/instrumenter.rb
ADDED
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Instrumenter
+    NAMESPACE = "kafka"
+
+    def initialize(default_payload = {})
+      @default_payload = default_payload
+
+      if defined?(ActiveSupport::Notifications)
+        @backend = ActiveSupport::Notifications
+      else
+        @backend = nil
+      end
+    end
+
+    def instrument(event_name, payload = {}, &block)
+      if @backend
+        payload.update(@default_payload)
+
+        @backend.instrument("#{event_name}.#{NAMESPACE}", payload, &block)
+      else
+        block.call(payload) if block
+      end
+    end
+  end
+
+  class DecoratingInstrumenter
+    def initialize(backend, extra_payload = {})
+      @backend = backend
+      @extra_payload = extra_payload
+    end
+
+    def instrument(event_name, payload = {}, &block)
+      @backend.instrument(event_name, @extra_payload.merge(payload), &block)
+    end
+  end
+end
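Since the instrumenter publishes events through ActiveSupport::Notifications under the `kafka` namespace whenever that library is loaded, subscribers can hook into the suffixed event names. A minimal sketch (editor's illustration, not part of the package; the event name and payload keys are merely plausible examples):

require "active_support/notifications"
require "kafka"

# Events are published as "<event_name>.kafka"; the handler arguments follow
# the standard ActiveSupport::Notifications.subscribe contract.
ActiveSupport::Notifications.subscribe("request.connection.kafka") do |name, start, finish, _id, payload|
  puts "#{name} took #{finish - start}s, payload keys: #{payload.keys.inspect}"
end

# The default payload (here client_id) is merged into every event's payload.
instrumenter = Kafka::Instrumenter.new(client_id: "my-client")
instrumenter.instrument("request.connection", api: :produce) do |payload|
  # do the timed work here; mutations to payload are visible to subscribers
end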
data/lib/kafka/interceptors.rb
ADDED
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+module Kafka
+  # Holds a list of interceptors that implement `call`
+  # and wraps calls to a chain of custom interceptors.
+  class Interceptors
+    def initialize(interceptors:, logger:)
+      @interceptors = interceptors || []
+      @logger = TaggedLogger.new(logger)
+    end
+
+    # This method is called when the client produces a message or once the batches are fetched.
+    # The message returned from the first call is passed to the second interceptor call, and so on in an
+    # interceptor chain. This method does not throw exceptions.
+    #
+    # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+    #   fetched batch.
+    #
+    # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+    #   returned by the last interceptor.
+    def call(intercepted)
+      @interceptors.each do |interceptor|
+        begin
+          intercepted = interceptor.call(intercepted)
+        rescue Exception => e
+          @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+        end
+      end
+
+      intercepted
+    end
+  end
+end
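Any object responding to `call` and returning the (possibly transformed) message or batch can participate in the chain. A hedged usage sketch, assuming the full gem is loaded:

require "logger"
require "kafka"

# Hypothetical interceptor: inspects the message, then returns it unchanged.
class LoggingInterceptor
  def call(message)
    puts "producing to #{message.topic}/#{message.partition}"
    message # returning the message keeps the chain going
  end
end

chain = Kafka::Interceptors.new(interceptors: [LoggingInterceptor.new], logger: Logger.new($stdout))

message = Kafka::PendingMessage.new(
  value: "hi", key: nil, headers: {}, topic: "events",
  partition: 0, partition_key: nil, create_time: Time.now
)
chain.call(message) # interceptor errors are logged as warnings, never raised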
data/lib/kafka/lz4_codec.rb
ADDED
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Kafka
+  class LZ4Codec
+    def codec_id
+      3
+    end
+
+    def produce_api_min_version
+      0
+    end
+
+    def load
+      require "extlz4"
+    rescue LoadError
+      raise LoadError, "using lz4 compression requires adding a dependency on the `extlz4` gem to your Gemfile."
+    end
+
+    def compress(data)
+      LZ4.encode(data)
+    end
+
+    def decompress(data)
+      LZ4.decode(data)
+    end
+  end
+end
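A round-trip sketch of the codec above, assuming the optional `extlz4` gem is installed:

codec = Kafka::LZ4Codec.new
codec.load # raises a descriptive LoadError unless the `extlz4` gem is present

compressed = codec.compress("hello " * 100)
codec.decompress(compressed) # => the original "hello hello ..." string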
data/lib/kafka/message_buffer.rb
ADDED
@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+
+require "kafka/protocol/message"
+
+module Kafka
+
+  # Buffers messages for specific topics/partitions.
+  class MessageBuffer
+    include Enumerable
+
+    attr_reader :size, :bytesize
+
+    def initialize
+      @buffer = {}
+      @size = 0
+      @bytesize = 0
+    end
+
+    def write(value:, key:, topic:, partition:, create_time: Time.now, headers: {})
+      message = Protocol::Record.new(key: key, value: value, create_time: create_time, headers: headers)
+
+      buffer_for(topic, partition) << message
+
+      @size += 1
+      @bytesize += message.bytesize
+    end
+
+    def concat(messages, topic:, partition:)
+      buffer_for(topic, partition).concat(messages)
+
+      @size += messages.count
+      @bytesize += messages.map(&:bytesize).reduce(0, :+)
+    end
+
+    def to_h
+      @buffer
+    end
+
+    def empty?
+      @buffer.empty?
+    end
+
+    def each
+      @buffer.each do |topic, messages_for_topic|
+        messages_for_topic.each do |partition, messages_for_partition|
+          yield topic, partition, messages_for_partition
+        end
+      end
+    end
+
+    # Clears buffered messages for the given topic and partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition id.
+    #
+    # @return [nil]
+    def clear_messages(topic:, partition:)
+      return unless @buffer.key?(topic) && @buffer[topic].key?(partition)
+
+      @size -= @buffer[topic][partition].count
+      @bytesize -= @buffer[topic][partition].map(&:bytesize).reduce(0, :+)
+
+      @buffer[topic].delete(partition)
+      @buffer.delete(topic) if @buffer[topic].empty?
+    end
+
+    def messages_for(topic:, partition:)
+      buffer_for(topic, partition)
+    end
+
+    # Clears messages across all topics and partitions.
+    #
+    # @return [nil]
+    def clear
+      @buffer = {}
+      @size = 0
+      @bytesize = 0
+    end
+
+    private
+
+    def buffer_for(topic, partition)
+      @buffer[topic] ||= {}
+      @buffer[topic][partition] ||= []
+    end
+  end
+end
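A short usage sketch of the buffer, assuming the full gem (including Kafka::Protocol::Record) is loaded:

buffer = Kafka::MessageBuffer.new
buffer.write(value: "hello", key: "greeting", topic: "events", partition: 0)

buffer.size     # => 1
buffer.bytesize # running byte count, used when enforcing buffer limits

buffer.each do |topic, partition, messages|
  # messages is the Array of Protocol::Record buffered for events/0
end

buffer.clear_messages(topic: "events", partition: 0)
buffer.empty? # => true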
data/lib/kafka/murmur2_hash.rb
ADDED
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Murmur2Hash
+    SEED = [0x9747b28c].pack('L')
+
+    def load
+      require 'digest/murmurhash'
+    rescue LoadError
+      raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
+    end
+
+    def hash(value)
+      ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
+    end
+  end
+end
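A usage sketch; the masking detail is worth noting because it keeps partition arithmetic non-negative:

hasher = Kafka::Murmur2Hash.new
hasher.load # raises unless the `digest-murmurhash` gem is present

# The 0x7fffffff mask clears the sign bit, so the modulo below never goes negative.
hasher.hash("user-123") % 6 # stable partition index for a 6-partition topic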
data/lib/kafka/offset_manager.rb
ADDED
@@ -0,0 +1,259 @@
+# frozen_string_literal: true
+
+module Kafka
+
+  # Manages a consumer's position in partitions, figures out where to resume processing
+  # from, etc.
+  class OffsetManager
+
+    # The default broker setting for offsets.retention.minutes is 1440.
+    DEFAULT_RETENTION_TIME = 1440 * 60
+
+    def initialize(cluster:, group:, fetcher:, logger:, commit_interval:, commit_threshold:, offset_retention_time:)
+      @cluster = cluster
+      @group = group
+      @fetcher = fetcher
+      @logger = TaggedLogger.new(logger)
+      @commit_interval = commit_interval
+      @commit_threshold = commit_threshold
+
+      @uncommitted_offsets = 0
+      @processed_offsets = {}
+      @default_offsets = {}
+      @committed_offsets = nil
+      @resolved_offsets = {}
+      @last_commit = Time.now
+      @last_recommit = nil
+      @recommit_interval = (offset_retention_time || DEFAULT_RETENTION_TIME) / 2
+    end
+
+    # Set the default offset for a topic.
+    #
+    # When the consumer is started for the first time, or in cases where it gets stuck and
+    # has to reset its position, it must start either with the earliest messages or with
+    # the latest, skipping to the very end of each partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param default_offset [Symbol] either `:earliest` or `:latest`.
+    # @return [nil]
+    def set_default_offset(topic, default_offset)
+      @default_offsets[topic] = default_offset
+    end
+
+    # Mark a message as having been processed.
+    #
+    # When offsets are committed, the message's offset will be stored in Kafka so
+    # that we can resume from this point at a later time.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @param offset [Integer] the offset of the message that should be marked as processed.
+    # @return [nil]
+    def mark_as_processed(topic, partition, offset)
+      unless @group.assigned_to?(topic, partition)
+        @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+        return
+      end
+      @processed_offsets[topic] ||= {}
+
+      last_processed_offset = @processed_offsets[topic][partition] || -1
+      if last_processed_offset > offset + 1
+        @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+        return
+      end
+
+      @uncommitted_offsets += 1
+
+      # The committed offset should always be the offset of the next message that the
+      # application will read, thus adding one to the last message processed.
+      @processed_offsets[topic][partition] = offset + 1
+      @logger.debug "Marking #{topic}/#{partition}:#{offset} as processed"
+    end
+
+    # Move the consumer's position in the partition back to the configured default
+    # offset, either the first or latest in the partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @return [nil]
+    def seek_to_default(topic, partition)
+      # Remove any cached offset, in case things have changed broker-side.
+      clear_resolved_offset(topic)
+
+      offset = resolve_offset(topic, partition)
+
+      seek_to(topic, partition, offset)
+    end
+
+    # Move the consumer's position in the partition to the specified offset.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @param offset [Integer] the offset that the consumer position should be moved to.
+    # @return [nil]
+    def seek_to(topic, partition, offset)
+      @processed_offsets[topic] ||= {}
+      @processed_offsets[topic][partition] = offset
+
+      @fetcher.seek(topic, partition, offset)
+    end
+
+    # Return the next offset that should be fetched for the specified partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @return [Integer] the next offset that should be fetched.
+    def next_offset_for(topic, partition)
+      offset = @processed_offsets.fetch(topic, {}).fetch(partition) {
+        committed_offset_for(topic, partition)
+      }
+
+      # A negative offset means that no offset has been committed, so we need to
+      # resolve the default offset for the topic.
+      if offset < 0
+        resolve_offset(topic, partition)
+      else
+        # The next offset is the last offset.
+        offset
+      end
+    end
+
+    # Commit offsets of messages that have been marked as processed.
+    #
+    # If `recommit` is set to true, we will also commit the existing positions
+    # even if no messages have been processed on a partition. This is done
+    # in order to avoid the offset information expiring in cases where messages
+    # are very rare -- it's essentially a keep-alive.
+    #
+    # @param recommit [Boolean] whether to recommit offsets that have already been
+    #   committed.
+    # @return [nil]
+    def commit_offsets(recommit = false)
+      offsets = offsets_to_commit(recommit)
+      unless offsets.empty?
+        @logger.debug "Committing offsets#{recommit ? ' with recommit' : ''}: #{prettify_offsets(offsets)}"
+
+        @group.commit_offsets(offsets)
+
+        @last_commit = Time.now
+        @last_recommit = Time.now if recommit
+
+        @uncommitted_offsets = 0
+        @committed_offsets = nil
+      end
+    end
+
+    # Commit offsets if necessary, according to the offset commit policy specified
+    # when initializing the class.
+    #
+    # @return [nil]
+    def commit_offsets_if_necessary
+      recommit = recommit_timeout_reached?
+      if recommit || commit_timeout_reached? || commit_threshold_reached?
+        commit_offsets(recommit)
+      end
+    end
+
+    # Clear all stored offset information.
+    #
+    # @return [nil]
+    def clear_offsets
+      @processed_offsets.clear
+      @resolved_offsets.clear
+
+      # Clear the cached commits from the brokers.
+      @committed_offsets = nil
+    end
+
+    # Clear stored offset information for all partitions except those specified
+    # in `excluded`.
+    #
+    #     offset_manager.clear_offsets_excluding("my-topic" => [1, 2, 3])
+    #
+    # @return [nil]
+    def clear_offsets_excluding(excluded)
+      # Clear all offsets that aren't in `excluded`.
+      @processed_offsets.each do |topic, partitions|
+        partitions.keep_if do |partition, _|
+          excluded.fetch(topic, []).include?(partition)
+        end
+      end
+
+      # Clear the cached commits from the brokers.
+      @committed_offsets = nil
+      @resolved_offsets.clear
+    end
+
+    private
+
+    def clear_resolved_offset(topic)
+      @resolved_offsets.delete(topic)
+    end
+
+    def resolve_offset(topic, partition)
+      @resolved_offsets[topic] ||= fetch_resolved_offsets(topic)
+      @resolved_offsets[topic].fetch(partition)
+    end
+
+    def fetch_resolved_offsets(topic)
+      default_offset = @default_offsets.fetch(topic)
+      partitions = @group.assigned_partitions.fetch(topic)
+
+      @cluster.resolve_offsets(topic, partitions, default_offset)
+    end
+
+    def seconds_since(time)
+      Time.now - time
+    end
+
+    def seconds_since_last_commit
+      seconds_since(@last_commit)
+    end
+
+    def committed_offsets
+      @committed_offsets ||= @group.fetch_offsets
+    end
+
+    def committed_offset_for(topic, partition)
+      committed_offsets.offset_for(topic, partition)
+    end
+
+    def offsets_to_commit(recommit = false)
+      if recommit
+        offsets_to_recommit.merge!(@processed_offsets) do |_topic, committed, processed|
+          committed.merge!(processed)
+        end
+      else
+        @processed_offsets
+      end
+    end
+
+    def offsets_to_recommit
+      committed_offsets.topics.each_with_object({}) do |(topic, partition_info), offsets|
+        topic_offsets = partition_info.keys.each_with_object({}) do |partition, partition_map|
+          offset = committed_offsets.offset_for(topic, partition)
+          partition_map[partition] = offset unless offset == -1
+        end
+        offsets[topic] = topic_offsets unless topic_offsets.empty?
+      end
+    end
+
+    def recommit_timeout_reached?
+      @last_recommit.nil? || seconds_since(@last_recommit) >= @recommit_interval
+    end
+
+    def commit_timeout_reached?
+      @commit_interval != 0 && seconds_since_last_commit >= @commit_interval
+    end
+
+    def commit_threshold_reached?
+      @commit_threshold != 0 && @uncommitted_offsets >= @commit_threshold
+    end
+
+    def prettify_offsets(offsets)
+      offsets.flat_map do |topic, partitions|
+        partitions.map { |partition, offset| "#{topic}/#{partition}:#{offset}" }
+      end.join(', ')
+    end
+  end
+end
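A wiring sketch for orientation only; `cluster`, `group`, and `fetcher` stand in for collaborators that Kafka::Consumer normally constructs, so this is not runnable as-is:

offsets = Kafka::OffsetManager.new(
  cluster: cluster, group: group, fetcher: fetcher, # assumed collaborators
  logger: Logger.new($stdout),
  commit_interval: 10,       # commit at least every 10 seconds...
  commit_threshold: 100,     # ...or once 100 offsets are uncommitted
  offset_retention_time: nil # nil falls back to DEFAULT_RETENTION_TIME
)

offsets.set_default_offset("events", :earliest)
offsets.mark_as_processed("events", 0, 42) # stores 43, the next offset to fetch
offsets.commit_offsets_if_necessary        # with defaults, also recommits every
                                           # 12 hours (86_400s retention / 2)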
data/lib/kafka/partitioner.rb
ADDED
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+require "kafka/digest"
+
+module Kafka
+
+  # Assigns partitions to messages.
+  class Partitioner
+    # @param hash_function [Symbol, nil] the algorithm used to compute a message's
+    #   destination partition. Default is :crc32.
+    def initialize(hash_function: nil)
+      @digest = Digest.find_digest(hash_function || :crc32)
+    end
+
+    # Assigns a partition number based on a partition key. If no explicit
+    # partition key is provided, the message key will be used instead.
+    #
+    # If the key is nil, then a random partition is selected. Otherwise, a digest
+    # of the key is used to deterministically find a partition. As long as the
+    # number of partitions doesn't change, the same key will always be assigned
+    # to the same partition.
+    #
+    # @param partition_count [Integer] the number of partitions in the topic.
+    # @param message [Kafka::PendingMessage] the message that should be assigned
+    #   a partition.
+    # @return [Integer] the partition number.
+    def call(partition_count, message)
+      raise ArgumentError if partition_count == 0
+
+      # If no explicit partition key is specified we use the message key instead.
+      key = message.partition_key || message.key
+
+      if key.nil?
+        rand(partition_count)
+      else
+        @digest.hash(key) % partition_count
+      end
+    end
+  end
+end
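A determinism sketch using the PendingMessage class that appears later in this diff (the murmur2 function additionally requires the `digest-murmurhash` gem):

partitioner = Kafka::Partitioner.new(hash_function: :murmur2)

message = Kafka::PendingMessage.new(
  value: "hello", key: "user-123", headers: {}, topic: "events",
  partition: nil, partition_key: nil, create_time: Time.now
)

# Same key => same partition, as long as the partition count is unchanged.
partitioner.call(6, message) # => an Integer in 0...6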
data/lib/kafka/pause.rb
ADDED
@@ -0,0 +1,92 @@
+# frozen_string_literal: true
+
+module Kafka
+  # Manages the pause state of a partition.
+  #
+  # The processing of messages in a partition can be paused, e.g. if there was
+  # an exception during processing. This could be caused by a downstream service
+  # not being available. A typical way of solving such an issue is to back off
+  # for a little while and then try again. In order to do that, _pause_ the
+  # partition.
+  class Pause
+    def initialize(clock: Time)
+      @clock = clock
+      @started_at = nil
+      @pauses = 0
+      @timeout = nil
+      @max_timeout = nil
+      @exponential_backoff = false
+    end
+
+    # Mark the partition as paused.
+    #
+    # If exponential backoff is enabled, each subsequent pause of a partition will
+    # cause a doubling of the actual timeout, i.e. for pause number _n_, the actual
+    # timeout will be _2^n * timeout_.
+    #
+    # Only when {#reset!} is called is this state cleared.
+    #
+    # @param timeout [nil, Integer] if specified, the partition will automatically
+    #   resume after this many seconds.
+    # @param exponential_backoff [Boolean] whether to enable exponential timeouts.
+    def pause!(timeout: nil, max_timeout: nil, exponential_backoff: false)
+      @started_at = @clock.now
+      @timeout = timeout
+      @max_timeout = max_timeout
+      @exponential_backoff = exponential_backoff
+      @pauses += 1
+    end
+
+    # Resumes the partition.
+    #
+    # The number of pauses is still retained, and if the partition is paused again
+    # it may be with an exponential backoff.
+    def resume!
+      @started_at = nil
+      @timeout = nil
+      @max_timeout = nil
+    end
+
+    # Whether the partition is currently paused. The pause may have expired, in which
+    # case {#expired?} should be checked as well.
+    def paused?
+      # This is nil if we're not currently paused.
+      !@started_at.nil?
+    end
+
+    def pause_duration
+      if paused?
+        Time.now - @started_at
+      else
+        0
+      end
+    end
+
+    # Whether the pause has expired.
+    def expired?
+      # We never expire the pause if timeout is nil.
+      return false if @timeout.nil?
+
+      # Have we passed the end of the pause duration?
+      @clock.now >= ends_at
+    end
+
+    # Resets the pause state, ensuring that the next pause is not exponential.
+    def reset!
+      @pauses = 0
+    end
+
+    private
+
+    def ends_at
+      # Apply an exponential backoff to the timeout.
+      backoff_factor = @exponential_backoff ? 2**(@pauses - 1) : 1
+      timeout = backoff_factor * @timeout
+
+      # If set, don't allow a timeout longer than max_timeout.
+      timeout = @max_timeout if @max_timeout && timeout > @max_timeout
+
+      @started_at + timeout
+    end
+  end
+end
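A sketch of the backoff arithmetic implemented in `ends_at` above:

pause = Kafka::Pause.new

# Pause n uses an effective timeout of 2^(n-1) * timeout, capped at max_timeout,
# so three consecutive pauses yield windows of 10s, 20s, and 40s.
3.times { pause.pause!(timeout: 10, max_timeout: 60, exponential_backoff: true) }

pause.paused?  # => true
pause.expired? # => false until the current 40-second window has elapsed
pause.reset!   # clears the pause count, so the next pause! starts at 10s again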
data/lib/kafka/pending_message.rb
ADDED
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Kafka
+  class PendingMessage
+    attr_reader :value, :key, :headers, :topic, :partition, :partition_key, :create_time, :bytesize
+
+    def initialize(value:, key:, headers: {}, topic:, partition:, partition_key:, create_time:)
+      @value = value
+      @key = key
+      @headers = headers
+      @topic = topic
+      @partition = partition
+      @partition_key = partition_key
+      @create_time = create_time
+      @bytesize = key.to_s.bytesize + value.to_s.bytesize
+    end
+
+    def ==(other)
+      @value == other.value &&
+        @key == other.key &&
+        @topic == other.topic &&
+        @headers == other.headers &&
+        @partition == other.partition &&
+        @partition_key == other.partition_key &&
+        @create_time == other.create_time &&
+        @bytesize == other.bytesize
+    end
+  end
+end
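A small sketch of the bytesize accounting:

message = Kafka::PendingMessage.new(
  value: "payload", key: "k", headers: {}, topic: "events",
  partition: nil, partition_key: nil, create_time: Time.now
)

# bytesize counts only key and value bytes; headers are not included.
message.bytesize # => 8 (7 value bytes + 1 key byte)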
data/lib/kafka/pending_message_queue.rb
ADDED
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Kafka
+
+  class PendingMessageQueue
+    attr_reader :size, :bytesize
+
+    def initialize
+      clear
+    end
+
+    def write(message)
+      @messages << message
+      @size += 1
+      @bytesize += message.bytesize
+    end
+
+    def empty?
+      @messages.empty?
+    end
+
+    def clear
+      @messages = []
+      @size = 0
+      @bytesize = 0
+    end
+
+    def replace(messages)
+      clear
+      messages.each {|message| write(message) }
+    end
+
+    # Yields each message in the queue.
+    #
+    # @yieldparam [PendingMessage] message
+    # @return [nil]
+    def each(&block)
+      @messages.each(&block)
+    end
+  end
+end
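A usage sketch; the queue only requires that items respond to `#bytesize`, so a Struct stands in for Kafka::PendingMessage here:

# Stand-in for Kafka::PendingMessage: the queue only calls #bytesize on items.
Msg = Struct.new(:bytesize)

queue = Kafka::PendingMessageQueue.new
queue.write(Msg.new(64))
queue.write(Msg.new(128))

queue.size     # => 2
queue.bytesize # => 192
queue.replace([Msg.new(32)]) # clear + rewrite; counters are recomputed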