ruby-kafka-aws-iam 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
data/lib/kafka/instrumenter.rb
@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class Instrumenter
+     NAMESPACE = "kafka"
+
+     def initialize(default_payload = {})
+       @default_payload = default_payload
+
+       if defined?(ActiveSupport::Notifications)
+         @backend = ActiveSupport::Notifications
+       else
+         @backend = nil
+       end
+     end
+
+     def instrument(event_name, payload = {}, &block)
+       if @backend
+         payload.update(@default_payload)
+
+         @backend.instrument("#{event_name}.#{NAMESPACE}", payload, &block)
+       else
+         block.call(payload) if block
+       end
+     end
+   end
+
+   class DecoratingInstrumenter
+     def initialize(backend, extra_payload = {})
+       @backend = backend
+       @extra_payload = extra_payload
+     end
+
+     def instrument(event_name, payload = {}, &block)
+       @backend.instrument(event_name, @extra_payload.merge(payload), &block)
+     end
+   end
+ end
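
A usage sketch for the instrumenter above (not part of the gem diff; the "heartbeat" event name and the payload keys are illustrative). It shows that events are published under the "kafka" namespace when ActiveSupport::Notifications is available, and that the default payload is merged into every event:

require "active_support/notifications"
require "kafka"

# Subscribe to every event emitted under the "kafka" namespace.
ActiveSupport::Notifications.subscribe(/\.kafka$/) do |name, start, finish, _id, payload|
  puts "#{name} took #{(finish - start) * 1000}ms, payload: #{payload.inspect}"
end

instrumenter = Kafka::Instrumenter.new(client_id: "my-client")

# Emits "heartbeat.kafka"; the block is timed and the default payload
# (client_id here) is merged into the event payload.
instrumenter.instrument("heartbeat", group_id: "my-group") do |payload|
  # ... work being measured ...
end
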
data/lib/kafka/interceptors.rb
@@ -0,0 +1,33 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   # Holds a list of interceptors that implement `call`
+   # and wraps calls to a chain of custom interceptors.
+   class Interceptors
+     def initialize(interceptors:, logger:)
+       @interceptors = interceptors || []
+       @logger = TaggedLogger.new(logger)
+     end
+
+     # This method is called when the client produces a message or once the batches are fetched.
+     # The message returned from the first call is passed to the second interceptor call, and so on in an
+     # interceptor chain. This method does not throw exceptions.
+     #
+     # @param intercepted [Kafka::PendingMessage || Kafka::FetchedBatch] the produced message or
+     #   fetched batch.
+     #
+     # @return [Kafka::PendingMessage || Kafka::FetchedBatch] the intercepted message or batch
+     #   returned by the last interceptor.
+     def call(intercepted)
+       @interceptors.each do |interceptor|
+         begin
+           intercepted = interceptor.call(intercepted)
+         rescue Exception => e
+           @logger.warn "Error executing interceptor for topic: #{intercepted.topic} partition: #{intercepted.partition}: #{e.message}\n#{e.backtrace.join("\n")}"
+         end
+       end
+
+       intercepted
+     end
+   end
+ end
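
A minimal sketch of a producer-side interceptor for the chain above (not part of the gem diff; the class name and header key are made up for illustration). An interceptor only has to respond to `call` and return the, possibly modified, message; exceptions raised inside an interceptor are logged and swallowed rather than propagated:

require "kafka"
require "logger"

class TracingInterceptor
  def call(pending_message)
    # Return a new message with an extra header; the original is left untouched.
    Kafka::PendingMessage.new(
      value: pending_message.value,
      key: pending_message.key,
      headers: pending_message.headers.merge("traced-at" => Time.now.utc.to_s),
      topic: pending_message.topic,
      partition: pending_message.partition,
      partition_key: pending_message.partition_key,
      create_time: pending_message.create_time
    )
  end
end

chain = Kafka::Interceptors.new(interceptors: [TracingInterceptor.new], logger: Logger.new($stdout))

message = Kafka::PendingMessage.new(
  value: "hello", key: nil, topic: "events",
  partition: nil, partition_key: nil, create_time: Time.now
)

chain.call(message).headers  # => { "traced-at" => "..." }
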
data/lib/kafka/lz4_codec.rb
@@ -0,0 +1,27 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class LZ4Codec
+     def codec_id
+       3
+     end
+
+     def produce_api_min_version
+       0
+     end
+
+     def load
+       require "extlz4"
+     rescue LoadError
+       raise LoadError, "using lz4 compression requires adding a dependency on the `extlz4` gem to your Gemfile."
+     end
+
+     def compress(data)
+       LZ4.encode(data)
+     end
+
+     def decompress(data)
+       LZ4.decode(data)
+     end
+   end
+ end
data/lib/kafka/message_buffer.rb
@@ -0,0 +1,87 @@
+ # frozen_string_literal: true
+
+ require "kafka/protocol/message"
+
+ module Kafka
+
+   # Buffers messages for specific topics/partitions.
+   class MessageBuffer
+     include Enumerable
+
+     attr_reader :size, :bytesize
+
+     def initialize
+       @buffer = {}
+       @size = 0
+       @bytesize = 0
+     end
+
+     def write(value:, key:, topic:, partition:, create_time: Time.now, headers: {})
+       message = Protocol::Record.new(key: key, value: value, create_time: create_time, headers: headers)
+
+       buffer_for(topic, partition) << message
+
+       @size += 1
+       @bytesize += message.bytesize
+     end
+
+     def concat(messages, topic:, partition:)
+       buffer_for(topic, partition).concat(messages)
+
+       @size += messages.count
+       @bytesize += messages.map(&:bytesize).reduce(0, :+)
+     end
+
+     def to_h
+       @buffer
+     end
+
+     def empty?
+       @buffer.empty?
+     end
+
+     def each
+       @buffer.each do |topic, messages_for_topic|
+         messages_for_topic.each do |partition, messages_for_partition|
+           yield topic, partition, messages_for_partition
+         end
+       end
+     end
+
+     # Clears buffered messages for the given topic and partition.
+     #
+     # @param topic [String] the name of the topic.
+     # @param partition [Integer] the partition id.
+     #
+     # @return [nil]
+     def clear_messages(topic:, partition:)
+       return unless @buffer.key?(topic) && @buffer[topic].key?(partition)
+
+       @size -= @buffer[topic][partition].count
+       @bytesize -= @buffer[topic][partition].map(&:bytesize).reduce(0, :+)
+
+       @buffer[topic].delete(partition)
+       @buffer.delete(topic) if @buffer[topic].empty?
+     end
+
+     def messages_for(topic:, partition:)
+       buffer_for(topic, partition)
+     end
+
+     # Clears messages across all topics and partitions.
+     #
+     # @return [nil]
+     def clear
+       @buffer = {}
+       @size = 0
+       @bytesize = 0
+     end
+
+     private
+
+     def buffer_for(topic, partition)
+       @buffer[topic] ||= {}
+       @buffer[topic][partition] ||= []
+     end
+   end
+ end
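
A short usage sketch for the buffer above (not part of the gem diff; the topic name and values are illustrative). It shows the per-topic/partition grouping and the size/bytesize bookkeeping that the producer relies on when deciding whether the buffer is full:

require "kafka"

buffer = Kafka::MessageBuffer.new

buffer.write(value: "hello", key: "greeting", topic: "events", partition: 0)
buffer.write(value: "world", key: nil, topic: "events", partition: 1)

buffer.size      # => 2
buffer.bytesize  # aggregate byte size of the buffered records

# Iterate one topic/partition slice at a time, e.g. to build per-partition produce requests.
buffer.each do |topic, partition, messages|
  puts "#{topic}/#{partition}: #{messages.count} message(s)"
end

# Drop a slice once the broker has acknowledged it; counters are adjusted accordingly.
buffer.clear_messages(topic: "events", partition: 0)
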
data/lib/kafka/murmur2_hash.rb
@@ -0,0 +1,17 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class Murmur2Hash
+     SEED = [0x9747b28c].pack('L')
+
+     def load
+       require 'digest/murmurhash'
+     rescue LoadError
+       raise LoadError, "using murmur2 hashing requires adding a dependency on the `digest-murmurhash` gem to your Gemfile."
+     end
+
+     def hash(value)
+       ::Digest::MurmurHash2.rawdigest(value, SEED) & 0x7fffffff
+     end
+   end
+ end
data/lib/kafka/offset_manager.rb
@@ -0,0 +1,259 @@
+ # frozen_string_literal: true
+
+ module Kafka
+
+   # Manages a consumer's position in partitions, figures out where to resume processing
+   # from, etc.
+   class OffsetManager
+
+     # The default broker setting for offsets.retention.minutes is 1440.
+     DEFAULT_RETENTION_TIME = 1440 * 60
+
+     def initialize(cluster:, group:, fetcher:, logger:, commit_interval:, commit_threshold:, offset_retention_time:)
+       @cluster = cluster
+       @group = group
+       @fetcher = fetcher
+       @logger = TaggedLogger.new(logger)
+       @commit_interval = commit_interval
+       @commit_threshold = commit_threshold
+
+       @uncommitted_offsets = 0
+       @processed_offsets = {}
+       @default_offsets = {}
+       @committed_offsets = nil
+       @resolved_offsets = {}
+       @last_commit = Time.now
+       @last_recommit = nil
+       @recommit_interval = (offset_retention_time || DEFAULT_RETENTION_TIME) / 2
+     end
+
+     # Set the default offset for a topic.
+     #
+     # When the consumer is started for the first time, or in cases where it gets stuck and
+     # has to reset its position, it must start either with the earliest messages or with
+     # the latest, skipping to the very end of each partition.
+     #
+     # @param topic [String] the name of the topic.
+     # @param default_offset [Symbol] either `:earliest` or `:latest`.
+     # @return [nil]
+     def set_default_offset(topic, default_offset)
+       @default_offsets[topic] = default_offset
+     end
+
+     # Mark a message as having been processed.
+     #
+     # When offsets are committed, the message's offset will be stored in Kafka so
+     # that we can resume from this point at a later time.
+     #
+     # @param topic [String] the name of the topic.
+     # @param partition [Integer] the partition number.
+     # @param offset [Integer] the offset of the message that should be marked as processed.
+     # @return [nil]
+     def mark_as_processed(topic, partition, offset)
+       unless @group.assigned_to?(topic, partition)
+         @logger.debug "Not marking #{topic}/#{partition}:#{offset} as processed for partition not assigned to this consumer."
+         return
+       end
+       @processed_offsets[topic] ||= {}
+
+       last_processed_offset = @processed_offsets[topic][partition] || -1
+       if last_processed_offset > offset + 1
+         @logger.debug "Not overwriting newer offset #{topic}/#{partition}:#{last_processed_offset - 1} with older #{offset}"
+         return
+       end
+
+       @uncommitted_offsets += 1
+
+       # The committed offset should always be the offset of the next message that the
+       # application will read, thus adding one to the last message processed.
+       @processed_offsets[topic][partition] = offset + 1
+       @logger.debug "Marking #{topic}/#{partition}:#{offset} as processed"
+     end
+
+     # Move the consumer's position in the partition back to the configured default
+     # offset, either the first or latest in the partition.
+     #
+     # @param topic [String] the name of the topic.
+     # @param partition [Integer] the partition number.
+     # @return [nil]
+     def seek_to_default(topic, partition)
+       # Remove any cached offset, in case things have changed broker-side.
+       clear_resolved_offset(topic)
+
+       offset = resolve_offset(topic, partition)
+
+       seek_to(topic, partition, offset)
+     end
+
+     # Move the consumer's position in the partition to the specified offset.
+     #
+     # @param topic [String] the name of the topic.
+     # @param partition [Integer] the partition number.
+     # @param offset [Integer] the offset that the consumer position should be moved to.
+     # @return [nil]
+     def seek_to(topic, partition, offset)
+       @processed_offsets[topic] ||= {}
+       @processed_offsets[topic][partition] = offset
+
+       @fetcher.seek(topic, partition, offset)
+     end
+
+     # Return the next offset that should be fetched for the specified partition.
+     #
+     # @param topic [String] the name of the topic.
+     # @param partition [Integer] the partition number.
+     # @return [Integer] the next offset that should be fetched.
+     def next_offset_for(topic, partition)
+       offset = @processed_offsets.fetch(topic, {}).fetch(partition) {
+         committed_offset_for(topic, partition)
+       }
+
+       # A negative offset means that no offset has been committed, so we need to
+       # resolve the default offset for the topic.
+       if offset < 0
+         resolve_offset(topic, partition)
+       else
+         # The next offset is the last offset.
+         offset
+       end
+     end
+
+     # Commit offsets of messages that have been marked as processed.
+     #
+     # If `recommit` is set to true, we will also commit the existing positions
+     # even if no messages have been processed on a partition. This is done
+     # in order to avoid the offset information expiring in cases where messages
+     # are very rare -- it's essentially a keep-alive.
+     #
+     # @param recommit [Boolean] whether to recommit offsets that have already been
+     #   committed.
+     # @return [nil]
+     def commit_offsets(recommit = false)
+       offsets = offsets_to_commit(recommit)
+       unless offsets.empty?
+         @logger.debug "Committing offsets#{recommit ? ' with recommit' : ''}: #{prettify_offsets(offsets)}"
+
+         @group.commit_offsets(offsets)
+
+         @last_commit = Time.now
+         @last_recommit = Time.now if recommit
+
+         @uncommitted_offsets = 0
+         @committed_offsets = nil
+       end
+     end
+
+     # Commit offsets if necessary, according to the offset commit policy specified
+     # when initializing the class.
+     #
+     # @return [nil]
+     def commit_offsets_if_necessary
+       recommit = recommit_timeout_reached?
+       if recommit || commit_timeout_reached? || commit_threshold_reached?
+         commit_offsets(recommit)
+       end
+     end
+
+     # Clear all stored offset information.
+     #
+     # @return [nil]
+     def clear_offsets
+       @processed_offsets.clear
+       @resolved_offsets.clear
+
+       # Clear the cached commits from the brokers.
+       @committed_offsets = nil
+     end
+
+     # Clear stored offset information for all partitions except those specified
+     # in `excluded`.
+     #
+     #     offset_manager.clear_offsets_excluding("my-topic" => [1, 2, 3])
+     #
+     # @return [nil]
+     def clear_offsets_excluding(excluded)
+       # Clear all offsets that aren't in `excluded`.
+       @processed_offsets.each do |topic, partitions|
+         partitions.keep_if do |partition, _|
+           excluded.fetch(topic, []).include?(partition)
+         end
+       end
+
+       # Clear the cached commits from the brokers.
+       @committed_offsets = nil
+       @resolved_offsets.clear
+     end
+
+     private
+
+     def clear_resolved_offset(topic)
+       @resolved_offsets.delete(topic)
+     end
+
+     def resolve_offset(topic, partition)
+       @resolved_offsets[topic] ||= fetch_resolved_offsets(topic)
+       @resolved_offsets[topic].fetch(partition)
+     end
+
+     def fetch_resolved_offsets(topic)
+       default_offset = @default_offsets.fetch(topic)
+       partitions = @group.assigned_partitions.fetch(topic)
+
+       @cluster.resolve_offsets(topic, partitions, default_offset)
+     end
+
+     def seconds_since(time)
+       Time.now - time
+     end
+
+     def seconds_since_last_commit
+       seconds_since(@last_commit)
+     end
+
+     def committed_offsets
+       @committed_offsets ||= @group.fetch_offsets
+     end
+
+     def committed_offset_for(topic, partition)
+       committed_offsets.offset_for(topic, partition)
+     end
+
+     def offsets_to_commit(recommit = false)
+       if recommit
+         offsets_to_recommit.merge!(@processed_offsets) do |_topic, committed, processed|
+           committed.merge!(processed)
+         end
+       else
+         @processed_offsets
+       end
+     end
+
+     def offsets_to_recommit
+       committed_offsets.topics.each_with_object({}) do |(topic, partition_info), offsets|
+         topic_offsets = partition_info.keys.each_with_object({}) do |partition, partition_map|
+           offset = committed_offsets.offset_for(topic, partition)
+           partition_map[partition] = offset unless offset == -1
+         end
+         offsets[topic] = topic_offsets unless topic_offsets.empty?
+       end
+     end
+
+     def recommit_timeout_reached?
+       @last_recommit.nil? || seconds_since(@last_recommit) >= @recommit_interval
+     end
+
+     def commit_timeout_reached?
+       @commit_interval != 0 && seconds_since_last_commit >= @commit_interval
+     end
+
+     def commit_threshold_reached?
+       @commit_threshold != 0 && @uncommitted_offsets >= @commit_threshold
+     end
+
+     def prettify_offsets(offsets)
+       offsets.flat_map do |topic, partitions|
+         partitions.map { |partition, offset| "#{topic}/#{partition}:#{offset}" }
+       end.join(', ')
+     end
+   end
+ end
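
A small sketch of how the offset manager above is driven (not part of the gem diff; the consumer normally constructs it internally, so FakeGroup and the nil collaborators here are stand-ins used only to illustrate the bookkeeping). It shows the "+ 1" convention: after processing offset 41, offset 42 is what will be committed, so a restarted consumer resumes at the first unprocessed message.

require "kafka"

# Stand-in for Kafka::ConsumerGroup; only the method used by mark_as_processed
# is implemented.
class FakeGroup
  def assigned_to?(_topic, _partition)
    true
  end
end

offset_manager = Kafka::OffsetManager.new(
  cluster: nil,                # unused in this illustration
  group: FakeGroup.new,
  fetcher: nil,                # unused in this illustration
  logger: nil,
  commit_interval: 10,         # commit at most every 10 seconds...
  commit_threshold: 100,       # ...or once 100 offsets are uncommitted
  offset_retention_time: nil   # fall back to the 24-hour broker default (recommit every 12 hours)
)

offset_manager.set_default_offset("events", :earliest)

# Processing offset 41 records 42 as the position to commit.
offset_manager.mark_as_processed("events", 0, 41)
offset_manager.next_offset_for("events", 0)  # => 42
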
data/lib/kafka/partitioner.rb
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ require "kafka/digest"
+
+ module Kafka
+
+   # Assigns partitions to messages.
+   class Partitioner
+     # @param hash_function [Symbol, nil] the algorithm used to compute a messages
+     #   destination partition. Default is :crc32
+     def initialize(hash_function: nil)
+       @digest = Digest.find_digest(hash_function || :crc32)
+     end
+
+     # Assigns a partition number based on a partition key. If no explicit
+     # partition key is provided, the message key will be used instead.
+     #
+     # If the key is nil, then a random partition is selected. Otherwise, a digest
+     # of the key is used to deterministically find a partition. As long as the
+     # number of partitions doesn't change, the same key will always be assigned
+     # to the same partition.
+     #
+     # @param partition_count [Integer] the number of partitions in the topic.
+     # @param message [Kafka::PendingMessage] the message that should be assigned
+     #   a partition.
+     # @return [Integer] the partition number.
+     def call(partition_count, message)
+       raise ArgumentError if partition_count == 0
+
+       # If no explicit partition key is specified we use the message key instead.
+       key = message.partition_key || message.key
+
+       if key.nil?
+         rand(partition_count)
+       else
+         @digest.hash(key) % partition_count
+       end
+     end
+   end
+ end
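
A usage sketch for the partitioner above (not part of the gem diff; topic, key, and partition count are illustrative). With a non-nil partition key (or message key) the same key always maps to the same partition for a fixed partition count:

require "kafka"

partitioner = Kafka::Partitioner.new  # defaults to the :crc32 hash function
# Kafka::Partitioner.new(hash_function: :murmur2) requires the digest-murmurhash gem.

message = Kafka::PendingMessage.new(
  value: "hello",
  key: "greeting",
  topic: "events",
  partition: nil,
  partition_key: "user-42",   # takes precedence over the message key
  create_time: Time.now
)

# Deterministic for a given key and partition count; an Integer in 0...16.
partitioner.call(16, message)
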
data/lib/kafka/pause.rb
@@ -0,0 +1,92 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   # Manages the pause state of a partition.
+   #
+   # The processing of messages in a partition can be paused, e.g. if there was
+   # an exception during processing. This could be caused by a downstream service
+   # not being available. A typical way of solving such an issue is to back off
+   # for a little while and then try again. In order to do that, _pause_ the
+   # partition.
+   class Pause
+     def initialize(clock: Time)
+       @clock = clock
+       @started_at = nil
+       @pauses = 0
+       @timeout = nil
+       @max_timeout = nil
+       @exponential_backoff = false
+     end
+
+     # Mark the partition as paused.
+     #
+     # If exponential backoff is enabled, each subsequent pause of a partition will
+     # cause a doubling of the actual timeout, i.e. for pause number _n_, the actual
+     # timeout will be _2^n * timeout_.
+     #
+     # Only when {#reset!} is called is this state cleared.
+     #
+     # @param timeout [nil, Integer] if specified, the partition will automatically
+     #   resume after this many seconds.
+     # @param exponential_backoff [Boolean] whether to enable exponential timeouts.
+     def pause!(timeout: nil, max_timeout: nil, exponential_backoff: false)
+       @started_at = @clock.now
+       @timeout = timeout
+       @max_timeout = max_timeout
+       @exponential_backoff = exponential_backoff
+       @pauses += 1
+     end
+
+     # Resumes the partition.
+     #
+     # The number of pauses is still retained, and if the partition is paused again
+     # it may be with an exponential backoff.
+     def resume!
+       @started_at = nil
+       @timeout = nil
+       @max_timeout = nil
+     end
+
+     # Whether the partition is currently paused. The pause may have expired, in which
+     # case {#expired?} should be checked as well.
+     def paused?
+       # This is nil if we're not currently paused.
+       !@started_at.nil?
+     end
+
+     def pause_duration
+       if paused?
+         Time.now - @started_at
+       else
+         0
+       end
+     end
+
+     # Whether the pause has expired.
+     def expired?
+       # We never expire the pause if timeout is nil.
+       return false if @timeout.nil?
+
+       # Have we passed the end of the pause duration?
+       @clock.now >= ends_at
+     end
+
+     # Resets the pause state, ensuring that the next pause is not exponential.
+     def reset!
+       @pauses = 0
+     end
+
+     private
+
+     def ends_at
+       # Apply an exponential backoff to the timeout.
+       backoff_factor = @exponential_backoff ? 2**(@pauses - 1) : 1
+       timeout = backoff_factor * @timeout
+
+       # If set, don't allow a timeout longer than max_timeout.
+       timeout = @max_timeout if @max_timeout && timeout > @max_timeout
+
+       @started_at + timeout
+     end
+   end
+ end
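
A sketch of the backoff behaviour above (not part of the gem diff; the timeout values are illustrative). Because the exponent is the pause count minus one, the effective timeout after n pauses is timeout * 2^(n - 1), capped at max_timeout:

require "kafka"

pause = Kafka::Pause.new

3.times do
  pause.pause!(timeout: 10, max_timeout: 60, exponential_backoff: true)
end

# After three pauses the effective timeout is 10 * 2**(3 - 1) = 40 seconds
# (it would be capped at 60 once it grew past max_timeout).
pause.paused?   # => true
pause.expired?  # => false immediately after pausing; true once 40 seconds pass

pause.resume!   # stop pausing, but keep the pause count for future backoff
pause.reset!    # forget the pause count so the next pause starts at 10 seconds again
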
data/lib/kafka/pending_message.rb
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Kafka
+   class PendingMessage
+     attr_reader :value, :key, :headers, :topic, :partition, :partition_key, :create_time, :bytesize
+
+     def initialize(value:, key:, headers: {}, topic:, partition:, partition_key:, create_time:)
+       @value = value
+       @key = key
+       @headers = headers
+       @topic = topic
+       @partition = partition
+       @partition_key = partition_key
+       @create_time = create_time
+       @bytesize = key.to_s.bytesize + value.to_s.bytesize
+     end
+
+     def ==(other)
+       @value == other.value &&
+         @key == other.key &&
+         @topic == other.topic &&
+         @headers == other.headers &&
+         @partition == other.partition &&
+         @partition_key == other.partition_key &&
+         @create_time == other.create_time &&
+         @bytesize == other.bytesize
+     end
+   end
+ end
data/lib/kafka/pending_message_queue.rb
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ module Kafka
+
+   class PendingMessageQueue
+     attr_reader :size, :bytesize
+
+     def initialize
+       clear
+     end
+
+     def write(message)
+       @messages << message
+       @size += 1
+       @bytesize += message.bytesize
+     end
+
+     def empty?
+       @messages.empty?
+     end
+
+     def clear
+       @messages = []
+       @size = 0
+       @bytesize = 0
+     end
+
+     def replace(messages)
+       clear
+       messages.each {|message| write(message) }
+     end
+
+     # Yields each message in the queue.
+     #
+     # @yieldparam [PendingMessage] message
+     # @return [nil]
+     def each(&block)
+       @messages.each(&block)
+     end
+   end
+ end