ruby-kafka-custom 0.7.7.26

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. checksums.yaml +7 -0
  2. data/lib/kafka/async_producer.rb +279 -0
  3. data/lib/kafka/broker.rb +205 -0
  4. data/lib/kafka/broker_info.rb +16 -0
  5. data/lib/kafka/broker_pool.rb +41 -0
  6. data/lib/kafka/broker_uri.rb +43 -0
  7. data/lib/kafka/client.rb +754 -0
  8. data/lib/kafka/cluster.rb +455 -0
  9. data/lib/kafka/compression.rb +43 -0
  10. data/lib/kafka/compressor.rb +85 -0
  11. data/lib/kafka/connection.rb +220 -0
  12. data/lib/kafka/connection_builder.rb +33 -0
  13. data/lib/kafka/consumer.rb +592 -0
  14. data/lib/kafka/consumer_group.rb +208 -0
  15. data/lib/kafka/datadog.rb +413 -0
  16. data/lib/kafka/fetch_operation.rb +115 -0
  17. data/lib/kafka/fetched_batch.rb +54 -0
  18. data/lib/kafka/fetched_batch_generator.rb +117 -0
  19. data/lib/kafka/fetched_message.rb +47 -0
  20. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  21. data/lib/kafka/fetcher.rb +221 -0
  22. data/lib/kafka/gzip_codec.rb +30 -0
  23. data/lib/kafka/heartbeat.rb +25 -0
  24. data/lib/kafka/instrumenter.rb +38 -0
  25. data/lib/kafka/lz4_codec.rb +23 -0
  26. data/lib/kafka/message_buffer.rb +87 -0
  27. data/lib/kafka/offset_manager.rb +248 -0
  28. data/lib/kafka/partitioner.rb +35 -0
  29. data/lib/kafka/pause.rb +92 -0
  30. data/lib/kafka/pending_message.rb +29 -0
  31. data/lib/kafka/pending_message_queue.rb +41 -0
  32. data/lib/kafka/produce_operation.rb +205 -0
  33. data/lib/kafka/producer.rb +504 -0
  34. data/lib/kafka/protocol.rb +217 -0
  35. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  36. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  37. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  38. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  39. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  40. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  41. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  42. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  43. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  44. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  45. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  46. data/lib/kafka/protocol/decoder.rb +175 -0
  47. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  48. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  49. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  50. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  51. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  52. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  53. data/lib/kafka/protocol/encoder.rb +184 -0
  54. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  55. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  56. data/lib/kafka/protocol/fetch_request.rb +70 -0
  57. data/lib/kafka/protocol/fetch_response.rb +136 -0
  58. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  59. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  60. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  61. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  62. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  63. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  64. data/lib/kafka/protocol/join_group_request.rb +41 -0
  65. data/lib/kafka/protocol/join_group_response.rb +33 -0
  66. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  67. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  68. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  69. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  70. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  71. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  72. data/lib/kafka/protocol/member_assignment.rb +42 -0
  73. data/lib/kafka/protocol/message.rb +172 -0
  74. data/lib/kafka/protocol/message_set.rb +55 -0
  75. data/lib/kafka/protocol/metadata_request.rb +31 -0
  76. data/lib/kafka/protocol/metadata_response.rb +185 -0
  77. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  78. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  79. data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
  80. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  81. data/lib/kafka/protocol/produce_request.rb +92 -0
  82. data/lib/kafka/protocol/produce_response.rb +63 -0
  83. data/lib/kafka/protocol/record.rb +88 -0
  84. data/lib/kafka/protocol/record_batch.rb +222 -0
  85. data/lib/kafka/protocol/request_message.rb +26 -0
  86. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  87. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  88. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  89. data/lib/kafka/protocol/sync_group_response.rb +23 -0
  90. data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
  91. data/lib/kafka/sasl/gssapi.rb +76 -0
  92. data/lib/kafka/sasl/oauth.rb +64 -0
  93. data/lib/kafka/sasl/plain.rb +39 -0
  94. data/lib/kafka/sasl/scram.rb +177 -0
  95. data/lib/kafka/sasl_authenticator.rb +61 -0
  96. data/lib/kafka/snappy_codec.rb +25 -0
  97. data/lib/kafka/socket_with_timeout.rb +96 -0
  98. data/lib/kafka/ssl_context.rb +66 -0
  99. data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
  100. data/lib/kafka/statsd.rb +296 -0
  101. data/lib/kafka/tagged_logger.rb +72 -0
  102. data/lib/kafka/transaction_manager.rb +261 -0
  103. data/lib/kafka/transaction_state_machine.rb +72 -0
  104. data/lib/kafka/version.rb +5 -0
  105. metadata +461 -0
data/lib/kafka/gzip_codec.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Kafka
+  class GzipCodec
+    def codec_id
+      1
+    end
+
+    def load
+      require "zlib"
+    end
+
+    def compress(data)
+      buffer = StringIO.new
+      buffer.set_encoding(Encoding::BINARY)
+
+      writer = Zlib::GzipWriter.new(buffer, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
+      writer.write(data)
+      writer.close
+
+      buffer.string
+    end
+
+    def decompress(data)
+      buffer = StringIO.new(data)
+      reader = Zlib::GzipReader.new(buffer)
+      reader.read
+    end
+  end
+end
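As a quick sanity check, the codec above can be exercised on its own. A minimal round-trip sketch, assuming the gem is installed and loads as `kafka` (otherwise only `zlib` and `stringio` from the standard library are needed):

    require "stringio"
    require "zlib"
    require "kafka"

    codec = Kafka::GzipCodec.new
    codec.load

    input      = "hello " * 100
    compressed = codec.compress(input)

    # Gzip collapses the repetitive input to a fraction of its original size.
    puts "#{input.bytesize} -> #{compressed.bytesize} bytes"
    raise "round-trip failed" unless codec.decompress(compressed) == input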
data/lib/kafka/heartbeat.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Heartbeat
+    def initialize(group:, interval:, instrumenter:)
+      @group = group
+      @interval = interval
+      @last_heartbeat = Time.now
+      @instrumenter = instrumenter
+    end
+
+    def trigger!
+      @instrumenter.instrument('heartbeat.consumer',
+                               group_id: @group.group_id,
+                               topic_partitions: @group.assigned_partitions) do
+        @group.heartbeat
+        @last_heartbeat = Time.now
+      end
+    end
+
+    def trigger
+      trigger! if Time.now > @last_heartbeat + @interval
+    end
+  end
+end
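The `trigger`/`trigger!` split is what rate-limits heartbeats: `trigger` is safe to call on every loop iteration and only sends once `interval` seconds have elapsed since the last heartbeat. A sketch with a hypothetical stub group (the real collaborator is the gem's consumer group object):

    require "kafka"

    # Hypothetical stand-in for the consumer group; Heartbeat only needs
    # #group_id, #assigned_partitions, and #heartbeat.
    StubGroup = Struct.new(:group_id, :assigned_partitions) do
      def heartbeat
        puts "heartbeat sent at #{Time.now}"
      end
    end

    heartbeat = Kafka::Heartbeat.new(
      group: StubGroup.new("my-group", {}),
      interval: 10, # seconds
      instrumenter: Kafka::Instrumenter.new
    )

    heartbeat.trigger   # no-op: fewer than 10 seconds since construction
    heartbeat.trigger!  # forced: sends immediately and resets the timer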
data/lib/kafka/instrumenter.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Instrumenter
+    NAMESPACE = "kafka"
+
+    def initialize(default_payload = {})
+      @default_payload = default_payload
+
+      if defined?(ActiveSupport::Notifications)
+        @backend = ActiveSupport::Notifications
+      else
+        @backend = nil
+      end
+    end
+
+    def instrument(event_name, payload = {}, &block)
+      if @backend
+        payload.update(@default_payload)
+
+        @backend.instrument("#{event_name}.#{NAMESPACE}", payload, &block)
+      else
+        block.call(payload) if block
+      end
+    end
+  end
+
+  class DecoratingInstrumenter
+    def initialize(backend, extra_payload = {})
+      @backend = backend
+      @extra_payload = extra_payload
+    end
+
+    def instrument(event_name, payload = {}, &block)
+      @backend.instrument(event_name, @extra_payload.merge(payload), &block)
+    end
+  end
+end
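Every event name is suffixed with the `kafka` namespace, so `heartbeat.consumer` is published as `heartbeat.consumer.kafka`. A sketch of a subscriber, assuming `activesupport` is available and loaded before the instrumenter is created (the backend is chosen once, at `initialize` time):

    require "active_support/notifications"

    ActiveSupport::Notifications.subscribe("heartbeat.consumer.kafka") do |name, start, finish, _id, payload|
      puts "#{name}: group=#{payload[:group_id]} took #{(finish - start).round(3)}s"
    end

`DecoratingInstrumenter` exists so a component can layer extra payload (say, a client id) onto every event it emits without repeating it at each call site.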
data/lib/kafka/lz4_codec.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Kafka
+  class LZ4Codec
+    def codec_id
+      3
+    end
+
+    def load
+      require "extlz4"
+    rescue LoadError
+      raise LoadError, "using lz4 compression requires adding a dependency on the `extlz4` gem to your Gemfile."
+    end
+
+    def compress(data)
+      LZ4.encode(data)
+    end
+
+    def decompress(data)
+      LZ4.decode(data)
+    end
+  end
+end
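The `codec_id` values are Kafka's wire-protocol compression attribute codes: 1 for gzip (above), 2 for snappy, 3 for LZ4. A round-trip sketch, assuming the optional `extlz4` gem is installed alongside the gem itself:

    require "kafka"

    codec = Kafka::LZ4Codec.new
    codec.load # raises a descriptive LoadError if `extlz4` is missing

    data = "a" * 1_000
    raise "round-trip failed" unless codec.decompress(codec.compress(data)) == data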
data/lib/kafka/message_buffer.rb
@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+
+require "kafka/protocol/message"
+
+module Kafka
+
+  # Buffers messages for specific topics/partitions.
+  class MessageBuffer
+    include Enumerable
+
+    attr_reader :size, :bytesize
+
+    def initialize
+      @buffer = {}
+      @size = 0
+      @bytesize = 0
+    end
+
+    def write(value:, key:, topic:, partition:, create_time: Time.now, headers: {})
+      message = Protocol::Record.new(key: key, value: value, create_time: create_time, headers: headers)
+
+      buffer_for(topic, partition) << message
+
+      @size += 1
+      @bytesize += message.bytesize
+    end
+
+    def concat(messages, topic:, partition:)
+      buffer_for(topic, partition).concat(messages)
+
+      @size += messages.count
+      @bytesize += messages.map(&:bytesize).reduce(0, :+)
+    end
+
+    def to_h
+      @buffer
+    end
+
+    def empty?
+      @buffer.empty?
+    end
+
+    def each
+      @buffer.each do |topic, messages_for_topic|
+        messages_for_topic.each do |partition, messages_for_partition|
+          yield topic, partition, messages_for_partition
+        end
+      end
+    end
+
+    # Clears buffered messages for the given topic and partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition id.
+    #
+    # @return [nil]
+    def clear_messages(topic:, partition:)
+      return unless @buffer.key?(topic) && @buffer[topic].key?(partition)
+
+      @size -= @buffer[topic][partition].count
+      @bytesize -= @buffer[topic][partition].map(&:bytesize).reduce(0, :+)
+
+      @buffer[topic].delete(partition)
+      @buffer.delete(topic) if @buffer[topic].empty?
+    end
+
+    def messages_for(topic:, partition:)
+      buffer_for(topic, partition)
+    end
+
+    # Clears messages across all topics and partitions.
+    #
+    # @return [nil]
+    def clear
+      @buffer = {}
+      @size = 0
+      @bytesize = 0
+    end
+
+    private
+
+    def buffer_for(topic, partition)
+      @buffer[topic] ||= {}
+      @buffer[topic][partition] ||= []
+    end
+  end
+end
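A sketch of the buffer's bookkeeping, with hypothetical topic names; `size` and `bytesize` track totals across all topic/partition queues:

    require "kafka"

    buffer = Kafka::MessageBuffer.new
    buffer.write(value: "hello", key: nil, topic: "greetings", partition: 0)
    buffer.write(value: "world", key: nil, topic: "greetings", partition: 1)

    buffer.each do |topic, partition, messages|
      puts "#{topic}/#{partition}: #{messages.size} message(s)"
    end

    buffer.clear_messages(topic: "greetings", partition: 0)
    puts buffer.size     # => 1
    puts buffer.bytesize # bytes still buffered for greetings/1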
data/lib/kafka/offset_manager.rb
@@ -0,0 +1,248 @@
+# frozen_string_literal: true
+
+module Kafka
+
+  # Manages a consumer's position in partitions, figures out where to resume processing
+  # from, etc.
+  class OffsetManager
+
+    # The default broker setting for offsets.retention.minutes is 1440.
+    DEFAULT_RETENTION_TIME = 1440 * 60
+
+    def initialize(cluster:, group:, fetcher:, logger:, commit_interval:, commit_threshold:, offset_retention_time:)
+      @cluster = cluster
+      @group = group
+      @fetcher = fetcher
+      @logger = TaggedLogger.new(logger)
+      @commit_interval = commit_interval
+      @commit_threshold = commit_threshold
+
+      @uncommitted_offsets = 0
+      @processed_offsets = {}
+      @default_offsets = {}
+      @committed_offsets = nil
+      @resolved_offsets = {}
+      @last_commit = Time.now
+      @last_recommit = nil
+      @recommit_interval = (offset_retention_time || DEFAULT_RETENTION_TIME) / 2
+    end
+
+    # Set the default offset for a topic.
+    #
+    # When the consumer is started for the first time, or in cases where it gets stuck and
+    # has to reset its position, it must start either with the earliest messages or with
+    # the latest, skipping to the very end of each partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param default_offset [Symbol] either `:earliest` or `:latest`.
+    # @return [nil]
+    def set_default_offset(topic, default_offset)
+      @default_offsets[topic] = default_offset
+    end
+
+    # Mark a message as having been processed.
+    #
+    # When offsets are committed, the message's offset will be stored in Kafka so
+    # that we can resume from this point at a later time.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @param offset [Integer] the offset of the message that should be marked as processed.
+    # @return [nil]
+    def mark_as_processed(topic, partition, offset)
+      @uncommitted_offsets += 1
+      @processed_offsets[topic] ||= {}
+
+      # The committed offset should always be the offset of the next message that the
+      # application will read, thus adding one to the last message processed.
+      @processed_offsets[topic][partition] = offset + 1
+      @logger.debug "Marking #{topic}/#{partition}:#{offset} as processed"
+    end
+
+    # Move the consumer's position in the partition back to the configured default
+    # offset, either the first or latest in the partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @return [nil]
+    def seek_to_default(topic, partition)
+      # Remove any cached offset, in case things have changed broker-side.
+      clear_resolved_offset(topic)
+
+      offset = resolve_offset(topic, partition)
+
+      seek_to(topic, partition, offset)
+    end
+
+    # Move the consumer's position in the partition to the specified offset.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @param offset [Integer] the offset that the consumer position should be moved to.
+    # @return [nil]
+    def seek_to(topic, partition, offset)
+      @processed_offsets[topic] ||= {}
+      @processed_offsets[topic][partition] = offset
+
+      @fetcher.seek(topic, partition, offset)
+    end
+
+    # Return the next offset that should be fetched for the specified partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @return [Integer] the next offset that should be fetched.
+    def next_offset_for(topic, partition)
+      offset = @processed_offsets.fetch(topic, {}).fetch(partition) {
+        committed_offset_for(topic, partition)
+      }
+
+      # A negative offset means that no offset has been committed, so we need to
+      # resolve the default offset for the topic.
+      if offset < 0
+        resolve_offset(topic, partition)
+      else
+        # The next offset is the last offset.
+        offset
+      end
+    end
+
+    # Commit offsets of messages that have been marked as processed.
+    #
+    # If `recommit` is set to true, we will also commit the existing positions
+    # even if no messages have been processed on a partition. This is done
+    # in order to avoid the offset information expiring in cases where messages
+    # are very rare -- it's essentially a keep-alive.
+    #
+    # @param recommit [Boolean] whether to recommit offsets that have already been
+    #   committed.
+    # @return [nil]
+    def commit_offsets(recommit = false)
+      offsets = offsets_to_commit(recommit)
+      unless offsets.empty?
+        @logger.debug "Committing offsets#{recommit ? ' with recommit' : ''}: #{prettify_offsets(offsets)}"
+
+        @group.commit_offsets(offsets)
+
+        @last_commit = Time.now
+        @last_recommit = Time.now if recommit
+
+        @uncommitted_offsets = 0
+        @committed_offsets = nil
+      end
+    end
+
+    # Commit offsets if necessary, according to the offset commit policy specified
+    # when initializing the class.
+    #
+    # @return [nil]
+    def commit_offsets_if_necessary
+      recommit = recommit_timeout_reached?
+      if recommit || commit_timeout_reached? || commit_threshold_reached?
+        commit_offsets(recommit)
+      end
+    end
+
+    # Clear all stored offset information.
+    #
+    # @return [nil]
+    def clear_offsets
+      @processed_offsets.clear
+      @resolved_offsets.clear
+
+      # Clear the cached commits from the brokers.
+      @committed_offsets = nil
+    end
+
+    # Clear stored offset information for all partitions except those specified
+    # in `excluded`.
+    #
+    #     offset_manager.clear_offsets_excluding("my-topic" => [1, 2, 3])
+    #
+    # @return [nil]
+    def clear_offsets_excluding(excluded)
+      # Clear all offsets that aren't in `excluded`.
+      @processed_offsets.each do |topic, partitions|
+        partitions.keep_if do |partition, _|
+          excluded.fetch(topic, []).include?(partition)
+        end
+      end
+
+      # Clear the cached commits from the brokers.
+      @committed_offsets = nil
+      @resolved_offsets.clear
+    end
+
+    private
+
+    def clear_resolved_offset(topic)
+      @resolved_offsets.delete(topic)
+    end
+
+    def resolve_offset(topic, partition)
+      @resolved_offsets[topic] ||= fetch_resolved_offsets(topic)
+      @resolved_offsets[topic].fetch(partition)
+    end
+
+    def fetch_resolved_offsets(topic)
+      default_offset = @default_offsets.fetch(topic)
+      partitions = @group.assigned_partitions.fetch(topic)
+
+      @cluster.resolve_offsets(topic, partitions, default_offset)
+    end
+
+    def seconds_since(time)
+      Time.now - time
+    end
+
+    def seconds_since_last_commit
+      seconds_since(@last_commit)
+    end
+
+    def committed_offsets
+      @committed_offsets ||= @group.fetch_offsets
+    end
+
+    def committed_offset_for(topic, partition)
+      committed_offsets.offset_for(topic, partition)
+    end
+
+    def offsets_to_commit(recommit = false)
+      if recommit
+        offsets_to_recommit.merge!(@processed_offsets) do |_topic, committed, processed|
+          committed.merge!(processed)
+        end
+      else
+        @processed_offsets
+      end
+    end
+
+    def offsets_to_recommit
+      committed_offsets.topics.each_with_object({}) do |(topic, partition_info), offsets|
+        topic_offsets = partition_info.keys.each_with_object({}) do |partition, partition_map|
+          offset = committed_offsets.offset_for(topic, partition)
+          partition_map[partition] = offset unless offset == -1
+        end
+        offsets[topic] = topic_offsets unless topic_offsets.empty?
+      end
+    end
+
+    def recommit_timeout_reached?
+      @last_recommit.nil? || seconds_since(@last_recommit) >= @recommit_interval
+    end
+
+    def commit_timeout_reached?
+      @commit_interval != 0 && seconds_since_last_commit >= @commit_interval
+    end
+
+    def commit_threshold_reached?
+      @commit_threshold != 0 && @uncommitted_offsets >= @commit_threshold
+    end
+
+    def prettify_offsets(offsets)
+      offsets.flat_map do |topic, partitions|
+        partitions.map { |partition, offset| "#{topic}/#{partition}:#{offset}" }
+      end.join(', ')
+    end
+  end
+end
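The `+ 1` in `mark_as_processed` encodes Kafka's convention that a committed offset names the next message to be read, not the last one handled. The local bookkeeping can be exercised without a broker; a sketch with hypothetical stub collaborators (none of them are contacted on this path, since a processed offset is available locally):

    require "kafka"
    require "logger"

    # Hypothetical stub; only needed to satisfy the constructor here.
    fetcher = Object.new
    def fetcher.seek(topic, partition, offset); end

    manager = Kafka::OffsetManager.new(
      cluster: Object.new,
      group: Object.new,
      fetcher: fetcher,
      logger: Logger.new($stdout),
      commit_interval: 10,       # seconds between commits
      commit_threshold: 100,     # or every 100 processed messages
      offset_retention_time: nil # fall back to DEFAULT_RETENTION_TIME
    )

    manager.mark_as_processed("some-topic", 0, 41)
    puts manager.next_offset_for("some-topic", 0) # => 42, the next message to read

Once the commit interval or threshold is reached, `commit_offsets_if_necessary` would push that 42 to the group coordinator, so a restarted consumer resumes with the first unprocessed message.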