ruby-kafka-custom 0.7.7.26

Files changed (105)
  1. checksums.yaml +7 -0
  2. data/lib/kafka/async_producer.rb +279 -0
  3. data/lib/kafka/broker.rb +205 -0
  4. data/lib/kafka/broker_info.rb +16 -0
  5. data/lib/kafka/broker_pool.rb +41 -0
  6. data/lib/kafka/broker_uri.rb +43 -0
  7. data/lib/kafka/client.rb +754 -0
  8. data/lib/kafka/cluster.rb +455 -0
  9. data/lib/kafka/compression.rb +43 -0
  10. data/lib/kafka/compressor.rb +85 -0
  11. data/lib/kafka/connection.rb +220 -0
  12. data/lib/kafka/connection_builder.rb +33 -0
  13. data/lib/kafka/consumer.rb +592 -0
  14. data/lib/kafka/consumer_group.rb +208 -0
  15. data/lib/kafka/datadog.rb +413 -0
  16. data/lib/kafka/fetch_operation.rb +115 -0
  17. data/lib/kafka/fetched_batch.rb +54 -0
  18. data/lib/kafka/fetched_batch_generator.rb +117 -0
  19. data/lib/kafka/fetched_message.rb +47 -0
  20. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  21. data/lib/kafka/fetcher.rb +221 -0
  22. data/lib/kafka/gzip_codec.rb +30 -0
  23. data/lib/kafka/heartbeat.rb +25 -0
  24. data/lib/kafka/instrumenter.rb +38 -0
  25. data/lib/kafka/lz4_codec.rb +23 -0
  26. data/lib/kafka/message_buffer.rb +87 -0
  27. data/lib/kafka/offset_manager.rb +248 -0
  28. data/lib/kafka/partitioner.rb +35 -0
  29. data/lib/kafka/pause.rb +92 -0
  30. data/lib/kafka/pending_message.rb +29 -0
  31. data/lib/kafka/pending_message_queue.rb +41 -0
  32. data/lib/kafka/produce_operation.rb +205 -0
  33. data/lib/kafka/producer.rb +504 -0
  34. data/lib/kafka/protocol.rb +217 -0
  35. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  36. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  37. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  38. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  39. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  40. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  41. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  42. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  43. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  44. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  45. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  46. data/lib/kafka/protocol/decoder.rb +175 -0
  47. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  48. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  49. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  50. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  51. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  52. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  53. data/lib/kafka/protocol/encoder.rb +184 -0
  54. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  55. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  56. data/lib/kafka/protocol/fetch_request.rb +70 -0
  57. data/lib/kafka/protocol/fetch_response.rb +136 -0
  58. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  59. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  60. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  61. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  62. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  63. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  64. data/lib/kafka/protocol/join_group_request.rb +41 -0
  65. data/lib/kafka/protocol/join_group_response.rb +33 -0
  66. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  67. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  68. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  69. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  70. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  71. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  72. data/lib/kafka/protocol/member_assignment.rb +42 -0
  73. data/lib/kafka/protocol/message.rb +172 -0
  74. data/lib/kafka/protocol/message_set.rb +55 -0
  75. data/lib/kafka/protocol/metadata_request.rb +31 -0
  76. data/lib/kafka/protocol/metadata_response.rb +185 -0
  77. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  78. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  79. data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
  80. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  81. data/lib/kafka/protocol/produce_request.rb +92 -0
  82. data/lib/kafka/protocol/produce_response.rb +63 -0
  83. data/lib/kafka/protocol/record.rb +88 -0
  84. data/lib/kafka/protocol/record_batch.rb +222 -0
  85. data/lib/kafka/protocol/request_message.rb +26 -0
  86. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  87. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  88. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  89. data/lib/kafka/protocol/sync_group_response.rb +23 -0
  90. data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
  91. data/lib/kafka/sasl/gssapi.rb +76 -0
  92. data/lib/kafka/sasl/oauth.rb +64 -0
  93. data/lib/kafka/sasl/plain.rb +39 -0
  94. data/lib/kafka/sasl/scram.rb +177 -0
  95. data/lib/kafka/sasl_authenticator.rb +61 -0
  96. data/lib/kafka/snappy_codec.rb +25 -0
  97. data/lib/kafka/socket_with_timeout.rb +96 -0
  98. data/lib/kafka/ssl_context.rb +66 -0
  99. data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
  100. data/lib/kafka/statsd.rb +296 -0
  101. data/lib/kafka/tagged_logger.rb +72 -0
  102. data/lib/kafka/transaction_manager.rb +261 -0
  103. data/lib/kafka/transaction_state_machine.rb +72 -0
  104. data/lib/kafka/version.rb +5 -0
  105. metadata +461 -0
data/lib/kafka/gzip_codec.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Kafka
+  class GzipCodec
+    def codec_id
+      1
+    end
+
+    def load
+      require "zlib"
+    end
+
+    def compress(data)
+      buffer = StringIO.new
+      buffer.set_encoding(Encoding::BINARY)
+
+      writer = Zlib::GzipWriter.new(buffer, Zlib::DEFAULT_COMPRESSION, Zlib::DEFAULT_STRATEGY)
+      writer.write(data)
+      writer.close
+
+      buffer.string
+    end
+
+    def decompress(data)
+      buffer = StringIO.new(data)
+      reader = Zlib::GzipReader.new(buffer)
+      reader.read
+    end
+  end
+end
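
A quick round-trip sketch of GzipCodec (class and method names as in the diff above; the explicit `require "stringio"` is an assumption for running it standalone):

    require "stringio"

    codec = Kafka::GzipCodec.new
    codec.load                      # loads zlib from the standard library

    compressed = codec.compress("hello " * 100)
    codec.decompress(compressed)    # => "hello hello hello ..."
    codec.codec_id                  # => 1, the Kafka attribute id for gzip
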
data/lib/kafka/heartbeat.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Heartbeat
+    def initialize(group:, interval:, instrumenter:)
+      @group = group
+      @interval = interval
+      @last_heartbeat = Time.now
+      @instrumenter = instrumenter
+    end
+
+    def trigger!
+      @instrumenter.instrument('heartbeat.consumer',
+                               group_id: @group.group_id,
+                               topic_partitions: @group.assigned_partitions) do
+        @group.heartbeat
+        @last_heartbeat = Time.now
+      end
+    end
+
+    def trigger
+      trigger! if Time.now > @last_heartbeat + @interval
+    end
+  end
+end
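
The `trigger`/`trigger!` pair lets the consumer loop call `trigger` on every iteration while the class throttles actual heartbeats to one per `interval` seconds. A sketch with a hypothetical stub group (only `Kafka::Heartbeat` and `Kafka::Instrumenter` come from this gem):

    stub_group = Struct.new(:group_id, :assigned_partitions) do
      def heartbeat
        puts "heartbeat sent"
      end
    end.new("my-group", {})

    heartbeat = Kafka::Heartbeat.new(
      group: stub_group,
      interval: 10,                          # seconds between heartbeats
      instrumenter: Kafka::Instrumenter.new
    )

    heartbeat.trigger   # no-op: @last_heartbeat was set at construction
    sleep 11
    heartbeat.trigger   # now delegates to trigger! and resets the timer
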
data/lib/kafka/instrumenter.rb
@@ -0,0 +1,38 @@
+# frozen_string_literal: true
+
+module Kafka
+  class Instrumenter
+    NAMESPACE = "kafka"
+
+    def initialize(default_payload = {})
+      @default_payload = default_payload
+
+      if defined?(ActiveSupport::Notifications)
+        @backend = ActiveSupport::Notifications
+      else
+        @backend = nil
+      end
+    end
+
+    def instrument(event_name, payload = {}, &block)
+      if @backend
+        payload.update(@default_payload)
+
+        @backend.instrument("#{event_name}.#{NAMESPACE}", payload, &block)
+      else
+        block.call(payload) if block
+      end
+    end
+  end
+
+  class DecoratingInstrumenter
+    def initialize(backend, extra_payload = {})
+      @backend = backend
+      @extra_payload = extra_payload
+    end
+
+    def instrument(event_name, payload = {}, &block)
+      @backend.instrument(event_name, @extra_payload.merge(payload), &block)
+    end
+  end
+end
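
Because every event name gets the `kafka` namespace suffix, an application with ActiveSupport loaded can observe the library through a single regexp subscription (a sketch; the payload keys depend on the instrumented call site):

    require "active_support/notifications"

    ActiveSupport::Notifications.subscribe(/\.kafka$/) do |name, start, finish, _id, payload|
      puts "#{name} took #{(finish - start).round(4)}s: #{payload.inspect}"
    end

    instrumenter = Kafka::Instrumenter.new(client_id: "my-client")
    instrumenter.instrument("heartbeat.consumer", group_id: "g1") do
      # timed work goes here
    end

Note that `payload.update(@default_payload)` means the default payload wins over per-call keys of the same name, and that without ActiveSupport the block still runs, just untimed.
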
data/lib/kafka/lz4_codec.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module Kafka
+  class LZ4Codec
+    def codec_id
+      3
+    end
+
+    def load
+      require "extlz4"
+    rescue LoadError
+      raise LoadError, "using lz4 compression requires adding a dependency on the `extlz4` gem to your Gemfile."
+    end
+
+    def compress(data)
+      LZ4.encode(data)
+    end
+
+    def decompress(data)
+      LZ4.decode(data)
+    end
+  end
+end
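
Usage mirrors GzipCodec; the difference is that the dependency is opt-in, which is why `load` rescues LoadError with an actionable message. A sketch, assuming `gem "extlz4"` is in the Gemfile:

    codec = Kafka::LZ4Codec.new
    codec.load                     # raises the descriptive LoadError if extlz4 is absent

    compressed = codec.compress("payload")
    codec.decompress(compressed)   # => "payload"
    codec.codec_id                 # => 3, the Kafka attribute id for lz4
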
data/lib/kafka/message_buffer.rb
@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+
+require "kafka/protocol/message"
+
+module Kafka
+
+  # Buffers messages for specific topics/partitions.
+  class MessageBuffer
+    include Enumerable
+
+    attr_reader :size, :bytesize
+
+    def initialize
+      @buffer = {}
+      @size = 0
+      @bytesize = 0
+    end
+
+    def write(value:, key:, topic:, partition:, create_time: Time.now, headers: {})
+      message = Protocol::Record.new(key: key, value: value, create_time: create_time, headers: headers)
+
+      buffer_for(topic, partition) << message
+
+      @size += 1
+      @bytesize += message.bytesize
+    end
+
+    def concat(messages, topic:, partition:)
+      buffer_for(topic, partition).concat(messages)
+
+      @size += messages.count
+      @bytesize += messages.map(&:bytesize).reduce(0, :+)
+    end
+
+    def to_h
+      @buffer
+    end
+
+    def empty?
+      @buffer.empty?
+    end
+
+    def each
+      @buffer.each do |topic, messages_for_topic|
+        messages_for_topic.each do |partition, messages_for_partition|
+          yield topic, partition, messages_for_partition
+        end
+      end
+    end
+
+    # Clears buffered messages for the given topic and partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition id.
+    #
+    # @return [nil]
+    def clear_messages(topic:, partition:)
+      return unless @buffer.key?(topic) && @buffer[topic].key?(partition)
+
+      @size -= @buffer[topic][partition].count
+      @bytesize -= @buffer[topic][partition].map(&:bytesize).reduce(0, :+)
+
+      @buffer[topic].delete(partition)
+      @buffer.delete(topic) if @buffer[topic].empty?
+    end
+
+    def messages_for(topic:, partition:)
+      buffer_for(topic, partition)
+    end
+
+    # Clears messages across all topics and partitions.
+    #
+    # @return [nil]
+    def clear
+      @buffer = {}
+      @size = 0
+      @bytesize = 0
+    end
+
+    private
+
+    def buffer_for(topic, partition)
+      @buffer[topic] ||= {}
+      @buffer[topic][partition] ||= []
+    end
+  end
+end
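
A sketch of the buffer's write-then-drain flow (topic names and values are illustrative):

    buffer = Kafka::MessageBuffer.new

    buffer.write(value: "v1", key: "k1", topic: "events", partition: 0)
    buffer.write(value: "v2", key: nil,  topic: "events", partition: 1)

    buffer.size       # => 2
    buffer.bytesize   # => sum of the records' bytesize across all partitions

    buffer.each do |topic, partition, messages|
      puts "#{topic}/#{partition}: #{messages.count} record(s)"
    end

    buffer.clear_messages(topic: "events", partition: 0)
    buffer.size       # => 1
    buffer.clear      # drop everything; size and bytesize go back to 0
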
data/lib/kafka/offset_manager.rb
@@ -0,0 +1,248 @@
+# frozen_string_literal: true
+
+module Kafka
+
+  # Manages a consumer's position in partitions, figures out where to resume processing
+  # from, etc.
+  class OffsetManager
+
+    # The default broker setting for offsets.retention.minutes is 1440.
+    DEFAULT_RETENTION_TIME = 1440 * 60
+
+    def initialize(cluster:, group:, fetcher:, logger:, commit_interval:, commit_threshold:, offset_retention_time:)
+      @cluster = cluster
+      @group = group
+      @fetcher = fetcher
+      @logger = TaggedLogger.new(logger)
+      @commit_interval = commit_interval
+      @commit_threshold = commit_threshold
+
+      @uncommitted_offsets = 0
+      @processed_offsets = {}
+      @default_offsets = {}
+      @committed_offsets = nil
+      @resolved_offsets = {}
+      @last_commit = Time.now
+      @last_recommit = nil
+      @recommit_interval = (offset_retention_time || DEFAULT_RETENTION_TIME) / 2
+    end
+
+    # Set the default offset for a topic.
+    #
+    # When the consumer is started for the first time, or in cases where it gets stuck and
+    # has to reset its position, it must start either with the earliest messages or with
+    # the latest, skipping to the very end of each partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param default_offset [Symbol] either `:earliest` or `:latest`.
+    # @return [nil]
+    def set_default_offset(topic, default_offset)
+      @default_offsets[topic] = default_offset
+    end
+
+    # Mark a message as having been processed.
+    #
+    # When offsets are committed, the message's offset will be stored in Kafka so
+    # that we can resume from this point at a later time.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @param offset [Integer] the offset of the message that should be marked as processed.
+    # @return [nil]
+    def mark_as_processed(topic, partition, offset)
+      @uncommitted_offsets += 1
+      @processed_offsets[topic] ||= {}
+
+      # The committed offset should always be the offset of the next message that the
+      # application will read, thus adding one to the last message processed.
+      @processed_offsets[topic][partition] = offset + 1
+      @logger.debug "Marking #{topic}/#{partition}:#{offset} as processed"
+    end
+
+    # Move the consumer's position in the partition back to the configured default
+    # offset, either the first or latest in the partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @return [nil]
+    def seek_to_default(topic, partition)
+      # Remove any cached offset, in case things have changed broker-side.
+      clear_resolved_offset(topic)
+
+      offset = resolve_offset(topic, partition)
+
+      seek_to(topic, partition, offset)
+    end
+
+    # Move the consumer's position in the partition to the specified offset.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @param offset [Integer] the offset that the consumer position should be moved to.
+    # @return [nil]
+    def seek_to(topic, partition, offset)
+      @processed_offsets[topic] ||= {}
+      @processed_offsets[topic][partition] = offset
+
+      @fetcher.seek(topic, partition, offset)
+    end
+
+    # Return the next offset that should be fetched for the specified partition.
+    #
+    # @param topic [String] the name of the topic.
+    # @param partition [Integer] the partition number.
+    # @return [Integer] the next offset that should be fetched.
+    def next_offset_for(topic, partition)
+      offset = @processed_offsets.fetch(topic, {}).fetch(partition) {
+        committed_offset_for(topic, partition)
+      }
+
+      # A negative offset means that no offset has been committed, so we need to
+      # resolve the default offset for the topic.
+      if offset < 0
+        resolve_offset(topic, partition)
+      else
+        # The next offset is the last offset.
+        offset
+      end
+    end
+
+    # Commit offsets of messages that have been marked as processed.
+    #
+    # If `recommit` is set to true, we will also commit the existing positions
+    # even if no messages have been processed on a partition. This is done
+    # in order to avoid the offset information expiring in cases where messages
+    # are very rare -- it's essentially a keep-alive.
+    #
+    # @param recommit [Boolean] whether to recommit offsets that have already been
+    #   committed.
+    # @return [nil]
+    def commit_offsets(recommit = false)
+      offsets = offsets_to_commit(recommit)
+      unless offsets.empty?
+        @logger.debug "Committing offsets#{recommit ? ' with recommit' : ''}: #{prettify_offsets(offsets)}"
+
+        @group.commit_offsets(offsets)
+
+        @last_commit = Time.now
+        @last_recommit = Time.now if recommit
+
+        @uncommitted_offsets = 0
+        @committed_offsets = nil
+      end
+    end
+
+    # Commit offsets if necessary, according to the offset commit policy specified
+    # when initializing the class.
+    #
+    # @return [nil]
+    def commit_offsets_if_necessary
+      recommit = recommit_timeout_reached?
+      if recommit || commit_timeout_reached? || commit_threshold_reached?
+        commit_offsets(recommit)
+      end
+    end
+
+    # Clear all stored offset information.
+    #
+    # @return [nil]
+    def clear_offsets
+      @processed_offsets.clear
+      @resolved_offsets.clear
+
+      # Clear the cached commits from the brokers.
+      @committed_offsets = nil
+    end
+
+    # Clear stored offset information for all partitions except those specified
+    # in `excluded`.
+    #
+    #     offset_manager.clear_offsets_excluding("my-topic" => [1, 2, 3])
+    #
+    # @return [nil]
+    def clear_offsets_excluding(excluded)
+      # Clear all offsets that aren't in `excluded`.
+      @processed_offsets.each do |topic, partitions|
+        partitions.keep_if do |partition, _|
+          excluded.fetch(topic, []).include?(partition)
+        end
+      end
+
+      # Clear the cached commits from the brokers.
+      @committed_offsets = nil
+      @resolved_offsets.clear
+    end
+
+    private
+
+    def clear_resolved_offset(topic)
+      @resolved_offsets.delete(topic)
+    end
+
+    def resolve_offset(topic, partition)
+      @resolved_offsets[topic] ||= fetch_resolved_offsets(topic)
+      @resolved_offsets[topic].fetch(partition)
+    end
+
+    def fetch_resolved_offsets(topic)
+      default_offset = @default_offsets.fetch(topic)
+      partitions = @group.assigned_partitions.fetch(topic)
+
+      @cluster.resolve_offsets(topic, partitions, default_offset)
+    end
+
+    def seconds_since(time)
+      Time.now - time
+    end
+
+    def seconds_since_last_commit
+      seconds_since(@last_commit)
+    end
+
+    def committed_offsets
+      @committed_offsets ||= @group.fetch_offsets
+    end
+
+    def committed_offset_for(topic, partition)
+      committed_offsets.offset_for(topic, partition)
+    end
+
+    def offsets_to_commit(recommit = false)
+      if recommit
+        offsets_to_recommit.merge!(@processed_offsets) do |_topic, committed, processed|
+          committed.merge!(processed)
+        end
+      else
+        @processed_offsets
+      end
+    end
+
+    def offsets_to_recommit
+      committed_offsets.topics.each_with_object({}) do |(topic, partition_info), offsets|
+        topic_offsets = partition_info.keys.each_with_object({}) do |partition, partition_map|
+          offset = committed_offsets.offset_for(topic, partition)
+          partition_map[partition] = offset unless offset == -1
+        end
+        offsets[topic] = topic_offsets unless topic_offsets.empty?
+      end
+    end
+
+    def recommit_timeout_reached?
+      @last_recommit.nil? || seconds_since(@last_recommit) >= @recommit_interval
+    end
+
+    def commit_timeout_reached?
+      @commit_interval != 0 && seconds_since_last_commit >= @commit_interval
+    end
+
+    def commit_threshold_reached?
+      @commit_threshold != 0 && @uncommitted_offsets >= @commit_threshold
+    end
+
+    def prettify_offsets(offsets)
+      offsets.flat_map do |topic, partitions|
+        partitions.map { |partition, offset| "#{topic}/#{partition}:#{offset}" }
+      end.join(', ')
+    end
+  end
+end
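
A condensed sketch of how the three commit triggers interact (the cluster, group, fetcher, and logger collaborators are assumed to be already-built instances from this gem; the numbers are illustrative):

    manager = Kafka::OffsetManager.new(
      cluster: cluster,
      group: group,
      fetcher: fetcher,
      logger: logger,
      commit_interval: 10,             # time trigger: commit at most every 10 s
      commit_threshold: 100,           # volume trigger: or once 100 offsets are uncommitted
      offset_retention_time: 86_400    # keep-alive recommit fires every 43_200 s (half)
    )

    manager.set_default_offset("events", :earliest)

    # Mark offset 41 as processed; the stored value is 42, the next offset to read.
    manager.mark_as_processed("events", 0, 41)

    manager.commit_offsets_if_necessary   # commits only if one of the triggers has fired
    manager.commit_offsets                # or bypass the triggers and commit now

With the default retention of 1440 minutes the recommit interval works out the same: 1440 * 60 / 2 = 43_200 seconds, so positions are refreshed twice per retention window even on idle partitions.
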