ruby-kafka-aws-iam 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/config.yml +393 -0
- data/.github/workflows/stale.yml +19 -0
- data/.gitignore +13 -0
- data/.readygo +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +44 -0
- data/.ruby-version +1 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +314 -0
- data/Gemfile +5 -0
- data/ISSUE_TEMPLATE.md +23 -0
- data/LICENSE.txt +176 -0
- data/Procfile +2 -0
- data/README.md +1356 -0
- data/Rakefile +8 -0
- data/benchmarks/message_encoding.rb +23 -0
- data/bin/console +8 -0
- data/bin/setup +5 -0
- data/docker-compose.yml +39 -0
- data/examples/consumer-group.rb +35 -0
- data/examples/firehose-consumer.rb +64 -0
- data/examples/firehose-producer.rb +54 -0
- data/examples/simple-consumer.rb +34 -0
- data/examples/simple-producer.rb +42 -0
- data/examples/ssl-producer.rb +44 -0
- data/lib/kafka/async_producer.rb +297 -0
- data/lib/kafka/broker.rb +217 -0
- data/lib/kafka/broker_info.rb +16 -0
- data/lib/kafka/broker_pool.rb +41 -0
- data/lib/kafka/broker_uri.rb +43 -0
- data/lib/kafka/client.rb +838 -0
- data/lib/kafka/cluster.rb +513 -0
- data/lib/kafka/compression.rb +45 -0
- data/lib/kafka/compressor.rb +86 -0
- data/lib/kafka/connection.rb +228 -0
- data/lib/kafka/connection_builder.rb +33 -0
- data/lib/kafka/consumer.rb +642 -0
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/consumer_group.rb +231 -0
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +420 -0
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/fetch_operation.rb +115 -0
- data/lib/kafka/fetched_batch.rb +58 -0
- data/lib/kafka/fetched_batch_generator.rb +120 -0
- data/lib/kafka/fetched_message.rb +48 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +224 -0
- data/lib/kafka/gzip_codec.rb +34 -0
- data/lib/kafka/heartbeat.rb +25 -0
- data/lib/kafka/instrumenter.rb +38 -0
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/lz4_codec.rb +27 -0
- data/lib/kafka/message_buffer.rb +87 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +259 -0
- data/lib/kafka/partitioner.rb +40 -0
- data/lib/kafka/pause.rb +92 -0
- data/lib/kafka/pending_message.rb +29 -0
- data/lib/kafka/pending_message_queue.rb +41 -0
- data/lib/kafka/produce_operation.rb +205 -0
- data/lib/kafka/producer.rb +528 -0
- data/lib/kafka/prometheus.rb +316 -0
- data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/alter_configs_request.rb +44 -0
- data/lib/kafka/protocol/alter_configs_response.rb +49 -0
- data/lib/kafka/protocol/api_versions_request.rb +21 -0
- data/lib/kafka/protocol/api_versions_response.rb +53 -0
- data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
- data/lib/kafka/protocol/create_partitions_request.rb +42 -0
- data/lib/kafka/protocol/create_partitions_response.rb +28 -0
- data/lib/kafka/protocol/create_topics_request.rb +45 -0
- data/lib/kafka/protocol/create_topics_response.rb +26 -0
- data/lib/kafka/protocol/decoder.rb +175 -0
- data/lib/kafka/protocol/delete_topics_request.rb +33 -0
- data/lib/kafka/protocol/delete_topics_response.rb +26 -0
- data/lib/kafka/protocol/describe_configs_request.rb +35 -0
- data/lib/kafka/protocol/describe_configs_response.rb +73 -0
- data/lib/kafka/protocol/describe_groups_request.rb +27 -0
- data/lib/kafka/protocol/describe_groups_response.rb +73 -0
- data/lib/kafka/protocol/encoder.rb +184 -0
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +70 -0
- data/lib/kafka/protocol/fetch_response.rb +136 -0
- data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
- data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
- data/lib/kafka/protocol/heartbeat_request.rb +27 -0
- data/lib/kafka/protocol/heartbeat_response.rb +17 -0
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/join_group_request.rb +47 -0
- data/lib/kafka/protocol/join_group_response.rb +41 -0
- data/lib/kafka/protocol/leave_group_request.rb +25 -0
- data/lib/kafka/protocol/leave_group_response.rb +17 -0
- data/lib/kafka/protocol/list_groups_request.rb +23 -0
- data/lib/kafka/protocol/list_groups_response.rb +35 -0
- data/lib/kafka/protocol/list_offset_request.rb +53 -0
- data/lib/kafka/protocol/list_offset_response.rb +89 -0
- data/lib/kafka/protocol/member_assignment.rb +42 -0
- data/lib/kafka/protocol/message.rb +172 -0
- data/lib/kafka/protocol/message_set.rb +55 -0
- data/lib/kafka/protocol/metadata_request.rb +31 -0
- data/lib/kafka/protocol/metadata_response.rb +185 -0
- data/lib/kafka/protocol/offset_commit_request.rb +47 -0
- data/lib/kafka/protocol/offset_commit_response.rb +29 -0
- data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
- data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
- data/lib/kafka/protocol/produce_request.rb +94 -0
- data/lib/kafka/protocol/produce_response.rb +63 -0
- data/lib/kafka/protocol/record.rb +88 -0
- data/lib/kafka/protocol/record_batch.rb +223 -0
- data/lib/kafka/protocol/request_message.rb +26 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
- data/lib/kafka/protocol/sync_group_request.rb +33 -0
- data/lib/kafka/protocol/sync_group_response.rb +26 -0
- data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
- data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
- data/lib/kafka/protocol.rb +225 -0
- data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
- data/lib/kafka/sasl/awsmskiam.rb +128 -0
- data/lib/kafka/sasl/gssapi.rb +76 -0
- data/lib/kafka/sasl/oauth.rb +64 -0
- data/lib/kafka/sasl/plain.rb +39 -0
- data/lib/kafka/sasl/scram.rb +180 -0
- data/lib/kafka/sasl_authenticator.rb +73 -0
- data/lib/kafka/snappy_codec.rb +29 -0
- data/lib/kafka/socket_with_timeout.rb +96 -0
- data/lib/kafka/ssl_context.rb +66 -0
- data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
- data/lib/kafka/statsd.rb +296 -0
- data/lib/kafka/tagged_logger.rb +77 -0
- data/lib/kafka/transaction_manager.rb +306 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +5 -0
- data/lib/kafka/zstd_codec.rb +27 -0
- data/lib/kafka.rb +373 -0
- data/lib/ruby-kafka.rb +5 -0
- data/ruby-kafka.gemspec +54 -0
- metadata +520 -0
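
The notable addition in this fork is the AWS MSK IAM SASL mechanism (data/lib/kafka/sasl/awsmskiam.rb in the list above). A minimal configuration sketch follows; the sasl_aws_msk_iam_* option names and the broker address are assumptions for illustration and are not verified against this exact release — the shipped README.md is authoritative.

    # Sketch only: option names below are assumed from this fork's purpose
    # and may differ; see the gem's README.md for the real configuration.
    require "kafka"

    kafka = Kafka.new(
      ["b-1.example-msk.amazonaws.com:9098"],  # placeholder broker address
      client_id: "my-app",
      ssl_ca_certs_from_system: true,
      sasl_aws_msk_iam_access_key_id: ENV["AWS_ACCESS_KEY_ID"],
      sasl_aws_msk_iam_secret_key_id: ENV["AWS_SECRET_ACCESS_KEY"],
      sasl_aws_msk_iam_aws_region: "us-east-1"
    )

    kafka.deliver_message("hello", topic: "greetings")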
data/lib/kafka/cluster.rb
@@ -0,0 +1,513 @@
+# frozen_string_literal: true
+
+require "kafka/broker_pool"
+require "resolv"
+require "set"
+
+module Kafka
+
+  # A cluster represents the state of a Kafka cluster. It needs to be initialized
+  # with a non-empty list of seed brokers. The first seed broker that the cluster can connect
+  # to will be asked for the cluster metadata, which allows the cluster to map topic
+  # partitions to the current leader for those partitions.
+  class Cluster
+
+    # Initializes a Cluster with a set of seed brokers.
+    #
+    # The cluster will try to fetch cluster metadata from one of the brokers.
+    #
+    # @param seed_brokers [Array<URI>]
+    # @param broker_pool [Kafka::BrokerPool]
+    # @param logger [Logger]
+    # @param resolve_seed_brokers [Boolean] See {Kafka::Client#initialize}
+    def initialize(seed_brokers:, broker_pool:, logger:, resolve_seed_brokers: false)
+      if seed_brokers.empty?
+        raise ArgumentError, "At least one seed broker must be configured"
+      end
+
+      @logger = TaggedLogger.new(logger)
+      @seed_brokers = seed_brokers
+      @broker_pool = broker_pool
+      @resolve_seed_brokers = resolve_seed_brokers
+      @cluster_info = nil
+      @stale = true
+
+      # This is the set of topics we need metadata for. If empty, metadata for
+      # all topics will be fetched.
+      @target_topics = Set.new
+    end
+
+    # Adds a list of topics to the target list. Only the topics on this list will
+    # be queried for metadata.
+    #
+    # @param topics [Array<String>]
+    # @return [nil]
+    def add_target_topics(topics)
+      topics = Set.new(topics)
+      unless topics.subset?(@target_topics)
+        new_topics = topics - @target_topics
+
+        unless new_topics.empty?
+          if new_topics.any? { |topic| topic.nil? or topic.empty? }
+            raise ArgumentError, "Topic must not be nil or empty"
+          end
+
+          @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"
+
+          @target_topics.merge(new_topics)
+
+          refresh_metadata!
+        end
+      end
+    end
+
+    def api_info(api_key)
+      apis.find {|api| api.api_key == api_key }
+    end
+
+    def supports_api?(api_key, version = nil)
+      info = api_info(api_key)
+      if info.nil?
+        return false
+      elsif version.nil?
+        return true
+      else
+        return info.version_supported?(version)
+      end
+    end
+
+    def apis
+      @apis ||=
+        begin
+          response = random_broker.api_versions
+
+          Protocol.handle_error(response.error_code)
+
+          response.apis
+        end
+    end
+
+    # Clears the list of target topics.
+    #
+    # @see #add_target_topics
+    # @return [nil]
+    def clear_target_topics
+      @target_topics.clear
+      refresh_metadata!
+    end
+
+    def mark_as_stale!
+      @stale = true
+    end
+
+    def refresh_metadata!
+      @cluster_info = nil
+      cluster_info
+    end
+
+    def refresh_metadata_if_necessary!
+      refresh_metadata! if @stale
+    end
+
+    # Finds the broker acting as the leader of the given topic and partition.
+    #
+    # @param topic [String]
+    # @param partition [Integer]
+    # @return [Broker] the broker that's currently leader.
+    def get_leader(topic, partition)
+      connect_to_broker(get_leader_id(topic, partition))
+    end
+
+    # Finds the broker acting as the coordinator of the given group.
+    #
+    # @param group_id [String]
+    # @return [Broker] the broker that's currently coordinator.
+    def get_group_coordinator(group_id:)
+      @logger.debug "Getting group coordinator for `#{group_id}`"
+      refresh_metadata_if_necessary!
+      get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_GROUP, group_id)
+    end
+
+    # Finds the broker acting as the coordinator of the given transaction.
+    #
+    # @param transactional_id [String]
+    # @return [Broker] the broker that's currently coordinator.
+    def get_transaction_coordinator(transactional_id:)
+      @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
+
+      refresh_metadata_if_necessary!
+
+      if transactional_id.nil?
+        # Get a random_broker
+        @logger.debug "Transaction ID is not available. Choose a random broker."
+        return random_broker
+      else
+        get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_TRANSACTION, transactional_id)
+      end
+    end
+
+    def describe_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.describe_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      response.resources.first.configs
+    end
+
+    def alter_configs(broker_id, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
+      }
+
+      info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
+      broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+
+      response = broker.alter_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      nil
+    end
+
+    def partitions_for(topic)
+      add_target_topics([topic])
+      refresh_metadata_if_necessary!
+      cluster_info.partitions_for(topic)
+    rescue Kafka::ProtocolError
+      mark_as_stale!
+      raise
+    end
+
+    def create_topic(name, num_partitions:, replication_factor:, timeout:, config:)
+      options = {
+        topics: {
+          name => {
+            num_partitions: num_partitions,
+            replication_factor: replication_factor,
+            config: config,
+          }
+        },
+        timeout: timeout,
+      }
+
+      broker = controller_broker
+
+      @logger.info "Creating topic `#{name}` using controller broker #{broker}"
+
+      response = broker.create_topics(**options)
+
+      response.errors.each do |topic, error_code|
+        Protocol.handle_error(error_code)
+      end
+
+      begin
+        partitions_for(name).each do |info|
+          Protocol.handle_error(info.partition_error_code)
+        end
+      rescue Kafka::LeaderNotAvailable
+        @logger.warn "Leader not yet available for `#{name}`, waiting 1s..."
+        sleep 1
+
+        retry
+      rescue Kafka::UnknownTopicOrPartition
+        @logger.warn "Topic `#{name}` not yet created, waiting 1s..."
+        sleep 1
+
+        retry
+      end
+
+      @logger.info "Topic `#{name}` was created"
+    end
+
+    def delete_topic(name, timeout:)
+      options = {
+        topics: [name],
+        timeout: timeout,
+      }
+
+      broker = controller_broker
+
+      @logger.info "Deleting topic `#{name}` using controller broker #{broker}"
+
+      response = broker.delete_topics(**options)
+
+      response.errors.each do |topic, error_code|
+        Protocol.handle_error(error_code)
+      end
+
+      @logger.info "Topic `#{name}` was deleted"
+    end
+
+    def describe_topic(name, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_TOPIC, name, configs]]
+      }
+      broker = controller_broker
+
+      @logger.info "Fetching topic `#{name}`'s configs using controller broker #{broker}"
+
+      response = broker.describe_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+      topic_description = response.resources.first
+      topic_description.configs.each_with_object({}) do |config, hash|
+        hash[config.name] = config.value
+      end
+    end
+
+    def alter_topic(name, configs = {})
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_TOPIC, name, configs]]
+      }
+
+      broker = controller_broker
+
+      @logger.info "Altering the config for topic `#{name}` using controller broker #{broker}"
+
+      response = broker.alter_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      nil
+    end
+
+    def describe_group(group_id)
+      response = get_group_coordinator(group_id: group_id).describe_groups(group_ids: [group_id])
+      group = response.groups.first
+      Protocol.handle_error(group.error_code)
+      group
+    end
+
+    def fetch_group_offsets(group_id)
+      topics = get_group_coordinator(group_id: group_id)
+        .fetch_offsets(group_id: group_id, topics: nil)
+        .topics
+
+      topics.each do |_, partitions|
+        partitions.each do |_, response|
+          Protocol.handle_error(response.error_code)
+        end
+      end
+
+      topics
+    end
+
+    def create_partitions_for(name, num_partitions:, timeout:)
+      options = {
+        topics: [[name, num_partitions, nil]],
+        timeout: timeout
+      }
+
+      broker = controller_broker
+
+      @logger.info "Creating #{num_partitions} partition(s) for topic `#{name}` using controller broker #{broker}"
+
+      response = broker.create_partitions(**options)
+
+      response.errors.each do |topic, error_code, error_message|
+        Protocol.handle_error(error_code, error_message)
+      end
+      mark_as_stale!
+
+      @logger.info "Topic `#{name}` was updated"
+    end
+
+    def resolve_offsets(topic, partitions, offset)
+      add_target_topics([topic])
+      refresh_metadata_if_necessary!
+
+      partitions_by_broker = partitions.each_with_object({}) {|partition, hsh|
+        broker = get_leader(topic, partition)
+
+        hsh[broker] ||= []
+        hsh[broker] << partition
+      }
+
+      if offset == :earliest
+        offset = -2
+      elsif offset == :latest
+        offset = -1
+      end
+
+      offsets = {}
+
+      partitions_by_broker.each do |broker, broker_partitions|
+        response = broker.list_offsets(
+          topics: {
+            topic => broker_partitions.map {|partition|
+              {
+                partition: partition,
+                time: offset
+              }
+            }
+          }
+        )
+
+        broker_partitions.each do |partition|
+          offsets[partition] = response.offset_for(topic, partition)
+        end
+      end
+
+      offsets
+    rescue Kafka::ProtocolError
+      mark_as_stale!
+      raise
+    end
+
+    def resolve_offset(topic, partition, offset)
+      resolve_offsets(topic, [partition], offset).fetch(partition)
+    end
+
+    def topics
+      refresh_metadata_if_necessary!
+      cluster_info.topics.select do |topic|
+        topic.topic_error_code == 0
+      end.map(&:topic_name)
+    end
+
+    # Lists all topics in the cluster.
+    def list_topics
+      response = random_broker.fetch_metadata(topics: nil)
+      response.topics.select do |topic|
+        topic.topic_error_code == 0
+      end.map(&:topic_name)
+    end
+
+    def list_groups
+      refresh_metadata_if_necessary!
+      cluster_info.brokers.map do |broker|
+        response = connect_to_broker(broker.node_id).list_groups
+        Protocol.handle_error(response.error_code)
+        response.groups.map(&:group_id)
+      end.flatten.uniq
+    end
+
+    def disconnect
+      @broker_pool.close
+    end
+
+    def cluster_info
+      @cluster_info ||= fetch_cluster_info
+    end
+
+    private
+
+    def get_leader_id(topic, partition)
+      cluster_info.find_leader_id(topic, partition)
+    end
+
+    # Fetches the cluster metadata.
+    #
+    # This is used to update the partition leadership information, among other things.
+    # The methods will go through each node listed in `seed_brokers`, connecting to the
+    # first one that is available. This node will be queried for the cluster metadata.
+    #
+    # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
+    # @return [Protocol::MetadataResponse] the cluster metadata.
+    def fetch_cluster_info
+      errors = []
+      @seed_brokers.shuffle.each do |node|
+        (@resolve_seed_brokers ? Resolv.getaddresses(node.hostname).shuffle : [node.hostname]).each do |hostname_or_ip|
+          node_info = node.to_s
+          node_info << " (#{hostname_or_ip})" if node.hostname != hostname_or_ip
+          @logger.info "Fetching cluster metadata from #{node_info}"
+
+          begin
+            broker = @broker_pool.connect(hostname_or_ip, node.port)
+            cluster_info = broker.fetch_metadata(topics: @target_topics)
+
+            if cluster_info.brokers.empty?
+              @logger.error "No brokers in cluster"
+            else
+              @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
+
+              @stale = false
+
+              return cluster_info
+            end
+          rescue Error => e
+            @logger.error "Failed to fetch metadata from #{node_info}: #{e}"
+            errors << [node_info, e]
+          ensure
+            broker.disconnect unless broker.nil?
+          end
+        end
+      end
+
+      error_description = errors.map {|node_info, exception| "- #{node_info}: #{exception}" }.join("\n")
+
+      raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
+    end
+
+    def random_broker
+      refresh_metadata_if_necessary!
+      node_id = cluster_info.brokers.sample.node_id
+      connect_to_broker(node_id)
+    end
+
+    def connect_to_broker(broker_id)
+      info = cluster_info.find_broker(broker_id)
+
+      @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+    end
+
+    def controller_broker
+      connect_to_broker(cluster_info.controller_id)
+    end
+
+    def get_coordinator(coordinator_type, coordinator_key)
+      cluster_info.brokers.each do |broker_info|
+        begin
+          broker = connect_to_broker(broker_info.node_id)
+          response = broker.find_coordinator(
+            coordinator_type: coordinator_type,
+            coordinator_key: coordinator_key
+          )
+
+          Protocol.handle_error(response.error_code, response.error_message)
+
+          coordinator_id = response.coordinator_id
+
+          @logger.debug "Coordinator for `#{coordinator_key}` is #{coordinator_id}. Connecting..."
+
+          # It's possible that a new broker is introduced to the cluster and
+          # becomes the coordinator before we have a chance to refresh_metadata.
+          coordinator = begin
+            connect_to_broker(coordinator_id)
+          rescue Kafka::NoSuchBroker
+            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
+            refresh_metadata!
+            connect_to_broker(coordinator_id)
+          end
+
+          @logger.debug "Connected to coordinator: #{coordinator} for `#{coordinator_key}`"
+
+          return coordinator
+        rescue CoordinatorNotAvailable
+          @logger.debug "Coordinator not available; retrying in 1s"
+          sleep 1
+          retry
+        rescue ConnectionError => e
+          @logger.error "Failed to get coordinator info from #{broker}: #{e}"
+        end
+      end
+
+      raise Kafka::Error, "Failed to find coordinator"
+    end
+  end
+end
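
In resolve_offsets above, the symbolic offsets :earliest and :latest are mapped to the Kafka sentinel values -2 and -1 before the ListOffsets requests are fanned out to each partition's leader. A minimal usage sketch, assuming `cluster` is the Kafka::Cluster that Kafka::Client builds internally (it is not normally constructed by hand):

    # Sketch only: `cluster` is assumed to already exist with a broker pool and logger.
    cluster.add_target_topics(["events"])          # limit metadata fetches to this topic
    partitions = cluster.partitions_for("events")  # refreshes metadata when stale

    # :latest becomes the -1 sentinel in the ListOffsets request.
    next_offset = cluster.resolve_offset("events", 0, :latest)

    cluster.disconnect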

data/lib/kafka/compression.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+require "kafka/snappy_codec"
+require "kafka/gzip_codec"
+require "kafka/lz4_codec"
+require "kafka/zstd_codec"
+
+module Kafka
+  module Compression
+    CODECS_BY_NAME = {
+      :gzip => GzipCodec.new,
+      :snappy => SnappyCodec.new,
+      :lz4 => LZ4Codec.new,
+      :zstd => ZstdCodec.new,
+    }.freeze
+
+    CODECS_BY_ID = CODECS_BY_NAME.each_with_object({}) do |(_, codec), hash|
+      hash[codec.codec_id] = codec
+    end.freeze
+
+    def self.codecs
+      CODECS_BY_NAME.keys
+    end
+
+    def self.find_codec(name)
+      codec = CODECS_BY_NAME.fetch(name) do
+        raise "Unknown compression codec #{name}"
+      end
+
+      codec.load
+
+      codec
+    end
+
+    def self.find_codec_by_id(codec_id)
+      codec = CODECS_BY_ID.fetch(codec_id) do
+        raise "Unknown codec id #{codec_id}"
+      end
+
+      codec.load
+
+      codec
+    end
+  end
+end
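
The module above resolves codecs either by name (as configured on the producer) or by the numeric codec id carried in fetched messages, and `codec.load` defers loading any optional compression gem until the codec is first used. A short usage sketch:

    require "kafka/compression"

    codec = Kafka::Compression.find_codec(:gzip)  # raises for unknown names
    compressed = codec.compress("some payload")

    # The numeric id round-trips back to the same codec instance.
    Kafka::Compression.find_codec_by_id(codec.codec_id).equal?(codec)  # => true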

data/lib/kafka/compressor.rb
@@ -0,0 +1,86 @@
+# frozen_string_literal: true
+
+require "kafka/compression"
+
+module Kafka
+
+  # Compresses message sets using a specified codec.
+  #
+  # A message set is only compressed if its size meets the defined threshold.
+  #
+  # ## Instrumentation
+  #
+  # Whenever a message set is compressed, the notification
+  # `compress.compressor.kafka` will be emitted with the following payload:
+  #
+  # * `message_count` – the number of messages in the message set.
+  # * `uncompressed_bytesize` – the byte size of the original data.
+  # * `compressed_bytesize` – the byte size of the compressed data.
+  #
+  class Compressor
+    attr_reader :codec
+
+    # @param codec_name [Symbol, nil]
+    # @param threshold [Integer] the minimum number of messages in a message set
+    #   that will trigger compression.
+    def initialize(codec_name: nil, threshold: 1, instrumenter:)
+      # Codec may be nil, in which case we won't compress.
+      @codec = codec_name && Compression.find_codec(codec_name)
+
+      @threshold = threshold
+      @instrumenter = instrumenter
+    end
+
+    # @param record_batch [Protocol::RecordBatch]
+    # @param offset [Integer] used to simulate broker behaviour in tests
+    # @return [Protocol::RecordBatch]
+    def compress(record_batch, offset: -1)
+      if record_batch.is_a?(Protocol::RecordBatch)
+        compress_record_batch(record_batch)
+      else
+        # Deprecated message set format
+        compress_message_set(record_batch, offset)
+      end
+    end
+
+    private
+
+    def compress_message_set(message_set, offset)
+      return message_set if @codec.nil? || message_set.size < @threshold
+
+      data = Protocol::Encoder.encode_with(message_set)
+      compressed_data = @codec.compress(data)
+
+      @instrumenter.instrument("compress.compressor") do |notification|
+        notification[:message_count] = message_set.size
+        notification[:uncompressed_bytesize] = data.bytesize
+        notification[:compressed_bytesize] = compressed_data.bytesize
+      end
+
+      wrapper_message = Protocol::Message.new(
+        value: compressed_data,
+        codec_id: @codec.codec_id,
+        offset: offset
+      )
+
+      Protocol::MessageSet.new(messages: [wrapper_message])
+    end
+
+    def compress_record_batch(record_batch)
+      if @codec.nil? || record_batch.size < @threshold
+        record_batch.codec_id = 0
+        return Protocol::Encoder.encode_with(record_batch)
+      end
+
+      record_batch.codec_id = @codec.codec_id
+      data = Protocol::Encoder.encode_with(record_batch)
+
+      @instrumenter.instrument("compress.compressor") do |notification|
+        notification[:message_count] = record_batch.size
+        notification[:compressed_bytesize] = data.bytesize
+      end
+
+      data
+    end
+  end
+end
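
As the class comment notes, compression only happens once a batch reaches the configured threshold; smaller record batches are still encoded, but with codec id 0 (uncompressed). A sketch of how the producer wires this up, assuming the Instrumenter is constructed the same way Kafka::Client constructs it:

    require "kafka/instrumenter"
    require "kafka/compressor"

    instrumenter = Kafka::Instrumenter.new(client_id: "my-app")

    # Gzip any batch containing at least 10 messages; smaller batches keep codec id 0.
    compressor = Kafka::Compressor.new(
      codec_name: :gzip,
      threshold: 10,
      instrumenter: instrumenter
    )

    # `compress` accepts a Protocol::RecordBatch (current format) or a legacy
    # Protocol::MessageSet, and emits compress.compressor.kafka when it compresses.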