ruby-kafka-aws-iam 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
@@ -0,0 +1,513 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "kafka/broker_pool"
4
+ require "resolv"
5
+ require "set"
6
+
7
+ module Kafka
8
+
9
+ # A cluster represents the state of a Kafka cluster. It needs to be initialized
10
+ # with a non-empty list of seed brokers. The first seed broker that the cluster can connect
11
+ # to will be asked for the cluster metadata, which allows the cluster to map topic
12
+ # partitions to the current leader for those partitions.
13
+ class Cluster
14
+
15
+ # Initializes a Cluster with a set of seed brokers.
16
+ #
17
+ # The cluster will try to fetch cluster metadata from one of the brokers.
18
+ #
19
+ # @param seed_brokers [Array<URI>]
20
+ # @param broker_pool [Kafka::BrokerPool]
21
+ # @param logger [Logger]
22
+ # @param resolve_seed_brokers [Boolean] See {Kafka::Client#initialize}
23
+ def initialize(seed_brokers:, broker_pool:, logger:, resolve_seed_brokers: false)
24
+ if seed_brokers.empty?
25
+ raise ArgumentError, "At least one seed broker must be configured"
26
+ end
27
+
28
+ @logger = TaggedLogger.new(logger)
29
+ @seed_brokers = seed_brokers
30
+ @broker_pool = broker_pool
31
+ @resolve_seed_brokers = resolve_seed_brokers
32
+ @cluster_info = nil
33
+ @stale = true
34
+
35
+ # This is the set of topics we need metadata for. If empty, metadata for
36
+ # all topics will be fetched.
37
+ @target_topics = Set.new
38
+ end
39
+
40
+ # Adds a list of topics to the target list. Only the topics on this list will
41
+ # be queried for metadata.
42
+ #
43
+ # @param topics [Array<String>]
44
+ # @return [nil]
45
+ def add_target_topics(topics)
46
+ topics = Set.new(topics)
47
+ unless topics.subset?(@target_topics)
48
+ new_topics = topics - @target_topics
49
+
50
+ unless new_topics.empty?
51
+ if new_topics.any? { |topic| topic.nil? or topic.empty? }
52
+ raise ArgumentError, "Topic must not be nil or empty"
53
+ end
54
+
55
+ @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"
56
+
57
+ @target_topics.merge(new_topics)
58
+
59
+ refresh_metadata!
60
+ end
61
+ end
62
+ end
63
+
64
+ def api_info(api_key)
65
+ apis.find {|api| api.api_key == api_key }
66
+ end
67
+
68
+ def supports_api?(api_key, version = nil)
69
+ info = api_info(api_key)
70
+ if info.nil?
71
+ return false
72
+ elsif version.nil?
73
+ return true
74
+ else
75
+ return info.version_supported?(version)
76
+ end
77
+ end
78
+
79
+ def apis
80
+ @apis ||=
81
+ begin
82
+ response = random_broker.api_versions
83
+
84
+ Protocol.handle_error(response.error_code)
85
+
86
+ response.apis
87
+ end
88
+ end
89
+
90
+ # Clears the list of target topics.
91
+ #
92
+ # @see #add_target_topics
93
+ # @return [nil]
94
+ def clear_target_topics
95
+ @target_topics.clear
96
+ refresh_metadata!
97
+ end
98
+
99
+ def mark_as_stale!
100
+ @stale = true
101
+ end
102
+
103
+ def refresh_metadata!
104
+ @cluster_info = nil
105
+ cluster_info
106
+ end
107
+
108
+ def refresh_metadata_if_necessary!
109
+ refresh_metadata! if @stale
110
+ end
111
+
112
+ # Finds the broker acting as the leader of the given topic and partition.
113
+ #
114
+ # @param topic [String]
115
+ # @param partition [Integer]
116
+ # @return [Broker] the broker that's currently leader.
117
+ def get_leader(topic, partition)
118
+ connect_to_broker(get_leader_id(topic, partition))
119
+ end
120
+
121
+ # Finds the broker acting as the coordinator of the given group.
122
+ #
123
+ # @param group_id [String]
124
+ # @return [Broker] the broker that's currently coordinator.
125
+ def get_group_coordinator(group_id:)
126
+ @logger.debug "Getting group coordinator for `#{group_id}`"
127
+ refresh_metadata_if_necessary!
128
+ get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_GROUP, group_id)
129
+ end
130
+
131
+ # Finds the broker acting as the coordinator of the given transaction.
132
+ #
133
+ # @param transactional_id [String]
134
+ # @return [Broker] the broker that's currently coordinator.
135
+ def get_transaction_coordinator(transactional_id:)
136
+ @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
137
+
138
+ refresh_metadata_if_necessary!
139
+
140
+ if transactional_id.nil?
141
+ # Get a random_broker
142
+ @logger.debug "Transaction ID is not available. Choose a random broker."
143
+ return random_broker
144
+ else
145
+ get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_TRANSACTION, transactional_id)
146
+ end
147
+ end
148
+
149
+ def describe_configs(broker_id, configs = [])
150
+ options = {
151
+ resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
152
+ }
153
+
154
+ info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
155
+ broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
156
+
157
+ response = broker.describe_configs(**options)
158
+
159
+ response.resources.each do |resource|
160
+ Protocol.handle_error(resource.error_code, resource.error_message)
161
+ end
162
+
163
+ response.resources.first.configs
164
+ end
165
+
166
+ def alter_configs(broker_id, configs = [])
167
+ options = {
168
+ resources: [[Kafka::Protocol::RESOURCE_TYPE_CLUSTER, broker_id.to_s, configs]]
169
+ }
170
+
171
+ info = cluster_info.brokers.find {|broker| broker.node_id == broker_id }
172
+ broker = @broker_pool.connect(info.host, info.port, node_id: info.node_id)
173
+
174
+ response = broker.alter_configs(**options)
175
+
176
+ response.resources.each do |resource|
177
+ Protocol.handle_error(resource.error_code, resource.error_message)
178
+ end
179
+
180
+ nil
181
+ end
182
+
183
+ def partitions_for(topic)
184
+ add_target_topics([topic])
185
+ refresh_metadata_if_necessary!
186
+ cluster_info.partitions_for(topic)
187
+ rescue Kafka::ProtocolError
188
+ mark_as_stale!
189
+ raise
190
+ end
191
+
192
+ def create_topic(name, num_partitions:, replication_factor:, timeout:, config:)
193
+ options = {
194
+ topics: {
195
+ name => {
196
+ num_partitions: num_partitions,
197
+ replication_factor: replication_factor,
198
+ config: config,
199
+ }
200
+ },
201
+ timeout: timeout,
202
+ }
203
+
204
+ broker = controller_broker
205
+
206
+ @logger.info "Creating topic `#{name}` using controller broker #{broker}"
207
+
208
+ response = broker.create_topics(**options)
209
+
210
+ response.errors.each do |topic, error_code|
211
+ Protocol.handle_error(error_code)
212
+ end
213
+
214
+ begin
215
+ partitions_for(name).each do |info|
216
+ Protocol.handle_error(info.partition_error_code)
217
+ end
218
+ rescue Kafka::LeaderNotAvailable
219
+ @logger.warn "Leader not yet available for `#{name}`, waiting 1s..."
220
+ sleep 1
221
+
222
+ retry
223
+ rescue Kafka::UnknownTopicOrPartition
224
+ @logger.warn "Topic `#{name}` not yet created, waiting 1s..."
225
+ sleep 1
226
+
227
+ retry
228
+ end
229
+
230
+ @logger.info "Topic `#{name}` was created"
231
+ end
232
+
233
+ def delete_topic(name, timeout:)
234
+ options = {
235
+ topics: [name],
236
+ timeout: timeout,
237
+ }
238
+
239
+ broker = controller_broker
240
+
241
+ @logger.info "Deleting topic `#{name}` using controller broker #{broker}"
242
+
243
+ response = broker.delete_topics(**options)
244
+
245
+ response.errors.each do |topic, error_code|
246
+ Protocol.handle_error(error_code)
247
+ end
248
+
249
+ @logger.info "Topic `#{name}` was deleted"
250
+ end
251
+
252
+ def describe_topic(name, configs = [])
253
+ options = {
254
+ resources: [[Kafka::Protocol::RESOURCE_TYPE_TOPIC, name, configs]]
255
+ }
256
+ broker = controller_broker
257
+
258
+ @logger.info "Fetching topic `#{name}`'s configs using controller broker #{broker}"
259
+
260
+ response = broker.describe_configs(**options)
261
+
262
+ response.resources.each do |resource|
263
+ Protocol.handle_error(resource.error_code, resource.error_message)
264
+ end
265
+ topic_description = response.resources.first
266
+ topic_description.configs.each_with_object({}) do |config, hash|
267
+ hash[config.name] = config.value
268
+ end
269
+ end
270
+
271
+ def alter_topic(name, configs = {})
272
+ options = {
273
+ resources: [[Kafka::Protocol::RESOURCE_TYPE_TOPIC, name, configs]]
274
+ }
275
+
276
+ broker = controller_broker
277
+
278
+ @logger.info "Altering the config for topic `#{name}` using controller broker #{broker}"
279
+
280
+ response = broker.alter_configs(**options)
281
+
282
+ response.resources.each do |resource|
283
+ Protocol.handle_error(resource.error_code, resource.error_message)
284
+ end
285
+
286
+ nil
287
+ end
288
+
289
+ def describe_group(group_id)
290
+ response = get_group_coordinator(group_id: group_id).describe_groups(group_ids: [group_id])
291
+ group = response.groups.first
292
+ Protocol.handle_error(group.error_code)
293
+ group
294
+ end
295
+
296
+ def fetch_group_offsets(group_id)
297
+ topics = get_group_coordinator(group_id: group_id)
298
+ .fetch_offsets(group_id: group_id, topics: nil)
299
+ .topics
300
+
301
+ topics.each do |_, partitions|
302
+ partitions.each do |_, response|
303
+ Protocol.handle_error(response.error_code)
304
+ end
305
+ end
306
+
307
+ topics
308
+ end
309
+
310
+ def create_partitions_for(name, num_partitions:, timeout:)
311
+ options = {
312
+ topics: [[name, num_partitions, nil]],
313
+ timeout: timeout
314
+ }
315
+
316
+ broker = controller_broker
317
+
318
+ @logger.info "Creating #{num_partitions} partition(s) for topic `#{name}` using controller broker #{broker}"
319
+
320
+ response = broker.create_partitions(**options)
321
+
322
+ response.errors.each do |topic, error_code, error_message|
323
+ Protocol.handle_error(error_code, error_message)
324
+ end
325
+ mark_as_stale!
326
+
327
+ @logger.info "Topic `#{name}` was updated"
328
+ end
329
+
330
+ def resolve_offsets(topic, partitions, offset)
331
+ add_target_topics([topic])
332
+ refresh_metadata_if_necessary!
333
+
334
+ partitions_by_broker = partitions.each_with_object({}) {|partition, hsh|
335
+ broker = get_leader(topic, partition)
336
+
337
+ hsh[broker] ||= []
338
+ hsh[broker] << partition
339
+ }
340
+
341
+ if offset == :earliest
342
+ offset = -2
343
+ elsif offset == :latest
344
+ offset = -1
345
+ end
346
+
347
+ offsets = {}
348
+
349
+ partitions_by_broker.each do |broker, broker_partitions|
350
+ response = broker.list_offsets(
351
+ topics: {
352
+ topic => broker_partitions.map {|partition|
353
+ {
354
+ partition: partition,
355
+ time: offset
356
+ }
357
+ }
358
+ }
359
+ )
360
+
361
+ broker_partitions.each do |partition|
362
+ offsets[partition] = response.offset_for(topic, partition)
363
+ end
364
+ end
365
+
366
+ offsets
367
+ rescue Kafka::ProtocolError
368
+ mark_as_stale!
369
+ raise
370
+ end
371
+
372
+ def resolve_offset(topic, partition, offset)
373
+ resolve_offsets(topic, [partition], offset).fetch(partition)
374
+ end
375
+
376
+ def topics
377
+ refresh_metadata_if_necessary!
378
+ cluster_info.topics.select do |topic|
379
+ topic.topic_error_code == 0
380
+ end.map(&:topic_name)
381
+ end
382
+
383
+ # Lists all topics in the cluster.
384
+ def list_topics
385
+ response = random_broker.fetch_metadata(topics: nil)
386
+ response.topics.select do |topic|
387
+ topic.topic_error_code == 0
388
+ end.map(&:topic_name)
389
+ end
390
+
391
+ def list_groups
392
+ refresh_metadata_if_necessary!
393
+ cluster_info.brokers.map do |broker|
394
+ response = connect_to_broker(broker.node_id).list_groups
395
+ Protocol.handle_error(response.error_code)
396
+ response.groups.map(&:group_id)
397
+ end.flatten.uniq
398
+ end
399
+
400
+ def disconnect
401
+ @broker_pool.close
402
+ end
403
+
404
+ def cluster_info
405
+ @cluster_info ||= fetch_cluster_info
406
+ end
407
+
408
+ private
409
+
410
+ def get_leader_id(topic, partition)
411
+ cluster_info.find_leader_id(topic, partition)
412
+ end
413
+
414
+ # Fetches the cluster metadata.
415
+ #
416
+ # This is used to update the partition leadership information, among other things.
417
+ # The methods will go through each node listed in `seed_brokers`, connecting to the
418
+ # first one that is available. This node will be queried for the cluster metadata.
419
+ #
420
+ # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
421
+ # @return [Protocol::MetadataResponse] the cluster metadata.
422
+ def fetch_cluster_info
423
+ errors = []
424
+ @seed_brokers.shuffle.each do |node|
425
+ (@resolve_seed_brokers ? Resolv.getaddresses(node.hostname).shuffle : [node.hostname]).each do |hostname_or_ip|
426
+ node_info = node.to_s
427
+ node_info << " (#{hostname_or_ip})" if node.hostname != hostname_or_ip
428
+ @logger.info "Fetching cluster metadata from #{node_info}"
429
+
430
+ begin
431
+ broker = @broker_pool.connect(hostname_or_ip, node.port)
432
+ cluster_info = broker.fetch_metadata(topics: @target_topics)
433
+
434
+ if cluster_info.brokers.empty?
435
+ @logger.error "No brokers in cluster"
436
+ else
437
+ @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
438
+
439
+ @stale = false
440
+
441
+ return cluster_info
442
+ end
443
+ rescue Error => e
444
+ @logger.error "Failed to fetch metadata from #{node_info}: #{e}"
445
+ errors << [node_info, e]
446
+ ensure
447
+ broker.disconnect unless broker.nil?
448
+ end
449
+ end
450
+ end
451
+
452
+ error_description = errors.map {|node_info, exception| "- #{node_info}: #{exception}" }.join("\n")
453
+
454
+ raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
455
+ end
456
+
457
+ def random_broker
458
+ refresh_metadata_if_necessary!
459
+ node_id = cluster_info.brokers.sample.node_id
460
+ connect_to_broker(node_id)
461
+ end
462
+
463
+ def connect_to_broker(broker_id)
464
+ info = cluster_info.find_broker(broker_id)
465
+
466
+ @broker_pool.connect(info.host, info.port, node_id: info.node_id)
467
+ end
468
+
469
+ def controller_broker
470
+ connect_to_broker(cluster_info.controller_id)
471
+ end
472
+
473
+ def get_coordinator(coordinator_type, coordinator_key)
474
+ cluster_info.brokers.each do |broker_info|
475
+ begin
476
+ broker = connect_to_broker(broker_info.node_id)
477
+ response = broker.find_coordinator(
478
+ coordinator_type: coordinator_type,
479
+ coordinator_key: coordinator_key
480
+ )
481
+
482
+ Protocol.handle_error(response.error_code, response.error_message)
483
+
484
+ coordinator_id = response.coordinator_id
485
+
486
+ @logger.debug "Coordinator for `#{coordinator_key}` is #{coordinator_id}. Connecting..."
487
+
488
+ # It's possible that a new broker is introduced to the cluster and
489
+ # becomes the coordinator before we have a chance to refresh_metadata.
490
+ coordinator = begin
491
+ connect_to_broker(coordinator_id)
492
+ rescue Kafka::NoSuchBroker
493
+ @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
494
+ refresh_metadata!
495
+ connect_to_broker(coordinator_id)
496
+ end
497
+
498
+ @logger.debug "Connected to coordinator: #{coordinator} for `#{coordinator_key}`"
499
+
500
+ return coordinator
501
+ rescue CoordinatorNotAvailable
502
+ @logger.debug "Coordinator not available; retrying in 1s"
503
+ sleep 1
504
+ retry
505
+ rescue ConnectionError => e
506
+ @logger.error "Failed to get coordinator info from #{broker}: #{e}"
507
+ end
508
+ end
509
+
510
+ raise Kafka::Error, "Failed to find coordinator"
511
+ end
512
+ end
513
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "kafka/snappy_codec"
4
+ require "kafka/gzip_codec"
5
+ require "kafka/lz4_codec"
6
+ require "kafka/zstd_codec"
7
+
8
+ module Kafka
9
+ module Compression
10
+ CODECS_BY_NAME = {
11
+ :gzip => GzipCodec.new,
12
+ :snappy => SnappyCodec.new,
13
+ :lz4 => LZ4Codec.new,
14
+ :zstd => ZstdCodec.new,
15
+ }.freeze
16
+
17
+ CODECS_BY_ID = CODECS_BY_NAME.each_with_object({}) do |(_, codec), hash|
18
+ hash[codec.codec_id] = codec
19
+ end.freeze
20
+
21
+ def self.codecs
22
+ CODECS_BY_NAME.keys
23
+ end
24
+
25
+ def self.find_codec(name)
26
+ codec = CODECS_BY_NAME.fetch(name) do
27
+ raise "Unknown compression codec #{name}"
28
+ end
29
+
30
+ codec.load
31
+
32
+ codec
33
+ end
34
+
35
+ def self.find_codec_by_id(codec_id)
36
+ codec = CODECS_BY_ID.fetch(codec_id) do
37
+ raise "Unknown codec id #{codec_id}"
38
+ end
39
+
40
+ codec.load
41
+
42
+ codec
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "kafka/compression"
4
+
5
+ module Kafka
6
+
7
+ # Compresses message sets using a specified codec.
8
+ #
9
+ # A message set is only compressed if its size meets the defined threshold.
10
+ #
11
+ # ## Instrumentation
12
+ #
13
+ # Whenever a message set is compressed, the notification
14
+ # `compress.compressor.kafka` will be emitted with the following payload:
15
+ #
16
+ # * `message_count` – the number of messages in the message set.
17
+ # * `uncompressed_bytesize` – the byte size of the original data.
18
+ # * `compressed_bytesize` – the byte size of the compressed data.
19
+ #
20
+ class Compressor
21
+ attr_reader :codec
22
+
23
+ # @param codec_name [Symbol, nil]
24
+ # @param threshold [Integer] the minimum number of messages in a message set
25
+ # that will trigger compression.
26
+ def initialize(codec_name: nil, threshold: 1, instrumenter:)
27
+ # Codec may be nil, in which case we won't compress.
28
+ @codec = codec_name && Compression.find_codec(codec_name)
29
+
30
+ @threshold = threshold
31
+ @instrumenter = instrumenter
32
+ end
33
+
34
+ # @param record_batch [Protocol::RecordBatch]
35
+ # @param offset [Integer] used to simulate broker behaviour in tests
36
+ # @return [Protocol::RecordBatch]
37
+ def compress(record_batch, offset: -1)
38
+ if record_batch.is_a?(Protocol::RecordBatch)
39
+ compress_record_batch(record_batch)
40
+ else
41
+ # Deprecated message set format
42
+ compress_message_set(record_batch, offset)
43
+ end
44
+ end
45
+
46
+ private
47
+
48
+ def compress_message_set(message_set, offset)
49
+ return message_set if @codec.nil? || message_set.size < @threshold
50
+
51
+ data = Protocol::Encoder.encode_with(message_set)
52
+ compressed_data = @codec.compress(data)
53
+
54
+ @instrumenter.instrument("compress.compressor") do |notification|
55
+ notification[:message_count] = message_set.size
56
+ notification[:uncompressed_bytesize] = data.bytesize
57
+ notification[:compressed_bytesize] = compressed_data.bytesize
58
+ end
59
+
60
+ wrapper_message = Protocol::Message.new(
61
+ value: compressed_data,
62
+ codec_id: @codec.codec_id,
63
+ offset: offset
64
+ )
65
+
66
+ Protocol::MessageSet.new(messages: [wrapper_message])
67
+ end
68
+
69
+ def compress_record_batch(record_batch)
70
+ if @codec.nil? || record_batch.size < @threshold
71
+ record_batch.codec_id = 0
72
+ return Protocol::Encoder.encode_with(record_batch)
73
+ end
74
+
75
+ record_batch.codec_id = @codec.codec_id
76
+ data = Protocol::Encoder.encode_with(record_batch)
77
+
78
+ @instrumenter.instrument("compress.compressor") do |notification|
79
+ notification[:message_count] = record_batch.size
80
+ notification[:compressed_bytesize] = data.bytesize
81
+ end
82
+
83
+ data
84
+ end
85
+ end
86
+ end