ruby-kafka-custom 0.7.7.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. checksums.yaml +7 -0
  2. data/lib/kafka/async_producer.rb +279 -0
  3. data/lib/kafka/broker.rb +205 -0
  4. data/lib/kafka/broker_info.rb +16 -0
  5. data/lib/kafka/broker_pool.rb +41 -0
  6. data/lib/kafka/broker_uri.rb +43 -0
  7. data/lib/kafka/client.rb +754 -0
  8. data/lib/kafka/cluster.rb +455 -0
  9. data/lib/kafka/compression.rb +43 -0
  10. data/lib/kafka/compressor.rb +85 -0
  11. data/lib/kafka/connection.rb +220 -0
  12. data/lib/kafka/connection_builder.rb +33 -0
  13. data/lib/kafka/consumer.rb +592 -0
  14. data/lib/kafka/consumer_group.rb +208 -0
  15. data/lib/kafka/datadog.rb +413 -0
  16. data/lib/kafka/fetch_operation.rb +115 -0
  17. data/lib/kafka/fetched_batch.rb +54 -0
  18. data/lib/kafka/fetched_batch_generator.rb +117 -0
  19. data/lib/kafka/fetched_message.rb +47 -0
  20. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  21. data/lib/kafka/fetcher.rb +221 -0
  22. data/lib/kafka/gzip_codec.rb +30 -0
  23. data/lib/kafka/heartbeat.rb +25 -0
  24. data/lib/kafka/instrumenter.rb +38 -0
  25. data/lib/kafka/lz4_codec.rb +23 -0
  26. data/lib/kafka/message_buffer.rb +87 -0
  27. data/lib/kafka/offset_manager.rb +248 -0
  28. data/lib/kafka/partitioner.rb +35 -0
  29. data/lib/kafka/pause.rb +92 -0
  30. data/lib/kafka/pending_message.rb +29 -0
  31. data/lib/kafka/pending_message_queue.rb +41 -0
  32. data/lib/kafka/produce_operation.rb +205 -0
  33. data/lib/kafka/producer.rb +504 -0
  34. data/lib/kafka/protocol.rb +217 -0
  35. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  36. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  37. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  38. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  39. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  40. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  41. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  42. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  43. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  44. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  45. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  46. data/lib/kafka/protocol/decoder.rb +175 -0
  47. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  48. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  49. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  50. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  51. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  52. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  53. data/lib/kafka/protocol/encoder.rb +184 -0
  54. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  55. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  56. data/lib/kafka/protocol/fetch_request.rb +70 -0
  57. data/lib/kafka/protocol/fetch_response.rb +136 -0
  58. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  59. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  60. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  61. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  62. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  63. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  64. data/lib/kafka/protocol/join_group_request.rb +41 -0
  65. data/lib/kafka/protocol/join_group_response.rb +33 -0
  66. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  67. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  68. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  69. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  70. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  71. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  72. data/lib/kafka/protocol/member_assignment.rb +42 -0
  73. data/lib/kafka/protocol/message.rb +172 -0
  74. data/lib/kafka/protocol/message_set.rb +55 -0
  75. data/lib/kafka/protocol/metadata_request.rb +31 -0
  76. data/lib/kafka/protocol/metadata_response.rb +185 -0
  77. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  78. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  79. data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
  80. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  81. data/lib/kafka/protocol/produce_request.rb +92 -0
  82. data/lib/kafka/protocol/produce_response.rb +63 -0
  83. data/lib/kafka/protocol/record.rb +88 -0
  84. data/lib/kafka/protocol/record_batch.rb +222 -0
  85. data/lib/kafka/protocol/request_message.rb +26 -0
  86. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  87. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  88. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  89. data/lib/kafka/protocol/sync_group_response.rb +23 -0
  90. data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
  91. data/lib/kafka/sasl/gssapi.rb +76 -0
  92. data/lib/kafka/sasl/oauth.rb +64 -0
  93. data/lib/kafka/sasl/plain.rb +39 -0
  94. data/lib/kafka/sasl/scram.rb +177 -0
  95. data/lib/kafka/sasl_authenticator.rb +61 -0
  96. data/lib/kafka/snappy_codec.rb +25 -0
  97. data/lib/kafka/socket_with_timeout.rb +96 -0
  98. data/lib/kafka/ssl_context.rb +66 -0
  99. data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
  100. data/lib/kafka/statsd.rb +296 -0
  101. data/lib/kafka/tagged_logger.rb +72 -0
  102. data/lib/kafka/transaction_manager.rb +261 -0
  103. data/lib/kafka/transaction_state_machine.rb +72 -0
  104. data/lib/kafka/version.rb +5 -0
  105. metadata +461 -0
@@ -0,0 +1,455 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "kafka/broker_pool"
4
+ require "set"
5
+
6
module Kafka

  # A cluster represents the state of a Kafka cluster. It needs to be initialized
  # with a non-empty list of seed brokers. The first seed broker that the cluster can connect
  # to will be asked for the cluster metadata, which allows the cluster to map topic
  # partitions to the current leader for those partitions.
  class Cluster

    # Initializes a Cluster with a set of seed brokers.
    #
    # The cluster will try to fetch cluster metadata from one of the brokers.
    #
    # @param seed_brokers [Array<URI>]
    # @param broker_pool [Kafka::BrokerPool]
    # @param logger [Logger]
    def initialize(seed_brokers:, broker_pool:, logger:)
      if seed_brokers.empty?
        raise ArgumentError, "At least one seed broker must be configured"
      end

      @logger = TaggedLogger.new(logger)
      @seed_brokers = seed_brokers
      @broker_pool = broker_pool
      # Memoized cluster metadata; nil forces a refetch on next access.
      @cluster_info = nil
      # When true, the next refresh_metadata_if_necessary! will refetch metadata.
      @stale = true

      # This is the set of topics we need metadata for. If empty, metadata for
      # all topics will be fetched.
      @target_topics = Set.new
    end

    # Adds a list of topics to the target list. Only the topics on this list will
    # be queried for metadata.
    #
    # Triggers an immediate metadata refresh if any of the topics are new.
    #
    # @param topics [Array<String>]
    # @return [nil]
    def add_target_topics(topics)
      topics = Set.new(topics)
      unless topics.subset?(@target_topics)
        new_topics = topics - @target_topics

        unless new_topics.empty?
          @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"

          @target_topics.merge(new_topics)

          refresh_metadata!
        end
      end
    end

    # Looks up the broker-advertised info entry for a single protocol API.
    #
    # @param api_key [Integer] the Kafka protocol API key.
    # @return [Object, nil] the matching API entry, or nil if the broker doesn't advertise it.
    def api_info(api_key)
      apis.find {|api| api.api_key == api_key }
    end

    # Returns whether the cluster supports the given API, optionally at a
    # specific version.
    #
    # @param api_key [Integer]
    # @param version [Integer, nil] version to check, or nil to only check presence.
    # @return [Boolean]
    def supports_api?(api_key, version = nil)
      info = api_info(api_key)
      if info.nil?
        return false
      elsif version.nil?
        return true
      else
        return info.version_supported?(version)
      end
    end

    # Fetches and memoizes the list of APIs supported by a randomly chosen broker.
    #
    # NOTE(review): memoized for the lifetime of this object — assumes all brokers
    # advertise the same API versions; verify this holds during rolling upgrades.
    #
    # @return [Array] the advertised API entries.
    def apis
      @apis ||=
        begin
          response = random_broker.api_versions

          Protocol.handle_error(response.error_code)

          response.apis
        end
    end

    # Clears the list of target topics.
    #
    # @see #add_target_topics
    # @return [nil]
    def clear_target_topics
      @target_topics.clear
      refresh_metadata!
    end

    # Marks the cached metadata as stale; it will be refetched lazily by
    # #refresh_metadata_if_necessary!.
    #
    # @return [nil]
    def mark_as_stale!
      @stale = true
    end

    # Discards the cached metadata and immediately fetches a fresh copy.
    #
    # @raise [ConnectionError] if no seed broker is reachable.
    # @return [nil]
    def refresh_metadata!
      @cluster_info = nil
      cluster_info
    end

    # Refetches metadata only if it has been marked stale.
    #
    # @return [nil]
    def refresh_metadata_if_necessary!
      refresh_metadata! if @stale
    end

    # Finds the broker acting as the leader of the given topic and partition.
    #
    # @param topic [String]
    # @param partition [Integer]
    # @return [Broker] the broker that's currently leader.
    def get_leader(topic, partition)
      connect_to_broker(get_leader_id(topic, partition))
    end

    # Finds the broker acting as the coordinator of the given group.
    #
    # @param group_id [String]
    # @return [Broker] the broker that's currently coordinator.
    def get_group_coordinator(group_id:)
      @logger.debug "Getting group coordinator for `#{group_id}`"
      refresh_metadata_if_necessary!
      get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_GROUP, group_id)
    end

    # Finds the broker acting as the coordinator of the given transaction.
    #
    # @param transactional_id [String, nil] when nil, a random broker is returned instead.
    # @return [Broker] the broker that's currently coordinator.
    def get_transaction_coordinator(transactional_id:)
      @logger.debug "Getting transaction coordinator for `#{transactional_id}`"

      refresh_metadata_if_necessary!

      if transactional_id.nil?
        # Get a random_broker
        @logger.debug "Transaction ID is not available. Choose a random broker."
        return random_broker
      else
        get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_TRANSACTION, transactional_id)
      end
    end

    # Returns the partition metadata for a topic, first ensuring the topic is on
    # the target list so its metadata is fetched.
    #
    # @param topic [String]
    # @return [Array] partition metadata entries for the topic.
    # @raise [Kafka::ProtocolError] re-raised after marking the metadata stale.
    def partitions_for(topic)
      add_target_topics([topic])
      refresh_metadata_if_necessary!
      cluster_info.partitions_for(topic)
    rescue Kafka::ProtocolError
      mark_as_stale!
      raise
    end

    # Creates a topic via the controller broker and blocks until all of its
    # partitions report no error.
    #
    # NOTE(review): the wait loop below retries every second with no upper bound —
    # a topic that never becomes available will block the caller indefinitely.
    #
    # @param name [String]
    # @param num_partitions [Integer]
    # @param replication_factor [Integer]
    # @param timeout [Integer] broker-side timeout for the create request.
    # @param config [Hash] topic-level config entries.
    # @return [nil]
    def create_topic(name, num_partitions:, replication_factor:, timeout:, config:)
      options = {
        topics: {
          name => {
            num_partitions: num_partitions,
            replication_factor: replication_factor,
            config: config,
          }
        },
        timeout: timeout,
      }

      broker = controller_broker

      @logger.info "Creating topic `#{name}` using controller broker #{broker}"

      response = broker.create_topics(**options)

      response.errors.each do |topic, error_code|
        Protocol.handle_error(error_code)
      end

      begin
        partitions_for(name).each do |info|
          Protocol.handle_error(info.partition_error_code)
        end
      rescue Kafka::LeaderNotAvailable
        @logger.warn "Leader not yet available for `#{name}`, waiting 1s..."
        sleep 1

        retry
      rescue Kafka::UnknownTopicOrPartition
        @logger.warn "Topic `#{name}` not yet created, waiting 1s..."
        sleep 1

        retry
      end

      @logger.info "Topic `#{name}` was created"
    end

    # Deletes a topic via the controller broker.
    #
    # @param name [String]
    # @param timeout [Integer] broker-side timeout for the delete request.
    # @return [nil]
    def delete_topic(name, timeout:)
      options = {
        topics: [name],
        timeout: timeout,
      }

      broker = controller_broker

      @logger.info "Deleting topic `#{name}` using controller broker #{broker}"

      response = broker.delete_topics(**options)

      response.errors.each do |topic, error_code|
        Protocol.handle_error(error_code)
      end

      @logger.info "Topic `#{name}` was deleted"
    end

    # Fetches the configuration of a topic from the controller broker.
    #
    # @param name [String]
    # @param configs [Array<String>] config names to fetch; empty fetches all.
    # @return [Hash<String, String>] config name => value.
    def describe_topic(name, configs = [])
      options = {
        resources: [[Kafka::Protocol::RESOURCE_TYPE_TOPIC, name, configs]]
      }
      broker = controller_broker

      @logger.info "Fetching topic `#{name}`'s configs using controller broker #{broker}"

      response = broker.describe_configs(**options)

      response.resources.each do |resource|
        Protocol.handle_error(resource.error_code, resource.error_message)
      end
      topic_description = response.resources.first
      topic_description.configs.each_with_object({}) do |config, hash|
        hash[config.name] = config.value
      end
    end

    # Alters a topic's configuration via the controller broker.
    #
    # @param name [String]
    # @param configs [Hash] config name => new value.
    # @return [nil]
    def alter_topic(name, configs = {})
      options = {
        resources: [[Kafka::Protocol::RESOURCE_TYPE_TOPIC, name, configs]]
      }

      broker = controller_broker

      @logger.info "Altering the config for topic `#{name}` using controller broker #{broker}"

      response = broker.alter_configs(**options)

      response.resources.each do |resource|
        Protocol.handle_error(resource.error_code, resource.error_message)
      end

      nil
    end

    # Describes a consumer group by asking its coordinator broker.
    #
    # @param group_id [String]
    # @return [Object] the group description entry from the response.
    def describe_group(group_id)
      response = get_group_coordinator(group_id: group_id).describe_groups(group_ids: [group_id])
      group = response.groups.first
      Protocol.handle_error(group.error_code)
      group
    end

    # Increases the partition count of a topic via the controller broker.
    #
    # @param name [String]
    # @param num_partitions [Integer] the new total partition count.
    # @param timeout [Integer] broker-side timeout for the request.
    # @return [nil]
    def create_partitions_for(name, num_partitions:, timeout:)
      options = {
        topics: [[name, num_partitions, nil]],
        timeout: timeout
      }

      broker = controller_broker

      @logger.info "Creating #{num_partitions} partition(s) for topic `#{name}` using controller broker #{broker}"

      response = broker.create_partitions(**options)

      response.errors.each do |topic, error_code, error_message|
        Protocol.handle_error(error_code, error_message)
      end
      # Partition layout changed, so the cached metadata is no longer accurate.
      mark_as_stale!

      @logger.info "Topic `#{name}` was updated"
    end

    # Resolves offsets for a set of partitions of a topic, querying each
    # partition's leader broker.
    #
    # @param topic [String]
    # @param partitions [Array<Integer>]
    # @param offset [Symbol, Integer] :earliest, :latest, or a timestamp/offset
    #   value passed through to the ListOffsets API.
    # @return [Hash<Integer, Integer>] partition => resolved offset.
    # @raise [Kafka::ProtocolError] re-raised after marking the metadata stale.
    def resolve_offsets(topic, partitions, offset)
      add_target_topics([topic])
      refresh_metadata_if_necessary!

      # Group partitions by their leader so we can issue one request per broker.
      partitions_by_broker = partitions.each_with_object({}) {|partition, hsh|
        broker = get_leader(topic, partition)

        hsh[broker] ||= []
        hsh[broker] << partition
      }

      # The protocol encodes :earliest/:latest as the sentinel timestamps -2/-1.
      if offset == :earliest
        offset = -2
      elsif offset == :latest
        offset = -1
      end

      offsets = {}

      partitions_by_broker.each do |broker, broker_partitions|
        response = broker.list_offsets(
          topics: {
            topic => broker_partitions.map {|partition|
              {
                partition: partition,
                time: offset
              }
            }
          }
        )

        broker_partitions.each do |partition|
          offsets[partition] = response.offset_for(topic, partition)
        end
      end

      offsets
    rescue Kafka::ProtocolError
      mark_as_stale!
      raise
    end

    # Resolves the offset for a single partition.
    #
    # @see #resolve_offsets
    # @param topic [String]
    # @param partition [Integer]
    # @param offset [Symbol, Integer]
    # @return [Integer]
    def resolve_offset(topic, partition, offset)
      resolve_offsets(topic, [partition], offset).fetch(partition)
    end

    # Lists the names of the error-free topics in the cached metadata.
    #
    # NOTE(review): this only reflects the topics covered by the current target
    # list — use #list_topics for an unrestricted listing.
    #
    # @return [Array<String>]
    def topics
      refresh_metadata_if_necessary!
      cluster_info.topics.select do |topic|
        topic.topic_error_code == 0
      end.map(&:topic_name)
    end

    # Lists all topics in the cluster.
    def list_topics
      # Passing `topics: nil` asks the broker for metadata on every topic.
      response = random_broker.fetch_metadata(topics: nil)
      response.topics.select do |topic|
        topic.topic_error_code == 0
      end.map(&:topic_name)
    end

    # Lists the ids of all consumer groups known to any broker in the cluster.
    #
    # @return [Array<String>] unique group ids.
    def list_groups
      refresh_metadata_if_necessary!
      cluster_info.brokers.map do |broker|
        response = connect_to_broker(broker.node_id).list_groups
        Protocol.handle_error(response.error_code)
        response.groups.map(&:group_id)
      end.flatten.uniq
    end

    # Closes all connections held by the broker pool.
    #
    # @return [nil]
    def disconnect
      @broker_pool.close
    end

    # Returns the cached cluster metadata, fetching it on first access.
    #
    # @return [Protocol::MetadataResponse]
    def cluster_info
      @cluster_info ||= fetch_cluster_info
    end

    private

    # Looks up the node id of the leader for a topic partition in the cached metadata.
    def get_leader_id(topic, partition)
      cluster_info.find_leader_id(topic, partition)
    end

    # Fetches the cluster metadata.
    #
    # This is used to update the partition leadership information, among other things.
    # The methods will go through each node listed in `seed_brokers`, connecting to the
    # first one that is available. This node will be queried for the cluster metadata.
    #
    # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
    # @return [Protocol::MetadataResponse] the cluster metadata.
    def fetch_cluster_info
      errors = []

      # Shuffle so repeated refreshes don't always hammer the same seed broker.
      @seed_brokers.shuffle.each do |node|
        @logger.info "Fetching cluster metadata from #{node}"

        begin
          broker = @broker_pool.connect(node.hostname, node.port)
          cluster_info = broker.fetch_metadata(topics: @target_topics)

          if cluster_info.brokers.empty?
            @logger.error "No brokers in cluster"
          else
            @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"

            @stale = false

            return cluster_info
          end
        rescue Error => e
          @logger.error "Failed to fetch metadata from #{node}: #{e}"
          errors << [node, e]
        ensure
          # The seed connection is only used for bootstrapping; always close it.
          broker.disconnect unless broker.nil?
        end
      end

      error_description = errors.map {|node, exception| "- #{node}: #{exception}" }.join("\n")

      raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
    end

    # Connects to a randomly chosen broker from the current metadata.
    def random_broker
      refresh_metadata_if_necessary!
      node_id = cluster_info.brokers.sample.node_id
      connect_to_broker(node_id)
    end

    # Opens (or reuses) a pooled connection to the broker with the given node id.
    def connect_to_broker(broker_id)
      info = cluster_info.find_broker(broker_id)

      @broker_pool.connect(info.host, info.port, node_id: info.node_id)
    end

    # Connects to the broker currently acting as the cluster controller.
    def controller_broker
      connect_to_broker(cluster_info.controller_id)
    end

    # Asks the brokers, one by one, for the coordinator of the given key
    # (a consumer group or a transactional id), returning a connection to it.
    #
    # NOTE(review): the CoordinatorNotAvailable path retries every second with no
    # upper bound, so this can loop indefinitely while a coordinator is elected.
    #
    # @param coordinator_type [Integer] COORDINATOR_TYPE_GROUP or COORDINATOR_TYPE_TRANSACTION.
    # @param coordinator_key [String]
    # @return [Broker]
    # @raise [Kafka::Error] if no broker could provide coordinator info.
    def get_coordinator(coordinator_type, coordinator_key)
      cluster_info.brokers.each do |broker_info|
        begin
          broker = connect_to_broker(broker_info.node_id)
          response = broker.find_coordinator(
            coordinator_type: coordinator_type,
            coordinator_key: coordinator_key
          )

          Protocol.handle_error(response.error_code, response.error_message)

          coordinator_id = response.coordinator_id

          @logger.debug "Coordinator for `#{coordinator_key}` is #{coordinator_id}. Connecting..."

          # It's possible that a new broker is introduced to the cluster and
          # becomes the coordinator before we have a chance to refresh_metadata.
          coordinator = begin
            connect_to_broker(coordinator_id)
          rescue Kafka::NoSuchBroker
            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
            refresh_metadata!
            connect_to_broker(coordinator_id)
          end

          @logger.debug "Connected to coordinator: #{coordinator} for `#{coordinator_key}`"

          return coordinator
        rescue CoordinatorNotAvailable
          @logger.debug "Coordinator not available; retrying in 1s"
          sleep 1
          retry
        rescue ConnectionError => e
          # This broker couldn't answer; fall through and try the next one.
          @logger.error "Failed to get coordinator info from #{broker}: #{e}"
        end
      end

      raise Kafka::Error, "Failed to find coordinator"
    end
  end
end