ruby-kafka-custom 0.7.7.26

Files changed (105)
  1. checksums.yaml +7 -0
  2. data/lib/kafka/async_producer.rb +279 -0
  3. data/lib/kafka/broker.rb +205 -0
  4. data/lib/kafka/broker_info.rb +16 -0
  5. data/lib/kafka/broker_pool.rb +41 -0
  6. data/lib/kafka/broker_uri.rb +43 -0
  7. data/lib/kafka/client.rb +754 -0
  8. data/lib/kafka/cluster.rb +455 -0
  9. data/lib/kafka/compression.rb +43 -0
  10. data/lib/kafka/compressor.rb +85 -0
  11. data/lib/kafka/connection.rb +220 -0
  12. data/lib/kafka/connection_builder.rb +33 -0
  13. data/lib/kafka/consumer.rb +592 -0
  14. data/lib/kafka/consumer_group.rb +208 -0
  15. data/lib/kafka/datadog.rb +413 -0
  16. data/lib/kafka/fetch_operation.rb +115 -0
  17. data/lib/kafka/fetched_batch.rb +54 -0
  18. data/lib/kafka/fetched_batch_generator.rb +117 -0
  19. data/lib/kafka/fetched_message.rb +47 -0
  20. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  21. data/lib/kafka/fetcher.rb +221 -0
  22. data/lib/kafka/gzip_codec.rb +30 -0
  23. data/lib/kafka/heartbeat.rb +25 -0
  24. data/lib/kafka/instrumenter.rb +38 -0
  25. data/lib/kafka/lz4_codec.rb +23 -0
  26. data/lib/kafka/message_buffer.rb +87 -0
  27. data/lib/kafka/offset_manager.rb +248 -0
  28. data/lib/kafka/partitioner.rb +35 -0
  29. data/lib/kafka/pause.rb +92 -0
  30. data/lib/kafka/pending_message.rb +29 -0
  31. data/lib/kafka/pending_message_queue.rb +41 -0
  32. data/lib/kafka/produce_operation.rb +205 -0
  33. data/lib/kafka/producer.rb +504 -0
  34. data/lib/kafka/protocol.rb +217 -0
  35. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  36. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  37. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  38. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  39. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  40. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  41. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  42. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  43. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  44. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  45. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  46. data/lib/kafka/protocol/decoder.rb +175 -0
  47. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  48. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  49. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  50. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  51. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  52. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  53. data/lib/kafka/protocol/encoder.rb +184 -0
  54. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  55. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  56. data/lib/kafka/protocol/fetch_request.rb +70 -0
  57. data/lib/kafka/protocol/fetch_response.rb +136 -0
  58. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  59. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  60. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  61. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  62. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  63. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  64. data/lib/kafka/protocol/join_group_request.rb +41 -0
  65. data/lib/kafka/protocol/join_group_response.rb +33 -0
  66. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  67. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  68. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  69. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  70. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  71. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  72. data/lib/kafka/protocol/member_assignment.rb +42 -0
  73. data/lib/kafka/protocol/message.rb +172 -0
  74. data/lib/kafka/protocol/message_set.rb +55 -0
  75. data/lib/kafka/protocol/metadata_request.rb +31 -0
  76. data/lib/kafka/protocol/metadata_response.rb +185 -0
  77. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  78. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  79. data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
  80. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  81. data/lib/kafka/protocol/produce_request.rb +92 -0
  82. data/lib/kafka/protocol/produce_response.rb +63 -0
  83. data/lib/kafka/protocol/record.rb +88 -0
  84. data/lib/kafka/protocol/record_batch.rb +222 -0
  85. data/lib/kafka/protocol/request_message.rb +26 -0
  86. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  87. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  88. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  89. data/lib/kafka/protocol/sync_group_response.rb +23 -0
  90. data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
  91. data/lib/kafka/sasl/gssapi.rb +76 -0
  92. data/lib/kafka/sasl/oauth.rb +64 -0
  93. data/lib/kafka/sasl/plain.rb +39 -0
  94. data/lib/kafka/sasl/scram.rb +177 -0
  95. data/lib/kafka/sasl_authenticator.rb +61 -0
  96. data/lib/kafka/snappy_codec.rb +25 -0
  97. data/lib/kafka/socket_with_timeout.rb +96 -0
  98. data/lib/kafka/ssl_context.rb +66 -0
  99. data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
  100. data/lib/kafka/statsd.rb +296 -0
  101. data/lib/kafka/tagged_logger.rb +72 -0
  102. data/lib/kafka/transaction_manager.rb +261 -0
  103. data/lib/kafka/transaction_state_machine.rb +72 -0
  104. data/lib/kafka/version.rb +5 -0
  105. metadata +461 -0
data/lib/kafka/cluster.rb
@@ -0,0 +1,455 @@
+# frozen_string_literal: true
+
+require "kafka/broker_pool"
+require "set"
+
+module Kafka
+
+  # A cluster represents the state of a Kafka cluster. It needs to be initialized
+  # with a non-empty list of seed brokers. The first seed broker that the cluster can connect
+  # to will be asked for the cluster metadata, which allows the cluster to map topic
+  # partitions to the current leader for those partitions.
+  class Cluster
+
+    # Initializes a Cluster with a set of seed brokers.
+    #
+    # The cluster will try to fetch cluster metadata from one of the brokers.
+    #
+    # @param seed_brokers [Array<URI>]
+    # @param broker_pool [Kafka::BrokerPool]
+    # @param logger [Logger]
+    def initialize(seed_brokers:, broker_pool:, logger:)
+      if seed_brokers.empty?
+        raise ArgumentError, "At least one seed broker must be configured"
+      end
+
+      @logger = TaggedLogger.new(logger)
+      @seed_brokers = seed_brokers
+      @broker_pool = broker_pool
+      @cluster_info = nil
+      @stale = true
+
+      # This is the set of topics we need metadata for. If empty, metadata for
+      # all topics will be fetched.
+      @target_topics = Set.new
+    end
+
+    # Adds a list of topics to the target list. Only the topics on this list will
+    # be queried for metadata.
+    #
+    # @param topics [Array<String>]
+    # @return [nil]
+    def add_target_topics(topics)
+      topics = Set.new(topics)
+      unless topics.subset?(@target_topics)
+        new_topics = topics - @target_topics
+
+        unless new_topics.empty?
+          @logger.info "New topics added to target list: #{new_topics.to_a.join(', ')}"
+
+          @target_topics.merge(new_topics)
+
+          refresh_metadata!
+        end
+      end
+    end
+
+    def api_info(api_key)
+      apis.find {|api| api.api_key == api_key }
+    end
+
+    def supports_api?(api_key, version = nil)
+      info = api_info(api_key)
+      if info.nil?
+        return false
+      elsif version.nil?
+        return true
+      else
+        return info.version_supported?(version)
+      end
+    end
+
+    def apis
+      @apis ||=
+        begin
+          response = random_broker.api_versions
+
+          Protocol.handle_error(response.error_code)
+
+          response.apis
+        end
+    end
+
+    # Clears the list of target topics.
+    #
+    # @see #add_target_topics
+    # @return [nil]
+    def clear_target_topics
+      @target_topics.clear
+      refresh_metadata!
+    end
+
+    def mark_as_stale!
+      @stale = true
+    end
+
+    def refresh_metadata!
+      @cluster_info = nil
+      cluster_info
+    end
+
+    def refresh_metadata_if_necessary!
+      refresh_metadata! if @stale
+    end
+
+    # Finds the broker acting as the leader of the given topic and partition.
+    #
+    # @param topic [String]
+    # @param partition [Integer]
+    # @return [Broker] the broker that's currently leader.
+    def get_leader(topic, partition)
+      connect_to_broker(get_leader_id(topic, partition))
+    end
+
+    # Finds the broker acting as the coordinator of the given group.
+    #
+    # @param group_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
+    def get_group_coordinator(group_id:)
+      @logger.debug "Getting group coordinator for `#{group_id}`"
+      refresh_metadata_if_necessary!
+      get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_GROUP, group_id)
+    end
+
+    # Finds the broker acting as the coordinator of the given transaction.
+    #
+    # @param transactional_id: [String]
+    # @return [Broker] the broker that's currently coordinator.
+    def get_transaction_coordinator(transactional_id:)
+      @logger.debug "Getting transaction coordinator for `#{transactional_id}`"
+
+      refresh_metadata_if_necessary!
+
+      if transactional_id.nil?
+        # Get a random_broker
+        @logger.debug "Transaction ID is not available. Choose a random broker."
+        return random_broker
+      else
+        get_coordinator(Kafka::Protocol::COORDINATOR_TYPE_TRANSACTION, transactional_id)
+      end
+    end
+
+    def partitions_for(topic)
+      add_target_topics([topic])
+      refresh_metadata_if_necessary!
+      cluster_info.partitions_for(topic)
+    rescue Kafka::ProtocolError
+      mark_as_stale!
+      raise
+    end
+
+    def create_topic(name, num_partitions:, replication_factor:, timeout:, config:)
+      options = {
+        topics: {
+          name => {
+            num_partitions: num_partitions,
+            replication_factor: replication_factor,
+            config: config,
+          }
+        },
+        timeout: timeout,
+      }
+
+      broker = controller_broker
+
+      @logger.info "Creating topic `#{name}` using controller broker #{broker}"
+
+      response = broker.create_topics(**options)
+
+      response.errors.each do |topic, error_code|
+        Protocol.handle_error(error_code)
+      end
+
+      begin
+        partitions_for(name).each do |info|
+          Protocol.handle_error(info.partition_error_code)
+        end
+      rescue Kafka::LeaderNotAvailable
+        @logger.warn "Leader not yet available for `#{name}`, waiting 1s..."
+        sleep 1
+
+        retry
+      rescue Kafka::UnknownTopicOrPartition
+        @logger.warn "Topic `#{name}` not yet created, waiting 1s..."
+        sleep 1
+
+        retry
+      end
+
+      @logger.info "Topic `#{name}` was created"
+    end
+
+    def delete_topic(name, timeout:)
+      options = {
+        topics: [name],
+        timeout: timeout,
+      }
+
+      broker = controller_broker
+
+      @logger.info "Deleting topic `#{name}` using controller broker #{broker}"
+
+      response = broker.delete_topics(**options)
+
+      response.errors.each do |topic, error_code|
+        Protocol.handle_error(error_code)
+      end
+
+      @logger.info "Topic `#{name}` was deleted"
+    end
+
+    def describe_topic(name, configs = [])
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_TOPIC, name, configs]]
+      }
+      broker = controller_broker
+
+      @logger.info "Fetching topic `#{name}`'s configs using controller broker #{broker}"
+
+      response = broker.describe_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+      topic_description = response.resources.first
+      topic_description.configs.each_with_object({}) do |config, hash|
+        hash[config.name] = config.value
+      end
+    end
+
+    def alter_topic(name, configs = {})
+      options = {
+        resources: [[Kafka::Protocol::RESOURCE_TYPE_TOPIC, name, configs]]
+      }
+
+      broker = controller_broker
+
+      @logger.info "Altering the config for topic `#{name}` using controller broker #{broker}"
+
+      response = broker.alter_configs(**options)
+
+      response.resources.each do |resource|
+        Protocol.handle_error(resource.error_code, resource.error_message)
+      end
+
+      nil
+    end
+
+    def describe_group(group_id)
+      response = get_group_coordinator(group_id: group_id).describe_groups(group_ids: [group_id])
+      group = response.groups.first
+      Protocol.handle_error(group.error_code)
+      group
+    end
+
+    def create_partitions_for(name, num_partitions:, timeout:)
+      options = {
+        topics: [[name, num_partitions, nil]],
+        timeout: timeout
+      }
+
+      broker = controller_broker
+
+      @logger.info "Creating #{num_partitions} partition(s) for topic `#{name}` using controller broker #{broker}"
+
+      response = broker.create_partitions(**options)
+
+      response.errors.each do |topic, error_code, error_message|
+        Protocol.handle_error(error_code, error_message)
+      end
+      mark_as_stale!
+
+      @logger.info "Topic `#{name}` was updated"
+    end
+
+    def resolve_offsets(topic, partitions, offset)
+      add_target_topics([topic])
+      refresh_metadata_if_necessary!
+
+      partitions_by_broker = partitions.each_with_object({}) {|partition, hsh|
+        broker = get_leader(topic, partition)
+
+        hsh[broker] ||= []
+        hsh[broker] << partition
+      }
+
+      if offset == :earliest
+        offset = -2
+      elsif offset == :latest
+        offset = -1
+      end
+
+      offsets = {}
+
+      partitions_by_broker.each do |broker, broker_partitions|
+        response = broker.list_offsets(
+          topics: {
+            topic => broker_partitions.map {|partition|
+              {
+                partition: partition,
+                time: offset
+              }
+            }
+          }
+        )
+
+        broker_partitions.each do |partition|
+          offsets[partition] = response.offset_for(topic, partition)
+        end
+      end
+
+      offsets
+    rescue Kafka::ProtocolError
+      mark_as_stale!
+      raise
+    end
+
+    def resolve_offset(topic, partition, offset)
+      resolve_offsets(topic, [partition], offset).fetch(partition)
+    end
+
+    def topics
+      refresh_metadata_if_necessary!
+      cluster_info.topics.select do |topic|
+        topic.topic_error_code == 0
+      end.map(&:topic_name)
+    end
+
+    # Lists all topics in the cluster.
+    def list_topics
+      response = random_broker.fetch_metadata(topics: nil)
+      response.topics.select do |topic|
+        topic.topic_error_code == 0
+      end.map(&:topic_name)
+    end
+
+    def list_groups
+      refresh_metadata_if_necessary!
+      cluster_info.brokers.map do |broker|
+        response = connect_to_broker(broker.node_id).list_groups
+        Protocol.handle_error(response.error_code)
+        response.groups.map(&:group_id)
+      end.flatten.uniq
+    end
+
+    def disconnect
+      @broker_pool.close
+    end
+
+    def cluster_info
+      @cluster_info ||= fetch_cluster_info
+    end
+
+    private
+
+    def get_leader_id(topic, partition)
+      cluster_info.find_leader_id(topic, partition)
+    end
+
+    # Fetches the cluster metadata.
+    #
+    # This is used to update the partition leadership information, among other things.
+    # The methods will go through each node listed in `seed_brokers`, connecting to the
+    # first one that is available. This node will be queried for the cluster metadata.
+    #
+    # @raise [ConnectionError] if none of the nodes in `seed_brokers` are available.
+    # @return [Protocol::MetadataResponse] the cluster metadata.
+    def fetch_cluster_info
+      errors = []
+
+      @seed_brokers.shuffle.each do |node|
+        @logger.info "Fetching cluster metadata from #{node}"
+
+        begin
+          broker = @broker_pool.connect(node.hostname, node.port)
+          cluster_info = broker.fetch_metadata(topics: @target_topics)
+
+          if cluster_info.brokers.empty?
+            @logger.error "No brokers in cluster"
+          else
+            @logger.info "Discovered cluster metadata; nodes: #{cluster_info.brokers.join(', ')}"
+
+            @stale = false
+
+            return cluster_info
+          end
+        rescue Error => e
+          @logger.error "Failed to fetch metadata from #{node}: #{e}"
+          errors << [node, e]
+        ensure
+          broker.disconnect unless broker.nil?
+        end
+      end
+
+      error_description = errors.map {|node, exception| "- #{node}: #{exception}" }.join("\n")
+
+      raise ConnectionError, "Could not connect to any of the seed brokers:\n#{error_description}"
+    end
+
+    def random_broker
+      refresh_metadata_if_necessary!
+      node_id = cluster_info.brokers.sample.node_id
+      connect_to_broker(node_id)
+    end
+
+    def connect_to_broker(broker_id)
+      info = cluster_info.find_broker(broker_id)
+
+      @broker_pool.connect(info.host, info.port, node_id: info.node_id)
+    end
+
+    def controller_broker
+      connect_to_broker(cluster_info.controller_id)
+    end
+
+    def get_coordinator(coordinator_type, coordinator_key)
+      cluster_info.brokers.each do |broker_info|
+        begin
+          broker = connect_to_broker(broker_info.node_id)
+          response = broker.find_coordinator(
+            coordinator_type: coordinator_type,
+            coordinator_key: coordinator_key
+          )
+
+          Protocol.handle_error(response.error_code, response.error_message)
+
+          coordinator_id = response.coordinator_id
+
+          @logger.debug "Coordinator for `#{coordinator_key}` is #{coordinator_id}. Connecting..."
+
+          # It's possible that a new broker is introduced to the cluster and
+          # becomes the coordinator before we have a chance to refresh_metadata.
+          coordinator = begin
+            connect_to_broker(coordinator_id)
+          rescue Kafka::NoSuchBroker
+            @logger.debug "Broker #{coordinator_id} missing from broker cache, refreshing"
+            refresh_metadata!
+            connect_to_broker(coordinator_id)
+          end
+
+          @logger.debug "Connected to coordinator: #{coordinator} for `#{coordinator_key}`"
+
+          return coordinator
+        rescue CoordinatorNotAvailable
+          @logger.debug "Coordinator not available; retrying in 1s"
+          sleep 1
+          retry
+        rescue ConnectionError => e
+          @logger.error "Failed to get coordinator info from #{broker}: #{e}"
+        end
+      end
+
+      raise Kafka::Error, "Failed to find coordinator"
+    end
+  end
+end
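
For orientation, below is a minimal usage sketch of the Cluster API introduced in this hunk. In ruby-kafka, Kafka::Client normally builds the Cluster and the Kafka::BrokerPool it requires, so the broker_pool variable is a stand-in for that wiring rather than code from this gem; the broker hosts, topic, and group name are likewise illustrative.

    require "logger"
    require "uri"

    logger = Logger.new($stdout)

    # Seed brokers are plain URIs; fetch_cluster_info reads node.hostname and node.port.
    seed_brokers = [URI("kafka://kafka1.example.com:9092"), URI("kafka://kafka2.example.com:9092")]

    cluster = Kafka::Cluster.new(
      seed_brokers: seed_brokers,
      broker_pool: broker_pool, # assumed: a configured Kafka::BrokerPool, normally built by Kafka::Client
      logger: logger
    )

    # Metadata is fetched lazily and only for topics on the target list
    # (or for all topics when the list is empty).
    cluster.add_target_topics(["page-views"])

    partitions  = cluster.partitions_for("page-views")                    # partition metadata for the topic
    latest      = cluster.resolve_offset("page-views", 0, :latest)        # resolves :latest to a concrete offset
    coordinator = cluster.get_group_coordinator(group_id: "page-view-consumers")

    cluster.disconnect

Note that partitions_for and resolve_offsets mark the cluster as stale when a Kafka::ProtocolError is raised, so the next metadata-dependent call refreshes the cached cluster metadata via refresh_metadata_if_necessary!.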