ruby-kafka-aws-iam 1.4.1

Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
data/lib/kafka/client.rb
@@ -0,0 +1,838 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "kafka/ssl_context"
5
+ require "kafka/cluster"
6
+ require "kafka/transaction_manager"
7
+ require "kafka/broker_info"
8
+ require "kafka/producer"
9
+ require "kafka/consumer"
10
+ require "kafka/heartbeat"
11
+ require "kafka/broker_uri"
12
+ require "kafka/async_producer"
13
+ require "kafka/fetched_message"
14
+ require "kafka/fetch_operation"
15
+ require "kafka/connection_builder"
16
+ require "kafka/instrumenter"
17
+ require "kafka/sasl_authenticator"
18
+ require "kafka/tagged_logger"
19
+
20
+ module Kafka
21
+ class Client
22
+ # Initializes a new Kafka client.
23
+ #
24
+ # @param seed_brokers [Array<String>, String] the list of brokers used to initialize
25
+ # the client. Either an Array of connections, or a comma separated string of connections.
26
+ # A connection can either be a string of "host:port" or a full URI with a scheme.
27
+ # If there's a scheme it's ignored and only host/port are used.
28
+ #
29
+ # @param client_id [String] the identifier for this application.
30
+ #
31
+ # @param logger [Logger] the logger that should be used by the client.
32
+ #
33
+ # @param connect_timeout [Integer, nil] the timeout setting for connecting
34
+ # to brokers. See {BrokerPool#initialize}.
35
+ #
36
+ # @param socket_timeout [Integer, nil] the timeout setting for socket
37
+ # connections. See {BrokerPool#initialize}.
38
+ #
39
+ # @param ssl_ca_cert [String, Array<String>, nil] a PEM encoded CA cert, or an Array of
40
+ # PEM encoded CA certs, to use with an SSL connection.
41
+ #
42
+ # @param ssl_ca_cert_file_path [String, Array<String>, nil] a path on the filesystem, or an
43
+ # Array of paths, to PEM encoded CA cert(s) to use with an SSL connection.
44
+ #
45
+ # @param ssl_client_cert [String, nil] a PEM encoded client cert to use with an
46
+ # SSL connection. Must be used in combination with ssl_client_cert_key.
47
+ #
48
+ # @param ssl_client_cert_key [String, nil] a PEM encoded client cert key to use with an
49
+ # SSL connection. Must be used in combination with ssl_client_cert.
50
+ #
51
+ # @param ssl_client_cert_key_password [String, nil] the password required to read the
52
+ # ssl_client_cert_key. Must be used in combination with ssl_client_cert_key.
53
+ #
54
+ # @param sasl_gssapi_principal [String, nil] a KRB5 principal
55
+ #
56
+ # @param sasl_gssapi_keytab [String, nil] a KRB5 keytab filepath
57
+ #
58
+ # @param sasl_scram_username [String, nil] SCRAM username
59
+ #
60
+ # @param sasl_scram_password [String, nil] SCRAM password
61
+ #
62
+ # @param sasl_scram_mechanism [String, nil] SCRAM mechanism, either "sha256" or "sha512"
63
+ #
64
+ # @param sasl_over_ssl [Boolean] whether to enforce SSL with SASL
65
+ #
66
+ # @param ssl_ca_certs_from_system [Boolean] whether to use the CA certs from the
67
+ # system's default certificate store.
68
+ #
69
+ # @param partitioner [Partitioner, nil] the partitioner that should be used by the client.
70
+ #
71
+ # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
72
+ # implements a `token` method. See {Sasl::OAuth#initialize}
73
+ #
74
+ # @param ssl_verify_hostname [Boolean, true] whether to verify that the host serving
75
+ # the SSL certificate and the signing chain of the certificate have the correct domains
76
+ # based on the CA certificate
77
+ #
78
+ # @param resolve_seed_brokers [Boolean] whether to resolve each hostname of the seed brokers.
79
+ # If a broker is resolved to multiple IP addresses, the client tries to connect to each
80
+ # of the addresses until it can connect.
81
+ #
82
+ # @return [Client]
83
+ def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
84
+ ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
85
+ ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
86
+ sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
87
+ sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
88
+ sasl_aws_msk_iam_access_key_id: nil,
89
+ sasl_aws_msk_iam_secret_key_id: nil, sasl_aws_msk_iam_aws_region: nil,
90
+ sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true,
91
+ resolve_seed_brokers: false)
92
+ @logger = TaggedLogger.new(logger)
93
+ @instrumenter = Instrumenter.new(client_id: client_id)
94
+ @seed_brokers = normalize_seed_brokers(seed_brokers)
95
+ @resolve_seed_brokers = resolve_seed_brokers
96
+
97
+ ssl_context = SslContext.build(
98
+ ca_cert_file_path: ssl_ca_cert_file_path,
99
+ ca_cert: ssl_ca_cert,
100
+ client_cert: ssl_client_cert,
101
+ client_cert_key: ssl_client_cert_key,
102
+ client_cert_key_password: ssl_client_cert_key_password,
103
+ client_cert_chain: ssl_client_cert_chain,
104
+ ca_certs_from_system: ssl_ca_certs_from_system,
105
+ verify_hostname: ssl_verify_hostname
106
+ )
107
+
108
+ sasl_authenticator = SaslAuthenticator.new(
109
+ sasl_gssapi_principal: sasl_gssapi_principal,
110
+ sasl_gssapi_keytab: sasl_gssapi_keytab,
111
+ sasl_plain_authzid: sasl_plain_authzid,
112
+ sasl_plain_username: sasl_plain_username,
113
+ sasl_plain_password: sasl_plain_password,
114
+ sasl_scram_username: sasl_scram_username,
115
+ sasl_scram_password: sasl_scram_password,
116
+ sasl_scram_mechanism: sasl_scram_mechanism,
117
+ sasl_aws_msk_iam_access_key_id: sasl_aws_msk_iam_access_key_id,
118
+ sasl_aws_msk_iam_secret_key_id: sasl_aws_msk_iam_secret_key_id,
119
+ sasl_aws_msk_iam_aws_region: sasl_aws_msk_iam_aws_region,
120
+ sasl_oauth_token_provider: sasl_oauth_token_provider,
121
+ logger: @logger
122
+ )
123
+
124
+ if sasl_authenticator.enabled? && sasl_over_ssl && ssl_context.nil?
125
+ raise ArgumentError, "SASL authentication requires that SSL is configured"
126
+ end
127
+
128
+ @connection_builder = ConnectionBuilder.new(
129
+ client_id: client_id,
130
+ connect_timeout: connect_timeout,
131
+ socket_timeout: socket_timeout,
132
+ ssl_context: ssl_context,
133
+ logger: @logger,
134
+ instrumenter: @instrumenter,
135
+ sasl_authenticator: sasl_authenticator
136
+ )
137
+
138
+ @cluster = initialize_cluster
139
+ @partitioner = partitioner || Partitioner.new
140
+ end
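A minimal sketch of constructing a client against AWS MSK with IAM authentication, using the `sasl_aws_msk_iam_*` options accepted by the constructor above; the broker endpoint and credential sources are placeholders.

require "kafka"

# Seed brokers and credentials below are placeholders. SASL requires an SSL
# context (see the ArgumentError check in #initialize), so the system CA
# store is enabled for the TLS connection.
kafka = Kafka.new(
  ["b-1.example-cluster.kafka.us-east-1.amazonaws.com:9098"],
  client_id: "my-app",
  ssl_ca_certs_from_system: true,
  sasl_aws_msk_iam_access_key_id: ENV.fetch("AWS_ACCESS_KEY_ID"),
  sasl_aws_msk_iam_secret_key_id: ENV.fetch("AWS_SECRET_ACCESS_KEY"),
  sasl_aws_msk_iam_aws_region: "us-east-1"
)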
141
+
142
+ # Delivers a single message to the Kafka cluster.
143
+ #
144
+ # **Note:** Only use this API for low-throughput scenarios. If you want to deliver
145
+ # many messages at a high rate, or if you want to configure the way messages are
146
+ # sent, use the {#producer} or {#async_producer} APIs instead.
147
+ #
148
+ # @param value [String, nil] the message value.
149
+ # @param key [String, nil] the message key.
150
+ # @param headers [Hash<String, String>] the headers for the message.
151
+ # @param topic [String] the topic that the message should be written to.
152
+ # @param partition [Integer, nil] the partition that the message should be written
153
+ # to, or `nil` if either `partition_key` is passed or the partition should be
154
+ # chosen at random.
155
+ # @param partition_key [String] a value used to deterministically choose a
156
+ # partition to write to.
157
+ # @param retries [Integer] the number of times to retry the delivery before giving
158
+ # up.
159
+ # @return [nil]
160
+ def deliver_message(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, retries: 1)
161
+ create_time = Time.now
162
+
163
+ # We want to fail fast if `topic` isn't a String
164
+ topic = topic.to_str
165
+
166
+ message = PendingMessage.new(
167
+ value: value,
168
+ key: key,
169
+ headers: headers,
170
+ topic: topic,
171
+ partition: partition,
172
+ partition_key: partition_key,
173
+ create_time: create_time
174
+ )
175
+
176
+ if partition.nil?
177
+ partition_count = @cluster.partitions_for(topic).count
178
+ partition = @partitioner.call(partition_count, message)
179
+ end
180
+
181
+ buffer = MessageBuffer.new
182
+
183
+ buffer.write(
184
+ value: message.value,
185
+ key: message.key,
186
+ headers: message.headers,
187
+ topic: message.topic,
188
+ partition: partition,
189
+ create_time: message.create_time,
190
+ )
191
+
192
+ @cluster.add_target_topics([topic])
193
+
194
+ compressor = Compressor.new(
195
+ instrumenter: @instrumenter,
196
+ )
197
+
198
+ transaction_manager = TransactionManager.new(
199
+ cluster: @cluster,
200
+ logger: @logger,
201
+ idempotent: false,
202
+ transactional: false
203
+ )
204
+
205
+ operation = ProduceOperation.new(
206
+ cluster: @cluster,
207
+ transaction_manager: transaction_manager,
208
+ buffer: buffer,
209
+ required_acks: 1,
210
+ ack_timeout: 10,
211
+ compressor: compressor,
212
+ logger: @logger,
213
+ instrumenter: @instrumenter,
214
+ )
215
+
216
+ attempt = 1
217
+
218
+ begin
219
+ @cluster.refresh_metadata_if_necessary!
220
+
221
+ operation.execute
222
+
223
+ unless buffer.empty?
224
+ raise DeliveryFailed.new(nil, [message])
225
+ end
226
+ rescue Kafka::Error => e
227
+ @cluster.mark_as_stale!
228
+
229
+ if attempt >= (retries + 1)
230
+ raise
231
+ else
232
+ attempt += 1
233
+ @logger.warn "Error while delivering message, #{e.class}: #{e.message}; retrying after 1s..."
234
+
235
+ sleep 1
236
+
237
+ retry
238
+ end
239
+ end
240
+ end
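For example, a one-off delivery using the signature above (topic names and keys are illustrative):

# Fire-and-forget delivery of a single message.
kafka.deliver_message("Hello, World!", topic: "greetings")

# Messages with the same partition_key are routed to the same partition.
kafka.deliver_message("signup", key: "user-42", topic: "events",
                      partition_key: "user-42", retries: 3)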
241
+
242
+ # Initializes a new Kafka producer.
243
+ #
244
+ # @param ack_timeout [Integer] The number of seconds a broker can wait for
245
+ # replicas to acknowledge a write before responding with a timeout.
246
+ #
247
+ # @param required_acks [Integer, Symbol] The number of replicas that must acknowledge
248
+ # a write, or `:all` if all in-sync replicas must acknowledge.
249
+ #
250
+ # @param max_retries [Integer] the number of retries that should be attempted
251
+ # before giving up sending messages to the cluster. Does not include the
252
+ # original attempt.
253
+ #
254
+ # @param retry_backoff [Integer] the number of seconds to wait between retries.
255
+ #
256
+ # @param max_buffer_size [Integer] the number of messages allowed in the buffer
257
+ # before new writes will raise {BufferOverflow} exceptions.
258
+ #
259
+ # @param max_buffer_bytesize [Integer] the maximum size of the buffer in bytes.
260
+ # Attempting to produce messages when the buffer reaches this size will
261
+ # result in {BufferOverflow} being raised.
262
+ #
263
+ # @param compression_codec [Symbol, nil] the name of the compression codec to
264
+ # use, or nil if no compression should be performed. Valid codecs: `:snappy`,
265
+ # `:gzip`, `:lz4`, `:zstd`
266
+ #
267
+ # @param compression_threshold [Integer] the number of messages that need to
268
+ # be in a message set before it should be compressed. Note that message sets
269
+ # are per-partition rather than per-topic or per-producer.
270
+ #
271
+ # @param interceptors [Array<Object>] a list of producer interceptors that implement
272
+ # `call(Kafka::PendingMessage)`.
273
+ #
274
+ # @return [Kafka::Producer] the Kafka producer.
275
+ def producer(
276
+ compression_codec: nil,
277
+ compression_threshold: 1,
278
+ ack_timeout: 5,
279
+ required_acks: :all,
280
+ max_retries: 2,
281
+ retry_backoff: 1,
282
+ max_buffer_size: 1000,
283
+ max_buffer_bytesize: 10_000_000,
284
+ idempotent: false,
285
+ transactional: false,
286
+ transactional_id: nil,
287
+ transactional_timeout: 60,
288
+ interceptors: []
289
+ )
290
+ cluster = initialize_cluster
291
+ compressor = Compressor.new(
292
+ codec_name: compression_codec,
293
+ threshold: compression_threshold,
294
+ instrumenter: @instrumenter,
295
+ )
296
+
297
+ transaction_manager = TransactionManager.new(
298
+ cluster: cluster,
299
+ logger: @logger,
300
+ idempotent: idempotent,
301
+ transactional: transactional,
302
+ transactional_id: transactional_id,
303
+ transactional_timeout: transactional_timeout,
304
+ )
305
+
306
+ Producer.new(
307
+ cluster: cluster,
308
+ transaction_manager: transaction_manager,
309
+ logger: @logger,
310
+ instrumenter: @instrumenter,
311
+ compressor: compressor,
312
+ ack_timeout: ack_timeout,
313
+ required_acks: required_acks,
314
+ max_retries: max_retries,
315
+ retry_backoff: retry_backoff,
316
+ max_buffer_size: max_buffer_size,
317
+ max_buffer_bytesize: max_buffer_bytesize,
318
+ partitioner: @partitioner,
319
+ interceptors: interceptors
320
+ )
321
+ end
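A usage sketch of the synchronous producer returned here. The `produce`, `deliver_messages`, and `shutdown` calls are assumed from the upstream ruby-kafka `Producer` API (defined in producer.rb, not shown in this file); topic and payload are placeholders.

producer = kafka.producer(
  compression_codec: :snappy,
  required_acks: :all,
  max_retries: 3,
  retry_backoff: 5
)

begin
  # Buffer messages locally, then flush them to the cluster in one round of requests.
  producer.produce("event payload", topic: "events", partition_key: "user-42")
  producer.deliver_messages
ensure
  producer.shutdown
end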
322
+
323
+ # Creates a new AsyncProducer instance.
324
+ #
325
+ # All parameters allowed by {#producer} can be passed. In addition to this,
326
+ # a few extra parameters can be passed when creating an async producer.
327
+ #
328
+ # @param max_queue_size [Integer] the maximum number of messages allowed in
329
+ # the queue.
330
+ # @param delivery_threshold [Integer] if greater than zero, the number of
331
+ # buffered messages that will automatically trigger a delivery.
332
+ # @param delivery_interval [Integer] if greater than zero, the number of
333
+ # seconds between automatic message deliveries.
334
+ #
335
+ # @see AsyncProducer
336
+ # @return [AsyncProducer]
337
+ def async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, max_retries: -1, retry_backoff: 0, **options)
338
+ sync_producer = producer(**options)
339
+
340
+ AsyncProducer.new(
341
+ sync_producer: sync_producer,
342
+ delivery_interval: delivery_interval,
343
+ delivery_threshold: delivery_threshold,
344
+ max_queue_size: max_queue_size,
345
+ max_retries: max_retries,
346
+ retry_backoff: retry_backoff,
347
+ instrumenter: @instrumenter,
348
+ logger: @logger,
349
+ )
350
+ end
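A sketch of the asynchronous variant; the extra parameters control when the background thread flushes. `produce`, `deliver_messages`, and `shutdown` are assumed from the upstream `AsyncProducer` API.

async = kafka.async_producer(
  delivery_interval: 10,     # flush every 10 seconds...
  delivery_threshold: 100,   # ...or once 100 messages are buffered
  max_queue_size: 10_000
)

async.produce("event payload", topic: "events")

# On shutdown, flush anything still queued and stop the background thread.
async.deliver_messages
async.shutdown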
351
+
352
+ # Creates a new Kafka consumer.
353
+ #
354
+ # @param group_id [String] the id of the group that the consumer should join.
355
+ # @param session_timeout [Integer] the number of seconds after which, if a client
356
+ # hasn't contacted the Kafka cluster, it will be kicked out of the group.
357
+ # @param offset_commit_interval [Integer] the interval between offset commits,
358
+ # in seconds.
359
+ # @param offset_commit_threshold [Integer] the number of messages that can be
360
+ # processed before their offsets are committed. If zero, offset commits are
361
+ # not triggered by message processing.
362
+ # @param heartbeat_interval [Integer] the interval between heartbeats; must be less
363
+ # than the session window.
364
+ # @param offset_retention_time [Integer] the time period that committed
365
+ # offsets will be retained, in seconds. Defaults to the broker setting.
366
+ # @param fetcher_max_queue_size [Integer] max number of items in the fetch queue that
367
+ # are stored for further processing. Note that each item in the queue represents a
368
+ # response from a single broker.
369
+ # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
370
+ # If it is 0, the topic list won't be refreshed (default);
371
+ # if it is n (n > 0), the topic list will be refreshed every n seconds.
372
+ # @param interceptors [Array<Object>] a list of consumer interceptors that implement
373
+ # `call(Kafka::FetchedBatch)`.
374
+ # @param assignment_strategy [Object] a partition assignment strategy that
375
+ # implements `protocol_type()`, `user_data()`, and `assign(members:, partitions:)`
376
+ # @return [Consumer]
377
+ def consumer(
378
+ group_id:,
379
+ session_timeout: 30,
380
+ rebalance_timeout: 60,
381
+ offset_commit_interval: 10,
382
+ offset_commit_threshold: 0,
383
+ heartbeat_interval: 10,
384
+ offset_retention_time: nil,
385
+ fetcher_max_queue_size: 100,
386
+ refresh_topic_interval: 0,
387
+ interceptors: [],
388
+ assignment_strategy: nil
389
+ )
390
+ cluster = initialize_cluster
391
+
392
+ instrumenter = DecoratingInstrumenter.new(@instrumenter, {
393
+ group_id: group_id,
394
+ })
395
+
396
+ # The Kafka protocol expects the retention time to be in ms.
397
+ retention_time = (offset_retention_time && offset_retention_time * 1_000) || -1
398
+
399
+ group = ConsumerGroup.new(
400
+ cluster: cluster,
401
+ logger: @logger,
402
+ group_id: group_id,
403
+ session_timeout: session_timeout,
404
+ rebalance_timeout: rebalance_timeout,
405
+ retention_time: retention_time,
406
+ instrumenter: instrumenter,
407
+ assignment_strategy: assignment_strategy
408
+ )
409
+
410
+ fetcher = Fetcher.new(
411
+ cluster: initialize_cluster,
412
+ group: group,
413
+ logger: @logger,
414
+ instrumenter: instrumenter,
415
+ max_queue_size: fetcher_max_queue_size
416
+ )
417
+
418
+ offset_manager = OffsetManager.new(
419
+ cluster: cluster,
420
+ group: group,
421
+ fetcher: fetcher,
422
+ logger: @logger,
423
+ commit_interval: offset_commit_interval,
424
+ commit_threshold: offset_commit_threshold,
425
+ offset_retention_time: offset_retention_time
426
+ )
427
+
428
+ heartbeat = Heartbeat.new(
429
+ group: group,
430
+ interval: heartbeat_interval,
431
+ instrumenter: instrumenter
432
+ )
433
+
434
+ Consumer.new(
435
+ cluster: cluster,
436
+ logger: @logger,
437
+ instrumenter: instrumenter,
438
+ group: group,
439
+ offset_manager: offset_manager,
440
+ fetcher: fetcher,
441
+ session_timeout: session_timeout,
442
+ heartbeat: heartbeat,
443
+ refresh_topic_interval: refresh_topic_interval,
444
+ interceptors: interceptors
445
+ )
446
+ end
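A consumer-group sketch; `subscribe` and `each_message` are assumed from the upstream `Consumer` class (consumer.rb), and the topic and handler are placeholders.

consumer = kafka.consumer(group_id: "page-view-analytics", offset_commit_interval: 5)
consumer.subscribe("page-views", start_from_beginning: false)

# Blocks forever, committing offsets according to the intervals configured above.
consumer.each_message do |message|
  puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"
end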
447
+
448
+ # Fetches a batch of messages from a single partition. Note that it's possible
449
+ # to get back empty batches.
450
+ #
451
+ # The starting point for the fetch can be configured with the `:offset` argument.
452
+ # If you pass a number, the fetch will start at that offset. However, there are
453
+ # two special Symbol values that can be passed instead:
454
+ #
455
+ # * `:earliest` — the first offset in the partition.
456
+ # * `:latest` — the next offset that will be written to, effectively making the
457
+ # call block until there is a new message in the partition.
458
+ #
459
+ # The Kafka protocol specifies the numeric values of these two options: -2 and -1,
460
+ # respectively. You can also pass in these numbers directly.
461
+ #
462
+ # ## Example
463
+ #
464
+ # When enumerating the messages in a partition, you typically fetch batches
465
+ # sequentially.
466
+ #
467
+ # offset = :earliest
468
+ #
469
+ # loop do
470
+ # messages = kafka.fetch_messages(
471
+ # topic: "my-topic",
472
+ # partition: 42,
473
+ # offset: offset,
474
+ # )
475
+ #
476
+ # messages.each do |message|
477
+ # puts message.offset, message.key, message.value
478
+ #
479
+ # # Set the next offset that should be read to be the subsequent
480
+ # # offset.
481
+ # offset = message.offset + 1
482
+ # end
483
+ # end
484
+ #
485
+ # See a working example in `examples/simple-consumer.rb`.
486
+ #
487
+ # @param topic [String] the topic that messages should be fetched from.
488
+ #
489
+ # @param partition [Integer] the partition that messages should be fetched from.
490
+ #
491
+ # @param offset [Integer, Symbol] the offset to start reading from. Default is
492
+ # the latest offset.
493
+ #
494
+ # @param max_wait_time [Integer] the maximum amount of time to wait before
495
+ # the server responds, in seconds.
496
+ #
497
+ # @param min_bytes [Integer] the minimum number of bytes to wait for. If set to
498
+ # zero, the broker will respond immediately, but the response may be empty.
499
+ # The default is 1 byte, which means that the broker will respond as soon as
500
+ # a message is written to the partition.
501
+ #
502
+ # @param max_bytes [Integer] the maximum number of bytes to include in the
503
+ # response message set. Default is 1 MB. You need to set this higher if you
504
+ # expect messages to be larger than this.
505
+ #
506
+ # @return [Array<Kafka::FetchedMessage>] the messages returned from the broker.
507
+ def fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, retries: 1)
508
+ operation = FetchOperation.new(
509
+ cluster: @cluster,
510
+ logger: @logger,
511
+ min_bytes: min_bytes,
512
+ max_bytes: max_bytes,
513
+ max_wait_time: max_wait_time,
514
+ )
515
+
516
+ operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
517
+
518
+ attempt = 1
519
+
520
+ begin
521
+ operation.execute.flat_map {|batch| batch.messages }
522
+ rescue Kafka::Error => e
523
+ @cluster.mark_as_stale!
524
+
525
+ if attempt >= (retries + 1)
526
+ raise
527
+ else
528
+ attempt += 1
529
+ @logger.warn "Error while fetching messages, #{e.class}: #{e.message}; retrying..."
530
+ retry
531
+ end
532
+ end
533
+ end
534
+
535
+ # Enumerate all messages in a topic.
536
+ #
537
+ # @param topic [String] the topic to consume messages from.
538
+ #
539
+ # @param start_from_beginning [Boolean] whether to start from the beginning
540
+ # of the topic or just subscribe to new messages being produced.
541
+ #
542
+ # @param max_wait_time [Integer] the maximum amount of time to wait before
543
+ # the server responds, in seconds.
544
+ #
545
+ # @param min_bytes [Integer] the minimum number of bytes to wait for. If set to
546
+ # zero, the broker will respond immediately, but the response may be empty.
547
+ # The default is 1 byte, which means that the broker will respond as soon as
548
+ # a message is written to the partition.
549
+ #
550
+ # @param max_bytes [Integer] the maximum number of bytes to include in the
551
+ # response message set. Default is 1 MB. You need to set this higher if you
552
+ # expect messages to be larger than this.
553
+ #
554
+ # @return [nil]
555
+ def each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block)
556
+ default_offset ||= start_from_beginning ? :earliest : :latest
557
+ offsets = Hash.new { default_offset }
558
+
559
+ loop do
560
+ operation = FetchOperation.new(
561
+ cluster: @cluster,
562
+ logger: @logger,
563
+ min_bytes: min_bytes,
564
+ max_wait_time: max_wait_time,
565
+ )
566
+
567
+ @cluster.partitions_for(topic).map(&:partition_id).each do |partition|
568
+ partition_offset = offsets[partition]
569
+ operation.fetch_from_partition(topic, partition, offset: partition_offset, max_bytes: max_bytes)
570
+ end
571
+
572
+ batches = operation.execute
573
+
574
+ batches.each do |batch|
575
+ batch.messages.each(&block)
576
+ offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
577
+ end
578
+ end
579
+ end
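The simple, non-group iterator above can be called directly on the client, for example:

# Tails a topic without a consumer group; offsets are kept only in memory.
kafka.each_message(topic: "page-views", start_from_beginning: false) do |message|
  puts "#{message.offset}: #{message.value}"
end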
580
+
581
+ # Describe broker configs
582
+ #
583
+ # @param broker_id [Integer] the id of the broker
584
+ # @param configs [Array] array of config keys.
585
+ # @return [Array<Kafka::Protocol::DescribeConfigsResponse::ConfigEntry>]
586
+ def describe_configs(broker_id, configs = [])
587
+ @cluster.describe_configs(broker_id, configs)
588
+ end
589
+
590
+ # Alter broker configs
591
+ #
592
+ # @param broker_id [Integer] the id of the broker
593
+ # @param configs [Array] array of config strings.
594
+ # @return [nil]
595
+ def alter_configs(broker_id, configs = [])
596
+ @cluster.alter_configs(broker_id, configs)
597
+ end
598
+
599
+ # Creates a topic in the cluster.
600
+ #
601
+ # @example Creating a topic with log compaction
602
+ # # Enable log compaction:
603
+ # config = { "cleanup.policy" => "compact" }
604
+ #
605
+ # # Create the topic:
606
+ # kafka.create_topic("dns-mappings", config: config)
607
+ #
608
+ # @param name [String] the name of the topic.
609
+ # @param num_partitions [Integer] the number of partitions that should be created
610
+ # in the topic.
611
+ # @param replication_factor [Integer] the replication factor of the topic.
612
+ # @param timeout [Integer] a duration of time to wait for the topic to be
613
+ # completely created.
614
+ # @param config [Hash] topic configuration entries. See
615
+ # [the Kafka documentation](https://kafka.apache.org/documentation/#topicconfigs)
616
+ # for more information.
617
+ # @raise [Kafka::TopicAlreadyExists] if the topic already exists.
618
+ # @return [nil]
619
+ def create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30, config: {})
620
+ @cluster.create_topic(
621
+ name,
622
+ num_partitions: num_partitions,
623
+ replication_factor: replication_factor,
624
+ timeout: timeout,
625
+ config: config,
626
+ )
627
+ end
628
+
629
+ # Delete a topic in the cluster.
630
+ #
631
+ # @param name [String] the name of the topic.
632
+ # @param timeout [Integer] a duration of time to wait for the topic to be
633
+ # completely marked deleted.
634
+ # @return [nil]
635
+ def delete_topic(name, timeout: 30)
636
+ @cluster.delete_topic(name, timeout: timeout)
637
+ end
638
+
639
+ # Describe the configuration of a topic.
640
+ #
641
+ # Retrieves the topic configuration from the Kafka brokers. Configuration names
642
+ # refer to [Kafka's topic-level configs](https://kafka.apache.org/documentation/#topicconfigs).
643
+ #
644
+ # @note This is an alpha level API and is subject to change.
645
+ #
646
+ # @example Describing the cleanup policy config of a topic
647
+ # kafka = Kafka.new(["kafka1:9092"])
648
+ # kafka.describe_topic("my-topic", ["cleanup.policy"])
649
+ # #=> { "cleanup.policy" => "delete" }
650
+ #
651
+ # @param name [String] the name of the topic.
652
+ # @param configs [Array<String>] array of desired config names.
653
+ # @return [Hash<String, String>]
654
+ def describe_topic(name, configs = [])
655
+ @cluster.describe_topic(name, configs)
656
+ end
657
+
658
+ # Alter the configuration of a topic.
659
+ #
660
+ # Configuration keys must match
661
+ # [Kafka's topic-level configs](https://kafka.apache.org/documentation/#topicconfigs).
662
+ #
663
+ # @note This is an alpha level API and is subject to change.
664
+ #
665
+ # @example Altering the cleanup policy config of a topic
666
+ # kafka = Kafka.new(["kafka1:9092"])
667
+ # kafka.alter_topic("my-topic", "cleanup.policy" => "delete", "max.message.bytes" => "100000")
668
+ #
669
+ # @param name [String] the name of the topic.
670
+ # @param configs [Hash<String, String>] hash of desired config keys and values.
671
+ # @return [nil]
672
+ def alter_topic(name, configs = {})
673
+ @cluster.alter_topic(name, configs)
674
+ end
675
+
676
+ # Describe a consumer group
677
+ #
678
+ # @param group_id [String] the id of the consumer group
679
+ # @return [Kafka::Protocol::DescribeGroupsResponse::Group]
680
+ def describe_group(group_id)
681
+ @cluster.describe_group(group_id)
682
+ end
683
+
684
+ # Fetch all committed offsets for a consumer group
685
+ #
686
+ # @param group_id [String] the id of the consumer group
687
+ # @return [Hash<String, Hash<Integer, Kafka::Protocol::OffsetFetchResponse::PartitionOffsetInfo>>]
688
+ def fetch_group_offsets(group_id)
689
+ @cluster.fetch_group_offsets(group_id)
690
+ end
691
+
692
+ # Create partitions for a topic.
693
+ #
694
+ # @param name [String] the name of the topic.
695
+ # @param num_partitions [Integer] the number of desired partitions for
696
+ # the topic
697
+ # @param timeout [Integer] a duration of time to wait for the new
698
+ # partitions to be added.
699
+ # @return [nil]
700
+ def create_partitions_for(name, num_partitions: 1, timeout: 30)
701
+ @cluster.create_partitions_for(name, num_partitions: num_partitions, timeout: timeout)
702
+ end
703
+
704
+ # Lists all topics in the cluster.
705
+ #
706
+ # @return [Array<String>] the list of topic names.
707
+ def topics
708
+ attempts = 0
709
+ begin
710
+ attempts += 1
711
+ @cluster.list_topics
712
+ rescue Kafka::ConnectionError
713
+ @cluster.mark_as_stale!
714
+ retry unless attempts > 1
715
+ raise
716
+ end
717
+ end
718
+
719
+ # Lists all consumer groups in the cluster
720
+ #
721
+ # @return [Array<String>] the list of group ids
722
+ def groups
723
+ @cluster.list_groups
724
+ end
725
+
726
+ def has_topic?(topic)
727
+ @cluster.clear_target_topics
728
+ @cluster.add_target_topics([topic])
729
+ @cluster.topics.include?(topic)
730
+ end
731
+
732
+ # Counts the number of partitions in a topic.
733
+ #
734
+ # @param topic [String]
735
+ # @return [Integer] the number of partitions in the topic.
736
+ def partitions_for(topic)
737
+ @cluster.partitions_for(topic).count
738
+ end
739
+
740
+ # Counts the number of replicas for a topic's partition
741
+ #
742
+ # @param topic [String]
743
+ # @return [Integer] the number of replica nodes for the topic's partition
744
+ def replica_count_for(topic)
745
+ @cluster.partitions_for(topic).first.replicas.count
746
+ end
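Together with `has_topic?`, these inspection helpers allow a quick sanity check of a topic (the topic name is illustrative):

if kafka.has_topic?("events")
  puts "partitions:  #{kafka.partitions_for('events')}"     # e.g. 12
  puts "replication: #{kafka.replica_count_for('events')}"  # e.g. 3
end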
747
+
748
+ # Retrieve the offset of the last message in a partition. If there are no
749
+ # messages in the partition, -1 is returned.
750
+ #
751
+ # @param topic [String]
752
+ # @param partition [Integer]
753
+ # @return [Integer] the offset of the last message in the partition, or -1 if
754
+ # there are no messages in the partition.
755
+ def last_offset_for(topic, partition)
756
+ # The offset resolution API will return the offset of the "next" message to
757
+ # be written when resolving the "latest" offset, so we subtract one.
758
+ @cluster.resolve_offset(topic, partition, :latest) - 1
759
+ end
760
+
761
+ # Retrieve the offset of the last message in each partition of the specified topics.
762
+ #
763
+ # @param topics [Array<String>] topic names.
764
+ # @return [Hash<String, Hash<Integer, Integer>>]
765
+ # @example
766
+ # last_offsets_for('topic-1', 'topic-2') # =>
767
+ # # {
768
+ # # 'topic-1' => { 0 => 100, 1 => 100 },
769
+ # # 'topic-2' => { 0 => 100, 1 => 100 }
770
+ # # }
771
+ def last_offsets_for(*topics)
772
+ @cluster.add_target_topics(topics)
773
+ topics.map {|topic|
774
+ partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
775
+ partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
776
+ [topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
777
+ }.to_h
778
+ end
779
+
780
+ # Check whether the current cluster supports a specific API version
781
+ #
782
+ # @param api_key [Integer] API key.
783
+ # @param version [Integer] API version.
784
+ # @return [Boolean]
785
+ def supports_api?(api_key, version = nil)
786
+ @cluster.supports_api?(api_key, version)
787
+ end
788
+
789
+ def apis
790
+ @cluster.apis
791
+ end
792
+
793
+ # List all brokers in the cluster.
794
+ #
795
+ # @return [Array<Kafka::BrokerInfo>] the list of brokers.
796
+ def brokers
797
+ @cluster.cluster_info.brokers
798
+ end
799
+
800
+ # The current controller broker in the cluster.
801
+ #
802
+ # @return [Kafka::BrokerInfo] information on the controller broker.
803
+ def controller_broker
804
+ brokers.find {|broker| broker.node_id == @cluster.cluster_info.controller_id }
805
+ end
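For example, listing the cluster's brokers and the current controller (only `node_id` is shown, since that is the attribute referenced above):

kafka.brokers.each do |broker|
  puts "broker #{broker.node_id}"
end

puts "controller: #{kafka.controller_broker.node_id}"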
806
+
807
+ # Closes all connections to the Kafka brokers and frees up used resources.
808
+ #
809
+ # @return [nil]
810
+ def close
811
+ @cluster.disconnect
812
+ end
813
+
814
+ private
815
+
816
+ def initialize_cluster
817
+ broker_pool = BrokerPool.new(
818
+ connection_builder: @connection_builder,
819
+ logger: @logger,
820
+ )
821
+
822
+ Cluster.new(
823
+ seed_brokers: @seed_brokers,
824
+ broker_pool: broker_pool,
825
+ logger: @logger,
826
+ resolve_seed_brokers: @resolve_seed_brokers,
827
+ )
828
+ end
829
+
830
+ def normalize_seed_brokers(seed_brokers)
831
+ if seed_brokers.is_a?(String)
832
+ seed_brokers = seed_brokers.split(",")
833
+ end
834
+
835
+ seed_brokers.map {|str| BrokerUri.parse(str) }
836
+ end
837
+ end
838
+ end