ruby-kafka-aws-iam 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
data/lib/kafka/client.rb
@@ -0,0 +1,838 @@
+# coding: utf-8
+# frozen_string_literal: true
+
+require "kafka/ssl_context"
+require "kafka/cluster"
+require "kafka/transaction_manager"
+require "kafka/broker_info"
+require "kafka/producer"
+require "kafka/consumer"
+require "kafka/heartbeat"
+require "kafka/broker_uri"
+require "kafka/async_producer"
+require "kafka/fetched_message"
+require "kafka/fetch_operation"
+require "kafka/connection_builder"
+require "kafka/instrumenter"
+require "kafka/sasl_authenticator"
+require "kafka/tagged_logger"
+
+module Kafka
+  class Client
+    # Initializes a new Kafka client.
+    #
+    # @param seed_brokers [Array<String>, String] the list of brokers used to initialize
+    #   the client. Either an Array of connections, or a comma separated string of connections.
+    #   A connection can either be a string of "host:port" or a full URI with a scheme.
+    #   If there's a scheme it's ignored and only host/port are used.
+    #
+    # @param client_id [String] the identifier for this application.
+    #
+    # @param logger [Logger] the logger that should be used by the client.
+    #
+    # @param connect_timeout [Integer, nil] the timeout setting for connecting
+    #   to brokers. See {BrokerPool#initialize}.
+    #
+    # @param socket_timeout [Integer, nil] the timeout setting for socket
+    #   connections. See {BrokerPool#initialize}.
+    #
+    # @param ssl_ca_cert [String, Array<String>, nil] a PEM encoded CA cert, or an Array of
+    #   PEM encoded CA certs, to use with an SSL connection.
+    #
+    # @param ssl_ca_cert_file_path [String, Array<String>, nil] a path on the filesystem, or an
+    #   Array of paths, to PEM encoded CA cert(s) to use with an SSL connection.
+    #
+    # @param ssl_client_cert [String, nil] a PEM encoded client cert to use with an
+    #   SSL connection. Must be used in combination with ssl_client_cert_key.
+    #
+    # @param ssl_client_cert_key [String, nil] a PEM encoded client cert key to use with an
+    #   SSL connection. Must be used in combination with ssl_client_cert.
+    #
+    # @param ssl_client_cert_key_password [String, nil] the password required to read the
+    #   ssl_client_cert_key. Must be used in combination with ssl_client_cert_key.
+    #
+    # @param sasl_gssapi_principal [String, nil] a KRB5 principal
+    #
+    # @param sasl_gssapi_keytab [String, nil] a KRB5 keytab filepath
+    #
+    # @param sasl_scram_username [String, nil] SCRAM username
+    #
+    # @param sasl_scram_password [String, nil] SCRAM password
+    #
+    # @param sasl_scram_mechanism [String, nil] Scram mechanism, either "sha256" or "sha512"
+    #
+    # @param sasl_over_ssl [Boolean] whether to enforce SSL with SASL
+    #
+    # @param ssl_ca_certs_from_system [Boolean] whether to use the CA certs from the
+    #   system's default certificate store.
+    #
+    # @param partitioner [Partitioner, nil] the partitioner that should be used by the client.
+    #
+    # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
+    #   implements method token. See {Sasl::OAuth#initialize}
+    #
+    # @param ssl_verify_hostname [Boolean, true] whether to verify that the host serving
+    #   the SSL certificate and the signing chain of the certificate have the correct domains
+    #   based on the CA certificate
+    #
+    # @param resolve_seed_brokers [Boolean] whether to resolve each hostname of the seed brokers.
+    #   If a broker is resolved to multiple IP addresses, the client tries to connect to each
+    #   of the addresses until it can connect.
+    #
+    # @return [Client]
+    def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
+                   ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
+                   ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
+                   sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
+                   sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
+                   sasl_aws_msk_iam_access_key_id: nil,
+                   sasl_aws_msk_iam_secret_key_id: nil, sasl_aws_msk_iam_aws_region: nil,
+                   sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true,
+                   resolve_seed_brokers: false)
+      @logger = TaggedLogger.new(logger)
+      @instrumenter = Instrumenter.new(client_id: client_id)
+      @seed_brokers = normalize_seed_brokers(seed_brokers)
+      @resolve_seed_brokers = resolve_seed_brokers
+
+      ssl_context = SslContext.build(
+        ca_cert_file_path: ssl_ca_cert_file_path,
+        ca_cert: ssl_ca_cert,
+        client_cert: ssl_client_cert,
+        client_cert_key: ssl_client_cert_key,
+        client_cert_key_password: ssl_client_cert_key_password,
+        client_cert_chain: ssl_client_cert_chain,
+        ca_certs_from_system: ssl_ca_certs_from_system,
+        verify_hostname: ssl_verify_hostname
+      )
+
+      sasl_authenticator = SaslAuthenticator.new(
+        sasl_gssapi_principal: sasl_gssapi_principal,
+        sasl_gssapi_keytab: sasl_gssapi_keytab,
+        sasl_plain_authzid: sasl_plain_authzid,
+        sasl_plain_username: sasl_plain_username,
+        sasl_plain_password: sasl_plain_password,
+        sasl_scram_username: sasl_scram_username,
+        sasl_scram_password: sasl_scram_password,
+        sasl_scram_mechanism: sasl_scram_mechanism,
+        sasl_aws_msk_iam_access_key_id: sasl_aws_msk_iam_access_key_id,
+        sasl_aws_msk_iam_secret_key_id: sasl_aws_msk_iam_secret_key_id,
+        sasl_aws_msk_iam_aws_region: sasl_aws_msk_iam_aws_region,
+        sasl_oauth_token_provider: sasl_oauth_token_provider,
+        logger: @logger
+      )
+
+      if sasl_authenticator.enabled? && sasl_over_ssl && ssl_context.nil?
+        raise ArgumentError, "SASL authentication requires that SSL is configured"
+      end
+
+      @connection_builder = ConnectionBuilder.new(
+        client_id: client_id,
+        connect_timeout: connect_timeout,
+        socket_timeout: socket_timeout,
+        ssl_context: ssl_context,
+        logger: @logger,
+        instrumenter: @instrumenter,
+        sasl_authenticator: sasl_authenticator
+      )
+
+      @cluster = initialize_cluster
+      @partitioner = partitioner || Partitioner.new
+    end
+
+    # Delivers a single message to the Kafka cluster.
+    #
+    # **Note:** Only use this API for low-throughput scenarios. If you want to deliver
+    # many messages at a high rate, or if you want to configure the way messages are
+    # sent, use the {#producer} or {#async_producer} APIs instead.
+    #
+    # @param value [String, nil] the message value.
+    # @param key [String, nil] the message key.
+    # @param headers [Hash<String, String>] the headers for the message.
+    # @param topic [String] the topic that the message should be written to.
+    # @param partition [Integer, nil] the partition that the message should be written
+    #   to, or `nil` if either `partition_key` is passed or the partition should be
+    #   chosen at random.
+    # @param partition_key [String] a value used to deterministically choose a
+    #   partition to write to.
+    # @param retries [Integer] the number of times to retry the delivery before giving
+    #   up.
+    # @return [nil]
+    def deliver_message(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, retries: 1)
+      create_time = Time.now
+
+      # We want to fail fast if `topic` isn't a String
+      topic = topic.to_str
+
+      message = PendingMessage.new(
+        value: value,
+        key: key,
+        headers: headers,
+        topic: topic,
+        partition: partition,
+        partition_key: partition_key,
+        create_time: create_time
+      )
+
+      if partition.nil?
+        partition_count = @cluster.partitions_for(topic).count
+        partition = @partitioner.call(partition_count, message)
+      end
+
+      buffer = MessageBuffer.new
+
+      buffer.write(
+        value: message.value,
+        key: message.key,
+        headers: message.headers,
+        topic: message.topic,
+        partition: partition,
+        create_time: message.create_time,
+      )
+
+      @cluster.add_target_topics([topic])
+
+      compressor = Compressor.new(
+        instrumenter: @instrumenter,
+      )
+
+      transaction_manager = TransactionManager.new(
+        cluster: @cluster,
+        logger: @logger,
+        idempotent: false,
+        transactional: false
+      )
+
+      operation = ProduceOperation.new(
+        cluster: @cluster,
+        transaction_manager: transaction_manager,
+        buffer: buffer,
+        required_acks: 1,
+        ack_timeout: 10,
+        compressor: compressor,
+        logger: @logger,
+        instrumenter: @instrumenter,
+      )
+
+      attempt = 1
+
+      begin
+        @cluster.refresh_metadata_if_necessary!
+
+        operation.execute
+
+        unless buffer.empty?
+          raise DeliveryFailed.new(nil, [message])
+        end
+      rescue Kafka::Error => e
+        @cluster.mark_as_stale!
+
+        if attempt >= (retries + 1)
+          raise
+        else
+          attempt += 1
+          @logger.warn "Error while delivering message, #{e.class}: #{e.message}; retrying after 1s..."
+
+          sleep 1
+
+          retry
+        end
+      end
+    end
+
+    # Initializes a new Kafka producer.
+    #
+    # @param ack_timeout [Integer] The number of seconds a broker can wait for
+    #   replicas to acknowledge a write before responding with a timeout.
+    #
+    # @param required_acks [Integer, Symbol] The number of replicas that must acknowledge
+    #   a write, or `:all` if all in-sync replicas must acknowledge.
+    #
+    # @param max_retries [Integer] the number of retries that should be attempted
+    #   before giving up sending messages to the cluster. Does not include the
+    #   original attempt.
+    #
+    # @param retry_backoff [Integer] the number of seconds to wait between retries.
+    #
+    # @param max_buffer_size [Integer] the number of messages allowed in the buffer
+    #   before new writes will raise {BufferOverflow} exceptions.
+    #
+    # @param max_buffer_bytesize [Integer] the maximum size of the buffer in bytes.
+    #   attempting to produce messages when the buffer reaches this size will
+    #   result in {BufferOverflow} being raised.
+    #
+    # @param compression_codec [Symbol, nil] the name of the compression codec to
+    #   use, or nil if no compression should be performed. Valid codecs: `:snappy`,
+    #   `:gzip`, `:lz4`, `:zstd`
+    #
+    # @param compression_threshold [Integer] the number of messages that needs to
+    #   be in a message set before it should be compressed. Note that message sets
+    #   are per-partition rather than per-topic or per-producer.
+    #
+    # @param interceptors [Array<Object>] a list of producer interceptors the implement
+    #   `call(Kafka::PendingMessage)`.
+    #
+    # @return [Kafka::Producer] the Kafka producer.
+    def producer(
+      compression_codec: nil,
+      compression_threshold: 1,
+      ack_timeout: 5,
+      required_acks: :all,
+      max_retries: 2,
+      retry_backoff: 1,
+      max_buffer_size: 1000,
+      max_buffer_bytesize: 10_000_000,
+      idempotent: false,
+      transactional: false,
+      transactional_id: nil,
+      transactional_timeout: 60,
+      interceptors: []
+    )
+      cluster = initialize_cluster
+      compressor = Compressor.new(
+        codec_name: compression_codec,
+        threshold: compression_threshold,
+        instrumenter: @instrumenter,
+      )
+
+      transaction_manager = TransactionManager.new(
+        cluster: cluster,
+        logger: @logger,
+        idempotent: idempotent,
+        transactional: transactional,
+        transactional_id: transactional_id,
+        transactional_timeout: transactional_timeout,
+      )
+
+      Producer.new(
+        cluster: cluster,
+        transaction_manager: transaction_manager,
+        logger: @logger,
+        instrumenter: @instrumenter,
+        compressor: compressor,
+        ack_timeout: ack_timeout,
+        required_acks: required_acks,
+        max_retries: max_retries,
+        retry_backoff: retry_backoff,
+        max_buffer_size: max_buffer_size,
+        max_buffer_bytesize: max_buffer_bytesize,
+        partitioner: @partitioner,
+        interceptors: interceptors
+      )
+    end
+
+    # Creates a new AsyncProducer instance.
+    #
+    # All parameters allowed by {#producer} can be passed. In addition to this,
+    # a few extra parameters can be passed when creating an async producer.
+    #
+    # @param max_queue_size [Integer] the maximum number of messages allowed in
+    #   the queue.
+    # @param delivery_threshold [Integer] if greater than zero, the number of
+    #   buffered messages that will automatically trigger a delivery.
+    # @param delivery_interval [Integer] if greater than zero, the number of
+    #   seconds between automatic message deliveries.
+    #
+    # @see AsyncProducer
+    # @return [AsyncProducer]
+    def async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, max_retries: -1, retry_backoff: 0, **options)
+      sync_producer = producer(**options)
+
+      AsyncProducer.new(
+        sync_producer: sync_producer,
+        delivery_interval: delivery_interval,
+        delivery_threshold: delivery_threshold,
+        max_queue_size: max_queue_size,
+        max_retries: max_retries,
+        retry_backoff: retry_backoff,
+        instrumenter: @instrumenter,
+        logger: @logger,
+      )
+    end
+
+    # Creates a new Kafka consumer.
+    #
+    # @param group_id [String] the id of the group that the consumer should join.
+    # @param session_timeout [Integer] the number of seconds after which, if a client
+    #   hasn't contacted the Kafka cluster, it will be kicked out of the group.
+    # @param offset_commit_interval [Integer] the interval between offset commits,
+    #   in seconds.
+    # @param offset_commit_threshold [Integer] the number of messages that can be
+    #   processed before their offsets are committed. If zero, offset commits are
+    #   not triggered by message processing.
+    # @param heartbeat_interval [Integer] the interval between heartbeats; must be less
+    #   than the session window.
+    # @param offset_retention_time [Integer] the time period that committed
+    #   offsets will be retained, in seconds. Defaults to the broker setting.
+    # @param fetcher_max_queue_size [Integer] max number of items in the fetch queue that
+    #   are stored for further processing. Note, that each item in the queue represents a
+    #   response from a single broker.
+    # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
+    #   If it is 0, the topic list won't be refreshed (default)
+    #   If it is n (n > 0), the topic list will be refreshed every n seconds
+    # @param interceptors [Array<Object>] a list of consumer interceptors that implement
+    #   `call(Kafka::FetchedBatch)`.
+    # @param assignment_strategy [Object] a partition assignment strategy that
+    #   implements `protocol_type()`, `user_data()`, and `assign(members:, partitions:)`
+    # @return [Consumer]
+    def consumer(
+      group_id:,
+      session_timeout: 30,
+      rebalance_timeout: 60,
+      offset_commit_interval: 10,
+      offset_commit_threshold: 0,
+      heartbeat_interval: 10,
+      offset_retention_time: nil,
+      fetcher_max_queue_size: 100,
+      refresh_topic_interval: 0,
+      interceptors: [],
+      assignment_strategy: nil
+    )
+      cluster = initialize_cluster
+
+      instrumenter = DecoratingInstrumenter.new(@instrumenter, {
+        group_id: group_id,
+      })
+
+      # The Kafka protocol expects the retention time to be in ms.
+      retention_time = (offset_retention_time && offset_retention_time * 1_000) || -1
+
+      group = ConsumerGroup.new(
+        cluster: cluster,
+        logger: @logger,
+        group_id: group_id,
+        session_timeout: session_timeout,
+        rebalance_timeout: rebalance_timeout,
+        retention_time: retention_time,
+        instrumenter: instrumenter,
+        assignment_strategy: assignment_strategy
+      )
+
+      fetcher = Fetcher.new(
+        cluster: initialize_cluster,
+        group: group,
+        logger: @logger,
+        instrumenter: instrumenter,
+        max_queue_size: fetcher_max_queue_size
+      )
+
+      offset_manager = OffsetManager.new(
+        cluster: cluster,
+        group: group,
+        fetcher: fetcher,
+        logger: @logger,
+        commit_interval: offset_commit_interval,
+        commit_threshold: offset_commit_threshold,
+        offset_retention_time: offset_retention_time
+      )
+
+      heartbeat = Heartbeat.new(
+        group: group,
+        interval: heartbeat_interval,
+        instrumenter: instrumenter
+      )
+
+      Consumer.new(
+        cluster: cluster,
+        logger: @logger,
+        instrumenter: instrumenter,
+        group: group,
+        offset_manager: offset_manager,
+        fetcher: fetcher,
+        session_timeout: session_timeout,
+        heartbeat: heartbeat,
+        refresh_topic_interval: refresh_topic_interval,
+        interceptors: interceptors
+      )
+    end
+
+    # Fetches a batch of messages from a single partition. Note that it's possible
+    # to get back empty batches.
+    #
+    # The starting point for the fetch can be configured with the `:offset` argument.
+    # If you pass a number, the fetch will start at that offset. However, there are
+    # two special Symbol values that can be passed instead:
+    #
+    # * `:earliest` — the first offset in the partition.
+    # * `:latest` — the next offset that will be written to, effectively making the
+    #   call block until there is a new message in the partition.
+    #
+    # The Kafka protocol specifies the numeric values of these two options: -2 and -1,
+    # respectively. You can also pass in these numbers directly.
+    #
+    # ## Example
+    #
+    # When enumerating the messages in a partition, you typically fetch batches
+    # sequentially.
+    #
+    #     offset = :earliest
+    #
+    #     loop do
+    #       messages = kafka.fetch_messages(
+    #         topic: "my-topic",
+    #         partition: 42,
+    #         offset: offset,
+    #       )
+    #
+    #       messages.each do |message|
+    #         puts message.offset, message.key, message.value
+    #
+    #         # Set the next offset that should be read to be the subsequent
+    #         # offset.
+    #         offset = message.offset + 1
+    #       end
+    #     end
+    #
+    # See a working example in `examples/simple-consumer.rb`.
+    #
+    # @param topic [String] the topic that messages should be fetched from.
+    #
+    # @param partition [Integer] the partition that messages should be fetched from.
+    #
+    # @param offset [Integer, Symbol] the offset to start reading from. Default is
+    #   the latest offset.
+    #
+    # @param max_wait_time [Integer] the maximum amount of time to wait before
+    #   the server responds, in seconds.
+    #
+    # @param min_bytes [Integer] the minimum number of bytes to wait for. If set to
+    #   zero, the broker will respond immediately, but the response may be empty.
+    #   The default is 1 byte, which means that the broker will respond as soon as
+    #   a message is written to the partition.
+    #
+    # @param max_bytes [Integer] the maximum number of bytes to include in the
+    #   response message set. Default is 1 MB. You need to set this higher if you
+    #   expect messages to be larger than this.
+    #
+    # @return [Array<Kafka::FetchedMessage>] the messages returned from the broker.
+    def fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, retries: 1)
+      operation = FetchOperation.new(
+        cluster: @cluster,
+        logger: @logger,
+        min_bytes: min_bytes,
+        max_bytes: max_bytes,
+        max_wait_time: max_wait_time,
+      )
+
+      operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
+
+      attempt = 1
+
+      begin
+        operation.execute.flat_map {|batch| batch.messages }
+      rescue Kafka::Error => e
+        @cluster.mark_as_stale!
+
+        if attempt >= (retries + 1)
+          raise
+        else
+          attempt += 1
+          @logger.warn "Error while fetching messages, #{e.class}: #{e.message}; retrying..."
+          retry
+        end
+      end
+    end
+
+    # Enumerate all messages in a topic.
+    #
+    # @param topic [String] the topic to consume messages from.
+    #
+    # @param start_from_beginning [Boolean] whether to start from the beginning
+    #   of the topic or just subscribe to new messages being produced.
+    #
+    # @param max_wait_time [Integer] the maximum amount of time to wait before
+    #   the server responds, in seconds.
+    #
+    # @param min_bytes [Integer] the minimum number of bytes to wait for. If set to
+    #   zero, the broker will respond immediately, but the response may be empty.
+    #   The default is 1 byte, which means that the broker will respond as soon as
+    #   a message is written to the partition.
+    #
+    # @param max_bytes [Integer] the maximum number of bytes to include in the
+    #   response message set. Default is 1 MB. You need to set this higher if you
+    #   expect messages to be larger than this.
+    #
+    # @return [nil]
+    def each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block)
+      default_offset ||= start_from_beginning ? :earliest : :latest
+      offsets = Hash.new { default_offset }
+
+      loop do
+        operation = FetchOperation.new(
+          cluster: @cluster,
+          logger: @logger,
+          min_bytes: min_bytes,
+          max_wait_time: max_wait_time,
+        )
+
+        @cluster.partitions_for(topic).map(&:partition_id).each do |partition|
+          partition_offset = offsets[partition]
+          operation.fetch_from_partition(topic, partition, offset: partition_offset, max_bytes: max_bytes)
+        end
+
+        batches = operation.execute
+
+        batches.each do |batch|
+          batch.messages.each(&block)
+          offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
+        end
+      end
+    end
+
+    # Describe broker configs
+    #
+    # @param broker_id [int] the id of the broker
+    # @param configs [Array] array of config keys.
+    # @return [Array<Kafka::Protocol::DescribeConfigsResponse::ConfigEntry>]
+    def describe_configs(broker_id, configs = [])
+      @cluster.describe_configs(broker_id, configs)
+    end
+
+    # Alter broker configs
+    #
+    # @param broker_id [int] the id of the broker
+    # @param configs [Array] array of config strings.
+    # @return [nil]
+    def alter_configs(broker_id, configs = [])
+      @cluster.alter_configs(broker_id, configs)
+    end
+
+    # Creates a topic in the cluster.
+    #
+    # @example Creating a topic with log compaction
+    #   # Enable log compaction:
+    #   config = { "cleanup.policy" => "compact" }
+    #
+    #   # Create the topic:
+    #   kafka.create_topic("dns-mappings", config: config)
+    #
+    # @param name [String] the name of the topic.
+    # @param num_partitions [Integer] the number of partitions that should be created
+    #   in the topic.
+    # @param replication_factor [Integer] the replication factor of the topic.
+    # @param timeout [Integer] a duration of time to wait for the topic to be
+    #   completely created.
+    # @param config [Hash] topic configuration entries. See
+    #   [the Kafka documentation](https://kafka.apache.org/documentation/#topicconfigs)
+    #   for more information.
+    # @raise [Kafka::TopicAlreadyExists] if the topic already exists.
+    # @return [nil]
+    def create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30, config: {})
+      @cluster.create_topic(
+        name,
+        num_partitions: num_partitions,
+        replication_factor: replication_factor,
+        timeout: timeout,
+        config: config,
+      )
+    end
+
+    # Delete a topic in the cluster.
+    #
+    # @param name [String] the name of the topic.
+    # @param timeout [Integer] a duration of time to wait for the topic to be
+    #   completely marked deleted.
+    # @return [nil]
+    def delete_topic(name, timeout: 30)
+      @cluster.delete_topic(name, timeout: timeout)
+    end
+
+    # Describe the configuration of a topic.
+    #
+    # Retrieves the topic configuration from the Kafka brokers. Configuration names
+    # refer to [Kafka's topic-level configs](https://kafka.apache.org/documentation/#topicconfigs).
+    #
+    # @note This is an alpha level API and is subject to change.
+    #
+    # @example Describing the cleanup policy config of a topic
+    #   kafka = Kafka.new(["kafka1:9092"])
+    #   kafka.describe_topic("my-topic", ["cleanup.policy"])
+    #   #=> { "cleanup.policy" => "delete" }
+    #
+    # @param name [String] the name of the topic.
+    # @param configs [Array<String>] array of desired config names.
+    # @return [Hash<String, String>]
+    def describe_topic(name, configs = [])
+      @cluster.describe_topic(name, configs)
+    end
+
+    # Alter the configuration of a topic.
+    #
+    # Configuration keys must match
+    # [Kafka's topic-level configs](https://kafka.apache.org/documentation/#topicconfigs).
+    #
+    # @note This is an alpha level API and is subject to change.
+    #
+    # @example Describing the cleanup policy config of a topic
+    #   kafka = Kafka.new(["kafka1:9092"])
+    #   kafka.alter_topic("my-topic", "cleanup.policy" => "delete", "max.message.byte" => "100000")
+    #
+    # @param name [String] the name of the topic.
+    # @param configs [Hash<String, String>] hash of desired config keys and values.
+    # @return [nil]
+    def alter_topic(name, configs = {})
+      @cluster.alter_topic(name, configs)
+    end
+
+    # Describe a consumer group
+    #
+    # @param group_id [String] the id of the consumer group
+    # @return [Kafka::Protocol::DescribeGroupsResponse::Group]
+    def describe_group(group_id)
+      @cluster.describe_group(group_id)
+    end
+
+    # Fetch all committed offsets for a consumer group
+    #
+    # @param group_id [String] the id of the consumer group
+    # @return [Hash<String, Hash<Integer, Kafka::Protocol::OffsetFetchResponse::PartitionOffsetInfo>>]
+    def fetch_group_offsets(group_id)
+      @cluster.fetch_group_offsets(group_id)
+    end
+
+    # Create partitions for a topic.
+    #
+    # @param name [String] the name of the topic.
+    # @param num_partitions [Integer] the number of desired partitions for
+    #   the topic
+    # @param timeout [Integer] a duration of time to wait for the new
+    #   partitions to be added.
+    # @return [nil]
+    def create_partitions_for(name, num_partitions: 1, timeout: 30)
+      @cluster.create_partitions_for(name, num_partitions: num_partitions, timeout: timeout)
+    end
+
+    # Lists all topics in the cluster.
+    #
+    # @return [Array<String>] the list of topic names.
+    def topics
+      attempts = 0
+      begin
+        attempts += 1
+        @cluster.list_topics
+      rescue Kafka::ConnectionError
+        @cluster.mark_as_stale!
+        retry unless attempts > 1
+        raise
+      end
+    end
+
+    # Lists all consumer groups in the cluster
+    #
+    # @return [Array<String>] the list of group ids
+    def groups
+      @cluster.list_groups
+    end
+
+    def has_topic?(topic)
+      @cluster.clear_target_topics
+      @cluster.add_target_topics([topic])
+      @cluster.topics.include?(topic)
+    end
+
+    # Counts the number of partitions in a topic.
+    #
+    # @param topic [String]
+    # @return [Integer] the number of partitions in the topic.
+    def partitions_for(topic)
+      @cluster.partitions_for(topic).count
+    end
+
+    # Counts the number of replicas for a topic's partition
+    #
+    # @param topic [String]
+    # @return [Integer] the number of replica nodes for the topic's partition
+    def replica_count_for(topic)
+      @cluster.partitions_for(topic).first.replicas.count
+    end
+
+    # Retrieve the offset of the last message in a partition. If there are no
+    # messages in the partition -1 is returned.
+    #
+    # @param topic [String]
+    # @param partition [Integer]
+    # @return [Integer] the offset of the last message in the partition, or -1 if
+    #   there are no messages in the partition.
+    def last_offset_for(topic, partition)
+      # The offset resolution API will return the offset of the "next" message to
+      # be written when resolving the "latest" offset, so we subtract one.
+      @cluster.resolve_offset(topic, partition, :latest) - 1
+    end
+
+    # Retrieve the offset of the last message in each partition of the specified topics.
+    #
+    # @param topics [Array<String>] topic names.
+    # @return [Hash<String, Hash<Integer, Integer>>]
+    # @example
+    #   last_offsets_for('topic-1', 'topic-2') # =>
+    #   # {
+    #   #   'topic-1' => { 0 => 100, 1 => 100 },
+    #   #   'topic-2' => { 0 => 100, 1 => 100 }
+    #   # }
+    def last_offsets_for(*topics)
+      @cluster.add_target_topics(topics)
+      topics.map {|topic|
+        partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
+        partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
+        [topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
+      }.to_h
+    end
+
+    # Check whether current cluster supports a specific version or not
+    #
+    # @param api_key [Integer] API key.
+    # @param version [Integer] API version.
+    # @return [Boolean]
+    def supports_api?(api_key, version = nil)
+      @cluster.supports_api?(api_key, version)
+    end
+
+    def apis
+      @cluster.apis
+    end
+
+    # List all brokers in the cluster.
+    #
+    # @return [Array<Kafka::BrokerInfo>] the list of brokers.
+    def brokers
+      @cluster.cluster_info.brokers
+    end
+
+    # The current controller broker in the cluster.
+    #
+    # @return [Kafka::BrokerInfo] information on the controller broker.
+    def controller_broker
+      brokers.find {|broker| broker.node_id == @cluster.cluster_info.controller_id }
+    end
+
+    # Closes all connections to the Kafka brokers and frees up used resources.
+    #
+    # @return [nil]
+    def close
+      @cluster.disconnect
+    end
+
+    private
+
+    def initialize_cluster
+      broker_pool = BrokerPool.new(
+        connection_builder: @connection_builder,
+        logger: @logger,
+      )
+
+      Cluster.new(
+        seed_brokers: @seed_brokers,
+        broker_pool: broker_pool,
+        logger: @logger,
+        resolve_seed_brokers: @resolve_seed_brokers,
+      )
+    end
+
+    def normalize_seed_brokers(seed_brokers)
+      if seed_brokers.is_a?(String)
+        seed_brokers = seed_brokers.split(",")
+      end
+
+      seed_brokers.map {|str| BrokerUri.parse(str) }
+    end
+  end
+end
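
For orientation, a minimal connect-and-produce sketch against the client shown above. It assumes the gem is loaded via require "kafka" and that Kafka.new forwards its keyword arguments to Kafka::Client#initialize, as in upstream ruby-kafka; the broker host, AWS region, and credentials are illustrative placeholders, and the sasl_aws_msk_iam_* keywords are taken directly from the initialize signature in this diff. The producer calls after deliver_message follow the upstream ruby-kafka Producer API (data/lib/kafka/producer.rb), not code shown in this file.

require "kafka"

# Connect to an Amazon MSK cluster over TLS using the AWS MSK IAM SASL mechanism
# added by this gem. Host, region, and credentials below are placeholders.
kafka = Kafka.new(
  ["b-1.example-cluster.kafka.us-east-1.amazonaws.com:9098"],
  client_id: "example-app",
  ssl_ca_certs_from_system: true,
  sasl_aws_msk_iam_access_key_id: ENV.fetch("AWS_ACCESS_KEY_ID"),
  sasl_aws_msk_iam_secret_key_id: ENV.fetch("AWS_SECRET_ACCESS_KEY"),
  sasl_aws_msk_iam_aws_region: "us-east-1"
)

# Low-throughput, synchronous delivery via Kafka::Client#deliver_message.
kafka.deliver_message("Hello, World!", topic: "greetings", retries: 2)

# Buffered delivery via Kafka::Client#producer; produce/deliver_messages/shutdown
# come from the upstream Producer class.
producer = kafka.producer(required_acks: :all, max_retries: 2)
producer.produce("Hello again", topic: "greetings", partition_key: "greeter-1")
producer.deliver_messages
producer.shutdown

# Close the client's broker connections when done (Kafka::Client#close).
kafka.close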
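
A matching consumption sketch, using only methods defined in this file (fetch_messages and each_message). The topic name is a placeholder, and note that each_message loops indefinitely, fetching new batches as they arrive.

# Fetch a single batch from one partition, starting at the earliest offset.
messages = kafka.fetch_messages(topic: "greetings", partition: 0, offset: :earliest)
messages.each do |message|
  puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"
end

# Or enumerate every message in the topic across all partitions (runs forever).
kafka.each_message(topic: "greetings", start_from_beginning: true) do |message|
  puts message.value
end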