ruby-kafka-temp-fork 0.0.1

Files changed (144)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +310 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1342 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka.rb +373 -0
  28. data/lib/kafka/async_producer.rb +291 -0
  29. data/lib/kafka/broker.rb +217 -0
  30. data/lib/kafka/broker_info.rb +16 -0
  31. data/lib/kafka/broker_pool.rb +41 -0
  32. data/lib/kafka/broker_uri.rb +43 -0
  33. data/lib/kafka/client.rb +833 -0
  34. data/lib/kafka/cluster.rb +513 -0
  35. data/lib/kafka/compression.rb +45 -0
  36. data/lib/kafka/compressor.rb +86 -0
  37. data/lib/kafka/connection.rb +223 -0
  38. data/lib/kafka/connection_builder.rb +33 -0
  39. data/lib/kafka/consumer.rb +642 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/consumer_group/assignor.rb +63 -0
  42. data/lib/kafka/crc32_hash.rb +15 -0
  43. data/lib/kafka/datadog.rb +420 -0
  44. data/lib/kafka/digest.rb +22 -0
  45. data/lib/kafka/fetch_operation.rb +115 -0
  46. data/lib/kafka/fetched_batch.rb +58 -0
  47. data/lib/kafka/fetched_batch_generator.rb +120 -0
  48. data/lib/kafka/fetched_message.rb +48 -0
  49. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  50. data/lib/kafka/fetcher.rb +224 -0
  51. data/lib/kafka/gzip_codec.rb +34 -0
  52. data/lib/kafka/heartbeat.rb +25 -0
  53. data/lib/kafka/instrumenter.rb +38 -0
  54. data/lib/kafka/interceptors.rb +33 -0
  55. data/lib/kafka/lz4_codec.rb +27 -0
  56. data/lib/kafka/message_buffer.rb +87 -0
  57. data/lib/kafka/murmur2_hash.rb +17 -0
  58. data/lib/kafka/offset_manager.rb +259 -0
  59. data/lib/kafka/partitioner.rb +40 -0
  60. data/lib/kafka/pause.rb +92 -0
  61. data/lib/kafka/pending_message.rb +29 -0
  62. data/lib/kafka/pending_message_queue.rb +41 -0
  63. data/lib/kafka/produce_operation.rb +205 -0
  64. data/lib/kafka/producer.rb +528 -0
  65. data/lib/kafka/prometheus.rb +316 -0
  66. data/lib/kafka/protocol.rb +225 -0
  67. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  68. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  69. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  70. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  71. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  72. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  73. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  74. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  75. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  76. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  77. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  78. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  79. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  80. data/lib/kafka/protocol/decoder.rb +175 -0
  81. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  82. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  83. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  84. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  85. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  86. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  87. data/lib/kafka/protocol/encoder.rb +184 -0
  88. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  89. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  90. data/lib/kafka/protocol/fetch_request.rb +70 -0
  91. data/lib/kafka/protocol/fetch_response.rb +136 -0
  92. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  93. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  94. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  95. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  96. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  97. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  98. data/lib/kafka/protocol/join_group_request.rb +47 -0
  99. data/lib/kafka/protocol/join_group_response.rb +41 -0
  100. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  101. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  102. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  103. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  104. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  105. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  106. data/lib/kafka/protocol/member_assignment.rb +42 -0
  107. data/lib/kafka/protocol/message.rb +172 -0
  108. data/lib/kafka/protocol/message_set.rb +55 -0
  109. data/lib/kafka/protocol/metadata_request.rb +31 -0
  110. data/lib/kafka/protocol/metadata_response.rb +185 -0
  111. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  112. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  113. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  114. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  115. data/lib/kafka/protocol/produce_request.rb +94 -0
  116. data/lib/kafka/protocol/produce_response.rb +63 -0
  117. data/lib/kafka/protocol/record.rb +88 -0
  118. data/lib/kafka/protocol/record_batch.rb +223 -0
  119. data/lib/kafka/protocol/request_message.rb +26 -0
  120. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  121. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  122. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  123. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  124. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  125. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  126. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +61 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +188 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/ruby-kafka-temp-fork.rb +5 -0
  143. data/ruby-kafka-temp-fork.gemspec +54 -0
  144. metadata +520 -0

data/lib/kafka/broker_info.rb
@@ -0,0 +1,16 @@
+ # Represents a broker in a Kafka cluster.
+ module Kafka
+   class BrokerInfo
+     attr_reader :node_id, :host, :port
+
+     def initialize(node_id:, host:, port:)
+       @node_id = node_id
+       @host = host
+       @port = port
+     end
+
+     def to_s
+       "#{host}:#{port} (node_id=#{node_id})"
+     end
+   end
+ end
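
Below is a brief usage sketch for the value object above; it is an illustration, not part of the gem's diff. The node id and host are made up, and in practice `Kafka::BrokerInfo` instances come back from metadata calls such as `Kafka::Client#brokers` rather than being built by hand.

    require "kafka"

    # Hypothetical broker coordinates, for illustration only.
    info = Kafka::BrokerInfo.new(node_id: 1, host: "kafka1.example.com", port: 9092)
    puts info.to_s  # => "kafka1.example.com:9092 (node_id=1)"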

data/lib/kafka/broker_pool.rb
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ require "kafka/broker"
+
+ module Kafka
+   class BrokerPool
+     def initialize(connection_builder:, logger:)
+       @logger = TaggedLogger.new(logger)
+       @connection_builder = connection_builder
+       @brokers = {}
+     end
+
+     def connect(host, port, node_id: nil)
+       if @brokers.key?(node_id)
+         broker = @brokers.fetch(node_id)
+         return broker if broker.address_match?(host, port)
+         broker.disconnect
+         @brokers[node_id] = nil
+       end
+
+       broker = Broker.new(
+         connection_builder: @connection_builder,
+         host: host,
+         port: port,
+         node_id: node_id,
+         logger: @logger,
+       )
+
+       @brokers[node_id] = broker unless node_id.nil?
+
+       broker
+     end
+
+     def close
+       @brokers.each do |id, broker|
+         @logger.info "Disconnecting broker #{id}"
+         broker.disconnect
+       end
+     end
+   end
+ end
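
A hedged sketch of the pool's caching behaviour follows; again this is illustration, not part of the diff. The connection builder is a stand-in (a real `Kafka::ConnectionBuilder` is normally supplied by `Kafka::Client`), and the sketch assumes brokers open their sockets lazily on the first request, as in upstream ruby-kafka.

    require "logger"
    require "kafka"

    # Stand-in builder; Kafka::Client wires in a fully configured Kafka::ConnectionBuilder.
    connection_builder = Object.new

    pool = Kafka::BrokerPool.new(connection_builder: connection_builder, logger: Logger.new($stdout))

    # Brokers are cached by node_id: a second call with the same node and address
    # returns the existing Kafka::Broker rather than building a new one.
    broker = pool.connect("kafka1.example.com", 9092, node_id: 1)
    cached = pool.connect("kafka1.example.com", 9092, node_id: 1)

    pool.close  # logs "Disconnecting broker 1" and disconnects every cached broker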

data/lib/kafka/broker_uri.rb
@@ -0,0 +1,43 @@
+ # frozen_string_literal: true
+
+ require "uri"
+
+ module Kafka
+   module BrokerUri
+     DEFAULT_PORT = 9092
+     URI_SCHEMES = ["kafka", "kafka+ssl", "plaintext", "ssl"]
+
+     # Parses a Kafka broker URI string.
+     #
+     # Examples of valid strings:
+     # * `kafka1.something`
+     # * `kafka1.something:1234`
+     # * `kafka://kafka1.something:1234`
+     # * `kafka+ssl://kafka1.something:1234`
+     # * `plaintext://kafka1.something:1234`
+     #
+     # @param str [String] a Kafka broker URI string.
+     # @return [URI]
+     def self.parse(str)
+       # Make sure there's a scheme part if it's missing.
+       str = "kafka://" + str unless str.include?("://")
+
+       uri = URI.parse(str)
+       uri.port ||= DEFAULT_PORT
+
+       # Map some schemes to others.
+       case uri.scheme
+       when 'plaintext'
+         uri.scheme = 'kafka'
+       when 'ssl'
+         uri.scheme = 'kafka+ssl'
+       end
+
+       unless URI_SCHEMES.include?(uri.scheme)
+         raise Kafka::Error, "invalid protocol `#{uri.scheme}` in `#{str}`"
+       end
+
+       uri
+     end
+   end
+ end
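
The parsing rules documented above can be checked directly; a small sketch with placeholder hostnames:

    require "kafka"

    uri = Kafka::BrokerUri.parse("kafka1.example.com")
    uri.scheme  # => "kafka"  (a missing scheme is added)
    uri.port    # => 9092     (DEFAULT_PORT is filled in)

    Kafka::BrokerUri.parse("plaintext://kafka1.example.com:1234").scheme  # => "kafka"
    Kafka::BrokerUri.parse("ssl://kafka1.example.com:1234").scheme        # => "kafka+ssl"

    # Unrecognized schemes are rejected.
    Kafka::BrokerUri.parse("http://kafka1.example.com")  # raises Kafka::Error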

data/lib/kafka/client.rb
@@ -0,0 +1,833 @@
+ # coding: utf-8
+ # frozen_string_literal: true
+
+ require "kafka/ssl_context"
+ require "kafka/cluster"
+ require "kafka/transaction_manager"
+ require "kafka/broker_info"
+ require "kafka/producer"
+ require "kafka/consumer"
+ require "kafka/heartbeat"
+ require "kafka/broker_uri"
+ require "kafka/async_producer"
+ require "kafka/fetched_message"
+ require "kafka/fetch_operation"
+ require "kafka/connection_builder"
+ require "kafka/instrumenter"
+ require "kafka/sasl_authenticator"
+ require "kafka/tagged_logger"
+
+ module Kafka
+   class Client
+     # Initializes a new Kafka client.
+     #
+     # @param seed_brokers [Array<String>, String] the list of brokers used to initialize
+     #   the client. Either an Array of connections, or a comma separated string of connections.
+     #   A connection can either be a string of "host:port" or a full URI with a scheme.
+     #   If there's a scheme it's ignored and only host/port are used.
+     #
+     # @param client_id [String] the identifier for this application.
+     #
+     # @param logger [Logger] the logger that should be used by the client.
+     #
+     # @param connect_timeout [Integer, nil] the timeout setting for connecting
+     #   to brokers. See {BrokerPool#initialize}.
+     #
+     # @param socket_timeout [Integer, nil] the timeout setting for socket
+     #   connections. See {BrokerPool#initialize}.
+     #
+     # @param ssl_ca_cert [String, Array<String>, nil] a PEM encoded CA cert, or an Array of
+     #   PEM encoded CA certs, to use with an SSL connection.
+     #
+     # @param ssl_ca_cert_file_path [String, Array<String>, nil] a path on the filesystem, or an
+     #   Array of paths, to PEM encoded CA cert(s) to use with an SSL connection.
+     #
+     # @param ssl_client_cert [String, nil] a PEM encoded client cert to use with an
+     #   SSL connection. Must be used in combination with ssl_client_cert_key.
+     #
+     # @param ssl_client_cert_key [String, nil] a PEM encoded client cert key to use with an
+     #   SSL connection. Must be used in combination with ssl_client_cert.
+     #
+     # @param ssl_client_cert_key_password [String, nil] the password required to read the
+     #   ssl_client_cert_key. Must be used in combination with ssl_client_cert_key.
+     #
+     # @param sasl_gssapi_principal [String, nil] a KRB5 principal
+     #
+     # @param sasl_gssapi_keytab [String, nil] a KRB5 keytab filepath
+     #
+     # @param sasl_scram_username [String, nil] SCRAM username
+     #
+     # @param sasl_scram_password [String, nil] SCRAM password
+     #
+     # @param sasl_scram_mechanism [String, nil] Scram mechanism, either "sha256" or "sha512"
+     #
+     # @param sasl_over_ssl [Boolean] whether to enforce SSL with SASL
+     #
+     # @param ssl_ca_certs_from_system [Boolean] whether to use the CA certs from the
+     #   system's default certificate store.
+     #
+     # @param partitioner [Partitioner, nil] the partitioner that should be used by the client.
+     #
+     # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
+     #   implements method token. See {Sasl::OAuth#initialize}
+     #
+     # @param ssl_verify_hostname [Boolean, true] whether to verify that the host serving
+     #   the SSL certificate and the signing chain of the certificate have the correct domains
+     #   based on the CA certificate
+     #
+     # @param resolve_seed_brokers [Boolean] whether to resolve each hostname of the seed brokers.
+     #   If a broker is resolved to multiple IP addresses, the client tries to connect to each
+     #   of the addresses until it can connect.
+     #
+     # @return [Client]
+     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
+                    ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
+                    ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
+                    sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
+                    sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
+                    sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true,
+                    resolve_seed_brokers: false)
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = Instrumenter.new(client_id: client_id)
+       @seed_brokers = normalize_seed_brokers(seed_brokers)
+       @resolve_seed_brokers = resolve_seed_brokers
+
+       ssl_context = SslContext.build(
+         ca_cert_file_path: ssl_ca_cert_file_path,
+         ca_cert: ssl_ca_cert,
+         client_cert: ssl_client_cert,
+         client_cert_key: ssl_client_cert_key,
+         client_cert_key_password: ssl_client_cert_key_password,
+         client_cert_chain: ssl_client_cert_chain,
+         ca_certs_from_system: ssl_ca_certs_from_system,
+         verify_hostname: ssl_verify_hostname
+       )
+
+       sasl_authenticator = SaslAuthenticator.new(
+         sasl_gssapi_principal: sasl_gssapi_principal,
+         sasl_gssapi_keytab: sasl_gssapi_keytab,
+         sasl_plain_authzid: sasl_plain_authzid,
+         sasl_plain_username: sasl_plain_username,
+         sasl_plain_password: sasl_plain_password,
+         sasl_scram_username: sasl_scram_username,
+         sasl_scram_password: sasl_scram_password,
+         sasl_scram_mechanism: sasl_scram_mechanism,
+         sasl_oauth_token_provider: sasl_oauth_token_provider,
+         logger: @logger
+       )
+
+       if sasl_authenticator.enabled? && sasl_over_ssl && ssl_context.nil?
+         raise ArgumentError, "SASL authentication requires that SSL is configured"
+       end
+
+       @connection_builder = ConnectionBuilder.new(
+         client_id: client_id,
+         connect_timeout: connect_timeout,
+         socket_timeout: socket_timeout,
+         ssl_context: ssl_context,
+         logger: @logger,
+         instrumenter: @instrumenter,
+         sasl_authenticator: sasl_authenticator
+       )
+
+       @cluster = initialize_cluster
+       @partitioner = partitioner || Partitioner.new
+     end
+
+     # Delivers a single message to the Kafka cluster.
+     #
+     # **Note:** Only use this API for low-throughput scenarios. If you want to deliver
+     # many messages at a high rate, or if you want to configure the way messages are
+     # sent, use the {#producer} or {#async_producer} APIs instead.
+     #
+     # @param value [String, nil] the message value.
+     # @param key [String, nil] the message key.
+     # @param headers [Hash<String, String>] the headers for the message.
+     # @param topic [String] the topic that the message should be written to.
+     # @param partition [Integer, nil] the partition that the message should be written
+     #   to, or `nil` if either `partition_key` is passed or the partition should be
+     #   chosen at random.
+     # @param partition_key [String] a value used to deterministically choose a
+     #   partition to write to.
+     # @param retries [Integer] the number of times to retry the delivery before giving
+     #   up.
+     # @return [nil]
+     def deliver_message(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, retries: 1)
+       create_time = Time.now
+
+       # We want to fail fast if `topic` isn't a String
+       topic = topic.to_str
+
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: headers,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       if partition.nil?
+         partition_count = @cluster.partitions_for(topic).count
+         partition = @partitioner.call(partition_count, message)
+       end
+
+       buffer = MessageBuffer.new
+
+       buffer.write(
+         value: message.value,
+         key: message.key,
+         headers: message.headers,
+         topic: message.topic,
+         partition: partition,
+         create_time: message.create_time,
+       )
+
+       @cluster.add_target_topics([topic])
+
+       compressor = Compressor.new(
+         instrumenter: @instrumenter,
+       )
+
+       transaction_manager = TransactionManager.new(
+         cluster: @cluster,
+         logger: @logger,
+         idempotent: false,
+         transactional: false
+       )
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: transaction_manager,
+         buffer: buffer,
+         required_acks: 1,
+         ack_timeout: 10,
+         compressor: compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       attempt = 1
+
+       begin
+         @cluster.refresh_metadata_if_necessary!
+
+         operation.execute
+
+         unless buffer.empty?
+           raise DeliveryFailed.new(nil, [message])
+         end
+       rescue Kafka::Error => e
+         @cluster.mark_as_stale!
+
+         if attempt >= (retries + 1)
+           raise
+         else
+           attempt += 1
+           @logger.warn "Error while delivering message, #{e.class}: #{e.message}; retrying after 1s..."
+
+           sleep 1
+
+           retry
+         end
+       end
+     end
+
+     # Initializes a new Kafka producer.
+     #
+     # @param ack_timeout [Integer] The number of seconds a broker can wait for
+     #   replicas to acknowledge a write before responding with a timeout.
+     #
+     # @param required_acks [Integer, Symbol] The number of replicas that must acknowledge
+     #   a write, or `:all` if all in-sync replicas must acknowledge.
+     #
+     # @param max_retries [Integer] the number of retries that should be attempted
+     #   before giving up sending messages to the cluster. Does not include the
+     #   original attempt.
+     #
+     # @param retry_backoff [Integer] the number of seconds to wait between retries.
+     #
+     # @param max_buffer_size [Integer] the number of messages allowed in the buffer
+     #   before new writes will raise {BufferOverflow} exceptions.
+     #
+     # @param max_buffer_bytesize [Integer] the maximum size of the buffer in bytes.
+     #   Attempting to produce messages when the buffer reaches this size will
+     #   result in {BufferOverflow} being raised.
+     #
+     # @param compression_codec [Symbol, nil] the name of the compression codec to
+     #   use, or nil if no compression should be performed. Valid codecs: `:snappy`,
+     #   `:gzip`, `:lz4`, `:zstd`
+     #
+     # @param compression_threshold [Integer] the number of messages that need to
+     #   be in a message set before it should be compressed. Note that message sets
+     #   are per-partition rather than per-topic or per-producer.
+     #
+     # @param interceptors [Array<Object>] a list of producer interceptors that implement
+     #   `call(Kafka::PendingMessage)`.
+     #
+     # @return [Kafka::Producer] the Kafka producer.
+     def producer(
+       compression_codec: nil,
+       compression_threshold: 1,
+       ack_timeout: 5,
+       required_acks: :all,
+       max_retries: 2,
+       retry_backoff: 1,
+       max_buffer_size: 1000,
+       max_buffer_bytesize: 10_000_000,
+       idempotent: false,
+       transactional: false,
+       transactional_id: nil,
+       transactional_timeout: 60,
+       interceptors: []
+     )
+       cluster = initialize_cluster
+       compressor = Compressor.new(
+         codec_name: compression_codec,
+         threshold: compression_threshold,
+         instrumenter: @instrumenter,
+       )
+
+       transaction_manager = TransactionManager.new(
+         cluster: cluster,
+         logger: @logger,
+         idempotent: idempotent,
+         transactional: transactional,
+         transactional_id: transactional_id,
+         transactional_timeout: transactional_timeout,
+       )
+
+       Producer.new(
+         cluster: cluster,
+         transaction_manager: transaction_manager,
+         logger: @logger,
+         instrumenter: @instrumenter,
+         compressor: compressor,
+         ack_timeout: ack_timeout,
+         required_acks: required_acks,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         max_buffer_size: max_buffer_size,
+         max_buffer_bytesize: max_buffer_bytesize,
+         partitioner: @partitioner,
+         interceptors: interceptors
+       )
+     end
+
+     # Creates a new AsyncProducer instance.
+     #
+     # All parameters allowed by {#producer} can be passed. In addition to this,
+     # a few extra parameters can be passed when creating an async producer.
+     #
+     # @param max_queue_size [Integer] the maximum number of messages allowed in
+     #   the queue.
+     # @param delivery_threshold [Integer] if greater than zero, the number of
+     #   buffered messages that will automatically trigger a delivery.
+     # @param delivery_interval [Integer] if greater than zero, the number of
+     #   seconds between automatic message deliveries.
+     #
+     # @see AsyncProducer
+     # @return [AsyncProducer]
+     def async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, max_retries: -1, retry_backoff: 0, **options)
+       sync_producer = producer(**options)
+
+       AsyncProducer.new(
+         sync_producer: sync_producer,
+         delivery_interval: delivery_interval,
+         delivery_threshold: delivery_threshold,
+         max_queue_size: max_queue_size,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         instrumenter: @instrumenter,
+         logger: @logger,
+       )
+     end
+
+     # Creates a new Kafka consumer.
+     #
+     # @param group_id [String] the id of the group that the consumer should join.
+     # @param session_timeout [Integer] the number of seconds after which, if a client
+     #   hasn't contacted the Kafka cluster, it will be kicked out of the group.
+     # @param offset_commit_interval [Integer] the interval between offset commits,
+     #   in seconds.
+     # @param offset_commit_threshold [Integer] the number of messages that can be
+     #   processed before their offsets are committed. If zero, offset commits are
+     #   not triggered by message processing.
+     # @param heartbeat_interval [Integer] the interval between heartbeats; must be less
+     #   than the session window.
+     # @param offset_retention_time [Integer] the time period that committed
+     #   offsets will be retained, in seconds. Defaults to the broker setting.
+     # @param fetcher_max_queue_size [Integer] max number of items in the fetch queue that
+     #   are stored for further processing. Note that each item in the queue represents a
+     #   response from a single broker.
+     # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
+     #   If it is 0, the topic list won't be refreshed (default)
+     #   If it is n (n > 0), the topic list will be refreshed every n seconds
+     # @param interceptors [Array<Object>] a list of consumer interceptors that implement
+     #   `call(Kafka::FetchedBatch)`.
+     # @param assignment_strategy [Object] a partition assignment strategy that
+     #   implements `protocol_type()`, `user_data()`, and `assign(members:, partitions:)`
+     # @return [Consumer]
+     def consumer(
+       group_id:,
+       session_timeout: 30,
+       rebalance_timeout: 60,
+       offset_commit_interval: 10,
+       offset_commit_threshold: 0,
+       heartbeat_interval: 10,
+       offset_retention_time: nil,
+       fetcher_max_queue_size: 100,
+       refresh_topic_interval: 0,
+       interceptors: [],
+       assignment_strategy: nil
+     )
+       cluster = initialize_cluster
+
+       instrumenter = DecoratingInstrumenter.new(@instrumenter, {
+         group_id: group_id,
+       })
+
+       # The Kafka protocol expects the retention time to be in ms.
+       retention_time = (offset_retention_time && offset_retention_time * 1_000) || -1
+
+       group = ConsumerGroup.new(
+         cluster: cluster,
+         logger: @logger,
+         group_id: group_id,
+         session_timeout: session_timeout,
+         rebalance_timeout: rebalance_timeout,
+         retention_time: retention_time,
+         instrumenter: instrumenter,
+         assignment_strategy: assignment_strategy
+       )
+
+       fetcher = Fetcher.new(
+         cluster: initialize_cluster,
+         group: group,
+         logger: @logger,
+         instrumenter: instrumenter,
+         max_queue_size: fetcher_max_queue_size
+       )
+
+       offset_manager = OffsetManager.new(
+         cluster: cluster,
+         group: group,
+         fetcher: fetcher,
+         logger: @logger,
+         commit_interval: offset_commit_interval,
+         commit_threshold: offset_commit_threshold,
+         offset_retention_time: offset_retention_time
+       )
+
+       heartbeat = Heartbeat.new(
+         group: group,
+         interval: heartbeat_interval,
+         instrumenter: instrumenter
+       )
+
+       Consumer.new(
+         cluster: cluster,
+         logger: @logger,
+         instrumenter: instrumenter,
+         group: group,
+         offset_manager: offset_manager,
+         fetcher: fetcher,
+         session_timeout: session_timeout,
+         heartbeat: heartbeat,
+         refresh_topic_interval: refresh_topic_interval,
+         interceptors: interceptors
+       )
+     end
+
+     # Fetches a batch of messages from a single partition. Note that it's possible
+     # to get back empty batches.
+     #
+     # The starting point for the fetch can be configured with the `:offset` argument.
+     # If you pass a number, the fetch will start at that offset. However, there are
+     # two special Symbol values that can be passed instead:
+     #
+     # * `:earliest` — the first offset in the partition.
+     # * `:latest` — the next offset that will be written to, effectively making the
+     #   call block until there is a new message in the partition.
+     #
+     # The Kafka protocol specifies the numeric values of these two options: -2 and -1,
+     # respectively. You can also pass in these numbers directly.
+     #
+     # ## Example
+     #
+     # When enumerating the messages in a partition, you typically fetch batches
+     # sequentially.
+     #
+     #     offset = :earliest
+     #
+     #     loop do
+     #       messages = kafka.fetch_messages(
+     #         topic: "my-topic",
+     #         partition: 42,
+     #         offset: offset,
+     #       )
+     #
+     #       messages.each do |message|
+     #         puts message.offset, message.key, message.value
+     #
+     #         # Set the next offset that should be read to be the subsequent
+     #         # offset.
+     #         offset = message.offset + 1
+     #       end
+     #     end
+     #
+     # See a working example in `examples/simple-consumer.rb`.
+     #
+     # @param topic [String] the topic that messages should be fetched from.
+     #
+     # @param partition [Integer] the partition that messages should be fetched from.
+     #
+     # @param offset [Integer, Symbol] the offset to start reading from. Default is
+     #   the latest offset.
+     #
+     # @param max_wait_time [Integer] the maximum amount of time to wait before
+     #   the server responds, in seconds.
+     #
+     # @param min_bytes [Integer] the minimum number of bytes to wait for. If set to
+     #   zero, the broker will respond immediately, but the response may be empty.
+     #   The default is 1 byte, which means that the broker will respond as soon as
+     #   a message is written to the partition.
+     #
+     # @param max_bytes [Integer] the maximum number of bytes to include in the
+     #   response message set. Default is 1 MB. You need to set this higher if you
+     #   expect messages to be larger than this.
+     #
+     # @return [Array<Kafka::FetchedMessage>] the messages returned from the broker.
+     def fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, retries: 1)
+       operation = FetchOperation.new(
+         cluster: @cluster,
+         logger: @logger,
+         min_bytes: min_bytes,
+         max_bytes: max_bytes,
+         max_wait_time: max_wait_time,
+       )
+
+       operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
+
+       attempt = 1
+
+       begin
+         operation.execute.flat_map {|batch| batch.messages }
+       rescue Kafka::Error => e
+         @cluster.mark_as_stale!
+
+         if attempt >= (retries + 1)
+           raise
+         else
+           attempt += 1
+           @logger.warn "Error while fetching messages, #{e.class}: #{e.message}; retrying..."
+           retry
+         end
+       end
+     end
+
+     # Enumerate all messages in a topic.
+     #
+     # @param topic [String] the topic to consume messages from.
+     #
+     # @param start_from_beginning [Boolean] whether to start from the beginning
+     #   of the topic or just subscribe to new messages being produced.
+     #
+     # @param max_wait_time [Integer] the maximum amount of time to wait before
+     #   the server responds, in seconds.
+     #
+     # @param min_bytes [Integer] the minimum number of bytes to wait for. If set to
+     #   zero, the broker will respond immediately, but the response may be empty.
+     #   The default is 1 byte, which means that the broker will respond as soon as
+     #   a message is written to the partition.
+     #
+     # @param max_bytes [Integer] the maximum number of bytes to include in the
+     #   response message set. Default is 1 MB. You need to set this higher if you
+     #   expect messages to be larger than this.
+     #
+     # @return [nil]
+     def each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block)
+       default_offset ||= start_from_beginning ? :earliest : :latest
+       offsets = Hash.new { default_offset }
+
+       loop do
+         operation = FetchOperation.new(
+           cluster: @cluster,
+           logger: @logger,
+           min_bytes: min_bytes,
+           max_wait_time: max_wait_time,
+         )
+
+         @cluster.partitions_for(topic).map(&:partition_id).each do |partition|
+           partition_offset = offsets[partition]
+           operation.fetch_from_partition(topic, partition, offset: partition_offset, max_bytes: max_bytes)
+         end
+
+         batches = operation.execute
+
+         batches.each do |batch|
+           batch.messages.each(&block)
+           offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
+         end
+       end
+     end
+
+     # Describe broker configs
+     #
+     # @param broker_id [Integer] the id of the broker
+     # @param configs [Array] array of config keys.
+     # @return [Array<Kafka::Protocol::DescribeConfigsResponse::ConfigEntry>]
+     def describe_configs(broker_id, configs = [])
+       @cluster.describe_configs(broker_id, configs)
+     end
+
+     # Alter broker configs
+     #
+     # @param broker_id [Integer] the id of the broker
+     # @param configs [Array] array of config strings.
+     # @return [nil]
+     def alter_configs(broker_id, configs = [])
+       @cluster.alter_configs(broker_id, configs)
+     end
+
+     # Creates a topic in the cluster.
+     #
+     # @example Creating a topic with log compaction
+     #   # Enable log compaction:
+     #   config = { "cleanup.policy" => "compact" }
+     #
+     #   # Create the topic:
+     #   kafka.create_topic("dns-mappings", config: config)
+     #
+     # @param name [String] the name of the topic.
+     # @param num_partitions [Integer] the number of partitions that should be created
+     #   in the topic.
+     # @param replication_factor [Integer] the replication factor of the topic.
+     # @param timeout [Integer] a duration of time to wait for the topic to be
+     #   completely created.
+     # @param config [Hash] topic configuration entries. See
+     #   [the Kafka documentation](https://kafka.apache.org/documentation/#topicconfigs)
+     #   for more information.
+     # @raise [Kafka::TopicAlreadyExists] if the topic already exists.
+     # @return [nil]
+     def create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30, config: {})
+       @cluster.create_topic(
+         name,
+         num_partitions: num_partitions,
+         replication_factor: replication_factor,
+         timeout: timeout,
+         config: config,
+       )
+     end
+
+     # Delete a topic in the cluster.
+     #
+     # @param name [String] the name of the topic.
+     # @param timeout [Integer] a duration of time to wait for the topic to be
+     #   completely marked deleted.
+     # @return [nil]
+     def delete_topic(name, timeout: 30)
+       @cluster.delete_topic(name, timeout: timeout)
+     end
+
+     # Describe the configuration of a topic.
+     #
+     # Retrieves the topic configuration from the Kafka brokers. Configuration names
+     # refer to [Kafka's topic-level configs](https://kafka.apache.org/documentation/#topicconfigs).
+     #
+     # @note This is an alpha level API and is subject to change.
+     #
+     # @example Describing the cleanup policy config of a topic
+     #   kafka = Kafka.new(["kafka1:9092"])
+     #   kafka.describe_topic("my-topic", ["cleanup.policy"])
+     #   #=> { "cleanup.policy" => "delete" }
+     #
+     # @param name [String] the name of the topic.
+     # @param configs [Array<String>] array of desired config names.
+     # @return [Hash<String, String>]
+     def describe_topic(name, configs = [])
+       @cluster.describe_topic(name, configs)
+     end
+
+     # Alter the configuration of a topic.
+     #
+     # Configuration keys must match
+     # [Kafka's topic-level configs](https://kafka.apache.org/documentation/#topicconfigs).
+     #
+     # @note This is an alpha level API and is subject to change.
+     #
+     # @example Altering the cleanup policy config of a topic
+     #   kafka = Kafka.new(["kafka1:9092"])
+     #   kafka.alter_topic("my-topic", "cleanup.policy" => "delete", "max.message.bytes" => "100000")
+     #
+     # @param name [String] the name of the topic.
+     # @param configs [Hash<String, String>] hash of desired config keys and values.
+     # @return [nil]
+     def alter_topic(name, configs = {})
+       @cluster.alter_topic(name, configs)
+     end
+
+     # Describe a consumer group
+     #
+     # @param group_id [String] the id of the consumer group
+     # @return [Kafka::Protocol::DescribeGroupsResponse::Group]
+     def describe_group(group_id)
+       @cluster.describe_group(group_id)
+     end
+
+     # Fetch all committed offsets for a consumer group
+     #
+     # @param group_id [String] the id of the consumer group
+     # @return [Hash<String, Hash<Integer, Kafka::Protocol::OffsetFetchResponse::PartitionOffsetInfo>>]
+     def fetch_group_offsets(group_id)
+       @cluster.fetch_group_offsets(group_id)
+     end
+
+     # Create partitions for a topic.
+     #
+     # @param name [String] the name of the topic.
+     # @param num_partitions [Integer] the number of desired partitions for
+     #   the topic
+     # @param timeout [Integer] a duration of time to wait for the new
+     #   partitions to be added.
+     # @return [nil]
+     def create_partitions_for(name, num_partitions: 1, timeout: 30)
+       @cluster.create_partitions_for(name, num_partitions: num_partitions, timeout: timeout)
+     end
+
+     # Lists all topics in the cluster.
+     #
+     # @return [Array<String>] the list of topic names.
+     def topics
+       attempts = 0
+       begin
+         attempts += 1
+         @cluster.list_topics
+       rescue Kafka::ConnectionError
+         @cluster.mark_as_stale!
+         retry unless attempts > 1
+         raise
+       end
+     end
+
+     # Lists all consumer groups in the cluster
+     #
+     # @return [Array<String>] the list of group ids
+     def groups
+       @cluster.list_groups
+     end
+
+     def has_topic?(topic)
+       @cluster.clear_target_topics
+       @cluster.add_target_topics([topic])
+       @cluster.topics.include?(topic)
+     end
+
+     # Counts the number of partitions in a topic.
+     #
+     # @param topic [String]
+     # @return [Integer] the number of partitions in the topic.
+     def partitions_for(topic)
+       @cluster.partitions_for(topic).count
+     end
+
+     # Counts the number of replicas for a topic's partition
+     #
+     # @param topic [String]
+     # @return [Integer] the number of replica nodes for the topic's partition
+     def replica_count_for(topic)
+       @cluster.partitions_for(topic).first.replicas.count
+     end
+
+     # Retrieve the offset of the last message in a partition. If there are no
+     # messages in the partition -1 is returned.
+     #
+     # @param topic [String]
+     # @param partition [Integer]
+     # @return [Integer] the offset of the last message in the partition, or -1 if
+     #   there are no messages in the partition.
+     def last_offset_for(topic, partition)
+       # The offset resolution API will return the offset of the "next" message to
+       # be written when resolving the "latest" offset, so we subtract one.
+       @cluster.resolve_offset(topic, partition, :latest) - 1
+     end
+
+     # Retrieve the offset of the last message in each partition of the specified topics.
+     #
+     # @param topics [Array<String>] topic names.
+     # @return [Hash<String, Hash<Integer, Integer>>]
+     # @example
+     #   last_offsets_for('topic-1', 'topic-2') # =>
+     #   # {
+     #   #   'topic-1' => { 0 => 100, 1 => 100 },
+     #   #   'topic-2' => { 0 => 100, 1 => 100 }
+     #   # }
+     def last_offsets_for(*topics)
+       @cluster.add_target_topics(topics)
+       topics.map {|topic|
+         partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
+         partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
+         [topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
+       }.to_h
+     end
+
+     # Check whether the current cluster supports a specific version or not
+     #
+     # @param api_key [Integer] API key.
+     # @param version [Integer] API version.
+     # @return [Boolean]
+     def supports_api?(api_key, version = nil)
+       @cluster.supports_api?(api_key, version)
+     end
+
+     def apis
+       @cluster.apis
+     end
+
+     # List all brokers in the cluster.
+     #
+     # @return [Array<Kafka::BrokerInfo>] the list of brokers.
+     def brokers
+       @cluster.cluster_info.brokers
+     end
+
+     # The current controller broker in the cluster.
+     #
+     # @return [Kafka::BrokerInfo] information on the controller broker.
+     def controller_broker
+       brokers.find {|broker| broker.node_id == @cluster.cluster_info.controller_id }
+     end
+
+     # Closes all connections to the Kafka brokers and frees up used resources.
+     #
+     # @return [nil]
+     def close
+       @cluster.disconnect
+     end
+
+     private
+
+     def initialize_cluster
+       broker_pool = BrokerPool.new(
+         connection_builder: @connection_builder,
+         logger: @logger,
+       )
+
+       Cluster.new(
+         seed_brokers: @seed_brokers,
+         broker_pool: broker_pool,
+         logger: @logger,
+         resolve_seed_brokers: @resolve_seed_brokers,
+       )
+     end
+
+     def normalize_seed_brokers(seed_brokers)
+       if seed_brokers.is_a?(String)
+         seed_brokers = seed_brokers.split(",")
+       end
+
+       seed_brokers.map {|str| BrokerUri.parse(str) }
+     end
+   end
+ end
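
To tie the client API above together, here is a hedged end-to-end sketch; it is illustration, not part of the diff. Broker addresses, topic names, and the group id are placeholders, and error handling is omitted.

    require "kafka"

    kafka = Kafka::Client.new(seed_brokers: ["kafka1.example.com:9092"], client_id: "my-app")

    # One-off, low-throughput delivery.
    kafka.deliver_message("hello", topic: "greetings", partition_key: "user-1")

    # Buffered producer for higher throughput.
    producer = kafka.producer(required_acks: :all, compression_codec: :gzip)
    producer.produce("hello again", topic: "greetings")
    producer.deliver_messages
    producer.shutdown

    # Consumer group membership; #each_message blocks until the consumer is stopped.
    consumer = kafka.consumer(group_id: "greetings-group")
    consumer.subscribe("greetings", start_from_beginning: true)
    consumer.each_message do |message|
      puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"
    end

    kafka.close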