ruby-kafka-temp-fork 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (144)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +310 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1342 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka.rb +373 -0
  28. data/lib/kafka/async_producer.rb +291 -0
  29. data/lib/kafka/broker.rb +217 -0
  30. data/lib/kafka/broker_info.rb +16 -0
  31. data/lib/kafka/broker_pool.rb +41 -0
  32. data/lib/kafka/broker_uri.rb +43 -0
  33. data/lib/kafka/client.rb +833 -0
  34. data/lib/kafka/cluster.rb +513 -0
  35. data/lib/kafka/compression.rb +45 -0
  36. data/lib/kafka/compressor.rb +86 -0
  37. data/lib/kafka/connection.rb +223 -0
  38. data/lib/kafka/connection_builder.rb +33 -0
  39. data/lib/kafka/consumer.rb +642 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/consumer_group/assignor.rb +63 -0
  42. data/lib/kafka/crc32_hash.rb +15 -0
  43. data/lib/kafka/datadog.rb +420 -0
  44. data/lib/kafka/digest.rb +22 -0
  45. data/lib/kafka/fetch_operation.rb +115 -0
  46. data/lib/kafka/fetched_batch.rb +58 -0
  47. data/lib/kafka/fetched_batch_generator.rb +120 -0
  48. data/lib/kafka/fetched_message.rb +48 -0
  49. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  50. data/lib/kafka/fetcher.rb +224 -0
  51. data/lib/kafka/gzip_codec.rb +34 -0
  52. data/lib/kafka/heartbeat.rb +25 -0
  53. data/lib/kafka/instrumenter.rb +38 -0
  54. data/lib/kafka/interceptors.rb +33 -0
  55. data/lib/kafka/lz4_codec.rb +27 -0
  56. data/lib/kafka/message_buffer.rb +87 -0
  57. data/lib/kafka/murmur2_hash.rb +17 -0
  58. data/lib/kafka/offset_manager.rb +259 -0
  59. data/lib/kafka/partitioner.rb +40 -0
  60. data/lib/kafka/pause.rb +92 -0
  61. data/lib/kafka/pending_message.rb +29 -0
  62. data/lib/kafka/pending_message_queue.rb +41 -0
  63. data/lib/kafka/produce_operation.rb +205 -0
  64. data/lib/kafka/producer.rb +528 -0
  65. data/lib/kafka/prometheus.rb +316 -0
  66. data/lib/kafka/protocol.rb +225 -0
  67. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  68. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  69. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  70. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  71. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  72. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  73. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  74. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  75. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  76. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  77. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  78. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  79. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  80. data/lib/kafka/protocol/decoder.rb +175 -0
  81. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  82. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  83. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  84. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  85. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  86. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  87. data/lib/kafka/protocol/encoder.rb +184 -0
  88. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  89. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  90. data/lib/kafka/protocol/fetch_request.rb +70 -0
  91. data/lib/kafka/protocol/fetch_response.rb +136 -0
  92. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  93. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  94. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  95. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  96. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  97. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  98. data/lib/kafka/protocol/join_group_request.rb +47 -0
  99. data/lib/kafka/protocol/join_group_response.rb +41 -0
  100. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  101. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  102. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  103. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  104. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  105. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  106. data/lib/kafka/protocol/member_assignment.rb +42 -0
  107. data/lib/kafka/protocol/message.rb +172 -0
  108. data/lib/kafka/protocol/message_set.rb +55 -0
  109. data/lib/kafka/protocol/metadata_request.rb +31 -0
  110. data/lib/kafka/protocol/metadata_response.rb +185 -0
  111. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  112. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  113. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  114. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  115. data/lib/kafka/protocol/produce_request.rb +94 -0
  116. data/lib/kafka/protocol/produce_response.rb +63 -0
  117. data/lib/kafka/protocol/record.rb +88 -0
  118. data/lib/kafka/protocol/record_batch.rb +223 -0
  119. data/lib/kafka/protocol/request_message.rb +26 -0
  120. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  121. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  122. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  123. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  124. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  125. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  126. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +61 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +188 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/ruby-kafka-temp-fork.rb +5 -0
  143. data/ruby-kafka-temp-fork.gemspec +54 -0
  144. metadata +520 -0
data/lib/kafka/broker_info.rb
@@ -0,0 +1,16 @@
+ # Represents a broker in a Kafka cluster.
+ module Kafka
+   class BrokerInfo
+     attr_reader :node_id, :host, :port
+
+     def initialize(node_id:, host:, port:)
+       @node_id = node_id
+       @host = host
+       @port = port
+     end
+
+     def to_s
+       "#{host}:#{port} (node_id=#{node_id})"
+     end
+   end
+ end
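
The hunk above adds `Kafka::BrokerInfo` (data/lib/kafka/broker_info.rb), a plain value object describing one cluster node. A minimal usage sketch; the node id, host, and port values are illustrative:

    info = Kafka::BrokerInfo.new(node_id: 1, host: "kafka1.example.com", port: 9092)
    info.to_s #=> "kafka1.example.com:9092 (node_id=1)"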
data/lib/kafka/broker_pool.rb
@@ -0,0 +1,41 @@
+ # frozen_string_literal: true
+
+ require "kafka/broker"
+
+ module Kafka
+   class BrokerPool
+     def initialize(connection_builder:, logger:)
+       @logger = TaggedLogger.new(logger)
+       @connection_builder = connection_builder
+       @brokers = {}
+     end
+
+     def connect(host, port, node_id: nil)
+       if @brokers.key?(node_id)
+         broker = @brokers.fetch(node_id)
+         return broker if broker.address_match?(host, port)
+         broker.disconnect
+         @brokers[node_id] = nil
+       end
+
+       broker = Broker.new(
+         connection_builder: @connection_builder,
+         host: host,
+         port: port,
+         node_id: node_id,
+         logger: @logger,
+       )
+
+       @brokers[node_id] = broker unless node_id.nil?
+
+       broker
+     end
+
+     def close
+       @brokers.each do |id, broker|
+         @logger.info "Disconnecting broker #{id}"
+         broker.disconnect
+       end
+     end
+   end
+ end
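
As `connect` above implies, `Kafka::BrokerPool` caches one broker per `node_id` and replaces it when that node's address changes; connections made without a `node_id` are never cached. A hedged sketch of that contract, where `connection_builder` and `logger` stand in for real collaborators:

    pool = Kafka::BrokerPool.new(connection_builder: connection_builder, logger: logger)

    broker = pool.connect("kafka1", 9092, node_id: 1)
    pool.connect("kafka1", 9092, node_id: 1) # same address: the cached broker is returned
    pool.connect("kafka2", 9092, node_id: 1) # new address: the old broker is disconnected first
    pool.connect("kafka3", 9092)             # no node_id: built fresh on every call

    pool.close # disconnects all cached brokers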
data/lib/kafka/broker_uri.rb
@@ -0,0 +1,43 @@
+ # frozen_string_literal: true
+
+ require "uri"
+
+ module Kafka
+   module BrokerUri
+     DEFAULT_PORT = 9092
+     URI_SCHEMES = ["kafka", "kafka+ssl", "plaintext", "ssl"]
+
+     # Parses a Kafka broker URI string.
+     #
+     # Examples of valid strings:
+     # * `kafka1.something`
+     # * `kafka1.something:1234`
+     # * `kafka://kafka1.something:1234`
+     # * `kafka+ssl://kafka1.something:1234`
+     # * `plaintext://kafka1.something:1234`
+     #
+     # @param str [String] a Kafka broker URI string.
+     # @return [URI]
+     def self.parse(str)
+       # Make sure there's a scheme part if it's missing.
+       str = "kafka://" + str unless str.include?("://")
+
+       uri = URI.parse(str)
+       uri.port ||= DEFAULT_PORT
+
+       # Map some schemes to others.
+       case uri.scheme
+       when 'plaintext'
+         uri.scheme = 'kafka'
+       when 'ssl'
+         uri.scheme = 'kafka+ssl'
+       end
+
+       unless URI_SCHEMES.include?(uri.scheme)
+         raise Kafka::Error, "invalid protocol `#{uri.scheme}` in `#{str}`"
+       end
+
+       uri
+     end
+   end
+ end
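
`BrokerUri.parse` normalizes all accepted spellings down to `kafka` or `kafka+ssl` URIs, filling in the default port when none is given. Expected behavior for the documented inputs (return values shown as the normalized URI string):

    Kafka::BrokerUri.parse("kafka1.something")        #=> kafka://kafka1.something:9092
    Kafka::BrokerUri.parse("plaintext://kafka1:1234") #=> kafka://kafka1:1234
    Kafka::BrokerUri.parse("ssl://kafka1:9093")       #=> kafka+ssl://kafka1:9093
    Kafka::BrokerUri.parse("http://kafka1")           # raises Kafka::Error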
data/lib/kafka/client.rb
@@ -0,0 +1,833 @@
+ # coding: utf-8
+ # frozen_string_literal: true
+
+ require "kafka/ssl_context"
+ require "kafka/cluster"
+ require "kafka/transaction_manager"
+ require "kafka/broker_info"
+ require "kafka/producer"
+ require "kafka/consumer"
+ require "kafka/heartbeat"
+ require "kafka/broker_uri"
+ require "kafka/async_producer"
+ require "kafka/fetched_message"
+ require "kafka/fetch_operation"
+ require "kafka/connection_builder"
+ require "kafka/instrumenter"
+ require "kafka/sasl_authenticator"
+ require "kafka/tagged_logger"
+
+ module Kafka
+   class Client
+     # Initializes a new Kafka client.
+     #
+     # @param seed_brokers [Array<String>, String] the list of brokers used to initialize
+     #   the client. Either an Array of connections, or a comma separated string of connections.
+     #   A connection can either be a string of "host:port" or a full URI with a scheme.
+     #   If there's a scheme it's ignored and only host/port are used.
+     #
+     # @param client_id [String] the identifier for this application.
+     #
+     # @param logger [Logger] the logger that should be used by the client.
+     #
+     # @param connect_timeout [Integer, nil] the timeout setting for connecting
+     #   to brokers. See {BrokerPool#initialize}.
+     #
+     # @param socket_timeout [Integer, nil] the timeout setting for socket
+     #   connections. See {BrokerPool#initialize}.
+     #
+     # @param ssl_ca_cert [String, Array<String>, nil] a PEM encoded CA cert, or an Array of
+     #   PEM encoded CA certs, to use with an SSL connection.
+     #
+     # @param ssl_ca_cert_file_path [String, Array<String>, nil] a path on the filesystem, or an
+     #   Array of paths, to PEM encoded CA cert(s) to use with an SSL connection.
+     #
+     # @param ssl_client_cert [String, nil] a PEM encoded client cert to use with an
+     #   SSL connection. Must be used in combination with ssl_client_cert_key.
+     #
+     # @param ssl_client_cert_key [String, nil] a PEM encoded client cert key to use with an
+     #   SSL connection. Must be used in combination with ssl_client_cert.
+     #
+     # @param ssl_client_cert_key_password [String, nil] the password required to read the
+     #   ssl_client_cert_key. Must be used in combination with ssl_client_cert_key.
+     #
+     # @param sasl_gssapi_principal [String, nil] a KRB5 principal
+     #
+     # @param sasl_gssapi_keytab [String, nil] a KRB5 keytab filepath
+     #
+     # @param sasl_scram_username [String, nil] SCRAM username
+     #
+     # @param sasl_scram_password [String, nil] SCRAM password
+     #
+     # @param sasl_scram_mechanism [String, nil] Scram mechanism, either "sha256" or "sha512"
+     #
+     # @param sasl_over_ssl [Boolean] whether to enforce SSL with SASL
+     #
+     # @param ssl_ca_certs_from_system [Boolean] whether to use the CA certs from the
+     #   system's default certificate store.
+     #
+     # @param partitioner [Partitioner, nil] the partitioner that should be used by the client.
+     #
+     # @param sasl_oauth_token_provider [Object, nil] OAuthBearer Token Provider instance that
+     #   implements method token. See {Sasl::OAuth#initialize}
+     #
+     # @param ssl_verify_hostname [Boolean, true] whether to verify that the host serving
+     #   the SSL certificate and the signing chain of the certificate have the correct domains
+     #   based on the CA certificate
+     #
+     # @param resolve_seed_brokers [Boolean] whether to resolve each hostname of the seed brokers.
+     #   If a broker is resolved to multiple IP addresses, the client tries to connect to each
+     #   of the addresses until it can connect.
+     #
+     # @return [Client]
+     def initialize(seed_brokers:, client_id: "ruby-kafka", logger: nil, connect_timeout: nil, socket_timeout: nil,
+                    ssl_ca_cert_file_path: nil, ssl_ca_cert: nil, ssl_client_cert: nil, ssl_client_cert_key: nil,
+                    ssl_client_cert_key_password: nil, ssl_client_cert_chain: nil, sasl_gssapi_principal: nil,
+                    sasl_gssapi_keytab: nil, sasl_plain_authzid: '', sasl_plain_username: nil, sasl_plain_password: nil,
+                    sasl_scram_username: nil, sasl_scram_password: nil, sasl_scram_mechanism: nil,
+                    sasl_over_ssl: true, ssl_ca_certs_from_system: false, partitioner: nil, sasl_oauth_token_provider: nil, ssl_verify_hostname: true,
+                    resolve_seed_brokers: false)
+       @logger = TaggedLogger.new(logger)
+       @instrumenter = Instrumenter.new(client_id: client_id)
+       @seed_brokers = normalize_seed_brokers(seed_brokers)
+       @resolve_seed_brokers = resolve_seed_brokers
+
+       ssl_context = SslContext.build(
+         ca_cert_file_path: ssl_ca_cert_file_path,
+         ca_cert: ssl_ca_cert,
+         client_cert: ssl_client_cert,
+         client_cert_key: ssl_client_cert_key,
+         client_cert_key_password: ssl_client_cert_key_password,
+         client_cert_chain: ssl_client_cert_chain,
+         ca_certs_from_system: ssl_ca_certs_from_system,
+         verify_hostname: ssl_verify_hostname
+       )
+
+       sasl_authenticator = SaslAuthenticator.new(
+         sasl_gssapi_principal: sasl_gssapi_principal,
+         sasl_gssapi_keytab: sasl_gssapi_keytab,
+         sasl_plain_authzid: sasl_plain_authzid,
+         sasl_plain_username: sasl_plain_username,
+         sasl_plain_password: sasl_plain_password,
+         sasl_scram_username: sasl_scram_username,
+         sasl_scram_password: sasl_scram_password,
+         sasl_scram_mechanism: sasl_scram_mechanism,
+         sasl_oauth_token_provider: sasl_oauth_token_provider,
+         logger: @logger
+       )
+
+       if sasl_authenticator.enabled? && sasl_over_ssl && ssl_context.nil?
+         raise ArgumentError, "SASL authentication requires that SSL is configured"
+       end
+
+       @connection_builder = ConnectionBuilder.new(
+         client_id: client_id,
+         connect_timeout: connect_timeout,
+         socket_timeout: socket_timeout,
+         ssl_context: ssl_context,
+         logger: @logger,
+         instrumenter: @instrumenter,
+         sasl_authenticator: sasl_authenticator
+       )
+
+       @cluster = initialize_cluster
+       @partitioner = partitioner || Partitioner.new
+     end
+
+     # Delivers a single message to the Kafka cluster.
+     #
+     # **Note:** Only use this API for low-throughput scenarios. If you want to deliver
+     # many messages at a high rate, or if you want to configure the way messages are
+     # sent, use the {#producer} or {#async_producer} APIs instead.
+     #
+     # @param value [String, nil] the message value.
+     # @param key [String, nil] the message key.
+     # @param headers [Hash<String, String>] the headers for the message.
+     # @param topic [String] the topic that the message should be written to.
+     # @param partition [Integer, nil] the partition that the message should be written
+     #   to, or `nil` if either `partition_key` is passed or the partition should be
+     #   chosen at random.
+     # @param partition_key [String] a value used to deterministically choose a
+     #   partition to write to.
+     # @param retries [Integer] the number of times to retry the delivery before giving
+     #   up.
+     # @return [nil]
+     def deliver_message(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, retries: 1)
+       create_time = Time.now
+
+       # We want to fail fast if `topic` isn't a String
+       topic = topic.to_str
+
+       message = PendingMessage.new(
+         value: value,
+         key: key,
+         headers: headers,
+         topic: topic,
+         partition: partition,
+         partition_key: partition_key,
+         create_time: create_time
+       )
+
+       if partition.nil?
+         partition_count = @cluster.partitions_for(topic).count
+         partition = @partitioner.call(partition_count, message)
+       end
+
+       buffer = MessageBuffer.new
+
+       buffer.write(
+         value: message.value,
+         key: message.key,
+         headers: message.headers,
+         topic: message.topic,
+         partition: partition,
+         create_time: message.create_time,
+       )
+
+       @cluster.add_target_topics([topic])
+
+       compressor = Compressor.new(
+         instrumenter: @instrumenter,
+       )
+
+       transaction_manager = TransactionManager.new(
+         cluster: @cluster,
+         logger: @logger,
+         idempotent: false,
+         transactional: false
+       )
+
+       operation = ProduceOperation.new(
+         cluster: @cluster,
+         transaction_manager: transaction_manager,
+         buffer: buffer,
+         required_acks: 1,
+         ack_timeout: 10,
+         compressor: compressor,
+         logger: @logger,
+         instrumenter: @instrumenter,
+       )
+
+       attempt = 1
+
+       begin
+         @cluster.refresh_metadata_if_necessary!
+
+         operation.execute
+
+         unless buffer.empty?
+           raise DeliveryFailed.new(nil, [message])
+         end
+       rescue Kafka::Error => e
+         @cluster.mark_as_stale!
+
+         if attempt >= (retries + 1)
+           raise
+         else
+           attempt += 1
+           @logger.warn "Error while delivering message, #{e.class}: #{e.message}; retrying after 1s..."
+
+           sleep 1
+
+           retry
+         end
+       end
+     end
+
+     # Initializes a new Kafka producer.
+     #
+     # @param ack_timeout [Integer] The number of seconds a broker can wait for
+     #   replicas to acknowledge a write before responding with a timeout.
+     #
+     # @param required_acks [Integer, Symbol] The number of replicas that must acknowledge
+     #   a write, or `:all` if all in-sync replicas must acknowledge.
+     #
+     # @param max_retries [Integer] the number of retries that should be attempted
+     #   before giving up sending messages to the cluster. Does not include the
+     #   original attempt.
+     #
+     # @param retry_backoff [Integer] the number of seconds to wait between retries.
+     #
+     # @param max_buffer_size [Integer] the number of messages allowed in the buffer
+     #   before new writes will raise {BufferOverflow} exceptions.
+     #
+     # @param max_buffer_bytesize [Integer] the maximum size of the buffer in bytes.
+     #   Attempting to produce messages when the buffer reaches this size will
+     #   result in {BufferOverflow} being raised.
+     #
+     # @param compression_codec [Symbol, nil] the name of the compression codec to
+     #   use, or nil if no compression should be performed. Valid codecs: `:snappy`,
+     #   `:gzip`, `:lz4`, `:zstd`
+     #
+     # @param compression_threshold [Integer] the number of messages that need to
+     #   be in a message set before it should be compressed. Note that message sets
+     #   are per-partition rather than per-topic or per-producer.
+     #
+     # @param interceptors [Array<Object>] a list of producer interceptors that implement
+     #   `call(Kafka::PendingMessage)`.
+     #
+     # @return [Kafka::Producer] the Kafka producer.
+     def producer(
+       compression_codec: nil,
+       compression_threshold: 1,
+       ack_timeout: 5,
+       required_acks: :all,
+       max_retries: 2,
+       retry_backoff: 1,
+       max_buffer_size: 1000,
+       max_buffer_bytesize: 10_000_000,
+       idempotent: false,
+       transactional: false,
+       transactional_id: nil,
+       transactional_timeout: 60,
+       interceptors: []
+     )
+       cluster = initialize_cluster
+       compressor = Compressor.new(
+         codec_name: compression_codec,
+         threshold: compression_threshold,
+         instrumenter: @instrumenter,
+       )
+
+       transaction_manager = TransactionManager.new(
+         cluster: cluster,
+         logger: @logger,
+         idempotent: idempotent,
+         transactional: transactional,
+         transactional_id: transactional_id,
+         transactional_timeout: transactional_timeout,
+       )
+
+       Producer.new(
+         cluster: cluster,
+         transaction_manager: transaction_manager,
+         logger: @logger,
+         instrumenter: @instrumenter,
+         compressor: compressor,
+         ack_timeout: ack_timeout,
+         required_acks: required_acks,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         max_buffer_size: max_buffer_size,
+         max_buffer_bytesize: max_buffer_bytesize,
+         partitioner: @partitioner,
+         interceptors: interceptors
+       )
+     end
+
+     # Creates a new AsyncProducer instance.
+     #
+     # All parameters allowed by {#producer} can be passed. In addition to this,
+     # a few extra parameters can be passed when creating an async producer.
+     #
+     # @param max_queue_size [Integer] the maximum number of messages allowed in
+     #   the queue.
+     # @param delivery_threshold [Integer] if greater than zero, the number of
+     #   buffered messages that will automatically trigger a delivery.
+     # @param delivery_interval [Integer] if greater than zero, the number of
+     #   seconds between automatic message deliveries.
+     #
+     # @see AsyncProducer
+     # @return [AsyncProducer]
+     def async_producer(delivery_interval: 0, delivery_threshold: 0, max_queue_size: 1000, max_retries: -1, retry_backoff: 0, **options)
+       sync_producer = producer(**options)
+
+       AsyncProducer.new(
+         sync_producer: sync_producer,
+         delivery_interval: delivery_interval,
+         delivery_threshold: delivery_threshold,
+         max_queue_size: max_queue_size,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         instrumenter: @instrumenter,
+         logger: @logger,
+       )
+     end
+
+     # Creates a new Kafka consumer.
+     #
+     # @param group_id [String] the id of the group that the consumer should join.
+     # @param session_timeout [Integer] the number of seconds after which, if a client
+     #   hasn't contacted the Kafka cluster, it will be kicked out of the group.
+     # @param offset_commit_interval [Integer] the interval between offset commits,
+     #   in seconds.
+     # @param offset_commit_threshold [Integer] the number of messages that can be
+     #   processed before their offsets are committed. If zero, offset commits are
+     #   not triggered by message processing.
+     # @param heartbeat_interval [Integer] the interval between heartbeats; must be less
+     #   than the session window.
+     # @param offset_retention_time [Integer] the time period that committed
+     #   offsets will be retained, in seconds. Defaults to the broker setting.
+     # @param fetcher_max_queue_size [Integer] max number of items in the fetch queue that
+     #   are stored for further processing. Note that each item in the queue represents a
+     #   response from a single broker.
+     # @param refresh_topic_interval [Integer] interval of refreshing the topic list.
+     #   If it is 0, the topic list won't be refreshed (default).
+     #   If it is n (n > 0), the topic list will be refreshed every n seconds.
+     # @param interceptors [Array<Object>] a list of consumer interceptors that implement
+     #   `call(Kafka::FetchedBatch)`.
+     # @param assignment_strategy [Object] a partition assignment strategy that
+     #   implements `protocol_type()`, `user_data()`, and `assign(members:, partitions:)`
+     # @return [Consumer]
+     def consumer(
+       group_id:,
+       session_timeout: 30,
+       rebalance_timeout: 60,
+       offset_commit_interval: 10,
+       offset_commit_threshold: 0,
+       heartbeat_interval: 10,
+       offset_retention_time: nil,
+       fetcher_max_queue_size: 100,
+       refresh_topic_interval: 0,
+       interceptors: [],
+       assignment_strategy: nil
+     )
+       cluster = initialize_cluster
+
+       instrumenter = DecoratingInstrumenter.new(@instrumenter, {
+         group_id: group_id,
+       })
+
+       # The Kafka protocol expects the retention time to be in ms.
+       retention_time = (offset_retention_time && offset_retention_time * 1_000) || -1
+
+       group = ConsumerGroup.new(
+         cluster: cluster,
+         logger: @logger,
+         group_id: group_id,
+         session_timeout: session_timeout,
+         rebalance_timeout: rebalance_timeout,
+         retention_time: retention_time,
+         instrumenter: instrumenter,
+         assignment_strategy: assignment_strategy
+       )
+
+       fetcher = Fetcher.new(
+         cluster: initialize_cluster,
+         group: group,
+         logger: @logger,
+         instrumenter: instrumenter,
+         max_queue_size: fetcher_max_queue_size
+       )
+
+       offset_manager = OffsetManager.new(
+         cluster: cluster,
+         group: group,
+         fetcher: fetcher,
+         logger: @logger,
+         commit_interval: offset_commit_interval,
+         commit_threshold: offset_commit_threshold,
+         offset_retention_time: offset_retention_time
+       )
+
+       heartbeat = Heartbeat.new(
+         group: group,
+         interval: heartbeat_interval,
+         instrumenter: instrumenter
+       )
+
+       Consumer.new(
+         cluster: cluster,
+         logger: @logger,
+         instrumenter: instrumenter,
+         group: group,
+         offset_manager: offset_manager,
+         fetcher: fetcher,
+         session_timeout: session_timeout,
+         heartbeat: heartbeat,
+         refresh_topic_interval: refresh_topic_interval,
+         interceptors: interceptors
+       )
+     end
+
+     # Fetches a batch of messages from a single partition. Note that it's possible
+     # to get back empty batches.
+     #
+     # The starting point for the fetch can be configured with the `:offset` argument.
+     # If you pass a number, the fetch will start at that offset. However, there are
+     # two special Symbol values that can be passed instead:
+     #
+     # * `:earliest` — the first offset in the partition.
+     # * `:latest` — the next offset that will be written to, effectively making the
+     #   call block until there is a new message in the partition.
+     #
+     # The Kafka protocol specifies the numeric values of these two options: -2 and -1,
+     # respectively. You can also pass in these numbers directly.
+     #
+     # ## Example
+     #
+     # When enumerating the messages in a partition, you typically fetch batches
+     # sequentially.
+     #
+     #     offset = :earliest
+     #
+     #     loop do
+     #       messages = kafka.fetch_messages(
+     #         topic: "my-topic",
+     #         partition: 42,
+     #         offset: offset,
+     #       )
+     #
+     #       messages.each do |message|
+     #         puts message.offset, message.key, message.value
+     #
+     #         # Set the next offset that should be read to be the subsequent
+     #         # offset.
+     #         offset = message.offset + 1
+     #       end
+     #     end
+     #
+     # See a working example in `examples/simple-consumer.rb`.
+     #
+     # @param topic [String] the topic that messages should be fetched from.
+     #
+     # @param partition [Integer] the partition that messages should be fetched from.
+     #
+     # @param offset [Integer, Symbol] the offset to start reading from. Default is
+     #   the latest offset.
+     #
+     # @param max_wait_time [Integer] the maximum amount of time to wait before
+     #   the server responds, in seconds.
+     #
+     # @param min_bytes [Integer] the minimum number of bytes to wait for. If set to
+     #   zero, the broker will respond immediately, but the response may be empty.
+     #   The default is 1 byte, which means that the broker will respond as soon as
+     #   a message is written to the partition.
+     #
+     # @param max_bytes [Integer] the maximum number of bytes to include in the
+     #   response message set. Default is 1 MB. You need to set this higher if you
+     #   expect messages to be larger than this.
+     #
+     # @return [Array<Kafka::FetchedMessage>] the messages returned from the broker.
+     def fetch_messages(topic:, partition:, offset: :latest, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, retries: 1)
+       operation = FetchOperation.new(
+         cluster: @cluster,
+         logger: @logger,
+         min_bytes: min_bytes,
+         max_bytes: max_bytes,
+         max_wait_time: max_wait_time,
+       )
+
+       operation.fetch_from_partition(topic, partition, offset: offset, max_bytes: max_bytes)
+
+       attempt = 1
+
+       begin
+         operation.execute.flat_map {|batch| batch.messages }
+       rescue Kafka::Error => e
+         @cluster.mark_as_stale!
+
+         if attempt >= (retries + 1)
+           raise
+         else
+           attempt += 1
+           @logger.warn "Error while fetching messages, #{e.class}: #{e.message}; retrying..."
+           retry
+         end
+       end
+     end
+
+     # Enumerate all messages in a topic.
+     #
+     # @param topic [String] the topic to consume messages from.
+     #
+     # @param start_from_beginning [Boolean] whether to start from the beginning
+     #   of the topic or just subscribe to new messages being produced.
+     #
+     # @param max_wait_time [Integer] the maximum amount of time to wait before
+     #   the server responds, in seconds.
+     #
+     # @param min_bytes [Integer] the minimum number of bytes to wait for. If set to
+     #   zero, the broker will respond immediately, but the response may be empty.
+     #   The default is 1 byte, which means that the broker will respond as soon as
+     #   a message is written to the partition.
+     #
+     # @param max_bytes [Integer] the maximum number of bytes to include in the
+     #   response message set. Default is 1 MB. You need to set this higher if you
+     #   expect messages to be larger than this.
+     #
+     # @return [nil]
+     def each_message(topic:, start_from_beginning: true, max_wait_time: 5, min_bytes: 1, max_bytes: 1048576, &block)
+       default_offset ||= start_from_beginning ? :earliest : :latest
+       offsets = Hash.new { default_offset }
+
+       loop do
+         operation = FetchOperation.new(
+           cluster: @cluster,
+           logger: @logger,
+           min_bytes: min_bytes,
+           max_wait_time: max_wait_time,
+         )
+
+         @cluster.partitions_for(topic).map(&:partition_id).each do |partition|
+           partition_offset = offsets[partition]
+           operation.fetch_from_partition(topic, partition, offset: partition_offset, max_bytes: max_bytes)
+         end
+
+         batches = operation.execute
+
+         batches.each do |batch|
+           batch.messages.each(&block)
+           offsets[batch.partition] = batch.last_offset + 1 unless batch.unknown_last_offset?
+         end
+       end
+     end
+
+     # Describe broker configs
+     #
+     # @param broker_id [int] the id of the broker
+     # @param configs [Array] array of config keys.
+     # @return [Array<Kafka::Protocol::DescribeConfigsResponse::ConfigEntry>]
+     def describe_configs(broker_id, configs = [])
+       @cluster.describe_configs(broker_id, configs)
+     end
+
+     # Alter broker configs
+     #
+     # @param broker_id [int] the id of the broker
+     # @param configs [Array] array of config strings.
+     # @return [nil]
+     def alter_configs(broker_id, configs = [])
+       @cluster.alter_configs(broker_id, configs)
+     end
+
+     # Creates a topic in the cluster.
+     #
+     # @example Creating a topic with log compaction
+     #   # Enable log compaction:
+     #   config = { "cleanup.policy" => "compact" }
+     #
+     #   # Create the topic:
+     #   kafka.create_topic("dns-mappings", config: config)
+     #
+     # @param name [String] the name of the topic.
+     # @param num_partitions [Integer] the number of partitions that should be created
+     #   in the topic.
+     # @param replication_factor [Integer] the replication factor of the topic.
+     # @param timeout [Integer] a duration of time to wait for the topic to be
+     #   completely created.
+     # @param config [Hash] topic configuration entries. See
+     #   [the Kafka documentation](https://kafka.apache.org/documentation/#topicconfigs)
+     #   for more information.
+     # @raise [Kafka::TopicAlreadyExists] if the topic already exists.
+     # @return [nil]
+     def create_topic(name, num_partitions: 1, replication_factor: 1, timeout: 30, config: {})
+       @cluster.create_topic(
+         name,
+         num_partitions: num_partitions,
+         replication_factor: replication_factor,
+         timeout: timeout,
+         config: config,
+       )
+     end
+
+     # Delete a topic in the cluster.
+     #
+     # @param name [String] the name of the topic.
+     # @param timeout [Integer] a duration of time to wait for the topic to be
+     #   completely marked deleted.
+     # @return [nil]
+     def delete_topic(name, timeout: 30)
+       @cluster.delete_topic(name, timeout: timeout)
+     end
+
+     # Describe the configuration of a topic.
+     #
+     # Retrieves the topic configuration from the Kafka brokers. Configuration names
+     # refer to [Kafka's topic-level configs](https://kafka.apache.org/documentation/#topicconfigs).
+     #
+     # @note This is an alpha level API and is subject to change.
+     #
+     # @example Describing the cleanup policy config of a topic
+     #   kafka = Kafka.new(["kafka1:9092"])
+     #   kafka.describe_topic("my-topic", ["cleanup.policy"])
+     #   #=> { "cleanup.policy" => "delete" }
+     #
+     # @param name [String] the name of the topic.
+     # @param configs [Array<String>] array of desired config names.
+     # @return [Hash<String, String>]
+     def describe_topic(name, configs = [])
+       @cluster.describe_topic(name, configs)
+     end
+
+     # Alter the configuration of a topic.
+     #
+     # Configuration keys must match
+     # [Kafka's topic-level configs](https://kafka.apache.org/documentation/#topicconfigs).
+     #
+     # @note This is an alpha level API and is subject to change.
+     #
+     # @example Altering the cleanup policy config of a topic
+     #   kafka = Kafka.new(["kafka1:9092"])
+     #   kafka.alter_topic("my-topic", "cleanup.policy" => "delete", "max.message.bytes" => "100000")
+     #
+     # @param name [String] the name of the topic.
+     # @param configs [Hash<String, String>] hash of desired config keys and values.
+     # @return [nil]
+     def alter_topic(name, configs = {})
+       @cluster.alter_topic(name, configs)
+     end
+
+     # Describe a consumer group
+     #
+     # @param group_id [String] the id of the consumer group
+     # @return [Kafka::Protocol::DescribeGroupsResponse::Group]
+     def describe_group(group_id)
+       @cluster.describe_group(group_id)
+     end
+
+     # Fetch all committed offsets for a consumer group
+     #
+     # @param group_id [String] the id of the consumer group
+     # @return [Hash<String, Hash<Integer, Kafka::Protocol::OffsetFetchResponse::PartitionOffsetInfo>>]
+     def fetch_group_offsets(group_id)
+       @cluster.fetch_group_offsets(group_id)
+     end
+
+     # Create partitions for a topic.
+     #
+     # @param name [String] the name of the topic.
+     # @param num_partitions [Integer] the number of desired partitions for
+     #   the topic
+     # @param timeout [Integer] a duration of time to wait for the new
+     #   partitions to be added.
+     # @return [nil]
+     def create_partitions_for(name, num_partitions: 1, timeout: 30)
+       @cluster.create_partitions_for(name, num_partitions: num_partitions, timeout: timeout)
+     end
+
+     # Lists all topics in the cluster.
+     #
+     # @return [Array<String>] the list of topic names.
+     def topics
+       attempts = 0
+       begin
+         attempts += 1
+         @cluster.list_topics
+       rescue Kafka::ConnectionError
+         @cluster.mark_as_stale!
+         retry unless attempts > 1
+         raise
+       end
+     end
+
+     # Lists all consumer groups in the cluster
+     #
+     # @return [Array<String>] the list of group ids
+     def groups
+       @cluster.list_groups
+     end
+
+     def has_topic?(topic)
+       @cluster.clear_target_topics
+       @cluster.add_target_topics([topic])
+       @cluster.topics.include?(topic)
+     end
+
+     # Counts the number of partitions in a topic.
+     #
+     # @param topic [String]
+     # @return [Integer] the number of partitions in the topic.
+     def partitions_for(topic)
+       @cluster.partitions_for(topic).count
+     end
+
+     # Counts the number of replicas for a topic's partition
+     #
+     # @param topic [String]
+     # @return [Integer] the number of replica nodes for the topic's partition
+     def replica_count_for(topic)
+       @cluster.partitions_for(topic).first.replicas.count
+     end
+
+     # Retrieve the offset of the last message in a partition. If there are no
+     # messages in the partition, -1 is returned.
+     #
+     # @param topic [String]
+     # @param partition [Integer]
+     # @return [Integer] the offset of the last message in the partition, or -1 if
+     #   there are no messages in the partition.
+     def last_offset_for(topic, partition)
+       # The offset resolution API will return the offset of the "next" message to
+       # be written when resolving the "latest" offset, so we subtract one.
+       @cluster.resolve_offset(topic, partition, :latest) - 1
+     end
+
+     # Retrieve the offset of the last message in each partition of the specified topics.
+     #
+     # @param topics [Array<String>] topic names.
+     # @return [Hash<String, Hash<Integer, Integer>>]
+     # @example
+     #   last_offsets_for('topic-1', 'topic-2') # =>
+     #   # {
+     #   #   'topic-1' => { 0 => 100, 1 => 100 },
+     #   #   'topic-2' => { 0 => 100, 1 => 100 }
+     #   # }
+     def last_offsets_for(*topics)
+       @cluster.add_target_topics(topics)
+       topics.map {|topic|
+         partition_ids = @cluster.partitions_for(topic).collect(&:partition_id)
+         partition_offsets = @cluster.resolve_offsets(topic, partition_ids, :latest)
+         [topic, partition_offsets.collect { |k, v| [k, v - 1] }.to_h]
+       }.to_h
+     end
+
+     # Check whether the current cluster supports a specific version or not
+     #
+     # @param api_key [Integer] API key.
+     # @param version [Integer] API version.
+     # @return [Boolean]
+     def supports_api?(api_key, version = nil)
+       @cluster.supports_api?(api_key, version)
+     end
+
+     def apis
+       @cluster.apis
+     end
+
+     # List all brokers in the cluster.
+     #
+     # @return [Array<Kafka::BrokerInfo>] the list of brokers.
+     def brokers
+       @cluster.cluster_info.brokers
+     end
+
+     # The current controller broker in the cluster.
+     #
+     # @return [Kafka::BrokerInfo] information on the controller broker.
+     def controller_broker
+       brokers.find {|broker| broker.node_id == @cluster.cluster_info.controller_id }
+     end
+
+     # Closes all connections to the Kafka brokers and frees up used resources.
+     #
+     # @return [nil]
+     def close
+       @cluster.disconnect
+     end
+
+     private
+
+     def initialize_cluster
+       broker_pool = BrokerPool.new(
+         connection_builder: @connection_builder,
+         logger: @logger,
+       )
+
+       Cluster.new(
+         seed_brokers: @seed_brokers,
+         broker_pool: broker_pool,
+         logger: @logger,
+         resolve_seed_brokers: @resolve_seed_brokers,
+       )
+     end
+
+     def normalize_seed_brokers(seed_brokers)
+       if seed_brokers.is_a?(String)
+         seed_brokers = seed_brokers.split(",")
+       end
+
+       seed_brokers.map {|str| BrokerUri.parse(str) }
+     end
+   end
+ end
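
Taken together, `Kafka::Client` is the package's front door. A hedged end-to-end sketch using only methods added in this hunk; the broker addresses and topic name are illustrative:

    require "kafka"

    kafka = Kafka::Client.new(seed_brokers: "kafka1:9092,kafka2:9092", client_id: "example-app")

    # One-off, low-throughput delivery with one retry on Kafka errors.
    kafka.deliver_message("hello", topic: "greetings", retries: 1)

    # Read the partition back from the beginning.
    kafka.fetch_messages(topic: "greetings", partition: 0, offset: :earliest).each do |message|
      puts [message.offset, message.key, message.value].inspect
    end

    puts kafka.last_offset_for("greetings", 0)
    puts kafka.brokers.map(&:to_s) # BrokerInfo#to_s from the first hunk

    kafka.close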