ruby-kafka-custom 0.7.7.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. checksums.yaml +7 -0
  2. data/lib/kafka/async_producer.rb +279 -0
  3. data/lib/kafka/broker.rb +205 -0
  4. data/lib/kafka/broker_info.rb +16 -0
  5. data/lib/kafka/broker_pool.rb +41 -0
  6. data/lib/kafka/broker_uri.rb +43 -0
  7. data/lib/kafka/client.rb +754 -0
  8. data/lib/kafka/cluster.rb +455 -0
  9. data/lib/kafka/compression.rb +43 -0
  10. data/lib/kafka/compressor.rb +85 -0
  11. data/lib/kafka/connection.rb +220 -0
  12. data/lib/kafka/connection_builder.rb +33 -0
  13. data/lib/kafka/consumer.rb +592 -0
  14. data/lib/kafka/consumer_group.rb +208 -0
  15. data/lib/kafka/datadog.rb +413 -0
  16. data/lib/kafka/fetch_operation.rb +115 -0
  17. data/lib/kafka/fetched_batch.rb +54 -0
  18. data/lib/kafka/fetched_batch_generator.rb +117 -0
  19. data/lib/kafka/fetched_message.rb +47 -0
  20. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  21. data/lib/kafka/fetcher.rb +221 -0
  22. data/lib/kafka/gzip_codec.rb +30 -0
  23. data/lib/kafka/heartbeat.rb +25 -0
  24. data/lib/kafka/instrumenter.rb +38 -0
  25. data/lib/kafka/lz4_codec.rb +23 -0
  26. data/lib/kafka/message_buffer.rb +87 -0
  27. data/lib/kafka/offset_manager.rb +248 -0
  28. data/lib/kafka/partitioner.rb +35 -0
  29. data/lib/kafka/pause.rb +92 -0
  30. data/lib/kafka/pending_message.rb +29 -0
  31. data/lib/kafka/pending_message_queue.rb +41 -0
  32. data/lib/kafka/produce_operation.rb +205 -0
  33. data/lib/kafka/producer.rb +504 -0
  34. data/lib/kafka/protocol.rb +217 -0
  35. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  36. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  37. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  38. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  39. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  40. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  41. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  42. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  43. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  44. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  45. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  46. data/lib/kafka/protocol/decoder.rb +175 -0
  47. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  48. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  49. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  50. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  51. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  52. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  53. data/lib/kafka/protocol/encoder.rb +184 -0
  54. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  55. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  56. data/lib/kafka/protocol/fetch_request.rb +70 -0
  57. data/lib/kafka/protocol/fetch_response.rb +136 -0
  58. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  59. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  60. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  61. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  62. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  63. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  64. data/lib/kafka/protocol/join_group_request.rb +41 -0
  65. data/lib/kafka/protocol/join_group_response.rb +33 -0
  66. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  67. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  68. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  69. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  70. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  71. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  72. data/lib/kafka/protocol/member_assignment.rb +42 -0
  73. data/lib/kafka/protocol/message.rb +172 -0
  74. data/lib/kafka/protocol/message_set.rb +55 -0
  75. data/lib/kafka/protocol/metadata_request.rb +31 -0
  76. data/lib/kafka/protocol/metadata_response.rb +185 -0
  77. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  78. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  79. data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
  80. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  81. data/lib/kafka/protocol/produce_request.rb +92 -0
  82. data/lib/kafka/protocol/produce_response.rb +63 -0
  83. data/lib/kafka/protocol/record.rb +88 -0
  84. data/lib/kafka/protocol/record_batch.rb +222 -0
  85. data/lib/kafka/protocol/request_message.rb +26 -0
  86. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  87. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  88. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  89. data/lib/kafka/protocol/sync_group_response.rb +23 -0
  90. data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
  91. data/lib/kafka/sasl/gssapi.rb +76 -0
  92. data/lib/kafka/sasl/oauth.rb +64 -0
  93. data/lib/kafka/sasl/plain.rb +39 -0
  94. data/lib/kafka/sasl/scram.rb +177 -0
  95. data/lib/kafka/sasl_authenticator.rb +61 -0
  96. data/lib/kafka/snappy_codec.rb +25 -0
  97. data/lib/kafka/socket_with_timeout.rb +96 -0
  98. data/lib/kafka/ssl_context.rb +66 -0
  99. data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
  100. data/lib/kafka/statsd.rb +296 -0
  101. data/lib/kafka/tagged_logger.rb +72 -0
  102. data/lib/kafka/transaction_manager.rb +261 -0
  103. data/lib/kafka/transaction_state_machine.rb +72 -0
  104. data/lib/kafka/version.rb +5 -0
  105. metadata +461 -0
checksums.yaml
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 8e6fc2fd4483589bb8c66bc13bfefd842aa86107
+   data.tar.gz: 564f7d4f5ae481026f163016de00ec0f2ccaa382
+ SHA512:
+   metadata.gz: 992f706b07522171fe1e07c7b5e5da66f2cef048771781de023289c0eab841723d578d1ae8e3445636da73a9e28ff0b87ae2d0cd740a3d4cce8f62cb23c482e6
+   data.tar.gz: 3a28bfbe9044b48ec890a307756228d4d46fbc4cd21eaf9713f314375a95e33e3b9045580cf80cb3930ed4eb0e53bd185a854f2dc37fe5600d7e53dfe1f3393d
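These are the standard RubyGems digests of the two archives packed inside the .gem file. To confirm that a locally downloaded copy matches the registry, the digests can be recomputed with Ruby's standard library. A minimal sketch, assuming the gem file sits in the current directory (the filename is illustrative):

# Recompute the registry digests from a downloaded gem. A .gem file is a
# plain (uncompressed) tar archive whose entries include metadata.gz and
# data.tar.gz -- the two files the checksums above refer to.
require "digest"
require "rubygems/package"

File.open("ruby-kafka-custom-0.7.7.26.gem", "rb") do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)

    content = entry.read
    puts "#{entry.full_name}:"
    puts "  SHA1:   #{Digest::SHA1.hexdigest(content)}"
    puts "  SHA512: #{Digest::SHA512.hexdigest(content)}"
  end
end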
data/lib/kafka/async_producer.rb
@@ -0,0 +1,279 @@
+ # frozen_string_literal: true
+
+ require "thread"
+
+ module Kafka
+
+   # A Kafka producer that does all its work in the background so as to not block
+   # the calling thread. Calls to {#deliver_messages} are asynchronous and return
+   # immediately.
+   #
+   # In addition to this property it's possible to define automatic delivery
+   # policies. These allow placing an upper bound on the number of buffered
+   # messages and the time between message deliveries.
+   #
+   # * If `delivery_threshold` is set to a value _n_ higher than zero, the producer
+   #   will automatically deliver its messages once its buffer size reaches _n_.
+   # * If `delivery_interval` is set to a value _n_ higher than zero, the producer
+   #   will automatically deliver its messages every _n_ seconds.
+   #
+   # By default, automatic delivery is disabled and you'll have to call
+   # {#deliver_messages} manually.
+   #
+   # ## Buffer Overflow and Backpressure
+   #
+   # The calling thread communicates with the background thread doing the actual
+   # work using a thread safe queue. While the background thread is busy delivering
+   # messages, new messages will be buffered in the queue. In order to avoid
+   # the queue growing uncontrollably in cases where the background thread gets
+   # stuck or can't follow the pace of the calling thread, there's a maximum
+   # number of messages that is allowed to be buffered. You can configure this
+   # value by setting `max_queue_size`.
+   #
+   # If you produce messages faster than the background producer thread can
+   # deliver them to Kafka you will eventually fill the producer's buffer. Once
+   # this happens, the background thread will stop popping messages off the
+   # queue until it can successfully deliver the buffered messages. The queue
+   # will therefore grow in size, potentially hitting the `max_queue_size` limit.
+   # Once this happens, calls to {#produce} will raise a {BufferOverflow} error.
+   #
+   # Depending on your use case you may want to slow down the rate of messages
+   # being produced or perhaps halt your application completely until the
+   # producer can deliver the buffered messages and clear the message queue.
+   #
+   # ## Example
+   #
+   #     producer = kafka.async_producer(
+   #       # Keep at most 1,000 messages in the buffer before delivering:
+   #       delivery_threshold: 1000,
+   #
+   #       # Deliver messages every 30 seconds:
+   #       delivery_interval: 30,
+   #     )
+   #
+   #     # There's no need to manually call #deliver_messages, it will happen
+   #     # automatically in the background.
+   #     producer.produce("hello", topic: "greetings")
+   #
+   #     # Remember to shut down the producer when you're done with it.
+   #     producer.shutdown
+   #
+   class AsyncProducer
+     THREAD_MUTEX = Mutex.new
+
+     # Initializes a new AsyncProducer.
+     #
+     # @param sync_producer [Kafka::Producer] the synchronous producer that should
+     #   be used in the background.
+     # @param max_queue_size [Integer] the maximum number of messages allowed in
+     #   the queue.
+     # @param delivery_threshold [Integer] if greater than zero, the number of
+     #   buffered messages that will automatically trigger a delivery.
+     # @param delivery_interval [Integer] if greater than zero, the number of
+     #   seconds between automatic message deliveries.
+     #
+     def initialize(sync_producer:, max_queue_size: 1000, delivery_threshold: 0, delivery_interval: 0, max_retries: -1, retry_backoff: 0, instrumenter:, logger:)
+       raise ArgumentError unless max_queue_size > 0
+       raise ArgumentError unless delivery_threshold >= 0
+       raise ArgumentError unless delivery_interval >= 0
+
+       @queue = Queue.new
+       @max_queue_size = max_queue_size
+       @instrumenter = instrumenter
+       @logger = TaggedLogger.new(logger)
+
+       @worker = Worker.new(
+         queue: @queue,
+         producer: sync_producer,
+         delivery_threshold: delivery_threshold,
+         max_retries: max_retries,
+         retry_backoff: retry_backoff,
+         instrumenter: instrumenter,
+         logger: logger
+       )
+
+       # The timer will no-op if the delivery interval is zero.
+       @timer = Timer.new(queue: @queue, interval: delivery_interval)
+     end
+
+     # Produces a message to the specified topic.
+     #
+     # @see Kafka::Producer#produce
+     # @param (see Kafka::Producer#produce)
+     # @raise [BufferOverflow] if the message queue is full.
+     # @return [nil]
+     def produce(value, topic:, **options)
+       ensure_threads_running!
+
+       if @queue.size >= @max_queue_size
+         buffer_overflow topic,
+           "Cannot produce to #{topic}, max queue size (#{@max_queue_size} messages) reached"
+       end
+
+       args = [value, **options.merge(topic: topic)]
+       @queue << [:produce, args]
+
+       @instrumenter.instrument("enqueue_message.async_producer", {
+         topic: topic,
+         queue_size: @queue.size,
+         max_queue_size: @max_queue_size,
+       })
+
+       nil
+     end
+
+     # Asynchronously delivers the buffered messages. This method will return
+     # immediately and the actual work will be done in the background.
+     #
+     # @see Kafka::Producer#deliver_messages
+     # @return [nil]
+     def deliver_messages
+       @queue << [:deliver_messages, nil]
+
+       nil
+     end
+
+     # Shuts down the producer, releasing the network resources used. This
+     # method will block until the buffered messages have been delivered.
+     #
+     # @see Kafka::Producer#shutdown
+     # @return [nil]
+     def shutdown
+       @timer_thread && @timer_thread.exit
+       @queue << [:shutdown, nil]
+       @worker_thread && @worker_thread.join
+
+       nil
+     end
+
+     private
+
+     def ensure_threads_running!
+       THREAD_MUTEX.synchronize do
+         @worker_thread = nil unless @worker_thread && @worker_thread.alive?
+         @worker_thread ||= Thread.new { @worker.run }
+       end
+
+       THREAD_MUTEX.synchronize do
+         @timer_thread = nil unless @timer_thread && @timer_thread.alive?
+         @timer_thread ||= Thread.new { @timer.run }
+       end
+     end
+
+     def buffer_overflow(topic, message)
+       @instrumenter.instrument("buffer_overflow.async_producer", {
+         topic: topic,
+       })
+
+       raise BufferOverflow, message
+     end
+
+     class Timer
+       def initialize(interval:, queue:)
+         @queue = queue
+         @interval = interval
+       end
+
+       def run
+         # Permanently sleep if the timer interval is zero.
+         Thread.stop if @interval.zero?
+
+         loop do
+           sleep(@interval)
+           @queue << [:deliver_messages, nil]
+         end
+       end
+     end
+
+     class Worker
+       def initialize(queue:, producer:, delivery_threshold:, max_retries: -1, retry_backoff: 0, instrumenter:, logger:)
+         @queue = queue
+         @producer = producer
+         @delivery_threshold = delivery_threshold
+         @max_retries = max_retries
+         @retry_backoff = retry_backoff
+         @instrumenter = instrumenter
+         @logger = TaggedLogger.new(logger)
+       end
+
+       def run
+         @logger.push_tags(@producer.to_s)
+         @logger.info "Starting async producer in the background..."
+
+         loop do
+           operation, payload = @queue.pop
+
+           case operation
+           when :produce
+             produce(*payload)
+             deliver_messages if threshold_reached?
+           when :deliver_messages
+             deliver_messages
+           when :shutdown
+             begin
+               # Deliver any pending messages first.
+               @producer.deliver_messages
+             rescue Error => e
+               @logger.error("Failed to deliver messages during shutdown: #{e.message}")
+
+               @instrumenter.instrument("drop_messages.async_producer", {
+                 message_count: @producer.buffer_size + @queue.size,
+               })
+             end
+
+             # Stop the run loop.
+             break
+           else
+             raise "Unknown operation #{operation.inspect}"
+           end
+         end
+       rescue Kafka::Error => e
+         @logger.error "Unexpected Kafka error #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
+         @logger.info "Restarting in 10 seconds..."
+
+         sleep 10
+         retry
+       rescue Exception => e
+         @logger.error "Unexpected Kafka error #{e.class}: #{e.message}\n#{e.backtrace.join("\n")}"
+         @logger.error "Async producer crashed!"
+       ensure
+         @producer.shutdown
+         @logger.pop_tags
+       end
+
+       private
+
+       def produce(*args)
+         retries = 0
+         begin
+           @producer.produce(*args)
+         rescue BufferOverflow => e
+           deliver_messages
+           if @max_retries == -1
+             retry
+           elsif retries < @max_retries
+             retries += 1
+             sleep @retry_backoff**retries
+             retry
+           else
+             @logger.error("Failed to asynchronously produce messages due to BufferOverflow")
+             @instrumenter.instrument("error.async_producer", { error: e })
+           end
+         end
+       end
+
+       def deliver_messages
+         @producer.deliver_messages
+       rescue DeliveryFailed, ConnectionError => e
+         # Failed to deliver messages -- nothing to do but log and try again later.
+         @logger.error("Failed to asynchronously deliver messages: #{e.message}")
+         @instrumenter.instrument("error.async_producer", { error: e })
+       end
+
+       def threshold_reached?
+         @delivery_threshold > 0 &&
+           @producer.buffer_size >= @delivery_threshold
+       end
+     end
+   end
+ end
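The class comment above suggests slowing down or halting when {#produce} raises a BufferOverflow. Assuming this fork keeps the upstream ruby-kafka public API, that advice translates into a small backoff loop around produce. A minimal sketch; the broker address, topic name, and the produce_with_backoff helper are illustrative, not part of the gem:

# Backing off when the async producer's queue is full.
require "kafka"

kafka = Kafka.new(["localhost:9092"], client_id: "backpressure-example")

producer = kafka.async_producer(
  max_queue_size: 1000,
  delivery_threshold: 100,
  delivery_interval: 10
)

def produce_with_backoff(producer, value, topic:, attempts: 5)
  attempts.times do |i|
    begin
      return producer.produce(value, topic: topic)
    rescue Kafka::BufferOverflow
      # Queue is full: wait for the background worker to drain it, with
      # exponentially growing pauses (0.1s, 0.2s, 0.4s, ...).
      sleep(0.1 * 2**i)
    end
  end

  raise Kafka::BufferOverflow, "queue still full after #{attempts} attempts"
end

produce_with_backoff(producer, "hello", topic: "greetings")
producer.shutdown

Exponential backoff gives the background worker time to drain the queue before the caller enqueues more; the other common choices are dropping the message or blocking indefinitely.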
data/lib/kafka/broker.rb
@@ -0,0 +1,205 @@
+ # frozen_string_literal: true
+
+ require "logger"
+ require "kafka/connection"
+ require "kafka/protocol"
+
+ module Kafka
+   class Broker
+     def initialize(connection_builder:, host:, port:, node_id: nil, logger:)
+       @connection_builder = connection_builder
+       @connection = nil
+       @host = host
+       @port = port
+       @node_id = node_id
+       @logger = TaggedLogger.new(logger)
+     end
+
+     def address_match?(host, port)
+       host == @host && port == @port
+     end
+
+     # @return [String]
+     def to_s
+       "#{@host}:#{@port} (node_id=#{@node_id.inspect})"
+     end
+
+     # @return [nil]
+     def disconnect
+       connection.close if connected?
+     end
+
+     # @return [Boolean]
+     def connected?
+       !@connection.nil?
+     end
+
+     # Fetches cluster metadata from the broker.
+     #
+     # @param (see Kafka::Protocol::MetadataRequest#initialize)
+     # @return [Kafka::Protocol::MetadataResponse]
+     def fetch_metadata(**options)
+       request = Protocol::MetadataRequest.new(**options)
+
+       send_request(request)
+     end
+
+     # Fetches messages from a specified topic and partition.
+     #
+     # @param (see Kafka::Protocol::FetchRequest#initialize)
+     # @return [Kafka::Protocol::FetchResponse]
+     def fetch_messages(**options)
+       request = Protocol::FetchRequest.new(**options)
+
+       send_request(request)
+     end
+
+     # Lists the offset of the specified topics and partitions.
+     #
+     # @param (see Kafka::Protocol::ListOffsetRequest#initialize)
+     # @return [Kafka::Protocol::ListOffsetResponse]
+     def list_offsets(**options)
+       request = Protocol::ListOffsetRequest.new(**options)
+
+       send_request(request)
+     end
+
+     # Produces a set of messages to the broker.
+     #
+     # @param (see Kafka::Protocol::ProduceRequest#initialize)
+     # @return [Kafka::Protocol::ProduceResponse]
+     def produce(**options)
+       request = Protocol::ProduceRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def fetch_offsets(**options)
+       request = Protocol::OffsetFetchRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def commit_offsets(**options)
+       request = Protocol::OffsetCommitRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def join_group(**options)
+       request = Protocol::JoinGroupRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def sync_group(**options)
+       request = Protocol::SyncGroupRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def leave_group(**options)
+       request = Protocol::LeaveGroupRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def find_coordinator(**options)
+       request = Protocol::FindCoordinatorRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def heartbeat(**options)
+       request = Protocol::HeartbeatRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def create_topics(**options)
+       request = Protocol::CreateTopicsRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def delete_topics(**options)
+       request = Protocol::DeleteTopicsRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def describe_configs(**options)
+       request = Protocol::DescribeConfigsRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def alter_configs(**options)
+       request = Protocol::AlterConfigsRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def create_partitions(**options)
+       request = Protocol::CreatePartitionsRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def list_groups
+       request = Protocol::ListGroupsRequest.new
+
+       send_request(request)
+     end
+
+     def api_versions
+       request = Protocol::ApiVersionsRequest.new
+
+       send_request(request)
+     end
+
+     def describe_groups(**options)
+       request = Protocol::DescribeGroupsRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def init_producer_id(**options)
+       request = Protocol::InitProducerIDRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def add_partitions_to_txn(**options)
+       request = Protocol::AddPartitionsToTxnRequest.new(**options)
+
+       send_request(request)
+     end
+
+     def end_txn(**options)
+       request = Protocol::EndTxnRequest.new(**options)
+
+       send_request(request)
+     end
+
+     private
+
+     def send_request(request)
+       connection.send_request(request)
+     rescue IdleConnection
+       @logger.warn "Connection has been unused for too long, re-connecting..."
+       @connection.close rescue nil
+       @connection = nil
+       retry
+     rescue ConnectionError
+       @connection.close rescue nil
+       @connection = nil
+
+       raise
+     end
+
+     def connection
+       @connection ||= @connection_builder.build_connection(@host, @port)
+     end
+   end
+ end
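Every public method on Broker follows the same shape: build a Protocol request object, hand it to send_request. The connection handling in send_request is the part worth noting: the socket is opened lazily on first use, an IdleConnection (the peer closed an unused socket) is repaired transparently by rebuilding the connection and retrying the request, while a ConnectionError tears the connection down and propagates so the caller can refresh cluster metadata. A minimal sketch of that pattern in isolation, using hypothetical stand-in names rather than the gem's classes:

# The send_request connection lifecycle, isolated.
class IdleConnection < StandardError; end
class ConnectionError < StandardError; end

class LazyEndpoint
  # builder is any callable returning a fresh "connection" (here, any
  # object that responds to #call with a request).
  def initialize(&builder)
    @builder = builder
    @connection = nil
  end

  def call(request)
    connection.call(request)
  rescue IdleConnection
    reset!  # the peer closed an unused socket: rebuild and replay
    retry
  rescue ConnectionError
    reset!  # hard failure mid-request: tear down and let the caller decide
    raise
  end

  private

  def connection
    @connection ||= @builder.call  # opened lazily, on first use
  end

  def reset!
    @connection = nil
  end
end

# Usage:
endpoint = LazyEndpoint.new { ->(request) { "response to #{request}" } }
puts endpoint.call("metadata")  # => "response to metadata"

The asymmetry is deliberate: an idle disconnect is detected before a request goes out, so replaying it is safe, whereas a failure mid-request may already have had side effects and is surfaced instead of retried.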