ruby-kafka 0.7.9 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +135 -3
- data/.github/workflows/stale.yml +19 -0
- data/.ruby-version +1 -1
- data/CHANGELOG.md +26 -0
- data/README.md +46 -0
- data/lib/kafka/async_producer.rb +5 -2
- data/lib/kafka/client.rb +68 -4
- data/lib/kafka/cluster.rb +52 -0
- data/lib/kafka/connection.rb +3 -0
- data/lib/kafka/consumer.rb +61 -11
- data/lib/kafka/consumer_group.rb +10 -1
- data/lib/kafka/datadog.rb +20 -13
- data/lib/kafka/fetcher.rb +5 -2
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/offset_manager.rb +12 -1
- data/lib/kafka/partitioner.rb +1 -1
- data/lib/kafka/producer.rb +13 -5
- data/lib/kafka/prometheus.rb +78 -79
- data/lib/kafka/protocol/join_group_request.rb +8 -2
- data/lib/kafka/protocol/metadata_response.rb +1 -1
- data/lib/kafka/protocol/offset_fetch_request.rb +3 -1
- data/lib/kafka/protocol/record_batch.rb +5 -4
- data/lib/kafka/ssl_context.rb +4 -3
- data/lib/kafka/tagged_logger.rb +3 -2
- data/lib/kafka/version.rb +1 -1
- data/ruby-kafka.gemspec +4 -4
- metadata +19 -11
data/lib/kafka/partitioner.rb
CHANGED
@@ -19,7 +19,7 @@ module Kafka
|
|
19
19
|
# @param message [Kafka::PendingMessage] the message that should be assigned
|
20
20
|
# a partition.
|
21
21
|
# @return [Integer] the partition number.
|
22
|
-
def
|
22
|
+
def call(partition_count, message)
|
23
23
|
raise ArgumentError if partition_count == 0
|
24
24
|
|
25
25
|
# If no explicit partition key is specified we use the message key instead.
|
data/lib/kafka/producer.rb
CHANGED
@@ -7,6 +7,7 @@ require "kafka/produce_operation"
|
|
7
7
|
require "kafka/pending_message_queue"
|
8
8
|
require "kafka/pending_message"
|
9
9
|
require "kafka/compressor"
|
10
|
+
require "kafka/interceptors"
|
10
11
|
|
11
12
|
module Kafka
|
12
13
|
# Allows sending messages to a Kafka cluster.
|
@@ -129,7 +130,9 @@ module Kafka
|
|
129
130
|
class Producer
|
130
131
|
class AbortTransaction < StandardError; end
|
131
132
|
|
132
|
-
def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
|
133
|
+
def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:,
|
134
|
+
required_acks:, max_retries:, retry_backoff:, max_buffer_size:,
|
135
|
+
max_buffer_bytesize:, partitioner:, interceptors: [])
|
133
136
|
@cluster = cluster
|
134
137
|
@transaction_manager = transaction_manager
|
135
138
|
@logger = TaggedLogger.new(logger)
|
@@ -141,6 +144,8 @@ module Kafka
|
|
141
144
|
@max_buffer_size = max_buffer_size
|
142
145
|
@max_buffer_bytesize = max_buffer_bytesize
|
143
146
|
@compressor = compressor
|
147
|
+
@partitioner = partitioner
|
148
|
+
@interceptors = Interceptors.new(interceptors: interceptors, logger: logger)
|
144
149
|
|
145
150
|
# The set of topics that are produced to.
|
146
151
|
@target_topics = Set.new
|
@@ -188,15 +193,18 @@ module Kafka
|
|
188
193
|
# @raise [BufferOverflow] if the maximum buffer size has been reached.
|
189
194
|
# @return [nil]
|
190
195
|
def produce(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, create_time: Time.now)
|
191
|
-
|
196
|
+
# We want to fail fast if `topic` isn't a String
|
197
|
+
topic = topic.to_str
|
198
|
+
|
199
|
+
message = @interceptors.call(PendingMessage.new(
|
192
200
|
value: value && value.to_s,
|
193
201
|
key: key && key.to_s,
|
194
202
|
headers: headers,
|
195
|
-
topic: topic
|
203
|
+
topic: topic,
|
196
204
|
partition: partition && Integer(partition),
|
197
205
|
partition_key: partition_key && partition_key.to_s,
|
198
206
|
create_time: create_time
|
199
|
-
)
|
207
|
+
))
|
200
208
|
|
201
209
|
if buffer_size >= @max_buffer_size
|
202
210
|
buffer_overflow topic,
|
@@ -452,7 +460,7 @@ module Kafka
|
|
452
460
|
|
453
461
|
if partition.nil?
|
454
462
|
partition_count = @cluster.partitions_for(message.topic).count
|
455
|
-
partition =
|
463
|
+
partition = @partitioner.call(partition_count, message)
|
456
464
|
end
|
457
465
|
|
458
466
|
@buffer.write(
|
data/lib/kafka/prometheus.rb
CHANGED
@@ -42,11 +42,11 @@ module Kafka
|
|
42
42
|
class ConnectionSubscriber < ActiveSupport::Subscriber
|
43
43
|
def initialize
|
44
44
|
super
|
45
|
-
@api_calls = Prometheus.registry.counter(:api_calls, 'Total calls')
|
46
|
-
@api_latency = Prometheus.registry.histogram(:api_latency, 'Latency',
|
47
|
-
@api_request_size = Prometheus.registry.histogram(:api_request_size, 'Request size',
|
48
|
-
@api_response_size = Prometheus.registry.histogram(:api_response_size, 'Response size',
|
49
|
-
@api_errors = Prometheus.registry.counter(:api_errors, 'Errors')
|
45
|
+
@api_calls = Prometheus.registry.counter(:api_calls, docstring: 'Total calls', labels: [:client, :api, :broker])
|
46
|
+
@api_latency = Prometheus.registry.histogram(:api_latency, docstring: 'Latency', buckets: LATENCY_BUCKETS, labels: [:client, :api, :broker])
|
47
|
+
@api_request_size = Prometheus.registry.histogram(:api_request_size, docstring: 'Request size', buckets: SIZE_BUCKETS, labels: [:client, :api, :broker])
|
48
|
+
@api_response_size = Prometheus.registry.histogram(:api_response_size, docstring: 'Response size', buckets: SIZE_BUCKETS, labels: [:client, :api, :broker])
|
49
|
+
@api_errors = Prometheus.registry.counter(:api_errors, docstring: 'Errors', labels: [:client, :api, :broker])
|
50
50
|
end
|
51
51
|
|
52
52
|
def request(event)
|
@@ -58,34 +58,34 @@ module Kafka
|
|
58
58
|
request_size = event.payload.fetch(:request_size, 0)
|
59
59
|
response_size = event.payload.fetch(:response_size, 0)
|
60
60
|
|
61
|
-
@api_calls.increment(key)
|
62
|
-
@api_latency.observe(
|
63
|
-
@api_request_size.observe(
|
64
|
-
@api_response_size.observe(
|
65
|
-
@api_errors.increment(key) if event.payload.key?(:exception)
|
61
|
+
@api_calls.increment(labels: key)
|
62
|
+
@api_latency.observe(event.duration, labels: key)
|
63
|
+
@api_request_size.observe(request_size, labels: key)
|
64
|
+
@api_response_size.observe(response_size, labels: key)
|
65
|
+
@api_errors.increment(labels: key) if event.payload.key?(:exception)
|
66
66
|
end
|
67
67
|
end
|
68
68
|
|
69
69
|
class ConsumerSubscriber < ActiveSupport::Subscriber
|
70
70
|
def initialize
|
71
71
|
super
|
72
|
-
@process_messages = Prometheus.registry.counter(:consumer_process_messages, 'Total messages')
|
73
|
-
@process_message_errors = Prometheus.registry.counter(:consumer_process_message_errors, 'Total errors')
|
72
|
+
@process_messages = Prometheus.registry.counter(:consumer_process_messages, docstring: 'Total messages', labels: [:client, :group_id, :topic, :partition])
|
73
|
+
@process_message_errors = Prometheus.registry.counter(:consumer_process_message_errors, docstring: 'Total errors', labels: [:client, :group_id, :topic, :partition])
|
74
74
|
@process_message_latency =
|
75
|
-
Prometheus.registry.histogram(:consumer_process_message_latency, 'Latency',
|
76
|
-
@offset_lag = Prometheus.registry.gauge(:consumer_offset_lag, 'Offset lag')
|
77
|
-
@time_lag = Prometheus.registry.gauge(:consumer_time_lag, 'Time lag of message')
|
78
|
-
@process_batch_errors = Prometheus.registry.counter(:consumer_process_batch_errors, 'Total errors in batch')
|
75
|
+
Prometheus.registry.histogram(:consumer_process_message_latency, docstring: 'Latency', buckets: LATENCY_BUCKETS, labels: [:client, :group_id, :topic, :partition])
|
76
|
+
@offset_lag = Prometheus.registry.gauge(:consumer_offset_lag, docstring: 'Offset lag', labels: [:client, :group_id, :topic, :partition])
|
77
|
+
@time_lag = Prometheus.registry.gauge(:consumer_time_lag, docstring: 'Time lag of message', labels: [:client, :group_id, :topic, :partition])
|
78
|
+
@process_batch_errors = Prometheus.registry.counter(:consumer_process_batch_errors, docstring: 'Total errors in batch', labels: [:client, :group_id, :topic, :partition])
|
79
79
|
@process_batch_latency =
|
80
|
-
Prometheus.registry.histogram(:consumer_process_batch_latency, 'Latency in batch',
|
81
|
-
@batch_size = Prometheus.registry.histogram(:consumer_batch_size, 'Size of batch',
|
82
|
-
@join_group = Prometheus.registry.histogram(:consumer_join_group, 'Time to join group',
|
83
|
-
@join_group_errors = Prometheus.registry.counter(:consumer_join_group_errors, 'Total error in joining group')
|
84
|
-
@sync_group = Prometheus.registry.histogram(:consumer_sync_group, 'Time to sync group',
|
85
|
-
@sync_group_errors = Prometheus.registry.counter(:consumer_sync_group_errors, 'Total error in syncing group')
|
86
|
-
@leave_group = Prometheus.registry.histogram(:consumer_leave_group, 'Time to leave group',
|
87
|
-
@leave_group_errors = Prometheus.registry.counter(:consumer_leave_group_errors, 'Total error in leaving group')
|
88
|
-
@pause_duration = Prometheus.registry.gauge(:consumer_pause_duration, 'Pause duration')
|
80
|
+
Prometheus.registry.histogram(:consumer_process_batch_latency, docstring: 'Latency in batch', buckets: LATENCY_BUCKETS, labels: [:client, :group_id, :topic, :partition])
|
81
|
+
@batch_size = Prometheus.registry.histogram(:consumer_batch_size, docstring: 'Size of batch', buckets: SIZE_BUCKETS, labels: [:client, :group_id, :topic, :partition])
|
82
|
+
@join_group = Prometheus.registry.histogram(:consumer_join_group, docstring: 'Time to join group', buckets: DELAY_BUCKETS, labels: [:client, :group_id])
|
83
|
+
@join_group_errors = Prometheus.registry.counter(:consumer_join_group_errors, docstring: 'Total error in joining group', labels: [:client, :group_id])
|
84
|
+
@sync_group = Prometheus.registry.histogram(:consumer_sync_group, docstring: 'Time to sync group', buckets: DELAY_BUCKETS, labels: [:client, :group_id])
|
85
|
+
@sync_group_errors = Prometheus.registry.counter(:consumer_sync_group_errors, docstring: 'Total error in syncing group', labels: [:client, :group_id])
|
86
|
+
@leave_group = Prometheus.registry.histogram(:consumer_leave_group, docstring: 'Time to leave group', buckets: DELAY_BUCKETS, labels: [:client, :group_id])
|
87
|
+
@leave_group_errors = Prometheus.registry.counter(:consumer_leave_group_errors, docstring: 'Total error in leaving group', labels: [:client, :group_id])
|
88
|
+
@pause_duration = Prometheus.registry.gauge(:consumer_pause_duration, docstring: 'Pause duration', labels: [:client, :group_id, :topic, :partition])
|
89
89
|
end
|
90
90
|
|
91
91
|
def process_message(event)
|
@@ -102,18 +102,18 @@ module Kafka
|
|
102
102
|
time_lag = create_time && ((Time.now - create_time) * 1000).to_i
|
103
103
|
|
104
104
|
if event.payload.key?(:exception)
|
105
|
-
@process_message_errors.increment(key)
|
105
|
+
@process_message_errors.increment(labels: key)
|
106
106
|
else
|
107
|
-
@process_message_latency.observe(
|
108
|
-
@process_messages.increment(key)
|
107
|
+
@process_message_latency.observe(event.duration, labels: key)
|
108
|
+
@process_messages.increment(labels: key)
|
109
109
|
end
|
110
110
|
|
111
|
-
@offset_lag.set(
|
111
|
+
@offset_lag.set(offset_lag, labels: key)
|
112
112
|
|
113
113
|
# Not all messages have timestamps.
|
114
114
|
return unless time_lag
|
115
115
|
|
116
|
-
@time_lag.set(
|
116
|
+
@time_lag.set(time_lag, labels: key)
|
117
117
|
end
|
118
118
|
|
119
119
|
def process_batch(event)
|
@@ -126,10 +126,10 @@ module Kafka
|
|
126
126
|
message_count = event.payload.fetch(:message_count)
|
127
127
|
|
128
128
|
if event.payload.key?(:exception)
|
129
|
-
@process_batch_errors.increment(key)
|
129
|
+
@process_batch_errors.increment(labels: key)
|
130
130
|
else
|
131
|
-
@process_batch_latency.observe(
|
132
|
-
@process_messages.increment(
|
131
|
+
@process_batch_latency.observe(event.duration, labels: key)
|
132
|
+
@process_messages.increment(by: message_count, labels: key)
|
133
133
|
end
|
134
134
|
end
|
135
135
|
|
@@ -143,29 +143,29 @@ module Kafka
|
|
143
143
|
offset_lag = event.payload.fetch(:offset_lag)
|
144
144
|
batch_size = event.payload.fetch(:message_count)
|
145
145
|
|
146
|
-
@batch_size.observe(
|
147
|
-
@offset_lag.set(
|
146
|
+
@batch_size.observe(batch_size, labels: key)
|
147
|
+
@offset_lag.set(offset_lag, labels: key)
|
148
148
|
end
|
149
149
|
|
150
150
|
def join_group(event)
|
151
151
|
key = { client: event.payload.fetch(:client_id), group_id: event.payload.fetch(:group_id) }
|
152
|
-
@join_group.observe(
|
152
|
+
@join_group.observe(event.duration, labels: key)
|
153
153
|
|
154
|
-
@join_group_errors.increment(key) if event.payload.key?(:exception)
|
154
|
+
@join_group_errors.increment(labels: key) if event.payload.key?(:exception)
|
155
155
|
end
|
156
156
|
|
157
157
|
def sync_group(event)
|
158
158
|
key = { client: event.payload.fetch(:client_id), group_id: event.payload.fetch(:group_id) }
|
159
|
-
@sync_group.observe(
|
159
|
+
@sync_group.observe(event.duration, labels: key)
|
160
160
|
|
161
|
-
@sync_group_errors.increment(key) if event.payload.key?(:exception)
|
161
|
+
@sync_group_errors.increment(labels: key) if event.payload.key?(:exception)
|
162
162
|
end
|
163
163
|
|
164
164
|
def leave_group(event)
|
165
165
|
key = { client: event.payload.fetch(:client_id), group_id: event.payload.fetch(:group_id) }
|
166
|
-
@leave_group.observe(
|
166
|
+
@leave_group.observe(event.duration, labels: key)
|
167
167
|
|
168
|
-
@leave_group_errors.increment(key) if event.payload.key?(:exception)
|
168
|
+
@leave_group_errors.increment(labels: key) if event.payload.key?(:exception)
|
169
169
|
end
|
170
170
|
|
171
171
|
def pause_status(event)
|
@@ -177,28 +177,28 @@ module Kafka
|
|
177
177
|
}
|
178
178
|
|
179
179
|
duration = event.payload.fetch(:duration)
|
180
|
-
@pause_duration.set(
|
180
|
+
@pause_duration.set(duration, labels: key)
|
181
181
|
end
|
182
182
|
end
|
183
183
|
|
184
184
|
class ProducerSubscriber < ActiveSupport::Subscriber
|
185
185
|
def initialize
|
186
186
|
super
|
187
|
-
@produce_messages = Prometheus.registry.counter(:producer_produced_messages, 'Produced messages total')
|
187
|
+
@produce_messages = Prometheus.registry.counter(:producer_produced_messages, docstring: 'Produced messages total', labels: [:client, :topic])
|
188
188
|
@produce_message_size =
|
189
|
-
Prometheus.registry.histogram(:producer_message_size, 'Message size',
|
190
|
-
@buffer_size = Prometheus.registry.histogram(:producer_buffer_size, 'Buffer size',
|
191
|
-
@buffer_fill_ratio = Prometheus.registry.histogram(:producer_buffer_fill_ratio, 'Buffer fill ratio')
|
192
|
-
@buffer_fill_percentage = Prometheus.registry.histogram(:producer_buffer_fill_percentage, 'Buffer fill percentage')
|
193
|
-
@produce_errors = Prometheus.registry.counter(:producer_produce_errors, 'Produce errors')
|
194
|
-
@deliver_errors = Prometheus.registry.counter(:producer_deliver_errors, 'Deliver error')
|
189
|
+
Prometheus.registry.histogram(:producer_message_size, docstring: 'Message size', buckets: SIZE_BUCKETS, labels: [:client, :topic])
|
190
|
+
@buffer_size = Prometheus.registry.histogram(:producer_buffer_size, docstring: 'Buffer size', buckets: SIZE_BUCKETS, labels: [:client])
|
191
|
+
@buffer_fill_ratio = Prometheus.registry.histogram(:producer_buffer_fill_ratio, docstring: 'Buffer fill ratio', labels: [:client])
|
192
|
+
@buffer_fill_percentage = Prometheus.registry.histogram(:producer_buffer_fill_percentage, docstring: 'Buffer fill percentage', labels: [:client])
|
193
|
+
@produce_errors = Prometheus.registry.counter(:producer_produce_errors, docstring: 'Produce errors', labels: [:client, :topic])
|
194
|
+
@deliver_errors = Prometheus.registry.counter(:producer_deliver_errors, docstring: 'Deliver error', labels: [:client])
|
195
195
|
@deliver_latency =
|
196
|
-
Prometheus.registry.histogram(:producer_deliver_latency, 'Delivery latency',
|
197
|
-
@deliver_messages = Prometheus.registry.counter(:producer_deliver_messages, 'Total count of delivered messages')
|
198
|
-
@deliver_attempts = Prometheus.registry.histogram(:producer_deliver_attempts, 'Delivery attempts')
|
199
|
-
@ack_messages = Prometheus.registry.counter(:producer_ack_messages, 'Ack')
|
200
|
-
@ack_delay = Prometheus.registry.histogram(:producer_ack_delay, 'Ack delay',
|
201
|
-
@ack_errors = Prometheus.registry.counter(:producer_ack_errors, 'Ack errors')
|
196
|
+
Prometheus.registry.histogram(:producer_deliver_latency, docstring: 'Delivery latency', buckets: LATENCY_BUCKETS, labels: [:client])
|
197
|
+
@deliver_messages = Prometheus.registry.counter(:producer_deliver_messages, docstring: 'Total count of delivered messages', labels: [:client])
|
198
|
+
@deliver_attempts = Prometheus.registry.histogram(:producer_deliver_attempts, docstring: 'Delivery attempts', labels: [:client])
|
199
|
+
@ack_messages = Prometheus.registry.counter(:producer_ack_messages, docstring: 'Ack', labels: [:client, :topic])
|
200
|
+
@ack_delay = Prometheus.registry.histogram(:producer_ack_delay, docstring: 'Ack delay', buckets: LATENCY_BUCKETS, labels: [:client, :topic])
|
201
|
+
@ack_errors = Prometheus.registry.counter(:producer_ack_errors, docstring: 'Ack errors', labels: [:client, :topic])
|
202
202
|
end
|
203
203
|
|
204
204
|
def produce_message(event)
|
@@ -212,20 +212,20 @@ module Kafka
|
|
212
212
|
buffer_fill_percentage = buffer_fill_ratio * 100.0
|
213
213
|
|
214
214
|
# This gets us the write rate.
|
215
|
-
@produce_messages.increment(key)
|
216
|
-
@produce_message_size.observe(
|
215
|
+
@produce_messages.increment(labels: key)
|
216
|
+
@produce_message_size.observe(message_size, labels: key)
|
217
217
|
|
218
218
|
# This gets us the avg/max buffer size per producer.
|
219
|
-
@buffer_size.observe({ client: client }
|
219
|
+
@buffer_size.observe(buffer_size, labels: { client: client })
|
220
220
|
|
221
221
|
# This gets us the avg/max buffer fill ratio per producer.
|
222
|
-
@buffer_fill_ratio.observe({ client: client }
|
223
|
-
@buffer_fill_percentage.observe({ client: client }
|
222
|
+
@buffer_fill_ratio.observe(buffer_fill_ratio, labels: { client: client })
|
223
|
+
@buffer_fill_percentage.observe(buffer_fill_percentage, labels: { client: client })
|
224
224
|
end
|
225
225
|
|
226
226
|
def buffer_overflow(event)
|
227
227
|
key = { client: event.payload.fetch(:client_id), topic: event.payload.fetch(:topic) }
|
228
|
-
@produce_errors.increment(key)
|
228
|
+
@produce_errors.increment(labels: key)
|
229
229
|
end
|
230
230
|
|
231
231
|
def deliver_messages(event)
|
@@ -233,40 +233,40 @@ module Kafka
|
|
233
233
|
message_count = event.payload.fetch(:delivered_message_count)
|
234
234
|
attempts = event.payload.fetch(:attempts)
|
235
235
|
|
236
|
-
@deliver_errors.increment(key) if event.payload.key?(:exception)
|
237
|
-
@deliver_latency.observe(
|
236
|
+
@deliver_errors.increment(labels: key) if event.payload.key?(:exception)
|
237
|
+
@deliver_latency.observe(event.duration, labels: key)
|
238
238
|
|
239
239
|
# Messages delivered to Kafka:
|
240
|
-
@deliver_messages.increment(
|
240
|
+
@deliver_messages.increment(by: message_count, labels: key)
|
241
241
|
|
242
242
|
# Number of attempts to deliver messages:
|
243
|
-
@deliver_attempts.observe(
|
243
|
+
@deliver_attempts.observe(attempts, labels: key)
|
244
244
|
end
|
245
245
|
|
246
246
|
def ack_message(event)
|
247
247
|
key = { client: event.payload.fetch(:client_id), topic: event.payload.fetch(:topic) }
|
248
248
|
|
249
249
|
# Number of messages ACK'd for the topic.
|
250
|
-
@ack_messages.increment(key)
|
250
|
+
@ack_messages.increment(labels: key)
|
251
251
|
|
252
252
|
# Histogram of delay between a message being produced and it being ACK'd.
|
253
|
-
@ack_delay.observe(
|
253
|
+
@ack_delay.observe(event.payload.fetch(:delay), labels: key)
|
254
254
|
end
|
255
255
|
|
256
256
|
def topic_error(event)
|
257
257
|
key = { client: event.payload.fetch(:client_id), topic: event.payload.fetch(:topic) }
|
258
258
|
|
259
|
-
@ack_errors.increment(key)
|
259
|
+
@ack_errors.increment(labels: key)
|
260
260
|
end
|
261
261
|
end
|
262
262
|
|
263
263
|
class AsyncProducerSubscriber < ActiveSupport::Subscriber
|
264
264
|
def initialize
|
265
265
|
super
|
266
|
-
@queue_size = Prometheus.registry.histogram(:async_producer_queue_size, 'Queue size',
|
267
|
-
@queue_fill_ratio = Prometheus.registry.histogram(:async_producer_queue_fill_ratio, 'Queue fill ratio')
|
268
|
-
@produce_errors = Prometheus.registry.counter(:async_producer_produce_errors, 'Producer errors')
|
269
|
-
@dropped_messages = Prometheus.registry.counter(:async_producer_dropped_messages, 'Dropped messages')
|
266
|
+
@queue_size = Prometheus.registry.histogram(:async_producer_queue_size, docstring: 'Queue size', buckets: SIZE_BUCKETS, labels: [:client, :topic])
|
267
|
+
@queue_fill_ratio = Prometheus.registry.histogram(:async_producer_queue_fill_ratio, docstring: 'Queue fill ratio', labels: [:client, :topic])
|
268
|
+
@produce_errors = Prometheus.registry.counter(:async_producer_produce_errors, docstring: 'Producer errors', labels: [:client, :topic])
|
269
|
+
@dropped_messages = Prometheus.registry.counter(:async_producer_dropped_messages, docstring: 'Dropped messages', labels: [:client])
|
270
270
|
end
|
271
271
|
|
272
272
|
def enqueue_message(event)
|
@@ -277,29 +277,28 @@ module Kafka
|
|
277
277
|
queue_fill_ratio = queue_size.to_f / max_queue_size.to_f
|
278
278
|
|
279
279
|
# This gets us the avg/max queue size per producer.
|
280
|
-
@queue_size.observe(
|
280
|
+
@queue_size.observe(queue_size, labels: key)
|
281
281
|
|
282
282
|
# This gets us the avg/max queue fill ratio per producer.
|
283
|
-
@queue_fill_ratio.observe(
|
283
|
+
@queue_fill_ratio.observe(queue_fill_ratio, labels: key)
|
284
284
|
end
|
285
285
|
|
286
286
|
def buffer_overflow(event)
|
287
287
|
key = { client: event.payload.fetch(:client_id), topic: event.payload.fetch(:topic) }
|
288
|
-
@produce_errors.increment(key)
|
288
|
+
@produce_errors.increment(labels: key)
|
289
289
|
end
|
290
290
|
|
291
291
|
def drop_messages(event)
|
292
292
|
key = { client: event.payload.fetch(:client_id) }
|
293
293
|
message_count = event.payload.fetch(:message_count)
|
294
|
-
|
295
|
-
@dropped_messages.increment(key, message_count)
|
294
|
+
@dropped_messages.increment(by: message_count, labels: key)
|
296
295
|
end
|
297
296
|
end
|
298
297
|
|
299
298
|
class FetcherSubscriber < ActiveSupport::Subscriber
|
300
299
|
def initialize
|
301
300
|
super
|
302
|
-
@queue_size = Prometheus.registry.gauge(:fetcher_queue_size, 'Queue size')
|
301
|
+
@queue_size = Prometheus.registry.gauge(:fetcher_queue_size, docstring: 'Queue size', labels: [:client, :group_id])
|
303
302
|
end
|
304
303
|
|
305
304
|
def loop(event)
|
@@ -307,7 +306,7 @@ module Kafka
|
|
307
306
|
client = event.payload.fetch(:client_id)
|
308
307
|
group_id = event.payload.fetch(:group_id)
|
309
308
|
|
310
|
-
@queue_size.set({ client: client, group_id: group_id }
|
309
|
+
@queue_size.set(queue_size, labels: { client: client, group_id: group_id })
|
311
310
|
end
|
312
311
|
end
|
313
312
|
end
|
data/lib/kafka/protocol/join_group_request.rb
CHANGED
@@ -7,13 +7,14 @@ module Kafka
|
|
7
7
|
class JoinGroupRequest
|
8
8
|
PROTOCOL_TYPE = "consumer"
|
9
9
|
|
10
|
-
def initialize(group_id:, session_timeout:, member_id:, topics: [])
|
10
|
+
def initialize(group_id:, session_timeout:, rebalance_timeout:, member_id:, topics: [])
|
11
11
|
@group_id = group_id
|
12
12
|
@session_timeout = session_timeout * 1000 # Kafka wants ms.
|
13
|
+
@rebalance_timeout = rebalance_timeout * 1000 # Kafka wants ms.
|
13
14
|
@member_id = member_id || ""
|
14
15
|
@protocol_type = PROTOCOL_TYPE
|
15
16
|
@group_protocols = {
|
16
|
-
"
|
17
|
+
"roundrobin" => ConsumerGroupProtocol.new(topics: topics),
|
17
18
|
}
|
18
19
|
end
|
19
20
|
|
@@ -21,6 +22,10 @@ module Kafka
|
|
21
22
|
JOIN_GROUP_API
|
22
23
|
end
|
23
24
|
|
25
|
+
def api_version
|
26
|
+
1
|
27
|
+
end
|
28
|
+
|
24
29
|
def response_class
|
25
30
|
JoinGroupResponse
|
26
31
|
end
|
@@ -28,6 +33,7 @@ module Kafka
|
|
28
33
|
def encode(encoder)
|
29
34
|
encoder.write_string(@group_id)
|
30
35
|
encoder.write_int32(@session_timeout)
|
36
|
+
encoder.write_int32(@rebalance_timeout)
|
31
37
|
encoder.write_string(@member_id)
|
32
38
|
encoder.write_string(@protocol_type)
|
33
39
|
|
data/lib/kafka/protocol/record_batch.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'bigdecimal'
|
1
2
|
require 'digest/crc32'
|
2
3
|
require 'kafka/protocol/record'
|
3
4
|
|
@@ -131,7 +132,7 @@ module Kafka
|
|
131
132
|
|
132
133
|
records.each_with_index do |record, index|
|
133
134
|
record.offset_delta = index
|
134
|
-
record.timestamp_delta = (record.create_time - first_timestamp).to_i
|
135
|
+
record.timestamp_delta = ((record.create_time - first_timestamp) * 1000).to_i
|
135
136
|
end
|
136
137
|
@last_offset_delta = records.length - 1
|
137
138
|
end
|
@@ -167,8 +168,8 @@ module Kafka
|
|
167
168
|
log_append_time = (attributes & TIMESTAMP_TYPE_MASK) != 0
|
168
169
|
|
169
170
|
last_offset_delta = record_batch_decoder.int32
|
170
|
-
first_timestamp = Time.at(record_batch_decoder.int64 / 1000)
|
171
|
-
max_timestamp = Time.at(record_batch_decoder.int64 / 1000)
|
171
|
+
first_timestamp = Time.at(record_batch_decoder.int64 / BigDecimal(1000))
|
172
|
+
max_timestamp = Time.at(record_batch_decoder.int64 / BigDecimal(1000))
|
172
173
|
|
173
174
|
producer_id = record_batch_decoder.int64
|
174
175
|
producer_epoch = record_batch_decoder.int16
|
@@ -188,7 +189,7 @@ module Kafka
|
|
188
189
|
until records_array_decoder.eof?
|
189
190
|
record = Record.decode(records_array_decoder)
|
190
191
|
record.offset = first_offset + record.offset_delta
|
191
|
-
record.create_time = log_append_time && max_timestamp ? max_timestamp : first_timestamp + record.timestamp_delta
|
192
|
+
record.create_time = log_append_time && max_timestamp ? max_timestamp : first_timestamp + record.timestamp_delta / BigDecimal(1000)
|
192
193
|
records_array << record
|
193
194
|
end
|
194
195
|
|