ruby-kafka-temp-fork 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.circleci/config.yml +393 -0
- data/.github/workflows/stale.yml +19 -0
- data/.gitignore +13 -0
- data/.readygo +1 -0
- data/.rspec +3 -0
- data/.rubocop.yml +44 -0
- data/.ruby-version +1 -0
- data/.yardopts +3 -0
- data/CHANGELOG.md +310 -0
- data/Gemfile +5 -0
- data/ISSUE_TEMPLATE.md +23 -0
- data/LICENSE.txt +176 -0
- data/Procfile +2 -0
- data/README.md +1342 -0
- data/Rakefile +8 -0
- data/benchmarks/message_encoding.rb +23 -0
- data/bin/console +8 -0
- data/bin/setup +5 -0
- data/docker-compose.yml +39 -0
- data/examples/consumer-group.rb +35 -0
- data/examples/firehose-consumer.rb +64 -0
- data/examples/firehose-producer.rb +54 -0
- data/examples/simple-consumer.rb +34 -0
- data/examples/simple-producer.rb +42 -0
- data/examples/ssl-producer.rb +44 -0
- data/lib/kafka.rb +373 -0
- data/lib/kafka/async_producer.rb +291 -0
- data/lib/kafka/broker.rb +217 -0
- data/lib/kafka/broker_info.rb +16 -0
- data/lib/kafka/broker_pool.rb +41 -0
- data/lib/kafka/broker_uri.rb +43 -0
- data/lib/kafka/client.rb +833 -0
- data/lib/kafka/cluster.rb +513 -0
- data/lib/kafka/compression.rb +45 -0
- data/lib/kafka/compressor.rb +86 -0
- data/lib/kafka/connection.rb +223 -0
- data/lib/kafka/connection_builder.rb +33 -0
- data/lib/kafka/consumer.rb +642 -0
- data/lib/kafka/consumer_group.rb +231 -0
- data/lib/kafka/consumer_group/assignor.rb +63 -0
- data/lib/kafka/crc32_hash.rb +15 -0
- data/lib/kafka/datadog.rb +420 -0
- data/lib/kafka/digest.rb +22 -0
- data/lib/kafka/fetch_operation.rb +115 -0
- data/lib/kafka/fetched_batch.rb +58 -0
- data/lib/kafka/fetched_batch_generator.rb +120 -0
- data/lib/kafka/fetched_message.rb +48 -0
- data/lib/kafka/fetched_offset_resolver.rb +48 -0
- data/lib/kafka/fetcher.rb +224 -0
- data/lib/kafka/gzip_codec.rb +34 -0
- data/lib/kafka/heartbeat.rb +25 -0
- data/lib/kafka/instrumenter.rb +38 -0
- data/lib/kafka/interceptors.rb +33 -0
- data/lib/kafka/lz4_codec.rb +27 -0
- data/lib/kafka/message_buffer.rb +87 -0
- data/lib/kafka/murmur2_hash.rb +17 -0
- data/lib/kafka/offset_manager.rb +259 -0
- data/lib/kafka/partitioner.rb +40 -0
- data/lib/kafka/pause.rb +92 -0
- data/lib/kafka/pending_message.rb +29 -0
- data/lib/kafka/pending_message_queue.rb +41 -0
- data/lib/kafka/produce_operation.rb +205 -0
- data/lib/kafka/producer.rb +528 -0
- data/lib/kafka/prometheus.rb +316 -0
- data/lib/kafka/protocol.rb +225 -0
- data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
- data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
- data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
- data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
- data/lib/kafka/protocol/alter_configs_request.rb +44 -0
- data/lib/kafka/protocol/alter_configs_response.rb +49 -0
- data/lib/kafka/protocol/api_versions_request.rb +21 -0
- data/lib/kafka/protocol/api_versions_response.rb +53 -0
- data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
- data/lib/kafka/protocol/create_partitions_request.rb +42 -0
- data/lib/kafka/protocol/create_partitions_response.rb +28 -0
- data/lib/kafka/protocol/create_topics_request.rb +45 -0
- data/lib/kafka/protocol/create_topics_response.rb +26 -0
- data/lib/kafka/protocol/decoder.rb +175 -0
- data/lib/kafka/protocol/delete_topics_request.rb +33 -0
- data/lib/kafka/protocol/delete_topics_response.rb +26 -0
- data/lib/kafka/protocol/describe_configs_request.rb +35 -0
- data/lib/kafka/protocol/describe_configs_response.rb +73 -0
- data/lib/kafka/protocol/describe_groups_request.rb +27 -0
- data/lib/kafka/protocol/describe_groups_response.rb +73 -0
- data/lib/kafka/protocol/encoder.rb +184 -0
- data/lib/kafka/protocol/end_txn_request.rb +29 -0
- data/lib/kafka/protocol/end_txn_response.rb +19 -0
- data/lib/kafka/protocol/fetch_request.rb +70 -0
- data/lib/kafka/protocol/fetch_response.rb +136 -0
- data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
- data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
- data/lib/kafka/protocol/heartbeat_request.rb +27 -0
- data/lib/kafka/protocol/heartbeat_response.rb +17 -0
- data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
- data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
- data/lib/kafka/protocol/join_group_request.rb +47 -0
- data/lib/kafka/protocol/join_group_response.rb +41 -0
- data/lib/kafka/protocol/leave_group_request.rb +25 -0
- data/lib/kafka/protocol/leave_group_response.rb +17 -0
- data/lib/kafka/protocol/list_groups_request.rb +23 -0
- data/lib/kafka/protocol/list_groups_response.rb +35 -0
- data/lib/kafka/protocol/list_offset_request.rb +53 -0
- data/lib/kafka/protocol/list_offset_response.rb +89 -0
- data/lib/kafka/protocol/member_assignment.rb +42 -0
- data/lib/kafka/protocol/message.rb +172 -0
- data/lib/kafka/protocol/message_set.rb +55 -0
- data/lib/kafka/protocol/metadata_request.rb +31 -0
- data/lib/kafka/protocol/metadata_response.rb +185 -0
- data/lib/kafka/protocol/offset_commit_request.rb +47 -0
- data/lib/kafka/protocol/offset_commit_response.rb +29 -0
- data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
- data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
- data/lib/kafka/protocol/produce_request.rb +94 -0
- data/lib/kafka/protocol/produce_response.rb +63 -0
- data/lib/kafka/protocol/record.rb +88 -0
- data/lib/kafka/protocol/record_batch.rb +223 -0
- data/lib/kafka/protocol/request_message.rb +26 -0
- data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
- data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
- data/lib/kafka/protocol/sync_group_request.rb +33 -0
- data/lib/kafka/protocol/sync_group_response.rb +26 -0
- data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
- data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
- data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
- data/lib/kafka/sasl/gssapi.rb +76 -0
- data/lib/kafka/sasl/oauth.rb +64 -0
- data/lib/kafka/sasl/plain.rb +39 -0
- data/lib/kafka/sasl/scram.rb +180 -0
- data/lib/kafka/sasl_authenticator.rb +61 -0
- data/lib/kafka/snappy_codec.rb +29 -0
- data/lib/kafka/socket_with_timeout.rb +96 -0
- data/lib/kafka/ssl_context.rb +66 -0
- data/lib/kafka/ssl_socket_with_timeout.rb +188 -0
- data/lib/kafka/statsd.rb +296 -0
- data/lib/kafka/tagged_logger.rb +77 -0
- data/lib/kafka/transaction_manager.rb +306 -0
- data/lib/kafka/transaction_state_machine.rb +72 -0
- data/lib/kafka/version.rb +5 -0
- data/lib/kafka/zstd_codec.rb +27 -0
- data/lib/ruby-kafka-temp-fork.rb +5 -0
- data/ruby-kafka-temp-fork.gemspec +54 -0
- metadata +520 -0
@@ -0,0 +1,316 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Subscriber to ruby_kafka to report metrics to prometheus
|
5
|
+
#
|
6
|
+
# Usage:
|
7
|
+
# require "kafka/prometheus"
|
8
|
+
#
|
9
|
+
# Once the file has been required, no further configuration is needed; all operational
|
10
|
+
# metrics are automatically emitted (unless PROMETHEUS_NO_AUTO_START is defined).
|
11
|
+
#
|
12
|
+
# By Peter Mustel, T2 Data AB
|
13
|
+
#
|
14
|
+
begin
|
15
|
+
require 'prometheus/client'
|
16
|
+
rescue LoadError
|
17
|
+
warn 'In order to report Kafka client metrics to Prometheus you need to install the `prometheus-client` gem.'
|
18
|
+
raise
|
19
|
+
end
|
20
|
+
|
21
|
+
require 'active_support/subscriber'
|
22
|
+
|
23
|
+
module Kafka
|
24
|
+
module Prometheus
|
25
|
+
# Histogram bucket bounds for the size metrics in this file (request,
# response, message, batch, buffer and queue sizes).
SIZE_BUCKETS = [
  1,
  10,
  100,
  1000,
  10_000,
  100_000,
  1_000_000
].freeze

# Histogram bucket bounds for the latency and ack-delay metrics.
LATENCY_BUCKETS = [
  0.0001,
  0.001,
  0.01,
  0.1,
  1.0,
  10,
  100,
  1000
].freeze

# Histogram bucket bounds for the join/sync/leave group timings.
DELAY_BUCKETS = [
  1,
  3,
  10,
  30,
  100,
  300,
  1000,
  3000,
  10_000,
  30_000
].freeze
|
28
|
+
|
29
|
+
# Module-level entry points of the Prometheus integration.
class << self
  # The registry the subscriber classes register their metrics with.
  # Assigned by {start}.
  attr_accessor :registry

  # Stores the registry and attaches each subscriber class to its
  # notification namespace, in the order listed below.
  #
  # @param registry the registry to use; defaults to
  #   `::Prometheus::Client.registry`
  # @return whatever the last `attach_to` call returns
  def start(registry = ::Prometheus::Client.registry)
    @registry = registry

    [
      [ConnectionSubscriber, 'connection.kafka'],
      [ConsumerSubscriber, 'consumer.kafka'],
      [ProducerSubscriber, 'producer.kafka'],
      [AsyncProducerSubscriber, 'async_producer.kafka'],
      [FetcherSubscriber, 'fetcher.kafka']
    ].map { |subscriber, namespace| subscriber.attach_to(namespace) }.last
  end
end
|
41
|
+
|
42
|
+
# Reports metrics for `request` events. `Prometheus.start` attaches this
# subscriber to the 'connection.kafka' namespace.
class ConnectionSubscriber < ActiveSupport::Subscriber
  # Registers the API call metrics with `Prometheus.registry`.
  def initialize
    super

    registry = Prometheus.registry
    label_names = %i[client api broker]

    @api_calls = registry.counter(:api_calls, docstring: 'Total calls', labels: label_names)
    @api_latency = registry.histogram(
      :api_latency, docstring: 'Latency', buckets: LATENCY_BUCKETS, labels: label_names
    )
    @api_request_size = registry.histogram(
      :api_request_size, docstring: 'Request size', buckets: SIZE_BUCKETS, labels: label_names
    )
    @api_response_size = registry.histogram(
      :api_response_size, docstring: 'Response size', buckets: SIZE_BUCKETS, labels: label_names
    )
    @api_errors = registry.counter(:api_errors, docstring: 'Errors', labels: label_names)
  end

  # Records call count, duration and request/response sizes for one
  # request; also counts an error when the payload carries `:exception`.
  #
  # @param event the notification event; its payload must contain
  #   `:client_id` and `:broker_host` (`KeyError` otherwise)
  def request(event)
    payload = event.payload
    labels = {
      client: payload.fetch(:client_id),
      api: payload.fetch(:api, 'unknown'),
      broker: payload.fetch(:broker_host)
    }
    # Sizes default to 0 when the payload does not carry them.
    sent = payload.fetch(:request_size, 0)
    received = payload.fetch(:response_size, 0)

    @api_calls.increment(labels: labels)
    @api_latency.observe(event.duration, labels: labels)
    @api_request_size.observe(sent, labels: labels)
    @api_response_size.observe(received, labels: labels)
    @api_errors.increment(labels: labels) if payload.key?(:exception)
  end
end
|
68
|
+
|
69
|
+
# Reports consumer metrics. `Prometheus.start` attaches this subscriber to
# the 'consumer.kafka' namespace; each public method handles the event of
# the same name.
class ConsumerSubscriber < ActiveSupport::Subscriber
  # Registers the consumer metrics with `Prometheus.registry`.
  def initialize
    super

    registry = Prometheus.registry
    by_partition = %i[client group_id topic partition]
    by_group = %i[client group_id]

    @process_messages = registry.counter(
      :consumer_process_messages, docstring: 'Total messages', labels: by_partition
    )
    @process_message_errors = registry.counter(
      :consumer_process_message_errors, docstring: 'Total errors', labels: by_partition
    )
    @process_message_latency = registry.histogram(
      :consumer_process_message_latency, docstring: 'Latency', buckets: LATENCY_BUCKETS, labels: by_partition
    )
    @offset_lag = registry.gauge(:consumer_offset_lag, docstring: 'Offset lag', labels: by_partition)
    @time_lag = registry.gauge(:consumer_time_lag, docstring: 'Time lag of message', labels: by_partition)
    @process_batch_errors = registry.counter(
      :consumer_process_batch_errors, docstring: 'Total errors in batch', labels: by_partition
    )
    @process_batch_latency = registry.histogram(
      :consumer_process_batch_latency, docstring: 'Latency in batch', buckets: LATENCY_BUCKETS, labels: by_partition
    )
    @batch_size = registry.histogram(
      :consumer_batch_size, docstring: 'Size of batch', buckets: SIZE_BUCKETS, labels: by_partition
    )
    @join_group = registry.histogram(
      :consumer_join_group, docstring: 'Time to join group', buckets: DELAY_BUCKETS, labels: by_group
    )
    @join_group_errors = registry.counter(
      :consumer_join_group_errors, docstring: 'Total error in joining group', labels: by_group
    )
    @sync_group = registry.histogram(
      :consumer_sync_group, docstring: 'Time to sync group', buckets: DELAY_BUCKETS, labels: by_group
    )
    @sync_group_errors = registry.counter(
      :consumer_sync_group_errors, docstring: 'Total error in syncing group', labels: by_group
    )
    @leave_group = registry.histogram(
      :consumer_leave_group, docstring: 'Time to leave group', buckets: DELAY_BUCKETS, labels: by_group
    )
    @leave_group_errors = registry.counter(
      :consumer_leave_group_errors, docstring: 'Total error in leaving group', labels: by_group
    )
    @pause_duration = registry.gauge(:consumer_pause_duration, docstring: 'Pause duration', labels: by_partition)
  end

  # Counts a processed message (or an error when the payload carries
  # `:exception`) and updates the offset lag and time lag gauges.
  #
  # @param event the notification event; its payload must contain
  #   `:client_id`, `:group_id`, `:topic`, `:partition`, `:offset_lag`
  #   and `:create_time` (`KeyError` otherwise)
  def process_message(event)
    payload = event.payload
    labels = partition_labels(payload)
    offset_lag = payload.fetch(:offset_lag)
    create_time = payload.fetch(:create_time)

    # `Time.now - create_time` scaled by 1000 and truncated; stays nil when
    # the message has no create time.
    time_lag = ((Time.now - create_time) * 1000).to_i if create_time

    if payload.key?(:exception)
      @process_message_errors.increment(labels: labels)
    else
      @process_message_latency.observe(event.duration, labels: labels)
      @process_messages.increment(labels: labels)
    end

    @offset_lag.set(offset_lag, labels: labels)

    # Not all messages have timestamps.
    @time_lag.set(time_lag, labels: labels) if time_lag
  end

  # Counts a batch error when the payload carries `:exception`; otherwise
  # records the batch latency and adds `:message_count` to the processed
  # message counter.
  #
  # @param event the notification event; `:message_count` is required
  #   even on the error path
  def process_batch(event)
    payload = event.payload
    labels = partition_labels(payload)
    message_count = payload.fetch(:message_count)

    return @process_batch_errors.increment(labels: labels) if payload.key?(:exception)

    @process_batch_latency.observe(event.duration, labels: labels)
    @process_messages.increment(by: message_count, labels: labels)
  end

  # Records the size of a fetched batch and updates the offset lag gauge.
  #
  # @param event the notification event; its payload must contain
  #   `:offset_lag` and `:message_count` besides the partition labels
  def fetch_batch(event)
    payload = event.payload
    labels = partition_labels(payload)
    offset_lag = payload.fetch(:offset_lag)
    message_count = payload.fetch(:message_count)

    @batch_size.observe(message_count, labels: labels)
    @offset_lag.set(offset_lag, labels: labels)
  end

  # Records how long joining the group took, and any failure.
  def join_group(event)
    record_group_event(event, @join_group, @join_group_errors)
  end

  # Records how long syncing the group took, and any failure.
  def sync_group(event)
    record_group_event(event, @sync_group, @sync_group_errors)
  end

  # Records how long leaving the group took, and any failure.
  def leave_group(event)
    record_group_event(event, @leave_group, @leave_group_errors)
  end

  # Sets the pause duration gauge from the payload's `:duration`.
  def pause_status(event)
    payload = event.payload
    labels = partition_labels(payload)

    @pause_duration.set(payload.fetch(:duration), labels: labels)
  end

  private

  # NOTE: keep the helpers below private. ActiveSupport::Subscriber turns
  # public instance methods into event handlers.

  # Label set identifying one partition of one consumer group.
  def partition_labels(payload)
    {
      client: payload.fetch(:client_id),
      group_id: payload.fetch(:group_id),
      topic: payload.fetch(:topic),
      partition: payload.fetch(:partition)
    }
  end

  # Label set identifying one consumer group.
  def group_labels(payload)
    { client: payload.fetch(:client_id), group_id: payload.fetch(:group_id) }
  end

  # Observes the event duration on `timing` and bumps `failures` when the
  # payload carries `:exception`.
  def record_group_event(event, timing, failures)
    labels = group_labels(event.payload)
    timing.observe(event.duration, labels: labels)

    failures.increment(labels: labels) if event.payload.key?(:exception)
  end
end
|
183
|
+
|
184
|
+
# Reports producer metrics. `Prometheus.start` attaches this subscriber to
# the 'producer.kafka' namespace; each public method handles the event of
# the same name.
class ProducerSubscriber < ActiveSupport::Subscriber
  # Registers the producer metrics with `Prometheus.registry`.
  def initialize
    super

    registry = Prometheus.registry
    by_topic = %i[client topic]
    by_client = %i[client]

    @produce_messages = registry.counter(
      :producer_produced_messages, docstring: 'Produced messages total', labels: by_topic
    )
    @produce_message_size = registry.histogram(
      :producer_message_size, docstring: 'Message size', buckets: SIZE_BUCKETS, labels: by_topic
    )
    @buffer_size = registry.histogram(
      :producer_buffer_size, docstring: 'Buffer size', buckets: SIZE_BUCKETS, labels: by_client
    )
    @buffer_fill_ratio = registry.histogram(
      :producer_buffer_fill_ratio, docstring: 'Buffer fill ratio', labels: by_client
    )
    @buffer_fill_percentage = registry.histogram(
      :producer_buffer_fill_percentage, docstring: 'Buffer fill percentage', labels: by_client
    )
    @produce_errors = registry.counter(:producer_produce_errors, docstring: 'Produce errors', labels: by_topic)
    @deliver_errors = registry.counter(:producer_deliver_errors, docstring: 'Deliver error', labels: by_client)
    @deliver_latency = registry.histogram(
      :producer_deliver_latency, docstring: 'Delivery latency', buckets: LATENCY_BUCKETS, labels: by_client
    )
    @deliver_messages = registry.counter(
      :producer_deliver_messages, docstring: 'Total count of delivered messages', labels: by_client
    )
    @deliver_attempts = registry.histogram(
      :producer_deliver_attempts, docstring: 'Delivery attempts', labels: by_client
    )
    @ack_messages = registry.counter(:producer_ack_messages, docstring: 'Ack', labels: by_topic)
    @ack_delay = registry.histogram(
      :producer_ack_delay, docstring: 'Ack delay', buckets: LATENCY_BUCKETS, labels: by_topic
    )
    @ack_errors = registry.counter(:producer_ack_errors, docstring: 'Ack errors', labels: by_topic)
  end

  # Counts a produced message and records its size together with the
  # current buffer size and fill level.
  #
  # @param event the notification event; its payload must contain
  #   `:client_id`, `:topic`, `:message_size`, `:buffer_size` and
  #   `:max_buffer_size` (`KeyError` otherwise)
  def produce_message(event)
    payload = event.payload
    by_topic = topic_labels(payload)
    by_client = { client: by_topic.fetch(:client) }

    message_size = payload.fetch(:message_size)
    buffer_size = payload.fetch(:buffer_size)
    max_buffer_size = payload.fetch(:max_buffer_size)

    # NOTE(review): float division, so a max_buffer_size of 0 produces
    # Infinity or NaN rather than raising.
    fill_ratio = buffer_size.to_f / max_buffer_size.to_f
    fill_percentage = fill_ratio * 100.0

    # Write rate.
    @produce_messages.increment(labels: by_topic)
    @produce_message_size.observe(message_size, labels: by_topic)

    # Buffer size and fill level per producer.
    @buffer_size.observe(buffer_size, labels: by_client)
    @buffer_fill_ratio.observe(fill_ratio, labels: by_client)
    @buffer_fill_percentage.observe(fill_percentage, labels: by_client)
  end

  # Counts a produce error for the topic named in the payload.
  def buffer_overflow(event)
    @produce_errors.increment(labels: topic_labels(event.payload))
  end

  # Records a delivery: latency, number of delivered messages and number
  # of attempts; also counts an error when the payload carries
  # `:exception`.
  #
  # @param event the notification event; its payload must contain
  #   `:client_id`, `:delivered_message_count` and `:attempts`
  def deliver_messages(event)
    payload = event.payload
    labels = { client: payload.fetch(:client_id) }
    delivered = payload.fetch(:delivered_message_count)
    attempts = payload.fetch(:attempts)

    @deliver_errors.increment(labels: labels) if payload.key?(:exception)
    @deliver_latency.observe(event.duration, labels: labels)

    # Messages delivered to Kafka.
    @deliver_messages.increment(by: delivered, labels: labels)

    # Attempts needed to deliver them.
    @deliver_attempts.observe(attempts, labels: labels)
  end

  # Counts an ACK'd message for the topic and records the payload's
  # `:delay` in the ack delay histogram.
  def ack_message(event)
    payload = event.payload
    labels = topic_labels(payload)

    @ack_messages.increment(labels: labels)
    @ack_delay.observe(payload.fetch(:delay), labels: labels)
  end

  # Counts an ack error for the topic named in the payload.
  def topic_error(event)
    @ack_errors.increment(labels: topic_labels(event.payload))
  end

  private

  # NOTE: keep this helper private. ActiveSupport::Subscriber turns public
  # instance methods into event handlers.

  # Label set identifying one topic of one client.
  def topic_labels(payload)
    { client: payload.fetch(:client_id), topic: payload.fetch(:topic) }
  end
end
|
262
|
+
|
263
|
+
# Reports async producer metrics. `Prometheus.start` attaches this
# subscriber to the 'async_producer.kafka' namespace.
class AsyncProducerSubscriber < ActiveSupport::Subscriber
  # Registers the async producer metrics with `Prometheus.registry`.
  def initialize
    super

    registry = Prometheus.registry
    by_topic = %i[client topic]

    @queue_size = registry.histogram(
      :async_producer_queue_size, docstring: 'Queue size', buckets: SIZE_BUCKETS, labels: by_topic
    )
    @queue_fill_ratio = registry.histogram(
      :async_producer_queue_fill_ratio, docstring: 'Queue fill ratio', labels: by_topic
    )
    @produce_errors = registry.counter(
      :async_producer_produce_errors, docstring: 'Producer errors', labels: by_topic
    )
    @dropped_messages = registry.counter(
      :async_producer_dropped_messages, docstring: 'Dropped messages', labels: %i[client]
    )
  end

  # Records the queue size and its ratio to `:max_queue_size`.
  #
  # @param event the notification event; its payload must contain
  #   `:client_id`, `:topic`, `:queue_size` and `:max_queue_size`
  def enqueue_message(event)
    payload = event.payload
    labels = topic_labels(payload)

    queue_size = payload.fetch(:queue_size)
    max_queue_size = payload.fetch(:max_queue_size)
    fill_ratio = queue_size.to_f / max_queue_size.to_f

    @queue_size.observe(queue_size, labels: labels)
    @queue_fill_ratio.observe(fill_ratio, labels: labels)
  end

  # Counts a produce error for the topic named in the payload.
  def buffer_overflow(event)
    @produce_errors.increment(labels: topic_labels(event.payload))
  end

  # Adds the payload's `:message_count` to the dropped message counter.
  def drop_messages(event)
    payload = event.payload
    labels = { client: payload.fetch(:client_id) }

    @dropped_messages.increment(by: payload.fetch(:message_count), labels: labels)
  end

  private

  # NOTE: keep this helper private. ActiveSupport::Subscriber turns public
  # instance methods into event handlers.

  # Label set identifying one topic of one client.
  def topic_labels(payload)
    { client: payload.fetch(:client_id), topic: payload.fetch(:topic) }
  end
end
|
297
|
+
|
298
|
+
# Reports the fetcher queue size. `Prometheus.start` attaches this
# subscriber to the 'fetcher.kafka' namespace.
class FetcherSubscriber < ActiveSupport::Subscriber
  # Registers the queue size gauge with `Prometheus.registry`.
  def initialize
    super
    @queue_size = Prometheus.registry.gauge(
      :fetcher_queue_size, docstring: 'Queue size', labels: %i[client group_id]
    )
  end

  # Handles the `loop` event by setting the queue size gauge.
  #
  # @param event the notification event; its payload must contain
  #   `:queue_size`, `:client_id` and `:group_id` (`KeyError` otherwise)
  def loop(event)
    payload = event.payload
    size = payload.fetch(:queue_size)
    labels = { client: payload.fetch(:client_id), group_id: payload.fetch(:group_id) }

    @queue_size.set(size, labels: labels)
  end
end
|
312
|
+
end
|
313
|
+
end
|
314
|
+
|
315
|
+
# To enable testability, it is possible to skip the start until test time
|
316
|
+
Kafka::Prometheus.start unless defined?(PROMETHEUS_NO_AUTO_START)
|
@@ -0,0 +1,225 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kafka
|
4
|
+
|
5
|
+
# The protocol layer of the library.
|
6
|
+
#
|
7
|
+
# The Kafka protocol (https://kafka.apache.org/protocol) defines a set of API
|
8
|
+
# requests, each with a well-known numeric API key, as well as a set of error
|
9
|
+
# codes with specific meanings.
|
10
|
+
#
|
11
|
+
# This module, and the classes contained in it, implement the client side of
|
12
|
+
# the protocol.
|
13
|
+
module Protocol
|
14
|
+
# The replica id of non-brokers is always -1.
REPLICA_ID = -1

# Numeric API keys of the requests this client knows about.
PRODUCE_API = 0
FETCH_API = 1
LIST_OFFSET_API = 2
TOPIC_METADATA_API = 3
OFFSET_COMMIT_API = 8
OFFSET_FETCH_API = 9
FIND_COORDINATOR_API = 10
JOIN_GROUP_API = 11
HEARTBEAT_API = 12
LEAVE_GROUP_API = 13
SYNC_GROUP_API = 14
DESCRIBE_GROUPS_API = 15
LIST_GROUPS_API = 16
SASL_HANDSHAKE_API = 17
API_VERSIONS_API = 18
CREATE_TOPICS_API = 19
DELETE_TOPICS_API = 20
INIT_PRODUCER_ID_API = 22
ADD_PARTITIONS_TO_TXN_API = 24
ADD_OFFSETS_TO_TXN_API = 25
END_TXN_API = 26
TXN_OFFSET_COMMIT_API = 28
DESCRIBE_CONFIGS_API = 32
ALTER_CONFIGS_API = 33
CREATE_PARTITIONS_API = 37

# A mapping from numeric API keys to symbolic API names.
#
# Every API key constant above has an entry. DESCRIBE_GROUPS_API,
# LIST_GROUPS_API and ALTER_CONFIGS_API used to be missing, so lookups for
# them fell back to :unknown.
#
# NOTE: some existing names carry an `_api` suffix and others do not. They
# are left untouched because the symbols are handed back to callers.
APIS = {
  PRODUCE_API => :produce,
  FETCH_API => :fetch,
  LIST_OFFSET_API => :list_offset,
  TOPIC_METADATA_API => :topic_metadata,
  OFFSET_COMMIT_API => :offset_commit,
  OFFSET_FETCH_API => :offset_fetch,
  FIND_COORDINATOR_API => :find_coordinator,
  JOIN_GROUP_API => :join_group,
  HEARTBEAT_API => :heartbeat,
  LEAVE_GROUP_API => :leave_group,
  SYNC_GROUP_API => :sync_group,
  DESCRIBE_GROUPS_API => :describe_groups,
  LIST_GROUPS_API => :list_groups,
  SASL_HANDSHAKE_API => :sasl_handshake,
  API_VERSIONS_API => :api_versions,
  CREATE_TOPICS_API => :create_topics,
  DELETE_TOPICS_API => :delete_topics,
  INIT_PRODUCER_ID_API => :init_producer_id_api,
  ADD_PARTITIONS_TO_TXN_API => :add_partitions_to_txn_api,
  ADD_OFFSETS_TO_TXN_API => :add_offsets_to_txn_api,
  END_TXN_API => :end_txn_api,
  TXN_OFFSET_COMMIT_API => :txn_offset_commit_api,
  DESCRIBE_CONFIGS_API => :describe_configs_api,
  ALTER_CONFIGS_API => :alter_configs,
  CREATE_PARTITIONS_API => :create_partitions
}.freeze
|
68
|
+
|
69
|
+
# A mapping from numeric error codes to exception classes.
#
# Code 0 (no error) has no entry; `handle_error` checks for it before
# consulting this table. Frozen so the shared lookup table cannot be
# modified at runtime.
ERRORS = {
  -1 => UnknownError,
  1 => OffsetOutOfRange,
  2 => CorruptMessage,
  3 => UnknownTopicOrPartition,
  4 => InvalidMessageSize,
  5 => LeaderNotAvailable,
  6 => NotLeaderForPartition,
  7 => RequestTimedOut,
  8 => BrokerNotAvailable,
  9 => ReplicaNotAvailable,
  10 => MessageSizeTooLarge,
  11 => StaleControllerEpoch,
  12 => OffsetMetadataTooLarge,
  13 => NetworkException,
  14 => CoordinatorLoadInProgress,
  15 => CoordinatorNotAvailable,
  16 => NotCoordinatorForGroup,
  17 => InvalidTopic,
  18 => RecordListTooLarge,
  19 => NotEnoughReplicas,
  20 => NotEnoughReplicasAfterAppend,
  21 => InvalidRequiredAcks,
  22 => IllegalGeneration,
  23 => InconsistentGroupProtocol,
  24 => InvalidGroupId,
  25 => UnknownMemberId,
  26 => InvalidSessionTimeout,
  27 => RebalanceInProgress,
  28 => InvalidCommitOffsetSize,
  29 => TopicAuthorizationFailed,
  30 => GroupAuthorizationFailed,
  31 => ClusterAuthorizationFailed,
  32 => InvalidTimestamp,
  33 => UnsupportedSaslMechanism,
  34 => InvalidSaslState,
  35 => UnsupportedVersion,
  36 => TopicAlreadyExists,
  37 => InvalidPartitions,
  38 => InvalidReplicationFactor,
  39 => InvalidReplicaAssignment,
  40 => InvalidConfig,
  41 => NotController,
  42 => InvalidRequest,
  43 => UnsupportedForMessageFormat,
  44 => PolicyViolation,
  45 => OutOfOrderSequenceNumberError,
  46 => DuplicateSequenceNumberError,
  47 => InvalidProducerEpochError,
  48 => InvalidTxnStateError,
  49 => InvalidProducerIDMappingError,
  50 => InvalidTransactionTimeoutError,
  51 => ConcurrentTransactionError,
  52 => TransactionCoordinatorFencedError
}.freeze
|
125
|
+
|
126
|
+
# Numeric resource type codes and a mapping from each code to its symbolic
# name.
# https://github.com/apache/kafka/blob/trunk/clients/src/main/java/org/apache/kafka/common/resource/ResourceType.java
RESOURCE_TYPE_UNKNOWN = 0
RESOURCE_TYPE_ANY = 1
RESOURCE_TYPE_TOPIC = 2
RESOURCE_TYPE_GROUP = 3
RESOURCE_TYPE_CLUSTER = 4
RESOURCE_TYPE_TRANSACTIONAL_ID = 5
RESOURCE_TYPE_DELEGATION_TOKEN = 6

# Frozen so the shared lookup table cannot be modified at runtime.
RESOURCE_TYPES = {
  RESOURCE_TYPE_UNKNOWN => :unknown,
  RESOURCE_TYPE_ANY => :any,
  RESOURCE_TYPE_TOPIC => :topic,
  RESOURCE_TYPE_GROUP => :group,
  RESOURCE_TYPE_CLUSTER => :cluster,
  RESOURCE_TYPE_TRANSACTIONAL_ID => :transactional_id,
  RESOURCE_TYPE_DELEGATION_TOKEN => :delegation_token
}.freeze
|
144
|
+
|
145
|
+
# Coordinator types. Since Kafka 0.11.0, there are two types of coordinators:
|
146
|
+
# Group and Transaction
|
147
|
+
COORDINATOR_TYPE_GROUP = 0
|
148
|
+
COORDINATOR_TYPE_TRANSACTION = 1
|
149
|
+
|
150
|
+
# Translates a protocol error code into an exception.
#
# Does nothing when the code is 0. A code found in ERRORS raises the mapped
# exception class with `error_message`; any other code raises UnknownError
# with a message that includes the code.
#
# @param error_code Integer
# @param error_message [String, nil] optional detail passed to the raised
#   exception
# @raise [ProtocolError]
# @return [nil]
def self.handle_error(error_code, error_message = nil)
  # No errors, yay!
  return if error_code == 0

  error_class = ERRORS[error_code]
  raise UnknownError, "Unknown error with code #{error_code} #{error_message}" unless error_class

  raise error_class, error_message
end
|
165
|
+
|
166
|
+
# Looks up the symbolic name of an API key in APIS.
#
# @param api_key Integer
# @return [Symbol] the mapped name, or :unknown when the key has no entry
def self.api_name(api_key)
  APIS.fetch(api_key) { :unknown }
end
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
require "kafka/protocol/metadata_request"
|
177
|
+
require "kafka/protocol/metadata_response"
|
178
|
+
require "kafka/protocol/produce_request"
|
179
|
+
require "kafka/protocol/produce_response"
|
180
|
+
require "kafka/protocol/fetch_request"
|
181
|
+
require "kafka/protocol/fetch_response"
|
182
|
+
require "kafka/protocol/list_offset_request"
|
183
|
+
require "kafka/protocol/list_offset_response"
|
184
|
+
require "kafka/protocol/add_offsets_to_txn_request"
|
185
|
+
require "kafka/protocol/add_offsets_to_txn_response"
|
186
|
+
require "kafka/protocol/txn_offset_commit_request"
|
187
|
+
require "kafka/protocol/txn_offset_commit_response"
|
188
|
+
require "kafka/protocol/find_coordinator_request"
|
189
|
+
require "kafka/protocol/find_coordinator_response"
|
190
|
+
require "kafka/protocol/join_group_request"
|
191
|
+
require "kafka/protocol/join_group_response"
|
192
|
+
require "kafka/protocol/sync_group_request"
|
193
|
+
require "kafka/protocol/sync_group_response"
|
194
|
+
require "kafka/protocol/leave_group_request"
|
195
|
+
require "kafka/protocol/leave_group_response"
|
196
|
+
require "kafka/protocol/heartbeat_request"
|
197
|
+
require "kafka/protocol/heartbeat_response"
|
198
|
+
require "kafka/protocol/offset_fetch_request"
|
199
|
+
require "kafka/protocol/offset_fetch_response"
|
200
|
+
require "kafka/protocol/offset_commit_request"
|
201
|
+
require "kafka/protocol/offset_commit_response"
|
202
|
+
require "kafka/protocol/api_versions_request"
|
203
|
+
require "kafka/protocol/api_versions_response"
|
204
|
+
require "kafka/protocol/sasl_handshake_request"
|
205
|
+
require "kafka/protocol/sasl_handshake_response"
|
206
|
+
require "kafka/protocol/create_topics_request"
|
207
|
+
require "kafka/protocol/create_topics_response"
|
208
|
+
require "kafka/protocol/delete_topics_request"
|
209
|
+
require "kafka/protocol/delete_topics_response"
|
210
|
+
require "kafka/protocol/describe_configs_request"
|
211
|
+
require "kafka/protocol/describe_configs_response"
|
212
|
+
require "kafka/protocol/alter_configs_request"
|
213
|
+
require "kafka/protocol/alter_configs_response"
|
214
|
+
require "kafka/protocol/create_partitions_request"
|
215
|
+
require "kafka/protocol/create_partitions_response"
|
216
|
+
require "kafka/protocol/list_groups_request"
|
217
|
+
require "kafka/protocol/list_groups_response"
|
218
|
+
require "kafka/protocol/describe_groups_request"
|
219
|
+
require "kafka/protocol/describe_groups_response"
|
220
|
+
require "kafka/protocol/init_producer_id_request"
|
221
|
+
require "kafka/protocol/init_producer_id_response"
|
222
|
+
require "kafka/protocol/add_partitions_to_txn_request"
|
223
|
+
require "kafka/protocol/add_partitions_to_txn_response"
|
224
|
+
require "kafka/protocol/end_txn_request"
|
225
|
+
require "kafka/protocol/end_txn_response"
|