ruby-kafka-custom 0.7.7.26

Files changed (105)
  1. checksums.yaml +7 -0
  2. data/lib/kafka/async_producer.rb +279 -0
  3. data/lib/kafka/broker.rb +205 -0
  4. data/lib/kafka/broker_info.rb +16 -0
  5. data/lib/kafka/broker_pool.rb +41 -0
  6. data/lib/kafka/broker_uri.rb +43 -0
  7. data/lib/kafka/client.rb +754 -0
  8. data/lib/kafka/cluster.rb +455 -0
  9. data/lib/kafka/compression.rb +43 -0
  10. data/lib/kafka/compressor.rb +85 -0
  11. data/lib/kafka/connection.rb +220 -0
  12. data/lib/kafka/connection_builder.rb +33 -0
  13. data/lib/kafka/consumer.rb +592 -0
  14. data/lib/kafka/consumer_group.rb +208 -0
  15. data/lib/kafka/datadog.rb +413 -0
  16. data/lib/kafka/fetch_operation.rb +115 -0
  17. data/lib/kafka/fetched_batch.rb +54 -0
  18. data/lib/kafka/fetched_batch_generator.rb +117 -0
  19. data/lib/kafka/fetched_message.rb +47 -0
  20. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  21. data/lib/kafka/fetcher.rb +221 -0
  22. data/lib/kafka/gzip_codec.rb +30 -0
  23. data/lib/kafka/heartbeat.rb +25 -0
  24. data/lib/kafka/instrumenter.rb +38 -0
  25. data/lib/kafka/lz4_codec.rb +23 -0
  26. data/lib/kafka/message_buffer.rb +87 -0
  27. data/lib/kafka/offset_manager.rb +248 -0
  28. data/lib/kafka/partitioner.rb +35 -0
  29. data/lib/kafka/pause.rb +92 -0
  30. data/lib/kafka/pending_message.rb +29 -0
  31. data/lib/kafka/pending_message_queue.rb +41 -0
  32. data/lib/kafka/produce_operation.rb +205 -0
  33. data/lib/kafka/producer.rb +504 -0
  34. data/lib/kafka/protocol.rb +217 -0
  35. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  36. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  37. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  38. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  39. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  40. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  41. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  42. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  43. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  44. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  45. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  46. data/lib/kafka/protocol/decoder.rb +175 -0
  47. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  48. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  49. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  50. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  51. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  52. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  53. data/lib/kafka/protocol/encoder.rb +184 -0
  54. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  55. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  56. data/lib/kafka/protocol/fetch_request.rb +70 -0
  57. data/lib/kafka/protocol/fetch_response.rb +136 -0
  58. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  59. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  60. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  61. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  62. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  63. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  64. data/lib/kafka/protocol/join_group_request.rb +41 -0
  65. data/lib/kafka/protocol/join_group_response.rb +33 -0
  66. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  67. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  68. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  69. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  70. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  71. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  72. data/lib/kafka/protocol/member_assignment.rb +42 -0
  73. data/lib/kafka/protocol/message.rb +172 -0
  74. data/lib/kafka/protocol/message_set.rb +55 -0
  75. data/lib/kafka/protocol/metadata_request.rb +31 -0
  76. data/lib/kafka/protocol/metadata_response.rb +185 -0
  77. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  78. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  79. data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
  80. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  81. data/lib/kafka/protocol/produce_request.rb +92 -0
  82. data/lib/kafka/protocol/produce_response.rb +63 -0
  83. data/lib/kafka/protocol/record.rb +88 -0
  84. data/lib/kafka/protocol/record_batch.rb +222 -0
  85. data/lib/kafka/protocol/request_message.rb +26 -0
  86. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  87. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  88. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  89. data/lib/kafka/protocol/sync_group_response.rb +23 -0
  90. data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
  91. data/lib/kafka/sasl/gssapi.rb +76 -0
  92. data/lib/kafka/sasl/oauth.rb +64 -0
  93. data/lib/kafka/sasl/plain.rb +39 -0
  94. data/lib/kafka/sasl/scram.rb +177 -0
  95. data/lib/kafka/sasl_authenticator.rb +61 -0
  96. data/lib/kafka/snappy_codec.rb +25 -0
  97. data/lib/kafka/socket_with_timeout.rb +96 -0
  98. data/lib/kafka/ssl_context.rb +66 -0
  99. data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
  100. data/lib/kafka/statsd.rb +296 -0
  101. data/lib/kafka/tagged_logger.rb +72 -0
  102. data/lib/kafka/transaction_manager.rb +261 -0
  103. data/lib/kafka/transaction_state_machine.rb +72 -0
  104. data/lib/kafka/version.rb +5 -0
  105. metadata +461 -0
data/lib/kafka/consumer_group.rb
@@ -0,0 +1,208 @@
+ # frozen_string_literal: true
+
+ require "set"
+ require "kafka/round_robin_assignment_strategy"
+
+ module Kafka
+   class ConsumerGroup
+     attr_reader :assigned_partitions, :generation_id, :group_id
+
+     def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
+       @cluster = cluster
+       @logger = TaggedLogger.new(logger)
+       @group_id = group_id
+       @session_timeout = session_timeout
+       @instrumenter = instrumenter
+       @member_id = ""
+       @generation_id = nil
+       @members = {}
+       @topics = Set.new
+       @assigned_partitions = {}
+       @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
+       @retention_time = retention_time
+     end
+
+     def subscribe(topic)
+       @topics.add(topic)
+       @cluster.add_target_topics([topic])
+     end
+
+     def subscribed_partitions
+       @assigned_partitions.select { |topic, _| @topics.include?(topic) }
+     end
+
+     def assigned_to?(topic, partition)
+       subscribed_partitions.fetch(topic, []).include?(partition)
+     end
+
+     def member?
+       !@generation_id.nil?
+     end
+
+     def join
+       if @topics.empty?
+         raise Kafka::Error, "Cannot join group without at least one topic subscription"
+       end
+
+       join_group
+       synchronize
+     rescue NotCoordinatorForGroup
+       @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
+       sleep 1
+       @coordinator = nil
+       retry
+     rescue ConnectionError
+       @logger.error "Connection error while trying to join group `#{@group_id}`; retrying..."
+       sleep 1
+       @cluster.mark_as_stale!
+       @coordinator = nil
+       retry
+     end
+
+     def leave
+       @logger.info "Leaving group `#{@group_id}`"
+
+       # Having a generation id indicates that we're a member of the group.
+       @generation_id = nil
+
+       @instrumenter.instrument("leave_group.consumer", group_id: @group_id) do
+         coordinator.leave_group(group_id: @group_id, member_id: @member_id)
+       end
+     rescue ConnectionError
+     end
+
+     def fetch_offsets
+       coordinator.fetch_offsets(
+         group_id: @group_id,
+         topics: @assigned_partitions,
+       )
+     end
+
+     def commit_offsets(offsets)
+       response = coordinator.commit_offsets(
+         group_id: @group_id,
+         member_id: @member_id,
+         generation_id: @generation_id,
+         offsets: offsets,
+         retention_time: @retention_time
+       )
+
+       response.topics.each do |topic, partitions|
+         partitions.each do |partition, error_code|
+           Protocol.handle_error(error_code)
+         end
+       end
+     rescue Kafka::Error => e
+       @logger.error "Error committing offsets: #{e}"
+       raise OffsetCommitError, e
+     end
+
+     def heartbeat
+       @logger.debug "Sending heartbeat..."
+
+       @instrumenter.instrument('heartbeat.consumer',
+                                group_id: @group_id,
+                                topic_partitions: @assigned_partitions) do
+
+         response = coordinator.heartbeat(
+           group_id: @group_id,
+           generation_id: @generation_id,
+           member_id: @member_id,
+         )
+
+         Protocol.handle_error(response.error_code)
+       end
+     rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
+       @logger.error "Error sending heartbeat: #{e}"
+       raise HeartbeatError, e
+     rescue NotCoordinatorForGroup
+       @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
+       sleep 1
+       @coordinator = nil
+       retry
+     end
+
+     def to_s
+       "[#{@group_id}] {" + assigned_partitions.map { |topic, partitions|
+         partition_str = partitions.size > 5 ?
+           "#{partitions[0..4].join(', ')}..." :
+           partitions.join(', ')
+         "#{topic}: #{partition_str}"
+       }.join('; ') + '}:'
+     end
+
+     private
+
+     def join_group
+       @logger.info "Joining group `#{@group_id}`"
+
+       @instrumenter.instrument("join_group.consumer", group_id: @group_id) do
+         response = coordinator.join_group(
+           group_id: @group_id,
+           session_timeout: @session_timeout,
+           member_id: @member_id,
+         )
+
+         Protocol.handle_error(response.error_code)
+
+         @generation_id = response.generation_id
+         @member_id = response.member_id
+         @leader_id = response.leader_id
+         @members = response.members
+       end
+
+       @logger.info "Joined group `#{@group_id}` with member id `#{@member_id}`"
+     rescue UnknownMemberId
+       @logger.error "Failed to join group; resetting member id and retrying in 1s..."
+
+       @member_id = ""
+       sleep 1
+
+       retry
+     end
+
+     def group_leader?
+       @member_id == @leader_id
+     end
+
+     def synchronize
+       group_assignment = {}
+
+       if group_leader?
+         @logger.info "Chosen as leader of group `#{@group_id}`"
+
+         group_assignment = @assignment_strategy.assign(
+           members: @members.keys,
+           topics: @topics,
+         )
+       end
+
+       @instrumenter.instrument("sync_group.consumer", group_id: @group_id) do
+         response = coordinator.sync_group(
+           group_id: @group_id,
+           generation_id: @generation_id,
+           member_id: @member_id,
+           group_assignment: group_assignment,
+         )
+
+         Protocol.handle_error(response.error_code)
+
+         response.member_assignment.topics.each do |topic, assigned_partitions|
+           @logger.info "Partitions assigned for `#{topic}`: #{assigned_partitions.join(', ')}"
+         end
+
+         @assigned_partitions.replace(response.member_assignment.topics)
+       end
+     end
+
+     def coordinator
+       @coordinator ||= @cluster.get_group_coordinator(group_id: @group_id)
+     rescue CoordinatorNotAvailable
+       @logger.error "Group coordinator not available for group `#{@group_id}`"
+
+       sleep 1
+
+       retry
+     end
+   end
+ end
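
ConsumerGroup is an internal class; applications normally reach it through Kafka::Client#consumer, which builds the group and forwards session_timeout to the constructor above. A minimal sketch of that public-API path (broker address, topic, and group names are placeholders):

    require "kafka"

    kafka = Kafka.new(["kafka1:9092"], client_id: "my-app")

    # kafka.consumer builds a ConsumerGroup internally; subscribe and
    # each_message drive the join/sync/heartbeat cycle implemented above.
    consumer = kafka.consumer(group_id: "my-group", session_timeout: 30)
    consumer.subscribe("my-topic")

    consumer.each_message do |message|
      puts "#{message.topic}/#{message.partition}@#{message.offset}: #{message.value}"
    end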
data/lib/kafka/datadog.rb
@@ -0,0 +1,413 @@
+ # frozen_string_literal: true
+
+ begin
+   require "datadog/statsd"
+ rescue LoadError
+   $stderr.puts "In order to report Kafka client metrics to Datadog you need to install the `dogstatsd-ruby` gem."
+   raise
+ end
+
+ require "active_support/subscriber"
+
+ module Kafka
+
+   # Reports operational metrics to a Datadog agent using the modified Statsd protocol.
+   #
+   #     require "kafka/datadog"
+   #
+   #     # Default is "ruby_kafka".
+   #     Kafka::Datadog.namespace = "custom-namespace"
+   #
+   #     # Default is "127.0.0.1".
+   #     Kafka::Datadog.host = "statsd.something.com"
+   #
+   #     # Default is 8125.
+   #     Kafka::Datadog.port = 1234
+   #
+   # Once the file has been required, no further configuration is needed – all operational
+   # metrics are automatically emitted.
+   module Datadog
+     STATSD_NAMESPACE = "ruby_kafka"
+
+     class << self
+       def statsd
+         @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
+       end
+
+       def statsd=(statsd)
+         clear
+         @statsd = statsd
+       end
+
+       def host
+         @host ||= default_host
+       end
+
+       def host=(host)
+         @host = host
+         clear
+       end
+
+       def port
+         @port ||= default_port
+       end
+
+       def port=(port)
+         @port = port
+         clear
+       end
+
+       def namespace
+         @namespace ||= STATSD_NAMESPACE
+       end
+
+       def namespace=(namespace)
+         @namespace = namespace
+         clear
+       end
+
+       def tags
+         @tags ||= []
+       end
+
+       def tags=(tags)
+         @tags = tags
+         clear
+       end
+
+       private
+
+       def default_host
+         ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
+       end
+
+       def default_port
+         ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
+       end
+
+       def clear
+         @statsd && @statsd.close
+         @statsd = nil
+       end
+     end
+
+     class StatsdSubscriber < ActiveSupport::Subscriber
+       private
+
+       %w[increment histogram count timing gauge].each do |type|
+         define_method(type) do |*args|
+           emit(type, *args)
+         end
+       end
+
+       def emit(type, *args, tags: {})
+         tags = tags.map {|k, v| "#{k}:#{v}" }.to_a
+
+         Kafka::Datadog.statsd.send(type, *args, tags: tags)
+       end
+     end
+
+     class ConnectionSubscriber < StatsdSubscriber
+       def request(event)
+         client = event.payload.fetch(:client_id)
+         api = event.payload.fetch(:api, "unknown")
+         request_size = event.payload.fetch(:request_size, 0)
+         response_size = event.payload.fetch(:response_size, 0)
+         broker = event.payload.fetch(:broker_host)
+
+         tags = {
+           client: client,
+           api: api,
+           broker: broker
+         }
+
+         timing("api.latency", event.duration, tags: tags)
+         increment("api.calls", tags: tags)
+
+         histogram("api.request_size", request_size, tags: tags)
+         histogram("api.response_size", response_size, tags: tags)
+
+         if event.payload.key?(:exception)
+           increment("api.errors", tags: tags)
+         end
+       end
+
+       attach_to "connection.kafka"
+     end
+
+     class ConsumerSubscriber < StatsdSubscriber
+       def process_message(event)
+         offset = event.payload.fetch(:offset)
+         offset_lag = event.payload.fetch(:offset_lag)
+         create_time = event.payload.fetch(:create_time)
+         time_lag = create_time && ((Time.now - create_time) * 1000).to_i
+
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+           topic: event.payload.fetch(:topic),
+           partition: event.payload.fetch(:partition),
+         }
+
+         if event.payload.key?(:exception)
+           increment("consumer.process_message.errors", tags: tags)
+         else
+           timing("consumer.process_message.latency", event.duration, tags: tags)
+           increment("consumer.messages", tags: tags)
+         end
+
+         gauge("consumer.offset", offset, tags: tags)
+         gauge("consumer.lag", offset_lag, tags: tags)
+
+         # Not all messages have timestamps.
+         if time_lag
+           gauge("consumer.time_lag", time_lag, tags: tags)
+         end
+       end
+
+       def process_batch(event)
+         offset = event.payload.fetch(:last_offset)
+         messages = event.payload.fetch(:message_count)
+
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+           topic: event.payload.fetch(:topic),
+           partition: event.payload.fetch(:partition),
+         }
+
+         if event.payload.key?(:exception)
+           increment("consumer.process_batch.errors", tags: tags)
+         else
+           timing("consumer.process_batch.latency", event.duration, tags: tags)
+           count("consumer.messages", messages, tags: tags)
+         end
+
+         gauge("consumer.offset", offset, tags: tags)
+       end
+
+       def fetch_batch(event)
+         lag = event.payload.fetch(:offset_lag)
+         batch_size = event.payload.fetch(:message_count)
+
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+           topic: event.payload.fetch(:topic),
+           partition: event.payload.fetch(:partition),
+         }
+
+         histogram("consumer.batch_size", batch_size, tags: tags)
+         gauge("consumer.lag", lag, tags: tags)
+       end
+
+       def join_group(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+         }
+
+         timing("consumer.join_group", event.duration, tags: tags)
+
+         if event.payload.key?(:exception)
+           increment("consumer.join_group.errors", tags: tags)
+         end
+       end
+
+       def sync_group(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+         }
+
+         timing("consumer.sync_group", event.duration, tags: tags)
+
+         if event.payload.key?(:exception)
+           increment("consumer.sync_group.errors", tags: tags)
+         end
+       end
+
+       def leave_group(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+         }
+
+         timing("consumer.leave_group", event.duration, tags: tags)
+
+         if event.payload.key?(:exception)
+           increment("consumer.leave_group.errors", tags: tags)
+         end
+       end
+
+       def loop(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+         }
+
+         histogram("consumer.loop.duration", event.duration, tags: tags)
+       end
+
+       def pause_status(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+           topic: event.payload.fetch(:topic),
+           partition: event.payload.fetch(:partition),
+         }
+
+         duration = event.payload.fetch(:duration)
+
+         gauge("consumer.pause.duration", duration, tags: tags)
+       end
+
+       attach_to "consumer.kafka"
+     end
+
+     class ProducerSubscriber < StatsdSubscriber
+       def produce_message(event)
+         client = event.payload.fetch(:client_id)
+         topic = event.payload.fetch(:topic)
+         message_size = event.payload.fetch(:message_size)
+         buffer_size = event.payload.fetch(:buffer_size)
+         max_buffer_size = event.payload.fetch(:max_buffer_size)
+         buffer_fill_ratio = buffer_size.to_f / max_buffer_size.to_f
+         buffer_fill_percentage = buffer_fill_ratio * 100.0
+
+         tags = {
+           client: client,
+           topic: topic,
+         }
+
+         # This gets us the write rate.
+         increment("producer.produce.messages", tags: tags.merge(topic: topic))
+
+         # Information about typical/average/95p message size.
+         histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))
+
+         # Aggregate message size.
+         count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))
+
+         # This gets us the avg/max buffer size per producer.
+         histogram("producer.buffer.size", buffer_size, tags: tags)
+
+         # This gets us the avg/max buffer fill ratio per producer.
+         histogram("producer.buffer.fill_ratio", buffer_fill_ratio, tags: tags)
+         histogram("producer.buffer.fill_percentage", buffer_fill_percentage, tags: tags)
+       end
+
+       def buffer_overflow(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           topic: event.payload.fetch(:topic),
+         }
+
+         increment("producer.produce.errors", tags: tags)
+       end
+
+       def deliver_messages(event)
+         client = event.payload.fetch(:client_id)
+         message_count = event.payload.fetch(:delivered_message_count)
+         attempts = event.payload.fetch(:attempts)
+
+         tags = {
+           client: client,
+         }
+
+         if event.payload.key?(:exception)
+           increment("producer.deliver.errors", tags: tags)
+         end
+
+         timing("producer.deliver.latency", event.duration, tags: tags)
+
+         # Messages delivered to Kafka:
+         count("producer.deliver.messages", message_count, tags: tags)
+
+         # Number of attempts to deliver messages:
+         histogram("producer.deliver.attempts", attempts, tags: tags)
+       end
+
+       def ack_message(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           topic: event.payload.fetch(:topic),
+         }
+
+         # Number of messages ACK'd for the topic.
+         increment("producer.ack.messages", tags: tags)
+
+         # Histogram of delay between a message being produced and it being ACK'd.
+         histogram("producer.ack.delay", event.payload.fetch(:delay), tags: tags)
+       end
+
+       def topic_error(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           topic: event.payload.fetch(:topic)
+         }
+
+         increment("producer.ack.errors", tags: tags)
+       end
+
+       attach_to "producer.kafka"
+     end
+
+     class AsyncProducerSubscriber < StatsdSubscriber
+       def enqueue_message(event)
+         client = event.payload.fetch(:client_id)
+         topic = event.payload.fetch(:topic)
+         queue_size = event.payload.fetch(:queue_size)
+         max_queue_size = event.payload.fetch(:max_queue_size)
+         queue_fill_ratio = queue_size.to_f / max_queue_size.to_f
+
+         tags = {
+           client: client,
+           topic: topic,
+         }
+
+         # This gets us the avg/max queue size per producer.
+         histogram("async_producer.queue.size", queue_size, tags: tags)
+
+         # This gets us the avg/max queue fill ratio per producer.
+         histogram("async_producer.queue.fill_ratio", queue_fill_ratio, tags: tags)
+       end
+
+       def buffer_overflow(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+           topic: event.payload.fetch(:topic),
+         }
+
+         increment("async_producer.produce.errors", tags: tags)
+       end
+
+       def drop_messages(event)
+         tags = {
+           client: event.payload.fetch(:client_id),
+         }
+
+         message_count = event.payload.fetch(:message_count)
+
+         count("async_producer.dropped_messages", message_count, tags: tags)
+       end
+
+       attach_to "async_producer.kafka"
+     end
+
+     class FetcherSubscriber < StatsdSubscriber
+       def loop(event)
+         queue_size = event.payload.fetch(:queue_size)
+
+         tags = {
+           client: event.payload.fetch(:client_id),
+           group_id: event.payload.fetch(:group_id),
+         }
+
+         gauge("fetcher.queue_size", queue_size, tags: tags)
+       end
+
+       attach_to "fetcher.kafka"
+     end
+   end
+ end
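
The Datadog module above is configured entirely through its class-level setters, and requiring the file attaches all of the subscribers. A short configuration sketch (agent host, port, namespace, and tag values below are placeholders):

    require "kafka/datadog"

    Kafka::Datadog.namespace = "custom-namespace"    # default: "ruby_kafka"
    Kafka::Datadog.host      = "statsd.example.com"  # default: 127.0.0.1
    Kafka::Datadog.port      = 1234                  # default: 8125
    Kafka::Datadog.tags      = ["env:production"]    # added to every metric

    # Or inject a pre-built client; the setter closes and replaces any cached one.
    Kafka::Datadog.statsd = Datadog::Statsd.new("statsd.example.com", 1234)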