ruby-kafka-custom 0.7.7.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. checksums.yaml +7 -0
  2. data/lib/kafka/async_producer.rb +279 -0
  3. data/lib/kafka/broker.rb +205 -0
  4. data/lib/kafka/broker_info.rb +16 -0
  5. data/lib/kafka/broker_pool.rb +41 -0
  6. data/lib/kafka/broker_uri.rb +43 -0
  7. data/lib/kafka/client.rb +754 -0
  8. data/lib/kafka/cluster.rb +455 -0
  9. data/lib/kafka/compression.rb +43 -0
  10. data/lib/kafka/compressor.rb +85 -0
  11. data/lib/kafka/connection.rb +220 -0
  12. data/lib/kafka/connection_builder.rb +33 -0
  13. data/lib/kafka/consumer.rb +592 -0
  14. data/lib/kafka/consumer_group.rb +208 -0
  15. data/lib/kafka/datadog.rb +413 -0
  16. data/lib/kafka/fetch_operation.rb +115 -0
  17. data/lib/kafka/fetched_batch.rb +54 -0
  18. data/lib/kafka/fetched_batch_generator.rb +117 -0
  19. data/lib/kafka/fetched_message.rb +47 -0
  20. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  21. data/lib/kafka/fetcher.rb +221 -0
  22. data/lib/kafka/gzip_codec.rb +30 -0
  23. data/lib/kafka/heartbeat.rb +25 -0
  24. data/lib/kafka/instrumenter.rb +38 -0
  25. data/lib/kafka/lz4_codec.rb +23 -0
  26. data/lib/kafka/message_buffer.rb +87 -0
  27. data/lib/kafka/offset_manager.rb +248 -0
  28. data/lib/kafka/partitioner.rb +35 -0
  29. data/lib/kafka/pause.rb +92 -0
  30. data/lib/kafka/pending_message.rb +29 -0
  31. data/lib/kafka/pending_message_queue.rb +41 -0
  32. data/lib/kafka/produce_operation.rb +205 -0
  33. data/lib/kafka/producer.rb +504 -0
  34. data/lib/kafka/protocol.rb +217 -0
  35. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  36. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  37. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  38. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  39. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  40. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  41. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  42. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  43. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  44. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  45. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  46. data/lib/kafka/protocol/decoder.rb +175 -0
  47. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  48. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  49. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  50. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  51. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  52. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  53. data/lib/kafka/protocol/encoder.rb +184 -0
  54. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  55. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  56. data/lib/kafka/protocol/fetch_request.rb +70 -0
  57. data/lib/kafka/protocol/fetch_response.rb +136 -0
  58. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  59. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  60. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  61. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  62. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  63. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  64. data/lib/kafka/protocol/join_group_request.rb +41 -0
  65. data/lib/kafka/protocol/join_group_response.rb +33 -0
  66. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  67. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  68. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  69. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  70. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  71. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  72. data/lib/kafka/protocol/member_assignment.rb +42 -0
  73. data/lib/kafka/protocol/message.rb +172 -0
  74. data/lib/kafka/protocol/message_set.rb +55 -0
  75. data/lib/kafka/protocol/metadata_request.rb +31 -0
  76. data/lib/kafka/protocol/metadata_response.rb +185 -0
  77. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  78. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  79. data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
  80. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  81. data/lib/kafka/protocol/produce_request.rb +92 -0
  82. data/lib/kafka/protocol/produce_response.rb +63 -0
  83. data/lib/kafka/protocol/record.rb +88 -0
  84. data/lib/kafka/protocol/record_batch.rb +222 -0
  85. data/lib/kafka/protocol/request_message.rb +26 -0
  86. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  87. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  88. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  89. data/lib/kafka/protocol/sync_group_response.rb +23 -0
  90. data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
  91. data/lib/kafka/sasl/gssapi.rb +76 -0
  92. data/lib/kafka/sasl/oauth.rb +64 -0
  93. data/lib/kafka/sasl/plain.rb +39 -0
  94. data/lib/kafka/sasl/scram.rb +177 -0
  95. data/lib/kafka/sasl_authenticator.rb +61 -0
  96. data/lib/kafka/snappy_codec.rb +25 -0
  97. data/lib/kafka/socket_with_timeout.rb +96 -0
  98. data/lib/kafka/ssl_context.rb +66 -0
  99. data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
  100. data/lib/kafka/statsd.rb +296 -0
  101. data/lib/kafka/tagged_logger.rb +72 -0
  102. data/lib/kafka/transaction_manager.rb +261 -0
  103. data/lib/kafka/transaction_state_machine.rb +72 -0
  104. data/lib/kafka/version.rb +5 -0
  105. metadata +461 -0
@@ -0,0 +1,208 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "kafka/round_robin_assignment_strategy"
5
+
6
module Kafka

  # Manages this client's membership in a Kafka consumer group: joining and
  # leaving the group, heartbeating, committing/fetching offsets, and tracking
  # the partitions assigned to this member.
  class ConsumerGroup
    attr_reader :assigned_partitions, :generation_id, :group_id

    def initialize(cluster:, logger:, group_id:, session_timeout:, retention_time:, instrumenter:)
      @cluster = cluster
      @logger = TaggedLogger.new(logger)
      @group_id = group_id
      @session_timeout = session_timeout
      @instrumenter = instrumenter
      @member_id = ""
      @generation_id = nil
      @members = {}
      @topics = Set.new
      @assigned_partitions = {}
      @assignment_strategy = RoundRobinAssignmentStrategy.new(cluster: @cluster)
      @retention_time = retention_time
    end

    # Registers a topic subscription and tells the cluster to track its metadata.
    def subscribe(topic)
      @topics.add(topic)
      @cluster.add_target_topics([topic])
    end

    # Returns only the assigned partitions that belong to subscribed topics.
    def subscribed_partitions
      @assigned_partitions.select { |topic, _| @topics.include?(topic) }
    end

    def assigned_to?(topic, partition)
      subscribed_partitions.fetch(topic, []).include?(partition)
    end

    # Having a generation id indicates that we're a member of the group.
    def member?
      !@generation_id.nil?
    end

    # Joins the group and synchronizes the partition assignment, retrying
    # (after a short pause) on coordinator or connection failures.
    def join
      raise Kafka::Error, "Cannot join group without at least one topic subscription" if @topics.empty?

      join_group
      synchronize
    rescue NotCoordinatorForGroup
      @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
      sleep 1
      @coordinator = nil
      retry
    rescue ConnectionError
      @logger.error "Connection error while trying to join group `#{@group_id}`; retrying..."
      sleep 1
      @cluster.mark_as_stale!
      @coordinator = nil
      retry
    end

    def leave
      @logger.info "Leaving group `#{@group_id}`"

      # Having a generation id indicates that we're a member of the group.
      @generation_id = nil

      @instrumenter.instrument("leave_group.consumer", group_id: @group_id) do
        coordinator.leave_group(group_id: @group_id, member_id: @member_id)
      end
    rescue ConnectionError
      # Best effort — the broker will expire our membership via the session timeout.
    end

    def fetch_offsets
      coordinator.fetch_offsets(
        group_id: @group_id,
        topics: @assigned_partitions,
      )
    end

    # Commits the given offsets to the coordinator. Raises OffsetCommitError
    # if any partition reports an error.
    def commit_offsets(offsets)
      response = coordinator.commit_offsets(
        group_id: @group_id,
        member_id: @member_id,
        generation_id: @generation_id,
        offsets: offsets,
        retention_time: @retention_time
      )

      response.topics.each do |_topic, partitions|
        partitions.each do |_partition, error_code|
          Protocol.handle_error(error_code)
        end
      end
    rescue Kafka::Error => e
      @logger.error "Error committing offsets: #{e}"
      raise OffsetCommitError, e
    end

    # Sends a heartbeat to keep our group membership alive, retrying when the
    # coordinator has moved and wrapping membership errors in HeartbeatError.
    def heartbeat
      @logger.debug "Sending heartbeat..."

      @instrumenter.instrument('heartbeat.consumer',
                               group_id: @group_id,
                               topic_partitions: @assigned_partitions) do

        response = coordinator.heartbeat(
          group_id: @group_id,
          generation_id: @generation_id,
          member_id: @member_id,
        )

        Protocol.handle_error(response.error_code)
      end
    rescue ConnectionError, UnknownMemberId, RebalanceInProgress, IllegalGeneration => e
      @logger.error "Error sending heartbeat: #{e}"
      raise HeartbeatError, e
    rescue NotCoordinatorForGroup
      @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
      sleep 1
      @coordinator = nil
      retry
    end

    # Human-readable summary of the group's assignment; long partition lists
    # are truncated to the first five entries.
    def to_s
      summaries = assigned_partitions.map do |topic, partitions|
        shown =
          if partitions.size > 5
            "#{partitions[0..4].join(', ')}..."
          else
            partitions.join(', ')
          end
        "#{topic}: #{shown}"
      end
      "[#{@group_id}] {" + summaries.join('; ') + '}:'
    end

    private

    # Sends a JoinGroup request, recording the generation, member id, leader
    # and member list from the response. Resets the member id and retries on
    # UnknownMemberId.
    def join_group
      @logger.info "Joining group `#{@group_id}`"

      @instrumenter.instrument("join_group.consumer", group_id: @group_id) do
        response = coordinator.join_group(
          group_id: @group_id,
          session_timeout: @session_timeout,
          member_id: @member_id,
        )

        Protocol.handle_error(response.error_code)

        @generation_id = response.generation_id
        @member_id = response.member_id
        @leader_id = response.leader_id
        @members = response.members
      end

      @logger.info "Joined group `#{@group_id}` with member id `#{@member_id}`"
    rescue UnknownMemberId
      @logger.error "Failed to join group; resetting member id and retrying in 1s..."

      @member_id = ""
      sleep 1

      retry
    end

    def group_leader?
      @member_id == @leader_id
    end

    # Runs the SyncGroup phase. The elected leader computes the assignment for
    # all members; everyone receives their own assignment in the response.
    def synchronize
      group_assignment = {}

      if group_leader?
        @logger.info "Chosen as leader of group `#{@group_id}`"

        group_assignment = @assignment_strategy.assign(
          members: @members.keys,
          topics: @topics,
        )
      end

      @instrumenter.instrument("sync_group.consumer", group_id: @group_id) do
        response = coordinator.sync_group(
          group_id: @group_id,
          generation_id: @generation_id,
          member_id: @member_id,
          group_assignment: group_assignment,
        )

        Protocol.handle_error(response.error_code)

        response.member_assignment.topics.each do |topic, assigned_partitions|
          @logger.info "Partitions assigned for `#{topic}`: #{assigned_partitions.join(', ')}"
        end

        @assigned_partitions.replace(response.member_assignment.topics)
      end
    end

    # Memoized group coordinator lookup; retries (unbounded) while the
    # coordinator is unavailable.
    def coordinator
      @coordinator ||= @cluster.get_group_coordinator(group_id: @group_id)
    rescue CoordinatorNotAvailable
      @logger.error "Group coordinator not available for group `#{@group_id}`"

      sleep 1

      retry
    end
  end
end
@@ -0,0 +1,413 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "datadog/statsd"
5
+ rescue LoadError
6
+ $stderr.puts "In order to report Kafka client metrics to Datadog you need to install the `dogstatsd-ruby` gem."
7
+ raise
8
+ end
9
+
10
+ require "active_support/subscriber"
11
+
12
module Kafka

  # Reports operational metrics to a Datadog agent using the modified Statsd protocol.
  #
  #     require "kafka/datadog"
  #
  #     # Default is "ruby_kafka".
  #     Kafka::Datadog.namespace = "custom-namespace"
  #
  #     # Default is "127.0.0.1".
  #     Kafka::Datadog.host = "statsd.something.com"
  #
  #     # Default is 8125.
  #     Kafka::Datadog.port = 1234
  #
  # Once the file has been required, no further configuration is needed – all operational
  # metrics are automatically emitted.
  module Datadog
    STATSD_NAMESPACE = "ruby_kafka"

    class << self
      # Lazily builds the dogstatsd client from the configured settings.
      def statsd
        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags)
      end

      def statsd=(statsd)
        clear
        @statsd = statsd
      end

      def host
        @host ||= default_host
      end

      def host=(host)
        @host = host
        clear
      end

      def port
        @port ||= default_port
      end

      def port=(port)
        @port = port
        clear
      end

      def namespace
        @namespace ||= STATSD_NAMESPACE
      end

      def namespace=(namespace)
        @namespace = namespace
        clear
      end

      def tags
        @tags ||= []
      end

      def tags=(tags)
        @tags = tags
        clear
      end

      private

      # Older and newer dogstatsd-ruby versions expose the defaults in
      # different places; probe for the newer Connection constant first.
      def default_host
        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_HOST : ::Datadog::Statsd::DEFAULT_HOST
      end

      def default_port
        ::Datadog::Statsd.const_defined?(:Connection) ? ::Datadog::Statsd::Connection::DEFAULT_PORT : ::Datadog::Statsd::DEFAULT_PORT
      end

      # Closes and drops the cached client so the next .statsd call rebuilds it.
      def clear
        @statsd && @statsd.close
        @statsd = nil
      end
    end

    # Base subscriber that forwards metric calls to the shared statsd client,
    # converting tag hashes into dogstatsd's "key:value" array form.
    class StatsdSubscriber < ActiveSupport::Subscriber
      private

      %w[increment histogram count timing gauge].each do |type|
        define_method(type) do |*args|
          emit(type, *args)
        end
      end

      def emit(type, *args, tags: {})
        formatted_tags = tags.map { |k, v| "#{k}:#{v}" }

        Kafka::Datadog.statsd.send(type, *args, tags: formatted_tags)
      end
    end

    # Emits per-request latency, call counts, payload sizes, and error counts.
    class ConnectionSubscriber < StatsdSubscriber
      def request(event)
        payload = event.payload
        tags = {
          client: payload.fetch(:client_id),
          api: payload.fetch(:api, "unknown"),
          broker: payload.fetch(:broker_host)
        }

        timing("api.latency", event.duration, tags: tags)
        increment("api.calls", tags: tags)

        histogram("api.request_size", payload.fetch(:request_size, 0), tags: tags)
        histogram("api.response_size", payload.fetch(:response_size, 0), tags: tags)

        increment("api.errors", tags: tags) if payload.key?(:exception)
      end

      attach_to "connection.kafka"
    end

    # Emits consumer-side metrics: message/batch processing, lag, and group
    # membership events.
    class ConsumerSubscriber < StatsdSubscriber
      def process_message(event)
        payload = event.payload
        offset = payload.fetch(:offset)
        offset_lag = payload.fetch(:offset_lag)
        create_time = payload.fetch(:create_time)
        # Milliseconds between message creation and now, when a timestamp exists.
        time_lag = create_time && ((Time.now - create_time) * 1000).to_i

        tags = {
          client: payload.fetch(:client_id),
          group_id: payload.fetch(:group_id),
          topic: payload.fetch(:topic),
          partition: payload.fetch(:partition),
        }

        if payload.key?(:exception)
          increment("consumer.process_message.errors", tags: tags)
        else
          timing("consumer.process_message.latency", event.duration, tags: tags)
          increment("consumer.messages", tags: tags)
        end

        gauge("consumer.offset", offset, tags: tags)
        gauge("consumer.lag", offset_lag, tags: tags)

        # Not all messages have timestamps.
        gauge("consumer.time_lag", time_lag, tags: tags) if time_lag
      end

      def process_batch(event)
        payload = event.payload
        last_offset = payload.fetch(:last_offset)
        message_count = payload.fetch(:message_count)

        tags = {
          client: payload.fetch(:client_id),
          group_id: payload.fetch(:group_id),
          topic: payload.fetch(:topic),
          partition: payload.fetch(:partition),
        }

        if payload.key?(:exception)
          increment("consumer.process_batch.errors", tags: tags)
        else
          timing("consumer.process_batch.latency", event.duration, tags: tags)
          count("consumer.messages", message_count, tags: tags)
        end

        gauge("consumer.offset", last_offset, tags: tags)
      end

      def fetch_batch(event)
        payload = event.payload

        tags = {
          client: payload.fetch(:client_id),
          group_id: payload.fetch(:group_id),
          topic: payload.fetch(:topic),
          partition: payload.fetch(:partition),
        }

        histogram("consumer.batch_size", payload.fetch(:message_count), tags: tags)
        gauge("consumer.lag", payload.fetch(:offset_lag), tags: tags)
      end

      def join_group(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        timing("consumer.join_group", event.duration, tags: tags)

        increment("consumer.join_group.errors", tags: tags) if event.payload.key?(:exception)
      end

      def sync_group(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        timing("consumer.sync_group", event.duration, tags: tags)

        increment("consumer.sync_group.errors", tags: tags) if event.payload.key?(:exception)
      end

      def leave_group(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        timing("consumer.leave_group", event.duration, tags: tags)

        increment("consumer.leave_group.errors", tags: tags) if event.payload.key?(:exception)
      end

      def loop(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        histogram("consumer.loop.duration", event.duration, tags: tags)
      end

      def pause_status(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
          topic: event.payload.fetch(:topic),
          partition: event.payload.fetch(:partition),
        }

        gauge("consumer.pause.duration", event.payload.fetch(:duration), tags: tags)
      end

      attach_to "consumer.kafka"
    end

    # Emits producer-side metrics: buffer pressure, delivery latency/attempts,
    # and per-topic ACK/error counts.
    class ProducerSubscriber < StatsdSubscriber
      def produce_message(event)
        payload = event.payload
        topic = payload.fetch(:topic)
        message_size = payload.fetch(:message_size)
        buffer_size = payload.fetch(:buffer_size)
        buffer_fill_ratio = buffer_size.to_f / payload.fetch(:max_buffer_size).to_f
        buffer_fill_percentage = buffer_fill_ratio * 100.0

        tags = {
          client: payload.fetch(:client_id),
          topic: topic,
        }

        # This gets us the write rate.
        increment("producer.produce.messages", tags: tags.merge(topic: topic))

        # Information about typical/average/95p message size.
        histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))

        # Aggregate message size.
        count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))

        # This gets us the avg/max buffer size per producer.
        histogram("producer.buffer.size", buffer_size, tags: tags)

        # This gets us the avg/max buffer fill ratio per producer.
        histogram("producer.buffer.fill_ratio", buffer_fill_ratio, tags: tags)
        histogram("producer.buffer.fill_percentage", buffer_fill_percentage, tags: tags)
      end

      def buffer_overflow(event)
        tags = {
          client: event.payload.fetch(:client_id),
          topic: event.payload.fetch(:topic),
        }

        increment("producer.produce.errors", tags: tags)
      end

      def deliver_messages(event)
        payload = event.payload

        tags = {
          client: payload.fetch(:client_id),
        }

        increment("producer.deliver.errors", tags: tags) if payload.key?(:exception)

        timing("producer.deliver.latency", event.duration, tags: tags)

        # Messages delivered to Kafka:
        count("producer.deliver.messages", payload.fetch(:delivered_message_count), tags: tags)

        # Number of attempts to deliver messages:
        histogram("producer.deliver.attempts", payload.fetch(:attempts), tags: tags)
      end

      def ack_message(event)
        tags = {
          client: event.payload.fetch(:client_id),
          topic: event.payload.fetch(:topic),
        }

        # Number of messages ACK'd for the topic.
        increment("producer.ack.messages", tags: tags)

        # Histogram of delay between a message being produced and it being ACK'd.
        histogram("producer.ack.delay", event.payload.fetch(:delay), tags: tags)
      end

      def topic_error(event)
        tags = {
          client: event.payload.fetch(:client_id),
          topic: event.payload.fetch(:topic)
        }

        increment("producer.ack.errors", tags: tags)
      end

      attach_to "producer.kafka"
    end

    # Emits async-producer queue pressure and drop/overflow counts.
    class AsyncProducerSubscriber < StatsdSubscriber
      def enqueue_message(event)
        payload = event.payload
        queue_size = payload.fetch(:queue_size)
        queue_fill_ratio = queue_size.to_f / payload.fetch(:max_queue_size).to_f

        tags = {
          client: payload.fetch(:client_id),
          topic: payload.fetch(:topic),
        }

        # This gets us the avg/max queue size per producer.
        histogram("async_producer.queue.size", queue_size, tags: tags)

        # This gets us the avg/max queue fill ratio per producer.
        histogram("async_producer.queue.fill_ratio", queue_fill_ratio, tags: tags)
      end

      def buffer_overflow(event)
        tags = {
          client: event.payload.fetch(:client_id),
          topic: event.payload.fetch(:topic),
        }

        increment("async_producer.produce.errors", tags: tags)
      end

      def drop_messages(event)
        tags = {
          client: event.payload.fetch(:client_id),
        }

        count("async_producer.dropped_messages", event.payload.fetch(:message_count), tags: tags)
      end

      attach_to "async_producer.kafka"
    end

    # Emits the fetcher's internal queue size.
    class FetcherSubscriber < StatsdSubscriber
      def loop(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        gauge("fetcher.queue_size", event.payload.fetch(:queue_size), tags: tags)
      end

      attach_to "fetcher.kafka"
    end
  end
end