ruby-kafka-temp-fork 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (144)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +310 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1342 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka.rb +373 -0
  28. data/lib/kafka/async_producer.rb +291 -0
  29. data/lib/kafka/broker.rb +217 -0
  30. data/lib/kafka/broker_info.rb +16 -0
  31. data/lib/kafka/broker_pool.rb +41 -0
  32. data/lib/kafka/broker_uri.rb +43 -0
  33. data/lib/kafka/client.rb +833 -0
  34. data/lib/kafka/cluster.rb +513 -0
  35. data/lib/kafka/compression.rb +45 -0
  36. data/lib/kafka/compressor.rb +86 -0
  37. data/lib/kafka/connection.rb +223 -0
  38. data/lib/kafka/connection_builder.rb +33 -0
  39. data/lib/kafka/consumer.rb +642 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/consumer_group/assignor.rb +63 -0
  42. data/lib/kafka/crc32_hash.rb +15 -0
  43. data/lib/kafka/datadog.rb +420 -0
  44. data/lib/kafka/digest.rb +22 -0
  45. data/lib/kafka/fetch_operation.rb +115 -0
  46. data/lib/kafka/fetched_batch.rb +58 -0
  47. data/lib/kafka/fetched_batch_generator.rb +120 -0
  48. data/lib/kafka/fetched_message.rb +48 -0
  49. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  50. data/lib/kafka/fetcher.rb +224 -0
  51. data/lib/kafka/gzip_codec.rb +34 -0
  52. data/lib/kafka/heartbeat.rb +25 -0
  53. data/lib/kafka/instrumenter.rb +38 -0
  54. data/lib/kafka/interceptors.rb +33 -0
  55. data/lib/kafka/lz4_codec.rb +27 -0
  56. data/lib/kafka/message_buffer.rb +87 -0
  57. data/lib/kafka/murmur2_hash.rb +17 -0
  58. data/lib/kafka/offset_manager.rb +259 -0
  59. data/lib/kafka/partitioner.rb +40 -0
  60. data/lib/kafka/pause.rb +92 -0
  61. data/lib/kafka/pending_message.rb +29 -0
  62. data/lib/kafka/pending_message_queue.rb +41 -0
  63. data/lib/kafka/produce_operation.rb +205 -0
  64. data/lib/kafka/producer.rb +528 -0
  65. data/lib/kafka/prometheus.rb +316 -0
  66. data/lib/kafka/protocol.rb +225 -0
  67. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  68. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  69. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  70. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  71. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  72. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  73. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  74. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  75. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  76. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  77. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  78. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  79. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  80. data/lib/kafka/protocol/decoder.rb +175 -0
  81. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  82. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  83. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  84. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  85. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  86. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  87. data/lib/kafka/protocol/encoder.rb +184 -0
  88. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  89. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  90. data/lib/kafka/protocol/fetch_request.rb +70 -0
  91. data/lib/kafka/protocol/fetch_response.rb +136 -0
  92. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  93. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  94. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  95. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  96. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  97. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  98. data/lib/kafka/protocol/join_group_request.rb +47 -0
  99. data/lib/kafka/protocol/join_group_response.rb +41 -0
  100. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  101. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  102. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  103. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  104. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  105. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  106. data/lib/kafka/protocol/member_assignment.rb +42 -0
  107. data/lib/kafka/protocol/message.rb +172 -0
  108. data/lib/kafka/protocol/message_set.rb +55 -0
  109. data/lib/kafka/protocol/metadata_request.rb +31 -0
  110. data/lib/kafka/protocol/metadata_response.rb +185 -0
  111. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  112. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  113. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  114. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  115. data/lib/kafka/protocol/produce_request.rb +94 -0
  116. data/lib/kafka/protocol/produce_response.rb +63 -0
  117. data/lib/kafka/protocol/record.rb +88 -0
  118. data/lib/kafka/protocol/record_batch.rb +223 -0
  119. data/lib/kafka/protocol/request_message.rb +26 -0
  120. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  121. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  122. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  123. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  124. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  125. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  126. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +61 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +188 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/ruby-kafka-temp-fork.rb +5 -0
  143. data/ruby-kafka-temp-fork.gemspec +54 -0
  144. metadata +520 -0
@@ -0,0 +1,231 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "kafka/consumer_group/assignor"
5
+ require "kafka/round_robin_assignment_strategy"
6
+
7
module Kafka

  # Manages this client's membership of a single Kafka consumer group:
  # subscribing to topics, joining/leaving the group, sending heartbeats,
  # and committing/fetching offsets through the group coordinator.
  class ConsumerGroup
    attr_reader :assigned_partitions, :generation_id, :group_id

    # @param cluster [Kafka::Cluster] used to look up the coordinator and topic metadata.
    # @param logger [Logger] wrapped in a TaggedLogger below.
    # @param group_id [String] the consumer group name.
    # @param session_timeout [Integer] session timeout sent on join.
    # @param rebalance_timeout [Integer] rebalance timeout sent on join.
    # @param retention_time [Integer] offset retention passed to offset commits.
    # @param instrumenter [Kafka::Instrumenter] emits *.consumer events.
    # @param assignment_strategy [Object, nil] partition assignment strategy;
    #   defaults to round-robin when nil.
    def initialize(cluster:, logger:, group_id:, session_timeout:, rebalance_timeout:, retention_time:, instrumenter:, assignment_strategy:)
      @cluster = cluster
      @logger = TaggedLogger.new(logger)
      @group_id = group_id
      @session_timeout = session_timeout
      @rebalance_timeout = rebalance_timeout
      @instrumenter = instrumenter
      # An empty member id tells the coordinator this is a brand-new member.
      @member_id = ""
      @generation_id = nil
      @members = {}
      @topics = Set.new
      @assigned_partitions = {}
      @assignor = Assignor.new(
        cluster: cluster,
        strategy: assignment_strategy || RoundRobinAssignmentStrategy.new
      )
      @retention_time = retention_time
    end

    # Adds a topic to the subscription and registers it with the cluster so
    # that its metadata gets fetched.
    def subscribe(topic)
      @topics.add(topic)
      @cluster.add_target_topics([topic])
    end

    # Only the assigned partitions whose topics are in our own subscription.
    def subscribed_partitions
      @assigned_partitions.select { |topic, _| @topics.include?(topic) }
    end

    # Whether the given topic/partition pair is assigned to this member.
    def assigned_to?(topic, partition)
      subscribed_partitions.fetch(topic, []).include?(partition)
    end

    # Having a generation id indicates that we're a member of the group.
    def member?
      !@generation_id.nil?
    end

    # Joins the group and synchronizes the partition assignment. Retries
    # indefinitely (pausing 1s between attempts) on coordinator lookup
    # failures and connection errors.
    #
    # @raise [Kafka::Error] if no topics have been subscribed to.
    def join
      if @topics.empty?
        raise Kafka::Error, "Cannot join group without at least one topic subscription"
      end

      join_group
      synchronize
    rescue NotCoordinatorForGroup
      @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
      sleep 1
      # Drop the cached coordinator so the next attempt looks it up afresh.
      @coordinator = nil
      retry
    rescue ConnectionError
      @logger.error "Connection error while trying to join group `#{@group_id}`; retrying..."
      sleep 1
      # Cluster metadata may be outdated; force a refresh before retrying.
      @cluster.mark_as_stale!
      @coordinator = nil
      retry
    end

    # Leaves the group. Leaving is best-effort: connection errors are
    # deliberately swallowed, since the broker will evict us after the
    # session timeout anyway.
    def leave
      @logger.info "Leaving group `#{@group_id}`"

      # Having a generation id indicates that we're a member of the group.
      @generation_id = nil

      @instrumenter.instrument("leave_group.consumer", group_id: @group_id) do
        coordinator.leave_group(group_id: @group_id, member_id: @member_id)
      end
    rescue ConnectionError
    end

    # Fetches the committed offsets for all partitions currently assigned
    # to this member.
    def fetch_offsets
      coordinator.fetch_offsets(
        group_id: @group_id,
        topics: @assigned_partitions,
      )
    end

    # Commits the given offsets to the group coordinator, checking each
    # per-partition error code in the response.
    #
    # @param offsets [Hash] offsets to commit, keyed by topic/partition.
    # @raise [Kafka::OffsetCommitError] wrapping any Kafka-level failure.
    def commit_offsets(offsets)
      response = coordinator.commit_offsets(
        group_id: @group_id,
        member_id: @member_id,
        generation_id: @generation_id,
        offsets: offsets,
        retention_time: @retention_time
      )

      response.topics.each do |topic, partitions|
        partitions.each do |partition, error_code|
          Protocol.handle_error(error_code)
        end
      end
    rescue Kafka::Error => e
      @logger.error "Error committing offsets: #{e}"
      raise OffsetCommitError, e
    end

    # Sends a heartbeat to keep our group membership alive.
    #
    # @raise [Kafka::HeartbeatError] on connection errors, stale membership,
    #   or an in-progress rebalance (the caller decides how to recover).
    def heartbeat
      @logger.debug "Sending heartbeat..."

      @instrumenter.instrument('heartbeat.consumer',
                               group_id: @group_id,
                               topic_partitions: @assigned_partitions) do

        response = coordinator.heartbeat(
          group_id: @group_id,
          generation_id: @generation_id,
          member_id: @member_id,
        )

        Protocol.handle_error(response.error_code)
      end
    rescue ConnectionError, UnknownMemberId, IllegalGeneration => e
      @logger.error "Error sending heartbeat: #{e}"
      raise HeartbeatError, e
    rescue RebalanceInProgress => e
      # A rebalance is expected operational behavior, so log at warn level.
      @logger.warn "Error sending heartbeat: #{e}"
      raise HeartbeatError, e
    rescue NotCoordinatorForGroup
      @logger.error "Failed to find coordinator for group `#{@group_id}`; retrying..."
      sleep 1
      @coordinator = nil
      retry
    end

    # Human-readable summary of the group and its assigned partitions;
    # partition lists longer than five entries are truncated with "...".
    def to_s
      "[#{@group_id}] {" + assigned_partitions.map { |topic, partitions|
        partition_str = partitions.size > 5 ?
          "#{partitions[0..4].join(', ')}..." :
          partitions.join(', ')
        "#{topic}: #{partition_str}"
      }.join('; ') + '}:'
    end

    private

    # Performs the JoinGroup round-trip, recording the generation id, our
    # (possibly new) member id, the leader id, and the member roster.
    # Retries after 1s when our member id is stale or the coordinator is
    # still loading group state.
    def join_group
      @logger.info "Joining group `#{@group_id}`"

      @instrumenter.instrument("join_group.consumer", group_id: @group_id) do
        response = coordinator.join_group(
          group_id: @group_id,
          session_timeout: @session_timeout,
          rebalance_timeout: @rebalance_timeout,
          member_id: @member_id,
          topics: @topics,
          protocol_name: @assignor.protocol_name,
          user_data: @assignor.user_data,
        )

        Protocol.handle_error(response.error_code)

        @generation_id = response.generation_id
        @member_id = response.member_id
        @leader_id = response.leader_id
        @members = response.members
      end

      @logger.info "Joined group `#{@group_id}` with member id `#{@member_id}`"
    rescue UnknownMemberId
      @logger.error "Failed to join group; resetting member id and retrying in 1s..."

      # Rejoin as a fresh member; the coordinator no longer knows this id.
      @member_id = ""
      sleep 1

      retry
    rescue CoordinatorLoadInProgress
      @logger.error "Coordinator broker still loading, retrying in 1s..."

      sleep 1

      retry
    end

    # Whether this member was elected group leader in the last join.
    def group_leader?
      @member_id == @leader_id
    end

    # Performs the SyncGroup round-trip. Only the leader computes the group
    # assignment (over the union of all members' topics); followers send an
    # empty assignment and receive theirs back from the coordinator.
    def synchronize
      group_assignment = {}

      if group_leader?
        @logger.info "Chosen as leader of group `#{@group_id}`"

        topics = Set.new
        @members.each do |_member, metadata|
          metadata.topics.each { |t| topics.add(t) }
        end

        group_assignment = @assignor.assign(
          members: @members,
          topics: topics,
        )
      end

      @instrumenter.instrument("sync_group.consumer", group_id: @group_id) do
        response = coordinator.sync_group(
          group_id: @group_id,
          generation_id: @generation_id,
          member_id: @member_id,
          group_assignment: group_assignment,
        )

        Protocol.handle_error(response.error_code)

        response.member_assignment.topics.each do |topic, assigned_partitions|
          @logger.info "Partitions assigned for `#{topic}`: #{assigned_partitions.join(', ')}"
        end

        # Mutate in place so existing references to #assigned_partitions
        # observe the new assignment.
        @assigned_partitions.replace(response.member_assignment.topics)
      end
    end

    # Lazily resolves and caches the group coordinator broker, retrying
    # every second while the coordinator is unavailable.
    def coordinator
      @coordinator ||= @cluster.get_group_coordinator(group_id: @group_id)
    rescue CoordinatorNotAvailable
      @logger.error "Group coordinator not available for group `#{@group_id}`"

      sleep 1

      retry
    end
  end
end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "kafka/protocol/member_assignment"
4
+
5
module Kafka
  class ConsumerGroup

    # Maps the partitions of the subscribed topics onto the members of a
    # consumer group, delegating the actual distribution to a pluggable
    # strategy object.
    class Assignor
      Partition = Struct.new(:topic, :partition_id)

      # @param cluster [Kafka::Cluster]
      # @param strategy [Object] an object that implements #call (and
      #   optionally #protocol_name and #user_data).
      def initialize(cluster:, strategy:)
        @cluster = cluster
        @strategy = strategy
      end

      # The assignment protocol name advertised to the group coordinator.
      # Falls back to the strategy's class name when the strategy does not
      # provide one.
      def protocol_name
        if @strategy.respond_to?(:protocol_name)
          @strategy.protocol_name
        else
          @strategy.class.to_s
        end
      end

      # Opaque user data the strategy wants shared with the group, if any.
      def user_data
        return nil unless @strategy.respond_to?(:user_data)

        @strategy.user_data
      end

      # Assign the topic partitions to the group members.
      #
      # @param members [Hash<String, Kafka::Protocol::JoinGroupResponse::Metadata>] a hash
      #   mapping member ids to metadata.
      # @param topics [Array<String>] topics
      # @return [Hash<String, Kafka::Protocol::MemberAssignment>] a hash mapping member
      #   ids to assignments.
      def assign(members:, topics:)
        # Expand every topic into its individual partitions.
        all_partitions = topics.flat_map do |topic|
          begin
            ids = @cluster.partitions_for(topic).map(&:partition_id)
          rescue UnknownTopicOrPartition
            raise UnknownTopicOrPartition, "unknown topic #{topic}"
          end
          ids.map { |id| Partition.new(topic, id) }
        end

        # Every member starts out with an empty assignment, so members the
        # strategy leaves out still receive a (blank) assignment.
        assignment = {}
        members.each_key { |member_id| assignment[member_id] = Protocol::MemberAssignment.new }

        distribution = @strategy.call(cluster: @cluster, members: members, partitions: all_partitions)
        distribution.each do |member_id, partitions|
          Array(partitions).each do |partition|
            assignment[member_id].assign(partition.topic, [partition.partition_id])
          end
        end

        assignment
      rescue Kafka::LeaderNotAvailable
        # Partition leadership is in flux; wait briefly and recompute.
        sleep 1
        retry
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+
5
module Kafka
  # Hashing backend that delegates to the CRC32 implementation shipped in
  # Ruby's standard library.
  class Crc32Hash

    # Nothing to do: Zlib's crc32 is available natively.
    def load
      nil
    end

    # Computes the CRC32 checksum of the given value.
    #
    # @param value [String] the bytes to hash
    # @return [Integer] the unsigned 32-bit checksum
    def hash(value)
      Zlib.crc32(value)
    end
  end
end
@@ -0,0 +1,420 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "datadog/statsd"
5
+ rescue LoadError
6
+ $stderr.puts "In order to report Kafka client metrics to Datadog you need to install the `dogstatsd-ruby` gem."
7
+ raise
8
+ end
9
+
10
+ require "active_support/subscriber"
11
+
12
module Kafka

  # Reports operational metrics to a Datadog agent using the modified Statsd protocol.
  #
  #     require "kafka/datadog"
  #
  #     # Default is "ruby_kafka".
  #     Kafka::Datadog.namespace = "custom-namespace"
  #
  #     # Default is "127.0.0.1".
  #     Kafka::Datadog.host = "statsd.something.com"
  #
  #     # Default is 8125.
  #     Kafka::Datadog.port = 1234
  #
  # Once the file has been required, no further configuration is needed – all operational
  # metrics are automatically emitted.
  module Datadog
    # Default metric namespace prefix.
    STATSD_NAMESPACE = "ruby_kafka"

    class << self
      # The shared Datadog::Statsd client, created lazily from the current
      # configuration.
      def statsd
        @statsd ||= ::Datadog::Statsd.new(host, port, namespace: namespace, tags: tags, socket_path: socket_path)
      end

      # Replaces the client entirely; the previously cached client (if any)
      # is closed first via #clear.
      def statsd=(statsd)
        clear
        @statsd = statsd
      end

      def host
        @host
      end

      # Changing any connection setting tears down the cached client so the
      # next call to #statsd picks up the new value. The same pattern is
      # used by the other writers below.
      def host=(host)
        @host = host
        clear
      end

      def port
        @port
      end

      def port=(port)
        @port = port
        clear
      end

      def socket_path
        @socket_path
      end

      def socket_path=(socket_path)
        @socket_path = socket_path
        clear
      end

      def namespace
        @namespace ||= STATSD_NAMESPACE
      end

      def namespace=(namespace)
        @namespace = namespace
        clear
      end

      # Extra tags attached to every metric. Defaults to none.
      def tags
        @tags ||= []
      end

      def tags=(tags)
        @tags = tags
        clear
      end

      private

      # Closes and discards the cached client, if one exists.
      def clear
        @statsd && @statsd.close
        @statsd = nil
      end
    end

    # Base class for the subscribers below: exposes statsd-style helpers
    # (increment, histogram, count, timing, gauge) that forward to the
    # shared client.
    class StatsdSubscriber < ActiveSupport::Subscriber
      private

      %w[increment histogram count timing gauge].each do |type|
        define_method(type) do |*args, **kwargs|
          emit(type, *args, **kwargs)
        end
      end

      # Formats the tags hash into Datadog's "key:value" strings and sends
      # the metric. (The trailing `.to_a` after `map` is redundant but
      # harmless.)
      def emit(type, *args, tags: {})
        tags = tags.map {|k, v| "#{k}:#{v}" }.to_a

        Kafka::Datadog.statsd.send(type, *args, tags: tags)
      end
    end

    # Emits per-request metrics (latency, sizes, error counts) for every
    # Kafka API call made over a connection.
    class ConnectionSubscriber < StatsdSubscriber
      def request(event)
        client = event.payload.fetch(:client_id)
        # :api may be absent for unrecognized requests.
        api = event.payload.fetch(:api, "unknown")
        request_size = event.payload.fetch(:request_size, 0)
        response_size = event.payload.fetch(:response_size, 0)
        broker = event.payload.fetch(:broker_host)

        tags = {
          client: client,
          api: api,
          broker: broker
        }

        timing("api.latency", event.duration, tags: tags)
        increment("api.calls", tags: tags)

        histogram("api.request_size", request_size, tags: tags)
        histogram("api.response_size", response_size, tags: tags)

        if event.payload.key?(:exception)
          increment("api.errors", tags: tags)
        end
      end

      attach_to "connection.kafka"
    end

    # Emits metrics for consumer-side events: message/batch processing,
    # group membership, and partition pauses.
    class ConsumerSubscriber < StatsdSubscriber
      # One message processed by the consumer.
      def process_message(event)
        offset = event.payload.fetch(:offset)
        offset_lag = event.payload.fetch(:offset_lag)
        create_time = event.payload.fetch(:create_time)
        # Milliseconds between the message being created and now.
        time_lag = create_time && ((Time.now - create_time) * 1000).to_i

        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
          topic: event.payload.fetch(:topic),
          partition: event.payload.fetch(:partition),
        }

        if event.payload.key?(:exception)
          increment("consumer.process_message.errors", tags: tags)
        else
          timing("consumer.process_message.latency", event.duration, tags: tags)
          increment("consumer.messages", tags: tags)
        end

        gauge("consumer.offset", offset, tags: tags)
        gauge("consumer.lag", offset_lag, tags: tags)

        # Not all messages have timestamps.
        if time_lag
          gauge("consumer.time_lag", time_lag, tags: tags)
        end
      end

      # One batch of messages processed by the consumer.
      def process_batch(event)
        offset = event.payload.fetch(:last_offset)
        messages = event.payload.fetch(:message_count)
        create_time = event.payload.fetch(:last_create_time)
        time_lag = create_time && ((Time.now - create_time) * 1000).to_i

        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
          topic: event.payload.fetch(:topic),
          partition: event.payload.fetch(:partition),
        }

        if event.payload.key?(:exception)
          increment("consumer.process_batch.errors", tags: tags)
        else
          timing("consumer.process_batch.latency", event.duration, tags: tags)
          count("consumer.messages", messages, tags: tags)
        end

        gauge("consumer.offset", offset, tags: tags)

        if time_lag
          gauge("consumer.time_lag", time_lag, tags: tags)
        end
      end

      # A batch fetched from a broker (before processing).
      def fetch_batch(event)
        lag = event.payload.fetch(:offset_lag)
        batch_size = event.payload.fetch(:message_count)

        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
          topic: event.payload.fetch(:topic),
          partition: event.payload.fetch(:partition),
        }

        histogram("consumer.batch_size", batch_size, tags: tags)
        gauge("consumer.lag", lag, tags: tags)
      end

      # Duration (and failures) of the JoinGroup round-trip.
      def join_group(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        timing("consumer.join_group", event.duration, tags: tags)

        if event.payload.key?(:exception)
          increment("consumer.join_group.errors", tags: tags)
        end
      end

      # Duration (and failures) of the SyncGroup round-trip.
      def sync_group(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        timing("consumer.sync_group", event.duration, tags: tags)

        if event.payload.key?(:exception)
          increment("consumer.sync_group.errors", tags: tags)
        end
      end

      # Duration (and failures) of leaving the group.
      def leave_group(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        timing("consumer.leave_group", event.duration, tags: tags)

        if event.payload.key?(:exception)
          increment("consumer.leave_group.errors", tags: tags)
        end
      end

      # Duration of one iteration of the consumer loop.
      def loop(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        histogram("consumer.loop.duration", event.duration, tags: tags)
      end

      # How long a partition has currently been paused.
      def pause_status(event)
        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
          topic: event.payload.fetch(:topic),
          partition: event.payload.fetch(:partition),
        }

        duration = event.payload.fetch(:duration)

        gauge("consumer.pause.duration", duration, tags: tags)
      end

      attach_to "consumer.kafka"
    end

    # Emits metrics for the synchronous producer: buffering, delivery,
    # acks, and errors.
    class ProducerSubscriber < StatsdSubscriber
      # A message was added to the producer's buffer.
      def produce_message(event)
        client = event.payload.fetch(:client_id)
        topic = event.payload.fetch(:topic)
        message_size = event.payload.fetch(:message_size)
        buffer_size = event.payload.fetch(:buffer_size)
        max_buffer_size = event.payload.fetch(:max_buffer_size)
        buffer_fill_ratio = buffer_size.to_f / max_buffer_size.to_f
        buffer_fill_percentage = buffer_fill_ratio * 100.0

        tags = {
          client: client,
          topic: topic,
        }

        # This gets us the write rate.
        increment("producer.produce.messages", tags: tags.merge(topic: topic))

        # Information about typical/average/95p message size.
        histogram("producer.produce.message_size", message_size, tags: tags.merge(topic: topic))

        # Aggregate message size.
        count("producer.produce.message_size.sum", message_size, tags: tags.merge(topic: topic))

        # This gets us the avg/max buffer size per producer.
        histogram("producer.buffer.size", buffer_size, tags: tags)

        # This gets us the avg/max buffer fill ratio per producer.
        histogram("producer.buffer.fill_ratio", buffer_fill_ratio, tags: tags)
        histogram("producer.buffer.fill_percentage", buffer_fill_percentage, tags: tags)
      end

      # A message was rejected because the buffer was full.
      def buffer_overflow(event)
        tags = {
          client: event.payload.fetch(:client_id),
          topic: event.payload.fetch(:topic),
        }

        increment("producer.produce.errors", tags: tags)
      end

      # A delivery attempt completed (successfully or not).
      def deliver_messages(event)
        client = event.payload.fetch(:client_id)
        message_count = event.payload.fetch(:delivered_message_count)
        attempts = event.payload.fetch(:attempts)

        tags = {
          client: client,
        }

        if event.payload.key?(:exception)
          increment("producer.deliver.errors", tags: tags)
        end

        timing("producer.deliver.latency", event.duration, tags: tags)

        # Messages delivered to Kafka:
        count("producer.deliver.messages", message_count, tags: tags)

        # Number of attempts to deliver messages:
        histogram("producer.deliver.attempts", attempts, tags: tags)
      end

      # A single message was acknowledged by the broker.
      def ack_message(event)
        tags = {
          client: event.payload.fetch(:client_id),
          topic: event.payload.fetch(:topic),
        }

        # Number of messages ACK'd for the topic.
        increment("producer.ack.messages", tags: tags)

        # Histogram of delay between a message being produced and it being ACK'd.
        histogram("producer.ack.delay", event.payload.fetch(:delay), tags: tags)
      end

      # A topic-level error was returned in a produce response.
      def topic_error(event)
        tags = {
          client: event.payload.fetch(:client_id),
          topic: event.payload.fetch(:topic)
        }

        increment("producer.ack.errors", tags: tags)
      end

      attach_to "producer.kafka"
    end

    # Emits metrics for the async (background-thread) producer's queue.
    class AsyncProducerSubscriber < StatsdSubscriber
      # A message was enqueued for background delivery.
      def enqueue_message(event)
        client = event.payload.fetch(:client_id)
        topic = event.payload.fetch(:topic)
        queue_size = event.payload.fetch(:queue_size)
        max_queue_size = event.payload.fetch(:max_queue_size)
        queue_fill_ratio = queue_size.to_f / max_queue_size.to_f

        tags = {
          client: client,
          topic: topic,
        }

        # This gets us the avg/max queue size per producer.
        histogram("async_producer.queue.size", queue_size, tags: tags)

        # This gets us the avg/max queue fill ratio per producer.
        histogram("async_producer.queue.fill_ratio", queue_fill_ratio, tags: tags)
      end

      # A message was rejected because the queue was full.
      def buffer_overflow(event)
        tags = {
          client: event.payload.fetch(:client_id),
          topic: event.payload.fetch(:topic),
        }

        increment("async_producer.produce.errors", tags: tags)
      end

      # Messages were dropped (e.g. on shutdown or overflow policy).
      def drop_messages(event)
        tags = {
          client: event.payload.fetch(:client_id),
        }

        message_count = event.payload.fetch(:message_count)

        count("async_producer.dropped_messages", message_count, tags: tags)
      end

      attach_to "async_producer.kafka"
    end

    # Emits metrics for the background fetcher's internal queue.
    class FetcherSubscriber < StatsdSubscriber
      def loop(event)
        queue_size = event.payload.fetch(:queue_size)

        tags = {
          client: event.payload.fetch(:client_id),
          group_id: event.payload.fetch(:group_id),
        }

        gauge("fetcher.queue_size", queue_size, tags: tags)
      end

      attach_to "fetcher.kafka"
    end
  end
end