ruby-kafka-aws-iam 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +393 -0
  3. data/.github/workflows/stale.yml +19 -0
  4. data/.gitignore +13 -0
  5. data/.readygo +1 -0
  6. data/.rspec +3 -0
  7. data/.rubocop.yml +44 -0
  8. data/.ruby-version +1 -0
  9. data/.yardopts +3 -0
  10. data/CHANGELOG.md +314 -0
  11. data/Gemfile +5 -0
  12. data/ISSUE_TEMPLATE.md +23 -0
  13. data/LICENSE.txt +176 -0
  14. data/Procfile +2 -0
  15. data/README.md +1356 -0
  16. data/Rakefile +8 -0
  17. data/benchmarks/message_encoding.rb +23 -0
  18. data/bin/console +8 -0
  19. data/bin/setup +5 -0
  20. data/docker-compose.yml +39 -0
  21. data/examples/consumer-group.rb +35 -0
  22. data/examples/firehose-consumer.rb +64 -0
  23. data/examples/firehose-producer.rb +54 -0
  24. data/examples/simple-consumer.rb +34 -0
  25. data/examples/simple-producer.rb +42 -0
  26. data/examples/ssl-producer.rb +44 -0
  27. data/lib/kafka/async_producer.rb +297 -0
  28. data/lib/kafka/broker.rb +217 -0
  29. data/lib/kafka/broker_info.rb +16 -0
  30. data/lib/kafka/broker_pool.rb +41 -0
  31. data/lib/kafka/broker_uri.rb +43 -0
  32. data/lib/kafka/client.rb +838 -0
  33. data/lib/kafka/cluster.rb +513 -0
  34. data/lib/kafka/compression.rb +45 -0
  35. data/lib/kafka/compressor.rb +86 -0
  36. data/lib/kafka/connection.rb +228 -0
  37. data/lib/kafka/connection_builder.rb +33 -0
  38. data/lib/kafka/consumer.rb +642 -0
  39. data/lib/kafka/consumer_group/assignor.rb +63 -0
  40. data/lib/kafka/consumer_group.rb +231 -0
  41. data/lib/kafka/crc32_hash.rb +15 -0
  42. data/lib/kafka/datadog.rb +420 -0
  43. data/lib/kafka/digest.rb +22 -0
  44. data/lib/kafka/fetch_operation.rb +115 -0
  45. data/lib/kafka/fetched_batch.rb +58 -0
  46. data/lib/kafka/fetched_batch_generator.rb +120 -0
  47. data/lib/kafka/fetched_message.rb +48 -0
  48. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  49. data/lib/kafka/fetcher.rb +224 -0
  50. data/lib/kafka/gzip_codec.rb +34 -0
  51. data/lib/kafka/heartbeat.rb +25 -0
  52. data/lib/kafka/instrumenter.rb +38 -0
  53. data/lib/kafka/interceptors.rb +33 -0
  54. data/lib/kafka/lz4_codec.rb +27 -0
  55. data/lib/kafka/message_buffer.rb +87 -0
  56. data/lib/kafka/murmur2_hash.rb +17 -0
  57. data/lib/kafka/offset_manager.rb +259 -0
  58. data/lib/kafka/partitioner.rb +40 -0
  59. data/lib/kafka/pause.rb +92 -0
  60. data/lib/kafka/pending_message.rb +29 -0
  61. data/lib/kafka/pending_message_queue.rb +41 -0
  62. data/lib/kafka/produce_operation.rb +205 -0
  63. data/lib/kafka/producer.rb +528 -0
  64. data/lib/kafka/prometheus.rb +316 -0
  65. data/lib/kafka/protocol/add_offsets_to_txn_request.rb +29 -0
  66. data/lib/kafka/protocol/add_offsets_to_txn_response.rb +21 -0
  67. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  68. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  69. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  70. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  71. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  72. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  73. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  74. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  75. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  76. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  77. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  78. data/lib/kafka/protocol/decoder.rb +175 -0
  79. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  80. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  81. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  82. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  83. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  84. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  85. data/lib/kafka/protocol/encoder.rb +184 -0
  86. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  87. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  88. data/lib/kafka/protocol/fetch_request.rb +70 -0
  89. data/lib/kafka/protocol/fetch_response.rb +136 -0
  90. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  91. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  92. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  93. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  94. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  95. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  96. data/lib/kafka/protocol/join_group_request.rb +47 -0
  97. data/lib/kafka/protocol/join_group_response.rb +41 -0
  98. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  99. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  100. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  101. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  102. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  103. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  104. data/lib/kafka/protocol/member_assignment.rb +42 -0
  105. data/lib/kafka/protocol/message.rb +172 -0
  106. data/lib/kafka/protocol/message_set.rb +55 -0
  107. data/lib/kafka/protocol/metadata_request.rb +31 -0
  108. data/lib/kafka/protocol/metadata_response.rb +185 -0
  109. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  110. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  111. data/lib/kafka/protocol/offset_fetch_request.rb +38 -0
  112. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  113. data/lib/kafka/protocol/produce_request.rb +94 -0
  114. data/lib/kafka/protocol/produce_response.rb +63 -0
  115. data/lib/kafka/protocol/record.rb +88 -0
  116. data/lib/kafka/protocol/record_batch.rb +223 -0
  117. data/lib/kafka/protocol/request_message.rb +26 -0
  118. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  119. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  120. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  121. data/lib/kafka/protocol/sync_group_response.rb +26 -0
  122. data/lib/kafka/protocol/txn_offset_commit_request.rb +46 -0
  123. data/lib/kafka/protocol/txn_offset_commit_response.rb +47 -0
  124. data/lib/kafka/protocol.rb +225 -0
  125. data/lib/kafka/round_robin_assignment_strategy.rb +52 -0
  126. data/lib/kafka/sasl/awsmskiam.rb +128 -0
  127. data/lib/kafka/sasl/gssapi.rb +76 -0
  128. data/lib/kafka/sasl/oauth.rb +64 -0
  129. data/lib/kafka/sasl/plain.rb +39 -0
  130. data/lib/kafka/sasl/scram.rb +180 -0
  131. data/lib/kafka/sasl_authenticator.rb +73 -0
  132. data/lib/kafka/snappy_codec.rb +29 -0
  133. data/lib/kafka/socket_with_timeout.rb +96 -0
  134. data/lib/kafka/ssl_context.rb +66 -0
  135. data/lib/kafka/ssl_socket_with_timeout.rb +192 -0
  136. data/lib/kafka/statsd.rb +296 -0
  137. data/lib/kafka/tagged_logger.rb +77 -0
  138. data/lib/kafka/transaction_manager.rb +306 -0
  139. data/lib/kafka/transaction_state_machine.rb +72 -0
  140. data/lib/kafka/version.rb +5 -0
  141. data/lib/kafka/zstd_codec.rb +27 -0
  142. data/lib/kafka.rb +373 -0
  143. data/lib/ruby-kafka.rb +5 -0
  144. data/ruby-kafka.gemspec +54 -0
  145. metadata +520 -0
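
Of note in the list above is item 126, data/lib/kafka/sasl/awsmskiam.rb: the AWS MSK IAM SASL mechanism is what distinguishes this fork from upstream ruby-kafka. The sketch below shows how a client might opt into it; the sasl_aws_msk_iam_* option names are assumptions inferred from the mechanism's file name and are not confirmed by this diff excerpt.

require "kafka"

# Hypothetical configuration; option names are assumptions, not confirmed here.
kafka = Kafka.new(
  ["broker1.example.com:9098"],           # placeholder broker address
  client_id: "my-app",
  sasl_aws_msk_iam_access_key_id: ENV["AWS_ACCESS_KEY_ID"],
  sasl_aws_msk_iam_secret_key_id: ENV["AWS_SECRET_ACCESS_KEY"],
  sasl_aws_msk_iam_aws_region: "us-east-1",
  ssl_ca_certs_from_system: true          # MSK IAM endpoints require TLS
)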
data/lib/kafka/socket_with_timeout.rb
@@ -0,0 +1,96 @@
+ # frozen_string_literal: true
+
+ require "socket"
+
+ module Kafka
+
+   # Opens sockets in a non-blocking fashion, ensuring that we're not stalling
+   # for long periods of time.
+   #
+   # It's possible to set timeouts for connecting to the server, for reading data,
+   # and for writing data. Whenever a timeout is exceeded, Errno::ETIMEDOUT is
+   # raised.
+   #
+   class SocketWithTimeout
+
+     # Opens a socket.
+     #
+     # @param host [String]
+     # @param port [Integer]
+     # @param connect_timeout [Integer] the connection timeout, in seconds.
+     # @param timeout [Integer] the read and write timeout, in seconds.
+     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
+     def initialize(host, port, connect_timeout: nil, timeout: nil)
+       addr = Socket.getaddrinfo(host, nil)
+       sockaddr = Socket.pack_sockaddr_in(port, addr[0][3])
+
+       @timeout = timeout
+
+       @socket = Socket.new(Socket.const_get(addr[0][0]), Socket::SOCK_STREAM, 0)
+       @socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1)
+
+       begin
+         # Initiate the socket connection in the background. If it doesn't fail
+         # immediately it will raise an IO::WaitWritable (Errno::EINPROGRESS)
+         # indicating the connection is in progress.
+         @socket.connect_nonblock(sockaddr)
+       rescue IO::WaitWritable
+         # IO.select will block until the socket is writable or the timeout
+         # is exceeded, whichever comes first.
+         unless IO.select(nil, [@socket], nil, connect_timeout)
+           # IO.select returns nil when the socket is not ready before timeout
+           # seconds have elapsed.
+           @socket.close
+           raise Errno::ETIMEDOUT
+         end
+
+         begin
+           # Verify there is now a good connection.
+           @socket.connect_nonblock(sockaddr)
+         rescue Errno::EISCONN
+           # The socket is connected, we're good!
+         end
+       end
+     end
+
+     # Reads bytes from the socket, possibly with a timeout.
+     #
+     # @param num_bytes [Integer] the number of bytes to read.
+     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
+     # @return [String] the data that was read from the socket.
+     def read(num_bytes)
+       unless IO.select([@socket], nil, nil, @timeout)
+         raise Errno::ETIMEDOUT
+       end
+
+       @socket.read(num_bytes)
+     rescue IO::EAGAINWaitReadable
+       retry
+     end
+
+     # Writes bytes to the socket, possibly with a timeout.
+     #
+     # @param bytes [String] the data that should be written to the socket.
+     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
+     # @return [Integer] the number of bytes written.
+     def write(bytes)
+       unless IO.select(nil, [@socket], nil, @timeout)
+         raise Errno::ETIMEDOUT
+       end
+
+       @socket.write(bytes)
+     end
+
+     def close
+       @socket.close
+     end
+
+     def closed?
+       @socket.closed?
+     end
+
+     def set_encoding(encoding)
+       @socket.set_encoding(encoding)
+     end
+   end
+ end
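
As a usage sketch of the class above (host, port, and payload are placeholders), a caller opens the socket with separate connect and read/write timeouts and treats Errno::ETIMEDOUT as a retryable connection error:

require "kafka/socket_with_timeout"

socket = Kafka::SocketWithTimeout.new("broker.example.com", 9092,
                                      connect_timeout: 10, timeout: 30)
begin
  socket.write("request bytes")  # Errno::ETIMEDOUT if unwritable for 30 seconds
  header = socket.read(4)        # Errno::ETIMEDOUT if unreadable for 30 seconds
ensure
  socket.close unless socket.closed?
end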
data/lib/kafka/ssl_context.rb
@@ -0,0 +1,66 @@
+ # frozen_string_literal: true
+
+ require "openssl"
+
+ module Kafka
+   module SslContext
+     CLIENT_CERT_DELIMITER = "\n-----END CERTIFICATE-----\n"
+
+     def self.build(ca_cert_file_path: nil, ca_cert: nil, client_cert: nil, client_cert_key: nil, client_cert_key_password: nil, client_cert_chain: nil, ca_certs_from_system: nil, verify_hostname: true)
+       return nil unless ca_cert_file_path || ca_cert || client_cert || client_cert_key || client_cert_key_password || client_cert_chain || ca_certs_from_system
+
+       ssl_context = OpenSSL::SSL::SSLContext.new
+
+       if client_cert && client_cert_key
+         if client_cert_key_password
+           cert_key = OpenSSL::PKey.read(client_cert_key, client_cert_key_password)
+         else
+           cert_key = OpenSSL::PKey.read(client_cert_key)
+         end
+         context_params = {
+           cert: OpenSSL::X509::Certificate.new(client_cert),
+           key: cert_key
+         }
+         if client_cert_chain
+           certs = []
+           client_cert_chain.split(CLIENT_CERT_DELIMITER).each do |cert|
+             cert += CLIENT_CERT_DELIMITER
+             certs << OpenSSL::X509::Certificate.new(cert)
+           end
+           context_params[:extra_chain_cert] = certs
+         end
+         ssl_context.set_params(context_params)
+       elsif client_cert && !client_cert_key
+         raise ArgumentError, "Kafka client initialized with `ssl_client_cert` but no `ssl_client_cert_key`. Please provide both."
+       elsif !client_cert && client_cert_key
+         raise ArgumentError, "Kafka client initialized with `ssl_client_cert_key`, but no `ssl_client_cert`. Please provide both."
+       elsif client_cert_chain && !client_cert
+         raise ArgumentError, "Kafka client initialized with `ssl_client_cert_chain`, but no `ssl_client_cert`. Please provide cert, key and chain."
+       elsif client_cert_chain && !client_cert_key
+         raise ArgumentError, "Kafka client initialized with `ssl_client_cert_chain`, but no `ssl_client_cert_key`. Please provide cert, key and chain."
+       elsif client_cert_key_password && !client_cert_key
+         raise ArgumentError, "Kafka client initialized with `ssl_client_cert_key_password`, but no `ssl_client_cert_key`. Please provide both."
+       end
+
+       if ca_cert || ca_cert_file_path || ca_certs_from_system
+         store = OpenSSL::X509::Store.new
+         Array(ca_cert).each do |cert|
+           store.add_cert(OpenSSL::X509::Certificate.new(cert))
+         end
+         Array(ca_cert_file_path).each do |cert_file_path|
+           store.add_file(cert_file_path)
+         end
+         if ca_certs_from_system
+           store.set_default_paths
+         end
+         ssl_context.cert_store = store
+       end
+
+       ssl_context.verify_mode = OpenSSL::SSL::VERIFY_PEER
+       # Verify certificate hostname if supported (ruby >= 2.4.0)
+       ssl_context.verify_hostname = verify_hostname if ssl_context.respond_to?(:verify_hostname=)
+
+       ssl_context
+     end
+   end
+ end
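
A minimal sketch of calling the builder above, assuming PEM-encoded material on disk (the paths are placeholders). Note that build returns nil when no TLS options are given, and otherwise always enforces VERIFY_PEER:

require "kafka/ssl_context"

ssl_context = Kafka::SslContext.build(
  ca_cert_file_path: "/etc/kafka/ca.pem",              # placeholder path
  client_cert: File.read("/etc/kafka/client.pem"),     # placeholder path
  client_cert_key: File.read("/etc/kafka/client.key"), # placeholder path
  verify_hostname: true
)

Passing a client cert without its key (or a chain or key password without the pieces they depend on) raises one of the ArgumentErrors in the elsif branches above, so misconfigurations fail at client construction rather than at connect time.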
data/lib/kafka/ssl_socket_with_timeout.rb
@@ -0,0 +1,192 @@
+ # frozen_string_literal: true
+
+ require "socket"
+
+ module Kafka
+
+   # Opens sockets in a non-blocking fashion, ensuring that we're not stalling
+   # for long periods of time.
+   #
+   # It's possible to set timeouts for connecting to the server, for reading data,
+   # and for writing data. Whenever a timeout is exceeded, Errno::ETIMEDOUT is
+   # raised.
+   #
+   class SSLSocketWithTimeout
+
+     # Opens a socket.
+     #
+     # @param host [String]
+     # @param port [Integer]
+     # @param connect_timeout [Integer] the connection timeout, in seconds.
+     # @param timeout [Integer] the read and write timeout, in seconds.
+     # @param ssl_context [OpenSSL::SSL::SSLContext] which SSLContext the ssl connection should use
+     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
+     def initialize(host, port, connect_timeout: nil, timeout: nil, ssl_context:, logger: nil)
+       addr = Socket.getaddrinfo(host, nil)
+       sockaddr = Socket.pack_sockaddr_in(port, addr[0][3])
+
+       @connect_timeout = connect_timeout
+       @timeout = timeout
+       @logger = logger
+
+       @tcp_socket = Socket.new(Socket.const_get(addr[0][0]), Socket::SOCK_STREAM, 0)
+       @tcp_socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_NODELAY, 1)
+
+       # First initiate the TCP socket.
+       begin
+         # Initiate the socket connection in the background. If it doesn't fail
+         # immediately it will raise an IO::WaitWritable (Errno::EINPROGRESS)
+         # indicating the connection is in progress.
+         @tcp_socket.connect_nonblock(sockaddr)
+       rescue IO::WaitWritable
+         # select will block until the socket is writable or the timeout
+         # is exceeded, whichever comes first.
+         unless select_with_timeout(@tcp_socket, :connect_write)
+           # select returns nil when the socket is not ready before timeout
+           # seconds have elapsed.
+           @tcp_socket.close
+           raise Errno::ETIMEDOUT
+         end
+
+         begin
+           # Verify there is now a good connection.
+           @tcp_socket.connect_nonblock(sockaddr)
+         rescue Errno::EISCONN
+           # The socket is connected, we're good!
+         end
+       end
+
+       # Once that's connected, we can start initiating the SSL socket.
+       @ssl_socket = OpenSSL::SSL::SSLSocket.new(@tcp_socket, ssl_context)
+       @ssl_socket.hostname = host
+
+       begin
+         # Initiate the socket connection in the background. If it doesn't fail
+         # immediately it will raise an IO::WaitWritable (Errno::EINPROGRESS)
+         # indicating the connection is in progress.
+         # Unlike waiting for a tcp socket to connect, you can't time out ssl socket
+         # connections during the connect phase properly, because IO.select only partially works.
+         # Instead, you have to retry.
+         @ssl_socket.connect_nonblock
+       rescue Errno::EAGAIN, Errno::EWOULDBLOCK, IO::WaitReadable
+         if select_with_timeout(@ssl_socket, :connect_read)
+           retry
+         else
+           @ssl_socket.close
+           close
+           raise Errno::ETIMEDOUT
+         end
+       rescue IO::WaitWritable
+         if select_with_timeout(@ssl_socket, :connect_write)
+           retry
+         else
+           close
+           raise Errno::ETIMEDOUT
+         end
+       end
+     end
+
+     # Reads bytes from the socket, possibly with a timeout.
+     #
+     # @param num_bytes [Integer] the number of bytes to read.
+     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
+     # @return [String] the data that was read from the socket.
+     def read(num_bytes)
+       buffer = String.new
+
+       @logger.debug "Reading #{num_bytes} bytes from #{@ssl_socket}"
+       until buffer.length >= num_bytes
+         begin
+           # Unlike plain TCP sockets, SSL sockets don't support IO.select
+           # properly.
+           # Instead, timeouts happen on a per read basis, and we have to
+           # catch exceptions from read_nonblock and gradually build up
+           # our read buffer.
+           buffer << @ssl_socket.read_nonblock(num_bytes - buffer.length)
+
+           @logger.debug "Bytes read: #{buffer.length}"
+         rescue IO::WaitReadable
+           if select_with_timeout(@ssl_socket, :read)
+             retry
+           else
+             raise Errno::ETIMEDOUT
+           end
+         rescue IO::WaitWritable
+           if select_with_timeout(@ssl_socket, :write)
+             retry
+           else
+             raise Errno::ETIMEDOUT
+           end
+         end
+       end
+
+       buffer
+     end
+
+     # Writes bytes to the socket, possibly with a timeout.
+     #
+     # @param bytes [String] the data that should be written to the socket.
+     # @raise [Errno::ETIMEDOUT] if the timeout is exceeded.
+     # @return [Integer] the number of bytes written.
+     def write(bytes)
+       loop do
+         written = 0
+         begin
+           # Unlike plain TCP sockets, SSL sockets don't support IO.select
+           # properly.
+           # Instead, timeouts happen on a per write basis, and we have to
+           # catch exceptions from write_nonblock, and gradually build up
+           # our write buffer.
+           written += @ssl_socket.write_nonblock(bytes)
+         rescue Errno::EFAULT => error
+           raise error
+         rescue OpenSSL::SSL::SSLError, Errno::EAGAIN, Errno::EWOULDBLOCK, IO::WaitWritable => error
+           if error.is_a?(OpenSSL::SSL::SSLError) && error.message == 'write would block'
+             if select_with_timeout(@ssl_socket, :write)
+               retry
+             else
+               raise Errno::ETIMEDOUT
+             end
+           else
+             raise error
+           end
+         end
+
+         # Fast, common case.
+         break if written == bytes.size
+
+         # This takes advantage of the fact that most Ruby implementations
+         # have copy-on-write strings. By requesting a subrange of data, we
+         # don't actually copy anything, because the new string simply
+         # references a subrange of the original.
+         bytes = bytes[written, bytes.size]
+       end
+     end
+
+     def close
+       @tcp_socket.close
+       @ssl_socket.close
+     end
+
+     def closed?
+       @tcp_socket.closed? || @ssl_socket.closed?
+     end
+
+     def set_encoding(encoding)
+       @tcp_socket.set_encoding(encoding)
+     end
+
+     def select_with_timeout(socket, type)
+       case type
+       when :connect_read
+         IO.select([socket], nil, nil, @connect_timeout)
+       when :connect_write
+         IO.select(nil, [socket], nil, @connect_timeout)
+       when :read
+         IO.select([socket], nil, nil, @timeout)
+       when :write
+         IO.select(nil, [socket], nil, @timeout)
+       end
+     end
+   end
+ end
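
A construction sketch for the class above, reusing an OpenSSL::SSL::SSLContext such as one built by Kafka::SslContext (host and port are placeholders). Note that although logger: defaults to nil, #read calls @logger.debug unconditionally, so omitting the logger would raise NoMethodError on the first read:

require "logger"

ssl_socket = Kafka::SSLSocketWithTimeout.new(
  "broker.example.com", 9093,   # placeholder host/port
  connect_timeout: 10,
  timeout: 30,
  ssl_context: ssl_context,     # an OpenSSL::SSL::SSLContext, e.g. from SslContext.build
  logger: Logger.new($stderr)   # required in practice: #read logs unconditionally
)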
data/lib/kafka/statsd.rb
@@ -0,0 +1,296 @@
+ # frozen_string_literal: true
+
+ begin
+   require "statsd"
+ rescue LoadError
+   $stderr.puts "In order to report Kafka client metrics to Statsd you need to install the `statsd-ruby` gem."
+   raise
+ end
+
+ require "active_support/subscriber"
+
+ module Kafka
+   # Reports operational metrics to a Statsd agent.
+   #
+   #     require "kafka/statsd"
+   #
+   #     # Default is "ruby_kafka".
+   #     Kafka::Statsd.namespace = "custom-namespace"
+   #
+   #     # Default is "127.0.0.1".
+   #     Kafka::Statsd.host = "statsd.something.com"
+   #
+   #     # Default is 8125.
+   #     Kafka::Statsd.port = 1234
+   #
+   # Once the file has been required, no further configuration is needed – all operational
+   # metrics are automatically emitted.
+   module Statsd
+     DEFAULT_NAMESPACE = "ruby_kafka"
+     DEFAULT_HOST = '127.0.0.1'
+     DEFAULT_PORT = 8125
+
+     def self.statsd
+       @statsd ||= ::Statsd.new(DEFAULT_HOST, DEFAULT_PORT).tap { |sd| sd.namespace = DEFAULT_NAMESPACE }
+     end
+
+     def self.host=(host)
+       statsd.host = host
+       statsd.connect if statsd.respond_to?(:connect)
+     end
+
+     def self.port=(port)
+       statsd.port = port
+       statsd.connect if statsd.respond_to?(:connect)
+     end
+
+     def self.namespace=(namespace)
+       statsd.namespace = namespace
+     end
+
+     class StatsdSubscriber < ActiveSupport::Subscriber
+       private
+
+       %w[increment count timing gauge].each do |type|
+         define_method(type) do |*args|
+           Kafka::Statsd.statsd.send(type, *args)
+         end
+       end
+     end
+
+     class ConnectionSubscriber < StatsdSubscriber
+       def request(event)
+         client = event.payload.fetch(:client_id)
+         api = event.payload.fetch(:api, "unknown")
+         request_size = event.payload.fetch(:request_size, 0)
+         response_size = event.payload.fetch(:response_size, 0)
+         broker = event.payload.fetch(:broker_host)
+
+         timing("api.#{client}.#{api}.#{broker}.latency", event.duration)
+         increment("api.#{client}.#{api}.#{broker}.calls")
+
+         timing("api.#{client}.#{api}.#{broker}.request_size", request_size)
+         timing("api.#{client}.#{api}.#{broker}.response_size", response_size)
+
+         if event.payload.key?(:exception)
+           increment("api.#{client}.#{api}.#{broker}.errors")
+         end
+       end
+
+       attach_to "connection.kafka"
+     end
+
+     class ConsumerSubscriber < StatsdSubscriber
+       def process_message(event)
+         offset_lag = event.payload.fetch(:offset_lag)
+         create_time = event.payload.fetch(:create_time)
+         client = event.payload.fetch(:client_id)
+         group_id = event.payload.fetch(:group_id)
+         topic = event.payload.fetch(:topic)
+         partition = event.payload.fetch(:partition)
+
+         time_lag = create_time && ((Time.now - create_time) * 1000).to_i
+
+         if event.payload.key?(:exception)
+           increment("consumer.#{client}.#{group_id}.#{topic}.#{partition}.process_message.errors")
+         else
+           timing("consumer.#{client}.#{group_id}.#{topic}.#{partition}.process_message.latency", event.duration)
+           increment("consumer.#{client}.#{group_id}.#{topic}.#{partition}.messages")
+         end
+
+         gauge("consumer.#{client}.#{group_id}.#{topic}.#{partition}.lag", offset_lag)
+
+         # Not all messages have timestamps.
+         if time_lag
+           gauge("consumer.#{client}.#{group_id}.#{topic}.#{partition}.time_lag", time_lag)
+         end
+       end
+
+       def process_batch(event)
+         messages = event.payload.fetch(:message_count)
+         client = event.payload.fetch(:client_id)
+         group_id = event.payload.fetch(:group_id)
+         topic = event.payload.fetch(:topic)
+         partition = event.payload.fetch(:partition)
+
+         if event.payload.key?(:exception)
+           increment("consumer.#{client}.#{group_id}.#{topic}.#{partition}.process_batch.errors")
+         else
+           timing("consumer.#{client}.#{group_id}.#{topic}.#{partition}.process_batch.latency", event.duration)
+           count("consumer.#{client}.#{group_id}.#{topic}.#{partition}.messages", messages)
+         end
+       end
+
+       def fetch_batch(event)
+         lag = event.payload.fetch(:offset_lag)
+         batch_size = event.payload.fetch(:message_count)
+         client = event.payload.fetch(:client_id)
+         group_id = event.payload.fetch(:group_id)
+         topic = event.payload.fetch(:topic)
+         partition = event.payload.fetch(:partition)
+
+         count("consumer.#{client}.#{group_id}.#{topic}.#{partition}.batch_size", batch_size)
+         gauge("consumer.#{client}.#{group_id}.#{topic}.#{partition}.lag", lag)
+       end
+
+       def join_group(event)
+         client = event.payload.fetch(:client_id)
+         group_id = event.payload.fetch(:group_id)
+
+         timing("consumer.#{client}.#{group_id}.join_group", event.duration)
+
+         if event.payload.key?(:exception)
+           increment("consumer.#{client}.#{group_id}.join_group.errors")
+         end
+       end
+
+       def sync_group(event)
+         client = event.payload.fetch(:client_id)
+         group_id = event.payload.fetch(:group_id)
+
+         timing("consumer.#{client}.#{group_id}.sync_group", event.duration)
+
+         if event.payload.key?(:exception)
+           increment("consumer.#{client}.#{group_id}.sync_group.errors")
+         end
+       end
+
+       def leave_group(event)
+         client = event.payload.fetch(:client_id)
+         group_id = event.payload.fetch(:group_id)
+
+         timing("consumer.#{client}.#{group_id}.leave_group", event.duration)
+
+         if event.payload.key?(:exception)
+           increment("consumer.#{client}.#{group_id}.leave_group.errors")
+         end
+       end
+
+       def pause_status(event)
+         client = event.payload.fetch(:client_id)
+         group_id = event.payload.fetch(:group_id)
+         topic = event.payload.fetch(:topic)
+         partition = event.payload.fetch(:partition)
+
+         duration = event.payload.fetch(:duration)
+
+         gauge("consumer.#{client}.#{group_id}.#{topic}.#{partition}.pause.duration", duration)
+       end
+
+       attach_to "consumer.kafka"
+     end
+
+     class ProducerSubscriber < StatsdSubscriber
+       def produce_message(event)
+         client = event.payload.fetch(:client_id)
+         topic = event.payload.fetch(:topic)
+         message_size = event.payload.fetch(:message_size)
+         buffer_size = event.payload.fetch(:buffer_size)
+         max_buffer_size = event.payload.fetch(:max_buffer_size)
+         buffer_fill_ratio = buffer_size.to_f / max_buffer_size.to_f
+         buffer_fill_percentage = buffer_fill_ratio * 100.0
+
+         # This gets us the write rate.
+         increment("producer.#{client}.#{topic}.produce.messages")
+
+         timing("producer.#{client}.#{topic}.produce.message_size", message_size)
+
+         # This gets us the avg/max buffer size per producer.
+         timing("producer.#{client}.buffer.size", buffer_size)
+
+         # This gets us the avg/max buffer fill ratio per producer.
+         timing("producer.#{client}.buffer.fill_ratio", buffer_fill_ratio)
+         timing("producer.#{client}.buffer.fill_percentage", buffer_fill_percentage)
+       end
+
+       def buffer_overflow(event)
+         client = event.payload.fetch(:client_id)
+         topic = event.payload.fetch(:topic)
+
+         increment("producer.#{client}.#{topic}.produce.errors")
+       end
+
+       def deliver_messages(event)
+         client = event.payload.fetch(:client_id)
+         message_count = event.payload.fetch(:delivered_message_count)
+         attempts = event.payload.fetch(:attempts)
+
+         if event.payload.key?(:exception)
+           increment("producer.#{client}.deliver.errors")
+         end
+
+         timing("producer.#{client}.deliver.latency", event.duration)
+
+         # Messages delivered to Kafka:
+         count("producer.#{client}.deliver.messages", message_count)
+
+         # Number of attempts to deliver messages:
+         timing("producer.#{client}.deliver.attempts", attempts)
+       end
+
+       def ack_message(event)
+         client = event.payload.fetch(:client_id)
+         topic = event.payload.fetch(:topic)
+
+         # Number of messages ACK'd for the topic.
+         increment("producer.#{client}.#{topic}.ack.messages")
+
+         # Histogram of delay between a message being produced and it being ACK'd.
+         timing("producer.#{client}.#{topic}.ack.delay", event.payload.fetch(:delay))
+       end
+
+       def topic_error(event)
+         client = event.payload.fetch(:client_id)
+         topic = event.payload.fetch(:topic)
+
+         increment("producer.#{client}.#{topic}.ack.errors")
+       end
+
+       attach_to "producer.kafka"
+     end
+
+     class AsyncProducerSubscriber < StatsdSubscriber
+       def enqueue_message(event)
+         client = event.payload.fetch(:client_id)
+         topic = event.payload.fetch(:topic)
+         queue_size = event.payload.fetch(:queue_size)
+         max_queue_size = event.payload.fetch(:max_queue_size)
+         queue_fill_ratio = queue_size.to_f / max_queue_size.to_f
+
+         # This gets us the avg/max queue size per producer.
+         timing("async_producer.#{client}.#{topic}.queue.size", queue_size)
+
+         # This gets us the avg/max queue fill ratio per producer.
+         timing("async_producer.#{client}.#{topic}.queue.fill_ratio", queue_fill_ratio)
+       end
+
+       def buffer_overflow(event)
+         client = event.payload.fetch(:client_id)
+         topic = event.payload.fetch(:topic)
+
+         increment("async_producer.#{client}.#{topic}.produce.errors")
+       end
+
+       def drop_messages(event)
+         client = event.payload.fetch(:client_id)
+         message_count = event.payload.fetch(:message_count)
+
+         count("async_producer.#{client}.dropped_messages", message_count)
+       end
+
+       attach_to "async_producer.kafka"
+     end
+
+     class FetcherSubscriber < StatsdSubscriber
+       def loop(event)
+         queue_size = event.payload.fetch(:queue_size)
+         client = event.payload.fetch(:client_id)
+         group_id = event.payload.fetch(:group_id)
+
+         gauge("fetcher.#{client}.#{group_id}.queue_size", queue_size)
+       end
+
+       attach_to "fetcher.kafka"
+     end
+   end
+ end
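
These subscribers attach to ActiveSupport::Notifications namespaces via attach_to, so the handler that runs is determined by the event name: an event named "request.connection.kafka", for example, is routed to ConnectionSubscriber#request. A sketch of emitting such an event by hand, using the payload keys that handler fetches above:

require "active_support/notifications"
require "kafka/statsd"

ActiveSupport::Notifications.instrument(
  "request.connection.kafka",
  client_id: "my-client",
  api: "fetch",
  broker_host: "broker1",
  request_size: 128,
  response_size: 4096
) do
  # ... the timed work; the block's duration becomes the .latency timing ...
end
# Emits api.my-client.fetch.broker1.latency, .calls, .request_size and
# .response_size under the "ruby_kafka" namespace.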