ruby-kafka-custom 0.7.7.26

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. checksums.yaml +7 -0
  2. data/lib/kafka/async_producer.rb +279 -0
  3. data/lib/kafka/broker.rb +205 -0
  4. data/lib/kafka/broker_info.rb +16 -0
  5. data/lib/kafka/broker_pool.rb +41 -0
  6. data/lib/kafka/broker_uri.rb +43 -0
  7. data/lib/kafka/client.rb +754 -0
  8. data/lib/kafka/cluster.rb +455 -0
  9. data/lib/kafka/compression.rb +43 -0
  10. data/lib/kafka/compressor.rb +85 -0
  11. data/lib/kafka/connection.rb +220 -0
  12. data/lib/kafka/connection_builder.rb +33 -0
  13. data/lib/kafka/consumer.rb +592 -0
  14. data/lib/kafka/consumer_group.rb +208 -0
  15. data/lib/kafka/datadog.rb +413 -0
  16. data/lib/kafka/fetch_operation.rb +115 -0
  17. data/lib/kafka/fetched_batch.rb +54 -0
  18. data/lib/kafka/fetched_batch_generator.rb +117 -0
  19. data/lib/kafka/fetched_message.rb +47 -0
  20. data/lib/kafka/fetched_offset_resolver.rb +48 -0
  21. data/lib/kafka/fetcher.rb +221 -0
  22. data/lib/kafka/gzip_codec.rb +30 -0
  23. data/lib/kafka/heartbeat.rb +25 -0
  24. data/lib/kafka/instrumenter.rb +38 -0
  25. data/lib/kafka/lz4_codec.rb +23 -0
  26. data/lib/kafka/message_buffer.rb +87 -0
  27. data/lib/kafka/offset_manager.rb +248 -0
  28. data/lib/kafka/partitioner.rb +35 -0
  29. data/lib/kafka/pause.rb +92 -0
  30. data/lib/kafka/pending_message.rb +29 -0
  31. data/lib/kafka/pending_message_queue.rb +41 -0
  32. data/lib/kafka/produce_operation.rb +205 -0
  33. data/lib/kafka/producer.rb +504 -0
  34. data/lib/kafka/protocol.rb +217 -0
  35. data/lib/kafka/protocol/add_partitions_to_txn_request.rb +34 -0
  36. data/lib/kafka/protocol/add_partitions_to_txn_response.rb +47 -0
  37. data/lib/kafka/protocol/alter_configs_request.rb +44 -0
  38. data/lib/kafka/protocol/alter_configs_response.rb +49 -0
  39. data/lib/kafka/protocol/api_versions_request.rb +21 -0
  40. data/lib/kafka/protocol/api_versions_response.rb +53 -0
  41. data/lib/kafka/protocol/consumer_group_protocol.rb +19 -0
  42. data/lib/kafka/protocol/create_partitions_request.rb +42 -0
  43. data/lib/kafka/protocol/create_partitions_response.rb +28 -0
  44. data/lib/kafka/protocol/create_topics_request.rb +45 -0
  45. data/lib/kafka/protocol/create_topics_response.rb +26 -0
  46. data/lib/kafka/protocol/decoder.rb +175 -0
  47. data/lib/kafka/protocol/delete_topics_request.rb +33 -0
  48. data/lib/kafka/protocol/delete_topics_response.rb +26 -0
  49. data/lib/kafka/protocol/describe_configs_request.rb +35 -0
  50. data/lib/kafka/protocol/describe_configs_response.rb +73 -0
  51. data/lib/kafka/protocol/describe_groups_request.rb +27 -0
  52. data/lib/kafka/protocol/describe_groups_response.rb +73 -0
  53. data/lib/kafka/protocol/encoder.rb +184 -0
  54. data/lib/kafka/protocol/end_txn_request.rb +29 -0
  55. data/lib/kafka/protocol/end_txn_response.rb +19 -0
  56. data/lib/kafka/protocol/fetch_request.rb +70 -0
  57. data/lib/kafka/protocol/fetch_response.rb +136 -0
  58. data/lib/kafka/protocol/find_coordinator_request.rb +29 -0
  59. data/lib/kafka/protocol/find_coordinator_response.rb +29 -0
  60. data/lib/kafka/protocol/heartbeat_request.rb +27 -0
  61. data/lib/kafka/protocol/heartbeat_response.rb +17 -0
  62. data/lib/kafka/protocol/init_producer_id_request.rb +26 -0
  63. data/lib/kafka/protocol/init_producer_id_response.rb +27 -0
  64. data/lib/kafka/protocol/join_group_request.rb +41 -0
  65. data/lib/kafka/protocol/join_group_response.rb +33 -0
  66. data/lib/kafka/protocol/leave_group_request.rb +25 -0
  67. data/lib/kafka/protocol/leave_group_response.rb +17 -0
  68. data/lib/kafka/protocol/list_groups_request.rb +23 -0
  69. data/lib/kafka/protocol/list_groups_response.rb +35 -0
  70. data/lib/kafka/protocol/list_offset_request.rb +53 -0
  71. data/lib/kafka/protocol/list_offset_response.rb +89 -0
  72. data/lib/kafka/protocol/member_assignment.rb +42 -0
  73. data/lib/kafka/protocol/message.rb +172 -0
  74. data/lib/kafka/protocol/message_set.rb +55 -0
  75. data/lib/kafka/protocol/metadata_request.rb +31 -0
  76. data/lib/kafka/protocol/metadata_response.rb +185 -0
  77. data/lib/kafka/protocol/offset_commit_request.rb +47 -0
  78. data/lib/kafka/protocol/offset_commit_response.rb +29 -0
  79. data/lib/kafka/protocol/offset_fetch_request.rb +36 -0
  80. data/lib/kafka/protocol/offset_fetch_response.rb +56 -0
  81. data/lib/kafka/protocol/produce_request.rb +92 -0
  82. data/lib/kafka/protocol/produce_response.rb +63 -0
  83. data/lib/kafka/protocol/record.rb +88 -0
  84. data/lib/kafka/protocol/record_batch.rb +222 -0
  85. data/lib/kafka/protocol/request_message.rb +26 -0
  86. data/lib/kafka/protocol/sasl_handshake_request.rb +33 -0
  87. data/lib/kafka/protocol/sasl_handshake_response.rb +28 -0
  88. data/lib/kafka/protocol/sync_group_request.rb +33 -0
  89. data/lib/kafka/protocol/sync_group_response.rb +23 -0
  90. data/lib/kafka/round_robin_assignment_strategy.rb +54 -0
  91. data/lib/kafka/sasl/gssapi.rb +76 -0
  92. data/lib/kafka/sasl/oauth.rb +64 -0
  93. data/lib/kafka/sasl/plain.rb +39 -0
  94. data/lib/kafka/sasl/scram.rb +177 -0
  95. data/lib/kafka/sasl_authenticator.rb +61 -0
  96. data/lib/kafka/snappy_codec.rb +25 -0
  97. data/lib/kafka/socket_with_timeout.rb +96 -0
  98. data/lib/kafka/ssl_context.rb +66 -0
  99. data/lib/kafka/ssl_socket_with_timeout.rb +187 -0
  100. data/lib/kafka/statsd.rb +296 -0
  101. data/lib/kafka/tagged_logger.rb +72 -0
  102. data/lib/kafka/transaction_manager.rb +261 -0
  103. data/lib/kafka/transaction_state_machine.rb +72 -0
  104. data/lib/kafka/version.rb +5 -0
  105. metadata +461 -0
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "zlib"
4
+
5
+ module Kafka
6
+
7
+ # Assigns partitions to messages.
8
class Partitioner
  # Assigns a partition number based on a partition key. If no explicit
  # partition key is provided, the message key will be used instead.
  #
  # If the key is nil, then a random partition is selected. Otherwise, a CRC32
  # digest of the key is used to deterministically find a partition. As long as
  # the number of partitions doesn't change, the same key will always be
  # assigned to the same partition.
  #
  # @param partition_count [Integer] the number of partitions in the topic;
  #   must be a positive integer.
  # @param message [Kafka::PendingMessage] the message that should be assigned
  #   a partition. Only `#partition_key` and `#key` are read.
  # @raise [ArgumentError] if `partition_count` is not a positive integer.
  # @return [Integer] the partition number, in the range `0...partition_count`.
  def self.partition_for_key(partition_count, message)
    # A zero count would make the modulo below divide by zero, a negative one
    # would yield negative partition numbers, and nil/Float would silently turn
    # `rand` into a float generator -- reject all of them up front.
    unless partition_count.is_a?(Integer) && partition_count > 0
      raise ArgumentError,
        "partition_count must be a positive integer, got #{partition_count.inspect}"
    end

    # If no explicit partition key is specified we use the message key instead.
    key = message.partition_key || message.key

    return rand(partition_count) if key.nil?

    Zlib.crc32(key) % partition_count
  end
end
35
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kafka
4
+ # Manages the pause state of a partition.
5
+ #
6
+ # The processing of messages in a partition can be paused, e.g. if there was
7
+ # an exception during processing. This could be caused by a downstream service
8
+ # not being available. A typical way of solving such an issue is to back off
9
+ # for a little while and then try again. In order to do that, _pause_ the
10
+ # partition.
11
class Pause
  # @param clock [#now] the time source used for every timestamp taken by this
  #   object. Defaults to `Time`; anything responding to `#now` can be injected.
  def initialize(clock: Time)
    @clock = clock
    @started_at = nil
    @pauses = 0
    @timeout = nil
    @max_timeout = nil
    @exponential_backoff = false
  end

  # Mark the partition as paused.
  #
  # If exponential backoff is enabled, each subsequent pause of a partition will
  # cause a doubling of the actual timeout, i.e. for pause number _n_ (counting
  # from 1), the actual timeout will be _2^(n-1) * timeout_, capped at
  # `max_timeout` when one is given.
  #
  # Only when {#reset!} is called is the pause counter cleared.
  #
  # @param timeout [nil, Integer] if specified, the partition will automatically
  #   resume after this many seconds.
  # @param max_timeout [nil, Integer] if specified, an upper bound in seconds on
  #   the effective timeout.
  # @param exponential_backoff [Boolean] whether to enable exponential timeouts.
  def pause!(timeout: nil, max_timeout: nil, exponential_backoff: false)
    @started_at = @clock.now
    @timeout = timeout
    @max_timeout = max_timeout
    @exponential_backoff = exponential_backoff
    @pauses += 1
  end

  # Resumes the partition.
  #
  # The number of pauses is still retained, and if the partition is paused again
  # it may be with an exponential backoff.
  def resume!
    @started_at = nil
    @timeout = nil
    @max_timeout = nil
  end

  # Whether the partition is currently paused. The pause may have expired, in which
  # case {#expired?} should be checked as well.
  def paused?
    # This is nil if we're not currently paused.
    !@started_at.nil?
  end

  # How long the partition has been paused.
  #
  # @return [Numeric] the time elapsed since the pause started, or 0 when not
  #   paused.
  def pause_duration
    return 0 unless paused?

    # Use the injected clock rather than Time directly so that the duration is
    # measured on the same time source that recorded @started_at.
    @clock.now - @started_at
  end

  # Whether the pause has expired.
  def expired?
    # We never expire the pause if timeout is nil.
    return false if @timeout.nil?

    # Have we passed the end of the pause duration?
    @clock.now >= ends_at
  end

  # Resets the pause state, ensuring that the next pause is not exponential.
  def reset!
    @pauses = 0
  end

  private

  # The instant at which the current pause ends.
  def ends_at
    # Apply an exponential backoff to the timeout.
    backoff_factor = @exponential_backoff ? 2**(@pauses - 1) : 1
    timeout = backoff_factor * @timeout

    # If set, don't allow a timeout longer than max_timeout.
    timeout = @max_timeout if @max_timeout && timeout > @max_timeout

    @started_at + timeout
  end
end
92
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kafka
4
# A message that has been handed to the producer together with everything
# needed to route it: the payload, the target topic and the optional
# partition / partition key.
class PendingMessage
  attr_reader :value, :key, :headers, :topic, :partition, :partition_key, :create_time, :bytesize

  # All arguments are stored unchanged.
  #
  # `bytesize` is computed once here as the byte length of the key plus the
  # byte length of the value; a nil key or value contributes 0 and headers are
  # not counted.
  def initialize(value:, key:, headers: {}, topic:, partition:, partition_key:, create_time:)
    @value = value
    @key = key
    @headers = headers
    @topic = topic
    @partition = partition
    @partition_key = partition_key
    @create_time = create_time
    @bytesize = key.to_s.bytesize + value.to_s.bytesize
  end

  # Field-by-field equality.
  #
  # @param other [Object] the object to compare with.
  # @return [Boolean] true only if `other` is a PendingMessage whose
  #   attributes all equal this message's; false for nil or any other object.
  def ==(other)
    # Without this guard, comparing against nil or an unrelated object raised
    # NoMethodError instead of answering false.
    return false unless other.is_a?(PendingMessage)

    @value == other.value &&
      @key == other.key &&
      @topic == other.topic &&
      @headers == other.headers &&
      @partition == other.partition &&
      @partition_key == other.partition_key &&
      @create_time == other.create_time &&
      @bytesize == other.bytesize
  end
end
29
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kafka
4
+
5
# An in-memory, ordered collection of messages that keeps a running message
# count and a running total of the messages' byte sizes.
class PendingMessageQueue
  # `each` is defined below, so the standard collection helpers (map, select,
  # to_a, ...) come for free.
  include Enumerable

  attr_reader :size, :bytesize

  def initialize
    clear
  end

  # Appends a message and updates the counters.
  #
  # @param message [#bytesize] the message to enqueue.
  # @return [Integer] the queue's total bytesize after the write.
  def write(message)
    @messages << message
    @size += 1
    @bytesize += message.bytesize
  end

  # @return [Boolean] whether the queue holds no messages.
  def empty?
    @messages.empty?
  end

  # Drops every message and resets both counters to zero.
  def clear
    @messages = []
    @size = 0
    @bytesize = 0
  end

  # Discards the current contents and enqueues `messages` in order.
  #
  # @param messages [#each] the messages that should make up the queue.
  def replace(messages)
    clear
    messages.each { |message| write(message) }
  end

  # Yields each message in the queue, in insertion order.
  #
  # @yieldparam [PendingMessage] message
  # @return [Array, Enumerator] the underlying message array when a block is
  #   given, otherwise an Enumerator over the messages.
  def each(&block)
    @messages.each(&block)
  end
end
41
+ end
@@ -0,0 +1,205 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "kafka/protocol/message_set"
4
+ require "kafka/protocol/record_batch"
5
+
6
+ module Kafka
7
+ # A produce operation attempts to send all messages in a buffer to the Kafka cluster.
8
+ # Since topics and partitions are spread among all brokers in a cluster, this usually
9
+ # involves sending requests to several or all of the brokers.
10
+ #
11
+ # ## Instrumentation
12
+ #
13
+ # When executing the operation, an `ack_message.producer.kafka` notification will be
14
+ # emitted for each message that was successfully appended to a topic partition.
15
+ # The following keys will be found in the payload:
16
+ #
17
+ # * `:topic` — the topic that was written to.
18
+ # * `:partition` — the partition that the message set was appended to.
19
+ # * `:offset` — the offset of the message in the partition.
20
+ # * `:key` — the message key.
21
+ # * `:value` — the message value.
22
+ # * `:delay` — the time between when the message was produced and when it was acknowledged.
23
+ #
24
+ # In addition to these notifications, a `send_messages.producer.kafka` notification will
25
+ # be emitted after the operation completes, regardless of whether it succeeds. This
26
+ # notification will have the following keys:
27
+ #
28
+ # * `:message_count` – the total number of messages that the operation tried to
29
+ # send. Note that not all messages may get delivered.
30
+ # * `:sent_message_count` – the number of messages that were successfully sent.
31
+ #
32
+ class ProduceOperation
33
+ def initialize(cluster:, transaction_manager:, buffer:, compressor:, required_acks:, ack_timeout:, logger:, instrumenter:)
34
+ @cluster = cluster
35
+ @transaction_manager = transaction_manager
36
+ @buffer = buffer
37
+ @required_acks = required_acks
38
+ @ack_timeout = ack_timeout
39
+ @compressor = compressor
40
+ @logger = TaggedLogger.new(logger)
41
+ @instrumenter = instrumenter
42
+ end
43
+
44
+ def execute
45
+ if (@transaction_manager.idempotent? || @transaction_manager.transactional?) && @required_acks != -1
46
+ raise 'You must set required_acks option to :all to use idempotent / transactional production'
47
+ end
48
+
49
+ if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
50
+ raise "Produce operation can only be executed in a pending transaction"
51
+ end
52
+
53
+ @instrumenter.instrument("send_messages.producer") do |notification|
54
+ message_count = @buffer.size
55
+
56
+ notification[:message_count] = message_count
57
+
58
+ begin
59
+ if @transaction_manager.idempotent? || @transaction_manager.transactional?
60
+ @transaction_manager.init_producer_id
61
+ end
62
+ send_buffered_messages
63
+ ensure
64
+ notification[:sent_message_count] = message_count - @buffer.size
65
+ end
66
+ end
67
+ end
68
+
69
+ private
70
+
71
+ def send_buffered_messages
72
+ messages_for_broker = {}
73
+ topic_partitions = {}
74
+
75
+ @buffer.each do |topic, partition, messages|
76
+ begin
77
+ broker = @cluster.get_leader(topic, partition)
78
+
79
+ @logger.debug "Current leader for #{topic}/#{partition} is node #{broker}"
80
+
81
+ topic_partitions[topic] ||= Set.new
82
+ topic_partitions[topic].add(partition)
83
+
84
+ messages_for_broker[broker] ||= MessageBuffer.new
85
+ messages_for_broker[broker].concat(messages, topic: topic, partition: partition)
86
+ rescue Kafka::Error => e
87
+ @logger.error "Could not connect to leader for partition #{topic}/#{partition}: #{e.message}"
88
+
89
+ @instrumenter.instrument("topic_error.producer", {
90
+ topic: topic,
91
+ exception: [e.class.to_s, e.message],
92
+ })
93
+
94
+ # We can't send the messages right now, so we'll just keep them in the buffer.
95
+ # We'll mark the cluster as stale in order to force a metadata refresh.
96
+ @cluster.mark_as_stale!
97
+ end
98
+ end
99
+
100
+ # Add topic and partition to transaction
101
+ if @transaction_manager.transactional?
102
+ @transaction_manager.add_partitions_to_transaction(topic_partitions)
103
+ end
104
+
105
+ messages_for_broker.each do |broker, message_buffer|
106
+ begin
107
+ @logger.info "Sending #{message_buffer.size} messages to #{broker}"
108
+
109
+ records_for_topics = {}
110
+
111
+ message_buffer.each do |topic, partition, records|
112
+ record_batch = Protocol::RecordBatch.new(
113
+ records: records,
114
+ first_sequence: @transaction_manager.next_sequence_for(
115
+ topic, partition
116
+ ),
117
+ in_transaction: @transaction_manager.transactional?,
118
+ producer_id: @transaction_manager.producer_id,
119
+ producer_epoch: @transaction_manager.producer_epoch
120
+ )
121
+ records_for_topics[topic] ||= {}
122
+ records_for_topics[topic][partition] = record_batch
123
+ end
124
+
125
+ response = broker.produce(
126
+ messages_for_topics: records_for_topics,
127
+ compressor: @compressor,
128
+ required_acks: @required_acks,
129
+ timeout: @ack_timeout * 1000, # Kafka expects the timeout in milliseconds.
130
+ transactional_id: @transaction_manager.transactional_id
131
+ )
132
+
133
+ handle_response(broker, response, records_for_topics) if response
134
+ rescue ConnectionError => e
135
+ @logger.error "Could not connect to broker #{broker}: #{e}"
136
+
137
+ # Mark the cluster as stale in order to force a cluster metadata refresh.
138
+ @cluster.mark_as_stale!
139
+ end
140
+ end
141
+ end
142
+
143
+ def handle_response(broker, response, records_for_topics)
144
+ response.each_partition do |topic_info, partition_info|
145
+ topic = topic_info.topic
146
+ partition = partition_info.partition
147
+ record_batch = records_for_topics[topic][partition]
148
+ records = record_batch.records
149
+ ack_time = Time.now
150
+
151
+ begin
152
+ begin
153
+ Protocol.handle_error(partition_info.error_code)
154
+ rescue ProtocolError => e
155
+ @instrumenter.instrument("topic_error.producer", {
156
+ topic: topic,
157
+ exception: [e.class.to_s, e.message],
158
+ })
159
+
160
+ raise e
161
+ end
162
+
163
+ if @transaction_manager.idempotent? || @transaction_manager.transactional?
164
+ @transaction_manager.update_sequence_for(
165
+ topic, partition, record_batch.first_sequence + record_batch.size
166
+ )
167
+ end
168
+
169
+ records.each_with_index do |record, index|
170
+ @instrumenter.instrument("ack_message.producer", {
171
+ key: record.key,
172
+ value: record.value,
173
+ topic: topic,
174
+ partition: partition,
175
+ offset: partition_info.offset + index,
176
+ delay: ack_time - record.create_time,
177
+ })
178
+ end
179
+ rescue Kafka::CorruptMessage
180
+ @logger.error "Corrupt message when writing to #{topic}/#{partition} on #{broker}"
181
+ rescue Kafka::UnknownTopicOrPartition
182
+ @logger.error "Unknown topic or partition #{topic}/#{partition} on #{broker}"
183
+ @cluster.mark_as_stale!
184
+ rescue Kafka::LeaderNotAvailable
185
+ @logger.error "Leader currently not available for #{topic}/#{partition}"
186
+ @cluster.mark_as_stale!
187
+ rescue Kafka::NotLeaderForPartition
188
+ @logger.error "Broker #{broker} not currently leader for #{topic}/#{partition}"
189
+ @cluster.mark_as_stale!
190
+ rescue Kafka::RequestTimedOut
191
+ @logger.error "Timed out while writing to #{topic}/#{partition} on #{broker}"
192
+ rescue Kafka::NotEnoughReplicas
193
+ @logger.error "Not enough in-sync replicas for #{topic}/#{partition}"
194
+ rescue Kafka::NotEnoughReplicasAfterAppend
195
+ @logger.error "Messages written, but to fewer in-sync replicas than required for #{topic}/#{partition}"
196
+ else
197
+ @logger.debug "Successfully appended #{records.count} messages to #{topic}/#{partition} on #{broker}"
198
+
199
+ # The messages were successfully written; clear them from the buffer.
200
+ @buffer.clear_messages(topic: topic, partition: partition)
201
+ end
202
+ end
203
+ end
204
+ end
205
+ end
@@ -0,0 +1,504 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "kafka/partitioner"
5
+ require "kafka/message_buffer"
6
+ require "kafka/produce_operation"
7
+ require "kafka/pending_message_queue"
8
+ require "kafka/pending_message"
9
+ require "kafka/compressor"
10
+
11
+ module Kafka
12
+ # Allows sending messages to a Kafka cluster.
13
+ #
14
+ # Typically you won't instantiate this class yourself, but rather have {Kafka::Client}
15
+ # do it for you, e.g.
16
+ #
17
+ # # Will instantiate Kafka::Client
18
+ # kafka = Kafka.new(["kafka1:9092", "kafka2:9092"])
19
+ #
20
+ # # Will instantiate Kafka::Producer
21
+ # producer = kafka.producer
22
+ #
23
+ # This is done in order to share a logger as well as a pool of broker connections across
24
+ # different producers. This also means that you don't need to pass the `cluster` and
25
+ # `logger` options to `#producer`. See {#initialize} for the list of other options
26
+ # you can pass in.
27
+ #
28
+ # ## Buffering
29
+ #
30
+ # The producer buffers pending messages until {#deliver_messages} is called. Note that there is
31
+ # a maximum buffer size (default is 1,000 messages) and writing messages after the
32
+ # buffer has reached this size will result in a BufferOverflow exception. Make sure
33
+ # to periodically call {#deliver_messages} or set `max_buffer_size` to an appropriate value.
34
+ #
35
+ # Buffering messages and sending them in batches greatly improves performance, so
36
+ # try to avoid sending messages after every write. The tradeoff between throughput and
37
+ # message delays depends on your use case.
38
+ #
39
+ # ## Error Handling and Retries
40
+ #
41
+ # The design of the error handling is based on having a {MessageBuffer} hold messages
42
+ # for all topics/partitions. Whenever we want to send messages to the cluster, we
43
+ # group the buffered messages by the broker they need to be sent to and fire off a
44
+ # request to each broker. A request can be a partial success, so we go through the
45
+ # response and inspect the error code for each partition that we wrote to. If the
46
+ # write to a given partition was successful, we clear the corresponding messages
47
+ # from the buffer -- otherwise, we log the error and keep the messages in the buffer.
48
+ #
49
+ # After this, we check if the buffer is empty. If it is, we're all done. If it's
50
+ # not, we do another round of requests, this time with just the remaining messages.
51
+ # We do this for as long as `max_retries` permits.
52
+ #
53
+ # ## Compression
54
+ #
55
+ # Depending on what kind of data you produce, enabling compression may yield improved
56
+ # bandwidth and space usage. Compression in Kafka is done on entire message sets
57
+ # rather than on individual messages. This improves the compression rate and generally
58
+ # means that compression works better the larger your buffers get, since the message
59
+ # sets will be larger by the time they're compressed.
60
+ #
61
+ # Since many workloads have variations in throughput and distribution across partitions,
62
+ # it's possible to configure a threshold for when to enable compression by setting
63
+ # `compression_threshold`. Only if the defined number of messages are buffered for a
64
+ # partition will the messages be compressed.
65
+ #
66
+ # Compression is enabled by passing the `compression_codec` parameter with the
67
+ # name of one of the algorithms allowed by Kafka:
68
+ #
69
+ # * `:snappy` for [Snappy](http://google.github.io/snappy/) compression.
70
+ # * `:gzip` for [gzip](https://en.wikipedia.org/wiki/Gzip) compression.
71
+ #
72
+ # By default, all message sets will be compressed if you specify a compression
73
+ # codec. To increase the compression threshold, set `compression_threshold` to
74
+ # an integer value higher than one.
75
+ #
76
+ # ## Instrumentation
77
+ #
78
+ # Whenever {#produce} is called, the notification `produce_message.producer.kafka`
79
+ # will be emitted with the following payload:
80
+ #
81
+ # * `value` – the message value.
82
+ # * `key` – the message key.
83
+ # * `topic` – the topic that was produced to.
84
+ # * `buffer_size` – the buffer size after adding the message.
85
+ # * `max_buffer_size` – the maximum allowed buffer size for the producer.
86
+ #
87
+ # After {#deliver_messages} completes, the notification
88
+ # `deliver_messages.producer.kafka` will be emitted with the following payload:
89
+ #
90
+ # * `message_count` – the total number of messages that the producer tried to
91
+ # deliver. Note that not all messages may get delivered.
92
+ # * `delivered_message_count` – the number of messages that were successfully
93
+ # delivered.
94
+ # * `attempts` – the number of attempts made to deliver the messages.
95
+ #
96
+ # ## Example
97
+ #
98
+ # This is an example of an application which reads lines from stdin and writes them
99
+ # to Kafka:
100
+ #
101
+ # require "kafka"
102
+ #
103
+ # logger = Logger.new($stderr)
104
+ # brokers = ENV.fetch("KAFKA_BROKERS").split(",")
105
+ #
106
+ # # Make sure to create this topic in your Kafka cluster or configure the
107
+ # # cluster to auto-create topics.
108
+ # topic = "random-messages"
109
+ #
110
+ # kafka = Kafka.new(brokers, client_id: "simple-producer", logger: logger)
111
+ # producer = kafka.producer
112
+ #
113
+ # begin
114
+ # $stdin.each_with_index do |line, index|
115
+ # producer.produce(line, topic: topic)
116
+ #
117
+ # # Send messages for every 10 lines.
118
+ # producer.deliver_messages if index % 10 == 0
119
+ # end
120
+ # ensure
121
+ # # Make sure to send any remaining messages.
122
+ # producer.deliver_messages
123
+ #
124
+ # producer.shutdown
125
+ # end
126
+ #
127
+ class Producer
128
# Raise this inside a #transaction block to abort the transaction without
# the exception propagating to the caller.
class AbortTransaction < StandardError
end
129
+
130
def initialize(cluster:, transaction_manager:, logger:, instrumenter:, compressor:, ack_timeout:, required_acks:, max_retries:, retry_backoff:, max_buffer_size:, max_buffer_bytesize:)
  # Collaborators.
  @cluster = cluster
  @transaction_manager = transaction_manager
  @logger = TaggedLogger.new(logger)
  @instrumenter = instrumenter
  @compressor = compressor

  # Delivery settings. The symbolic :all is stored as -1; any other value is
  # kept unchanged.
  @required_acks =
    if required_acks == :all
      -1
    else
      required_acks
    end
  @ack_timeout = ack_timeout
  @max_retries = max_retries
  @retry_backoff = retry_backoff

  # Buffer limits, in number of messages and in bytes.
  @max_buffer_size = max_buffer_size
  @max_buffer_bytesize = max_buffer_bytesize

  # Every topic this producer has produced to.
  @target_topics = Set.new

  # Messages that already have a partition, organized by topic/partition.
  @buffer = MessageBuffer.new

  # Messages added by `#produce` that still await partition assignment.
  @pending_message_queue = PendingMessageQueue.new
end
152
+
153
# Human-readable label listing the topics this producer has produced to.
def to_s
  topic_list = @target_topics.to_a.join(', ')
  "Producer #{topic_list}"
end
156
+
157
+ # Produces a message to the specified topic. Note that messages are buffered in
158
+ # the producer until {#deliver_messages} is called.
159
+ #
160
+ # ## Partitioning
161
+ #
162
+ # There are several options for specifying the partition that the message should
163
+ # be written to.
164
+ #
165
+ # The simplest option is to not specify a message key, partition key, or
166
+ # partition number, in which case the message will be assigned a partition at
167
+ # random.
168
+ #
169
+ # You can also specify the `partition` parameter yourself. This requires you to
170
+ # know which partitions are available, however. Oftentimes the best option is
171
+ # to specify the `partition_key` parameter: messages with the same partition
172
+ # key will always be assigned to the same partition, as long as the number of
173
+ # partitions doesn't change. You can also omit the partition key and specify
174
+ # a message key instead. The message key is part of the message payload, and
175
+ # so can carry semantic value--whether you want to have the message key double
176
+ # as a partition key is up to you.
177
+ #
178
+ # @param value [String] the message data.
179
+ # @param key [String] the message key.
180
+ # @param headers [Hash<String, String>] the headers for the message.
181
+ # @param topic [String] the topic that the message should be written to.
182
+ # @param partition [Integer] the partition that the message should be written to.
183
+ # @param partition_key [String] the key that should be used to assign a partition.
184
+ # @param create_time [Time] the timestamp that should be set on the message.
185
+ #
186
+ # @raise [BufferOverflow] if the maximum buffer size has been reached.
187
+ # @return [nil]
188
def produce(value, key: nil, headers: {}, topic:, partition: nil, partition_key: nil, create_time: Time.now)
  # Normalize the caller-supplied fields up front. Optional fields that were
  # not given stay nil so "absent" remains distinguishable from "empty".
  message = PendingMessage.new(
    value: value && value.to_s,
    key: key && key.to_s,
    headers: headers,
    topic: topic.to_s,
    partition: partition && Integer(partition),
    partition_key: partition_key && partition_key.to_s,
    create_time: create_time
  )

  # Report an overflow once the buffer already holds the maximum number of
  # messages (buffer_overflow is defined elsewhere in this class; presumably
  # it raises BufferOverflow -- confirm).
  if buffer_size >= @max_buffer_size
    buffer_overflow topic,
      "Cannot produce to #{topic}, max buffer size (#{@max_buffer_size} messages) reached"
  end

  # Likewise when adding this message would reach the byte limit.
  if buffer_bytesize + message.bytesize >= @max_buffer_bytesize
    buffer_overflow topic,
      "Cannot produce to #{topic}, max buffer bytesize (#{@max_buffer_bytesize} bytes) reached"
  end

  # In transactional mode, messages may only be produced while a transaction
  # is in progress.
  if @transaction_manager.transactional? && !@transaction_manager.in_transaction?
    raise "Cannot produce to #{topic}: You must trigger begin_transaction before producing messages"
  end

  # Track the normalized String topic rather than the raw argument, so that a
  # Symbol topic and its String form do not become two distinct entries and
  # the set always agrees with the topic stored on the buffered message.
  @target_topics.add(message.topic)
  @pending_message_queue.write(message)

  @instrumenter.instrument("produce_message.producer", {
    value: value,
    key: key,
    topic: topic,
    create_time: create_time,
    message_size: message.bytesize,
    buffer_size: buffer_size,
    max_buffer_size: @max_buffer_size,
  })

  nil
end
230
+
231
+ # Sends all buffered messages to the Kafka brokers.
232
+ #
233
+ # Depending on the value of `required_acks` used when initializing the producer,
234
+ # this call may block until the specified number of replicas have acknowledged
235
+ # the writes. The `ack_timeout` setting places an upper bound on the amount of
236
+ # time the call will block before failing.
237
+ #
238
+ # @raise [DeliveryFailed] if not all messages could be successfully sent.
239
+ # @return [nil]
240
def deliver_messages
  # Nothing buffered means nothing to deliver, and no notification is emitted.
  return if buffer_size == 0

  @instrumenter.instrument("deliver_messages.producer") do |payload|
    buffered_before = buffer_size

    payload[:message_count] = buffered_before
    payload[:attempts] = 0

    begin
      deliver_messages_with_retries(payload)
    ensure
      # Recorded even when delivery raises, so the notification always
      # reflects how many messages actually left the buffer.
      payload[:delivered_message_count] = buffered_before - buffer_size
    end
  end
end
257
+
258
+ # Returns the number of messages currently held in the buffer.
259
+ #
260
+ # @return [Integer] buffer size.
261
# Total number of buffered messages: those still awaiting a partition plus
# those already organized by topic/partition.
def buffer_size
  unassigned = @pending_message_queue.size
  assigned = @buffer.size
  unassigned + assigned
end
264
+
265
# Total byte size of all buffered messages, across the pending queue and the
# topic/partition buffer.
def buffer_bytesize
  unassigned_bytes = @pending_message_queue.bytesize
  assigned_bytes = @buffer.bytesize
  unassigned_bytes + assigned_bytes
end
268
+
269
+ # Deletes all buffered messages.
270
+ #
271
+ # @return [nil]
272
+ def clear_buffer
273
# Drop the messages already organized by topic/partition ...
+ @buffer.clear
274
# ... and the messages that have not been assigned a partition yet.
# NOTE(review): the value returned is whatever the queue's #clear returns,
# not necessarily nil as the method's doc comment states -- confirm.
+ @pending_message_queue.clear
275
+ end
276
+
277
+ # Closes all connections to the brokers.
278
+ #
279
+ # @return [nil]
280
+ def shutdown
281
# The transaction manager is closed first, then the cluster is disconnected.
+ @transaction_manager.close
282
+ @cluster.disconnect
283
+ end
284
+
285
+ # Initializes the producer so that it is ready for future transactions. This
286
+ # method should be triggered once, before any transactions are created.
287
+ #
288
+ # @return [nil]
289
+ def init_transactions
290
# Pure delegation to the transaction manager; no additional logic lives here.
+ @transaction_manager.init_transactions
291
+ end
292
+
293
+ # Mark the beginning of a transaction. This method transitions the state
294
+ # of the transaction to IN_TRANSACTION.
295
+ #
296
+ # All producing operations can only be executed while the transaction is
297
+ # in this state. The records are persisted by Kafka brokers, but are not visible
298
+ # to consumers until the #commit_transaction method is triggered. If the
299
+ # transaction is not committed within a timeout period, it times out and is
300
+ # considered aborted.
301
+ #
302
+ # @return [nil]
303
+ def begin_transaction
304
# Pure delegation to the transaction manager; no additional logic lives here.
+ @transaction_manager.begin_transaction
305
+ end
306
+
307
# Commits the pending transaction and marks all the produced records as
# committed. After that, they are visible to consumers.
#
# This method can only be called while the current transaction is in the
# IN_TRANSACTION state.
#
# @return [nil]
def commit_transaction
  @transaction_manager.commit_transaction
end
317
+
318
# Aborts the pending transaction and marks all the produced records as
# aborted. The records are wiped out by the brokers, and consumers never get
# a chance to consume them unless they have enabled consuming uncommitted
# messages.
#
# This method can only be called while the current transaction is in the
# IN_TRANSACTION state.
#
# @return [nil]
def abort_transaction
  @transaction_manager.abort_transaction
end
330
+
331
# Syntactic sugar to enable easier transaction usage. Performs the following
# steps:
#
# - Starts the transaction (with Producer#begin_transaction)
# - Yields to the given block
# - Commits the transaction (with Producer#commit_transaction)
#
# If any of these steps raises an exception, the transaction is
# automatically aborted *before* the exception bubbles up.
#
# If the block raises the Kafka::Producer::AbortTransaction indicator
# exception, the transaction is aborted silently and that exception is not
# propagated.
#
# @raise [RuntimeError] if no block is given; nothing is begun or aborted in
#   that case.
# @return [nil]
def transaction
  # Validate the call before entering the rescue scope: a missing block must
  # not trigger an abort of a transaction that was never begun.
  raise 'This method requires a block' unless block_given?

  begin
    begin_transaction
    yield
    commit_transaction
  rescue Kafka::Producer::AbortTransaction
    abort_transaction
  rescue
    abort_transaction
    raise
  end
end
355
+
356
+ private
357
+
358
# Tries to deliver everything held by the producer, retrying while messages
# remain and the retry budget (@max_retries) is not exhausted.
#
# Each attempt refreshes the cluster metadata if necessary, assigns
# partitions to the pending messages and executes the produce operation.
# The attempt number is recorded in `notification[:attempts]`.
#
# @param notification [Hash] instrumentation payload; `:attempts` is updated
#   on every attempt.
# @raise [DeliveryFailed] if the metadata refresh hits a ConnectionError, or
#   if messages are still pending or buffered once the attempts are over.
# @return [nil]
def deliver_messages_with_retries(notification)
  @cluster.add_target_topics(@target_topics)

  produce_operation = ProduceOperation.new(
    cluster: @cluster,
    transaction_manager: @transaction_manager,
    buffer: @buffer,
    required_acks: @required_acks,
    ack_timeout: @ack_timeout,
    compressor: @compressor,
    logger: @logger,
    instrumenter: @instrumenter,
  )

  attempts_made = 0

  loop do
    attempts_made += 1
    notification[:attempts] = attempts_made

    begin
      @cluster.refresh_metadata_if_necessary!
    rescue ConnectionError => error
      raise DeliveryFailed.new(error, buffer_messages)
    end

    assign_partitions!
    produce_operation.execute

    # No response is returned by the brokers when no acks are required, so we
    # can't know which messages have been successfully written. Our only
    # option is to assume that they all have.
    @buffer.clear if @required_acks.zero?

    break if buffer_size.zero?

    if attempts_made > @max_retries
      @logger.error "Failed to send all messages to #{pretty_partitions}; keeping remaining messages in buffer"
      break
    end

    @logger.warn "Failed to send all messages to #{pretty_partitions}; attempting retry #{attempts_made} of #{@max_retries} after #{@retry_backoff}s"

    sleep @retry_backoff
  end

  unless @pending_message_queue.empty?
    # Mark the cluster as stale in order to force a cluster metadata refresh.
    @cluster.mark_as_stale!
    raise DeliveryFailed.new("Failed to assign partitions to #{@pending_message_queue.size} messages", buffer_messages)
  end

  raise DeliveryFailed.new("Failed to send messages to #{pretty_partitions}", buffer_messages) unless @buffer.empty?
end
417
+
418
# Builds a human-readable, comma-separated list of the "topic/partition"
# pairs currently present in the message buffer. Used in log and error
# messages.
#
# @return [String]
def pretty_partitions
  labels = @buffer.map do |topic, partition, _messages|
    "#{topic}/#{partition}"
  end

  labels.join(", ")
end
421
+
422
# Moves messages from the pending message queue into the message buffer,
# picking a partition through the Partitioner for every message that does
# not already carry one.
#
# A Kafka::Error raised while handling a message is instrumented as
# "topic_error.producer" and the message is kept for a later attempt. When
# anything failed, the failures are logged per topic and the cluster is
# marked as stale. The pending message queue ends up holding only the
# failed messages.
#
# @return [Object] whatever `@pending_message_queue.replace` returns.
def assign_partitions!
  retry_later = []
  failing_topics = Set.new

  @pending_message_queue.each do |message|
    topic = message.topic
    target_partition = message.partition

    # If a message for a topic fails to receive a partition all subsequent
    # messages for the topic should be retried to preserve ordering
    if failing_topics.include?(topic)
      retry_later << message
      next
    end

    begin
      if target_partition.nil?
        partition_count = @cluster.partitions_for(topic).count
        target_partition = Partitioner.partition_for_key(partition_count, message)
      end

      @buffer.write(
        value: message.value,
        key: message.key,
        headers: message.headers,
        topic: topic,
        partition: target_partition,
        create_time: message.create_time,
      )
    rescue Kafka::Error => error
      @instrumenter.instrument("topic_error.producer", {
        topic: topic,
        exception: [error.class.to_s, error.message],
      })

      failing_topics << topic
      retry_later << message
    end
  end

  unless retry_later.empty?
    retry_later.group_by(&:topic).each do |topic, messages|
      @logger.error "Failed to assign partitions to #{messages.count} messages in #{topic}"
    end

    @cluster.mark_as_stale!
  end

  @pending_message_queue.replace(retry_later)
end
471
+
472
# Collects every message the producer currently holds: the entries of the
# pending message queue as they are, followed by the buffered messages
# re-wrapped as PendingMessage instances (with no partition key).
#
# @return [Array] the collected messages.
def buffer_messages
  collected = []

  @pending_message_queue.each { |pending| collected << pending }

  @buffer.each do |topic, partition, batch|
    batch.each do |buffered|
      collected << PendingMessage.new(
        value: buffered.value,
        key: buffered.key,
        headers: buffered.headers,
        topic: topic,
        partition: partition,
        partition_key: nil,
        create_time: buffered.create_time
      )
    end
  end

  collected
end
495
+
496
# Reports a buffer overflow to the instrumenter and then raises.
#
# @param topic the topic recorded in the instrumentation payload.
# @param message the message given to the raised exception.
# @raise [BufferOverflow] always.
def buffer_overflow(topic, message)
  payload = { topic: topic }
  @instrumenter.instrument("buffer_overflow.producer", payload)

  raise BufferOverflow, message
end
503
+ end
504
+ end