karafka-rdkafka 0.20.0.rc3-x86_64-linux-gnu

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99) hide show
  1. checksums.yaml +7 -0
  2. data/.github/CODEOWNERS +3 -0
  3. data/.github/FUNDING.yml +1 -0
  4. data/.github/workflows/ci_linux_x86_64_gnu.yml +248 -0
  5. data/.github/workflows/ci_macos_arm64.yml +301 -0
  6. data/.github/workflows/push_linux_x86_64_gnu.yml +60 -0
  7. data/.github/workflows/push_ruby.yml +37 -0
  8. data/.github/workflows/verify-action-pins.yml +16 -0
  9. data/.gitignore +15 -0
  10. data/.rspec +2 -0
  11. data/.ruby-gemset +1 -0
  12. data/.ruby-version +1 -0
  13. data/.yardopts +2 -0
  14. data/CHANGELOG.md +323 -0
  15. data/Gemfile +5 -0
  16. data/MIT-LICENSE +22 -0
  17. data/README.md +177 -0
  18. data/Rakefile +96 -0
  19. data/docker-compose.yml +25 -0
  20. data/ext/README.md +19 -0
  21. data/ext/Rakefile +131 -0
  22. data/ext/build_common.sh +361 -0
  23. data/ext/build_linux_x86_64_gnu.sh +306 -0
  24. data/ext/build_macos_arm64.sh +550 -0
  25. data/ext/librdkafka.so +0 -0
  26. data/karafka-rdkafka.gemspec +61 -0
  27. data/lib/rdkafka/abstract_handle.rb +116 -0
  28. data/lib/rdkafka/admin/acl_binding_result.rb +51 -0
  29. data/lib/rdkafka/admin/config_binding_result.rb +30 -0
  30. data/lib/rdkafka/admin/config_resource_binding_result.rb +18 -0
  31. data/lib/rdkafka/admin/create_acl_handle.rb +28 -0
  32. data/lib/rdkafka/admin/create_acl_report.rb +24 -0
  33. data/lib/rdkafka/admin/create_partitions_handle.rb +30 -0
  34. data/lib/rdkafka/admin/create_partitions_report.rb +6 -0
  35. data/lib/rdkafka/admin/create_topic_handle.rb +32 -0
  36. data/lib/rdkafka/admin/create_topic_report.rb +24 -0
  37. data/lib/rdkafka/admin/delete_acl_handle.rb +30 -0
  38. data/lib/rdkafka/admin/delete_acl_report.rb +23 -0
  39. data/lib/rdkafka/admin/delete_groups_handle.rb +28 -0
  40. data/lib/rdkafka/admin/delete_groups_report.rb +24 -0
  41. data/lib/rdkafka/admin/delete_topic_handle.rb +32 -0
  42. data/lib/rdkafka/admin/delete_topic_report.rb +24 -0
  43. data/lib/rdkafka/admin/describe_acl_handle.rb +30 -0
  44. data/lib/rdkafka/admin/describe_acl_report.rb +24 -0
  45. data/lib/rdkafka/admin/describe_configs_handle.rb +33 -0
  46. data/lib/rdkafka/admin/describe_configs_report.rb +48 -0
  47. data/lib/rdkafka/admin/incremental_alter_configs_handle.rb +33 -0
  48. data/lib/rdkafka/admin/incremental_alter_configs_report.rb +48 -0
  49. data/lib/rdkafka/admin.rb +832 -0
  50. data/lib/rdkafka/bindings.rb +582 -0
  51. data/lib/rdkafka/callbacks.rb +415 -0
  52. data/lib/rdkafka/config.rb +398 -0
  53. data/lib/rdkafka/consumer/headers.rb +79 -0
  54. data/lib/rdkafka/consumer/message.rb +86 -0
  55. data/lib/rdkafka/consumer/partition.rb +57 -0
  56. data/lib/rdkafka/consumer/topic_partition_list.rb +190 -0
  57. data/lib/rdkafka/consumer.rb +663 -0
  58. data/lib/rdkafka/error.rb +201 -0
  59. data/lib/rdkafka/helpers/oauth.rb +58 -0
  60. data/lib/rdkafka/helpers/time.rb +14 -0
  61. data/lib/rdkafka/metadata.rb +115 -0
  62. data/lib/rdkafka/native_kafka.rb +139 -0
  63. data/lib/rdkafka/producer/delivery_handle.rb +48 -0
  64. data/lib/rdkafka/producer/delivery_report.rb +45 -0
  65. data/lib/rdkafka/producer/partitions_count_cache.rb +216 -0
  66. data/lib/rdkafka/producer.rb +492 -0
  67. data/lib/rdkafka/version.rb +7 -0
  68. data/lib/rdkafka.rb +54 -0
  69. data/renovate.json +92 -0
  70. data/spec/rdkafka/abstract_handle_spec.rb +117 -0
  71. data/spec/rdkafka/admin/create_acl_handle_spec.rb +56 -0
  72. data/spec/rdkafka/admin/create_acl_report_spec.rb +18 -0
  73. data/spec/rdkafka/admin/create_topic_handle_spec.rb +54 -0
  74. data/spec/rdkafka/admin/create_topic_report_spec.rb +16 -0
  75. data/spec/rdkafka/admin/delete_acl_handle_spec.rb +85 -0
  76. data/spec/rdkafka/admin/delete_acl_report_spec.rb +72 -0
  77. data/spec/rdkafka/admin/delete_topic_handle_spec.rb +54 -0
  78. data/spec/rdkafka/admin/delete_topic_report_spec.rb +16 -0
  79. data/spec/rdkafka/admin/describe_acl_handle_spec.rb +85 -0
  80. data/spec/rdkafka/admin/describe_acl_report_spec.rb +73 -0
  81. data/spec/rdkafka/admin_spec.rb +769 -0
  82. data/spec/rdkafka/bindings_spec.rb +222 -0
  83. data/spec/rdkafka/callbacks_spec.rb +20 -0
  84. data/spec/rdkafka/config_spec.rb +258 -0
  85. data/spec/rdkafka/consumer/headers_spec.rb +73 -0
  86. data/spec/rdkafka/consumer/message_spec.rb +139 -0
  87. data/spec/rdkafka/consumer/partition_spec.rb +57 -0
  88. data/spec/rdkafka/consumer/topic_partition_list_spec.rb +248 -0
  89. data/spec/rdkafka/consumer_spec.rb +1299 -0
  90. data/spec/rdkafka/error_spec.rb +95 -0
  91. data/spec/rdkafka/metadata_spec.rb +79 -0
  92. data/spec/rdkafka/native_kafka_spec.rb +130 -0
  93. data/spec/rdkafka/producer/delivery_handle_spec.rb +60 -0
  94. data/spec/rdkafka/producer/delivery_report_spec.rb +25 -0
  95. data/spec/rdkafka/producer/partitions_count_cache_spec.rb +359 -0
  96. data/spec/rdkafka/producer/partitions_count_spec.rb +359 -0
  97. data/spec/rdkafka/producer_spec.rb +1234 -0
  98. data/spec/spec_helper.rb +181 -0
  99. metadata +244 -0
@@ -0,0 +1,216 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rdkafka
4
+ class Producer
5
+ # Caching mechanism for Kafka topic partition counts to avoid frequent cluster queries
6
+ #
7
+ # This cache is designed to optimize the process of obtaining partition counts for topics.
8
+ # It uses several strategies to minimize Kafka cluster queries:
9
+ #
10
+ # @note Design considerations:
11
+ #
12
+ # 1. Statistics-based updates
13
+ # When statistics callbacks are enabled (via `statistics.interval.ms`), we leverage
14
+ # this data to proactively update the partition counts cache. This approach costs
15
+ # approximately 0.02ms of processing time during each statistics interval (typically
16
+ # every 5 seconds) but eliminates the need for explicit blocking metadata queries.
17
+ #
18
+ # 2. Edge case handling
19
+ # If a user configures `statistics.interval.ms` much higher than the default cache TTL
20
+ # (30 seconds), the cache will still function correctly. When statistics updates don't
21
+ # occur frequently enough, the cache entries will expire naturally, triggering a
22
+ # blocking refresh when needed.
23
+ #
24
+ # 3. User configuration awareness
25
+ # The cache respects user-defined settings. If `topic.metadata.refresh.interval.ms` is
26
+ # set very high, the responsibility for potentially stale data falls on the user. This
27
+ # is an explicit design choice to honor user configuration preferences and align with
28
+ # librdkafka settings.
29
+ #
30
+ # 4. Process-wide efficiency
31
+ # Since this cache is shared across all Rdkafka producers and consumers within a process,
32
+ # having multiple clients improves overall efficiency. Each client contributes to keeping
33
+ # the cache updated, benefiting all other clients.
34
+ #
35
+ # 5. Thread-safety approach
36
+ # The implementation uses fine-grained locking with per-topic mutexes to minimize
37
+ # contention in multi-threaded environments while ensuring data consistency.
38
+ #
39
+ # 6. Topic recreation handling
40
+ # If a topic is deleted and recreated with fewer partitions, the cache will continue to
41
+ # report the higher count until either the TTL expires or the process is restarted. This
42
+ # design choice simplifies the implementation while relying on librdkafka's error handling
43
+ # for edge cases. In production environments, topic recreation with different partition
44
+ # counts is typically accompanied by application restarts to handle structural changes.
45
+ # This also aligns with the previous cache implementation.
46
+ class PartitionsCountCache
47
+ include Helpers::Time
48
+
49
+ # Default time-to-live for cached partition counts in seconds
50
+ #
51
+ # @note This default was chosen to balance freshness of metadata with performance
52
+ # optimization. Most Kafka cluster topology changes are planned operations, making 30
53
+ # seconds a reasonable compromise.
54
+ DEFAULT_TTL = 30
55
+
56
+ # Creates a new partition count cache
57
+ #
58
+ # @param ttl [Integer] Time-to-live in seconds for cached values
59
+ def initialize(ttl = DEFAULT_TTL)
60
+ @counts = {}
61
+ @mutex_hash = {}
62
+ # Used only for @mutex_hash access to ensure thread-safety when creating new mutexes
63
+ @mutex_for_hash = Mutex.new
64
+ @ttl = ttl
65
+ end
66
+
67
+ # Reads partition count for a topic with automatic refresh when expired
68
+ #
69
+ # This method will return the cached partition count if available and not expired.
70
+ # If the value is expired or not available, it will execute the provided block
71
+ # to fetch the current value from Kafka.
72
+ #
73
+ # @param topic [String] Kafka topic name
74
+ # @yield Block that returns the current partition count when cache needs refreshing
75
+ # @yieldreturn [Integer] Current partition count retrieved from Kafka
76
+ # @return [Integer] Partition count for the topic
77
+ #
78
+ # @note The implementation prioritizes read performance over write consistency
79
+ # since partition counts typically only increase during normal operation.
80
+ def get(topic)
81
+ current_info = @counts[topic]
82
+
83
+ if current_info.nil? || expired?(current_info[0])
84
+ new_count = yield
85
+
86
+ if current_info.nil?
87
+ # No existing data, create a new entry with mutex
88
+ set(topic, new_count)
89
+
90
+ return new_count
91
+ else
92
+ current_count = current_info[1]
93
+
94
+ if new_count > current_count
95
+ # Higher value needs mutex to update both timestamp and count
96
+ set(topic, new_count)
97
+
98
+ return new_count
99
+ else
100
+ # Same or lower value, just update timestamp without mutex
101
+ refresh_timestamp(topic)
102
+
103
+ return current_count
104
+ end
105
+ end
106
+ end
107
+
108
+ current_info[1]
109
+ end
110
+
111
+ # Update partition count for a topic when needed
112
+ #
113
+ # This method updates the partition count for a topic in the cache.
114
+ # It uses a mutex to ensure thread-safety during updates.
115
+ #
116
+ # @param topic [String] Kafka topic name
117
+ # @param new_count [Integer] New partition count value
118
+ #
119
+ # @note We prioritize higher partition counts and only accept them when using
120
+ # a mutex to ensure consistency. This design decision is based on the fact that
121
+ # partition counts in Kafka only increase during normal operation.
122
+ def set(topic, new_count)
123
+ # First check outside mutex to avoid unnecessary locking
124
+ current_info = @counts[topic]
125
+
126
+ # For lower values, we don't update count but might need to refresh timestamp
127
+ if current_info && new_count < current_info[1]
128
+ refresh_timestamp(topic)
129
+
130
+ return
131
+ end
132
+
133
+ # Only lock the specific topic mutex
134
+ mutex_for(topic).synchronize do
135
+ # Check again inside the lock as another thread might have updated
136
+ current_info = @counts[topic]
137
+
138
+ if current_info.nil?
139
+ # Create new entry
140
+ @counts[topic] = [monotonic_now, new_count]
141
+ else
142
+ current_count = current_info[1]
143
+
144
+ if new_count > current_count
145
+ # Update to higher count value
146
+ current_info[0] = monotonic_now
147
+ current_info[1] = new_count
148
+ else
149
+ # Same or lower count, update timestamp only
150
+ current_info[0] = monotonic_now
151
+ end
152
+ end
153
+ end
154
+ end
155
+
156
+ # @return [Hash] hash mapping each topic name to a `[monotonic_timestamp, partitions_count]` array
157
+ def to_h
158
+ @counts
159
+ end
160
+
161
+ private
162
+
163
+ # Get or create a mutex for a specific topic
164
+ #
165
+ # This method ensures that each topic has its own mutex,
166
+ # allowing operations on different topics to proceed in parallel.
167
+ #
168
+ # @param topic [String] Kafka topic name
169
+ # @return [Mutex] Mutex for the specified topic
170
+ #
171
+ # @note We use a separate mutex (@mutex_for_hash) to protect the creation
172
+ # of new topic mutexes. This pattern allows fine-grained locking while
173
+ # maintaining thread-safety.
174
+ def mutex_for(topic)
175
+ mutex = @mutex_hash[topic]
176
+
177
+ return mutex if mutex
178
+
179
+ # Use a separate mutex to protect the creation of new topic mutexes
180
+ @mutex_for_hash.synchronize do
181
+ # Check again in case another thread created it
182
+ @mutex_hash[topic] ||= Mutex.new
183
+ end
184
+
185
+ @mutex_hash[topic]
186
+ end
187
+
188
+ # Update the timestamp without acquiring the mutex
189
+ #
190
+ # This is an optimization that allows refreshing the TTL of existing entries
191
+ # without the overhead of mutex acquisition.
192
+ #
193
+ # @param topic [String] Kafka topic name
194
+ #
195
+ # @note This method is safe for refreshing existing data regardless of count
196
+ # because it only updates the timestamp, which doesn't affect the correctness
197
+ # of concurrent operations.
198
+ def refresh_timestamp(topic)
199
+ current_info = @counts[topic]
200
+
201
+ return unless current_info
202
+
203
+ # Update the timestamp in-place
204
+ current_info[0] = monotonic_now
205
+ end
206
+
207
+ # Check if a timestamp has expired based on the TTL
208
+ #
209
+ # @param timestamp [Float] Monotonic timestamp to check
210
+ # @return [Boolean] true if expired, false otherwise
211
+ def expired?(timestamp)
212
+ monotonic_now - timestamp > @ttl
213
+ end
214
+ end
215
+ end
216
+ end
@@ -0,0 +1,492 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rdkafka
4
+ # A producer for Kafka messages. To create a producer set up a {Config} and call {Config#producer producer} on that.
5
+ class Producer
6
+ include Helpers::Time
7
+ include Helpers::OAuth
8
+
9
+ # @private
10
+ @@partitions_count_cache = PartitionsCountCache.new
11
+
12
+ # Global (process-wide) partitions cache. We use it to store the number of partitions per topic,
13
+ # either from the librdkafka statistics (if enabled) or via direct inline calls every now and
14
+ # then. Since the partitions count can only grow and should be same for all consumers and
15
+ # producers, we can use a global cache as long as we ensure that updates only move up.
16
+ #
17
+ # @note It is critical to remember, that not all users may have statistics callbacks enabled,
18
+ # hence we should not make assumption that this cache is always updated from the stats.
19
+ #
20
+ # @return [Rdkafka::Producer::PartitionsCountCache]
21
+ def self.partitions_count_cache
22
+ @@partitions_count_cache
23
+ end
24
+
25
+ # @param partitions_count_cache [Rdkafka::Producer::PartitionsCountCache]
26
+ def self.partitions_count_cache=(partitions_count_cache)
27
+ @@partitions_count_cache = partitions_count_cache
28
+ end
29
+
30
+ # Empty hash used as a default
31
+ EMPTY_HASH = {}.freeze
32
+
33
+ private_constant :EMPTY_HASH
34
+
35
+ # Raised when there was a critical issue when invoking rd_kafka_topic_new
36
+ # This is a temporary solution until https://github.com/karafka/rdkafka-ruby/issues/451 is
37
+ # resolved and this is normalized in all the places
38
+ class TopicHandleCreationError < RuntimeError; end
39
+
40
+ # @private
41
+ # Returns the current delivery callback, by default this is nil.
42
+ #
43
+ # @return [Proc, nil]
44
+ attr_reader :delivery_callback
45
+
46
+ # @private
47
+ # Returns the number of arguments accepted by the callback, by default this is nil.
48
+ #
49
+ # @return [Integer, nil]
50
+ attr_reader :delivery_callback_arity
51
+
52
+ # @private
53
+ # @param native_kafka [NativeKafka]
54
+ # @param partitioner_name [String, nil] name of the partitioner we want to use or nil to use
55
+ # the "consistent_random" default
56
+ def initialize(native_kafka, partitioner_name)
57
+ @topics_refs_map = {}
58
+ @topics_configs = {}
59
+ @native_kafka = native_kafka
60
+ @partitioner_name = partitioner_name || "consistent_random"
61
+
62
+ # Makes sure, that native kafka gets closed before it gets GCed by Ruby
63
+ ObjectSpace.define_finalizer(self, native_kafka.finalizer)
64
+ end
65
+
66
+ # Sets alternative set of configuration details that can be set per topic
67
+ # @note It is not allowed to re-set the same topic config twice because of the underlying
68
+ # librdkafka caching
69
+ # @param topic [String] The topic name
70
+ # @param config [Hash] config we want to use per topic basis
71
+ # @param config_hash [Integer] hash of the config. We expect it here instead of computing it,
72
+ # because it is already computed during the retrieval attempt in the `#produce` flow.
73
+ def set_topic_config(topic, config, config_hash)
74
+ # Ensure lock on topic reference just in case
75
+ @native_kafka.with_inner do |inner|
76
+ @topics_refs_map[topic] ||= {}
77
+ @topics_configs[topic] ||= {}
78
+
79
+ return if @topics_configs[topic].key?(config_hash)
80
+
81
+ # If config is empty, we create an empty reference that will be used with defaults
82
+ rd_topic_config = if config.empty?
83
+ nil
84
+ else
85
+ Rdkafka::Bindings.rd_kafka_topic_conf_new.tap do |topic_config|
86
+ config.each do |key, value|
87
+ error_buffer = FFI::MemoryPointer.new(:char, 256)
88
+ result = Rdkafka::Bindings.rd_kafka_topic_conf_set(
89
+ topic_config,
90
+ key.to_s,
91
+ value.to_s,
92
+ error_buffer,
93
+ 256
94
+ )
95
+
96
+ unless result == :config_ok
97
+ raise Config::ConfigError.new(error_buffer.read_string)
98
+ end
99
+ end
100
+ end
101
+ end
102
+
103
+ topic_handle = Bindings.rd_kafka_topic_new(inner, topic, rd_topic_config)
104
+
105
+ raise TopicHandleCreationError.new("Error creating topic handle for topic #{topic}") if topic_handle.null?
106
+
107
+ @topics_configs[topic][config_hash] = config
108
+ @topics_refs_map[topic][config_hash] = topic_handle
109
+ end
110
+ end
111
+
112
+ # Starts the native Kafka polling thread and kicks off the init polling
113
+ # @note Not needed to run unless explicit start was disabled
114
+ def start
115
+ @native_kafka.start
116
+ end
117
+
118
+ # @return [String] producer name
119
+ def name
120
+ @name ||= @native_kafka.with_inner do |inner|
121
+ ::Rdkafka::Bindings.rd_kafka_name(inner)
122
+ end
123
+ end
124
+
125
+ # Set a callback that will be called every time a message is successfully produced.
126
+ # The callback is called with a {DeliveryReport} and {DeliveryHandle}
127
+ #
128
+ # @param callback [Proc, #call] The callback
129
+ #
130
+ # @return [nil]
131
+ def delivery_callback=(callback)
132
+ raise TypeError.new("Callback has to be callable") unless callback.respond_to?(:call)
133
+ @delivery_callback = callback
134
+ @delivery_callback_arity = arity(callback)
135
+ end
136
+
137
+ # Init transactions
138
+ # Run once per producer
139
+ def init_transactions
140
+ closed_producer_check(__method__)
141
+
142
+ @native_kafka.with_inner do |inner|
143
+ response_ptr = Rdkafka::Bindings.rd_kafka_init_transactions(inner, -1)
144
+
145
+ Rdkafka::RdkafkaError.validate!(response_ptr) || true
146
+ end
147
+ end
148
+
149
+ def begin_transaction
150
+ closed_producer_check(__method__)
151
+
152
+ @native_kafka.with_inner do |inner|
153
+ response_ptr = Rdkafka::Bindings.rd_kafka_begin_transaction(inner)
154
+
155
+ Rdkafka::RdkafkaError.validate!(response_ptr) || true
156
+ end
157
+ end
158
+
159
+ def commit_transaction(timeout_ms = -1)
160
+ closed_producer_check(__method__)
161
+
162
+ @native_kafka.with_inner do |inner|
163
+ response_ptr = Rdkafka::Bindings.rd_kafka_commit_transaction(inner, timeout_ms)
164
+
165
+ Rdkafka::RdkafkaError.validate!(response_ptr) || true
166
+ end
167
+ end
168
+
169
+ def abort_transaction(timeout_ms = -1)
170
+ closed_producer_check(__method__)
171
+
172
+ @native_kafka.with_inner do |inner|
173
+ response_ptr = Rdkafka::Bindings.rd_kafka_abort_transaction(inner, timeout_ms)
174
+ Rdkafka::RdkafkaError.validate!(response_ptr) || true
175
+ end
176
+ end
177
+
178
+ # Sends provided offsets of a consumer to the transaction for collective commit
179
+ #
180
+ # @param consumer [Consumer] consumer that owns the given tpls
181
+ # @param tpl [Consumer::TopicPartitionList]
182
+ # @param timeout_ms [Integer] offsets send timeout
183
+ # @note Use **only** in the context of an active transaction
184
+ def send_offsets_to_transaction(consumer, tpl, timeout_ms = 5_000)
185
+ closed_producer_check(__method__)
186
+
187
+ return if tpl.empty?
188
+
189
+ cgmetadata = consumer.consumer_group_metadata_pointer
190
+ native_tpl = tpl.to_native_tpl
191
+
192
+ @native_kafka.with_inner do |inner|
193
+ response_ptr = Bindings.rd_kafka_send_offsets_to_transaction(inner, native_tpl, cgmetadata, timeout_ms)
194
+
195
+ Rdkafka::RdkafkaError.validate!(response_ptr)
196
+ end
197
+ ensure
198
+ if cgmetadata && !cgmetadata.null?
199
+ Bindings.rd_kafka_consumer_group_metadata_destroy(cgmetadata)
200
+ end
201
+
202
+ Rdkafka::Bindings.rd_kafka_topic_partition_list_destroy(native_tpl) unless native_tpl.nil?
203
+ end
204
+
205
+ # Close this producer and wait for the internal poll queue to empty.
206
+ def close
207
+ return if closed?
208
+ ObjectSpace.undefine_finalizer(self)
209
+
210
+ @native_kafka.close do
211
+ # We need to remove the topics references objects before we destroy the producer,
212
+ # otherwise they would leak out
213
+ @topics_refs_map.each_value do |refs|
214
+ refs.each_value do |ref|
215
+ Rdkafka::Bindings.rd_kafka_topic_destroy(ref)
216
+ end
217
+ end
218
+ end
219
+
220
+ @topics_refs_map.clear
221
+ end
222
+
223
+ # Whether this producer has closed
224
+ def closed?
225
+ @native_kafka.closed?
226
+ end
227
+
228
+ # Wait until all outstanding producer requests are completed, with the given timeout
229
+ # in milliseconds. Call this before closing a producer to ensure delivery of all messages.
230
+ #
231
+ # @param timeout_ms [Integer] how long should we wait for flush of all messages
232
+ # @return [Boolean] true if no more data and all was flushed, false in case there are still
233
+ # outgoing messages after the timeout
234
+ #
235
+ # @note We raise an exception for other errors because based on the librdkafka docs, there
236
+ # should be no other errors.
237
+ #
238
+ # @note For `timed_out` we do not raise an error to keep it backwards compatible
239
+ def flush(timeout_ms=5_000)
240
+ closed_producer_check(__method__)
241
+
242
+ error = @native_kafka.with_inner do |inner|
243
+ response = Rdkafka::Bindings.rd_kafka_flush(inner, timeout_ms)
244
+ Rdkafka::RdkafkaError.build(response)
245
+ end
246
+
247
+ # Early skip not to build the error message
248
+ return true unless error
249
+ return false if error.code == :timed_out
250
+
251
+ raise(error)
252
+ end
253
+
254
+ # Purges the outgoing queue and releases all resources.
255
+ #
256
+ # Useful when closing the producer with outgoing messages to unstable clusters or when for
257
+ # any other reasons waiting cannot go on anymore. This purges both the queue and all the
258
+ # inflight requests + updates the delivery handles statuses so they can be materialized into
259
+ # `purge_queue` errors.
260
+ def purge
261
+ closed_producer_check(__method__)
262
+
263
+ @native_kafka.with_inner do |inner|
264
+ response = Bindings.rd_kafka_purge(
265
+ inner,
266
+ Bindings::RD_KAFKA_PURGE_F_QUEUE | Bindings::RD_KAFKA_PURGE_F_INFLIGHT
267
+ )
268
+
269
+ Rdkafka::RdkafkaError.validate!(response)
270
+ end
271
+
272
+ # Wait for the purge to affect everything
273
+ sleep(0.001) until flush(100)
274
+
275
+ true
276
+ end
277
+
278
+ # Partition count for a given topic.
279
+ #
280
+ # @param topic [String] The topic name.
281
+ # @return [Integer] partition count for a given topic or `-1` if it could not be obtained.
282
+ #
283
+ # @note If 'allow.auto.create.topics' is set to true in the broker, the topic will be
284
+ # auto-created after returning `-1`.
285
+ #
286
+ # @note We cache the partition count for a given topic for given time. If statistics are
287
+ # enabled for any producer or consumer, it will take precedence over per instance fetching.
288
+ #
289
+ # This prevents us in case someone uses `partition_key` from querying for the count with
290
+ # each message. Instead we query at most once every 30 seconds if we have a valid
291
+ # partition count or every 5 seconds in case we were not able to obtain number of partitions.
292
+ def partition_count(topic)
293
+ closed_producer_check(__method__)
294
+
295
+ self.class.partitions_count_cache.get(topic) do
296
+ topic_metadata = nil
297
+
298
+ @native_kafka.with_inner do |inner|
299
+ topic_metadata = ::Rdkafka::Metadata.new(inner, topic).topics&.first
300
+ end
301
+
302
+ topic_metadata ? topic_metadata[:partition_count] : -1
303
+ end
304
+ rescue Rdkafka::RdkafkaError => e
305
+ # If the topic does not exist, it will be created or if not allowed another error will be
306
+ # raised. We here return -1 so this can happen without early error happening on metadata
307
+ # discovery.
308
+ return -1 if e.code == :unknown_topic_or_part
309
+
310
+ raise(e)
311
+ end
312
+
313
+ # Produces a message to a Kafka topic. The message is added to rdkafka's queue, call {DeliveryHandle#wait wait} on the returned delivery handle to make sure it is delivered.
314
+ #
315
+ # When no partition is specified the underlying Kafka library picks a partition based on the key. If no key is specified, a random partition will be used.
316
+ # When a timestamp is provided this is used instead of the auto-generated timestamp.
317
+ #
318
+ # @param topic [String] The topic to produce to
319
+ # @param payload [String,nil] The message's payload
320
+ # @param key [String, nil] The message's key
321
+ # @param partition [Integer,nil] Optional partition to produce to
322
+ # @param partition_key [String, nil] Optional partition key based on which partition assignment can happen
323
+ # @param timestamp [Time,Integer,nil] Optional timestamp of this message. Integer timestamp is in milliseconds since Jan 1 1970.
324
+ # @param headers [Hash<String,String|Array<String>>] Optional message headers. Values can be either a single string or an array of strings to support duplicate headers per KIP-82
325
+ # @param label [Object, nil] a label that can be assigned when producing a message that will be part of the delivery handle and the delivery report
326
+ # @param topic_config [Hash] topic config for given message dispatch. Allows to send messages to topics with different configuration
327
+ #
328
+ # @return [DeliveryHandle] Delivery handle that can be used to wait for the result of producing this message
329
+ #
330
+ # @raise [RdkafkaError] When adding the message to rdkafka's queue failed
331
+ def produce(
332
+ topic:,
333
+ payload: nil,
334
+ key: nil,
335
+ partition: nil,
336
+ partition_key: nil,
337
+ timestamp: nil,
338
+ headers: nil,
339
+ label: nil,
340
+ topic_config: EMPTY_HASH
341
+ )
342
+ closed_producer_check(__method__)
343
+
344
+ # Start by checking and converting the input
345
+
346
+ # Get payload length
347
+ payload_size = if payload.nil?
348
+ 0
349
+ else
350
+ payload.bytesize
351
+ end
352
+
353
+ # Get key length
354
+ key_size = if key.nil?
355
+ 0
356
+ else
357
+ key.bytesize
358
+ end
359
+
360
+ topic_config_hash = topic_config.hash
361
+
362
+ # Checks if we have the rdkafka topic reference object ready. It saves us on object
363
+ # allocation and allows to use custom config on demand.
364
+ set_topic_config(topic, topic_config, topic_config_hash) unless @topics_refs_map.dig(topic, topic_config_hash)
365
+ topic_ref = @topics_refs_map.dig(topic, topic_config_hash)
366
+
367
+ if partition_key
368
+ partition_count = partition_count(topic)
369
+
370
+ # Check if there are no overrides for the partitioner and use the default one only when
371
+ # no per-topic partitioner is present.
372
+ partitioner_name = @topics_configs.dig(topic, topic_config_hash, :partitioner) || @partitioner_name
373
+
374
+ # If the topic is not present, set to -1
375
+ partition = Rdkafka::Bindings.partitioner(partition_key, partition_count, partitioner_name) if partition_count.positive?
376
+ end
377
+
378
+ # If partition is nil, use -1 to let librdkafka set the partition randomly or
379
+ # based on the key when present.
380
+ partition ||= -1
381
+
382
+ # If timestamp is nil use 0 and let Kafka set one. If an integer or time
383
+ # use it.
384
+ raw_timestamp = if timestamp.nil?
385
+ 0
386
+ elsif timestamp.is_a?(Integer)
387
+ timestamp
388
+ elsif timestamp.is_a?(Time)
389
+ (timestamp.to_i * 1000) + (timestamp.usec / 1000)
390
+ else
391
+ raise TypeError.new("Timestamp has to be nil, an Integer or a Time")
392
+ end
393
+
394
+ delivery_handle = DeliveryHandle.new
395
+ delivery_handle.label = label
396
+ delivery_handle.topic = topic
397
+ delivery_handle[:pending] = true
398
+ delivery_handle[:response] = -1
399
+ delivery_handle[:partition] = -1
400
+ delivery_handle[:offset] = -1
401
+ DeliveryHandle.register(delivery_handle)
402
+
403
+ args = [
404
+ :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_RKT, :pointer, topic_ref,
405
+ :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_MSGFLAGS, :int, Rdkafka::Bindings::RD_KAFKA_MSG_F_COPY,
406
+ :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_VALUE, :buffer_in, payload, :size_t, payload_size,
407
+ :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_KEY, :buffer_in, key, :size_t, key_size,
408
+ :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_PARTITION, :int32, partition,
409
+ :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_TIMESTAMP, :int64, raw_timestamp,
410
+ :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_OPAQUE, :pointer, delivery_handle,
411
+ ]
412
+
413
+ if headers
414
+ headers.each do |key0, value0|
415
+ key = key0.to_s
416
+ if value0.is_a?(Array)
417
+ # Handle array of values per KIP-82
418
+ value0.each do |value|
419
+ value = value.to_s
420
+ args << :int << Rdkafka::Bindings::RD_KAFKA_VTYPE_HEADER
421
+ args << :string << key
422
+ args << :pointer << value
423
+ args << :size_t << value.bytesize
424
+ end
425
+ else
426
+ # Handle single value
427
+ value = value0.to_s
428
+ args << :int << Rdkafka::Bindings::RD_KAFKA_VTYPE_HEADER
429
+ args << :string << key
430
+ args << :pointer << value
431
+ args << :size_t << value.bytesize
432
+ end
433
+ end
434
+ end
435
+
436
+ args << :int << Rdkafka::Bindings::RD_KAFKA_VTYPE_END
437
+
438
+ # Produce the message
439
+ response = @native_kafka.with_inner do |inner|
440
+ Rdkafka::Bindings.rd_kafka_producev(
441
+ inner,
442
+ *args
443
+ )
444
+ end
445
+
446
+ # Raise error if the produce call was not successful
447
+ if response != 0
448
+ DeliveryHandle.remove(delivery_handle.to_ptr.address)
449
+ Rdkafka::RdkafkaError.validate!(response)
450
+ end
451
+
452
+ delivery_handle
453
+ end
454
+
455
+ # Calls (if registered) the delivery callback
456
+ #
457
+ # @param delivery_report [Producer::DeliveryReport]
458
+ # @param delivery_handle [Producer::DeliveryHandle]
459
+ def call_delivery_callback(delivery_report, delivery_handle)
460
+ return unless @delivery_callback
461
+
462
+ case @delivery_callback_arity
463
+ when 0
464
+ @delivery_callback.call
465
+ when 1
466
+ @delivery_callback.call(delivery_report)
467
+ else
468
+ @delivery_callback.call(delivery_report, delivery_handle)
469
+ end
470
+ end
471
+
472
+ # Figures out the arity of a given block/method
473
+ #
474
+ # @param callback [#call, Proc]
475
+ # @return [Integer] arity of the provided block/method
476
+ def arity(callback)
477
+ return callback.arity if callback.respond_to?(:arity)
478
+
479
+ callback.method(:call).arity
480
+ end
481
+
482
+ private
483
+
484
+ # Ensures, no operations can happen on a closed producer
485
+ #
486
+ # @param method [Symbol] name of the method that invoked producer
487
+ # @raise [Rdkafka::ClosedProducerError]
488
+ def closed_producer_check(method)
489
+ raise Rdkafka::ClosedProducerError.new(method) if closed?
490
+ end
491
+ end
492
+ end