karafka-rdkafka 0.20.0.rc3-x86_64-linux-gnu
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/CODEOWNERS +3 -0
- data/.github/FUNDING.yml +1 -0
- data/.github/workflows/ci_linux_x86_64_gnu.yml +248 -0
- data/.github/workflows/ci_macos_arm64.yml +301 -0
- data/.github/workflows/push_linux_x86_64_gnu.yml +60 -0
- data/.github/workflows/push_ruby.yml +37 -0
- data/.github/workflows/verify-action-pins.yml +16 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.yardopts +2 -0
- data/CHANGELOG.md +323 -0
- data/Gemfile +5 -0
- data/MIT-LICENSE +22 -0
- data/README.md +177 -0
- data/Rakefile +96 -0
- data/docker-compose.yml +25 -0
- data/ext/README.md +19 -0
- data/ext/Rakefile +131 -0
- data/ext/build_common.sh +361 -0
- data/ext/build_linux_x86_64_gnu.sh +306 -0
- data/ext/build_macos_arm64.sh +550 -0
- data/ext/librdkafka.so +0 -0
- data/karafka-rdkafka.gemspec +61 -0
- data/lib/rdkafka/abstract_handle.rb +116 -0
- data/lib/rdkafka/admin/acl_binding_result.rb +51 -0
- data/lib/rdkafka/admin/config_binding_result.rb +30 -0
- data/lib/rdkafka/admin/config_resource_binding_result.rb +18 -0
- data/lib/rdkafka/admin/create_acl_handle.rb +28 -0
- data/lib/rdkafka/admin/create_acl_report.rb +24 -0
- data/lib/rdkafka/admin/create_partitions_handle.rb +30 -0
- data/lib/rdkafka/admin/create_partitions_report.rb +6 -0
- data/lib/rdkafka/admin/create_topic_handle.rb +32 -0
- data/lib/rdkafka/admin/create_topic_report.rb +24 -0
- data/lib/rdkafka/admin/delete_acl_handle.rb +30 -0
- data/lib/rdkafka/admin/delete_acl_report.rb +23 -0
- data/lib/rdkafka/admin/delete_groups_handle.rb +28 -0
- data/lib/rdkafka/admin/delete_groups_report.rb +24 -0
- data/lib/rdkafka/admin/delete_topic_handle.rb +32 -0
- data/lib/rdkafka/admin/delete_topic_report.rb +24 -0
- data/lib/rdkafka/admin/describe_acl_handle.rb +30 -0
- data/lib/rdkafka/admin/describe_acl_report.rb +24 -0
- data/lib/rdkafka/admin/describe_configs_handle.rb +33 -0
- data/lib/rdkafka/admin/describe_configs_report.rb +48 -0
- data/lib/rdkafka/admin/incremental_alter_configs_handle.rb +33 -0
- data/lib/rdkafka/admin/incremental_alter_configs_report.rb +48 -0
- data/lib/rdkafka/admin.rb +832 -0
- data/lib/rdkafka/bindings.rb +582 -0
- data/lib/rdkafka/callbacks.rb +415 -0
- data/lib/rdkafka/config.rb +398 -0
- data/lib/rdkafka/consumer/headers.rb +79 -0
- data/lib/rdkafka/consumer/message.rb +86 -0
- data/lib/rdkafka/consumer/partition.rb +57 -0
- data/lib/rdkafka/consumer/topic_partition_list.rb +190 -0
- data/lib/rdkafka/consumer.rb +663 -0
- data/lib/rdkafka/error.rb +201 -0
- data/lib/rdkafka/helpers/oauth.rb +58 -0
- data/lib/rdkafka/helpers/time.rb +14 -0
- data/lib/rdkafka/metadata.rb +115 -0
- data/lib/rdkafka/native_kafka.rb +139 -0
- data/lib/rdkafka/producer/delivery_handle.rb +48 -0
- data/lib/rdkafka/producer/delivery_report.rb +45 -0
- data/lib/rdkafka/producer/partitions_count_cache.rb +216 -0
- data/lib/rdkafka/producer.rb +492 -0
- data/lib/rdkafka/version.rb +7 -0
- data/lib/rdkafka.rb +54 -0
- data/renovate.json +92 -0
- data/spec/rdkafka/abstract_handle_spec.rb +117 -0
- data/spec/rdkafka/admin/create_acl_handle_spec.rb +56 -0
- data/spec/rdkafka/admin/create_acl_report_spec.rb +18 -0
- data/spec/rdkafka/admin/create_topic_handle_spec.rb +54 -0
- data/spec/rdkafka/admin/create_topic_report_spec.rb +16 -0
- data/spec/rdkafka/admin/delete_acl_handle_spec.rb +85 -0
- data/spec/rdkafka/admin/delete_acl_report_spec.rb +72 -0
- data/spec/rdkafka/admin/delete_topic_handle_spec.rb +54 -0
- data/spec/rdkafka/admin/delete_topic_report_spec.rb +16 -0
- data/spec/rdkafka/admin/describe_acl_handle_spec.rb +85 -0
- data/spec/rdkafka/admin/describe_acl_report_spec.rb +73 -0
- data/spec/rdkafka/admin_spec.rb +769 -0
- data/spec/rdkafka/bindings_spec.rb +222 -0
- data/spec/rdkafka/callbacks_spec.rb +20 -0
- data/spec/rdkafka/config_spec.rb +258 -0
- data/spec/rdkafka/consumer/headers_spec.rb +73 -0
- data/spec/rdkafka/consumer/message_spec.rb +139 -0
- data/spec/rdkafka/consumer/partition_spec.rb +57 -0
- data/spec/rdkafka/consumer/topic_partition_list_spec.rb +248 -0
- data/spec/rdkafka/consumer_spec.rb +1299 -0
- data/spec/rdkafka/error_spec.rb +95 -0
- data/spec/rdkafka/metadata_spec.rb +79 -0
- data/spec/rdkafka/native_kafka_spec.rb +130 -0
- data/spec/rdkafka/producer/delivery_handle_spec.rb +60 -0
- data/spec/rdkafka/producer/delivery_report_spec.rb +25 -0
- data/spec/rdkafka/producer/partitions_count_cache_spec.rb +359 -0
- data/spec/rdkafka/producer/partitions_count_spec.rb +359 -0
- data/spec/rdkafka/producer_spec.rb +1234 -0
- data/spec/spec_helper.rb +181 -0
- metadata +244 -0
data/lib/rdkafka/producer/partitions_count_cache.rb
@@ -0,0 +1,216 @@
# frozen_string_literal: true

module Rdkafka
  class Producer
    # Caching mechanism for Kafka topic partition counts to avoid frequent cluster queries
    #
    # This cache is designed to optimize the process of obtaining partition counts for topics.
    # It uses several strategies to minimize Kafka cluster queries:
    #
    # @note Design considerations:
    #
    # 1. Statistics-based updates
    #    When statistics callbacks are enabled (via `statistics.interval.ms`), we leverage
    #    this data to proactively update the partition counts cache. This approach costs
    #    approximately 0.02ms of processing time during each statistics interval (typically
    #    every 5 seconds) but eliminates the need for explicit blocking metadata queries.
    #
    # 2. Edge case handling
    #    If a user configures `statistics.interval.ms` much higher than the default cache TTL
    #    (30 seconds), the cache will still function correctly. When statistics updates don't
    #    occur frequently enough, the cache entries will expire naturally, triggering a
    #    blocking refresh when needed.
    #
    # 3. User configuration awareness
    #    The cache respects user-defined settings. If `topic.metadata.refresh.interval.ms` is
    #    set very high, the responsibility for potentially stale data falls on the user. This
    #    is an explicit design choice to honor user configuration preferences and align with
    #    librdkafka settings.
    #
    # 4. Process-wide efficiency
    #    Since this cache is shared across all Rdkafka producers and consumers within a process,
    #    having multiple clients improves overall efficiency. Each client contributes to keeping
    #    the cache updated, benefiting all other clients.
    #
    # 5. Thread-safety approach
    #    The implementation uses fine-grained locking with per-topic mutexes to minimize
    #    contention in multi-threaded environments while ensuring data consistency.
    #
    # 6. Topic recreation handling
    #    If a topic is deleted and recreated with fewer partitions, the cache will continue to
    #    report the higher count until either the TTL expires or the process is restarted. This
    #    design choice simplifies the implementation while relying on librdkafka's error handling
    #    for edge cases. In production environments, topic recreation with different partition
    #    counts is typically accompanied by application restarts to handle structural changes.
    #    This also aligns with the previous cache implementation.
    class PartitionsCountCache
      include Helpers::Time

      # Default time-to-live for cached partition counts in seconds
      #
      # @note This default was chosen to balance freshness of metadata with performance
      #   optimization. Most Kafka cluster topology changes are planned operations, making 30
      #   seconds a reasonable compromise.
      DEFAULT_TTL = 30

      # Creates a new partition count cache
      #
      # @param ttl [Integer] Time-to-live in seconds for cached values
      def initialize(ttl = DEFAULT_TTL)
        @counts = {}
        @mutex_hash = {}
        # Used only for @mutex_hash access to ensure thread-safety when creating new mutexes
        @mutex_for_hash = Mutex.new
        @ttl = ttl
      end

      # Reads partition count for a topic with automatic refresh when expired
      #
      # This method will return the cached partition count if available and not expired.
      # If the value is expired or not available, it will execute the provided block
      # to fetch the current value from Kafka.
      #
      # @param topic [String] Kafka topic name
      # @yield Block that returns the current partition count when cache needs refreshing
      # @yieldreturn [Integer] Current partition count retrieved from Kafka
      # @return [Integer] Partition count for the topic
      #
      # @note The implementation prioritizes read performance over write consistency
      #   since partition counts typically only increase during normal operation.
      def get(topic)
        current_info = @counts[topic]

        if current_info.nil? || expired?(current_info[0])
          new_count = yield

          if current_info.nil?
            # No existing data, create a new entry with mutex
            set(topic, new_count)

            return new_count
          else
            current_count = current_info[1]

            if new_count > current_count
              # Higher value needs mutex to update both timestamp and count
              set(topic, new_count)

              return new_count
            else
              # Same or lower value, just update timestamp without mutex
              refresh_timestamp(topic)

              return current_count
            end
          end
        end

        current_info[1]
      end

      # Update partition count for a topic when needed
      #
      # This method updates the partition count for a topic in the cache.
      # It uses a mutex to ensure thread-safety during updates.
      #
      # @param topic [String] Kafka topic name
      # @param new_count [Integer] New partition count value
      #
      # @note We prioritize higher partition counts and only accept them when using
      #   a mutex to ensure consistency. This design decision is based on the fact that
      #   partition counts in Kafka only increase during normal operation.
      def set(topic, new_count)
        # First check outside mutex to avoid unnecessary locking
        current_info = @counts[topic]

        # For lower values, we don't update count but might need to refresh timestamp
        if current_info && new_count < current_info[1]
          refresh_timestamp(topic)

          return
        end

        # Only lock the specific topic mutex
        mutex_for(topic).synchronize do
          # Check again inside the lock as another thread might have updated
          current_info = @counts[topic]

          if current_info.nil?
            # Create new entry
            @counts[topic] = [monotonic_now, new_count]
          else
            current_count = current_info[1]

            if new_count > current_count
              # Update to higher count value
              current_info[0] = monotonic_now
              current_info[1] = new_count
            else
              # Same or lower count, update timestamp only
              current_info[0] = monotonic_now
            end
          end
        end
      end

      # @return [Hash] hash with ttls and partitions counts array
      def to_h
        @counts
      end

      private

      # Get or create a mutex for a specific topic
      #
      # This method ensures that each topic has its own mutex,
      # allowing operations on different topics to proceed in parallel.
      #
      # @param topic [String] Kafka topic name
      # @return [Mutex] Mutex for the specified topic
      #
      # @note We use a separate mutex (@mutex_for_hash) to protect the creation
      #   of new topic mutexes. This pattern allows fine-grained locking while
      #   maintaining thread-safety.
      def mutex_for(topic)
        mutex = @mutex_hash[topic]

        return mutex if mutex

        # Use a separate mutex to protect the creation of new topic mutexes
        @mutex_for_hash.synchronize do
          # Check again in case another thread created it
          @mutex_hash[topic] ||= Mutex.new
        end

        @mutex_hash[topic]
      end

      # Update the timestamp without acquiring the mutex
      #
      # This is an optimization that allows refreshing the TTL of existing entries
      # without the overhead of mutex acquisition.
      #
      # @param topic [String] Kafka topic name
      #
      # @note This method is safe for refreshing existing data regardless of count
      #   because it only updates the timestamp, which doesn't affect the correctness
      #   of concurrent operations.
      def refresh_timestamp(topic)
        current_info = @counts[topic]

        return unless current_info

        # Update the timestamp in-place
        current_info[0] = monotonic_now
      end

      # Check if a timestamp has expired based on the TTL
      #
      # @param timestamp [Float] Monotonic timestamp to check
      # @return [Boolean] true if expired, false otherwise
      def expired?(timestamp)
        monotonic_now - timestamp > @ttl
      end
    end
  end
end
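
What follows is a minimal usage sketch of the PartitionsCountCache added above, assuming the gem is installed and loaded via require "rdkafka"; the topic name and counts are hypothetical. It illustrates the non-decreasing update rule: fresh reads skip the block, and lower counts only refresh the TTL.

# frozen_string_literal: true

require "rdkafka"

# Standalone cache instance with the default 30s TTL (the gem also exposes a
# process-wide instance via Rdkafka::Producer.partitions_count_cache).
cache = Rdkafka::Producer::PartitionsCountCache.new

# Cold read: the block runs (inside the producer this would be a blocking
# metadata query) and its result is cached with a monotonic timestamp.
cache.get("example-topic") { 12 } # => 12

# Warm read within the TTL: the cached value is returned and the block is skipped.
cache.get("example-topic") { raise "not called while the entry is fresh" } # => 12

# Lower counts never overwrite a cached higher count; only the TTL is refreshed.
cache.set("example-topic", 6)
cache.get("example-topic") { 6 } # => 12

puts cache.to_h.inspect # { "example-topic" => [<monotonic timestamp>, 12] }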
data/lib/rdkafka/producer.rb
@@ -0,0 +1,492 @@
# frozen_string_literal: true

module Rdkafka
  # A producer for Kafka messages. To create a producer set up a {Config} and call {Config#producer producer} on that.
  class Producer
    include Helpers::Time
    include Helpers::OAuth

    # @private
    @@partitions_count_cache = PartitionsCountCache.new

    # Global (process wide) partitions cache. We use it to store number of topics partitions,
    # either from the librdkafka statistics (if enabled) or via direct inline calls every now and
    # then. Since the partitions count can only grow and should be same for all consumers and
    # producers, we can use a global cache as long as we ensure that updates only move up.
    #
    # @note It is critical to remember that not all users may have statistics callbacks enabled,
    #   hence we should not make the assumption that this cache is always updated from the stats.
    #
    # @return [Rdkafka::Producer::PartitionsCountCache]
    def self.partitions_count_cache
      @@partitions_count_cache
    end

    # @param partitions_count_cache [Rdkafka::Producer::PartitionsCountCache]
    def self.partitions_count_cache=(partitions_count_cache)
      @@partitions_count_cache = partitions_count_cache
    end

    # Empty hash used as a default
    EMPTY_HASH = {}.freeze

    private_constant :EMPTY_HASH

    # Raised when there was a critical issue when invoking rd_kafka_topic_new
    # This is a temporary solution until https://github.com/karafka/rdkafka-ruby/issues/451 is
    # resolved and this is normalized in all the places
    class TopicHandleCreationError < RuntimeError; end

    # @private
    # Returns the current delivery callback, by default this is nil.
    #
    # @return [Proc, nil]
    attr_reader :delivery_callback

    # @private
    # Returns the number of arguments accepted by the callback, by default this is nil.
    #
    # @return [Integer, nil]
    attr_reader :delivery_callback_arity

    # @private
    # @param native_kafka [NativeKafka]
    # @param partitioner_name [String, nil] name of the partitioner we want to use or nil to use
    #   the "consistent_random" default
    def initialize(native_kafka, partitioner_name)
      @topics_refs_map = {}
      @topics_configs = {}
      @native_kafka = native_kafka
      @partitioner_name = partitioner_name || "consistent_random"

      # Makes sure that native kafka gets closed before it gets GCed by Ruby
      ObjectSpace.define_finalizer(self, native_kafka.finalizer)
    end

    # Sets alternative set of configuration details that can be set per topic
    # @note It is not allowed to re-set the same topic config twice because of the underlying
    #   librdkafka caching
    # @param topic [String] The topic name
    # @param config [Hash] config we want to use on a per-topic basis
    # @param config_hash [Integer] hash of the config. We expect it here instead of computing it,
    #   because it is already computed during the retrieval attempt in the `#produce` flow.
    def set_topic_config(topic, config, config_hash)
      # Ensure lock on topic reference just in case
      @native_kafka.with_inner do |inner|
        @topics_refs_map[topic] ||= {}
        @topics_configs[topic] ||= {}

        return if @topics_configs[topic].key?(config_hash)

        # If config is empty, we create an empty reference that will be used with defaults
        rd_topic_config = if config.empty?
          nil
        else
          Rdkafka::Bindings.rd_kafka_topic_conf_new.tap do |topic_config|
            config.each do |key, value|
              error_buffer = FFI::MemoryPointer.new(:char, 256)
              result = Rdkafka::Bindings.rd_kafka_topic_conf_set(
                topic_config,
                key.to_s,
                value.to_s,
                error_buffer,
                256
              )

              unless result == :config_ok
                raise Config::ConfigError.new(error_buffer.read_string)
              end
            end
          end
        end

        topic_handle = Bindings.rd_kafka_topic_new(inner, topic, rd_topic_config)

        raise TopicHandleCreationError.new("Error creating topic handle for topic #{topic}") if topic_handle.null?

        @topics_configs[topic][config_hash] = config
        @topics_refs_map[topic][config_hash] = topic_handle
      end
    end

    # Starts the native Kafka polling thread and kicks off the init polling
    # @note Not needed to run unless explicit start was disabled
    def start
      @native_kafka.start
    end

    # @return [String] producer name
    def name
      @name ||= @native_kafka.with_inner do |inner|
        ::Rdkafka::Bindings.rd_kafka_name(inner)
      end
    end

    # Set a callback that will be called every time a message is successfully produced.
    # The callback is called with a {DeliveryReport} and {DeliveryHandle}
    #
    # @param callback [Proc, #call] The callback
    #
    # @return [nil]
    def delivery_callback=(callback)
      raise TypeError.new("Callback has to be callable") unless callback.respond_to?(:call)
      @delivery_callback = callback
      @delivery_callback_arity = arity(callback)
    end

    # Init transactions
    # Run once per producer
    def init_transactions
      closed_producer_check(__method__)

      @native_kafka.with_inner do |inner|
        response_ptr = Rdkafka::Bindings.rd_kafka_init_transactions(inner, -1)

        Rdkafka::RdkafkaError.validate!(response_ptr) || true
      end
    end

    def begin_transaction
      closed_producer_check(__method__)

      @native_kafka.with_inner do |inner|
        response_ptr = Rdkafka::Bindings.rd_kafka_begin_transaction(inner)

        Rdkafka::RdkafkaError.validate!(response_ptr) || true
      end
    end

    def commit_transaction(timeout_ms = -1)
      closed_producer_check(__method__)

      @native_kafka.with_inner do |inner|
        response_ptr = Rdkafka::Bindings.rd_kafka_commit_transaction(inner, timeout_ms)

        Rdkafka::RdkafkaError.validate!(response_ptr) || true
      end
    end

    def abort_transaction(timeout_ms = -1)
      closed_producer_check(__method__)

      @native_kafka.with_inner do |inner|
        response_ptr = Rdkafka::Bindings.rd_kafka_abort_transaction(inner, timeout_ms)
        Rdkafka::RdkafkaError.validate!(response_ptr) || true
      end
    end

    # Sends provided offsets of a consumer to the transaction for collective commit
    #
    # @param consumer [Consumer] consumer that owns the given tpls
    # @param tpl [Consumer::TopicPartitionList]
    # @param timeout_ms [Integer] offsets send timeout
    # @note Use **only** in the context of an active transaction
    def send_offsets_to_transaction(consumer, tpl, timeout_ms = 5_000)
      closed_producer_check(__method__)

      return if tpl.empty?

      cgmetadata = consumer.consumer_group_metadata_pointer
      native_tpl = tpl.to_native_tpl

      @native_kafka.with_inner do |inner|
        response_ptr = Bindings.rd_kafka_send_offsets_to_transaction(inner, native_tpl, cgmetadata, timeout_ms)

        Rdkafka::RdkafkaError.validate!(response_ptr)
      end
    ensure
      if cgmetadata && !cgmetadata.null?
        Bindings.rd_kafka_consumer_group_metadata_destroy(cgmetadata)
      end

      Rdkafka::Bindings.rd_kafka_topic_partition_list_destroy(native_tpl) unless native_tpl.nil?
    end

    # Close this producer and wait for the internal poll queue to empty.
    def close
      return if closed?
      ObjectSpace.undefine_finalizer(self)

      @native_kafka.close do
        # We need to remove the topics references objects before we destroy the producer,
        # otherwise they would leak out
        @topics_refs_map.each_value do |refs|
          refs.each_value do |ref|
            Rdkafka::Bindings.rd_kafka_topic_destroy(ref)
          end
        end
      end

      @topics_refs_map.clear
    end

    # Whether this producer has closed
    def closed?
      @native_kafka.closed?
    end

    # Wait until all outstanding producer requests are completed, with the given timeout
    # in seconds. Call this before closing a producer to ensure delivery of all messages.
    #
    # @param timeout_ms [Integer] how long should we wait for flush of all messages
    # @return [Boolean] true if no more data and all was flushed, false in case there are still
    #   outgoing messages after the timeout
    #
    # @note We raise an exception for other errors because based on the librdkafka docs, there
    #   should be no other errors.
    #
    # @note For `timed_out` we do not raise an error to keep it backwards compatible
    def flush(timeout_ms=5_000)
      closed_producer_check(__method__)

      error = @native_kafka.with_inner do |inner|
        response = Rdkafka::Bindings.rd_kafka_flush(inner, timeout_ms)
        Rdkafka::RdkafkaError.build(response)
      end

      # Early skip not to build the error message
      return true unless error
      return false if error.code == :timed_out

      raise(error)
    end

    # Purges the outgoing queue and releases all resources.
    #
    # Useful when closing the producer with outgoing messages to unstable clusters or when for
    # any other reasons waiting cannot go on anymore. This purges both the queue and all the
    # inflight requests + updates the delivery handles statuses so they can be materialized into
    # `purge_queue` errors.
    def purge
      closed_producer_check(__method__)

      @native_kafka.with_inner do |inner|
        response = Bindings.rd_kafka_purge(
          inner,
          Bindings::RD_KAFKA_PURGE_F_QUEUE | Bindings::RD_KAFKA_PURGE_F_INFLIGHT
        )

        Rdkafka::RdkafkaError.validate!(response)
      end

      # Wait for the purge to affect everything
      sleep(0.001) until flush(100)

      true
    end

    # Partition count for a given topic.
    #
    # @param topic [String] The topic name.
    # @return [Integer] partition count for a given topic or `-1` if it could not be obtained.
    #
    # @note If 'allow.auto.create.topics' is set to true in the broker, the topic will be
    #   auto-created after returning nil.
    #
    # @note We cache the partition count for a given topic for given time. If statistics are
    #   enabled for any producer or consumer, it will take precedence over per instance fetching.
    #
    #   This prevents us in case someone uses `partition_key` from querying for the count with
    #   each message. Instead we query at most once every 30 seconds if we have a valid
    #   partition count or every 5 seconds in case we were not able to obtain number of partitions.
    def partition_count(topic)
      closed_producer_check(__method__)

      self.class.partitions_count_cache.get(topic) do
        topic_metadata = nil

        @native_kafka.with_inner do |inner|
          topic_metadata = ::Rdkafka::Metadata.new(inner, topic).topics&.first
        end

        topic_metadata ? topic_metadata[:partition_count] : -1
      end
    rescue Rdkafka::RdkafkaError => e
      # If the topic does not exist, it will be created or if not allowed another error will be
      # raised. We here return -1 so this can happen without early error happening on metadata
      # discovery.
      return -1 if e.code == :unknown_topic_or_part

      raise(e)
    end

    # Produces a message to a Kafka topic. The message is added to rdkafka's queue, call {DeliveryHandle#wait wait} on the returned delivery handle to make sure it is delivered.
    #
    # When no partition is specified the underlying Kafka library picks a partition based on the key. If no key is specified, a random partition will be used.
    # When a timestamp is provided this is used instead of the auto-generated timestamp.
    #
    # @param topic [String] The topic to produce to
    # @param payload [String,nil] The message's payload
    # @param key [String, nil] The message's key
    # @param partition [Integer,nil] Optional partition to produce to
    # @param partition_key [String, nil] Optional partition key based on which partition assignment can happen
    # @param timestamp [Time,Integer,nil] Optional timestamp of this message. Integer timestamp is in milliseconds since Jan 1 1970.
    # @param headers [Hash<String,String|Array<String>>] Optional message headers. Values can be either a single string or an array of strings to support duplicate headers per KIP-82
    # @param label [Object, nil] a label that can be assigned when producing a message that will be part of the delivery handle and the delivery report
    # @param topic_config [Hash] topic config for given message dispatch. Allows to send messages to topics with different configuration
    #
    # @return [DeliveryHandle] Delivery handle that can be used to wait for the result of producing this message
    #
    # @raise [RdkafkaError] When adding the message to rdkafka's queue failed
    def produce(
      topic:,
      payload: nil,
      key: nil,
      partition: nil,
      partition_key: nil,
      timestamp: nil,
      headers: nil,
      label: nil,
      topic_config: EMPTY_HASH
    )
      closed_producer_check(__method__)

      # Start by checking and converting the input

      # Get payload length
      payload_size = if payload.nil?
                       0
                     else
                       payload.bytesize
                     end

      # Get key length
      key_size = if key.nil?
                   0
                 else
                   key.bytesize
                 end

      topic_config_hash = topic_config.hash

      # Checks if we have the rdkafka topic reference object ready. It saves us on object
      # allocation and allows to use custom config on demand.
      set_topic_config(topic, topic_config, topic_config_hash) unless @topics_refs_map.dig(topic, topic_config_hash)
      topic_ref = @topics_refs_map.dig(topic, topic_config_hash)

      if partition_key
        partition_count = partition_count(topic)

        # Check if there are no overrides for the partitioner and use the default one only when
        # no per-topic is present.
        partitioner_name = @topics_configs.dig(topic, topic_config_hash, :partitioner) || @partitioner_name

        # If the topic is not present, set to -1
        partition = Rdkafka::Bindings.partitioner(partition_key, partition_count, partitioner_name) if partition_count.positive?
      end

      # If partition is nil, use -1 to let librdkafka set the partition randomly or
      # based on the key when present.
      partition ||= -1

      # If timestamp is nil use 0 and let Kafka set one. If an integer or time
      # use it.
      raw_timestamp = if timestamp.nil?
                        0
                      elsif timestamp.is_a?(Integer)
                        timestamp
                      elsif timestamp.is_a?(Time)
                        (timestamp.to_i * 1000) + (timestamp.usec / 1000)
                      else
                        raise TypeError.new("Timestamp has to be nil, an Integer or a Time")
                      end

      delivery_handle = DeliveryHandle.new
      delivery_handle.label = label
      delivery_handle.topic = topic
      delivery_handle[:pending] = true
      delivery_handle[:response] = -1
      delivery_handle[:partition] = -1
      delivery_handle[:offset] = -1
      DeliveryHandle.register(delivery_handle)

      args = [
        :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_RKT, :pointer, topic_ref,
        :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_MSGFLAGS, :int, Rdkafka::Bindings::RD_KAFKA_MSG_F_COPY,
        :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_VALUE, :buffer_in, payload, :size_t, payload_size,
        :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_KEY, :buffer_in, key, :size_t, key_size,
        :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_PARTITION, :int32, partition,
        :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_TIMESTAMP, :int64, raw_timestamp,
        :int, Rdkafka::Bindings::RD_KAFKA_VTYPE_OPAQUE, :pointer, delivery_handle,
      ]

      if headers
        headers.each do |key0, value0|
          key = key0.to_s
          if value0.is_a?(Array)
            # Handle array of values per KIP-82
            value0.each do |value|
              value = value.to_s
              args << :int << Rdkafka::Bindings::RD_KAFKA_VTYPE_HEADER
              args << :string << key
              args << :pointer << value
              args << :size_t << value.bytesize
            end
          else
            # Handle single value
            value = value0.to_s
            args << :int << Rdkafka::Bindings::RD_KAFKA_VTYPE_HEADER
            args << :string << key
            args << :pointer << value
            args << :size_t << value.bytesize
          end
        end
      end

      args << :int << Rdkafka::Bindings::RD_KAFKA_VTYPE_END

      # Produce the message
      response = @native_kafka.with_inner do |inner|
        Rdkafka::Bindings.rd_kafka_producev(
          inner,
          *args
        )
      end

      # Raise error if the produce call was not successful
      if response != 0
        DeliveryHandle.remove(delivery_handle.to_ptr.address)
        Rdkafka::RdkafkaError.validate!(response)
      end

      delivery_handle
    end

    # Calls (if registered) the delivery callback
    #
    # @param delivery_report [Producer::DeliveryReport]
    # @param delivery_handle [Producer::DeliveryHandle]
    def call_delivery_callback(delivery_report, delivery_handle)
      return unless @delivery_callback

      case @delivery_callback_arity
      when 0
        @delivery_callback.call
      when 1
        @delivery_callback.call(delivery_report)
      else
        @delivery_callback.call(delivery_report, delivery_handle)
      end
    end

    # Figures out the arity of a given block/method
    #
    # @param callback [#call, Proc]
    # @return [Integer] arity of the provided block/method
    def arity(callback)
      return callback.arity if callback.respond_to?(:arity)

      callback.method(:call).arity
    end

    private

    # Ensures no operations can happen on a closed producer
    #
    # @param method [Symbol] name of the method that invoked producer
    # @raise [Rdkafka::ClosedProducerError]
    def closed_producer_check(method)
      raise Rdkafka::ClosedProducerError.new(method) if closed?
    end
  end
end