karafka-rdkafka 0.19.5 → 0.20.0.rc1

This diff shows the changes between two publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
metadata CHANGED
@@ -1,13 +1,40 @@
  --- !ruby/object:Gem::Specification
  name: karafka-rdkafka
  version: !ruby/object:Gem::Version
- version: 0.19.5
+ version: 0.20.0.rc1
  platform: ruby
  authors:
  - Thijs Cadier
  - Maciej Mensfeld
  bindir: bin
- cert_chain: []
+ cert_chain:
+ - |
+ -----BEGIN CERTIFICATE-----
+ MIIEcDCCAtigAwIBAgIBATANBgkqhkiG9w0BAQsFADA/MRAwDgYDVQQDDAdjb250
+ YWN0MRcwFQYKCZImiZPyLGQBGRYHa2FyYWZrYTESMBAGCgmSJomT8ixkARkWAmlv
+ MB4XDTI0MDgyMzEwMTkyMFoXDTQ5MDgxNzEwMTkyMFowPzEQMA4GA1UEAwwHY29u
+ dGFjdDEXMBUGCgmSJomT8ixkARkWB2thcmFma2ExEjAQBgoJkiaJk/IsZAEZFgJp
+ bzCCAaIwDQYJKoZIhvcNAQEBBQADggGPADCCAYoCggGBAKjLhLjQqUlNayxkXnO+
+ PsmCDs/KFIzhrsYMfLZRZNaWmzV3ujljMOdDjd4snM2X06C41iVdQPWjpe3j8vVe
+ ZXEWR/twSbOP6Eeg8WVH2wCOo0x5i7yhVn4UBLH4JpfEMCbemVcWQ9ry9OMg4WpH
+ Uu4dRwxFV7hzCz3p0QfNLRI4miAxnGWcnlD98IJRjBAksTuR1Llj0vbOrDGsL9ZT
+ JeXP2gdRLd8SqzAFJEWrbeTBCBU7gfSh3oMg5SVDLjaqf7Kz5wC/8bDZydzanOxB
+ T6CDXPsCnllmvTNx2ei2T5rGYJOzJeNTmJLLK6hJWUlAvaQSvCwZRvFJ0tVGLEoS
+ flqSr6uGyyl1eMUsNmsH4BqPEYcAV6P2PKTv2vUR8AP0raDvZ3xL1TKvfRb8xRpo
+ vPopCGlY5XBWEc6QERHfVLTIVsjnls2/Ujj4h8/TSfqqYnaHKefIMLbuD/tquMjD
+ iWQsW2qStBV0T+U7FijKxVfrfqZP7GxQmDAc9o1iiyAa3QIDAQABo3cwdTAJBgNV
+ HRMEAjAAMAsGA1UdDwQEAwIEsDAdBgNVHQ4EFgQU3O4dTXmvE7YpAkszGzR9DdL9
+ sbEwHQYDVR0RBBYwFIESY29udGFjdEBrYXJhZmthLmlvMB0GA1UdEgQWMBSBEmNv
+ bnRhY3RAa2FyYWZrYS5pbzANBgkqhkiG9w0BAQsFAAOCAYEAVKTfoLXn7mqdSxIR
+ eqxcR6Huudg1jes81s1+X0uiRTR3hxxKZ3Y82cPsee9zYWyBrN8TA4KA0WILTru7
+ Ygxvzha0SRPsSiaKLmgOJ+61ebI4+bOORzIJLpD6GxCxu1r7MI4+0r1u1xe0EWi8
+ agkVo1k4Vi8cKMLm6Gl9b3wG9zQBw6fcgKwmpjKiNnOLP+OytzUANrIUJjoq6oal
+ TC+f/Uc0TLaRqUaW/bejxzDWWHoM3SU6aoLPuerglzp9zZVzihXwx3jPLUVKDFpF
+ Rl2lcBDxlpYGueGo0/oNzGJAAy6js8jhtHC9+19PD53vk7wHtFTZ/0ugDQYnwQ+x
+ oml2fAAuVWpTBCgOVFe6XCQpMKopzoxQ1PjKztW2KYxgJdIBX87SnL3aWuBQmhRd
+ i9zWxov0mr44TWegTVeypcWGd/0nxu1+QHVNHJrpqlPBRvwQsUm7fwmRInGpcaB8
+ ap8wNYvryYzrzvzUxIVFBVM5PacgkFqRmolCa8I7tdKQN+R1
+ -----END CERTIFICATE-----
  date: 1980-01-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
@@ -147,7 +174,6 @@ files:
  - ".github/CODEOWNERS"
  - ".github/FUNDING.yml"
  - ".github/workflows/ci.yml"
- - ".github/workflows/push.yml"
  - ".github/workflows/verify-action-pins.yml"
  - ".gitignore"
  - ".rspec"
@@ -160,7 +186,8 @@ files:
  - MIT-LICENSE
  - README.md
  - Rakefile
- - dist/librdkafka-2.8.0.tar.gz
+ - certs/cert.pem
+ - dist/librdkafka-2.10.0.tar.gz
  - dist/patches/rdkafka_global_init.patch
  - docker-compose.yml
  - ext/README.md
@@ -206,7 +233,6 @@ files:
  - lib/rdkafka/producer.rb
  - lib/rdkafka/producer/delivery_handle.rb
  - lib/rdkafka/producer/delivery_report.rb
- - lib/rdkafka/producer/partitions_count_cache.rb
  - lib/rdkafka/version.rb
  - renovate.json
  - spec/rdkafka/abstract_handle_spec.rb
@@ -234,8 +260,6 @@ files:
  - spec/rdkafka/native_kafka_spec.rb
  - spec/rdkafka/producer/delivery_handle_spec.rb
  - spec/rdkafka/producer/delivery_report_spec.rb
- - spec/rdkafka/producer/partitions_count_cache_spec.rb
- - spec/rdkafka/producer/partitions_count_spec.rb
  - spec/rdkafka/producer_spec.rb
  - spec/spec_helper.rb
  licenses:
@@ -243,7 +267,7 @@ licenses:
  metadata:
  funding_uri: https://karafka.io/#become-pro
  homepage_uri: https://karafka.io
- changelog_uri: https://karafka.io/docs/Changelog-Karafka-Rdkafka/
+ changelog_uri: https://github.com/karafka/karafka-rdkafka/blob/master/CHANGELOG.md
  bug_tracker_uri: https://github.com/karafka/karafka-rdkafka/issues
  source_code_uri: https://github.com/karafka/karafka-rdkafka
  documentation_uri: https://karafka.io/docs
@@ -293,7 +317,5 @@ test_files:
  - spec/rdkafka/native_kafka_spec.rb
  - spec/rdkafka/producer/delivery_handle_spec.rb
  - spec/rdkafka/producer/delivery_report_spec.rb
- - spec/rdkafka/producer/partitions_count_cache_spec.rb
- - spec/rdkafka/producer/partitions_count_spec.rb
  - spec/rdkafka/producer_spec.rb
  - spec/spec_helper.rb
metadata.gz.sig ADDED
Binary file
.github/workflows/push.yml DELETED
@@ -1,37 +0,0 @@
- name: Push Gem
-
- on:
-   push:
-     tags:
-       - v*
-
- permissions:
-   contents: read
-
- jobs:
-   push:
-     if: github.repository_owner == 'karafka'
-     runs-on: ubuntu-latest
-     environment: deployment
-
-     permissions:
-       contents: write
-       id-token: write
-
-     steps:
-       - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-         with:
-           fetch-depth: 0
-
-       - name: Set up Ruby
-         uses: ruby/setup-ruby@13e7a03dc3ac6c3798f4570bfead2aed4d96abfb # v1.244.0
-         with:
-           bundler-cache: false
-
-       - name: Build rdkafka-ruby
-         run: |
-           set -e
-           bundle install --jobs 4 --retry 3
-           cd ext && bundle exec rake
-
-       - uses: rubygems/release-gem@a25424ba2ba8b387abc8ef40807c2c85b96cbe32 # v1.1.1
lib/rdkafka/producer/partitions_count_cache.rb DELETED
@@ -1,216 +0,0 @@
- # frozen_string_literal: true
-
- module Rdkafka
-   class Producer
-     # Caching mechanism for Kafka topic partition counts to avoid frequent cluster queries
-     #
-     # This cache is designed to optimize the process of obtaining partition counts for topics.
-     # It uses several strategies to minimize Kafka cluster queries:
-     #
-     # @note Design considerations:
-     #
-     # 1. Statistics-based updates
-     # When statistics callbacks are enabled (via `statistics.interval.ms`), we leverage
-     # this data to proactively update the partition counts cache. This approach costs
-     # approximately 0.02ms of processing time during each statistics interval (typically
-     # every 5 seconds) but eliminates the need for explicit blocking metadata queries.
-     #
-     # 2. Edge case handling
-     # If a user configures `statistics.interval.ms` much higher than the default cache TTL
-     # (30 seconds), the cache will still function correctly. When statistics updates don't
-     # occur frequently enough, the cache entries will expire naturally, triggering a
-     # blocking refresh when needed.
-     #
-     # 3. User configuration awareness
-     # The cache respects user-defined settings. If `topic.metadata.refresh.interval.ms` is
-     # set very high, the responsibility for potentially stale data falls on the user. This
-     # is an explicit design choice to honor user configuration preferences and align with
-     # librdkafka settings.
-     #
-     # 4. Process-wide efficiency
-     # Since this cache is shared across all Rdkafka producers and consumers within a process,
-     # having multiple clients improves overall efficiency. Each client contributes to keeping
-     # the cache updated, benefiting all other clients.
-     #
-     # 5. Thread-safety approach
-     # The implementation uses fine-grained locking with per-topic mutexes to minimize
-     # contention in multi-threaded environments while ensuring data consistency.
-     #
-     # 6. Topic recreation handling
-     # If a topic is deleted and recreated with fewer partitions, the cache will continue to
-     # report the higher count until either the TTL expires or the process is restarted. This
-     # design choice simplifies the implementation while relying on librdkafka's error handling
-     # for edge cases. In production environments, topic recreation with different partition
-     # counts is typically accompanied by application restarts to handle structural changes.
-     # This also aligns with the previous cache implementation.
-     class PartitionsCountCache
-       include Helpers::Time
-
-       # Default time-to-live for cached partition counts in seconds
-       #
-       # @note This default was chosen to balance freshness of metadata with performance
-       # optimization. Most Kafka cluster topology changes are planned operations, making 30
-       # seconds a reasonable compromise.
-       DEFAULT_TTL = 30
-
-       # Creates a new partition count cache
-       #
-       # @param ttl [Integer] Time-to-live in seconds for cached values
-       def initialize(ttl = DEFAULT_TTL)
-         @counts = {}
-         @mutex_hash = {}
-         # Used only for @mutex_hash access to ensure thread-safety when creating new mutexes
-         @mutex_for_hash = Mutex.new
-         @ttl = ttl
-       end
-
-       # Reads partition count for a topic with automatic refresh when expired
-       #
-       # This method will return the cached partition count if available and not expired.
-       # If the value is expired or not available, it will execute the provided block
-       # to fetch the current value from Kafka.
-       #
-       # @param topic [String] Kafka topic name
-       # @yield Block that returns the current partition count when cache needs refreshing
-       # @yieldreturn [Integer] Current partition count retrieved from Kafka
-       # @return [Integer] Partition count for the topic
-       #
-       # @note The implementation prioritizes read performance over write consistency
-       # since partition counts typically only increase during normal operation.
-       def get(topic)
-         current_info = @counts[topic]
-
-         if current_info.nil? || expired?(current_info[0])
-           new_count = yield
-
-           if current_info.nil?
-             # No existing data, create a new entry with mutex
-             set(topic, new_count)
-
-             return new_count
-           else
-             current_count = current_info[1]
-
-             if new_count > current_count
-               # Higher value needs mutex to update both timestamp and count
-               set(topic, new_count)
-
-               return new_count
-             else
-               # Same or lower value, just update timestamp without mutex
-               refresh_timestamp(topic)
-
-               return current_count
-             end
-           end
-         end
-
-         current_info[1]
-       end
-
-       # Update partition count for a topic when needed
-       #
-       # This method updates the partition count for a topic in the cache.
-       # It uses a mutex to ensure thread-safety during updates.
-       #
-       # @param topic [String] Kafka topic name
-       # @param new_count [Integer] New partition count value
-       #
-       # @note We prioritize higher partition counts and only accept them when using
-       # a mutex to ensure consistency. This design decision is based on the fact that
-       # partition counts in Kafka only increase during normal operation.
-       def set(topic, new_count)
-         # First check outside mutex to avoid unnecessary locking
-         current_info = @counts[topic]
-
-         # For lower values, we don't update count but might need to refresh timestamp
-         if current_info && new_count < current_info[1]
-           refresh_timestamp(topic)
-
-           return
-         end
-
-         # Only lock the specific topic mutex
-         mutex_for(topic).synchronize do
-           # Check again inside the lock as another thread might have updated
-           current_info = @counts[topic]
-
-           if current_info.nil?
-             # Create new entry
-             @counts[topic] = [monotonic_now, new_count]
-           else
-             current_count = current_info[1]
-
-             if new_count > current_count
-               # Update to higher count value
-               current_info[0] = monotonic_now
-               current_info[1] = new_count
-             else
-               # Same or lower count, update timestamp only
-               current_info[0] = monotonic_now
-             end
-           end
-         end
-       end
-
-       # @return [Hash] hash with ttls and partitions counts array
-       def to_h
-         @counts
-       end
-
-       private
-
-       # Get or create a mutex for a specific topic
-       #
-       # This method ensures that each topic has its own mutex,
-       # allowing operations on different topics to proceed in parallel.
-       #
-       # @param topic [String] Kafka topic name
-       # @return [Mutex] Mutex for the specified topic
-       #
-       # @note We use a separate mutex (@mutex_for_hash) to protect the creation
-       # of new topic mutexes. This pattern allows fine-grained locking while
-       # maintaining thread-safety.
-       def mutex_for(topic)
-         mutex = @mutex_hash[topic]
-
-         return mutex if mutex
-
-         # Use a separate mutex to protect the creation of new topic mutexes
-         @mutex_for_hash.synchronize do
-           # Check again in case another thread created it
-           @mutex_hash[topic] ||= Mutex.new
-         end
-
-         @mutex_hash[topic]
-       end
-
-       # Update the timestamp without acquiring the mutex
-       #
-       # This is an optimization that allows refreshing the TTL of existing entries
-       # without the overhead of mutex acquisition.
-       #
-       # @param topic [String] Kafka topic name
-       #
-       # @note This method is safe for refreshing existing data regardless of count
-       # because it only updates the timestamp, which doesn't affect the correctness
-       # of concurrent operations.
-       def refresh_timestamp(topic)
-         current_info = @counts[topic]
-
-         return unless current_info
-
-         # Update the timestamp in-place
-         current_info[0] = monotonic_now
-       end
-
-       # Check if a timestamp has expired based on the TTL
-       #
-       # @param timestamp [Float] Monotonic timestamp to check
-       # @return [Boolean] true if expired, false otherwise
-       def expired?(timestamp)
-         monotonic_now - timestamp > @ttl
-       end
-     end
-   end
- end
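
For reference, the removed PartitionsCountCache exposed a small block-based public API: new(ttl), get(topic) { ... }, set(topic, count) and to_h. A minimal usage sketch against the 0.19.x API, assuming the library's usual require; the topic name and the metadata-fetching helper below are illustrative placeholders, not code from this gem:

  require "rdkafka"

  # 30 seconds is the documented DEFAULT_TTL
  cache = Rdkafka::Producer::PartitionsCountCache.new(30)

  # The block runs only when the topic has no cached entry or the entry is
  # older than the TTL; otherwise the cached partition count is returned.
  partitions = cache.get("example-topic") do
    fetch_partition_count_from_cluster("example-topic") # hypothetical blocking metadata lookup
  end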