karafka 2.1.4 → 2.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e54dc80fd5a3a93857fd6a3a175030f7cb2eee1f77668d6936b705b0789c4228
4
- data.tar.gz: e7f3b02811432987048bc599c2c4a6f0444fe13d98ca4aa8e87cebd5aaa8037a
3
+ metadata.gz: 82a8b28b55f1db0808be3d1e48616f6b2389466332c9116e263e37cab992fc65
4
+ data.tar.gz: 2f29bb9bb1c3f949d206c5c8453b35ad163219babb48687e2270e13914e78aba
5
5
  SHA512:
6
- metadata.gz: 871ed7a55421041b1b232c47b3da0f625b8f2114c00057daf883223d3da4dc16637c9f0b4714cebe4a5f33249beac9bd7abbe15ec122f5b1a45a3c953dcb1465
7
- data.tar.gz: ac2cc50c3277bd0bac7e3a2af40cc6d5a9c3aa92fc8cc3d2b12b8645329d269d0ed0d80c462d2fa6a03a677213e84802a1dfe8fdc1c494348b1a8b4fd9ef8459
6
+ metadata.gz: 93a66f4aeb49cea810bfd90cf424b3334d1dae992035e0bd9613bbd3c42f642f94fd0efd979d57df5083a46f66f522a7d3952c9e24340b8a4dc4c23aff165a0f
7
+ data.tar.gz: 4ee03b442b3029aecf0ffd636ddccb054e51f2a448c3dd642993464bfc32aa45595f26835db8a9b5b01940ab5b532e0bc22a9a3cdbcc9899320b55010473c749
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.1.5 (2023-06-19)
4
+ - [Improvement] Drastically improve `#revoked?` response quality by checking the real time assignment lost state on librdkafka.
5
+ - [Improvement] Improve eviction of saturated jobs that would run on already revoked assignments.
6
+ - [Improvement] Expose `#commit_offsets` and `#commit_offsets!` methods in the consumer to provide ability to commit offsets directly to Kafka without having to mark new messages as consumed.
7
+ - [Improvement] No longer skip offset commit when no messages are marked as consumed, as `librdkafka` has fixed the crashes there.
8
+ - [Improvement] Remove no longer needed patches.
9
+ - [Improvement] Ensure that the coordinator revocation status is switched upon revocation detection when using `#revoked?`
10
+ - [Improvement] Add benchmarks for marking as consumed (sync and async).
11
+ - [Change] Require `karafka-core` `>= 2.1.0`
12
+ - [Change] Require `waterdrop` `>= 2.6.1`
13
+
3
14
  ## 2.1.4 (2023-06-06)
4
15
  - [Fix] `processing_lag` and `consumption_lag` on empty batch fail on shutdown usage (#1475)
5
16
 
data/Gemfile.lock CHANGED
@@ -1,19 +1,19 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.1.4)
5
- karafka-core (>= 2.0.13, < 3.0.0)
4
+ karafka (2.1.5)
5
+ karafka-core (>= 2.1.0, < 2.2.0)
6
6
  thor (>= 0.20)
7
- waterdrop (>= 2.5.3, < 3.0.0)
7
+ waterdrop (>= 2.6.1, < 3.0.0)
8
8
  zeitwerk (~> 2.3)
9
9
 
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- activejob (7.0.4.3)
14
- activesupport (= 7.0.4.3)
13
+ activejob (7.0.5)
14
+ activesupport (= 7.0.5)
15
15
  globalid (>= 0.3.6)
16
- activesupport (7.0.4.3)
16
+ activesupport (7.0.5)
17
17
  concurrent-ruby (~> 1.0, >= 1.0.2)
18
18
  i18n (>= 1.6, < 2)
19
19
  minitest (>= 5.1)
@@ -28,26 +28,26 @@ GEM
28
28
  ffi (1.15.5)
29
29
  globalid (1.1.0)
30
30
  activesupport (>= 5.0)
31
- i18n (1.13.0)
31
+ i18n (1.14.1)
32
32
  concurrent-ruby (~> 1.0)
33
- karafka-core (2.0.13)
33
+ karafka-core (2.1.0)
34
34
  concurrent-ruby (>= 1.1)
35
- karafka-rdkafka (>= 0.12.3)
36
- karafka-rdkafka (0.12.3)
35
+ karafka-rdkafka (>= 0.13.0, < 0.14.0)
36
+ karafka-rdkafka (0.13.0)
37
37
  ffi (~> 1.15)
38
38
  mini_portile2 (~> 2.6)
39
39
  rake (> 12)
40
- karafka-web (0.5.2)
40
+ karafka-web (0.6.0)
41
41
  erubi (~> 1.4)
42
- karafka (>= 2.0.40, < 3.0.0)
43
- karafka-core (>= 2.0.12, < 3.0.0)
44
- roda (~> 3.63)
42
+ karafka (>= 2.1.4, < 3.0.0)
43
+ karafka-core (>= 2.0.13, < 3.0.0)
44
+ roda (~> 3.68, >= 3.68)
45
45
  tilt (~> 2.0)
46
46
  mini_portile2 (2.8.2)
47
- minitest (5.18.0)
48
- rack (3.0.7)
47
+ minitest (5.18.1)
48
+ rack (3.0.8)
49
49
  rake (13.0.6)
50
- roda (3.68.0)
50
+ roda (3.69.0)
51
51
  rack
52
52
  rspec (3.12.0)
53
53
  rspec-core (~> 3.12.0)
@@ -69,11 +69,11 @@ GEM
69
69
  simplecov-html (0.12.3)
70
70
  simplecov_json_formatter (0.1.4)
71
71
  thor (1.2.2)
72
- tilt (2.1.0)
72
+ tilt (2.2.0)
73
73
  tzinfo (2.0.6)
74
74
  concurrent-ruby (~> 1.0)
75
- waterdrop (2.5.3)
76
- karafka-core (>= 2.0.13, < 3.0.0)
75
+ waterdrop (2.6.1)
76
+ karafka-core (>= 2.1.0, < 3.0.0)
77
77
  zeitwerk (~> 2.3)
78
78
  zeitwerk (2.6.8)
79
79
 
data/karafka.gemspec CHANGED
@@ -21,9 +21,9 @@ Gem::Specification.new do |spec|
21
21
  without having to focus on things that are not your business domain.
22
22
  DESC
23
23
 
24
- spec.add_dependency 'karafka-core', '>= 2.0.13', '< 3.0.0'
24
+ spec.add_dependency 'karafka-core', '>= 2.1.0', '< 2.2.0'
25
25
  spec.add_dependency 'thor', '>= 0.20'
26
- spec.add_dependency 'waterdrop', '>= 2.5.3', '< 3.0.0'
26
+ spec.add_dependency 'waterdrop', '>= 2.6.1', '< 3.0.0'
27
27
  spec.add_dependency 'zeitwerk', '~> 2.3'
28
28
 
29
29
  if $PROGRAM_NAME.end_with?('gem')
data/lib/karafka/admin.rb CHANGED
@@ -171,7 +171,9 @@ module Karafka
171
171
  # @return [Rdkafka::Metadata] cluster metadata info
172
172
  def cluster_info
173
173
  with_admin do |admin|
174
- Rdkafka::Metadata.new(admin.instance_variable_get('@native_kafka'))
174
+ admin.instance_variable_get('@native_kafka').with_inner do |inner|
175
+ Rdkafka::Metadata.new(inner)
176
+ end
175
177
  end
176
178
  end
177
179
 
@@ -215,10 +215,18 @@ module Karafka
215
215
  end
216
216
 
217
217
  # @return [Boolean] true if partition was revoked from the current consumer
218
- # @note We know that partition got revoked because when we try to mark message as consumed,
219
- # unless if is successful, it will return false
218
+ # @note There are two "levels" on which we can know that partition was revoked. First one is
219
+ # when we lose the assignment involuntarily and second is when coordinator gets this info
220
+ # after we poll with the rebalance callbacks. The first check allows us to get this notion
221
+ # even before we poll but it gets reset when polling happens, hence we also need to switch
222
+ # the coordinator state after the revocation (but prior to running more jobs)
220
223
  def revoked?
221
- coordinator.revoked?
224
+ return true if coordinator.revoked?
225
+ return false unless client.assignment_lost?
226
+
227
+ coordinator.revoke
228
+
229
+ true
222
230
  end
223
231
 
224
232
  # @return [Boolean] are we retrying processing after an error. This can be used to provide a
@@ -41,9 +41,6 @@ module Karafka
41
41
  @buffer = RawMessagesBuffer.new
42
42
  @rebalance_manager = RebalanceManager.new
43
43
  @kafka = build_consumer
44
- # Marks if we need to offset. If we did not store offsets, we should not commit the offset
45
- # position as it will crash rdkafka
46
- @offsetting = false
47
44
  # We need to keep track of what we have paused for resuming
48
45
  # In case we lose partition, we still need to resume it, otherwise it won't be fetched
49
46
  # again if we get reassigned to it later on. We need to keep them as after revocation we
@@ -109,8 +106,14 @@ module Karafka
109
106
  end
110
107
  end
111
108
 
109
+ # @return [Boolean] true if our current assignment has been lost involuntarily.
110
+ def assignment_lost?
111
+ @mutex.synchronize do
112
+ @kafka.assignment_lost?
113
+ end
114
+ end
115
+
112
116
  # Commits the offset on a current consumer in a non-blocking or blocking way.
113
- # Ignoring a case where there would not be an offset (for example when rebalance occurs).
114
117
  #
115
118
  # @param async [Boolean] should the commit happen async or sync (async by default)
116
119
  # @return [Boolean] was the commit successful. It may not be, when we no longer own
@@ -118,7 +121,11 @@ module Karafka
118
121
  #
119
122
  # @note This will commit all the offsets for the whole consumer. In order to achieve
120
123
  # granular control over where the offset should be for particular topic partitions, the
121
- # store_offset should be used to only store new offset when we want to to be flushed
124
+ # store_offset should be used to only store new offset when we want them to be flushed
125
+ #
126
+ # @note This method for async may return `true` despite involuntary partition revocation as
127
+ # it does **not** resolve to `assignment_lost?`. It returns only the commit state operation
128
+ # result.
122
129
  def commit_offsets(async: true)
123
130
  @mutex.lock
124
131
 
@@ -235,9 +242,10 @@ module Karafka
235
242
  # @param [Karafka::Messages::Message] message that we want to mark as processed
236
243
  # @return [Boolean] true if successful. False if we no longer own given partition
237
244
  # @note This method won't trigger automatic offsets commits, rather relying on the offset
238
- # check-pointing trigger that happens with each batch processed
245
+ # check-pointing trigger that happens with each batch processed. It will however check the
246
+ # `librdkafka` assignment ownership to increase accuracy for involuntary revocations.
239
247
  def mark_as_consumed(message)
240
- store_offset(message)
248
+ store_offset(message) && !assignment_lost?
241
249
  end
242
250
 
243
251
  # Marks a given message as consumed and commits the offsets in a blocking way.
@@ -256,7 +264,6 @@ module Karafka
256
264
 
257
265
  @mutex.synchronize do
258
266
  @closed = false
259
- @offsetting = false
260
267
  @paused_tpls.clear
261
268
  @kafka = build_consumer
262
269
  end
@@ -281,7 +288,6 @@ module Karafka
281
288
  # @param message [Karafka::Messages::Message]
282
289
  # @return [Boolean] true if we could store the offset (if we still own the partition)
283
290
  def internal_store_offset(message)
284
- @offsetting = true
285
291
  @kafka.store_offset(message)
286
292
  true
287
293
  rescue Rdkafka::RdkafkaError => e
@@ -294,11 +300,11 @@ module Karafka
294
300
  # Non thread-safe message committing method
295
301
  # @param async [Boolean] should the commit happen async or sync (async by default)
296
302
  # @return [Boolean] true if offset commit worked, false if we've lost the assignment
303
+ # @note We do **not** consider `no_offset` as any problem and we allow to commit offsets
304
+ # even when none are stored, because with sync commit, it refreshes the ownership state of the
305
+ # consumer in a sync way.
297
306
  def internal_commit_offsets(async: true)
298
- return true unless @offsetting
299
-
300
307
  @kafka.commit(nil, async)
301
- @offsetting = false
302
308
 
303
309
  true
304
310
  rescue Rdkafka::RdkafkaError => e
@@ -49,9 +49,8 @@ module Karafka
49
49
  # Callback that kicks in inside of rdkafka, when new partitions are assigned.
50
50
  #
51
51
  # @private
52
- # @param _ [Rdkafka::Consumer]
53
52
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
54
- def on_partitions_assigned(_, partitions)
53
+ def on_partitions_assigned(partitions)
55
54
  @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
56
55
  @changed = true
57
56
  end
@@ -59,9 +58,8 @@ module Karafka
59
58
  # Callback that kicks in inside of rdkafka, when partitions are revoked.
60
59
  #
61
60
  # @private
62
- # @param _ [Rdkafka::Consumer]
63
61
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
64
- def on_partitions_revoked(_, partitions)
62
+ def on_partitions_revoked(partitions)
65
63
  @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
66
64
  @changed = true
67
65
  end
@@ -12,9 +12,6 @@ module Karafka
12
12
  # @param received_at [Time] moment when we've received the message
13
13
  # @return [Karafka::Messages::Message] message object with payload and metadata
14
14
  def call(kafka_message, topic, received_at)
15
- # @see https://github.com/appsignal/rdkafka-ruby/issues/168
16
- kafka_message.headers.transform_keys!(&:to_s)
17
-
18
15
  metadata = Karafka::Messages::Metadata.new(
19
16
  timestamp: kafka_message.timestamp,
20
17
  headers: kafka_message.headers,
@@ -49,14 +49,13 @@ module Karafka
49
49
  #
50
50
  # @param code [Integer]
51
51
  # @param opaque [Rdkafka::Opaque]
52
- # @param consumer [Rdkafka::Consumer]
53
52
  # @param tpl [Rdkafka::Consumer::TopicPartitionList]
54
- def trigger_callbacks(code, opaque, consumer, tpl)
53
+ def trigger_callbacks(code, opaque, tpl)
55
54
  case code
56
55
  when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
57
- opaque.call_on_partitions_assigned(consumer, tpl)
56
+ opaque.call_on_partitions_assigned(tpl)
58
57
  when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
59
- opaque.call_on_partitions_revoked(consumer, tpl)
58
+ opaque.call_on_partitions_revoked(tpl)
60
59
  end
61
60
  rescue StandardError => e
62
61
  Karafka.monitor.instrument(
@@ -92,9 +91,8 @@ module Karafka
92
91
  return unless opaque
93
92
 
94
93
  tpl = ::Rdkafka::Consumer::TopicPartitionList.from_native_tpl(partitions_ptr).freeze
95
- consumer = ::Rdkafka::Consumer.new(client_ptr)
96
94
 
97
- pr.trigger_callbacks(code, opaque, consumer, tpl)
95
+ pr.trigger_callbacks(code, opaque, tpl)
98
96
  end
99
97
  end
100
98
  end
@@ -27,12 +27,7 @@ module Karafka
27
27
  # Ignore earlier offsets than the one we already committed
28
28
  return true if coordinator.seek_offset > message.offset
29
29
  return false if revoked?
30
-
31
- unless client.mark_as_consumed(message)
32
- coordinator.revoke
33
-
34
- return false
35
- end
30
+ return revoked? unless client.mark_as_consumed(message)
36
31
 
37
32
  coordinator.seek_offset = message.offset + 1
38
33
 
@@ -49,17 +44,38 @@ module Karafka
49
44
  return true if coordinator.seek_offset > message.offset
50
45
  return false if revoked?
51
46
 
52
- unless client.mark_as_consumed!(message)
53
- coordinator.revoke
54
-
55
- return false
56
- end
47
+ return revoked? unless client.mark_as_consumed!(message)
57
48
 
58
49
  coordinator.seek_offset = message.offset + 1
59
50
 
60
51
  true
61
52
  end
62
53
 
54
+ # Triggers an offset commit (async by default)
55
+ #
56
+ # @param async [Boolean] should we use async (default) or sync commit
57
+ # @return [Boolean] true if we still own the partition.
58
+ # @note Due to its async nature, this may not fully represent the offset state in some
59
+ # edge cases (like for example going beyond max.poll.interval)
60
+ def commit_offsets(async: true)
61
+ # Do not commit if we already lost the assignment
62
+ return false if revoked?
63
+ return true if client.commit_offsets(async: async)
64
+
65
+ # This will once more check the librdkafka revocation status and will revoke the
66
+ # coordinator in case it was not revoked
67
+ revoked?
68
+ end
69
+
70
+ # Triggers a synchronous offsets commit to Kafka
71
+ #
72
+ # @return [Boolean] true if we still own the partition, false otherwise.
73
+ # @note This is fully synchronous, hence the result of this can be used in DB transactions
74
+ # etc as a way of making sure, that we still own the partition.
75
+ def commit_offsets!
76
+ commit_offsets(async: false)
77
+ end
78
+
63
79
  # No actions needed for the standard flow here
64
80
  def handle_before_enqueue
65
81
  nil
@@ -47,6 +47,7 @@ module Karafka
47
47
  fetch.max.bytes
48
48
  fetch.message.max.bytes
49
49
  fetch.min.bytes
50
+ fetch.queue.backoff.ms
50
51
  fetch.wait.max.ms
51
52
  group.id
52
53
  group.instance.id
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.1.4'
6
+ VERSION = '2.1.5'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.4
4
+ version: 2.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
36
36
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
37
37
  -----END CERTIFICATE-----
38
- date: 2023-06-06 00:00:00.000000000 Z
38
+ date: 2023-06-19 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -43,20 +43,20 @@ dependencies:
43
43
  requirements:
44
44
  - - ">="
45
45
  - !ruby/object:Gem::Version
46
- version: 2.0.13
46
+ version: 2.1.0
47
47
  - - "<"
48
48
  - !ruby/object:Gem::Version
49
- version: 3.0.0
49
+ version: 2.2.0
50
50
  type: :runtime
51
51
  prerelease: false
52
52
  version_requirements: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - - ">="
55
55
  - !ruby/object:Gem::Version
56
- version: 2.0.13
56
+ version: 2.1.0
57
57
  - - "<"
58
58
  - !ruby/object:Gem::Version
59
- version: 3.0.0
59
+ version: 2.2.0
60
60
  - !ruby/object:Gem::Dependency
61
61
  name: thor
62
62
  requirement: !ruby/object:Gem::Requirement
@@ -77,7 +77,7 @@ dependencies:
77
77
  requirements:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
- version: 2.5.3
80
+ version: 2.6.1
81
81
  - - "<"
82
82
  - !ruby/object:Gem::Version
83
83
  version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
87
87
  requirements:
88
88
  - - ">="
89
89
  - !ruby/object:Gem::Version
90
- version: 2.5.3
90
+ version: 2.6.1
91
91
  - - "<"
92
92
  - !ruby/object:Gem::Version
93
93
  version: 3.0.0
@@ -214,7 +214,6 @@ files:
214
214
  - lib/karafka/messages/parser.rb
215
215
  - lib/karafka/messages/seek.rb
216
216
  - lib/karafka/patches/rdkafka/bindings.rb
217
- - lib/karafka/patches/rdkafka/consumer.rb
218
217
  - lib/karafka/pro.rb
219
218
  - lib/karafka/pro/active_job/consumer.rb
220
219
  - lib/karafka/pro/active_job/dispatcher.rb
metadata.gz.sig CHANGED
@@ -1,7 +1 @@
1
- J�SR�3a����Ɍ�-7.���5kH������"�\���7���h�� ���i�� Gj\���F�e)�5
2
- ޘVAO�V�P"�i(�%v+F
3
- �#��RF��0�`���u�����3�
4
- �hL�@wVS���:�oY�'P�q�-��P&��R8r4��!����b�o=1�P��>�Q[�}&�U/i]�����-0�5����8@���w,}���o�G�Ǎ�֡�B��� 1Y ���s+i��J�R��w�e��-�"*0�7�X
5
- ���L�hWy��*�
6
- ͝飝�'����(R�{\�H]0ƴ|ݖ��P)���C�
7
- �����JB]�e�)M�:6
1
+ ���B֣ުJ=1��|�J/m^�u��ˤ�^�qS2�/���e4��4t.�T�<$�H���Up.����~�R�֪�VY^U�7� ��v`�I
@@ -1,22 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Karafka
4
- # Patches to external components
5
- module Patches
6
- # Rdkafka related patches
7
- module Rdkafka
8
- # Rdkafka::Consumer patches
9
- module Consumer
10
- # A method that allows us to get the native kafka producer name
11
- # @return [String] producer instance name
12
- # @note We need this to make sure that we allocate proper dispatched events only to
13
- # callback listeners that should publish them
14
- def name
15
- @name ||= ::Rdkafka::Bindings.rd_kafka_name(@native_kafka)
16
- end
17
- end
18
- end
19
- end
20
- end
21
-
22
- ::Rdkafka::Consumer.include ::Karafka::Patches::Rdkafka::Consumer