karafka 2.0.10 → 2.0.12

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: f78f7cb985880d9172961be96386a0ebd37735831915f0cb9b9b46c832d2e9a9
-  data.tar.gz: a7f5a27cb3a6f0fa5185f32e5df7615f8a6013ef0018c85fb16c87de346f9362
+  metadata.gz: 2c407cd113e41314102fc910cf0b35e8081c55bdadae66635afe59491b58e390
+  data.tar.gz: 336dad28cc65218e33a2b3bc42b69a166ff4e8e6a10c65a435e57f75eca5ac90
 SHA512:
-  metadata.gz: 5b6fed517f69a2bd84b16e82d266251dd4b7564d52f466745f3a93be9106f866ec658581c38b1a0913dd9e126a17cf4b1f5e85c888447c6e1e4b790453e7c87e
-  data.tar.gz: 25cfa154028dd12519d7d4b72606242a4058c9fabd1655ba727a9d73a905a942cd80374e2f7357f410a098b74b623966fa840b1c6725f71d630bf3ce92db187e
+  metadata.gz: 82ffee28acdaa1d126944426a7b31628380fc04aca24002ea55067f6c80a0b72a885d246a656913ea35e79fa82a9d741c35125e31b75abbbf7fb0022360e3b2b
+  data.tar.gz: b64f9dc4fb2461f3cbe3a774231a661365af5b1be88a6c95b1543521d19d8171514eb5111f88ceb53e309b5ab433f982b9535c2bbb4919e4c07850570424605a
checksums.yaml.gz.sig CHANGED
Binary file
@@ -52,9 +52,14 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
+          # We run it against the oldest and the newest release of a given major to make
+          # sure that we do not rely on any syntax sugar introduced in later versions
           - '3.1'
+          - '3.1.0'
           - '3.0'
+          - '3.0.0'
           - '2.7'
+          - '2.7.0'
       include:
         - ruby: '3.1'
           coverage: 'true'
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
 # Karafka framework changelog
 
+## 2.0.12 (2022-10-06)
+- Commit stored offsets upon rebalance revocation event to reduce the number of messages that are re-processed.
+- Support the cooperative-sticky rebalance strategy.
+- Replace the offset commit after each batch with a per-rebalance commit.
+- Use instrumentation to publish internal rebalance errors.
+
+## 2.0.11 (2022-09-29)
+- Report early on network errors and on the max poll interval being exceeded, as these indicate critical problems that will be retried but may point to underlying issues in the system.
+- Fix support for Ruby 2.7.0 to 2.7.2 (#1045)
+
 ## 2.0.10 (2022-09-23)
 - Improve error recovery by delegating the recovery to the existing `librdkafka` instance.
 
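Note: to try the new cooperative-sticky support, the strategy can be passed straight to librdkafka through the kafka settings. A minimal sketch, assuming a hypothetical `ExampleApp` with placeholder topic and consumer names:

```ruby
# frozen_string_literal: true

# Minimal sketch: enabling the cooperative-sticky rebalance strategy.
# `ExampleApp`, the topic and `EventsConsumer` are hypothetical.
class ExampleApp < Karafka::App
  setup do |config|
    config.kafka = {
      'bootstrap.servers': '127.0.0.1:9092',
      # Delegated directly to librdkafka: incremental rebalances instead of
      # stop-the-world eager ones
      'partition.assignment.strategy': 'cooperative-sticky'
    }
  end

  routes.draw do
    topic :events do
      consumer EventsConsumer
    end
  end
end
```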
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.10)
+    karafka (2.0.12)
       karafka-core (>= 2.0.2, < 3.0.0)
       rdkafka (>= 0.12)
       thor (>= 0.20)
@@ -61,14 +61,13 @@ GEM
     thor (1.2.1)
     tzinfo (2.0.5)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.4.1)
+    waterdrop (2.4.2)
       karafka-core (>= 2.0.2, < 3.0.0)
       rdkafka (>= 0.10)
       zeitwerk (~> 2.3)
-    zeitwerk (2.6.0)
+    zeitwerk (2.6.1)
 
 PLATFORMS
-  arm64-darwin
   x86_64-linux
 
 DEPENDENCIES
@@ -188,6 +188,7 @@ module Karafka
       tpl = topic_partition_list(topic, partition) || @paused_tpls[topic][partition]
 
       return unless tpl
+
       # If we did not have it, it means we never paused this partition, thus no resume should
       # happen in the first place
       return unless @paused_tpls[topic].delete(partition)
@@ -317,7 +318,7 @@ module Karafka
         Rdkafka::Consumer::TopicPartitionList.new({ topic => [rdkafka_partition] })
       end
 
-      # Performs a single poll operation.
+      # Performs a single poll operation and handles retries and errors
       #
       # @param timeout [Integer] timeout for a single poll
       # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
@@ -330,20 +331,39 @@ module Karafka
 
         @kafka.poll(timeout)
       rescue ::Rdkafka::RdkafkaError => e
-        # Most of the errors can be safely ignored as librdkafka will recover from them
-        # @see https://github.com/edenhill/librdkafka/issues/1987#issuecomment-422008750
-        # @see https://github.com/edenhill/librdkafka/wiki/Error-handling
-        if time_poll.attempts > MAX_POLL_RETRIES || !time_poll.retryable?
+        early_report = false
+
+        # There are retryable issues on which we want to report fast, as they are a source
+        # of problems and can indicate bigger system instabilities.
+        # Those are mainly network issues and exceeding the max poll interval.
+        # We want to report early on exceeding the max poll interval, because it may mean
+        # that the underlying processing is taking too much time and it is not an LRJ.
+        case e.code
+        when :max_poll_exceeded # -147
+          early_report = true
+        when :network_exception # 13
+          early_report = true
+        when :transport # -195
+          early_report = true
+        end
+
+        retryable = time_poll.attempts <= MAX_POLL_RETRIES && time_poll.retryable?
+
+        if early_report || !retryable
           Karafka.monitor.instrument(
             'error.occurred',
             caller: self,
             error: e,
             type: 'connection.client.poll.error'
           )
-
-          raise
         end
 
+        raise unless retryable
+
+        # Most of the errors can be safely ignored as librdkafka will recover from them
+        # @see https://github.com/edenhill/librdkafka/issues/1987#issuecomment-422008750
+        # @see https://github.com/edenhill/librdkafka/wiki/Error-handling
+
         time_poll.checkpoint
         time_poll.backoff
 
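Note: since poll errors are now published on the instrumentation bus before (or instead of) being raised, they can be observed from application code. A minimal sketch, with an illustrative handler body:

```ruby
# Minimal sketch: observing the poll errors that the code above publishes.
# `Karafka.monitor.subscribe` is the standard Karafka instrumentation API;
# the logging inside the block is illustrative.
Karafka.monitor.subscribe('error.occurred') do |event|
  next unless event[:type] == 'connection.client.poll.error'

  Karafka.logger.warn(
    "Poll error #{event[:error].class}: #{event[:error].message}"
  )
end
```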
@@ -118,10 +118,6 @@ module Karafka
         build_and_schedule_consumption_jobs
 
         wait
-
-        # We don't use the `#commit_offsets!` here for performance reasons. This can be achieved
-        # if needed by using manual offset management.
-        @client.commit_offsets
       end
 
       # If we are stopping we will no longer schedule any jobs despite polling.
@@ -14,6 +14,9 @@ module Karafka
     # that are lost, are those that got revoked but did not get re-assigned back. We do not
     # expose this concept outside and we normalize to have them revoked, as it is irrelevant
     # from the rest of the code perspective as only those that are lost are truly revoked.
+    #
+    # @note For cooperative-sticky `#assigned_partitions` holds only the recently assigned
+    #   partitions, not all the partitions that are owned
     class RebalanceManager
       # Empty array for internal usage not to create new objects
       EMPTY_ARRAY = [].freeze
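Note: on the application side, the counterpart of this revocation handling is checking the revocation state inside long-running batches. A minimal sketch using the standard `revoked?` and `mark_as_consumed` consumer APIs (the consumer name and `persist` helper are hypothetical):

```ruby
# Minimal sketch: cooperating with rebalances from a consumer.
# `EventsConsumer` is hypothetical; `revoked?` and `mark_as_consumed`
# are standard Karafka 2.0 consumer APIs.
class EventsConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      # Stop early if this partition was taken away during a rebalance,
      # so we do not process data another process may already own
      return if revoked?

      persist(message.payload)
      # Store the offset so it can also be committed upon revocation
      mark_as_consumed(message)
    end
  end

  private

  # Placeholder for real business logic
  def persist(payload); end
end
```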
@@ -0,0 +1,132 @@
+# frozen_string_literal: true
+
+module Karafka
+  module Patches
+    module Rdkafka
+      # Binding patches that slightly change how rdkafka operates in certain places
+      module Bindings
+        include ::Rdkafka::Bindings
+
+        # Alias internally
+        RB = ::Rdkafka::Bindings
+
+        class << self
+          # Handle assignments on a cooperative rebalance
+          #
+          # @param client_ptr [FFI::Pointer]
+          # @param code [Integer]
+          # @param partitions_ptr [FFI::Pointer]
+          def on_cooperative_rebalance(client_ptr, code, partitions_ptr)
+            case code
+            when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
+              RB.rd_kafka_incremental_assign(client_ptr, partitions_ptr)
+            when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
+              RB.rd_kafka_commit(client_ptr, nil, false)
+              RB.rd_kafka_incremental_unassign(client_ptr, partitions_ptr)
+            else
+              RB.rd_kafka_assign(client_ptr, FFI::Pointer::NULL)
+            end
+          end
+
+          # Handle assignments on an eager rebalance
+          #
+          # @param client_ptr [FFI::Pointer]
+          # @param code [Integer]
+          # @param partitions_ptr [FFI::Pointer]
+          def on_eager_rebalance(client_ptr, code, partitions_ptr)
+            case code
+            when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
+              RB.rd_kafka_assign(client_ptr, partitions_ptr)
+            when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
+              RB.rd_kafka_commit(client_ptr, nil, false)
+              RB.rd_kafka_assign(client_ptr, FFI::Pointer::NULL)
+            else
+              RB.rd_kafka_assign(client_ptr, FFI::Pointer::NULL)
+            end
+          end
+
+          # Trigger Karafka callbacks
+          #
+          # @param code [Integer]
+          # @param opaque [Rdkafka::Opaque]
+          # @param consumer [Rdkafka::Consumer]
+          # @param tpl [Rdkafka::Consumer::TopicPartitionList]
+          def trigger_callbacks(code, opaque, consumer, tpl)
+            case code
+            when RB::RD_KAFKA_RESP_ERR__ASSIGN_PARTITIONS
+              opaque.call_on_partitions_assigned(consumer, tpl)
+            when RB::RD_KAFKA_RESP_ERR__REVOKE_PARTITIONS
+              opaque.call_on_partitions_revoked(consumer, tpl)
+            end
+          rescue StandardError => e
+            Karafka.monitor.instrument(
+              'error.occurred',
+              caller: self,
+              error: e,
+              type: 'connection.client.rebalance_callback.error'
+            )
+          end
+        end
+
+        # This patch changes a few things:
+        # - it commits offsets (if any) upon partition revocation, so fewer jobs need to be
+        #   reprocessed if they are assigned to a different process
+        # - it reports callback errors into the errors instrumentation instead of the logger
+        # - it catches only StandardError instead of Exception as we fully control the
+        #   directly executed callbacks
+        #
+        # @see https://docs.confluent.io/2.0.0/clients/librdkafka/classRdKafka_1_1RebalanceCb.html
+        RebalanceCallback = FFI::Function.new(
+          :void, %i[pointer int pointer pointer]
+        ) do |client_ptr, code, partitions_ptr, opaque_ptr|
+          # Patch reference
+          pr = ::Karafka::Patches::Rdkafka::Bindings
+
+          if RB.rd_kafka_rebalance_protocol(client_ptr) == 'COOPERATIVE'
+            pr.on_cooperative_rebalance(client_ptr, code, partitions_ptr)
+          else
+            pr.on_eager_rebalance(client_ptr, code, partitions_ptr)
+          end
+
+          opaque = ::Rdkafka::Config.opaques[opaque_ptr.to_i]
+          return unless opaque
+
+          tpl = ::Rdkafka::Consumer::TopicPartitionList.from_native_tpl(partitions_ptr).freeze
+          consumer = ::Rdkafka::Consumer.new(client_ptr)
+
+          pr.trigger_callbacks(code, opaque, consumer, tpl)
+        end
+      end
+    end
+  end
+end
+
+# We need to replace the original callback with ours.
+# At the moment there is no API in rdkafka-ruby to do so
+::Rdkafka::Bindings.send(
+  :remove_const,
+  'RebalanceCallback'
+)
+
+::Rdkafka::Bindings.const_set(
+  'RebalanceCallback',
+  Karafka::Patches::Rdkafka::Bindings::RebalanceCallback
+)
+
+::Rdkafka::Bindings.attach_function(
+  :rd_kafka_rebalance_protocol,
+  %i[pointer],
+  :string
+)
+
+::Rdkafka::Bindings.attach_function(
+  :rd_kafka_incremental_assign,
+  %i[pointer pointer],
+  :string
+)
+
+::Rdkafka::Bindings.attach_function(
+  :rd_kafka_incremental_unassign,
+  %i[pointer pointer],
+  :string
+)
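Note: because the patched callback reports failures through the instrumentation bus, rebalance callback errors can be observed the same way as the poll errors shown earlier. A minimal sketch, with an illustrative handler body:

```ruby
# Minimal sketch: observing rebalance callback errors published by the patch.
# The logging inside the block is illustrative.
Karafka.monitor.subscribe('error.occurred') do |event|
  next unless event[:type] == 'connection.client.rebalance_callback.error'

  Karafka.logger.error(
    "Rebalance callback failed: #{event[:error].message}"
  )
end
```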
@@ -21,7 +21,7 @@ module Karafka
     # We add it to make a multi-system development easier for people that don't use
     # kafka and don't understand the concept of consumer groups.
     def initialize(name)
-      @name = name
+      @name = name.to_s
       @id = Karafka::App.config.consumer_mapper.call(name)
       @topics = Topics.new([])
     end
@@ -52,7 +52,7 @@ module Karafka
     def subscription_group=(name, &block)
       # We cast it here, so the routing supports symbol based but that's anyhow later on
       # validated as a string
-      self.current_subscription_group_name = name.to_s
+      self.current_subscription_group_name = name
 
       Proxy.new(self, &block)
 
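Note: these casts are what keep symbol-based routing working end to end. A minimal sketch of a routing tree using symbols throughout (app, group, topic, and consumer names are hypothetical):

```ruby
# Minimal sketch: symbol-based routing, which the casts above normalize
# to strings internally. `ExampleApp` and `VisitsConsumer` are hypothetical.
class ExampleApp < Karafka::App
  routes.draw do
    consumer_group :batched_group do
      subscription_group :events do
        topic :visits do
          consumer VisitsConsumer
        end
      end
    end
  end
end
```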
@@ -14,17 +14,22 @@ module Karafka
       instance_eval(&block)
     end
 
-    # Translates the no "=" DSL of routing into elements assignments on target
-    # @param method_name [Symbol] name of the missing method
-    def method_missing(method_name, ...)
-      return super unless respond_to_missing?(method_name)
+    # Ruby 2.7.0 to 2.7.2 do not have arg forwarding, so we fall back to the old way
+    arg_forwarding = RUBY_VERSION < '3.0' ? '*args, &block' : '...'
+
+    class_eval <<~RUBY, __FILE__, __LINE__ + 1
+      # Translates the no "=" DSL of routing into elements assignments on target
+      # @param method_name [Symbol] name of the missing method
+      def method_missing(method_name, #{arg_forwarding})
+        return super unless respond_to_missing?(method_name)
 
-      if @target.respond_to?(:"#{method_name}=")
-        @target.public_send(:"#{method_name}=", ...)
-      else
-        @target.public_send(method_name, ...)
+        if @target.respond_to?(:"\#{method_name}=")
+          @target.public_send(:"\#{method_name}=", #{arg_forwarding})
+        else
+          @target.public_send(method_name, #{arg_forwarding})
+        end
       end
-    end
+    RUBY
 
     # Tells whether or not a given element exists on the target
     # @param method_name [Symbol] name of the missing method
@@ -10,6 +10,7 @@ module Karafka
       # @param message [Karafka::Messages::Message] Message object that we want to deserialize
       # @return [Hash] hash with deserialized JSON data
       def call(message)
+        # nil payload can be present for example for tombstone messages
        message.raw_payload.nil? ? nil : ::JSON.parse(message.raw_payload)
       end
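Note: a tombstone is a message with a nil payload, typically used on log-compacted topics to mark a key for deletion. Producing one through the WaterDrop-backed producer might look like this (topic and key are placeholders):

```ruby
# Minimal sketch: producing a tombstone (nil payload) that the deserializer
# above maps to nil instead of raising. Topic and key are placeholders.
Karafka.producer.produce_sync(
  topic: 'users',
  key: 'user-1',
  # nil payload marks the key for deletion once the topic gets compacted
  payload: nil
)
```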
@@ -3,5 +3,5 @@
 # Main module namespace
 module Karafka
   # Current Karafka version
-  VERSION = '2.0.10'
+  VERSION = '2.0.12'
 end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.10
+  version: 2.0.12
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
   MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
   -----END CERTIFICATE-----
-date: 2022-09-23 00:00:00.000000000 Z
+date: 2022-10-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: karafka-core
@@ -212,6 +212,7 @@ files:
 - lib/karafka/messages/messages.rb
 - lib/karafka/messages/metadata.rb
 - lib/karafka/messages/seek.rb
+- lib/karafka/patches/rdkafka/bindings.rb
 - lib/karafka/patches/rdkafka/consumer.rb
 - lib/karafka/pro.rb
 - lib/karafka/pro/active_job/consumer.rb
metadata.gz.sig CHANGED
Binary file