karafka 2.1.7 → 2.1.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f405521c7a6706cc95e764a4740e7570935f7595d34481bbe33fb617e5537978
4
- data.tar.gz: cd6671c441c07e31050bbddab290ba4d31e4a580a646cfd965edf58c19ff150c
3
+ metadata.gz: 65296040c91ec5646620f047567c9229b7d9044d41d37e690f9f6685d1199e36
4
+ data.tar.gz: dedea921e5bc3b9b985861d9b6bcf6c5b5de1a148ef2c1aa72b0d96de44fb623
5
5
  SHA512:
6
- metadata.gz: 7b5e343a0d2c6e1f885c6eac6509de2f411b54e1a30ce12fac6fa18bb813d82ef666444345b92d8348ac4955cdabfc47ad3658312482f6c500ca169814f10517
7
- data.tar.gz: 1b0c319f85dde3bc20b21a842da220d513351b436b3e4de08d56e69a02c36c7c2cd4187c879596ffc73f5dffc2cc3f032c6a8cdbd958ce34138866d27aa00b2b
6
+ metadata.gz: 781e45af707a9818e9166d6c37d8be214e9334703ee598f4ce8924c99ec71283cc4364de6662fbd1bb7454600b94b71e2e32750b58630e3631235e5bf593c3f9
7
+ data.tar.gz: b86b74c4d7909d504e0b60ae536360412622451a7a9868034dca63fbc3c5e962ba122967aeb8a1491196ce6b879b25eef11497798d861fcf85d3acca40fc8163
checksums.yaml.gz.sig CHANGED
@@ -1,2 +1,2 @@
1
- 9�$��<�8��5��O��/ �<�_�:)*τޜ��|�!�s�}���6�5�[�B_��E���s��;k���'Zp��'Ŕ������${�j�.P������/}0�`T��+��V�>Ez1��
2
-
1
+ l`��-Z��W`p��\T�ӓ�v>��yz��3���?�
2
+ �Đ���%�MY��e��A;�ꦞ�������,��;0s:���&_uC��KE+7f��ls\�k��F�;m�L
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.1.9 (2023-08-06)
4
+ - **[Feature]** Introduce the ability to customize the pause strategy on a per-topic basis (Pro).
5
+ - [Improvement] Disable the extensive messages logging in the default `karafka.rb` template.
6
+ - [Change] Require `waterdrop` `>= 2.6.6` due to extra `LoggerListener` API.
7
+
8
+ ## 2.1.8 (2023-07-29)
9
+ - [Improvement] Introduce `Karafka::BaseConsumer#used?` method to indicate that at least one invocation of `#consume` took or will take place. This can be used as a replacement for the indirect `messages.count` check for shutdown and revocation to ensure that the consumption took place or is taking place (in case of running LRJ).
10
+ - [Improvement] Make `messages#to_a` return a copy of the underlying array to prevent scenarios where the mutation impacts offset management.
11
+ - [Improvement] Mitigate a librdkafka `cooperative-sticky` rebalance crash issue.
12
+ - [Improvement] Provide the ability to overwrite `consumer_persistence` per subscribed topic. This is mostly useful for plugin and extension developers.
13
+ - [Fix] Fix a case where the performance tracker would crash in case of mutation of messages to an empty state.
14
+
3
15
  ## 2.1.7 (2023-07-22)
4
16
  - [Improvement] Always query for watermarks in the Iterator to improve the initial response time.
5
17
  - [Improvement] Add `max_wait_time` option to the Iterator.
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.1.7)
4
+ karafka (2.1.9)
5
5
  karafka-core (>= 2.1.1, < 2.2.0)
6
6
  thor (>= 0.20)
7
- waterdrop (>= 2.6.2, < 3.0.0)
7
+ waterdrop (>= 2.6.6, < 3.0.0)
8
8
  zeitwerk (~> 2.3)
9
9
 
10
10
  GEM
@@ -37,13 +37,13 @@ GEM
37
37
  ffi (~> 1.15)
38
38
  mini_portile2 (~> 2.6)
39
39
  rake (> 12)
40
- karafka-web (0.6.1)
40
+ karafka-web (0.6.3)
41
41
  erubi (~> 1.4)
42
42
  karafka (>= 2.1.4, < 3.0.0)
43
43
  karafka-core (>= 2.0.13, < 3.0.0)
44
44
  roda (~> 3.68, >= 3.68)
45
45
  tilt (~> 2.0)
46
- mini_portile2 (2.8.2)
46
+ mini_portile2 (2.8.4)
47
47
  minitest (5.18.1)
48
48
  rack (3.0.8)
49
49
  rake (13.0.6)
@@ -72,7 +72,7 @@ GEM
72
72
  tilt (2.2.0)
73
73
  tzinfo (2.0.6)
74
74
  concurrent-ruby (~> 1.0)
75
- waterdrop (2.6.4)
75
+ waterdrop (2.6.6)
76
76
  karafka-core (>= 2.1.1, < 3.0.0)
77
77
  zeitwerk (~> 2.3)
78
78
  zeitwerk (2.6.8)
@@ -23,6 +23,11 @@ en:
23
23
  delaying.delay_format: 'needs to be equal or more than 0 and an integer'
24
24
  delaying.active_format: 'needs to be boolean'
25
25
 
26
+ pause_timeout_format: needs to be an integer bigger than 0
27
+ pause_max_timeout_format: needs to be an integer bigger than 0
28
+ pause_with_exponential_backoff_format: needs to be either true or false
29
+ pause_timeout_max_timeout_vs_pause_max_timeout: pause_timeout must be less or equal to pause_max_timeout
30
+
26
31
  config:
27
32
  encryption.active_format: 'needs to be either true or false'
28
33
  encryption.public_key_invalid: 'is not a valid public RSA key'
data/karafka.gemspec CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
23
23
 
24
24
  spec.add_dependency 'karafka-core', '>= 2.1.1', '< 2.2.0'
25
25
  spec.add_dependency 'thor', '>= 0.20'
26
- spec.add_dependency 'waterdrop', '>= 2.6.2', '< 3.0.0'
26
+ spec.add_dependency 'waterdrop', '>= 2.6.6', '< 3.0.0'
27
27
  spec.add_dependency 'zeitwerk', '~> 2.3'
28
28
 
29
29
  if $PROGRAM_NAME.end_with?('gem')
@@ -25,6 +25,7 @@ module Karafka
25
25
  # Creates new consumer and assigns it an id
26
26
  def initialize
27
27
  @id = SecureRandom.hex(6)
28
+ @used = false
28
29
  end
29
30
 
30
31
  # Can be used to run preparation code prior to the job being enqueued
@@ -34,6 +35,7 @@ module Karafka
34
35
  # not as a part of the public api. This should not perform any extensive operations as it is
35
36
  # blocking and running in the listener thread.
36
37
  def on_before_enqueue
38
+ @used = true
37
39
  handle_before_enqueue
38
40
  rescue StandardError => e
39
41
  Karafka.monitor.instrument(
@@ -160,6 +162,14 @@ module Karafka
160
162
  # some teardown procedures (closing file handler, etc).
161
163
  def shutdown; end
162
164
 
165
+ # @return [Boolean] was this consumer in active use. Active use means running `#consume` at
166
+ # least once. Consumer may have to run `#revoked` or `#shutdown` despite not running
167
+ # `#consume` previously in delayed job cases and other cases that potentially involve running
168
+ # the `Jobs::Idle` for house-keeping
169
+ def used?
170
+ @used
171
+ end
172
+
163
173
  # Pauses processing on a given offset for the current topic partition
164
174
  #
165
175
  # After given partition is resumed, it will continue processing from the given offset
@@ -23,11 +23,17 @@ module Karafka
23
23
  # Max time for a TPL request. We increase it to compensate for remote clusters latency
24
24
  TPL_REQUEST_TIMEOUT = 2_000
25
25
 
26
+ # 1 minute of max wait for the first rebalance before a forceful attempt
27
+ # This applies only to a case when a short-lived Karafka instance with a client would be
28
+ # closed before first rebalance. Mitigates a librdkafka bug.
29
+ COOPERATIVE_STICKY_MAX_WAIT = 60_000
30
+
26
31
  # We want to make sure we never close several clients in the same moment to prevent
27
32
  # potential race conditions and other issues
28
33
  SHUTDOWN_MUTEX = Mutex.new
29
34
 
30
- private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :TPL_REQUEST_TIMEOUT
35
+ private_constant :MAX_POLL_RETRIES, :SHUTDOWN_MUTEX, :TPL_REQUEST_TIMEOUT,
36
+ :COOPERATIVE_STICKY_MAX_WAIT
31
37
 
32
38
  # Creates a new consumer instance.
33
39
  #
@@ -226,6 +232,22 @@ module Karafka
226
232
  # as until all the consumers are stopped, the server will keep running serving only
227
233
  # part of the messages
228
234
  def stop
235
+ # This ensures that we do not stop the underlying client until it passes the first
236
+ # rebalance for cooperative-sticky. Otherwise librdkafka may crash
237
+ #
238
+ # We set a timeout just in case the rebalance would never happen or would last for an
239
+ # extensive time period.
240
+ #
241
+ # @see https://github.com/confluentinc/librdkafka/issues/4312
242
+ if @subscription_group.kafka[:'partition.assignment.strategy'] == 'cooperative-sticky'
243
+ (COOPERATIVE_STICKY_MAX_WAIT / 100).times do
244
+ # If we're past the first rebalance, no need to wait
245
+ break if @rebalance_manager.active?
246
+
247
+ sleep(0.1)
248
+ end
249
+ end
250
+
229
251
  close
230
252
  end
231
253
 
@@ -192,7 +192,7 @@ module Karafka
192
192
  # Resumes processing of partitions that were paused due to an error.
193
193
  def resume_paused_partitions
194
194
  @coordinators.resume do |topic, partition|
195
- @client.resume(topic, partition)
195
+ @client.resume(topic.name, partition)
196
196
  end
197
197
  end
198
198
 
@@ -14,20 +14,20 @@ module Karafka
14
14
 
15
15
  # Creates or fetches pause tracker of a given topic partition.
16
16
  #
17
- # @param topic [String] topic name
17
+ # @param topic [::Karafka::Routing::Topic] topic
18
18
  # @param partition [Integer] partition number
19
19
  # @return [Karafka::TimeTrackers::Pause] pause tracker instance
20
20
  def fetch(topic, partition)
21
21
  @pauses[topic][partition] ||= TimeTrackers::Pause.new(
22
- timeout: Karafka::App.config.pause_timeout,
23
- max_timeout: Karafka::App.config.pause_max_timeout,
24
- exponential_backoff: Karafka::App.config.pause_with_exponential_backoff
22
+ timeout: topic.pause_timeout,
23
+ max_timeout: topic.pause_max_timeout,
24
+ exponential_backoff: topic.pause_with_exponential_backoff
25
25
  )
26
26
  end
27
27
 
28
28
  # Resumes processing of partitions for which pause time has ended.
29
29
  #
30
- # @yieldparam [String] topic name
30
+ # @yieldparam [Karafka::Routing::Topic] topic
31
31
  # @yieldparam [Integer] partition number
32
32
  def resume
33
33
  @pauses.each do |topic, partitions|
@@ -30,6 +30,7 @@ module Karafka
30
30
  @assigned_partitions = {}
31
31
  @revoked_partitions = {}
32
32
  @changed = false
33
+ @active = false
33
34
  end
34
35
 
35
36
  # Resets the rebalance manager state
@@ -46,11 +47,20 @@ module Karafka
46
47
  @changed
47
48
  end
48
49
 
50
+ # @return [Boolean] true if there was at least one rebalance
51
+ # @note This method is needed to make sure that when using cooperative-sticky, we do not
52
+ # close until first rebalance. Otherwise librdkafka may crash.
53
+ # @see https://github.com/confluentinc/librdkafka/issues/4312
54
+ def active?
55
+ @active
56
+ end
57
+
49
58
  # Callback that kicks in inside of rdkafka, when new partitions are assigned.
50
59
  #
51
60
  # @private
52
61
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
53
62
  def on_partitions_assigned(partitions)
63
+ @active = true
54
64
  @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
55
65
  @changed = true
56
66
  end
@@ -60,6 +70,7 @@ module Karafka
60
70
  # @private
61
71
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
62
72
  def on_partitions_revoked(partitions)
73
+ @active = true
63
74
  @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
64
75
  @changed = true
65
76
  end
@@ -60,10 +60,12 @@ module Karafka
60
60
  @messages_array.size
61
61
  end
62
62
 
63
- # @return [Array<Karafka::Messages::Message>] pure array with messages
63
+ # @return [Array<Karafka::Messages::Message>] copy of the pure array with messages
64
64
  def to_a
65
- @messages_array
65
+ @messages_array.dup
66
66
  end
67
+
68
+ alias count size
67
69
  end
68
70
  end
69
71
  end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ class Pausing < Base
19
+ # Contract to make sure that the pause settings on a per-topic basis are as expected
20
+ class Contract < Contracts::Base
21
+ configure do |config|
22
+ config.error_messages = YAML.safe_load(
23
+ File.read(
24
+ File.join(Karafka.gem_root, 'config', 'locales', 'pro_errors.yml')
25
+ )
26
+ ).fetch('en').fetch('validations').fetch('topic')
27
+
28
+ required(:pause_timeout) { |val| val.is_a?(Integer) && val.positive? }
29
+ required(:pause_max_timeout) { |val| val.is_a?(Integer) && val.positive? }
30
+ required(:pause_with_exponential_backoff) { |val| [true, false].include?(val) }
31
+
32
+ virtual do |data, errors|
33
+ next unless errors.empty?
34
+
35
+ pause_timeout = data.fetch(:pause_timeout)
36
+ pause_max_timeout = data.fetch(:pause_max_timeout)
37
+
38
+ next if pause_timeout <= pause_max_timeout
39
+
40
+ [[%i[pause_timeout], :max_timeout_vs_pause_max_timeout]]
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ class Pausing < Base
19
+ # Expansion allowing for per-topic pause strategy definitions
20
+ module Topic
21
+ # Allows for per-topic pausing strategy setting
22
+ #
23
+ # @param timeout [Integer] how long should we wait upon processing error (milliseconds)
24
+ # @param max_timeout [Integer] what is the max timeout in case of an exponential
25
+ # backoff (milliseconds)
26
+ # @param with_exponential_backoff [Boolean] should we use exponential backoff
27
+ #
28
+ # @note We do not construct here the nested config like we do with other routing
29
+ # features, because this feature operates on the OSS layer by injection of values
30
+ # and a nested config is not needed.
31
+ def pause(timeout: nil, max_timeout: nil, with_exponential_backoff: nil)
32
+ self.pause_timeout = timeout if timeout
33
+ self.pause_max_timeout = max_timeout if max_timeout
34
+
35
+ return unless with_exponential_backoff
36
+
37
+ self.pause_with_exponential_backoff = with_exponential_backoff
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This Karafka component is a Pro component under a commercial license.
4
+ # This Karafka component is NOT licensed under LGPL.
5
+ #
6
+ # All of the commercial components are present in the lib/karafka/pro directory of this
7
+ # repository and their usage requires commercial license agreement.
8
+ #
9
+ # Karafka has also commercial-friendly license, commercial support and commercial components.
10
+ #
11
+ # By sending a pull request to the pro components, you are agreeing to transfer the copyright of
12
+ # your code to Maciej Mensfeld.
13
+
14
+ module Karafka
15
+ module Pro
16
+ module Routing
17
+ module Features
18
+ # Feature allowing for a per-route reconfiguration of the pausing strategy
19
+ # It can be useful when different topics should have different backoff policies
20
+ class Pausing < Base
21
+ end
22
+ end
23
+ end
24
+ end
25
+ end
@@ -17,14 +17,18 @@ module Karafka
17
17
  @topics = topics
18
18
  end
19
19
 
20
- # @param topic [String] topic name
20
+ # @param topic_name [String] topic name
21
21
  # @param partition [Integer] partition number
22
- def find_or_create(topic, partition)
23
- @coordinators[topic][partition] ||= @coordinator_class.new(
24
- @topics.find(topic),
25
- partition,
26
- @pauses_manager.fetch(topic, partition)
27
- )
22
+ def find_or_create(topic_name, partition)
23
+ @coordinators[topic_name][partition] ||= begin
24
+ routing_topic = @topics.find(topic_name)
25
+
26
+ @coordinator_class.new(
27
+ routing_topic,
28
+ partition,
29
+ @pauses_manager.fetch(routing_topic, partition)
30
+ )
31
+ end
28
32
  end
29
33
 
30
34
  # Resumes processing of partitions for which pause time has ended.
@@ -35,16 +39,16 @@ module Karafka
35
39
  @pauses_manager.resume(&block)
36
40
  end
37
41
 
38
- # @param topic [String] topic name
42
+ # @param topic_name [String] topic name
39
43
  # @param partition [Integer] partition number
40
- def revoke(topic, partition)
41
- return unless @coordinators[topic].key?(partition)
44
+ def revoke(topic_name, partition)
45
+ return unless @coordinators[topic_name].key?(partition)
42
46
 
43
47
  # The fact that we delete here does not change the fact that the executor still holds the
44
48
  # reference to this coordinator. We delete it here, as we will no longer process any
45
49
  # new stuff with it and we may need a new coordinator if we regain this partition, but the
46
50
  # coordinator may still be in use
47
- @coordinators[topic].delete(partition).revoke
51
+ @coordinators[topic_name].delete(partition).revoke
48
52
  end
49
53
 
50
54
  # Clears coordinators and re-creates the pauses manager
@@ -48,7 +48,7 @@ module Karafka
48
48
  # We reload the consumers with each batch instead of relying on some external signals
49
49
  # when needed for consistency. That way devs may have it on or off and not in this
50
50
  # middle state, where re-creation of a consumer instance would occur only sometimes
51
- @consumer = nil unless ::Karafka::App.config.consumer_persistence
51
+ @consumer = nil unless topic.consumer_persistence
52
52
 
53
53
  # First we build messages batch...
54
54
  consumer.messages = Messages::Builders::Messages.call(
@@ -17,6 +17,10 @@ module Karafka
17
17
  max_messages
18
18
  max_wait_time
19
19
  initial_offset
20
+ consumer_persistence
21
+ pause_timeout
22
+ pause_max_timeout
23
+ pause_with_exponential_backoff
20
24
  ].freeze
21
25
 
22
26
  private_constant :INHERITABLE_ATTRIBUTES
@@ -50,7 +54,7 @@ module Karafka
50
54
 
51
55
  # @return [Class] consumer class that we should use
52
56
  def consumer
53
- if Karafka::App.config.consumer_persistence
57
+ if consumer_persistence
54
58
  # When persistence of consumers is on, no need to reload them
55
59
  @consumer
56
60
  else
@@ -43,7 +43,13 @@ class KarafkaApp < Karafka::App
43
43
  # This logger prints the producer development info using the Karafka logger.
44
44
  # It is similar to the consumer logger listener but producer oriented.
45
45
  Karafka.producer.monitor.subscribe(
46
- WaterDrop::Instrumentation::LoggerListener.new(Karafka.logger)
46
+ WaterDrop::Instrumentation::LoggerListener.new(
47
+ # Log producer operations using the Karafka logger
48
+ Karafka.logger,
49
+ # If you set this to true, logs will contain each message details
50
+ # Please note, that this can be extensive
51
+ log_messages: false
52
+ )
47
53
  )
48
54
 
49
55
  routes.draw do
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.1.7'
6
+ VERSION = '2.1.9'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.7
4
+ version: 2.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
36
36
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
37
37
  -----END CERTIFICATE-----
38
- date: 2023-07-22 00:00:00.000000000 Z
38
+ date: 2023-08-06 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
@@ -77,7 +77,7 @@ dependencies:
77
77
  requirements:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
- version: 2.6.2
80
+ version: 2.6.6
81
81
  - - "<"
82
82
  - !ruby/object:Gem::Version
83
83
  version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
87
87
  requirements:
88
88
  - - ">="
89
89
  - !ruby/object:Gem::Version
90
- version: 2.6.2
90
+ version: 2.6.6
91
91
  - - "<"
92
92
  - !ruby/object:Gem::Version
93
93
  version: 3.0.0
@@ -314,6 +314,9 @@ files:
314
314
  - lib/karafka/pro/routing/features/long_running_job/config.rb
315
315
  - lib/karafka/pro/routing/features/long_running_job/contract.rb
316
316
  - lib/karafka/pro/routing/features/long_running_job/topic.rb
317
+ - lib/karafka/pro/routing/features/pausing.rb
318
+ - lib/karafka/pro/routing/features/pausing/contract.rb
319
+ - lib/karafka/pro/routing/features/pausing/topic.rb
317
320
  - lib/karafka/pro/routing/features/throttling.rb
318
321
  - lib/karafka/pro/routing/features/throttling/config.rb
319
322
  - lib/karafka/pro/routing/features/throttling/contract.rb
metadata.gz.sig CHANGED
Binary file