karafka 2.3.0.alpha1 → 2.3.0.alpha2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de7ea23762cefa19d5f3620e92a39f0030cd8ff78f318c92f30c494d79b78163
4
- data.tar.gz: 775cfbd40d181036004dcf72dbcb84394dc8367bed0f6d69812f2324dc179d6f
3
+ metadata.gz: 07bebe70b6697a90d04154dcbfe5837f5bcfaf934073f91bbfc9e8939d9d1a6c
4
+ data.tar.gz: 13e41b276eee5142b55eb6908c8b9292bd8f802f470fb4e9bdc1f812dbd50189
5
5
  SHA512:
6
- metadata.gz: d68a4122a35afad517e4280b94f6f3d7cb3cab94fb37c11729e5e5c7a7aca082a7a272a52ff09a86e2f55ad0e078e234c88be79ce3b730527a2f6e7629ef259c
7
- data.tar.gz: aa2ddb108cc39caa8ad5c95a86d07006b5be374647e703414a7761ffd5c333010d7e53b9fd2c42216780e35f00639d8cf80126c291a59d0585424077840cc6b5
6
+ metadata.gz: fef2cbded4409d951cf7e752f25b7db016052cc5f2a77c54ccf7e8778fbf44edba74cd1b04cf82caf341cf05beda552af5c4cbd994a510c54ad1d3c4e561fd17
7
+ data.tar.gz: 00bf893d7c6f29e559530585c40ba721ab3a9f3ce5906cd7c5b6948f79725d6d99423444d9ec7e6f8593f9780aec3375e400986ed8478dfab2cbbabb5807a85d
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,6 +1,7 @@
1
1
  # Karafka framework changelog
2
2
 
3
3
  ## 2.3.0 (Unreleased)
4
+ - **[Feature]** Introduce Exactly-Once Semantics within consumers `#transaction` block (Pro)
4
5
  - **[Feature]** Provide ability to multiplex subscription groups (Pro)
5
6
  - **[Feature]** Provide `Karafka::Admin::Acl` for Kafka ACL management via the Admin APIs.
6
7
  - **[Feature]** Periodic Jobs (Pro)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.3.0.alpha1)
4
+ karafka (2.3.0.alpha2)
5
5
  karafka-core (>= 2.3.0.alpha1, < 2.4.0)
6
6
  waterdrop (>= 2.6.12, < 3.0.0)
7
7
  zeitwerk (~> 2.3)
@@ -9,10 +9,10 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activejob (7.1.2)
13
- activesupport (= 7.1.2)
12
+ activejob (7.1.3)
13
+ activesupport (= 7.1.3)
14
14
  globalid (>= 0.3.6)
15
- activesupport (7.1.2)
15
+ activesupport (7.1.3)
16
16
  base64
17
17
  bigdecimal
18
18
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -25,7 +25,7 @@ GEM
25
25
  base64 (0.2.0)
26
26
  bigdecimal (3.1.5)
27
27
  byebug (11.1.3)
28
- concurrent-ruby (1.2.2)
28
+ concurrent-ruby (1.2.3)
29
29
  connection_pool (2.4.1)
30
30
  diff-lcs (1.5.0)
31
31
  docile (1.4.0)
data/bin/integrations CHANGED
@@ -28,7 +28,7 @@ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../
28
28
  CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 3
29
29
 
30
30
  # How many bytes do we want to keep from the stdout in the buffer for when we need to print it
31
- MAX_BUFFER_OUTPUT = 51_200
31
+ MAX_BUFFER_OUTPUT = 102_400
32
32
 
33
33
  # Abstraction around a single test scenario execution process
34
34
  class Scenario
@@ -76,5 +76,9 @@ module Karafka
76
76
 
77
77
  # Raised when we want to un-pause listener that was not paused
78
78
  InvalidListenerPauseError = Class.new(BaseError)
79
+
80
+ # Raised in transactions when we attempt to store offset for a partition that we have lost
81
+ # This does not affect producer only transactions, hence we raise it only on offset storage
82
+ AssignmentLostError = Class.new(BaseError)
79
83
  end
80
84
  end
@@ -24,18 +24,9 @@ module Karafka
24
24
  # @note Manager operations relate to consumer groups and not subscription groups. Since
25
25
  # cluster operations can cause consumer group wide effects, we always apply only one
26
26
  # change on a consumer group.
27
- #
28
- # @note Since we collect statistical data from listeners and this happens in a background
29
- # thread, we need to make sure we lock not to have race conditions with expired data
30
- # eviction.
31
27
  class Manager < Karafka::Connection::Manager
32
28
  include Core::Helpers::Time
33
29
 
34
- # How long should we keep stale stats before evicting them completely
35
- EVICTION_DELAY = 5 * 60 * 1_000
36
-
37
- private_constant :EVICTION_DELAY
38
-
39
30
  # How long should we wait after a rebalance before doing anything on a consumer group
40
31
  #
41
32
  # @param scale_delay [Integer] How long should we wait before making any changes. Any
@@ -50,7 +41,6 @@ module Karafka
50
41
  state: '',
51
42
  join_state: '',
52
43
  state_age: 0,
53
- state_age_sync: monotonic_now,
54
44
  changed_at: monotonic_now
55
45
  }
56
46
  end
@@ -65,6 +55,9 @@ module Karafka
65
55
  def register(listeners)
66
56
  @listeners = listeners
67
57
 
58
+ # Preload all the keys into the hash so we never add keys to changes but just change them
59
+ listeners.each { |listener| @changes[listener.subscription_group.id] }
60
+
68
61
  in_sg_families do |first_subscription_group, sg_listeners|
69
62
  multiplexing = first_subscription_group.multiplexing
70
63
 
@@ -86,25 +79,22 @@ module Karafka
86
79
  # @note Please note that while we collect here per subscription group, we use those metrics
87
80
  # collectively on a whole consumer group. This reduces the friction.
88
81
  def notice(subscription_group_id, statistics)
89
- @mutex.synchronize do
90
- times = []
91
- # stateage is in microseconds
92
- # We monitor broker changes to make sure we do not introduce extra friction
93
- times << statistics['brokers'].values.map { |stats| stats['stateage'] }.min / 1_000
94
- times << statistics['cgrp']['rebalance_age']
95
- times << statistics['cgrp']['stateage']
96
-
97
- # Keep the previous change age for changes that were triggered by us
98
- previous_changed_at = @changes[subscription_group_id][:changed_at]
99
-
100
- @changes[subscription_group_id] = {
101
- state_age: times.min,
102
- changed_at: previous_changed_at,
103
- join_state: statistics['cgrp']['join_state'],
104
- state: statistics['cgrp']['state'],
105
- state_age_sync: monotonic_now
106
- }
107
- end
82
+ times = []
83
+ # stateage is in microseconds
84
+ # We monitor broker changes to make sure we do not introduce extra friction
85
+ times << statistics['brokers'].values.map { |stats| stats['stateage'] }.min / 1_000
86
+ times << statistics['cgrp']['rebalance_age']
87
+ times << statistics['cgrp']['stateage']
88
+
89
+ # Keep the previous change age for changes that were triggered by us
90
+ previous_changed_at = @changes[subscription_group_id][:changed_at]
91
+
92
+ @changes[subscription_group_id].merge!(
93
+ state_age: times.min,
94
+ changed_at: previous_changed_at,
95
+ join_state: statistics['cgrp']['join_state'],
96
+ state: statistics['cgrp']['state']
97
+ )
108
98
  end
109
99
 
110
100
  # Shuts down all the listeners when it is time (including moving to quiet) or rescales
@@ -158,8 +148,6 @@ module Karafka
158
148
  #
159
149
  # We always run scaling down and up because it may be applicable to different CGs
160
150
  def rescale
161
- evict
162
-
163
151
  scale_down
164
152
  scale_up
165
153
  end
@@ -232,23 +220,11 @@ module Karafka
232
220
  end
233
221
  end
234
222
 
235
- # Removes states that are no longer being reported for stopped/pending listeners
236
- def evict
237
- @mutex.synchronize do
238
- @changes.delete_if do |_, details|
239
- monotonic_now - details[:state_age_sync] >= EVICTION_DELAY
240
- end
241
- end
242
- end
243
-
244
223
  # Indicates, that something has changed on a subscription group. We consider every single
245
224
  # change we make as a change to the setup as well.
246
225
  # @param subscription_group_id [String]
247
226
  def touch(subscription_group_id)
248
- @mutex.synchronize do
249
- @changes[subscription_group_id][:changed_at] = 0
250
- @changes[subscription_group_id][:state_age_sync] = monotonic_now
251
- end
227
+ @changes[subscription_group_id][:changed_at] = 0
252
228
  end
253
229
 
254
230
  # @param sg_listeners [Array<Listener>] listeners from one multiplexed sg
@@ -257,17 +233,10 @@ module Karafka
257
233
  # are also stable. This is a strong indicator that no rebalances or other operations are
258
234
  # happening at a given moment.
259
235
  def stable?(sg_listeners)
260
- # If none of listeners has changes reported it means we did not yet start collecting
261
- # metrics about any of them and at least one must be present. We do not consider it
262
- # stable in such case as we still are waiting for metrics.
263
- return false if sg_listeners.none? do |sg_listener|
264
- @changes.key?(sg_listener.subscription_group.id)
265
- end
266
-
267
236
  sg_listeners.all? do |sg_listener|
268
- # Not all SGs may be started initially or may be stopped, we ignore them here as they
269
- # are irrelevant from the point of view of establishing stability
270
- next true unless @changes.key?(sg_listener.subscription_group.id)
237
+ # If a listener is not active, we do not take it into consideration when looking at
238
+ # the stability data
239
+ next true unless sg_listener.active?
271
240
 
272
241
  state = @changes[sg_listener.subscription_group.id]
273
242
 
@@ -27,6 +27,21 @@ module Karafka
27
27
  # Apply strategy for a non-feature based flow
28
28
  FEATURES = %i[].freeze
29
29
 
30
+ # Allows to set offset metadata that will be used with the upcoming marking as consumed
31
+ # as long as a different offset metadata was not used. After it was used either via
32
+ # `#mark_as_consumed` or `#mark_as_consumed!` it will be set back to `nil`. It is done
33
+ # that way to provide the end user with ability to influence metadata on the non-user
34
+ # initiated markings in complex flows.
35
+ #
36
+ # @param offset_metadata [String, nil] metadata we want to store with the upcoming
37
+ # marking as consumed
38
+ #
39
+ # @note Please be aware, that offset metadata set this way will be passed to any marking
40
+ # as consumed even if it was not user initiated. For example in the DLQ flow.
41
+ def store_offset_metadata(offset_metadata)
42
+ @_current_offset_metadata = offset_metadata
43
+ end
44
+
30
45
  # Marks message as consumed in an async way.
31
46
  #
32
47
  # @param message [Messages::Message] last successfully processed message.
@@ -38,7 +53,7 @@ module Karafka
38
53
  # processing another message. In case like this we do not pause on the message we've
39
54
  # already processed but rather at the next one. This applies to both sync and async
40
55
  # versions of this method.
41
- def mark_as_consumed(message, offset_metadata = nil)
56
+ def mark_as_consumed(message, offset_metadata = @_current_offset_metadata)
42
57
  if @_in_transaction
43
58
  mark_in_transaction(message, offset_metadata, true)
44
59
  else
@@ -54,6 +69,8 @@ module Karafka
54
69
  end
55
70
 
56
71
  true
72
+ ensure
73
+ @_current_offset_metadata = nil
57
74
  end
58
75
 
59
76
  # Marks message as consumed in a sync way.
@@ -62,7 +79,7 @@ module Karafka
62
79
  # @param offset_metadata [String, nil] offset metadata string or nil if nothing
63
80
  # @return [Boolean] true if we were able to mark the offset, false otherwise.
64
81
  # False indicates that we were not able and that we have lost the partition.
65
- def mark_as_consumed!(message, offset_metadata = nil)
82
+ def mark_as_consumed!(message, offset_metadata = @_current_offset_metadata)
66
83
  if @_in_transaction
67
84
  mark_in_transaction(message, offset_metadata, false)
68
85
  else
@@ -79,6 +96,8 @@ module Karafka
79
96
  end
80
97
 
81
98
  true
99
+ ensure
100
+ @_current_offset_metadata = nil
82
101
  end
83
102
 
84
103
  # Starts producer transaction, saves the transaction context for transactional marking
@@ -134,6 +153,7 @@ module Karafka
134
153
  # transaction state synchronization usage as within transaction it is always sync)
135
154
  def mark_in_transaction(message, offset_metadata, async)
136
155
  raise Errors::TransactionRequiredError unless @_in_transaction
156
+ raise Errors::AssignmentLostError if revoked?
137
157
 
138
158
  producer.transaction_mark_as_consumed(
139
159
  client,
@@ -35,7 +35,7 @@ module Karafka
35
35
  # @see `Strategies::Default#mark_as_consumed` for more details
36
36
  # @param message [Messages::Message]
37
37
  # @param offset_metadata [String, nil]
38
- def mark_as_consumed(message, offset_metadata = nil)
38
+ def mark_as_consumed(message, offset_metadata = @_current_offset_metadata)
39
39
  return super unless retrying?
40
40
  return super unless topic.dead_letter_queue.independent?
41
41
  return false unless super
@@ -43,6 +43,8 @@ module Karafka
43
43
  coordinator.pause_tracker.reset
44
44
 
45
45
  true
46
+ ensure
47
+ @_current_offset_metadata = nil
46
48
  end
47
49
 
48
50
  # Override of the standard `#mark_as_consumed!`. Resets the pause tracker count in case
@@ -51,7 +53,7 @@ module Karafka
51
53
  # @see `Strategies::Default#mark_as_consumed!` for more details
52
54
  # @param message [Messages::Message]
53
55
  # @param offset_metadata [String, nil]
54
- def mark_as_consumed!(message, offset_metadata = nil)
56
+ def mark_as_consumed!(message, offset_metadata = @_current_offset_metadata)
55
57
  return super unless retrying?
56
58
  return super unless topic.dead_letter_queue.independent?
57
59
  return false unless super
@@ -59,6 +61,8 @@ module Karafka
59
61
  coordinator.pause_tracker.reset
60
62
 
61
63
  true
64
+ ensure
65
+ @_current_offset_metadata = nil
62
66
  end
63
67
 
64
68
  # When we encounter non-recoverable message, we skip it and go on with our lives
@@ -33,7 +33,7 @@ module Karafka
33
33
  # @note This virtual offset management uses a regular default marking API underneath.
34
34
  # We do not alter the "real" marking API, as VPs are just one of many cases we want
35
35
  # to support and we do not want to impact them with collective offsets management
36
- def mark_as_consumed(message, offset_metadata = nil)
36
+ def mark_as_consumed(message, offset_metadata = @_current_offset_metadata)
37
37
  if @_in_transaction && !collapsed?
38
38
  mark_in_transaction(message, offset_metadata, true)
39
39
  elsif collapsed?
@@ -55,11 +55,13 @@ module Karafka
55
55
  manager.markable? ? super(*manager.markable) : revoked?
56
56
  end
57
57
  end
58
+ ensure
59
+ @_current_offset_metadata = nil
58
60
  end
59
61
 
60
62
  # @param message [Karafka::Messages::Message] blocking marks message as consumed
61
63
  # @param offset_metadata [String, nil]
62
- def mark_as_consumed!(message, offset_metadata = nil)
64
+ def mark_as_consumed!(message, offset_metadata = @_current_offset_metadata)
63
65
  if @_in_transaction && !collapsed?
64
66
  mark_in_transaction(message, offset_metadata, false)
65
67
  elsif collapsed?
@@ -73,6 +75,8 @@ module Karafka
73
75
  manager.markable? ? super(*manager.markable) : revoked?
74
76
  end
75
77
  end
78
+ ensure
79
+ @_current_offset_metadata = nil
76
80
  end
77
81
 
78
82
  # Stores the next offset for processing inside of the transaction when collapsed and
@@ -93,6 +97,8 @@ module Karafka
93
97
  # transaction state synchronization usage as within transaction it is always sync)
94
98
  def mark_in_transaction(message, offset_metadata, async)
95
99
  raise Errors::TransactionRequiredError unless @_in_transaction
100
+ # Prevent attempts to store offsets when we no longer own the assignment
101
+ raise Errors::AssignmentLostError if revoked?
96
102
 
97
103
  return super if collapsed?
98
104
 
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.3.0.alpha1'
6
+ VERSION = '2.3.0.alpha2'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.0.alpha1
4
+ version: 2.3.0.alpha2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2024-01-15 00:00:00.000000000 Z
38
+ date: 2024-01-17 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
metadata.gz.sig CHANGED
Binary file