karafka 2.3.0.alpha1 → 2.3.0.alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: de7ea23762cefa19d5f3620e92a39f0030cd8ff78f318c92f30c494d79b78163
4
- data.tar.gz: 775cfbd40d181036004dcf72dbcb84394dc8367bed0f6d69812f2324dc179d6f
3
+ metadata.gz: 07bebe70b6697a90d04154dcbfe5837f5bcfaf934073f91bbfc9e8939d9d1a6c
4
+ data.tar.gz: 13e41b276eee5142b55eb6908c8b9292bd8f802f470fb4e9bdc1f812dbd50189
5
5
  SHA512:
6
- metadata.gz: d68a4122a35afad517e4280b94f6f3d7cb3cab94fb37c11729e5e5c7a7aca082a7a272a52ff09a86e2f55ad0e078e234c88be79ce3b730527a2f6e7629ef259c
7
- data.tar.gz: aa2ddb108cc39caa8ad5c95a86d07006b5be374647e703414a7761ffd5c333010d7e53b9fd2c42216780e35f00639d8cf80126c291a59d0585424077840cc6b5
6
+ metadata.gz: fef2cbded4409d951cf7e752f25b7db016052cc5f2a77c54ccf7e8778fbf44edba74cd1b04cf82caf341cf05beda552af5c4cbd994a510c54ad1d3c4e561fd17
7
+ data.tar.gz: 00bf893d7c6f29e559530585c40ba721ab3a9f3ce5906cd7c5b6948f79725d6d99423444d9ec7e6f8593f9780aec3375e400986ed8478dfab2cbbabb5807a85d
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,6 +1,7 @@
1
1
  # Karafka framework changelog
2
2
 
3
3
  ## 2.3.0 (Unreleased)
4
+ - **[Feature]** Introduce Exactly-Once Semantics within the consumer's `#transaction` block (Pro)
4
5
  - **[Feature]** Provide ability to multiplex subscription groups (Pro)
5
6
  - **[Feature]** Provide `Karafka::Admin::Acl` for Kafka ACL management via the Admin APIs.
6
7
  - **[Feature]** Periodic Jobs (Pro)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.3.0.alpha1)
4
+ karafka (2.3.0.alpha2)
5
5
  karafka-core (>= 2.3.0.alpha1, < 2.4.0)
6
6
  waterdrop (>= 2.6.12, < 3.0.0)
7
7
  zeitwerk (~> 2.3)
@@ -9,10 +9,10 @@ PATH
9
9
  GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
- activejob (7.1.2)
13
- activesupport (= 7.1.2)
12
+ activejob (7.1.3)
13
+ activesupport (= 7.1.3)
14
14
  globalid (>= 0.3.6)
15
- activesupport (7.1.2)
15
+ activesupport (7.1.3)
16
16
  base64
17
17
  bigdecimal
18
18
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -25,7 +25,7 @@ GEM
25
25
  base64 (0.2.0)
26
26
  bigdecimal (3.1.5)
27
27
  byebug (11.1.3)
28
- concurrent-ruby (1.2.2)
28
+ concurrent-ruby (1.2.3)
29
29
  connection_pool (2.4.1)
30
30
  diff-lcs (1.5.0)
31
31
  docile (1.4.0)
data/bin/integrations CHANGED
@@ -28,7 +28,7 @@ ROOT_PATH = Pathname.new(File.expand_path(File.join(File.dirname(__FILE__), '../
28
28
  CONCURRENCY = ENV.key?('CI') ? 5 : Etc.nprocessors * 3
29
29
 
30
30
 # How many bytes do we want to keep from the stdout in the buffer for when we need to print it
31
- MAX_BUFFER_OUTPUT = 51_200
31
+ MAX_BUFFER_OUTPUT = 102_400
32
32
 
33
33
  # Abstraction around a single test scenario execution process
34
34
  class Scenario
@@ -76,5 +76,9 @@ module Karafka
76
76
 
77
77
  # Raised when we want to un-pause listener that was not paused
78
78
  InvalidListenerPauseError = Class.new(BaseError)
79
+
80
+ # Raised in transactions when we attempt to store offset for a partition that we have lost
81
+ # This does not affect producer-only transactions, hence we raise it only on offset storage
82
+ AssignmentLostError = Class.new(BaseError)
79
83
  end
80
84
  end
@@ -24,18 +24,9 @@ module Karafka
24
24
  # @note Manager operations relate to consumer groups and not subscription groups. Since
25
25
  # cluster operations can cause consumer group wide effects, we always apply only one
26
26
  # change on a consumer group.
27
- #
28
- # @note Since we collect statistical data from listeners and this happens in a background
29
- # thread, we need to make sure we lock not to have race conditions with expired data
30
- # eviction.
31
27
  class Manager < Karafka::Connection::Manager
32
28
  include Core::Helpers::Time
33
29
 
34
- # How long should we keep stale stats before evicting them completely
35
- EVICTION_DELAY = 5 * 60 * 1_000
36
-
37
- private_constant :EVICTION_DELAY
38
-
39
30
  # How long should we wait after a rebalance before doing anything on a consumer group
40
31
  #
41
32
  # @param scale_delay [Integer] How long should we wait before making any changes. Any
@@ -50,7 +41,6 @@ module Karafka
50
41
  state: '',
51
42
  join_state: '',
52
43
  state_age: 0,
53
- state_age_sync: monotonic_now,
54
44
  changed_at: monotonic_now
55
45
  }
56
46
  end
@@ -65,6 +55,9 @@ module Karafka
65
55
  def register(listeners)
66
56
  @listeners = listeners
67
57
 
58
+ # Preload all the keys into the hash so we never add keys to changes but just change them
59
+ listeners.each { |listener| @changes[listener.subscription_group.id] }
60
+
68
61
  in_sg_families do |first_subscription_group, sg_listeners|
69
62
  multiplexing = first_subscription_group.multiplexing
70
63
 
@@ -86,25 +79,22 @@ module Karafka
86
79
  # @note Please note that while we collect here per subscription group, we use those metrics
87
80
  # collectively on a whole consumer group. This reduces the friction.
88
81
  def notice(subscription_group_id, statistics)
89
- @mutex.synchronize do
90
- times = []
91
- # stateage is in microseconds
92
- # We monitor broker changes to make sure we do not introduce extra friction
93
- times << statistics['brokers'].values.map { |stats| stats['stateage'] }.min / 1_000
94
- times << statistics['cgrp']['rebalance_age']
95
- times << statistics['cgrp']['stateage']
96
-
97
- # Keep the previous change age for changes that were triggered by us
98
- previous_changed_at = @changes[subscription_group_id][:changed_at]
99
-
100
- @changes[subscription_group_id] = {
101
- state_age: times.min,
102
- changed_at: previous_changed_at,
103
- join_state: statistics['cgrp']['join_state'],
104
- state: statistics['cgrp']['state'],
105
- state_age_sync: monotonic_now
106
- }
107
- end
82
+ times = []
83
+ # stateage is in microseconds
84
+ # We monitor broker changes to make sure we do not introduce extra friction
85
+ times << statistics['brokers'].values.map { |stats| stats['stateage'] }.min / 1_000
86
+ times << statistics['cgrp']['rebalance_age']
87
+ times << statistics['cgrp']['stateage']
88
+
89
+ # Keep the previous change age for changes that were triggered by us
90
+ previous_changed_at = @changes[subscription_group_id][:changed_at]
91
+
92
+ @changes[subscription_group_id].merge!(
93
+ state_age: times.min,
94
+ changed_at: previous_changed_at,
95
+ join_state: statistics['cgrp']['join_state'],
96
+ state: statistics['cgrp']['state']
97
+ )
108
98
  end
109
99
 
110
100
  # Shuts down all the listeners when it is time (including moving to quiet) or rescales
@@ -158,8 +148,6 @@ module Karafka
158
148
  #
159
149
  # We always run scaling down and up because it may be applicable to different CGs
160
150
  def rescale
161
- evict
162
-
163
151
  scale_down
164
152
  scale_up
165
153
  end
@@ -232,23 +220,11 @@ module Karafka
232
220
  end
233
221
  end
234
222
 
235
- # Removes states that are no longer being reported for stopped/pending listeners
236
- def evict
237
- @mutex.synchronize do
238
- @changes.delete_if do |_, details|
239
- monotonic_now - details[:state_age_sync] >= EVICTION_DELAY
240
- end
241
- end
242
- end
243
-
244
223
  # Indicates, that something has changed on a subscription group. We consider every single
245
224
  # change we make as a change to the setup as well.
246
225
  # @param subscription_group_id [String]
247
226
  def touch(subscription_group_id)
248
- @mutex.synchronize do
249
- @changes[subscription_group_id][:changed_at] = 0
250
- @changes[subscription_group_id][:state_age_sync] = monotonic_now
251
- end
227
+ @changes[subscription_group_id][:changed_at] = 0
252
228
  end
253
229
 
254
230
  # @param sg_listeners [Array<Listener>] listeners from one multiplexed sg
@@ -257,17 +233,10 @@ module Karafka
257
233
  # are also stable. This is a strong indicator that no rebalances or other operations are
258
234
  # happening at a given moment.
259
235
  def stable?(sg_listeners)
260
- # If none of listeners has changes reported it means we did not yet start collecting
261
- # metrics about any of them and at least one must be present. We do not consider it
262
- # stable in such case as we still are waiting for metrics.
263
- return false if sg_listeners.none? do |sg_listener|
264
- @changes.key?(sg_listener.subscription_group.id)
265
- end
266
-
267
236
  sg_listeners.all? do |sg_listener|
268
- # Not all SGs may be started initially or may be stopped, we ignore them here as they
269
- # are irrelevant from the point of view of establishing stability
270
- next true unless @changes.key?(sg_listener.subscription_group.id)
237
+ # If a listener is not active, we do not take it into consideration when looking at
238
+ # the stability data
239
+ next true unless sg_listener.active?
271
240
 
272
241
  state = @changes[sg_listener.subscription_group.id]
273
242
 
@@ -27,6 +27,21 @@ module Karafka
27
27
  # Apply strategy for a non-feature based flow
28
28
  FEATURES = %i[].freeze
29
29
 
30
+ # Allows to set offset metadata that will be used with the upcoming marking as consumed
31
+ # as long as a different offset metadata was not used. After it was used either via
32
+ # `#mark_as_consumed` or `#mark_as_consumed!` it will be set back to `nil`. It is done
33
+ # that way to provide the end user with ability to influence metadata on the non-user
34
+ # initiated markings in complex flows.
35
+ #
36
+ # @param offset_metadata [String, nil] metadata we want to store with the upcoming
37
+ # marking as consumed
38
+ #
39
+ # @note Please be aware, that offset metadata set this way will be passed to any marking
40
+ # as consumed even if it was not user initiated. For example in the DLQ flow.
41
+ def store_offset_metadata(offset_metadata)
42
+ @_current_offset_metadata = offset_metadata
43
+ end
44
+
30
45
  # Marks message as consumed in an async way.
31
46
  #
32
47
  # @param message [Messages::Message] last successfully processed message.
@@ -38,7 +53,7 @@ module Karafka
38
53
  # processing another message. In case like this we do not pause on the message we've
39
54
  # already processed but rather at the next one. This applies to both sync and async
40
55
  # versions of this method.
41
- def mark_as_consumed(message, offset_metadata = nil)
56
+ def mark_as_consumed(message, offset_metadata = @_current_offset_metadata)
42
57
  if @_in_transaction
43
58
  mark_in_transaction(message, offset_metadata, true)
44
59
  else
@@ -54,6 +69,8 @@ module Karafka
54
69
  end
55
70
 
56
71
  true
72
+ ensure
73
+ @_current_offset_metadata = nil
57
74
  end
58
75
 
59
76
  # Marks message as consumed in a sync way.
@@ -62,7 +79,7 @@ module Karafka
62
79
  # @param offset_metadata [String, nil] offset metadata string or nil if nothing
63
80
  # @return [Boolean] true if we were able to mark the offset, false otherwise.
64
81
  # False indicates that we were not able and that we have lost the partition.
65
- def mark_as_consumed!(message, offset_metadata = nil)
82
+ def mark_as_consumed!(message, offset_metadata = @_current_offset_metadata)
66
83
  if @_in_transaction
67
84
  mark_in_transaction(message, offset_metadata, false)
68
85
  else
@@ -79,6 +96,8 @@ module Karafka
79
96
  end
80
97
 
81
98
  true
99
+ ensure
100
+ @_current_offset_metadata = nil
82
101
  end
83
102
 
84
103
  # Starts producer transaction, saves the transaction context for transactional marking
@@ -134,6 +153,7 @@ module Karafka
134
153
  # transaction state synchronization usage as within transaction it is always sync)
135
154
  def mark_in_transaction(message, offset_metadata, async)
136
155
  raise Errors::TransactionRequiredError unless @_in_transaction
156
+ raise Errors::AssignmentLostError if revoked?
137
157
 
138
158
  producer.transaction_mark_as_consumed(
139
159
  client,
@@ -35,7 +35,7 @@ module Karafka
35
35
  # @see `Strategies::Default#mark_as_consumed` for more details
36
36
  # @param message [Messages::Message]
37
37
  # @param offset_metadata [String, nil]
38
- def mark_as_consumed(message, offset_metadata = nil)
38
+ def mark_as_consumed(message, offset_metadata = @_current_offset_metadata)
39
39
  return super unless retrying?
40
40
  return super unless topic.dead_letter_queue.independent?
41
41
  return false unless super
@@ -43,6 +43,8 @@ module Karafka
43
43
  coordinator.pause_tracker.reset
44
44
 
45
45
  true
46
+ ensure
47
+ @_current_offset_metadata = nil
46
48
  end
47
49
 
48
50
  # Override of the standard `#mark_as_consumed!`. Resets the pause tracker count in case
@@ -51,7 +53,7 @@ module Karafka
51
53
  # @see `Strategies::Default#mark_as_consumed!` for more details
52
54
  # @param message [Messages::Message]
53
55
  # @param offset_metadata [String, nil]
54
- def mark_as_consumed!(message, offset_metadata = nil)
56
+ def mark_as_consumed!(message, offset_metadata = @_current_offset_metadata)
55
57
  return super unless retrying?
56
58
  return super unless topic.dead_letter_queue.independent?
57
59
  return false unless super
@@ -59,6 +61,8 @@ module Karafka
59
61
  coordinator.pause_tracker.reset
60
62
 
61
63
  true
64
+ ensure
65
+ @_current_offset_metadata = nil
62
66
  end
63
67
 
64
68
  # When we encounter non-recoverable message, we skip it and go on with our lives
@@ -33,7 +33,7 @@ module Karafka
33
33
  # @note This virtual offset management uses a regular default marking API underneath.
34
34
  # We do not alter the "real" marking API, as VPs are just one of many cases we want
35
35
  # to support and we do not want to impact them with collective offsets management
36
- def mark_as_consumed(message, offset_metadata = nil)
36
+ def mark_as_consumed(message, offset_metadata = @_current_offset_metadata)
37
37
  if @_in_transaction && !collapsed?
38
38
  mark_in_transaction(message, offset_metadata, true)
39
39
  elsif collapsed?
@@ -55,11 +55,13 @@ module Karafka
55
55
  manager.markable? ? super(*manager.markable) : revoked?
56
56
  end
57
57
  end
58
+ ensure
59
+ @_current_offset_metadata = nil
58
60
  end
59
61
 
60
62
  # @param message [Karafka::Messages::Message] blocking marks message as consumed
61
63
  # @param offset_metadata [String, nil]
62
- def mark_as_consumed!(message, offset_metadata = nil)
64
+ def mark_as_consumed!(message, offset_metadata = @_current_offset_metadata)
63
65
  if @_in_transaction && !collapsed?
64
66
  mark_in_transaction(message, offset_metadata, false)
65
67
  elsif collapsed?
@@ -73,6 +75,8 @@ module Karafka
73
75
  manager.markable? ? super(*manager.markable) : revoked?
74
76
  end
75
77
  end
78
+ ensure
79
+ @_current_offset_metadata = nil
76
80
  end
77
81
 
78
82
  # Stores the next offset for processing inside of the transaction when collapsed and
@@ -93,6 +97,8 @@ module Karafka
93
97
  # transaction state synchronization usage as within transaction it is always sync)
94
98
  def mark_in_transaction(message, offset_metadata, async)
95
99
  raise Errors::TransactionRequiredError unless @_in_transaction
100
+ # Prevent from attempts of offset storage when we no longer own the assignment
101
+ raise Errors::AssignmentLostError if revoked?
96
102
 
97
103
  return super if collapsed?
98
104
 
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.3.0.alpha1'
6
+ VERSION = '2.3.0.alpha2'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.0.alpha1
4
+ version: 2.3.0.alpha2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2024-01-15 00:00:00.000000000 Z
38
+ date: 2024-01-17 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: karafka-core
metadata.gz.sig CHANGED
Binary file