karafka 2.0.0.alpha5 → 2.0.0.alpha6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 062ee1f4c49d482daa48aa1af06aae08b83fb879ecfb4a6a14d5b2b34ed2975a
-  data.tar.gz: 28b92b8b5cea506641e339d59f53122fb18deb1637c20a2b34038ae2000b6f17
+  metadata.gz: 862df94b26c24809f82e07f71c39433b90ef08f68f053a004a87aa466b872dac
+  data.tar.gz: 2c533cbd6c271fe282f59c2030d9cc885555242bb8bc9316d0264a5ccfd694a0
 SHA512:
-  metadata.gz: '0497e94e2aa16ee20ded58e17313c9259bacb6f3fa91259e30f0cb0560b58a1eecb11447fb150e09cd659bb24d48e0cc732dc3fc7cc585a2aefe980df2e5b3f1'
-  data.tar.gz: 280407edd6298a7e62f970d2f9a2d6d1ff6eba4737ea4369eba20c4a83b9d00a0efa24ef36ceabd9def060839039fcf925c341f520efa2ab68532d484482f4fd
+  metadata.gz: 8ce3720e535d65f121bcbfd957286cbc41404f2aff2751622183367862db55f8231c286d5bbede4a18649eafcd085952a7fc8e7569a74dbd3633c9db906114e5
+  data.tar.gz: e77819ccd2be263b02958fcee71ce9228c93c7dee62fabbea61711fbbdbffa997bb9a85a7f668cf8bb596b7a98a773ace560ad5075c46e70f20cd302b354b0fe
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
 # Karafka framework changelog
 
+## 2.0.0-alpha6 (2022-04-17)
+- Fix a bug where, with a missing boot file under Rails, the railtie would fail with a generic exception (#818)
+- Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
+- Replace the `consumer.consume` event with `consumer.consumed` to match the actual behaviour
+- Make sure that offset committing happens before the `consumer.consumed` event is propagated
+- Fix a failure when Karafka is not installed directly (is just a dependency) (#817)
+- Evict messages from partitions that were lost upon rebalancing (#825)
+- Do **not** run `#revoked` on partitions that were lost and assigned back upon rebalancing (#825)
+- Remove potential duplicates that could occur upon a rebalance with re-assigned partitions (#825)
+- Optimize the shutdown process of the integration test suite's additional consumers (#828)
+- Optimize messages eviction and duplicates removal when polling stops due to a lack of messages
+- Add a static group membership integration spec
+
 ## 2.0.0-alpha5 (2022-04-03)
 - Rename StdoutListener to LoggerListener (#811)
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    karafka (2.0.0.alpha5)
+    karafka (2.0.0.alpha6)
       dry-configurable (~> 0.13)
       dry-monitor (~> 0.5)
       dry-validation (~> 1.7)
data/bin/integrations CHANGED
@@ -44,17 +44,30 @@ class Scenario
   # @param path [String] path to the scenarios file
   def initialize(path)
     @path = path
-    @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
-    @started_at = current_time
     # Last 1024 characters from stdout
     @stdout_tail = ''
   end
 
+  # Starts running given scenario in a separate process
+  def start
+    @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
+    @started_at = current_time
+  end
+
   # @return [String] integration spec name
   def name
     @path.gsub("#{ROOT_PATH}/spec/integrations/", '')
   end
 
+  # @return [Boolean] true if spec is pristine
+  def pristine?
+    scenario_dir = File.dirname(@path)
+
+    # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
+    # to run bundle install, etc in order to run it
+    File.exist?(File.join(scenario_dir, 'Gemfile'))
+  end
+
   # @return [Boolean] did this scenario finish or is it still running
   def finished?
     # If the thread is running too long, kill it
@@ -73,6 +86,13 @@ class Scenario
     !@wait_thr.alive?
   end
 
+  # @return [Boolean] did this scenario finish successfully or not
+  def success?
+    expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
+
+    expected_exit_codes.include?(exit_code)
+  end
+
   # @return [Integer] pid of the process of this scenario
   def pid
     @wait_thr.pid
@@ -84,13 +104,6 @@
     @wait_thr.value&.exitstatus || 123
   end
 
-  # @return [Boolean] did this scenario finish successfully or not
-  def success?
-    expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
-
-    expected_exit_codes.include?(exit_code)
-  end
-
   # Prints a status report when scenario is finished and stdout if it failed
   def report
     result = success? ? "\e[#{32}m#{'OK'}\e[0m" : "\e[#{31}m#{'FAILED'}\e[0m"
@@ -109,11 +122,10 @@
   # Sets up a proper environment for a given spec to run and returns the run command
   # @return [String] run command
   def init_and_build_cmd
-    scenario_dir = File.dirname(@path)
-
     # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
     # to run bundle install, etc in order to run it
-    if File.exist?(File.join(scenario_dir, 'Gemfile'))
+    if pristine?
+      scenario_dir = File.dirname(@path)
       # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
       temp_dir = Dir.mktmpdir
       file_name = File.basename(@path)
@@ -141,31 +153,6 @@
   end
 end
 
-# Simple array to keep track of active integration processes thread running with info on which
-# test scenario is running
-active_scenarios = []
-
-# Finished runners
-finished_scenarios = []
-
-# Waits for any of the processes to be finished and tracks exit codes
-#
-# @param active_scenarios [Array] active runners
-# @param finished_scenarios [Hash] finished forks exit codes
-def wait_and_track(active_scenarios, finished_scenarios)
-  exited = active_scenarios.find(&:finished?)
-
-  if exited
-    scenario = active_scenarios.delete(exited)
-
-    scenario.report
-
-    finished_scenarios << scenario
-  else
-    Thread.pass
-  end
-end
-
 # Load all the specs
 specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
 
@@ -182,15 +169,40 @@ seed = (ENV['SEED'] || rand(0..10_000)).to_i
 
 puts "Random seed: #{seed}"
 
-specs.shuffle(random: Random.new(seed)).each do |integration_test|
-  scenario = Scenario.new(integration_test)
+scenarios = specs
+  .shuffle(random: Random.new(seed))
+  .map { |integration_test| Scenario.new(integration_test) }
 
-  active_scenarios << scenario
+regulars = scenarios.reject(&:pristine?)
+pristine = scenarios.select(&:pristine?)
 
-  wait_and_track(active_scenarios, finished_scenarios) until active_scenarios.size < CONCURRENCY
-end
+active_scenarios = []
+finished_scenarios = []
+
+while finished_scenarios.size < scenarios.size
+  # If we have space to run another scenario, we add it
+  if active_scenarios.size < CONCURRENCY
+    scenario = nil
+    # We can run only one pristine spec at a time due to concurrency issues within bundler.
+    # Since they usually take longer than others, we try to run them as early as possible when
+    # there is a slot
+    scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
+    scenario ||= regulars.pop
+
+    if scenario
+      scenario.start
+      active_scenarios << scenario
+    end
+  end
 
-wait_and_track(active_scenarios, finished_scenarios) while !active_scenarios.empty?
+  active_scenarios.select(&:finished?).each do |exited|
+    scenario = active_scenarios.delete(exited)
+    scenario.report
+    finished_scenarios << scenario
+  end
+
+  sleep(0.1)
+end
 
 # Fail all if any of the tests does not have expected exit code
 raise IntegrationTestError unless finished_scenarios.all?(&:success?)
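
The runner above supervises every scenario as a separate OS process and polls for completion instead of blocking on it, which is what lets it cap concurrency and keep at most one pristine spec in flight. A minimal, self-contained sketch of that supervision pattern using only the Ruby standard library (the command and variable names here are illustrative, not taken from the script):

```ruby
require 'open3'

# Spawn a child process without blocking the supervising thread
stdin, stdout, stderr, wait_thr = Open3.popen3('ruby -e "sleep 1; exit 0"')

# Poll for completion the same way the runner checks #finished? on each active
# scenario, doing other work (or sleeping briefly) in between
sleep(0.1) while wait_thr.alive?

# The exit status becomes available once the process has terminated
puts "exit code: #{wait_thr.value.exitstatus}"
puts stdout.read

[stdin, stdout, stderr].each(&:close)
```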
data/docker-compose.yml CHANGED
@@ -14,7 +14,9 @@ services:
       KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
       KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
       KAFKA_CREATE_TOPICS:
-        "integrations_0_03:3:1,\
+        "integrations_0_02:2:1,\
+        integrations_1_02:2:1,\
+        integrations_0_03:3:1,\
         integrations_1_03:3:1,\
         integrations_2_03:3:1,\
         integrations_0_10:10:1,\
@@ -21,18 +21,18 @@ module Karafka
     # that may not yet kick in when error occurs. That way we pause always on the last processed
     # message.
     def on_consume
-      Karafka.monitor.instrument('consumer.consume', caller: self) do
+      Karafka.monitor.instrument('consumer.consumed', caller: self) do
         consume
-      end
 
-      pause.reset
+        pause.reset
 
-      # Mark as consumed only if manual offset management is not on
-      return if topic.manual_offset_management
+        # Mark as consumed only if manual offset management is not on
+        return if topic.manual_offset_management
 
-      # We use the non-blocking one here. If someone needs the blocking one, can implement it with
-      # manual offset management
-      mark_as_consumed(messages.last)
+        # We use the non-blocking one here. If someone needs the blocking one, can implement it
+        # with manual offset management
+        mark_as_consumed(messages.last)
+      end
     rescue StandardError => e
       Karafka.monitor.instrument(
         'error.occurred',
@@ -48,6 +48,7 @@ module Karafka
         time_poll.start
 
         @buffer.clear
+        @rebalance_manager.clear
 
         loop do
           # Don't fetch more messages if we do not have any time left
@@ -58,13 +59,23 @@ module Karafka
           # Fetch message within our time boundaries
           message = poll(time_poll.remaining)
 
-          # If there are no more messages, return what we have
-          break unless message
-
-          @buffer << message
+          # Put a message to the buffer if there is one
+          @buffer << message if message
 
           # Track time spent on all of the processing and polling
           time_poll.checkpoint
+
+          # Upon polling, the rebalance manager might have been updated.
+          # If partition revocation happens, we need to remove messages from revoked partitions
+          # as well as ensure we do not have duplicates due to the offset reset for partitions
+          # that we got assigned
+          remove_revoked_and_duplicated_messages if @rebalance_manager.revoked_partitions?
+
+          # Finally, once we've (potentially) removed revoked partitions, etc, if no messages
+          # were returned, we can break.
+          # Worth keeping in mind that the rebalance manager might have been updated despite no
+          # messages being returned during a poll
+          break unless message
         end
 
         @buffer
@@ -84,6 +95,9 @@ module Karafka
       # Ignoring a case where there would not be an offset (for example when rebalance occurs).
       #
       # @param async [Boolean] should the commit happen async or sync (async by default)
+      # @return [Boolean] whether committing was successful. It may not be when we no longer
+      #   own a given partition.
+      #
       # @note This will commit all the offsets for the whole consumer. In order to achieve
       #   granular control over where the offset should be for particular topic partitions, the
       #   store_offset should be used to only store new offset when we want it to be flushed
@@ -212,6 +226,8 @@ module Karafka
           ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
 
           @kafka.close
+          @buffer.clear
+          @rebalance_manager.clear
         end
       end
 
@@ -232,7 +248,7 @@ module Karafka
      # Performs a single poll operation.
      #
      # @param timeout [Integer] timeout for a single poll
-     # @return [Array<Rdkafka::Consumer::Message>, nil] fetched messages or nil if nothing polled
+     # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
      def poll(timeout)
        time_poll ||= TimeTrackers::Poll.new(timeout)
 
@@ -301,6 +317,20 @@ module Karafka
 
        consumer
      end
+
+     # We may have a case where in the middle of data polling, we've lost a partition.
+     # In a case like this we should remove all the pre-buffered messages from lost partitions
+     # as we are no longer responsible in a given process for processing those messages and
+     # they should have been picked up by a different process.
+     def remove_revoked_and_duplicated_messages
+       @rebalance_manager.revoked_partitions.each do |topic, partitions|
+         partitions.each do |partition|
+           @buffer.delete(topic, partition)
+         end
+       end
+
+       @buffer.uniq!
+     end
    end
  end
end
@@ -42,6 +42,37 @@ module Karafka
        @groups[message.topic][message.partition] << message
      end
 
+     # Removes given topic and partition data out of the buffer
+     # This is used when there's a partition revocation
+     # @param topic [String] topic we're interested in
+     # @param partition [Integer] partition of which data we want to remove
+     def delete(topic, partition)
+       return unless @groups.key?(topic)
+       return unless @groups.fetch(topic).key?(partition)
+
+       topic_data = @groups.fetch(topic)
+       topic_data.delete(partition)
+
+       recount!
+
+       # If there are no more partitions to handle in a given topic, remove it completely
+       @groups.delete(topic) if topic_data.empty?
+     end
+
+     # Removes duplicated messages from the same partitions
+     # This should be used only when a rebalance occurs, as we may get data we already have
+     # due to the processing restarting from the last committed offset. In cases like this,
+     # we want to ensure as few duplications as possible
+     def uniq!
+       @groups.each_value do |partitions|
+         partitions.each_value do |messages|
+           messages.uniq!(&:offset)
+         end
+       end
+
+       recount!
+     end
+
      # Removes all the data from the buffer.
      #
      # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
@@ -52,6 +83,15 @@ module Karafka
        @size = 0
        @groups.each_value(&:clear)
      end
+
+     private
+
+     # Updates the messages count if we performed any operations that could change the state
+     def recount!
+       @size = @groups.each_value.sum do |partitions|
+         partitions.each_value.map(&:count).sum
+       end
+     end
    end
  end
end
@@ -9,35 +9,50 @@ module Karafka
    #
    # @note Since this does not happen really often, we try to stick with same objects for the
    #   empty states most of the time, so we don't create many objects during the manager life
+   #
+   # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
+   #   that are lost are those that got revoked but did not get re-assigned back. We do not
+   #   expose this concept outside and we normalize to have them revoked, as it is irrelevant
+   #   from the rest of the code's perspective: only those that are lost are truly revoked.
    class RebalanceManager
+     # Empty array for internal usage not to create new objects
+     EMPTY_ARRAY = [].freeze
+
+     private_constant :EMPTY_ARRAY
+
      # @return [RebalanceManager]
      def initialize
-       @assigned = {}
-       @revoked = {}
+       @assigned_partitions = {}
+       @revoked_partitions = {}
+       @lost_partitions = {}
      end
 
-     # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
-     #   which we've got new partitions assigned and array with ids of the partitions as the value
-     # @note Once assigned partitions are fetched, the state will be reset since the callbacks
-     #   for new assigned partitions are set only during a state change
-     def assigned_partitions
-       return @assigned if @assigned.empty?
-
-       result = @assigned.dup
-       @assigned.clear
-       result
+     # Resets the rebalance manager state
+     # This needs to be done before each polling loop as during the polling, the state may be
+     # changed
+     def clear
+       @assigned_partitions.clear
+       @revoked_partitions.clear
+       @lost_partitions.clear
      end
 
      # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
      #   which we've lost partitions and array with ids of the partitions as the value
-     # @note Once revoked partitions are fetched, the state will be reset since the callbacks
-     #   for new revoked partitions are set only during a state change
+     # @note We do not consider topics and partitions that got revoked and assigned back as lost
      def revoked_partitions
-       return @revoked if @revoked.empty?
+       return @revoked_partitions if @revoked_partitions.empty?
+       return @lost_partitions unless @lost_partitions.empty?
+
+       @revoked_partitions.each do |topic, partitions|
+         @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
+       end
+
+       @lost_partitions
+     end
 
-       result = @revoked.dup
-       @revoked.clear
-       result
+     # @return [Boolean] true if any partitions were revoked
+     def revoked_partitions?
+       !revoked_partitions.empty?
      end
 
      # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -46,7 +61,7 @@ module Karafka
      # @param _ [Rdkafka::Consumer]
      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
      def on_partitions_assigned(_, partitions)
-       @assigned = partitions.to_h.transform_values { |part| part.map(&:partition) }
+       @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
      end
 
      # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -55,7 +70,7 @@ module Karafka
      # @param _ [Rdkafka::Consumer]
      # @param partitions [Rdkafka::Consumer::TopicPartitionList]
      def on_partitions_revoked(_, partitions)
-       @revoked = partitions.to_h.transform_values { |part| part.map(&:partition) }
+       @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
      end
    end
  end
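
The "lost partitions" normalization in the rebalance manager above boils down to a per-topic set difference: whatever was revoked and not assigned back within the same rebalance is treated as truly revoked. A minimal sketch of that computation with made-up topic and partition values:

```ruby
# Data as it would be reported by the revocation and assignment callbacks
# during a single rebalance (illustrative values)
revoked_partitions  = { 'events' => [0, 1, 2] }
assigned_partitions = { 'events' => [1, 2] }

# Partitions 1 and 2 were revoked but immediately assigned back, so only
# partition 0 is truly lost and only it will have #revoked run for it
lost_partitions = revoked_partitions.to_h do |topic, partitions|
  [topic, partitions - assigned_partitions.fetch(topic, [])]
end

lost_partitions # => { "events" => [0] }
```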
@@ -22,7 +22,7 @@ module Karafka
        app.stopping
        app.stopped
 
-       consumer.consume
+       consumer.consumed
        consumer.revoked
        consumer.shutdown
 
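With the event registered as `consumer.consumed` instead of `consumer.consume`, custom subscriptions have to use the new name. A hedged sketch of such a subscription; the `:caller` payload key follows from the `caller: self` instrumentation call shown earlier, while the logged attributes are illustrative:

```ruby
# The event now fires after #consume has run and, as of this release, after the
# offsets have already been committed
Karafka.monitor.subscribe('consumer.consumed') do |event|
  consumer = event[:caller]

  Karafka.logger.info(
    "Consumed #{consumer.messages.count} message(s) from #{consumer.topic.name}"
  )
end
```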
@@ -82,8 +82,20 @@ if rails
     initializer 'karafka.require_karafka_boot_file' do |app|
       rails6plus = Rails.gem_version >= Gem::Version.new('6.0.0')
 
+      # If the boot file location is set to "false", we should not raise an exception and we
+      # should just not load karafka stuff. Setting this explicitly to false indicates that
+      # karafka is part of the supply chain but it is not a first class citizen of a given
+      # system (may be just a dependency of a dependency), thus railtie should not kick in to
+      # load the non-existing boot file
+      next if Karafka.boot_file.to_s == 'false'
+
       karafka_boot_file = Rails.root.join(Karafka.boot_file.to_s).to_s
 
+      # Provide a more comprehensive error for when there is no boot file
+      unless File.exist?(karafka_boot_file)
+        raise(Karafka::Errors::MissingBootFileError, karafka_boot_file)
+      end
+
       if rails6plus
         app.reloader.to_prepare do
           # Load Karafka boot file, so it can be used in Rails server context
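
For an application where Karafka is only a transitive dependency, the railtie can now be opted out of by pointing the boot file setting at the string 'false'. A sketch, assuming the boot file location is resolved from the KARAFKA_BOOT_FILE environment variable as in other Karafka releases:

```ruby
# Set before Rails initializers run, e.g. at the top of config/application.rb
# (assumption: Karafka.boot_file falls back to ENV['KARAFKA_BOOT_FILE'])
ENV['KARAFKA_BOOT_FILE'] = 'false'

# With that in place, the initializer above takes the `next` branch instead of
# raising Karafka::Errors::MissingBootFileError for a missing karafka.rb
```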
@@ -60,7 +60,7 @@ module Karafka
      # option [Boolean] should we leave offset management to the user
      setting :manual_offset_management, default: false
      # option max_messages [Integer] how many messages do we want to fetch from Kafka in one go
-     setting :max_messages, default: 100_000
+     setting :max_messages, default: 1_000
      # option [Integer] number of milliseconds we can wait while fetching data
      setting :max_wait_time, default: 10_000
      # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
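
Since the `max_messages` default drops from 100_000 to 1_000 per fetch, installations that relied on the larger batches can set the value back explicitly in their boot file. A sketch; the app class name, broker address and chosen value are illustrative:

```ruby
# karafka.rb
class KarafkaApp < Karafka::App
  setup do |config|
    config.kafka = { 'bootstrap.servers': '127.0.0.1:9092' }
    # Restore a larger per-poll batch than the new 1_000 default
    config.max_messages = 10_000
  end
end
```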
@@ -3,5 +3,5 @@
 # Main module namespace
 module Karafka
   # Current Karafka version
-  VERSION = '2.0.0.alpha5'
+  VERSION = '2.0.0.alpha6'
 end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.0.0.alpha5
+  version: 2.0.0.alpha6
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
   R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
   pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
   -----END CERTIFICATE-----
-date: 2022-04-03 00:00:00.000000000 Z
+date: 2022-04-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dry-configurable
@@ -282,7 +282,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: 1.3.1
 requirements: []
-rubygems_version: 3.3.4
+rubygems_version: 3.3.3
 signing_key:
 specification_version: 4
 summary: Ruby based framework for working with Apache Kafka
metadata.gz.sig CHANGED
Binary file