karafka 2.0.0.alpha5 → 2.0.0.alpha6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 062ee1f4c49d482daa48aa1af06aae08b83fb879ecfb4a6a14d5b2b34ed2975a
- data.tar.gz: 28b92b8b5cea506641e339d59f53122fb18deb1637c20a2b34038ae2000b6f17
+ metadata.gz: 862df94b26c24809f82e07f71c39433b90ef08f68f053a004a87aa466b872dac
+ data.tar.gz: 2c533cbd6c271fe282f59c2030d9cc885555242bb8bc9316d0264a5ccfd694a0
  SHA512:
- metadata.gz: '0497e94e2aa16ee20ded58e17313c9259bacb6f3fa91259e30f0cb0560b58a1eecb11447fb150e09cd659bb24d48e0cc732dc3fc7cc585a2aefe980df2e5b3f1'
- data.tar.gz: 280407edd6298a7e62f970d2f9a2d6d1ff6eba4737ea4369eba20c4a83b9d00a0efa24ef36ceabd9def060839039fcf925c341f520efa2ab68532d484482f4fd
+ metadata.gz: 8ce3720e535d65f121bcbfd957286cbc41404f2aff2751622183367862db55f8231c286d5bbede4a18649eafcd085952a7fc8e7569a74dbd3633c9db906114e5
+ data.tar.gz: e77819ccd2be263b02958fcee71ce9228c93c7dee62fabbea61711fbbdbffa997bb9a85a7f668cf8bb596b7a98a773ace560ad5075c46e70f20cd302b354b0fe
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
  # Karafka framework changelog
 
+ ## 2.0.0-alpha6 (2022-04-17)
+ - Fix a bug where, upon a missing boot file and Rails, the railtie would fail with a generic exception (#818)
+ - Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
+ - Replace `consumer.consume` with the `consumer.consumed` event to match the behaviour
+ - Make sure that offset committing happens before the `consumer.consumed` event is propagated
+ - Fix a failure occurring when Karafka is not installed and is only a dependency (#817)
+ - Evict messages from partitions that were lost upon rebalancing (#825)
+ - Do **not** run `#revoked` on partitions that were lost and assigned back upon rebalancing (#825)
+ - Remove potential duplicates that could occur upon rebalance with re-assigned partitions (#825)
+ - Optimize the integration test suite's additional consumers shutdown process (#828)
+ - Optimize message eviction and duplicate removal when polling stops due to lack of messages
+ - Add static group membership integration spec
+
  ## 2.0.0-alpha5 (2022-04-03)
  - Rename StdoutListener to LoggerListener (#811)
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.0.0.alpha5)
+ karafka (2.0.0.alpha6)
  dry-configurable (~> 0.13)
  dry-monitor (~> 0.5)
  dry-validation (~> 1.7)
data/bin/integrations CHANGED
@@ -44,17 +44,30 @@ class Scenario
  # @param path [String] path to the scenarios file
  def initialize(path)
  @path = path
- @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
- @started_at = current_time
  # Last 1024 characters from stdout
  @stdout_tail = ''
  end
 
+ # Starts running given scenario in a separate process
+ def start
+ @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
+ @started_at = current_time
+ end
+
  # @return [String] integration spec name
  def name
  @path.gsub("#{ROOT_PATH}/spec/integrations/", '')
  end
 
+ # @return [Boolean] true if spec is pristine
+ def pristine?
+ scenario_dir = File.dirname(@path)
+
+ # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
+ # to run bundle install, etc in order to run it
+ File.exist?(File.join(scenario_dir, 'Gemfile'))
+ end
+
  # @return [Boolean] did this scenario finish or is it still running
  def finished?
  # If the thread is running too long, kill it
@@ -73,6 +86,13 @@ class Scenario
  !@wait_thr.alive?
  end
 
+ # @return [Boolean] did this scenario finish successfully or not
+ def success?
+ expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
+
+ expected_exit_codes.include?(exit_code)
+ end
+
  # @return [Integer] pid of the process of this scenario
  def pid
  @wait_thr.pid
@@ -84,13 +104,6 @@ class Scenario
  @wait_thr.value&.exitstatus || 123
  end
 
- # @return [Boolean] did this scenario finish successfully or not
- def success?
- expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
-
- expected_exit_codes.include?(exit_code)
- end
-
  # Prints a status report when scenario is finished and stdout if it failed
  def report
  result = success? ? "\e[#{32}m#{'OK'}\e[0m" : "\e[#{31}m#{'FAILED'}\e[0m"
@@ -109,11 +122,10 @@ class Scenario
  # Sets up a proper environment for a given spec to run and returns the run command
  # @return [String] run command
  def init_and_build_cmd
- scenario_dir = File.dirname(@path)
-
  # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
  # to run bundle install, etc in order to run it
- if File.exist?(File.join(scenario_dir, 'Gemfile'))
+ if pristine?
+ scenario_dir = File.dirname(@path)
  # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
  temp_dir = Dir.mktmpdir
  file_name = File.basename(@path)
@@ -141,31 +153,6 @@ class Scenario
  end
  end
 
- # Simple array to keep track of active integration processes thread running with info on which
- # test scenario is running
- active_scenarios = []
-
- # Finished runners
- finished_scenarios = []
-
- # Waits for any of the processes to be finished and tracks exit codes
- #
- # @param active_scenarios [Array] active runners
- # @param finished_scenarios [Hash] finished forks exit codes
- def wait_and_track(active_scenarios, finished_scenarios)
- exited = active_scenarios.find(&:finished?)
-
- if exited
- scenario = active_scenarios.delete(exited)
-
- scenario.report
-
- finished_scenarios << scenario
- else
- Thread.pass
- end
- end
-
  # Load all the specs
  specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
 
@@ -182,15 +169,40 @@ seed = (ENV['SEED'] || rand(0..10_000)).to_i
 
  puts "Random seed: #{seed}"
 
- specs.shuffle(random: Random.new(seed)).each do |integration_test|
- scenario = Scenario.new(integration_test)
+ scenarios = specs
+ .shuffle(random: Random.new(seed))
+ .map { |integration_test| Scenario.new(integration_test) }
 
- active_scenarios << scenario
+ regulars = scenarios.reject(&:pristine?)
+ pristine = scenarios.select(&:pristine?)
 
- wait_and_track(active_scenarios, finished_scenarios) until active_scenarios.size < CONCURRENCY
- end
+ active_scenarios = []
+ finished_scenarios = []
+
+ while finished_scenarios.size < scenarios.size
+ # If we have space to run another scenario, we add it
+ if active_scenarios.size < CONCURRENCY
+ scenario = nil
+ # We can run only one pristine at the same time due to concurrency issues within bundler
+ # Since they usually take longer than others, we try to run them as fast as possible when there
+ # is a slot
+ scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
+ scenario ||= regulars.pop
+
+ if scenario
+ scenario.start
+ active_scenarios << scenario
+ end
+ end
 
- wait_and_track(active_scenarios, finished_scenarios) while !active_scenarios.empty?
+ active_scenarios.select(&:finished?).each do |exited|
+ scenario = active_scenarios.delete(exited)
+ scenario.report
+ finished_scenarios << scenario
+ end
+
+ sleep(0.1)
+ end
 
  # Fail all if any of the tests does not have expected exit code
  raise IntegrationTestError unless finished_scenarios.all?(&:success?)
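
The new runner loop above keeps up to CONCURRENCY scenarios busy while never running more than one bundler-heavy pristine scenario at a time, which is what resolves the `bundle install` collisions (#820). A standalone sketch of that scheduling pattern follows; the Job class, its fake workload and the numbers are hypothetical and only illustrate the control flow, they are not part of the karafka repository:

# frozen_string_literal: true

# Keep up to CONCURRENCY jobs running, but never more than one "pristine" job at a time
CONCURRENCY = 4

Job = Struct.new(:name, :pristine) do
  def pristine?
    pristine
  end

  def start
    # Stand-in for Open3.popen3 + a real integration spec process
    @thread = Thread.new { sleep(rand(0.1..0.3)) }
  end

  def finished?
    !@thread.alive?
  end
end

jobs = Array.new(10) { |i| Job.new("job_#{i}", i.even?) }
pristine = jobs.select(&:pristine?)
regulars = jobs.reject(&:pristine?)

active = []
finished = []

while finished.size < jobs.size
  if active.size < CONCURRENCY
    job = nil
    # Only pick a pristine job when none is currently active
    job = pristine.pop unless active.any?(&:pristine?)
    job ||= regulars.pop

    if job
      job.start
      active << job
    end
  end

  # Collect whatever finished since the last pass
  active.select(&:finished?).each do |done|
    active.delete(done)
    finished << done
  end

  sleep(0.1)
end

puts "Finished #{finished.size} jobs"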
data/docker-compose.yml CHANGED
@@ -14,7 +14,9 @@ services:
  KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
  KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
  KAFKA_CREATE_TOPICS:
- "integrations_0_03:3:1,\
+ "integrations_0_02:2:1,\
+ integrations_1_02:2:1,\
+ integrations_0_03:3:1,\
  integrations_1_03:3:1,\
  integrations_2_03:3:1,\
  integrations_0_10:10:1,\
@@ -21,18 +21,18 @@ module Karafka
  # that may not yet kick in when error occurs. That way we pause always on the last processed
  # message.
  def on_consume
- Karafka.monitor.instrument('consumer.consume', caller: self) do
+ Karafka.monitor.instrument('consumer.consumed', caller: self) do
  consume
- end
 
- pause.reset
+ pause.reset
 
- # Mark as consumed only if manual offset management is not on
- return if topic.manual_offset_management
+ # Mark as consumed only if manual offset management is not on
+ return if topic.manual_offset_management
 
- # We use the non-blocking one here. If someone needs the blocking one, can implement it with
- # manual offset management
- mark_as_consumed(messages.last)
+ # We use the non-blocking one here. If someone needs the blocking one, can implement it
+ # with manual offset management
+ mark_as_consumed(messages.last)
+ end
  rescue StandardError => e
  Karafka.monitor.instrument(
  'error.occurred',
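
Because the offset commit now happens inside the instrumented block, the renamed `consumer.consumed` event fires only after the batch has been marked as consumed (unless manual offset management is on). Instrumentation hooks therefore need to subscribe to the new event name. A minimal sketch, assuming the standard `Karafka.monitor.subscribe` API and the `caller:` payload shown above:

# karafka.rb (sketch)
Karafka.monitor.subscribe('consumer.consumed') do |event|
  consumer = event[:caller]

  # By the time this fires, offsets for the processed batch have already been
  # stored, unless manual_offset_management is enabled for the topic
  Karafka.logger.info("Batch consumed from #{consumer.topic.name}")
end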
@@ -48,6 +48,7 @@ module Karafka
  time_poll.start
 
  @buffer.clear
+ @rebalance_manager.clear
 
  loop do
  # Don't fetch more messages if we do not have any time left
@@ -58,13 +59,23 @@ module Karafka
  # Fetch message within our time boundaries
  message = poll(time_poll.remaining)
 
- # If there are no more messages, return what we have
- break unless message
-
- @buffer << message
+ # Put a message to the buffer if there is one
+ @buffer << message if message
 
  # Track time spent on all of the processing and polling
  time_poll.checkpoint
+
+ # Upon polling, the rebalance manager might have been updated.
+ # If partition revocation happens, we need to remove messages from revoked partitions
+ # as well as ensure we do not have duplicates due to the offset reset for partitions
+ # that we got assigned
+ remove_revoked_and_duplicated_messages if @rebalance_manager.revoked_partitions?
+
+ # Finally, once we've (potentially) removed revoked partitions, etc, if no messages were
+ # returned we can break.
+ # Worth keeping in mind that the rebalance manager might have been updated despite no
+ # messages being returned during a poll
+ break unless message
  end
 
  @buffer
@@ -84,6 +95,9 @@ module Karafka
  # Ignoring a case where there would not be an offset (for example when rebalance occurs).
  #
  # @param async [Boolean] should the commit happen async or sync (async by default)
+ # @return [Boolean] whether committing was successful. It may not be when we no longer own
+ # the given partition.
+ #
  # @note This will commit all the offsets for the whole consumer. In order to achieve
  # granular control over where the offset should be for particular topic partitions, the
  # store_offset should be used to only store new offset when we want it to be flushed
@@ -212,6 +226,8 @@ module Karafka
  ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
 
  @kafka.close
+ @buffer.clear
+ @rebalance_manager.clear
  end
  end
 
@@ -232,7 +248,7 @@ module Karafka
  # Performs a single poll operation.
  #
  # @param timeout [Integer] timeout for a single poll
- # @return [Array<Rdkafka::Consumer::Message>, nil] fetched messages or nil if nothing polled
+ # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
  def poll(timeout)
  time_poll ||= TimeTrackers::Poll.new(timeout)
 
@@ -301,6 +317,20 @@ module Karafka
 
  consumer
  end
+
+ # We may have a case where in the middle of data polling, we've lost a partition.
+ # In a case like this we should remove all the pre-buffered messages from lost partitions as
+ # we are no longer responsible in a given process for processing those messages and they
+ # should have been picked up by a different process.
+ def remove_revoked_and_duplicated_messages
+ @rebalance_manager.revoked_partitions.each do |topic, partitions|
+ partitions.each do |partition|
+ @buffer.delete(topic, partition)
+ end
+ end
+
+ @buffer.uniq!
+ end
  end
  end
  end
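
The reworked batch loop buffers messages as long as polling returns them, but it always re-checks the rebalance state before breaking, because a poll that returns nothing can still have triggered the rebalance callbacks. A stripped-down, hypothetical version of that control flow (a fake poll feed and ad-hoc revocation bookkeeping, not the Karafka client itself) could look like this:

# Hypothetical illustration of the batch-poll control flow described above
Message = Struct.new(:topic, :partition, :offset)

# Fake poll source: two messages, then a revocation event, then nothing left
feed = [
  Message.new('events', 0, 1),
  Message.new('events', 1, 1),
  :revoke_partition_1,
  nil
]

revoked = []
buffer = []

loop do
  item = feed.shift
  revoked << 1 if item == :revoke_partition_1
  message = item.is_a?(Message) ? item : nil

  # Buffer only when something was actually fetched
  buffer << message if message

  # Even when the poll returned nothing, a revocation may have happened in the
  # meantime, so revoked partitions are evicted before breaking
  buffer.reject! { |m| revoked.include?(m.partition) } unless revoked.empty?

  break unless message
end

p buffer.map(&:partition) # => [0]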
@@ -42,6 +42,37 @@ module Karafka
  @groups[message.topic][message.partition] << message
  end
 
+ # Removes given topic and partition data out of the buffer
+ # This is used when there's a partition revocation
+ # @param topic [String] topic we're interested in
+ # @param partition [Integer] partition of which data we want to remove
+ def delete(topic, partition)
+ return unless @groups.key?(topic)
+ return unless @groups.fetch(topic).key?(partition)
+
+ topic_data = @groups.fetch(topic)
+ topic_data.delete(partition)
+
+ recount!
+
+ # If there are no more partitions to handle in a given topic, remove it completely
+ @groups.delete(topic) if topic_data.empty?
+ end
+
+ # Removes duplicated messages from the same partitions
+ # This should be used only when rebalance occurs, as we may get data again we already have
+ # due to the processing from the last offset. In cases like this, we may get same data
+ # again and we do want to ensure as few duplications as possible
+ def uniq!
+ @groups.each_value do |partitions|
+ partitions.each_value do |messages|
+ messages.uniq!(&:offset)
+ end
+ end
+
+ recount!
+ end
+
  # Removes all the data from the buffer.
  #
  # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
@@ -52,6 +83,15 @@ module Karafka
  @size = 0
  @groups.each_value(&:clear)
  end
+
+ private
+
+ # Updates the messages count if we performed any operations that could change the state
+ def recount!
+ @size = @groups.each_value.sum do |partitions|
+ partitions.each_value.map(&:count).sum
+ end
+ end
  end
  end
  end
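
The new buffer methods operate on a nested topic => partition => messages hash. A small standalone sketch of the same eviction, deduplication and recount steps, using plain hashes and a hypothetical Msg struct instead of the real buffer class:

Msg = Struct.new(:offset)

# topic => partition => messages, mirroring the buffer's internal layout
groups = {
  'events' => {
    0 => [Msg.new(10), Msg.new(11), Msg.new(11)], # duplicate offset 11
    1 => [Msg.new(5)]
  }
}

# Eviction of a revoked partition, like #delete above
groups['events'].delete(1)
groups.delete('events') if groups['events'].empty?

# Deduplication by offset, like #uniq! above
groups.each_value do |partitions|
  partitions.each_value { |messages| messages.uniq!(&:offset) }
end

# Recount, like the private #recount!
size = groups.each_value.sum { |partitions| partitions.each_value.sum(&:count) }

p size # => 2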
@@ -9,35 +9,50 @@ module Karafka
  #
  # @note Since this does not happen really often, we try to stick with same objects for the
  # empty states most of the time, so we don't create many objects during the manager life
+ #
+ # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
+ # that are lost, are those that got revoked but did not get re-assigned back. We do not
+ # expose this concept outside and we normalize to have them revoked, as it is irrelevant
+ # from the rest of the code perspective as only those that are lost are truly revoked.
  class RebalanceManager
+ # Empty array for internal usage not to create new objects
+ EMPTY_ARRAY = [].freeze
+
+ private_constant :EMPTY_ARRAY
+
  # @return [RebalanceManager]
  def initialize
- @assigned = {}
- @revoked = {}
+ @assigned_partitions = {}
+ @revoked_partitions = {}
+ @lost_partitions = {}
  end
 
- # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
- # which we've got new partitions assigned and array with ids of the partitions as the value
- # @note Once assigned partitions are fetched, the state will be reset since the callbacks
- # for new assigned partitions are set only during a state change
- def assigned_partitions
- return @assigned if @assigned.empty?
-
- result = @assigned.dup
- @assigned.clear
- result
+ # Resets the rebalance manager state
+ # This needs to be done before each polling loop as during the polling, the state may be
+ # changed
+ def clear
+ @assigned_partitions.clear
+ @revoked_partitions.clear
+ @lost_partitions.clear
  end
 
  # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
  # which we've lost partitions and array with ids of the partitions as the value
- # @note Once revoked partitions are fetched, the state will be reset since the callbacks
- # for new revoked partitions are set only during a state change
+ # @note We do not consider as lost topics and partitions that got revoked and assigned
  def revoked_partitions
- return @revoked if @revoked.empty?
+ return @revoked_partitions if @revoked_partitions.empty?
+ return @lost_partitions unless @lost_partitions.empty?
+
+ @revoked_partitions.each do |topic, partitions|
+ @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
+ end
+
+ @lost_partitions
+ end
 
- result = @revoked.dup
- @revoked.clear
- result
+ # @return [Boolean] true if any partitions were revoked
+ def revoked_partitions?
+ !revoked_partitions.empty?
  end
 
  # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -46,7 +61,7 @@ module Karafka
  # @param _ [Rdkafka::Consumer]
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
  def on_partitions_assigned(_, partitions)
- @assigned = partitions.to_h.transform_values { |part| part.map(&:partition) }
+ @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
  end
 
  # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -55,7 +70,7 @@ module Karafka
  # @param _ [Rdkafka::Consumer]
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
  def on_partitions_revoked(_, partitions)
- @revoked = partitions.to_h.transform_values { |part| part.map(&:partition) }
+ @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
  end
  end
  end
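
The rebalance manager now reports as revoked only the partitions that were truly lost, i.e. revoked and not assigned back within the same rebalance, which is why `#revoked` no longer runs for re-assigned partitions (#825). The underlying set arithmetic can be sketched independently of the manager; the data below is hypothetical and mimics the rdkafka callback payloads already converted to plain hashes:

revoked_partitions  = { 'events' => [0, 1, 2] }
assigned_partitions = { 'events' => [2] } # partition 2 came straight back

EMPTY_ARRAY = [].freeze

# Lost partitions are the revoked ones minus whatever was re-assigned,
# which is what #revoked_partitions above normalizes to
lost_partitions = revoked_partitions.to_h do |topic, partitions|
  [topic, partitions - assigned_partitions.fetch(topic, EMPTY_ARRAY)]
end

p lost_partitions # => {"events"=>[0, 1]}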
@@ -22,7 +22,7 @@ module Karafka
  app.stopping
  app.stopped
 
- consumer.consume
+ consumer.consumed
  consumer.revoked
  consumer.shutdown
 
@@ -82,8 +82,20 @@ if rails
  initializer 'karafka.require_karafka_boot_file' do |app|
  rails6plus = Rails.gem_version >= Gem::Version.new('6.0.0')
 
+ # If the boot file location is set to "false", we should not raise an exception and we
+ # should just not load karafka stuff. Setting this explicitly to false indicates that
+ # karafka is part of the supply chain but it is not a first class citizen of a given
+ # system (it may be just a dependency of a dependency), thus the railtie should not kick
+ # in to load the non-existing boot file
+ next if Karafka.boot_file.to_s == 'false'
+
  karafka_boot_file = Rails.root.join(Karafka.boot_file.to_s).to_s
 
+ # Provide a more comprehensive error for when there is no boot file
+ unless File.exist?(karafka_boot_file)
+ raise(Karafka::Errors::MissingBootFileError, karafka_boot_file)
+ end
+
  if rails6plus
  app.reloader.to_prepare do
  # Load Karafka boot file, so it can be used in Rails server context
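
In practice the opt-out works by making `Karafka.boot_file.to_s` equal `'false'`. A sketch of how an app that only pulls karafka in as a transitive dependency could do that, assuming (as in the mainline karafka setup) that the boot file location is resolved from the KARAFKA_BOOT_FILE environment variable:

# Sketch: opting a Rails app out of the railtie boot-file loading.
#
# In the shell: KARAFKA_BOOT_FILE=false bundle exec rails server
# or early in config/application.rb, before the initializers run:
ENV['KARAFKA_BOOT_FILE'] ||= 'false'

# With any other (or default) location that does not exist, the railtie now
# raises Karafka::Errors::MissingBootFileError with the resolved path instead
# of a generic exception.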
@@ -60,7 +60,7 @@ module Karafka
  # option [Boolean] should we leave offset management to the user
  setting :manual_offset_management, default: false
  # options max_messages [Integer] how many messages do we want to fetch from Kafka in one go
- setting :max_messages, default: 100_000
+ setting :max_messages, default: 1_000
  # option [Integer] number of milliseconds we can wait while fetching data
  setting :max_wait_time, default: 10_000
  # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
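
The default per-poll batch size drops from 100_000 to 1_000 messages. Apps that prefer larger batches can override the setting explicitly; a sketch of the usual karafka.rb setup block, assuming the standard Karafka::App setup API and an example value:

# karafka.rb (sketch)
class KarafkaApp < Karafka::App
  setup do |config|
    config.client_id = 'example_app'
    # Restore a larger per-poll batch if the new 1_000 default is too small
    config.max_messages = 10_000
    # Milliseconds to wait while fetching data (existing default shown above)
    config.max_wait_time = 10_000
  end
end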
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.0.0.alpha5'
+ VERSION = '2.0.0.alpha6'
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.0.0.alpha5
+ version: 2.0.0.alpha6
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
  R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
  pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
  -----END CERTIFICATE-----
- date: 2022-04-03 00:00:00.000000000 Z
+ date: 2022-04-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: dry-configurable
@@ -282,7 +282,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: 1.3.1
  requirements: []
- rubygems_version: 3.3.4
+ rubygems_version: 3.3.3
  signing_key:
  specification_version: 4
  summary: Ruby based framework for working with Apache Kafka
metadata.gz.sig CHANGED
Binary file