karafka 2.0.0.alpha3 → 2.0.0.alpha6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e715fa7f45c40a200fc2d281d765f5d42ac4f03ba6e3f529b86a0adc90bc1af
4
- data.tar.gz: 50e2083063f1fd77ffc529395550e443c4166aa035c2d87716390059bec6fe32
3
+ metadata.gz: 862df94b26c24809f82e07f71c39433b90ef08f68f053a004a87aa466b872dac
4
+ data.tar.gz: 2c533cbd6c271fe282f59c2030d9cc885555242bb8bc9316d0264a5ccfd694a0
5
5
  SHA512:
6
- metadata.gz: d225ba3d118716bcf1265e06d3d781aee7a3e9e87edaab791fdf1b240dc1565a0a460a3f99ca5b8687f6e63701ff2fee3d8ffe343dc18c5e1bfdde4af9deb852
7
- data.tar.gz: 73084d0b5f3678f37b63debdcf944517e0f954bf0a21242caae3c807ced721afd26f5d6bdb864bcf5fe36bb2d098ca3dd167901a1c3ddc61b0ceb00f8a3a2d96
6
+ metadata.gz: 8ce3720e535d65f121bcbfd957286cbc41404f2aff2751622183367862db55f8231c286d5bbede4a18649eafcd085952a7fc8e7569a74dbd3633c9db906114e5
7
+ data.tar.gz: e77819ccd2be263b02958fcee71ce9228c93c7dee62fabbea61711fbbdbffa997bb9a85a7f668cf8bb596b7a98a773ace560ad5075c46e70f20cd302b354b0fe
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.0.0-alpha6 (2022-04-17)
4
+ - Fix a bug, where upon missing boot file and Rails, railtie would fail with a generic exception (#818)
5
+ - Fix an issue with parallel pristine specs colliding with each other during `bundle install` (#820)
6
+ - Replace `consumer.consume` with `consumer.consumed` event to match the behaviour
7
+ - Make sure, that offset committing happens before the `consumer.consumed` event is propagated
8
+ - Fix for failing when not installed (just a dependency) (#817)
9
+ - Evict messages from partitions that were lost upon rebalancing (#825)
10
+ - Do **not** run `#revoked` on partitions that were lost and assigned back upon rebalancing (#825)
11
+ - Remove potential duplicates that could occur upon rebalance with re-assigned partitions (#825)
12
+ - Optimize integration test suite additional consumers shutdown process (#828)
13
+ - Optimize messages eviction and duplicates removal on poll stopped due to lack of messages
14
+ - Add static group membership integration spec
15
+
16
+ ## 2.0.0-alpha5 (2022-04-03)
17
+ - Rename StdoutListener to LoggerListener (#811)
18
+
19
+ ## 2.0.0-alpha4 (2022-03-20)
20
+ - Rails support without ActiveJob queue adapter usage (#805)
21
+
3
22
  ## 2.0.0-alpha3 (2022-03-16)
4
23
  - Restore 'app.initialized' state and add notification on it
5
24
  - Fix the installation flow for Rails and add integration tests for this scenario
data/Gemfile.lock CHANGED
@@ -1,28 +1,28 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.0.0.alpha3)
4
+ karafka (2.0.0.alpha6)
5
5
  dry-configurable (~> 0.13)
6
6
  dry-monitor (~> 0.5)
7
7
  dry-validation (~> 1.7)
8
8
  rdkafka (>= 0.10)
9
9
  thor (>= 0.20)
10
- waterdrop (>= 2.2.0, < 3.0.0)
10
+ waterdrop (>= 2.3.0, < 3.0.0)
11
11
  zeitwerk (~> 2.3)
12
12
 
13
13
  GEM
14
14
  remote: https://rubygems.org/
15
15
  specs:
16
- activejob (7.0.2.2)
17
- activesupport (= 7.0.2.2)
16
+ activejob (7.0.2.3)
17
+ activesupport (= 7.0.2.3)
18
18
  globalid (>= 0.3.6)
19
- activesupport (7.0.2.2)
19
+ activesupport (7.0.2.3)
20
20
  concurrent-ruby (~> 1.0, >= 1.0.2)
21
21
  i18n (>= 1.6, < 2)
22
22
  minitest (>= 5.1)
23
23
  tzinfo (~> 2.0)
24
24
  byebug (11.1.3)
25
- concurrent-ruby (1.1.9)
25
+ concurrent-ruby (1.1.10)
26
26
  diff-lcs (1.5.0)
27
27
  docile (1.4.0)
28
28
  dry-configurable (0.14.0)
@@ -64,7 +64,7 @@ GEM
64
64
  dry-core (~> 0.5, >= 0.5)
65
65
  dry-initializer (~> 3.0)
66
66
  dry-schema (~> 1.9, >= 1.9.1)
67
- factory_bot (6.2.0)
67
+ factory_bot (6.2.1)
68
68
  activesupport (>= 5.0.0)
69
69
  ffi (1.15.5)
70
70
  globalid (1.0.0)
@@ -87,7 +87,7 @@ GEM
87
87
  rspec-expectations (3.11.0)
88
88
  diff-lcs (>= 1.2.0, < 2.0)
89
89
  rspec-support (~> 3.11.0)
90
- rspec-mocks (3.11.0)
90
+ rspec-mocks (3.11.1)
91
91
  diff-lcs (>= 1.2.0, < 2.0)
92
92
  rspec-support (~> 3.11.0)
93
93
  rspec-support (3.11.0)
@@ -100,7 +100,7 @@ GEM
100
100
  thor (1.2.1)
101
101
  tzinfo (2.0.4)
102
102
  concurrent-ruby (~> 1.0)
103
- waterdrop (2.2.0)
103
+ waterdrop (2.3.0)
104
104
  concurrent-ruby (>= 1.1)
105
105
  dry-configurable (~> 0.13)
106
106
  dry-monitor (~> 0.5)
@@ -121,4 +121,4 @@ DEPENDENCIES
121
121
  simplecov
122
122
 
123
123
  BUNDLED WITH
124
- 2.3.7
124
+ 2.3.10
data/bin/integrations CHANGED
@@ -25,7 +25,7 @@ CONCURRENCY = 4
25
25
  class Scenario
26
26
  # How long a scenario can run before we kill it
27
27
  # This is a fail-safe just in case something would hang
28
- MAX_RUN_TIME = 60 * 5
28
+ MAX_RUN_TIME = 60 * 2
29
29
 
30
30
  # There are rare cases where Karafka may force shutdown for some of the integration cases
31
31
  # This includes exactly those
@@ -44,17 +44,30 @@ class Scenario
44
44
  # @param path [String] path to the scenarios file
45
45
  def initialize(path)
46
46
  @path = path
47
- @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
48
- @started_at = current_time
49
47
  # Last 1024 characters from stdout
50
48
  @stdout_tail = ''
51
49
  end
52
50
 
51
+ # Starts running given scenario in a separate process
52
+ def start
53
+ @stdin, @stdout, @stderr, @wait_thr = Open3.popen3(init_and_build_cmd)
54
+ @started_at = current_time
55
+ end
56
+
53
57
  # @return [String] integration spec name
54
58
  def name
55
59
  @path.gsub("#{ROOT_PATH}/spec/integrations/", '')
56
60
  end
57
61
 
62
+ # @return [Boolean] true if spec is pristine
63
+ def pristine?
64
+ scenario_dir = File.dirname(@path)
65
+
66
+ # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
67
+ # to run bundle install, etc in order to run it
68
+ File.exist?(File.join(scenario_dir, 'Gemfile'))
69
+ end
70
+
58
71
  # @return [Boolean] did this scenario finish or is it still running
59
72
  def finished?
60
73
  # If the thread is running too long, kill it
@@ -73,6 +86,13 @@ class Scenario
73
86
  !@wait_thr.alive?
74
87
  end
75
88
 
89
+ # @return [Boolean] did this scenario finish successfully or not
90
+ def success?
91
+ expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
92
+
93
+ expected_exit_codes.include?(exit_code)
94
+ end
95
+
76
96
  # @return [Integer] pid of the process of this scenario
77
97
  def pid
78
98
  @wait_thr.pid
@@ -84,13 +104,6 @@ class Scenario
84
104
  @wait_thr.value&.exitstatus || 123
85
105
  end
86
106
 
87
- # @return [Boolean] did this scenario finish successfully or not
88
- def success?
89
- expected_exit_codes = EXIT_CODES[name] || EXIT_CODES[:default]
90
-
91
- expected_exit_codes.include?(exit_code)
92
- end
93
-
94
107
  # Prints a status report when scenario is finished and stdout if it failed
95
108
  def report
96
109
  result = success? ? "\e[#{32}m#{'OK'}\e[0m" : "\e[#{31}m#{'FAILED'}\e[0m"
@@ -109,11 +122,10 @@ class Scenario
109
122
  # Sets up a proper environment for a given spec to run and returns the run command
110
123
  # @return [String] run command
111
124
  def init_and_build_cmd
112
- scenario_dir = File.dirname(@path)
113
-
114
125
  # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
115
126
  # to run bundle install, etc in order to run it
116
- if File.exist?(File.join(scenario_dir, 'Gemfile'))
127
+ if pristine?
128
+ scenario_dir = File.dirname(@path)
117
129
  # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
118
130
  temp_dir = Dir.mktmpdir
119
131
  file_name = File.basename(@path)
@@ -141,31 +153,6 @@ class Scenario
141
153
  end
142
154
  end
143
155
 
144
- # Simple array to keep track of active integration processes thread running with info on which
145
- # test scenario is running
146
- active_scenarios = []
147
-
148
- # Finished runners
149
- finished_scenarios = []
150
-
151
- # Waits for any of the processes to be finished and tracks exit codes
152
- #
153
- # @param active_scenarios [Array] active runners
154
- # @param finished_scenarios [Hash] finished forks exit codes
155
- def wait_and_track(active_scenarios, finished_scenarios)
156
- exited = active_scenarios.find(&:finished?)
157
-
158
- if exited
159
- scenario = active_scenarios.delete(exited)
160
-
161
- scenario.report
162
-
163
- finished_scenarios << scenario
164
- else
165
- Thread.pass
166
- end
167
- end
168
-
169
156
  # Load all the specs
170
157
  specs = Dir[ROOT_PATH.join('spec/integrations/**/*.rb')]
171
158
 
@@ -182,15 +169,40 @@ seed = (ENV['SEED'] || rand(0..10_000)).to_i
182
169
 
183
170
  puts "Random seed: #{seed}"
184
171
 
185
- specs.shuffle(random: Random.new(seed)).each do |integration_test|
186
- scenario = Scenario.new(integration_test)
172
+ scenarios = specs
173
+ .shuffle(random: Random.new(seed))
174
+ .map { |integration_test| Scenario.new(integration_test) }
187
175
 
188
- active_scenarios << scenario
176
+ regulars = scenarios.reject(&:pristine?)
177
+ pristine = scenarios.select(&:pristine?)
189
178
 
190
- wait_and_track(active_scenarios, finished_scenarios) until active_scenarios.size < CONCURRENCY
191
- end
179
+ active_scenarios = []
180
+ finished_scenarios = []
181
+
182
+ while finished_scenarios.size < scenarios.size
183
+ # If we have space to run another scenario, we add it
184
+ if active_scenarios.size < CONCURRENCY
185
+ scenario = nil
186
+ # We can run only one pristine at the same time due to concurrency issues within bundler
187
+ # Since they usually take longer than others, we try to run them as fast as possible when there
188
+ # is a slot
189
+ scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
190
+ scenario ||= regulars.pop
191
+
192
+ if scenario
193
+ scenario.start
194
+ active_scenarios << scenario
195
+ end
196
+ end
192
197
 
193
- wait_and_track(active_scenarios, finished_scenarios) while !active_scenarios.empty?
198
+ active_scenarios.select(&:finished?).each do |exited|
199
+ scenario = active_scenarios.delete(exited)
200
+ scenario.report
201
+ finished_scenarios << scenario
202
+ end
203
+
204
+ sleep(0.1)
205
+ end
194
206
 
195
207
  # Fail all if any of the tests does not have expected exit code
196
208
  raise IntegrationTestError unless finished_scenarios.all?(&:success?)
data/bin/stress CHANGED
@@ -9,5 +9,5 @@ set -e
9
9
  while :
10
10
  do
11
11
  reset
12
- bundle exec bin/integrations $1
12
+ bin/integrations $1
13
13
  done
data/docker-compose.yml CHANGED
@@ -14,7 +14,9 @@ services:
14
14
  KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
15
15
  KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
16
16
  KAFKA_CREATE_TOPICS:
17
- "integrations_0_03:3:1,\
17
+ "integrations_0_02:2:1,\
18
+ integrations_1_02:2:1,\
19
+ integrations_0_03:3:1,\
18
20
  integrations_1_03:3:1,\
19
21
  integrations_2_03:3:1,\
20
22
  integrations_0_10:10:1,\
data/karafka.gemspec CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  spec.add_dependency 'dry-validation', '~> 1.7'
22
22
  spec.add_dependency 'rdkafka', '>= 0.10'
23
23
  spec.add_dependency 'thor', '>= 0.20'
24
- spec.add_dependency 'waterdrop', '>= 2.2.0', '< 3.0.0'
24
+ spec.add_dependency 'waterdrop', '>= 2.3.0', '< 3.0.0'
25
25
  spec.add_dependency 'zeitwerk', '~> 2.3'
26
26
 
27
27
  spec.required_ruby_version = '>= 2.6.0'
@@ -1,20 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'active_job'
4
- require 'active_job/queue_adapters'
5
- require 'active_job/queue_adapters/karafka_adapter'
3
+ begin
4
+ require 'active_job'
5
+ require_relative 'queue_adapters/karafka_adapter'
6
6
 
7
- module ActiveJob
8
- # Namespace for usage simplification outside of Rails where Railtie will not kick in.
9
- # That way a require 'active_job/karafka' should be enough to use it
10
- module Karafka
7
+ module ActiveJob
8
+ # Namespace for usage simplification outside of Rails where Railtie will not kick in.
9
+ # That way a require 'active_job/karafka' should be enough to use it
10
+ module Karafka
11
+ end
11
12
  end
12
- end
13
-
14
- # We extend routing builder by adding a simple wrapper for easier jobs topics defining
15
- # This needs to be extended here as it is going to be used in karafka routes, hence doing that in
16
- # the railtie initializer would be too late
17
- ::Karafka::Routing::Builder.include ::Karafka::ActiveJob::RoutingExtensions
18
- ::Karafka::Routing::Proxy.include ::Karafka::ActiveJob::RoutingExtensions
19
13
 
20
- # We extend ActiveJob stuff in the railtie
14
+ # We extend routing builder by adding a simple wrapper for easier jobs topics defining
15
+ # This needs to be extended here as it is going to be used in karafka routes, hence doing that in
16
+ # the railtie initializer would be too late
17
+ ::Karafka::Routing::Builder.include ::Karafka::ActiveJob::RoutingExtensions
18
+ ::Karafka::Routing::Proxy.include ::Karafka::ActiveJob::RoutingExtensions
19
+ rescue LoadError
20
+ # We extend ActiveJob stuff in the railtie
21
+ end
@@ -21,18 +21,18 @@ module Karafka
21
21
  # that may not yet kick in when error occurs. That way we pause always on the last processed
22
22
  # message.
23
23
  def on_consume
24
- Karafka.monitor.instrument('consumer.consume', caller: self) do
24
+ Karafka.monitor.instrument('consumer.consumed', caller: self) do
25
25
  consume
26
- end
27
26
 
28
- pause.reset
27
+ pause.reset
29
28
 
30
- # Mark as consumed only if manual offset management is not on
31
- return if topic.manual_offset_management
29
+ # Mark as consumed only if manual offset management is not on
30
+ return if topic.manual_offset_management
32
31
 
33
- # We use the non-blocking one here. If someone needs the blocking one, can implement it with
34
- # manual offset management
35
- mark_as_consumed(messages.last)
32
+ # We use the non-blocking one here. If someone needs the blocking one, can implement it
33
+ # with manual offset management
34
+ mark_as_consumed(messages.last)
35
+ end
36
36
  rescue StandardError => e
37
37
  Karafka.monitor.instrument(
38
38
  'error.occurred',
@@ -48,6 +48,7 @@ module Karafka
48
48
  time_poll.start
49
49
 
50
50
  @buffer.clear
51
+ @rebalance_manager.clear
51
52
 
52
53
  loop do
53
54
  # Don't fetch more messages if we do not have any time left
@@ -58,13 +59,23 @@ module Karafka
58
59
  # Fetch message within our time boundaries
59
60
  message = poll(time_poll.remaining)
60
61
 
61
- # If there are no more messages, return what we have
62
- break unless message
63
-
64
- @buffer << message
62
+ # Put a message to the buffer if there is one
63
+ @buffer << message if message
65
64
 
66
65
  # Track time spent on all of the processing and polling
67
66
  time_poll.checkpoint
67
+
68
+ # Upon polling rebalance manager might have been updated.
69
+ # If partition revocation happens, we need to remove messages from revoked partitions
70
+ # as well as ensure we do not have duplicates due to the offset reset for partitions
71
+ # that we got assigned
72
+ remove_revoked_and_duplicated_messages if @rebalance_manager.revoked_partitions?
73
+
74
+ # Finally once we've (potentially) removed revoked, etc, if no messages were returned
75
+ # we can break.
76
+ # Worth keeping in mind, that the rebalance manager might have been updated despite no
77
+ # messages being returned during a poll
78
+ break unless message
68
79
  end
69
80
 
70
81
  @buffer
@@ -84,6 +95,9 @@ module Karafka
84
95
  # Ignoring a case where there would not be an offset (for example when rebalance occurs).
85
96
  #
86
97
  # @param async [Boolean] should the commit happen async or sync (async by default)
98
+ # @return [Boolean] whether committing was successful. It may not be, when we no longer own
99
+ # given partition.
100
+ #
87
101
  # @note This will commit all the offsets for the whole consumer. In order to achieve
88
102
  # granular control over where the offset should be for particular topic partitions, the
89
103
  # store_offset should be used to only store new offset when we want it to be flushed
@@ -212,6 +226,8 @@ module Karafka
212
226
  ::Karafka::Instrumentation.error_callbacks.delete(@subscription_group.id)
213
227
 
214
228
  @kafka.close
229
+ @buffer.clear
230
+ @rebalance_manager.clear
215
231
  end
216
232
  end
217
233
 
@@ -232,7 +248,7 @@ module Karafka
232
248
  # Performs a single poll operation.
233
249
  #
234
250
  # @param timeout [Integer] timeout for a single poll
235
- # @return [Array<Rdkafka::Consumer::Message>, nil] fetched messages or nil if nothing polled
251
+ # @return [Rdkafka::Consumer::Message, nil] fetched message or nil if nothing polled
236
252
  def poll(timeout)
237
253
  time_poll ||= TimeTrackers::Poll.new(timeout)
238
254
 
@@ -301,6 +317,20 @@ module Karafka
301
317
 
302
318
  consumer
303
319
  end
320
+
321
+ # We may have a case where in the middle of data polling, we've lost a partition.
322
+ # In a case like this we should remove all the pre-buffered messages from lost partitions as
323
+ # we are no longer responsible in a given process for processing those messages and they
324
+ # should have been picked up by a different process.
325
+ def remove_revoked_and_duplicated_messages
326
+ @rebalance_manager.revoked_partitions.each do |topic, partitions|
327
+ partitions.each do |partition|
328
+ @buffer.delete(topic, partition)
329
+ end
330
+ end
331
+
332
+ @buffer.uniq!
333
+ end
304
334
  end
305
335
  end
306
336
  end
@@ -42,6 +42,37 @@ module Karafka
42
42
  @groups[message.topic][message.partition] << message
43
43
  end
44
44
 
45
+ # Removes given topic and partition data out of the buffer
46
+ # This is used when there's a partition revocation
47
+ # @param topic [String] topic we're interested in
48
+ # @param partition [Integer] partition of which data we want to remove
49
+ def delete(topic, partition)
50
+ return unless @groups.key?(topic)
51
+ return unless @groups.fetch(topic).key?(partition)
52
+
53
+ topic_data = @groups.fetch(topic)
54
+ topic_data.delete(partition)
55
+
56
+ recount!
57
+
58
+ # If there are no more partitions to handle in a given topic, remove it completely
59
+ @groups.delete(topic) if topic_data.empty?
60
+ end
61
+
62
+ # Removes duplicated messages from the same partitions
63
+ # This should be used only when rebalance occurs, as we may get data again we already have
64
+ # due to the processing from the last offset. In cases like this, we may get same data
65
+ # again and we do want to ensure as few duplications as possible
66
+ def uniq!
67
+ @groups.each_value do |partitions|
68
+ partitions.each_value do |messages|
69
+ messages.uniq!(&:offset)
70
+ end
71
+ end
72
+
73
+ recount!
74
+ end
75
+
45
76
  # Removes all the data from the buffer.
46
77
  #
47
78
  # @note We do not clear the whole groups hash but rather we clear the partition hashes, so
@@ -52,6 +83,15 @@ module Karafka
52
83
  @size = 0
53
84
  @groups.each_value(&:clear)
54
85
  end
86
+
87
+ private
88
+
89
+ # Updates the messages count if we performed any operations that could change the state
90
+ def recount!
91
+ @size = @groups.each_value.sum do |partitions|
92
+ partitions.each_value.map(&:count).sum
93
+ end
94
+ end
55
95
  end
56
96
  end
57
97
  end
@@ -9,35 +9,50 @@ module Karafka
9
9
  #
10
10
  # @note Since this does not happen really often, we try to stick with same objects for the
11
11
  # empty states most of the time, so we don't create many objects during the manager life
12
+ #
13
+ # @note Internally in the rebalance manager we have a notion of lost partitions. Partitions
14
+ # that are lost, are those that got revoked but did not get re-assigned back. We do not
15
+ # expose this concept outside and we normalize to have them revoked, as it is irrelevant
16
+ # from the rest of the code perspective as only those that are lost are truly revoked.
12
17
  class RebalanceManager
18
+ # Empty array for internal usage not to create new objects
19
+ EMPTY_ARRAY = [].freeze
20
+
21
+ private_constant :EMPTY_ARRAY
22
+
13
23
  # @return [RebalanceManager]
14
24
  def initialize
15
- @assigned = {}
16
- @revoked = {}
25
+ @assigned_partitions = {}
26
+ @revoked_partitions = {}
27
+ @lost_partitions = {}
17
28
  end
18
29
 
19
- # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
20
- # which we've got new partitions assigned and array with ids of the partitions as the value
21
- # @note Once assigned partitions are fetched, the state will be reset since the callbacks
22
- # for new assigned partitions are set only during a state change
23
- def assigned_partitions
24
- return @assigned if @assigned.empty?
25
-
26
- result = @assigned.dup
27
- @assigned.clear
28
- result
30
+ # Resets the rebalance manager state
31
+ # This needs to be done before each polling loop as during the polling, the state may be
32
+ # changed
33
+ def clear
34
+ @assigned_partitions.clear
35
+ @revoked_partitions.clear
36
+ @lost_partitions.clear
29
37
  end
30
38
 
31
39
  # @return [Hash<String, Array<Integer>>] hash where the keys are the names of topics for
32
40
  # which we've lost partitions and array with ids of the partitions as the value
33
- # @note Once revoked partitions are fetched, the state will be reset since the callbacks
34
- # for new revoked partitions are set only during a state change
41
+ # @note We do not consider as lost topics and partitions that got revoked and assigned
35
42
  def revoked_partitions
36
- return @revoked if @revoked.empty?
43
+ return @revoked_partitions if @revoked_partitions.empty?
44
+ return @lost_partitions unless @lost_partitions.empty?
45
+
46
+ @revoked_partitions.each do |topic, partitions|
47
+ @lost_partitions[topic] = partitions - @assigned_partitions.fetch(topic, EMPTY_ARRAY)
48
+ end
49
+
50
+ @lost_partitions
51
+ end
37
52
 
38
- result = @revoked.dup
39
- @revoked.clear
40
- result
53
+ # @return [Boolean] true if any partitions were revoked
54
+ def revoked_partitions?
55
+ !revoked_partitions.empty?
41
56
  end
42
57
 
43
58
  # Callback that kicks in inside of rdkafka, when new partitions are assigned.
@@ -46,7 +61,7 @@ module Karafka
46
61
  # @param _ [Rdkafka::Consumer]
47
62
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
48
63
  def on_partitions_assigned(_, partitions)
49
- @assigned = partitions.to_h.transform_values { |part| part.map(&:partition) }
64
+ @assigned_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
50
65
  end
51
66
 
52
67
  # Callback that kicks in inside of rdkafka, when partitions are revoked.
@@ -55,7 +70,7 @@ module Karafka
55
70
  # @param _ [Rdkafka::Consumer]
56
71
  # @param partitions [Rdkafka::Consumer::TopicPartitionList]
57
72
  def on_partitions_revoked(_, partitions)
58
- @revoked = partitions.to_h.transform_values { |part| part.map(&:partition) }
73
+ @revoked_partitions = partitions.to_h.transform_values { |part| part.map(&:partition) }
59
74
  end
60
75
  end
61
76
  end
@@ -10,6 +10,7 @@ module Karafka
10
10
  required(:id).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
11
11
  required(:kafka).filled
12
12
  required(:max_messages) { int? & gteq?(1) }
13
+ required(:initial_offset).filled(included_in?: %w[earliest latest])
13
14
  required(:max_wait_time).filled { int? & gteq?(10) }
14
15
  required(:manual_offset_management).filled(:bool?)
15
16
  required(:name).filled(:str?, format?: Karafka::Contracts::TOPIC_REGEXP)
@@ -4,7 +4,7 @@ module Karafka
4
4
  module Instrumentation
5
5
  # Default listener that hooks up to our instrumentation and uses its events for logging
6
6
  # It can be removed/replaced or anything without any harm to the Karafka app flow.
7
- class StdoutListener
7
+ class LoggerListener
8
8
  # Log levels that we use in this particular listener
9
9
  USED_LOG_LEVELS = %i[
10
10
  debug
@@ -22,7 +22,7 @@ module Karafka
22
22
  app.stopping
23
23
  app.stopped
24
24
 
25
- consumer.consume
25
+ consumer.consumed
26
26
  consumer.revoked
27
27
  consumer.shutdown
28
28
 
@@ -82,8 +82,20 @@ if rails
82
82
  initializer 'karafka.require_karafka_boot_file' do |app|
83
83
  rails6plus = Rails.gem_version >= Gem::Version.new('6.0.0')
84
84
 
85
+ # If the boot file location is set to "false", we should not raise an exception and we
86
+ # should just not load karafka stuff. Setting this explicitly to false indicates, that
87
+ # karafka is part of the supply chain but it is not a first class citizen of a given
88
+ # system (may be just a dependency of a dependency), thus railtie should not kick in to
89
+ # load the non-existing boot file
90
+ next if Karafka.boot_file.to_s == 'false'
91
+
85
92
  karafka_boot_file = Rails.root.join(Karafka.boot_file.to_s).to_s
86
93
 
94
+ # Provide a more comprehensive error when there is no boot file
95
+ unless File.exist?(karafka_boot_file)
96
+ raise(Karafka::Errors::MissingBootFileError, karafka_boot_file)
97
+ end
98
+
87
99
  if rails6plus
88
100
  app.reloader.to_prepare do
89
101
  # Load Karafka boot file, so it can be used in Rails server context
@@ -41,7 +41,7 @@ module Karafka
41
41
 
42
42
  kafka[:'client.id'] ||= Karafka::App.config.client_id
43
43
  kafka[:'group.id'] ||= @topics.first.consumer_group.id
44
- kafka[:'auto.offset.reset'] ||= 'earliest'
44
+ kafka[:'auto.offset.reset'] ||= @topics.first.initial_offset
45
45
  # Karafka manages the offsets based on the processing state, thus we do not rely on the
46
46
  # rdkafka offset auto-storing
47
47
  kafka[:'enable.auto.offset.store'] = 'false'
@@ -18,6 +18,7 @@ module Karafka
18
18
  kafka
19
19
  max_messages
20
20
  max_wait_time
21
+ initial_offset
21
22
  ].freeze
22
23
 
23
24
  private_constant :DISTRIBUTION_KEYS
@@ -16,6 +16,7 @@ module Karafka
16
16
  manual_offset_management
17
17
  max_messages
18
18
  max_wait_time
19
+ initial_offset
19
20
  ].freeze
20
21
 
21
22
  private_constant :INHERITABLE_ATTRIBUTES
@@ -54,10 +54,13 @@ module Karafka
54
54
  setting :consumer_persistence, default: true
55
55
  # Default deserializer for converting incoming data into ruby objects
56
56
  setting :deserializer, default: Karafka::Serialization::Json::Deserializer.new
57
+ # option [String] should we start with the earliest possible offset or latest
58
+ # This will set the `auto.offset.reset` value unless present in the kafka scope
59
+ setting :initial_offset, default: 'earliest'
57
60
  # option [Boolean] should we leave offset management to the user
58
61
  setting :manual_offset_management, default: false
59
62
  # options max_messages [Integer] how many messages do we want to fetch from Kafka in one go
60
- setting :max_messages, default: 100_000
63
+ setting :max_messages, default: 1_000
61
64
  # option [Integer] number of milliseconds we can wait while fetching data
62
65
  setting :max_wait_time, default: 10_000
63
66
  # option shutdown_timeout [Integer] the number of milliseconds after which Karafka no
@@ -40,7 +40,7 @@ class KarafkaApp < Karafka::App
40
40
  # interested in logging events for certain environments. Since instrumentation
41
41
  # notifications add extra boilerplate, if you want to achieve max performance,
42
42
  # listen to only what you really need for given environment.
43
- Karafka.monitor.subscribe(Karafka::Instrumentation::StdoutListener.new)
43
+ Karafka.monitor.subscribe(Karafka::Instrumentation::LoggerListener.new)
44
44
  # Karafka.monitor.subscribe(Karafka::Instrumentation::ProctitleListener.new)
45
45
 
46
46
  routes.draw do
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.0.0.alpha3'
6
+ VERSION = '2.0.0.alpha6'
7
7
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0.alpha3
4
+ version: 2.0.0.alpha6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -34,7 +34,7 @@ cert_chain:
34
34
  R2P11bWoCtr70BsccVrN8jEhzwXngMyI2gVt750Y+dbTu1KgRqZKp/ECe7ZzPzXj
35
35
  pIy9vHxTANKYVyI4qj8OrFdEM5BQNu8oQpL0iQ==
36
36
  -----END CERTIFICATE-----
37
- date: 2022-03-16 00:00:00.000000000 Z
37
+ date: 2022-04-17 00:00:00.000000000 Z
38
38
  dependencies:
39
39
  - !ruby/object:Gem::Dependency
40
40
  name: dry-configurable
@@ -112,7 +112,7 @@ dependencies:
112
112
  requirements:
113
113
  - - ">="
114
114
  - !ruby/object:Gem::Version
115
- version: 2.2.0
115
+ version: 2.3.0
116
116
  - - "<"
117
117
  - !ruby/object:Gem::Version
118
118
  version: 3.0.0
@@ -122,7 +122,7 @@ dependencies:
122
122
  requirements:
123
123
  - - ">="
124
124
  - !ruby/object:Gem::Version
125
- version: 2.2.0
125
+ version: 2.3.0
126
126
  - - "<"
127
127
  - !ruby/object:Gem::Version
128
128
  version: 3.0.0
@@ -211,9 +211,9 @@ files:
211
211
  - lib/karafka/instrumentation/callbacks/error.rb
212
212
  - lib/karafka/instrumentation/callbacks/statistics.rb
213
213
  - lib/karafka/instrumentation/logger.rb
214
+ - lib/karafka/instrumentation/logger_listener.rb
214
215
  - lib/karafka/instrumentation/monitor.rb
215
216
  - lib/karafka/instrumentation/proctitle_listener.rb
216
- - lib/karafka/instrumentation/stdout_listener.rb
217
217
  - lib/karafka/licenser.rb
218
218
  - lib/karafka/messages/batch_metadata.rb
219
219
  - lib/karafka/messages/builders/batch_metadata.rb
@@ -282,7 +282,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
282
282
  - !ruby/object:Gem::Version
283
283
  version: 1.3.1
284
284
  requirements: []
285
- rubygems_version: 3.3.4
285
+ rubygems_version: 3.3.3
286
286
  signing_key:
287
287
  specification_version: 4
288
288
  summary: Ruby based framework for working with Apache Kafka
metadata.gz.sig CHANGED
Binary file