karafka 2.0.8 → 2.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 1a646d63d674e6ecb80625097426f48294fe8e8b3f3e9e020cf92645a5728251
- data.tar.gz: 461c58c355f84b81c3396cb4164a426c3f1e87747b3101de1e7c6fbf2a876778
+ metadata.gz: ee9c609249fea5e996d9506bd3e04435dbcc189addcd6383a092b080e776f525
+ data.tar.gz: 6ed354b21361966f1988bb90b29cea14d842fb02299f12bd30ab4ec57eaf8cc1
  SHA512:
- metadata.gz: 623832478b11b1fa61c7906f423417500838e30ef97256a59290d73dcfe17670cf7eb11adcf33d422ae590c6ce5f19215340c89d369f8ec5b4e609af07d0befb
- data.tar.gz: 97e50ed131c939d09de884d3ddba826381e7d314d3bc4660ecf947fa6582d9e6f16e023dad121612eadffcd441a73874c96e287e5ec1885e008d843357e94b93
+ metadata.gz: a20a1bf2d2b86fcd63bf2e036d535c1d7aa4d06943cc00a414851e8f0a0054054eff621b67bfd355456ac93f1561931aec598b43c0ca535ceaa57c9d94957378
+ data.tar.gz: '05996101b929a143926508a0afb69e6e6b09de04fd088dd06c83cba122efc9cfecdd7586892c80ab5409964737dd4a74b0ecea0f8a30ff1dfbf032f6bb289288'
checksums.yaml.gz.sig CHANGED
Binary file
data/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
  # Karafka framework changelog

+ ## 2.0.9 (2022-09-22)
+ - Fix Singleton not visible when used in PORO (#1034)
+ - Divide pristine specs into pristine and poro. Pristine will still have helpers loaded, poro will have nothing.
+ - Fix a case where `manual_offset_management` offset upon error is not reverted to the first message in a case where there were no markings as consumed at all for multiple batches.
+ - Implement small reliability improvements around marking as consumed.
+ - Introduce a config sanity check to make sure Virtual Partitions are not used with manual offset management.
+ - Fix a possibility of using `active_job_topic` with Virtual Partitions and manual offset management (ActiveJob still can use due to atomicity of jobs).
+ - Move seek offset ownership to the coordinator to allow Virtual Partitions further development.
+ - Improve client shutdown in specs.
+ - Do not reset client on network issue and rely on `librdkafka` to do so.
+ - Allow for nameless (anonymous) subscription groups (#1033)
+
  ## 2.0.8 (2022-09-19)
  - [Breaking change] Rename Virtual Partitions `concurrency` to `max_partitions` to avoid confusion (#1023).
  - Allow for block based subscription groups management (#1030).
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
  remote: .
  specs:
- karafka (2.0.8)
+ karafka (2.0.9)
  karafka-core (>= 2.0.2, < 3.0.0)
  rdkafka (>= 0.12)
  thor (>= 0.20)
@@ -11,10 +11,10 @@ PATH
  GEM
  remote: https://rubygems.org/
  specs:
- activejob (7.0.3.1)
- activesupport (= 7.0.3.1)
+ activejob (7.0.4)
+ activesupport (= 7.0.4)
  globalid (>= 0.3.6)
- activesupport (7.0.3.1)
+ activesupport (7.0.4)
  concurrent-ruby (~> 1.0, >= 1.0.2)
  i18n (>= 1.6, < 2)
  minitest (>= 5.1)
@@ -33,7 +33,7 @@ GEM
  karafka-core (2.0.2)
  concurrent-ruby (>= 1.1)
  mini_portile2 (2.8.0)
- minitest (5.16.2)
+ minitest (5.16.3)
  rake (13.0.6)
  rdkafka (0.12.0)
  ffi (~> 1.15)
@@ -45,13 +45,13 @@ GEM
  rspec-mocks (~> 3.11.0)
  rspec-core (3.11.0)
  rspec-support (~> 3.11.0)
- rspec-expectations (3.11.0)
+ rspec-expectations (3.11.1)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.11.0)
  rspec-mocks (3.11.1)
  diff-lcs (>= 1.2.0, < 2.0)
  rspec-support (~> 3.11.0)
- rspec-support (3.11.0)
+ rspec-support (3.11.1)
  simplecov (0.21.2)
  docile (~> 1.1)
  simplecov-html (~> 0.11)
@@ -68,6 +68,7 @@ GEM
  zeitwerk (2.6.0)

  PLATFORMS
+ arm64-darwin
  x86_64-linux

  DEPENDENCIES
@@ -79,4 +80,4 @@ DEPENDENCIES
  simplecov

  BUNDLED WITH
- 2.3.15
+ 2.3.22
data/bin/integrations CHANGED
@@ -2,9 +2,14 @@

  # Runner to run integration specs in parallel

- # Part of integration specs run pristine without bundler.
+ # Part of integration specs run linear without bundler.
  # If we would run bundle exec when running this code, bundler would inject its own context
  # into them, messing things up heavily
+ #
+ # Types of specs:
+ # - regular - can run in parallel, includes all the helpers
+ # - pristine - cannot run in parallel, uses custom bundler but includes helpers
+ # - poro - cannot run in parallel, uses custom bundler, does not include any helpers
  raise 'This code needs to be executed WITHOUT bundle exec' if Kernel.const_defined?(:Bundler)

  require 'open3'
@@ -64,13 +69,19 @@ class Scenario
  @path.gsub("#{ROOT_PATH}/spec/integrations/", '')
  end

- # @return [Boolean] true if spec is pristine
- def pristine?
+ # @return [Symbol] type of spec
+ def type
  scenario_dir = File.dirname(@path)

- # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
- # to run bundle install, etc in order to run it
- File.exist?(File.join(scenario_dir, 'Gemfile'))
+ return :poro if scenario_dir.end_with?('_poro')
+ return :pristine if scenario_dir.end_with?('_pristine')
+
+ :regular
+ end
+
+ # @return [Boolean] any spec that is not a regular one should not run in parallel with others
+ def linear?
+ type != :regular
  end

  # @return [Boolean] did this scenario finished or is it still running
@@ -145,9 +156,23 @@ class Scenario
  # Sets up a proper environment for a given spec to run and returns the run command
  # @return [String] run command
  def init_and_build_cmd
- # If there is a Gemfile in a scenario directory, it means it is a pristine spec and we need
- # to run bundle install, etc in order to run it
- if pristine?
+ case type
+ when :poro
+ scenario_dir = File.dirname(@path)
+ # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
+ temp_dir = Dir.mktmpdir
+ file_name = File.basename(@path)
+
+ FileUtils.cp_r("#{scenario_dir}/.", temp_dir)
+
+ <<~CMD
+ cd #{temp_dir} &&
+ KARAFKA_GEM_DIR=#{ROOT_PATH} \
+ BUNDLE_AUTO_INSTALL=true \
+ PRISTINE_MODE=true \
+ bundle exec ruby #{file_name}
+ CMD
+ when :pristine
  scenario_dir = File.dirname(@path)
  # We copy the spec into a temp dir, not to pollute the spec location with logs, etc
  temp_dir = Dir.mktmpdir
@@ -196,8 +221,8 @@ scenarios = specs
  .shuffle(random: Random.new(seed))
  .map { |integration_test| Scenario.new(integration_test) }

- regulars = scenarios.reject(&:pristine?)
- pristine = scenarios.select(&:pristine?)
+ regulars = scenarios.reject(&:linear?)
+ linears = scenarios - regulars

  active_scenarios = []
  finished_scenarios = []
@@ -206,10 +231,10 @@ while finished_scenarios.size < scenarios.size
  # If we have space to run another scenario, we add it
  if active_scenarios.size < CONCURRENCY
  scenario = nil
- # We can run only one pristine at the same time due to concurrency issues within bundler
+ # We can run only one linear at the same time due to concurrency issues within bundler
  # Since they usually take longer than others, we try to run them as fast as possible when there
  # is a slot
- scenario = pristine.pop unless active_scenarios.any?(&:pristine?)
+ scenario = linears.pop unless active_scenarios.any?(&:linear?)
  scenario ||= regulars.pop

  if scenario
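
The runner above derives each scenario's type from its directory suffix and allows only one non-regular (linear) spec to run at a time. A standalone sketch of that suffix convention, with hypothetical paths, assuming only the `_pristine` / `_poro` suffixes matter:

```ruby
# Standalone sketch (not part of the runner) of the directory-suffix convention
# described above. The paths are made up; only the suffix decides the type.
def spec_type(path)
  dir = File.dirname(path)

  return :poro if dir.end_with?('_poro')
  return :pristine if dir.end_with?('_pristine')

  :regular
end

spec_type('spec/integrations/consumption/from_earliest.rb')  # => :regular
spec_type('spec/integrations/setup/rails_pristine/spec.rb')  # => :pristine
spec_type('spec/integrations/poro/singleton_poro/spec.rb')   # => :poro
```
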
data/config/errors.yml CHANGED
@@ -57,3 +57,4 @@ en:
  consumer_format: needs to inherit from Karafka::Pro::BaseConsumer and not Karafka::Consumer
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
  virtual_partitions.max_partitions_format: needs to be equl or more than 1
+ manual_offset_management_not_with_virtual_partitions: cannot be used together with Virtual Partitions
@@ -19,6 +19,8 @@ module Karafka

  instance_eval(&block)

+ target.tags << :active_job
+
  # This is handled by our custom ActiveJob consumer
  # Without this, default behaviour would cause messages to skip upon shutdown as the
  # offset would be committed for the last message
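
The `:active_job` tag added above is what later lets the new Virtual Partitions / manual offset management sanity check skip ActiveJob topics, which is why `active_job_topic` can still be combined with Virtual Partitions. A rough sketch, assuming Karafka Pro and made-up partitioning logic:

```ruby
# Rough sketch, not taken from the gem: an ActiveJob topic using Pro's Virtual
# Partitions. The builder above tags it with :active_job, so the new contract
# check does not reject it even though ActiveJob manages offsets internally.
class KarafkaApp < Karafka::App
  routes.draw do
    active_job_topic :default do
      virtual_partitions(
        partitioner: ->(job) { job.key }
      )
    end
  end
end
```
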
@@ -77,7 +77,7 @@ module Karafka
  # with manual offset management
  mark_as_consumed(messages.last)
  else
- pause(@seek_offset || messages.first.offset)
+ pause(coordinator.seek_offset)
  end
  end

@@ -155,7 +155,7 @@ module Karafka
  return false
  end

- @seek_offset = message.offset + 1
+ coordinator.seek_offset = message.offset + 1

  true
  end
@@ -172,7 +172,7 @@ module Karafka
  return false
  end

- @seek_offset = message.offset + 1
+ coordinator.seek_offset = message.offset + 1

  true
  end
@@ -153,7 +153,7 @@ module Karafka

  pause_msg = Messages::Seek.new(topic, partition, offset)

- internal_commit_offsets(async: false)
+ internal_commit_offsets(async: true)

  # Here we do not use our cached tpls because we should not try to pause something we do
  # not own anymore.
@@ -267,8 +267,15 @@ module Karafka

  true
  rescue Rdkafka::RdkafkaError => e
- return false if e.code == :assignment_lost
- return true if e.code == :no_offset
+ case e.code
+ when :assignment_lost
+ return false
+ when :no_offset
+ return true
+ when :coordinator_load_in_progress
+ sleep(1)
+ retry
+ end

  raise e
  end
@@ -329,25 +336,27 @@ module Karafka
  #
  # If we would retry here, the client reset would become transparent and we would not have
  # a chance to take any actions
+ early_return = false
+
  case e.code
  when :max_poll_exceeded # -147
  reset
- return nil
+ early_return = true
  when :transport # -195
  reset
- return nil
- when :rebalance_in_progress # -27
- reset
- return nil
+ early_return = true
  when :not_coordinator # 16
  reset
- return nil
+ early_return = true
  when :network_exception # 13
- reset
- return nil
+ early_return = true
+ when :rebalance_in_progress # -27
+ early_return = true
+ when :coordinator_load_in_progress # 14
+ early_return = true
  when :unknown_topic_or_part
  # This is expected and temporary until rdkafka catches up with metadata
- return nil
+ early_return = true
  end

  raise if time_poll.attempts > MAX_POLL_RETRIES
@@ -356,8 +365,9 @@ module Karafka

  time_poll.checkpoint
  time_poll.backoff
- # On unknown errors we do our best to retry and handle them before raising
- retry
+ # On unknown errors we do our best to retry and handle them before raising unless we
+ # decide to early return
+ early_return ? nil : retry
  end

  # Builds a new rdkafka consumer instance based on the subscription group configuration
@@ -32,19 +32,23 @@ module Karafka
  def on_before_enqueue
  return unless topic.long_running_job?

- # This ensures, that when running LRJ with VP, things operate as expected
- coordinator.on_enqueued do |first_group_message|
+ # This ensures that when running LRJ with VP, things operate as expected run only once
+ # for all the virtual partitions collectively
+ coordinator.on_enqueued do
  # Pause at the first message in a batch. That way in case of a crash, we will not loose
- # any messages
- pause(first_group_message.offset, MAX_PAUSE_TIME)
+ # any messages.
+ #
+ # For VP it applies the same way and since VP cannot be used with MOM we should not have
+ # any edge cases here.
+ pause(coordinator.seek_offset, MAX_PAUSE_TIME)
  end
  end

  # Runs extra logic after consumption that is related to handling long-running jobs
  # @note This overwrites the '#on_after_consume' from the base consumer
  def on_after_consume
- coordinator.on_finished do |first_group_message, last_group_message|
- on_after_consume_regular(first_group_message, last_group_message)
+ coordinator.on_finished do |last_group_message|
+ on_after_consume_regular(last_group_message)
  end
  end

@@ -75,35 +79,27 @@ module Karafka

  # Handles the post-consumption flow depending on topic settings
  #
- # @param first_message [Karafka::Messages::Message]
- # @param last_message [Karafka::Messages::Message]
- def on_after_consume_regular(first_message, last_message)
+ # @param last_group_message [Karafka::Messages::Message]
+ def on_after_consume_regular(last_group_message)
  if coordinator.success?
  coordinator.pause_tracker.reset

  # We use the non-blocking one here. If someone needs the blocking one, can implement it
  # with manual offset management
  # Mark as consumed only if manual offset management is not on
- mark_as_consumed(last_message) unless topic.manual_offset_management? || revoked?
+ mark_as_consumed(last_group_message) unless topic.manual_offset_management? || revoked?

  # If this is not a long-running job there is nothing for us to do here
  return unless topic.long_running_job?

- # Once processing is done, we move to the new offset based on commits
- # Here, in case manual offset management is off, we have the new proper offset of a
- # first message from another batch from `@seek_offset`. If manual offset management
- # is on, we move to place where the user indicated it was finished. This can create an
- # interesting (yet valid) corner case, where with manual offset management on and no
- # marking as consumed, we end up with an infinite loop processing same messages over and
- # over again
- seek(@seek_offset || first_message.offset)
+ seek(coordinator.seek_offset)

  resume
  else
  # If processing failed, we need to pause
  # For long running job this will overwrite the default never-ending pause and will cause
  # the processing to keep going after the error backoff
- pause(@seek_offset || first_message.offset)
+ pause(coordinator.seek_offset)
  end
  end
  end
@@ -47,6 +47,22 @@ module Karafka

  [[%i[virtual_partitions partitioner], :respond_to_call]]
  end
+
+ # Make sure that manual offset management is not used together with Virtual Partitions
+ # This would not make any sense as there would be edge cases related to skipping
+ # messages even if there were errors.
+ virtual do |data, errors|
+ next unless errors.empty?
+
+ virtual_partitions = data[:virtual_partitions]
+ manual_offset_management = data[:manual_offset_management]
+
+ next unless virtual_partitions[:active]
+ next unless manual_offset_management
+ next if data[:tags].include?(:active_job)
+
+ [[%i[manual_offset_management], :not_with_virtual_partitions]]
+ end
  end
  end
  end
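
The new `virtual` validation above rejects any non-ActiveJob topic that enables both Virtual Partitions and manual offset management, surfacing the `manual_offset_management_not_with_virtual_partitions` message added to `config/errors.yml`. A hypothetical routing definition (topic and consumer names are made up) that would now fail when the routes are drawn:

```ruby
# Hypothetical example of a configuration the new sanity check rejects.
class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders_states do
      consumer OrdersStatesConsumer # assumed consumer class

      manual_offset_management true
      virtual_partitions(
        partitioner: ->(message) { message.key }
      )
      # => validation error at draw time:
      #    manual_offset_management cannot be used together with Virtual Partitions
    end
  end
end
```
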
@@ -34,7 +34,6 @@ module Karafka
  @on_enqueued_invoked = false
  @on_started_invoked = false
  @on_finished_invoked = false
- @first_message = messages.first
  @last_message = messages.last
  end
  end
@@ -52,7 +51,7 @@ module Karafka

  @on_enqueued_invoked = true

- yield(@first_message, @last_message)
+ yield(@last_message)
  end
  end

@@ -63,7 +62,7 @@ module Karafka

  @on_started_invoked = true

- yield(@first_message, @last_message)
+ yield(@last_message)
  end
  end

@@ -77,7 +76,7 @@ module Karafka

  @on_finished_invoked = true

- yield(@first_message, @last_message)
+ yield(@last_message)
  end
  end
  end
@@ -13,6 +13,8 @@ module Karafka
  # @return [Karafka::TimeTrackers::Pause]
  attr_reader :pause_tracker

+ attr_reader :seek_offset
+
  # @param pause_tracker [Karafka::TimeTrackers::Pause] pause tracker for given topic partition
  def initialize(pause_tracker)
  @pause_tracker = pause_tracker
@@ -23,17 +25,30 @@ module Karafka
  end

  # Starts the coordinator for given consumption jobs
- # @param _messages [Array<Karafka::Messages::Message>] batch of message for which we are
+ # @param messages [Array<Karafka::Messages::Message>] batch of message for which we are
  # going to coordinate work. Not used with regular coordinator.
- def start(_messages)
+ def start(messages)
  @mutex.synchronize do
  @running_jobs = 0
  # We need to clear the consumption results hash here, otherwise we could end up storing
  # consumption results of consumer instances we no longer control
  @consumptions.clear
+
+ # We set it on the first encounter and never again, because then the offset setting
+ # should be up to the consumers logic (our or the end user)
+ # Seek offset needs to be always initialized as for case where manual offset management
+ # is turned on, we need to have reference to the first offset even in case of running
+ # multiple batches without marking any messages as consumed. Rollback needs to happen to
+ # the last place we know of or the last message + 1 that was marked
+ @seek_offset ||= messages.first.offset
  end
  end

+ # @param offset [Integer] message offset
+ def seek_offset=(offset)
+ @mutex.synchronize { @seek_offset = offset }
+ end
+
  # Increases number of jobs that we handle with this coordinator
  def increment
  @mutex.synchronize { @running_jobs += 1 }
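
With the coordinator owning the seek offset, the value is set once from the first message of the first batch and only advances when something is marked as consumed, so error handling can always pause or seek back to a known position even across multiple unmarked batches. A self-contained illustration of that behaviour (not gem code):

```ruby
# Self-contained illustration (not gem code) of the seek offset behaviour
# described above: set once on the first batch, advanced only by marking.
class SeekTracker
  attr_reader :seek_offset

  # Called once per incoming batch
  def start(messages)
    @seek_offset ||= messages.first.offset
  end

  # Called when a message is marked as consumed
  def mark(message)
    @seek_offset = message.offset + 1
  end
end

Message = Struct.new(:offset)

tracker = SeekTracker.new
tracker.start([Message.new(100), Message.new(101)])
tracker.start([Message.new(102), Message.new(103)]) # second batch, nothing marked
tracker.seek_offset # => 100, so an error can still revert to the very first message

tracker.mark(Message.new(102))
tracker.seek_offset # => 103, the next offset to seek or pause on
```
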
@@ -68,9 +68,10 @@ module Karafka

  # Handles the simple routing case where we create one consumer group and allow for further
  # subscription group customization
- # @param subscription_group_name [String, Symbol] subscription group id
+ # @param subscription_group_name [String, Symbol] subscription group id. When not provided,
+ # a random uuid will be used
  # @param block [Proc] further topics definitions
- def subscription_group(subscription_group_name, &block)
+ def subscription_group(subscription_group_name = SecureRandom.uuid, &block)
  consumer_group('app') do
  target.public_send(:subscription_group=, subscription_group_name, &block)
  end
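
With the name now defaulting to `SecureRandom.uuid`, subscription groups can be declared without an explicit identifier. A minimal routing sketch with made-up topic and consumer names:

```ruby
# Minimal sketch: an anonymous subscription group (random UUID name) next to
# an explicitly named one. Topic and consumer names are assumptions.
class KarafkaApp < Karafka::App
  routes.draw do
    subscription_group do        # name defaults to SecureRandom.uuid
      topic :events do
        consumer EventsConsumer  # assumed consumer class
      end
    end

    subscription_group 'analytics' do
      topic :metrics do
        consumer MetricsConsumer # assumed consumer class
      end
    end
  end
end
```
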
@@ -6,7 +6,7 @@ module Karafka
  # It belongs to a consumer group as from 0.6 all the topics can work in the same consumer group
  # It is a part of Karafka's DSL.
  class Topic
- attr_reader :id, :name, :consumer_group
+ attr_reader :id, :name, :consumer_group, :tags
  attr_writer :consumer
  attr_accessor :subscription_group

@@ -32,6 +32,7 @@ module Karafka
  # Karafka 0.6 we can handle multiple Kafka instances with the same process and we can
  # have same topic name across multiple consumer groups
  @id = "#{consumer_group.id}_#{@name}"
+ @tags = []
  end

  INHERITABLE_ATTRIBUTES.each do |attribute|
@@ -93,7 +94,8 @@ module Karafka
  name: name,
  consumer: consumer,
  consumer_group_id: consumer_group.id,
- subscription_group: subscription_group
+ subscription_group: subscription_group,
+ tags: tags
  ).freeze
  end
  end
@@ -3,5 +3,5 @@
  # Main module namespace
  module Karafka
  # Current Karafka version
- VERSION = '2.0.8'
+ VERSION = '2.0.9'
  end
data/lib/karafka.rb CHANGED
@@ -13,6 +13,7 @@
  openssl
  base64
  date
+ singleton
  zeitwerk
  ].each(&method(:require))

data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: karafka
  version: !ruby/object:Gem::Version
- version: 2.0.8
+ version: 2.0.9
  platform: ruby
  authors:
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
  Qf04B9ceLUaC4fPVEz10FyobjaFoY4i32xRto3XnrzeAgfEe4swLq8bQsR3w/EF3
  MGU0FeSV2Yj7Xc2x/7BzLK8xQn5l7Yy75iPF+KP3vVmDHnNl
  -----END CERTIFICATE-----
- date: 2022-09-19 00:00:00.000000000 Z
+ date: 2022-09-22 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: karafka-core
metadata.gz.sig CHANGED
Binary file