karafka 2.4.2 → 2.4.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 5eb2aeb0bd3cd0d50d70b0fd792e8290315f3131222d3befab9b5a05bd478e07
-  data.tar.gz: 27b4d40fe18c3daf87ea3f2b601487d6cad5a766121374772179fc5fb3049f75
+  metadata.gz: 1b7333a7812643bc128c721e411449dfd477adfa52968c7c291900e622cdcc04
+  data.tar.gz: 7062798152f0d4e097e5c4fcd228798103a680441579bbc1918c8bf5a65ce98b
 SHA512:
-  metadata.gz: af5c4030ccd1f881558df342c3936152efc6183ccc6238246a327d6da8b716e65e3e56ec198a708b87f292bd01d9e426fb64b3c4b9a4f5b40111ee68795ec050
-  data.tar.gz: 7d04bb79ea03cc5eedf7a8e9ecd62abc0ff2b48aea2e4df929d691b931b728017adf843aec5c248a6d4e8b42dd1957c798e799b9df82ae009795ae6b414532ac
+  metadata.gz: bd24bd450ac1cda02d18d7d487317139988b943ecc92c606cb713f13c819db044d6feae978873d26445e1ebf8d8823bedfd7efae58bc53c8213be9ef56a84c1d
+  data.tar.gz: 0f2f4b50ea90cd76f6275e0cdc9eeada5e5cd30bf6c18a219209dfe4b717d90ef88d7c6b68797e9eb0615c83ba03ee738d41164014ce74aaf27970ef07ba5974
checksums.yaml.gz.sig CHANGED
Binary file
@@ -73,6 +73,7 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
+          - '3.4.0-preview1'
           - '3.3'
           - '3.2'
           - '3.1'
@@ -113,6 +114,7 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
+          - '3.4.0-preview1'
           - '3.3'
           - '3.2'
           - '3.1'
@@ -146,6 +148,11 @@ jobs:
 
           bundle config set without 'tools benchmarks docs'
 
+      - name: Fix directory permissions for Bundler
+        run: |
+          chmod -R o-w /opt/hostedtoolcache/Ruby/3*/x64/lib/ruby/gems/3*/gems
+          chmod +t /opt/hostedtoolcache/Ruby/3*/x64/lib/ruby/gems/3*/gems
+
       - name: Bundle install
         run: |
           bundle config set without development
@@ -166,6 +173,7 @@ jobs:
       fail-fast: false
       matrix:
         ruby:
+          - '3.4.0-preview1'
           - '3.3'
           - '3.2'
           - '3.1'
data/.ruby-version CHANGED
@@ -1 +1 @@
-3.3.1
+3.3.3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,30 @@
 # Karafka framework changelog
 
+## 2.4.4 (2024-07-04)
+- [Enhancement] Allow for offset storing from the Filtering API.
+- [Enhancement] Print more extensive error info on forceful shutdown.
+- [Enhancement] Include `original_key` in the DLQ dispatch headers.
+- [Enhancement] Support embedding mode control management from the trap context.
+- [Enhancement] Make sure that the listener thread is stopped before restarting.
+- [Fix] Do not block on hanging listener shutdown when invoking forceful shutdown.
+- [Fix] Static membership fencing error is not propagated explicitly enough.
+- [Fix] Make sure DLQ dispatches raw headers and not deserialized headers (same as payload).
+- [Fix] Fix a typo where `ms` in the logger listener would not have a space before it.
+- [Maintenance] Require `karafka-core` `>=` `2.4.3`.
+- [Maintenance] Allow for usage of `karafka-rdkafka` `~` `0.16` to support librdkafka `2.4.0`.
+- [Maintenance] Lower the precision reporting to 100 microseconds in the logger listener.
+
+## 2.4.3 (2024-06-12)
+- [Enhancement] Allow for customization of the Virtual Partitions reducer for enhanced parallelization.
+- [Enhancement] Add more error codes to early report on polling issues (kidlab).
+- [Enhancement] Add `transport`, `network_exception` and `coordinator_load_in_progress` alongside `timed_out` to retryable errors for the proxy.
+- [Enhancement] Improve the `strict_topics_namespacing` validation message.
+- [Change] Remove the default empty thread name from `Async` since Web has been upgraded.
+- [Fix] Installer does not respect directories in `KARAFKA_BOOT_FILE`.
+- [Fix] Fix a case where a non-absolute boot file path would not work as expected.
+- [Fix] Allow for installing Karafka in a non-existing (yet) directory.
+- [Maintenance] Require `waterdrop` `>=` `2.7.3` to support idempotent producer detection.
+
 ## 2.4.2 (2024-05-14)
 - [Enhancement] Validate ActiveJob adapter custom producer format.
 - [Fix] Internal seek does not resolve the offset correctly for time based lookup.
data/Gemfile CHANGED
@@ -19,6 +19,7 @@ end
 group :test do
   gem 'byebug'
   gem 'factory_bot'
+  gem 'ostruct'
   gem 'rspec'
   gem 'simplecov'
 end
data/Gemfile.lock CHANGED
@@ -1,19 +1,19 @@
 PATH
   remote: .
   specs:
-    karafka (2.4.2)
+    karafka (2.4.4)
       base64 (~> 0.2)
-      karafka-core (>= 2.4.0, < 2.5.0)
-      waterdrop (>= 2.7.0, < 3.0.0)
+      karafka-core (>= 2.4.3, < 2.5.0)
+      waterdrop (>= 2.7.3, < 3.0.0)
       zeitwerk (~> 2.3)
 
 GEM
   remote: https://rubygems.org/
   specs:
-    activejob (7.1.3.2)
-      activesupport (= 7.1.3.2)
+    activejob (7.1.3.4)
+      activesupport (= 7.1.3.4)
       globalid (>= 0.3.6)
-    activesupport (7.1.3.2)
+    activesupport (7.1.3.4)
       base64
       bigdecimal
       concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -26,26 +26,26 @@ GEM
     base64 (0.2.0)
     bigdecimal (3.1.8)
     byebug (11.1.3)
-    concurrent-ruby (1.2.3)
+    concurrent-ruby (1.3.3)
     connection_pool (2.4.1)
     diff-lcs (1.5.1)
     docile (1.4.0)
     drb (2.2.1)
-    erubi (1.12.0)
+    erubi (1.13.0)
     factory_bot (6.4.6)
       activesupport (>= 5.0.0)
-    ffi (1.16.3)
+    ffi (1.17.0)
     globalid (1.2.1)
       activesupport (>= 6.1)
     i18n (1.14.5)
       concurrent-ruby (~> 1.0)
-    karafka-core (2.4.0)
-      karafka-rdkafka (>= 0.15.0, < 0.16.0)
-    karafka-rdkafka (0.15.1)
+    karafka-core (2.4.3)
+      karafka-rdkafka (>= 0.15.0, < 0.17.0)
+    karafka-rdkafka (0.16.0)
       ffi (~> 1.15)
       mini_portile2 (~> 2.6)
       rake (> 12)
-    karafka-testing (2.4.3)
+    karafka-testing (2.4.4)
       karafka (>= 2.4.0, < 2.5.0)
       waterdrop (>= 2.7.0)
     karafka-web (0.9.1)
@@ -54,12 +54,13 @@ GEM
       karafka-core (>= 2.4.0, < 2.5.0)
       roda (~> 3.68, >= 3.69)
       tilt (~> 2.0)
-    mini_portile2 (2.8.6)
-    minitest (5.22.3)
+    mini_portile2 (2.8.7)
+    minitest (5.24.0)
     mutex_m (0.2.0)
-    rack (3.0.11)
+    ostruct (0.6.0)
+    rack (3.1.4)
     rake (13.2.1)
-    roda (3.79.0)
+    roda (3.81.0)
       rack
     rspec (3.13.0)
       rspec-core (~> 3.13.0)
@@ -67,7 +68,7 @@ GEM
       rspec-mocks (~> 3.13.0)
     rspec-core (3.13.0)
       rspec-support (~> 3.13.0)
-    rspec-expectations (3.13.0)
+    rspec-expectations (3.13.1)
       diff-lcs (>= 1.2.0, < 2.0)
       rspec-support (~> 3.13.0)
     rspec-mocks (3.13.1)
@@ -80,14 +81,14 @@ GEM
       simplecov_json_formatter (~> 0.1)
     simplecov-html (0.12.3)
     simplecov_json_formatter (0.1.4)
-    tilt (2.3.0)
+    tilt (2.4.0)
     tzinfo (2.0.6)
       concurrent-ruby (~> 1.0)
-    waterdrop (2.7.2)
+    waterdrop (2.7.3)
       karafka-core (>= 2.4.0, < 3.0.0)
       karafka-rdkafka (>= 0.15.1)
       zeitwerk (~> 2.3)
-    zeitwerk (2.6.13)
+    zeitwerk (2.6.16)
 
 PLATFORMS
   ruby
@@ -100,8 +101,9 @@ DEPENDENCIES
   karafka!
   karafka-testing (>= 2.4.0)
   karafka-web (>= 0.9.0)
+  ostruct
   rspec
   simplecov
 
 BUNDLED WITH
-   2.5.9
+   2.5.14
@@ -121,8 +121,9 @@ en:
     declaratives.details_format: needs to be a hash with only symbol keys
 
     inconsistent_namespacing: |
-      needs to be consistent namespacing style
-      disable this validation by setting config.strict_topics_namespacing to false
+      needs to follow a consistent namespacing style using either dots (.) or underscores (_), but not both.
+      This ensures proper Kafka metrics reporting and avoids name collisions.
+      To disable this validation, set config.strict_topics_namespacing to false.
 
     deserializers.active_format: 'needs to be true'
     deserializers.payload_format: 'needs to respond to #call'
@@ -4,6 +4,7 @@ en:
     virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
     virtual_partitions.max_partitions_format: needs to be equal or more than 1
     virtual_partitions.offset_metadata_strategy_format: needs to be either :exact or :current
+    virtual_partitions.reducer_format: "needs to respond to `#call`"
 
     long_running_job.active_format: needs to be either true or false
data/karafka.gemspec CHANGED
@@ -22,8 +22,8 @@ Gem::Specification.new do |spec|
   DESC
 
   spec.add_dependency 'base64', '~> 0.2'
-  spec.add_dependency 'karafka-core', '>= 2.4.0', '< 2.5.0'
-  spec.add_dependency 'waterdrop', '>= 2.7.0', '< 3.0.0'
+  spec.add_dependency 'karafka-core', '>= 2.4.3', '< 2.5.0'
+  spec.add_dependency 'waterdrop', '>= 2.7.3', '< 3.0.0'
   spec.add_dependency 'zeitwerk', '~> 2.3'
 
   spec.required_ruby_version = '>= 3.0.0'
@@ -21,7 +21,7 @@ module Karafka
 
     # Where should we map proper files from templates
     INSTALL_FILES_MAP = {
-      'karafka.rb.erb' => Karafka.boot_file.basename,
+      'karafka.rb.erb' => Karafka.boot_file,
       'application_consumer.rb.erb' => 'app/consumers/application_consumer.rb',
       'example_consumer.rb.erb' => 'app/consumers/example_consumer.rb'
     }.freeze
@@ -51,6 +51,7 @@ module Karafka
 
     INSTALL_FILES_MAP.each do |source, target|
       pathed_target = Karafka.root.join(target)
+      FileUtils.mkdir_p File.dirname(pathed_target)
 
       template = File.read(Karafka.core_root.join("templates/#{source}"))
       render = ::ERB.new(template, trim_mode: '-').result(binding)
@@ -553,6 +553,15 @@ module Karafka
         early_report = true
       when :transport # -195
         early_report = true
+      when :topic_authorization_failed # 29
+        early_report = true
+      when :group_authorization_failed # 30
+        early_report = true
+      when :cluster_authorization_failed # 31
+        early_report = true
+      # This can happen for many reasons, including issues with static membership being fenced
+      when :fatal # -150
+        early_report = true
       # @see
       # https://github.com/confluentinc/confluent-kafka-dotnet/issues/1366#issuecomment-821842990
       # This will be raised each time poll detects a non-existing topic. When auto creation is
@@ -16,8 +16,11 @@ module Karafka
     RETRYABLE_DEFAULT_ERRORS = %i[
       all_brokers_down
       timed_out
+      transport
+      network_exception
       not_coordinator
       not_leader_for_partition
+      coordinator_load_in_progress
     ].freeze
 
     private_constant :RETRYABLE_DEFAULT_ERRORS
@@ -2,11 +2,28 @@
 
 module Karafka
   # Allows to start and stop Karafka as part of a different process
+  # Following limitations and restrictions apply:
+  #
+  # - `#start` cannot be called from a trap context - non blocking
+  # - `#quiet` - can be called from a trap context - non blocking
+  # - `#stop` - can be called from a trap context - blocking
   module Embedded
     class << self
+      # Lock for ensuring we do not control embedding in parallel
+      MUTEX = Mutex.new
+
+      private_constant :MUTEX
+
       # Starts Karafka without supervision and without ownership of signals in a background thread
       # so it won't interrupt other things running
       def start
+        MUTEX.synchronize do
+          # Prevent from double-starting
+          return if @started
+
+          @started = true
+        end
+
         Thread.new do
           Thread.current.name = 'karafka.embedded'
 
@@ -19,9 +36,32 @@ module Karafka
       #
       # @note This method is blocking because we want to wait until Karafka is stopped with final
       #   process shutdown
+      #
+      # @note This method **is** safe to run from a trap context.
       def stop
-        # Stop needs to be blocking to wait for all the things to finalize
-        Karafka::Server.stop
+        # Prevent from double stopping
+        unless @stopping
+          Thread.new do
+            Thread.current.name = 'karafka.embedded.stopping'
+
+            stop = false
+
+            # We spawn a new thread because `#stop` may be called from a trap context
+            MUTEX.synchronize do
+              break if @stopping
+
+              @stopping = true
+              stop = true
+            end
+
+            next unless stop
+
+            Karafka::Server.stop
+          end
+        end
+
+        # Since we want to have this blocking, we wait for the background thread
+        sleep(0.1) until Karafka::App.terminated?
       end
 
       # Quiets Karafka upon any event
@@ -29,6 +69,8 @@ module Karafka
       # @note This method is not blocking and will not wait for Karafka to fully quiet.
       #   It will trigger the quiet procedure but won't wait.
       #
+      # @note This method **can** be called from a trap context.
+      #
       # @note Please keep in mind you need to `#stop` to actually stop the server anyhow.
       def quiet
         Karafka::Server.quiet
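
For context, a minimal sketch of how these embedding controls are meant to be combined in a host process; the host process shape and signal choices are illustrative, not part of this diff:

require 'karafka'

# Start in the background; per the notes above, #start must NOT be
# called from a trap context
Karafka::Embedded.start

# With this release, #quiet and #stop are safe to call from trap contexts
Signal.trap('TSTP') { Karafka::Embedded.quiet } # non-blocking
Signal.trap('QUIT') { Karafka::Embedded.stop }  # blocks until terminated

sleep # placeholder for the host process' own work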
@@ -22,13 +22,22 @@ module Karafka
       def included(base)
         base.extend ::Forwardable
 
-        base.def_delegators :@thread, :join, :terminate, :alive?, :name
+        base.def_delegators :@thread, :join, :terminate, :name
       end
     end
 
+    # @return [Boolean] true if thread is present and is running, false otherwise
+    def alive?
+      MUTEX.synchronize do
+        return false unless @thread
+
+        @thread.alive?
+      end
+    end
+
     # Runs the `#call` method in a new thread
     # @param thread_name [String] name that we want to assign to the thread when we start it
-    def async_call(thread_name = '')
+    def async_call(thread_name)
       MUTEX.synchronize do
         return if @thread&.alive?
@@ -41,7 +41,7 @@ module Karafka
       return unless log_polling?
 
       listener = event[:caller]
-      time = event[:time]
+      time = event[:time].round(2)
       messages_count = event[:messages_buffer].size
 
       message = "[#{listener.id}] Polled #{messages_count} messages in #{time}ms"
@@ -69,14 +69,14 @@ module Karafka
     # @param event [Karafka::Core::Monitoring::Event] event details including payload
     def on_worker_processed(event)
       job = event[:job]
-      time = event[:time]
+      time = event[:time].round(2)
       job_type = job.class.to_s.split('::').last
       consumer = job.executor.topic.consumer
       topic = job.executor.topic.name
       partition = job.executor.partition
       info <<~MSG.tr("\n", ' ').strip!
         [#{job.id}] #{job_type} job for #{consumer}
-        on #{topic}/#{partition} finished in #{time}ms
+        on #{topic}/#{partition} finished in #{time} ms
       MSG
     end
 
@@ -306,7 +306,24 @@ module Karafka
       fatal "Runner crashed due to an error: #{error}"
       fatal details
     when 'app.stopping.error'
-      error 'Forceful Karafka server stop'
+      # Counts number of workers and listeners that were still active when forcing the
+      # shutdown. Please note, that unless all listeners are closed, workers will not finalize
+      # their operations as well.
+      # We need to check if listeners and workers are assigned as during super early stages of
+      # boot they are not.
+      listeners = Server.listeners ? Server.listeners.count(&:active?) : 0
+      workers = Server.workers ? Server.workers.count(&:alive?) : 0
+
+      message = <<~MSG.tr("\n", ' ').strip!
+        Forceful Karafka server stop with:
+        #{workers} active workers and
+        #{listeners} active listeners
+      MSG
+
+      error message
+    when 'app.forceful_stopping.error'
+      error "Forceful shutdown error occurred: #{error}"
+      error details
     when 'librdkafka.error'
       error "librdkafka internal error occurred: #{error}"
       error details
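
Since this release introduces the `app.forceful_stopping.error` type, a hedged sketch of observing it via the standard monitor subscription; the tracker call is a hypothetical stand-in:

Karafka.monitor.subscribe('error.occurred') do |event|
  # New in 2.4.4: emitted when a listener shutdown fails during forceful stop
  next unless event[:type] == 'app.forceful_stopping.error'

  # Hypothetical error tracker; replace with your own reporting
  MyErrorTracker.notify(event[:error])
end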
@@ -216,6 +216,10 @@ module Karafka
       next unless multi_part_sgs_families.include?(sg_listener.subscription_group.name)
       # Skip already active connections
       next unless sg_listener.pending? || sg_listener.stopped?
+      # Ensure that the listener thread under which we operate is already stopped and
+      # is not dangling. While not likely to happen, this may protect against a
+      # case where a shutdown critical crash would cause a restart of the same listener
+      next if sg_listener.alive?
 
       touch(sg_listener.subscription_group.id)
       sg_listener.start!
@@ -89,6 +89,23 @@ module Karafka
       applied.map(&:cursor).compact.min_by(&:offset)
     end
 
+    # @return [Boolean] did any of the filters request offset storage during filter
+    #   application
+    def mark_as_consumed?
+      # We can manage filtering offset only when user wanted that and there is a cursor
+      # to use
+      applied.any?(&:mark_as_consumed?) && cursor
+    end
+
+    # @return [Symbol] `:mark_as_consumed` or `:mark_as_consumed!`
+    def marking_method
+      candidates = applied.map(&:marking_method)
+
+      return :mark_as_consumed! if candidates.include?(:mark_as_consumed!)
+
+      :mark_as_consumed
+    end
+
     private
 
     # @return [Boolean] is filtering active
@@ -54,6 +54,18 @@ module Karafka
       def timeout
         0
       end
+
+      # @return [Boolean] should we use the cursor value to mark as consumed. If any of the
+      #   filters returns true, we use the lowest applicable cursor value (if any)
+      def mark_as_consumed?
+        false
+      end
+
+      # @return [Symbol] `:mark_as_consumed` or `:mark_as_consumed!`. Applicable only if
+      #   marking is requested
+      def marking_method
+        :mark_as_consumed
+      end
     end
   end
 end
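
To illustrate the new hooks, a sketch of a custom filter that removes stale messages and asks Karafka to store their offsets; it assumes the Pro Filtering API base class shown above and a factory-style `filter` registration, with class names and the age threshold being illustrative:

class StaleMessagesFilter < Karafka::Pro::Processing::Filters::Base
  MAX_AGE = 24 * 60 * 60 # one day, illustrative threshold

  def apply!(messages)
    @applied = false
    @cursor = nil

    messages.delete_if do |message|
      next false if message.timestamp > Time.now.utc - MAX_AGE

      # Remember the last removed message so its offset can be stored
      @applied = true
      @cursor = message
      true
    end
  end

  # New in 2.4.4: request offset storage for the filtered-out messages
  def mark_as_consumed?
    @applied
  end

  # Async marking flavor; :mark_as_consumed! would force a blocking mark
  def marking_method
    :mark_as_consumed
  end
end

# Illustrative routing registration of the filter factory
topic :visits do
  consumer VisitsConsumer
  filter ->(*) { StaleMessagesFilter.new }
end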
@@ -33,7 +33,7 @@ module Karafka
 
       # Time on message is in seconds with ms precision, so we need to convert the ttl that
       # is in ms to this format
-      border = ::Time.now.utc - @delay / 1_000.to_f
+      border = ::Time.now.utc - @delay / 1_000.0
 
       messages.delete_if do |message|
         too_young = message.timestamp > border
@@ -52,7 +52,7 @@ module Karafka
     def timeout
       return 0 unless @cursor
 
-      timeout = (@delay / 1_000.to_f) - (::Time.now.utc - @cursor.timestamp)
+      timeout = (@delay / 1_000.0) - (::Time.now.utc - @cursor.timestamp)
 
       timeout <= 0 ? 0 : timeout * 1_000
     end
@@ -25,6 +25,8 @@ module Karafka
     def call(topic, messages, coordinator)
       ktopic = @subscription_group.topics.find(topic)
 
+      vps = ktopic.virtual_partitions
+
       # We only partition work if we have:
       # - a virtual partitioner
       # - more than one thread to process the data
@@ -38,21 +40,19 @@ module Karafka
       #
       # This is great because it allows us to run things without the parallelization that adds
       # a bit of uncertainty and allows us to use DLQ and safely skip messages if needed.
-      if ktopic.virtual_partitions? &&
-         ktopic.virtual_partitions.max_partitions > 1 &&
-         !coordinator.collapsed?
-        # We need to reduce it to the max concurrency, so the group_id is not a direct effect
-        # of the end user action. Otherwise the persistence layer for consumers would cache
-        # it forever and it would cause memory leaks
-        #
-        # This also needs to be consistent because the aggregation here needs to warrant, that
-        # the same partitioned message will always be assigned to the same virtual partition.
-        # Otherwise in case of a window aggregation with VP spanning across several polls, the
-        # data could not be complete.
+      if vps.active? && vps.max_partitions > 1 && !coordinator.collapsed?
         groupings = messages.group_by do |msg|
-          key = ktopic.virtual_partitions.partitioner.call(msg).to_s.sum
-
-          key % ktopic.virtual_partitions.max_partitions
+          # We need to reduce it to the max concurrency, so the group_id is not a direct effect
+          # of the end user action. Otherwise the persistence layer for consumers would cache
+          # it forever and it would cause memory leaks
+          #
+          # This also needs to be consistent because the aggregation here needs to warrant,
+          # that the same partitioned message will always be assigned to the same virtual
+          # partition. Otherwise in case of a window aggregation with VP spanning across
+          # several polls, the data could not be complete.
+          vps.reducer.call(
+            vps.partitioner.call(msg)
+          )
        end
 
        groupings.each do |key, messages_group|
@@ -154,11 +154,12 @@ module Karafka
       topic: topic.dead_letter_queue.topic,
       key: original_partition,
       payload: skippable_message.raw_payload,
-      headers: skippable_message.headers.merge(
+      headers: skippable_message.raw_headers.merge(
         'original_topic' => topic.name,
         'original_partition' => original_partition,
         'original_offset' => skippable_message.offset.to_s,
         'original_consumer_group' => topic.consumer_group.id,
+        'original_key' => skippable_message.raw_key.to_s,
         'original_attempts' => attempt.to_s
       )
     }
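
A sketch of a consumer on the DLQ topic reading the enriched headers; the consumer and topic names are illustrative:

class DlqConsumer < Karafka::BaseConsumer
  def consume
    messages.each do |message|
      h = message.headers

      # 'original_key' is new in 2.4.4; the remaining headers were already dispatched
      Karafka.logger.info(
        "DLQ message from #{h['original_topic']}/#{h['original_partition']} " \
        "offset #{h['original_offset']} key #{h['original_key']}"
      )

      mark_as_consumed(message)
    end
  end
end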
@@ -66,6 +66,19 @@ module Karafka
       # and this should not happen
       throttle_timeout = filter.timeout
 
+      # If user requested marking when applying filter, we mark. We may be in the user
+      # flow but even then this is not a problem. Older offsets will be ignored since
+      # we do not force the offset update (expected) and newer are on the user to control.
+      # This can be primarily used when filtering large quantities of data to mark on the
+      # idle runs, so lag reporting is aware that those messages were not consumed but also
+      # are no longer relevant
+      if filter.mark_as_consumed?
+        send(
+          filter.marking_method,
+          filter.cursor
+        )
+      end
+
       case filter.action
       when :skip
         nil
@@ -22,6 +22,7 @@ module Karafka
           :partitioner,
           :max_partitions,
           :offset_metadata_strategy,
+          :reducer,
           keyword_init: true
         ) { alias_method :active?, :active }
       end
@@ -31,6 +31,7 @@ module Karafka
     nested(:virtual_partitions) do
       required(:active) { |val| [true, false].include?(val) }
       required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
+      required(:reducer) { |val| val.respond_to?(:call) }
       required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
       required(:offset_metadata_strategy) { |val| %i[exact current].include?(val) }
     end
@@ -26,18 +26,24 @@ module Karafka
       # @param offset_metadata_strategy [Symbol] how we should match the metadata for the
       #   offset. `:exact` will match the offset matching metadata and `:current` will select
       #   the most recently reported metadata
+      # @param reducer [nil, #call] reducer for VPs key. It allows for using a custom
+      #   reducer to achieve enhanced parallelization when the default reducer is not enough.
       # @return [VirtualPartitions] method that allows to set the virtual partitions details
       #   during the routing configuration and then allows to retrieve it
       def virtual_partitions(
         max_partitions: Karafka::App.config.concurrency,
         partitioner: nil,
-        offset_metadata_strategy: :current
+        offset_metadata_strategy: :current,
+        reducer: nil
       )
         @virtual_partitions ||= Config.new(
           active: !partitioner.nil?,
           max_partitions: max_partitions,
           partitioner: partitioner,
-          offset_metadata_strategy: offset_metadata_strategy
+          offset_metadata_strategy: offset_metadata_strategy,
+          # If no reducer provided, we use this one. It just runs a modulo on the sum of
+          # a stringified version, providing fairly good distribution.
+          reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions }
        )
      end
 
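Given the DSL signature above, a sketch of routing with a custom reducer; the topic, consumer, and key choice are illustrative:

class KarafkaApp < Karafka::App
  routes.draw do
    topic :orders_states do
      consumer OrdersStatesConsumer

      virtual_partitions(
        partitioner: ->(message) { message.headers['order_id'] },
        max_partitions: 10,
        # Custom reducer: spread by the numeric tail of the order id instead
        # of the default string-sum modulo shown above
        reducer: ->(virtual_key) { virtual_key.to_s[/\d+\z/].to_i % 10 }
      )
    end
  end
end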
@@ -3,17 +3,18 @@
 module Karafka
   # Class used to run the Karafka listeners in separate threads
   class Runner
-    def initialize
-      @manager = App.config.internal.connection.manager
-      @conductor = App.config.internal.connection.conductor
-    end
+    include Helpers::ConfigImporter.new(
+      manager: %i[internal connection manager],
+      conductor: %i[internal connection conductor],
+      jobs_queue_class: %i[internal processing jobs_queue_class]
+    )
 
     # Starts listening on all the listeners asynchronously and handles the jobs queue closing
     # after listeners are done with their work.
     def call
       # Despite possibility of having several independent listeners, we aim to have one queue for
       # jobs across and one workers poll for that
-      jobs_queue = App.config.internal.processing.jobs_queue_class.new
+      jobs_queue = jobs_queue_class.new
 
       workers = Processing::WorkersBatch.new(jobs_queue)
       listeners = Connection::ListenersBatch.new(jobs_queue)
@@ -23,7 +24,7 @@ module Karafka
       Karafka::App.run!
 
       # Register all the listeners so they can be started and managed
-      @manager.register(listeners)
+      manager.register(listeners)
 
       workers.each_with_index { |worker, i| worker.async_call("karafka.worker##{i}") }
 
@@ -32,10 +33,10 @@ module Karafka
       Karafka::Server.listeners = listeners
       Karafka::Server.jobs_queue = jobs_queue
 
-      until @manager.done?
-        @conductor.wait
+      until manager.done?
+        conductor.wait
 
-        @manager.control
+        manager.control
       end
 
       # We close the jobs queue only when no listener threads are working.
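
For orientation on the pattern this hunk switches to: `Helpers::ConfigImporter.new` takes a mapping of method names to config paths and, when included, defines readers that dig into `Karafka::App.config`, which is why the instance variables above disappear. A minimal hedged sketch, using the `concurrency` setting already referenced elsewhere in this diff:

class ConcurrencyAware
  include Karafka::Helpers::ConfigImporter.new(
    concurrency: %i[concurrency]
  )

  def workers_count
    # `concurrency` resolves to Karafka::App.config.concurrency
    concurrency
  end
end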
@@ -3,6 +3,12 @@
 module Karafka
   # Karafka consuming server class
   class Server
+    # How long should we wait on the listeners forceful shutdown when they are stuck beyond the
+    # shutdown timeout before forcing a bypass
+    FORCEFUL_SHUTDOWN_WAIT = 5
+
+    private_constant :FORCEFUL_SHUTDOWN_WAIT
+
     class << self
       # Set of consuming threads. Each consumer thread contains a single consumer
       attr_accessor :listeners
@@ -105,9 +111,23 @@ module Karafka
       # We're done waiting, lets kill them!
       workers.each(&:terminate)
       listeners.active.each(&:terminate)
+
       # We always need to shutdown clients to make sure we do not force the GC to close consumer.
       # This can cause memory leaks and crashes.
-      listeners.each(&:shutdown)
+      # We run it in a separate thread in case this would hang and we ignore it after the time
+      # we assigned to it and force shutdown as we prefer to stop the process rather than wait
+      # indefinitely even with risk of VM crash as this is a last resort.
+      Thread.new do
+        listeners.each(&:shutdown)
+      rescue StandardError => e
+        # If anything wrong happened during shutdown, we also want to record it
+        Karafka.monitor.instrument(
+          'error.occurred',
+          caller: self,
+          error: e,
+          type: 'app.forceful_stopping.error'
+        )
+      end.join(FORCEFUL_SHUTDOWN_WAIT)
 
       # We also do not forcefully terminate everything when running in the embedded mode,
       # otherwise we would overwrite the shutdown process of the process that started Karafka
@@ -52,7 +52,9 @@ module Karafka
         fetch.wait.max.ms
         group.id
         group.instance.id
+        group.protocol
         group.protocol.type
+        group.remote.assignor
         heartbeat.interval.ms
         interceptors
         internal.termination.signal
@@ -3,5 +3,5 @@
 # Main module namespace
 module Karafka
   # Current Karafka version
-  VERSION = '2.4.2'
+  VERSION = '2.4.4'
 end
data/lib/karafka.rb CHANGED
@@ -92,7 +92,17 @@ module Karafka
   #   KARAFKA_BOOT_FILE='/home/app_path/app.rb'
   #   Karafka.boot_file #=> '/home/app_path/app.rb'
   def boot_file
-    Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'karafka.rb'))
+    boot_file = Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'karafka.rb'))
+
+    return boot_file if boot_file.absolute?
+    return boot_file if boot_file.to_s == 'false'
+
+    Pathname.new(
+      File.expand_path(
+        boot_file,
+        Karafka.root
+      )
+    )
   end
 
   # We need to be able to overwrite both monitor and logger after the configuration in case they
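
Illustrative effect of the relative-path resolution above (paths hypothetical):

ENV['KARAFKA_BOOT_FILE'] = 'config/karafka.rb'

# Assuming Karafka.root is /app, this now resolves to /app/config/karafka.rb
# instead of misbehaving on the relative path
Karafka.boot_file #=> #<Pathname:/app/config/karafka.rb>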
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: karafka
 version: !ruby/object:Gem::Version
-  version: 2.4.2
+  version: 2.4.4
 platform: ruby
 authors:
 - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
   AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
   msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
   -----END CERTIFICATE-----
-date: 2024-05-14 00:00:00.000000000 Z
+date: 2024-07-04 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: base64
@@ -57,7 +57,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.4.0
+      version: 2.4.3
   - - "<"
     - !ruby/object:Gem::Version
       version: 2.5.0
@@ -67,7 +67,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.4.0
+      version: 2.4.3
   - - "<"
     - !ruby/object:Gem::Version
       version: 2.5.0
@@ -77,7 +77,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.7.0
+      version: 2.7.3
   - - "<"
     - !ruby/object:Gem::Version
       version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.7.0
+      version: 2.7.3
   - - "<"
     - !ruby/object:Gem::Version
       version: 3.0.0
@@ -549,7 +549,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.5.9
+rubygems_version: 3.5.11
 signing_key:
 specification_version: 4
 summary: Karafka is Ruby and Rails efficient Kafka processing framework.
metadata.gz.sig CHANGED
Binary file