karafka 2.4.2 → 2.4.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5eb2aeb0bd3cd0d50d70b0fd792e8290315f3131222d3befab9b5a05bd478e07
4
- data.tar.gz: 27b4d40fe18c3daf87ea3f2b601487d6cad5a766121374772179fc5fb3049f75
3
+ metadata.gz: c37b855609a9d20ca0b5cf13875d30ccb71b4072b3f07886f10445ce761c6e1e
4
+ data.tar.gz: 852fa3340f0dd1cbc2286823a2451c635568440c3e753e7235a72dc85289fcf5
5
5
  SHA512:
6
- metadata.gz: af5c4030ccd1f881558df342c3936152efc6183ccc6238246a327d6da8b716e65e3e56ec198a708b87f292bd01d9e426fb64b3c4b9a4f5b40111ee68795ec050
7
- data.tar.gz: 7d04bb79ea03cc5eedf7a8e9ecd62abc0ff2b48aea2e4df929d691b931b728017adf843aec5c248a6d4e8b42dd1957c798e799b9df82ae009795ae6b414532ac
6
+ metadata.gz: 373b460007cefdb039b5f2fd00abdc722f42c5bd8f324dad121c7e293264ca4a63cb63cb4fa5057e0f2ff856fe6c28073c04d2ffb21250dd534408ee511807c9
7
+ data.tar.gz: 595e408578801d949bb2b1e8d3337e8535473b80c7a1db34e56822214de9fd2083a594433e264ed8675c2292cd3aa73bacaef44938ad7a4cd743292097fd4d8f
checksums.yaml.gz.sig CHANGED
Binary file
@@ -73,6 +73,7 @@ jobs:
73
73
  fail-fast: false
74
74
  matrix:
75
75
  ruby:
76
+ - '3.4.0-preview1'
76
77
  - '3.3'
77
78
  - '3.2'
78
79
  - '3.1'
@@ -113,6 +114,7 @@ jobs:
113
114
  fail-fast: false
114
115
  matrix:
115
116
  ruby:
117
+ - '3.4.0-preview1'
116
118
  - '3.3'
117
119
  - '3.2'
118
120
  - '3.1'
@@ -166,6 +168,7 @@ jobs:
166
168
  fail-fast: false
167
169
  matrix:
168
170
  ruby:
171
+ - '3.4.0-preview1'
169
172
  - '3.3'
170
173
  - '3.2'
171
174
  - '3.1'
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.3.1
1
+ 3.3.3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.4.3 (2024-06-12)
4
+ - [Enhancement] Allow for customization of Virtual Partitions reducer for enhanced parallelization.
5
+ - [Enhancement] Add more error codes to early report on polling issues (kidlab)
6
+ - [Enhancement] Add `transport`, `network_exception` and `coordinator_load_in_progress` alongside `timed_out` to retryable errors for the proxy.
7
+ - [Enhancement] Improve `strict_topics_namespacing` validation message.
8
+ - [Change] Remove default empty thread name from `Async` since Web has been upgraded.
9
+ - [Fix] Installer doesn't respect directories in `KARAFKA_BOOT_FILE`.
10
+ - [Fix] Fix case where a non-absolute boot file path would not work as expected.
11
+ - [Fix] Allow for installing Karafka in a non-existing (yet) directory.
12
+ - [Maintenance] Require `waterdrop` `>=` `2.7.3` to support idempotent producer detection.
13
+
3
14
  ## 2.4.2 (2024-05-14)
4
15
  - [Enhancement] Validate ActiveJob adapter custom producer format.
5
16
  - [Fix] Internal seek does not resolve the offset correctly for time based lookup.
data/Gemfile CHANGED
@@ -19,6 +19,7 @@ end
19
19
  group :test do
20
20
  gem 'byebug'
21
21
  gem 'factory_bot'
22
+ gem 'ostruct'
22
23
  gem 'rspec'
23
24
  gem 'simplecov'
24
25
  end
data/Gemfile.lock CHANGED
@@ -1,19 +1,19 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.4.2)
4
+ karafka (2.4.3)
5
5
  base64 (~> 0.2)
6
6
  karafka-core (>= 2.4.0, < 2.5.0)
7
- waterdrop (>= 2.7.0, < 3.0.0)
7
+ waterdrop (>= 2.7.3, < 3.0.0)
8
8
  zeitwerk (~> 2.3)
9
9
 
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- activejob (7.1.3.2)
14
- activesupport (= 7.1.3.2)
13
+ activejob (7.1.3.4)
14
+ activesupport (= 7.1.3.4)
15
15
  globalid (>= 0.3.6)
16
- activesupport (7.1.3.2)
16
+ activesupport (7.1.3.4)
17
17
  base64
18
18
  bigdecimal
19
19
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -26,7 +26,7 @@ GEM
26
26
  base64 (0.2.0)
27
27
  bigdecimal (3.1.8)
28
28
  byebug (11.1.3)
29
- concurrent-ruby (1.2.3)
29
+ concurrent-ruby (1.3.1)
30
30
  connection_pool (2.4.1)
31
31
  diff-lcs (1.5.1)
32
32
  docile (1.4.0)
@@ -34,7 +34,7 @@ GEM
34
34
  erubi (1.12.0)
35
35
  factory_bot (6.4.6)
36
36
  activesupport (>= 5.0.0)
37
- ffi (1.16.3)
37
+ ffi (1.17.0)
38
38
  globalid (1.2.1)
39
39
  activesupport (>= 6.1)
40
40
  i18n (1.14.5)
@@ -54,12 +54,13 @@ GEM
54
54
  karafka-core (>= 2.4.0, < 2.5.0)
55
55
  roda (~> 3.68, >= 3.69)
56
56
  tilt (~> 2.0)
57
- mini_portile2 (2.8.6)
58
- minitest (5.22.3)
57
+ mini_portile2 (2.8.7)
58
+ minitest (5.23.1)
59
59
  mutex_m (0.2.0)
60
+ ostruct (0.6.0)
60
61
  rack (3.0.11)
61
62
  rake (13.2.1)
62
- roda (3.79.0)
63
+ roda (3.80.0)
63
64
  rack
64
65
  rspec (3.13.0)
65
66
  rspec-core (~> 3.13.0)
@@ -83,11 +84,11 @@ GEM
83
84
  tilt (2.3.0)
84
85
  tzinfo (2.0.6)
85
86
  concurrent-ruby (~> 1.0)
86
- waterdrop (2.7.2)
87
+ waterdrop (2.7.3)
87
88
  karafka-core (>= 2.4.0, < 3.0.0)
88
89
  karafka-rdkafka (>= 0.15.1)
89
90
  zeitwerk (~> 2.3)
90
- zeitwerk (2.6.13)
91
+ zeitwerk (2.6.15)
91
92
 
92
93
  PLATFORMS
93
94
  ruby
@@ -100,8 +101,9 @@ DEPENDENCIES
100
101
  karafka!
101
102
  karafka-testing (>= 2.4.0)
102
103
  karafka-web (>= 0.9.0)
104
+ ostruct
103
105
  rspec
104
106
  simplecov
105
107
 
106
108
  BUNDLED WITH
107
- 2.5.9
109
+ 2.5.11
@@ -121,8 +121,9 @@ en:
121
121
  declaratives.details_format: needs to be a hash with only symbol keys
122
122
 
123
123
  inconsistent_namespacing: |
124
- needs to be consistent namespacing style
125
- disable this validation by setting config.strict_topics_namespacing to false
124
+ needs to follow a consistent namespacing style using either dots (.) or underscores (_), but not both.
125
+ This ensures proper Kafka metrics reporting and avoids name collisions.
126
+ To disable this validation, set config.strict_topics_namespacing to false.
126
127
 
127
128
  deserializers.active_format: 'needs to be true'
128
129
  deserializers.payload_format: 'needs to respond to #call'
@@ -4,6 +4,7 @@ en:
4
4
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
6
  virtual_partitions.offset_metadata_strategy_format: needs to be either :exact or :current
7
+ virtual_partitions.reducer_format: "needs to respond to `#call`"
7
8
 
8
9
  long_running_job.active_format: needs to be either true or false
9
10
 
data/karafka.gemspec CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
23
23
 
24
24
  spec.add_dependency 'base64', '~> 0.2'
25
25
  spec.add_dependency 'karafka-core', '>= 2.4.0', '< 2.5.0'
26
- spec.add_dependency 'waterdrop', '>= 2.7.0', '< 3.0.0'
26
+ spec.add_dependency 'waterdrop', '>= 2.7.3', '< 3.0.0'
27
27
  spec.add_dependency 'zeitwerk', '~> 2.3'
28
28
 
29
29
  spec.required_ruby_version = '>= 3.0.0'
@@ -21,7 +21,7 @@ module Karafka
21
21
 
22
22
  # Where should we map proper files from templates
23
23
  INSTALL_FILES_MAP = {
24
- 'karafka.rb.erb' => Karafka.boot_file.basename,
24
+ 'karafka.rb.erb' => Karafka.boot_file,
25
25
  'application_consumer.rb.erb' => 'app/consumers/application_consumer.rb',
26
26
  'example_consumer.rb.erb' => 'app/consumers/example_consumer.rb'
27
27
  }.freeze
@@ -51,6 +51,7 @@ module Karafka
51
51
 
52
52
  INSTALL_FILES_MAP.each do |source, target|
53
53
  pathed_target = Karafka.root.join(target)
54
+ FileUtils.mkdir_p File.dirname(pathed_target)
54
55
 
55
56
  template = File.read(Karafka.core_root.join("templates/#{source}"))
56
57
  render = ::ERB.new(template, trim_mode: '-').result(binding)
@@ -553,6 +553,12 @@ module Karafka
553
553
  early_report = true
554
554
  when :transport # -195
555
555
  early_report = true
556
+ when :topic_authorization_failed # 29
557
+ early_report = true
558
+ when :group_authorization_failed # 30
559
+ early_report = true
560
+ when :cluster_authorization_failed # 31
561
+ early_report = true
556
562
  # @see
557
563
  # https://github.com/confluentinc/confluent-kafka-dotnet/issues/1366#issuecomment-821842990
558
564
  # This will be raised each time poll detects a non-existing topic. When auto creation is
@@ -16,8 +16,11 @@ module Karafka
16
16
  RETRYABLE_DEFAULT_ERRORS = %i[
17
17
  all_brokers_down
18
18
  timed_out
19
+ transport
20
+ network_exception
19
21
  not_coordinator
20
22
  not_leader_for_partition
23
+ coordinator_load_in_progress
21
24
  ].freeze
22
25
 
23
26
  private_constant :RETRYABLE_DEFAULT_ERRORS
@@ -28,7 +28,7 @@ module Karafka
28
28
 
29
29
  # Runs the `#call` method in a new thread
30
30
  # @param thread_name [String] name that we want to assign to the thread when we start it
31
- def async_call(thread_name = '')
31
+ def async_call(thread_name)
32
32
  MUTEX.synchronize do
33
33
  return if @thread&.alive?
34
34
 
@@ -25,6 +25,8 @@ module Karafka
25
25
  def call(topic, messages, coordinator)
26
26
  ktopic = @subscription_group.topics.find(topic)
27
27
 
28
+ vps = ktopic.virtual_partitions
29
+
28
30
  # We only partition work if we have:
29
31
  # - a virtual partitioner
30
32
  # - more than one thread to process the data
@@ -38,21 +40,19 @@ module Karafka
38
40
  #
39
41
  # This is great because it allows us to run things without the parallelization that adds
40
42
  # a bit of uncertainty and allows us to use DLQ and safely skip messages if needed.
41
- if ktopic.virtual_partitions? &&
42
- ktopic.virtual_partitions.max_partitions > 1 &&
43
- !coordinator.collapsed?
44
- # We need to reduce it to the max concurrency, so the group_id is not a direct effect
45
- # of the end user action. Otherwise the persistence layer for consumers would cache
46
- # it forever and it would cause memory leaks
47
- #
48
- # This also needs to be consistent because the aggregation here needs to warrant, that
49
- # the same partitioned message will always be assigned to the same virtual partition.
50
- # Otherwise in case of a window aggregation with VP spanning across several polls, the
51
- # data could not be complete.
43
+ if vps.active? && vps.max_partitions > 1 && !coordinator.collapsed?
52
44
  groupings = messages.group_by do |msg|
53
- key = ktopic.virtual_partitions.partitioner.call(msg).to_s.sum
54
-
55
- key % ktopic.virtual_partitions.max_partitions
45
+ # We need to reduce it to the max concurrency, so the group_id is not a direct effect
46
+ # of the end user action. Otherwise the persistence layer for consumers would cache
47
+ # it forever and it would cause memory leaks
48
+ #
49
+ # This also needs to be consistent because the aggregation here needs to warrant,
50
+ # that the same partitioned message will always be assigned to the same virtual
51
+ # partition. Otherwise in case of a window aggregation with VP spanning across
52
+ # several polls, the data could not be complete.
53
+ vps.reducer.call(
54
+ vps.partitioner.call(msg)
55
+ )
56
56
  end
57
57
 
58
58
  groupings.each do |key, messages_group|
@@ -22,6 +22,7 @@ module Karafka
22
22
  :partitioner,
23
23
  :max_partitions,
24
24
  :offset_metadata_strategy,
25
+ :reducer,
25
26
  keyword_init: true
26
27
  ) { alias_method :active?, :active }
27
28
  end
@@ -31,6 +31,7 @@ module Karafka
31
31
  nested(:virtual_partitions) do
32
32
  required(:active) { |val| [true, false].include?(val) }
33
33
  required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
34
+ required(:reducer) { |val| val.respond_to?(:call) }
34
35
  required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
35
36
  required(:offset_metadata_strategy) { |val| %i[exact current].include?(val) }
36
37
  end
@@ -26,18 +26,24 @@ module Karafka
26
26
  # @param offset_metadata_strategy [Symbol] how we should match the metadata for the
27
27
  # offset. `:exact` will match the offset matching metadata and `:current` will select
28
28
  # the most recently reported metadata
29
+ # @param reducer [nil, #call] reducer for VPs key. It allows for using a custom
30
+ # reducer to achieve enhanced parallelization when the default reducer is not enough.
29
31
  # @return [VirtualPartitions] method that allows to set the virtual partitions details
30
32
  # during the routing configuration and then allows to retrieve it
31
33
  def virtual_partitions(
32
34
  max_partitions: Karafka::App.config.concurrency,
33
35
  partitioner: nil,
34
- offset_metadata_strategy: :current
36
+ offset_metadata_strategy: :current,
37
+ reducer: nil
35
38
  )
36
39
  @virtual_partitions ||= Config.new(
37
40
  active: !partitioner.nil?,
38
41
  max_partitions: max_partitions,
39
42
  partitioner: partitioner,
40
- offset_metadata_strategy: offset_metadata_strategy
43
+ offset_metadata_strategy: offset_metadata_strategy,
44
+ # If no reducer provided, we use this one. It just runs a modulo on the sum of
45
+ # a stringified version, providing fairly good distribution.
46
+ reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions }
41
47
  )
42
48
  end
43
49
 
@@ -3,17 +3,18 @@
3
3
  module Karafka
4
4
  # Class used to run the Karafka listeners in separate threads
5
5
  class Runner
6
- def initialize
7
- @manager = App.config.internal.connection.manager
8
- @conductor = App.config.internal.connection.conductor
9
- end
6
+ include Helpers::ConfigImporter.new(
7
+ manager: %i[internal connection manager],
8
+ conductor: %i[internal connection conductor],
9
+ jobs_queue_class: %i[internal processing jobs_queue_class]
10
+ )
10
11
 
11
12
  # Starts listening on all the listeners asynchronously and handles the jobs queue closing
12
13
  # after listeners are done with their work.
13
14
  def call
14
15
  # Despite possibility of having several independent listeners, we aim to have one queue for
15
16
  # jobs across and one workers pool for that
16
- jobs_queue = App.config.internal.processing.jobs_queue_class.new
17
+ jobs_queue = jobs_queue_class.new
17
18
 
18
19
  workers = Processing::WorkersBatch.new(jobs_queue)
19
20
  listeners = Connection::ListenersBatch.new(jobs_queue)
@@ -23,7 +24,7 @@ module Karafka
23
24
  Karafka::App.run!
24
25
 
25
26
  # Register all the listeners so they can be started and managed
26
- @manager.register(listeners)
27
+ manager.register(listeners)
27
28
 
28
29
  workers.each_with_index { |worker, i| worker.async_call("karafka.worker##{i}") }
29
30
 
@@ -32,10 +33,10 @@ module Karafka
32
33
  Karafka::Server.listeners = listeners
33
34
  Karafka::Server.jobs_queue = jobs_queue
34
35
 
35
- until @manager.done?
36
- @conductor.wait
36
+ until manager.done?
37
+ conductor.wait
37
38
 
38
- @manager.control
39
+ manager.control
39
40
  end
40
41
 
41
42
  # We close the jobs queue only when no listener threads are working.
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.4.2'
6
+ VERSION = '2.4.3'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -92,7 +92,17 @@ module Karafka
92
92
  # KARAFKA_BOOT_FILE='/home/app_path/app.rb'
93
93
  # Karafka.boot_file #=> '/home/app_path/app.rb'
94
94
  def boot_file
95
- Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'karafka.rb'))
95
+ boot_file = Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'karafka.rb'))
96
+
97
+ return boot_file if boot_file.absolute?
98
+ return boot_file if boot_file.to_s == 'false'
99
+
100
+ Pathname.new(
101
+ File.expand_path(
102
+ boot_file,
103
+ Karafka.root
104
+ )
105
+ )
96
106
  end
97
107
 
98
108
  # We need to be able to overwrite both monitor and logger after the configuration in case they
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.2
4
+ version: 2.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2024-05-14 00:00:00.000000000 Z
38
+ date: 2024-06-12 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: base64
@@ -77,7 +77,7 @@ dependencies:
77
77
  requirements:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
- version: 2.7.0
80
+ version: 2.7.3
81
81
  - - "<"
82
82
  - !ruby/object:Gem::Version
83
83
  version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
87
87
  requirements:
88
88
  - - ">="
89
89
  - !ruby/object:Gem::Version
90
- version: 2.7.0
90
+ version: 2.7.3
91
91
  - - "<"
92
92
  - !ruby/object:Gem::Version
93
93
  version: 3.0.0
@@ -549,7 +549,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
549
549
  - !ruby/object:Gem::Version
550
550
  version: '0'
551
551
  requirements: []
552
- rubygems_version: 3.5.9
552
+ rubygems_version: 3.5.11
553
553
  signing_key:
554
554
  specification_version: 4
555
555
  summary: Karafka is Ruby and Rails efficient Kafka processing framework.
metadata.gz.sig CHANGED
Binary file