karafka 2.4.2 → 2.4.3

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5eb2aeb0bd3cd0d50d70b0fd792e8290315f3131222d3befab9b5a05bd478e07
4
- data.tar.gz: 27b4d40fe18c3daf87ea3f2b601487d6cad5a766121374772179fc5fb3049f75
3
+ metadata.gz: c37b855609a9d20ca0b5cf13875d30ccb71b4072b3f07886f10445ce761c6e1e
4
+ data.tar.gz: 852fa3340f0dd1cbc2286823a2451c635568440c3e753e7235a72dc85289fcf5
5
5
  SHA512:
6
- metadata.gz: af5c4030ccd1f881558df342c3936152efc6183ccc6238246a327d6da8b716e65e3e56ec198a708b87f292bd01d9e426fb64b3c4b9a4f5b40111ee68795ec050
7
- data.tar.gz: 7d04bb79ea03cc5eedf7a8e9ecd62abc0ff2b48aea2e4df929d691b931b728017adf843aec5c248a6d4e8b42dd1957c798e799b9df82ae009795ae6b414532ac
6
+ metadata.gz: 373b460007cefdb039b5f2fd00abdc722f42c5bd8f324dad121c7e293264ca4a63cb63cb4fa5057e0f2ff856fe6c28073c04d2ffb21250dd534408ee511807c9
7
+ data.tar.gz: 595e408578801d949bb2b1e8d3337e8535473b80c7a1db34e56822214de9fd2083a594433e264ed8675c2292cd3aa73bacaef44938ad7a4cd743292097fd4d8f
checksums.yaml.gz.sig CHANGED
Binary file
@@ -73,6 +73,7 @@ jobs:
73
73
  fail-fast: false
74
74
  matrix:
75
75
  ruby:
76
+ - '3.4.0-preview1'
76
77
  - '3.3'
77
78
  - '3.2'
78
79
  - '3.1'
@@ -113,6 +114,7 @@ jobs:
113
114
  fail-fast: false
114
115
  matrix:
115
116
  ruby:
117
+ - '3.4.0-preview1'
116
118
  - '3.3'
117
119
  - '3.2'
118
120
  - '3.1'
@@ -166,6 +168,7 @@ jobs:
166
168
  fail-fast: false
167
169
  matrix:
168
170
  ruby:
171
+ - '3.4.0-preview1'
169
172
  - '3.3'
170
173
  - '3.2'
171
174
  - '3.1'
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.3.1
1
+ 3.3.3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Karafka framework changelog
2
2
 
3
+ ## 2.4.3 (2024-06-12)
4
+ - [Enhancement] Allow for customization of Virtual Partitions reducer for enhanced parallelization.
5
+ - [Enhancement] Add more error codes to early report on polling issues (kidlab)
6
+ - [Enhancement] Add `transport`, `network_exception` and `coordinator_load_in_progress` alongside `timed_out` to retryable errors for the proxy.
7
+ - [Enhancement] Improve `strict_topics_namespacing` validation message.
8
+ - [Change] Remove default empty thread name from `Async` since Web has been upgraded.
9
+ - [Fix] Installer doesn't respect directories in `KARAFKA_BOOT_FILE`.
10
+ - [Fix] Fix case where non absolute boot file path would not work as expected.
11
+ - [Fix] Allow for installing Karafka in a non-existing (yet) directory
12
+ - [Maintenance] Require `waterdrop` `>=` `2.7.3` to support idempotent producer detection.
13
+
3
14
  ## 2.4.2 (2024-05-14)
4
15
  - [Enhancement] Validate ActiveJob adapter custom producer format.
5
16
  - [Fix] Internal seek does not resolve the offset correctly for time based lookup.
data/Gemfile CHANGED
@@ -19,6 +19,7 @@ end
19
19
  group :test do
20
20
  gem 'byebug'
21
21
  gem 'factory_bot'
22
+ gem 'ostruct'
22
23
  gem 'rspec'
23
24
  gem 'simplecov'
24
25
  end
data/Gemfile.lock CHANGED
@@ -1,19 +1,19 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- karafka (2.4.2)
4
+ karafka (2.4.3)
5
5
  base64 (~> 0.2)
6
6
  karafka-core (>= 2.4.0, < 2.5.0)
7
- waterdrop (>= 2.7.0, < 3.0.0)
7
+ waterdrop (>= 2.7.3, < 3.0.0)
8
8
  zeitwerk (~> 2.3)
9
9
 
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- activejob (7.1.3.2)
14
- activesupport (= 7.1.3.2)
13
+ activejob (7.1.3.4)
14
+ activesupport (= 7.1.3.4)
15
15
  globalid (>= 0.3.6)
16
- activesupport (7.1.3.2)
16
+ activesupport (7.1.3.4)
17
17
  base64
18
18
  bigdecimal
19
19
  concurrent-ruby (~> 1.0, >= 1.0.2)
@@ -26,7 +26,7 @@ GEM
26
26
  base64 (0.2.0)
27
27
  bigdecimal (3.1.8)
28
28
  byebug (11.1.3)
29
- concurrent-ruby (1.2.3)
29
+ concurrent-ruby (1.3.1)
30
30
  connection_pool (2.4.1)
31
31
  diff-lcs (1.5.1)
32
32
  docile (1.4.0)
@@ -34,7 +34,7 @@ GEM
34
34
  erubi (1.12.0)
35
35
  factory_bot (6.4.6)
36
36
  activesupport (>= 5.0.0)
37
- ffi (1.16.3)
37
+ ffi (1.17.0)
38
38
  globalid (1.2.1)
39
39
  activesupport (>= 6.1)
40
40
  i18n (1.14.5)
@@ -54,12 +54,13 @@ GEM
54
54
  karafka-core (>= 2.4.0, < 2.5.0)
55
55
  roda (~> 3.68, >= 3.69)
56
56
  tilt (~> 2.0)
57
- mini_portile2 (2.8.6)
58
- minitest (5.22.3)
57
+ mini_portile2 (2.8.7)
58
+ minitest (5.23.1)
59
59
  mutex_m (0.2.0)
60
+ ostruct (0.6.0)
60
61
  rack (3.0.11)
61
62
  rake (13.2.1)
62
- roda (3.79.0)
63
+ roda (3.80.0)
63
64
  rack
64
65
  rspec (3.13.0)
65
66
  rspec-core (~> 3.13.0)
@@ -83,11 +84,11 @@ GEM
83
84
  tilt (2.3.0)
84
85
  tzinfo (2.0.6)
85
86
  concurrent-ruby (~> 1.0)
86
- waterdrop (2.7.2)
87
+ waterdrop (2.7.3)
87
88
  karafka-core (>= 2.4.0, < 3.0.0)
88
89
  karafka-rdkafka (>= 0.15.1)
89
90
  zeitwerk (~> 2.3)
90
- zeitwerk (2.6.13)
91
+ zeitwerk (2.6.15)
91
92
 
92
93
  PLATFORMS
93
94
  ruby
@@ -100,8 +101,9 @@ DEPENDENCIES
100
101
  karafka!
101
102
  karafka-testing (>= 2.4.0)
102
103
  karafka-web (>= 0.9.0)
104
+ ostruct
103
105
  rspec
104
106
  simplecov
105
107
 
106
108
  BUNDLED WITH
107
- 2.5.9
109
+ 2.5.11
@@ -121,8 +121,9 @@ en:
121
121
  declaratives.details_format: needs to be a hash with only symbol keys
122
122
 
123
123
  inconsistent_namespacing: |
124
- needs to be consistent namespacing style
125
- disable this validation by setting config.strict_topics_namespacing to false
124
+ needs to follow a consistent namespacing style using either dots (.) or underscores (_), but not both.
125
+ This ensures proper Kafka metrics reporting and avoids name collisions.
126
+ To disable this validation, set config.strict_topics_namespacing to false.
126
127
 
127
128
  deserializers.active_format: 'needs to be true'
128
129
  deserializers.payload_format: 'needs to respond to #call'
@@ -4,6 +4,7 @@ en:
4
4
  virtual_partitions.partitioner_respond_to_call: needs to be defined and needs to respond to `#call`
5
5
  virtual_partitions.max_partitions_format: needs to be equal or more than 1
6
6
  virtual_partitions.offset_metadata_strategy_format: needs to be either :exact or :current
7
+ virtual_partitions.reducer_format: "needs to respond to `#call`"
7
8
 
8
9
  long_running_job.active_format: needs to be either true or false
9
10
 
data/karafka.gemspec CHANGED
@@ -23,7 +23,7 @@ Gem::Specification.new do |spec|
23
23
 
24
24
  spec.add_dependency 'base64', '~> 0.2'
25
25
  spec.add_dependency 'karafka-core', '>= 2.4.0', '< 2.5.0'
26
- spec.add_dependency 'waterdrop', '>= 2.7.0', '< 3.0.0'
26
+ spec.add_dependency 'waterdrop', '>= 2.7.3', '< 3.0.0'
27
27
  spec.add_dependency 'zeitwerk', '~> 2.3'
28
28
 
29
29
  spec.required_ruby_version = '>= 3.0.0'
@@ -21,7 +21,7 @@ module Karafka
21
21
 
22
22
  # Where should we map proper files from templates
23
23
  INSTALL_FILES_MAP = {
24
- 'karafka.rb.erb' => Karafka.boot_file.basename,
24
+ 'karafka.rb.erb' => Karafka.boot_file,
25
25
  'application_consumer.rb.erb' => 'app/consumers/application_consumer.rb',
26
26
  'example_consumer.rb.erb' => 'app/consumers/example_consumer.rb'
27
27
  }.freeze
@@ -51,6 +51,7 @@ module Karafka
51
51
 
52
52
  INSTALL_FILES_MAP.each do |source, target|
53
53
  pathed_target = Karafka.root.join(target)
54
+ FileUtils.mkdir_p File.dirname(pathed_target)
54
55
 
55
56
  template = File.read(Karafka.core_root.join("templates/#{source}"))
56
57
  render = ::ERB.new(template, trim_mode: '-').result(binding)
@@ -553,6 +553,12 @@ module Karafka
553
553
  early_report = true
554
554
  when :transport # -195
555
555
  early_report = true
556
+ when :topic_authorization_failed # 29
557
+ early_report = true
558
+ when :group_authorization_failed # 30
559
+ early_report = true
560
+ when :cluster_authorization_failed # 31
561
+ early_report = true
556
562
  # @see
557
563
  # https://github.com/confluentinc/confluent-kafka-dotnet/issues/1366#issuecomment-821842990
558
564
  # This will be raised each time poll detects a non-existing topic. When auto creation is
@@ -16,8 +16,11 @@ module Karafka
16
16
  RETRYABLE_DEFAULT_ERRORS = %i[
17
17
  all_brokers_down
18
18
  timed_out
19
+ transport
20
+ network_exception
19
21
  not_coordinator
20
22
  not_leader_for_partition
23
+ coordinator_load_in_progress
21
24
  ].freeze
22
25
 
23
26
  private_constant :RETRYABLE_DEFAULT_ERRORS
@@ -28,7 +28,7 @@ module Karafka
28
28
 
29
29
  # Runs the `#call` method in a new thread
30
30
  # @param thread_name [String] name that we want to assign to the thread when we start it
31
- def async_call(thread_name = '')
31
+ def async_call(thread_name)
32
32
  MUTEX.synchronize do
33
33
  return if @thread&.alive?
34
34
 
@@ -25,6 +25,8 @@ module Karafka
25
25
  def call(topic, messages, coordinator)
26
26
  ktopic = @subscription_group.topics.find(topic)
27
27
 
28
+ vps = ktopic.virtual_partitions
29
+
28
30
  # We only partition work if we have:
29
31
  # - a virtual partitioner
30
32
  # - more than one thread to process the data
@@ -38,21 +40,19 @@ module Karafka
38
40
  #
39
41
  # This is great because it allows us to run things without the parallelization that adds
40
42
  # a bit of uncertainty and allows us to use DLQ and safely skip messages if needed.
41
- if ktopic.virtual_partitions? &&
42
- ktopic.virtual_partitions.max_partitions > 1 &&
43
- !coordinator.collapsed?
44
- # We need to reduce it to the max concurrency, so the group_id is not a direct effect
45
- # of the end user action. Otherwise the persistence layer for consumers would cache
46
- # it forever and it would cause memory leaks
47
- #
48
- # This also needs to be consistent because the aggregation here needs to warrant, that
49
- # the same partitioned message will always be assigned to the same virtual partition.
50
- # Otherwise in case of a window aggregation with VP spanning across several polls, the
51
- # data could not be complete.
43
+ if vps.active? && vps.max_partitions > 1 && !coordinator.collapsed?
52
44
  groupings = messages.group_by do |msg|
53
- key = ktopic.virtual_partitions.partitioner.call(msg).to_s.sum
54
-
55
- key % ktopic.virtual_partitions.max_partitions
45
+ # We need to reduce it to the max concurrency, so the group_id is not a direct effect
46
+ # of the end user action. Otherwise the persistence layer for consumers would cache
47
+ # it forever and it would cause memory leaks
48
+ #
49
+ # This also needs to be consistent because the aggregation here needs to warrant,
50
+ # that the same partitioned message will always be assigned to the same virtual
51
+ # partition. Otherwise in case of a window aggregation with VP spanning across
52
+ # several polls, the data could not be complete.
53
+ vps.reducer.call(
54
+ vps.partitioner.call(msg)
55
+ )
56
56
  end
57
57
 
58
58
  groupings.each do |key, messages_group|
@@ -22,6 +22,7 @@ module Karafka
22
22
  :partitioner,
23
23
  :max_partitions,
24
24
  :offset_metadata_strategy,
25
+ :reducer,
25
26
  keyword_init: true
26
27
  ) { alias_method :active?, :active }
27
28
  end
@@ -31,6 +31,7 @@ module Karafka
31
31
  nested(:virtual_partitions) do
32
32
  required(:active) { |val| [true, false].include?(val) }
33
33
  required(:partitioner) { |val| val.nil? || val.respond_to?(:call) }
34
+ required(:reducer) { |val| val.respond_to?(:call) }
34
35
  required(:max_partitions) { |val| val.is_a?(Integer) && val >= 1 }
35
36
  required(:offset_metadata_strategy) { |val| %i[exact current].include?(val) }
36
37
  end
@@ -26,18 +26,24 @@ module Karafka
26
26
  # @param offset_metadata_strategy [Symbol] how we should match the metadata for the
27
27
  # offset. `:exact` will match the offset matching metadata and `:current` will select
28
28
  # the most recently reported metadata
29
+ # @param reducer [nil, #call] reducer for VPs key. It allows for using a custom
30
+ # reducer to achieve enhanced parallelization when the default reducer is not enough.
29
31
  # @return [VirtualPartitions] method that allows to set the virtual partitions details
30
32
  # during the routing configuration and then allows to retrieve it
31
33
  def virtual_partitions(
32
34
  max_partitions: Karafka::App.config.concurrency,
33
35
  partitioner: nil,
34
- offset_metadata_strategy: :current
36
+ offset_metadata_strategy: :current,
37
+ reducer: nil
35
38
  )
36
39
  @virtual_partitions ||= Config.new(
37
40
  active: !partitioner.nil?,
38
41
  max_partitions: max_partitions,
39
42
  partitioner: partitioner,
40
- offset_metadata_strategy: offset_metadata_strategy
43
+ offset_metadata_strategy: offset_metadata_strategy,
44
+ # If no reducer provided, we use this one. It just runs a modulo on the sum of
45
+ # a stringified version, providing fairly good distribution.
46
+ reducer: reducer || ->(virtual_key) { virtual_key.to_s.sum % max_partitions }
41
47
  )
42
48
  end
43
49
 
@@ -3,17 +3,18 @@
3
3
  module Karafka
4
4
  # Class used to run the Karafka listeners in separate threads
5
5
  class Runner
6
- def initialize
7
- @manager = App.config.internal.connection.manager
8
- @conductor = App.config.internal.connection.conductor
9
- end
6
+ include Helpers::ConfigImporter.new(
7
+ manager: %i[internal connection manager],
8
+ conductor: %i[internal connection conductor],
9
+ jobs_queue_class: %i[internal processing jobs_queue_class]
10
+ )
10
11
 
11
12
  # Starts listening on all the listeners asynchronously and handles the jobs queue closing
12
13
  # after listeners are done with their work.
13
14
  def call
14
15
  # Despite possibility of having several independent listeners, we aim to have one queue for
15
16
  # jobs across and one workers poll for that
16
- jobs_queue = App.config.internal.processing.jobs_queue_class.new
17
+ jobs_queue = jobs_queue_class.new
17
18
 
18
19
  workers = Processing::WorkersBatch.new(jobs_queue)
19
20
  listeners = Connection::ListenersBatch.new(jobs_queue)
@@ -23,7 +24,7 @@ module Karafka
23
24
  Karafka::App.run!
24
25
 
25
26
  # Register all the listeners so they can be started and managed
26
- @manager.register(listeners)
27
+ manager.register(listeners)
27
28
 
28
29
  workers.each_with_index { |worker, i| worker.async_call("karafka.worker##{i}") }
29
30
 
@@ -32,10 +33,10 @@ module Karafka
32
33
  Karafka::Server.listeners = listeners
33
34
  Karafka::Server.jobs_queue = jobs_queue
34
35
 
35
- until @manager.done?
36
- @conductor.wait
36
+ until manager.done?
37
+ conductor.wait
37
38
 
38
- @manager.control
39
+ manager.control
39
40
  end
40
41
 
41
42
  # We close the jobs queue only when no listener threads are working.
@@ -3,5 +3,5 @@
3
3
  # Main module namespace
4
4
  module Karafka
5
5
  # Current Karafka version
6
- VERSION = '2.4.2'
6
+ VERSION = '2.4.3'
7
7
  end
data/lib/karafka.rb CHANGED
@@ -92,7 +92,17 @@ module Karafka
92
92
  # KARAFKA_BOOT_FILE='/home/app_path/app.rb'
93
93
  # Karafka.boot_file #=> '/home/app_path/app.rb'
94
94
  def boot_file
95
- Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'karafka.rb'))
95
+ boot_file = Pathname.new(ENV['KARAFKA_BOOT_FILE'] || File.join(Karafka.root, 'karafka.rb'))
96
+
97
+ return boot_file if boot_file.absolute?
98
+ return boot_file if boot_file.to_s == 'false'
99
+
100
+ Pathname.new(
101
+ File.expand_path(
102
+ boot_file,
103
+ Karafka.root
104
+ )
105
+ )
96
106
  end
97
107
 
98
108
  # We need to be able to overwrite both monitor and logger after the configuration in case they
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.2
4
+ version: 2.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maciej Mensfeld
@@ -35,7 +35,7 @@ cert_chain:
35
35
  AnG1dJU+yL2BK7vaVytLTstJME5mepSZ46qqIJXMuWob/YPDmVaBF39TDSG9e34s
36
36
  msG3BiCqgOgHAnL23+CN3Rt8MsuRfEtoTKpJVcCfoEoNHOkc
37
37
  -----END CERTIFICATE-----
38
- date: 2024-05-14 00:00:00.000000000 Z
38
+ date: 2024-06-12 00:00:00.000000000 Z
39
39
  dependencies:
40
40
  - !ruby/object:Gem::Dependency
41
41
  name: base64
@@ -77,7 +77,7 @@ dependencies:
77
77
  requirements:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
- version: 2.7.0
80
+ version: 2.7.3
81
81
  - - "<"
82
82
  - !ruby/object:Gem::Version
83
83
  version: 3.0.0
@@ -87,7 +87,7 @@ dependencies:
87
87
  requirements:
88
88
  - - ">="
89
89
  - !ruby/object:Gem::Version
90
- version: 2.7.0
90
+ version: 2.7.3
91
91
  - - "<"
92
92
  - !ruby/object:Gem::Version
93
93
  version: 3.0.0
@@ -549,7 +549,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
549
549
  - !ruby/object:Gem::Version
550
550
  version: '0'
551
551
  requirements: []
552
- rubygems_version: 3.5.9
552
+ rubygems_version: 3.5.11
553
553
  signing_key:
554
554
  specification_version: 4
555
555
  summary: Karafka is Ruby and Rails efficient Kafka processing framework.
metadata.gz.sig CHANGED
Binary file