karafka-rdkafka 0.18.0 → 0.19.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bccd7f77bb5788be32f9f8a3b82ffb40fc602a124fe8f6c2b9c55b6a9557ef4b
4
- data.tar.gz: c9c48010d8179b6ee80489d66c1425d0ffc6d5255b1ca103abfb5ef565d81370
3
+ metadata.gz: 5a1e9fa0ca2b5dd14aed77c653fc4d154bb566113cac11c04d51cccc4e1d9fb7
4
+ data.tar.gz: c513b0b82bdae4d9a16251a0abcd4b73a79ba21d2833814bccddcd8f6446151a
5
5
  SHA512:
6
- metadata.gz: 3ae48dd732ba5b392496c126f87d50a65fba7cd1f417828e736b82207a370d87ae64d4b8de0272aa98663b1d515dd6528a1a89bfe0833adda4c21f924b3c07e2
7
- data.tar.gz: 5ee6def7731fb42acdcf76a242241204dd837b513c559bb9e16aa58617eb52a9c7320cae5d2d5a91e18876f91237592917e94e2386eba664e160b5d41925632e
6
+ metadata.gz: 53bee0b1c513f6947ca657ca3836df05e6de31ba441aa6d85d71f523c28cad7b996ec14fae798b50bbaafb09eb00367bbc4298da5a926e3fae61cc94cb5179bb
7
+ data.tar.gz: 51e903bb75f34fa7f49a8ebd6cdff193b2ee916fd0f4823145aae6ee219bf45e51ce538f3c9c3e4de9ba58c659fa4ecaf494768df1b83f998c9d2509cda58074
checksums.yaml.gz.sig CHANGED
Binary file
@@ -22,12 +22,12 @@ jobs:
22
22
  fail-fast: false
23
23
  matrix:
24
24
  ruby:
25
- - '3.4.0-preview2'
25
+ - '3.4'
26
26
  - '3.3'
27
27
  - '3.2'
28
28
  - '3.1'
29
29
  include:
30
- - ruby: '3.3'
30
+ - ruby: '3.4'
31
31
  coverage: 'true'
32
32
  steps:
33
33
  - uses: actions/checkout@v4
@@ -54,3 +54,30 @@ jobs:
54
54
  cd ext && bundle exec rake
55
55
  cd ..
56
56
  bundle exec rspec
57
+
58
+
59
+ macos_build:
60
+ timeout-minutes: 30
61
+ runs-on: macos-latest
62
+ strategy:
63
+ fail-fast: false
64
+ matrix:
65
+ ruby:
66
+ - '3.4'
67
+ - '3.3'
68
+ - '3.2'
69
+ - '3.1'
70
+ steps:
71
+ - uses: actions/checkout@v4
72
+
73
+ - name: Set up Ruby
74
+ uses: ruby/setup-ruby@v1
75
+ with:
76
+ ruby-version: ${{matrix.ruby}}
77
+ bundler-cache: false
78
+
79
+ - name: Build rdkafka-ruby
80
+ run: |
81
+ set -e
82
+ bundle install --path vendor/bundle
83
+ cd ext && bundle exec rake
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.3.6
1
+ 3.4.1
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Rdkafka Changelog
2
2
 
3
+ ## 0.19.0 (2025-01-20)
4
+ - **[Breaking]** Deprecate and remove `#each_batch` due to data consistency concerns.
5
+ - [Enhancement] Bump librdkafka to 2.8.0
6
+ - [Fix] Restore `Rdkafka::Bindings.rd_kafka_global_init` as it was not the source of the original issue.
7
+
8
+ ## 0.18.1 (2024-12-04)
9
+ - [Fix] Do not run `Rdkafka::Bindings.rd_kafka_global_init` on require to prevent some macOS versions from hanging on Puma fork.
10
+
3
11
  ## 0.18.0 (2024-11-26)
4
12
  - **[Breaking]** Drop Ruby 3.0 support
5
13
  - [Enhancement] Bump librdkafka to 2.6.1
data/README.md CHANGED
@@ -163,6 +163,7 @@ bundle exec rake produce_messages
163
163
 
164
164
  | rdkafka-ruby | librdkafka | patches |
165
165
  |-|-|-|
166
+ | 0.19.0 (2025-01-20) | 2.8.0 (2025-01-07) | yes |
166
167
  | 0.18.0 (2024-11-26) | 2.6.1 (2024-11-18) | yes |
167
168
  | 0.17.4 (2024-09-02) | 2.5.3 (2024-09-02) | yes |
168
169
  | 0.17.0 (2024-08-01) | 2.5.0 (2024-07-10) | yes |
data/docker-compose.yml CHANGED
@@ -1,9 +1,7 @@
1
- version: '2'
2
-
3
1
  services:
4
2
  kafka:
5
3
  container_name: kafka
6
- image: confluentinc/cp-kafka:7.7.1
4
+ image: confluentinc/cp-kafka:7.8.0
7
5
 
8
6
  ports:
9
7
  - 9092:9092
data/ext/Rakefile CHANGED
@@ -16,11 +16,11 @@ task :default => :clean do
16
16
  require "mini_portile2"
17
17
  recipe = MiniPortile.new("librdkafka", Rdkafka::LIBRDKAFKA_VERSION)
18
18
 
19
- # Use default homebrew openssl if we're on mac and the directory exists
20
- # and each of flags is not empty
21
- if recipe.host&.include?("darwin") && system("which brew &> /dev/null") && Dir.exist?("#{homebrew_prefix = %x(brew --prefix openssl).strip}")
22
- ENV["CPPFLAGS"] = "-I#{homebrew_prefix}/include" unless ENV["CPPFLAGS"]
23
- ENV["LDFLAGS"] = "-L#{homebrew_prefix}/lib" unless ENV["LDFLAGS"]
19
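+ # Use default homebrew openssl if we're on mac, the directory exists, nix-prepared libraries are not in use,
20
+ # and each of the flags is not already set. NOTE(review): the condition below tests ENV.key?("NIX_LDFLAGS") without negation, which appears to contradict "not using nix-prepared libraries" - confirm whether !ENV.key?("NIX_LDFLAGS") was intended.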
21
+ if recipe.host&.include?("darwin") && system("which brew &> /dev/null") && Dir.exist?("#{homebrew_prefix = %x(brew --prefix openssl).strip}") && ENV.key?("NIX_LDFLAGS")
22
+ ENV["CPPFLAGS"] = "-I#{homebrew_prefix}/include" unless ENV.key?("CPPFLAGS")
23
+ ENV["LDFLAGS"] = "-L#{homebrew_prefix}/lib" unless ENV.key?("LDFLAGS")
24
24
  end
25
25
 
26
26
  releases = File.expand_path(File.join(File.dirname(__FILE__), '../dist'))
@@ -619,87 +619,23 @@ module Rdkafka
619
619
  end
620
620
  end
621
621
 
622
- # Poll for new messages and yield them in batches that may contain
623
- # messages from more than one partition.
624
- #
625
- # Rather than yield each message immediately as soon as it is received,
626
- # each_batch will attempt to wait for as long as `timeout_ms` in order
627
- # to create a batch of up to but no more than `max_items` in size.
628
- #
629
- # Said differently, if more than `max_items` are available within
630
- # `timeout_ms`, then `each_batch` will yield early with `max_items` in the
631
- # array, but if `timeout_ms` passes by with fewer messages arriving, it
632
- # will yield an array of fewer messages, quite possibly zero.
633
- #
634
- # In order to prevent wrongly auto committing many messages at once across
635
- # possibly many partitions, callers must explicitly indicate which messages
636
- # have been successfully processed as some consumed messages may not have
637
- # been yielded yet. To do this, the caller should set
638
- # `enable.auto.offset.store` to false and pass processed messages to
639
- # {store_offset}. It is also possible, though more complex, to set
640
- # 'enable.auto.commit' to false and then pass a manually assembled
641
- # TopicPartitionList to {commit}.
642
- #
643
- # As with `each`, iteration will end when the consumer is closed.
644
- #
645
- # Exception behavior is more complicated than with `each`, in that if
646
- # :yield_on_error is true, and an exception is raised during the
647
- # poll, and messages have already been received, they will be yielded to
648
- # the caller before the exception is allowed to propagate.
649
- #
650
- # If you are setting either auto.commit or auto.offset.store to false in
651
- # the consumer configuration, then you should let yield_on_error keep its
652
- # default value of false because you are guaranteed to see these messages
653
- # again. However, if both auto.commit and auto.offset.store are set to
654
- # true, you should set yield_on_error to true so you can process messages
655
- # that you may or may not see again.
656
- #
657
- # @param max_items [Integer] Maximum size of the yielded array of messages
658
- # @param bytes_threshold [Integer] Threshold number of total message bytes in the yielded array of messages
659
- # @param timeout_ms [Integer] max time to wait for up to max_items
660
- #
661
- # @yieldparam messages [Array] An array of received Message
662
- # @yieldparam pending_exception [Exception] normally nil, or an exception
663
- #
664
- # @yield [messages, pending_exception]
665
- # which will be propagated after processing of the partial batch is complete.
666
- #
667
- # @return [nil]
668
- #
669
- # @raise [RdkafkaError] When polling fails
622
+ # Deprecated. Please read the error message for more details.
670
623
  def each_batch(max_items: 100, bytes_threshold: Float::INFINITY, timeout_ms: 250, yield_on_error: false, &block)
671
- closed_consumer_check(__method__)
672
- slice = []
673
- bytes = 0
674
- end_time = monotonic_now + timeout_ms / 1000.0
675
- loop do
676
- break if closed?
677
- max_wait = end_time - monotonic_now
678
- max_wait_ms = if max_wait <= 0
679
- 0 # should not block, but may retrieve a message
680
- else
681
- (max_wait * 1000).floor
682
- end
683
- message = nil
684
- begin
685
- message = poll max_wait_ms
686
- rescue Rdkafka::RdkafkaError => error
687
- raise unless yield_on_error
688
- raise if slice.empty?
689
- yield slice.dup, error
690
- raise
691
- end
692
- if message
693
- slice << message
694
- bytes += message.payload.bytesize if message.payload
695
- end
696
- if slice.size == max_items || bytes >= bytes_threshold || monotonic_now >= end_time - 0.001
697
- yield slice.dup, nil
698
- slice.clear
699
- bytes = 0
700
- end_time = monotonic_now + timeout_ms / 1000.0
701
- end
702
- end
624
+ raise NotImplementedError, <<~ERROR
625
+ `each_batch` has been removed due to data consistency concerns.
626
+
627
+ This method was removed because it did not properly handle partition reassignments,
628
+ which could lead to processing messages from partitions that were no longer owned
629
+ by this consumer, resulting in duplicate message processing and data inconsistencies.
630
+
631
+ Recommended alternatives:
632
+
633
+ 1. Implement your own batching logic using rebalance callbacks to properly handle
634
+ partition revocations and ensure message processing correctness.
635
+
636
+ 2. Use a high-level batching library that supports proper partition reassignment
637
+ handling out of the box (such as the Karafka framework).
638
+ ERROR
703
639
  end
704
640
 
705
641
  # Returns pointer to the consumer group metadata. It is used only in the context of
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rdkafka
4
- VERSION = "0.18.0"
5
- LIBRDKAFKA_VERSION = "2.6.1"
6
- LIBRDKAFKA_SOURCE_SHA256 = "0ddf205ad8d36af0bc72a2fec20639ea02e1d583e353163bf7f4683d949e901b"
4
+ VERSION = "0.19.0"
5
+ LIBRDKAFKA_VERSION = "2.8.0"
6
+ LIBRDKAFKA_SOURCE_SHA256 = "5bd1c46f63265f31c6bfcedcde78703f77d28238eadf23821c2b43fc30be3e25"
7
7
  end
@@ -948,236 +948,10 @@ describe Rdkafka::Consumer do
948
948
  end
949
949
 
950
950
  describe "#each_batch" do
951
- let(:message_payload) { 'a' * 10 }
952
-
953
- before do
954
- @topic = SecureRandom.base64(10).tr('+=/', '')
955
- end
956
-
957
- after do
958
- @topic = nil
959
- end
960
-
961
- def topic_name
962
- @topic
963
- end
964
-
965
- def produce_n(n)
966
- handles = []
967
- n.times do |i|
968
- handles << producer.produce(
969
- topic: topic_name,
970
- payload: i % 10 == 0 ? nil : Time.new.to_f.to_s,
971
- key: i.to_s,
972
- partition: 0
973
- )
974
- end
975
- handles.each(&:wait)
976
- end
977
-
978
- def new_message
979
- instance_double("Rdkafka::Consumer::Message").tap do |message|
980
- allow(message).to receive(:payload).and_return(message_payload)
981
- end
982
- end
983
-
984
- it "retrieves messages produced into a topic" do
985
- # This is the only each_batch test that actually produces real messages
986
- # into a topic in the real kafka of the container.
987
- #
988
- # The other tests stub 'poll' which makes them faster and more reliable,
989
- # but it makes sense to keep a single test with a fully integrated flow.
990
- # This will help to catch breaking changes in the behavior of 'poll',
991
- # librdkafka, or Kafka.
992
- #
993
- # This is, in effect, an integration test and the subsequent specs are
994
- # unit tests.
995
- admin = rdkafka_config.admin
996
- create_topic_handle = admin.create_topic(topic_name, 1, 1)
997
- create_topic_handle.wait(max_wait_timeout: 15.0)
998
- consumer.subscribe(topic_name)
999
- produce_n 42
1000
- all_yields = []
1001
- consumer.each_batch(max_items: 10) do |batch|
1002
- all_yields << batch
1003
- break if all_yields.flatten.size >= 42
1004
- end
1005
- expect(all_yields.flatten.first).to be_a Rdkafka::Consumer::Message
1006
- expect(all_yields.flatten.size).to eq 42
1007
- expect(all_yields.size).to be > 4
1008
- expect(all_yields.flatten.map(&:key)).to eq (0..41).map { |x| x.to_s }
1009
- admin.close
1010
- end
1011
-
1012
- it "should batch poll results and yield arrays of messages" do
1013
- consumer.subscribe(topic_name)
1014
- all_yields = []
1015
- expect(consumer)
1016
- .to receive(:poll)
1017
- .exactly(10).times
1018
- .and_return(new_message)
1019
- consumer.each_batch(max_items: 10) do |batch|
1020
- all_yields << batch
1021
- break if all_yields.flatten.size >= 10
1022
- end
1023
- expect(all_yields.first).to be_instance_of(Array)
1024
- expect(all_yields.flatten.size).to eq 10
1025
- non_empty_yields = all_yields.reject { |batch| batch.empty? }
1026
- expect(non_empty_yields.size).to be < 10
1027
- end
1028
-
1029
- it "should yield a partial batch if the timeout is hit with some messages" do
1030
- consumer.subscribe(topic_name)
1031
- poll_count = 0
1032
- expect(consumer)
1033
- .to receive(:poll)
1034
- .at_least(3).times do
1035
- poll_count = poll_count + 1
1036
- if poll_count > 2
1037
- sleep 0.1
1038
- nil
1039
- else
1040
- new_message
1041
- end
1042
- end
1043
- all_yields = []
1044
- consumer.each_batch(max_items: 10) do |batch|
1045
- all_yields << batch
1046
- break if all_yields.flatten.size >= 2
1047
- end
1048
- expect(all_yields.flatten.size).to eq 2
1049
- end
1050
-
1051
- it "should yield [] if nothing is received before the timeout" do
1052
- admin = rdkafka_config.admin
1053
- create_topic_handle = admin.create_topic(topic_name, 1, 1)
1054
- create_topic_handle.wait(max_wait_timeout: 15.0)
1055
- consumer.subscribe(topic_name)
1056
- consumer.each_batch do |batch|
1057
- expect(batch).to eq([])
1058
- break
1059
- end
1060
- admin.close
1061
- end
1062
-
1063
- it "should yield batchs of max_items in size if messages are already fetched" do
1064
- yielded_batches = []
1065
- expect(consumer)
1066
- .to receive(:poll)
1067
- .with(anything)
1068
- .exactly(20).times
1069
- .and_return(new_message)
1070
-
1071
- consumer.each_batch(max_items: 10, timeout_ms: 500) do |batch|
1072
- yielded_batches << batch
1073
- break if yielded_batches.flatten.size >= 20
1074
- break if yielded_batches.size >= 20 # so failure doesn't hang
1075
- end
1076
- expect(yielded_batches.size).to eq 2
1077
- expect(yielded_batches.map(&:size)).to eq 2.times.map { 10 }
1078
- end
1079
-
1080
- it "should yield batchs as soon as bytes_threshold is hit" do
1081
- yielded_batches = []
1082
- expect(consumer)
1083
- .to receive(:poll)
1084
- .with(anything)
1085
- .exactly(20).times
1086
- .and_return(new_message)
1087
-
1088
- consumer.each_batch(bytes_threshold: message_payload.size * 4, timeout_ms: 500) do |batch|
1089
- yielded_batches << batch
1090
- break if yielded_batches.flatten.size >= 20
1091
- break if yielded_batches.size >= 20 # so failure doesn't hang
1092
- end
1093
- expect(yielded_batches.size).to eq 5
1094
- expect(yielded_batches.map(&:size)).to eq 5.times.map { 4 }
1095
- end
1096
-
1097
- context "error raised from poll and yield_on_error is true" do
1098
- it "should yield buffered exceptions on rebalance, then break" do
1099
- config = rdkafka_consumer_config(
1100
- {
1101
- :"enable.auto.commit" => false,
1102
- :"enable.auto.offset.store" => false
1103
- }
1104
- )
1105
- consumer = config.consumer
1106
- consumer.subscribe(topic_name)
1107
- batches_yielded = []
1108
- exceptions_yielded = []
1109
- each_batch_iterations = 0
1110
- poll_count = 0
1111
- expect(consumer)
1112
- .to receive(:poll)
1113
- .with(anything)
1114
- .exactly(3).times
1115
- .and_wrap_original do |method, *args|
1116
- poll_count = poll_count + 1
1117
- if poll_count == 3
1118
- raise Rdkafka::RdkafkaError.new(27,
1119
- "partitions ... too ... heavy ... must ... rebalance")
1120
- else
1121
- new_message
1122
- end
1123
- end
1124
- expect {
1125
- consumer.each_batch(max_items: 30, yield_on_error: true) do |batch, pending_error|
1126
- batches_yielded << batch
1127
- exceptions_yielded << pending_error
1128
- each_batch_iterations = each_batch_iterations + 1
1129
- end
1130
- }.to raise_error(Rdkafka::RdkafkaError)
1131
- expect(poll_count).to eq 3
1132
- expect(each_batch_iterations).to eq 1
1133
- expect(batches_yielded.size).to eq 1
1134
- expect(batches_yielded.first.size).to eq 2
1135
- expect(exceptions_yielded.flatten.size).to eq 1
1136
- expect(exceptions_yielded.flatten.first).to be_instance_of(Rdkafka::RdkafkaError)
1137
- consumer.close
1138
- end
1139
- end
1140
-
1141
- context "error raised from poll and yield_on_error is false" do
1142
- it "should yield buffered exceptions on rebalance, then break" do
1143
- config = rdkafka_consumer_config(
1144
- {
1145
- :"enable.auto.commit" => false,
1146
- :"enable.auto.offset.store" => false
1147
- }
1148
- )
1149
- consumer = config.consumer
1150
- consumer.subscribe(topic_name)
1151
- batches_yielded = []
1152
- exceptions_yielded = []
1153
- each_batch_iterations = 0
1154
- poll_count = 0
1155
- expect(consumer)
1156
- .to receive(:poll)
1157
- .with(anything)
1158
- .exactly(3).times
1159
- .and_wrap_original do |method, *args|
1160
- poll_count = poll_count + 1
1161
- if poll_count == 3
1162
- raise Rdkafka::RdkafkaError.new(27,
1163
- "partitions ... too ... heavy ... must ... rebalance")
1164
- else
1165
- new_message
1166
- end
1167
- end
1168
- expect {
1169
- consumer.each_batch(max_items: 30, yield_on_error: false) do |batch, pending_error|
1170
- batches_yielded << batch
1171
- exceptions_yielded << pending_error
1172
- each_batch_iterations = each_batch_iterations + 1
1173
- end
1174
- }.to raise_error(Rdkafka::RdkafkaError)
1175
- expect(poll_count).to eq 3
1176
- expect(each_batch_iterations).to eq 0
1177
- expect(batches_yielded.size).to eq 0
1178
- expect(exceptions_yielded.size).to eq 0
1179
- consumer.close
1180
- end
951
+ it 'expect to raise an error' do
952
+ expect do
953
+ consumer.each_batch {}
954
+ end.to raise_error(NotImplementedError)
1181
955
  end
1182
956
  end
1183
957
 
@@ -1344,7 +1118,6 @@ describe Rdkafka::Consumer do
1344
1118
  {
1345
1119
  :subscribe => [ nil ],
1346
1120
  :unsubscribe => nil,
1347
- :each_batch => nil,
1348
1121
  :pause => [ nil ],
1349
1122
  :resume => [ nil ],
1350
1123
  :subscription => nil,
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,12 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: karafka-rdkafka
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.18.0
4
+ version: 0.19.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thijs Cadier
8
8
  - Maciej Mensfeld
9
- autorequire:
10
9
  bindir: bin
11
10
  cert_chain:
12
11
  - |
@@ -36,7 +35,7 @@ cert_chain:
36
35
  i9zWxov0mr44TWegTVeypcWGd/0nxu1+QHVNHJrpqlPBRvwQsUm7fwmRInGpcaB8
37
36
  ap8wNYvryYzrzvzUxIVFBVM5PacgkFqRmolCa8I7tdKQN+R1
38
37
  -----END CERTIFICATE-----
39
- date: 2024-11-26 00:00:00.000000000 Z
38
+ date: 2025-01-20 00:00:00.000000000 Z
40
39
  dependencies:
41
40
  - !ruby/object:Gem::Dependency
42
41
  name: ffi
@@ -186,7 +185,7 @@ files:
186
185
  - README.md
187
186
  - Rakefile
188
187
  - certs/cert.pem
189
- - dist/librdkafka-2.6.1.tar.gz
188
+ - dist/librdkafka-2.8.0.tar.gz
190
189
  - dist/patches/rdkafka_global_init.patch
191
190
  - docker-compose.yml
192
191
  - ext/README.md
@@ -261,7 +260,6 @@ files:
261
260
  - spec/rdkafka/producer/delivery_report_spec.rb
262
261
  - spec/rdkafka/producer_spec.rb
263
262
  - spec/spec_helper.rb
264
- homepage:
265
263
  licenses:
266
264
  - MIT
267
265
  metadata:
@@ -272,7 +270,6 @@ metadata:
272
270
  source_code_uri: https://github.com/karafka/karafka-rdkafka
273
271
  documentation_uri: https://karafka.io/docs
274
272
  rubygems_mfa_required: 'true'
275
- post_install_message:
276
273
  rdoc_options: []
277
274
  require_paths:
278
275
  - lib
@@ -287,8 +284,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
287
284
  - !ruby/object:Gem::Version
288
285
  version: '0'
289
286
  requirements: []
290
- rubygems_version: 3.5.22
291
- signing_key:
287
+ rubygems_version: 3.6.2
292
288
  specification_version: 4
293
289
  summary: The rdkafka gem is a modern Kafka client library for Ruby based on librdkafka.
294
290
  It wraps the production-ready C client using the ffi gem and targets Kafka 1.0+
metadata.gz.sig CHANGED
Binary file