logstash-integration-kafka 10.0.1-java → 10.5.0-java

@@ -3,26 +3,29 @@
 require "logstash/devutils/rspec/spec_helper"
 require 'logstash/outputs/kafka'
 require 'json'
-require 'poseidon'
+require 'kafka'
 
 describe "outputs/kafka", :integration => true do
   let(:kafka_host) { 'localhost' }
   let(:kafka_port) { 9092 }
   let(:num_events) { 10 }
+
   let(:base_config) { {'client_id' => 'kafkaoutputspec'} }
-  let(:event) { LogStash::Event.new({'message' => '183.60.215.50 - - [11/Sep/2014:22:00:00 +0000] "GET /scripts/netcat-webserver HTTP/1.1" 200 182 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"', '@timestamp' => LogStash::Timestamp.at(0) }) }
+  let(:message_content) do
+    '"GET /scripts/netcat-webserver HTTP/1.1" 200 182 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"'
+  end
+  let(:event) do
+    LogStash::Event.new({ 'message' =>
+      '183.60.215.50 - - [11/Sep/2014:22:00:00 +0000] ' + message_content,
+      '@timestamp' => LogStash::Timestamp.at(0)
+    })
+  end
 
+  let(:kafka_client) { Kafka.new ["#{kafka_host}:#{kafka_port}"] }
 
   context 'when outputting messages serialized as String' do
     let(:test_topic) { 'logstash_integration_topic1' }
     let(:num_events) { 3 }
-    let(:consumer) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    subject do
-      consumer.fetch
-    end
 
     before :each do
       config = base_config.merge({"topic_id" => test_topic})
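This hunk swaps the unmaintained Poseidon consumer for the ruby-kafka client: a single `kafka_client` plus a `fetch_messages` helper (added in the last hunk of this file) replaces the per-context `Poseidon::PartitionConsumer` boilerplate. A minimal sketch of what that helper does, assuming a local broker on the spec's default host and port:

    require 'kafka'   # ruby-kafka gem, the new test dependency

    kafka = Kafka.new(["localhost:9092"])
    # Read partition 0 of a topic from the earliest offset, as the specs do.
    messages = kafka.fetch_messages(topic: "logstash_integration_topic1",
                                    partition: 0, offset: :earliest)
    messages.each { |m| puts m.value }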
@@ -30,8 +33,10 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should have data integrity' do
-      expect(subject.size).to eq(num_events)
-      subject.each do |m|
+      messages = fetch_messages(test_topic)
+
+      expect(messages.size).to eq(num_events)
+      messages.each do |m|
         expect(m.value).to eq(event.to_s)
       end
     end
@@ -41,13 +46,6 @@ describe "outputs/kafka", :integration => true do
   context 'when outputting messages serialized as Byte Array' do
     let(:test_topic) { 'topic1b' }
     let(:num_events) { 3 }
-    let(:consumer) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    subject do
-      consumer.fetch
-    end
 
     before :each do
       config = base_config.merge(
@@ -60,8 +58,10 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should have data integrity' do
-      expect(subject.size).to eq(num_events)
-      subject.each do |m|
+      messages = fetch_messages(test_topic)
+
+      expect(messages.size).to eq(num_events)
+      messages.each do |m|
         expect(m.value).to eq(event.to_s)
       end
     end
@@ -71,14 +71,6 @@ describe "outputs/kafka", :integration => true do
   context 'when setting message_key' do
     let(:num_events) { 10 }
     let(:test_topic) { 'logstash_integration_topic2' }
-    let!(:consumer0) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    let!(:consumer1) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 1, :earliest_offset)
-    end
 
     before :each do
       config = base_config.merge({"topic_id" => test_topic, "message_key" => "static_key"})
@@ -86,19 +78,14 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should send all events to one partition' do
-      expect(consumer0.fetch.size == num_events || consumer1.fetch.size == num_events).to be true
+      data0 = fetch_messages(test_topic, partition: 0)
+      data1 = fetch_messages(test_topic, partition: 1)
+      expect(data0.size == num_events || data1.size == num_events).to be true
     end
   end
 
   context 'when using gzip compression' do
     let(:test_topic) { 'logstash_integration_gzip_topic' }
-    let!(:consumer) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    subject do
-      consumer.fetch
-    end
 
     before :each do
       config = base_config.merge({"topic_id" => test_topic, "compression_type" => "gzip"})
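The message_key assertion above works because Kafka's default partitioner hashes the record key: every event carries the same "static_key", so all of them must land on one of the topic's two partitions. A hypothetical illustration of the idea (Kafka actually uses a murmur2 hash, not Ruby's String#hash):

    # A constant key always hashes to the same value, hence the same partition.
    key_hash  = "static_key".hash  # stand-in for Kafka's murmur2 key hash
    partition = key_hash % 2       # this spec's topic has two partitions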
@@ -106,8 +93,10 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should have data integrity' do
-      expect(subject.size).to eq(num_events)
-      subject.each do |m|
+      messages = fetch_messages(test_topic)
+
+      expect(messages.size).to eq(num_events)
+      messages.each do |m|
         expect(m.value).to eq(event.to_s)
       end
     end
@@ -115,13 +104,6 @@ describe "outputs/kafka", :integration => true do
 
   context 'when using snappy compression' do
     let(:test_topic) { 'logstash_integration_snappy_topic' }
-    let!(:consumer) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    subject do
-      consumer.fetch
-    end
 
     before :each do
       config = base_config.merge({"topic_id" => test_topic, "compression_type" => "snappy"})
@@ -129,8 +111,10 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should have data integrity' do
-      expect(subject.size).to eq(num_events)
-      subject.each do |m|
+      messages = fetch_messages(test_topic)
+
+      expect(messages.size).to eq(num_events)
+      messages.each do |m|
         expect(m.value).to eq(event.to_s)
       end
     end
@@ -143,52 +127,85 @@ describe "outputs/kafka", :integration => true do
       config = base_config.merge({"topic_id" => test_topic, "compression_type" => "lz4"})
       load_kafka_data(config)
     end
+
+    # NOTE: depends on extlz4 gem which is using a C-extension
+    # it 'should have data integrity' do
+    #   messages = fetch_messages(test_topic)
+    #
+    #   expect(messages.size).to eq(num_events)
+    #   messages.each do |m|
+    #     expect(m.value).to eq(event.to_s)
+    #   end
+    # end
   end
 
   context 'when using multi partition topic' do
-    let(:num_events) { 10 }
+    let(:num_events) { 100 } # ~ more than (batch.size) 16,384 bytes
     let(:test_topic) { 'logstash_integration_topic3' }
-    let!(:consumer0) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    let!(:consumer1) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 1, :earliest_offset)
+
+    before :each do
+      config = base_config.merge("topic_id" => test_topic, "partitioner" => 'org.apache.kafka.clients.producer.UniformStickyPartitioner')
+      load_kafka_data(config) do # let's have a bit more (diverse) dataset
+        num_events.times.collect do
+          LogStash::Event.new.tap do |e|
+            e.set('message', event.get('message').sub('183.60.215.50') { "#{rand(126)+1}.#{rand(126)+1}.#{rand(126)+1}.#{rand(126)+1}" })
+          end
+        end
+      end
     end
 
-    let!(:consumer2) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 2, :earliest_offset)
+    it 'should distribute events to all partitions' do
+      consumer0_records = fetch_messages(test_topic, partition: 0)
+      consumer1_records = fetch_messages(test_topic, partition: 1)
+      consumer2_records = fetch_messages(test_topic, partition: 2)
+
+      all_records = consumer0_records + consumer1_records + consumer2_records
+      expect(all_records.size).to eq(num_events * 2)
+      all_records.each do |m|
+        expect(m.value).to include message_content
+      end
+
+      expect(consumer0_records.size).to be > 1
+      expect(consumer1_records.size).to be > 1
+      expect(consumer2_records.size).to be > 1
     end
+  end
+
+  context 'setting partitioner' do
+    let(:test_topic) { 'logstash_integration_partitioner_topic' }
+    let(:partitioner) { nil }
 
     before :each do
-      config = base_config.merge({"topic_id" => test_topic})
+      @messages_offset = fetch_messages_from_all_partitions
+
+      config = base_config.merge("topic_id" => test_topic, 'partitioner' => partitioner)
       load_kafka_data(config)
     end
 
-    it 'should distribute events to all partition' do
-      consumer0_records = consumer0.fetch
-      consumer1_records = consumer1.fetch
-      consumer2_records = consumer2.fetch
-
-      expect(consumer0_records.size > 1 &&
-             consumer1_records.size > 1 &&
-             consumer2_records.size > 1).to be true
-
-      all_records = consumer0_records + consumer1_records + consumer2_records
-      expect(all_records.size).to eq(num_events)
-      all_records.each do |m|
-        expect(m.value).to eq(event.to_s)
+    [ 'default', 'round_robin', 'uniform_sticky' ].each do |partitioner|
+      describe partitioner do
+        let(:partitioner) { partitioner }
+        it 'loads data' do
+          expect(fetch_messages_from_all_partitions - @messages_offset).to eql num_events
+        end
       end
     end
+
+    def fetch_messages_from_all_partitions
+      3.times.map { |i| fetch_messages(test_topic, partition: i).size }.sum
+    end
   end
 
   def load_kafka_data(config)
     kafka = LogStash::Outputs::Kafka.new(config)
     kafka.register
     kafka.multi_receive(num_events.times.collect { event })
+    kafka.multi_receive(Array(yield)) if block_given?
     kafka.close
   end
 
+  def fetch_messages(topic, partition: 0, offset: :earliest)
+    kafka_client.fetch_messages(topic: topic, partition: partition, offset: offset)
+  end
+
 end
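The rewritten multi-partition test and the new 'setting partitioner' context exercise the output's partitioner option: the shorthand values covered are 'default', 'round_robin' and 'uniform_sticky', and the multi-partition spec shows a fully qualified partitioner class name is accepted too. A usage sketch in the same style as these specs:

    # Assumes the spec's topic; any of the three shorthand values should work here.
    kafka = LogStash::Outputs::Kafka.new(
      "topic_id"    => "logstash_integration_partitioner_topic",
      "partitioner" => "uniform_sticky"  # or "default" / "round_robin"
    )
    kafka.register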
@@ -34,6 +34,68 @@ describe LogStash::Inputs::Kafka do
   subject { LogStash::Inputs::Kafka.new(config) }
 
   it "should register" do
-    expect {subject.register}.to_not raise_error
+    expect { subject.register }.to_not raise_error
+  end
+
+  context 'with client_rack' do
+    let(:config) { super.merge('client_rack' => 'EU-R1') }
+
+    it "sets broker rack parameter" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('client.rack' => 'EU-R1')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-0') ).to be kafka_client
+    end
+  end
+
+  context 'string integer config' do
+    let(:config) { super.merge('session_timeout_ms' => '25000', 'max_poll_interval_ms' => '345000') }
+
+    it "sets integer values" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('session.timeout.ms' => '25000', 'max.poll.interval.ms' => '345000')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-1') ).to be kafka_client
+    end
+  end
+
+  context 'integer config' do
+    let(:config) { super.merge('session_timeout_ms' => 25200, 'max_poll_interval_ms' => 123_000) }
+
+    it "sets integer values" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('session.timeout.ms' => '25200', 'max.poll.interval.ms' => '123000')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-2') ).to be kafka_client
+    end
+  end
+
+  context 'string boolean config' do
+    let(:config) { super.merge('enable_auto_commit' => 'false', 'check_crcs' => 'true') }
+
+    it "sets parameters" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('enable.auto.commit' => 'false', 'check.crcs' => 'true')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-3') ).to be kafka_client
+      expect( subject.enable_auto_commit ).to be false
+    end
+  end
+
+  context 'boolean config' do
+    let(:config) { super.merge('enable_auto_commit' => true, 'check_crcs' => false) }
+
+    it "sets parameters" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('enable.auto.commit' => 'true', 'check.crcs' => 'false')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-4') ).to be kafka_client
+      expect( subject.enable_auto_commit ).to be true
+    end
   end
 end
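These new input specs mostly pin down config coercion: string and native integer/boolean values are normalized before being handed to the KafkaConsumer, and client_rack maps to the consumer's client.rack property (rack-aware fetching from the closest replica). A minimal usage sketch, with the topics value assumed:

    input = LogStash::Inputs::Kafka.new(
      "topics"      => ["logstash"],  # assumed; any ordinary input config works
      "client_rack" => "EU-R1"        # forwarded as 'client.rack' to the KafkaConsumer
    )
    input.register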
@@ -56,14 +56,15 @@ describe "outputs/kafka" do
     end
   end
 
-  context "when KafkaProducer#send() raises an exception" do
+  context "when KafkaProducer#send() raises a retriable exception" do
     let(:failcount) { (rand * 10).to_i }
     let(:sendcount) { failcount + 1 }
 
     let(:exception_classes) { [
       org.apache.kafka.common.errors.TimeoutException,
+      org.apache.kafka.common.errors.DisconnectException,
+      org.apache.kafka.common.errors.CoordinatorNotAvailableException,
       org.apache.kafka.common.errors.InterruptException,
-      org.apache.kafka.common.errors.SerializationException
     ] }
 
     before do
@@ -88,6 +89,37 @@ describe "outputs/kafka" do
     end
   end
 
+  context "when KafkaProducer#send() raises a non-retriable exception" do
+    let(:failcount) { (rand * 10).to_i }
+
+    let(:exception_classes) { [
+      org.apache.kafka.common.errors.SerializationException,
+      org.apache.kafka.common.errors.RecordTooLargeException,
+      org.apache.kafka.common.errors.InvalidTopicException
+    ] }
+
+    before do
+      count = 0
+      expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
+        .exactly(1).times
+        .and_wrap_original do |m, *args|
+        if count < failcount # fail 'failcount' times in a row.
+          count += 1
+          # Pick an exception at random
+          raise exception_classes.shuffle.first.new("injected exception for testing")
+        else
+          m.call(*args) # call original
+        end
+      end
+    end
+
+    it "should not retry" do
+      kafka = LogStash::Outputs::Kafka.new(simple_kafka_config)
+      kafka.register
+      kafka.multi_receive([event])
+    end
+  end
+
   context "when a send fails" do
     context "and the default retries behavior is used" do
       # Fail this many times and then finally succeed.
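Together the two contexts above split KafkaProducer failures into retriable and non-retriable groups, which matches Kafka's own exception hierarchy: recoverable broker-side problems subclass RetriableException, while InterruptException does not and has to be special-cased. A sketch of the classification these specs imply (not the plugin's literal code):

    def retriable?(error)
      error.is_a?(org.apache.kafka.common.errors.RetriableException) ||
        error.is_a?(org.apache.kafka.common.errors.InterruptException)
    end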
@@ -97,7 +129,7 @@ describe "outputs/kafka" do
       let(:sendcount) { failcount + 1 }
 
       it "should retry until successful" do
-        count = 0;
+        count = 0
 
         expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
           .exactly(sendcount).times
@@ -107,7 +139,7 @@ describe "outputs/kafka" do
            # inject some failures.
 
            # Return a custom Future that will raise an exception to simulate a Kafka send() problem.
-           future = java.util.concurrent.FutureTask.new { raise "Failed" }
+           future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
            future.run
            future
          else
@@ -129,7 +161,7 @@ describe "outputs/kafka" do
           .once
           .and_wrap_original do |m, *args|
           # Always fail.
-          future = java.util.concurrent.FutureTask.new { raise "Failed" }
+          future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
           future.run
           future
         end
@@ -143,7 +175,7 @@ describe "outputs/kafka" do
           .once
           .and_wrap_original do |m, *args|
           # Always fail.
-          future = java.util.concurrent.FutureTask.new { raise "Failed" }
+          future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
           future.run
           future
         end
@@ -164,7 +196,7 @@ describe "outputs/kafka" do
           .at_most(max_sends).times
           .and_wrap_original do |m, *args|
           # Always fail.
-          future = java.util.concurrent.FutureTask.new { raise "Failed" }
+          future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
           future.run
           future
         end
@@ -175,10 +207,10 @@ describe "outputs/kafka" do
 
       it 'should only sleep retries number of times' do
         expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
-          .at_most(max_sends)
+          .at_most(max_sends).times
           .and_wrap_original do |m, *args|
           # Always fail.
-          future = java.util.concurrent.FutureTask.new { raise "Failed" }
+          future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
           future.run
           future
         end
@@ -189,4 +221,25 @@ describe "outputs/kafka" do
       end
     end
   end
+
+  context 'when ssl endpoint identification disabled' do
+
+    let(:config) do
+      simple_kafka_config.merge('ssl_endpoint_identification_algorithm' => '', 'security_protocol' => 'SSL')
+    end
+
+    subject { LogStash::Outputs::Kafka.new(config) }
+
+    it 'does not configure truststore' do
+      expect(org.apache.kafka.clients.producer.KafkaProducer).
+          to receive(:new).with(hash_excluding('ssl.truststore.location' => anything))
+      subject.register
+    end
+
+    it 'sets empty ssl.endpoint.identification.algorithm' do
+      expect(org.apache.kafka.clients.producer.KafkaProducer).
+          to receive(:new).with(hash_including('ssl.endpoint.identification.algorithm' => ''))
+      subject.register
+    end
+  end
 end
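The closing context covers disabling TLS hostname verification: an empty ssl_endpoint_identification_algorithm is passed through verbatim as Kafka's ssl.endpoint.identification.algorithm property, and no truststore is configured unless one is given. Constructing such an output, with the remaining options assumed to come from simple_kafka_config:

    output = LogStash::Outputs::Kafka.new(
      "topic_id"          => "test",
      "security_protocol" => "SSL",
      "ssl_endpoint_identification_algorithm" => ""  # disables server hostname verification
    )
    output.register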