logstash-integration-kafka 10.0.1-java → 10.5.0-java

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
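The integration specs below replace the poseidon consumer helpers with the ruby-kafka client (`require 'kafka'`), reading messages back from the broker through a shared `fetch_messages` helper. A minimal sketch of that consumer pattern, assuming the ruby-kafka gem and a broker on localhost:9092 (the host/port the specs default to); the topic name is illustrative:

    require 'kafka'   # ruby-kafka gem, as required by the updated spec

    # Seed broker matches the spec's kafka_host/kafka_port defaults.
    kafka_client = Kafka.new(["localhost:9092"])

    # Read everything currently on partition 0 of a topic, from the earliest offset,
    # mirroring the fetch_messages(topic, partition:, offset:) helper added below.
    messages = kafka_client.fetch_messages(topic: 'logstash_integration_topic1',
                                           partition: 0,
                                           offset: :earliest)
    messages.each { |m| puts m.value }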
@@ -3,26 +3,29 @@
 require "logstash/devutils/rspec/spec_helper"
 require 'logstash/outputs/kafka'
 require 'json'
-require 'poseidon'
+require 'kafka'
 
 describe "outputs/kafka", :integration => true do
   let(:kafka_host) { 'localhost' }
   let(:kafka_port) { 9092 }
   let(:num_events) { 10 }
+
   let(:base_config) { {'client_id' => 'kafkaoutputspec'} }
-  let(:event) { LogStash::Event.new({'message' => '183.60.215.50 - - [11/Sep/2014:22:00:00 +0000] "GET /scripts/netcat-webserver HTTP/1.1" 200 182 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"', '@timestamp' => LogStash::Timestamp.at(0) }) }
+  let(:message_content) do
+    '"GET /scripts/netcat-webserver HTTP/1.1" 200 182 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"'
+  end
+  let(:event) do
+    LogStash::Event.new({ 'message' =>
+      '183.60.215.50 - - [11/Sep/2014:22:00:00 +0000] ' + message_content,
+      '@timestamp' => LogStash::Timestamp.at(0)
+    })
+  end
 
+  let(:kafka_client) { Kafka.new ["#{kafka_host}:#{kafka_port}"] }
 
   context 'when outputting messages serialized as String' do
     let(:test_topic) { 'logstash_integration_topic1' }
     let(:num_events) { 3 }
-    let(:consumer) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    subject do
-      consumer.fetch
-    end
 
     before :each do
       config = base_config.merge({"topic_id" => test_topic})
@@ -30,8 +33,10 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should have data integrity' do
-      expect(subject.size).to eq(num_events)
-      subject.each do |m|
+      messages = fetch_messages(test_topic)
+
+      expect(messages.size).to eq(num_events)
+      messages.each do |m|
         expect(m.value).to eq(event.to_s)
       end
     end
@@ -41,13 +46,6 @@ describe "outputs/kafka", :integration => true do
   context 'when outputting messages serialized as Byte Array' do
     let(:test_topic) { 'topic1b' }
     let(:num_events) { 3 }
-    let(:consumer) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    subject do
-      consumer.fetch
-    end
 
     before :each do
       config = base_config.merge(
@@ -60,8 +58,10 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should have data integrity' do
-      expect(subject.size).to eq(num_events)
-      subject.each do |m|
+      messages = fetch_messages(test_topic)
+
+      expect(messages.size).to eq(num_events)
+      messages.each do |m|
         expect(m.value).to eq(event.to_s)
       end
     end
@@ -71,14 +71,6 @@ describe "outputs/kafka", :integration => true do
   context 'when setting message_key' do
     let(:num_events) { 10 }
     let(:test_topic) { 'logstash_integration_topic2' }
-    let!(:consumer0) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    let!(:consumer1) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 1, :earliest_offset)
-    end
 
     before :each do
       config = base_config.merge({"topic_id" => test_topic, "message_key" => "static_key"})
@@ -86,19 +78,14 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should send all events to one partition' do
-      expect(consumer0.fetch.size == num_events || consumer1.fetch.size == num_events).to be true
+      data0 = fetch_messages(test_topic, partition: 0)
+      data1 = fetch_messages(test_topic, partition: 1)
+      expect(data0.size == num_events || data1.size == num_events).to be true
     end
   end
 
   context 'when using gzip compression' do
     let(:test_topic) { 'logstash_integration_gzip_topic' }
-    let!(:consumer) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    subject do
-      consumer.fetch
-    end
 
     before :each do
       config = base_config.merge({"topic_id" => test_topic, "compression_type" => "gzip"})
@@ -106,8 +93,10 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should have data integrity' do
-      expect(subject.size).to eq(num_events)
-      subject.each do |m|
+      messages = fetch_messages(test_topic)
+
+      expect(messages.size).to eq(num_events)
+      messages.each do |m|
        expect(m.value).to eq(event.to_s)
       end
     end
@@ -115,13 +104,6 @@ describe "outputs/kafka", :integration => true do
 
   context 'when using snappy compression' do
     let(:test_topic) { 'logstash_integration_snappy_topic' }
-    let!(:consumer) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    subject do
-      consumer.fetch
-    end
 
     before :each do
       config = base_config.merge({"topic_id" => test_topic, "compression_type" => "snappy"})
@@ -129,8 +111,10 @@ describe "outputs/kafka", :integration => true do
     end
 
     it 'should have data integrity' do
-      expect(subject.size).to eq(num_events)
-      subject.each do |m|
+      messages = fetch_messages(test_topic)
+
+      expect(messages.size).to eq(num_events)
+      messages.each do |m|
        expect(m.value).to eq(event.to_s)
       end
     end
@@ -143,52 +127,85 @@ describe "outputs/kafka", :integration => true do
       config = base_config.merge({"topic_id" => test_topic, "compression_type" => "lz4"})
       load_kafka_data(config)
     end
+
+    # NOTE: depends on extlz4 gem which is using a C-extension
+    # it 'should have data integrity' do
+    #   messages = fetch_messages(test_topic)
+    #
+    #   expect(messages.size).to eq(num_events)
+    #   messages.each do |m|
+    #     expect(m.value).to eq(event.to_s)
+    #   end
+    # end
   end
 
   context 'when using multi partition topic' do
-    let(:num_events) { 10 }
+    let(:num_events) { 100 } # ~ more than (batch.size) 16,384 bytes
     let(:test_topic) { 'logstash_integration_topic3' }
-    let!(:consumer0) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 0, :earliest_offset)
-    end
-    let!(:consumer1) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 1, :earliest_offset)
+
+    before :each do
+      config = base_config.merge("topic_id" => test_topic, "partitioner" => 'org.apache.kafka.clients.producer.UniformStickyPartitioner')
+      load_kafka_data(config) do # let's have a bit more (diverse) dataset
+        num_events.times.collect do
+          LogStash::Event.new.tap do |e|
+            e.set('message', event.get('message').sub('183.60.215.50') { "#{rand(126)+1}.#{rand(126)+1}.#{rand(126)+1}.#{rand(126)+1}" })
+          end
+        end
+      end
     end
 
-    let!(:consumer2) do
-      Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
-                                      test_topic, 2, :earliest_offset)
+    it 'should distribute events to all partitions' do
+      consumer0_records = fetch_messages(test_topic, partition: 0)
+      consumer1_records = fetch_messages(test_topic, partition: 1)
+      consumer2_records = fetch_messages(test_topic, partition: 2)
+
+      all_records = consumer0_records + consumer1_records + consumer2_records
+      expect(all_records.size).to eq(num_events * 2)
+      all_records.each do |m|
+        expect(m.value).to include message_content
+      end
+
+      expect(consumer0_records.size).to be > 1
+      expect(consumer1_records.size).to be > 1
+      expect(consumer2_records.size).to be > 1
    end
+  end
+
+  context 'setting partitioner' do
+    let(:test_topic) { 'logstash_integration_partitioner_topic' }
+    let(:partitioner) { nil }
 
     before :each do
-      config = base_config.merge({"topic_id" => test_topic})
+      @messages_offset = fetch_messages_from_all_partitions
+
+      config = base_config.merge("topic_id" => test_topic, 'partitioner' => partitioner)
       load_kafka_data(config)
     end
 
-    it 'should distribute events to all partition' do
-      consumer0_records = consumer0.fetch
-      consumer1_records = consumer1.fetch
-      consumer2_records = consumer2.fetch
-
-      expect(consumer0_records.size > 1 &&
-             consumer1_records.size > 1 &&
-             consumer2_records.size > 1).to be true
-
-      all_records = consumer0_records + consumer1_records + consumer2_records
-      expect(all_records.size).to eq(num_events)
-      all_records.each do |m|
-        expect(m.value).to eq(event.to_s)
+    [ 'default', 'round_robin', 'uniform_sticky' ].each do |partitioner|
+      describe partitioner do
+        let(:partitioner) { partitioner }
+        it 'loads data' do
+          expect(fetch_messages_from_all_partitions - @messages_offset).to eql num_events
+        end
       end
    end
+
+    def fetch_messages_from_all_partitions
+      3.times.map { |i| fetch_messages(test_topic, partition: i).size }.sum
+    end
  end
 
  def load_kafka_data(config)
    kafka = LogStash::Outputs::Kafka.new(config)
    kafka.register
    kafka.multi_receive(num_events.times.collect { event })
+    kafka.multi_receive(Array(yield)) if block_given?
    kafka.close
  end
 
+  def fetch_messages(topic, partition: 0, offset: :earliest)
+    kafka_client.fetch_messages(topic: topic, partition: partition, offset: offset)
+  end
+
 end
@@ -34,6 +34,68 @@ describe LogStash::Inputs::Kafka do
   subject { LogStash::Inputs::Kafka.new(config) }
 
   it "should register" do
-    expect {subject.register}.to_not raise_error
+    expect { subject.register }.to_not raise_error
+  end
+
+  context 'with client_rack' do
+    let(:config) { super.merge('client_rack' => 'EU-R1') }
+
+    it "sets broker rack parameter" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('client.rack' => 'EU-R1')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-0') ).to be kafka_client
+    end
+  end
+
+  context 'string integer config' do
+    let(:config) { super.merge('session_timeout_ms' => '25000', 'max_poll_interval_ms' => '345000') }
+
+    it "sets integer values" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('session.timeout.ms' => '25000', 'max.poll.interval.ms' => '345000')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-1') ).to be kafka_client
+    end
+  end
+
+  context 'integer config' do
+    let(:config) { super.merge('session_timeout_ms' => 25200, 'max_poll_interval_ms' => 123_000) }
+
+    it "sets integer values" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('session.timeout.ms' => '25200', 'max.poll.interval.ms' => '123000')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-2') ).to be kafka_client
+    end
+  end
+
+  context 'string boolean config' do
+    let(:config) { super.merge('enable_auto_commit' => 'false', 'check_crcs' => 'true') }
+
+    it "sets parameters" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('enable.auto.commit' => 'false', 'check.crcs' => 'true')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-3') ).to be kafka_client
+      expect( subject.enable_auto_commit ).to be false
+    end
+  end
+
+  context 'boolean config' do
+    let(:config) { super.merge('enable_auto_commit' => true, 'check_crcs' => false) }
+
+    it "sets parameters" do
+      expect(org.apache.kafka.clients.consumer.KafkaConsumer).
+          to receive(:new).with(hash_including('enable.auto.commit' => 'true', 'check.crcs' => 'false')).
+          and_return kafka_client = double('kafka-consumer')
+
+      expect( subject.send(:create_consumer, 'sample_client-4') ).to be kafka_client
+      expect( subject.enable_auto_commit ).to be true
+    end
   end
 end
@@ -56,14 +56,15 @@ describe "outputs/kafka" do
56
56
  end
57
57
  end
58
58
 
59
- context "when KafkaProducer#send() raises an exception" do
59
+ context "when KafkaProducer#send() raises a retriable exception" do
60
60
  let(:failcount) { (rand * 10).to_i }
61
61
  let(:sendcount) { failcount + 1 }
62
62
 
63
63
  let(:exception_classes) { [
64
64
  org.apache.kafka.common.errors.TimeoutException,
65
+ org.apache.kafka.common.errors.DisconnectException,
66
+ org.apache.kafka.common.errors.CoordinatorNotAvailableException,
65
67
  org.apache.kafka.common.errors.InterruptException,
66
- org.apache.kafka.common.errors.SerializationException
67
68
  ] }
68
69
 
69
70
  before do
@@ -88,6 +89,37 @@ describe "outputs/kafka" do
88
89
  end
89
90
  end
90
91
 
92
+ context "when KafkaProducer#send() raises a non-retriable exception" do
93
+ let(:failcount) { (rand * 10).to_i }
94
+
95
+ let(:exception_classes) { [
96
+ org.apache.kafka.common.errors.SerializationException,
97
+ org.apache.kafka.common.errors.RecordTooLargeException,
98
+ org.apache.kafka.common.errors.InvalidTopicException
99
+ ] }
100
+
101
+ before do
102
+ count = 0
103
+ expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
104
+ .exactly(1).times
105
+ .and_wrap_original do |m, *args|
106
+ if count < failcount # fail 'failcount' times in a row.
107
+ count += 1
108
+ # Pick an exception at random
109
+ raise exception_classes.shuffle.first.new("injected exception for testing")
110
+ else
111
+ m.call(*args) # call original
112
+ end
113
+ end
114
+ end
115
+
116
+ it "should not retry" do
117
+ kafka = LogStash::Outputs::Kafka.new(simple_kafka_config)
118
+ kafka.register
119
+ kafka.multi_receive([event])
120
+ end
121
+ end
122
+
91
123
  context "when a send fails" do
92
124
  context "and the default retries behavior is used" do
93
125
  # Fail this many times and then finally succeed.
@@ -97,7 +129,7 @@ describe "outputs/kafka" do
97
129
  let(:sendcount) { failcount + 1 }
98
130
 
99
131
  it "should retry until successful" do
100
- count = 0;
132
+ count = 0
101
133
 
102
134
  expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
103
135
  .exactly(sendcount).times
@@ -107,7 +139,7 @@ describe "outputs/kafka" do
107
139
  # inject some failures.
108
140
 
109
141
  # Return a custom Future that will raise an exception to simulate a Kafka send() problem.
110
- future = java.util.concurrent.FutureTask.new { raise "Failed" }
142
+ future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
111
143
  future.run
112
144
  future
113
145
  else
@@ -129,7 +161,7 @@ describe "outputs/kafka" do
129
161
  .once
130
162
  .and_wrap_original do |m, *args|
131
163
  # Always fail.
132
- future = java.util.concurrent.FutureTask.new { raise "Failed" }
164
+ future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
133
165
  future.run
134
166
  future
135
167
  end
@@ -143,7 +175,7 @@ describe "outputs/kafka" do
143
175
  .once
144
176
  .and_wrap_original do |m, *args|
145
177
  # Always fail.
146
- future = java.util.concurrent.FutureTask.new { raise "Failed" }
178
+ future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
147
179
  future.run
148
180
  future
149
181
  end
@@ -164,7 +196,7 @@ describe "outputs/kafka" do
164
196
  .at_most(max_sends).times
165
197
  .and_wrap_original do |m, *args|
166
198
  # Always fail.
167
- future = java.util.concurrent.FutureTask.new { raise "Failed" }
199
+ future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
168
200
  future.run
169
201
  future
170
202
  end
@@ -175,10 +207,10 @@ describe "outputs/kafka" do
175
207
 
176
208
  it 'should only sleep retries number of times' do
177
209
  expect_any_instance_of(org.apache.kafka.clients.producer.KafkaProducer).to receive(:send)
178
- .at_most(max_sends)
210
+ .at_most(max_sends).times
179
211
  .and_wrap_original do |m, *args|
180
212
  # Always fail.
181
- future = java.util.concurrent.FutureTask.new { raise "Failed" }
213
+ future = java.util.concurrent.FutureTask.new { raise org.apache.kafka.common.errors.TimeoutException.new("Failed") }
182
214
  future.run
183
215
  future
184
216
  end
@@ -189,4 +221,25 @@ describe "outputs/kafka" do
189
221
  end
190
222
  end
191
223
  end
224
+
225
+ context 'when ssl endpoint identification disabled' do
226
+
227
+ let(:config) do
228
+ simple_kafka_config.merge('ssl_endpoint_identification_algorithm' => '', 'security_protocol' => 'SSL')
229
+ end
230
+
231
+ subject { LogStash::Outputs::Kafka.new(config) }
232
+
233
+ it 'does not configure truststore' do
234
+ expect(org.apache.kafka.clients.producer.KafkaProducer).
235
+ to receive(:new).with(hash_excluding('ssl.truststore.location' => anything))
236
+ subject.register
237
+ end
238
+
239
+ it 'sets empty ssl.endpoint.identification.algorithm' do
240
+ expect(org.apache.kafka.clients.producer.KafkaProducer).
241
+ to receive(:new).with(hash_including('ssl.endpoint.identification.algorithm' => ''))
242
+ subject.register
243
+ end
244
+ end
192
245
  end