logstash-integration-kafka 10.0.0-java → 10.4.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
@@ -1,6 +1,6 @@
  Gem::Specification.new do |s|
  s.name = 'logstash-integration-kafka'
- s.version = '10.0.0'
+ s.version = '10.4.0'
  s.licenses = ['Apache-2.0']
  s.summary = "Integration with Kafka - input and output plugins"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline "+
@@ -35,7 +35,7 @@ Gem::Specification.new do |s|
  "integration_plugins" => "logstash-input-kafka,logstash-output-kafka"
  }

- s.add_development_dependency 'jar-dependencies', '~> 0.3.12'
+ s.add_development_dependency 'jar-dependencies', '>= 0.3.12'

  s.platform = RUBY_PLATFORM

@@ -49,6 +49,6 @@ Gem::Specification.new do |s|

  s.add_development_dependency 'logstash-devutils'
  s.add_development_dependency 'rspec-wait'
- s.add_development_dependency 'poseidon'
+ s.add_development_dependency 'ruby-kafka'
  s.add_development_dependency 'snappy'
  end
@@ -1,7 +1,6 @@
  # encoding: utf-8
  require "logstash/devutils/rspec/spec_helper"
  require "logstash/inputs/kafka"
- require "digest"
  require "rspec/wait"

  # Please run kafka_test_setup.sh prior to executing this integration test.
@@ -12,159 +11,129 @@ describe "inputs/kafka", :integration => true do
  let(:group_id_3) {rand(36**8).to_s(36)}
  let(:group_id_4) {rand(36**8).to_s(36)}
  let(:group_id_5) {rand(36**8).to_s(36)}
- let(:plain_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest'} }
- let(:multi_consumer_config) { plain_config.merge({"group_id" => group_id_4, "client_id" => "spec", "consumer_threads" => 3}) }
- let(:snappy_config) { { 'topics' => ['logstash_integration_topic_snappy'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest'} }
- let(:lz4_config) { { 'topics' => ['logstash_integration_topic_lz4'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest'} }
- let(:pattern_config) { { 'topics_pattern' => 'logstash_integration_topic_.*', 'group_id' => group_id_2, 'codec' => 'plain', 'auto_offset_reset' => 'earliest'} }
- let(:decorate_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_3, 'auto_offset_reset' => 'earliest', 'decorate_events' => true} }
- let(:manual_commit_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_5, 'auto_offset_reset' => 'earliest', 'enable_auto_commit' => 'false'} }
+ let(:group_id_6) {rand(36**8).to_s(36)}
+ let(:plain_config) do
+ { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_1,
+ 'auto_offset_reset' => 'earliest' }
+ end
+ let(:multi_consumer_config) do
+ plain_config.merge({"group_id" => group_id_4, "client_id" => "spec", "consumer_threads" => 3})
+ end
+ let(:snappy_config) do
+ { 'topics' => ['logstash_integration_topic_snappy'], 'codec' => 'plain', 'group_id' => group_id_1,
+ 'auto_offset_reset' => 'earliest' }
+ end
+ let(:lz4_config) do
+ { 'topics' => ['logstash_integration_topic_lz4'], 'codec' => 'plain', 'group_id' => group_id_1,
+ 'auto_offset_reset' => 'earliest' }
+ end
+ let(:pattern_config) do
+ { 'topics_pattern' => 'logstash_integration_topic_.*', 'group_id' => group_id_2, 'codec' => 'plain',
+ 'auto_offset_reset' => 'earliest' }
+ end
+ let(:decorate_config) do
+ { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_3,
+ 'auto_offset_reset' => 'earliest', 'decorate_events' => true }
+ end
+ let(:manual_commit_config) do
+ { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_5,
+ 'auto_offset_reset' => 'earliest', 'enable_auto_commit' => 'false' }
+ end
  let(:timeout_seconds) { 30 }
  let(:num_events) { 103 }

  describe "#kafka-topics" do
- def thread_it(kafka_input, queue)
- Thread.new do
- begin
- kafka_input.run(queue)
- end
- end
- end

  it "should consume all messages from plain 3-partition topic" do
- kafka_input = LogStash::Inputs::Kafka.new(plain_config)
- queue = Queue.new
- t = thread_it(kafka_input, queue)
- begin
- t.run
- wait(timeout_seconds).for {queue.length}.to eq(num_events)
- expect(queue.length).to eq(num_events)
- ensure
- t.kill
- t.join(30_000)
- end
+ queue = consume_messages(plain_config, timeout: timeout_seconds, event_count: num_events)
+ expect(queue.length).to eq(num_events)
  end

  it "should consume all messages from snappy 3-partition topic" do
- kafka_input = LogStash::Inputs::Kafka.new(snappy_config)
- queue = Queue.new
- t = thread_it(kafka_input, queue)
- begin
- t.run
- wait(timeout_seconds).for {queue.length}.to eq(num_events)
- expect(queue.length).to eq(num_events)
- ensure
- t.kill
- t.join(30_000)
- end
+ queue = consume_messages(snappy_config, timeout: timeout_seconds, event_count: num_events)
+ expect(queue.length).to eq(num_events)
  end

  it "should consume all messages from lz4 3-partition topic" do
- kafka_input = LogStash::Inputs::Kafka.new(lz4_config)
- queue = Queue.new
- t = thread_it(kafka_input, queue)
- begin
- t.run
- wait(timeout_seconds).for {queue.length}.to eq(num_events)
- expect(queue.length).to eq(num_events)
- ensure
- t.kill
- t.join(30_000)
- end
+ queue = consume_messages(lz4_config, timeout: timeout_seconds, event_count: num_events)
+ expect(queue.length).to eq(num_events)
  end

  it "should consumer all messages with multiple consumers" do
- kafka_input = LogStash::Inputs::Kafka.new(multi_consumer_config)
- queue = Queue.new
- t = thread_it(kafka_input, queue)
- begin
- t.run
- wait(timeout_seconds).for {queue.length}.to eq(num_events)
+ consume_messages(multi_consumer_config, timeout: timeout_seconds, event_count: num_events) do |queue, kafka_input|
  expect(queue.length).to eq(num_events)
  kafka_input.kafka_consumers.each_with_index do |consumer, i|
  expect(consumer.metrics.keys.first.tags["client-id"]).to eq("spec-#{i}")
  end
- ensure
- t.kill
- t.join(30_000)
  end
  end
  end

- describe "#kafka-topics-pattern" do
- def thread_it(kafka_input, queue)
- Thread.new do
- begin
- kafka_input.run(queue)
- end
- end
- end
-
+ context "#kafka-topics-pattern" do
  it "should consume all messages from all 3 topics" do
- kafka_input = LogStash::Inputs::Kafka.new(pattern_config)
- queue = Queue.new
- t = thread_it(kafka_input, queue)
- begin
- t.run
- wait(timeout_seconds).for {queue.length}.to eq(3*num_events)
- expect(queue.length).to eq(3*num_events)
- ensure
- t.kill
- t.join(30_000)
- end
+ total_events = num_events * 3
+ queue = consume_messages(pattern_config, timeout: timeout_seconds, event_count: total_events)
+ expect(queue.length).to eq(total_events)
  end
  end

- describe "#kafka-decorate" do
- def thread_it(kafka_input, queue)
- Thread.new do
- begin
- kafka_input.run(queue)
- end
- end
- end
-
+ context "#kafka-decorate" do
  it "should show the right topic and group name in decorated kafka section" do
  start = LogStash::Timestamp.now.time.to_i
- kafka_input = LogStash::Inputs::Kafka.new(decorate_config)
- queue = Queue.new
- t = thread_it(kafka_input, queue)
- begin
- t.run
- wait(timeout_seconds).for {queue.length}.to eq(num_events)
+ consume_messages(decorate_config, timeout: timeout_seconds, event_count: num_events) do |queue, _|
  expect(queue.length).to eq(num_events)
  event = queue.shift
  expect(event.get("[@metadata][kafka][topic]")).to eq("logstash_integration_topic_plain")
  expect(event.get("[@metadata][kafka][consumer_group]")).to eq(group_id_3)
  expect(event.get("[@metadata][kafka][timestamp]")).to be >= start
- ensure
- t.kill
- t.join(30_000)
  end
  end
  end

- describe "#kafka-offset-commit" do
- def thread_it(kafka_input, queue)
- Thread.new do
- begin
- kafka_input.run(queue)
+ context "#kafka-offset-commit" do
+ it "should manually commit offsets" do
+ queue = consume_messages(manual_commit_config, timeout: timeout_seconds, event_count: num_events)
+ expect(queue.length).to eq(num_events)
+ end
+ end
+
+ context 'setting partition_assignment_strategy' do
+ let(:test_topic) { 'logstash_integration_partitioner_topic' }
+ let(:consumer_config) do
+ plain_config.merge(
+ "topics" => [test_topic],
+ 'group_id' => group_id_6,
+ "client_id" => "partition_assignment_strategy-spec",
+ "consumer_threads" => 2,
+ "partition_assignment_strategy" => partition_assignment_strategy
+ )
+ end
+ let(:partition_assignment_strategy) { nil }
+
+ # NOTE: just verify setting works, as its a bit cumbersome to do in a unit spec
+ [ 'range', 'round_robin', 'sticky', 'org.apache.kafka.clients.consumer.CooperativeStickyAssignor' ].each do |partition_assignment_strategy|
+ describe partition_assignment_strategy do
+ let(:partition_assignment_strategy) { partition_assignment_strategy }
+ it 'consumes data' do
+ consume_messages(consumer_config, timeout: false, event_count: 0)
  end
  end
  end
+ end

- it "should manually commit offsets" do
- kafka_input = LogStash::Inputs::Kafka.new(manual_commit_config)
- queue = Queue.new
- t = thread_it(kafka_input, queue)
- begin
- t.run
- wait(timeout_seconds).for {queue.length}.to eq(num_events)
- expect(queue.length).to eq(num_events)
- ensure
- t.kill
- t.join(30_000)
- end
+ private
+
+ def consume_messages(config, queue: Queue.new, timeout:, event_count:)
+ kafka_input = LogStash::Inputs::Kafka.new(config)
+ t = Thread.new { kafka_input.run(queue) }
+ begin
+ t.run
+ wait(timeout).for { queue.length }.to eq(event_count) unless timeout.eql?(false)
+ block_given? ? yield(queue, kafka_input) : queue
+ ensure
+ t.kill
+ t.join(30_000)
  end
  end
+
  end
@@ -3,26 +3,29 @@
  require "logstash/devutils/rspec/spec_helper"
  require 'logstash/outputs/kafka'
  require 'json'
- require 'poseidon'
+ require 'kafka'

  describe "outputs/kafka", :integration => true do
  let(:kafka_host) { 'localhost' }
  let(:kafka_port) { 9092 }
  let(:num_events) { 10 }
+
  let(:base_config) { {'client_id' => 'kafkaoutputspec'} }
- let(:event) { LogStash::Event.new({'message' => '183.60.215.50 - - [11/Sep/2014:22:00:00 +0000] "GET /scripts/netcat-webserver HTTP/1.1" 200 182 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"', '@timestamp' => LogStash::Timestamp.at(0) }) }
+ let(:message_content) do
+ '"GET /scripts/netcat-webserver HTTP/1.1" 200 182 "-" "Mozilla/5.0 (compatible; EasouSpider; +http://www.easou.com/search/spider.html)"'
+ end
+ let(:event) do
+ LogStash::Event.new({ 'message' =>
+ '183.60.215.50 - - [11/Sep/2014:22:00:00 +0000] ' + message_content,
+ '@timestamp' => LogStash::Timestamp.at(0)
+ })
+ end

+ let(:kafka_client) { Kafka.new ["#{kafka_host}:#{kafka_port}"] }

  context 'when outputting messages serialized as String' do
  let(:test_topic) { 'logstash_integration_topic1' }
  let(:num_events) { 3 }
- let(:consumer) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 0, :earliest_offset)
- end
- subject do
- consumer.fetch
- end

  before :each do
  config = base_config.merge({"topic_id" => test_topic})
@@ -30,8 +33,10 @@ describe "outputs/kafka", :integration => true do
  end

  it 'should have data integrity' do
- expect(subject.size).to eq(num_events)
- subject.each do |m|
+ messages = fetch_messages(test_topic)
+
+ expect(messages.size).to eq(num_events)
+ messages.each do |m|
  expect(m.value).to eq(event.to_s)
  end
  end
@@ -41,13 +46,6 @@ describe "outputs/kafka", :integration => true do
  context 'when outputting messages serialized as Byte Array' do
  let(:test_topic) { 'topic1b' }
  let(:num_events) { 3 }
- let(:consumer) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 0, :earliest_offset)
- end
- subject do
- consumer.fetch
- end

  before :each do
  config = base_config.merge(
@@ -60,8 +58,10 @@ describe "outputs/kafka", :integration => true do
  end

  it 'should have data integrity' do
- expect(subject.size).to eq(num_events)
- subject.each do |m|
+ messages = fetch_messages(test_topic)
+
+ expect(messages.size).to eq(num_events)
+ messages.each do |m|
  expect(m.value).to eq(event.to_s)
  end
  end
@@ -71,14 +71,6 @@ describe "outputs/kafka", :integration => true do
  context 'when setting message_key' do
  let(:num_events) { 10 }
  let(:test_topic) { 'logstash_integration_topic2' }
- let!(:consumer0) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 0, :earliest_offset)
- end
- let!(:consumer1) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 1, :earliest_offset)
- end

  before :each do
  config = base_config.merge({"topic_id" => test_topic, "message_key" => "static_key"})
@@ -86,19 +78,14 @@ describe "outputs/kafka", :integration => true do
  end

  it 'should send all events to one partition' do
- expect(consumer0.fetch.size == num_events || consumer1.fetch.size == num_events).to be true
+ data0 = fetch_messages(test_topic, partition: 0)
+ data1 = fetch_messages(test_topic, partition: 1)
+ expect(data0.size == num_events || data1.size == num_events).to be true
  end
  end

  context 'when using gzip compression' do
  let(:test_topic) { 'logstash_integration_gzip_topic' }
- let!(:consumer) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 0, :earliest_offset)
- end
- subject do
- consumer.fetch
- end

  before :each do
  config = base_config.merge({"topic_id" => test_topic, "compression_type" => "gzip"})
@@ -106,8 +93,10 @@ describe "outputs/kafka", :integration => true do
  end

  it 'should have data integrity' do
- expect(subject.size).to eq(num_events)
- subject.each do |m|
+ messages = fetch_messages(test_topic)
+
+ expect(messages.size).to eq(num_events)
+ messages.each do |m|
  expect(m.value).to eq(event.to_s)
  end
  end
@@ -115,13 +104,6 @@ describe "outputs/kafka", :integration => true do

  context 'when using snappy compression' do
  let(:test_topic) { 'logstash_integration_snappy_topic' }
- let!(:consumer) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 0, :earliest_offset)
- end
- subject do
- consumer.fetch
- end

  before :each do
  config = base_config.merge({"topic_id" => test_topic, "compression_type" => "snappy"})
@@ -129,8 +111,10 @@ describe "outputs/kafka", :integration => true do
  end

  it 'should have data integrity' do
- expect(subject.size).to eq(num_events)
- subject.each do |m|
+ messages = fetch_messages(test_topic)
+
+ expect(messages.size).to eq(num_events)
+ messages.each do |m|
  expect(m.value).to eq(event.to_s)
  end
  end
@@ -143,52 +127,85 @@ describe "outputs/kafka", :integration => true do
  config = base_config.merge({"topic_id" => test_topic, "compression_type" => "lz4"})
  load_kafka_data(config)
  end
+
+ # NOTE: depends on extlz4 gem which is using a C-extension
+ # it 'should have data integrity' do
+ # messages = fetch_messages(test_topic)
+ #
+ # expect(messages.size).to eq(num_events)
+ # messages.each do |m|
+ # expect(m.value).to eq(event.to_s)
+ # end
+ # end
  end

  context 'when using multi partition topic' do
- let(:num_events) { 10 }
+ let(:num_events) { 100 } # ~ more than (batch.size) 16,384 bytes
  let(:test_topic) { 'logstash_integration_topic3' }
- let!(:consumer0) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 0, :earliest_offset)
- end
- let!(:consumer1) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 1, :earliest_offset)
+
+ before :each do
+ config = base_config.merge("topic_id" => test_topic, "partitioner" => 'org.apache.kafka.clients.producer.UniformStickyPartitioner')
+ load_kafka_data(config) do # let's have a bit more (diverse) dataset
+ num_events.times.collect do
+ LogStash::Event.new.tap do |e|
+ e.set('message', event.get('message').sub('183.60.215.50') { "#{rand(126)+1}.#{rand(126)+1}.#{rand(126)+1}.#{rand(126)+1}" })
+ end
+ end
+ end
  end

- let!(:consumer2) do
- Poseidon::PartitionConsumer.new("my_test_consumer", kafka_host, kafka_port,
- test_topic, 2, :earliest_offset)
+ it 'should distribute events to all partitions' do
+ consumer0_records = fetch_messages(test_topic, partition: 0)
+ consumer1_records = fetch_messages(test_topic, partition: 1)
+ consumer2_records = fetch_messages(test_topic, partition: 2)
+
+ all_records = consumer0_records + consumer1_records + consumer2_records
+ expect(all_records.size).to eq(num_events * 2)
+ all_records.each do |m|
+ expect(m.value).to include message_content
+ end
+
+ expect(consumer0_records.size).to be > 1
+ expect(consumer1_records.size).to be > 1
+ expect(consumer2_records.size).to be > 1
  end
+ end
+
+ context 'setting partitioner' do
+ let(:test_topic) { 'logstash_integration_partitioner_topic' }
+ let(:partitioner) { nil }

  before :each do
- config = base_config.merge({"topic_id" => test_topic})
+ @messages_offset = fetch_messages_from_all_partitions
+
+ config = base_config.merge("topic_id" => test_topic, 'partitioner' => partitioner)
  load_kafka_data(config)
  end

- it 'should distribute events to all partition' do
- consumer0_records = consumer0.fetch
- consumer1_records = consumer1.fetch
- consumer2_records = consumer2.fetch
-
- expect(consumer0_records.size > 1 &&
- consumer1_records.size > 1 &&
- consumer2_records.size > 1).to be true
-
- all_records = consumer0_records + consumer1_records + consumer2_records
- expect(all_records.size).to eq(num_events)
- all_records.each do |m|
- expect(m.value).to eq(event.to_s)
+ [ 'default', 'round_robin', 'uniform_sticky' ].each do |partitioner|
+ describe partitioner do
+ let(:partitioner) { partitioner }
+ it 'loads data' do
+ expect(fetch_messages_from_all_partitions - @messages_offset).to eql num_events
  end
  end
  end
+
+ def fetch_messages_from_all_partitions
+ 3.times.map { |i| fetch_messages(test_topic, partition: i).size }.sum
+ end
  end

  def load_kafka_data(config)
  kafka = LogStash::Outputs::Kafka.new(config)
  kafka.register
  kafka.multi_receive(num_events.times.collect { event })
+ kafka.multi_receive(Array(yield)) if block_given?
  kafka.close
  end

+ def fetch_messages(topic, partition: 0, offset: :earliest)
+ kafka_client.fetch_messages(topic: topic, partition: partition, offset: offset)
+ end
+
  end