logstash-integration-kafka 10.8.0-java → 10.10.0-java
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/DEVELOPER.md +1 -1
- data/docs/input-kafka.asciidoc +8 -2
- data/docs/output-kafka.asciidoc +5 -3
- data/lib/logstash/inputs/kafka.rb +15 -12
- data/lib/logstash/outputs/kafka.rb +2 -2
- data/logstash-integration-kafka.gemspec +2 -1
- data/spec/integration/inputs/kafka_spec.rb +9 -12
- data/spec/integration/outputs/kafka_spec.rb +19 -0
- data/spec/unit/inputs/kafka_spec.rb +25 -5
- metadata +17 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1033b1bc88694b441cc6b117c431792780093b27ead742684f4e903048ed54a5
+  data.tar.gz: e2a74687db7bba3ccc192a544142226dccb3b144bb11e5cfecd84dd4c26cbdf0
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b1e206f1bfbd4acbf6ca66d11f974c2116faf357da1212dfa740675dbce47ca1dea661fb0c185df687798d2f6a053dabb781d63d687cd981769d38de938c148a
+  data.tar.gz: ca5f79ea95cd3901b1f47b06e9465a4962c4aa585cb1441254a916fe38a5603c98b87178eb85883caac5c28215b0e42de39c1d810e742d79d33bb8c3df82501b
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,20 @@
+## 10.10.0
+  - Added config setting to enable 'zstd' compression in the Kafka output [#112](https://github.com/logstash-plugins/logstash-integration-kafka/pull/112)
+
+## 10.9.0
+  - Refactor: leverage codec when using schema registry [#106](https://github.com/logstash-plugins/logstash-integration-kafka/pull/106)
+
+    Previously using `schema_registry_url` parsed the payload as JSON even if `codec => 'plain'` was set, this is no longer the case.
+
+## 10.8.2
+  - [DOC] Updates description of `enable_auto_commit=false` to clarify that the commit happens after data is fetched AND written to the queue [#90](https://github.com/logstash-plugins/logstash-integration-kafka/pull/90)
+  - Fix: update to Gradle 7 [#104](https://github.com/logstash-plugins/logstash-integration-kafka/pull/104)
+  - [DOC] Clarify Kafka client does not support proxy [#103](https://github.com/logstash-plugins/logstash-integration-kafka/pull/103)
+
+## 10.8.1
+  - [DOC] Removed a setting recommendation that is no longer applicable for Kafka 2.0+ [#99](https://github.com/logstash-plugins/logstash-integration-kafka/pull/99)
+
 ## 10.8.0
   - Added config setting to enable schema registry validation to be skipped when an authentication scheme unsupported
     by the validator is used [#97](https://github.com/logstash-plugins/logstash-integration-kafka/pull/97)
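
For reference, a minimal sketch of the new 10.10.0 output setting from the changelog entry above; the broker address and topic name below are illustrative placeholders, not values taken from this release:

    output {
      kafka {
        bootstrap_servers => "localhost:9092"   # placeholder broker address
        topic_id          => "example_topic"    # placeholder topic
        compression_type  => "zstd"             # option added in 10.10.0 (#112)
      }
    }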
data/DEVELOPER.md
CHANGED
@@ -62,7 +62,7 @@ See http://kafka.apache.org/documentation.html#producerconfigs for details about the Kafka producer options.
     kafka {
         topic_id => ... # string (required), The topic to produce the messages to
         broker_list => ... # string (optional), default: "localhost:9092", This is for bootstrapping and the producer will only use it for getting metadata
-        compression_codec => ... # string (optional), one of ["none", "gzip", "snappy"], default: "none"
+        compression_codec => ... # string (optional), one of ["none", "gzip", "snappy", "lz4", "zstd"], default: "none"
        compressed_topics => ... # string (optional), default: "", This parameter allows you to set whether compression should be turned on for particular
        request_required_acks => ... # number (optional), one of [-1, 0, 1], default: 0, This value controls when a produce request is considered completed
        serializer_class => ... # string, (optional) default: "kafka.serializer.StringEncoder", The serializer class for messages. The default encoder takes a byte[] and returns the same byte[]
data/docs/input-kafka.asciidoc
CHANGED
@@ -42,6 +42,13 @@ This input supports connecting to Kafka over:
 
 By default security is disabled but can be turned on as needed.
 
+[NOTE]
+=======
+This plugin does not support using a proxy when communicating to the Kafka broker.
+
+This plugin does support using a proxy when communicating to the Schema Registry using the <<plugins-{type}s-{plugin}-schema_registry_proxy>> option.
+=======
+
 The Logstash Kafka consumer handles group management and uses the default offset management
 strategy using Kafka topics.
 
@@ -276,7 +283,7 @@ which the consumption will begin.
 
 If true, periodically commit to Kafka the offsets of messages already returned by
 the consumer. If value is `false` however, the offset is committed every time the
-consumer
+consumer writes data fetched from the topic to the in-memory or persistent queue.
 
 [id="plugins-{type}s-{plugin}-exclude_internal_topics"]
 ===== `exclude_internal_topics`
@@ -415,7 +422,6 @@ The maximum delay between invocations of poll() when using consumer group management.
 an upper bound on the amount of time that the consumer can be idle before fetching more records.
 If poll() is not called before expiration of this timeout, then the consumer is considered failed and
 the group will rebalance in order to reassign the partitions to another member.
-The value of the configuration `request_timeout_ms` must always be larger than `max_poll_interval_ms`.
 
 [id="plugins-{type}s-{plugin}-max_poll_records"]
 ===== `max_poll_records`
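
A minimal sketch of the clarified `enable_auto_commit => false` behavior documented above; the broker address, topic, and group id are illustrative placeholders:

    input {
      kafka {
        bootstrap_servers  => "localhost:9092"     # placeholder broker address
        topics             => ["example_topic"]    # placeholder topic
        group_id           => "example_group"      # placeholder consumer group
        enable_auto_commit => "false"              # offsets are committed only after fetched data
                                                   # is written to the in-memory or persistent queue
      }
    }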
data/docs/output-kafka.asciidoc
CHANGED
@@ -64,6 +64,8 @@ https://kafka.apache.org/{kafka_client_doc}/documentation.html#theproducer
 Kafka producer configuration:
 https://kafka.apache.org/{kafka_client_doc}/documentation.html#producerconfigs
 
+NOTE: This plugin does not support using a proxy when communicating to the Kafka broker.
+
 [id="plugins-{type}s-{plugin}-options"]
 ==== Kafka Output Configuration Options
 
@@ -82,7 +84,7 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more details.
 | <<plugins-{type}s-{plugin}-buffer_memory>> |<<number,number>>|No
 | <<plugins-{type}s-{plugin}-client_dns_lookup>> |<<string,string>>|No
 | <<plugins-{type}s-{plugin}-client_id>> |<<string,string>>|No
-| <<plugins-{type}s-{plugin}-compression_type>> |<<string,string>>, one of `["none", "gzip", "snappy", "lz4"]`|No
+| <<plugins-{type}s-{plugin}-compression_type>> |<<string,string>>, one of `["none", "gzip", "snappy", "lz4", "zstd"]`|No
 | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
 | <<plugins-{type}s-{plugin}-kerberos_config>> |a valid filesystem path|No
 | <<plugins-{type}s-{plugin}-key_serializer>> |<<string,string>>|No
@@ -191,11 +193,11 @@ ip/port by allowing a logical application name to be included with the request
 [id="plugins-{type}s-{plugin}-compression_type"]
 ===== `compression_type`
 
-* Value can be any of: `none`, `gzip`, `snappy`, `lz4`
+* Value can be any of: `none`, `gzip`, `snappy`, `lz4`, `zstd`
 * Default value is `"none"`
 
 The compression type for all data generated by the producer.
-The default is none (
+The default is none (meaning no compression). Valid values are none, gzip, snappy, lz4, or zstd.
 
 [id="plugins-{type}s-{plugin}-jaas_path"]
 ===== `jaas_path`
data/lib/logstash/inputs/kafka.rb
CHANGED
@@ -63,7 +63,12 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
 
   config_name 'kafka'
 
-  default :codec, 'plain'
+  # default :codec, 'plain' or 'json' depending whether schema registry is used
+  #
+  # @override LogStash::Inputs::Base - removing the `:default => :plain`
+  config :codec, :validate => :codec
+  # NOTE: isn't necessary due the params['codec'] = ... done in #initialize
+  # having the `nil` default explicit makes the behavior more noticeable.
 
   # The frequency in milliseconds that the consumer offsets are committed to Kafka.
   config :auto_commit_interval_ms, :validate => :number, :default => 5000 # Kafka default
@@ -249,6 +254,15 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
 
   attr_reader :metadata_mode
 
+  # @overload based on schema registry change the codec default
+  def initialize(params = {})
+    unless params.key?('codec')
+      params['codec'] = params.key?('schema_registry_url') ? 'json' : 'plain'
+    end
+
+    super(params)
+  end
+
   public
   def register
     @runner_threads = []
@@ -341,22 +355,11 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
   def handle_record(record, codec_instance, queue)
     codec_instance.decode(record.value.to_s) do |event|
       decorate(event)
-      maybe_apply_schema(event, record)
       maybe_set_metadata(event, record)
       queue << event
     end
   end
 
-  def maybe_apply_schema(event, record)
-    if schema_registry_url
-      json = LogStash::Json.load(record.value.to_s)
-      json.each do |k, v|
-        event.set(k, v)
-      end
-      event.remove("message")
-    end
-  end
-
   def maybe_set_metadata(event, record)
     if @metadata_mode.include?(:record_props)
       event.set("[@metadata][kafka][topic]", record.topic)
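
To illustrate the new codec default introduced by the refactor above (10.9.0, #106), a hedged sketch with placeholder topic names: when `schema_registry_url` is set and no codec is given the input now defaults to the `json` codec, while an explicitly configured codec is honored instead of being overridden.

    input {
      kafka {
        topics              => ["example_topic"]         # placeholder topic
        schema_registry_url => "http://localhost:8081"    # no codec given => defaults to json
      }
    }

    input {
      kafka {
        topics              => ["example_topic"]         # placeholder topic
        schema_registry_url => "http://localhost:8081"
        codec               => plain                      # explicit codec is now respected
      }
    }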
data/lib/logstash/outputs/kafka.rb
CHANGED
@@ -80,8 +80,8 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
   # The total bytes of memory the producer can use to buffer records waiting to be sent to the server.
   config :buffer_memory, :validate => :number, :default => 33_554_432 # (32M) Kafka default
   # The compression type for all data generated by the producer.
-  # The default is none (i.e. no compression). Valid values are none, gzip, or
-  config :compression_type, :validate => ["none", "gzip", "snappy", "lz4"], :default => "none"
+  # The default is none (i.e. no compression). Valid values are none, gzip, snappy, lz4 or zstd.
+  config :compression_type, :validate => ["none", "gzip", "snappy", "lz4", "zstd"], :default => "none"
   # How DNS lookups should be done. If set to `use_all_dns_ips`, when the lookup returns multiple
   # IP addresses for a hostname, they will all be attempted to connect to before failing the
   # connection. If the value is `resolve_canonical_bootstrap_servers_only` each entry will be
data/logstash-integration-kafka.gemspec
CHANGED
@@ -1,6 +1,6 @@
 Gem::Specification.new do |s|
   s.name = 'logstash-integration-kafka'
-  s.version = '10.8.0'
+  s.version = '10.10.0'
   s.licenses = ['Apache-2.0']
   s.summary = "Integration with Kafka - input and output plugins"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline "+
@@ -50,6 +50,7 @@ Gem::Specification.new do |s|
   s.add_runtime_dependency 'logstash-mixin-deprecation_logger_support', '~>1.0'
 
   s.add_development_dependency 'logstash-devutils'
+  s.add_development_dependency 'logstash-codec-line'
   s.add_development_dependency 'rspec-wait'
   s.add_development_dependency 'digest-crc', '~> 0.5.1' # 0.6.0 started using a C-ext
   s.add_development_dependency 'ruby-kafka' # depends on digest-crc
data/spec/integration/inputs/kafka_spec.rb
CHANGED
@@ -16,38 +16,38 @@ describe "inputs/kafka", :integration => true do
   let(:group_id_5) {rand(36**8).to_s(36)}
   let(:group_id_6) {rand(36**8).to_s(36)}
   let(:plain_config) do
-    { 'topics' => ['logstash_integration_topic_plain'], '
+    { 'topics' => ['logstash_integration_topic_plain'], 'group_id' => group_id_1,
       'auto_offset_reset' => 'earliest' }
   end
   let(:multi_consumer_config) do
     plain_config.merge({"group_id" => group_id_4, "client_id" => "spec", "consumer_threads" => 3})
   end
   let(:snappy_config) do
-    { 'topics' => ['logstash_integration_topic_snappy'], '
+    { 'topics' => ['logstash_integration_topic_snappy'], 'group_id' => group_id_1,
       'auto_offset_reset' => 'earliest' }
   end
   let(:lz4_config) do
-    { 'topics' => ['logstash_integration_topic_lz4'], '
+    { 'topics' => ['logstash_integration_topic_lz4'], 'group_id' => group_id_1,
       'auto_offset_reset' => 'earliest' }
   end
   let(:pattern_config) do
-    { 'topics_pattern' => 'logstash_integration_topic_.*', 'group_id' => group_id_2,
+    { 'topics_pattern' => 'logstash_integration_topic_.*', 'group_id' => group_id_2,
       'auto_offset_reset' => 'earliest' }
   end
   let(:decorate_config) do
-    { 'topics' => ['logstash_integration_topic_plain'], '
+    { 'topics' => ['logstash_integration_topic_plain'], 'group_id' => group_id_3,
       'auto_offset_reset' => 'earliest', 'decorate_events' => 'true' }
   end
   let(:decorate_headers_config) do
-    { 'topics' => ['logstash_integration_topic_plain_with_headers'], '
+    { 'topics' => ['logstash_integration_topic_plain_with_headers'], 'group_id' => group_id_3,
       'auto_offset_reset' => 'earliest', 'decorate_events' => 'extended' }
   end
   let(:decorate_bad_headers_config) do
-    { 'topics' => ['logstash_integration_topic_plain_with_headers_badly'], '
+    { 'topics' => ['logstash_integration_topic_plain_with_headers_badly'], 'group_id' => group_id_3,
       'auto_offset_reset' => 'earliest', 'decorate_events' => 'extended' }
   end
   let(:manual_commit_config) do
-    { 'topics' => ['logstash_integration_topic_plain'], '
+    { 'topics' => ['logstash_integration_topic_plain'], 'group_id' => group_id_5,
       'auto_offset_reset' => 'earliest', 'enable_auto_commit' => 'false' }
   end
   let(:timeout_seconds) { 30 }
@@ -352,10 +352,7 @@ describe "Deserializing with the schema registry", :integration => true do
 
   let(:base_config) do
     {
-      'topics' => [avro_topic_name],
-      'codec' => 'plain',
-      'group_id' => group_id_1,
-      'auto_offset_reset' => 'earliest'
+      'topics' => [avro_topic_name], 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest'
     }
   end
 
data/spec/integration/outputs/kafka_spec.rb
CHANGED
@@ -139,6 +139,25 @@ describe "outputs/kafka", :integration => true do
     # end
   end
 
+  context 'when using zstd compression' do
+    let(:test_topic) { 'logstash_integration_zstd_topic' }
+
+    before :each do
+      config = base_config.merge({"topic_id" => test_topic, "compression_type" => "zstd"})
+      load_kafka_data(config)
+    end
+
+    # NOTE: depends on zstd-ruby gem which is using a C-extension
+    # it 'should have data integrity' do
+    #   messages = fetch_messages(test_topic)
+    #
+    #   expect(messages.size).to eq(num_events)
+    #   messages.each do |m|
+    #     expect(m.value).to eq(event.to_s)
+    #   end
+    # end
+  end
+
   context 'when using multi partition topic' do
     let(:num_events) { 100 } # ~ more than (batch.size) 16,384 bytes
     let(:test_topic) { 'logstash_integration_topic3' }
data/spec/unit/inputs/kafka_spec.rb
CHANGED
@@ -177,7 +177,23 @@ describe LogStash::Inputs::Kafka do
     end
   end
 
-
+  it 'uses plain codec by default' do
+    expect( subject.codec ).to respond_to :decode
+    expect( subject.codec.class ).to be LogStash::Codecs::Plain
+  end
+
+  context 'with codec option' do
+
+    let(:config) { super().merge 'codec' => 'line' }
+
+    it 'uses specified codec' do
+      expect( subject.codec ).to respond_to :decode
+      expect( subject.codec.class ).to be LogStash::Codecs::Line
+    end
+
+  end
+
   describe "schema registry" do
     let(:base_config) do {
       'schema_registry_url' => 'http://localhost:8081',
       'topics' => ['logstash'],
@@ -186,7 +202,7 @@ describe LogStash::Inputs::Kafka do
     end
 
     context "schema_registry_url" do
-
+      let(:config) { base_config }
 
       it "conflict with value_deserializer_class should fail" do
         config['value_deserializer_class'] = 'my.fantasy.Deserializer'
@@ -197,6 +213,11 @@ describe LogStash::Inputs::Kafka do
       config['topics_pattern'] = 'topic_.*'
       expect { subject.register }.to raise_error LogStash::ConfigurationError, /Option schema_registry_url prohibit the customization of topics_pattern/
     end
+
+    it 'switches default codec to json' do
+      expect( subject.codec ).to respond_to :decode
+      expect( subject.codec.class ).to be LogStash::Codecs::JSON
+    end
   end
 
   context 'when kerberos auth is used' do
@@ -204,9 +225,8 @@ describe LogStash::Inputs::Kafka do
     context "with #{protocol}" do
       ['auto', 'skip'].each do |vsr|
         context "when validata_schema_registry is #{vsr}" do
-          let(:config) { base_config.merge({'security_protocol' => protocol,
-
-          }
+          let(:config) { base_config.merge({'security_protocol' => protocol, 'schema_registry_validation' => vsr}) }
+
           it 'skips verification' do
             expect(subject).not_to receive(:check_for_schema_registry_connectivity_and_subjects)
             expect { subject.register }.not_to raise_error
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-integration-kafka
 version: !ruby/object:Gem::Version
-  version: 10.8.0
+  version: 10.10.0
 platform: java
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2022-02-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -154,6 +154,20 @@ dependencies:
     - - ">="
     - !ruby/object:Gem::Version
       version: '0'
+- !ruby/object:Gem::Dependency
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+  name: logstash-codec-line
+  prerelease: false
+  type: :development
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
     requirements:
@@ -280,8 +294,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-
-rubygems_version: 2.6.13
+rubygems_version: 3.1.6
 signing_key:
 specification_version: 4
 summary: Integration with Kafka - input and output plugins