logstash-integration-kafka 10.0.0-java
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -0
- data/CONTRIBUTORS +18 -0
- data/DEVELOPER.md +97 -0
- data/Gemfile +11 -0
- data/LICENSE +13 -0
- data/NOTICE.TXT +5 -0
- data/README.md +118 -0
- data/docs/index.asciidoc +28 -0
- data/docs/input-kafka.asciidoc +582 -0
- data/docs/output-kafka.asciidoc +441 -0
- data/lib/logstash-integration-kafka_jars.rb +8 -0
- data/lib/logstash/inputs/kafka.rb +362 -0
- data/lib/logstash/outputs/kafka.rb +384 -0
- data/logstash-integration-kafka.gemspec +54 -0
- data/spec/integration/inputs/kafka_spec.rb +170 -0
- data/spec/integration/outputs/kafka_spec.rb +194 -0
- data/spec/unit/inputs/kafka_spec.rb +39 -0
- data/spec/unit/outputs/kafka_spec.rb +192 -0
- data/vendor/jar-dependencies/com/github/luben/zstd-jni/1.4.2-1/zstd-jni-1.4.2-1.jar +0 -0
- data/vendor/jar-dependencies/org/apache/kafka/kafka-clients/2.3.0/kafka-clients-2.3.0.jar +0 -0
- data/vendor/jar-dependencies/org/lz4/lz4-java/1.6.0/lz4-java-1.6.0.jar +0 -0
- data/vendor/jar-dependencies/org/slf4j/slf4j-api/1.7.26/slf4j-api-1.7.26.jar +0 -0
- data/vendor/jar-dependencies/org/xerial/snappy/snappy-java/1.1.7.3/snappy-java-1.1.7.3.jar +0 -0
- metadata +228 -0

data/lib/logstash/outputs/kafka.rb
@@ -0,0 +1,384 @@
require 'logstash/namespace'
require 'logstash/outputs/base'
require 'java'
require 'logstash-integration-kafka_jars.rb'

java_import org.apache.kafka.clients.producer.ProducerRecord

# Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on
# the broker.
#
# Here's a compatibility matrix that shows the Kafka client versions that are compatible with each combination
# of Logstash and the Kafka output plugin:
#
# [options="header"]
# |==========================================================
# |Kafka Client Version |Logstash Version |Plugin Version |Why?
# |0.8 |2.0.0 - 2.x.x |<3.0.0 |Legacy, 0.8 is still popular
# |0.9 |2.0.0 - 2.3.x | 3.x.x |Works with the old Ruby Event API (`event['product']['price'] = 10`)
# |0.9 |2.4.x - 5.x.x | 4.x.x |Works with the new getter/setter APIs (`event.set('[product][price]', 10)`)
# |0.10.0.x |2.4.x - 5.x.x | 5.x.x |Not compatible with the <= 0.9 broker
# |0.10.1.x |2.4.x - 5.x.x | 6.x.x |
# |==========================================================
#
# NOTE: We recommend that you use matching Kafka client and broker versions. During upgrades, you should
# upgrade brokers before clients because brokers target backwards compatibility. For example, the 0.9 broker
# is compatible with both the 0.8 consumer and 0.9 consumer APIs, but not the other way around.
#
# This output supports connecting to Kafka over:
#
# * SSL (requires plugin version 3.0.0 or later)
# * Kerberos SASL (requires plugin version 5.1.0 or later)
#
# By default security is disabled but can be turned on as needed.
#
# The only required configuration is the topic_id. The default codec is plain,
# so events will be persisted on the broker in plain format. Logstash will encode your events
# with not only the message field but also a timestamp and hostname. If you do not want anything
# but your message passing through, you should make the output configuration something like:
# [source,ruby]
#     output {
#       kafka {
#         codec => plain {
#            format => "%{message}"
#         }
#         topic_id => "mytopic"
#       }
#     }
# For more information see http://kafka.apache.org/documentation.html#theproducer
#
# Kafka producer configuration: http://kafka.apache.org/documentation.html#newproducerconfigs
class LogStash::Outputs::Kafka < LogStash::Outputs::Base
  declare_threadsafe!

  config_name 'kafka'

  default :codec, 'plain'

  # The number of acknowledgments the producer requires the leader to have received
  # before considering a request complete.
  #
  # acks=0, the producer will not wait for any acknowledgment from the server at all.
  # acks=1, the leader will write the record to its local log but
  # will respond without awaiting full acknowledgement from all followers.
  # acks=all, the leader will wait for the full set of in-sync replicas to acknowledge the record.
  config :acks, :validate => ["0", "1", "all"], :default => "1"
  # The producer will attempt to batch records together into fewer requests whenever multiple
  # records are being sent to the same partition. This helps performance on both the client
  # and the server. This configuration controls the default batch size in bytes.
  config :batch_size, :validate => :number, :default => 16384
  # This is for bootstrapping and the producer will only use it for getting metadata (topics,
  # partitions and replicas). The socket connections for sending the actual data will be
  # established based on the broker information returned in the metadata. The format is
  # `host1:port1,host2:port2`, and the list can be a subset of brokers or a VIP pointing to a
  # subset of brokers.
  config :bootstrap_servers, :validate => :string, :default => 'localhost:9092'
  # The total bytes of memory the producer can use to buffer records waiting to be sent to the server.
  config :buffer_memory, :validate => :number, :default => 33554432
  # The compression type for all data generated by the producer.
  # The default is none (i.e. no compression). Valid values are none, gzip, snappy, or lz4.
  config :compression_type, :validate => ["none", "gzip", "snappy", "lz4"], :default => "none"
  # The id string to pass to the server when making requests.
  # The purpose of this is to be able to track the source of requests beyond just
  # ip/port by allowing a logical application name to be included with the request.
  config :client_id, :validate => :string
  # Serializer class for the key of the message
  config :key_serializer, :validate => :string, :default => 'org.apache.kafka.common.serialization.StringSerializer'
  # The producer groups together any records that arrive in between request
  # transmissions into a single batched request. Normally this occurs only under
  # load when records arrive faster than they can be sent out. However in some circumstances
  # the client may want to reduce the number of requests even under moderate load.
  # This setting accomplishes this by adding a small amount of artificial delay: that is,
  # rather than immediately sending out a record the producer will wait for up to the given delay
  # to allow other records to be sent so that the sends can be batched together.
  config :linger_ms, :validate => :number, :default => 0
  # The maximum size of a request
  config :max_request_size, :validate => :number, :default => 1048576
  # The key for the message
  config :message_key, :validate => :string
  # The timeout setting for the initial metadata request to fetch topic metadata.
  config :metadata_fetch_timeout_ms, :validate => :number, :default => 60000
  # The max time in milliseconds before a metadata refresh is forced.
  config :metadata_max_age_ms, :validate => :number, :default => 300000
  # The size of the TCP receive buffer to use when reading data
  config :receive_buffer_bytes, :validate => :number, :default => 32768
  # The amount of time to wait before attempting to reconnect to a given host when a connection fails.
  config :reconnect_backoff_ms, :validate => :number, :default => 10
  # The configuration controls the maximum amount of time the client will wait
  # for the response of a request. If the response is not received before the timeout
  # elapses the client will resend the request if necessary or fail the request if
  # retries are exhausted.
  config :request_timeout_ms, :validate => :string
  # The default retry behavior is to retry until successful. To prevent data loss,
  # the use of this setting is discouraged.
  #
  # If you choose to set `retries`, a value greater than zero will cause the
  # client to only retry a fixed number of times. This will result in data loss
  # if a transient error outlasts your retry count.
  #
  # A value less than zero is a configuration error.
  config :retries, :validate => :number
  # The amount of time to wait before attempting to retry a failed produce request to a given topic partition.
  config :retry_backoff_ms, :validate => :number, :default => 100
  # The size of the TCP send buffer to use when sending data.
  config :send_buffer_bytes, :validate => :number, :default => 131072
  # The truststore type.
  config :ssl_truststore_type, :validate => :string
  # The JKS truststore path to validate the Kafka broker's certificate.
  config :ssl_truststore_location, :validate => :path
  # The truststore password
  config :ssl_truststore_password, :validate => :password
  # The keystore type.
  config :ssl_keystore_type, :validate => :string
  # If client authentication is required, this setting stores the keystore path.
  config :ssl_keystore_location, :validate => :path
  # If client authentication is required, this setting stores the keystore password
  config :ssl_keystore_password, :validate => :password
  # The password of the private key in the key store file.
  config :ssl_key_password, :validate => :password
  # Algorithm to use when verifying the host. Set to "" to disable.
  config :ssl_endpoint_identification_algorithm, :validate => :string, :default => 'https'
  # Security protocol to use, which can be one of PLAINTEXT, SSL, SASL_PLAINTEXT, or SASL_SSL.
  config :security_protocol, :validate => ["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"], :default => "PLAINTEXT"
  # http://kafka.apache.org/documentation.html#security_sasl[SASL mechanism] used for client connections.
  # This may be any mechanism for which a security provider is available.
  # GSSAPI is the default mechanism.
  config :sasl_mechanism, :validate => :string, :default => "GSSAPI"
  # The Kerberos principal name that the Kafka broker runs as.
  # This can be defined either in Kafka's JAAS config or in Kafka's config.
  config :sasl_kerberos_service_name, :validate => :string
  # The Java Authentication and Authorization Service (JAAS) API supplies user authentication and authorization
  # services for Kafka. This setting provides the path to the JAAS file. Sample JAAS file for Kafka client:
  # [source,java]
  # ----------------------------------
  # KafkaClient {
  #   com.sun.security.auth.module.Krb5LoginModule required
  #   useTicketCache=true
  #   renewTicket=true
  #   serviceName="kafka";
  # };
  # ----------------------------------
  #
  # Please note that specifying `jaas_path` and `kerberos_config` in the config file will add these
  # to the global JVM system properties. This means that if you have multiple Kafka inputs, all of them would be sharing the same
  # `jaas_path` and `kerberos_config`. If this is not desirable, you would have to run separate instances of Logstash on
  # different JVM instances.
  config :jaas_path, :validate => :path
  # JAAS configuration settings. This allows JAAS config to be a part of the plugin configuration and allows a different JAAS configuration per plugin instance.
  config :sasl_jaas_config, :validate => :string
  # Optional path to kerberos config file. This is krb5.conf style as detailed in https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html
  config :kerberos_config, :validate => :path

  # The topic to produce messages to
  config :topic_id, :validate => :string, :required => true
  # Serializer class for the value of the message
  config :value_serializer, :validate => :string, :default => 'org.apache.kafka.common.serialization.StringSerializer'

  public
  def register
    @thread_batch_map = Concurrent::Hash.new

    if !@retries.nil?
      if @retries < 0
        raise ConfigurationError, "A negative retry count (#{@retries}) is not valid. Must be a value >= 0"
      end

      @logger.warn("Kafka output is configured with finite retry. This instructs Logstash to LOSE DATA after a set number of send attempts fails. If you do not want to lose data if Kafka is down, then you must remove the retry setting.", :retries => @retries)
    end


    @producer = create_producer
    if value_serializer == 'org.apache.kafka.common.serialization.StringSerializer'
      @codec.on_event do |event, data|
        write_to_kafka(event, data)
      end
    elsif value_serializer == 'org.apache.kafka.common.serialization.ByteArraySerializer'
      @codec.on_event do |event, data|
        write_to_kafka(event, data.to_java_bytes)
      end
    else
      raise ConfigurationError, "'value_serializer' only supports org.apache.kafka.common.serialization.ByteArraySerializer and org.apache.kafka.common.serialization.StringSerializer"
    end
  end # def register

  def prepare(record)
    # This output is threadsafe, so we need to keep a batch per thread.
    @thread_batch_map[Thread.current].add(record)
  end

  def multi_receive(events)
    t = Thread.current
    if !@thread_batch_map.include?(t)
      @thread_batch_map[t] = java.util.ArrayList.new(events.size)
    end

    events.each do |event|
      break if event == LogStash::SHUTDOWN
      @codec.encode(event)
    end

    batch = @thread_batch_map[t]
    if batch.any?
      retrying_send(batch)
      batch.clear
    end
  end

  def retrying_send(batch)
    remaining = @retries

    while batch.any?
      if !remaining.nil?
        if remaining < 0
          # TODO(sissel): Offer to DLQ? Then again, if it's a transient fault,
          # DLQing would make things worse (you dlq data that would be successful
          # after the fault is repaired)
          logger.info("Exhausted user-configured retry count when sending to Kafka. Dropping these events.",
                      :max_retries => @retries, :drop_count => batch.count)
          break
        end

        remaining -= 1
      end

      failures = []

      futures = batch.collect do |record|
        begin
          # send() can throw an exception even before the future is created.
          @producer.send(record)
        rescue org.apache.kafka.common.errors.TimeoutException => e
          failures << record
          nil
        rescue org.apache.kafka.common.errors.InterruptException => e
          failures << record
          nil
        rescue org.apache.kafka.common.errors.SerializationException => e
          # TODO(sissel): Retrying will fail because the data itself has a problem serializing.
          # TODO(sissel): Let's add DLQ here.
          failures << record
          nil
        end
      end.compact

      futures.each_with_index do |future, i|
        begin
          result = future.get()
        rescue => e
          # TODO(sissel): Add metric to count failures, possibly by exception type.
          logger.warn("KafkaProducer.send() failed: #{e}", :exception => e)
          failures << batch[i]
        end
      end

      # No failures? Cool. Let's move on.
      break if failures.empty?

      # Otherwise, retry with any failed transmissions
      if remaining.nil? || remaining >= 0
        delay = @retry_backoff_ms / 1000.0
        logger.info("Sending batch to Kafka failed. Will retry after a delay.", :batch_size => batch.size,
                    :failures => failures.size,
                    :sleep => delay)
        batch = failures
        sleep(delay)
      end
    end
  end

  def close
    @producer.close
  end

  private

  def write_to_kafka(event, serialized_data)
    if @message_key.nil?
      record = ProducerRecord.new(event.sprintf(@topic_id), serialized_data)
    else
      record = ProducerRecord.new(event.sprintf(@topic_id), event.sprintf(@message_key), serialized_data)
    end
    prepare(record)
  rescue LogStash::ShutdownSignal
    @logger.debug('Kafka producer got shutdown signal')
  rescue => e
    @logger.warn('kafka producer threw exception, restarting',
                 :exception => e)
  end

  def create_producer
    begin
      props = java.util.Properties.new
      kafka = org.apache.kafka.clients.producer.ProducerConfig

      props.put(kafka::ACKS_CONFIG, acks)
      props.put(kafka::BATCH_SIZE_CONFIG, batch_size.to_s)
      props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers)
      props.put(kafka::BUFFER_MEMORY_CONFIG, buffer_memory.to_s)
      props.put(kafka::COMPRESSION_TYPE_CONFIG, compression_type)
      props.put(kafka::CLIENT_ID_CONFIG, client_id) unless client_id.nil?
      props.put(kafka::KEY_SERIALIZER_CLASS_CONFIG, key_serializer)
      props.put(kafka::LINGER_MS_CONFIG, linger_ms.to_s)
      props.put(kafka::MAX_REQUEST_SIZE_CONFIG, max_request_size.to_s)
      props.put(kafka::METADATA_MAX_AGE_CONFIG, metadata_max_age_ms) unless metadata_max_age_ms.nil?
      props.put(kafka::RECEIVE_BUFFER_CONFIG, receive_buffer_bytes.to_s) unless receive_buffer_bytes.nil?
      props.put(kafka::RECONNECT_BACKOFF_MS_CONFIG, reconnect_backoff_ms) unless reconnect_backoff_ms.nil?
      props.put(kafka::REQUEST_TIMEOUT_MS_CONFIG, request_timeout_ms) unless request_timeout_ms.nil?
      props.put(kafka::RETRIES_CONFIG, retries.to_s) unless retries.nil?
      props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms.to_s)
      props.put(kafka::SEND_BUFFER_CONFIG, send_buffer_bytes.to_s)
      props.put(kafka::VALUE_SERIALIZER_CLASS_CONFIG, value_serializer)

      props.put("security.protocol", security_protocol) unless security_protocol.nil?

      if security_protocol == "SSL"
        set_trustore_keystore_config(props)
      elsif security_protocol == "SASL_PLAINTEXT"
        set_sasl_config(props)
      elsif security_protocol == "SASL_SSL"
        set_trustore_keystore_config(props)
        set_sasl_config(props)
      end


      org.apache.kafka.clients.producer.KafkaProducer.new(props)
    rescue => e
      logger.error("Unable to create Kafka producer from given configuration",
                   :kafka_error_message => e,
                   :cause => e.respond_to?(:getCause) ? e.getCause() : nil)
      raise e
    end
  end

  def set_trustore_keystore_config(props)
    if ssl_truststore_location.nil?
      raise LogStash::ConfigurationError, "ssl_truststore_location must be set when SSL is enabled"
    end
    props.put("ssl.truststore.type", ssl_truststore_type) unless ssl_truststore_type.nil?
    props.put("ssl.truststore.location", ssl_truststore_location)
    props.put("ssl.truststore.password", ssl_truststore_password.value) unless ssl_truststore_password.nil?

    # Client auth stuff
    props.put("ssl.keystore.type", ssl_keystore_type) unless ssl_keystore_type.nil?
    props.put("ssl.key.password", ssl_key_password.value) unless ssl_key_password.nil?
    props.put("ssl.keystore.location", ssl_keystore_location) unless ssl_keystore_location.nil?
    props.put("ssl.keystore.password", ssl_keystore_password.value) unless ssl_keystore_password.nil?
    props.put("ssl.endpoint.identification.algorithm", ssl_endpoint_identification_algorithm) unless ssl_endpoint_identification_algorithm.nil?
  end

  def set_sasl_config(props)
    java.lang.System.setProperty("java.security.auth.login.config", jaas_path) unless jaas_path.nil?
    java.lang.System.setProperty("java.security.krb5.conf", kerberos_config) unless kerberos_config.nil?

    props.put("sasl.mechanism", sasl_mechanism)
    if sasl_mechanism == "GSSAPI" && sasl_kerberos_service_name.nil?
      raise LogStash::ConfigurationError, "sasl_kerberos_service_name must be specified when SASL mechanism is GSSAPI"
    end

    props.put("sasl.kerberos.service.name", sasl_kerberos_service_name) unless sasl_kerberos_service_name.nil?
    props.put("sasl.jaas.config", sasl_jaas_config) unless sasl_jaas_config.nil?
  end

end #class LogStash::Outputs::Kafka
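
Read alongside the header comments above, a minimal pipeline that exercises this output looks like the sketch below. The broker address is the plugin's declared default; the topic name and key field are illustrative placeholders, not values shipped with the gem:

    output {
      kafka {
        bootstrap_servers => "localhost:9092"  # the declared default
        topic_id => "logstash_events"          # hypothetical topic; the only required setting
        message_key => "%{host}"               # optional; sprintf'd per event in write_to_kafka
        compression_type => "lz4"              # one of none, gzip, snappy, lz4
        acks => "1"                            # wait for the leader only (the default)
      }
    }

Note the retry semantics in retrying_send: with `retries` unset the plugin retries failed batches indefinitely, sleeping `retry_backoff_ms` between rounds, while a finite `retries` value caps the rounds and then drops whatever still fails.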

data/logstash-integration-kafka.gemspec
@@ -0,0 +1,54 @@
Gem::Specification.new do |s|
  s.name = 'logstash-integration-kafka'
  s.version = '10.0.0'
  s.licenses = ['Apache-2.0']
  s.summary = "Integration with Kafka - input and output plugins"
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline " +
                  "using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program."
  s.authors = ["Elastic"]
  s.email = 'info@elastic.co'
  s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html"
  s.require_paths = ['lib', 'vendor/jar-dependencies']

  # Files
  s.files = Dir.glob(%w(
    lib/**/*
    spec/**/*
    *.gemspec
    *.md
    CONTRIBUTORS
    Gemfile
    LICENSE
    NOTICE.TXT
    vendor/jar-dependencies/**/*.jar
    vendor/jar-dependencies/**/*.rb
    VERSION docs/**/*
  ))

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = {
    "logstash_plugin" => "true",
    "logstash_group" => "integration",
    "integration_plugins" => "logstash-input-kafka,logstash-output-kafka"
  }

  s.add_development_dependency 'jar-dependencies', '~> 0.3.12'

  s.platform = RUBY_PLATFORM

  # Gem dependencies
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
  s.add_runtime_dependency "logstash-core", ">= 6.5.0"

  s.add_runtime_dependency 'logstash-codec-json'
  s.add_runtime_dependency 'logstash-codec-plain'
  s.add_runtime_dependency 'stud', '>= 0.0.22', '< 0.1.0'

  s.add_development_dependency 'logstash-devutils'
  s.add_development_dependency 'rspec-wait'
  s.add_development_dependency 'poseidon'
  s.add_development_dependency 'snappy'
end
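
The `vendor/jar-dependencies` entry in `require_paths` is what lets the `require 'logstash-integration-kafka_jars.rb'` at the top of the output plugin resolve the jars listed in the file summary. The generated 8-line jars file itself is not expanded in this diff; a plausible reconstruction, assuming the standard `require_jar` helper from the jar-dependencies gem and the coordinates visible in the vendored paths, would be:

    # Hypothetical reconstruction of data/lib/logstash-integration-kafka_jars.rb;
    # coordinates are taken from the vendor/jar-dependencies paths in the file list.
    require 'jar_dependencies'

    require_jar('com.github.luben', 'zstd-jni', '1.4.2-1')
    require_jar('org.apache.kafka', 'kafka-clients', '2.3.0')
    require_jar('org.lz4', 'lz4-java', '1.6.0')
    require_jar('org.slf4j', 'slf4j-api', '1.7.26')
    require_jar('org.xerial.snappy', 'snappy-java', '1.1.7.3')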

data/spec/integration/inputs/kafka_spec.rb
@@ -0,0 +1,170 @@
# encoding: utf-8
require "logstash/devutils/rspec/spec_helper"
require "logstash/inputs/kafka"
require "digest"
require "rspec/wait"

# Please run kafka_test_setup.sh prior to executing this integration test.
describe "inputs/kafka", :integration => true do
  # Group ids to make sure that the consumers get all the logs.
  let(:group_id_1) { rand(36**8).to_s(36) }
  let(:group_id_2) { rand(36**8).to_s(36) }
  let(:group_id_3) { rand(36**8).to_s(36) }
  let(:group_id_4) { rand(36**8).to_s(36) }
  let(:group_id_5) { rand(36**8).to_s(36) }
  let(:plain_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest' } }
  let(:multi_consumer_config) { plain_config.merge({ "group_id" => group_id_4, "client_id" => "spec", "consumer_threads" => 3 }) }
  let(:snappy_config) { { 'topics' => ['logstash_integration_topic_snappy'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest' } }
  let(:lz4_config) { { 'topics' => ['logstash_integration_topic_lz4'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest' } }
  let(:pattern_config) { { 'topics_pattern' => 'logstash_integration_topic_.*', 'group_id' => group_id_2, 'codec' => 'plain', 'auto_offset_reset' => 'earliest' } }
  let(:decorate_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_3, 'auto_offset_reset' => 'earliest', 'decorate_events' => true } }
  let(:manual_commit_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_5, 'auto_offset_reset' => 'earliest', 'enable_auto_commit' => 'false' } }
  let(:timeout_seconds) { 30 }
  let(:num_events) { 103 }

  describe "#kafka-topics" do
    def thread_it(kafka_input, queue)
      Thread.new do
        begin
          kafka_input.run(queue)
        end
      end
    end

    it "should consume all messages from plain 3-partition topic" do
      kafka_input = LogStash::Inputs::Kafka.new(plain_config)
      queue = Queue.new
      t = thread_it(kafka_input, queue)
      begin
        t.run
        wait(timeout_seconds).for { queue.length }.to eq(num_events)
        expect(queue.length).to eq(num_events)
      ensure
        t.kill
        t.join(30_000)
      end
    end

    it "should consume all messages from snappy 3-partition topic" do
      kafka_input = LogStash::Inputs::Kafka.new(snappy_config)
      queue = Queue.new
      t = thread_it(kafka_input, queue)
      begin
        t.run
        wait(timeout_seconds).for { queue.length }.to eq(num_events)
        expect(queue.length).to eq(num_events)
      ensure
        t.kill
        t.join(30_000)
      end
    end

    it "should consume all messages from lz4 3-partition topic" do
      kafka_input = LogStash::Inputs::Kafka.new(lz4_config)
      queue = Queue.new
      t = thread_it(kafka_input, queue)
      begin
        t.run
        wait(timeout_seconds).for { queue.length }.to eq(num_events)
        expect(queue.length).to eq(num_events)
      ensure
        t.kill
        t.join(30_000)
      end
    end

    it "should consume all messages with multiple consumers" do
      kafka_input = LogStash::Inputs::Kafka.new(multi_consumer_config)
      queue = Queue.new
      t = thread_it(kafka_input, queue)
      begin
        t.run
        wait(timeout_seconds).for { queue.length }.to eq(num_events)
        expect(queue.length).to eq(num_events)
        kafka_input.kafka_consumers.each_with_index do |consumer, i|
          expect(consumer.metrics.keys.first.tags["client-id"]).to eq("spec-#{i}")
        end
      ensure
        t.kill
        t.join(30_000)
      end
    end
  end

  describe "#kafka-topics-pattern" do
    def thread_it(kafka_input, queue)
      Thread.new do
        begin
          kafka_input.run(queue)
        end
      end
    end

    it "should consume all messages from all 3 topics" do
      kafka_input = LogStash::Inputs::Kafka.new(pattern_config)
      queue = Queue.new
      t = thread_it(kafka_input, queue)
      begin
        t.run
        wait(timeout_seconds).for { queue.length }.to eq(3*num_events)
        expect(queue.length).to eq(3*num_events)
      ensure
        t.kill
        t.join(30_000)
      end
    end
  end

  describe "#kafka-decorate" do
    def thread_it(kafka_input, queue)
      Thread.new do
        begin
          kafka_input.run(queue)
        end
      end
    end

    it "should show the right topic and group name in decorated kafka section" do
      start = LogStash::Timestamp.now.time.to_i
      kafka_input = LogStash::Inputs::Kafka.new(decorate_config)
      queue = Queue.new
      t = thread_it(kafka_input, queue)
      begin
        t.run
        wait(timeout_seconds).for { queue.length }.to eq(num_events)
        expect(queue.length).to eq(num_events)
        event = queue.shift
        expect(event.get("[@metadata][kafka][topic]")).to eq("logstash_integration_topic_plain")
        expect(event.get("[@metadata][kafka][consumer_group]")).to eq(group_id_3)
        expect(event.get("[@metadata][kafka][timestamp]")).to be >= start
      ensure
        t.kill
        t.join(30_000)
      end
    end
  end

  describe "#kafka-offset-commit" do
    def thread_it(kafka_input, queue)
      Thread.new do
        begin
          kafka_input.run(queue)
        end
      end
    end

    it "should manually commit offsets" do
      kafka_input = LogStash::Inputs::Kafka.new(manual_commit_config)
      queue = Queue.new
      t = thread_it(kafka_input, queue)
      begin
        t.run
        wait(timeout_seconds).for { queue.length }.to eq(num_events)
        expect(queue.length).to eq(num_events)
      ensure
        t.kill
        t.join(30_000)
      end
    end
  end
end
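
These tests assume each `logstash_integration_topic_*` topic is already populated with `num_events` (103) messages; that seeding is done by `kafka_test_setup.sh`, which is not part of this diff. As a rough sketch, the same seeding could be done in Ruby with the output plugin from this gem, assuming a broker on the default localhost:9092 (topic name taken from the spec; message bodies are illustrative):

    require "logstash/outputs/kafka"
    require "logstash/event"

    output = LogStash::Outputs::Kafka.new("topic_id" => "logstash_integration_topic_plain")
    output.register
    events = 103.times.map { |i| LogStash::Event.new("message" => "event #{i}") }
    output.multi_receive(events)  # encodes with the default plain codec and sends one batch
    output.close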