logstash-integration-kafka 10.0.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -0
- data/CONTRIBUTORS +18 -0
- data/DEVELOPER.md +97 -0
- data/Gemfile +11 -0
- data/LICENSE +13 -0
- data/NOTICE.TXT +5 -0
- data/README.md +118 -0
- data/docs/index.asciidoc +28 -0
- data/docs/input-kafka.asciidoc +582 -0
- data/docs/output-kafka.asciidoc +441 -0
- data/lib/logstash-integration-kafka_jars.rb +8 -0
- data/lib/logstash/inputs/kafka.rb +362 -0
- data/lib/logstash/outputs/kafka.rb +384 -0
- data/logstash-integration-kafka.gemspec +54 -0
- data/spec/integration/inputs/kafka_spec.rb +170 -0
- data/spec/integration/outputs/kafka_spec.rb +194 -0
- data/spec/unit/inputs/kafka_spec.rb +39 -0
- data/spec/unit/outputs/kafka_spec.rb +192 -0
- data/vendor/jar-dependencies/com/github/luben/zstd-jni/1.4.2-1/zstd-jni-1.4.2-1.jar +0 -0
- data/vendor/jar-dependencies/org/apache/kafka/kafka-clients/2.3.0/kafka-clients-2.3.0.jar +0 -0
- data/vendor/jar-dependencies/org/lz4/lz4-java/1.6.0/lz4-java-1.6.0.jar +0 -0
- data/vendor/jar-dependencies/org/slf4j/slf4j-api/1.7.26/slf4j-api-1.7.26.jar +0 -0
- data/vendor/jar-dependencies/org/xerial/snappy/snappy-java/1.1.7.3/snappy-java-1.1.7.3.jar +0 -0
- metadata +228 -0
@@ -0,0 +1,384 @@
|
|
1
|
+
require 'logstash/namespace'
|
2
|
+
require 'logstash/outputs/base'
|
3
|
+
require 'java'
|
4
|
+
require 'logstash-integration-kafka_jars.rb'
|
5
|
+
|
6
|
+
java_import org.apache.kafka.clients.producer.ProducerRecord
|
7
|
+
|
8
|
+
# Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on
|
9
|
+
# the broker.
|
10
|
+
#
|
11
|
+
# Here's a compatibility matrix that shows the Kafka client versions that are compatible with each combination
|
12
|
+
# of Logstash and the Kafka output plugin:
|
13
|
+
#
|
14
|
+
# [options="header"]
|
15
|
+
# |==========================================================
|
16
|
+
# |Kafka Client Version |Logstash Version |Plugin Version |Why?
|
17
|
+
# |0.8 |2.0.0 - 2.x.x |<3.0.0 |Legacy, 0.8 is still popular
|
18
|
+
# |0.9 |2.0.0 - 2.3.x | 3.x.x |Works with the old Ruby Event API (`event['product']['price'] = 10`)
|
19
|
+
# |0.9 |2.4.x - 5.x.x | 4.x.x |Works with the new getter/setter APIs (`event.set('[product][price]', 10)`)
|
20
|
+
# |0.10.0.x |2.4.x - 5.x.x | 5.x.x |Not compatible with the <= 0.9 broker
|
21
|
+
# |0.10.1.x |2.4.x - 5.x.x | 6.x.x |
|
22
|
+
# |==========================================================
|
23
|
+
#
|
24
|
+
# NOTE: We recommend that you use matching Kafka client and broker versions. During upgrades, you should
|
25
|
+
# upgrade brokers before clients because brokers target backwards compatibility. For example, the 0.9 broker
|
26
|
+
# is compatible with both the 0.8 consumer and 0.9 consumer APIs, but not the other way around.
|
27
|
+
#
|
28
|
+
# This output supports connecting to Kafka over:
|
29
|
+
#
|
30
|
+
# * SSL (requires plugin version 3.0.0 or later)
|
31
|
+
# * Kerberos SASL (requires plugin version 5.1.0 or later)
|
32
|
+
#
|
33
|
+
# By default security is disabled but can be turned on as needed.
|
34
|
+
#
|
35
|
+
# The only required configuration is the topic_id. The default codec is plain,
|
36
|
+
# so events will be persisted on the broker in plain format. Logstash will encode your messages with not
|
37
|
+
# only the message but also with a timestamp and hostname. If you do not want anything but your message
|
38
|
+
# passing through, you should make the output configuration something like:
|
39
|
+
# [source,ruby]
|
40
|
+
# output {
|
41
|
+
# kafka {
|
42
|
+
# codec => plain {
|
43
|
+
# format => "%{message}"
|
44
|
+
# }
|
45
|
+
# topic_id => "mytopic"
|
46
|
+
# }
|
47
|
+
# }
|
48
|
+
# For more information see http://kafka.apache.org/documentation.html#theproducer
|
49
|
+
#
|
50
|
+
# Kafka producer configuration: http://kafka.apache.org/documentation.html#newproducerconfigs
|
51
|
+
class LogStash::Outputs::Kafka < LogStash::Outputs::Base
|
52
|
+
declare_threadsafe!
|
53
|
+
|
54
|
+
config_name 'kafka'
|
55
|
+
|
56
|
+
default :codec, 'plain'
|
57
|
+
|
58
|
+
# The number of acknowledgments the producer requires the leader to have received
|
59
|
+
# before considering a request complete.
|
60
|
+
#
|
61
|
+
# acks=0, the producer will not wait for any acknowledgment from the server at all.
|
62
|
+
# acks=1, This will mean the leader will write the record to its local log but
|
63
|
+
# will respond without awaiting full acknowledgement from all followers.
|
64
|
+
# acks=all, This means the leader will wait for the full set of in-sync replicas to acknowledge the record.
|
65
|
+
config :acks, :validate => ["0", "1", "all"], :default => "1"
|
66
|
+
# The producer will attempt to batch records together into fewer requests whenever multiple
|
67
|
+
# records are being sent to the same partition. This helps performance on both the client
|
68
|
+
# and the server. This configuration controls the default batch size in bytes.
|
69
|
+
config :batch_size, :validate => :number, :default => 16384
|
70
|
+
# This is for bootstrapping and the producer will only use it for getting metadata (topics,
|
71
|
+
# partitions and replicas). The socket connections for sending the actual data will be
|
72
|
+
# established based on the broker information returned in the metadata. The format is
|
73
|
+
# `host1:port1,host2:port2`, and the list can be a subset of brokers or a VIP pointing to a
|
74
|
+
# subset of brokers.
|
75
|
+
config :bootstrap_servers, :validate => :string, :default => 'localhost:9092'
|
76
|
+
# The total bytes of memory the producer can use to buffer records waiting to be sent to the server.
|
77
|
+
config :buffer_memory, :validate => :number, :default => 33554432
|
78
|
+
# The compression type for all data generated by the producer.
|
79
|
+
# The default is none (i.e. no compression). Valid values are none, gzip, snappy, or lz4.
|
80
|
+
config :compression_type, :validate => ["none", "gzip", "snappy", "lz4"], :default => "none"
|
81
|
+
# The id string to pass to the server when making requests.
|
82
|
+
# The purpose of this is to be able to track the source of requests beyond just
|
83
|
+
# ip/port by allowing a logical application name to be included with the request
|
84
|
+
config :client_id, :validate => :string
|
85
|
+
# Serializer class for the key of the message
|
86
|
+
config :key_serializer, :validate => :string, :default => 'org.apache.kafka.common.serialization.StringSerializer'
|
87
|
+
# The producer groups together any records that arrive in between request
|
88
|
+
# transmissions into a single batched request. Normally this occurs only under
|
89
|
+
# load when records arrive faster than they can be sent out. However in some circumstances
|
90
|
+
# the client may want to reduce the number of requests even under moderate load.
|
91
|
+
# This setting accomplishes this by adding a small amount of artificial delay—that is,
|
92
|
+
# rather than immediately sending out a record the producer will wait for up to the given delay
|
93
|
+
# to allow other records to be sent so that the sends can be batched together.
|
94
|
+
config :linger_ms, :validate => :number, :default => 0
|
95
|
+
# The maximum size of a request
|
96
|
+
config :max_request_size, :validate => :number, :default => 1048576
|
97
|
+
# The key for the message
|
98
|
+
config :message_key, :validate => :string
|
99
|
+
# the timeout setting for initial metadata request to fetch topic metadata.
|
100
|
+
config :metadata_fetch_timeout_ms, :validate => :number, :default => 60000
|
101
|
+
# the max time in milliseconds before a metadata refresh is forced.
|
102
|
+
config :metadata_max_age_ms, :validate => :number, :default => 300000
|
103
|
+
# The size of the TCP receive buffer to use when reading data
|
104
|
+
config :receive_buffer_bytes, :validate => :number, :default => 32768
|
105
|
+
# The amount of time to wait before attempting to reconnect to a given host when a connection fails.
|
106
|
+
config :reconnect_backoff_ms, :validate => :number, :default => 10
|
107
|
+
# The configuration controls the maximum amount of time the client will wait
|
108
|
+
# for the response of a request. If the response is not received before the timeout
|
109
|
+
# elapses the client will resend the request if necessary or fail the request if
|
110
|
+
# retries are exhausted.
|
111
|
+
config :request_timeout_ms, :validate => :string
|
112
|
+
# The default retry behavior is to retry until successful. To prevent data loss,
|
113
|
+
# the use of this setting is discouraged.
|
114
|
+
#
|
115
|
+
# If you choose to set `retries`, a value greater than zero will cause the
|
116
|
+
# client to only retry a fixed number of times. This will result in data loss
|
117
|
+
# if a transient error outlasts your retry count.
|
118
|
+
#
|
119
|
+
# A value less than zero is a configuration error.
|
120
|
+
config :retries, :validate => :number
|
121
|
+
# The amount of time to wait before attempting to retry a failed produce request to a given topic partition.
|
122
|
+
config :retry_backoff_ms, :validate => :number, :default => 100
|
123
|
+
# The size of the TCP send buffer to use when sending data.
|
124
|
+
config :send_buffer_bytes, :validate => :number, :default => 131072
|
125
|
+
# The truststore type.
|
126
|
+
config :ssl_truststore_type, :validate => :string
|
127
|
+
# The JKS truststore path to validate the Kafka broker's certificate.
|
128
|
+
config :ssl_truststore_location, :validate => :path
|
129
|
+
# The truststore password
|
130
|
+
config :ssl_truststore_password, :validate => :password
|
131
|
+
# The keystore type.
|
132
|
+
config :ssl_keystore_type, :validate => :string
|
133
|
+
# If client authentication is required, this setting stores the keystore path.
|
134
|
+
config :ssl_keystore_location, :validate => :path
|
135
|
+
# If client authentication is required, this setting stores the keystore password
|
136
|
+
config :ssl_keystore_password, :validate => :password
|
137
|
+
# The password of the private key in the key store file.
|
138
|
+
config :ssl_key_password, :validate => :password
|
139
|
+
# Algorithm to use when verifying host. Set to "" to disable
|
140
|
+
config :ssl_endpoint_identification_algorithm, :validate => :string, :default => 'https'
|
141
|
+
# Security protocol to use, which can be either of PLAINTEXT,SSL,SASL_PLAINTEXT,SASL_SSL
|
142
|
+
config :security_protocol, :validate => ["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"], :default => "PLAINTEXT"
|
143
|
+
# http://kafka.apache.org/documentation.html#security_sasl[SASL mechanism] used for client connections.
|
144
|
+
# This may be any mechanism for which a security provider is available.
|
145
|
+
# GSSAPI is the default mechanism.
|
146
|
+
config :sasl_mechanism, :validate => :string, :default => "GSSAPI"
|
147
|
+
# The Kerberos principal name that Kafka broker runs as.
|
148
|
+
# This can be defined either in Kafka's JAAS config or in Kafka's config.
|
149
|
+
config :sasl_kerberos_service_name, :validate => :string
|
150
|
+
# The Java Authentication and Authorization Service (JAAS) API supplies user authentication and authorization
|
151
|
+
# services for Kafka. This setting provides the path to the JAAS file. Sample JAAS file for Kafka client:
|
152
|
+
# [source,java]
|
153
|
+
# ----------------------------------
|
154
|
+
# KafkaClient {
|
155
|
+
# com.sun.security.auth.module.Krb5LoginModule required
|
156
|
+
# useTicketCache=true
|
157
|
+
# renewTicket=true
|
158
|
+
# serviceName="kafka";
|
159
|
+
# };
|
160
|
+
# ----------------------------------
|
161
|
+
#
|
162
|
+
# Please note that specifying `jaas_path` and `kerberos_config` in the config file will add these
|
163
|
+
# to the global JVM system properties. This means if you have multiple Kafka outputs, all of them would be sharing the same
|
164
|
+
# `jaas_path` and `kerberos_config`. If this is not desirable, you would have to run separate instances of Logstash on
|
165
|
+
# different JVM instances.
|
166
|
+
config :jaas_path, :validate => :path
|
167
|
+
# JAAS configuration settings. This allows JAAS config to be a part of the plugin configuration and allows for different JAAS configuration per each plugin config.
|
168
|
+
config :sasl_jaas_config, :validate => :string
|
169
|
+
# Optional path to kerberos config file. This is krb5.conf style as detailed in https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html
|
170
|
+
config :kerberos_config, :validate => :path
|
171
|
+
|
172
|
+
# The topic to produce messages to
|
173
|
+
config :topic_id, :validate => :string, :required => true
|
174
|
+
# Serializer class for the value of the message
|
175
|
+
config :value_serializer, :validate => :string, :default => 'org.apache.kafka.common.serialization.StringSerializer'
|
176
|
+
|
177
|
+
public
|
178
|
+
# Validates the settings, hooks the codec callback up to `write_to_kafka` and
# creates the Kafka producer.
#
# Raises LogStash::ConfigurationError when `retries` is negative or when
# `value_serializer` is neither the String nor the ByteArray serializer.
def register
  @thread_batch_map = Concurrent::Hash.new

  unless @retries.nil?
    if @retries < 0
      # Fully qualified, like the SSL/SASL helpers of this class: the bare constant
      # is not reachable from this class' lexical scope.
      raise LogStash::ConfigurationError, "A negative retry count (#{@retries}) is not valid. Must be a value >= 0"
    end

    @logger.warn("Kafka output is configured with finite retry. This instructs Logstash to LOSE DATA after a set number of send attempts fails. If you do not want to lose data if Kafka is down, then you must remove the retry setting.", :retries => @retries)
  end

  # The serializer is checked before the producer is created so that a rejected
  # configuration never leaves an open producer behind that nobody closes.
  case value_serializer
  when 'org.apache.kafka.common.serialization.StringSerializer'
    @codec.on_event do |event, data|
      write_to_kafka(event, data)
    end
  when 'org.apache.kafka.common.serialization.ByteArraySerializer'
    @codec.on_event do |event, data|
      write_to_kafka(event, data.to_java_bytes)
    end
  else
    raise LogStash::ConfigurationError, "'value_serializer' only supports org.apache.kafka.common.serialization.ByteArraySerializer and org.apache.kafka.common.serialization.StringSerializer"
  end

  @producer = create_producer
end
|
203
|
+
|
204
|
+
# def register
|
205
|
+
|
206
|
+
# Queues +record+ on the batch that belongs to the calling thread.
# The output is declared threadsafe, so one pending batch is kept per thread.
def prepare(record)
  own_batch = @thread_batch_map[Thread.current]
  own_batch.add(record)
end
|
210
|
+
|
211
|
+
# Encodes the given events with the codec, then hands whatever ended up in the
# calling thread's batch to `retrying_send` and empties that batch.
# Encoding stops at the first LogStash::SHUTDOWN element.
def multi_receive(events)
  worker = Thread.current
  # Lazily create this thread's batch on first use.
  @thread_batch_map[worker] = java.util.ArrayList.new(events.size) unless @thread_batch_map.include?(worker)

  events.each do |event|
    break if event == LogStash::SHUTDOWN

    @codec.encode(event)
  end

  pending = @thread_batch_map[worker]
  return unless pending.any?

  retrying_send(pending)
  pending.clear
end
|
228
|
+
|
229
|
+
# Sends every record of +batch+ through the producer and keeps re-sending the
# records that failed until none are left or the retry budget is used up.
#
# batch - list of producer records (must respond to any?, map, [], size, count).
#
# With `retries` unset (nil) the loop retries forever. With a finite `retries`
# the records still failing after the last attempt are dropped and only logged.
# Between attempts the method sleeps for `retry_backoff_ms`.
def retrying_send(batch)
  remaining = @retries

  while batch.any?
    unless remaining.nil?
      if remaining < 0
        # TODO(sissel): Offer to DLQ? Then again, if it's a transient fault,
        # DLQing would make things worse (you dlq data that would be successful
        # after the fault is repaired)
        logger.info("Exhausted user-configured retry count when sending to Kafka. Dropping these events.",
                    :max_retries => @retries, :drop_count => batch.count)
        break
      end

      remaining -= 1
    end

    failures = []

    # One slot per record is kept (nil when send() itself raised) so that the
    # position of a future always equals the position of its record in `batch`.
    futures = batch.map do |record|
      begin
        # send() can throw an exception even before the future is created.
        @producer.send(record)
      rescue org.apache.kafka.common.errors.TimeoutException,
             org.apache.kafka.common.errors.InterruptException,
             org.apache.kafka.common.errors.SerializationException
        # TODO(sissel): Retrying a SerializationException will fail again because the
        # data itself has a problem serializing. Let's add DLQ here.
        failures << record
        nil
      end
    end

    futures.each_with_index do |future, i|
      # nil means send() raised; that record is already listed in `failures`.
      next if future.nil?

      begin
        future.get
      rescue => e
        # TODO(sissel): Add metric to count failures, possibly by exception type.
        logger.warn("KafkaProducer.send() failed: #{e}", :exception => e)
        failures << batch[i]
      end
    end

    # No failures? Cool. Let's move on.
    break if failures.empty?

    # Otherwise, retry with any failed transmissions
    if remaining.nil? || remaining >= 0
      delay = @retry_backoff_ms / 1000.0
      logger.info("Sending batch to Kafka failed. Will retry after a delay.", :batch_size => batch.size,
                                                                                :failures => failures.size,
                                                                                :sleep => delay)
      sleep(delay)
    end

    # Only the failed records go into the next pass, so the "dropping" message
    # above counts the records that are really lost.
    batch = failures
  end
end
|
290
|
+
|
291
|
+
# Closes the Kafka producer if one exists.
#
# `@producer` is only assigned by `register`; when `register` raised before that
# point it is still nil, and the guard keeps shutdown from failing on it.
def close
  @producer.close unless @producer.nil?
end
|
294
|
+
|
295
|
+
private
|
296
|
+
|
297
|
+
# Builds a ProducerRecord for +serialized_data+ and queues it via `prepare`.
# The topic, and the message key when one is configured, are passed through
# `event.sprintf`. A shutdown signal is logged at debug level; any other
# StandardError is logged as a warning and not re-raised.
def write_to_kafka(event, serialized_data)
  topic = event.sprintf(@topic_id)
  record =
    if @message_key.nil?
      ProducerRecord.new(topic, serialized_data)
    else
      ProducerRecord.new(topic, event.sprintf(@message_key), serialized_data)
    end
  prepare(record)
rescue LogStash::ShutdownSignal
  @logger.debug('Kafka producer got shutdown signal')
rescue => e
  @logger.warn('kafka producer threw exception, restarting',
               :exception => e)
end
|
310
|
+
|
311
|
+
# Builds the Kafka producer from the plugin settings.
#
# All numeric settings are stored as strings; settings that are nil are left out.
# For the SSL / SASL security protocols the matching helper adds its own entries.
#
# Returns the new org.apache.kafka.clients.producer.KafkaProducer.
# Any StandardError raised while building it is logged and re-raised.
def create_producer
  props = java.util.Properties.new
  kafka = org.apache.kafka.clients.producer.ProducerConfig

  props.put(kafka::ACKS_CONFIG, acks)
  props.put(kafka::BATCH_SIZE_CONFIG, batch_size.to_s)
  props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers)
  props.put(kafka::BUFFER_MEMORY_CONFIG, buffer_memory.to_s)
  props.put(kafka::COMPRESSION_TYPE_CONFIG, compression_type)
  props.put(kafka::CLIENT_ID_CONFIG, client_id) unless client_id.nil?
  props.put(kafka::KEY_SERIALIZER_CLASS_CONFIG, key_serializer)
  props.put(kafka::LINGER_MS_CONFIG, linger_ms.to_s)
  props.put(kafka::MAX_REQUEST_SIZE_CONFIG, max_request_size.to_s)
  # Stringified like the other numeric settings (previously stored as raw numbers).
  props.put(kafka::METADATA_MAX_AGE_CONFIG, metadata_max_age_ms.to_s) unless metadata_max_age_ms.nil?
  props.put(kafka::RECEIVE_BUFFER_CONFIG, receive_buffer_bytes.to_s) unless receive_buffer_bytes.nil?
  props.put(kafka::RECONNECT_BACKOFF_MS_CONFIG, reconnect_backoff_ms.to_s) unless reconnect_backoff_ms.nil?
  props.put(kafka::REQUEST_TIMEOUT_MS_CONFIG, request_timeout_ms) unless request_timeout_ms.nil?
  props.put(kafka::RETRIES_CONFIG, retries.to_s) unless retries.nil?
  props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms.to_s)
  props.put(kafka::SEND_BUFFER_CONFIG, send_buffer_bytes.to_s)
  props.put(kafka::VALUE_SERIALIZER_CLASS_CONFIG, value_serializer)

  props.put("security.protocol", security_protocol) unless security_protocol.nil?

  case security_protocol
  when "SSL"
    set_trustore_keystore_config(props)
  when "SASL_PLAINTEXT"
    set_sasl_config(props)
  when "SASL_SSL"
    set_trustore_keystore_config(props)
    set_sasl_config(props)
  end

  org.apache.kafka.clients.producer.KafkaProducer.new(props)
rescue => e
  logger.error("Unable to create Kafka producer from given configuration",
               :kafka_error_message => e,
               :cause => e.respond_to?(:getCause) ? e.getCause() : nil)
  raise e
end
|
354
|
+
|
355
|
+
# Copies the SSL truststore and keystore settings into +props+.
# Settings that are nil are skipped; password settings contribute their `value`.
#
# Raises LogStash::ConfigurationError when no truststore location is configured.
def set_trustore_keystore_config(props)
  raise LogStash::ConfigurationError, "ssl_truststore_location must be set when SSL is enabled" if ssl_truststore_location.nil?

  truststore_secret = ssl_truststore_password
  props.put("ssl.truststore.type", ssl_truststore_type) unless ssl_truststore_type.nil?
  props.put("ssl.truststore.location", ssl_truststore_location)
  props.put("ssl.truststore.password", truststore_secret.value) unless truststore_secret.nil?

  # Client auth stuff
  key_secret = ssl_key_password
  keystore_secret = ssl_keystore_password
  props.put("ssl.keystore.type", ssl_keystore_type) unless ssl_keystore_type.nil?
  props.put("ssl.key.password", key_secret.value) unless key_secret.nil?
  props.put("ssl.keystore.location", ssl_keystore_location) unless ssl_keystore_location.nil?
  props.put("ssl.keystore.password", keystore_secret.value) unless keystore_secret.nil?

  algorithm = ssl_endpoint_identification_algorithm
  props.put("ssl.endpoint.identification.algorithm", algorithm) unless algorithm.nil?
end
|
370
|
+
|
371
|
+
# Applies the SASL settings: `jaas_path` and `kerberos_config` are written to JVM
# system properties, the remaining settings are stored in +props+.
#
# Raises LogStash::ConfigurationError when the mechanism is GSSAPI and no
# Kerberos service name is configured.
def set_sasl_config(props)
  unless jaas_path.nil?
    java.lang.System.setProperty("java.security.auth.login.config", jaas_path)
  end
  unless kerberos_config.nil?
    java.lang.System.setProperty("java.security.krb5.conf", kerberos_config)
  end

  props.put("sasl.mechanism", sasl_mechanism)

  service_name = sasl_kerberos_service_name
  if service_name.nil? && sasl_mechanism == "GSSAPI"
    raise LogStash::ConfigurationError, "sasl_kerberos_service_name must be specified when SASL mechanism is GSSAPI"
  end

  props.put("sasl.kerberos.service.name", service_name) unless service_name.nil?
  props.put("sasl.jaas.config", sasl_jaas_config) unless sasl_jaas_config.nil?
end
|
383
|
+
|
384
|
+
end #class LogStash::Outputs::Kafka
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# Gem specification for logstash-integration-kafka (Kafka input and output plugins).
Gem::Specification.new do |spec|
  spec.name          = 'logstash-integration-kafka'
  spec.version       = '10.0.0'
  spec.licenses      = ['Apache-2.0']
  spec.summary       = "Integration with Kafka - input and output plugins"
  spec.description   = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline " \
                       "using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program."
  spec.authors       = ["Elastic"]
  spec.email         = 'info@elastic.co'
  spec.homepage      = "http://www.elastic.co/guide/en/logstash/current/index.html"
  spec.require_paths = ['lib', 'vendor/jar-dependencies']

  # Files
  packaged_patterns = [
    'lib/**/*',
    'spec/**/*',
    '*.gemspec',
    '*.md',
    'CONTRIBUTORS',
    'Gemfile',
    'LICENSE',
    'NOTICE.TXT',
    'vendor/jar-dependencies/**/*.jar',
    'vendor/jar-dependencies/**/*.rb',
    'VERSION',
    'docs/**/*'
  ]
  spec.files = Dir.glob(packaged_patterns)

  # Tests
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  spec.metadata = {
    "logstash_plugin"     => "true",
    "logstash_group"      => "integration",
    "integration_plugins" => "logstash-input-kafka,logstash-output-kafka"
  }

  spec.add_development_dependency 'jar-dependencies', '~> 0.3.12'

  spec.platform = RUBY_PLATFORM

  # Gem dependencies
  spec.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
  spec.add_runtime_dependency "logstash-core", ">= 6.5.0"

  spec.add_runtime_dependency 'logstash-codec-json'
  spec.add_runtime_dependency 'logstash-codec-plain'
  spec.add_runtime_dependency 'stud', '>= 0.0.22', '< 0.1.0'

  spec.add_development_dependency 'logstash-devutils'
  spec.add_development_dependency 'rspec-wait'
  spec.add_development_dependency 'poseidon'
  spec.add_development_dependency 'snappy'
end
|
@@ -0,0 +1,170 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/devutils/rspec/spec_helper"
|
3
|
+
require "logstash/inputs/kafka"
|
4
|
+
require "digest"
|
5
|
+
require "rspec/wait"
|
6
|
+
|
7
|
+
# Please run kafka_test_setup.sh prior to executing this integration test.
|
8
|
+
describe "inputs/kafka", :integration => true do
  # Group ids to make sure that the consumers get all the logs.
  let(:group_id_1) {rand(36**8).to_s(36)}
  let(:group_id_2) {rand(36**8).to_s(36)}
  let(:group_id_3) {rand(36**8).to_s(36)}
  let(:group_id_4) {rand(36**8).to_s(36)}
  let(:group_id_5) {rand(36**8).to_s(36)}
  let(:plain_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest'} }
  let(:multi_consumer_config) { plain_config.merge({"group_id" => group_id_4, "client_id" => "spec", "consumer_threads" => 3}) }
  let(:snappy_config) { { 'topics' => ['logstash_integration_topic_snappy'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest'} }
  let(:lz4_config) { { 'topics' => ['logstash_integration_topic_lz4'], 'codec' => 'plain', 'group_id' => group_id_1, 'auto_offset_reset' => 'earliest'} }
  let(:pattern_config) { { 'topics_pattern' => 'logstash_integration_topic_.*', 'group_id' => group_id_2, 'codec' => 'plain', 'auto_offset_reset' => 'earliest'} }
  let(:decorate_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_3, 'auto_offset_reset' => 'earliest', 'decorate_events' => true} }
  let(:manual_commit_config) { { 'topics' => ['logstash_integration_topic_plain'], 'codec' => 'plain', 'group_id' => group_id_5, 'auto_offset_reset' => 'earliest', 'enable_auto_commit' => 'false'} }
  let(:timeout_seconds) { 30 }
  let(:num_events) { 103 }

  # Runs the input against +queue+ in a background thread and returns that thread.
  # Defined once here; the nested groups below all share it.
  def thread_it(kafka_input, queue)
    Thread.new do
      begin
        kafka_input.run(queue)
      end
    end
  end

  # Creates an input from +config+, runs it in the background and waits until
  # +expected_count+ events are queued. When a block is given it is then called
  # with the input and the queue for extra expectations. The background thread
  # is always killed afterwards.
  def consume_events(config, expected_count)
    kafka_input = LogStash::Inputs::Kafka.new(config)
    queue = Queue.new
    t = thread_it(kafka_input, queue)
    begin
      t.run
      wait(timeout_seconds).for {queue.length}.to eq(expected_count)
      expect(queue.length).to eq(expected_count)
      yield kafka_input, queue if block_given?
    ensure
      t.kill
      # Thread#join takes seconds, not milliseconds: wait at most 30 seconds.
      t.join(30)
    end
  end

  describe "#kafka-topics" do
    it "should consume all messages from plain 3-partition topic" do
      consume_events(plain_config, num_events)
    end

    it "should consume all messages from snappy 3-partition topic" do
      consume_events(snappy_config, num_events)
    end

    it "should consume all messages from lz4 3-partition topic" do
      consume_events(lz4_config, num_events)
    end

    it "should consumer all messages with multiple consumers" do
      consume_events(multi_consumer_config, num_events) do |kafka_input, _queue|
        kafka_input.kafka_consumers.each_with_index do |consumer, i|
          expect(consumer.metrics.keys.first.tags["client-id"]).to eq("spec-#{i}")
        end
      end
    end
  end

  describe "#kafka-topics-pattern" do
    it "should consume all messages from all 3 topics" do
      consume_events(pattern_config, 3*num_events)
    end
  end

  describe "#kafka-decorate" do
    it "should show the right topic and group name in decorated kafka section" do
      start = LogStash::Timestamp.now.time.to_i
      consume_events(decorate_config, num_events) do |_kafka_input, queue|
        event = queue.shift
        expect(event.get("[@metadata][kafka][topic]")).to eq("logstash_integration_topic_plain")
        expect(event.get("[@metadata][kafka][consumer_group]")).to eq(group_id_3)
        expect(event.get("[@metadata][kafka][timestamp]")).to be >= start
      end
    end
  end

  describe "#kafka-offset-commit" do
    it "should manually commit offsets" do
      consume_events(manual_commit_config, num_events)
    end
  end
end
|