logstash-integration-kafka 10.0.0-java

@@ -0,0 +1,441 @@
:plugin: kafka
:type: output
:default_codec: plain

///////////////////////////////////////////
START - GENERATED VARIABLES, DO NOT EDIT!
///////////////////////////////////////////
:version: %VERSION%
:release_date: %RELEASE_DATE%
:changelog_url: %CHANGELOG_URL%
:include_path: ../../../../logstash/docs/include
///////////////////////////////////////////
END - GENERATED VARIABLES, DO NOT EDIT!
///////////////////////////////////////////

[id="plugins-{type}s-{plugin}"]

=== Kafka output plugin

include::{include_path}/plugin_header.asciidoc[]

==== Description

Write events to a Kafka topic.

This plugin uses Kafka Client 2.3.0, the client version bundled with this release. For broker compatibility, see the official https://cwiki.apache.org/confluence/display/KAFKA/Compatibility+Matrix[Kafka compatibility reference]. If the linked compatibility wiki is not up-to-date, please contact Kafka support/community to confirm compatibility.

If you require features not yet available in this plugin (including client version upgrades), please file an issue with details about what you need.

This output supports connecting to Kafka over:

* SSL (requires plugin version 3.0.0 or later)
* Kerberos SASL (requires plugin version 5.1.0 or later)

By default security is disabled but can be turned on as needed.

The only required configuration is the `topic_id`.

The default codec is plain. With this codec, Logstash encodes your events not only with the message field but also with a timestamp and hostname.

If you want the full content of your events to be sent as JSON, set the codec in the output configuration like this:
[source,ruby]
    output {
      kafka {
        codec => json
        topic_id => "mytopic"
      }
    }

For more information see http://kafka.apache.org/documentation.html#theproducer

Kafka producer configuration: http://kafka.apache.org/documentation.html#newproducerconfigs

[id="plugins-{type}s-{plugin}-options"]
==== Kafka Output Configuration Options

This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.

[cols="<,<,<",options="header",]
|=======================================================================
|Setting |Input type|Required
| <<plugins-{type}s-{plugin}-acks>> |<<string,string>>, one of `["0", "1", "all"]`|No
| <<plugins-{type}s-{plugin}-batch_size>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-bootstrap_servers>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-buffer_memory>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-client_id>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-compression_type>> |<<string,string>>, one of `["none", "gzip", "snappy", "lz4"]`|No
| <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
| <<plugins-{type}s-{plugin}-kerberos_config>> |a valid filesystem path|No
| <<plugins-{type}s-{plugin}-key_serializer>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-linger_ms>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-max_request_size>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-message_key>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-metadata_fetch_timeout_ms>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-metadata_max_age_ms>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-receive_buffer_bytes>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-reconnect_backoff_ms>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-request_timeout_ms>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-retries>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-retry_backoff_ms>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-sasl_jaas_config>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-sasl_kerberos_service_name>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-sasl_mechanism>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-security_protocol>> |<<string,string>>, one of `["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"]`|No
| <<plugins-{type}s-{plugin}-send_buffer_bytes>> |<<number,number>>|No
| <<plugins-{type}s-{plugin}-ssl_endpoint_identification_algorithm>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-ssl_key_password>> |<<password,password>>|No
| <<plugins-{type}s-{plugin}-ssl_keystore_location>> |a valid filesystem path|No
| <<plugins-{type}s-{plugin}-ssl_keystore_password>> |<<password,password>>|No
| <<plugins-{type}s-{plugin}-ssl_keystore_type>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-ssl_truststore_location>> |a valid filesystem path|No
| <<plugins-{type}s-{plugin}-ssl_truststore_password>> |<<password,password>>|No
| <<plugins-{type}s-{plugin}-ssl_truststore_type>> |<<string,string>>|No
| <<plugins-{type}s-{plugin}-topic_id>> |<<string,string>>|Yes
| <<plugins-{type}s-{plugin}-value_serializer>> |<<string,string>>|No
|=======================================================================

Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
output plugins.

&nbsp;

[id="plugins-{type}s-{plugin}-acks"]
===== `acks`

* Value can be any of: `0`, `1`, `all`
* Default value is `"1"`

The number of acknowledgments the producer requires the leader to have received
before considering a request complete.

* `acks=0`: the producer will not wait for any acknowledgment from the server at all.
* `acks=1`: the leader will write the record to its local log, but will respond
  without awaiting full acknowledgement from all followers.
* `acks=all`: the leader will wait for the full set of in-sync replicas to acknowledge the record.
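For example, a configuration that favors durability over latency might wait for all in-sync replicas (a sketch; the topic name is a placeholder):
[source,ruby]
    output {
      kafka {
        topic_id => "mytopic"
        acks => "all"
      }
    }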

[id="plugins-{type}s-{plugin}-batch_size"]
===== `batch_size`

* Value type is <<number,number>>
* Default value is `16384`

The producer will attempt to batch records together into fewer requests whenever multiple
records are being sent to the same partition. This helps performance on both the client
and the server. This configuration controls the default batch size in bytes.

[id="plugins-{type}s-{plugin}-bootstrap_servers"]
===== `bootstrap_servers`

* Value type is <<string,string>>
* Default value is `"localhost:9092"`

This is for bootstrapping and the producer will only use it for getting metadata (topics,
partitions and replicas). The socket connections for sending the actual data will be
established based on the broker information returned in the metadata. The format is
`host1:port1,host2:port2`, and the list can be a subset of brokers or a VIP pointing to a
subset of brokers.
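For example, listing two brokers of the same cluster (the hostnames are placeholders; the remaining brokers are discovered from the returned metadata):
[source,ruby]
    output {
      kafka {
        bootstrap_servers => "broker1.example.com:9092,broker2.example.com:9092"
        topic_id => "mytopic"
      }
    }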

[id="plugins-{type}s-{plugin}-buffer_memory"]
===== `buffer_memory`

* Value type is <<number,number>>
* Default value is `33554432`

The total bytes of memory the producer can use to buffer records waiting to be sent to the server.

[id="plugins-{type}s-{plugin}-client_id"]
===== `client_id`

* Value type is <<string,string>>
* There is no default value for this setting.

The id string to pass to the server when making requests.
The purpose of this is to be able to track the source of requests beyond just
ip/port by allowing a logical application name to be included with the request.

[id="plugins-{type}s-{plugin}-compression_type"]
===== `compression_type`

* Value can be any of: `none`, `gzip`, `snappy`, `lz4`
* Default value is `"none"`

The compression type for all data generated by the producer.
The default is `none` (i.e. no compression). Valid values are `none`, `gzip`, `snappy`, or `lz4`.

[id="plugins-{type}s-{plugin}-jaas_path"]
===== `jaas_path`

* Value type is <<path,path>>
* There is no default value for this setting.

The Java Authentication and Authorization Service (JAAS) API supplies user authentication and authorization
services for Kafka. This setting provides the path to the JAAS file. Sample JAAS file for Kafka client:
[source,java]
----------------------------------
KafkaClient {
  com.sun.security.auth.module.Krb5LoginModule required
  useTicketCache=true
  renewTicket=true
  serviceName="kafka";
};
----------------------------------

Please note that specifying `jaas_path` and `kerberos_config` in the config file will add these
to the global JVM system properties. This means that if you have multiple Kafka inputs or outputs, all of them would be sharing the same
`jaas_path` and `kerberos_config`. If this is not desirable, you would have to run separate instances of Logstash on
different JVM instances.

[id="plugins-{type}s-{plugin}-kerberos_config"]
===== `kerberos_config`

* Value type is <<path,path>>
* There is no default value for this setting.

Optional path to a Kerberos config file (krb5.conf style), as detailed in https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html
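For example, a Kerberos (GSSAPI) setup over SASL_PLAINTEXT might combine these settings as follows (a sketch; the paths, service name, and topic are placeholders for your environment):
[source,ruby]
    output {
      kafka {
        topic_id => "mytopic"
        security_protocol => "SASL_PLAINTEXT"
        sasl_kerberos_service_name => "kafka"
        jaas_path => "/etc/logstash/kafka_client_jaas.conf"
        kerberos_config => "/etc/krb5.conf"
      }
    }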

[id="plugins-{type}s-{plugin}-key_serializer"]
===== `key_serializer`

* Value type is <<string,string>>
* Default value is `"org.apache.kafka.common.serialization.StringSerializer"`

Serializer class for the key of the message.

[id="plugins-{type}s-{plugin}-linger_ms"]
===== `linger_ms`

* Value type is <<number,number>>
* Default value is `0`

The producer groups together any records that arrive in between request
transmissions into a single batched request. Normally this occurs only under
load when records arrive faster than they can be sent out. However, in some circumstances
the client may want to reduce the number of requests even under moderate load.
This setting accomplishes this by adding a small amount of artificial delay: rather
than immediately sending out a record, the producer will wait for up to the given delay
to allow other records to be sent so that the sends can be batched together.
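For example, a throughput-oriented configuration might accept a little extra latency in exchange for larger, compressed batches (a sketch; the values are illustrative, not recommendations):
[source,ruby]
    output {
      kafka {
        topic_id => "mytopic"
        compression_type => "gzip"
        batch_size => 65536
        linger_ms => 50
      }
    }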

[id="plugins-{type}s-{plugin}-max_request_size"]
===== `max_request_size`

* Value type is <<number,number>>
* Default value is `1048576`

The maximum size of a request.

[id="plugins-{type}s-{plugin}-message_key"]
===== `message_key`

* Value type is <<string,string>>
* There is no default value for this setting.

The key for the message.
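Because Kafka uses the key to choose a partition, a common pattern is to derive the key from an event field so that related events end up on the same partition. This sketch assumes your events carry a `user_id` field and that the plugin applies the usual `%{...}` field interpolation to `message_key`:
[source,ruby]
    output {
      kafka {
        topic_id => "mytopic"
        message_key => "%{user_id}"
      }
    }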

[id="plugins-{type}s-{plugin}-metadata_fetch_timeout_ms"]
===== `metadata_fetch_timeout_ms`

* Value type is <<number,number>>
* Default value is `60000`

The timeout setting for the initial metadata request used to fetch topic metadata.

[id="plugins-{type}s-{plugin}-metadata_max_age_ms"]
===== `metadata_max_age_ms`

* Value type is <<number,number>>
* Default value is `300000`

The maximum time in milliseconds before a metadata refresh is forced.

[id="plugins-{type}s-{plugin}-receive_buffer_bytes"]
===== `receive_buffer_bytes`

* Value type is <<number,number>>
* Default value is `32768`

The size of the TCP receive buffer to use when reading data.

[id="plugins-{type}s-{plugin}-reconnect_backoff_ms"]
===== `reconnect_backoff_ms`

* Value type is <<number,number>>
* Default value is `10`

The amount of time to wait before attempting to reconnect to a given host when a connection fails.

[id="plugins-{type}s-{plugin}-request_timeout_ms"]
===== `request_timeout_ms`

* Value type is <<string,string>>
* There is no default value for this setting.

This configuration controls the maximum amount of time the client will wait
for the response of a request. If the response is not received before the timeout
elapses, the client will resend the request if necessary or fail the request if
retries are exhausted.

[id="plugins-{type}s-{plugin}-retries"]
===== `retries`

* Value type is <<number,number>>
* There is no default value for this setting.

The default retry behavior is to retry until successful. To prevent data loss,
the use of this setting is discouraged.

If you choose to set `retries`, a value greater than zero will cause the
client to only retry a fixed number of times. This will result in data loss
if a transport fault exists for longer than your retry count (network outage,
Kafka down, etc).

A value less than zero is a configuration error.

[id="plugins-{type}s-{plugin}-retry_backoff_ms"]
===== `retry_backoff_ms`

* Value type is <<number,number>>
* Default value is `100`

The amount of time to wait before attempting to retry a failed produce request to a given topic partition.

[id="plugins-{type}s-{plugin}-sasl_jaas_config"]
===== `sasl_jaas_config`

* Value type is <<string,string>>
* There is no default value for this setting.

JAAS configuration setting local to this plugin instance, as opposed to the JVM-wide settings loaded from the file configured with `jaas_path`. This allows each plugin instance to have its own configuration.

If both `sasl_jaas_config` and `jaas_path` configurations are set, the setting here takes precedence.

Example (setting for Azure Event Hub):
[source,ruby]
    output {
      kafka {
        sasl_jaas_config => "org.apache.kafka.common.security.plain.PlainLoginModule required username='auser' password='apassword';"
      }
    }

[id="plugins-{type}s-{plugin}-sasl_kerberos_service_name"]
===== `sasl_kerberos_service_name`

* Value type is <<string,string>>
* There is no default value for this setting.

The Kerberos principal name that Kafka broker runs as.
This can be defined either in Kafka's JAAS config or in Kafka's config.

[id="plugins-{type}s-{plugin}-sasl_mechanism"]
===== `sasl_mechanism`

* Value type is <<string,string>>
* Default value is `"GSSAPI"`

http://kafka.apache.org/documentation.html#security_sasl[SASL mechanism] used for client connections.
This may be any mechanism for which a security provider is available.
GSSAPI is the default mechanism.

[id="plugins-{type}s-{plugin}-security_protocol"]
===== `security_protocol`

* Value can be any of: `PLAINTEXT`, `SSL`, `SASL_PLAINTEXT`, `SASL_SSL`
* Default value is `"PLAINTEXT"`

Security protocol to use: one of `PLAINTEXT`, `SSL`, `SASL_PLAINTEXT`, or `SASL_SSL`.
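For example, SASL `PLAIN` authentication over TLS typically combines `security_protocol`, `sasl_mechanism`, and `sasl_jaas_config` (a sketch; the credentials and truststore details are placeholders):
[source,ruby]
    output {
      kafka {
        topic_id => "mytopic"
        security_protocol => "SASL_SSL"
        sasl_mechanism => "PLAIN"
        sasl_jaas_config => "org.apache.kafka.common.security.plain.PlainLoginModule required username='auser' password='apassword';"
        ssl_truststore_location => "/path/to/kafka.client.truststore.jks"
        ssl_truststore_password => "changeit"
      }
    }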

[id="plugins-{type}s-{plugin}-send_buffer_bytes"]
===== `send_buffer_bytes`

* Value type is <<number,number>>
* Default value is `131072`

The size of the TCP send buffer to use when sending data.

[id="plugins-{type}s-{plugin}-ssl_endpoint_identification_algorithm"]
===== `ssl_endpoint_identification_algorithm`

* Value type is <<string,string>>
* Default value is `"https"`

The endpoint identification algorithm, defaults to `"https"`. Set to an empty string (`""`) to disable endpoint verification.

[id="plugins-{type}s-{plugin}-ssl_key_password"]
===== `ssl_key_password`

* Value type is <<password,password>>
* There is no default value for this setting.

The password of the private key in the key store file.

[id="plugins-{type}s-{plugin}-ssl_keystore_location"]
===== `ssl_keystore_location`

* Value type is <<path,path>>
* There is no default value for this setting.

If client authentication is required, this setting stores the keystore path.

[id="plugins-{type}s-{plugin}-ssl_keystore_password"]
===== `ssl_keystore_password`

* Value type is <<password,password>>
* There is no default value for this setting.

If client authentication is required, this setting stores the keystore password.

[id="plugins-{type}s-{plugin}-ssl_keystore_type"]
===== `ssl_keystore_type`

* Value type is <<string,string>>
* There is no default value for this setting.

The keystore type.

[id="plugins-{type}s-{plugin}-ssl_truststore_location"]
===== `ssl_truststore_location`

* Value type is <<path,path>>
* There is no default value for this setting.

The JKS truststore path to validate the Kafka broker's certificate.

[id="plugins-{type}s-{plugin}-ssl_truststore_password"]
===== `ssl_truststore_password`

* Value type is <<password,password>>
* There is no default value for this setting.

The truststore password.

[id="plugins-{type}s-{plugin}-ssl_truststore_type"]
===== `ssl_truststore_type`

* Value type is <<string,string>>
* There is no default value for this setting.

The truststore type.
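Putting the SSL settings together, a TLS connection with client authentication might look like this (a sketch; paths and passwords are placeholders, and the keystore settings are only needed when the broker requires client authentication):
[source,ruby]
    output {
      kafka {
        topic_id => "mytopic"
        security_protocol => "SSL"
        ssl_truststore_location => "/path/to/kafka.client.truststore.jks"
        ssl_truststore_password => "changeit"
        ssl_keystore_location => "/path/to/kafka.client.keystore.jks"
        ssl_keystore_password => "changeit"
        ssl_key_password => "changeit"
      }
    }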

[id="plugins-{type}s-{plugin}-topic_id"]
===== `topic_id`

* This is a required setting.
* Value type is <<string,string>>
* There is no default value for this setting.

The topic to produce messages to.
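If your version of the plugin applies the usual `%{...}` field interpolation to `topic_id` (an assumption worth verifying before relying on it), the destination topic can be derived from an event field, for example:
[source,ruby]
    output {
      kafka {
        topic_id => "logs-%{app}"    # assumes events carry an `app` field
      }
    }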

[id="plugins-{type}s-{plugin}-value_serializer"]
===== `value_serializer`

* Value type is <<string,string>>
* Default value is `"org.apache.kafka.common.serialization.StringSerializer"`

Serializer class for the value of the message.



[id="plugins-{type}s-{plugin}-common-options"]
include::{include_path}/{type}.asciidoc[]

:default_codec!:
@@ -0,0 +1,8 @@
# AUTOGENERATED BY THE GRADLE SCRIPT. DO NOT EDIT.

require 'jar_dependencies'
require_jar('org.apache.kafka', 'kafka-clients', '2.3.0')
require_jar('com.github.luben', 'zstd-jni', '1.4.2-1')
require_jar('org.slf4j', 'slf4j-api', '1.7.26')
require_jar('org.lz4', 'lz4-java', '1.6.0')
require_jar('org.xerial.snappy', 'snappy-java', '1.1.7.3')
@@ -0,0 +1,362 @@
require 'logstash/namespace'
require 'logstash/inputs/base'
require 'stud/interval'
require 'java'
require 'logstash-integration-kafka_jars.rb'

# This input will read events from a Kafka topic. It uses the consumer API
# provided by the Kafka client (version 2.3.0 is bundled with this release) to read messages from the broker.
#
# Here's a compatibility matrix that shows the Kafka client versions that are compatible with each combination
# of Logstash and the Kafka input plugin:
#
# [options="header"]
# |==========================================================
# |Kafka Client Version |Logstash Version |Plugin Version |Why?
# |0.8 |2.0.0 - 2.x.x |<3.0.0 |Legacy, 0.8 is still popular
# |0.9 |2.0.0 - 2.3.x | 3.x.x |Works with the old Ruby Event API (`event['product']['price'] = 10`)
# |0.9 |2.4.x - 5.x.x | 4.x.x |Works with the new getter/setter APIs (`event.set('[product][price]', 10)`)
# |0.10.0.x |2.4.x - 5.x.x | 5.x.x |Not compatible with the <= 0.9 broker
# |0.10.1.x |2.4.x - 5.x.x | 6.x.x |
# |==========================================================
#
# NOTE: We recommend that you use matching Kafka client and broker versions. During upgrades, you should
# upgrade brokers before clients because brokers target backwards compatibility. For example, the 0.9 broker
# is compatible with both the 0.8 consumer and 0.9 consumer APIs, but not the other way around.
#
# This input supports connecting to Kafka over:
#
# * SSL (requires plugin version 3.0.0 or later)
# * Kerberos SASL (requires plugin version 5.1.0 or later)
#
# By default security is disabled but can be turned on as needed.
#
# The Logstash Kafka consumer handles group management and uses the default offset management
# strategy using Kafka topics.
#
# Logstash instances by default form a single logical group to subscribe to Kafka topics.
# Each Logstash Kafka consumer can run multiple threads to increase read throughput. Alternatively,
# you could run multiple Logstash instances with the same `group_id` to spread the load across
# physical machines. Messages in a topic will be distributed to all Logstash instances with
# the same `group_id`.
#
# Ideally you should have as many threads as the number of partitions for a perfect balance --
# more threads than partitions means that some threads will be idle.
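#
# For example (a sketch, not part of the shipped documentation), one Logstash instance reading a
# three-partition topic could run three consumer threads; alternatively, three instances with
# one thread each and the same `group_id` would each receive one partition:
#
# [source,ruby]
#     input {
#       kafka {
#         bootstrap_servers => "localhost:9092"
#         topics => ["logstash"]
#         group_id => "logstash"
#         consumer_threads => 3
#       }
#     }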
#
# For more information see http://kafka.apache.org/documentation.html#theconsumer
#
# Kafka consumer configuration: http://kafka.apache.org/documentation.html#consumerconfigs
#
class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  config_name 'kafka'

  default :codec, 'plain'

  # The frequency in milliseconds that the consumer offsets are committed to Kafka.
  config :auto_commit_interval_ms, :validate => :string, :default => "5000"
  # What to do when there is no initial offset in Kafka or if an offset is out of range:
  #
  # * earliest: automatically reset the offset to the earliest offset
  # * latest: automatically reset the offset to the latest offset
  # * none: throw exception to the consumer if no previous offset is found for the consumer's group
  # * anything else: throw exception to the consumer.
  config :auto_offset_reset, :validate => :string
  # A list of URLs of Kafka instances to use for establishing the initial connection to the cluster.
  # This list should be in the form of `host1:port1,host2:port2`. These URLs are just used
  # for the initial connection to discover the full cluster membership (which may change dynamically)
  # so this list need not contain the full set of servers (you may want more than one, though, in
  # case a server is down).
  config :bootstrap_servers, :validate => :string, :default => "localhost:9092"
  # Automatically check the CRC32 of the records consumed. This ensures no on-the-wire or on-disk
  # corruption to the messages occurred. This check adds some overhead, so it may be
  # disabled in cases seeking extreme performance.
  config :check_crcs, :validate => :string
  # The id string to pass to the server when making requests. The purpose of this
  # is to be able to track the source of requests beyond just ip/port by allowing
  # a logical application name to be included.
  config :client_id, :validate => :string, :default => "logstash"
  # Close idle connections after the number of milliseconds specified by this config.
  config :connections_max_idle_ms, :validate => :string
  # Ideally you should have as many threads as the number of partitions for a perfect
  # balance -- more threads than partitions means that some threads will be idle.
  config :consumer_threads, :validate => :number, :default => 1
  # If true, periodically commit to Kafka the offsets of messages already returned by the consumer.
  # This committed offset will be used when the process fails as the position from
  # which the consumption will begin.
  config :enable_auto_commit, :validate => :string, :default => "true"
  # Whether records from internal topics (such as offsets) should be exposed to the consumer.
  # If set to true the only way to receive records from an internal topic is subscribing to it.
  config :exclude_internal_topics, :validate => :string
  # The maximum amount of data the server should return for a fetch request. This is not an
  # absolute maximum; if the first message in the first non-empty partition of the fetch is larger
  # than this value, the message will still be returned to ensure that the consumer can make progress.
  config :fetch_max_bytes, :validate => :string
  # The maximum amount of time the server will block before answering the fetch request if
  # there isn't sufficient data to immediately satisfy `fetch_min_bytes`. This
  # should be less than or equal to the timeout used in `poll_timeout_ms`.
  config :fetch_max_wait_ms, :validate => :string
  # The minimum amount of data the server should return for a fetch request. If insufficient
  # data is available the request will wait for that much data to accumulate
  # before answering the request.
  config :fetch_min_bytes, :validate => :string
  # The identifier of the group this consumer belongs to. A consumer group is a single logical subscriber
  # that happens to be made up of multiple processors. Messages in a topic will be distributed to all
  # Logstash instances with the same `group_id`.
  config :group_id, :validate => :string, :default => "logstash"
  # The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
  # that the consumer's session stays active and to facilitate rebalancing when new
  # consumers join or leave the group. The value must be set lower than
  # `session.timeout.ms`, but typically should be set no higher than 1/3 of that value.
  # It can be adjusted even lower to control the expected time for normal rebalances.
  config :heartbeat_interval_ms, :validate => :string
  # Java class used to deserialize the record's key
  config :key_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
  # The maximum delay between invocations of poll() when using consumer group management. This places
  # an upper bound on the amount of time that the consumer can be idle before fetching more records.
  # If poll() is not called before expiration of this timeout, then the consumer is considered failed and
  # the group will rebalance in order to reassign the partitions to another member.
  # The value of the configuration `request_timeout_ms` must always be larger than `max_poll_interval_ms`.
  config :max_poll_interval_ms, :validate => :string
  # The maximum amount of data per-partition the server will return. The maximum total memory used for a
  # request will be <code>#partitions * max.partition.fetch.bytes</code>. This size must be at least
  # as large as the maximum message size the server allows or else it is possible for the producer to
  # send messages larger than the consumer can fetch. If that happens, the consumer can get stuck trying
  # to fetch a large message on a certain partition.
  config :max_partition_fetch_bytes, :validate => :string
  # The maximum number of records returned in a single call to poll().
  config :max_poll_records, :validate => :string
  # The period of time in milliseconds after which we force a refresh of metadata even if
  # we haven't seen any partition leadership changes to proactively discover any new brokers or partitions
  config :metadata_max_age_ms, :validate => :string
  # The class name of the partition assignment strategy that the client will use to distribute
  # partition ownership amongst consumer instances
  config :partition_assignment_strategy, :validate => :string
  # The size of the TCP receive buffer (SO_RCVBUF) to use when reading data.
  config :receive_buffer_bytes, :validate => :string
  # The amount of time to wait before attempting to reconnect to a given host.
  # This avoids repeatedly connecting to a host in a tight loop.
  # This backoff applies to all requests sent by the consumer to the broker.
  config :reconnect_backoff_ms, :validate => :string
  # The configuration controls the maximum amount of time the client will wait
  # for the response of a request. If the response is not received before the timeout
  # elapses the client will resend the request if necessary or fail the request if
  # retries are exhausted.
  config :request_timeout_ms, :validate => :string
  # The amount of time to wait before attempting to retry a failed fetch request
  # to a given topic partition. This avoids repeated fetching-and-failing in a tight loop.
  config :retry_backoff_ms, :validate => :string
  # The size of the TCP send buffer (SO_SNDBUF) to use when sending data
  config :send_buffer_bytes, :validate => :string
  # The timeout after which, if no heartbeat from the consumer is received (for example because it has
  # stopped polling), the consumer is marked dead and a rebalance operation is triggered for the group
  # identified by `group_id`
  config :session_timeout_ms, :validate => :string
  # Java class used to deserialize the record's value
  config :value_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
  # A list of topics to subscribe to, defaults to ["logstash"].
  config :topics, :validate => :array, :default => ["logstash"]
  # A topic regex pattern to subscribe to.
  # The topics configuration will be ignored when using this configuration.
  config :topics_pattern, :validate => :string
  # Time kafka consumer will wait to receive new messages from topics
  config :poll_timeout_ms, :validate => :number, :default => 100
  # The truststore type.
  config :ssl_truststore_type, :validate => :string
  # The JKS truststore path to validate the Kafka broker's certificate.
  config :ssl_truststore_location, :validate => :path
  # The truststore password
  config :ssl_truststore_password, :validate => :password
  # The keystore type.
  config :ssl_keystore_type, :validate => :string
  # If client authentication is required, this setting stores the keystore path.
  config :ssl_keystore_location, :validate => :path
  # If client authentication is required, this setting stores the keystore password
  config :ssl_keystore_password, :validate => :password
  # The password of the private key in the key store file.
  config :ssl_key_password, :validate => :password
  # Algorithm to use when verifying host. Set to "" to disable
  config :ssl_endpoint_identification_algorithm, :validate => :string, :default => 'https'
  # Security protocol to use, which can be either of PLAINTEXT, SSL, SASL_PLAINTEXT, SASL_SSL
  config :security_protocol, :validate => ["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"], :default => "PLAINTEXT"
  # http://kafka.apache.org/documentation.html#security_sasl[SASL mechanism] used for client connections.
  # This may be any mechanism for which a security provider is available.
  # GSSAPI is the default mechanism.
  config :sasl_mechanism, :validate => :string, :default => "GSSAPI"
  # The Kerberos principal name that Kafka broker runs as.
  # This can be defined either in Kafka's JAAS config or in Kafka's config.
  config :sasl_kerberos_service_name, :validate => :string
  # The Java Authentication and Authorization Service (JAAS) API supplies user authentication and authorization
  # services for Kafka. This setting provides the path to the JAAS file. Sample JAAS file for Kafka client:
  # [source,java]
  # ----------------------------------
  # KafkaClient {
  #   com.sun.security.auth.module.Krb5LoginModule required
  #   useTicketCache=true
  #   renewTicket=true
  #   serviceName="kafka";
  # };
  # ----------------------------------
  #
  # Please note that specifying `jaas_path` and `kerberos_config` in the config file will add these
  # to the global JVM system properties. This means if you have multiple Kafka inputs, all of them would be sharing the same
  # `jaas_path` and `kerberos_config`. If this is not desirable, you would have to run separate instances of Logstash on
  # different JVM instances.
  config :jaas_path, :validate => :path
  # JAAS configuration settings. This allows JAAS config to be a part of the plugin configuration and allows for different JAAS configuration per each plugin config.
  config :sasl_jaas_config, :validate => :string
  # Optional path to kerberos config file. This is krb5.conf style as detailed in https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html
  config :kerberos_config, :validate => :path
  # Option to add Kafka metadata like topic, message size to the event.
  # This will add fields under `[@metadata][kafka]` containing the following attributes:
  # `topic`: The topic this message is associated with
  # `consumer_group`: The consumer group used to read in this event
  # `partition`: The partition this message is associated with
  # `offset`: The offset from the partition this message is associated with
  # `key`: A ByteBuffer containing the message key
  # `timestamp`: The timestamp of this message
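  #
  # For example (a sketch, not shipped documentation), the metadata can be copied into a regular
  # field before output, since `@metadata` fields are not emitted by outputs by default:
  #
  # [source,ruby]
  #     input {
  #       kafka {
  #         topics => ["logstash"]
  #         decorate_events => true
  #       }
  #     }
  #     filter {
  #       mutate {
  #         add_field => { "kafka_topic" => "%{[@metadata][kafka][topic]}" }
  #       }
  #     }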
  config :decorate_events, :validate => :boolean, :default => false


  public
  def register
    @runner_threads = []
  end # def register

  public
  def run(logstash_queue)
    @runner_consumers = consumer_threads.times.map { |i| create_consumer("#{client_id}-#{i}") }
    @runner_threads = @runner_consumers.map { |consumer| thread_runner(logstash_queue, consumer) }
    @runner_threads.each { |t| t.join }
  end # def run

  public
  def stop
    # if we have consumers, wake them up to unblock our runner threads
    @runner_consumers && @runner_consumers.each(&:wakeup)
  end

  public
  def kafka_consumers
    @runner_consumers
  end

  private
  def thread_runner(logstash_queue, consumer)
    Thread.new do
      begin
        unless @topics_pattern.nil?
          nooplistener = org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener.new
          pattern = java.util.regex.Pattern.compile(@topics_pattern)
          consumer.subscribe(pattern, nooplistener)
        else
          consumer.subscribe(topics)
        end
        codec_instance = @codec.clone
        while !stop?
          records = consumer.poll(poll_timeout_ms)
          next unless records.count > 0
          for record in records do
            codec_instance.decode(record.value.to_s) do |event|
              decorate(event)
              if @decorate_events
                event.set("[@metadata][kafka][topic]", record.topic)
                event.set("[@metadata][kafka][consumer_group]", @group_id)
                event.set("[@metadata][kafka][partition]", record.partition)
                event.set("[@metadata][kafka][offset]", record.offset)
                event.set("[@metadata][kafka][key]", record.key)
                event.set("[@metadata][kafka][timestamp]", record.timestamp)
              end
              logstash_queue << event
            end
          end
          # Manual offset commit
          if @enable_auto_commit == "false"
            consumer.commitSync
          end
        end
      rescue org.apache.kafka.common.errors.WakeupException => e
        raise e if !stop?
      ensure
        consumer.close
      end
    end
  end

  private
  def create_consumer(client_id)
    begin
      props = java.util.Properties.new
      kafka = org.apache.kafka.clients.consumer.ConsumerConfig

      props.put(kafka::AUTO_COMMIT_INTERVAL_MS_CONFIG, auto_commit_interval_ms)
      props.put(kafka::AUTO_OFFSET_RESET_CONFIG, auto_offset_reset) unless auto_offset_reset.nil?
      props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers)
      props.put(kafka::CHECK_CRCS_CONFIG, check_crcs) unless check_crcs.nil?
      props.put(kafka::CLIENT_ID_CONFIG, client_id)
      props.put(kafka::CONNECTIONS_MAX_IDLE_MS_CONFIG, connections_max_idle_ms) unless connections_max_idle_ms.nil?
      props.put(kafka::ENABLE_AUTO_COMMIT_CONFIG, enable_auto_commit)
      props.put(kafka::EXCLUDE_INTERNAL_TOPICS_CONFIG, exclude_internal_topics) unless exclude_internal_topics.nil?
      props.put(kafka::FETCH_MAX_BYTES_CONFIG, fetch_max_bytes) unless fetch_max_bytes.nil?
      props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms) unless fetch_max_wait_ms.nil?
      props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes) unless fetch_min_bytes.nil?
      props.put(kafka::GROUP_ID_CONFIG, group_id)
      props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms) unless heartbeat_interval_ms.nil?
      props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
      props.put(kafka::MAX_PARTITION_FETCH_BYTES_CONFIG, max_partition_fetch_bytes) unless max_partition_fetch_bytes.nil?
      props.put(kafka::MAX_POLL_RECORDS_CONFIG, max_poll_records) unless max_poll_records.nil?
      props.put(kafka::MAX_POLL_INTERVAL_MS_CONFIG, max_poll_interval_ms) unless max_poll_interval_ms.nil?
      props.put(kafka::METADATA_MAX_AGE_CONFIG, metadata_max_age_ms) unless metadata_max_age_ms.nil?
      props.put(kafka::PARTITION_ASSIGNMENT_STRATEGY_CONFIG, partition_assignment_strategy) unless partition_assignment_strategy.nil?
      props.put(kafka::RECEIVE_BUFFER_CONFIG, receive_buffer_bytes) unless receive_buffer_bytes.nil?
      props.put(kafka::RECONNECT_BACKOFF_MS_CONFIG, reconnect_backoff_ms) unless reconnect_backoff_ms.nil?
      props.put(kafka::REQUEST_TIMEOUT_MS_CONFIG, request_timeout_ms) unless request_timeout_ms.nil?
      props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms) unless retry_backoff_ms.nil?
      props.put(kafka::SEND_BUFFER_CONFIG, send_buffer_bytes) unless send_buffer_bytes.nil?
      props.put(kafka::SESSION_TIMEOUT_MS_CONFIG, session_timeout_ms) unless session_timeout_ms.nil?
      props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, value_deserializer_class)

      props.put("security.protocol", security_protocol) unless security_protocol.nil?

      if security_protocol == "SSL"
        set_trustore_keystore_config(props)
      elsif security_protocol == "SASL_PLAINTEXT"
        set_sasl_config(props)
      elsif security_protocol == "SASL_SSL"
        set_trustore_keystore_config(props)
        set_sasl_config(props)
      end

      org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
    rescue => e
      logger.error("Unable to create Kafka consumer from given configuration",
                   :kafka_error_message => e,
                   :cause => e.respond_to?(:getCause) ? e.getCause() : nil)
      raise e
    end
  end

  def set_trustore_keystore_config(props)
    props.put("ssl.truststore.type", ssl_truststore_type) unless ssl_truststore_type.nil?
    props.put("ssl.truststore.location", ssl_truststore_location) unless ssl_truststore_location.nil?
    props.put("ssl.truststore.password", ssl_truststore_password.value) unless ssl_truststore_password.nil?

    # Client auth stuff
    props.put("ssl.keystore.type", ssl_keystore_type) unless ssl_keystore_type.nil?
    props.put("ssl.key.password", ssl_key_password.value) unless ssl_key_password.nil?
    props.put("ssl.keystore.location", ssl_keystore_location) unless ssl_keystore_location.nil?
    props.put("ssl.keystore.password", ssl_keystore_password.value) unless ssl_keystore_password.nil?
    props.put("ssl.endpoint.identification.algorithm", ssl_endpoint_identification_algorithm) unless ssl_endpoint_identification_algorithm.nil?
  end

  def set_sasl_config(props)
    java.lang.System.setProperty("java.security.auth.login.config", jaas_path) unless jaas_path.nil?
    java.lang.System.setProperty("java.security.krb5.conf", kerberos_config) unless kerberos_config.nil?

    props.put("sasl.mechanism", sasl_mechanism)
    if sasl_mechanism == "GSSAPI" && sasl_kerberos_service_name.nil?
      raise LogStash::ConfigurationError, "sasl_kerberos_service_name must be specified when SASL mechanism is GSSAPI"
    end

    props.put("sasl.kerberos.service.name", sasl_kerberos_service_name) unless sasl_kerberos_service_name.nil?
    props.put("sasl.jaas.config", sasl_jaas_config) unless sasl_jaas_config.nil?
  end
end # class LogStash::Inputs::Kafka