logstash-integration-kafka 10.0.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,441 @@
+ :plugin: kafka
+ :type: output
+ :default_codec: plain
+
+ ///////////////////////////////////////////
+ START - GENERATED VARIABLES, DO NOT EDIT!
+ ///////////////////////////////////////////
+ :version: %VERSION%
+ :release_date: %RELEASE_DATE%
+ :changelog_url: %CHANGELOG_URL%
+ :include_path: ../../../../logstash/docs/include
+ ///////////////////////////////////////////
+ END - GENERATED VARIABLES, DO NOT EDIT!
+ ///////////////////////////////////////////
+
+ [id="plugins-{type}s-{plugin}"]
+
+ === Kafka output plugin
+
+ include::{include_path}/plugin_header.asciidoc[]
+
+ ==== Description
+
+ Write events to a Kafka topic.
+
+ This plugin uses Kafka Client 2.3.0. For broker compatibility, see the official https://cwiki.apache.org/confluence/display/KAFKA/Compatibility+Matrix[Kafka compatibility reference]. If the linked compatibility wiki is not up-to-date, please contact Kafka support/community to confirm compatibility.
+
+ If you require features not yet available in this plugin (including client version upgrades), please file an issue with details about what you need.
+
+ This output supports connecting to Kafka over:
+
+ * SSL (requires plugin version 3.0.0 or later)
+ * Kerberos SASL (requires plugin version 5.1.0 or later)
+
+ By default security is disabled but can be turned on as needed.
+
+ The only required configuration is the `topic_id`.
+
+ The default codec is plain. Logstash will encode your events with not only the message field but also a timestamp and hostname.
+
+ If you want the full content of your events to be sent as JSON, you should set the codec in the output configuration like this:
+ [source,ruby]
+     output {
+       kafka {
+         codec => json
+         topic_id => "mytopic"
+       }
+     }
+
+ For more information see http://kafka.apache.org/documentation.html#theproducer
+
+ Kafka producer configuration: http://kafka.apache.org/documentation.html#newproducerconfigs
+
+ [id="plugins-{type}s-{plugin}-options"]
+ ==== Kafka Output Configuration Options
+
+ This plugin supports the following configuration options plus the <<plugins-{type}s-{plugin}-common-options>> described later.
+
+ [cols="<,<,<",options="header",]
+ |=======================================================================
+ |Setting |Input type|Required
+ | <<plugins-{type}s-{plugin}-acks>> |<<string,string>>, one of `["0", "1", "all"]`|No
+ | <<plugins-{type}s-{plugin}-batch_size>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-bootstrap_servers>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-buffer_memory>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-client_id>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-compression_type>> |<<string,string>>, one of `["none", "gzip", "snappy", "lz4"]`|No
+ | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
+ | <<plugins-{type}s-{plugin}-kerberos_config>> |a valid filesystem path|No
+ | <<plugins-{type}s-{plugin}-key_serializer>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-linger_ms>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-max_request_size>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-message_key>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-metadata_fetch_timeout_ms>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-metadata_max_age_ms>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-receive_buffer_bytes>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-reconnect_backoff_ms>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-request_timeout_ms>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-retries>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-retry_backoff_ms>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-sasl_jaas_config>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-sasl_kerberos_service_name>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-sasl_mechanism>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-security_protocol>> |<<string,string>>, one of `["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"]`|No
+ | <<plugins-{type}s-{plugin}-send_buffer_bytes>> |<<number,number>>|No
+ | <<plugins-{type}s-{plugin}-ssl_endpoint_identification_algorithm>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-ssl_key_password>> |<<password,password>>|No
+ | <<plugins-{type}s-{plugin}-ssl_keystore_location>> |a valid filesystem path|No
+ | <<plugins-{type}s-{plugin}-ssl_keystore_password>> |<<password,password>>|No
+ | <<plugins-{type}s-{plugin}-ssl_keystore_type>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-ssl_truststore_location>> |a valid filesystem path|No
+ | <<plugins-{type}s-{plugin}-ssl_truststore_password>> |<<password,password>>|No
+ | <<plugins-{type}s-{plugin}-ssl_truststore_type>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-topic_id>> |<<string,string>>|Yes
+ | <<plugins-{type}s-{plugin}-value_serializer>> |<<string,string>>|No
+ |=======================================================================
+
+ Also see <<plugins-{type}s-{plugin}-common-options>> for a list of options supported by all
+ output plugins.
+
+ &nbsp;
+
+ [id="plugins-{type}s-{plugin}-acks"]
+ ===== `acks`
+
+ * Value can be any of: `0`, `1`, `all`
+ * Default value is `"1"`
+
+ The number of acknowledgments the producer requires the leader to have received
+ before considering a request complete.
+
+ * `acks=0`: the producer will not wait for any acknowledgment from the server at all.
+ * `acks=1`: the leader will write the record to its local log, but will respond without awaiting full acknowledgement from all followers.
+ * `acks=all`: the leader will wait for the full set of in-sync replicas to acknowledge the record.
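+
+ For example, a sketch of a durability-leaning configuration (the topic name is illustrative):
+ [source,ruby]
+     output {
+       kafka {
+         topic_id => "mytopic"
+         acks => "all"
+       }
+     }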
+
+ [id="plugins-{type}s-{plugin}-batch_size"]
+ ===== `batch_size`
+
+ * Value type is <<number,number>>
+ * Default value is `16384`
+
+ The producer will attempt to batch records together into fewer requests whenever multiple
+ records are being sent to the same partition. This helps performance on both the client
+ and the server. This configuration controls the default batch size in bytes.
+
+ [id="plugins-{type}s-{plugin}-bootstrap_servers"]
+ ===== `bootstrap_servers`
+
+ * Value type is <<string,string>>
+ * Default value is `"localhost:9092"`
+
+ This is for bootstrapping and the producer will only use it for getting metadata (topics,
+ partitions and replicas). The socket connections for sending the actual data will be
+ established based on the broker information returned in the metadata. The format is
+ `host1:port1,host2:port2`, and the list can be a subset of brokers or a VIP pointing to a
+ subset of brokers.
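+
+ For example (hostnames are hypothetical), listing two brokers lets the producer bootstrap even if one seed host is down:
+ [source,ruby]
+     bootstrap_servers => "kafka1.example.com:9092,kafka2.example.com:9092"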
+
+ [id="plugins-{type}s-{plugin}-buffer_memory"]
+ ===== `buffer_memory`
+
+ * Value type is <<number,number>>
+ * Default value is `33554432`
+
+ The total bytes of memory the producer can use to buffer records waiting to be sent to the server.
+
+ [id="plugins-{type}s-{plugin}-client_id"]
+ ===== `client_id`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The id string to pass to the server when making requests.
+ The purpose of this is to be able to track the source of requests beyond just
+ ip/port by allowing a logical application name to be included with the request.
+
+ [id="plugins-{type}s-{plugin}-compression_type"]
+ ===== `compression_type`
+
+ * Value can be any of: `none`, `gzip`, `snappy`, `lz4`
+ * Default value is `"none"`
+
+ The compression type for all data generated by the producer.
+ The default is none (i.e. no compression). Valid values are `none`, `gzip`, `snappy`, or `lz4`.
+
+ [id="plugins-{type}s-{plugin}-jaas_path"]
+ ===== `jaas_path`
+
+ * Value type is <<path,path>>
+ * There is no default value for this setting.
+
+ The Java Authentication and Authorization Service (JAAS) API supplies user authentication and authorization
+ services for Kafka. This setting provides the path to the JAAS file. Sample JAAS file for Kafka client:
+ [source,java]
+ ----------------------------------
+ KafkaClient {
+   com.sun.security.auth.module.Krb5LoginModule required
+   useTicketCache=true
+   renewTicket=true
+   serviceName="kafka";
+ };
+ ----------------------------------
+
+ Please note that specifying `jaas_path` and `kerberos_config` in the config file will add these
+ to the global JVM system properties. This means that if you have multiple Kafka outputs, all of them would be sharing the same
+ `jaas_path` and `kerberos_config`. If this is not desirable, you would have to run separate instances of Logstash on
+ different JVM instances.
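+
+ For example, a Kerberos (SASL) setup might look like the following sketch; the paths and service name are illustrative, not defaults:
+ [source,ruby]
+     output {
+       kafka {
+         topic_id => "mytopic"
+         security_protocol => "SASL_PLAINTEXT"
+         sasl_kerberos_service_name => "kafka"
+         jaas_path => "/etc/logstash/kafka_jaas.conf"
+         kerberos_config => "/etc/krb5.conf"
+       }
+     }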
+
+ [id="plugins-{type}s-{plugin}-kerberos_config"]
+ ===== `kerberos_config`
+
+ * Value type is <<path,path>>
+ * There is no default value for this setting.
+
+ Optional path to kerberos config file. This is krb5.conf style as detailed in https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html
+
+ [id="plugins-{type}s-{plugin}-key_serializer"]
+ ===== `key_serializer`
+
+ * Value type is <<string,string>>
+ * Default value is `"org.apache.kafka.common.serialization.StringSerializer"`
+
+ Serializer class for the key of the message.
+
+ [id="plugins-{type}s-{plugin}-linger_ms"]
+ ===== `linger_ms`
+
+ * Value type is <<number,number>>
+ * Default value is `0`
+
+ The producer groups together any records that arrive in between request
+ transmissions into a single batched request. Normally this occurs only under
+ load when records arrive faster than they can be sent out. However, in some circumstances
+ the client may want to reduce the number of requests even under moderate load.
+ This setting accomplishes this by adding a small amount of artificial delay; that is,
+ rather than immediately sending out a record the producer will wait for up to the given delay
+ to allow other records to be sent so that the sends can be batched together.
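+
+ For example, a throughput-oriented sketch that trades a little latency for larger batches (the values are illustrative, not recommendations):
+ [source,ruby]
+     output {
+       kafka {
+         topic_id => "mytopic"
+         batch_size => 65536   # bytes per in-flight batch
+         linger_ms => 50       # wait up to 50 ms to fill a batch
+         compression_type => "snappy"
+       }
+     }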
+
+ [id="plugins-{type}s-{plugin}-max_request_size"]
+ ===== `max_request_size`
+
+ * Value type is <<number,number>>
+ * Default value is `1048576`
+
+ The maximum size of a request.
+
+ [id="plugins-{type}s-{plugin}-message_key"]
+ ===== `message_key`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The key for the message.
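+
+ The value can reference event fields with the `%{...}` (sprintf) syntax, so records can be keyed per event. For example, assuming your events carry a `user_id` field, this keeps each user's messages on a single partition:
+ [source,ruby]
+     message_key => "%{user_id}"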
+
+ [id="plugins-{type}s-{plugin}-metadata_fetch_timeout_ms"]
+ ===== `metadata_fetch_timeout_ms`
+
+ * Value type is <<number,number>>
+ * Default value is `60000`
+
+ The timeout for the initial metadata request used to fetch topic metadata.
+
+ [id="plugins-{type}s-{plugin}-metadata_max_age_ms"]
+ ===== `metadata_max_age_ms`
+
+ * Value type is <<number,number>>
+ * Default value is `300000`
+
+ The maximum time in milliseconds before a metadata refresh is forced.
+
+ [id="plugins-{type}s-{plugin}-receive_buffer_bytes"]
+ ===== `receive_buffer_bytes`
+
+ * Value type is <<number,number>>
+ * Default value is `32768`
+
+ The size of the TCP receive buffer to use when reading data.
+
+ [id="plugins-{type}s-{plugin}-reconnect_backoff_ms"]
+ ===== `reconnect_backoff_ms`
+
+ * Value type is <<number,number>>
+ * Default value is `10`
+
+ The amount of time to wait before attempting to reconnect to a given host when a connection fails.
+
+ [id="plugins-{type}s-{plugin}-request_timeout_ms"]
+ ===== `request_timeout_ms`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The configuration controls the maximum amount of time the client will wait
+ for the response of a request. If the response is not received before the timeout
+ elapses the client will resend the request if necessary or fail the request if
+ retries are exhausted.
+
+ [id="plugins-{type}s-{plugin}-retries"]
+ ===== `retries`
+
+ * Value type is <<number,number>>
+ * There is no default value for this setting.
+
+ The default retry behavior is to retry until successful. To prevent data loss,
+ the use of this setting is discouraged.
+
+ If you choose to set `retries`, a value greater than zero will cause the
+ client to only retry a fixed number of times. This will result in data loss
+ if a transport fault exists for longer than your retry count (network outage,
+ Kafka down, etc).
+
+ A value less than zero is a configuration error.
+
+ [id="plugins-{type}s-{plugin}-retry_backoff_ms"]
+ ===== `retry_backoff_ms`
+
+ * Value type is <<number,number>>
+ * Default value is `100`
+
+ The amount of time to wait before attempting to retry a failed produce request to a given topic partition.
+
+ [id="plugins-{type}s-{plugin}-sasl_jaas_config"]
+ ===== `sasl_jaas_config`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ JAAS configuration setting local to this plugin instance, as opposed to the JVM-wide settings loaded from the file configured via `jaas_path`. This allows each plugin instance to have its own configuration.
+
+ If both `sasl_jaas_config` and `jaas_path` configurations are set, the setting here takes precedence.
+
+ Example (setting for Azure Event Hub):
+ [source,ruby]
+     output {
+       kafka {
+         sasl_jaas_config => "org.apache.kafka.common.security.plain.PlainLoginModule required username='auser' password='apassword';"
+       }
+     }
+
+ [id="plugins-{type}s-{plugin}-sasl_kerberos_service_name"]
+ ===== `sasl_kerberos_service_name`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The Kerberos principal name that Kafka broker runs as.
+ This can be defined either in Kafka's JAAS config or in Kafka's config.
+
+ [id="plugins-{type}s-{plugin}-sasl_mechanism"]
+ ===== `sasl_mechanism`
+
+ * Value type is <<string,string>>
+ * Default value is `"GSSAPI"`
+
+ http://kafka.apache.org/documentation.html#security_sasl[SASL mechanism] used for client connections.
+ This may be any mechanism for which a security provider is available.
+ GSSAPI is the default mechanism.
+
+ [id="plugins-{type}s-{plugin}-security_protocol"]
+ ===== `security_protocol`
+
+ * Value can be any of: `PLAINTEXT`, `SSL`, `SASL_PLAINTEXT`, `SASL_SSL`
+ * Default value is `"PLAINTEXT"`
+
+ Security protocol to use, which can be one of `PLAINTEXT`, `SSL`, `SASL_PLAINTEXT`, or `SASL_SSL`.
+
+ [id="plugins-{type}s-{plugin}-send_buffer_bytes"]
+ ===== `send_buffer_bytes`
+
+ * Value type is <<number,number>>
+ * Default value is `131072`
+
+ The size of the TCP send buffer to use when sending data.
+
+ [id="plugins-{type}s-{plugin}-ssl_endpoint_identification_algorithm"]
+ ===== `ssl_endpoint_identification_algorithm`
+
+ * Value type is <<string,string>>
+ * Default value is `"https"`
+
+ The endpoint identification algorithm, defaults to `"https"`. Set to empty string `""` to disable.
+
+ [id="plugins-{type}s-{plugin}-ssl_key_password"]
+ ===== `ssl_key_password`
+
+ * Value type is <<password,password>>
+ * There is no default value for this setting.
+
+ The password of the private key in the key store file.
+
+ [id="plugins-{type}s-{plugin}-ssl_keystore_location"]
+ ===== `ssl_keystore_location`
+
+ * Value type is <<path,path>>
+ * There is no default value for this setting.
+
+ If client authentication is required, this setting stores the keystore path.
+
+ [id="plugins-{type}s-{plugin}-ssl_keystore_password"]
+ ===== `ssl_keystore_password`
+
+ * Value type is <<password,password>>
+ * There is no default value for this setting.
+
+ If client authentication is required, this setting stores the keystore password.
+
+ [id="plugins-{type}s-{plugin}-ssl_keystore_type"]
+ ===== `ssl_keystore_type`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The keystore type.
+
+ [id="plugins-{type}s-{plugin}-ssl_truststore_location"]
+ ===== `ssl_truststore_location`
+
+ * Value type is <<path,path>>
+ * There is no default value for this setting.
+
+ The JKS truststore path to validate the Kafka broker's certificate.
+
+ [id="plugins-{type}s-{plugin}-ssl_truststore_password"]
+ ===== `ssl_truststore_password`
+
+ * Value type is <<password,password>>
+ * There is no default value for this setting.
+
+ The truststore password.
+
+ [id="plugins-{type}s-{plugin}-ssl_truststore_type"]
+ ===== `ssl_truststore_type`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The truststore type.
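+
+ Putting the SSL settings together, here is a sketch of a mutually authenticated (client certificate) output; all paths and passwords are placeholders:
+ [source,ruby]
+     output {
+       kafka {
+         topic_id => "mytopic"
+         security_protocol => "SSL"
+         ssl_truststore_location => "/path/to/truststore.jks"
+         ssl_truststore_password => "changeit"
+         ssl_keystore_location => "/path/to/keystore.jks"
+         ssl_keystore_password => "changeit"
+         ssl_key_password => "changeit"
+       }
+     }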
+
+ [id="plugins-{type}s-{plugin}-topic_id"]
+ ===== `topic_id`
+
+ * This is a required setting.
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The topic to produce messages to.
+
+ [id="plugins-{type}s-{plugin}-value_serializer"]
+ ===== `value_serializer`
+
+ * Value type is <<string,string>>
+ * Default value is `"org.apache.kafka.common.serialization.StringSerializer"`
+
+ Serializer class for the value of the message.
+
+
+ [id="plugins-{type}s-{plugin}-common-options"]
+ include::{include_path}/{type}.asciidoc[]
+
+ :default_codec!:
@@ -0,0 +1,8 @@
+ # AUTOGENERATED BY THE GRADLE SCRIPT. DO NOT EDIT.
+
+ require 'jar_dependencies'
+ require_jar('org.apache.kafka', 'kafka-clients', '2.3.0')
+ require_jar('com.github.luben', 'zstd-jni', '1.4.2-1')
+ require_jar('org.slf4j', 'slf4j-api', '1.7.26')
+ require_jar('org.lz4', 'lz4-java', '1.6.0')
+ require_jar('org.xerial.snappy', 'snappy-java', '1.1.7.3')
@@ -0,0 +1,362 @@
+ require 'logstash/namespace'
+ require 'logstash/inputs/base'
+ require 'stud/interval'
+ require 'java'
+ require 'logstash-integration-kafka_jars.rb'
+
+ # This input will read events from a Kafka topic. It uses the consumer API
+ # provided by the Kafka client to read messages from the broker.
+ #
+ # Here's a compatibility matrix that shows the Kafka client versions that are compatible with each combination
+ # of Logstash and the Kafka input plugin:
+ #
+ # [options="header"]
+ # |==========================================================
+ # |Kafka Client Version |Logstash Version |Plugin Version |Why?
+ # |0.8 |2.0.0 - 2.x.x |<3.0.0 |Legacy, 0.8 is still popular
+ # |0.9 |2.0.0 - 2.3.x | 3.x.x |Works with the old Ruby Event API (`event['product']['price'] = 10`)
+ # |0.9 |2.4.x - 5.x.x | 4.x.x |Works with the new getter/setter APIs (`event.set('[product][price]', 10)`)
+ # |0.10.0.x |2.4.x - 5.x.x | 5.x.x |Not compatible with the <= 0.9 broker
+ # |0.10.1.x |2.4.x - 5.x.x | 6.x.x |
+ # |==========================================================
+ #
+ # NOTE: We recommend that you use matching Kafka client and broker versions. During upgrades, you should
+ # upgrade brokers before clients because brokers target backwards compatibility. For example, the 0.9 broker
+ # is compatible with both the 0.8 consumer and 0.9 consumer APIs, but not the other way around.
+ #
+ # This input supports connecting to Kafka over:
+ #
+ # * SSL (requires plugin version 3.0.0 or later)
+ # * Kerberos SASL (requires plugin version 5.1.0 or later)
+ #
+ # By default security is disabled but can be turned on as needed.
+ #
+ # The Logstash Kafka consumer handles group management and uses the default offset management
+ # strategy using Kafka topics.
+ #
+ # Logstash instances by default form a single logical group to subscribe to Kafka topics.
+ # Each Logstash Kafka consumer can run multiple threads to increase read throughput. Alternatively,
+ # you could run multiple Logstash instances with the same `group_id` to spread the load across
+ # physical machines. Messages in a topic will be distributed to all Logstash instances with
+ # the same `group_id`.
+ #
+ # Ideally you should have as many threads as the number of partitions for a perfect balance --
+ # more threads than partitions means that some threads will be idle.
+ #
+ # For more information see http://kafka.apache.org/documentation.html#theconsumer
+ #
+ # Kafka consumer configuration: http://kafka.apache.org/documentation.html#consumerconfigs
+ #
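+ # As a quick orientation, a minimal pipeline using this input might look like the
+ # following sketch (topic and group names are illustrative):
+ #
+ # [source,ruby]
+ # ----------------------------------
+ # input {
+ #   kafka {
+ #     bootstrap_servers => "localhost:9092"
+ #     topics => ["example-topic"]
+ #     group_id => "logstash"
+ #     consumer_threads => 4   # ideally matches the topic's partition count
+ #   }
+ # }
+ # ----------------------------------
+ #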
+ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
+   config_name 'kafka'
+
+   default :codec, 'plain'
+
+   # The frequency in milliseconds that the consumer offsets are committed to Kafka.
+   config :auto_commit_interval_ms, :validate => :string, :default => "5000"
+   # What to do when there is no initial offset in Kafka or if an offset is out of range:
+   #
+   # * earliest: automatically reset the offset to the earliest offset
+   # * latest: automatically reset the offset to the latest offset
+   # * none: throw exception to the consumer if no previous offset is found for the consumer's group
+   # * anything else: throw exception to the consumer.
+   config :auto_offset_reset, :validate => :string
+   # A list of URLs of Kafka instances to use for establishing the initial connection to the cluster.
+   # This list should be in the form of `host1:port1,host2:port2`. These urls are just used
+   # for the initial connection to discover the full cluster membership (which may change dynamically)
+   # so this list need not contain the full set of servers (you may want more than one, though, in
+   # case a server is down).
+   config :bootstrap_servers, :validate => :string, :default => "localhost:9092"
+   # Automatically check the CRC32 of the records consumed. This ensures no on-the-wire or on-disk
+   # corruption to the messages occurred. This check adds some overhead, so it may be
+   # disabled in cases seeking extreme performance.
+   config :check_crcs, :validate => :string
+   # The id string to pass to the server when making requests. The purpose of this
+   # is to be able to track the source of requests beyond just ip/port by allowing
+   # a logical application name to be included.
+   config :client_id, :validate => :string, :default => "logstash"
+   # Close idle connections after the number of milliseconds specified by this config.
+   config :connections_max_idle_ms, :validate => :string
+   # Ideally you should have as many threads as the number of partitions for a perfect
+   # balance -- more threads than partitions means that some threads will be idle
+   config :consumer_threads, :validate => :number, :default => 1
+   # If true, periodically commit to Kafka the offsets of messages already returned by the consumer.
+   # This committed offset will be used when the process fails as the position from
+   # which the consumption will begin.
+   config :enable_auto_commit, :validate => :string, :default => "true"
+   # Whether records from internal topics (such as offsets) should be exposed to the consumer.
+   # If set to true the only way to receive records from an internal topic is subscribing to it.
+   config :exclude_internal_topics, :validate => :string
+   # The maximum amount of data the server should return for a fetch request. This is not an
+   # absolute maximum: if the first message in the first non-empty partition of the fetch is larger
+   # than this value, the message will still be returned to ensure that the consumer can make progress.
+   config :fetch_max_bytes, :validate => :string
+   # The maximum amount of time the server will block before answering the fetch request if
+   # there isn't sufficient data to immediately satisfy `fetch_min_bytes`. This
+   # should be less than or equal to the timeout used in `poll_timeout_ms`
+   config :fetch_max_wait_ms, :validate => :string
+   # The minimum amount of data the server should return for a fetch request. If insufficient
+   # data is available the request will wait for that much data to accumulate
+   # before answering the request.
+   config :fetch_min_bytes, :validate => :string
+   # The identifier of the group this consumer belongs to. A consumer group is a single logical subscriber
+   # that happens to be made up of multiple processors. Messages in a topic will be distributed to all
+   # Logstash instances with the same `group_id`
+   config :group_id, :validate => :string, :default => "logstash"
+   # The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
+   # that the consumer's session stays active and to facilitate rebalancing when new
+   # consumers join or leave the group. The value must be set lower than
+   # `session.timeout.ms`, but typically should be set no higher than 1/3 of that value.
+   # It can be adjusted even lower to control the expected time for normal rebalances.
+   config :heartbeat_interval_ms, :validate => :string
+   # Java Class used to deserialize the record's key
+   config :key_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
+   # The maximum delay between invocations of poll() when using consumer group management. This places
+   # an upper bound on the amount of time that the consumer can be idle before fetching more records.
+   # If poll() is not called before expiration of this timeout, then the consumer is considered failed and
+   # the group will rebalance in order to reassign the partitions to another member.
+   # The value of the configuration `request_timeout_ms` must always be larger than `max_poll_interval_ms`.
+   config :max_poll_interval_ms, :validate => :string
+   # The maximum amount of data per-partition the server will return. The maximum total memory used for a
+   # request will be <code>#partitions * max.partition.fetch.bytes</code>. This size must be at least
+   # as large as the maximum message size the server allows or else it is possible for the producer to
+   # send messages larger than the consumer can fetch. If that happens, the consumer can get stuck trying
+   # to fetch a large message on a certain partition.
+   config :max_partition_fetch_bytes, :validate => :string
+   # The maximum number of records returned in a single call to poll().
+   config :max_poll_records, :validate => :string
+   # The period of time in milliseconds after which we force a refresh of metadata even if
+   # we haven't seen any partition leadership changes to proactively discover any new brokers or partitions
+   config :metadata_max_age_ms, :validate => :string
+   # The class name of the partition assignment strategy that the client will use to distribute
+   # partition ownership amongst consumer instances
+   config :partition_assignment_strategy, :validate => :string
+   # The size of the TCP receive buffer (SO_RCVBUF) to use when reading data.
+   config :receive_buffer_bytes, :validate => :string
+   # The amount of time to wait before attempting to reconnect to a given host.
+   # This avoids repeatedly connecting to a host in a tight loop.
+   # This backoff applies to all requests sent by the consumer to the broker.
+   config :reconnect_backoff_ms, :validate => :string
+   # The configuration controls the maximum amount of time the client will wait
+   # for the response of a request. If the response is not received before the timeout
+   # elapses the client will resend the request if necessary or fail the request if
+   # retries are exhausted.
+   config :request_timeout_ms, :validate => :string
+   # The amount of time to wait before attempting to retry a failed fetch request
+   # to a given topic partition. This avoids repeated fetching-and-failing in a tight loop.
+   config :retry_backoff_ms, :validate => :string
+   # The size of the TCP send buffer (SO_SNDBUF) to use when sending data
+   config :send_buffer_bytes, :validate => :string
+   # The timeout after which, if poll() is not invoked, the consumer is marked dead
+   # and a rebalance operation is triggered for the group identified by `group_id`
+   config :session_timeout_ms, :validate => :string
+   # Java Class used to deserialize the record's value
+   config :value_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
+   # A list of topics to subscribe to, defaults to ["logstash"].
+   config :topics, :validate => :array, :default => ["logstash"]
+   # A topic regex pattern to subscribe to.
+   # The topics configuration will be ignored when using this configuration.
+   config :topics_pattern, :validate => :string
+   # Time kafka consumer will wait to receive new messages from topics
+   config :poll_timeout_ms, :validate => :number, :default => 100
+   # The truststore type.
+   config :ssl_truststore_type, :validate => :string
+   # The JKS truststore path to validate the Kafka broker's certificate.
+   config :ssl_truststore_location, :validate => :path
+   # The truststore password
+   config :ssl_truststore_password, :validate => :password
+   # The keystore type.
+   config :ssl_keystore_type, :validate => :string
+   # If client authentication is required, this setting stores the keystore path.
+   config :ssl_keystore_location, :validate => :path
+   # If client authentication is required, this setting stores the keystore password
+   config :ssl_keystore_password, :validate => :password
+   # The password of the private key in the key store file.
+   config :ssl_key_password, :validate => :password
+   # Algorithm to use when verifying host. Set to "" to disable
+   config :ssl_endpoint_identification_algorithm, :validate => :string, :default => 'https'
+   # Security protocol to use, which can be one of PLAINTEXT, SSL, SASL_PLAINTEXT, SASL_SSL
+   config :security_protocol, :validate => ["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"], :default => "PLAINTEXT"
+   # http://kafka.apache.org/documentation.html#security_sasl[SASL mechanism] used for client connections.
+   # This may be any mechanism for which a security provider is available.
+   # GSSAPI is the default mechanism.
+   config :sasl_mechanism, :validate => :string, :default => "GSSAPI"
+   # The Kerberos principal name that Kafka broker runs as.
+   # This can be defined either in Kafka's JAAS config or in Kafka's config.
+   config :sasl_kerberos_service_name, :validate => :string
+   # The Java Authentication and Authorization Service (JAAS) API supplies user authentication and authorization
+   # services for Kafka. This setting provides the path to the JAAS file. Sample JAAS file for Kafka client:
+   # [source,java]
+   # ----------------------------------
+   # KafkaClient {
+   #   com.sun.security.auth.module.Krb5LoginModule required
+   #   useTicketCache=true
+   #   renewTicket=true
+   #   serviceName="kafka";
+   # };
+   # ----------------------------------
+   #
+   # Please note that specifying `jaas_path` and `kerberos_config` in the config file will add these
+   # to the global JVM system properties. This means if you have multiple Kafka inputs, all of them would be sharing the same
+   # `jaas_path` and `kerberos_config`. If this is not desirable, you would have to run separate instances of Logstash on
+   # different JVM instances.
+   config :jaas_path, :validate => :path
+   # JAAS configuration settings. This allows JAAS config to be a part of the plugin configuration and allows for different JAAS configuration per each plugin config.
+   config :sasl_jaas_config, :validate => :string
+   # Optional path to kerberos config file. This is krb5.conf style as detailed in https://web.mit.edu/kerberos/krb5-1.12/doc/admin/conf_files/krb5_conf.html
+   config :kerberos_config, :validate => :path
+   # Option to add Kafka metadata like topic, message size to the event.
+   # This will add fields under `[@metadata][kafka]` containing the following attributes:
+   # `topic`: The topic this message is associated with
+   # `consumer_group`: The consumer group used to read in this event
+   # `partition`: The partition this message is associated with
+   # `offset`: The offset from the partition this message is associated with
+   # `key`: A ByteBuffer containing the message key
+   # `timestamp`: The timestamp of this message
+   config :decorate_events, :validate => :boolean, :default => false
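+
+   # For example (a hypothetical pipeline), with `decorate_events => true` the Kafka
+   # metadata can be copied into the event proper before it reaches an output:
+   #
+   # [source,ruby]
+   # ----------------------------------
+   # filter {
+   #   mutate {
+   #     add_field => { "kafka_topic" => "%{[@metadata][kafka][topic]}" }
+   #   }
+   # }
+   # ----------------------------------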
+
+   public
+   def register
+     @runner_threads = []
+   end # def register
+
+   public
+   def run(logstash_queue)
+     @runner_consumers = consumer_threads.times.map { |i| create_consumer("#{client_id}-#{i}") }
+     @runner_threads = @runner_consumers.map { |consumer| thread_runner(logstash_queue, consumer) }
+     @runner_threads.each { |t| t.join }
+   end # def run
+
+   public
+   def stop
+     # if we have consumers, wake them up to unblock our runner threads
+     @runner_consumers && @runner_consumers.each(&:wakeup)
+   end
+
+   public
+   def kafka_consumers
+     @runner_consumers
+   end
+
+   private
+   def thread_runner(logstash_queue, consumer)
+     Thread.new do
+       begin
+         unless @topics_pattern.nil?
+           nooplistener = org.apache.kafka.clients.consumer.internals.NoOpConsumerRebalanceListener.new
+           pattern = java.util.regex.Pattern.compile(@topics_pattern)
+           consumer.subscribe(pattern, nooplistener)
+         else
+           consumer.subscribe(topics)
+         end
+         codec_instance = @codec.clone
+         while !stop?
+           records = consumer.poll(poll_timeout_ms)
+           next unless records.count > 0
+           for record in records do
+             codec_instance.decode(record.value.to_s) do |event|
+               decorate(event)
+               if @decorate_events
+                 event.set("[@metadata][kafka][topic]", record.topic)
+                 event.set("[@metadata][kafka][consumer_group]", @group_id)
+                 event.set("[@metadata][kafka][partition]", record.partition)
+                 event.set("[@metadata][kafka][offset]", record.offset)
+                 event.set("[@metadata][kafka][key]", record.key)
+                 event.set("[@metadata][kafka][timestamp]", record.timestamp)
+               end
+               logstash_queue << event
+             end
+           end
+           # Manual offset commit
+           if @enable_auto_commit == "false"
+             consumer.commitSync
+           end
+         end
+       rescue org.apache.kafka.common.errors.WakeupException => e
+         raise e if !stop?
+       ensure
+         consumer.close
+       end
+     end
+   end
+
+   private
+   def create_consumer(client_id)
+     begin
+       props = java.util.Properties.new
+       kafka = org.apache.kafka.clients.consumer.ConsumerConfig
+
+       props.put(kafka::AUTO_COMMIT_INTERVAL_MS_CONFIG, auto_commit_interval_ms)
+       props.put(kafka::AUTO_OFFSET_RESET_CONFIG, auto_offset_reset) unless auto_offset_reset.nil?
+       props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers)
+       props.put(kafka::CHECK_CRCS_CONFIG, check_crcs) unless check_crcs.nil?
+       props.put(kafka::CLIENT_ID_CONFIG, client_id)
+       props.put(kafka::CONNECTIONS_MAX_IDLE_MS_CONFIG, connections_max_idle_ms) unless connections_max_idle_ms.nil?
+       props.put(kafka::ENABLE_AUTO_COMMIT_CONFIG, enable_auto_commit)
+       props.put(kafka::EXCLUDE_INTERNAL_TOPICS_CONFIG, exclude_internal_topics) unless exclude_internal_topics.nil?
+       props.put(kafka::FETCH_MAX_BYTES_CONFIG, fetch_max_bytes) unless fetch_max_bytes.nil?
+       props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms) unless fetch_max_wait_ms.nil?
+       props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes) unless fetch_min_bytes.nil?
+       props.put(kafka::GROUP_ID_CONFIG, group_id)
+       props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms) unless heartbeat_interval_ms.nil?
+       props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
+       props.put(kafka::MAX_PARTITION_FETCH_BYTES_CONFIG, max_partition_fetch_bytes) unless max_partition_fetch_bytes.nil?
+       props.put(kafka::MAX_POLL_RECORDS_CONFIG, max_poll_records) unless max_poll_records.nil?
+       props.put(kafka::MAX_POLL_INTERVAL_MS_CONFIG, max_poll_interval_ms) unless max_poll_interval_ms.nil?
+       props.put(kafka::METADATA_MAX_AGE_CONFIG, metadata_max_age_ms) unless metadata_max_age_ms.nil?
+       props.put(kafka::PARTITION_ASSIGNMENT_STRATEGY_CONFIG, partition_assignment_strategy) unless partition_assignment_strategy.nil?
+       props.put(kafka::RECEIVE_BUFFER_CONFIG, receive_buffer_bytes) unless receive_buffer_bytes.nil?
+       props.put(kafka::RECONNECT_BACKOFF_MS_CONFIG, reconnect_backoff_ms) unless reconnect_backoff_ms.nil?
+       props.put(kafka::REQUEST_TIMEOUT_MS_CONFIG, request_timeout_ms) unless request_timeout_ms.nil?
+       props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms) unless retry_backoff_ms.nil?
+       props.put(kafka::SEND_BUFFER_CONFIG, send_buffer_bytes) unless send_buffer_bytes.nil?
+       props.put(kafka::SESSION_TIMEOUT_MS_CONFIG, session_timeout_ms) unless session_timeout_ms.nil?
+       props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, value_deserializer_class)
+
+       props.put("security.protocol", security_protocol) unless security_protocol.nil?
+
+       if security_protocol == "SSL"
+         set_trustore_keystore_config(props)
+       elsif security_protocol == "SASL_PLAINTEXT"
+         set_sasl_config(props)
+       elsif security_protocol == "SASL_SSL"
+         set_trustore_keystore_config(props)
+         set_sasl_config(props)
+       end
+
+       org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
+     rescue => e
+       logger.error("Unable to create Kafka consumer from given configuration",
+                    :kafka_error_message => e,
+                    :cause => e.respond_to?(:getCause) ? e.getCause() : nil)
+       raise e
+     end
+   end
+
+   def set_trustore_keystore_config(props)
+     props.put("ssl.truststore.type", ssl_truststore_type) unless ssl_truststore_type.nil?
+     props.put("ssl.truststore.location", ssl_truststore_location) unless ssl_truststore_location.nil?
+     props.put("ssl.truststore.password", ssl_truststore_password.value) unless ssl_truststore_password.nil?
+
+     # Client auth stuff
+     props.put("ssl.keystore.type", ssl_keystore_type) unless ssl_keystore_type.nil?
+     props.put("ssl.key.password", ssl_key_password.value) unless ssl_key_password.nil?
+     props.put("ssl.keystore.location", ssl_keystore_location) unless ssl_keystore_location.nil?
+     props.put("ssl.keystore.password", ssl_keystore_password.value) unless ssl_keystore_password.nil?
+     props.put("ssl.endpoint.identification.algorithm", ssl_endpoint_identification_algorithm) unless ssl_endpoint_identification_algorithm.nil?
+   end
+
+   def set_sasl_config(props)
+     java.lang.System.setProperty("java.security.auth.login.config", jaas_path) unless jaas_path.nil?
+     java.lang.System.setProperty("java.security.krb5.conf", kerberos_config) unless kerberos_config.nil?
+
+     props.put("sasl.mechanism", sasl_mechanism)
+     if sasl_mechanism == "GSSAPI" && sasl_kerberos_service_name.nil?
+       raise LogStash::ConfigurationError, "sasl_kerberos_service_name must be specified when SASL mechanism is GSSAPI"
+     end
+
+     props.put("sasl.kerberos.service.name", sasl_kerberos_service_name) unless sasl_kerberos_service_name.nil?
+     props.put("sasl.jaas.config", sasl_jaas_config) unless sasl_jaas_config.nil?
+   end
+ end #class LogStash::Inputs::Kafka