logstash-integration-kafka 10.9.0-java → 11.3.2-java

Files changed (41)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -2
  3. data/DEVELOPER.md +1 -1
  4. data/docs/index.asciidoc +1 -1
  5. data/docs/input-kafka.asciidoc +96 -8
  6. data/docs/output-kafka.asciidoc +20 -5
  7. data/lib/logstash/inputs/kafka.rb +42 -21
  8. data/lib/logstash/outputs/kafka.rb +7 -12
  9. data/lib/logstash/plugin_mixins/kafka/avro_schema_registry.rb +139 -0
  10. data/lib/logstash/plugin_mixins/kafka/common.rb +55 -0
  11. data/lib/logstash-integration-kafka_jars.rb +9 -14
  12. data/logstash-integration-kafka.gemspec +2 -2
  13. data/spec/integration/inputs/kafka_spec.rb +184 -20
  14. data/spec/integration/outputs/kafka_spec.rb +21 -1
  15. data/spec/unit/inputs/kafka_spec.rb +28 -5
  16. data/spec/unit/outputs/kafka_spec.rb +8 -0
  17. data/vendor/jar-dependencies/com/github/luben/zstd-jni/1.5.5-4/zstd-jni-1.5.5-4.jar +0 -0
  18. data/vendor/jar-dependencies/io/confluent/kafka-avro-serializer/7.4.0/kafka-avro-serializer-7.4.0.jar +0 -0
  19. data/vendor/jar-dependencies/io/confluent/kafka-schema-registry-client/7.4.0/kafka-schema-registry-client-7.4.0.jar +0 -0
  20. data/vendor/jar-dependencies/io/confluent/kafka-schema-serializer/7.4.0/kafka-schema-serializer-7.4.0.jar +0 -0
  21. data/vendor/jar-dependencies/org/apache/avro/avro/1.11.3/avro-1.11.3.jar +0 -0
  22. data/vendor/jar-dependencies/org/apache/kafka/kafka-clients/3.4.1/kafka-clients-3.4.1.jar +0 -0
  23. data/vendor/jar-dependencies/org/lz4/lz4-java/1.8.0/lz4-java-1.8.0.jar +0 -0
  24. data/vendor/jar-dependencies/org/slf4j/slf4j-api/{1.7.30/slf4j-api-1.7.30.jar → 1.7.36/slf4j-api-1.7.36.jar} +0 -0
  25. data/vendor/jar-dependencies/org/xerial/snappy/snappy-java/1.1.10.5/snappy-java-1.1.10.5.jar +0 -0
  26. metadata +16 -21
  27. data/lib/logstash/plugin_mixins/common.rb +0 -107
  28. data/lib/logstash/plugin_mixins/kafka_support.rb +0 -29
  29. data/vendor/jar-dependencies/com/github/luben/zstd-jni/1.4.4-7/zstd-jni-1.4.4-7.jar +0 -0
  30. data/vendor/jar-dependencies/io/confluent/common-config/5.5.1/common-config-5.5.1.jar +0 -0
  31. data/vendor/jar-dependencies/io/confluent/common-utils/5.5.1/common-utils-5.5.1.jar +0 -0
  32. data/vendor/jar-dependencies/io/confluent/kafka-avro-serializer/5.5.1/kafka-avro-serializer-5.5.1.jar +0 -0
  33. data/vendor/jar-dependencies/io/confluent/kafka-schema-registry-client/5.5.1/kafka-schema-registry-client-5.5.1.jar +0 -0
  34. data/vendor/jar-dependencies/io/confluent/kafka-schema-serializer/5.5.1/kafka-schema-serializer-5.5.1.jar +0 -0
  35. data/vendor/jar-dependencies/javax/ws/rs/javax.ws.rs-api/2.1.1/javax.ws.rs-api-2.1.1.jar +0 -0
  36. data/vendor/jar-dependencies/org/apache/avro/avro/1.9.2/avro-1.9.2.jar +0 -0
  37. data/vendor/jar-dependencies/org/apache/kafka/kafka-clients/2.5.1/kafka-clients-2.5.1.jar +0 -0
  38. data/vendor/jar-dependencies/org/apache/kafka/kafka_2.12/2.5.1/kafka_2.12-2.5.1.jar +0 -0
  39. data/vendor/jar-dependencies/org/glassfish/jersey/core/jersey-common/2.33/jersey-common-2.33.jar +0 -0
  40. data/vendor/jar-dependencies/org/lz4/lz4-java/1.7.1/lz4-java-1.7.1.jar +0 -0
  41. data/vendor/jar-dependencies/org/xerial/snappy/snappy-java/1.1.7.3/snappy-java-1.1.7.3.jar +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: df9c89cdfcc2db6702409ec45ffb8d9f8f2b9274304889df1527e0697ccfcf95
- data.tar.gz: e907ad2e277d27c8cdbe98ebd203af86a3505a85574d64ace4402607c075a69e
+ metadata.gz: cd3536a455fdf7a64f5882e81c0648022d572660d4dc45841249374c1cd7e406
+ data.tar.gz: 26281f8584e29961625c8e385cfd599fd43444cbedd84d191b5542f603d00701
  SHA512:
- metadata.gz: bac93eb957af9028a6efc6e31a66c94818ae61333fa738daa6606abdd325dbea206fea2cd905d891f2b341a7bc983f8eaf5a5471015ac9548bc902f941b4a0d9
- data.tar.gz: 456739a2409ef5a42f007a23c8d0dbfceb3518e8e65b528c0f48266f2a219c2415a83a507fdab3ba028cbc5493d645c080ce191a0d39d7c1787557abded9a0e1
+ metadata.gz: 226f3c894edb182246d36bc283f7fc8d5d8471d9d411b98054a3770f7e414ca321ddbe401012c53ff5bbf4fd74da0340193153ebf7204a74e03802dc8c3df8ad
+ data.tar.gz: 12a5454b826df30697e4f505e96893687202b8cd17536599a5832a518653274218a278d0139c3150f7c48806e823251f91f3b583ee9b60f828104b5e052fea1b
data/CHANGELOG.md CHANGED
@@ -1,6 +1,44 @@
+ ## 11.3.2
+ - Fix: update Avro library [#150](https://api.github.com/repos/logstash-plugins/logstash-integration-kafka/pulls/150)
+
+ ## 11.3.1
+ - Fix: update snappy dependency [#148](https://github.com/logstash-plugins/logstash-integration-kafka/pull/148)
+
+ ## 11.3.0
+ - Bump kafka client to 3.4.1 [#145](https://github.com/logstash-plugins/logstash-integration-kafka/pull/145)
+
+ ## 11.2.1
+ - Fix nil exception to empty headers of record during event metadata assignment [#140](https://github.com/logstash-plugins/logstash-integration-kafka/pull/140)
+
+ ## 11.2.0
+ - Added TLS truststore and keystore settings specifically to access the schema registry [#137](https://github.com/logstash-plugins/logstash-integration-kafka/pull/137)
+
+ ## 11.1.0
+ - Added config `group_instance_id` to use the Kafka's consumer static membership feature [#135](https://github.com/logstash-plugins/logstash-integration-kafka/pull/135)
+
+ ## 11.0.0
+ - Changed Kafka client to 3.3.1, requires Logstash >= 8.3.0.
+ - Deprecated `default` value for setting `client_dns_lookup` forcing to `use_all_dns_ips` when explicitly used [#130](https://github.com/logstash-plugins/logstash-integration-kafka/pull/130)
+ - Changed the consumer's poll from using the one that blocks on metadata retrieval to the one that doesn't [#136](https://github.com/logstash-plugins/logstash-integration-kafka/pull/133)
+
+ ## 10.12.1
+ - Fix: update Avro library on 10.x [#149](https://api.github.com/repos/logstash-plugins/logstash-integration-kafka/pulls/149)
+
+ ## 10.12.0
+ - bump kafka client to 2.8.1 [#115](https://github.com/logstash-plugins/logstash-integration-kafka/pull/115)
+
+ ## 10.11.0
+ - Feat: added connections_max_idle_ms setting for output [#118](https://github.com/logstash-plugins/logstash-integration-kafka/pull/118)
+ - Refactor: mixins to follow shared mixin module naming
+
+ ## 10.10.1
+ - Update CHANGELOG.md [#114](https://api.github.com/repos/logstash-plugins/logstash-integration-kafka/pulls/114)
+
+ ## 10.10.0
+ - Added config setting to enable 'zstd' compression in the Kafka output [#112](https://github.com/logstash-plugins/logstash-integration-kafka/pull/112)
+
  ## 10.9.0
  - Refactor: leverage codec when using schema registry [#106](https://github.com/logstash-plugins/logstash-integration-kafka/pull/106)
-
  Previously using `schema_registry_url` parsed the payload as JSON even if `codec => 'plain'` was set, this is no longer the case.
 
  ## 10.8.2
@@ -87,7 +125,6 @@
  - Fix links in changelog pointing to stand-alone plugin changelogs.
  - Refactor: scope java_import to plugin class
 
-
  ## 10.0.0
  - Initial release of the Kafka Integration Plugin, which combines
  previously-separate Kafka plugins and shared dependencies into a single
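The 11.0.0 entry above deprecates `client_dns_lookup => "default"`. As a minimal sketch (broker address and topic are placeholders, not from this diff), a pipeline that previously relied on the removed `default` lookup mode would now pin the replacement value explicitly:

    input {
      kafka {
        bootstrap_servers => "kafka:9092"       # placeholder broker address
        topics            => ["logstash"]       # placeholder topic
        # "default" is deprecated with the Kafka 3 client; when set it is remapped
        # to "use_all_dns_ips", so configure the replacement value directly.
        client_dns_lookup => "use_all_dns_ips"
      }
    }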
data/DEVELOPER.md CHANGED
@@ -62,7 +62,7 @@ See http://kafka.apache.org/documentation.html#producerconfigs for details about
  kafka {
  topic_id => ... # string (required), The topic to produce the messages to
  broker_list => ... # string (optional), default: "localhost:9092", This is for bootstrapping and the producer will only use it for getting metadata
- compression_codec => ... # string (optional), one of ["none", "gzip", "snappy"], default: "none"
+ compression_codec => ... # string (optional), one of ["none", "gzip", "snappy", "lz4", "zstd"], default: "none"
  compressed_topics => ... # string (optional), default: "", This parameter allows you to set whether compression should be turned on for particular
  request_required_acks => ... # number (optional), one of [-1, 0, 1], default: 0, This value controls when a produce request is considered completed
  serializer_class => ... # string, (optional) default: "kafka.serializer.StringEncoder", The serializer class for messages. The default encoder takes a byte[] and returns the same byte[]
data/docs/index.asciidoc CHANGED
@@ -1,7 +1,7 @@
  :plugin: kafka
  :type: integration
  :no_codec:
- :kafka_client: 2.5.1
+ :kafka_client: 3.4.1
 
  ///////////////////////////////////////////
  START - GENERATED VARIABLES, DO NOT EDIT!
data/docs/input-kafka.asciidoc CHANGED
@@ -2,8 +2,8 @@
  :plugin: kafka
  :type: input
  :default_codec: plain
- :kafka_client: 2.5
- :kafka_client_doc: 25
+ :kafka_client: 3.4
+ :kafka_client_doc: 34
 
  ///////////////////////////////////////////
  START - GENERATED VARIABLES, DO NOT EDIT!
@@ -113,6 +113,7 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
  | <<plugins-{type}s-{plugin}-fetch_max_wait_ms>> |<<number,number>>|No
  | <<plugins-{type}s-{plugin}-fetch_min_bytes>> |<<number,number>>|No
  | <<plugins-{type}s-{plugin}-group_id>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-group_instance_id>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-heartbeat_interval_ms>> |<<number,number>>|No
  | <<plugins-{type}s-{plugin}-isolation_level>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
@@ -134,6 +135,12 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
  | <<plugins-{type}s-{plugin}-schema_registry_key>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-schema_registry_proxy>> |<<uri,uri>>|No
  | <<plugins-{type}s-{plugin}-schema_registry_secret>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_keystore_location>> |a valid filesystem path|No
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_keystore_password>> |<<password,password>>|No
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_keystore_type>> |<<string,string>>, one of `["jks", "PKCS12"]`|No
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_truststore_location>> |a valid filesystem path|No
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_truststore_password>> |<<password,password>>|No
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_truststore_type>> |<<string,string>>, one of `["jks", "PKCS12"]`|No
  | <<plugins-{type}s-{plugin}-schema_registry_url>> |<<uri,uri>>|No
  | <<plugins-{type}s-{plugin}-schema_registry_validation>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-security_protocol>> |<<string,string>>, one of `["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"]`|No
@@ -143,10 +150,10 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
  | <<plugins-{type}s-{plugin}-ssl_key_password>> |<<password,password>>|No
  | <<plugins-{type}s-{plugin}-ssl_keystore_location>> |a valid filesystem path|No
  | <<plugins-{type}s-{plugin}-ssl_keystore_password>> |<<password,password>>|No
- | <<plugins-{type}s-{plugin}-ssl_keystore_type>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-ssl_keystore_type>> |<<string,string>>, one of `["jks", "PKCS12"]`|No
  | <<plugins-{type}s-{plugin}-ssl_truststore_location>> |a valid filesystem path|No
  | <<plugins-{type}s-{plugin}-ssl_truststore_password>> |<<password,password>>|No
- | <<plugins-{type}s-{plugin}-ssl_truststore_type>> |<<string,string>>|No
+ | <<plugins-{type}s-{plugin}-ssl_truststore_type>> |<<string,string>>, one of `["jks", "PKCS12"]`|No
  | <<plugins-{type}s-{plugin}-topics>> |<<array,array>>|No
  | <<plugins-{type}s-{plugin}-topics_pattern>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-value_deserializer_class>> |<<string,string>>|No
@@ -211,6 +218,12 @@ IP addresses for a hostname, they will all be attempted to connect to before fai
  connection. If the value is `resolve_canonical_bootstrap_servers_only` each entry will be
  resolved and expanded into a list of canonical names.
 
+ [NOTE]
+ ====
+ Starting from Kafka 3 the `default` value for `client.dns.lookup` has been removed.
+ If explicitly configured it falls back to `use_all_dns_ips`.
+ ====
+
  [id="plugins-{type}s-{plugin}-client_id"]
  ===== `client_id`
 
@@ -338,6 +351,28 @@ NOTE: In cases when multiple inputs are being used in a single pipeline, reading
  it's essential to set a different `group_id => ...` for each input. Setting a unique `client_id => ...`
  is also recommended.
 
+ [id="plugins-{type}s-{plugin}-group_instance_id"]
+ ===== `group_instance_id`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The static membership identifier for this Logstash Kafka consumer. Static membership feature was introduced in
+ https://cwiki.apache.org/confluence/display/KAFKA/KIP-345%3A+Introduce+static+membership+protocol+to+reduce+consumer+rebalances[KIP-345],
+ available under Kafka property `group.instance.id`.
+ Its purpose is to avoid rebalances in situations in which a lot of data
+ has to be forwarded after a consumer goes offline.
+ This feature mitigates cases where the service state is heavy and the rebalance of one topic partition from instance
+ A to B would cause a huge amount of data to be transferred.
+ A client that goes offline/online frequently can avoid frequent and heavy rebalances by using this option.
+
+ NOTE: The `group_instance_id` setting must be unique across all the clients belonging to the same <<plugins-{type}s-{plugin}-group_id>>.
+ Otherwise, another client connecting with same `group.instance.id` value would cause the oldest instance to be disconnected.
+ You can set this value to use information such as a hostname, an IP, or anything that uniquely identifies the client application.
+
+ NOTE: In cases when multiple threads are configured and `consumer_threads` is greater than one, a suffix is appended to
+ the `group_instance_id` to avoid collisions.
+
  [id="plugins-{type}s-{plugin}-heartbeat_interval_ms"]
  ===== `heartbeat_interval_ms`
 
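To make the static-membership description above concrete, a hedged sketch (host label and topic are hypothetical) in which each Logstash instance gets its own `group_instance_id`; with `consumer_threads => 2` the plugin appends a per-consumer suffix, so the broker would see members such as `logstash-host-a-0` and `logstash-host-a-1`:

    input {
      kafka {
        bootstrap_servers => "kafka:9092"         # placeholder broker address
        topics            => ["events"]           # placeholder topic
        group_id          => "logstash"
        group_instance_id => "logstash-host-a"    # must be unique within the consumer group
        consumer_threads  => 2                    # a "-0"/"-1" suffix is appended per consumer
      }
    }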
@@ -569,6 +604,54 @@ Set the address of a forward HTTP proxy. An empty string is treated as if proxy
 
  Set the password for basic authorization to access remote Schema Registry.
 
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_keystore_location"]
+ ===== `schema_registry_ssl_keystore_location`
+
+ * Value type is <<path,path>>
+ * There is no default value for this setting.
+
+ If schema registry client authentication is required, this setting stores the keystore path.
+
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_keystore_password"]
+ ===== `schema_registry_ssl_keystore_password`
+
+ * Value type is <<password,password>>
+ * There is no default value for this setting.
+
+ If schema registry authentication is required, this setting stores the keystore password.
+
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_keystore_type"]
+ ===== `schema_registry_ssl_keystore_type`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The format of the keystore file. It must be either `jks` or `PKCS12`.
+
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_truststore_location"]
+ ===== `schema_registry_ssl_truststore_location`
+
+ * Value type is <<path,path>>
+ * There is no default value for this setting.
+
+ The truststore path to validate the schema registry's certificate.
+
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_truststore_password"]
+ ===== `schema_registry_ssl_truststore_password`
+
+ * Value type is <<password,password>>
+ * There is no default value for this setting.
+
+ The schema registry truststore password.
+
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_truststore_type"]
+ ===== `schema_registry_ssl_truststore_type`
+
+ * Value type is <<string,string>>
+ * There is no default value for this setting.
+
+ The format of the schema registry's truststore file. It must be either `jks` or `PKCS12`.
+
  [id="plugins-{type}s-{plugin}-schema_registry_url"]
  ===== `schema_registry_url`
 
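Taken together, the new settings let an input talk to a TLS-protected schema registry. A sketch under assumed URLs, paths, and passwords (all placeholders); the keystore block is only needed when the registry requires client (mutual TLS) authentication:

    input {
      kafka {
        bootstrap_servers      => "kafka:9093"                                     # placeholder broker address
        topics                 => ["avro-events"]                                  # placeholder topic
        schema_registry_url    => "https://schema-registry.example.com:8081"       # placeholder registry URL
        schema_registry_key    => "registry-user"                                  # optional basic-auth credentials
        schema_registry_secret => "registry-password"
        # trust the registry's server certificate
        schema_registry_ssl_truststore_location => "/etc/logstash/registry.truststore.jks"
        schema_registry_ssl_truststore_password => "changeit"
        schema_registry_ssl_truststore_type     => "jks"
        # client authentication towards the registry, if required
        schema_registry_ssl_keystore_location   => "/etc/logstash/registry.keystore.jks"
        schema_registry_ssl_keystore_password   => "changeit"
        schema_registry_ssl_keystore_type       => "jks"
      }
    }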
@@ -659,7 +742,7 @@ If client authentication is required, this setting stores the keystore password
  * Value type is <<string,string>>
  * There is no default value for this setting.
 
- The keystore type.
+ The format of the keystore file. It must be either `jks` or `PKCS12`.
 
  [id="plugins-{type}s-{plugin}-ssl_truststore_location"]
  ===== `ssl_truststore_location`
@@ -683,7 +766,7 @@ The truststore password.
  * Value type is <<string,string>>
  * There is no default value for this setting.
 
- The truststore type.
+ The format of the truststore file. It must be either `jks` or `PKCS12`.
 
  [id="plugins-{type}s-{plugin}-topics"]
  ===== `topics`
@@ -699,8 +782,13 @@ A list of topics to subscribe to, defaults to ["logstash"].
  * Value type is <<string,string>>
  * There is no default value for this setting.
 
- A topic regex pattern to subscribe to.
- The topics configuration will be ignored when using this configuration.
+ A topic regular expression pattern to subscribe to.
+
+ Filtering by a regular expression is done by retrieving the full list of topic names from the broker and applying the pattern locally. When used with brokers with a lot of topics this operation could be very slow, especially if there are a lot of consumers.
+
+ NOTE: When the broker has some topics configured with ACL rules and they miss the DESCRIBE permission, then the subscription
+ happens but on the broker side it is logged that the subscription of some topics was denied to the configured user.
+
 
  [id="plugins-{type}s-{plugin}-value_deserializer_class"]
  ===== `value_deserializer_class`
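For instance, a pattern subscription that picks up every topic whose name starts with `app-` might look like the sketch below (topic naming is hypothetical). Because the full topic list is fetched from the broker and filtered locally, this works best on clusters with a moderate number of topics:

    input {
      kafka {
        bootstrap_servers => "kafka:9092"       # placeholder broker address
        topics_pattern    => "app-.*"           # regular expression matched against all topic names
        group_id          => "logstash-apps"    # placeholder consumer group
      }
    }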
data/docs/output-kafka.asciidoc CHANGED
@@ -2,8 +2,8 @@
  :plugin: kafka
  :type: output
  :default_codec: plain
- :kafka_client: 2.5
- :kafka_client_doc: 25
+ :kafka_client: 3.4
+ :kafka_client_doc: 34
 
  ///////////////////////////////////////////
  START - GENERATED VARIABLES, DO NOT EDIT!
@@ -84,7 +84,8 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
  | <<plugins-{type}s-{plugin}-buffer_memory>> |<<number,number>>|No
  | <<plugins-{type}s-{plugin}-client_dns_lookup>> |<<string,string>>|No
  | <<plugins-{type}s-{plugin}-client_id>> |<<string,string>>|No
- | <<plugins-{type}s-{plugin}-compression_type>> |<<string,string>>, one of `["none", "gzip", "snappy", "lz4"]`|No
+ | <<plugins-{type}s-{plugin}-compression_type>> |<<string,string>>, one of `["none", "gzip", "snappy", "lz4", "zstd"]`|No
+ | <<plugins-{type}s-{plugin}-connections_max_idle_ms>> |<<number,number>>|No
  | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
  | <<plugins-{type}s-{plugin}-kerberos_config>> |a valid filesystem path|No
  | <<plugins-{type}s-{plugin}-key_serializer>> |<<string,string>>|No
@@ -180,6 +181,12 @@ all IP addresses returned for a hostname before failing the connection.
  If set to `resolve_canonical_bootstrap_servers_only`, each entry will be
  resolved and expanded into a list of canonical names.
 
+ [NOTE]
+ ====
+ Starting from Kafka 3 the `default` value for `client.dns.lookup` has been removed.
+ If explicitly configured it falls back to `use_all_dns_ips`.
+ ====
+
  [id="plugins-{type}s-{plugin}-client_id"]
  ===== `client_id`
 
@@ -193,11 +200,19 @@ ip/port by allowing a logical application name to be included with the request
  [id="plugins-{type}s-{plugin}-compression_type"]
  ===== `compression_type`
 
- * Value can be any of: `none`, `gzip`, `snappy`, `lz4`
+ * Value can be any of: `none`, `gzip`, `snappy`, `lz4`, `zstd`
  * Default value is `"none"`
 
  The compression type for all data generated by the producer.
- The default is none (i.e. no compression). Valid values are none, gzip, snappy, or lz4.
+ The default is none (meaning no compression). Valid values are none, gzip, snappy, lz4, or zstd.
+
+ [id="plugins-{type}s-{plugin}-connections_max_idle_ms"]
+ ===== `connections_max_idle_ms`
+
+ * Value type is <<number,number>>
+ * Default value is `540000` milliseconds (9 minutes).
+
+ Close idle connections after the number of milliseconds specified by this config.
 
  [id="plugins-{type}s-{plugin}-jaas_path"]
  ===== `jaas_path`
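A sketch of an output combining the newly documented options (broker address and topic are placeholders): `zstd` compression plus an idle-connection timeout shorter than the 9-minute Kafka default:

    output {
      kafka {
        bootstrap_servers       => "kafka:9092"         # placeholder broker address
        topic_id                => "logstash-out"       # placeholder topic
        compression_type        => "zstd"               # new value alongside none/gzip/snappy/lz4
        connections_max_idle_ms => 60000                # close idle connections after one minute
      }
    }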
@@ -2,12 +2,11 @@ require 'logstash/namespace'
2
2
  require 'logstash/inputs/base'
3
3
  require 'stud/interval'
4
4
  require 'java'
5
- require 'logstash-integration-kafka_jars.rb'
6
- require 'logstash/plugin_mixins/kafka_support'
7
- require 'manticore'
8
5
  require "json"
9
6
  require "logstash/json"
10
- require_relative '../plugin_mixins/common'
7
+ require 'logstash-integration-kafka_jars.rb'
8
+ require 'logstash/plugin_mixins/kafka/common'
9
+ require 'logstash/plugin_mixins/kafka/avro_schema_registry'
11
10
  require 'logstash/plugin_mixins/deprecation_logger_support'
12
11
 
13
12
  # This input will read events from a Kafka topic. It uses the 0.10 version of
@@ -57,8 +56,8 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
57
56
 
58
57
  DEFAULT_DESERIALIZER_CLASS = "org.apache.kafka.common.serialization.StringDeserializer"
59
58
 
60
- include LogStash::PluginMixins::KafkaSupport
61
- include ::LogStash::PluginMixins::KafkaAvroSchemaRegistry
59
+ include LogStash::PluginMixins::Kafka::Common
60
+ include LogStash::PluginMixins::Kafka::AvroSchemaRegistry
62
61
  include LogStash::PluginMixins::DeprecationLoggerSupport
63
62
 
64
63
  config_name 'kafka'
@@ -93,13 +92,12 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
93
92
  # IP addresses for a hostname, they will all be attempted to connect to before failing the
94
93
  # connection. If the value is `resolve_canonical_bootstrap_servers_only` each entry will be
95
94
  # resolved and expanded into a list of canonical names.
96
- config :client_dns_lookup, :validate => ["default", "use_all_dns_ips", "resolve_canonical_bootstrap_servers_only"], :default => "default"
95
+ # Starting from Kafka 3 `default` value for `client.dns.lookup` value has been removed. If explicitly configured it fallbacks to `use_all_dns_ips`.
96
+ config :client_dns_lookup, :validate => ["default", "use_all_dns_ips", "resolve_canonical_bootstrap_servers_only"], :default => "use_all_dns_ips"
97
97
  # The id string to pass to the server when making requests. The purpose of this
98
98
  # is to be able to track the source of requests beyond just ip/port by allowing
99
99
  # a logical application name to be included.
100
100
  config :client_id, :validate => :string, :default => "logstash"
101
- # Close idle connections after the number of milliseconds specified by this config.
102
- config :connections_max_idle_ms, :validate => :number, :default => 540_000 # (9m) Kafka default
103
101
  # Ideally you should have as many threads as the number of partitions for a perfect
104
102
  # balance — more threads than partitions means that some threads will be idle
105
103
  config :consumer_threads, :validate => :number, :default => 1
@@ -126,6 +124,11 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
126
124
  # that happens to be made up of multiple processors. Messages in a topic will be distributed to all
127
125
  # Logstash instances with the same `group_id`
128
126
  config :group_id, :validate => :string, :default => "logstash"
127
+ # Set a static group instance id used in static membership feature to avoid rebalancing when a
128
+ # consumer goes offline. If set and `consumer_threads` is greater than 1 then for each
129
+ # consumer crated by each thread an artificial suffix is appended to the user provided `group_instance_id`
130
+ # to avoid clashing.
131
+ config :group_instance_id, :validate => :string
129
132
  # The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
130
133
  # that the consumer's session stays active and to facilitate rebalancing when new
131
134
  # consumers join or leave the group. The value must be set lower than
@@ -138,7 +141,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
138
141
  # been aborted. Non-transactional messages will be returned unconditionally in either mode.
139
142
  config :isolation_level, :validate => ["read_uncommitted", "read_committed"], :default => "read_uncommitted" # Kafka default
140
143
  # Java Class used to deserialize the record's key
141
- config :key_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
144
+ config :key_deserializer_class, :validate => :string, :default => DEFAULT_DESERIALIZER_CLASS
142
145
  # The maximum delay between invocations of poll() when using consumer group management. This places
143
146
  # an upper bound on the amount of time that the consumer can be idle before fetching more records.
144
147
  # If poll() is not called before expiration of this timeout, then the consumer is considered failed and
@@ -152,9 +155,6 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
152
155
  config :max_partition_fetch_bytes, :validate => :number, :default => 1_048_576 # (1MB) Kafka default
153
156
  # The maximum number of records returned in a single call to poll().
154
157
  config :max_poll_records, :validate => :number, :default => 500 # Kafka default
155
- # The period of time in milliseconds after which we force a refresh of metadata even if
156
- # we haven't seen any partition leadership changes to proactively discover any new brokers or partitions
157
- config :metadata_max_age_ms, :validate => :number, :default => 300_000 # (5m) Kafka default
158
158
  # The name of the partition assignment strategy that the client uses to distribute
159
159
  # partition ownership amongst consumer instances, supported options are `range`,
160
160
  # `round_robin`, `sticky` and `cooperative_sticky`
@@ -167,10 +167,6 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
167
167
  # This avoids repeatedly connecting to a host in a tight loop.
168
168
  # This backoff applies to all connection attempts by the client to a broker.
169
169
  config :reconnect_backoff_ms, :validate => :number, :default => 50 # Kafka default
170
- # The configuration controls the maximum amount of time the client will wait for the response of a request.
171
- # If the response is not received before the timeout elapses the client will resend the request if necessary
172
- # or fail the request if retries are exhausted.
173
- config :request_timeout_ms, :validate => :number, :default => 40_000 # Kafka default
174
170
  # The amount of time to wait before attempting to retry a failed fetch request
175
171
  # to a given topic partition. This avoids repeated fetching-and-failing in a tight loop.
176
172
  config :retry_backoff_ms, :validate => :number, :default => 100 # Kafka default
@@ -267,6 +263,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
267
263
  def register
268
264
  @runner_threads = []
269
265
  @metadata_mode = extract_metadata_level(@decorate_events)
266
+ reassign_dns_lookup
270
267
  @pattern ||= java.util.regex.Pattern.compile(@topics_pattern) unless @topics_pattern.nil?
271
268
  check_schema_registry_parameters
272
269
  end
@@ -295,7 +292,10 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
295
292
 
296
293
  public
297
294
  def run(logstash_queue)
298
- @runner_consumers = consumer_threads.times.map { |i| subscribe(create_consumer("#{client_id}-#{i}")) }
295
+ @runner_consumers = consumer_threads.times.map do |i|
296
+ thread_group_instance_id = consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
297
+ subscribe(create_consumer("#{client_id}-#{i}", thread_group_instance_id))
298
+ end
299
299
  @runner_threads = @runner_consumers.map.with_index { |consumer, i| thread_runner(logstash_queue, consumer,
300
300
  "kafka-input-worker-#{client_id}-#{i}") }
301
301
  @runner_threads.each(&:start)
@@ -339,10 +339,13 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
339
339
  def do_poll(consumer)
340
340
  records = []
341
341
  begin
342
- records = consumer.poll(poll_timeout_ms)
342
+ records = consumer.poll(java.time.Duration.ofMillis(poll_timeout_ms))
343
343
  rescue org.apache.kafka.common.errors.WakeupException => e
344
344
  logger.debug("Wake up from poll", :kafka_error_message => e)
345
345
  raise e unless stop?
346
+ rescue org.apache.kafka.common.errors.FencedInstanceIdException => e
347
+ logger.error("Another consumer with same group.instance.id has connected", :original_error_message => e.message)
348
+ raise e unless stop?
346
349
  rescue => e
347
350
  logger.error("Unable to poll Kafka consumer",
348
351
  :kafka_error_message => e,
@@ -370,7 +373,9 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
370
373
  event.set("[@metadata][kafka][timestamp]", record.timestamp)
371
374
  end
372
375
  if @metadata_mode.include?(:headers)
373
- record.headers.each do |header|
376
+ record.headers
377
+ .select{|h| header_with_value(h) }
378
+ .each do |header|
374
379
  s = String.from_java_bytes(header.value)
375
380
  s.force_encoding(Encoding::UTF_8)
376
381
  if s.valid_encoding?
@@ -397,7 +402,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
397
402
  end
398
403
 
399
404
  private
400
- def create_consumer(client_id)
405
+ def create_consumer(client_id, group_instance_id)
401
406
  begin
402
407
  props = java.util.Properties.new
403
408
  kafka = org.apache.kafka.clients.consumer.ConsumerConfig
@@ -415,6 +420,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
415
420
  props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms.to_s) unless fetch_max_wait_ms.nil?
416
421
  props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes.to_s) unless fetch_min_bytes.nil?
417
422
  props.put(kafka::GROUP_ID_CONFIG, group_id)
423
+ props.put(kafka::GROUP_INSTANCE_ID_CONFIG, group_instance_id) unless group_instance_id.nil?
418
424
  props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms.to_s) unless heartbeat_interval_ms.nil?
419
425
  props.put(kafka::ISOLATION_LEVEL_CONFIG, isolation_level)
420
426
  props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
@@ -456,6 +462,17 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
456
462
  set_trustore_keystore_config(props)
457
463
  set_sasl_config(props)
458
464
  end
465
+ if schema_registry_ssl_truststore_location
466
+ props.put('schema.registry.ssl.truststore.location', schema_registry_ssl_truststore_location)
467
+ props.put('schema.registry.ssl.truststore.password', schema_registry_ssl_truststore_password.value)
468
+ props.put('schema.registry.ssl.truststore.type', schema_registry_ssl_truststore_type)
469
+ end
470
+
471
+ if schema_registry_ssl_keystore_location
472
+ props.put('schema.registry.ssl.keystore.location', schema_registry_ssl_keystore_location)
473
+ props.put('schema.registry.ssl.keystore.password', schema_registry_ssl_keystore_password.value)
474
+ props.put('schema.registry.ssl.keystore.type', schema_registry_ssl_keystore_type)
475
+ end
459
476
 
460
477
  org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
461
478
  rescue => e
@@ -484,4 +501,8 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
484
501
  end
485
502
  end
486
503
 
504
+ def header_with_value(header)
505
+ !header.nil? && !header.value.nil? && !header.key.nil?
506
+ end
507
+
487
508
  end #class LogStash::Inputs::Kafka
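The header-filtering change above only comes into play when record headers are copied into event metadata, which requires `decorate_events => extended`. A hedged sketch (topic and header key are placeholders, and the metadata path reflects the plugin's documented behavior) that keeps one Kafka header on the event, since `@metadata` is not part of the emitted document:

    input {
      kafka {
        bootstrap_servers => "kafka:9092"       # placeholder broker address
        topics            => ["events"]         # placeholder topic
        decorate_events   => "extended"         # adds [@metadata][kafka][headers][<key>] entries
      }
    }

    filter {
      mutate {
        # "trace_id" is a hypothetical header key used only for illustration
        add_field => { "trace_id" => "%{[@metadata][kafka][headers][trace_id]}" }
      }
    }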
data/lib/logstash/outputs/kafka.rb CHANGED
@@ -2,7 +2,7 @@ require 'logstash/namespace'
  require 'logstash/outputs/base'
  require 'java'
  require 'logstash-integration-kafka_jars.rb'
- require 'logstash/plugin_mixins/kafka_support'
+ require 'logstash/plugin_mixins/kafka/common'
 
  # Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on
  # the broker.
@@ -51,7 +51,7 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
 
  java_import org.apache.kafka.clients.producer.ProducerRecord
 
- include LogStash::PluginMixins::KafkaSupport
+ include LogStash::PluginMixins::Kafka::Common
 
  declare_threadsafe!
 
@@ -80,13 +80,14 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
  # The total bytes of memory the producer can use to buffer records waiting to be sent to the server.
  config :buffer_memory, :validate => :number, :default => 33_554_432 # (32M) Kafka default
  # The compression type for all data generated by the producer.
- # The default is none (i.e. no compression). Valid values are none, gzip, or snappy.
- config :compression_type, :validate => ["none", "gzip", "snappy", "lz4"], :default => "none"
+ # The default is none (i.e. no compression). Valid values are none, gzip, snappy, lz4 or zstd.
+ config :compression_type, :validate => ["none", "gzip", "snappy", "lz4", "zstd"], :default => "none"
  # How DNS lookups should be done. If set to `use_all_dns_ips`, when the lookup returns multiple
  # IP addresses for a hostname, they will all be attempted to connect to before failing the
  # connection. If the value is `resolve_canonical_bootstrap_servers_only` each entry will be
  # resolved and expanded into a list of canonical names.
- config :client_dns_lookup, :validate => ["default", "use_all_dns_ips", "resolve_canonical_bootstrap_servers_only"], :default => "default"
+ # Starting from Kafka 3 the `default` value for `client.dns.lookup` has been removed. If explicitly configured it falls back to `use_all_dns_ips`.
+ config :client_dns_lookup, :validate => ["default", "use_all_dns_ips", "resolve_canonical_bootstrap_servers_only"], :default => "use_all_dns_ips"
  # The id string to pass to the server when making requests.
  # The purpose of this is to be able to track the source of requests beyond just
  # ip/port by allowing a logical application name to be included with the request
@@ -107,19 +108,12 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
  config :message_key, :validate => :string
  # the timeout setting for initial metadata request to fetch topic metadata.
  config :metadata_fetch_timeout_ms, :validate => :number, :default => 60_000
- # the max time in milliseconds before a metadata refresh is forced.
- config :metadata_max_age_ms, :validate => :number, :default => 300_000 # (5m) Kafka default
  # Partitioner to use - can be `default`, `uniform_sticky`, `round_robin` or a fully qualified class name of a custom partitioner.
  config :partitioner, :validate => :string
  # The size of the TCP receive buffer to use when reading data
  config :receive_buffer_bytes, :validate => :number, :default => 32_768 # (32KB) Kafka default
  # The amount of time to wait before attempting to reconnect to a given host when a connection fails.
  config :reconnect_backoff_ms, :validate => :number, :default => 50 # Kafka default
- # The configuration controls the maximum amount of time the client will wait
- # for the response of a request. If the response is not received before the timeout
- # elapses the client will resend the request if necessary or fail the request if
- # retries are exhausted.
- config :request_timeout_ms, :validate => :number, :default => 40_000 # (40s) Kafka default
  # The default retry behavior is to retry until successful. To prevent data loss,
  # the use of this setting is discouraged.
  #
@@ -197,6 +191,7 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
  logger.warn("Kafka output is configured with finite retry. This instructs Logstash to LOSE DATA after a set number of send attempts fails. If you do not want to lose data if Kafka is down, then you must remove the retry setting.", :retries => @retries)
  end
 
+ reassign_dns_lookup
 
  @producer = create_producer
  if value_serializer == 'org.apache.kafka.common.serialization.StringSerializer'