logstash-integration-kafka 10.9.0-java → 11.3.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -2
  3. data/DEVELOPER.md +1 -1
  4. data/docs/index.asciidoc +1 -1
  5. data/docs/input-kafka.asciidoc +96 -8
  6. data/docs/output-kafka.asciidoc +20 -5
  7. data/lib/logstash/inputs/kafka.rb +42 -21
  8. data/lib/logstash/outputs/kafka.rb +7 -12
  9. data/lib/logstash/plugin_mixins/kafka/avro_schema_registry.rb +139 -0
  10. data/lib/logstash/plugin_mixins/kafka/common.rb +55 -0
  11. data/lib/logstash-integration-kafka_jars.rb +9 -14
  12. data/logstash-integration-kafka.gemspec +2 -2
  13. data/spec/integration/inputs/kafka_spec.rb +184 -20
  14. data/spec/integration/outputs/kafka_spec.rb +21 -1
  15. data/spec/unit/inputs/kafka_spec.rb +28 -5
  16. data/spec/unit/outputs/kafka_spec.rb +8 -0
  17. data/vendor/jar-dependencies/com/github/luben/zstd-jni/1.5.5-4/zstd-jni-1.5.5-4.jar +0 -0
  18. data/vendor/jar-dependencies/io/confluent/kafka-avro-serializer/7.4.0/kafka-avro-serializer-7.4.0.jar +0 -0
  19. data/vendor/jar-dependencies/io/confluent/kafka-schema-registry-client/7.4.0/kafka-schema-registry-client-7.4.0.jar +0 -0
  20. data/vendor/jar-dependencies/io/confluent/kafka-schema-serializer/7.4.0/kafka-schema-serializer-7.4.0.jar +0 -0
  21. data/vendor/jar-dependencies/org/apache/avro/avro/1.11.3/avro-1.11.3.jar +0 -0
  22. data/vendor/jar-dependencies/org/apache/kafka/kafka-clients/3.4.1/kafka-clients-3.4.1.jar +0 -0
  23. data/vendor/jar-dependencies/org/lz4/lz4-java/1.8.0/lz4-java-1.8.0.jar +0 -0
  24. data/vendor/jar-dependencies/org/slf4j/slf4j-api/{1.7.30/slf4j-api-1.7.30.jar → 1.7.36/slf4j-api-1.7.36.jar} +0 -0
  25. data/vendor/jar-dependencies/org/xerial/snappy/snappy-java/1.1.10.5/snappy-java-1.1.10.5.jar +0 -0
  26. metadata +16 -21
  27. data/lib/logstash/plugin_mixins/common.rb +0 -107
  28. data/lib/logstash/plugin_mixins/kafka_support.rb +0 -29
  29. data/vendor/jar-dependencies/com/github/luben/zstd-jni/1.4.4-7/zstd-jni-1.4.4-7.jar +0 -0
  30. data/vendor/jar-dependencies/io/confluent/common-config/5.5.1/common-config-5.5.1.jar +0 -0
  31. data/vendor/jar-dependencies/io/confluent/common-utils/5.5.1/common-utils-5.5.1.jar +0 -0
  32. data/vendor/jar-dependencies/io/confluent/kafka-avro-serializer/5.5.1/kafka-avro-serializer-5.5.1.jar +0 -0
  33. data/vendor/jar-dependencies/io/confluent/kafka-schema-registry-client/5.5.1/kafka-schema-registry-client-5.5.1.jar +0 -0
  34. data/vendor/jar-dependencies/io/confluent/kafka-schema-serializer/5.5.1/kafka-schema-serializer-5.5.1.jar +0 -0
  35. data/vendor/jar-dependencies/javax/ws/rs/javax.ws.rs-api/2.1.1/javax.ws.rs-api-2.1.1.jar +0 -0
  36. data/vendor/jar-dependencies/org/apache/avro/avro/1.9.2/avro-1.9.2.jar +0 -0
  37. data/vendor/jar-dependencies/org/apache/kafka/kafka-clients/2.5.1/kafka-clients-2.5.1.jar +0 -0
  38. data/vendor/jar-dependencies/org/apache/kafka/kafka_2.12/2.5.1/kafka_2.12-2.5.1.jar +0 -0
  39. data/vendor/jar-dependencies/org/glassfish/jersey/core/jersey-common/2.33/jersey-common-2.33.jar +0 -0
  40. data/vendor/jar-dependencies/org/lz4/lz4-java/1.7.1/lz4-java-1.7.1.jar +0 -0
  41. data/vendor/jar-dependencies/org/xerial/snappy/snappy-java/1.1.7.3/snappy-java-1.1.7.3.jar +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: df9c89cdfcc2db6702409ec45ffb8d9f8f2b9274304889df1527e0697ccfcf95
4
- data.tar.gz: e907ad2e277d27c8cdbe98ebd203af86a3505a85574d64ace4402607c075a69e
3
+ metadata.gz: cd3536a455fdf7a64f5882e81c0648022d572660d4dc45841249374c1cd7e406
4
+ data.tar.gz: 26281f8584e29961625c8e385cfd599fd43444cbedd84d191b5542f603d00701
5
5
  SHA512:
6
- metadata.gz: bac93eb957af9028a6efc6e31a66c94818ae61333fa738daa6606abdd325dbea206fea2cd905d891f2b341a7bc983f8eaf5a5471015ac9548bc902f941b4a0d9
7
- data.tar.gz: 456739a2409ef5a42f007a23c8d0dbfceb3518e8e65b528c0f48266f2a219c2415a83a507fdab3ba028cbc5493d645c080ce191a0d39d7c1787557abded9a0e1
6
+ metadata.gz: 226f3c894edb182246d36bc283f7fc8d5d8471d9d411b98054a3770f7e414ca321ddbe401012c53ff5bbf4fd74da0340193153ebf7204a74e03802dc8c3df8ad
7
+ data.tar.gz: 12a5454b826df30697e4f505e96893687202b8cd17536599a5832a518653274218a278d0139c3150f7c48806e823251f91f3b583ee9b60f828104b5e052fea1b
data/CHANGELOG.md CHANGED
@@ -1,6 +1,44 @@
1
+ ## 11.3.2
2
+ - Fix: update Avro library [#150](https://api.github.com/repos/logstash-plugins/logstash-integration-kafka/pulls/150)
3
+
4
+ ## 11.3.1
5
+ - Fix: update snappy dependency [#148](https://github.com/logstash-plugins/logstash-integration-kafka/pull/148)
6
+
7
+ ## 11.3.0
8
+ - Bump kafka client to 3.4.1 [#145](https://github.com/logstash-plugins/logstash-integration-kafka/pull/145)
9
+
10
+ ## 11.2.1
11
+ - Fix nil exception when a record's headers are empty during event metadata assignment [#140](https://github.com/logstash-plugins/logstash-integration-kafka/pull/140)
12
+
13
+ ## 11.2.0
14
+ - Added TLS truststore and keystore settings specifically to access the schema registry [#137](https://github.com/logstash-plugins/logstash-integration-kafka/pull/137)
15
+
16
+ ## 11.1.0
17
+ - Added config `group_instance_id` to use the Kafka's consumer static membership feature [#135](https://github.com/logstash-plugins/logstash-integration-kafka/pull/135)
18
+
19
+ ## 11.0.0
20
+ - Changed Kafka client to 3.3.1, requires Logstash >= 8.3.0.
21
+ - Deprecated the `default` value for the `client_dns_lookup` setting, forcing it to `use_all_dns_ips` when explicitly used [#130](https://github.com/logstash-plugins/logstash-integration-kafka/pull/130)
22
+ - Changed the consumer's poll from using the one that blocks on metadata retrieval to the one that doesn't [#136](https://github.com/logstash-plugins/logstash-integration-kafka/pull/133)
23
+
24
+ ## 10.12.1
25
+ - Fix: update Avro library on 10.x [#149](https://api.github.com/repos/logstash-plugins/logstash-integration-kafka/pulls/149)
26
+
27
+ ## 10.12.0
28
+ - bump kafka client to 2.8.1 [#115](https://github.com/logstash-plugins/logstash-integration-kafka/pull/115)
29
+
30
+ ## 10.11.0
31
+ - Feat: added connections_max_idle_ms setting for output [#118](https://github.com/logstash-plugins/logstash-integration-kafka/pull/118)
32
+ - Refactor: mixins to follow shared mixin module naming
33
+
34
+ ## 10.10.1
35
+ - Update CHANGELOG.md [#114](https://api.github.com/repos/logstash-plugins/logstash-integration-kafka/pulls/114)
36
+
37
+ ## 10.10.0
38
+ - Added config setting to enable 'zstd' compression in the Kafka output [#112](https://github.com/logstash-plugins/logstash-integration-kafka/pull/112)
39
+
1
40
  ## 10.9.0
2
41
  - Refactor: leverage codec when using schema registry [#106](https://github.com/logstash-plugins/logstash-integration-kafka/pull/106)
3
-
4
42
  Previously using `schema_registry_url` parsed the payload as JSON even if `codec => 'plain'` was set, this is no longer the case.
5
43
 
6
44
  ## 10.8.2
@@ -87,7 +125,6 @@
87
125
  - Fix links in changelog pointing to stand-alone plugin changelogs.
88
126
  - Refactor: scope java_import to plugin class
89
127
 
90
-
91
128
  ## 10.0.0
92
129
  - Initial release of the Kafka Integration Plugin, which combines
93
130
  previously-separate Kafka plugins and shared dependencies into a single
data/DEVELOPER.md CHANGED
@@ -62,7 +62,7 @@ See http://kafka.apache.org/documentation.html#producerconfigs for details about
62
62
  kafka {
63
63
  topic_id => ... # string (required), The topic to produce the messages to
64
64
  broker_list => ... # string (optional), default: "localhost:9092", This is for bootstrapping and the producer will only use it for getting metadata
65
- compression_codec => ... # string (optional), one of ["none", "gzip", "snappy"], default: "none"
65
+ compression_codec => ... # string (optional), one of ["none", "gzip", "snappy", "lz4", "zstd"], default: "none"
66
66
  compressed_topics => ... # string (optional), default: "", This parameter allows you to set whether compression should be turned on for particular
67
67
  request_required_acks => ... # number (optional), one of [-1, 0, 1], default: 0, This value controls when a produce request is considered completed
68
68
  serializer_class => ... # string, (optional) default: "kafka.serializer.StringEncoder", The serializer class for messages. The default encoder takes a byte[] and returns the same byte[]
data/docs/index.asciidoc CHANGED
@@ -1,7 +1,7 @@
1
1
  :plugin: kafka
2
2
  :type: integration
3
3
  :no_codec:
4
- :kafka_client: 2.5.1
4
+ :kafka_client: 3.4.1
5
5
 
6
6
  ///////////////////////////////////////////
7
7
  START - GENERATED VARIABLES, DO NOT EDIT!
@@ -2,8 +2,8 @@
2
2
  :plugin: kafka
3
3
  :type: input
4
4
  :default_codec: plain
5
- :kafka_client: 2.5
6
- :kafka_client_doc: 25
5
+ :kafka_client: 3.4
6
+ :kafka_client_doc: 34
7
7
 
8
8
  ///////////////////////////////////////////
9
9
  START - GENERATED VARIABLES, DO NOT EDIT!
@@ -113,6 +113,7 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
113
113
  | <<plugins-{type}s-{plugin}-fetch_max_wait_ms>> |<<number,number>>|No
114
114
  | <<plugins-{type}s-{plugin}-fetch_min_bytes>> |<<number,number>>|No
115
115
  | <<plugins-{type}s-{plugin}-group_id>> |<<string,string>>|No
116
+ | <<plugins-{type}s-{plugin}-group_instance_id>> |<<string,string>>|No
116
117
  | <<plugins-{type}s-{plugin}-heartbeat_interval_ms>> |<<number,number>>|No
117
118
  | <<plugins-{type}s-{plugin}-isolation_level>> |<<string,string>>|No
118
119
  | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
@@ -134,6 +135,12 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
134
135
  | <<plugins-{type}s-{plugin}-schema_registry_key>> |<<string,string>>|No
135
136
  | <<plugins-{type}s-{plugin}-schema_registry_proxy>> |<<uri,uri>>|No
136
137
  | <<plugins-{type}s-{plugin}-schema_registry_secret>> |<<string,string>>|No
138
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_keystore_location>> |a valid filesystem path|No
139
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_keystore_password>> |<<password,password>>|No
140
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_keystore_type>> |<<string,string>>, one of `["jks", "PKCS12"]`|No
141
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_truststore_location>> |a valid filesystem path|No
142
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_truststore_password>> |<<password,password>>|No
143
+ | <<plugins-{type}s-{plugin}-schema_registry_ssl_truststore_type>> |<<string,string>>, one of `["jks", "PKCS12"]`|No
137
144
  | <<plugins-{type}s-{plugin}-schema_registry_url>> |<<uri,uri>>|No
138
145
  | <<plugins-{type}s-{plugin}-schema_registry_validation>> |<<string,string>>|No
139
146
  | <<plugins-{type}s-{plugin}-security_protocol>> |<<string,string>>, one of `["PLAINTEXT", "SSL", "SASL_PLAINTEXT", "SASL_SSL"]`|No
@@ -143,10 +150,10 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
143
150
  | <<plugins-{type}s-{plugin}-ssl_key_password>> |<<password,password>>|No
144
151
  | <<plugins-{type}s-{plugin}-ssl_keystore_location>> |a valid filesystem path|No
145
152
  | <<plugins-{type}s-{plugin}-ssl_keystore_password>> |<<password,password>>|No
146
- | <<plugins-{type}s-{plugin}-ssl_keystore_type>> |<<string,string>>|No
153
+ | <<plugins-{type}s-{plugin}-ssl_keystore_type>> |<<string,string>>, one of `["jks", "PKCS12"]`|No
147
154
  | <<plugins-{type}s-{plugin}-ssl_truststore_location>> |a valid filesystem path|No
148
155
  | <<plugins-{type}s-{plugin}-ssl_truststore_password>> |<<password,password>>|No
149
- | <<plugins-{type}s-{plugin}-ssl_truststore_type>> |<<string,string>>|No
156
+ | <<plugins-{type}s-{plugin}-ssl_truststore_type>> |<<string,string>>, one of `["jks", "PKCS12"]`|No
150
157
  | <<plugins-{type}s-{plugin}-topics>> |<<array,array>>|No
151
158
  | <<plugins-{type}s-{plugin}-topics_pattern>> |<<string,string>>|No
152
159
  | <<plugins-{type}s-{plugin}-value_deserializer_class>> |<<string,string>>|No
@@ -211,6 +218,12 @@ IP addresses for a hostname, they will all be attempted to connect to before fai
211
218
  connection. If the value is `resolve_canonical_bootstrap_servers_only` each entry will be
212
219
  resolved and expanded into a list of canonical names.
213
220
 
221
+ [NOTE]
222
+ ====
223
+ Starting from Kafka 3, the `default` value for `client.dns.lookup` has been removed.
224
+ If explicitly configured, it falls back to `use_all_dns_ips`.
225
+ ====
226
+
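For illustration only (broker address and topic are placeholder values, not part of this release), an input pinned to the surviving lookup mode could be configured as:

    input {
      kafka {
        bootstrap_servers => "kafka1.example.com:9092"
        topics            => ["logstash"]
        client_dns_lookup => "use_all_dns_ips"   # `default` is deprecated and falls back to this value
      }
    }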
214
227
  [id="plugins-{type}s-{plugin}-client_id"]
215
228
  ===== `client_id`
216
229
 
@@ -338,6 +351,28 @@ NOTE: In cases when multiple inputs are being used in a single pipeline, reading
338
351
  it's essential to set a different `group_id => ...` for each input. Setting a unique `client_id => ...`
339
352
  is also recommended.
340
353
 
354
+ [id="plugins-{type}s-{plugin}-group_instance_id"]
355
+ ===== `group_instance_id`
356
+
357
+ * Value type is <<string,string>>
358
+ * There is no default value for this setting.
359
+
360
+ The static membership identifier for this Logstash Kafka consumer. Static membership feature was introduced in
361
+ https://cwiki.apache.org/confluence/display/KAFKA/KIP-345%3A+Introduce+static+membership+protocol+to+reduce+consumer+rebalances[KIP-345],
362
+ available under Kafka property `group.instance.id`.
363
+ Its purpose is to avoid rebalances in situations in which a lot of data
364
+ has to be forwarded after a consumer goes offline.
365
+ This feature mitigates cases where the service state is heavy and the rebalance of one topic partition from instance
366
+ A to B would cause a huge amount of data to be transferred.
367
+ A client that goes offline/online frequently can avoid frequent and heavy rebalances by using this option.
368
+
369
+ NOTE: The `group_instance_id` setting must be unique across all the clients belonging to the same <<plugins-{type}s-{plugin}-group_id>>.
370
+ Otherwise, another client connecting with the same `group.instance.id` value would cause the oldest instance to be disconnected.
371
+ You can set this value to use information such as a hostname, an IP, or anything that uniquely identifies the client application.
372
+
373
+ NOTE: When `consumer_threads` is greater than one, a per-thread suffix is appended to
374
+ the `group_instance_id` to avoid collisions.
375
+
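A minimal usage sketch (host name and broker address are hypothetical); per the plugin code changes further below, when `consumer_threads` is greater than one each thread's consumer gets a numeric suffix appended to the configured id:

    input {
      kafka {
        bootstrap_servers => "kafka1.example.com:9092"
        topics            => ["logstash"]
        group_id          => "logstash"
        group_instance_id => "logstash-host01"   # must be unique within the consumer group
        consumer_threads  => 2                   # consumers register as logstash-host01-0 and logstash-host01-1
      }
    }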
341
376
  [id="plugins-{type}s-{plugin}-heartbeat_interval_ms"]
342
377
  ===== `heartbeat_interval_ms`
343
378
 
@@ -569,6 +604,54 @@ Set the address of a forward HTTP proxy. An empty string is treated as if proxy
569
604
 
570
605
  Set the password for basic authorization to access remote Schema Registry.
571
606
 
607
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_keystore_location"]
608
+ ===== `schema_registry_ssl_keystore_location`
609
+
610
+ * Value type is <<path,path>>
611
+ * There is no default value for this setting.
612
+
613
+ If schema registry client authentication is required, this setting stores the keystore path.
614
+
615
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_keystore_password"]
616
+ ===== `schema_registry_ssl_keystore_password`
617
+
618
+ * Value type is <<password,password>>
619
+ * There is no default value for this setting.
620
+
621
+ If schema registry authentication is required, this setting stores the keystore password.
622
+
623
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_keystore_type"]
624
+ ===== `schema_registry_ssl_keystore_type`
625
+
626
+ * Value type is <<string,string>>
627
+ * There is no default value for this setting.
628
+
629
+ The format of the keystore file. It must be either `jks` or `PKCS12`.
630
+
631
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_truststore_location"]
632
+ ===== `schema_registry_ssl_truststore_location`
633
+
634
+ * Value type is <<path,path>>
635
+ * There is no default value for this setting.
636
+
637
+ The truststore path to validate the schema registry's certificate.
638
+
639
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_truststore_password"]
640
+ ===== `schema_registry_ssl_truststore_password`
641
+
642
+ * Value type is <<password,password>>
643
+ * There is no default value for this setting.
644
+
645
+ The schema registry truststore password.
646
+
647
+ [id="plugins-{type}s-{plugin}-schema_registry_ssl_truststore_type"]
648
+ ===== `schema_registry_ssl_truststore_type`
649
+
650
+ * Value type is <<string,string>>
651
+ * There is no default value for this setting.
652
+
653
+ The format of the schema registry's truststore file. It must be either `jks` or `PKCS12`.
654
+
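A configuration sketch combining these schema registry TLS settings (registry URL, paths, and passwords are placeholders, not values from this release):

    input {
      kafka {
        bootstrap_servers                       => "kafka1.example.com:9093"
        topics                                  => ["avro-events"]
        schema_registry_url                     => "https://schema-registry.example.com:8081"
        schema_registry_ssl_truststore_location => "/etc/logstash/registry.truststore.jks"
        schema_registry_ssl_truststore_password => "changeit"
        schema_registry_ssl_truststore_type     => "jks"
        schema_registry_ssl_keystore_location   => "/etc/logstash/registry.keystore.p12"
        schema_registry_ssl_keystore_password   => "changeit"
        schema_registry_ssl_keystore_type       => "PKCS12"
      }
    }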
572
655
  [id="plugins-{type}s-{plugin}-schema_registry_url"]
573
656
  ===== `schema_registry_url`
574
657
 
@@ -659,7 +742,7 @@ If client authentication is required, this setting stores the keystore password
659
742
  * Value type is <<string,string>>
660
743
  * There is no default value for this setting.
661
744
 
662
- The keystore type.
745
+ The format of the keystore file. It must be either `jks` or `PKCS12`.
663
746
 
664
747
  [id="plugins-{type}s-{plugin}-ssl_truststore_location"]
665
748
  ===== `ssl_truststore_location`
@@ -683,7 +766,7 @@ The truststore password.
683
766
  * Value type is <<string,string>>
684
767
  * There is no default value for this setting.
685
768
 
686
- The truststore type.
769
+ The format of the truststore file. It must be either `jks` or `PKCS12`.
687
770
 
688
771
  [id="plugins-{type}s-{plugin}-topics"]
689
772
  ===== `topics`
@@ -699,8 +782,13 @@ A list of topics to subscribe to, defaults to ["logstash"].
699
782
  * Value type is <<string,string>>
700
783
  * There is no default value for this setting.
701
784
 
702
- A topic regex pattern to subscribe to.
703
- The topics configuration will be ignored when using this configuration.
785
+ A topic regular expression pattern to subscribe to.
786
+
787
+ Filtering by a regular expression is done by retrieving the full list of topic names from the broker and applying the pattern locally. With brokers that host a large number of topics this operation can be very slow, especially when there are many consumers.
788
+
789
+ NOTE: When some topics on the broker are protected by ACL rules and the configured user lacks the DESCRIBE permission on them, the subscription
790
+ still succeeds, but the broker logs that the subscription to those topics was denied for that user.
791
+
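For example (the topic naming scheme is hypothetical), subscribing to every topic whose name starts with `app-` instead of listing topics one by one:

    input {
      kafka {
        bootstrap_servers => "kafka1.example.com:9092"
        group_id          => "logstash"
        topics_pattern    => "app-.*"   # pattern is matched locally against the broker's full topic list
      }
    }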
704
792
 
705
793
  [id="plugins-{type}s-{plugin}-value_deserializer_class"]
706
794
  ===== `value_deserializer_class`
@@ -2,8 +2,8 @@
2
2
  :plugin: kafka
3
3
  :type: output
4
4
  :default_codec: plain
5
- :kafka_client: 2.5
6
- :kafka_client_doc: 25
5
+ :kafka_client: 3.4
6
+ :kafka_client_doc: 34
7
7
 
8
8
  ///////////////////////////////////////////
9
9
  START - GENERATED VARIABLES, DO NOT EDIT!
@@ -84,7 +84,8 @@ See the https://kafka.apache.org/{kafka_client_doc}/documentation for more detai
84
84
  | <<plugins-{type}s-{plugin}-buffer_memory>> |<<number,number>>|No
85
85
  | <<plugins-{type}s-{plugin}-client_dns_lookup>> |<<string,string>>|No
86
86
  | <<plugins-{type}s-{plugin}-client_id>> |<<string,string>>|No
87
- | <<plugins-{type}s-{plugin}-compression_type>> |<<string,string>>, one of `["none", "gzip", "snappy", "lz4"]`|No
87
+ | <<plugins-{type}s-{plugin}-compression_type>> |<<string,string>>, one of `["none", "gzip", "snappy", "lz4", "zstd"]`|No
88
+ | <<plugins-{type}s-{plugin}-connections_max_idle_ms>> |<<number,number>>|No
88
89
  | <<plugins-{type}s-{plugin}-jaas_path>> |a valid filesystem path|No
89
90
  | <<plugins-{type}s-{plugin}-kerberos_config>> |a valid filesystem path|No
90
91
  | <<plugins-{type}s-{plugin}-key_serializer>> |<<string,string>>|No
@@ -180,6 +181,12 @@ all IP addresses returned for a hostname before failing the connection.
180
181
  If set to `resolve_canonical_bootstrap_servers_only`, each entry will be
181
182
  resolved and expanded into a list of canonical names.
182
183
 
184
+ [NOTE]
185
+ ====
186
+ Starting from Kafka 3 `default` value for `client.dns.lookup` value has been removed.
187
+ If explicitly configured it fallbacks to `use_all_dns_ips`.
188
+ ====
189
+
183
190
  [id="plugins-{type}s-{plugin}-client_id"]
184
191
  ===== `client_id`
185
192
 
@@ -193,11 +200,19 @@ ip/port by allowing a logical application name to be included with the request
193
200
  [id="plugins-{type}s-{plugin}-compression_type"]
194
201
  ===== `compression_type`
195
202
 
196
- * Value can be any of: `none`, `gzip`, `snappy`, `lz4`
203
+ * Value can be any of: `none`, `gzip`, `snappy`, `lz4`, `zstd`
197
204
  * Default value is `"none"`
198
205
 
199
206
  The compression type for all data generated by the producer.
200
- The default is none (i.e. no compression). Valid values are none, gzip, snappy, or lz4.
207
+ The default is none (meaning no compression). Valid values are none, gzip, snappy, lz4, or zstd.
208
+
209
+ [id="plugins-{type}s-{plugin}-connections_max_idle_ms"]
210
+ ===== `connections_max_idle_ms`
211
+
212
+ * Value type is <<number,number>>
213
+ * Default value is `540000` milliseconds (9 minutes).
214
+
215
+ Close idle connections after the number of milliseconds specified by this config.
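A sketch of an output that uses the newly supported `zstd` codec together with this idle-connection setting (broker address and topic are placeholders):

    output {
      kafka {
        bootstrap_servers       => "kafka1.example.com:9092"
        topic_id                => "logstash-out"
        compression_type        => "zstd"
        connections_max_idle_ms => 540000   # Kafka default, 9 minutes
      }
    }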
201
216
 
202
217
  [id="plugins-{type}s-{plugin}-jaas_path"]
203
218
  ===== `jaas_path`
@@ -2,12 +2,11 @@ require 'logstash/namespace'
2
2
  require 'logstash/inputs/base'
3
3
  require 'stud/interval'
4
4
  require 'java'
5
- require 'logstash-integration-kafka_jars.rb'
6
- require 'logstash/plugin_mixins/kafka_support'
7
- require 'manticore'
8
5
  require "json"
9
6
  require "logstash/json"
10
- require_relative '../plugin_mixins/common'
7
+ require 'logstash-integration-kafka_jars.rb'
8
+ require 'logstash/plugin_mixins/kafka/common'
9
+ require 'logstash/plugin_mixins/kafka/avro_schema_registry'
11
10
  require 'logstash/plugin_mixins/deprecation_logger_support'
12
11
 
13
12
  # This input will read events from a Kafka topic. It uses the 0.10 version of
@@ -57,8 +56,8 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
57
56
 
58
57
  DEFAULT_DESERIALIZER_CLASS = "org.apache.kafka.common.serialization.StringDeserializer"
59
58
 
60
- include LogStash::PluginMixins::KafkaSupport
61
- include ::LogStash::PluginMixins::KafkaAvroSchemaRegistry
59
+ include LogStash::PluginMixins::Kafka::Common
60
+ include LogStash::PluginMixins::Kafka::AvroSchemaRegistry
62
61
  include LogStash::PluginMixins::DeprecationLoggerSupport
63
62
 
64
63
  config_name 'kafka'
@@ -93,13 +92,12 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
93
92
  # IP addresses for a hostname, they will all be attempted to connect to before failing the
94
93
  # connection. If the value is `resolve_canonical_bootstrap_servers_only` each entry will be
95
94
  # resolved and expanded into a list of canonical names.
96
- config :client_dns_lookup, :validate => ["default", "use_all_dns_ips", "resolve_canonical_bootstrap_servers_only"], :default => "default"
95
+ # Starting from Kafka 3, the `default` value for `client.dns.lookup` has been removed. If explicitly configured, it falls back to `use_all_dns_ips`.
96
+ config :client_dns_lookup, :validate => ["default", "use_all_dns_ips", "resolve_canonical_bootstrap_servers_only"], :default => "use_all_dns_ips"
97
97
  # The id string to pass to the server when making requests. The purpose of this
98
98
  # is to be able to track the source of requests beyond just ip/port by allowing
99
99
  # a logical application name to be included.
100
100
  config :client_id, :validate => :string, :default => "logstash"
101
- # Close idle connections after the number of milliseconds specified by this config.
102
- config :connections_max_idle_ms, :validate => :number, :default => 540_000 # (9m) Kafka default
103
101
  # Ideally you should have as many threads as the number of partitions for a perfect
104
102
  # balance — more threads than partitions means that some threads will be idle
105
103
  config :consumer_threads, :validate => :number, :default => 1
@@ -126,6 +124,11 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
126
124
  # that happens to be made up of multiple processors. Messages in a topic will be distributed to all
127
125
  # Logstash instances with the same `group_id`
128
126
  config :group_id, :validate => :string, :default => "logstash"
127
+ # Set a static group instance id, used by the static membership feature, to avoid rebalancing when a
128
+ # consumer goes offline. If set and `consumer_threads` is greater than 1 then for each
129
+ # consumer created by each thread an artificial suffix is appended to the user-provided `group_instance_id`
130
+ # to avoid clashing.
131
+ config :group_instance_id, :validate => :string
129
132
  # The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
130
133
  # that the consumer's session stays active and to facilitate rebalancing when new
131
134
  # consumers join or leave the group. The value must be set lower than
@@ -138,7 +141,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
138
141
  # been aborted. Non-transactional messages will be returned unconditionally in either mode.
139
142
  config :isolation_level, :validate => ["read_uncommitted", "read_committed"], :default => "read_uncommitted" # Kafka default
140
143
  # Java Class used to deserialize the record's key
141
- config :key_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
144
+ config :key_deserializer_class, :validate => :string, :default => DEFAULT_DESERIALIZER_CLASS
142
145
  # The maximum delay between invocations of poll() when using consumer group management. This places
143
146
  # an upper bound on the amount of time that the consumer can be idle before fetching more records.
144
147
  # If poll() is not called before expiration of this timeout, then the consumer is considered failed and
@@ -152,9 +155,6 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
152
155
  config :max_partition_fetch_bytes, :validate => :number, :default => 1_048_576 # (1MB) Kafka default
153
156
  # The maximum number of records returned in a single call to poll().
154
157
  config :max_poll_records, :validate => :number, :default => 500 # Kafka default
155
- # The period of time in milliseconds after which we force a refresh of metadata even if
156
- # we haven't seen any partition leadership changes to proactively discover any new brokers or partitions
157
- config :metadata_max_age_ms, :validate => :number, :default => 300_000 # (5m) Kafka default
158
158
  # The name of the partition assignment strategy that the client uses to distribute
159
159
  # partition ownership amongst consumer instances, supported options are `range`,
160
160
  # `round_robin`, `sticky` and `cooperative_sticky`
@@ -167,10 +167,6 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
167
167
  # This avoids repeatedly connecting to a host in a tight loop.
168
168
  # This backoff applies to all connection attempts by the client to a broker.
169
169
  config :reconnect_backoff_ms, :validate => :number, :default => 50 # Kafka default
170
- # The configuration controls the maximum amount of time the client will wait for the response of a request.
171
- # If the response is not received before the timeout elapses the client will resend the request if necessary
172
- # or fail the request if retries are exhausted.
173
- config :request_timeout_ms, :validate => :number, :default => 40_000 # Kafka default
174
170
  # The amount of time to wait before attempting to retry a failed fetch request
175
171
  # to a given topic partition. This avoids repeated fetching-and-failing in a tight loop.
176
172
  config :retry_backoff_ms, :validate => :number, :default => 100 # Kafka default
@@ -267,6 +263,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
267
263
  def register
268
264
  @runner_threads = []
269
265
  @metadata_mode = extract_metadata_level(@decorate_events)
266
+ reassign_dns_lookup
270
267
  @pattern ||= java.util.regex.Pattern.compile(@topics_pattern) unless @topics_pattern.nil?
271
268
  check_schema_registry_parameters
272
269
  end
@@ -295,7 +292,10 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
295
292
 
296
293
  public
297
294
  def run(logstash_queue)
298
- @runner_consumers = consumer_threads.times.map { |i| subscribe(create_consumer("#{client_id}-#{i}")) }
295
+ @runner_consumers = consumer_threads.times.map do |i|
296
+ thread_group_instance_id = consumer_threads > 1 && group_instance_id ? "#{group_instance_id}-#{i}" : group_instance_id
297
+ subscribe(create_consumer("#{client_id}-#{i}", thread_group_instance_id))
298
+ end
299
299
  @runner_threads = @runner_consumers.map.with_index { |consumer, i| thread_runner(logstash_queue, consumer,
300
300
  "kafka-input-worker-#{client_id}-#{i}") }
301
301
  @runner_threads.each(&:start)
@@ -339,10 +339,13 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
339
339
  def do_poll(consumer)
340
340
  records = []
341
341
  begin
342
- records = consumer.poll(poll_timeout_ms)
342
+ records = consumer.poll(java.time.Duration.ofMillis(poll_timeout_ms))
343
343
  rescue org.apache.kafka.common.errors.WakeupException => e
344
344
  logger.debug("Wake up from poll", :kafka_error_message => e)
345
345
  raise e unless stop?
346
+ rescue org.apache.kafka.common.errors.FencedInstanceIdException => e
347
+ logger.error("Another consumer with same group.instance.id has connected", :original_error_message => e.message)
348
+ raise e unless stop?
346
349
  rescue => e
347
350
  logger.error("Unable to poll Kafka consumer",
348
351
  :kafka_error_message => e,
@@ -370,7 +373,9 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
370
373
  event.set("[@metadata][kafka][timestamp]", record.timestamp)
371
374
  end
372
375
  if @metadata_mode.include?(:headers)
373
- record.headers.each do |header|
376
+ record.headers
377
+ .select{|h| header_with_value(h) }
378
+ .each do |header|
374
379
  s = String.from_java_bytes(header.value)
375
380
  s.force_encoding(Encoding::UTF_8)
376
381
  if s.valid_encoding?
@@ -397,7 +402,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
397
402
  end
398
403
 
399
404
  private
400
- def create_consumer(client_id)
405
+ def create_consumer(client_id, group_instance_id)
401
406
  begin
402
407
  props = java.util.Properties.new
403
408
  kafka = org.apache.kafka.clients.consumer.ConsumerConfig
@@ -415,6 +420,7 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
415
420
  props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms.to_s) unless fetch_max_wait_ms.nil?
416
421
  props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes.to_s) unless fetch_min_bytes.nil?
417
422
  props.put(kafka::GROUP_ID_CONFIG, group_id)
423
+ props.put(kafka::GROUP_INSTANCE_ID_CONFIG, group_instance_id) unless group_instance_id.nil?
418
424
  props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms.to_s) unless heartbeat_interval_ms.nil?
419
425
  props.put(kafka::ISOLATION_LEVEL_CONFIG, isolation_level)
420
426
  props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
@@ -456,6 +462,17 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
456
462
  set_trustore_keystore_config(props)
457
463
  set_sasl_config(props)
458
464
  end
465
+ if schema_registry_ssl_truststore_location
466
+ props.put('schema.registry.ssl.truststore.location', schema_registry_ssl_truststore_location)
467
+ props.put('schema.registry.ssl.truststore.password', schema_registry_ssl_truststore_password.value)
468
+ props.put('schema.registry.ssl.truststore.type', schema_registry_ssl_truststore_type)
469
+ end
470
+
471
+ if schema_registry_ssl_keystore_location
472
+ props.put('schema.registry.ssl.keystore.location', schema_registry_ssl_keystore_location)
473
+ props.put('schema.registry.ssl.keystore.password', schema_registry_ssl_keystore_password.value)
474
+ props.put('schema.registry.ssl.keystore.type', schema_registry_ssl_keystore_type)
475
+ end
459
476
 
460
477
  org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
461
478
  rescue => e
@@ -484,4 +501,8 @@ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
484
501
  end
485
502
  end
486
503
 
504
+ def header_with_value(header)
505
+ !header.nil? && !header.value.nil? && !header.key.nil?
506
+ end
507
+
487
508
  end #class LogStash::Inputs::Kafka
@@ -2,7 +2,7 @@ require 'logstash/namespace'
2
2
  require 'logstash/outputs/base'
3
3
  require 'java'
4
4
  require 'logstash-integration-kafka_jars.rb'
5
- require 'logstash/plugin_mixins/kafka_support'
5
+ require 'logstash/plugin_mixins/kafka/common'
6
6
 
7
7
  # Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on
8
8
  # the broker.
@@ -51,7 +51,7 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
51
51
 
52
52
  java_import org.apache.kafka.clients.producer.ProducerRecord
53
53
 
54
- include LogStash::PluginMixins::KafkaSupport
54
+ include LogStash::PluginMixins::Kafka::Common
55
55
 
56
56
  declare_threadsafe!
57
57
 
@@ -80,13 +80,14 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
80
80
  # The total bytes of memory the producer can use to buffer records waiting to be sent to the server.
81
81
  config :buffer_memory, :validate => :number, :default => 33_554_432 # (32M) Kafka default
82
82
  # The compression type for all data generated by the producer.
83
- # The default is none (i.e. no compression). Valid values are none, gzip, or snappy.
84
- config :compression_type, :validate => ["none", "gzip", "snappy", "lz4"], :default => "none"
83
+ # The default is none (i.e. no compression). Valid values are none, gzip, snappy, lz4 or zstd.
84
+ config :compression_type, :validate => ["none", "gzip", "snappy", "lz4", "zstd"], :default => "none"
85
85
  # How DNS lookups should be done. If set to `use_all_dns_ips`, when the lookup returns multiple
86
86
  # IP addresses for a hostname, they will all be attempted to connect to before failing the
87
87
  # connection. If the value is `resolve_canonical_bootstrap_servers_only` each entry will be
88
88
  # resolved and expanded into a list of canonical names.
89
- config :client_dns_lookup, :validate => ["default", "use_all_dns_ips", "resolve_canonical_bootstrap_servers_only"], :default => "default"
89
+ # Starting from Kafka 3, the `default` value for `client.dns.lookup` has been removed. If explicitly configured, it falls back to `use_all_dns_ips`.
90
+ config :client_dns_lookup, :validate => ["default", "use_all_dns_ips", "resolve_canonical_bootstrap_servers_only"], :default => "use_all_dns_ips"
90
91
  # The id string to pass to the server when making requests.
91
92
  # The purpose of this is to be able to track the source of requests beyond just
92
93
  # ip/port by allowing a logical application name to be included with the request
@@ -107,19 +108,12 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
107
108
  config :message_key, :validate => :string
108
109
  # the timeout setting for initial metadata request to fetch topic metadata.
109
110
  config :metadata_fetch_timeout_ms, :validate => :number, :default => 60_000
110
- # the max time in milliseconds before a metadata refresh is forced.
111
- config :metadata_max_age_ms, :validate => :number, :default => 300_000 # (5m) Kafka default
112
111
  # Partitioner to use - can be `default`, `uniform_sticky`, `round_robin` or a fully qualified class name of a custom partitioner.
113
112
  config :partitioner, :validate => :string
114
113
  # The size of the TCP receive buffer to use when reading data
115
114
  config :receive_buffer_bytes, :validate => :number, :default => 32_768 # (32KB) Kafka default
116
115
  # The amount of time to wait before attempting to reconnect to a given host when a connection fails.
117
116
  config :reconnect_backoff_ms, :validate => :number, :default => 50 # Kafka default
118
- # The configuration controls the maximum amount of time the client will wait
119
- # for the response of a request. If the response is not received before the timeout
120
- # elapses the client will resend the request if necessary or fail the request if
121
- # retries are exhausted.
122
- config :request_timeout_ms, :validate => :number, :default => 40_000 # (40s) Kafka default
123
117
  # The default retry behavior is to retry until successful. To prevent data loss,
124
118
  # the use of this setting is discouraged.
125
119
  #
@@ -197,6 +191,7 @@ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
197
191
  logger.warn("Kafka output is configured with finite retry. This instructs Logstash to LOSE DATA after a set number of send attempts fails. If you do not want to lose data if Kafka is down, then you must remove the retry setting.", :retries => @retries)
198
192
  end
199
193
 
194
+ reassign_dns_lookup
200
195
 
201
196
  @producer = create_producer
202
197
  if value_serializer == 'org.apache.kafka.common.serialization.StringSerializer'