logstash-input-kafka 2.1.0 → 3.0.0.beta1

This diff shows the changes between the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: b5ae836d613cddbb4afc4b9789ffb96e5133703d
- data.tar.gz: 3b200feb6e0b8096ccd6f211cc8903e920599e45
+ metadata.gz: 9c608112bad7e897363c4d3328b7979746f5825e
+ data.tar.gz: 67cc809f94c52b10bad640ab916601958824ed83
  SHA512:
- metadata.gz: 6fc55d881fb22f59a2455efa4e654c570d0eefa08c6cc62d429e4dfe67eb0b83eadfc7c6ee97ddd869e880c0f43afda3a99b64839b34904d9cefd8208f688c32
- data.tar.gz: 779f59d1de7ba09b9ec5f3022de0059bf34e10feec22572012412b46d59e55d30ba76d166f02d4274d93202d28a93c1afc76d6ead6181c31f23b1bd91f5f9383
+ metadata.gz: 2df9dd8df82666322cbbced05a00cb930b489c640a121d57dbe947d2ecb9ba9f025c9457fba0ae085bbe0c4fb47c95cebca3727531da36b70df43d6d54001c20
+ data.tar.gz: b3c4d7f98f3a81932279bc157665862a71ae1f3f62e6d88f0fb8a56dac5f82cb3984fa00ae904496dad4eef1528e0b885f2ff56046e671f6293956d606c02998
data/CHANGELOG.md CHANGED
@@ -1,25 +1,3 @@
- # 2.1.0
- - Users can now use custom log4j file to control logging output.
-
- # 2.0.9
- - Fix shutdown sequence bug where a shutdown event found itself in queue after a shutdown was initiated
-
- # 2.0.7
- - Update to jruby-kafka 1.6 which includes Kafka 0.8.2.2 enabling LZ4 decompression.
-
- # 2.0.6
- - Depend on logstash-core-plugin-api instead of logstash-core, removing the need to mass update plugins on major releases of logstash
-
- # 2.0.5
- - New dependency requirements for logstash-core for the 5.0 release
-
- ## 2.0.4
- - Fix safe shutdown while plugin waits on Kafka for new events
- - Expose auto_commit_interval_ms to control offset commit frequency
-
- ## 2.0.3
- - Fix infinite loop when no new messages are found in Kafka
-
  ## 2.0.0
  - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
    instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895
data/Gemfile CHANGED
@@ -1,2 +1,3 @@
  source 'https://rubygems.org'
- gemspec
+
+ gemspec
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
- Copyright (c) 2012–2016 Elasticsearch <http://www.elastic.co>
+ Copyright (c) 2012–2015 Elasticsearch <http://www.elastic.co>

  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
data/README.md CHANGED
@@ -1,6 +1,7 @@
  # Logstash Plugin

- [![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-input-kafka.svg)](https://travis-ci.org/logstash-plugins/logstash-input-kafka)
+ [![Build
+ Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-kafka-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Inputs/job/logstash-plugin-input-kafka-unit/)

  This is a plugin for [Logstash](https://github.com/elastic/logstash).

@@ -55,12 +56,7 @@ gem "logstash-filter-awesome", :path => "/your/local/logstash-filter-awesome"
  ```
  - Install plugin
  ```sh
- # Logstash 2.3 and higher
- bin/logstash-plugin install --no-verify
-
- # Prior to Logstash 2.3
  bin/plugin install --no-verify
-
  ```
  - Run Logstash with your plugin
  ```sh
@@ -78,12 +74,7 @@ gem build logstash-filter-awesome.gemspec
  ```
  - Install the plugin from the Logstash home
  ```sh
- # Logstash 2.3 and higher
- bin/logstash-plugin install --no-verify
-
- # Prior to Logstash 2.3
- bin/plugin install --no-verify
-
+ bin/plugin install /your/local/plugin/logstash-filter-awesome.gem
  ```
  - Start Logstash and proceed to test the plugin

data/lib/logstash/inputs/kafka.rb CHANGED
@@ -1,30 +1,23 @@
  require 'logstash/namespace'
  require 'logstash/inputs/base'
- require 'jruby-kafka'
  require 'stud/interval'
+ require 'java'
+ require 'logstash-input-kafka_jars.rb'

- # This input will read events from a Kafka topic. It uses the high level consumer API provided
- # by Kafka to read messages from the broker. It also maintains the state of what has been
- # consumed using Zookeeper. The default input codec is json.
+ # This input will read events from a Kafka topic. It uses the the newly designed
+ # 0.9 version of consumer API[https://cwiki.apache.org/confluence/display/KAFKA/Kafka+0.9+Consumer+Rewrite+Design]
+ # provided by Kafka to read messages from the broker. This consumer is backward compatible and can
+ # be used with 0.8.x brokers.
  #
- # Here's a compatibility matrix that shows the Kafka broker and client versions that are compatible with each combination
- # of Logstash and the Kafka input plugin:
- #
- # [options="header"]
- # |==========================================================
- # |Kafka Broker Version |Kafka Client Version |Logstash Version |Plugin Version |Why?
- # |0.8 |0.8 |2.0.0 - 2.x.x |<3.0.0 |Legacy, 0.8 is still popular
- # |0.9 |0.9 |2.0.0 - 2.3.x | 3.x.x |Works with the old Ruby Event API (`event['product']['price'] = 10`)
- # |0.9 |0.9 |2.4.0 - 5.0.x | 4.x.x |Works with the new getter/setter APIs (`event.set('[product][price]', 10)`)
- # |0.10 |0.10 |2.4.0 - 5.0.x | 5.x.x |Not compatible with the 0.9 broker
- # |==========================================================
- #
- # NOTE: It's a good idea to upgrade brokers before consumers/producers because brokers target backwards compatibility.
- # For example, the 0.9 broker will work with both the 0.8 consumer and 0.9 consumer APIs, but not the other way around.
- #
- # You must configure `topic_id`, `white_list` or `black_list`. By default it will connect to a
- # Zookeeper running on localhost. All the broker information is read from Zookeeper state.
+ # The Logstash consumer handles group management and uses the default Kafka offset management
+ # strategy using Kafka topics.
  #
+ # Logstash instances by default form a single logical group to subscribe to Kafka topics
+ # Each Logstash Kafka consumer can run multiple threads to increase read throughput. Alternatively,
+ # you could run multiple Logstash instances with the same `group_id` to spread the load across
+ # physical machines. Messages in a topic will be distributed to all Logstash instances with
+ # the same `group_id`.
+ #
  # Ideally you should have as many threads as the number of partitions for a perfect balance --
  # more threads than partitions means that some threads will be idle
  #
@@ -35,180 +28,184 @@ require 'stud/interval'
  class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  config_name 'kafka'

- default :codec, 'json'
+ default :codec, 'plain'

- # Specifies the ZooKeeper connection string in the form hostname:port where host and port are
- # the host and port of a ZooKeeper server. You can also specify multiple hosts in the form
- # `hostname1:port1,hostname2:port2,hostname3:port3`.
+ # The frequency in milliseconds that the consumer offsets are committed to Kafka.
+ config :auto_commit_interval_ms, :validate => :string, :default => "10"
+ # What to do when there is no initial offset in Kafka or if an offset is out of range:
  #
- # The server may also have a ZooKeeper chroot path as part of it's ZooKeeper connection string
- # which puts its data under some path in the global ZooKeeper namespace. If so the consumer
- # should use the same chroot path in its connection string. For example to give a chroot path of
- # `/chroot/path` you would give the connection string as
- # `hostname1:port1,hostname2:port2,hostname3:port3/chroot/path`.
- config :zk_connect, :validate => :string, :default => 'localhost:2181'
- # A string that uniquely identifies the group of consumer processes to which this consumer
- # belongs. By setting the same group id multiple processes indicate that they are all part of
- # the same consumer group.
- config :group_id, :validate => :string, :default => 'logstash'
- # The topic to consume messages from
- config :topic_id, :validate => :string, :default => nil
- # Whitelist of topics to include for consumption.
- config :white_list, :validate => :string, :default => nil
- # Blacklist of topics to exclude from consumption.
- config :black_list, :validate => :string, :default => nil
- # Reset the consumer group to start at the earliest message present in the log by clearing any
- # offsets for the group stored in Zookeeper. This is destructive! Must be used in conjunction
- # with auto_offset_reset => 'smallest'
- config :reset_beginning, :validate => :boolean, :default => false
- # `smallest` or `largest` - (optional, default `largest`) If the consumer does not already
- # have an established offset or offset is invalid, start with the earliest message present in the
- # log (`smallest`) or after the last message in the log (`largest`).
- config :auto_offset_reset, :validate => %w( largest smallest ), :default => 'largest'
- # The frequency in ms that the consumer offsets are committed to zookeeper.
- config :auto_commit_interval_ms, :validate => :number, :default => 1000
- # Number of threads to read from the partitions. Ideally you should have as many threads as the
- # number of partitions for a perfect balance. More threads than partitions means that some
- # threads will be idle. Less threads means a single thread could be consuming from more than
- # one partition
- config :consumer_threads, :validate => :number, :default => 1
- # Internal Logstash queue size used to hold events in memory after it has been read from Kafka
- config :queue_size, :validate => :number, :default => 20
- # When a new consumer joins a consumer group the set of consumers attempt to "rebalance" the
- # load to assign partitions to each consumer. If the set of consumers changes while this
- # assignment is taking place the rebalance will fail and retry. This setting controls the
- # maximum number of attempts before giving up.
- config :rebalance_max_retries, :validate => :number, :default => 4
- # Backoff time between retries during rebalance.
- config :rebalance_backoff_ms, :validate => :number, :default => 2000
- # Throw a timeout exception to the consumer if no message is available for consumption after
- # the specified interval
- config :consumer_timeout_ms, :validate => :number, :default => -1
- # Option to restart the consumer loop on error
- config :consumer_restart_on_error, :validate => :boolean, :default => true
- # Time in millis to wait for consumer to restart after an error
- config :consumer_restart_sleep_ms, :validate => :number, :default => 0
- # Option to add Kafka metadata like topic, message size to the event.
- # This will add a field named `kafka` to the logstash event containing the following attributes:
- # `msg_size`: The complete serialized size of this message in bytes (including crc, header attributes, etc)
- # `topic`: The topic this message is associated with
- # `consumer_group`: The consumer group used to read in this event
- # `partition`: The partition this message is associated with
- # `offset`: The offset from the partition this message is associated with
- # `key`: A ByteBuffer containing the message key
- config :decorate_events, :validate => :boolean, :default => false
- # A unique id for the consumer; generated automatically if not set.
- config :consumer_id, :validate => :string, :default => nil
- # The number of byes of messages to attempt to fetch for each topic-partition in each fetch
- # request. These bytes will be read into memory for each partition, so this helps control
- # the memory used by the consumer. The fetch request size must be at least as large as the
- # maximum message size the server allows or else it is possible for the producer to send
- # messages larger than the consumer can fetch.
- config :fetch_message_max_bytes, :validate => :number, :default => 1048576
- # The serializer class for messages. The default decoder takes a byte[] and returns the same byte[]
- config :decoder_class, :validate => :string, :default => 'kafka.serializer.DefaultDecoder'
- # The serializer class for keys (defaults to the same default as for messages)
- config :key_decoder_class, :validate => :string, :default => 'kafka.serializer.DefaultDecoder'
-
- class KafkaShutdownEvent; end
- KAFKA_SHUTDOWN_EVENT = KafkaShutdownEvent.new
+ # * earliest: automatically reset the offset to the earliest offset
+ # * latest: automatically reset the offset to the latest offset
+ # * none: throw exception to the consumer if no previous offset is found for the consumer's group
+ # * anything else: throw exception to the consumer.
+ config :auto_offset_reset, :validate => :string
+ # A list of URLs to use for establishing the initial connection to the cluster.
+ # This list should be in the form of `host1:port1,host2:port2` These urls are just used
+ # for the initial connection to discover the full cluster membership (which may change dynamically)
+ # so this list need not contain the full set of servers (you may want more than one, though, in
+ # case a server is down).
+ config :bootstrap_servers, :validate => :string, :default => "localhost:9092"
+ # Automatically check the CRC32 of the records consumed. This ensures no on-the-wire or on-disk
+ # corruption to the messages occurred. This check adds some overhead, so it may be
+ # disabled in cases seeking extreme performance.
+ config :check_crcs, :validate => :string
+ # The id string to pass to the server when making requests. The purpose of this
+ # is to be able to track the source of requests beyond just ip/port by allowing
+ # a logical application name to be included.
+ config :client_id, :validate => :string, :default => "logstash"
+ # Close idle connections after the number of milliseconds specified by this config.
+ config :connections_max_idle_ms, :validate => :string
+ # If true, periodically commit to Kafka the offsets of messages already returned by the consumer.
+ # This committed offset will be used when the process fails as the position from
+ # which the consumption will begin.
+ config :enable_auto_commit, :validate => :string, :default => "true"
+ # The maximum amount of time the server will block before answering the fetch request if
+ # there isn't sufficient data to immediately satisfy `fetch_min_bytes`. This
+ # should be less than or equal to the timeout used in `poll_timeout_ms`
+ config :fetch_max_wait_ms, :validate => :string
+ # The minimum amount of data the server should return for a fetch request. If insufficient
+ # data is available the request will wait for that much data to accumulate
+ # before answering the request.
+ config :fetch_min_bytes, :validate => :string
+ # The identifier of the group this consumer belongs to. Consumer group is a single logical subscriber
+ # that happens to be made up of multiple processors. Messages in a topic will be distributed to all
+ # Logstash instances with the same `group_id`
+ config :group_id, :validate => :string, :default => "logstash"
+ # The expected time between heartbeats to the consumer coordinator. Heartbeats are used to ensure
+ # that the consumer's session stays active and to facilitate rebalancing when new
+ # consumers join or leave the group. The value must be set lower than
+ # `session.timeout.ms`, but typically should be set no higher than 1/3 of that value.
+ # It can be adjusted even lower to control the expected time for normal rebalances.
+ config :heartbeat_interval_ms, :validate => :string
+ # Java Class used to deserialize the record's key
+ config :key_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
+ # The maximum amount of data per-partition the server will return. The maximum total memory used for a
+ # request will be <code>#partitions * max.partition.fetch.bytes</code>. This size must be at least
+ # as large as the maximum message size the server allows or else it is possible for the producer to
+ # send messages larger than the consumer can fetch. If that happens, the consumer can get stuck trying
+ # to fetch a large message on a certain partition.
+ config :max_partition_fetch_bytes, :validate => :string
+ # The class name of the partition assignment strategy that the client will use to distribute
+ # partition ownership amongst consumer instances
+ config :partition_assignment_strategy, :validate => :string
+ # The size of the TCP receive buffer (SO_RCVBUF) to use when reading data.
+ config :receive_buffer_bytes, :validate => :string
+ # The amount of time to wait before attempting to reconnect to a given host.
+ # This avoids repeatedly connecting to a host in a tight loop.
+ # This backoff applies to all requests sent by the consumer to the broker.
+ config :reconnect_backoff_ms, :validate => :string
+ # The configuration controls the maximum amount of time the client will wait
+ # for the response of a request. If the response is not received before the timeout
+ # elapses the client will resend the request if necessary or fail the request if
+ # retries are exhausted.
+ config :request_timeout_ms, :validate => :string
+ # The amount of time to wait before attempting to retry a failed fetch request
+ # to a given topic partition. This avoids repeated fetching-and-failing in a tight loop.
+ config :retry_backoff_ms, :validate => :string
+ # The timeout after which, if the `poll_timeout_ms` is not invoked, the consumer is marked dead
+ # and a rebalance operation is triggered for the group identified by `group_id`
+ config :session_timeout_ms, :validate => :string, :default => "30000"
+ # Java Class used to deserialize the record's value
+ config :value_deserializer_class, :validate => :string, :default => "org.apache.kafka.common.serialization.StringDeserializer"
+ # Ideally you should have as many threads as the number of partitions for a perfect
+ # balance — more threads than partitions means that some threads will be idle
+ config :num_threads, :validate => :number, :default => 1
+ # A list of topics to subscribe to.
+ config :topics, :validate => :array, :required => true
+ # Time kafka consumer will wait to receive new messages from topics
+ config :poll_timeout_ms, :validate => :number, :default => 100
+ # Enable SSL/TLS secured communication to Kafka broker. Note that secure communication
+ # is only available with a broker running v0.9 of Kafka.
+ config :ssl, :validate => :boolean, :default => false
+ # The JKS truststore path to validate the Kafka broker's certificate.
+ config :ssl_truststore_location, :validate => :path
+ # The truststore password
+ config :ssl_truststore_password, :validate => :password
+ # If client authentication is required, this setting stores the keystore path.
+ config :ssl_keystore_location, :validate => :path
+ # If client authentication is required, this setting stores the keystore password
+ config :ssl_keystore_password, :validate => :password

+
  public
  def register
- options = {
- :zk_connect => @zk_connect,
- :group_id => @group_id,
- :topic_id => @topic_id,
- :auto_offset_reset => @auto_offset_reset,
- :auto_commit_interval => @auto_commit_interval_ms,
- :rebalance_max_retries => @rebalance_max_retries,
- :rebalance_backoff_ms => @rebalance_backoff_ms,
- :consumer_timeout_ms => @consumer_timeout_ms,
- :consumer_restart_on_error => @consumer_restart_on_error,
- :consumer_restart_sleep_ms => @consumer_restart_sleep_ms,
- :consumer_id => @consumer_id,
- :fetch_message_max_bytes => @fetch_message_max_bytes,
- :allow_topics => @white_list,
- :filter_topics => @black_list,
- :value_decoder_class => @decoder_class,
- :key_decoder_class => @key_decoder_class
- }
- if @reset_beginning
- options[:reset_beginning] = 'from-beginning'
- end # if :reset_beginning
- topic_or_filter = [@topic_id, @white_list, @black_list].compact
- if topic_or_filter.count == 0
- raise LogStash::ConfigurationError, 'topic_id, white_list or black_list required.'
- elsif topic_or_filter.count > 1
- raise LogStash::ConfigurationError, 'Invalid combination of topic_id, white_list or black_list. Use only one.'
- end
- @kafka_client_queue = SizedQueue.new(@queue_size)
- @consumer_group = create_consumer_group(options)
- @logger.info('Registering kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
+ @runner_threads = []
  end # def register

  public
  def run(logstash_queue)
- # noinspection JRubyStringImportInspection
- java_import 'kafka.common.ConsumerRebalanceFailedException'
- @logger.info('Running kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
- begin
- @consumer_group.run(@consumer_threads,@kafka_client_queue)
-
- while !stop?
- event = @kafka_client_queue.pop
- if event == KAFKA_SHUTDOWN_EVENT
- break
- end
- queue_event(event, logstash_queue)
- end
-
- until @kafka_client_queue.empty?
- event = @kafka_client_queue.pop
- if event == KAFKA_SHUTDOWN_EVENT
- break
- end
- queue_event(event, logstash_queue)
- end
-
- @logger.info('Done running kafka input')
- rescue => e
- @logger.warn('kafka client threw exception, restarting',
- :exception => e)
- Stud.stoppable_sleep(Float(@consumer_restart_sleep_ms) * 1 / 1000) { stop? }
- retry if !stop?
- end
+ @runner_consumers = num_threads.times.map { || create_consumer }
+ @runner_threads = @runner_consumers.map { |consumer| thread_runner(logstash_queue, consumer) }
+ @runner_threads.each { |t| t.join }
  end # def run

  public
  def stop
- @kafka_client_queue.push(KAFKA_SHUTDOWN_EVENT)
- @consumer_group.shutdown if @consumer_group.running?
+ @runner_consumers.each { |c| c.wakeup }
  end

  private
- def create_consumer_group(options)
- Kafka::Group.new(options)
+ def thread_runner(logstash_queue, consumer)
+ Thread.new do
+ begin
+ consumer.subscribe(topics);
+ while !stop?
+ records = consumer.poll(poll_timeout_ms);
+ for record in records do
+ @codec.decode(record.value.to_s) do |event|
+ logstash_queue << event
+ end
+ end
+ end
+ rescue org.apache.kafka.common.errors.WakeupException => e
+ raise e if !stop?
+ ensure
+ consumer.close
+ end
+ end
  end

  private
- def queue_event(message_and_metadata, output_queue)
+ def create_consumer
  begin
- @codec.decode("#{message_and_metadata.message}") do |event|
- decorate(event)
- if @decorate_events
- event['kafka'] = {'msg_size' => message_and_metadata.message.size,
- 'topic' => message_and_metadata.topic,
- 'consumer_group' => @group_id,
- 'partition' => message_and_metadata.partition,
- 'offset' => message_and_metadata.offset,
- 'key' => message_and_metadata.key}
- end
- output_queue << event
- end # @codec.decode
- rescue => e # parse or event creation error
- @logger.error('Failed to create event', :message => "#{message_and_metadata.message}", :exception => e,
- :backtrace => e.backtrace)
- end # begin
- end # def queue_event
+ props = java.util.Properties.new
+ kafka = org.apache.kafka.clients.consumer.ConsumerConfig
+
+ props.put(kafka::AUTO_COMMIT_INTERVAL_MS_CONFIG, auto_commit_interval_ms)
+ props.put(kafka::AUTO_OFFSET_RESET_CONFIG, auto_offset_reset) unless auto_offset_reset.nil?
+ props.put(kafka::BOOTSTRAP_SERVERS_CONFIG, bootstrap_servers)
+ props.put(kafka::CHECK_CRCS_CONFIG, check_crcs) unless check_crcs.nil?
+ props.put(kafka::CLIENT_ID_CONFIG, client_id)
+ props.put(kafka::CONNECTIONS_MAX_IDLE_MS_CONFIG, connections_max_idle_ms) unless connections_max_idle_ms.nil?
+ props.put(kafka::ENABLE_AUTO_COMMIT_CONFIG, enable_auto_commit)
+ props.put(kafka::FETCH_MAX_WAIT_MS_CONFIG, fetch_max_wait_ms) unless fetch_max_wait_ms.nil?
+ props.put(kafka::FETCH_MIN_BYTES_CONFIG, fetch_min_bytes) unless fetch_min_bytes.nil?
+ props.put(kafka::GROUP_ID_CONFIG, group_id)
+ props.put(kafka::HEARTBEAT_INTERVAL_MS_CONFIG, heartbeat_interval_ms) unless heartbeat_interval_ms.nil?
+ props.put(kafka::KEY_DESERIALIZER_CLASS_CONFIG, key_deserializer_class)
+ props.put(kafka::MAX_PARTITION_FETCH_BYTES_CONFIG, max_partition_fetch_bytes) unless max_partition_fetch_bytes.nil?
+ props.put(kafka::PARTITION_ASSIGNMENT_STRATEGY_CONFIG, partition_assignment_strategy) unless partition_assignment_strategy.nil?
+ props.put(kafka::RECEIVE_BUFFER_CONFIG, receive_buffer_bytes) unless receive_buffer_bytes.nil?
+ props.put(kafka::RECONNECT_BACKOFF_MS_CONFIG, reconnect_backoff_ms) unless reconnect_backoff_ms.nil?
+ props.put(kafka::REQUEST_TIMEOUT_MS_CONFIG, request_timeout_ms) unless request_timeout_ms.nil?
+ props.put(kafka::RETRY_BACKOFF_MS_CONFIG, retry_backoff_ms) unless retry_backoff_ms.nil?
+ props.put(kafka::SESSION_TIMEOUT_MS_CONFIG, session_timeout_ms) unless session_timeout_ms.nil?
+ props.put(kafka::VALUE_DESERIALIZER_CLASS_CONFIG, value_deserializer_class)
+
+ if ssl
+ props.put("security.protocol", "SSL")
+ props.put("ssl.truststore.location", ssl_truststore_location)
+ props.put("ssl.truststore.password", ssl_truststore_password.value) unless ssl_truststore_password.nil?
+
+ #Client auth stuff
+ props.put("ssl.keystore.location", ssl_keystore_location) unless ssl_keystore_location.nil?
+ props.put("ssl.keystore.password", ssl_keystore_password.value) unless ssl_keystore_password.nil?
+ end
+
+ org.apache.kafka.clients.consumer.KafkaConsumer.new(props)
+ rescue => e
+ logger.error("Unable to create Kafka consumer from given configuration", :kafka_error_message => e)
+ throw e
+ end
+ end
  end #class LogStash::Inputs::Kafka
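For readers following the 2.x to 3.0.0.beta1 migration, here is a minimal, hypothetical usage sketch of the rewritten input, modeled on how the new unit spec constructs the plugin. The option names (`bootstrap_servers`, `topics`, `group_id`, `num_threads`, `ssl`) come from the diff above; the broker address, topic name and thread count are illustrative values.

```ruby
# Illustrative sketch only; option names are taken from the 3.0.0.beta1 plugin above.
require "logstash/inputs/kafka"

kafka_input = LogStash::Inputs::Kafka.new(
  "bootstrap_servers" => "localhost:9092",   # replaces the old zk_connect setting
  "topics"            => ["logs"],           # replaces topic_id / white_list / black_list
  "group_id"          => "logstash",         # consumers sharing a group_id split the partitions
  "num_threads"       => 2                   # run creates one KafkaConsumer per thread
  # "ssl" => true, "ssl_truststore_location" => "/path/to/truststore.jks"  # TLS needs a 0.9 broker
)

queue = Queue.new
kafka_input.register
# kafka_input.run(queue) would subscribe each consumer to `topics`, poll every
# poll_timeout_ms, decode each record with the plain codec, and push events onto
# `queue` until do_stop wakes the consumers; it blocks, so it is left commented out here.
```

Note that offsets are now committed to Kafka itself (`enable_auto_commit`, `auto_commit_interval_ms`) rather than to ZooKeeper, which is why `zk_connect` and `reset_beginning` disappear in this release.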
data/lib/logstash-input-kafka_jars.rb ADDED
@@ -0,0 +1,5 @@
+ # encoding: utf-8
+ require 'logstash/environment'
+
+ root_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
+ LogStash::Environment.load_runtime_jars! File.join(root_dir, "vendor")
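This five-line loader is the glue that makes the vendored Kafka 0.9 client visible to JRuby: `LogStash::Environment.load_runtime_jars!` picks up the jars shipped with the gem (kafka-clients-0.9.0.0, slf4j-api, slf4j-noop, per the file list in the metadata below). A rough, illustrative equivalent, not the actual LogStash::Environment implementation, would be:

```ruby
# Hypothetical sketch of what the loader achieves: every vendored jar ends up
# on the JRuby load path so that org.apache.kafka.* classes resolve at runtime.
require "java"

root_dir = File.expand_path(File.join(File.dirname(__FILE__), ".."))
Dir.glob(File.join(root_dir, "vendor", "jar-dependencies", "runtime-jars", "*.jar")).sort.each do |jar|
  require jar  # under JRuby, requiring a .jar file appends it to the classpath
end
```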
data/logstash-input-kafka.gemspec CHANGED
@@ -1,10 +1,10 @@
  Gem::Specification.new do |s|

  s.name = 'logstash-input-kafka'
- s.version = '2.1.0'
+ s.version = '3.0.0.beta1'
  s.licenses = ['Apache License (2.0)']
  s.summary = 'This input will read events from a Kafka topic. It uses the high level consumer API provided by Kafka to read messages from the broker'
- s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
+ s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
  s.authors = ['Elasticsearch']
  s.email = 'info@elastic.co'
  s.homepage = "http://www.elastic.co/guide/en/logstash/current/index.html"
@@ -20,13 +20,10 @@ Gem::Specification.new do |s|
  s.metadata = { 'logstash_plugin' => 'true', 'group' => 'input'}

  # Gem dependencies
- s.add_runtime_dependency "logstash-core-plugin-api", "~> 1.0"
+ s.add_runtime_dependency 'logstash-core', ">= 2.0.0.beta2", "< 3.0.0"
  s.add_runtime_dependency 'logstash-codec-json'
  s.add_runtime_dependency 'logstash-codec-plain'
  s.add_runtime_dependency 'stud', '>= 0.0.22', '< 0.1.0'
-
- s.add_runtime_dependency 'jruby-kafka', '1.5.0'
-
  s.add_development_dependency 'logstash-devutils'
  end

data/spec/integration/inputs/kafka_spec.rb ADDED
@@ -0,0 +1,21 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/kafka"
+
+ describe "input/kafka", :integration => true do
+ before do
+ props = java.util.Properties.new
+ props.put("bootstrap.servers", bootstrap_servers)
+ props.put("acks", "all")
+ props.put("retries", "0")
+ props.put("batch.size", "16384")
+ props.put("linger.ms", "1")
+ props.put("buffer.memory", "33554432")
+ props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
+ props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
+ producer = org.apache.kafka.clients.producer.KafkaProducer.new(props)
+ 1000.times do |i|
+ producer.send(org.apache.kafka.clients.producer.ProducerRecord("test", i.to_s, i.to_s))
+ end
+ end
+ end
data/spec/unit/inputs/kafka_spec.rb ADDED
@@ -0,0 +1,56 @@
+ # encoding: utf-8
+ require "logstash/devutils/rspec/spec_helper"
+ require "logstash/inputs/kafka"
+ require "concurrent"
+
+ class MockConsumer
+ def initialize
+ @wake = Concurrent::AtomicBoolean.new(false)
+ end
+
+ def subscribe(topics)
+ end
+
+ def poll(ms)
+ if @wake.value
+ raise org.apache.kafka.common.errors.WakeupException.new
+ else
+ 10.times.map do
+ org.apache.kafka.clients.consumer.ConsumerRecord.new("test", 0, 0, "key", "value")
+ end
+ end
+ end
+
+ def close
+ end
+
+ def wakeup
+ @wake.make_true
+ end
+ end
+
+ describe LogStash::Inputs::Kafka do
+ let(:config) { { 'topics' => ['test'], 'num_threads' => 4 } }
+ subject { LogStash::Inputs::Kafka.new(config) }
+
+ it "should register" do
+ expect {subject.register}.to_not raise_error
+ end
+
+ it "should run" do
+ expect(subject).to receive(:new_consumer) do
+ MockConsumer.new
+ end.exactly(4).times
+
+ subject.register
+ q = Queue.new
+ Thread.new do
+ while q.size < 13
+ end
+ subject.do_stop
+ end
+ subject.run(q)
+
+ expect(q.size).to eq(40)
+ end
+ end
metadata CHANGED
@@ -1,33 +1,39 @@
  --- !ruby/object:Gem::Specification
  name: logstash-input-kafka
  version: !ruby/object:Gem::Version
- version: 2.1.0
+ version: 3.0.0.beta1
  platform: ruby
  authors:
  - Elasticsearch
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2016-12-06 00:00:00.000000000 Z
+ date: 2016-01-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - "~>"
+ - - '>='
  - !ruby/object:Gem::Version
- version: '1.0'
- name: logstash-core-plugin-api
+ version: 2.0.0.beta2
+ - - <
+ - !ruby/object:Gem::Version
+ version: 3.0.0
+ name: logstash-core
  prerelease: false
  type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - "~>"
+ - - '>='
+ - !ruby/object:Gem::Version
+ version: 2.0.0.beta2
+ - - <
  - !ruby/object:Gem::Version
- version: '1.0'
+ version: 3.0.0
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  name: logstash-codec-json
@@ -35,13 +41,13 @@ dependencies:
  type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  name: logstash-codec-plain
@@ -49,16 +55,16 @@ dependencies:
  type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: 0.0.22
- - - "<"
+ - - <
  - !ruby/object:Gem::Version
  version: 0.1.0
  name: stud
@@ -66,30 +72,16 @@ dependencies:
  type: :runtime
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: 0.0.22
- - - "<"
+ - - <
  - !ruby/object:Gem::Version
  version: 0.1.0
  - !ruby/object:Gem::Dependency
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - '='
- - !ruby/object:Gem::Version
- version: 1.5.0
- name: jruby-kafka
- prerelease: false
- type: :runtime
- version_requirements: !ruby/object:Gem::Requirement
- requirements:
- - - '='
- - !ruby/object:Gem::Version
- version: 1.5.0
- - !ruby/object:Gem::Dependency
- requirement: !ruby/object:Gem::Requirement
- requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  name: logstash-devutils
@@ -97,10 +89,10 @@ dependencies:
  type: :development
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
- description: This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program
+ description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
  email: info@elastic.co
  executables: []
  extensions: []
@@ -113,9 +105,14 @@ files:
  - LICENSE
  - NOTICE.TXT
  - README.md
+ - lib/logstash-input-kafka_jars.rb
  - lib/logstash/inputs/kafka.rb
  - logstash-input-kafka.gemspec
- - spec/inputs/kafka_spec.rb
+ - spec/integration/inputs/kafka_spec.rb
+ - spec/unit/inputs/kafka_spec.rb
+ - vendor/jar-dependencies/runtime-jars/kafka-clients-0.9.0.0.jar
+ - vendor/jar-dependencies/runtime-jars/slf4j-api-1.7.13.jar
+ - vendor/jar-dependencies/runtime-jars/slf4j-noop-1.7.13.jar
  homepage: http://www.elastic.co/guide/en/logstash/current/index.html
  licenses:
  - Apache License (2.0)
@@ -128,19 +125,20 @@ require_paths:
  - lib
  required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>='
  - !ruby/object:Gem::Version
  version: '0'
  required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
- - - ">="
+ - - '>'
  - !ruby/object:Gem::Version
- version: '0'
+ version: 1.3.1
  requirements: []
  rubyforge_project:
- rubygems_version: 2.4.8
+ rubygems_version: 2.4.5
  signing_key:
  specification_version: 4
  summary: This input will read events from a Kafka topic. It uses the high level consumer API provided by Kafka to read messages from the broker
  test_files:
- - spec/inputs/kafka_spec.rb
+ - spec/integration/inputs/kafka_spec.rb
+ - spec/unit/inputs/kafka_spec.rb
data/spec/inputs/kafka_spec.rb DELETED
@@ -1,133 +0,0 @@
- # encoding: utf-8
- require "logstash/devutils/rspec/spec_helper"
- require "logstash/inputs/kafka"
- require 'jruby-kafka'
-
- class LogStash::Inputs::TestKafka < LogStash::Inputs::Kafka
- private
- def queue_event(msg, output_queue)
- super(msg, output_queue)
- do_stop
- end
- end
-
- class TestMessageAndMetadata
- attr_reader :topic, :partition, :key, :message, :offset
- def initialize(topic, partition, key, message, offset)
- @topic = topic
- @partition = partition
- @key = key
- @message = message
- @offset = offset
- end
- end
-
-
- class TestKafkaGroup < Kafka::Group
- def run(a_num_threads, a_queue)
- blah = TestMessageAndMetadata.new(@topic, 0, nil, 'Kafka message', 1)
- a_queue << blah
- end
- end
-
- class LogStash::Inputs::TestInfiniteKafka < LogStash::Inputs::Kafka
- private
- def queue_event(msg, output_queue)
- super(msg, output_queue)
- end
- end
-
- class TestInfiniteKafkaGroup < Kafka::Group
- def run(a_num_threads, a_queue)
- blah = TestMessageAndMetadata.new(@topic, 0, nil, 'Kafka message', 1)
- Thread.new do
- while true
- a_queue << blah
- sleep 10
- end
- end
- end
- end
-
- describe LogStash::Inputs::Kafka do
- let (:kafka_config) {{'topic_id' => 'test'}}
- let (:empty_config) {{}}
- let (:bad_kafka_config) {{'topic_id' => 'test', 'white_list' => 'other_topic'}}
- let (:white_list_kafka_config) {{'white_list' => 'other_topic'}}
- let (:decorated_kafka_config) {{'topic_id' => 'test', 'decorate_events' => true}}
-
- it "should register" do
- input = LogStash::Plugin.lookup("input", "kafka").new(kafka_config)
- expect {input.register}.to_not raise_error
- end
-
- it "should register with whitelist" do
- input = LogStash::Plugin.lookup("input", "kafka").new(white_list_kafka_config)
- expect {input.register}.to_not raise_error
- end
-
- it "should fail with multiple topic configs" do
- input = LogStash::Plugin.lookup("input", "kafka").new(empty_config)
- expect {input.register}.to raise_error
- end
-
- it "should fail without topic configs" do
- input = LogStash::Plugin.lookup("input", "kafka").new(bad_kafka_config)
- expect {input.register}.to raise_error
- end
-
- it_behaves_like "an interruptible input plugin" do
- let(:config) { kafka_config }
- let(:mock_kafka_plugin) { LogStash::Inputs::TestInfiniteKafka.new(config) }
-
- before :each do
- allow(LogStash::Inputs::Kafka).to receive(:new).and_return(mock_kafka_plugin)
- expect(subject).to receive(:create_consumer_group) do |options|
- TestInfiniteKafkaGroup.new(options)
- end
- end
- end
-
- it 'should populate kafka config with default values' do
- kafka = LogStash::Inputs::TestKafka.new(kafka_config)
- insist {kafka.zk_connect} == 'localhost:2181'
- insist {kafka.topic_id} == 'test'
- insist {kafka.group_id} == 'logstash'
- !insist { kafka.reset_beginning }
- end
-
- it 'should retrieve event from kafka' do
- kafka = LogStash::Inputs::TestKafka.new(kafka_config)
- expect(kafka).to receive(:create_consumer_group) do |options|
- TestKafkaGroup.new(options)
- end
- kafka.register
-
- logstash_queue = Queue.new
- kafka.run logstash_queue
- e = logstash_queue.pop
- insist { e['message'] } == 'Kafka message'
- # no metadata by default
- insist { e['kafka'] } == nil
- end
-
- it 'should retrieve a decorated event from kafka' do
- kafka = LogStash::Inputs::TestKafka.new(decorated_kafka_config)
- expect(kafka).to receive(:create_consumer_group) do |options|
- TestKafkaGroup.new(options)
- end
- kafka.register
-
- logstash_queue = Queue.new
- kafka.run logstash_queue
- e = logstash_queue.pop
- insist { e['message'] } == 'Kafka message'
- # no metadata by default
- insist { e['kafka']['topic'] } == 'test'
- insist { e['kafka']['consumer_group'] } == 'logstash'
- insist { e['kafka']['msg_size'] } == 13
- insist { e['kafka']['partition'] } == 0
- insist { e['kafka']['key'] } == nil
- insist { e['kafka']['offset'] } == 1
- end
- end