logstash-input-kafka 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +3 -0
- data/Gemfile +3 -0
- data/LICENSE +13 -0
- data/README.md +40 -0
- data/Rakefile +6 -0
- data/lib/logstash/inputs/kafka.rb +153 -0
- data/logstash-input-kafka.gemspec +32 -0
- data/rakelib/publish.rake +9 -0
- data/rakelib/vendor.rake +169 -0
- data/spec/inputs/kafka.rb +57 -0
- metadata +107 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: !binary |-
+    N2M1OTFjOGRmNTgyNWEyNjg3NTM0NDhkM2Y5NzNhNDUxYmQ5NjY1OQ==
+  data.tar.gz: !binary |-
+    NGYzOTRhZmYyMDExYzc1MTAxMGI1ODM3ODc4OWYwZGUyODI5NGY0Yw==
+SHA512:
+  metadata.gz: !binary |-
+    OGJhZTZlYWZmOTc1OWYxYzg1YWVjNzYzMzJjMzQ0MTgyODczYjlhODUyZDhl
+    Zjc2YWY5NmY4Y2NmNmM5MDJjNTI2ZmU0ZGE2MTIwMGNhZTk1MmM4NGMwZTY4
+    MzQ4MjE2N2Q4NWExYmIxODY3MzE4ZDk4YjdkNDU5ZGY0MTU2NWI=
+  data.tar.gz: !binary |-
+    YzY1Mjg1NjM2MmUwMTc5MzNlNWQyODRiNmQzMTEyYTQ2MDU2N2ZlZmZmZTMx
+    ZGRiNjRlNmRjZWU3MjcxZmE3NDFiMzAzOTUxOTNkNTQyYTljNjNhZTFhNjY4
+    ZjhiNzVlMmIzZmNmMzZmNzQxZTY5ZTY3MjY3OTFjZmY3NWU0ODM=
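The keys above are base64-encoded algorithm names (U0hBMQ== decodes to SHA1) and the values are base64-wrapped hex digests of the gem's metadata.gz and data.tar.gz members. As a rough verification sketch, not part of the package, and assuming the two members have already been extracted from the .gem archive into the current directory:

    # Sketch only: compare the recorded SHA512 digests against extracted gem members.
    require 'digest'
    require 'yaml'

    checksums = YAML.load_file('checksums.yaml')   # Psych decodes the !binary values into plain digest strings
    %w[metadata.gz data.tar.gz].each do |member|
      expected = checksums['SHA512'][member]       # hex digest recorded when the gem was built
      actual   = Digest::SHA512.file(member).hexdigest
      puts "#{member}: #{actual == expected ? 'ok' : 'MISMATCH'}"
    end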
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,13 @@
+Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
data/README.md
ADDED
@@ -0,0 +1,40 @@
+logstash-input-kafka
+====================
+
+Apache Kafka input for Logstash. This input will consume messages from a Kafka topic using the high level consumer API exposed by Kafka.
+
+For more information about Kafka, refer to this [documentation](http://kafka.apache.org/documentation.html).
+
+Information about the high level consumer API can be found [here](http://kafka.apache.org/documentation.html#highlevelconsumerapi).
+
+Logstash Configuration
+====================
+
+See http://kafka.apache.org/documentation.html#consumerconfigs for details about the Kafka consumer options.
+
+    input {
+        kafka {
+            topic_id => ... # string (required), The topic to consume messages from
+            zk_connect => ... # string (optional), default: "localhost:2181", Specifies the ZooKeeper connection string in the form hostname:port
+            group_id => ... # string (optional), default: "logstash", A string that uniquely identifies the group of consumer processes
+            reset_beginning => ... # boolean (optional), default: false, Specify whether to jump to beginning of the queue when there is no initial offset in ZK
+            consumer_threads => ... # number (optional), default: 1, Number of threads to read from the partitions
+            queue_size => ... # number (optional), default: 20, Internal Logstash queue size used to hold events in memory
+            rebalance_max_retries => ... # number (optional), default: 4
+            rebalance_backoff_ms => ... # number (optional), default: 2000
+            consumer_timeout_ms => ... # number (optional), default: -1
+            consumer_restart_on_error => ... # boolean (optional), default: true
+            consumer_restart_sleep_ms => ... # number (optional), default: 0
+            decorate_events => ... # boolean (optional), default: false, Option to add Kafka metadata like topic, message size to the event
+            consumer_id => ... # string (optional), default: nil
+            fetch_message_max_bytes => ... # number (optional), default: 1048576
+        }
+    }
+
+The default codec is json.
+
+Dependencies
+====================
+
+* Apache Kafka version 0.8.1.1
+* jruby-kafka library
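For illustration only, a filled-in configuration might look like the following; the ZooKeeper hosts, chroot path and topic name are hypothetical placeholders, not values shipped with the plugin:

    input {
        kafka {
            zk_connect       => "zk1:2181,zk2:2181,zk3:2181/kafka"
            topic_id         => "web_logs"
            group_id         => "logstash"
            consumer_threads => 2
            decorate_events  => true
        }
    }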
data/Rakefile
ADDED
data/lib/logstash/inputs/kafka.rb
ADDED
@@ -0,0 +1,153 @@
+require 'logstash/namespace'
+require 'logstash/inputs/base'
+require 'logstash-input-kafka_jars'
+
+# This input will read events from a Kafka topic. It uses the high level consumer API provided
+# by Kafka to read messages from the broker. It also maintains the state of what has been
+# consumed using Zookeeper. The default input codec is json
+#
+# The only required configuration is the topic name. By default it will connect to a Zookeeper
+# running on localhost. All the broker information is read from Zookeeper state
+#
+# Ideally you should have as many threads as the number of partitions for a perfect balance --
+# more threads than partitions means that some threads will be idle
+#
+# For more information see http://kafka.apache.org/documentation.html#theconsumer
+#
+# Kafka consumer configuration: http://kafka.apache.org/documentation.html#consumerconfigs
+#
+class LogStash::Inputs::Kafka < LogStash::Inputs::Base
+  config_name 'kafka'
+  milestone 1
+
+  default :codec, 'json'
+
+  # Specifies the ZooKeeper connection string in the form hostname:port where host and port are
+  # the host and port of a ZooKeeper server. You can also specify multiple hosts in the form
+  # hostname1:port1,hostname2:port2,hostname3:port3.
+  #
+  # The server may also have a ZooKeeper chroot path as part of its ZooKeeper connection string
+  # which puts its data under some path in the global ZooKeeper namespace. If so the consumer
+  # should use the same chroot path in its connection string. For example to give a chroot path of
+  # /chroot/path you would give the connection string as
+  # hostname1:port1,hostname2:port2,hostname3:port3/chroot/path.
+  config :zk_connect, :validate => :string, :default => 'localhost:2181'
+  # A string that uniquely identifies the group of consumer processes to which this consumer
+  # belongs. By setting the same group id multiple processes indicate that they are all part of
+  # the same consumer group.
+  config :group_id, :validate => :string, :default => 'logstash'
+  # The topic to consume messages from
+  config :topic_id, :validate => :string, :required => true
+  # Specify whether to jump to beginning of the queue when there is no initial offset in
+  # ZooKeeper, or if an offset is out of range. If this is false, messages are consumed
+  # from the latest offset
+  #
+  # If reset_beginning is true, the consumer will check ZooKeeper to see if any other group members
+  # are present and active. If not, the consumer deletes any offset information in the ZooKeeper
+  # and starts at the smallest offset. If other group members are present reset_beginning will not
+  # work and the consumer threads will rejoin the consumer group.
+  config :reset_beginning, :validate => :boolean, :default => false
+  # Number of threads to read from the partitions. Ideally you should have as many threads as the
+  # number of partitions for a perfect balance. More threads than partitions means that some
+  # threads will be idle. Fewer threads means a single thread could be consuming from more than
+  # one partition
+  config :consumer_threads, :validate => :number, :default => 1
+  # Internal Logstash queue size used to hold events in memory after it has been read from Kafka
+  config :queue_size, :validate => :number, :default => 20
+  # When a new consumer joins a consumer group the set of consumers attempt to "rebalance" the
+  # load to assign partitions to each consumer. If the set of consumers changes while this
+  # assignment is taking place the rebalance will fail and retry. This setting controls the
+  # maximum number of attempts before giving up.
+  config :rebalance_max_retries, :validate => :number, :default => 4
+  # Backoff time between retries during rebalance.
+  config :rebalance_backoff_ms, :validate => :number, :default => 2000
+  # Throw a timeout exception to the consumer if no message is available for consumption after
+  # the specified interval
+  config :consumer_timeout_ms, :validate => :number, :default => -1
+  # Option to restart the consumer loop on error
+  config :consumer_restart_on_error, :validate => :boolean, :default => true
+  # Time in millis to wait for consumer to restart after an error
+  config :consumer_restart_sleep_ms, :validate => :number, :default => 0
+  # Option to add Kafka metadata like topic, message size to the event
+  config :decorate_events, :validate => :boolean, :default => false
+  # A unique id for the consumer; generated automatically if not set.
+  config :consumer_id, :validate => :string, :default => nil
+  # The number of bytes of messages to attempt to fetch for each topic-partition in each fetch
+  # request. These bytes will be read into memory for each partition, so this helps control
+  # the memory used by the consumer. The fetch request size must be at least as large as the
+  # maximum message size the server allows or else it is possible for the producer to send
+  # messages larger than the consumer can fetch.
+  config :fetch_message_max_bytes, :validate => :number, :default => 1048576
+
+  public
+  def register
+    require 'jruby-kafka'
+    options = {
+      :zk_connect => @zk_connect,
+      :group_id => @group_id,
+      :topic_id => @topic_id,
+      :rebalance_max_retries => @rebalance_max_retries,
+      :rebalance_backoff_ms => @rebalance_backoff_ms,
+      :consumer_timeout_ms => @consumer_timeout_ms,
+      :consumer_restart_on_error => @consumer_restart_on_error,
+      :consumer_restart_sleep_ms => @consumer_restart_sleep_ms,
+      :consumer_id => @consumer_id,
+      :fetch_message_max_bytes => @fetch_message_max_bytes
+    }
+    if @reset_beginning
+      options[:reset_beginning] = 'from-beginning'
+    end # if :reset_beginning
+    @kafka_client_queue = SizedQueue.new(@queue_size)
+    @consumer_group = Kafka::Group.new(options)
+    @logger.info('Registering kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
+  end # def register
+
+  public
+  def run(logstash_queue)
+    # noinspection JRubyStringImportInspection
+    java_import 'kafka.common.ConsumerRebalanceFailedException'
+    @logger.info('Running kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
+    begin
+      @consumer_group.run(@consumer_threads, @kafka_client_queue)
+      begin
+        while true
+          event = @kafka_client_queue.pop
+          queue_event("#{event}", logstash_queue)
+        end
+      rescue LogStash::ShutdownSignal
+        @logger.info('Kafka got shutdown signal')
+        @consumer_group.shutdown
+      end
+      until @kafka_client_queue.empty?
+        queue_event("#{@kafka_client_queue.pop}", logstash_queue)
+      end
+      @logger.info('Done running kafka input')
+    rescue => e
+      @logger.warn('kafka client threw exception, restarting',
+                   :exception => e)
+      if @consumer_group.running?
+        @consumer_group.shutdown
+      end
+      sleep(Float(@consumer_restart_sleep_ms) * 1 / 1000)
+      retry
+    end
+    finished
+  end # def run
+
+  private
+  def queue_event(msg, output_queue)
+    begin
+      @codec.decode(msg) do |event|
+        decorate(event)
+        if @decorate_events
+          event['kafka'] = {:msg_size => msg.bytesize, :topic => @topic_id, :consumer_group => @group_id}
+        end
+        output_queue << event
+      end # @codec.decode
+    rescue => e # parse or event creation error
+      @logger.error('Failed to create event', :message => msg, :exception => e,
+                    :backtrace => e.backtrace)
+    end # begin
+  end # def queue_event
+
+end #class LogStash::Inputs::Kafka
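To make the behaviour of queue_event concrete: with the default json codec and decorate_events => true, a Kafka message whose payload is {"msg":"hello"} would be decoded into a Logstash event whose fields look roughly like the sketch below. The topic, consumer group and timestamp values are illustrative, taken from the configuration and the clock, not fixed by the plugin:

    {
        "msg"        => "hello",
        "@version"   => "1",
        "@timestamp" => "2014-11-05T00:00:00.000Z",
        "kafka"      => { :msg_size => 15, :topic => "web_logs", :consumer_group => "logstash" }
    }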
data/logstash-input-kafka.gemspec
ADDED
@@ -0,0 +1,32 @@
+Gem::Specification.new do |s|
+
+  s.name            = 'logstash-input-kafka'
+  s.version         = '0.1.0'
+  s.licenses        = ['Apache License (2.0)']
+  s.summary         = 'This input will read events from a Kafka topic. It uses the high level consumer API provided by Kafka to read messages from the broker'
+  s.description     = 'This input will read events from a Kafka topic. It uses the high level consumer API provided by Kafka to read messages from the broker'
+  s.authors         = ['Elasticsearch']
+  s.email           = 'richard.pijnenburg@elasticsearch.com'
+  s.homepage        = 'http://logstash.net/'
+  s.require_paths   = ['lib']
+
+  # Files
+  s.files = `git ls-files`.split($\)
+
+  # Tests
+  s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+  # Special flag to let us know this is actually a logstash plugin
+  s.metadata = { 'logstash_plugin' => 'true', 'group' => 'input' }
+
+  # Jar dependencies
+  s.requirements << "jar 'org.apache.kafka:kafka_2.10', '0.8.1.1'"
+
+  # Gem dependencies
+  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+  s.add_runtime_dependency 'jar-dependencies', ['~> 0.1.0']
+
+  s.add_runtime_dependency 'jruby-kafka', ['>= 0.2.1']
+
+end
+
data/rakelib/publish.rake
ADDED
@@ -0,0 +1,9 @@
+require "gem_publisher"
+
+desc "Publish gem to RubyGems.org"
+task :publish_gem do |t|
+  gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+  gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+  puts "Published #{gem}" if gem
+end
+
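With RubyGems credentials configured, the task above would typically be invoked from the plugin root as shown below (usage note only, not part of the package):

    rake publish_gem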
data/rakelib/vendor.rake
ADDED
@@ -0,0 +1,169 @@
+require "net/http"
+require "uri"
+require "digest/sha1"
+
+def vendor(*args)
+  return File.join("vendor", *args)
+end
+
+directory "vendor/" => ["vendor"] do |task, args|
+  mkdir task.name
+end
+
+def fetch(url, sha1, output)
+
+  puts "Downloading #{url}"
+  actual_sha1 = download(url, output)
+
+  if actual_sha1 != sha1
+    fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+  end
+end # def fetch
+
+def file_fetch(url, sha1)
+  filename = File.basename(URI(url).path)
+  output = "vendor/#{filename}"
+  task output => [ "vendor/" ] do
+    begin
+      actual_sha1 = file_sha1(output)
+      if actual_sha1 != sha1
+        fetch(url, sha1, output)
+      end
+    rescue Errno::ENOENT
+      fetch(url, sha1, output)
+    end
+  end.invoke
+
+  return output
+end
+
+def file_sha1(path)
+  digest = Digest::SHA1.new
+  fd = File.new(path, "r")
+  while true
+    begin
+      digest << fd.sysread(16384)
+    rescue EOFError
+      break
+    end
+  end
+  return digest.hexdigest
+ensure
+  fd.close if fd
+end
+
+def download(url, output)
+  uri = URI(url)
+  digest = Digest::SHA1.new
+  tmp = "#{output}.tmp"
+  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+    request = Net::HTTP::Get.new(uri.path)
+    http.request(request) do |response|
+      fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+      size = (response["content-length"].to_i || -1).to_f
+      count = 0
+      File.open(tmp, "w") do |fd|
+        response.read_body do |chunk|
+          fd.write(chunk)
+          digest << chunk
+          if size > 0 && $stdout.tty?
+            count += chunk.bytesize
+            $stdout.write(sprintf("\r%0.2f%%", count / size * 100))
+          end
+        end
+      end
+      $stdout.write("\r \r") if $stdout.tty?
+    end
+  end
+
+  File.rename(tmp, output)
+
+  return digest.hexdigest
+rescue SocketError => e
+  puts "Failure while downloading #{url}: #{e}"
+  raise
+ensure
+  File.unlink(tmp) if File.exist?(tmp)
+end # def download
+
+def untar(tarball, &block)
+  require "archive/tar/minitar"
+  tgz = Zlib::GzipReader.new(File.open(tarball))
+  # Pull out typesdb
+  tar = Archive::Tar::Minitar::Input.open(tgz)
+  tar.each do |entry|
+    path = block.call(entry)
+    next if path.nil?
+    parent = File.dirname(path)
+
+    mkdir_p parent unless File.directory?(parent)
+
+    # Skip this file if the output file is the same size
+    if entry.directory?
+      mkdir path unless File.directory?(path)
+    else
+      entry_mode = entry.instance_eval { @mode } & 0777
+      if File.exists?(path)
+        stat = File.stat(path)
+        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+        # expose headers in the entry.
+        entry_size = entry.instance_eval { @size }
+        # If file sizes are same, skip writing.
+        next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+      end
+      puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+      File.open(path, "w") do |fd|
+        # eof? check lets us skip empty files. Necessary because the API provided by
+        # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+        # IO object. Something about empty files in this EntryStream causes
+        # IO.copy_stream to throw "can't convert nil into String" on JRuby
+        # TODO(sissel): File a bug about this.
+        while !entry.eof?
+          chunk = entry.read(16384)
+          fd.write(chunk)
+        end
+        #IO.copy_stream(entry, fd)
+      end
+      File.chmod(entry_mode, path)
+    end
+  end
+  tar.close
+  File.unlink(tarball) if File.file?(tarball)
+end # def untar
+
+def ungz(file)
+
+  outpath = file.gsub('.gz', '')
+  tgz = Zlib::GzipReader.new(File.open(file))
+  begin
+    File.open(outpath, "w") do |out|
+      IO::copy_stream(tgz, out)
+    end
+    File.unlink(file)
+  rescue
+    File.unlink(outpath) if File.file?(outpath)
+    raise
+  end
+  tgz.close
+end
+
+desc "Process any vendor files required for this plugin"
+task "vendor" do |task, args|
+
+  @files.each do |file|
+    download = file_fetch(file['url'], file['sha1'])
+    if download =~ /.tar.gz/
+      prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+      untar(download) do |entry|
+        if !file['files'].nil?
+          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+          out = entry.full_name.split("/").last
+        end
+        File.join('vendor', out)
+      end
+    elsif download =~ /.gz/
+      ungz(download)
+    end
+  end
+
+end
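The vendor task above iterates over an @files list that is not defined in this file; it is expected to be provided elsewhere (for example by the Rakefile, whose contents are not shown in this diff). A hypothetical entry matching the keys the task reads (url, sha1 and an optional files whitelist) might look like this; the URL, SHA1 and file list are placeholders, not values from this package:

    # Hypothetical @files definition consumed by the "vendor" task above.
    @files = [
      {
        'url'   => 'https://example.org/dist/some-dependency-1.0.tar.gz',
        'sha1'  => '0000000000000000000000000000000000000000',
        'files' => ['/lib/some-dependency.jar']  # matched against tarball paths with the top-level prefix stripped
      }
    ]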
data/spec/inputs/kafka.rb
ADDED
@@ -0,0 +1,57 @@
+# encoding: utf-8
+
+require 'rspec'
+require 'insist'
+require 'logstash/namespace'
+require 'logstash/inputs/kafka'
+require 'logstash/errors'
+
+describe LogStash::Inputs::Kafka do
+  extend LogStash::RSpec
+
+  let (:kafka_config) {{:topic_id => 'test'}}
+
+  it 'should populate kafka config with default values' do
+    kafka = LogStash::Inputs::Kafka.new(kafka_config)
+    insist { kafka.zk_connect } == 'localhost:2181'
+    insist { kafka.topic_id } == 'test'
+    insist { kafka.group_id } == 'logstash'
+    !insist { kafka.reset_beginning }
+  end
+
+  it 'should register and load kafka jars without errors' do
+    kafka = LogStash::Inputs::Kafka.new(kafka_config)
+    kafka.register
+  end
+
+  it 'should retrieve event from kafka' do
+    # Extend class to control behavior
+    class LogStash::Inputs::TestKafka < LogStash::Inputs::Kafka
+      milestone 1
+      private
+      def queue_event(msg, output_queue)
+        super(msg, output_queue)
+        # need to raise exception here to stop the infinite loop
+        raise LogStash::ShutdownSignal
+      end
+    end
+
+    kafka = LogStash::Inputs::TestKafka.new(kafka_config)
+    kafka.register
+
+    class Kafka::Group
+      public
+      def run(a_num_threads, a_queue)
+        a_queue << 'Kafka message'
+      end
+    end
+
+    logstash_queue = Queue.new
+    kafka.run logstash_queue
+    e = logstash_queue.pop
+    insist { e['message'] } == 'Kafka message'
+    # no metadata by default
+    insist { e['kafka'] } == nil
+  end
+
+end
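A hedged usage note, not part of the package: assuming JRuby with the rspec, insist, logstash and jruby-kafka gems available (the gemspec declares no development dependencies), the suite would typically be run from the plugin root as:

    bundle exec rspec spec/inputs/kafka.rb

The second and third examples call register, so the Kafka jars referenced by the gemspec's jar requirement must be resolvable on the load path.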
metadata
ADDED
@@ -0,0 +1,107 @@
+--- !ruby/object:Gem::Specification
+name: logstash-input-kafka
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+platform: ruby
+authors:
+- Elasticsearch
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-11-05 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: logstash
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.4.0
+    - - <
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 1.4.0
+    - - <
+      - !ruby/object:Gem::Version
+        version: 2.0.0
+- !ruby/object:Gem::Dependency
+  name: jar-dependencies
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.1.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.1.0
+- !ruby/object:Gem::Dependency
+  name: jruby-kafka
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+description: This input will read events from a Kafka topic. It uses the high level
+  consumer API provided by Kafka to read messages from the broker
+email: richard.pijnenburg@elasticsearch.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .gitignore
+- Gemfile
+- LICENSE
+- README.md
+- Rakefile
+- lib/logstash/inputs/kafka.rb
+- logstash-input-kafka.gemspec
+- rakelib/publish.rake
+- rakelib/vendor.rake
+- spec/inputs/kafka.rb
+homepage: http://logstash.net/
+licenses:
+- Apache License (2.0)
+metadata:
+  logstash_plugin: 'true'
+  group: input
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements:
+- jar 'org.apache.kafka:kafka_2.10', '0.8.1.1'
+rubyforge_project:
+rubygems_version: 2.4.1
+signing_key:
+specification_version: 4
+summary: This input will read events from a Kafka topic. It uses the high level consumer
+  API provided by Kafka to read messages from the broker
+test_files:
+- spec/inputs/kafka.rb