logstash-kafka 0.7.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/logstash/inputs/kafka.rb +152 -0
- data/lib/logstash/outputs/kafka.rb +159 -0
- metadata +60 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 00adb861cf25fabd675f94d437b0e4dc36a7fbad
|
4
|
+
data.tar.gz: ce5d97a0e3b9081bb60f595364da29125e2b6a5d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1b0675791f6af79c8cc64371787cb322ba108bdc2d431cf55bf49a543b92a8d4cab09efeb593803b50b05d7dd88f6f3404d76f3207de665c816d5b061183abd2
|
7
|
+
data.tar.gz: 919df956e078237ee9f01de276227b5f61ab454faf830e794962c76fc2132ce0cd928137aeaa648ef36e4fc8e2e9b66a3e893101cde6f150595ac017d401edec
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'logstash/namespace'
|
2
|
+
require 'logstash/inputs/base'
|
3
|
+
require 'jruby-kafka'
|
4
|
+
|
5
|
+
# This input will read events from a Kafka topic. It uses the high level consumer API provided
|
6
|
+
# by Kafka to read messages from the broker. It also maintains the state of what has been
|
7
|
+
# consumed using Zookeeper. The default input codec is json
|
8
|
+
#
|
9
|
+
# The only required configuration is the topic name. By default it will connect to a Zookeeper
|
10
|
+
# running on localhost. All the broker information is read from Zookeeper state
|
11
|
+
#
|
12
|
+
# Ideally you should have as many threads as the number of partitions for a perfect balance --
|
13
|
+
# more threads than partitions means that some threads will be idle
|
14
|
+
#
|
15
|
+
# For more information see http://kafka.apache.org/documentation.html#theconsumer
|
16
|
+
#
|
17
|
+
# Kafka consumer configuration: http://kafka.apache.org/documentation.html#consumerconfigs
|
18
|
+
#
|
19
|
+
class LogStash::Inputs::Kafka < LogStash::Inputs::Base
  config_name 'kafka'
  milestone 1

  # Incoming Kafka messages are decoded as JSON unless another codec is configured.
  default :codec, 'json'

  # Specifies the ZooKeeper connection string in the form hostname:port where host and port are
  # the host and port of a ZooKeeper server. You can also specify multiple hosts in the form
  # hostname1:port1,hostname2:port2,hostname3:port3.
  #
  # The server may also have a ZooKeeper chroot path as part of its ZooKeeper connection string
  # which puts its data under some path in the global ZooKeeper namespace. If so the consumer
  # should use the same chroot path in its connection string. For example to give a chroot path of
  # /chroot/path you would give the connection string as
  # hostname1:port1,hostname2:port2,hostname3:port3/chroot/path.
  config :zk_connect, :validate => :string, :default => 'localhost:2181'
  # A string that uniquely identifies the group of consumer processes to which this consumer
  # belongs. By setting the same group id multiple processes indicate that they are all part of
  # the same consumer group.
  config :group_id, :validate => :string, :default => 'logstash'
  # The topic to consume messages from
  config :topic_id, :validate => :string, :required => true
  # Specify whether to jump to beginning of the queue when there is no initial offset in
  # ZooKeeper, or if an offset is out of range. If this is false, messages are consumed
  # from the latest offset
  #
  # If reset_beginning is true, the consumer will check ZooKeeper to see if any other group members
  # are present and active. If not, the consumer deletes any offset information in the ZooKeeper
  # and starts at the smallest offset. If other group members are present reset_beginning will not
  # work and the consumer threads will rejoin the consumer group.
  config :reset_beginning, :validate => :boolean, :default => false
  # Number of threads to read from the partitions. Ideally you should have as many threads as the
  # number of partitions for a perfect balance. More threads than partitions means that some
  # threads will be idle. Less threads means a single thread could be consuming from more than
  # one partition
  config :consumer_threads, :validate => :number, :default => 1
  # Internal Logstash queue size used to hold events in memory after it has been read from Kafka
  config :queue_size, :validate => :number, :default => 20
  # When a new consumer joins a consumer group the set of consumers attempt to "rebalance" the
  # load to assign partitions to each consumer. If the set of consumers changes while this
  # assignment is taking place the rebalance will fail and retry. This setting controls the
  # maximum number of attempts before giving up.
  config :rebalance_max_retries, :validate => :number, :default => 4
  # Backoff time between retries during rebalance.
  config :rebalance_backoff_ms, :validate => :number, :default => 2000
  # Throw a timeout exception to the consumer if no message is available for consumption after
  # the specified interval
  config :consumer_timeout_ms, :validate => :number, :default => -1
  # Option to restart the consumer loop on error
  config :consumer_restart_on_error, :validate => :boolean, :default => true
  # Time in millis to wait for consumer to restart after an error
  config :consumer_restart_sleep_ms, :validate => :number, :default => 0
  # Option to add Kafka metadata like topic, message size to the event
  config :decorate_events, :validate => :boolean, :default => false
  # A unique id for the consumer; generated automatically if not set.
  config :consumer_id, :validate => :string, :default => nil
  # The number of bytes of messages to attempt to fetch for each topic-partition in each fetch
  # request. These bytes will be read into memory for each partition, so this helps control
  # the memory used by the consumer. The fetch request size must be at least as large as the
  # maximum message size the server allows or else it is possible for the producer to send
  # messages larger than the consumer can fetch.
  config :fetch_message_max_bytes, :validate => :number, :default => 1048576

  public
  # Translates the plugin configuration into the option hash expected by the
  # jruby-kafka consumer group, creates the bounded hand-off queue between the
  # Kafka consumer threads and this input, and instantiates the (not yet started)
  # Kafka::Group. Consumption itself begins in #run.
  def register
    LogStash::Logger.setup_log4j(@logger)
    options = {
      :zk_connect => @zk_connect,
      :group_id => @group_id,
      :topic_id => @topic_id,
      :rebalance_max_retries => @rebalance_max_retries,
      :rebalance_backoff_ms => @rebalance_backoff_ms,
      :consumer_timeout_ms => @consumer_timeout_ms,
      :consumer_restart_on_error => @consumer_restart_on_error,
      :consumer_restart_sleep_ms => @consumer_restart_sleep_ms,
      :consumer_id => @consumer_id,
      :fetch_message_max_bytes => @fetch_message_max_bytes
    }
    if @reset_beginning
      # jruby-kafka expects the literal string 'from-beginning' here, not a boolean.
      options[:reset_beginning] = 'from-beginning'
    end # if :reset_beginning
    # SizedQueue bounds memory use: the Kafka threads block once @queue_size
    # messages are waiting to be drained by the pipeline.
    @kafka_client_queue = SizedQueue.new(@queue_size)
    @consumer_group = Kafka::Group.new(options)
    @logger.info('Registering kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
  end # def register

  public
  # Main input loop. Starts the consumer threads, then blocks popping messages off
  # the internal queue and pushing decoded events into logstash_queue until a
  # shutdown signal arrives. On shutdown the internal queue is drained so already
  # fetched messages are not lost. Any other error shuts the consumer down,
  # sleeps @consumer_restart_sleep_ms, and retries the whole loop.
  def run(logstash_queue)
    # Imported so rebalance failures raised by the Kafka client resolve here;
    # not referenced directly below.
    java_import 'kafka.common.ConsumerRebalanceFailedException'
    @logger.info('Running kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
    begin
      @consumer_group.run(@consumer_threads,@kafka_client_queue)
      begin
        while true
          event = @kafka_client_queue.pop
          # "#{event}" coerces the (possibly Java) message payload to a Ruby string
          # before codec decoding.
          queue_event("#{event}",logstash_queue)
        end
      rescue LogStash::ShutdownSignal
        @logger.info('Kafka got shutdown signal')
        @consumer_group.shutdown
      end
      # Drain whatever the consumer threads enqueued before shutdown completed.
      until @kafka_client_queue.empty?
        queue_event("#{@kafka_client_queue.pop}",logstash_queue)
      end
      @logger.info('Done running kafka input')
    rescue => e
      @logger.warn('kafka client threw exception, restarting',
                   :exception => e)
      if @consumer_group.running?
        @consumer_group.shutdown
      end
      # Config value is in milliseconds; sleep takes seconds.
      sleep(Float(@consumer_restart_sleep_ms) * 1 / 1000)
      retry
    end
    finished
  end # def run

  private
  # Decodes one raw Kafka message with the configured codec and pushes each
  # resulting event onto output_queue. When decorate_events is set, a 'kafka'
  # field with message size, topic and consumer group is attached. Decode or
  # event-creation failures are logged and the message is dropped, so one bad
  # message cannot kill the input.
  def queue_event(msg, output_queue)
    begin
      @codec.decode(msg) do |event|
        decorate(event)
        if @decorate_events
          event['kafka'] = {'msg_size' => msg.bytesize, 'topic' => @topic_id, 'consumer_group' => @group_id}
        end
        output_queue << event
      end # @codec.decode
    rescue => e # parse or event creation error
      @logger.error('Failed to create event', :message => msg, :exception => e,
                    :backtrace => e.backtrace)
    end # begin
  end # def queue_event

end #class LogStash::Inputs::Kafka
|
@@ -0,0 +1,159 @@
|
|
1
|
+
require 'logstash/namespace'
|
2
|
+
require 'logstash/outputs/base'
|
3
|
+
require 'jruby-kafka'
|
4
|
+
|
5
|
+
# Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on
|
6
|
+
# the broker.
|
7
|
+
#
|
8
|
+
# The only required configuration is the topic name. The default codec is json,
|
9
|
+
# so events will be persisted on the broker in json format. If you select a codec of plain,
|
10
|
+
# Logstash will encode your messages with not only the message but also with a timestamp and
|
11
|
+
# hostname. If you do not want anything but your message passing through, you should make the output
|
12
|
+
# configuration something like:
|
13
|
+
# output {
|
14
|
+
# kafka {
|
15
|
+
# codec => plain {
|
16
|
+
# format => "%{message}"
|
17
|
+
# }
|
18
|
+
# }
|
19
|
+
# }
|
20
|
+
# For more information see http://kafka.apache.org/documentation.html#theproducer
|
21
|
+
#
|
22
|
+
# Kafka producer configuration: http://kafka.apache.org/documentation.html#producerconfigs
|
23
|
+
class LogStash::Outputs::Kafka < LogStash::Outputs::Base
  config_name 'kafka'
  milestone 1

  # Events are serialized to JSON on the broker unless another codec is configured.
  default :codec, 'json'
  # This is for bootstrapping and the producer will only use it for getting metadata (topics,
  # partitions and replicas). The socket connections for sending the actual data will be
  # established based on the broker information returned in the metadata. The format is
  # host1:port1,host2:port2, and the list can be a subset of brokers or a VIP pointing to a
  # subset of brokers.
  config :broker_list, :validate => :string, :default => 'localhost:9092'
  # The topic to produce the messages to
  config :topic_id, :validate => :string, :required => true
  # This parameter allows you to specify the compression codec for all data generated by this
  # producer. Valid values are "none", "gzip" and "snappy".
  config :compression_codec, :validate => %w( none gzip snappy ), :default => 'none'
  # This parameter allows you to set whether compression should be turned on for particular
  # topics. If the compression codec is anything other than NoCompressionCodec,
  # enable compression only for specified topics if any. If the list of compressed topics is
  # empty, then enable the specified compression codec for all topics. If the compression codec
  # is NoCompressionCodec, compression is disabled for all topics
  config :compressed_topics, :validate => :string, :default => ''
  # This value controls when a produce request is considered completed. Specifically,
  # how many other brokers must have committed the data to their log and acknowledged this to the
  # leader. For more info, see -- http://kafka.apache.org/documentation.html#producerconfigs
  config :request_required_acks, :validate => [-1,0,1], :default => 0
  # The serializer class for messages. The default encoder takes a byte[] and returns the same byte[]
  config :serializer_class, :validate => :string, :default => 'kafka.serializer.StringEncoder'
  # The partitioner class for partitioning messages amongst partitions in the topic. The default
  # partitioner is based on the hash of the key. If the key is null,
  # the message is sent to a random partition in the broker.
  # NOTE: topic_metadata_refresh_interval_ms controls how long the producer will distribute to a
  # partition in the topic. This defaults to 10 mins, so the producer will continue to write to a
  # single partition for 10 mins before it switches
  config :partitioner_class, :validate => :string, :default => 'kafka.producer.DefaultPartitioner'
  # The amount of time the broker will wait trying to meet the request.required.acks requirement
  # before sending back an error to the client.
  config :request_timeout_ms, :validate => :number, :default => 10000
  # This parameter specifies whether the messages are sent asynchronously in a background thread.
  # Valid values are (1) async for asynchronous send and (2) sync for synchronous send. By
  # setting the producer to async we allow batching together of requests (which is great for
  # throughput) but open the possibility of a failure of the client machine dropping unsent data.
  config :producer_type, :validate => %w( sync async ), :default => 'sync'
  # The serializer class for keys (defaults to the same as for messages if nothing is given)
  config :key_serializer_class, :validate => :string, :default => nil
  # This property will cause the producer to automatically retry a failed send request. This
  # property specifies the number of retries when such failures occur. Note that setting a
  # non-zero value here can lead to duplicates in the case of network errors that cause a message
  # to be sent but the acknowledgement to be lost.
  config :message_send_max_retries, :validate => :number, :default => 3
  # Before each retry, the producer refreshes the metadata of relevant topics to see if a new
  # leader has been elected. Since leader election takes a bit of time,
  # this property specifies the amount of time that the producer waits before refreshing the
  # metadata.
  config :retry_backoff_ms, :validate => :number, :default => 100
  # The producer generally refreshes the topic metadata from brokers when there is a failure
  # (partition missing, leader not available...). It will also poll regularly (default: every
  # 10min so 600000ms). If you set this to a negative value, metadata will only get refreshed on
  # failure. If you set this to zero, the metadata will get refreshed after each message sent
  # (not recommended). Important note: the refresh happen only AFTER the message is sent,
  # so if the producer never sends a message the metadata is never refreshed
  config :topic_metadata_refresh_interval_ms, :validate => :number, :default => 600 * 1000
  # Maximum time to buffer data when using async mode. For example a setting of 100 will try to
  # batch together 100ms of messages to send at once. This will improve throughput but adds
  # message delivery latency due to the buffering.
  config :queue_buffering_max_ms, :validate => :number, :default => 5000
  # The maximum number of unsent messages that can be queued up the producer when using async
  # mode before either the producer must be blocked or data must be dropped.
  config :queue_buffering_max_messages, :validate => :number, :default => 10000
  # The amount of time to block before dropping messages when running in async mode and the
  # buffer has reached queue.buffering.max.messages. If set to 0 events will be enqueued
  # immediately or dropped if the queue is full (the producer send call will never block). If set
  # to -1 the producer will block indefinitely and never willingly drop a send.
  config :queue_enqueue_timeout_ms, :validate => :number, :default => -1
  # The number of messages to send in one batch when using async mode. The producer will wait
  # until either this number of messages are ready to send or queue.buffer.max.ms is reached.
  config :batch_num_messages, :validate => :number, :default => 200
  # Socket write buffer size
  config :send_buffer_bytes, :validate => :number, :default => 100 * 1024
  # The client id is a user-specified string sent in each request to help trace calls. It should
  # logically identify the application making the request.
  config :client_id, :validate => :string, :default => ''

  public
  # Builds the jruby-kafka producer options from the plugin config, creates and
  # connects the Kafka::Producer, and wires the codec's on_event callback so that
  # each encoded event is sent to @topic_id (with a nil key, so the partitioner
  # picks the partition). Send failures are logged, not raised, so one bad send
  # does not kill the pipeline.
  def register
    LogStash::Logger.setup_log4j(@logger)
    options = {
      :broker_list => @broker_list,
      :compression_codec => @compression_codec,
      :compressed_topics => @compressed_topics,
      :request_required_acks => @request_required_acks,
      :serializer_class => @serializer_class,
      :partitioner_class => @partitioner_class,
      :request_timeout_ms => @request_timeout_ms,
      :producer_type => @producer_type,
      :key_serializer_class => @key_serializer_class,
      :message_send_max_retries => @message_send_max_retries,
      :retry_backoff_ms => @retry_backoff_ms,
      :topic_metadata_refresh_interval_ms => @topic_metadata_refresh_interval_ms,
      :queue_buffering_max_ms => @queue_buffering_max_ms,
      :queue_buffering_max_messages => @queue_buffering_max_messages,
      :queue_enqueue_timeout_ms => @queue_enqueue_timeout_ms,
      :batch_num_messages => @batch_num_messages,
      :send_buffer_bytes => @send_buffer_bytes,
      :client_id => @client_id
    }
    @producer = Kafka::Producer.new(options)
    @producer.connect

    @logger.info('Registering kafka producer', :topic_id => @topic_id, :broker_list => @broker_list)

    @codec.on_event do |event|
      begin
        @producer.send_msg(@topic_id,nil,event)
      rescue LogStash::ShutdownSignal
        @logger.info('Kafka producer got shutdown signal')
      rescue => e
        @logger.warn('kafka producer threw exception, restarting',
                     :exception => e)
      end
    end
  end # def register

  # Receives one event from the pipeline. The special SHUTDOWN marker event ends
  # the output; any other event is passed to the codec, whose on_event callback
  # (installed in #register) performs the actual Kafka send.
  def receive(event)
    return unless output?(event)
    if event == LogStash::SHUTDOWN
      finished
      return
    end
    @codec.encode(event)
  end

  # Closes the producer so buffered (async-mode) messages are flushed and the
  # broker connections are released.
  def teardown
    @producer.close
  end

end #class LogStash::Outputs::Kafka
|
metadata
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: logstash-kafka
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.7.0
|
5
|
+
platform: java
|
6
|
+
authors:
|
7
|
+
- Joseph Lawson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-01-20 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: jruby-kafka
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.0.0.beta
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.0.0.beta
|
27
|
+
description: this is primarily to be used as an interface for logstash
|
28
|
+
email:
|
29
|
+
- joe@joekiller.com
|
30
|
+
executables: []
|
31
|
+
extensions: []
|
32
|
+
extra_rdoc_files: []
|
33
|
+
files:
|
34
|
+
- lib/logstash/inputs/kafka.rb
|
35
|
+
- lib/logstash/outputs/kafka.rb
|
36
|
+
homepage: https://github.com/joekiller/jruby-kafka
|
37
|
+
licenses:
|
38
|
+
- Apache 2.0
|
39
|
+
metadata: {}
|
40
|
+
post_install_message:
|
41
|
+
rdoc_options: []
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: '0'
|
49
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
requirements: []
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 2.2.2
|
57
|
+
signing_key:
|
58
|
+
specification_version: 4
|
59
|
+
summary: jruby Kafka wrapper
|
60
|
+
test_files: []
|