logstash-output-kafka 0.1.0

checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     YzJlOTQxODRkY2NhZjJlOWZkMGViYjg5ODJmMzExMmZiNmFmYTQyMg==
+   data.tar.gz: !binary |-
+     NzAzMjUyYjQ3Y2E1ODE0YzhhNzVlZDM0YjNmMThjMTYyOGRiYjRjMQ==
+ SHA512:
+   metadata.gz: !binary |-
+     ZGEwOWFkM2E1NWE1NzE2NTllZWM3MWExMzU3ZjA5ODQwYzk2ODUzODRhZDY0
+     YmIzMDYxYzFkOTMzZDA1OGJjMWU1MDM5MTJmMWU0ODQ0NWM1ZGE0OGE0MDdi
+     NjZkNDFkYzRiZWM2ODkwN2I4NjQwNzVhNmIzNWE1ZDA1ZGMzYTg=
+   data.tar.gz: !binary |-
+     YmM3NjhlMmRjMTViYjhiZTUzYWI2YmMwMWVlZjQ5MTNlNDAzNTM5NTZjNDQ2
+     ZDk3ZjgzMjAyYTgwYmU0NDhlYTUxZTZlN2YwY2NhZTU4OTA1OTMyNWFiYTFj
+     NDgyOGU5M2UzZGEwNGI2ODkyYTM2N2I4M2I3YWQxOGU1Y2UxZWY=
data/.gitignore ADDED
@@ -0,0 +1,3 @@
+ *.gem
+ Gemfile.lock
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+     http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,56 @@
+ logstash-output-kafka
+ ====================
+ 
+ Apache Kafka output for Logstash. This output produces messages to a Kafka topic using the producer API exposed by Kafka.
+ 
+ For more information about Kafka, refer to the [Kafka documentation](http://kafka.apache.org/documentation.html).
+ 
+ Information about the producer API can be found [here](http://kafka.apache.org/documentation.html#apidesign).
+ 
+ Logstash Configuration
+ ====================
+ 
+ See http://kafka.apache.org/documentation.html#producerconfigs for details about the Kafka producer options.
+ 
+     output {
+       kafka {
+         topic_id => ...                            # string (required), the topic to produce the messages to
+         broker_list => ...                         # string (optional), default: "localhost:9092", used only for bootstrapping; the producer uses it to fetch metadata
+         compression_codec => ...                   # string (optional), one of ["none", "gzip", "snappy"], default: "none"
+         compressed_topics => ...                   # string (optional), default: "", sets whether compression should be turned on for particular topics only
+         request_required_acks => ...               # number (optional), one of [-1, 0, 1], default: 0, controls when a produce request is considered completed
+         serializer_class => ...                    # string (optional), default: "kafka.serializer.StringEncoder", the serializer class for messages; the default encoder takes a byte[] and returns the same byte[]
+         partitioner_class => ...                   # string (optional), default: "kafka.producer.DefaultPartitioner"
+         request_timeout_ms => ...                  # number (optional), default: 10000
+         producer_type => ...                       # string (optional), one of ["sync", "async"], default: "sync"
+         key_serializer_class => ...                # string (optional), default: nil
+         message_send_max_retries => ...            # number (optional), default: 3
+         retry_backoff_ms => ...                    # number (optional), default: 100
+         topic_metadata_refresh_interval_ms => ...  # number (optional), default: 600 * 1000
+         queue_buffering_max_ms => ...              # number (optional), default: 5000
+         queue_buffering_max_messages => ...        # number (optional), default: 10000
+         queue_enqueue_timeout_ms => ...            # number (optional), default: -1
+         batch_num_messages => ...                  # number (optional), default: 200
+         send_buffer_bytes => ...                   # number (optional), default: 100 * 1024
+         client_id => ...                           # string (optional), default: ""
+       }
+     }
+ 
+ The default codec for outputs is json. If you select a codec of plain, Logstash will encode your
+ messages with not only the message itself but also a timestamp and hostname. If you want nothing
+ but your message passing through, you should make the output configuration something like:
+ 
+     output {
+       kafka {
+         codec => plain {
+           format => "%{message}"
+         }
+       }
+     }
+ 
+ 
+ Dependencies
+ ====================
+ 
+ * Apache Kafka version 0.8.1.1
+ * jruby-kafka library
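
For reference, here is what a filled-in configuration might look like. This is a sketch only; the topic name, broker addresses, and tuning choices below are illustrative, not values shipped with the gem:

    output {
      kafka {
        topic_id => "apache_logs"                  # illustrative topic name
        broker_list => "kafka1:9092,kafka2:9092"   # illustrative broker addresses
        compression_codec => "snappy"
        producer_type => "async"
        codec => plain {
          format => "%{message}"
        }
      }
    }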
data/Rakefile ADDED
@@ -0,0 +1,11 @@
+ require 'gem_publisher'
+ 
+ desc 'Publish gem to RubyGems.org'
+ task :publish_gem do |t|
+   gem = GemPublisher.publish_if_updated('logstash-output-kafka.gemspec', :rubygems)
+   puts "Published #{gem}" if gem
+ end
+ 
+ task :default do
+   system('rake -T')
+ end
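
The publish task is run as `rake publish_gem`. `GemPublisher.publish_if_updated` (from the gem_publisher gem) builds and pushes the gem only when the version in the gemspec is not already on RubyGems, and returns the built gem file name (or nil), which is what drives the conditional `puts`.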
data/lib/logstash/outputs/kafka.rb ADDED
@@ -0,0 +1,158 @@
+ require 'logstash/namespace'
+ require 'logstash/outputs/base'
+ require 'logstash-output-kafka_jars'
+ 
+ # Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on
+ # the broker.
+ #
+ # The only required configuration is the topic name. The default codec is json,
+ # so events will be persisted on the broker in json format. If you select a codec of plain,
+ # Logstash will encode your messages with not only the message itself but also a timestamp and
+ # hostname. If you want nothing but your message passing through, you should make the output
+ # configuration something like:
+ #     output {
+ #       kafka {
+ #         codec => plain {
+ #           format => "%{message}"
+ #         }
+ #       }
+ #     }
+ # For more information see http://kafka.apache.org/documentation.html#theproducer
+ #
+ # Kafka producer configuration: http://kafka.apache.org/documentation.html#producerconfigs
+ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
+   config_name 'kafka'
+   milestone 1
+ 
+   default :codec, 'json'
+   # This is for bootstrapping and the producer will only use it for getting metadata (topics,
+   # partitions and replicas). The socket connections for sending the actual data will be
+   # established based on the broker information returned in the metadata. The format is
+   # host1:port1,host2:port2, and the list can be a subset of brokers or a VIP pointing to a
+   # subset of brokers.
+   config :broker_list, :validate => :string, :default => 'localhost:9092'
+   # The topic to produce the messages to
+   config :topic_id, :validate => :string, :required => true
+   # This parameter allows you to specify the compression codec for all data generated by this
+   # producer. Valid values are "none", "gzip" and "snappy".
+   config :compression_codec, :validate => %w( none gzip snappy ), :default => 'none'
+   # This parameter allows you to set whether compression should be turned on for particular
+   # topics. If the compression codec is anything other than NoCompressionCodec,
+   # enable compression only for specified topics if any. If the list of compressed topics is
+   # empty, then enable the specified compression codec for all topics. If the compression codec
+   # is NoCompressionCodec, compression is disabled for all topics
+   config :compressed_topics, :validate => :string, :default => ''
+   # This value controls when a produce request is considered completed. Specifically,
+   # how many other brokers must have committed the data to their log and acknowledged this to the
+   # leader. For more info, see -- http://kafka.apache.org/documentation.html#producerconfigs
+   config :request_required_acks, :validate => [-1,0,1], :default => 0
+   # The serializer class for messages. The default encoder takes a byte[] and returns the same byte[]
+   config :serializer_class, :validate => :string, :default => 'kafka.serializer.StringEncoder'
+   # The partitioner class for partitioning messages amongst partitions in the topic. The default
+   # partitioner is based on the hash of the key. If the key is null,
+   # the message is sent to a random partition in the broker.
+   # NOTE: topic_metadata_refresh_interval_ms controls how long the producer keeps writing to a
+   # single partition before switching. It defaults to 10 minutes, so the producer will continue
+   # to write to one partition for 10 minutes before it switches
+   config :partitioner_class, :validate => :string, :default => 'kafka.producer.DefaultPartitioner'
+   # The amount of time the broker will wait trying to meet the request.required.acks requirement
+   # before sending back an error to the client.
+   config :request_timeout_ms, :validate => :number, :default => 10000
+   # This parameter specifies whether the messages are sent asynchronously in a background thread.
+   # Valid values are (1) async for asynchronous send and (2) sync for synchronous send. By
+   # setting the producer to async we allow batching together of requests (which is great for
+   # throughput) but open the possibility of a failure of the client machine dropping unsent data.
+   config :producer_type, :validate => %w( sync async ), :default => 'sync'
+   # The serializer class for keys (defaults to the same as for messages if nothing is given)
+   config :key_serializer_class, :validate => :string, :default => nil
+   # This property will cause the producer to automatically retry a failed send request. This
+   # property specifies the number of retries when such failures occur. Note that setting a
+   # non-zero value here can lead to duplicates in the case of network errors that cause a message
+   # to be sent but the acknowledgement to be lost.
+   config :message_send_max_retries, :validate => :number, :default => 3
+   # Before each retry, the producer refreshes the metadata of relevant topics to see if a new
+   # leader has been elected. Since leader election takes a bit of time,
+   # this property specifies the amount of time that the producer waits before refreshing the
+   # metadata.
+   config :retry_backoff_ms, :validate => :number, :default => 100
+   # The producer generally refreshes the topic metadata from brokers when there is a failure
+   # (partition missing, leader not available...). It will also poll regularly (default: every
+   # 10min so 600000ms). If you set this to a negative value, metadata will only get refreshed on
+   # failure. If you set this to zero, the metadata will get refreshed after each message sent
+   # (not recommended). Important note: the refresh happens only AFTER the message is sent,
+   # so if the producer never sends a message the metadata is never refreshed
+   config :topic_metadata_refresh_interval_ms, :validate => :number, :default => 600 * 1000
+   # Maximum time to buffer data when using async mode. For example a setting of 100 will try to
+   # batch together 100ms of messages to send at once. This will improve throughput but adds
+   # message delivery latency due to the buffering.
+   config :queue_buffering_max_ms, :validate => :number, :default => 5000
+   # The maximum number of unsent messages that can be queued up by the producer when using async
+   # mode before either the producer must be blocked or data must be dropped.
+   config :queue_buffering_max_messages, :validate => :number, :default => 10000
+   # The amount of time to block before dropping messages when running in async mode and the
+   # buffer has reached queue.buffering.max.messages. If set to 0 events will be enqueued
+   # immediately or dropped if the queue is full (the producer send call will never block). If set
+   # to -1 the producer will block indefinitely and never willingly drop a send.
+   config :queue_enqueue_timeout_ms, :validate => :number, :default => -1
+   # The number of messages to send in one batch when using async mode. The producer will wait
+   # until either this number of messages are ready to send or queue.buffering.max.ms is reached.
+   config :batch_num_messages, :validate => :number, :default => 200
+   # Socket write buffer size
+   config :send_buffer_bytes, :validate => :number, :default => 100 * 1024
+   # The client id is a user-specified string sent in each request to help trace calls. It should
+   # logically identify the application making the request.
+   config :client_id, :validate => :string, :default => ''
+ 
+   public
+   def register
+     require 'jruby-kafka'
+     options = {
+       :broker_list => @broker_list,
+       :compression_codec => @compression_codec,
+       :compressed_topics => @compressed_topics,
+       :request_required_acks => @request_required_acks,
+       :serializer_class => @serializer_class,
+       :partitioner_class => @partitioner_class,
+       :request_timeout_ms => @request_timeout_ms,
+       :producer_type => @producer_type,
+       :key_serializer_class => @key_serializer_class,
+       :message_send_max_retries => @message_send_max_retries,
+       :retry_backoff_ms => @retry_backoff_ms,
+       :topic_metadata_refresh_interval_ms => @topic_metadata_refresh_interval_ms,
+       :queue_buffering_max_ms => @queue_buffering_max_ms,
+       :queue_buffering_max_messages => @queue_buffering_max_messages,
+       :queue_enqueue_timeout_ms => @queue_enqueue_timeout_ms,
+       :batch_num_messages => @batch_num_messages,
+       :send_buffer_bytes => @send_buffer_bytes,
+       :client_id => @client_id
+     }
+     @producer = Kafka::Producer.new(options)
+     @producer.connect
+ 
+     @logger.info('Registering kafka producer', :topic_id => @topic_id, :broker_list => @broker_list)
+ 
+     @codec.on_event do |event|
+       begin
+         @producer.send_msg(@topic_id, nil, event)
+       rescue LogStash::ShutdownSignal
+         @logger.info('Kafka producer got shutdown signal')
+       rescue => e
+         @logger.warn('kafka producer threw exception, restarting',
+                      :exception => e)
+       end
+     end
+   end # def register
+ 
+   def receive(event)
+     return unless output?(event)
+     if event == LogStash::SHUTDOWN
+       finished
+       return
+     end
+     @codec.encode(event)
+   end
+ 
+   def teardown
+     @producer.close
+   end
+ end # class LogStash::Outputs::Kafka
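
To see the plugin's lifecycle outside of a full pipeline, here is a minimal smoke-test sketch. It assumes JRuby with the jruby-kafka jars resolvable and a broker on localhost:9092; the topic name is made up:

    # register -> receive -> teardown, mirroring what the Logstash pipeline does
    require 'logstash/event'
    require 'logstash/outputs/kafka'

    output = LogStash::Outputs::Kafka.new(:topic_id => 'smoke-test')   # 'smoke-test' is illustrative
    output.register                                                    # connects the Kafka::Producer
    output.receive(LogStash::Event.new(:message => 'hello kafka'))     # json codec encodes, then send_msg
    output.teardown                                                    # closes the producer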
data/logstash-output-kafka.gemspec ADDED
@@ -0,0 +1,34 @@
+ Gem::Specification.new do |s|
+ 
+   s.name = 'logstash-output-kafka'
+   s.version = '0.1.0'
+   s.licenses = ['Apache License (2.0)']
+   s.summary = 'Output events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on the broker'
+   s.description = 'Output events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on the broker'
+   s.authors = ['Elasticsearch']
+   s.email = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage = 'http://logstash.net/'
+   s.require_paths = ['lib']
+ 
+   # Files
+   s.files = `git ls-files`.split($\)
+ 
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+ 
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { 'logstash_plugin' => 'true', 'group' => 'output' }
+ 
+   # Jar dependencies
+   s.requirements << "jar 'org.apache.kafka:kafka_2.9.2', '0.8.1.1'"
+   s.requirements << "jar 'log4j:log4j', '1.2.14'"
+ 
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+   s.add_runtime_dependency 'logstash-codec-plain'
+   s.add_runtime_dependency 'logstash-codec-json'
+ 
+   s.add_runtime_dependency 'jar-dependencies'
+ 
+   s.add_runtime_dependency 'jruby-kafka', ['>=0.2.1']
+ end
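
Note how the Java side is wired up here: the `s.requirements` entries follow the jar-dependencies convention (`jar 'group:artifact', 'version'`), and the `jar-dependencies` runtime gem is what resolves those Maven coordinates at install time; jruby-kafka then provides the Ruby API over the Kafka client jars.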
data/spec/outputs/kafka.rb ADDED
@@ -0,0 +1,39 @@
+ # encoding: utf-8
+ 
+ require 'rspec'
+ require 'insist'
+ require 'logstash/namespace'
+ require 'logstash/timestamp'
+ require 'logstash/outputs/kafka'
+ 
+ describe LogStash::Outputs::Kafka do
+ 
+   let(:kafka_config) { {:topic_id => 'test'} }
+ 
+   it 'should populate kafka config with default values' do
+     kafka = LogStash::Outputs::Kafka.new(kafka_config)
+     insist { kafka.broker_list } == 'localhost:9092'
+     insist { kafka.topic_id } == 'test'
+     insist { kafka.compression_codec } == 'none'
+     insist { kafka.serializer_class } == 'kafka.serializer.StringEncoder'
+     insist { kafka.partitioner_class } == 'kafka.producer.DefaultPartitioner'
+     insist { kafka.producer_type } == 'sync'
+   end
+ 
+   it 'should register and load kafka jars without errors' do
+     kafka = LogStash::Outputs::Kafka.new(kafka_config)
+     kafka.register
+   end
+ 
+   it 'should send logstash event to kafka broker' do
+     timestamp = LogStash::Timestamp.now
+     expect_any_instance_of(Kafka::Producer)
+       .to receive(:send_msg)
+       .with('test', nil, "{\"message\":\"hello world\",\"host\":\"test\",\"@timestamp\":\"#{timestamp}\",\"@version\":\"1\"}")
+     e = LogStash::Event.new({:message => 'hello world', :host => 'test', '@timestamp' => timestamp})
+     kafka = LogStash::Outputs::Kafka.new(kafka_config)
+     kafka.register
+     kafka.receive(e)
+   end
+ 
+ end
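
The suite runs under JRuby with `bundle exec rspec spec/outputs/kafka.rb`; rspec and insist are not declared in the gemspec, so they must be installed separately. The third example mocks `send_msg` with `expect_any_instance_of`, so no message actually reaches a broker, but the Kafka jars still need to resolve for `register` to succeed.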
metadata ADDED
@@ -0,0 +1,134 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-output-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-06 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: logstash-codec-plain
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: logstash-codec-json
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: jar-dependencies
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: jruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 0.2.1
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 0.2.1
+ description: Output events to a Kafka topic. This uses the Kafka Producer API to write
+   messages to a topic on the broker
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile
+ - lib/logstash/outputs/kafka.rb
+ - logstash-output-kafka.gemspec
+ - spec/outputs/kafka.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: output
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements:
+ - jar 'org.apache.kafka:kafka_2.9.2', '0.8.1.1'
+ - jar 'log4j:log4j', '1.2.14'
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Output events to a Kafka topic. This uses the Kafka Producer API to write
+   messages to a topic on the broker
+ test_files:
+ - spec/outputs/kafka.rb