logstash-output-kafka 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     YzJlOTQxODRkY2NhZjJlOWZkMGViYjg5ODJmMzExMmZiNmFmYTQyMg==
+   data.tar.gz: !binary |-
+     NzAzMjUyYjQ3Y2E1ODE0YzhhNzVlZDM0YjNmMThjMTYyOGRiYjRjMQ==
+ SHA512:
+   metadata.gz: !binary |-
+     ZGEwOWFkM2E1NWE1NzE2NTllZWM3MWExMzU3ZjA5ODQwYzk2ODUzODRhZDY0
+     YmIzMDYxYzFkOTMzZDA1OGJjMWU1MDM5MTJmMWU0ODQ0NWM1ZGE0OGE0MDdi
+     NjZkNDFkYzRiZWM2ODkwN2I4NjQwNzVhNmIzNWE1ZDA1ZGMzYTg=
+   data.tar.gz: !binary |-
+     YmM3NjhlMmRjMTViYjhiZTUzYWI2YmMwMWVlZjQ5MTNlNDAzNTM5NTZjNDQ2
+     ZDk3ZjgzMjAyYTgwYmU0NDhlYTUxZTZlN2YwY2NhZTU4OTA1OTMyNWFiYTFj
+     NDgyOGU5M2UzZGEwNGI2ODkyYTM2N2I4M2I3YWQxOGU1Y2UxZWY=
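
The !binary-tagged values are Base64-wrapped digests: the mapping key U0hBMQ== decodes to "SHA1", and each digest decodes to a plain hex string. A minimal Ruby sketch, standard library only, that recovers the SHA1 digest for metadata.gz:

    require 'base64'

    # The YAML key itself is Base64 for the algorithm name.
    puts Base64.decode64('U0hBMQ==')
    # => SHA1

    # The digest value decodes to a 40-character hex SHA1 string.
    puts Base64.decode64('YzJlOTQxODRkY2NhZjJlOWZkMGViYjg5ODJmMzExMmZiNmFmYTQyMg==')
    # => c2e94184dccaf2e9fd0ebb8982f3112fb6afa422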
data/.gitignore ADDED
@@ -0,0 +1,3 @@
+ *.gem
+ Gemfile.lock
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,56 @@
+ logstash-output-kafka
+ ====================
+
+ Apache Kafka output for Logstash. This output will produce messages to a Kafka topic using the producer API exposed by Kafka.
+
+ For more information about Kafka, refer to this [documentation](http://kafka.apache.org/documentation.html).
+
+ Information about the producer API can be found [here](http://kafka.apache.org/documentation.html#apidesign).
+
+ Logstash Configuration
+ ====================
+
+ See http://kafka.apache.org/documentation.html#producerconfigs for details about the Kafka producer options.
+
+     output {
+       kafka {
+         topic_id => ... # string (required), the topic to produce the messages to
+         broker_list => ... # string (optional), default: "localhost:9092", used for bootstrapping; the producer will only use it for getting metadata
+         compression_codec => ... # string (optional), one of ["none", "gzip", "snappy"], default: "none"
+         compressed_topics => ... # string (optional), default: "", sets whether compression should be turned on for particular topics
+         request_required_acks => ... # number (optional), one of [-1, 0, 1], default: 0, controls when a produce request is considered completed
+         serializer_class => ... # string (optional), default: "kafka.serializer.StringEncoder", the serializer class for messages
+         partitioner_class => ... # string (optional), default: "kafka.producer.DefaultPartitioner"
+         request_timeout_ms => ... # number (optional), default: 10000
+         producer_type => ... # string (optional), one of ["sync", "async"], default: "sync"
+         key_serializer_class => ... # string (optional), default: nil
+         message_send_max_retries => ... # number (optional), default: 3
+         retry_backoff_ms => ... # number (optional), default: 100
+         topic_metadata_refresh_interval_ms => ... # number (optional), default: 600 * 1000
+         queue_buffering_max_ms => ... # number (optional), default: 5000
+         queue_buffering_max_messages => ... # number (optional), default: 10000
+         queue_enqueue_timeout_ms => ... # number (optional), default: -1
+         batch_num_messages => ... # number (optional), default: 200
+         send_buffer_bytes => ... # number (optional), default: 100 * 1024
+         client_id => ... # string (optional), default: ""
+       }
+     }
+
+ The default codec for this output is json. If you select a codec of plain, Logstash will encode your
+ messages with not only the message but also with a timestamp and hostname. If you do not want anything
+ but your message passing through, you should make the output configuration something like:
+
+     output {
+       kafka {
+         codec => plain {
+           format => "%{message}"
+         }
+       }
+     }
+
+
+ Dependencies
+ ====================
+
+ * Apache Kafka version 0.8.1.1
+ * jruby-kafka library
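
For a quick smoke test outside a full pipeline, the output can also be driven from Ruby directly, mirroring the pattern used in this package's spec file. A hedged sketch, assuming logstash 1.4.x is installed and a broker is listening on localhost:9092:

    require 'logstash/event'
    require 'logstash/outputs/kafka'

    # topic_id is the only required setting; everything else falls back to the
    # defaults listed above.
    output = LogStash::Outputs::Kafka.new(:topic_id => 'logs')
    output.register                                   # connects the producer
    output.receive(LogStash::Event.new(:message => 'hello world'))
    output.teardown                                   # closes the producer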
data/Rakefile ADDED
@@ -0,0 +1,11 @@
+ require 'gem_publisher'
+
+ desc 'Publish gem to RubyGems.org'
+ task :publish_gem do |t|
+   gem = GemPublisher.publish_if_updated('logstash-output-kafka.gemspec', :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
+ task :default do
+   system('rake -T')
+ end
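
GemPublisher.publish_if_updated builds and pushes the gem only when the gemspec's version is not already on the registry, returning the built gem's name on a push and nil otherwise, which is what the `if gem` guard relies on. A stubbed sketch of that guard (publish_if_updated_stub is illustrative, not part of gem_publisher):

    # Stub standing in for GemPublisher.publish_if_updated: nil means the
    # version was already published, a filename means a push happened.
    def publish_if_updated_stub(already_published)
      already_published ? nil : 'logstash-output-kafka-0.1.0.gem'
    end

    gem = publish_if_updated_stub(false)
    puts "Published #{gem}" if gem
    # => Published logstash-output-kafka-0.1.0.gem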
data/lib/logstash/outputs/kafka.rb ADDED
@@ -0,0 +1,158 @@
+ require 'logstash/namespace'
+ require 'logstash/outputs/base'
+ require 'logstash-output-kafka_jars'
+
+ # Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on
+ # the broker.
+ #
+ # The only required configuration is the topic name. The default codec is json,
+ # so events will be persisted on the broker in json format. If you select a codec of plain,
+ # Logstash will encode your messages with not only the message but also with a timestamp and
+ # hostname. If you do not want anything but your message passing through, you should make the output
+ # configuration something like:
+ # output {
+ #   kafka {
+ #     codec => plain {
+ #       format => "%{message}"
+ #     }
+ #   }
+ # }
+ # For more information see http://kafka.apache.org/documentation.html#theproducer
+ #
+ # Kafka producer configuration: http://kafka.apache.org/documentation.html#producerconfigs
+ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
+   config_name 'kafka'
+   milestone 1
+
+   default :codec, 'json'
+   # This is for bootstrapping and the producer will only use it for getting metadata (topics,
+   # partitions and replicas). The socket connections for sending the actual data will be
+   # established based on the broker information returned in the metadata. The format is
+   # host1:port1,host2:port2, and the list can be a subset of brokers or a VIP pointing to a
+   # subset of brokers.
+   config :broker_list, :validate => :string, :default => 'localhost:9092'
+   # The topic to produce the messages to
+   config :topic_id, :validate => :string, :required => true
+   # This parameter allows you to specify the compression codec for all data generated by this
+   # producer. Valid values are "none", "gzip" and "snappy".
+   config :compression_codec, :validate => %w( none gzip snappy ), :default => 'none'
+   # This parameter allows you to set whether compression should be turned on for particular
+   # topics. If the compression codec is anything other than NoCompressionCodec,
+   # enable compression only for specified topics if any. If the list of compressed topics is
+   # empty, then enable the specified compression codec for all topics. If the compression codec
+   # is NoCompressionCodec, compression is disabled for all topics.
+   config :compressed_topics, :validate => :string, :default => ''
+   # This value controls when a produce request is considered completed. Specifically,
+   # how many other brokers must have committed the data to their log and acknowledged this to the
+   # leader. For more info, see http://kafka.apache.org/documentation.html#producerconfigs
+   config :request_required_acks, :validate => [-1, 0, 1], :default => 0
+   # The serializer class for messages. The default encoder takes a byte[] and returns the same byte[]
+   config :serializer_class, :validate => :string, :default => 'kafka.serializer.StringEncoder'
+   # The partitioner class for partitioning messages amongst partitions in the topic. The default
+   # partitioner is based on the hash of the key. If the key is null,
+   # the message is sent to a random partition in the broker.
+   # NOTE: topic_metadata_refresh_interval_ms controls how long the producer will distribute to a
+   # partition in the topic. This defaults to 10 mins, so the producer will continue to write to a
+   # single partition for 10 mins before it switches.
+   config :partitioner_class, :validate => :string, :default => 'kafka.producer.DefaultPartitioner'
+   # The amount of time the broker will wait trying to meet the request.required.acks requirement
+   # before sending back an error to the client.
+   config :request_timeout_ms, :validate => :number, :default => 10000
+   # This parameter specifies whether the messages are sent asynchronously in a background thread.
+   # Valid values are (1) async for asynchronous send and (2) sync for synchronous send. By
+   # setting the producer to async we allow batching together of requests (which is great for
+   # throughput) but open the possibility of a failure of the client machine dropping unsent data.
+   config :producer_type, :validate => %w( sync async ), :default => 'sync'
+   # The serializer class for keys (defaults to the same as for messages if nothing is given)
+   config :key_serializer_class, :validate => :string, :default => nil
+   # This property will cause the producer to automatically retry a failed send request. This
+   # property specifies the number of retries when such failures occur. Note that setting a
+   # non-zero value here can lead to duplicates in the case of network errors that cause a message
+   # to be sent but the acknowledgement to be lost.
+   config :message_send_max_retries, :validate => :number, :default => 3
+   # Before each retry, the producer refreshes the metadata of relevant topics to see if a new
+   # leader has been elected. Since leader election takes a bit of time,
+   # this property specifies the amount of time that the producer waits before refreshing the
+   # metadata.
+   config :retry_backoff_ms, :validate => :number, :default => 100
+   # The producer generally refreshes the topic metadata from brokers when there is a failure
+   # (partition missing, leader not available...). It will also poll regularly (default: every
+   # 10min so 600000ms). If you set this to a negative value, metadata will only get refreshed on
+   # failure. If you set this to zero, the metadata will get refreshed after each message sent
+   # (not recommended). Important note: the refresh happens only AFTER the message is sent,
+   # so if the producer never sends a message the metadata is never refreshed.
+   config :topic_metadata_refresh_interval_ms, :validate => :number, :default => 600 * 1000
+   # Maximum time to buffer data when using async mode. For example a setting of 100 will try to
+   # batch together 100ms of messages to send at once. This will improve throughput but adds
+   # message delivery latency due to the buffering.
+   config :queue_buffering_max_ms, :validate => :number, :default => 5000
+   # The maximum number of unsent messages that can be queued up by the producer when using async
+   # mode before either the producer must be blocked or data must be dropped.
+   config :queue_buffering_max_messages, :validate => :number, :default => 10000
+   # The amount of time to block before dropping messages when running in async mode and the
+   # buffer has reached queue.buffering.max.messages. If set to 0 events will be enqueued
+   # immediately or dropped if the queue is full (the producer send call will never block). If set
+   # to -1 the producer will block indefinitely and never willingly drop a send.
+   config :queue_enqueue_timeout_ms, :validate => :number, :default => -1
+   # The number of messages to send in one batch when using async mode. The producer will wait
+   # until either this number of messages is ready to send or queue.buffering.max.ms is reached.
+   config :batch_num_messages, :validate => :number, :default => 200
+   # Socket write buffer size
+   config :send_buffer_bytes, :validate => :number, :default => 100 * 1024
+   # The client id is a user-specified string sent in each request to help trace calls. It should
+   # logically identify the application making the request.
+   config :client_id, :validate => :string, :default => ''
+
+   public
+   def register
+     require 'jruby-kafka'
+     options = {
+       :broker_list => @broker_list,
+       :compression_codec => @compression_codec,
+       :compressed_topics => @compressed_topics,
+       :request_required_acks => @request_required_acks,
+       :serializer_class => @serializer_class,
+       :partitioner_class => @partitioner_class,
+       :request_timeout_ms => @request_timeout_ms,
+       :producer_type => @producer_type,
+       :key_serializer_class => @key_serializer_class,
+       :message_send_max_retries => @message_send_max_retries,
+       :retry_backoff_ms => @retry_backoff_ms,
+       :topic_metadata_refresh_interval_ms => @topic_metadata_refresh_interval_ms,
+       :queue_buffering_max_ms => @queue_buffering_max_ms,
+       :queue_buffering_max_messages => @queue_buffering_max_messages,
+       :queue_enqueue_timeout_ms => @queue_enqueue_timeout_ms,
+       :batch_num_messages => @batch_num_messages,
+       :send_buffer_bytes => @send_buffer_bytes,
+       :client_id => @client_id
+     }
+     @producer = Kafka::Producer.new(options)
+     @producer.connect
+
+     @logger.info('Registering kafka producer', :topic_id => @topic_id, :broker_list => @broker_list)
+
+     @codec.on_event do |event|
+       begin
+         @producer.send_msg(@topic_id, nil, event)
+       rescue LogStash::ShutdownSignal
+         @logger.info('Kafka producer got shutdown signal')
+       rescue => e
+         @logger.warn('kafka producer threw exception, restarting',
+                      :exception => e)
+       end
+     end
+   end # def register
+
+   def receive(event)
+     return unless output?(event)
+     if event == LogStash::SHUTDOWN
+       finished
+       return
+     end
+     @codec.encode(event)
+   end
+
+   def teardown
+     @producer.close
+   end
+ end # class LogStash::Outputs::Kafka
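
Note the split between register and receive above: register wires the codec's on_event callback to the producer exactly once, while receive only hands events to the codec, which calls back with the serialized payload. A self-contained sketch of that flow, with StubProducer standing in for jruby-kafka's Kafka::Producer (the stub is illustrative, not part of the library):

    require 'json'

    # Mimics only the send_msg(topic, key, message) call the plugin uses.
    class StubProducer
      def send_msg(topic, key, message)
        puts "#{topic} <- #{message}"
      end
    end

    producer = StubProducer.new

    # register: attach the callback once.
    on_event = lambda { |payload| producer.send_msg('test-topic', nil, payload) }

    # receive: encode an event and let the callback ship it. A Hash plays the
    # event here, and JSON plays the default codec.
    event = { 'message' => 'hello world' }
    on_event.call(event.to_json)
    # => test-topic <- {"message":"hello world"}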
data/logstash-output-kafka.gemspec ADDED
@@ -0,0 +1,34 @@
+ Gem::Specification.new do |s|
+
+   s.name = 'logstash-output-kafka'
+   s.version = '0.1.0'
+   s.licenses = ['Apache License (2.0)']
+   s.summary = 'Output events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on the broker'
+   s.description = 'Output events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on the broker'
+   s.authors = ['Elasticsearch']
+   s.email = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage = 'http://logstash.net/'
+   s.require_paths = ['lib']
+
+   # Files
+   s.files = `git ls-files`.split($\)
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { 'logstash_plugin' => 'true', 'group' => 'output' }
+
+   # Jar dependencies
+   s.requirements << "jar 'org.apache.kafka:kafka_2.9.2', '0.8.1.1'"
+   s.requirements << "jar 'log4j:log4j', '1.2.14'"
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+   s.add_runtime_dependency 'logstash-codec-plain'
+   s.add_runtime_dependency 'logstash-codec-json'
+
+   s.add_runtime_dependency 'jar-dependencies'
+
+   s.add_runtime_dependency 'jruby-kafka', ['>= 0.2.1']
+ end
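
One subtlety in the s.files line above: $\ is Ruby's output record separator and is nil by default, and String#split(nil) splits on whitespace, so `git ls-files`.split($\) yields one entry per path the command prints. A quick sketch with a canned listing:

    # Canned `git ls-files` output; $\ is nil, and split(nil) splits on
    # whitespace, giving one entry per listed path.
    listing = "Gemfile\nLICENSE\nREADME.md\nRakefile\n"
    p listing.split($\)
    # => ["Gemfile", "LICENSE", "README.md", "Rakefile"]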
data/spec/outputs/kafka.rb ADDED
@@ -0,0 +1,39 @@
+ # encoding: utf-8
+
+ require 'rspec'
+ require 'insist'
+ require 'logstash/namespace'
+ require 'logstash/timestamp'
+ require 'logstash/outputs/kafka'
+
+ describe LogStash::Outputs::Kafka do
+
+   let(:kafka_config) { {:topic_id => 'test'} }
+
+   it 'should populate kafka config with default values' do
+     kafka = LogStash::Outputs::Kafka.new(kafka_config)
+     insist {kafka.broker_list} == 'localhost:9092'
+     insist {kafka.topic_id} == 'test'
+     insist {kafka.compression_codec} == 'none'
+     insist {kafka.serializer_class} == 'kafka.serializer.StringEncoder'
+     insist {kafka.partitioner_class} == 'kafka.producer.DefaultPartitioner'
+     insist {kafka.producer_type} == 'sync'
+   end
+
+   it 'should register and load kafka jars without errors' do
+     kafka = LogStash::Outputs::Kafka.new(kafka_config)
+     kafka.register
+   end
+
+   it 'should send logstash event to kafka broker' do
+     timestamp = LogStash::Timestamp.now
+     expect_any_instance_of(Kafka::Producer)
+       .to receive(:send_msg)
+       .with('test', nil, "{\"message\":\"hello world\",\"host\":\"test\",\"@timestamp\":\"#{timestamp}\",\"@version\":\"1\"}")
+     e = LogStash::Event.new({:message => 'hello world', :host => 'test', '@timestamp' => timestamp})
+     kafka = LogStash::Outputs::Kafka.new(kafka_config)
+     kafka.register
+     kafka.receive(e)
+   end
+
+ end
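
A hedged sketch of one more example in the same style, asserting that user-supplied settings override the defaults checked above; the 'gzip' and 'async' values come from the plugin's own validation lists:

    it 'should override default values with user-supplied config' do
      kafka = LogStash::Outputs::Kafka.new(:topic_id => 'test',
                                           :compression_codec => 'gzip',
                                           :producer_type => 'async')
      insist {kafka.compression_codec} == 'gzip'
      insist {kafka.producer_type} == 'async'
    end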
metadata ADDED
@@ -0,0 +1,134 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-output-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-06 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ - !ruby/object:Gem::Dependency
+   name: logstash-codec-plain
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: logstash-codec-json
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: jar-dependencies
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: jruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 0.2.1
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 0.2.1
+ description: Output events to a Kafka topic. This uses the Kafka Producer API to write
+   messages to a topic on the broker
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - LICENSE
+ - README.md
+ - Rakefile
+ - lib/logstash/outputs/kafka.rb
+ - logstash-output-kafka.gemspec
+ - spec/outputs/kafka.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: output
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements:
+ - jar 'org.apache.kafka:kafka_2.9.2', '0.8.1.1'
+ - jar 'log4j:log4j', '1.2.14'
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: Output events to a Kafka topic. This uses the Kafka Producer API to write
+   messages to a topic on the broker
+ test_files:
+ - spec/outputs/kafka.rb