logstash-kafka 0.7.0-java

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: 00adb861cf25fabd675f94d437b0e4dc36a7fbad
+   data.tar.gz: ce5d97a0e3b9081bb60f595364da29125e2b6a5d
+ SHA512:
+   metadata.gz: 1b0675791f6af79c8cc64371787cb322ba108bdc2d431cf55bf49a543b92a8d4cab09efeb593803b50b05d7dd88f6f3404d76f3207de665c816d5b061183abd2
+   data.tar.gz: 919df956e078237ee9f01de276227b5f61ab454faf830e794962c76fc2132ce0cd928137aeaa648ef36e4fc8e2e9b66a3e893101cde6f150595ac017d401edec
lib/logstash/inputs/kafka.rb ADDED
@@ -0,0 +1,152 @@
+ require 'logstash/namespace'
+ require 'logstash/inputs/base'
+ require 'jruby-kafka'
+
+ # This input will read events from a Kafka topic. It uses the high level consumer API provided
+ # by Kafka to read messages from the broker. It also maintains the state of what has been
+ # consumed using Zookeeper. The default input codec is json.
+ #
+ # The only required configuration is the topic name. By default it will connect to a Zookeeper
+ # running on localhost. All the broker information is read from Zookeeper state.
+ #
+ # Ideally you should have as many threads as the number of partitions for a perfect balance --
+ # more threads than partitions means that some threads will be idle.
+ #
+ # For more information see http://kafka.apache.org/documentation.html#theconsumer
+ #
+ # Kafka consumer configuration: http://kafka.apache.org/documentation.html#consumerconfigs
+ #
+ class LogStash::Inputs::Kafka < LogStash::Inputs::Base
+   config_name 'kafka'
+   milestone 1
+
+   default :codec, 'json'
+
+   # Specifies the ZooKeeper connection string in the form hostname:port where host and port are
+   # the host and port of a ZooKeeper server. You can also specify multiple hosts in the form
+   # hostname1:port1,hostname2:port2,hostname3:port3.
+   #
+   # The server may also have a ZooKeeper chroot path as part of its ZooKeeper connection string
+   # which puts its data under some path in the global ZooKeeper namespace. If so, the consumer
+   # should use the same chroot path in its connection string. For example, to give a chroot path of
+   # /chroot/path you would give the connection string as
+   # hostname1:port1,hostname2:port2,hostname3:port3/chroot/path.
+   config :zk_connect, :validate => :string, :default => 'localhost:2181'
+   # A string that uniquely identifies the group of consumer processes to which this consumer
+   # belongs. By setting the same group id, multiple processes indicate that they are all part of
+   # the same consumer group.
+   config :group_id, :validate => :string, :default => 'logstash'
+   # The topic to consume messages from
+   config :topic_id, :validate => :string, :required => true
+   # Specify whether to jump to the beginning of the queue when there is no initial offset in
+   # ZooKeeper, or if an offset is out of range. If this is false, messages are consumed
+   # from the latest offset.
+   #
+   # If reset_beginning is true, the consumer will check ZooKeeper to see if any other group members
+   # are present and active. If not, the consumer deletes any offset information in ZooKeeper
+   # and starts at the smallest offset. If other group members are present, reset_beginning will not
+   # work and the consumer threads will rejoin the consumer group.
+   config :reset_beginning, :validate => :boolean, :default => false
+   # Number of threads to read from the partitions. Ideally you should have as many threads as the
+   # number of partitions for a perfect balance. More threads than partitions means that some
+   # threads will be idle. Fewer threads means a single thread could be consuming from more than
+   # one partition.
+   config :consumer_threads, :validate => :number, :default => 1
+   # Internal Logstash queue size used to hold events in memory after they have been read from Kafka
+   config :queue_size, :validate => :number, :default => 20
+   # When a new consumer joins a consumer group, the set of consumers attempts to "rebalance" the
+   # load to assign partitions to each consumer. If the set of consumers changes while this
+   # assignment is taking place, the rebalance will fail and retry. This setting controls the
+   # maximum number of attempts before giving up.
+   config :rebalance_max_retries, :validate => :number, :default => 4
+   # Backoff time between retries during rebalance.
+   config :rebalance_backoff_ms, :validate => :number, :default => 2000
+   # Throw a timeout exception to the consumer if no message is available for consumption after
+   # the specified interval.
+   config :consumer_timeout_ms, :validate => :number, :default => -1
+   # Option to restart the consumer loop on error.
+   config :consumer_restart_on_error, :validate => :boolean, :default => true
+   # Time in millis to wait for the consumer to restart after an error.
+   config :consumer_restart_sleep_ms, :validate => :number, :default => 0
+   # Option to add Kafka metadata like topic and message size to the event.
+   config :decorate_events, :validate => :boolean, :default => false
+   # A unique id for the consumer; generated automatically if not set.
+   config :consumer_id, :validate => :string, :default => nil
+   # The number of bytes of messages to attempt to fetch for each topic-partition in each fetch
+   # request. These bytes will be read into memory for each partition, so this helps control
+   # the memory used by the consumer. The fetch request size must be at least as large as the
+   # maximum message size the server allows, or else it is possible for the producer to send
+   # messages larger than the consumer can fetch.
+   config :fetch_message_max_bytes, :validate => :number, :default => 1048576
+
+   public
+   def register
+     LogStash::Logger.setup_log4j(@logger)
+     options = {
+       :zk_connect => @zk_connect,
+       :group_id => @group_id,
+       :topic_id => @topic_id,
+       :rebalance_max_retries => @rebalance_max_retries,
+       :rebalance_backoff_ms => @rebalance_backoff_ms,
+       :consumer_timeout_ms => @consumer_timeout_ms,
+       :consumer_restart_on_error => @consumer_restart_on_error,
+       :consumer_restart_sleep_ms => @consumer_restart_sleep_ms,
+       :consumer_id => @consumer_id,
+       :fetch_message_max_bytes => @fetch_message_max_bytes
+     }
+     if @reset_beginning
+       options[:reset_beginning] = 'from-beginning'
+     end # if :reset_beginning
+     @kafka_client_queue = SizedQueue.new(@queue_size)
+     @consumer_group = Kafka::Group.new(options)
+     @logger.info('Registering kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
+   end # def register
+
+   public
+   def run(logstash_queue)
+     java_import 'kafka.common.ConsumerRebalanceFailedException'
+     @logger.info('Running kafka', :group_id => @group_id, :topic_id => @topic_id, :zk_connect => @zk_connect)
+     begin
+       @consumer_group.run(@consumer_threads, @kafka_client_queue)
+       begin
+         while true
+           event = @kafka_client_queue.pop
+           queue_event("#{event}", logstash_queue)
+         end
+       rescue LogStash::ShutdownSignal
+         @logger.info('Kafka got shutdown signal')
+         @consumer_group.shutdown
+       end
+       until @kafka_client_queue.empty?
+         queue_event("#{@kafka_client_queue.pop}", logstash_queue)
+       end
+       @logger.info('Done running kafka input')
+     rescue => e
+       @logger.warn('kafka client threw exception, restarting',
+                    :exception => e)
+       if @consumer_group.running?
+         @consumer_group.shutdown
+       end
+       sleep(Float(@consumer_restart_sleep_ms) * 1 / 1000)
+       retry
+     end
+     finished
+   end # def run
+
+   private
+   def queue_event(msg, output_queue)
+     begin
+       @codec.decode(msg) do |event|
+         decorate(event)
+         if @decorate_events
+           event['kafka'] = {'msg_size' => msg.bytesize, 'topic' => @topic_id, 'consumer_group' => @group_id}
+         end
+         output_queue << event
+       end # @codec.decode
+     rescue => e # parse or event creation error
+       @logger.error('Failed to create event', :message => msg, :exception => e,
+                     :backtrace => e.backtrace)
+     end # begin
+   end # def queue_event
+
+ end #class LogStash::Inputs::Kafka
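For orientation, a minimal pipeline configuration for this input might look like the sketch below. Only topic_id is required; the topic name "apache_logs" and the other values are illustrative placeholders, while the option names themselves come from the plugin source above.

input {
  kafka {
    zk_connect => "localhost:2181"   # ZooKeeper from which the high level consumer reads broker state
    topic_id => "apache_logs"        # required: the topic to consume
    group_id => "logstash"           # consumers sharing a group id split the partitions between them
    consumer_threads => 1            # ideally one thread per partition
    decorate_events => true          # adds topic, consumer group and message size to each event
  }
}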
lib/logstash/outputs/kafka.rb ADDED
@@ -0,0 +1,159 @@
+ require 'logstash/namespace'
+ require 'logstash/outputs/base'
+ require 'jruby-kafka'
+
+ # Write events to a Kafka topic. This uses the Kafka Producer API to write messages to a topic on
+ # the broker.
+ #
+ # The only required configuration is the topic name. The default codec is json,
+ # so events will be persisted on the broker in json format. If you select a codec of plain,
+ # Logstash will encode your events with not only the message but also a timestamp and
+ # hostname. If you do not want anything but your message passing through, you should make the output
+ # configuration something like:
+ #     output {
+ #       kafka {
+ #         codec => plain {
+ #           format => "%{message}"
+ #         }
+ #       }
+ #     }
+ # For more information see http://kafka.apache.org/documentation.html#theproducer
+ #
+ # Kafka producer configuration: http://kafka.apache.org/documentation.html#producerconfigs
+ class LogStash::Outputs::Kafka < LogStash::Outputs::Base
+   config_name 'kafka'
+   milestone 1
+
+   default :codec, 'json'
+   # This is for bootstrapping and the producer will only use it for getting metadata (topics,
+   # partitions and replicas). The socket connections for sending the actual data will be
+   # established based on the broker information returned in the metadata. The format is
+   # host1:port1,host2:port2, and the list can be a subset of brokers or a VIP pointing to a
+   # subset of brokers.
+   config :broker_list, :validate => :string, :default => 'localhost:9092'
+   # The topic to produce the messages to
+   config :topic_id, :validate => :string, :required => true
+   # This parameter allows you to specify the compression codec for all data generated by this
+   # producer. Valid values are "none", "gzip" and "snappy".
+   config :compression_codec, :validate => %w( none gzip snappy ), :default => 'none'
+   # This parameter allows you to set whether compression should be turned on for particular
+   # topics. If the compression codec is anything other than NoCompressionCodec,
+   # enable compression only for specified topics if any. If the list of compressed topics is
+   # empty, then enable the specified compression codec for all topics. If the compression codec
+   # is NoCompressionCodec, compression is disabled for all topics.
+   config :compressed_topics, :validate => :string, :default => ''
+   # This value controls when a produce request is considered completed. Specifically,
+   # how many other brokers must have committed the data to their log and acknowledged this to the
+   # leader. For more info, see http://kafka.apache.org/documentation.html#producerconfigs
+   config :request_required_acks, :validate => [-1,0,1], :default => 0
+   # The serializer class for messages. The default encoder takes a byte[] and returns the same byte[]
+   config :serializer_class, :validate => :string, :default => 'kafka.serializer.StringEncoder'
+   # The partitioner class for partitioning messages amongst partitions in the topic. The default
+   # partitioner is based on the hash of the key. If the key is null,
+   # the message is sent to a random partition in the broker.
+   # NOTE: topic_metadata_refresh_interval_ms controls how long the producer will keep writing to a
+   # given partition in the topic. This defaults to 10 mins, so the producer will continue to write to a
+   # single partition for 10 mins before it switches.
+   config :partitioner_class, :validate => :string, :default => 'kafka.producer.DefaultPartitioner'
+   # The amount of time the broker will wait trying to meet the request.required.acks requirement
+   # before sending back an error to the client.
+   config :request_timeout_ms, :validate => :number, :default => 10000
+   # This parameter specifies whether the messages are sent asynchronously in a background thread.
+   # Valid values are (1) async for asynchronous send and (2) sync for synchronous send. By
+   # setting the producer to async we allow batching together of requests (which is great for
+   # throughput) but open the possibility of a failure of the client machine dropping unsent data.
+   config :producer_type, :validate => %w( sync async ), :default => 'sync'
+   # The serializer class for keys (defaults to the same as for messages if nothing is given)
+   config :key_serializer_class, :validate => :string, :default => nil
+   # This property will cause the producer to automatically retry a failed send request. This
+   # property specifies the number of retries when such failures occur. Note that setting a
+   # non-zero value here can lead to duplicates in the case of network errors that cause a message
+   # to be sent but the acknowledgement to be lost.
+   config :message_send_max_retries, :validate => :number, :default => 3
+   # Before each retry, the producer refreshes the metadata of relevant topics to see if a new
+   # leader has been elected. Since leader election takes a bit of time,
+   # this property specifies the amount of time that the producer waits before refreshing the
+   # metadata.
+   config :retry_backoff_ms, :validate => :number, :default => 100
+   # The producer generally refreshes the topic metadata from brokers when there is a failure
+   # (partition missing, leader not available...). It will also poll regularly (default: every
+   # 10min so 600000ms). If you set this to a negative value, metadata will only get refreshed on
+   # failure. If you set this to zero, the metadata will get refreshed after each message sent
+   # (not recommended). Important note: the refresh happens only AFTER the message is sent,
+   # so if the producer never sends a message the metadata is never refreshed.
+   config :topic_metadata_refresh_interval_ms, :validate => :number, :default => 600 * 1000
+   # Maximum time to buffer data when using async mode. For example a setting of 100 will try to
+   # batch together 100ms of messages to send at once. This will improve throughput but adds
+   # message delivery latency due to the buffering.
+   config :queue_buffering_max_ms, :validate => :number, :default => 5000
+   # The maximum number of unsent messages that can be queued up by the producer when using async
+   # mode before either the producer must be blocked or data must be dropped.
+   config :queue_buffering_max_messages, :validate => :number, :default => 10000
+   # The amount of time to block before dropping messages when running in async mode and the
+   # buffer has reached queue.buffering.max.messages. If set to 0 events will be enqueued
+   # immediately or dropped if the queue is full (the producer send call will never block). If set
+   # to -1 the producer will block indefinitely and never willingly drop a send.
+   config :queue_enqueue_timeout_ms, :validate => :number, :default => -1
+   # The number of messages to send in one batch when using async mode. The producer will wait
+   # until either this number of messages are ready to send or queue.buffering.max.ms is reached.
+   config :batch_num_messages, :validate => :number, :default => 200
+   # Socket write buffer size
+   config :send_buffer_bytes, :validate => :number, :default => 100 * 1024
+   # The client id is a user-specified string sent in each request to help trace calls. It should
+   # logically identify the application making the request.
+   config :client_id, :validate => :string, :default => ''
+
+   public
+   def register
+     LogStash::Logger.setup_log4j(@logger)
+     options = {
+       :broker_list => @broker_list,
+       :compression_codec => @compression_codec,
+       :compressed_topics => @compressed_topics,
+       :request_required_acks => @request_required_acks,
+       :serializer_class => @serializer_class,
+       :partitioner_class => @partitioner_class,
+       :request_timeout_ms => @request_timeout_ms,
+       :producer_type => @producer_type,
+       :key_serializer_class => @key_serializer_class,
+       :message_send_max_retries => @message_send_max_retries,
+       :retry_backoff_ms => @retry_backoff_ms,
+       :topic_metadata_refresh_interval_ms => @topic_metadata_refresh_interval_ms,
+       :queue_buffering_max_ms => @queue_buffering_max_ms,
+       :queue_buffering_max_messages => @queue_buffering_max_messages,
+       :queue_enqueue_timeout_ms => @queue_enqueue_timeout_ms,
+       :batch_num_messages => @batch_num_messages,
+       :send_buffer_bytes => @send_buffer_bytes,
+       :client_id => @client_id
+     }
+     @producer = Kafka::Producer.new(options)
+     @producer.connect
+
+     @logger.info('Registering kafka producer', :topic_id => @topic_id, :broker_list => @broker_list)
+
+     @codec.on_event do |event|
+       begin
+         @producer.send_msg(@topic_id, nil, event)
+       rescue LogStash::ShutdownSignal
+         @logger.info('Kafka producer got shutdown signal')
+       rescue => e
+         @logger.warn('kafka producer threw exception, restarting',
+                      :exception => e)
+       end
+     end
+   end # def register
+
+   def receive(event)
+     return unless output?(event)
+     if event == LogStash::SHUTDOWN
+       finished
+       return
+     end
+     @codec.encode(event)
+   end
+
+   def teardown
+     @producer.close
+   end
+
+ end #class LogStash::Outputs::Kafka
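Similarly, a hedged sketch of an output configuration that exercises the async producer settings documented above; the broker address and topic name are placeholders, and every option name maps to a config entry in the plugin source.

output {
  kafka {
    broker_list => "localhost:9092"             # bootstrap brokers, used only to fetch metadata
    topic_id => "logstash_events"               # required: the topic to produce to
    producer_type => "async"                    # batch sends in a background thread
    batch_num_messages => 200                   # flush after this many buffered messages...
    queue_buffering_max_ms => 5000              # ...or after this many milliseconds, whichever comes first
    codec => plain { format => "%{message}" }   # pass only the raw message, as described in the comments above
  }
}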
metadata ADDED
@@ -0,0 +1,60 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.7.0
+ platform: java
+ authors:
+ - Joseph Lawson
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2015-01-20 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: jruby-kafka
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 1.0.0.beta
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: 1.0.0.beta
+ description: this is primarily to be used as an interface for logstash
+ email:
+ - joe@joekiller.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/logstash/inputs/kafka.rb
+ - lib/logstash/outputs/kafka.rb
+ homepage: https://github.com/joekiller/jruby-kafka
+ licenses:
+ - Apache 2.0
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ">="
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.2.2
+ signing_key:
+ specification_version: 4
+ summary: jruby Kafka wrapper
+ test_files: []