jruby-kafka 0.1.1-java

checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: af575b070c304d11ac6aa1af46eaad548b61661b
+   data.tar.gz: 4bc817399ca6126b32c0df3f6072ae695fc49be4
+ SHA512:
+   metadata.gz: a9fcd6fd838e6a2d87bd96ca8606bd19639c04e81722eb10e0be58b99369159af3eb595f7fa56c96da1938924a764cb6fedf526bce5dc156f2535fe7dcd80969
+   data.tar.gz: 8a40f64f4c7f1dbeed36daf62336610d96c88343a628f523ff529c9b0ab53c75638de657b8a6fdee08db6a992312f68c9db777cbeab0e51c829699bb357f4cfc
lib/jruby-kafka.rb ADDED
@@ -0,0 +1,17 @@
+ # Because of a problem with a Kafka dependency, jbundler 0.5.5 does not work. Therefore, you
+ # need to do one of the following to have your Kafka jar dependencies available:
+ #
+ # - run your app under jbundler (adds a layer to bundler to manage jar dependencies)
+ # - already have the Kafka jars loaded before requiring jruby-kafka
+ # - set KAFKA_PATH in the environment to point to a Kafka binary installation
+ #
+ # defined? avoids a NameError when jbundler has not set JBUNDLER_CLASSPATH.
+ if not defined?(JBUNDLER_CLASSPATH) and ENV['KAFKA_PATH']
+   require 'jruby-kafka/loader'
+   Kafka.load_jars
+ end
+
+ require "jruby-kafka/consumer"
+ require "jruby-kafka/group"
+ require "jruby-kafka/producer"
+
+ module Kafka
+ end
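
In practice, the KAFKA_PATH route looks like this; a minimal sketch, where the installation path is an assumption and not something the gem provides:

    # Hypothetical path to an unpacked Kafka 0.8 binary distribution.
    ENV['KAFKA_PATH'] ||= '/opt/kafka_2.9.2-0.8.1'

    require 'jruby-kafka'   # pulls the jars in from KAFKA_PATH/libs
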
lib/jruby-kafka/consumer.rb ADDED
@@ -0,0 +1,44 @@
+ require "java"
+ require "jruby-kafka/namespace"
+
+ java_import 'kafka.consumer.ConsumerIterator'
+ java_import 'kafka.consumer.KafkaStream'
+ java_import 'kafka.common.ConsumerRebalanceFailedException'
+ java_import 'kafka.consumer.ConsumerTimeoutException'
+
+ # Runnable that drains one KafkaStream into a shared queue. One instance is
+ # submitted per consumer thread by Kafka::Group#run.
+ class Kafka::Consumer
+   include Java::JavaLang::Runnable
+   java_signature 'void run()'
+
+   def initialize(a_stream, a_threadNumber, a_queue, a_bool_restart_on_exception, a_sleep_ms)
+     @m_threadNumber = a_threadNumber
+     @m_stream = a_stream
+     @m_queue = a_queue
+     @m_restart_on_exception = a_bool_restart_on_exception
+     @m_sleep_ms = 1.0 / 1000.0 * Float(a_sleep_ms)   # milliseconds -> seconds for Kernel#sleep
+   end
+
+   def run
+     it = @m_stream.iterator
+     begin
+       while it.hasNext
+         @m_queue << it.next.message
+       end
+     rescue Exception => e   # deliberately broad so Java exceptions cannot kill the thread silently
+       puts("#{self.class.name} caught exception: #{e.class.name}")
+       puts(e.message) if e.message != ''
+       if @m_restart_on_exception
+         sleep(@m_sleep_ms)
+         retry
+       else
+         raise e
+       end
+     end
+   end
+ end
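
Kafka::Group#run is the normal entry point, but the threading contract is easy to see in isolation. A sketch, assuming `stream` is a KafkaStream already obtained from a consumer connector (that part is elided here):

    require 'jruby-kafka'

    queue    = Queue.new   # Ruby queue; the consumer pushes raw payloads with <<
    consumer = Kafka::Consumer.new(stream, 0, queue, true, 500)  # restart on error, sleep 500 ms

    executor = java.util.concurrent.Executors.newSingleThreadExecutor
    # Same overload pinning as Kafka::Group#run uses.
    submit = executor.java_method(:submit, [Java::JavaLang::Runnable.java_class])
    submit.call(consumer)

    payload = queue.pop    # blocks until the first message arrives
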
lib/jruby-kafka/error.rb ADDED
@@ -0,0 +1,9 @@
+ require "jruby-kafka/namespace"
+
+ # Wraps an underlying (usually Java) exception so callers can inspect it.
+ class KafkaError < StandardError
+   attr_reader :object
+
+   def initialize(object)
+     @object = object
+   end
+ end
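
The wrapped exception matters in practice: Kafka::Group#run re-raises ZkException as a KafkaError, with the original Java exception available on `object`:

    begin
      group.run(1, queue)
    rescue KafkaError => e
      warn "#{e.message} (underlying: #{e.object.class})"
    end
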
lib/jruby-kafka/group.rb ADDED
@@ -0,0 +1,216 @@
+ # Basically a port of https://cwiki.apache.org/confluence/display/KAFKA/Consumer+Group+Example
+
+ require "java"
+
+ require "jruby-kafka/namespace"
+ require "jruby-kafka/consumer"
+ require "jruby-kafka/error"
+
+ java_import 'java.util.concurrent.ExecutorService'
+ java_import 'java.util.concurrent.Executors'
+ java_import 'org.I0Itec.zkclient.exception.ZkException'
+
+ class Kafka::Group
+   # Create a Kafka client group
+   #
+   # options:
+   # :zk_connect => "localhost:2181" - REQUIRED: The connection string for the
+   #   zookeeper connection in the form host:port. Multiple hosts can be given to allow fail-over.
+   # :zk_connect_timeout => "6000" - (optional) The max time that the client waits while establishing a connection to zookeeper.
+   # :group_id => "group" - REQUIRED: The group id to consume on.
+   # :topic_id => "topic" - REQUIRED: The topic id to consume on.
+   # :reset_beginning => "from-beginning" - (optional) Delete the group's offsets in ZooKeeper on
+   #   #run and start with the earliest message present in the log rather than the latest message.
+   # :consumer_restart_on_error => "true" - (optional) Controls whether consumer threads restart
+   #   on caught exceptions. Exceptions are logged either way.
+   def initialize(options={})
+     validate_required_arguments(options)
+
+     @zk_connect = options[:zk_connect]
+     @group_id = options[:group_id]
+     @topic = options[:topic_id]
+     @zk_session_timeout = '6000'
+     @zk_connect_timeout = '6000'
+     @zk_sync_time = '2000'
+     @auto_offset_reset = 'largest'
+     @auto_commit_interval = '1000'
+     @running = false
+     @rebalance_max_retries = '4'
+     @rebalance_backoff_ms = '2000'
+     @socket_timeout_ms = "#{30 * 1000}"
+     @socket_receive_buffer_bytes = "#{64 * 1024}"
+     @fetch_message_max_bytes = "#{1024 * 1024}"
+     @auto_commit_enable = "#{true}"
+     @queued_max_message_chunks = '10'
+     @fetch_min_bytes = '1'
+     @fetch_wait_max_ms = '100'
+     @refresh_leader_backoff_ms = '200'
+     @consumer_timeout_ms = '-1'
+     # Kept as a real boolean: Kafka::Consumer tests this value for truth, and the
+     # string "false" would otherwise count as true.
+     @consumer_restart_on_error = false
+     @consumer_restart_sleep_ms = '0'
+     @consumer_id = nil
+
+     if options[:zk_connect_timeout]
+       @zk_connect_timeout = "#{options[:zk_connect_timeout]}"
+     end
+     if options[:zk_session_timeout]
+       @zk_session_timeout = "#{options[:zk_session_timeout]}"
+     end
+     if options[:zk_sync_time]
+       @zk_sync_time = "#{options[:zk_sync_time]}"
+     end
+     if options[:auto_commit_interval]
+       @auto_commit_interval = "#{options[:auto_commit_interval]}"
+     end
+     if options[:rebalance_max_retries]
+       @rebalance_max_retries = "#{options[:rebalance_max_retries]}"
+     end
+     if options[:rebalance_backoff_ms]
+       @rebalance_backoff_ms = "#{options[:rebalance_backoff_ms]}"
+     end
+     if options[:socket_timeout_ms]
+       @socket_timeout_ms = "#{options[:socket_timeout_ms]}"
+     end
+     if options[:socket_receive_buffer_bytes]
+       @socket_receive_buffer_bytes = "#{options[:socket_receive_buffer_bytes]}"
+     end
+     if options[:fetch_message_max_bytes]
+       @fetch_message_max_bytes = "#{options[:fetch_message_max_bytes]}"
+     end
+     if options[:auto_commit_enable]
+       @auto_commit_enable = "#{options[:auto_commit_enable]}"
+     end
+     if options[:queued_max_message_chunks]
+       @queued_max_message_chunks = "#{options[:queued_max_message_chunks]}"
+     end
+     if options[:fetch_min_bytes]
+       @fetch_min_bytes = "#{options[:fetch_min_bytes]}"
+     end
+     if options[:fetch_wait_max_ms]
+       @fetch_wait_max_ms = "#{options[:fetch_wait_max_ms]}"
+     end
+     if options[:refresh_leader_backoff_ms]
+       @refresh_leader_backoff_ms = "#{options[:refresh_leader_backoff_ms]}"
+     end
+     if options[:consumer_timeout_ms]
+       @consumer_timeout_ms = "#{options[:consumer_timeout_ms]}"
+     end
+     if options[:consumer_restart_on_error]
+       @consumer_restart_on_error = options[:consumer_restart_on_error].to_s == 'true'
+     end
+     if options[:consumer_restart_sleep_ms]
+       @consumer_restart_sleep_ms = "#{options[:consumer_restart_sleep_ms]}"
+     end
+
+     if options[:reset_beginning]
+       @auto_offset_reset = options[:reset_beginning] == 'from-beginning' ? 'smallest' : 'largest'
+     end
+
+     if options[:consumer_id]
+       @consumer_id = options[:consumer_id]
+     end
+   end
+
+   private
+   def validate_required_arguments(options={})
+     [:zk_connect, :group_id, :topic_id].each do |opt|
+       raise(ArgumentError, "#{opt} is required.") unless options[opt]
+     end
+   end
+
+   public
+   def shutdown
+     @consumer.shutdown if @consumer
+     @executor.shutdown if @executor
+     @running = false
+   end
+
+   public
+   def run(a_numThreads, a_queue)
+     begin
+       if @auto_offset_reset == 'smallest'
+         Java::kafka::utils::ZkUtils.maybeDeletePath(@zk_connect, "/consumers/#{@group_id}")
+       end
+
+       @consumer = Java::kafka::consumer::Consumer.createJavaConsumerConnector(createConsumerConfig())
+     rescue ZkException => e
+       raise KafkaError.new(e), "Got ZkException: #{e}"
+     end
+     topicCountMap = java.util.HashMap.new
+     thread_value = a_numThreads.to_java Java::int
+     topicCountMap.put(@topic, thread_value)
+     consumerMap = @consumer.createMessageStreams(topicCountMap)
+     streams = consumerMap[@topic].to_a
+
+     @executor = Executors.newFixedThreadPool(a_numThreads)
+     # Bind the Runnable overload of submit() explicitly to avoid overload ambiguity.
+     @executor_submit = @executor.java_method(:submit, [Java::JavaLang::Runnable.java_class])
+
+     streams.each_with_index do |stream, threadNumber|
+       @executor_submit.call(Kafka::Consumer.new(stream, threadNumber, a_queue, @consumer_restart_on_error, @consumer_restart_sleep_ms))
+     end
+     @running = true
+   end
+
+   public
+   def running?
+     @running
+   end
+
+   private
+   def createConsumerConfig
+     properties = java.util.Properties.new
+     properties.put("zookeeper.connect", @zk_connect)
+     properties.put("group.id", @group_id)
+     properties.put("zookeeper.connection.timeout.ms", @zk_connect_timeout)
+     properties.put("zookeeper.session.timeout.ms", @zk_session_timeout)
+     properties.put("zookeeper.sync.time.ms", @zk_sync_time)
+     properties.put("auto.commit.interval.ms", @auto_commit_interval)
+     properties.put("auto.offset.reset", @auto_offset_reset)
+     properties.put("rebalance.max.retries", @rebalance_max_retries)
+     properties.put("rebalance.backoff.ms", @rebalance_backoff_ms)
+     properties.put("socket.timeout.ms", @socket_timeout_ms)
+     properties.put("socket.receive.buffer.bytes", @socket_receive_buffer_bytes)
+     properties.put("fetch.message.max.bytes", @fetch_message_max_bytes)
+     properties.put("auto.commit.enable", @auto_commit_enable)
+     properties.put("queued.max.message.chunks", @queued_max_message_chunks)
+     properties.put("fetch.min.bytes", @fetch_min_bytes)
+     properties.put("fetch.wait.max.ms", @fetch_wait_max_ms)
+     properties.put("refresh.leader.backoff.ms", @refresh_leader_backoff_ms)
+     properties.put("consumer.timeout.ms", @consumer_timeout_ms)
+     properties.put('consumer.id', @consumer_id) unless @consumer_id.nil?
+     Java::kafka::consumer::ConsumerConfig.new(properties)
+   end
+ end
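
Putting the pieces together, a minimal consumption sketch; the ZooKeeper address and topic name are assumptions for illustration, not defaults of the gem:

    require 'jruby-kafka'

    queue = Queue.new
    group = Kafka::Group.new(
      :zk_connect => 'localhost:2181',  # assumed local ZooKeeper
      :group_id   => 'example-group',
      :topic_id   => 'test'             # assumed existing topic
    )

    group.run(2, queue)   # two consumer threads feeding one queue
    10.times { puts String.from_java_bytes(queue.pop) }  # payloads are byte arrays
    group.shutdown
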
lib/jruby-kafka/loader.rb ADDED
@@ -0,0 +1,12 @@
+ module Kafka
+   # Require every jar under KAFKA_PATH/libs (or under an explicitly given path).
+   def self.load_jars(kafka_path = nil)
+     kafka_path ||= ENV['KAFKA_PATH']
+
+     raise 'Please set KAFKA_PATH' unless kafka_path
+     dir = File.join(kafka_path, 'libs')
+     jars = Dir.glob(File.join(dir, '*.jar'))
+     raise "KAFKA_PATH set, but #{dir} contains no jar files." if jars.empty?
+     jars.each { |jar| require jar }
+   end
+ end
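
The loader can also be driven directly, bypassing the ENV lookup; '/opt/kafka' below is a hypothetical install location:

    require 'jruby-kafka/loader'

    Kafka.load_jars('/opt/kafka')   # requires every jar under /opt/kafka/libs
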
lib/jruby-kafka/namespace.rb ADDED
@@ -0,0 +1,3 @@
+ module Kafka
+
+ end
lib/jruby-kafka/producer.rb ADDED
@@ -0,0 +1,97 @@
+ # Basically a port of https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+Producer+Example
+
+ require "java"
+
+ require "jruby-kafka/namespace"
+ require "jruby-kafka/error"
+
+ class Kafka::Producer
+   java_import 'kafka.producer.ProducerConfig'
+   java_import 'kafka.producer.KeyedMessage'
+   KafkaProducer = Java::kafka.javaapi.producer.Producer
+   java_import 'kafka.message.NoCompressionCodec'
+   java_import 'kafka.message.GZIPCompressionCodec'
+   java_import 'kafka.message.SnappyCompressionCodec'
+
+   VALIDATIONS = {
+     'request.required.acks' => %w[ 0 1 -1 ],
+     'required.codecs' => [NoCompressionCodec.name, GZIPCompressionCodec.name, SnappyCompressionCodec.name],
+     'producer.type' => %w[ sync async ]
+   }
+
+   REQUIRED = %w[
+     metadata.broker.list
+   ]
+
+   # List of all available options extracted from http://kafka.apache.org/documentation.html#producerconfigs Apr. 27, 2014.
+   # If new options are added, they should just work. Please add them to the list so that we can get handy warnings.
+   KNOWN = %w[
+     acks                   max.request.size               queue.enqueue.timeout.ms
+     batch.num.messages     message.send.max.retries       receive.buffer.bytes
+     batch.size             metadata.broker.list           reconnect.backoff.ms
+     block.on.buffer.full   metadata.fetch.timeout.ms      request.required.acks
+     bootstrap.servers      metadata.max.age.ms            request.timeout.ms
+     buffer.memory          metric.reporters               retries
+     client.id              metrics.num.samples            retry.backoff.ms
+     compressed.topics      metrics.sample.window.ms       send.buffer.bytes
+     compression.codec      partitioner.class              serializer.class
+     compression.type       producer.type                  timeout.ms
+     key.serializer.class   queue.buffering.max.messages   topic.metadata.refresh.interval.ms
+     linger.ms              queue.buffering.max.ms
+   ]
+
+   attr_reader :producer, :send_method, :options
+
+   # Create a Kafka Producer
+   #
+   # options:
+   # :metadata_broker_list => ["localhost:9092"] - REQUIRED: a seed list of kafka brokers
+   def initialize(opts = {})
+     # Normalize option keys: underscores become dots, so :broker_list => 'broker.list'.
+     @options = opts.reduce({}) do |acc, (k, v)|
+       acc[k.to_s.gsub(/_/, '.')] = v
+       acc
+     end
+     if options['broker.list']
+       options['metadata.broker.list'] = options.delete 'broker.list'
+     end
+     if options['compressed.topics'].to_s == 'none'
+       options.delete 'compressed.topics'
+     end
+     if options['metadata.broker.list'].is_a? Array
+       options['metadata.broker.list'] = options['metadata.broker.list'].join(',')
+     end
+     validate_arguments
+     # raise, not throw: callers should get a StandardError until connect is called.
+     @send_method = proc { raise StandardError.new "Producer is not connected" }
+   end
+
+   def connect
+     @producer = KafkaProducer.new(createProducerConfig)
+     @send_method = producer.java_method :send, [KeyedMessage]
+   end
+
+   # Raises FailedToSendMessageException, or StandardError if not connected.
+   def sendMsg(topic, key, msg)
+     send_method.call(KeyedMessage.new(topic, key, msg))
+   end
+
+   private
+
+   def validate_arguments
+     errors = []
+     missing = REQUIRED.reject { |opt| options[opt] }
+     errors = ["Required settings: #{ missing.join(', ')}"] if missing.any?
+     invalid = VALIDATIONS.reject { |opt, valid| options[opt].nil? or valid.include? options[opt].to_s }
+     errors += invalid.map { |opt, valid| "#{ opt } should be one of: [#{ valid.join(', ')}]" }
+     fail StandardError.new "Invalid configuration arguments: #{ errors.join('; ') }" if errors.any?
+     options.keys.each do |opt|
+       STDERR.puts "WARNING: Unknown configuration key: #{opt}" unless KNOWN.include? opt
+     end
+   end
+
+   def createProducerConfig
+     properties = java.util.Properties.new
+     options.each { |opt, value| properties.put opt, value.to_s }
+     ProducerConfig.new(properties)
+   end
+ end
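
A minimal send sketch; the broker address is an assumption, and StringEncoder is specified so plain Ruby strings can be sent:

    require 'jruby-kafka'

    producer = Kafka::Producer.new(
      :metadata_broker_list => ['localhost:9092'],   # assumed local broker
      :serializer_class     => 'kafka.serializer.StringEncoder'
    )
    producer.connect
    producer.sendMsg('test', nil, 'hello from jruby-kafka')   # topic, key, message
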
metadata ADDED
@@ -0,0 +1,68 @@
+ --- !ruby/object:Gem::Specification
+ name: jruby-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.1.1
+ platform: java
+ authors:
+ - Joseph Lawson
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-09-04 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: jbundler
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 0.5.5
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 0.5.5
+   prerelease: false
+   type: :runtime
+ description: this is primarily to be used as an interface for logstash
+ email:
+ - joe@joekiller.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/jruby-kafka.rb
+ - lib/jruby-kafka/consumer.rb
+ - lib/jruby-kafka/error.rb
+ - lib/jruby-kafka/group.rb
+ - lib/jruby-kafka/loader.rb
+ - lib/jruby-kafka/namespace.rb
+ - lib/jruby-kafka/producer.rb
+ homepage: https://github.com/joekiller/jruby-kafka
+ licenses:
+ - Apache 2.0
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements:
+ - jar 'org.apache.kafka:kafka_2.9.2', '0.8.1'
+ - jar 'log4j:log4j', '1.2.14'
+ rubyforge_project:
+ rubygems_version: 2.2.2
+ signing_key:
+ specification_version: 4
+ summary: jruby Kafka wrapper
+ test_files: []
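
For the jbundler route, the jar requirements above map directly onto a Jarfile; a minimal sketch:

    # Jarfile - jar dependencies resolved by jbundler
    jar 'org.apache.kafka:kafka_2.9.2', '0.8.1'
    jar 'log4j:log4j', '1.2.14'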