jruby-kafka 0.1.1-java

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: af575b070c304d11ac6aa1af46eaad548b61661b
+   data.tar.gz: 4bc817399ca6126b32c0df3f6072ae695fc49be4
+ SHA512:
+   metadata.gz: a9fcd6fd838e6a2d87bd96ca8606bd19639c04e81722eb10e0be58b99369159af3eb595f7fa56c96da1938924a764cb6fedf526bce5dc156f2535fe7dcd80969
+   data.tar.gz: 8a40f64f4c7f1dbeed36daf62336610d96c88343a628f523ff529c9b0ab53c75638de657b8a6fdee08db6a992312f68c9db777cbeab0e51c829699bb357f4cfc
lib/jruby-kafka.rb ADDED
@@ -0,0 +1,17 @@
+ # Because of a problem with a kafka dependency, jbundler 0.5.5 does not work. Therefore, you
+ # need to do one of the following to have your Kafka jar dependencies available:
+ #
+ # - run your app under jbundler (adds a layer to bundler to manage jar dependencies)
+ # - already have the Kafka jars loaded before requiring jruby-kafka
+ # - set KAFKA_PATH in the environment to point to a Kafka binary installation
+ #
+ if not JBUNDLER_CLASSPATH and ENV['KAFKA_PATH']
+   require 'jruby-kafka/loader'
+ end
+
+ require "jruby-kafka/consumer"
+ require "jruby-kafka/group"
+ require "jruby-kafka/producer"
+
+ module Kafka
+ end
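The comment above lists three ways to make the Kafka jars visible. As an illustration of the KAFKA_PATH route, a setup along these lines should work; the install path is a placeholder, and requiring the component files directly sidesteps the jbundler guard in lib/jruby-kafka.rb:

    # Sketch only: point KAFKA_PATH at an unpacked Kafka binary distribution
    # (placeholder path) so the loader can require every jar under its libs/ directory.
    ENV['KAFKA_PATH'] ||= '/opt/kafka_2.9.2-0.8.1'
    require 'jruby-kafka/loader'
    Kafka.load_jars                  # requires "#{ENV['KAFKA_PATH']}/libs/*.jar"
    require 'jruby-kafka/group'      # the kafka.* java_imports can now resolve
    require 'jruby-kafka/producer'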
lib/jruby-kafka/consumer.rb ADDED
@@ -0,0 +1,44 @@
+ require "java"
+ require "jruby-kafka/namespace"
+
+ java_import 'kafka.consumer.ConsumerIterator'
+ java_import 'kafka.consumer.KafkaStream'
+ java_import 'kafka.common.ConsumerRebalanceFailedException'
+ java_import 'kafka.consumer.ConsumerTimeoutException'
+
+ class Kafka::Consumer
+   include Java::JavaLang::Runnable
+   java_signature 'void run()'
+
+   @m_stream
+   @m_threadNumber
+   @m_queue
+
+   def initialize(a_stream, a_threadNumber, a_queue, a_bool_restart_on_exception, a_sleep_ms)
+     @m_threadNumber = a_threadNumber
+     @m_stream = a_stream
+     @m_queue = a_queue
+     @m_restart_on_exception = a_bool_restart_on_exception
+     @m_sleep_ms = 1.0 / 1000.0 * Float(a_sleep_ms)
+   end
+
+   def run
+     it = @m_stream.iterator()
+     begin
+       while it.hasNext()
+         begin
+           @m_queue << it.next().message()
+         end
+       end
+     rescue Exception => e
+       puts("#{self.class.name} caught exception: #{e.class.name}")
+       puts(e.message) if e.message != ''
+       if @m_restart_on_exception
+         sleep(@m_sleep_ms)
+         retry
+       else
+         raise e
+       end
+     end
+   end
+ end
lib/jruby-kafka/error.rb ADDED
@@ -0,0 +1,9 @@
+ require "jruby-kafka/namespace"
+
+ class KafkaError < StandardError
+   attr_reader :object
+
+   def initialize(object)
+     @object = object
+   end
+ end
lib/jruby-kafka/group.rb ADDED
@@ -0,0 +1,216 @@
+ # basically we are porting this https://cwiki.apache.org/confluence/display/KAFKA/Consumer+Group+Example
+
+ require "java"
+
+ require "jruby-kafka/namespace"
+ require "jruby-kafka/consumer"
+ require "jruby-kafka/error"
+
+ java_import 'java.util.concurrent.ExecutorService'
+ java_import 'java.util.concurrent.Executors'
+ java_import 'org.I0Itec.zkclient.exception.ZkException'
+
+ class Kafka::Group
+   @consumer
+   @executor
+   @topic
+   @auto_offset_reset
+   @zk_connect
+   @group_id
+
+   # Create a Kafka client group
+   #
+   # options:
+   # :zk_connect => "localhost:2181" - REQUIRED: The connection string for the
+   #   zookeeper connection in the form host:port. Multiple URLS can be given to allow fail-over.
+   # :zk_connect_timeout => "6000" - (optional) The max time that the client waits while establishing a connection to zookeeper.
+   # :group_id => "group" - REQUIRED: The group id to consume on.
+   # :topic_id => "topic" - REQUIRED: The topic id to consume on.
+   # :reset_beginning => "from-beginning" - (optional) If the consumer does not already have an established offset
+   #   to consume from, start with the earliest message present in the log rather than the latest message.
+   # :consumer_restart_on_error => "true" - (optional) Controls if consumer threads are to restart on caught exceptions.
+   #   exceptions are logged.
+   def initialize(options={})
+     validate_required_arguments(options)
+
+     @zk_connect = options[:zk_connect]
+     @group_id = options[:group_id]
+     @topic = options[:topic_id]
+     @zk_session_timeout = '6000'
+     @zk_connect_timeout = '6000'
+     @zk_sync_time = '2000'
+     @auto_offset_reset = 'largest'
+     @auto_commit_interval = '1000'
+     @running = false
+     @rebalance_max_retries = '4'
+     @rebalance_backoff_ms = '2000'
+     @socket_timeout_ms = "#{30 * 1000}"
+     @socket_receive_buffer_bytes = "#{64 * 1024}"
+     @fetch_message_max_bytes = "#{1024 * 1024}"
+     @auto_commit_enable = "#{true}"
+     @queued_max_message_chunks = '10'
+     @fetch_min_bytes = '1'
+     @fetch_wait_max_ms = '100'
+     @refresh_leader_backoff_ms = '200'
+     @consumer_timeout_ms = '-1'
+     @consumer_restart_on_error = "#{false}"
+     @consumer_restart_sleep_ms = '0'
+     @consumer_id = nil
+
+     if options[:zk_connect_timeout]
+       @zk_connect_timeout = "#{options[:zk_connect_timeout]}"
+     end
+     if options[:zk_session_timeout]
+       @zk_session_timeout = "#{options[:zk_session_timeout]}"
+     end
+     if options[:zk_sync_time]
+       @zk_sync_time = "#{options[:zk_sync_time]}"
+     end
+     if options[:auto_commit_interval]
+       @auto_commit_interval = "#{options[:auto_commit_interval]}"
+     end
+
+     if options[:rebalance_max_retries]
+       @rebalance_max_retries = "#{options[:rebalance_max_retries]}"
+     end
+
+     if options[:rebalance_backoff_ms]
+       @rebalance_backoff_ms = "#{options[:rebalance_backoff_ms]}"
+     end
+
+     if options[:socket_timeout_ms]
+       @socket_timeout_ms = "#{options[:socket_timeout_ms]}"
+     end
+
+     if options[:socket_receive_buffer_bytes]
+       @socket_receive_buffer_bytes = "#{options[:socket_receive_buffer_bytes]}"
+     end
+
+     if options[:fetch_message_max_bytes]
+       @fetch_message_max_bytes = "#{options[:fetch_message_max_bytes]}"
+     end
+
+     if options[:auto_commit_enable]
+       @auto_commit_enable = "#{options[:auto_commit_enable]}"
+     end
+
+     if options[:queued_max_message_chunks]
+       @queued_max_message_chunks = "#{options[:queued_max_message_chunks]}"
+     end
+
+     if options[:fetch_min_bytes]
+       @fetch_min_bytes = "#{options[:fetch_min_bytes]}"
+     end
+
+     if options[:fetch_wait_max_ms]
+       @fetch_wait_max_ms = "#{options[:fetch_wait_max_ms]}"
+     end
+
+     if options[:refresh_leader_backoff_ms]
+       @refresh_leader_backoff_ms = "#{options[:refresh_leader_backoff_ms]}"
+     end
+
+     if options[:consumer_timeout_ms]
+       @consumer_timeout_ms = "#{options[:consumer_timeout_ms]}"
+     end
+
+     if options[:consumer_restart_on_error]
+       @consumer_restart_on_error = "#{options[:consumer_restart_on_error]}"
+     end
+
+     if options[:consumer_restart_sleep_ms]
+       @consumer_restart_sleep_ms = "#{options[:consumer_restart_sleep_ms]}"
+     end
+
+
+     if options[:reset_beginning]
+       if options[:reset_beginning] == 'from-beginning'
+         @auto_offset_reset = 'smallest'
+       else
+         @auto_offset_reset = 'largest'
+       end
+     end
+
+     if options[:consumer_id]
+       @consumer_id = options[:consumer_id]
+     end
+   end
+
+   private
+   def validate_required_arguments(options={})
+     [:zk_connect, :group_id, :topic_id].each do |opt|
+       raise(ArgumentError, "#{opt} is required.") unless options[opt]
+     end
+   end
+
+   public
+   def shutdown()
+     if @consumer
+       @consumer.shutdown()
+     end
+     if @executor
+       @executor.shutdown()
+     end
+     @running = false
+   end
+
+   public
+   def run(a_numThreads, a_queue)
+     begin
+       if @auto_offset_reset == 'smallest'
+         Java::kafka::utils::ZkUtils.maybeDeletePath(@zk_connect, "/consumers/#{@group_id}")
+       end
+
+       @consumer = Java::kafka::consumer::Consumer.createJavaConsumerConnector(createConsumerConfig())
+     rescue ZkException => e
+       raise KafkaError.new(e), "Got ZkException: #{e}"
+     end
+     topicCountMap = java.util.HashMap.new()
+     thread_value = a_numThreads.to_java Java::int
+     topicCountMap.put(@topic, thread_value)
+     consumerMap = @consumer.createMessageStreams(topicCountMap)
+     streams = Array.new(consumerMap[@topic])
+
+     @executor = Executors.newFixedThreadPool(a_numThreads)
+     @executor_submit = @executor.java_method(:submit, [Java::JavaLang::Runnable.java_class])
+
+     threadNumber = 0
+     for stream in streams
+       @executor_submit.call(Kafka::Consumer.new(stream, threadNumber, a_queue, @consumer_restart_on_error, @consumer_restart_sleep_ms))
+       threadNumber += 1
+     end
+     @running = true
+   end
+
+   public
+   def running?
+     @running
+   end
+
+   private
+   def createConsumerConfig()
+     properties = java.util.Properties.new()
+     properties.put("zookeeper.connect", @zk_connect)
+     properties.put("group.id", @group_id)
+     properties.put("zookeeper.connection.timeout.ms", @zk_connect_timeout)
+     properties.put("zookeeper.session.timeout.ms", @zk_session_timeout)
+     properties.put("zookeeper.sync.time.ms", @zk_sync_time)
+     properties.put("auto.commit.interval.ms", @auto_commit_interval)
+     properties.put("auto.offset.reset", @auto_offset_reset)
+     properties.put("rebalance.max.retries", @rebalance_max_retries)
+     properties.put("rebalance.backoff.ms", @rebalance_backoff_ms)
+     properties.put("socket.timeout.ms", @socket_timeout_ms)
+     properties.put("socket.receive.buffer.bytes", @socket_receive_buffer_bytes)
+     properties.put("fetch.message.max.bytes", @fetch_message_max_bytes)
+     properties.put("auto.commit.enable", @auto_commit_enable)
+     properties.put("queued.max.message.chunks", @queued_max_message_chunks)
+     properties.put("fetch.min.bytes", @fetch_min_bytes)
+     properties.put("fetch.wait.max.ms", @fetch_wait_max_ms)
+     properties.put("refresh.leader.backoff.ms", @refresh_leader_backoff_ms)
+     properties.put("consumer.timeout.ms", @consumer_timeout_ms)
+     unless @consumer_id.nil?
+       properties.put('consumer.id', @consumer_id)
+     end
+     return Java::kafka::consumer::ConsumerConfig.new(properties)
+   end
+ end
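Putting the pieces together, a hedged consumer-group sketch based on the options documented above might look like the following; the ZooKeeper address, group id, and topic are placeholders, and message payloads are assumed to arrive as Java byte arrays (the default decoder):

    # Sketch only: consume a handful of messages through Kafka::Group.
    queue = SizedQueue.new(20)                 # shared queue filled by the consumer threads
    group = Kafka::Group.new(
      :zk_connect => 'localhost:2181',         # placeholder ZooKeeper connection string
      :group_id   => 'example_group',
      :topic_id   => 'example_topic'
    )
    group.run(1, queue)                        # one Kafka::Consumer thread feeding the queue
    10.times { puts String.from_java_bytes(queue.pop) }
    group.shutdown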
lib/jruby-kafka/loader.rb ADDED
@@ -0,0 +1,12 @@
+ module Kafka
+   def self.load_jars(kafka_path = nil)
+     kafka_path ||= ENV['KAFKA_PATH']
+
+     raise 'Please set KAFKA_PATH' unless kafka_path
+     dir = File.join(kafka_path, 'libs')
+     jars = Dir.glob(File.join(dir, '*.jar'))
+     raise "KAFKA_PATH set, but #{dir} contains no jar files." if jars.empty?
+     jars.each { |jar| require jar }
+   end
+ end
+
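Kafka.load_jars can also be handed an explicit installation path instead of relying on the KAFKA_PATH environment variable; the path below is illustrative:

    require 'jruby-kafka/loader'
    Kafka.load_jars('/opt/kafka_2.9.2-0.8.1')   # requires every jar under <path>/libs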
lib/jruby-kafka/namespace.rb ADDED
@@ -0,0 +1,3 @@
+ module Kafka
+
+ end
lib/jruby-kafka/producer.rb ADDED
@@ -0,0 +1,97 @@
+ # basically we are porting this https://cwiki.apache.org/confluence/display/KAFKA/0.8.0+Producer+Example
+
+ require "java"
+
+ require "jruby-kafka/namespace"
+ require "jruby-kafka/error"
+
+ class Kafka::Producer
+   java_import 'kafka.producer.ProducerConfig'
+   java_import 'kafka.producer.KeyedMessage'
+   KafkaProducer = Java::kafka.javaapi.producer.Producer
+   java_import 'kafka.message.NoCompressionCodec'
+   java_import 'kafka.message.GZIPCompressionCodec'
+   java_import 'kafka.message.SnappyCompressionCodec'
+
+   VALIDATIONS = {
+     'request.required.acks' => %w[ 0 1 -1 ],
+     'required.codecs' => [NoCompressionCodec.name, GZIPCompressionCodec.name, SnappyCompressionCodec.name],
+     'producer.type' => %w[ sync async ]
+   }
+
+   REQUIRED = %w[
+     metadata.broker.list
+   ]
+
+   # List of all available options extracted from http://kafka.apache.org/documentation.html#producerconfigs Apr. 27, 2014
+   # If new options are added, they should just work. Please add them to the list so that we can get handy warnings.
+   KNOWN = %w[
+     acks                   max.request.size              receive.buffer.bytes
+     batch.num.messages     message.send.max.retries      reconnect.backoff.ms
+     batch.size             metadata.broker.list          request.required.acks
+     block.on.buffer.full   metadata.fetch.timeout.ms     request.timeout.ms
+     bootstrap.servers      metadata.max.age.ms           retries
+     buffer.memory          metric.reporters              retry.backoff.ms
+     client.id              metrics.num.samples           retry.backoff.ms
+     client.id              metrics.sample.window.ms      send.buffer.bytes
+     compressed.topics      partitioner.class             send.buffer.bytes
+     compression.codec      producer.type                 serializer.class
+     compression.type       queue.buffering.max.messages  timeout.ms
+     key.serializer.class   queue.buffering.max.ms        topic.metadata.refresh.interval.ms
+     linger.ms              queue.enqueue.timeout.ms
+   ]
+
+   attr_reader :producer, :send_method, :options
+
+   # Create a Kafka Producer
+   #
+   # options:
+   # metadata_broker_list: ["localhost:9092"] - REQUIRED: a seed list of kafka brokers
+   def initialize(opts = {})
+     @options = opts.reduce({}) do |opts, (k, v)|
+       opts[k.to_s.gsub(/_/, '.')] = v
+       opts
+     end
+     if options['broker.list']
+       options['metadata.broker.list'] = options.delete 'broker.list'
+     end
+     if options['compressed.topics'].to_s == 'none'
+       options.delete 'compressed.topics'
+     end
+     if options['metadata.broker.list'].is_a? Array
+       options['metadata.broker.list'] = options['metadata.broker.list'].join(',')
+     end
+     validate_arguments
+     @send_method = proc { throw StandardError.new "Producer is not connected" }
+   end
+
+   def connect()
+     @producer = KafkaProducer.new(createProducerConfig)
+     @send_method = producer.java_method :send, [KeyedMessage]
+   end
+
+   # throws FailedToSendMessageException or if not connected, StandardError.
+   def sendMsg(topic, key, msg)
+     send_method.call(KeyedMessage.new(topic, key, msg))
+   end
+
+   private
+
+   def validate_arguments
+     errors = []
+     missing = REQUIRED.reject { |opt| options[opt] }
+     errors = ["Required settings: #{ missing.join(', ')}"] if missing.any?
+     invalid = VALIDATIONS.reject { |opt, valid| options[opt].nil? or valid.include? options[opt].to_s }
+     errors += invalid.map { |opt, valid| "#{ opt } should be one of: [#{ valid.join(', ')}]" }
+     fail StandardError.new "Invalid configuration arguments: #{ errors.join('; ') }" if errors.any?
+     options.keys.each do |opt|
+       STDERR.puts "WARNING: Unknown configuration key: #{opt}" unless KNOWN.include? opt
+     end
+   end
+
+   def createProducerConfig()
+     properties = java.util.Properties.new()
+     options.each { |opt, value| properties.put opt, value.to_s }
+     return ProducerConfig.new(properties)
+   end
+ end
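A hedged producer sketch built on the option handling above (broker address and topic are placeholders; underscores in symbol keys become dots, and :broker_list is aliased to metadata.broker.list):

    # Sketch only: send a single string message.
    producer = Kafka::Producer.new(
      :broker_list            => 'localhost:9092',                  # placeholder broker list
      'serializer.class'      => 'kafka.serializer.StringEncoder',  # send plain strings
      'request.required.acks' => '1'
    )
    producer.connect
    producer.sendMsg('example_topic', nil, 'hello from jruby-kafka')  # key may be nil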
metadata ADDED
@@ -0,0 +1,68 @@
+ --- !ruby/object:Gem::Specification
+ name: jruby-kafka
+ version: !ruby/object:Gem::Version
+   version: 0.1.1
+ platform: java
+ authors:
+ - Joseph Lawson
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-09-04 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: jbundler
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 0.5.5
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - '='
+       - !ruby/object:Gem::Version
+         version: 0.5.5
+   prerelease: false
+   type: :runtime
+ description: this is primarily to be used as an interface for logstash
+ email:
+ - joe@joekiller.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - lib/jruby-kafka.rb
+ - lib/jruby-kafka/consumer.rb
+ - lib/jruby-kafka/error.rb
+ - lib/jruby-kafka/group.rb
+ - lib/jruby-kafka/loader.rb
+ - lib/jruby-kafka/namespace.rb
+ - lib/jruby-kafka/producer.rb
+ homepage: https://github.com/joekiller/jruby-kafka
+ licenses:
+ - Apache 2.0
+ metadata: {}
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements:
+ - jar 'org.apache.kafka:kafka_2.9.2', '0.8.1'
+ - jar 'log4j:log4j', '1.2.14'
+ rubyforge_project:
+ rubygems_version: 2.2.2
+ signing_key:
+ specification_version: 4
+ summary: jruby Kafka wrapper
+ test_files: []
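The requirements above are expressed as jbundler jar coordinates. If the jars are managed with jbundler (the gem's declared runtime dependency), a Jarfile mirroring them might look like this sketch:

    # Hypothetical Jarfile for jbundler, mirroring the gem's jar requirements.
    jar 'org.apache.kafka:kafka_2.9.2', '0.8.1'
    jar 'log4j:log4j', '1.2.14'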