sorceror_poseidon_cluster 0.4.0
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +8 -0
- data/.travis.yml +7 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +85 -0
- data/README.md +95 -0
- data/Rakefile +21 -0
- data/examples/consumer_group.rb +33 -0
- data/lib/poseidon/cluster.rb +28 -0
- data/lib/poseidon/consumer_group.rb +467 -0
- data/lib/poseidon_cluster.rb +1 -0
- data/scenario/.gitignore +1 -0
- data/scenario/consumer.rb +17 -0
- data/scenario/producer.rb +23 -0
- data/scenario/run.rb +35 -0
- data/scenario/scenario.rb +134 -0
- data/sorceror_poseidon_cluster.gemspec +27 -0
- data/spec/lib/poseidon/cluster_spec.rb +19 -0
- data/spec/lib/poseidon/consumer_group_spec.rb +313 -0
- data/spec/spec_helper.rb +14 -0
- metadata +184 -0
data/lib/poseidon/consumer_group.rb
ADDED
@@ -0,0 +1,467 @@
# A ConsumerGroup operates on all partitions of a single topic. The goal is to ensure
# each topic message is consumed only once, regardless of the number of consumer instances within
# a cluster, as described in: http://kafka.apache.org/documentation.html#distributionimpl.
#
# The ConsumerGroup internally creates multiple PartitionConsumer instances. It uses Zookeeper
# and follows a simple consumer rebalancing algorithm which allows all the consumers
# in a group to come to a consensus on which consumer is consuming which partitions. Each
# ConsumerGroup can 'claim' 0-n partitions and will consume their messages until another
# ConsumerGroup instance joins or leaves the cluster.
#
# Please note: ConsumerGroups themselves don't implement any threading or concurrency.
# When consuming messages, they simply round-robin across the claimed partitions. If you wish
# to parallelize consumption, simply create multiple ConsumerGroup instances. The built-in
# consensus algorithm will automatically rebalance the available partitions between them, and
# you can then decide for yourself whether to run them in multiple threads or processes,
# ideally on multiple boxes.
#
# Contrary to the Kafka documentation, consumer rebalancing is *only* triggered by the
# addition or removal of consumers within the same group; the addition of broker nodes
# and/or partitions *does not currently trigger* a rebalancing cycle.
#
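# @example Parallel consumption (illustrative sketch; the group name, broker list,
#   ZooKeeper list and topic below are placeholder values, not part of this gem)
#
#   groups = 2.times.map do
#     Poseidon::ConsumerGroup.new "my-group", ["localhost:9092"], ["localhost:2181"], "my-topic"
#   end
#   threads = groups.map do |group|
#     Thread.new { group.fetch_loop {|n, messages| puts "partition #{n}: #{messages.size}" } }
#   end
#   threads.each(&:join)
#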
# @api public
class Poseidon::ConsumerGroup
  DEFAULT_CLAIM_TIMEOUT = 30
  DEFAULT_LOOP_DELAY = 1

  # Poseidon::ConsumerGroup::Consumer is internally used by Poseidon::ConsumerGroup.
  # Don't invoke it directly.
  #
  # @api private
  class Consumer < ::Poseidon::PartitionConsumer

    # @attr_reader [Integer] partition consumer partition
    attr_reader :partition

    # @api private
    def initialize(group, partition, options = {})
      broker = group.leader(partition)
      offset = group.offset(partition)
      offset = (options[:trail] ? :latest_offset : :earliest_offset) if offset == 0
      options.delete(:trail)
      super group.id, broker.host, broker.port, group.topic, partition, offset, options
    end

  end

  # @param [Integer] pnum number of partitions
  # @param [Array<String>] cids consumer IDs
  # @param [String] id consumer ID
  # @return [Range, NilClass] selectable range, if any
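  # @example Illustrative values (not taken from the gem itself): 8 partitions shared
  #   by three consumers; the middle consumer is assigned partitions 3..5
  #
  #   Poseidon::ConsumerGroup.pick(8, ["a", "b", "c"], "b") # => 3..5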
  def self.pick(pnum, cids, id)
    cids = cids.sort
    pos = cids.index(id)
    return unless pos && pos < cids.size

    step = pnum.fdiv(cids.size).ceil
    frst = pos*step
    last = (pos+1)*step-1
    last = pnum-1 if last > pnum-1
    return if last < 0 || last < frst

    (frst..last)
  end

  # @attr_reader [String] name Group name
  attr_reader :name

  # @attr_reader [String] topic Topic name
  attr_reader :topic

  # @attr_reader [Poseidon::BrokerPool] pool Broker pool
  attr_reader :pool

  # @attr_reader [ZK::Client] zk Zookeeper client
  attr_reader :zk

  # @attr_reader [Hash] options Consumer options
  attr_reader :options

  # Create a new consumer group, which processes all partitions of the specified topic.
  #
  # @param [String] name Group name
  # @param [Array<String>] brokers A list of known brokers, e.g. ["localhost:9092"]
  # @param [Array<String>] zookeepers A list of known zookeepers, e.g. ["localhost:2181"]
  # @param [String] topic Topic to operate on
  # @param [Hash] options Consumer options
  # @option options [Integer] :max_bytes Maximum number of bytes to fetch. Default: 1048576 (1MB)
  # @option options [Integer] :max_wait_ms How long to block until the server sends us data. Default: 100 (100ms)
  # @option options [Integer] :min_bytes Smallest amount of data the server should send us. Default: 0 (send us data as soon as it is ready)
  # @option options [Integer] :claim_timeout Maximum number of seconds to wait for a partition claim. Default: 30 (DEFAULT_CLAIM_TIMEOUT)
  # @option options [Integer] :loop_delay Number of seconds to delay the next fetch (in #fetch_loop) if nothing was returned. Default: 1
  # @option options [Integer] :socket_timeout_ms Broker connection wait timeout in ms. Default: 10000
  # @option options [Boolean] :register Automatically register the instance and start consuming. Default: true
  # @option options [Boolean] :trail Start reading messages from the latest partition offsets and skip 'old' messages. Default: false
  #
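  # @example Typical construction (host/port values are placeholders)
  #
  #   group = Poseidon::ConsumerGroup.new "my-group",
  #                                       ["localhost:9092"],
  #                                       ["localhost:2181"],
  #                                       "my-topic",
  #                                       max_bytes: 256*1024
  #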
  # @api public
  def initialize(name, brokers, zookeepers, topic, options = {})
    @name = name
    @topic = topic
    @zk = ::ZK.new(zookeepers.join(","))
    # Poseidon::BrokerPool doesn't provide a default value for this option;
    # configuring the default here isn't beautiful, though. (by kssminus)
    options[:socket_timeout_ms] ||= 10000
    @options = options
    @consumers = []
    @pool = ::Poseidon::BrokerPool.new(id, brokers, options[:socket_timeout_ms])
    @mutex = Mutex.new

    register! unless options[:register] == false
  end

  # @return [String] a globally unique identifier
  def id
    @id ||= [name, Poseidon::Cluster.guid].join("-")
  end

  # @return [Hash<Symbol,String>] registry paths
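  # @example Resulting paths for a group named "my-group" consuming "my-topic"
  #   (names are illustrative)
  #
  #   group.registries
  #   # => { consumer: "/consumers/my-group/ids",
  #   #      owner:    "/consumers/my-group/owners/my-topic",
  #   #      offset:   "/consumers/my-group/offsets/my-topic" }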
  def registries
    @registries ||= {
      consumer: "/consumers/#{name}/ids",
      owner: "/consumers/#{name}/owners/#{topic}",
      offset: "/consumers/#{name}/offsets/#{topic}",
    }
  end

  # @return [Poseidon::ClusterMetadata] cluster metadata
  def metadata
    @metadata ||= Poseidon::ClusterMetadata.new.tap {|m| m.update pool.fetch_metadata([topic]) }
  end

  # @return [Poseidon::TopicMetadata] topic metadata
  def topic_metadata
    @topic_metadata ||= metadata.metadata_for_topics([topic])[topic]
  end

  # @return [Boolean] true if registered
  def registered?
    !!zk.children(consumer_path, ignore: :no_node)
  end

  # @return [Boolean] true if registration was successful, false if already registered
  def register!
    return false if registered?

    # Register instance
    registries.each do |_, path|
      zk.mkdir_p(path)
    end
    zk.create(consumer_path, "{}", ephemeral: true)
    zk.register(registries[:consumer]) {|_| rebalance! }

    # Rebalance
    rebalance!
  end

  # Reloads metadata/broker/partition information
  def reload
    @metadata = @topic_metadata = nil
    metadata
    self
  end

  # Closes the consumer group gracefully, only really useful in tests
  # @api private
  def close
    synchronize { release_all! }
    zk.close
  end

  # @param [Integer] partition
  # @return [Poseidon::Protocol::Broker] the leader for the given partition
  def leader(partition)
    metadata.lead_broker_for_partition(topic, partition)
  end

  # @param [Integer] partition
  # @return [Integer] the latest stored offset for the given partition
  def offset(partition)
    data, _ = zk.get offset_path(partition), ignore: :no_node
    data.to_i
  end

  # Commits the latest offset for a partition
  # @param [Integer] partition
  # @param [Integer] offset
  def commit(partition, offset)
    zk.set offset_path(partition), offset.to_s
    unlock(offset)
  rescue ZK::Exceptions::NoNode
    zk.create offset_path(partition), offset.to_s, ignore: :node_exists
    unlock(offset)
  end

  # Partitions, sorted by broker address (so partitions on the same broker are clustered together)
  # @return [Array<Poseidon::Protocol::PartitionMetadata>] sorted partitions
  def partitions
    return [] unless topic_metadata

    topic_metadata.available_partitions.sort_by do |part|
      broker = metadata.brokers[part.leader]
      [broker.host, broker.port].join(":")
    end
  end

  # Partitions currently claimed and consumed by this group instance
  # @return [Array<Integer>] partition IDs
  def claimed
    @consumers.map(&:partition).sort
  end

  # Checks out a single partition consumer. Round-robins between claimed partitions.
  #
  # @yield [consumer] The processing block
  # @yieldparam [Consumer] consumer The consumer instance
  # @yieldreturn [Boolean] return false to stop auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit the consumer offset (default: true)
  # @return [Boolean] true if a consumer was checked out, false if none could be claimed
  #
  # @example
  #
  #   ok = group.checkout do |consumer|
  #     puts "Checked out consumer for partition #{consumer.partition}"
  #   end
  #   ok # => true if the block was run, false otherwise
  #
  # @api public
  def checkout(opts = {})
    register!

    lock

    @current_consumer = @consumers.shift

    if @current_consumer.nil?
      unlock
      return false
    end

    @consumers.push @current_consumer
    commit = yield @current_consumer

    unless opts[:commit] == false || commit == false
      commit @current_consumer.partition, @current_consumer.offset
    end
    true
  rescue StandardError => e
    unlock
    raise e
  end

  # Convenience method to fetch messages from the broker.
  # Round-robins between claimed partitions.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to prevent auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit the consumed offset (default: true)
  # @return [Boolean] true if messages were fetched, false if none could be claimed
  #
  # @example
  #
  #   ok = group.fetch do |n, messages|
  #     puts "Fetched #{messages.size} messages for partition #{n}"
  #   end
  #   ok # => true if the block was run, false otherwise
  #
  # @api public
  def fetch(opts = {})
    checkout(opts) do |consumer|
      payloads = consumer.fetch
      if payloads.empty?
        commit consumer.partition, consumer.offset if opts[:commit] == false
        true
      else
        yield consumer.partition, payloads
      end
    end
  end

  # Initializes an infinite fetch loop. This method blocks!
  #
  # Will wait for `loop_delay` seconds after each failed fetch. This may happen when there is
  # no new data or when the consumer hasn't claimed any partitions.
  #
  # SPECIAL ATTENTION:
  # When 'breaking out' of the loop, you must do it before processing the messages, as
  # the last offset will not be committed. Please see the examples below.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition, may be -1 if no partitions are claimed
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to prevent auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit the consumed offset (default: true)
  # @option opts [Boolean] :loop_delay Delay override in seconds after an unsuccessful fetch.
  #
  # @example
  #
  #   group.fetch_loop do |n, messages|
  #     puts "Fetched #{messages.size} messages for partition #{n}"
  #   end
  #   puts "Done" # => this code is never reached
  #
  # @example Stopping the loop (wrong)
  #
  #   counts = Hash.new(0)
  #   group.fetch_loop do |n, messages|
  #     counts[n] += messages.size
  #     puts "Status: #{counts.inspect}"
  #     break if counts[0] > 100
  #   end
  #   puts "Result: #{counts.inspect}"
  #   puts "Offset: #{group.offset(0)}"
  #
  #   # Output:
  #   # Status: {0=>30}
  #   # Status: {0=>60}
  #   # Status: {0=>90}
  #   # Status: {0=>120}
  #   # Result: {0=>120}
  #   # Offset: 90 # => Last offset was not committed!
  #
  # @example Stopping the loop (correct)
  #
  #   counts = Hash.new(0)
  #   group.fetch_loop do |n, messages|
  #     break if counts[0] > 100
  #     counts[n] += messages.size
  #     puts "Status: #{counts.inspect}"
  #   end
  #   puts "Result: #{counts.inspect}"
  #   puts "Offset: #{group.offset(0)}"
  #
  #   # Output:
  #   # Status: {0=>30}
  #   # Status: {0=>60}
  #   # Status: {0=>90}
  #   # Status: {0=>120}
  #   # Result: {0=>120}
  #   # Offset: 120
  #
  # @api public
  def fetch_loop(opts = {})
    delay = opts[:loop_delay] || options[:loop_delay] || DEFAULT_LOOP_DELAY

    loop do
      mp = false
      ok = fetch(opts) do |n, messages|
        mp = !messages.empty?
        yield n, messages
      end

      # Yield over an empty array if nothing was claimed,
      # to allow the user to e.g. break out of the loop
      unless ok
        yield -1, []
      end

      # Sleep if either nothing was claimed or nothing was returned
      unless ok && mp
        sleep delay
      end
    end
  end

  protected

  # Rebalance algorithm:
  #
  # * let CG be all consumers in the same group that consume topic T
  # * let PT be all partitions producing topic T
  # * sort CG
  # * sort PT (so partitions on the same broker are clustered together)
  # * let POS be our index position in CG and let N = ceil(size(PT) / size(CG))
  # * assign partitions from POS*N to (POS+1)*N-1
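  #
  # For example (illustrative values, not from the gem): with 8 partitions and three
  # consumers ["a", "b", "c"], N = ceil(8/3) = 3, so "a" claims partitions 0..2,
  # "b" claims 3..5 and "c" claims 6..7 (see .pick above).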
  def rebalance!
    return if @pending

    @pending = true
    synchronize do
      @pending = nil
      release_all!
      reload

      ids = zk.children(registries[:consumer], watch: true)
      pms = partitions
      rng = self.class.pick(pms.size, ids, id)

      pms[rng].each do |pm|
        if @pending
          release_all!
          break
        end

        consumer = claim!(pm.id)
        @consumers.push(consumer) if consumer
      end if rng
    end
  end

  # Release all consumer claims
  def release_all!
    @consumers.each {|c| release!(c.partition) }
    @consumers.clear
  end

  private

  def lock
    @mutex.lock
  end

  def unlock(offset = nil)
    raise "Mutex should be locked, possibly committing out of order" unless @mutex.locked?

    if offset
      @mutex.unlock if @current_consumer.offset == offset
    else
      @mutex.unlock
    end
  end

  def synchronize
    @mutex.synchronize { yield }
  end

  # Claim the ownership of the partition for this consumer
  # @raise [Timeout::Error]
  def claim!(partition)
    path = claim_path(partition)
    Timeout.timeout options[:claim_timeout] || DEFAULT_CLAIM_TIMEOUT do
      while zk.create(path, id, ephemeral: true, ignore: :node_exists).nil?
        return if @pending
        sleep(0.1)
      end
    end
    Consumer.new self, partition, options.dup
  end

  # Release ownership of the partition
  def release!(partition)
    zk.delete claim_path(partition), ignore: :no_node
  end

  # @return [String] zookeeper ownership claim path
  def claim_path(partition)
    "#{registries[:owner]}/#{partition}"
  end

  # @return [String] zookeeper offset storage path
  def offset_path(partition)
    "#{registries[:offset]}/#{partition}"
  end

  # @return [String] zookeeper consumer registration path
  def consumer_path
    "#{registries[:consumer]}/#{id}"
  end
end
data/lib/poseidon_cluster.rb
ADDED
@@ -0,0 +1 @@
require 'poseidon/cluster'
data/scenario/.gitignore
ADDED
@@ -0,0 +1 @@
output.txt
data/scenario/consumer.rb
ADDED
@@ -0,0 +1,17 @@
#!/usr/bin/env ruby
require 'bundler/setup'
require 'poseidon_cluster'

# Scenario consumer: appends every consumed message to the shared output file
# as "name,partition,value" lines.
name = ARGV[0].to_s
output = File.open(ARGV[1], "a")
output.sync = true

total = 0
consumer = Poseidon::ConsumerGroup.new "my-group", ["localhost:29092"], ["localhost:22181"], "my-topic", max_bytes: 256*1024
consumer.fetch_loop do |n, messages|
  # Consumers named after 'Q' (e.g. "X", "Y") exit once they have consumed something,
  # which triggers a rebalance among the remaining consumers.
  break if name[0] > 'Q' && total > 0
  messages.each do |m|
    output.write "#{name},#{n},#{m.value}\n"
  end
  total += messages.size
end
data/scenario/producer.rb
ADDED
@@ -0,0 +1,23 @@
#!/usr/bin/env ruby
require 'bundler/setup'
require 'poseidon'

# Scenario producer: writes `limit` sequentially numbered messages to "my-topic",
# starting at `offset`, in batches of at most 10,000.
limit, offset = ARGV[0].to_i, ARGV[1].to_i
producer = Poseidon::Producer.new ["localhost:29092"], "poseidon-producer"

while limit > 0
  batch = limit > 10000 ? 10000 : limit
  limit -= batch

  messages = (0...batch).map do
    num = offset.to_s.rjust(8, "0")
    offset += 1
    Poseidon::MessageToSend.new "my-topic", num, Time.now.to_s + num
  end

  # Retry a failed delivery up to 10 times
  10.times do
    ok = producer.send_messages messages
    break if ok
    sleep(1)
  end
end
data/scenario/run.rb
ADDED
@@ -0,0 +1,35 @@
#!/usr/bin/env ruby

require 'bundler/setup'
require 'timeout'
require File.expand_path("../scenario", __FILE__)

# Start Zookeeper & Kafka, then interleave producers and consumers,
# verifying the total message count at each checkpoint.
Scenario.run do
  5.times do
    produce 1000
  end
  consume "A"
  consume "B"
  consume "C"
  checkpoint!

  15.times { produce 1000 }
  consume "D"
  10.times { produce 1000 }
  consume "X"
  10.times { produce 1000 }
  checkpoint!

  20.times { produce 1000 }
  consume "E"
  consume "F"
  15.times { produce 1000 }
  consume "Y"
  50.times { produce 100 }
  20.times { produce 1000 }

  checkpoint!
end
data/scenario/scenario.rb
ADDED
@@ -0,0 +1,134 @@
require 'fileutils'
require 'pathname'

# Test-scenario harness: downloads and configures a local Kafka/ZooKeeper pair,
# spawns the producer/consumer scripts and verifies that every produced message
# ends up in the output file exactly once.
module Scenario
  extend self

  ROOT = Pathname.new(File.expand_path("../", __FILE__))
  VERSION = "0.8.1.1"
  SERVER = ROOT.join "kafka_2.10-#{VERSION}"

  TOPIC_NAME = "my-topic"
  KAFKA_BIN = SERVER.join("bin", "kafka-server-start.sh")
  KAFKA_CFG = SERVER.join("config", "server-poseidon.properties")
  KAFKA_TMP = "/tmp/kafka-logs-poseidon"
  ZOOKP_BIN = SERVER.join("bin", "zookeeper-server-start.sh")
  ZOOKP_CFG = SERVER.join("config", "zookeeper-poseidon.properties")
  ZOOKP_TMP = "/tmp/zookeeper-poseidon"
  LOG4J_CFG = SERVER.join("config", "log4j.properties")
  OUTPUT = Scenario::ROOT.join("output.txt")

  @@pids = {}
  @@total = 0

  def run(&block)
    setup
    instance_eval(&block)
  rescue => e
    abort [e, *e.backtrace[0,20]].join("\n")
  ensure
    teardown
  end

  def setup
    FileUtils.rm_rf OUTPUT.to_s
    configure

    # Ensure all required files are present
    [KAFKA_BIN, ZOOKP_BIN, KAFKA_CFG, ZOOKP_CFG].each do |path|
      abort "Unable to locate #{path}. File does not exist!" unless path.file?
    end

    Signal.trap("INT") { teardown }

    spawn KAFKA_BIN, KAFKA_CFG
    spawn ZOOKP_BIN, ZOOKP_CFG
    sleep(2)
  end

  def teardown
    @@pids.each do |_, pid|
      Process.kill :TERM, pid
    end
    sleep(1)
    FileUtils.rm_rf KAFKA_TMP.to_s
    FileUtils.rm_rf ZOOKP_TMP.to_s

    fail! unless numlines == @@total
  end

  def configure
    download

    KAFKA_CFG.open("w") do |f|
      f.write SERVER.join("config", "server.properties").read.
        sub("=9092", "=29092").
        sub(":2181", ":22181").
        sub("num.partitions=2", "num.partitions=12").
        sub("log.flush.interval.ms=1000", "log.flush.interval.ms=10").
        sub("/tmp/kafka-logs", KAFKA_TMP)
    end
    ZOOKP_CFG.open("w") do |f|
      f.write SERVER.join("config", "zookeeper.properties").read.
        sub("/tmp/zookeeper", ZOOKP_TMP).
        sub("=2181", "=22181")
    end
    content = LOG4J_CFG.read
    LOG4J_CFG.open("w") do |f|
      f.write content.gsub("INFO", "FATAL")
    end if content.include?("INFO")
  end

  def download
    return if SERVER.directory?
    sh "cd #{ROOT} && curl http://www.mirrorservice.org/sites/ftp.apache.org/kafka/#{VERSION}/kafka_2.10-#{VERSION}.tgz | tar xz"
  end

  # Wait (up to `timeout` seconds) for the output file to catch up with the
  # number of messages produced so far; abort if it overshoots or times out.
  def checkpoint!(timeout = 100)
    puts "--> Verifying #{@@total}"
    timeout.times do
      if numlines > @@total
        break
      elsif numlines < @@total
        sleep(1)
      else
        return
      end
    end
    fail!
  end

  def consume(name)
    puts "--> Launching consumer #{name}"
    spawn ROOT.join("consumer.rb"), name, OUTPUT
  end

  def produce(count)
    puts "--> Producing messages #{@@total}-#{@@total+count-1}"
    sh ROOT.join("producer.rb"), count, @@total
    @@total += count
  end

  def numlines
    `wc -l #{OUTPUT} 2> /dev/null`.to_i
  end

  def abort(message)
    Kernel.abort "ERROR: #{message}"
  end

  def fail!
    Kernel.abort "FAILED: expected #{@@total} but was #{numlines}"
  end

  def sh(*bits)
    cmd = bits.join(" ")
    system(cmd) || abort(cmd)
  end

  def spawn(*args)
    cmd = args.join(" ")
    @@pids[cmd] = Process.spawn(cmd)
  end

end