worker_roulette 0.0.12 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +80 -12
- data/lib/worker_roulette/version.rb +1 -1
- data/spec/benchmark/perf_test.rb +44 -60
- data/spec/integration/evented_worker_roulette_spec.rb +4 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -4,13 +4,64 @@ WorkerRoulette is designed to allow large numbers of unique devices, processes,
|
|
4
4
|
|
5
5
|
WorkerRoulette lets you have thousands of competing consumers (distributed over as many machines as you'd like) processing ordered messages from millions of totally unknown message providers. It does all this and ensures that the messages sent from each message provider are processed in exactly the order it sent them.
|
6
6
|
|
7
|
-
##
|
7
|
+
## Asynchronous Api (Evented)
|
8
8
|
```ruby
|
9
9
|
size_of_connection_pool = 100
|
10
|
-
redis_config = {host: 'localhost', timeout: 5, db: 1}
|
11
10
|
|
12
11
|
#Start it up
|
13
|
-
|
12
|
+
#the config takes size for the connection pool size, evented to specify which api to use, then the normal redis config
|
13
|
+
WorkerRoulette.start(size: size_of_connection_pool, evented: false, host: 'localhost', timeout: 5, db: 1)
|
14
|
+
|
15
|
+
#Enqueue some work
|
16
|
+
sender_id = :shady
|
17
|
+
foreman = WorkerRoulette.a_foreman(sender_id)
|
18
|
+
|
19
|
+
foreman.enqueue_work_order(['hello', 'foreman']) do |msg|
|
20
|
+
puts "work enqueued #{msg}"
|
21
|
+
end
|
22
|
+
|
23
|
+
#Pull it off
|
24
|
+
tradesman = WorkerRoulette.a_tradesman
|
25
|
+
tradesman.work_orders! do |work_orders| #drain the queue of the next available sender
|
26
|
+
work_orders.first # => ['hello', 'foreman']
|
27
|
+
end
|
28
|
+
|
29
|
+
#Enqueue some more from someone else
|
30
|
+
other_sender_id = :the_real_slim_shady
|
31
|
+
other_foreman = WorkerRoulette.a_foreman(other_sender_id)
|
32
|
+
other_foreman.enqueue_work_order({'can you get me' => 'the number nine?'}) do |msg|
|
33
|
+
puts "work enqueued #{msg}"
|
34
|
+
end
|
35
|
+
|
36
|
+
#Have the same worker pull that off
|
37
|
+
tradesman.work_orders! do |work_orders| #drain the queue of the next available sender
|
38
|
+
work_orders.first # => {'can you get me' => 'the number nine?'}
|
39
|
+
end
|
40
|
+
|
41
|
+
#Have your workers wait for work to come in
|
42
|
+
on_subscribe_callback = -> do
|
43
|
+
puts "Huzzah! We're listening!"
|
44
|
+
foreman.enqueue_work_order('will I see you later?')
|
45
|
+
foreman.enqueue_work_order('can you give me back my dime?')
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
#And they will pull it off as it comes, as long as it comes
|
50
|
+
#NB: This is NOT a blocking operation, so no worries
|
51
|
+
tradesman.wait_for_work_orders(on_subscribe_callback) do |work_orders, message, channel| #drain the queue of the next available sender
|
52
|
+
work_orders # => ['will I see you later', 'can you give me back my dime?']
|
53
|
+
message # => 'new_job_ready'
|
54
|
+
channel # => '' #the name of the channel the message was published on, if one was used -- see below
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
## Synchronous Api
|
59
|
+
```ruby
|
60
|
+
size_of_connection_pool = 100
|
61
|
+
|
62
|
+
#Start it up
|
63
|
+
#the config takes size for the connection pool size, evented to specify which api to use, then the normal redis config
|
64
|
+
WorkerRoulette.start(size: size_of_connection_pool, evented: false, host: 'localhost', timeout: 5, db: 1)
|
14
65
|
|
15
66
|
#Enqueue some work
|
16
67
|
sender_id = :shady
|
@@ -19,8 +70,8 @@ foreman.enqueue_work_order(['hello', 'foreman'])
|
|
19
70
|
|
20
71
|
#Pull it off
|
21
72
|
tradesman = WorkerRoulette.tradesman
|
22
|
-
|
23
|
-
|
73
|
+
work_orders = tradesman.work_orders! #drain the queue of the next available sender
|
74
|
+
work_orders.first # => ['hello', 'foreman']
|
24
75
|
|
25
76
|
#Enqueue some more from someone else
|
26
77
|
other_sender_id = :the_real_slim_shady
|
@@ -28,8 +79,8 @@ other_foreman = WorkerRoulette.foreman(other_sender_id)
|
|
28
79
|
other_foreman.enqueue_work_order({'can you get me' => 'the number nine?'})
|
29
80
|
|
30
81
|
#Have the same worker pull that off
|
31
|
-
|
32
|
-
|
82
|
+
work_orders = tradesman.work_orders! #drain the queue of the next available sender
|
83
|
+
work_orders.first # => {'can you get me' => 'the number nine?'}
|
33
84
|
|
34
85
|
#Have your workers wait for work to come in
|
35
86
|
on_subscribe_callback = -> do
|
@@ -40,9 +91,9 @@ end
|
|
40
91
|
|
41
92
|
|
42
93
|
#And they will pull it off as it comes, as long as it comes
|
43
|
-
#
|
44
|
-
tradesman.wait_for_work_orders(on_subscribe_callback) do |
|
45
|
-
|
94
|
+
#NB: This IS a blocking operation
|
95
|
+
tradesman.wait_for_work_orders(on_subscribe_callback) do |work_orders| #drain the queue of the next available sender
|
96
|
+
work_orders # => ['will I see you later', 'can you give me back my dime?']
|
46
97
|
end
|
47
98
|
```
|
48
99
|
|
@@ -65,11 +116,28 @@ tradesman.wait_for_work_orders(publish) do |work|
|
|
65
116
|
work.to_s.should_not match("evil") #channels let us ignore the other's evil orders
|
66
117
|
tradesman.unsubscribe
|
67
118
|
end
|
68
|
-
|
69
119
|
```
|
70
120
|
|
121
|
+
## Performance
|
122
|
+
Running the performance tests on my laptop, the numbers break down like this:
|
123
|
+
### Async Api
|
124
|
+
- Manual: ~4200 read-write round-trips / second
|
125
|
+
- Pubsub: ~5200 read-write round-trips / second
|
126
|
+
|
127
|
+
### Sync Api
|
128
|
+
- Manual: ~1600 read-write round-trips / second
|
129
|
+
- Pubsub: ~2000 read-write round-trips / second
|
130
|
+
|
131
|
+
To run the perf tests yourself run `bundle exec spec:perf`
|
132
|
+
|
133
|
+
## Redis Pubsub and Polling
|
134
|
+
The `wait_for_work_orders` method works using Redis' pubsub mechanism. The advantage to this is that it is very fast and minimizes network traffic. The downside is that Redis' pubsub implementation is 'fire and forget', so any subscribers who are not listening at the moment the message is published will miss it. In order to compensate for this, WorkerRoulette's Async Api creates a backup timer (using EM.add_periodic_timer) that will poll redis every 20-25 seconds for new work. Since the timer is reset every time new work comes in, if you have an active publisher, the timer may never need to fire. It only serves as a backup to make sure no work is left waiting in the queues because of network problems. Since there is no one polling mechanism that works for all situations in a synchronous environment, this feature is only available through the Async Api.
|
135
|
+
|
136
|
+
## Redis Version
|
137
|
+
WorkerRoulette uses Redis' lua scripting feature to achieve such high throughput and therefore requires a version of Redis that supports lua scripting (>= Redis 2.6)
|
138
|
+
|
71
139
|
##Caveat Emptor
|
72
|
-
While WorkerRoulette does promise to keep the messages of each consumer processed in order by competing consumers, it does NOT guarantee the order in which the queues themselves will be processed. In general, work is processed in a FIFO order, but for performance reasons this has been left a loose FIFO. For example, if Abdul enqueues some ordered messages ('1', '2', and '3') and then so do Mark and Wanda, Mark's messages may be processed first, then it would likely be Abdul's, and then Wanda's. However, even though Mark jumped the line, Abdul's messages will still be processed the order he enqueued them ('1', '2', then '3').
|
140
|
+
While WorkerRoulette does promise to keep the messages of each consumer processed in order by competing consumers, it does NOT guarantee the order in which the queues themselves will be processed. In general, work is processed in a FIFO order, but for performance reasons this has been left a loose FIFO. For example, if Abdul enqueues some ordered messages ('1', '2', and '3') and then so do Mark and Wanda, Mark's messages may be processed first, then it would likely be Abdul's, and then Wanda's. However, even though Mark jumped the line, Abdul's messages will still be processed in the order he enqueued them ('1', '2', then '3').
|
73
141
|
|
74
142
|
## Installation
|
75
143
|
|
data/spec/benchmark/perf_test.rb
CHANGED
@@ -3,38 +3,56 @@ require 'benchmark'
|
|
3
3
|
require 'eventmachine'
|
4
4
|
|
5
5
|
REDIS_CONNECTION_POOL_SIZE = 100
|
6
|
-
ITERATIONS =
|
6
|
+
ITERATIONS = 10_000
|
7
7
|
|
8
8
|
work_order = {'ding dong' => "hello_foreman_" * 100}
|
9
|
+
WorkerRoulette.start(size: REDIS_CONNECTION_POOL_SIZE, evented: false)
|
10
|
+
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
9
11
|
|
10
|
-
|
11
|
-
# WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
12
|
+
puts "Redis Connection Pool Size: #{REDIS_CONNECTION_POOL_SIZE}"
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
# end
|
14
|
+
Benchmark.bmbm do |x|
|
15
|
+
x.report "Time to insert and read #{ITERATIONS} large work_orders" do # ~1600 work_orders / second round trip; 50-50 read-write time; CPU and IO bound
|
16
|
+
WorkerRoulette.start(size: REDIS_CONNECTION_POOL_SIZE, evented: false)
|
17
|
+
ITERATIONS.times do |iteration|
|
18
|
+
sender = 'sender_' + iteration.to_s
|
19
|
+
foreman = WorkerRoulette.foreman(sender)
|
20
|
+
foreman.enqueue_work_order(work_order)
|
21
|
+
end
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
23
|
+
ITERATIONS.times do |iteration|
|
24
|
+
sender = 'sender_' + iteration.to_s
|
25
|
+
tradesman = WorkerRoulette.tradesman
|
26
|
+
tradesman.work_orders!
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
30
|
|
31
31
|
EM::Hiredis.reconnect_timeout = 0.01
|
32
32
|
|
33
|
+
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
34
|
+
|
33
35
|
Benchmark.bmbm do |x|
|
34
|
-
x.report "Time to
|
35
|
-
|
36
|
+
x.report "Time for tradesmans to enqueue_work_order and read #{ITERATIONS} large work_orders via pubsub" do # ~2000 work_orders / second round trip
|
37
|
+
WorkerRoulette.start(size: REDIS_CONNECTION_POOL_SIZE, evented: false)
|
38
|
+
ITERATIONS.times do |iteration|
|
39
|
+
p = -> do
|
40
|
+
sender = 'sender_' + iteration.to_s
|
41
|
+
foreman = WorkerRoulette.foreman(sender)
|
42
|
+
foreman.enqueue_work_order(work_order)
|
43
|
+
end
|
44
|
+
tradesman = WorkerRoulette.tradesman
|
45
|
+
tradesman.wait_for_work_orders(p) {|m| m; tradesman.unsubscribe}
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
36
49
|
|
37
|
-
|
50
|
+
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
51
|
+
|
52
|
+
Benchmark.bmbm do |x|
|
53
|
+
x.report "Time to evently insert and read #{ITERATIONS} large work_orders" do # ~4200 work_orders / second round trip; 50-50 read-write time; CPU and IO bound
|
54
|
+
EM.run do
|
55
|
+
WorkerRoulette.start(evented: true)
|
38
56
|
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
39
57
|
@total = 0
|
40
58
|
@tradesman = WorkerRoulette.a_tradesman
|
@@ -54,11 +72,12 @@ Benchmark.bmbm do |x|
|
|
54
72
|
end
|
55
73
|
|
56
74
|
Benchmark.bmbm do |x|
|
57
|
-
x.report "Time to evently pubsub insert and read #{ITERATIONS} large work_orders" do # ~
|
75
|
+
x.report "Time to evently pubsub insert and read #{ITERATIONS} large work_orders" do # ~5200 work_orders / second round trip; 50-50 read-write time; CPU and IO bound
|
58
76
|
EM.run do
|
77
|
+
WorkerRoulette.start(evented: true)
|
59
78
|
@processed = 0
|
60
79
|
@total = 0
|
61
|
-
WorkerRoulette.start(evented: true)
|
80
|
+
WorkerRoulette.start(evented: true)
|
62
81
|
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
63
82
|
@total = 0
|
64
83
|
@tradesman = WorkerRoulette.a_tradesman
|
@@ -74,39 +93,4 @@ Benchmark.bmbm do |x|
|
|
74
93
|
end
|
75
94
|
end
|
76
95
|
|
77
|
-
|
78
|
-
|
79
|
-
# Benchmark.bmbm do |x|
|
80
|
-
# x.report "Time for tradesmans to enqueue_work_order and read #{ITERATIONS} large work_orders via pubsub" do # ~1800 work_orders / second round trip
|
81
|
-
# ITERATIONS.times do |iteration|
|
82
|
-
# p = -> do
|
83
|
-
# sender = 'sender_' + iteration.to_s
|
84
|
-
# foreman = WorkerRoulette.foreman(sender)
|
85
|
-
# foreman.enqueue_work_order(work_order)
|
86
|
-
# end
|
87
|
-
# tradesman = WorkerRoulette.tradesman
|
88
|
-
# tradesman.wait_for_work_orders(p) {|m| m; tradesman.unsubscribe}
|
89
|
-
# end
|
90
|
-
# end
|
91
|
-
# end
|
92
|
-
|
93
|
-
# WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
94
|
-
|
95
|
-
# EM.run do
|
96
|
-
# EM.add_timer(6) {puts "em off";EM.stop}
|
97
|
-
# tradesmans = []
|
98
|
-
# foremans = []
|
99
|
-
# @start = Time.now
|
100
|
-
# @end = nil
|
101
|
-
# ITERATIONS.times do |iteration|
|
102
|
-
# s = WorkerRoulette.tradesman
|
103
|
-
# tradesmans << s
|
104
|
-
# sender = 'sender_' + iteration.to_s
|
105
|
-
# foreman = WorkerRoulette.foreman(sender)
|
106
|
-
# a = -> {foreman.enqueue_work_order(work_order)}
|
107
|
-
# s.wait_for_work_orders(a) {|m| @end = Time.now if iteration == (ITERATIONS - 1) }
|
108
|
-
# end
|
109
|
-
# end
|
110
|
-
|
111
|
-
# puts @end - @start
|
112
|
-
# WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
96
|
+
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
@@ -266,6 +266,10 @@ describe WorkerRoulette do
|
|
266
266
|
end
|
267
267
|
end
|
268
268
|
|
269
|
+
it "should return a hash with a string in the payload if OJ cannot parse the json" do
|
270
|
+
|
271
|
+
end
|
272
|
+
|
269
273
|
context "Potential Ack Success/Failure for Processing Queues" do
|
270
274
|
xit "should not delete the messages from the queue until they have been processed successfully"
|
271
275
|
xit "should checkout a readlock for a queue and put it back when its done processing; lock should expire after 5 minutes?"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: worker_roulette
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-02-
|
12
|
+
date: 2014-02-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: oj
|