worker_roulette 0.0.12 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +80 -12
- data/lib/worker_roulette/version.rb +1 -1
- data/spec/benchmark/perf_test.rb +44 -60
- data/spec/integration/evented_worker_roulette_spec.rb +4 -0
- metadata +2 -2
data/README.md
CHANGED
@@ -4,13 +4,64 @@ WorkerRoulette is designed to allow large numbers of unique devices, processes,
|
|
4
4
|
|
5
5
|
WorkerRoulette lets you have thousands of competing consumers (distributed over as many machines as you'd like) processing ordered messages from millions of totally unknown message providers. It does all this and ensures that the messages sent from each message provider are processed in exactly the order it sent them.
|
6
6
|
|
7
|
-
##
|
7
|
+
## Asynchronous Api (Evented)
|
8
8
|
```ruby
|
9
9
|
size_of_connection_pool = 100
|
10
|
-
redis_config = {host: 'localhost', timeout: 5, db: 1}
|
11
10
|
|
12
11
|
#Start it up
|
13
|
-
|
12
|
+
#the config takes size for the connection pool size, evented to specify which api to use, then the normal redis config
|
13
|
+
WorkerRoulette.start(size: size_of_connection_pool, evented: false, host: 'localhost', timeout: 5, db: 1)
|
14
|
+
|
15
|
+
#Enqueue some work
|
16
|
+
sender_id = :shady
|
17
|
+
foreman = WorkerRoulette.a_foreman(sender_id)
|
18
|
+
|
19
|
+
foreman.enqueue_work_order(['hello', 'foreman']) do |msg|
|
20
|
+
puts "work enqueued #{msg}"
|
21
|
+
end
|
22
|
+
|
23
|
+
#Pull it off
|
24
|
+
tradesman = WorkerRoulette.a_tradesman
|
25
|
+
tradesman.work_orders! do |work_orders| #drain the queue of the next available sender
|
26
|
+
work_orders.first # => ['hello', 'foreman']
|
27
|
+
end
|
28
|
+
|
29
|
+
#Enqueue some more from someone else
|
30
|
+
other_sender_id = :the_real_slim_shady
|
31
|
+
other_foreman = WorkerRoulette.a_foreman(other_sender_id)
|
32
|
+
other_foreman.enqueue_work_order({'can you get me' => 'the number nine?'}) do |msg|
|
33
|
+
puts "work enqueued #{msg}"
|
34
|
+
end
|
35
|
+
|
36
|
+
#Have the same worker pull that off
|
37
|
+
tradesman.work_orders! do |work_orders| #drain the queue of the next available sender
|
38
|
+
work_orders.first # => {'can you get me' => 'the number nine?'}
|
39
|
+
end
|
40
|
+
|
41
|
+
#Have your workers wait for work to come in
|
42
|
+
on_subscribe_callback = -> do
|
43
|
+
puts "Huzzah! We're listening!"
|
44
|
+
foreman.enqueue_work_order('will I see you later?')
|
45
|
+
foreman.enqueue_work_order('can you give me back my dime?')
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
#And they will pull it off as it comes, as long as it comes
|
50
|
+
#NB: This is NOT a blocking operation, so no worries
|
51
|
+
tradesman.wait_for_work_orders(on_subscribe_callback) do |work_orders, message, channel| #drain the queue of the next available sender
|
52
|
+
work_orders # => ['will I see you later', 'can you give me back my dime?']
|
53
|
+
message # => 'new_job_ready'
|
54
|
+
channel # => '' #the name of the channel the message was published on, if one was used -- see below
|
55
|
+
end
|
56
|
+
```
|
57
|
+
|
58
|
+
## Synchronous Api
|
59
|
+
```ruby
|
60
|
+
size_of_connection_pool = 100
|
61
|
+
|
62
|
+
#Start it up
|
63
|
+
#the config takes size for the connection pool size, evented to specify which api to use, then the normal redis config
|
64
|
+
WorkerRoulette.start(size: size_of_connection_pool, evented: false, host: 'localhost', timeout: 5, db: 1)
|
14
65
|
|
15
66
|
#Enqueue some work
|
16
67
|
sender_id = :shady
|
@@ -19,8 +70,8 @@ foreman.enqueue_work_order(['hello', 'foreman'])
|
|
19
70
|
|
20
71
|
#Pull it off
|
21
72
|
tradesman = WorkerRoulette.tradesman
|
22
|
-
|
23
|
-
|
73
|
+
work_orders = tradesman.work_orders! #drain the queue of the next available sender
|
74
|
+
work_orders.first # => ['hello', 'foreman']
|
24
75
|
|
25
76
|
#Enqueue some more from someone else
|
26
77
|
other_sender_id = :the_real_slim_shady
|
@@ -28,8 +79,8 @@ other_foreman = WorkerRoulette.foreman(other_sender_id)
|
|
28
79
|
other_foreman.enqueue_work_order({'can you get me' => 'the number nine?'})
|
29
80
|
|
30
81
|
#Have the same worker pull that off
|
31
|
-
|
32
|
-
|
82
|
+
work_orders = tradesman.work_orders! #drain the queue of the next available sender
|
83
|
+
work_orders.first # => {'can you get me' => 'the number nine?'}
|
33
84
|
|
34
85
|
#Have your workers wait for work to come in
|
35
86
|
on_subscribe_callback = -> do
|
@@ -40,9 +91,9 @@ end
|
|
40
91
|
|
41
92
|
|
42
93
|
#And they will pull it off as it comes, as long as it comes
|
43
|
-
#
|
44
|
-
tradesman.wait_for_work_orders(on_subscribe_callback) do |
|
45
|
-
|
94
|
+
#NB: This IS a blocking operation
|
95
|
+
tradesman.wait_for_work_orders(on_subscribe_callback) do |work_orders| #drain the queue of the next available sender
|
96
|
+
work_orders # => ['will I see you later', 'can you give me back my dime?']
|
46
97
|
end
|
47
98
|
```
|
48
99
|
|
@@ -65,11 +116,28 @@ tradesman.wait_for_work_orders(publish) do |work|
|
|
65
116
|
work.to_s.should_not match("evil") #channels let us ignore the other's evil orders
|
66
117
|
tradesman.unsubscribe
|
67
118
|
end
|
68
|
-
|
69
119
|
```
|
70
120
|
|
121
|
+
## Performance
|
122
|
+
Running the performance tests on my laptop, the numbers break down like this:
|
123
|
+
### Async Api
|
124
|
+
- Manual: ~4200 read-write round-trips / second
|
125
|
+
- Pubsub: ~5200 read-write round-trips / second
|
126
|
+
|
127
|
+
### Sync Api
|
128
|
+
- Manual: ~1600 read-write round-trips / second
|
129
|
+
- Pubsub: ~2000 read-write round-trips / second
|
130
|
+
|
131
|
+
To run the perf tests yourself run `bundle exec spec:perf`
|
132
|
+
|
133
|
+
## Redis Pubsub and Polling
|
134
|
+
The `wait_for_work_orders` method works using Redis' pubsub mechanism. The advantage to this is that it is very fast and minimizes network traffic. The downside is that Redis' pubsub implementation is 'fire and forget', so any subscribers who are not listening at the moment the message is published will miss it. In order to compensate for this, WorkerRoulette's Async Api creates a backup timer (using EM.add_periodic_timer) that will poll redis every 20-25 seconds for new work. Since the timer is reset every time new work comes in, if you have an active publisher, the timer may never need to fire. It only serves as a backup to make sure no work is left waiting in the queues because of network problems. Since there is no one polling mechanism that works for all situations in a synchronous environment, this feature is only available through the Async Api.
|
135
|
+
|
136
|
+
## Redis Version
|
137
|
+
WorkerRoulette uses Redis' lua scripting feature to achieve such high throughput and therefore requires a version of Redis that supports lua scripting (>= Redis 2.6)
|
138
|
+
|
71
139
|
##Caveat Emptor
|
72
|
-
While WorkerRoulette does promise to keep the messages of each consumer processed in order by competing consumers, it does NOT guarantee the order in which the queues themselves will be processed. In general, work is processed in a FIFO order, but for performance reasons this has been left a loose FIFO. For example, if Abdul enqueues some ordered messages ('1', '2', and '3') and then so do Mark and Wanda, Mark's messages may be processed first, then it would likely be Abdul's, and then Wanda's. However, even though Mark jumped the line, Abdul's messages will still be processed the order he enqueued them ('1', '2', then '3').
|
140
|
+
While WorkerRoulette does promise to keep the messages of each consumer processed in order by competing consumers, it does NOT guarantee the order in which the queues themselves will be processed. In general, work is processed in a FIFO order, but for performance reasons this has been left a loose FIFO. For example, if Abdul enqueues some ordered messages ('1', '2', and '3') and then so do Mark and Wanda, Mark's messages may be processed first, then it would likely be Abdul's, and then Wanda's. However, even though Mark jumped the line, Abdul's messages will still be processed in the order he enqueued them ('1', '2', then '3').
|
73
141
|
|
74
142
|
## Installation
|
75
143
|
|
data/spec/benchmark/perf_test.rb
CHANGED
@@ -3,38 +3,56 @@ require 'benchmark'
|
|
3
3
|
require 'eventmachine'
|
4
4
|
|
5
5
|
REDIS_CONNECTION_POOL_SIZE = 100
|
6
|
-
ITERATIONS =
|
6
|
+
ITERATIONS = 10_000
|
7
7
|
|
8
8
|
work_order = {'ding dong' => "hello_foreman_" * 100}
|
9
|
+
WorkerRoulette.start(size: REDIS_CONNECTION_POOL_SIZE, evented: false)
|
10
|
+
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
9
11
|
|
10
|
-
|
11
|
-
# WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
12
|
+
puts "Redis Connection Pool Size: #{REDIS_CONNECTION_POOL_SIZE}"
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
# end
|
14
|
+
Benchmark.bmbm do |x|
|
15
|
+
x.report "Time to insert and read #{ITERATIONS} large work_orders" do # ~1600 work_orders / second round trip; 50-50 read-write time; CPU and IO bound
|
16
|
+
WorkerRoulette.start(size: REDIS_CONNECTION_POOL_SIZE, evented: false)
|
17
|
+
ITERATIONS.times do |iteration|
|
18
|
+
sender = 'sender_' + iteration.to_s
|
19
|
+
foreman = WorkerRoulette.foreman(sender)
|
20
|
+
foreman.enqueue_work_order(work_order)
|
21
|
+
end
|
22
22
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
23
|
+
ITERATIONS.times do |iteration|
|
24
|
+
sender = 'sender_' + iteration.to_s
|
25
|
+
tradesman = WorkerRoulette.tradesman
|
26
|
+
tradesman.work_orders!
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
30
|
|
31
31
|
EM::Hiredis.reconnect_timeout = 0.01
|
32
32
|
|
33
|
+
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
34
|
+
|
33
35
|
Benchmark.bmbm do |x|
|
34
|
-
x.report "Time to
|
35
|
-
|
36
|
+
x.report "Time for tradesmans to enqueue_work_order and read #{ITERATIONS} large work_orders via pubsub" do # ~2000 work_orders / second round trip
|
37
|
+
WorkerRoulette.start(size: REDIS_CONNECTION_POOL_SIZE, evented: false)
|
38
|
+
ITERATIONS.times do |iteration|
|
39
|
+
p = -> do
|
40
|
+
sender = 'sender_' + iteration.to_s
|
41
|
+
foreman = WorkerRoulette.foreman(sender)
|
42
|
+
foreman.enqueue_work_order(work_order)
|
43
|
+
end
|
44
|
+
tradesman = WorkerRoulette.tradesman
|
45
|
+
tradesman.wait_for_work_orders(p) {|m| m; tradesman.unsubscribe}
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
36
49
|
|
37
|
-
|
50
|
+
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
51
|
+
|
52
|
+
Benchmark.bmbm do |x|
|
53
|
+
x.report "Time to evently insert and read #{ITERATIONS} large work_orders" do # ~4200 work_orders / second round trip; 50-50 read-write time; CPU and IO bound
|
54
|
+
EM.run do
|
55
|
+
WorkerRoulette.start(evented: true)
|
38
56
|
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
39
57
|
@total = 0
|
40
58
|
@tradesman = WorkerRoulette.a_tradesman
|
@@ -54,11 +72,12 @@ Benchmark.bmbm do |x|
|
|
54
72
|
end
|
55
73
|
|
56
74
|
Benchmark.bmbm do |x|
|
57
|
-
x.report "Time to evently pubsub insert and read #{ITERATIONS} large work_orders" do # ~
|
75
|
+
x.report "Time to evently pubsub insert and read #{ITERATIONS} large work_orders" do # ~5200 work_orders / second round trip; 50-50 read-write time; CPU and IO bound
|
58
76
|
EM.run do
|
77
|
+
WorkerRoulette.start(evented: true)
|
59
78
|
@processed = 0
|
60
79
|
@total = 0
|
61
|
-
WorkerRoulette.start(evented: true)
|
80
|
+
WorkerRoulette.start(evented: true)
|
62
81
|
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
63
82
|
@total = 0
|
64
83
|
@tradesman = WorkerRoulette.a_tradesman
|
@@ -74,39 +93,4 @@ Benchmark.bmbm do |x|
|
|
74
93
|
end
|
75
94
|
end
|
76
95
|
|
77
|
-
|
78
|
-
|
79
|
-
# Benchmark.bmbm do |x|
|
80
|
-
# x.report "Time for tradesmans to enqueue_work_order and read #{ITERATIONS} large work_orders via pubsub" do # ~1800 work_orders / second round trip
|
81
|
-
# ITERATIONS.times do |iteration|
|
82
|
-
# p = -> do
|
83
|
-
# sender = 'sender_' + iteration.to_s
|
84
|
-
# foreman = WorkerRoulette.foreman(sender)
|
85
|
-
# foreman.enqueue_work_order(work_order)
|
86
|
-
# end
|
87
|
-
# tradesman = WorkerRoulette.tradesman
|
88
|
-
# tradesman.wait_for_work_orders(p) {|m| m; tradesman.unsubscribe}
|
89
|
-
# end
|
90
|
-
# end
|
91
|
-
# end
|
92
|
-
|
93
|
-
# WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
94
|
-
|
95
|
-
# EM.run do
|
96
|
-
# EM.add_timer(6) {puts "em off";EM.stop}
|
97
|
-
# tradesmans = []
|
98
|
-
# foremans = []
|
99
|
-
# @start = Time.now
|
100
|
-
# @end = nil
|
101
|
-
# ITERATIONS.times do |iteration|
|
102
|
-
# s = WorkerRoulette.tradesman
|
103
|
-
# tradesmans << s
|
104
|
-
# sender = 'sender_' + iteration.to_s
|
105
|
-
# foreman = WorkerRoulette.foreman(sender)
|
106
|
-
# a = -> {foreman.enqueue_work_order(work_order)}
|
107
|
-
# s.wait_for_work_orders(a) {|m| @end = Time.now if iteration == (ITERATIONS - 1) }
|
108
|
-
# end
|
109
|
-
# end
|
110
|
-
|
111
|
-
# puts @end - @start
|
112
|
-
# WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
96
|
+
WorkerRoulette.tradesman_connection_pool.with {|r| r.flushdb}
|
@@ -266,6 +266,10 @@ describe WorkerRoulette do
|
|
266
266
|
end
|
267
267
|
end
|
268
268
|
|
269
|
+
it "should return a hash with a string in the payload if OJ cannot parse the json" do
|
270
|
+
|
271
|
+
end
|
272
|
+
|
269
273
|
context "Potential Ack Success/Failure for Processing Queues" do
|
270
274
|
xit "should not delete the messages from the queue until they have been processed successfully"
|
271
275
|
xit "should checkout a readlock for a queue and put it back when its done processing; lock should expire after 5 minutes?"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: worker_roulette
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-02-
|
12
|
+
date: 2014-02-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: oj
|