chore-core 1.8.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +1 -1
  3. data/README.md +173 -150
  4. data/chore-core.gemspec +3 -3
  5. data/lib/chore.rb +31 -5
  6. data/lib/chore/cli.rb +22 -4
  7. data/lib/chore/configuration.rb +1 -1
  8. data/lib/chore/consumer.rb +54 -12
  9. data/lib/chore/fetcher.rb +12 -7
  10. data/lib/chore/hooks.rb +2 -1
  11. data/lib/chore/job.rb +19 -0
  12. data/lib/chore/manager.rb +18 -2
  13. data/lib/chore/publisher.rb +18 -2
  14. data/lib/chore/queues/filesystem/consumer.rb +126 -64
  15. data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
  16. data/lib/chore/queues/filesystem/publisher.rb +13 -19
  17. data/lib/chore/queues/sqs.rb +22 -13
  18. data/lib/chore/queues/sqs/consumer.rb +64 -51
  19. data/lib/chore/queues/sqs/publisher.rb +26 -17
  20. data/lib/chore/strategies/consumer/batcher.rb +14 -15
  21. data/lib/chore/strategies/consumer/single_consumer_strategy.rb +5 -5
  22. data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +9 -7
  23. data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +120 -0
  24. data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
  25. data/lib/chore/strategies/worker/helpers/ipc.rb +87 -0
  26. data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
  27. data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
  28. data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
  29. data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
  30. data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
  31. data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
  32. data/lib/chore/strategies/worker/single_worker_strategy.rb +35 -13
  33. data/lib/chore/unit_of_work.rb +10 -1
  34. data/lib/chore/util.rb +5 -1
  35. data/lib/chore/version.rb +3 -3
  36. data/lib/chore/worker.rb +32 -3
  37. data/spec/chore/cli_spec.rb +2 -2
  38. data/spec/chore/consumer_spec.rb +1 -5
  39. data/spec/chore/duplicate_detector_spec.rb +17 -5
  40. data/spec/chore/fetcher_spec.rb +0 -11
  41. data/spec/chore/manager_spec.rb +7 -0
  42. data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +74 -16
  43. data/spec/chore/queues/sqs/consumer_spec.rb +117 -78
  44. data/spec/chore/queues/sqs/publisher_spec.rb +49 -60
  45. data/spec/chore/queues/sqs_spec.rb +32 -41
  46. data/spec/chore/strategies/consumer/batcher_spec.rb +50 -0
  47. data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +3 -3
  48. data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +7 -6
  49. data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
  50. data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +17 -2
  51. data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
  52. data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
  53. data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
  54. data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
  55. data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
  56. data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
  57. data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
  58. data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +25 -0
  59. data/spec/chore/worker_spec.rb +82 -14
  60. data/spec/spec_helper.rb +1 -1
  61. data/spec/support/queues/sqs/fake_objects.rb +18 -0
  62. metadata +39 -15
@@ -0,0 +1,150 @@
1
+ require 'chore/signal'
2
+ require 'socket'
3
+ require 'chore/strategies/worker/helpers/ipc'
4
+ require 'chore/strategies/worker/helpers/preforked_worker'
5
+ require 'chore/strategies/worker/helpers/worker_manager'
6
+ require 'chore/strategies/worker/helpers/work_distributor'
7
+
8
+ module Chore
9
+ module Strategy
10
+ class PreForkedWorkerStrategy #:nodoc:
11
+ include Ipc
12
+
13
+ NUM_TO_SIGNAL = { '1' => :CHLD,
14
+ '2' => :INT,
15
+ '3' => :QUIT,
16
+ '4' => :TERM,
17
+ '5' => :USR1 }.freeze
18
+
19
+ def initialize(manager, opts = {})
20
+ @options = opts
21
+ @manager = manager
22
+ @self_read, @self_write = IO.pipe
23
+ trap_signals(NUM_TO_SIGNAL, @self_write)
24
+ @worker_manager = WorkerManager.new(create_master_socket)
25
+ at_exit { delete_socket_file }
26
+ @running = true
27
+ end
28
+
29
+ def start
30
+ Chore.logger.info "PWS: Starting up worker strategy: #{self.class.name}"
31
+ Chore.run_hooks_for(:before_first_fork)
32
+ @worker_manager.create_and_attach_workers
33
+ worker_assignment_thread
34
+ end
35
+
36
+ def stop!
37
+ Chore.logger.info "PWS: Stopping worker strategy: #{self.class.name}"
38
+ @running = false
39
+ end
40
+
41
+ private
42
+
43
+ def worker_assignment_thread
44
+ Thread.new do
45
+ begin
46
+ worker_assignment_loop
47
+ rescue Chore::TerribleMistake => e
48
+ Chore.logger.error 'PWS: Terrible mistake, shutting down Chore'
49
+ Chore.logger.error e.message
50
+ Chore.logger.error e.backtrace
51
+ @manager.shutdown!
52
+ ensure
53
+ Chore.logger.info 'PWS: worker_assignment_thread ending'
54
+ # WorkerAssignment thread is independent of the main thread.
55
+ # The main thread is waiting on the consumer threads to join,
56
+ # Due to some weird SQS behaviour, it's possible that these threads
57
+ # may not join, and the assignment thread always exits, since it's
58
+ # nonblocking. This will ensure that the master process exits.
59
+ Process.exit(true)
60
+ end
61
+ end
62
+ end
63
+
64
+ def worker_assignment_loop
65
+ while running?
66
+ w_sockets = @worker_manager.worker_sockets
67
+
68
+ # select_sockets returns a list of readable sockets
69
+ # This would include worker connections and the read end
70
+ # of the self-pipe.
71
+ #
72
+ # Note this not only returns sockets from live workers
73
+ # that are readable, but it also returns sockets from
74
+ # *dead* workers. If the worker hasn't already been reaped,
75
+ # then we might get a socket for a dead worker that will
76
+ # fail on write.
77
+ readables, = select_sockets(w_sockets, @self_read)
78
+
79
+ # If select timed out, retry
80
+ if readables.nil?
81
+ Chore.logger.debug 'PWS: All sockets busy.. retry'
82
+ next
83
+ end
84
+
85
+ # Handle the signal from the self-pipe
86
+ if readables.include?(@self_read)
87
+ handle_signal
88
+ next
89
+ end
90
+
91
+ # Confirm they're actually alive! A socket will be readable even
92
+ # if the worker has died but not yet been reaped by the master. We
93
+ # need to confirm that the "Ready" flag has actually been written by
94
+ # the worker and readable by the master.
95
+ readables.reject! {|readable| readable.eof?}
96
+
97
+ # Check again to see if there are still sockets available
98
+ if readables.empty?
99
+ Chore.logger.debug 'PWS: All sockets busy.. retry'
100
+ next
101
+ end
102
+
103
+ # Fetch and assign work for the readable worker connections
104
+ @worker_manager.ready_workers(readables) do |workers|
105
+ WorkDistributor.fetch_and_assign_jobs(workers, @manager)
106
+ end
107
+ end
108
+ Chore.logger.info 'PWS: worker_assignment_loop ending'
109
+ end
110
+
111
+ # Wrapper needed around running to help write specs for worker_assignment_loop
112
+ def running?
113
+ @running
114
+ end
115
+
116
+ def handle_signal
117
+ signal = NUM_TO_SIGNAL[@self_read.read_nonblock(1)]
118
+ Chore.logger.info "PWS: recv #{signal}"
119
+
120
+ case signal
121
+ when :CHLD
122
+ @worker_manager.respawn_terminated_workers!
123
+ when :INT, :QUIT, :TERM
124
+ Signal.reset
125
+ @worker_manager.stop_workers(signal)
126
+ @manager.shutdown!
127
+ when :USR1
128
+ Chore.reopen_logs
129
+ Chore.logger.info 'PWS: Master process reopened log'
130
+ end
131
+ end
132
+
133
+ # Wrapper around fork for specs.
134
+ def fork(&block)
135
+ Kernel.fork(&block)
136
+ end
137
+
138
+ # In the event of a trapped signal, write to the self-pipe
139
+ def trap_signals(signal_hash, write_end)
140
+ Signal.reset
141
+
142
+ signal_hash.each do |sig_num, signal|
143
+ Signal.trap(signal) do
144
+ write_end.write(sig_num)
145
+ end
146
+ end
147
+ end
148
+ end
149
+ end
150
+ end
@@ -10,7 +10,10 @@ module Chore
10
10
  def initialize(manager, opts={})
11
11
  @options = opts
12
12
  @manager = manager
13
+ @stopped = false
13
14
  @worker = nil
15
+ @queue = Queue.new
16
+ @queue << :worker
14
17
  end
15
18
 
16
19
  # Starts the <tt>SingleWorkerStrategy</tt>. Currently a noop
@@ -18,6 +21,11 @@ module Chore
18
21
 
19
22
  # Stops the <tt>SingleWorkerStrategy</tt> if there is a worker to stop
20
23
  def stop!
24
+ return if @stopped
25
+
26
+ @stopped = true
27
+ Chore.logger.info { "Manager #{Process.pid} stopping" }
28
+
21
29
  worker.stop! if worker
22
30
  end
23
31
 
@@ -25,16 +33,14 @@ module Chore
25
33
  # single worker strategy, this should never be called if the worker is in
26
34
  # progress.
27
35
  def assign(work)
28
- if workers_available?
29
- begin
30
- @worker = worker_klass.new(work, @options)
31
- @worker.start
32
- true
33
- ensure
34
- @worker = nil
35
- end
36
- else
37
- Chore.logger.error { "#{self.class}#assign: single worker is unavailable, but assign has been re-entered: #{caller * "\n"}" }
36
+ return unless acquire_worker
37
+
38
+ begin
39
+ @worker = worker_klass.new(work, @options)
40
+ @worker.start
41
+ true
42
+ ensure
43
+ release_worker
38
44
  end
39
45
  end
40
46
 
@@ -42,9 +48,25 @@ module Chore
42
48
  Worker
43
49
  end
44
50
 
45
- # Returns true if there is currently no worker
46
- def workers_available?
47
- @worker.nil?
51
+ private
52
+
53
+ # Attempts to essentially acquire a lock on a worker. If no workers are
54
+ # available, then this will block until one is.
55
+ def acquire_worker
56
+ result = @queue.pop
57
+
58
+ if @stopped
59
+ # Strategy has stopped since the worker was acquired
60
+ release_worker
61
+ nil
62
+ else
63
+ result
64
+ end
65
+ end
66
+
67
+ # Releases the lock on a worker so that another thread can pick it up.
68
+ def release_worker
69
+ @queue << :worker
48
70
  end
49
71
  end
50
72
  end
@@ -2,13 +2,22 @@ module Chore
2
2
  # Simple class to hold job processing information.
3
3
  # Has the following attributes:
4
4
  # * +:id+ The queue implementation specific identifier for this message.
5
+ # * +:receipt_handle+ The queue implementation specific identifier for the receipt of this message.
5
6
  # * +:queue_name+ The name of the queue the job came from
6
7
  # * +:queue_timeout+ The time (in seconds) before the job will get re-enqueued if not processed
7
8
  # * +:message+ The actual data of the message.
8
9
  # * +:previous_attempts+ The number of times the work has been attempted previously.
9
10
  # * +:consumer+ The consumer instance used to fetch this message. Most queue implementations won't need access to this, but some (RabbitMQ) will. So we
10
11
  # make sure to pass it along with each message. This instance will be used by the Worker for things like <tt>complete</tt> and <tt>reject</tt>.
11
- class UnitOfWork < Struct.new(:id,:queue_name,:queue_timeout,:message,:previous_attempts,:consumer,:decoded_message, :klass)
12
+ class UnitOfWork < Struct.new(:id, :receipt_handle, :queue_name, :queue_timeout, :message, :previous_attempts, :consumer, :decoded_message, :klass)
13
+ # The time at which this unit of work was created
14
+ attr_accessor :created_at
15
+
16
+ def initialize(*) #:nodoc:
17
+ super
18
+ @created_at = Time.now
19
+ end
20
+
12
21
  # The current attempt number for the worker processing this message.
13
22
  def current_attempt
14
23
  previous_attempts + 1
@@ -2,7 +2,7 @@ module Chore
2
2
 
3
3
  # Collection of utilities and helpers used by Chore internally
4
4
  module Util
5
-
5
+
6
6
  # To avoid bringing in all of active_support, we implemented constantize here
7
7
  def constantize(camel_cased_word)
8
8
  names = camel_cased_word.split('::')
@@ -14,5 +14,9 @@ module Chore
14
14
  end
15
15
  constant
16
16
  end
17
+
18
+ def procline(str)
19
+ $0 = str
20
+ end
17
21
  end
18
22
  end
@@ -1,8 +1,8 @@
1
1
  module Chore
2
2
  module Version #:nodoc:
3
- MAJOR = 1
4
- MINOR = 8
5
- PATCH = 2
3
+ MAJOR = 4
4
+ MINOR = 0
5
+ PATCH = 0
6
6
 
7
7
  STRING = [ MAJOR, MINOR, PATCH ].join('.')
8
8
  end
@@ -42,6 +42,28 @@ module Chore
42
42
  @started_at + total_timeout
43
43
  end
44
44
 
45
+ def duplicate_work?(item)
46
+ # if we've got a duplicate, remove the message from the queue by not actually running and also not reporting any errors
47
+ payload = options[:payload_handler].payload(item.decoded_message)
48
+
49
+ # if we're hitting the custom dedupe key, we want to remove this message from the queue
50
+ if item.klass.has_dedupe_lambda?
51
+ dedupe_key = item.klass.dedupe_key(*payload)
52
+ if dedupe_key.nil? || dedupe_key.strip.empty? # if the dedupe key is nil, don't continue with the rest of the dedupe lambda logic
53
+ Chore.logger.info { "#{item.klass} dedupe key nil, skipping memcached lookup." }
54
+ return false
55
+ end
56
+
57
+ if item.consumer.duplicate_message?(dedupe_key, item.klass, item.queue_timeout)
58
+ Chore.logger.info { "Found and deleted duplicate job #{item.klass}"}
59
+ item.consumer.complete(item.id, item.receipt_handle)
60
+ return true
61
+ end
62
+ end
63
+
64
+ return false
65
+ end
66
+
45
67
  # The workhorse. Do the work, all of it. This will block for an entirely unspecified amount
46
68
  # of time based on the work to be performed. This will:
47
69
  # * Decode each message.
@@ -58,14 +80,19 @@ module Chore
58
80
  begin
59
81
  item.decoded_message = options[:payload_handler].decode(item.message)
60
82
  item.klass = options[:payload_handler].payload_class(item.decoded_message)
83
+
84
+ next if duplicate_work?(item)
85
+
86
+ Chore.run_hooks_for(:worker_to_start, item)
61
87
  start_item(item)
62
88
  rescue => e
63
89
  Chore.logger.error { "Failed to run job for #{item.message} with error: #{e.message} #{e.backtrace * "\n"}" }
64
90
  if item.current_attempt >= Chore.config.max_attempts
65
91
  Chore.run_hooks_for(:on_permanent_failure,item.queue_name,item.message,e)
66
- item.consumer.complete(item.id)
92
+ item.consumer.complete(item.id, item.receipt_handle)
67
93
  else
68
94
  Chore.run_hooks_for(:on_failure,item.message,e)
95
+ item.consumer.reject(item.id)
69
96
  end
70
97
  end
71
98
  end
@@ -85,9 +112,10 @@ module Chore
85
112
  begin
86
113
  Chore.logger.info { "Running job #{klass} with params #{message}"}
87
114
  perform_job(klass,message)
88
- item.consumer.complete(item.id)
115
+ item.consumer.complete(item.id, item.receipt_handle)
89
116
  Chore.logger.info { "Finished job #{klass} with params #{message}"}
90
117
  klass.run_hooks_for(:after_perform, message)
118
+ Chore.run_hooks_for(:worker_ended, item)
91
119
  rescue Job::RejectMessageException
92
120
  item.consumer.reject(item.id)
93
121
  Chore.logger.error { "Failed to run job for #{item.message} with error: Job raised a RejectMessageException" }
@@ -113,9 +141,10 @@ module Chore
113
141
  Chore.logger.error { "Failed to run job #{item.message} with error: #{e.message} at #{e.backtrace * "\n"}" }
114
142
  if item.current_attempt >= klass.options[:max_attempts]
115
143
  klass.run_hooks_for(:on_permanent_failure,item.queue_name,message,e)
116
- item.consumer.complete(item.id)
144
+ item.consumer.complete(item.id, item.receipt_handle)
117
145
  else
118
146
  klass.run_hooks_for(:on_failure, message, e)
147
+ item.consumer.reject(item.id)
119
148
  end
120
149
  end
121
150
 
@@ -201,8 +201,8 @@ describe Chore::CLI do
201
201
 
202
202
  context 'given no value' do
203
203
  let(:command) { [] }
204
- it 'is the default value, nil' do
205
- subject.should == nil
204
+ it 'is the default value, 1' do
205
+ subject.should == 1
206
206
  end
207
207
  end
208
208
  end
@@ -22,10 +22,6 @@ describe Chore::Consumer do
22
22
  Chore::Consumer.should respond_to :reset_connection!
23
23
  end
24
24
 
25
- it 'should have a class level cleanup method' do
26
- Chore::Consumer.should respond_to :cleanup
27
- end
28
-
29
25
  it 'should not have an implemented consume method' do
30
26
  expect { consumer.consume }.to raise_error(NotImplementedError)
31
27
  end
@@ -35,6 +31,6 @@ describe Chore::Consumer do
35
31
  end
36
32
 
37
33
  it 'should not have an implemented complete method' do
38
- expect { consumer.complete(message) }.to raise_error(NotImplementedError)
34
+ expect { consumer.complete(message, nil) }.to raise_error(NotImplementedError)
39
35
  end
40
36
  end
@@ -2,7 +2,21 @@ require 'spec_helper'
2
2
  require 'securerandom'
3
3
 
4
4
  describe Chore::DuplicateDetector do
5
- let(:memcache) { double("memcache") }
5
+ class FakeDalli
6
+ def initialize
7
+ @store = {}
8
+ end
9
+ def add(id, val, ttl=0)
10
+ if @store[id] && @store[id][:inserted] + @store[id][:ttl] > Time.now.to_i
11
+ return false
12
+ else
13
+ @store[id] = {:val => val, :ttl => ttl, :inserted => Time.now.to_i}
14
+ return true
15
+ end
16
+ end
17
+ end
18
+
19
+ let(:memcache) { FakeDalli.new }
6
20
  let(:dupe_on_cache_failure) { false }
7
21
  let(:dedupe_params) { { :memcached_client => memcache, :dupe_on_cache_failure => dupe_on_cache_failure } }
8
22
  let(:dedupe) { Chore::DuplicateDetector.new(dedupe_params)}
@@ -15,12 +29,11 @@ describe Chore::DuplicateDetector do
15
29
 
16
30
  describe "#found_duplicate" do
17
31
  it 'should not return true if the message has not already been seen' do
18
- expect(memcache).to receive(:add).and_return(true)
19
32
  expect(dedupe.found_duplicate?(message_data)).to_not be true
20
33
  end
21
34
 
22
35
  it 'should return true if the message has already been seen' do
23
- expect(memcache).to receive(:add).and_return(false)
36
+ memcache.add(message_data[:id], 1, message_data[:visibility_timeout])
24
37
  expect(dedupe.found_duplicate?(message_data)).to be true
25
38
  end
26
39
 
@@ -34,13 +47,12 @@ describe Chore::DuplicateDetector do
34
47
  end
35
48
 
36
49
  it "should set the timeout to be the queue's " do
37
- expect(memcache).to receive(:add).with(id,"1",timeout).and_return(true)
50
+ expect(memcache).to receive(:add).with(id,"1",timeout).and_call_original
38
51
  expect(dedupe.found_duplicate?(message_data)).to be false
39
52
  end
40
53
 
41
54
  it "should call #visibility_timeout once and only once" do
42
55
  expect(queue).to receive(:visibility_timeout).once
43
- expect(memcache).to receive(:add).at_least(3).times.and_return(true)
44
56
  3.times { dedupe.found_duplicate?(message_data) }
45
57
  end
46
58
 
@@ -35,15 +35,4 @@ describe Chore::Fetcher do
35
35
  fetcher.start
36
36
  end
37
37
  end
38
-
39
- describe "cleaning up" do
40
- before(:each) do
41
- manager.stub(:assign)
42
- end
43
-
44
- it "should run cleanup on each queue" do
45
- consumer.should_receive(:cleanup).with('test')
46
- fetcher.start
47
- end
48
- end
49
38
  end