chore-core 1.8.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +1 -1
  3. data/README.md +173 -150
  4. data/chore-core.gemspec +3 -3
  5. data/lib/chore.rb +31 -5
  6. data/lib/chore/cli.rb +22 -4
  7. data/lib/chore/configuration.rb +1 -1
  8. data/lib/chore/consumer.rb +54 -12
  9. data/lib/chore/fetcher.rb +12 -7
  10. data/lib/chore/hooks.rb +2 -1
  11. data/lib/chore/job.rb +19 -0
  12. data/lib/chore/manager.rb +18 -2
  13. data/lib/chore/publisher.rb +18 -2
  14. data/lib/chore/queues/filesystem/consumer.rb +126 -64
  15. data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
  16. data/lib/chore/queues/filesystem/publisher.rb +13 -19
  17. data/lib/chore/queues/sqs.rb +22 -13
  18. data/lib/chore/queues/sqs/consumer.rb +64 -51
  19. data/lib/chore/queues/sqs/publisher.rb +26 -17
  20. data/lib/chore/strategies/consumer/batcher.rb +14 -15
  21. data/lib/chore/strategies/consumer/single_consumer_strategy.rb +5 -5
  22. data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +9 -7
  23. data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +120 -0
  24. data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
  25. data/lib/chore/strategies/worker/helpers/ipc.rb +87 -0
  26. data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
  27. data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
  28. data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
  29. data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
  30. data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
  31. data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
  32. data/lib/chore/strategies/worker/single_worker_strategy.rb +35 -13
  33. data/lib/chore/unit_of_work.rb +10 -1
  34. data/lib/chore/util.rb +5 -1
  35. data/lib/chore/version.rb +3 -3
  36. data/lib/chore/worker.rb +32 -3
  37. data/spec/chore/cli_spec.rb +2 -2
  38. data/spec/chore/consumer_spec.rb +1 -5
  39. data/spec/chore/duplicate_detector_spec.rb +17 -5
  40. data/spec/chore/fetcher_spec.rb +0 -11
  41. data/spec/chore/manager_spec.rb +7 -0
  42. data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +74 -16
  43. data/spec/chore/queues/sqs/consumer_spec.rb +117 -78
  44. data/spec/chore/queues/sqs/publisher_spec.rb +49 -60
  45. data/spec/chore/queues/sqs_spec.rb +32 -41
  46. data/spec/chore/strategies/consumer/batcher_spec.rb +50 -0
  47. data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +3 -3
  48. data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +7 -6
  49. data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
  50. data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +17 -2
  51. data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
  52. data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
  53. data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
  54. data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
  55. data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
  56. data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
  57. data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
  58. data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +25 -0
  59. data/spec/chore/worker_spec.rb +82 -14
  60. data/spec/spec_helper.rb +1 -1
  61. data/spec/support/queues/sqs/fake_objects.rb +18 -0
  62. metadata +39 -15
data/lib/chore/queues/sqs/publisher.rb

@@ -3,21 +3,26 @@ require 'chore/publisher'
  module Chore
  module Queues
  module SQS
-
  # SQS Publisher, for writing messages to SQS from Chore
  class Publisher < Chore::Publisher
  @@reset_next = true

+ # @param [Hash] opts Publisher options
  def initialize(opts={})
  super
  @sqs_queues = {}
  @sqs_queue_urls = {}
  end

- # Takes a given Chore::Job instance +job+, and publishes it by looking up the +queue_name+.
+ # Publishes a message to an SQS queue
+ #
+ # @param [String] queue_name Name of the SQS queue
+ # @param [Hash] job Job instance definition, will be encoded to JSON
+ #
+ # @return [struct Aws::SQS::Types::SendMessageResult]
  def publish(queue_name,job)
- queue = self.queue(queue_name)
- queue.send_message(encode_job(job))
+ queue = queue(queue_name)
+ queue.send_message(message_body: encode_job(job))
  end

  # Sets a flag that instructs the publisher to reset the connection the next time it's used
@@ -25,29 +30,33 @@ module Chore
  @@reset_next = true
  end

- # Access to the configured SQS connection object
+ private
+
+ # SQS API client object
+ #
+ # @return [Aws::SQS::Client]
  def sqs
- @sqs ||= AWS::SQS.new(
- :access_key_id => Chore.config.aws_access_key,
- :secret_access_key => Chore.config.aws_secret_key,
- :logger => Chore.logger,
- :log_level => :debug)
+ @sqs ||= Chore::Queues::SQS.sqs_client
  end

- # Retrieves the SQS queue with the given +name+. The method will cache the results to prevent round trips on subsequent calls
+ # Retrieves the SQS queue object. The method will cache the results to prevent round trips on subsequent calls
+ #
  # If <tt>reset_connection!</tt> has been called, this will result in the connection being re-initialized,
  # as well as clear any cached results from prior calls
+ #
+ # @param [String] name Name of SQS queue
+ #
+ # @return [Aws::SQS::Queue]
  def queue(name)
- if @@reset_next
- AWS::Core::Http::ConnectionPool.pools.each do |p|
- p.empty!
- end
+ if @@reset_next
+ Aws.empty_connection_pools!
  @sqs = nil
  @@reset_next = false
  @sqs_queues = {}
  end
- @sqs_queue_urls[name] ||= self.sqs.queues.url_for(name)
- @sqs_queues[name] ||= self.sqs.queues[@sqs_queue_urls[name]]
+
+ @sqs_queue_urls[name] ||= sqs.get_queue_url(queue_name: name).queue_url
+ @sqs_queues[name] ||= Aws::SQS::Queue.new(url: @sqs_queue_urls[name], client: sqs)
  end
  end
  end
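
For context, this rewrite tracks the move from aws-sdk v1 (AWS::SQS) to the v2+ API. A minimal sketch of the calls the new code leans on, assuming the aws-sdk-sqs gem (under v2 the require is 'aws-sdk') and a hypothetical queue name and region:

# Illustrative only: exercises the aws-sdk v2+ calls the new publisher relies on.
require 'aws-sdk-sqs'

client = Aws::SQS::Client.new(region: 'us-east-1')

# get_queue_url replaces the v1 sqs.queues.url_for(name) lookup
queue_url = client.get_queue_url(queue_name: 'my-chore-queue').queue_url

# The resource-style Queue object wraps the URL and client, as in the new #queue method
queue = Aws::SQS::Queue.new(url: queue_url, client: client)

# send_message now takes a keyword payload (message_body:) rather than a positional string
result = queue.send_message(message_body: '{"class":"MyJob","args":[]}')
puts result.message_id
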
data/lib/chore/strategies/consumer/batcher.rb

@@ -11,29 +11,29 @@ module Chore
  @size = size
  @batch = []
  @mutex = Mutex.new
- @last_message = nil
  @callback = nil
  @running = true
  end

- # The main entry point of the Batcher, <tt>schedule</tt> begins a thread with the provided +batch_timeout+
- # as the only argument. While the Batcher is running, it will attempt to check if either the batch is full,
- # or if the +batch_timeout+ has elapsed since the last batch was executed. If the batch is full, it will be executed.
- # If the +batch_timeout+ has elapsed, as soon as the next message enters the batch, it will be executed.
- #
+ # The main entry point of the Batcher, <tt>schedule</tt> begins a thread with the provided +batch_timeout+
+ # as the only argument. While the Batcher is running, it will attempt to check if either the batch is full,
+ # or if the +batch_timeout+ has elapsed since the oldest message was added. If either case is true, the
+ # items in the batch will be executed.
+ #
  # Calling <tt>stop</tt> will cause the thread to finish it's current check, and exit
- def schedule(batch_timeout=20)
+ def schedule(batch_timeout)
  @thread = Thread.new(batch_timeout) do |timeout|
- Chore.logger.info "Batching timeout thread starting"
+ Chore.logger.info "Batching thread starting with #{batch_timeout} second timeout"
  while @running do
- begin
- Chore.logger.debug "Last message added to batch: #{@last_message}: #{@batch.size}"
- if @last_message && Time.now > (@last_message + timeout)
- Chore.logger.debug "Batching timeout reached (#{@last_message + timeout}), current size: #{@batch.size}"
+ begin
+ oldest_item = @batch.first
+ timestamp = oldest_item && oldest_item.created_at
+ Chore.logger.debug "Oldest message in batch: #{timestamp}, size: #{@batch.size}"
+ if timestamp && Time.now > (timestamp + timeout)
+ Chore.logger.debug "Batching timeout reached (#{timestamp + timeout}), current size: #{@batch.size}"
  self.execute(true)
- @last_message = nil
  end
- sleep(1)
+ sleep(1)
  rescue => e
  Chore.logger.error "Batcher#schedule raised an exception: #{e.inspect}"
  end
@@ -44,7 +44,6 @@ module Chore
  # Adds the +item+ to the current batch
  def add(item)
  @batch << item
- @last_message = Time.now
  execute if ready?
  end
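
The timeout now keys off the creation time of the oldest item still in the batch rather than the time of the most recently added message, so a stalled queue no longer needs one more message to arrive before a flush happens. A stand-alone sketch of that rule, with a hypothetical Item struct standing in for a unit of work:

# Minimal sketch of the new timeout check, not the actual Batcher class.
Item = Struct.new(:payload, :created_at)

batch = [Item.new('a', Time.now - 30), Item.new('b', Time.now - 5)]
batch_timeout = 20 # seconds

oldest = batch.first
# Flush when the oldest queued item has waited longer than the timeout,
# instead of waiting for another message to trigger the check.
if oldest && Time.now > (oldest.created_at + batch_timeout)
  puts "flush #{batch.size} items"
end
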
 
data/lib/chore/strategies/consumer/single_consumer_strategy.rb

@@ -10,16 +10,16 @@ module Chore
  end

  # Begins fetching from the configured queue by way of the configured Consumer. This can only be used if you have a
- # single queue which can be kept up with at a relatively low volume. If you have more than a single queue configured,
- # it will raise an exception.
+ # single queue which can be kept up with at a relatively low volume. If you have more than a single queue
+ # configured, it will raise an exception.
  def fetch
  Chore.logger.debug "Starting up consumer strategy: #{self.class.name}"
  queues = Chore.config.queues
  raise "When using SingleConsumerStrategy only one queue can be defined. Queues: #{queues}" unless queues.size == 1
-
+
  @consumer = Chore.config.consumer.new(queues.first)
- @consumer.consume do |id,queue_name,queue_timeout,body,previous_attempts|
- work = UnitOfWork.new(id, queue_name, queue_timeout, body, previous_attempts, @consumer)
+ @consumer.consume do |message_id, message_receipt_handle, queue_name, queue_timeout, body, previous_attempts|
+ work = UnitOfWork.new(message_id, message_receipt_handle, queue_name, queue_timeout, body, previous_attempts, @consumer)
  @fetcher.manager.assign(work)
  end
  end
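
Consumers now yield an SQS receipt handle alongside the message id, and UnitOfWork takes it as its second argument. A hypothetical consumer illustrating the new six-argument block contract; real consumers subclass Chore::Consumer, which is not loaded here, and all values below are made up:

class ToyConsumer
  def initialize(queue_name)
    @queue_name = queue_name
  end

  def consume
    # id and receipt handle are passed separately so deletes can reference
    # the receipt handle instead of the message id
    yield 'msg-123', 'receipt-abc', @queue_name, 300, '{"args":[]}', 0
  end
end

ToyConsumer.new('jobs').consume do |id, receipt_handle, queue, timeout, body, attempts|
  puts "#{queue}: #{id} (handle #{receipt_handle}), #{attempts} prior attempts"
end
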
data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb

@@ -5,13 +5,14 @@ module Chore
  attr_accessor :batcher

  Chore::CLI.register_option 'batch_size', '--batch-size SIZE', Integer, 'Number of items to collect for a single worker to process'
+ Chore::CLI.register_option 'batch_timeout', '--batch-timeout SIZE', Integer, 'Maximum number of seconds to wait until processing a message'
  Chore::CLI.register_option 'threads_per_queue', '--threads-per-queue NUM_THREADS', Integer, 'Number of threads to create for each named queue'

  def initialize(fetcher)
  @fetcher = fetcher
  @batcher = Batcher.new(Chore.config.batch_size)
  @batcher.callback = lambda { |batch| @fetcher.manager.assign(batch) }
- @batcher.schedule
+ @batcher.schedule(Chore.config.batch_timeout)
  @running = true
  end

@@ -22,7 +23,7 @@ module Chore
  Chore.logger.debug "Starting up consumer strategy: #{self.class.name}"
  threads = []
  Chore.config.queues.each do |queue|
- Chore.config.threads_per_queue.times do
+ Chore.config.threads_per_queue.times do
  if running?
  threads << start_consumer_thread(queue)
  end
@@ -31,7 +32,7 @@ module Chore

  threads.each(&:join)
  end
-
+
  # If the ThreadedConsumerStrategy is currently running <tt>stop!</tt> will begin signalling it to stop
  # It will stop the batcher from forking more work, as well as set a flag which will disable it's own consuming
  # threads once they finish with their current work.
@@ -48,21 +49,22 @@ module Chore
  @running
  end

- private
+ private
  # Starts a consumer thread for polling the given +queue+.
  # If <tt>stop!<tt> is called, the threads will shut themsevles down.
  def start_consumer_thread(queue)
  t = Thread.new(queue) do |tQueue|
  begin
  consumer = Chore.config.consumer.new(tQueue)
- consumer.consume do |id, queue_name, queue_timeout, body, previous_attempts|
+ consumer.consume do |message_id, message_receipt_handle, queue_name, queue_timeout, body, previous_attempts|
  # Quick hack to force this thread to end it's work
  # if we're shutting down. Could be delayed due to the
  # weird sometimes-blocking nature of SQS.
  consumer.stop if !running?
- Chore.logger.debug { "Got message: #{id}"}
+ Chore.logger.debug { "Got message: #{message_id}"}

- work = UnitOfWork.new(id, queue_name, queue_timeout, body, previous_attempts, consumer)
+ work = UnitOfWork.new(message_id, message_receipt_handle, queue_name, queue_timeout, body, previous_attempts, consumer)
+ Chore.run_hooks_for(:consumed_from_source, work)
  @batcher.add(work)
  end
  rescue Chore::TerribleMistake
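
The new batch_timeout option pairs with batch_size: the batcher flushes when either limit is hit. A hedged sketch of wiring both, assuming the Chore.configure block shown in the README and the chore-core gem on the load path; the values are illustrative:

require 'chore'

Chore.configure do |c|
  c.consumer = Chore::Queues::SQS::Consumer
  c.batch_size = 50        # flush as soon as 50 units of work are queued
  c.batch_timeout = 20     # ...or once the oldest queued unit has waited 20 seconds
  c.threads_per_queue = 2  # consumer threads started per named queue
end
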
data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb

@@ -0,0 +1,120 @@
+ module Chore
+ module Strategy
+ class ThrottledConsumerStrategy #:nodoc:
+ def initialize(fetcher)
+ @fetcher = fetcher
+ @queue = SizedQueue.new(Chore.config.num_workers)
+ @return_queue = Queue.new
+ @max_queue_size = Chore.config.num_workers
+ @consumers_per_queue = Chore.config.threads_per_queue
+ @running = true
+ @consumers = []
+ end
+
+ # Begins fetching from queues by spinning up the configured
+ # +:threads_per_queue:+ count of threads for each
+ # queue you're consuming from.
+ # Once all threads are spun up and running, the threads are then joined.
+
+ def fetch
+ Chore.logger.info "TCS: Starting up: #{self.class.name}"
+ threads = []
+ Chore.config.queues.each do |consume_queue|
+ Chore.logger.info "TCS: Starting #{@consumers_per_queue} threads for Queue #{consume_queue}"
+ @consumers_per_queue.times do
+ next unless running?
+ threads << consume(consume_queue)
+ end
+ end
+ threads.each(&:join)
+ end
+
+ # If the ThreadedConsumerStrategy is currently running <tt>stop!</tt>
+ # will begin signalling it to stop. It will stop the batcher
+ # from forking more work,as well as set a flag which will disable
+ # it's own consuming threads once they finish with their current work.
+ def stop!
+ if running?
+ Chore.logger.info "TCS: Shutting down fetcher: #{self.class.name}"
+ @running = false
+ @consumers.each do |consumer|
+ Chore.logger.info "TCS: Stopping consumer: #{consumer.object_id}"
+ @queue.clear
+ @return_queue.clear
+ consumer.stop
+ end
+ end
+ end
+
+ # Returns whether or not the ThreadedConsumerStrategy is running or not
+ def running?
+ @running
+ end
+
+ # return upto number_of_free_workers work objects
+ def provide_work(no_free_workers)
+ work_units = []
+ free_workers = [no_free_workers, @queue.size + @return_queue.size].min
+ while free_workers > 0
+ # Drain from the return queue first, then the consumer thread queue
+ queue = @return_queue.empty? ? @queue : @return_queue
+ work_units << queue.pop
+ free_workers -= 1
+ end
+ work_units
+ end
+
+ # Gives work back to the queue in case it couldn't be assigned
+ #
+ # This will go into a separate queue so that it will be prioritized
+ # over other work that hasn't been attempted yet. It also avoids
+ # a deadlock where @queue is full and the master is waiting to return
+ # work that it couldn't assign.
+ def return_work(work_units)
+ work_units.each do |work|
+ @return_queue.push(work)
+ end
+ end
+
+ private
+
+ def consume(consume_queue)
+ consumer = Chore.config.consumer.new(consume_queue)
+ @consumers << consumer
+ start_consumer_thread(consumer)
+ end
+
+ # Starts a consumer thread for polling the given +consume_queue+.
+ # If <tt>stop!<tt> is called, the threads will shut themsevles down.
+ def start_consumer_thread(consumer)
+ t = Thread.new(consumer) do |th|
+ begin
+ create_work_units(th)
+ rescue Chore::TerribleMistake => e
+ Chore.logger.error 'Terrible mistake, shutting down Chore'
+ Chore.logger.error "#{e.inspect} at #{e.backtrace}"
+ @fetcher.manager.shutdown!
+ end
+ end
+ t
+ end
+
+ def create_work_units(consumer)
+ consumer.consume do |message_id, message_receipt_handle, queue, timeout, body, previous_attempts|
+ # Note: The unit of work object contains a consumer object that when
+ # used to consume from SQS, would have a mutex (that comes as a part
+ # of the AWS sdk); When sending these objects across from one process
+ # to another, we cannot send this across (becasue of the mutex). To
+ # work around this, we simply ignore the consumer object when creating
+ # the unit of work object, and when the worker recieves the work
+ # object, it assigns it a consumer object.
+ # (to allow for communication back to the queue it was consumed from)
+ work = UnitOfWork.new(message_id, message_receipt_handle, queue, timeout, body, previous_attempts)
+ Chore.run_hooks_for(:consumed_from_source, work)
+ @queue.push(work) if running?
+ Chore.run_hooks_for(:added_to_queue, work)
+ end
+ end
+ end # ThrottledConsumerStrategy
+ end
+ end # Chore
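
The scheduling rule in provide_work and return_work is easiest to see in isolation: returned work is drained ahead of freshly consumed work, and the caller never receives more units than are buffered. A stand-alone sketch with stdlib queues; the names mirror the strategy, but this is not the real class:

require 'thread'

queue        = SizedQueue.new(4) # freshly consumed work, bounded by worker count
return_queue = Queue.new         # work handed back because it couldn't be assigned

3.times { |i| queue.push("fresh-#{i}") }
return_queue.push('retry-0')

free_workers = 2
work_units = []
# Drain returned work first so it is retried before brand-new work,
# and never hand out more than is actually buffered.
[free_workers, queue.size + return_queue.size].min.times do
  work_units << (return_queue.empty? ? queue : return_queue).pop
end

p work_units # => ["retry-0", "fresh-0"]
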
data/lib/chore/strategies/worker/forked_worker_strategy.rb

@@ -3,6 +3,7 @@ require 'chore/signal'
  module Chore
  module Strategy
  class ForkedWorkerStrategy #:nodoc:
+ include Util
  attr_accessor :workers

  def initialize(manager, opts={})
@@ -63,6 +64,9 @@ module Chore
  pid = nil
  Chore.run_hooks_for(:around_fork,w) do
  pid = fork do
+ work.each do | item |
+ Chore.run_hooks_for(:fetched_off_internal_q, item)
+ end
  after_fork(w)
  Chore.run_hooks_for(:within_fork,w) do
  Chore.run_hooks_for(:after_fork,w)
@@ -132,7 +136,7 @@ module Chore
  def after_fork(worker)
  # Immediately swap out the process name so that it doesn't look like
  # the master process
- procline("Started:#{Time.now}")
+ procline("#{Chore.config.worker_procline}:Started:#{Time.now}")

  clear_child_signals
  trap_child_signals(worker)
@@ -202,11 +206,6 @@ module Chore
  Kernel.fork(&block)
  end

- def procline(str)
- Chore.logger.info str
- $0 = "chore-#{Chore::VERSION}:#{str}"
- end
-
  def signal_children(sig, pids_to_signal = pids)
  pids_to_signal.each do |pid|
  begin
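
The local procline helper is dropped here in favor of include Util, and the process title now carries the configurable worker_procline prefix. A hedged sketch of what such a helper does; the removed method is shown above, but whether the replacement in Chore::Util is identical is not visible in this hunk:

# Renames the process as seen in `ps`; CHORE_VERSION stands in for Chore::VERSION.
CHORE_VERSION = '4.0.0'

def procline(str)
  puts str # the removed method logged via Chore.logger.info
  $0 = "chore-#{CHORE_VERSION}:#{str}"
end

procline("worker:Started:#{Time.now}")
puts $0 # => e.g. "chore-4.0.0:worker:Started:..."
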
data/lib/chore/strategies/worker/helpers/ipc.rb

@@ -0,0 +1,87 @@
+ require 'socket'
+
+ module Chore
+ module Strategy
+ module Ipc #:nodoc:
+ BIG_ENDIAN = 'L>'.freeze
+ MSG_BYTES = 4
+ READY_MSG = 'R'
+
+ def create_master_socket
+ File.delete socket_file if File.exist? socket_file
+ UNIXServer.new(socket_file).tap do |socket|
+ socket_options(socket)
+ end
+ end
+
+ def child_connection(socket)
+ socket.accept
+ end
+
+ # Sending a message to a socket (must be a connected socket)
+ def send_msg(socket, msg)
+ raise 'send_msg cannot send empty messages' if msg.nil? || msg.size.zero?
+ message = Marshal.dump(msg)
+ encoded_size = [message.size].pack(BIG_ENDIAN)
+ encoded_message = "#{encoded_size}#{message}"
+ socket.send encoded_message, 0
+ end
+
+ # read a message from socket (must be a connected socket)
+ def read_msg(socket)
+ encoded_size = socket.recv(MSG_BYTES, Socket::MSG_PEEK)
+ return if encoded_size.nil? || encoded_size == ''
+
+ size = encoded_size.unpack(BIG_ENDIAN).first
+ encoded_message = socket.recv(MSG_BYTES + size)
+ Marshal.load(encoded_message[MSG_BYTES..-1])
+ rescue Errno::ECONNRESET => ex
+ Chore.logger.info "IPC: Connection was closed on socket #{socket}"
+ raise ex
+ end
+
+ def add_worker_socket
+ UNIXSocket.new(socket_file).tap do |socket|
+ socket_options(socket)
+ end
+ end
+
+ def clear_ready(socket)
+ _ = socket.gets
+ end
+
+ def signal_ready(socket)
+ socket.puts READY_MSG
+ rescue Errno::EPIPE => ex
+ Chore.logger.info 'IPC: Connection was shutdown by master'
+ raise ex
+ end
+
+ def select_sockets(sockets, self_pipe = nil, timeout = 0.5)
+ all_socks = [sockets, self_pipe].flatten.compact
+ IO.select(all_socks, nil, all_socks, timeout)
+ end
+
+ def delete_socket_file
+ File.unlink(socket_file)
+ rescue
+ nil
+ end
+
+ # Used for unit tests
+ def ipc_help
+ :available
+ end
+
+ private
+
+ def socket_file
+ "./prefork_worker_sock-#{Process.pid}"
+ end
+
+ def socket_options(socket)
+ socket.setsockopt(:SOCKET, :REUSEADDR, true)
+ end
+ end
+ end
+ end
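
These Ipc helpers frame every message as a 4-byte big-endian length prefix followed by a Marshal dump, and the reader peeks at the prefix before pulling the full frame. A self-contained sketch of that framing over a socketpair; the real module connects through a UNIXServer socket file instead, and the payload here is made up:

require 'socket'

BIG_ENDIAN = 'L>'
MSG_BYTES  = 4

parent, child = UNIXSocket.pair

payload = { queue: 'jobs', id: 'msg-123' }
message = Marshal.dump(payload)
# Prefix the marshalled bytes with their length so the reader knows how much to take
child.send("#{[message.size].pack(BIG_ENDIAN)}#{message}", 0)

# Peek at the length prefix first, then receive prefix + body in one call
size = parent.recv(MSG_BYTES, Socket::MSG_PEEK).unpack(BIG_ENDIAN).first
decoded = Marshal.load(parent.recv(MSG_BYTES + size)[MSG_BYTES..-1])
p decoded # => the original payload hash
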