chore-core 1.8.2 → 3.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +6 -0
  3. data/chore-core.gemspec +1 -0
  4. data/lib/chore.rb +11 -5
  5. data/lib/chore/cli.rb +21 -2
  6. data/lib/chore/consumer.rb +15 -5
  7. data/lib/chore/fetcher.rb +12 -7
  8. data/lib/chore/hooks.rb +2 -1
  9. data/lib/chore/job.rb +17 -0
  10. data/lib/chore/manager.rb +18 -2
  11. data/lib/chore/queues/filesystem/consumer.rb +116 -59
  12. data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
  13. data/lib/chore/queues/filesystem/publisher.rb +12 -18
  14. data/lib/chore/queues/sqs/consumer.rb +6 -21
  15. data/lib/chore/strategies/consumer/batcher.rb +8 -9
  16. data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +3 -1
  17. data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +121 -0
  18. data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
  19. data/lib/chore/strategies/worker/helpers/ipc.rb +88 -0
  20. data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
  21. data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
  22. data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
  23. data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
  24. data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
  25. data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
  26. data/lib/chore/strategies/worker/single_worker_strategy.rb +35 -13
  27. data/lib/chore/unit_of_work.rb +8 -0
  28. data/lib/chore/util.rb +5 -1
  29. data/lib/chore/version.rb +3 -3
  30. data/lib/chore/worker.rb +29 -0
  31. data/spec/chore/cli_spec.rb +2 -2
  32. data/spec/chore/consumer_spec.rb +0 -4
  33. data/spec/chore/duplicate_detector_spec.rb +17 -5
  34. data/spec/chore/fetcher_spec.rb +0 -11
  35. data/spec/chore/manager_spec.rb +7 -0
  36. data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +71 -11
  37. data/spec/chore/queues/sqs/consumer_spec.rb +1 -3
  38. data/spec/chore/strategies/consumer/batcher_spec.rb +50 -0
  39. data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +1 -0
  40. data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
  41. data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +16 -1
  42. data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
  43. data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
  44. data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
  45. data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
  46. data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
  47. data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
  48. data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
  49. data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +25 -0
  50. data/spec/chore/worker_spec.rb +69 -1
  51. metadata +33 -5
@@ -6,6 +6,8 @@ module Chore::FilesystemQueue
6
6
  NEW_JOB_DIR = "new"
7
7
  # Local directory for jobs currently in-process to be moved
8
8
  IN_PROGRESS_DIR = "inprogress"
9
+ # Local directory for configuration info
10
+ CONFIG_DIR = "config"
9
11
 
10
12
  # Retrieves the directory for in-process messages to go. If the directory for the +queue_name+ doesn't exist,
11
13
  # it will be created for you. If the directory cannot be created, an IOError will be raised
@@ -29,6 +31,23 @@ module Chore::FilesystemQueue
29
31
  prepare_dir(File.join(root_dir, queue_name))
30
32
  end
31
33
 
34
# Returns the directory that holds per-queue configuration files for
# +queue_name+.
#
# Delegates to +validate_dir+ with the CONFIG_DIR state name.
# NOTE(review): validate_dir is defined outside this view; presumably it
# creates the directory when missing and raises IOError on failure, like the
# other *_dir helpers in this module -- confirm.
def config_dir(queue_name)
  validate_dir(queue_name, CONFIG_DIR)
end
38
+
39
# Reads a single configuration value for +queue_name+.
#
# The value is stored as the content of the file named +config_name+ inside
# the queue's configuration directory (see +config_dir+).
#
# Returns the file content with surrounding whitespace stripped, or +nil+
# when no such file exists.
def config_value(queue_name, config_name)
  config_file = File.join(config_dir(queue_name), config_name)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  File.read(config_file).strip if File.exist?(config_file)
end
45
+
46
# Returns the timeout for +queue_name+ as an Integer.
#
# A 'timeout' entry in the queue's configuration directory takes precedence;
# when none is present, Chore.config.default_queue_timeout is used instead.
def queue_timeout(queue_name)
  configured = config_value(queue_name, 'timeout')
  timeout = configured || Chore.config.default_queue_timeout
  timeout.to_i
end
50
+
32
51
  private
33
52
  # Returns the directory for the given +queue_name+ and +task_state+. If the directory doesn't exist, it will be
34
53
  # created for you. If the directory cannot be created, an IOError will be raised
@@ -10,27 +10,20 @@ module Chore
10
10
  # See the top of FilesystemConsumer for comments on how this works
11
11
  include FilesystemQueue
12
12
 
13
- # Mutex for holding a lock over the files for this queue while they are in process
14
- FILE_MUTEX = Mutex.new
15
-
16
13
  # use of mutex and file locking should make this both threadsafe and safe for multiple
17
14
  # processes to use the same queue directory simultaneously.
18
15
  def publish(queue_name,job)
19
- FILE_MUTEX.synchronize do
20
- while true
21
- # keep trying to get a file with nothing in it meaning we just created it
22
- # as opposed to us getting someone else's file that hasn't been processed yet.
23
- f = File.open(filename(queue_name, job[:class].to_s), "w")
16
+ # First try encoding the job to avoid writing empty job files if this fails
17
+ encoded_job = encode_job(job)
18
+
19
+ published = false
20
+ while !published
21
+ # keep trying to get a file with nothing in it meaning we just created it
22
+ # as opposed to us getting someone else's file that hasn't been processed yet.
23
+ File.open(filename(queue_name, job[:class].to_s), "a") do |f|
24
24
  if f.flock(File::LOCK_EX | File::LOCK_NB) && f.size == 0
25
- begin
26
- f.write(job.to_json)
27
- rescue StandardError => e
28
- Chore.logger.error "#{e.class.name}: #{e.message}. Could not write #{job[:class]} job to '#{queue_name}' queue file."
29
- Chore.logger.error e.backtrace.join("\n")
30
- ensure
31
- f.flock(File::LOCK_UN)
32
- end
33
- break
25
+ f.write(encoded_job)
26
+ published = true
34
27
  end
35
28
  end
36
29
  end
@@ -40,7 +33,8 @@ module Chore
40
33
  def filename(queue_name, job_name)
41
34
  now = Time.now.strftime "%Y%m%d-%H%M%S-%6N"
42
35
  previous_attempts = 0
43
- File.join(new_dir(queue_name), "#{queue_name}-#{job_name}-#{now}.#{previous_attempts}.job")
36
+ pid = Process.pid
37
+ File.join(new_dir(queue_name), "#{queue_name}-#{job_name}-#{pid}-#{now}.#{previous_attempts}.job")
44
38
  end
45
39
  end
46
40
  end
@@ -16,12 +16,11 @@ module Chore
16
16
  Chore::CLI.register_option 'aws_access_key', '--aws-access-key KEY', 'Valid AWS Access Key'
17
17
  Chore::CLI.register_option 'aws_secret_key', '--aws-secret-key KEY', 'Valid AWS Secret Key'
18
18
  Chore::CLI.register_option 'dedupe_servers', '--dedupe-servers SERVERS', 'List of mememcache compatible server(s) to use for storing SQS Message Dedupe cache'
19
- Chore::CLI.register_option 'queue_polling_size', '--queue_polling_size NUM', Integer, 'Amount of messages to grab on each request' do |arg|
20
- raise ArgumentError, "Cannot specify a queue polling size greater than 10" if arg > 10
21
- end
22
19
 
23
20
  def initialize(queue_name, opts={})
24
21
  super(queue_name, opts)
22
+
23
+ raise Chore::TerribleMistake, "Cannot specify a queue polling size greater than 10" if sqs_polling_amount > 10
25
24
  end
26
25
 
27
26
  # Sets a flag that instructs the publisher to reset the connection the next time it's used
@@ -34,7 +33,7 @@ module Chore
34
33
  while running?
35
34
  begin
36
35
  messages = handle_messages(&handler)
37
- sleep (Chore.config.consumer_sleep_interval || 1) if messages.empty?
36
+ sleep (Chore.config.consumer_sleep_interval) if messages.empty?
38
37
  rescue AWS::SQS::Errors::NonExistentQueue => e
39
38
  Chore.logger.error "You specified a queue '#{queue_name}' that does not exist. You must create the queue before starting Chore. Shutting down..."
40
39
  raise Chore::TerribleMistake
@@ -71,7 +70,7 @@ module Chore
71
70
  msg = queue.receive_messages(:limit => sqs_polling_amount, :attributes => [:receive_count])
72
71
  messages = *msg
73
72
  messages.each do |message|
74
- unless duplicate_message?(message)
73
+ unless duplicate_message?(message.id, message.queue.url, queue_timeout)
75
74
  block.call(message.handle, queue_name, queue_timeout, message.body, message.receive_count - 1)
76
75
  end
77
76
  Chore.run_hooks_for(:on_fetch, message.handle, message.body)
@@ -79,18 +78,6 @@ module Chore
79
78
  messages
80
79
  end
81
80
 
82
- # Checks if the given message has already been received within the timeout window for this queue
83
- def duplicate_message?(message)
84
- dupe_detector.found_duplicate?(:id=>message.id, :queue=>message.queue.url, :visibility_timeout=>message.queue.visibility_timeout)
85
- end
86
-
87
- # Returns the instance of the DuplicateDetector used to ensure unique messages.
88
- # Will create one if one doesn't already exist
89
- def dupe_detector
90
- @dupes ||= DuplicateDetector.new({:servers => Chore.config.dedupe_servers,
91
- :dupe_on_cache_failure => Chore.config.dupe_on_cache_failure})
92
- end
93
-
94
81
  # Retrieves the SQS queue with the given +name+. The method will cache the results to prevent round trips on
95
82
  # subsequent calls. If <tt>reset_connection!</tt> has been called, this will result in the connection being
96
83
  # re-initialized, as well as clear any cached results from prior calls
@@ -116,13 +103,11 @@ module Chore
116
103
  def sqs
117
104
  @sqs ||= AWS::SQS.new(
118
105
  :access_key_id => Chore.config.aws_access_key,
119
- :secret_access_key => Chore.config.aws_secret_key,
120
- :logger => Chore.logger,
121
- :log_level => :debug)
106
+ :secret_access_key => Chore.config.aws_secret_key)
122
107
  end
123
108
 
124
109
  def sqs_polling_amount
125
- Chore.config.queue_polling_size || 10
110
+ Chore.config.queue_polling_size
126
111
  end
127
112
  end
128
113
  end
@@ -11,27 +11,27 @@ module Chore
11
11
  @size = size
12
12
  @batch = []
13
13
  @mutex = Mutex.new
14
- @last_message = nil
15
14
  @callback = nil
16
15
  @running = true
17
16
  end
18
17
 
19
18
  # The main entry point of the Batcher, <tt>schedule</tt> begins a thread with the provided +batch_timeout+
20
19
  # as the only argument. While the Batcher is running, it will attempt to check if either the batch is full,
21
- # or if the +batch_timeout+ has elapsed since the last batch was executed. If the batch is full, it will be executed.
22
- # If the +batch_timeout+ has elapsed, as soon as the next message enters the batch, it will be executed.
20
+ # or if the +batch_timeout+ has elapsed since the oldest message was added. If either case is true, the
21
+ # items in the batch will be executed.
23
22
  #
24
23
  # Calling <tt>stop</tt> will cause the thread to finish its current check, and exit
25
- def schedule(batch_timeout=20)
24
+ def schedule(batch_timeout)
26
25
  @thread = Thread.new(batch_timeout) do |timeout|
27
26
  Chore.logger.info "Batching timeout thread starting"
28
27
  while @running do
29
28
  begin
30
- Chore.logger.debug "Last message added to batch: #{@last_message}: #{@batch.size}"
31
- if @last_message && Time.now > (@last_message + timeout)
32
- Chore.logger.debug "Batching timeout reached (#{@last_message + timeout}), current size: #{@batch.size}"
29
+ oldest_item = @batch.first
30
+ timestamp = oldest_item && oldest_item.created_at
31
+ Chore.logger.debug "Oldest message in batch: #{timestamp}, size: #{@batch.size}"
32
+ if timestamp && Time.now > (timestamp + timeout)
33
+ Chore.logger.debug "Batching timeout reached (#{timestamp + timeout}), current size: #{@batch.size}"
33
34
  self.execute(true)
34
- @last_message = nil
35
35
  end
36
36
  sleep(1)
37
37
  rescue => e
@@ -44,7 +44,6 @@ module Chore
44
44
  # Adds the +item+ to the current batch
45
45
  def add(item)
46
46
  @batch << item
47
- @last_message = Time.now
48
47
  execute if ready?
49
48
  end
50
49
 
@@ -5,13 +5,14 @@ module Chore
5
5
  attr_accessor :batcher
6
6
 
7
7
  Chore::CLI.register_option 'batch_size', '--batch-size SIZE', Integer, 'Number of items to collect for a single worker to process'
8
+ Chore::CLI.register_option 'batch_timeout', '--batch-timeout SIZE', Integer, 'Maximum number of seconds to wait until processing a message'
8
9
  Chore::CLI.register_option 'threads_per_queue', '--threads-per-queue NUM_THREADS', Integer, 'Number of threads to create for each named queue'
9
10
 
10
11
  def initialize(fetcher)
11
12
  @fetcher = fetcher
12
13
  @batcher = Batcher.new(Chore.config.batch_size)
13
14
  @batcher.callback = lambda { |batch| @fetcher.manager.assign(batch) }
14
- @batcher.schedule
15
+ @batcher.schedule(Chore.config.batch_timeout)
15
16
  @running = true
16
17
  end
17
18
 
@@ -63,6 +64,7 @@ module Chore
63
64
  Chore.logger.debug { "Got message: #{id}"}
64
65
 
65
66
  work = UnitOfWork.new(id, queue_name, queue_timeout, body, previous_attempts, consumer)
67
+ Chore.run_hooks_for(:consumed_from_source, work)
66
68
  @batcher.add(work)
67
69
  end
68
70
  rescue Chore::TerribleMistake
@@ -0,0 +1,121 @@
1
module Chore
  module Strategy
    # Consumer strategy that buffers consumed work in a bounded in-memory queue
    # (sized to Chore.config.num_workers) instead of handing it off in batches.
    # Work is pulled out on demand through +provide_work+ and can be handed
    # back through +return_work+ when it could not be assigned.
    class ThrottledConsumerStrategy #:nodoc:
      def initialize(fetcher)
        @fetcher = fetcher
        @max_queue_size = Chore.config.num_workers
        @consumers_per_queue = Chore.config.threads_per_queue
        # Bounded queue: consumer threads block in push once it is full.
        @queue = SizedQueue.new(@max_queue_size)
        # Unbounded queue for work that was handed out but given back.
        @return_queue = Queue.new
        @running = true
        @consumers = []
      end

      # Begins fetching from queues by spinning up the configured
      # +:threads_per_queue:+ count of threads for each queue you're
      # consuming from. Once all threads are spun up and running, the threads
      # are then joined, so this call blocks until every consumer thread exits.
      def fetch
        Chore.logger.info "TCS: Starting up: #{self.class.name}"
        threads = []
        Chore.config.queues.each do |consume_queue|
          Chore.logger.info "TCS: Starting #{@consumers_per_queue} threads for Queue #{consume_queue}"
          @consumers_per_queue.times do
            next unless running?
            threads << consume(consume_queue)
          end
        end
        threads.each(&:join)
      end

      # If the ThrottledConsumerStrategy is currently running, <tt>stop!</tt>
      # flips the running flag, discards any work still buffered in the
      # internal queues and asks every consumer to stop.
      def stop!
        return unless running?

        Chore.logger.info "TCS: Shutting down fetcher: #{self.class.name}"
        @running = false
        @consumers.each do |consumer|
          Chore.logger.info "TCS: Stopping consumer: #{consumer.object_id}"
          # Cleared on every iteration (not just once): presumably this also
          # releases consumer threads blocked pushing onto the full queue.
          @queue.clear
          @return_queue.clear
          consumer.stop
        end
      end

      # Returns whether or not the ThrottledConsumerStrategy is running
      def running?
        @running
      end

      # Returns up to +no_free_workers+ work objects, never more than are
      # currently buffered. Returned work (see +return_work+) is drained
      # before freshly consumed work.
      #
      # This never blocks: if the queues are emptied between the size check
      # and the pop (e.g. by <tt>stop!</tt>), whatever was collected so far is
      # returned.
      def provide_work(no_free_workers)
        work_units = []
        available = [no_free_workers, @queue.size + @return_queue.size].min
        available.times do
          # Drain from the return queue first, then the consumer thread queue
          source = @return_queue.empty? ? @queue : @return_queue
          begin
            work_units << source.pop(true)
          rescue ThreadError
            # Queue was emptied underneath us; a blocking pop would hang here.
            break
          end
        end
        work_units
      end

      # Gives work back to the queue in case it couldn't be assigned
      #
      # This will go into a separate queue so that it will be prioritized
      # over other work that hasn't been attempted yet. It also avoids
      # a deadlock where @queue is full and the master is waiting to return
      # work that it couldn't assign.
      def return_work(work_units)
        work_units.each do |work|
          @return_queue.push(work)
        end
      end

      private

      # Builds a consumer for +consume_queue+, remembers it so that
      # <tt>stop!</tt> can reach it, and starts its polling thread.
      # Returns the started Thread.
      def consume(consume_queue)
        consumer = Chore.config.consumer.new(consume_queue)
        @consumers << consumer
        start_consumer_thread(consumer)
      end

      # Starts a thread that polls through the given +consumer+.
      # If <tt>stop!</tt> is called, the threads will shut themselves down.
      # A Chore::TerribleMistake raised while consuming shuts down the manager.
      def start_consumer_thread(consumer)
        Thread.new(consumer) do |thread_consumer|
          begin
            create_work_units(thread_consumer)
          rescue Chore::TerribleMistake => e
            Chore.logger.error 'Terrible mistake, shutting down Chore'
            Chore.logger.error "#{e.inspect} at #{e.backtrace}"
            @fetcher.manager.shutdown!
          end
        end
      end

      # Consumes messages through +consumer+, wraps each one in a UnitOfWork
      # and buffers it on the internal queue while the strategy is running.
      def create_work_units(consumer)
        consumer.consume do |id, queue, timeout, body, previous_attempts|
          # Note: The unit of work object contains a consumer object that when
          # used to consume from SQS, would have a mutex (that comes as a part
          # of the AWS sdk); When sending these objects across from one process
          # to another, we cannot send this across (because of the mutex). To
          # work around this, we simply ignore the consumer object when creating
          # the unit of work object, and when the worker receives the work
          # object, it assigns it a consumer object.
          # (to allow for communication back to the queue it was consumed from)
          work = UnitOfWork.new(id, queue, timeout, body,
                                previous_attempts)
          Chore.run_hooks_for(:consumed_from_source, work)
          # Work consumed after stop! is dropped; only report it as queued
          # when it actually was.
          if running?
            @queue.push(work)
            Chore.run_hooks_for(:added_to_queue, work)
          end
        end
      end
    end # ThrottledConsumerStrategy
  end
end # Chore
@@ -3,6 +3,7 @@ require 'chore/signal'
3
3
  module Chore
4
4
  module Strategy
5
5
  class ForkedWorkerStrategy #:nodoc:
6
+ include Util
6
7
  attr_accessor :workers
7
8
 
8
9
  def initialize(manager, opts={})
@@ -63,6 +64,9 @@ module Chore
63
64
  pid = nil
64
65
  Chore.run_hooks_for(:around_fork,w) do
65
66
  pid = fork do
67
+ work.each do | item |
68
+ Chore.run_hooks_for(:fetched_off_internal_q, item)
69
+ end
66
70
  after_fork(w)
67
71
  Chore.run_hooks_for(:within_fork,w) do
68
72
  Chore.run_hooks_for(:after_fork,w)
@@ -132,7 +136,7 @@ module Chore
132
136
  def after_fork(worker)
133
137
  # Immediately swap out the process name so that it doesn't look like
134
138
  # the master process
135
- procline("Started:#{Time.now}")
139
+ procline("#{Chore.config.worker_procline}:Started:#{Time.now}")
136
140
 
137
141
  clear_child_signals
138
142
  trap_child_signals(worker)
@@ -202,11 +206,6 @@ module Chore
202
206
  Kernel.fork(&block)
203
207
  end
204
208
 
205
- def procline(str)
206
- Chore.logger.info str
207
- $0 = "chore-#{Chore::VERSION}:#{str}"
208
- end
209
-
210
209
  def signal_children(sig, pids_to_signal = pids)
211
210
  pids_to_signal.each do |pid|
212
211
  begin
@@ -0,0 +1,88 @@
1
+ require 'socket'
2
+
3
module Chore
  module Strategy
    # Helpers for exchanging messages over a UNIX domain socket.
    #
    # Wire format used by +send_msg+ / +read_msg+: a 4-byte big-endian
    # unsigned length header followed by that many bytes of Marshal data.
    # Readiness is signalled separately as a newline-terminated READY_MSG.
    module Ipc #:nodoc:
      BIG_ENDIAN = 'L>'.freeze
      MSG_BYTES = 4
      READY_MSG = 'R'.freeze

      # Creates the listening UNIX socket, replacing any stale socket file.
      def create_master_socket
        File.delete(socket_file) if File.exist?(socket_file)
        UNIXServer.new(socket_file).tap do |socket|
          socket_options(socket)
        end
      end

      # Accepts a pending connection on the listening +socket+.
      def child_connection(socket)
        socket.accept
      end

      # Sending a message to a socket (must be a connected socket)
      #
      # +msg+ is serialized with Marshal and prefixed with its byte length.
      # Raises RuntimeError when +msg+ is nil or empty.
      def send_msg(socket, msg)
        raise 'send_msg cannot send empty messages' if msg.nil? || msg.size.zero?
        message = Marshal.dump(msg)
        encoded_size = [message.bytesize].pack(BIG_ENDIAN)
        encoded_message = "#{encoded_size}#{message}"
        # IO#write keeps writing until the whole frame is out; BasicSocket#send
        # may transmit only part of a large payload and return a short count.
        socket.write(encoded_message)
      end

      # read a message from socket (must be a connected socket)
      #
      # Returns the decoded message, or nil when the peer closed the
      # connection before sending anything. Raises EOFError when the
      # connection closes in the middle of the length header, and re-raises
      # Errno::ECONNRESET after logging it.
      def read_msg(socket)
        header = recv_exactly(socket, MSG_BYTES)
        return if header.empty?
        raise EOFError, 'IPC: connection closed while reading message size' if header.bytesize < MSG_BYTES

        size = header.unpack(BIG_ENDIAN).first
        # A single recv may return fewer bytes than requested on a stream
        # socket, so keep reading until the whole payload has arrived.
        payload = recv_exactly(socket, size)
        # NOTE(review): Marshal.load must only see trusted data; this relies
        # on the socket being reachable solely by this process tree -- confirm.
        Marshal.load(payload)
      rescue Errno::ECONNRESET => ex
        Chore.logger.info "IPC: Connection was closed on socket #{socket}"
        raise ex
      end

      # Connects to the socket file as a client.
      def add_worker_socket
        UNIXSocket.new(socket_file).tap do |socket|
          socket_options(socket)
        end
      end

      # Consumes one pending readiness line from +socket+.
      def clear_ready(socket)
        _ = socket.gets
      end

      # Writes the readiness marker to +socket+. Re-raises Errno::EPIPE after
      # logging it when the other end has gone away.
      def signal_ready(socket)
        socket.puts READY_MSG
      rescue Errno::EPIPE => ex
        Chore.logger.info 'IPC: Connection was shutdown by master'
        raise ex
      end

      # Waits up to +timeout+ seconds for any of +sockets+ (and the optional
      # +self_pipe+) to become readable or to report an error condition.
      # Returns whatever IO.select returns (nil on timeout).
      def select_sockets(sockets, self_pipe = nil, timeout = 0.5)
        all_socks = [sockets, self_pipe].flatten.compact
        IO.select(all_socks, nil, all_socks, timeout)
      end

      # Best-effort removal of the socket file; failures are ignored.
      def delete_socket_file
        File.unlink(socket_file)
      rescue
        nil
      end

      # Used for unit tests
      def ipc_help
        :available
      end

      private

      # Receives up to +length+ bytes from +socket+, looping over short reads.
      # Returns fewer bytes (possibly none) only if the peer closed first.
      def recv_exactly(socket, length)
        buffer = String.new
        while buffer.bytesize < length
          chunk = socket.recv(length - buffer.bytesize)
          break if chunk.nil? || chunk.empty?
          buffer << chunk
        end
        buffer
      end

      # TODO: do we need this as a optional param
      def socket_file
        "./prefork_worker_sock-#{Process.pid}"
      end

      def socket_options(socket)
        socket.setsockopt(:SOCKET, :REUSEADDR, true)
      end
    end
  end
end
+ end