chore-core 1.10.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +5 -13
  2. data/LICENSE.txt +1 -1
  3. data/README.md +172 -153
  4. data/chore-core.gemspec +3 -3
  5. data/lib/chore.rb +29 -5
  6. data/lib/chore/cli.rb +22 -4
  7. data/lib/chore/configuration.rb +1 -1
  8. data/lib/chore/consumer.rb +54 -12
  9. data/lib/chore/fetcher.rb +12 -7
  10. data/lib/chore/hooks.rb +2 -1
  11. data/lib/chore/job.rb +19 -0
  12. data/lib/chore/manager.rb +17 -2
  13. data/lib/chore/publisher.rb +18 -2
  14. data/lib/chore/queues/filesystem/consumer.rb +126 -64
  15. data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
  16. data/lib/chore/queues/filesystem/publisher.rb +10 -16
  17. data/lib/chore/queues/sqs.rb +22 -13
  18. data/lib/chore/queues/sqs/consumer.rb +64 -51
  19. data/lib/chore/queues/sqs/publisher.rb +26 -17
  20. data/lib/chore/strategies/consumer/batcher.rb +6 -6
  21. data/lib/chore/strategies/consumer/single_consumer_strategy.rb +5 -5
  22. data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +7 -6
  23. data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +120 -0
  24. data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
  25. data/lib/chore/strategies/worker/helpers/ipc.rb +87 -0
  26. data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
  27. data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
  28. data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
  29. data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
  30. data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
  31. data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
  32. data/lib/chore/unit_of_work.rb +2 -1
  33. data/lib/chore/util.rb +5 -1
  34. data/lib/chore/version.rb +2 -2
  35. data/lib/chore/worker.rb +30 -3
  36. data/spec/chore/cli_spec.rb +2 -2
  37. data/spec/chore/consumer_spec.rb +1 -5
  38. data/spec/chore/duplicate_detector_spec.rb +17 -5
  39. data/spec/chore/fetcher_spec.rb +0 -11
  40. data/spec/chore/manager_spec.rb +7 -0
  41. data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +74 -16
  42. data/spec/chore/queues/sqs/consumer_spec.rb +117 -78
  43. data/spec/chore/queues/sqs/publisher_spec.rb +49 -60
  44. data/spec/chore/queues/sqs_spec.rb +32 -41
  45. data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +3 -3
  46. data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +6 -6
  47. data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
  48. data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +6 -1
  49. data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
  50. data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
  51. data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
  52. data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
  53. data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
  54. data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
  55. data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
  56. data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +1 -1
  57. data/spec/chore/worker_spec.rb +70 -15
  58. data/spec/spec_helper.rb +1 -1
  59. data/spec/support/queues/sqs/fake_objects.rb +18 -0
  60. metadata +53 -29
@@ -0,0 +1,65 @@
1
+ require 'chore/strategies/worker/helpers/ipc'
2
+
3
module Chore
  module Strategy
    # Hands jobs fetched from the manager to ready workers, one job per
    # worker, and gives back to the manager any job that could not be
    # delivered.
    class WorkDistributor #:nodoc:
      class << self
        include Ipc

        # Fetches at most +workers.size+ jobs from +manager+ and pushes the
        # job at position i to the worker at position i.
        #
        # +workers+:: list of worker objects responding to +socket+ and +pid+
        # +manager+:: object responding to +fetch_work+ and +return_work+
        #
        # Raises a RuntimeError when +fetch_work+ does not return an Array.
        # Returns nil when there was nothing to assign, otherwise whatever
        # +manager.return_work+ returns.
        def fetch_and_assign_jobs(workers, manager)
          jobs = manager.fetch_work(workers.size)
          raise "DW: jobs needs to be a list got #{jobs.class}" unless jobs.is_a?(Array)

          if jobs.empty?
            # This condition is due to the internal consumer queue being
            # empty. Assuming that the consumer has to fetch from an external
            # queue, returning immediately would create a tight loop that
            # uses up a lot of the CPU's time. To prevent that, we sleep
            # while waiting for the consumer queue to be populated.
            sleep(0.1)
            return
          end

          manager.return_work(assign_jobs(jobs, workers))
        end

        private

        # Pairs every job with the worker at the same index and tries to
        # deliver it. Returns the list of jobs that could not be delivered.
        #
        # Raises a RuntimeError when +workers+ is empty or when a job has no
        # worker at its index.
        def assign_jobs(jobs, workers)
          raise 'DW: assign_jobs got 0 workers' if workers.empty?

          jobs.each_with_index.with_object([]) do |(job, index), undelivered|
            target = workers[index]
            raise 'DW: More Jobs than Sockets' if target.nil?

            undelivered << job unless push_job_to_worker(job, target)
          end
        end

        # Runs the :before_send_to_worker hooks, then clears the worker's
        # ready flag and sends the job over its socket (clear_ready and
        # send_msg are expected to come from Ipc). Returns true on success
        # and false when any StandardError was raised along the way.
        def push_job_to_worker(job, worker)
          begin
            Chore.run_hooks_for(:before_send_to_worker, job)
            clear_ready(worker.socket)
            send_msg(worker.socket, job)
          rescue => e
            Chore.logger.error "DW: Could not assign job #{job.inspect} (worker: #{worker.pid})\nException #{e.message} #{e.backtrace * "\n"}"

            # We generally shouldn't get into this situation since we've
            # already tested that we can read/write to the Worker's socket.
            # However, the Worker could still fail between that check and
            # pushing the job, so we need to allow the work to be re-assigned
            # to handle that edge case.
            return false
          end

          true
        end

        # Thin wrapper around Kernel.sleep; used for unit tests.
        def sleep(n)
          Kernel.sleep(n)
        end
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Chore
  module Strategy
    # Holds meta information about a worker: its pid and the connection
    # socket attached to it.
    class WorkerInfo
      # Process id of the worker.
      attr_accessor :pid
      # Connection socket for the worker; nil until one is assigned.
      attr_accessor :socket

      # +pid+:: process id of the worker this record describes
      def initialize(pid)
        @pid, @socket = pid, nil
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'get_process_mem'
2
+
3
module Chore
  module Strategy
    # Terminates the current process (via +exit(true)+) once it exceeds the
    # configured memory limit or has handled the configured number of
    # requests. Limits are read from Chore.config when the instance is built.
    class WorkerKiller #:nodoc:
      def initialize
        @memory_limit = Chore.config.memory_limit_bytes
        @request_limit = Chore.config.request_limit
        # Memory is only measured once every @check_cycle calls to
        # check_memory; defaults to 16 when not configured.
        @check_cycle = Chore.config.worker_check_cycle || 16
        @check_count = 0
        @current_requests = 0
      end

      # Counts one call and, every @check_cycle calls, compares the process
      # memory reported by GetProcessMem against the memory limit. When the
      # limit is exceeded the :worker_mem_kill hooks are run and the process
      # exits with a success status.
      #
      # Does nothing when the memory limit is nil or 0.
      def check_memory
        return if @memory_limit.nil? || (@memory_limit == 0)

        @check_count += 1

        # `>=` rather than `==`: with a cycle of 0, a negative cycle, or a
        # non-integer cycle an equality test would never match and the memory
        # limit would silently never be enforced.
        return unless @check_count >= @check_cycle

        rss = GetProcessMem.new.bytes.to_i
        if rss > @memory_limit
          Chore.logger.info "WK: (pid: #{Process.pid}) exceeded memory limit (#{rss} bytes > #{@memory_limit} bytes)"
          Chore.run_hooks_for(:worker_mem_kill)
          exit(true)
        end
        @check_count = 0
      end

      # Counts one handled request. Once the count reaches the request limit
      # the :worker_req_kill hooks are run and the process exits with a
      # success status.
      #
      # Does nothing when the request limit is nil or 0.
      def check_requests
        return if @request_limit.nil? || (@request_limit == 0)

        @current_requests += 1
        return if @current_requests < @request_limit

        Chore.logger.info "WK: (pid: #{Process.pid}) exceeded max number of requests (limit: #{@request_limit})"
        Chore.run_hooks_for(:worker_req_kill)
        exit(true)
      end
    end
  end
end
@@ -0,0 +1,183 @@
1
+ require 'chore/strategies/worker/helpers/ipc'
2
+
3
module Chore
  module Strategy
    # Keeps track of the preforked worker processes on behalf of the master:
    # forks them, attaches a connection socket to each one, signals them on
    # shutdown and reaps the ones that have terminated.
    #
    # Socket helpers used here (add_worker_socket, select_sockets, read_msg)
    # are not defined in this class and are expected to come from Ipc.
    class WorkerManager #:nodoc:
      include Ipc

      # +master_socket+:: socket handed to PreforkedWorker#start_worker in
      #                   every forked worker process
      def initialize(master_socket)
        @master_socket = master_socket
        @pid_to_worker = {}
        @socket_to_worker = {}
      end

      # Create num of missing workers and sockets and attach them for the
      # master. Returns the number of workers that were created.
      def create_and_attach_workers
        create_workers { |num_workers| attach_workers(num_workers) }
      end

      # Reap dead workers and create new ones to replace them
      def respawn_terminated_workers!
        Chore.logger.info 'WM: Respawning terminated workers'
        reap_workers
        create_and_attach_workers
      end

      # Stop children with the given kill signal and reap the ones that have
      # already terminated. A pid that no longer exists (Errno::ESRCH) is
      # logged and skipped.
      def stop_workers(sig)
        @pid_to_worker.each_key do |pid|
          begin
            Chore.logger.info { "WM: Sending #{sig} to: #{pid}" }
            Process.kill(sig, pid)
          rescue Errno::ESRCH => e
            Chore.logger.error "WM: Signal to children error: #{e}"
          end
        end
        # TODO: Sleep for the shutdown timeout and kill any remaining workers
        reap_workers
      end

      # Return all the worker sockets
      def worker_sockets
        @socket_to_worker.keys
      end

      # Return the workers associated with a given array of sockets. A socket
      # that is not attached to any worker yields a nil entry.
      # +block+:: A block can be provided to perform tasks on the workers
      #           associated with the sockets given
      def ready_workers(sockets = [], &block)
        workers = @socket_to_worker.values_at(*sockets)
        yield workers if block_given?
        workers
      end

      private

      # Creates worker processes until we have the number of workers defined
      # by the configuration. Initializes and starts a worker instance in each
      # of the new processes. Returns the number of processes created.
      # +block+:: Block can be provided to run tasks on the number of newly
      #           created worker processes.
      def create_workers(&block)
        num_created_workers = 0

        while @pid_to_worker.size < Chore.config.num_workers
          pid = fork { run_worker_instance }

          Chore.logger.info "WM: created_worker #{pid}"
          # Keep track of the new worker process
          @pid_to_worker[pid] = WorkerInfo.new(pid)
          num_created_workers += 1
        end

        raise 'WM: Not enough workers' if inconsistent_worker_number
        Chore.logger.info "WM: created #{num_created_workers} workers"
        yield num_created_workers if block_given?
        num_created_workers
      end

      # Check that number of workers registered in master match the config
      def inconsistent_worker_number
        Chore.config.num_workers != @pid_to_worker.size
      end

      # Initialize and start a new worker instance. Runs inside the forked
      # child; the child always exits when the worker returns or raises.
      def run_worker_instance
        PreforkedWorker.new.start_worker(@master_socket)
      ensure
        exit(true)
      end

      # Creates individual sockets for each worker to use and attaches them to
      # the correct worker, identified by the pid the worker reports over the
      # socket.
      def attach_workers(num)
        Chore.logger.info "WM: Started attaching #{num} workers"

        create_worker_sockets(num).each do |socket|
          begin
            readable, _, _ = select_sockets(socket, nil, 2)

            if readable.nil?
              Chore.logger.info "WM: #{socket} timeout waiting for a worker"
              socket.close
              next
            end

            r_socket = readable.first
            reported_pid = read_msg(r_socket)

            assigned_worker = @pid_to_worker[reported_pid]
            assigned_worker.socket = socket
            @socket_to_worker[socket] = assigned_worker

            Chore.logger.info "WM: Connected #{reported_pid} with #{r_socket}"
          rescue Errno::ECONNRESET
            Chore.logger.info "WM: A worker failed to connect to #{socket}"
            socket.close
            next
          end
        end

        # If the connection from a worker times out, we are unable to associate
        # the process with a connection and so we kill the worker process
        kill_unattached_workers
        Chore.logger.info 'WM: Finished attaching workers'
      end

      # Create num amount of sockets that are available for worker connections
      def create_worker_sockets(num)
        Array.new(num) { add_worker_socket }
      end

      # Kill workers that failed to connect to the master, i.e. the ones that
      # never had a socket assigned.
      def kill_unattached_workers
        @pid_to_worker.each do |pid, worker|
          next unless worker.socket.nil?
          Chore.logger.info "WM: kill_unattached_workers #{pid}"
          Process.kill('KILL', pid)
        end
      end

      # Collect terminated workers (without blocking on the ones still
      # running) and remove their references from master.
      def reap_workers
        Chore.logger.info "WM: reaping workers.."
        dead_workers = @pid_to_worker.select { |pid, _worker| reap_process(pid) }

        dead_workers.each do |pid, worker|
          @pid_to_worker.delete(pid)

          # A worker killed by kill_unattached_workers never had a socket
          # assigned; calling close on nil here would raise NoMethodError.
          socket = worker.socket
          unless socket.nil?
            socket.close
            @socket_to_worker.delete(socket)
          end

          Chore.logger.info "WM: Removed preforked worker:#{worker.pid} - #{worker.socket}"
        end
      end

      # Non-blocking wait for process to die. Returns whether it stopped
      def reap_process(pid)
        status = Process.wait(pid, Process::WNOHANG)
        case status
        when nil then false # Process is still running
        when pid then true  # Collected status of this pid
        end
      rescue Errno::ECHILD
        # Child process has already terminated
        true
      end

      # Wrapper around Kernel.fork.
      def fork(&block)
        Kernel.fork(&block)
      end
    end
  end
end
@@ -0,0 +1,150 @@
1
+ require 'chore/signal'
2
+ require 'socket'
3
+ require 'chore/strategies/worker/helpers/ipc'
4
+ require 'chore/strategies/worker/helpers/preforked_worker'
5
+ require 'chore/strategies/worker/helpers/worker_manager'
6
+ require 'chore/strategies/worker/helpers/work_distributor'
7
+
8
module Chore
  module Strategy
    # Worker strategy that forks its workers up front (through WorkerManager)
    # and then, from a background thread, hands work to whichever workers
    # report themselves ready on their sockets.
    #
    # Socket helpers used here (create_master_socket, delete_socket_file,
    # select_sockets) are not defined in this class and are expected to come
    # from Ipc.
    class PreForkedWorkerStrategy #:nodoc:
      include Ipc

      # One-character codes written to the self-pipe by the signal traps,
      # mapped back to the signal they stand for in handle_signal.
      NUM_TO_SIGNAL = {
        '1' => :CHLD,
        '2' => :INT,
        '3' => :QUIT,
        '4' => :TERM,
        '5' => :USR1
      }.freeze

      # +manager+:: object the strategy fetches work from and asks to shut
      #             down (see worker_assignment_loop and handle_signal)
      # +opts+::    options hash; stored but not otherwise read in this class
      def initialize(manager, opts = {})
        @options = opts
        @manager = manager
        @self_read, @self_write = IO.pipe
        trap_signals(NUM_TO_SIGNAL, @self_write)
        @worker_manager = WorkerManager.new(create_master_socket)
        at_exit { delete_socket_file }
        @running = true
      end

      # Runs the :before_first_fork hooks, creates and attaches the workers
      # and starts the assignment thread. Returns that Thread.
      def start
        Chore.logger.info "PWS: Starting up worker strategy: #{self.class.name}"
        Chore.run_hooks_for(:before_first_fork)
        @worker_manager.create_and_attach_workers
        worker_assignment_thread
      end

      # Flags the assignment loop to stop at its next iteration.
      def stop!
        Chore.logger.info "PWS: Stopping worker strategy: #{self.class.name}"
        @running = false
      end

      private

      # Runs worker_assignment_loop in its own thread. When the loop ends,
      # for whatever reason, the whole process exits.
      def worker_assignment_thread
        Thread.new do
          begin
            worker_assignment_loop
          rescue Chore::TerribleMistake => mistake
            Chore.logger.error 'PWS: Terrible mistake, shutting down Chore'
            Chore.logger.error mistake.message
            Chore.logger.error mistake.backtrace
            @manager.shutdown!
          ensure
            Chore.logger.info 'PWS: worker_assignment_thread ending'
            # The assignment thread is independent of the main thread. The
            # main thread is waiting on the consumer threads to join. Due to
            # some weird SQS behaviour, it's possible that these threads may
            # not join, whereas the assignment thread always exits, since
            # it's non-blocking. This ensures that the master process exits.
            Process.exit(true)
          end
        end
      end

      # Repeats until running? turns false: wait for readable sockets, deal
      # with a pending signal if there is one, otherwise give work to the
      # workers whose sockets are readable.
      def worker_assignment_loop
        while running?
          candidate_sockets = @worker_manager.worker_sockets

          # select_sockets returns a list of readable sockets. This would
          # include worker connections and the read end of the self-pipe.
          #
          # Note this not only returns sockets from live workers that are
          # readable, but it also returns sockets from *dead* workers. If
          # the worker hasn't already been reaped, then we might get a
          # socket for a dead worker that will fail on write.
          readables, _writables, _errored = select_sockets(candidate_sockets, @self_read)

          # If select timed out, retry
          if readables.nil?
            Chore.logger.debug 'PWS: All sockets busy.. retry'
            next
          end

          # Handle the signal from the self-pipe
          if readables.include?(@self_read)
            handle_signal
            next
          end

          # Confirm they're actually alive! A socket will be readable even
          # if the worker has died but not yet been reaped by the master. We
          # need to confirm that the "Ready" flag has actually been written
          # by the worker and is readable by the master.
          readables.delete_if(&:eof?)

          # Check again to see if there are still sockets available
          if readables.empty?
            Chore.logger.debug 'PWS: All sockets busy.. retry'
            next
          end

          # Fetch and assign work for the readable worker connections
          @worker_manager.ready_workers(readables) do |ready|
            WorkDistributor.fetch_and_assign_jobs(ready, @manager)
          end
        end
        Chore.logger.info 'PWS: worker_assignment_loop ending'
      end

      # Wrapper around @running to help with writing specs for
      # worker_assignment_loop
      def running?
        @running
      end

      # Reads one signal code from the self-pipe and reacts to it: CHLD
      # respawns terminated workers; INT/QUIT/TERM forward the signal to the
      # workers and shut the manager down; USR1 reopens the logs.
      def handle_signal
        signal = NUM_TO_SIGNAL[@self_read.read_nonblock(1)]
        Chore.logger.info "PWS: recv #{signal}"

        case signal
        when :CHLD then @worker_manager.respawn_terminated_workers!
        when :INT, :QUIT, :TERM
          Signal.reset
          @worker_manager.stop_workers(signal)
          @manager.shutdown!
        when :USR1
          Chore.reopen_logs
          Chore.logger.info 'PWS: Master process reopened log'
        end
      end

      # Wrapper around fork for specs.
      def fork(&block)
        Kernel.fork(&block)
      end

      # In the event of a trapped signal, write its code to the self-pipe
      # +signal_hash+:: maps the code to write to the signal to trap
      # +write_end+::   write end of the self-pipe
      def trap_signals(signal_hash, write_end)
        Signal.reset

        signal_hash.each do |code, signal_name|
          Signal.trap(signal_name) { write_end.write(code) }
        end
      end
    end
  end
end