chore-core 1.10.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/LICENSE.txt +1 -1
- data/README.md +172 -153
- data/chore-core.gemspec +3 -3
- data/lib/chore.rb +29 -5
- data/lib/chore/cli.rb +22 -4
- data/lib/chore/configuration.rb +1 -1
- data/lib/chore/consumer.rb +54 -12
- data/lib/chore/fetcher.rb +12 -7
- data/lib/chore/hooks.rb +2 -1
- data/lib/chore/job.rb +19 -0
- data/lib/chore/manager.rb +17 -2
- data/lib/chore/publisher.rb +18 -2
- data/lib/chore/queues/filesystem/consumer.rb +126 -64
- data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
- data/lib/chore/queues/filesystem/publisher.rb +10 -16
- data/lib/chore/queues/sqs.rb +22 -13
- data/lib/chore/queues/sqs/consumer.rb +64 -51
- data/lib/chore/queues/sqs/publisher.rb +26 -17
- data/lib/chore/strategies/consumer/batcher.rb +6 -6
- data/lib/chore/strategies/consumer/single_consumer_strategy.rb +5 -5
- data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +7 -6
- data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +120 -0
- data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
- data/lib/chore/strategies/worker/helpers/ipc.rb +87 -0
- data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
- data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
- data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
- data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
- data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
- data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
- data/lib/chore/unit_of_work.rb +2 -1
- data/lib/chore/util.rb +5 -1
- data/lib/chore/version.rb +2 -2
- data/lib/chore/worker.rb +30 -3
- data/spec/chore/cli_spec.rb +2 -2
- data/spec/chore/consumer_spec.rb +1 -5
- data/spec/chore/duplicate_detector_spec.rb +17 -5
- data/spec/chore/fetcher_spec.rb +0 -11
- data/spec/chore/manager_spec.rb +7 -0
- data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +74 -16
- data/spec/chore/queues/sqs/consumer_spec.rb +117 -78
- data/spec/chore/queues/sqs/publisher_spec.rb +49 -60
- data/spec/chore/queues/sqs_spec.rb +32 -41
- data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +3 -3
- data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +6 -6
- data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
- data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +6 -1
- data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
- data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
- data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
- data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
- data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
- data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
- data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
- data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +1 -1
- data/spec/chore/worker_spec.rb +70 -15
- data/spec/spec_helper.rb +1 -1
- data/spec/support/queues/sqs/fake_objects.rb +18 -0
- metadata +53 -29
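
The headline changes in this major release are a new preforked worker strategy (`PreForkedWorkerStrategy` plus the IPC, `WorkerManager`, `WorkDistributor`, and `PreforkedWorker` helpers), a new throttled consumer strategy, and a `WorkerKiller` that recycles workers once they exceed a memory or request budget. The sketch below is a hypothetical configuration, not taken from this diff: the `Chore.configure` block and the `worker_strategy`/`consumer_strategy` keys are assumed from existing Chore usage, and the `ThrottledConsumerStrategy` class name is inferred from the new file's name; only `num_workers`, `memory_limit_bytes`, `request_limit`, and `worker_check_cycle` are options actually read by the code added in this changeset.

```ruby
# Hypothetical configuration sketch; only the option names read by the new
# code in this diff (num_workers, memory_limit_bytes, request_limit,
# worker_check_cycle) are confirmed here, the rest is assumed Chore usage.
require 'chore'

Chore.configure do |c|
  c.worker_strategy    = Chore::Strategy::PreForkedWorkerStrategy   # new strategy in this release
  c.consumer_strategy  = Chore::Strategy::ThrottledConsumerStrategy # new strategy in this release
  c.num_workers        = 8                  # size of the preforked worker pool (read by WorkerManager)
  c.memory_limit_bytes = 512 * 1024 * 1024  # WorkerKiller exits a worker whose RSS exceeds this
  c.request_limit      = 10_000             # WorkerKiller exits a worker after this many jobs
  c.worker_check_cycle = 16                 # jobs between RSS checks (the code defaults to 16)
end
```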
data/lib/chore/strategies/worker/helpers/work_distributor.rb (new file)
@@ -0,0 +1,65 @@
+require 'chore/strategies/worker/helpers/ipc'
+
+module Chore
+  module Strategy
+    class WorkDistributor #:nodoc:
+      class << self
+        include Ipc
+
+        def fetch_and_assign_jobs(workers, manager)
+          jobs = manager.fetch_work(workers.size)
+          raise "DW: jobs needs to be a list got #{jobs.class}" unless jobs.is_a?(Array)
+          if jobs.empty?
+            # This condition is due to the internal consumer queue being empty.
+            # Assuming that the consumer has to fetch from an external queue,
+            # if we return here, we would create a tight loop that would use up
+            # a lot of the CPU's time. In order to prevent that, we wait for the
+            # consumer queue to be populated, by sleeping.
+            sleep(0.1)
+            return
+          end
+          jobs_to_return = assign_jobs(jobs, workers)
+          manager.return_work(jobs_to_return)
+        end
+
+        private
+
+        def assign_jobs(jobs, workers)
+          raise 'DW: assign_jobs got 0 workers' if workers.empty?
+          jobs_to_return = []
+          jobs.each_with_index do |job, i|
+            raise 'DW: More Jobs than Sockets' if workers[i].nil?
+            unless push_job_to_worker(job, workers[i])
+              jobs_to_return << job
+            end
+          end
+
+          jobs_to_return
+        end
+
+        def push_job_to_worker(job, worker)
+          Chore.run_hooks_for(:before_send_to_worker, job)
+          clear_ready(worker.socket)
+          send_msg(worker.socket, job)
+          true
+        rescue => e
+          Chore.logger.error "DW: Could not assign job #{job.inspect} (worker: #{worker.pid})\nException #{e.message} #{e.backtrace * "\n"}"
+
+          # We generally shouldn't get into this situation since we've already
+          # tested that we can read/write to the Worker's socket. However,
+          # the Worker could still fail between that check and pushing the
+          # job, so we need to allow the work to be re-assigned to handle that
+          # edge case.
+          false
+        end
+
+        private
+
+        # Used for unit tests
+        def sleep(n)
+          Kernel.sleep(n)
+        end
+      end
+    end
+  end
+end
data/lib/chore/strategies/worker/helpers/worker_killer.rb (new file)
@@ -0,0 +1,40 @@
+require 'get_process_mem'
+
+module Chore
+  module Strategy
+    class WorkerKiller #:nodoc:
+      def initialize
+        @memory_limit = Chore.config.memory_limit_bytes
+        @request_limit = Chore.config.request_limit
+        @check_cycle = Chore.config.worker_check_cycle || 16
+        @check_count = 0
+        @current_requests = 0
+      end
+
+      def check_memory
+        return if @memory_limit.nil? || (@memory_limit == 0)
+        @check_count += 1
+
+        if @check_count == @check_cycle
+          rss = GetProcessMem.new.bytes.to_i
+          if rss > @memory_limit
+            Chore.logger.info "WK: (pid: #{Process.pid}) exceeded memory limit (#{rss.to_i} bytes > #{@memory_limit} bytes)"
+            Chore.run_hooks_for(:worker_mem_kill)
+            exit(true)
+          end
+          @check_count = 0
+        end
+      end
+
+      def check_requests
+        return if @request_limit.nil? || (@request_limit == 0)
+
+        if (@current_requests += 1) >= @request_limit
+          Chore.logger.info "WK: (pid: #{Process.pid}) exceeded max number of requests (limit: #{@request_limit})"
+          Chore.run_hooks_for(:worker_req_kill)
+          exit(true)
+        end
+      end
+    end
+  end
+end
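
Before exiting, `WorkerKiller` fires the `:worker_mem_kill` and `:worker_req_kill` hooks, so a deployment can observe worker recycling. A minimal, hypothetical example of wiring those hooks up for metrics follows; `Chore.add_hook` is assumed to be the existing global hook API (it is not part of this diff), and the `StatsD` calls are placeholders.

```ruby
# Hypothetical hook wiring (not from this diff): only the hook names
# :worker_mem_kill and :worker_req_kill come from the WorkerKiller code above.
Chore.add_hook(:worker_mem_kill) do
  StatsD.increment('chore.worker.recycled.memory')   # placeholder metrics client
end

Chore.add_hook(:worker_req_kill) do
  StatsD.increment('chore.worker.recycled.requests') # placeholder metrics client
end
```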
data/lib/chore/strategies/worker/helpers/worker_manager.rb (new file)
@@ -0,0 +1,183 @@
+require 'chore/strategies/worker/helpers/ipc'
+
+module Chore
+  module Strategy
+    class WorkerManager #:nodoc:
+      include Ipc
+
+      def initialize(master_socket)
+        @master_socket = master_socket
+        @pid_to_worker = {}
+        @socket_to_worker = {}
+      end
+
+      # Create any missing workers and their sockets, and attach them to the
+      # master
+      def create_and_attach_workers
+        create_workers do |num_workers|
+          attach_workers(num_workers)
+        end
+      end
+
+      # Reap dead workers and create new ones to replace them
+      def respawn_terminated_workers!
+        Chore.logger.info 'WM: Respawning terminated workers'
+        reap_workers
+        create_and_attach_workers
+      end
+
+      # Stop children with the given kill signal and wait for them to die
+      def stop_workers(sig)
+        @pid_to_worker.each do |pid, worker|
+          begin
+            Chore.logger.info { "WM: Sending #{sig} to: #{pid}" }
+            Process.kill(sig, pid)
+          rescue Errno::ESRCH => e
+            Chore.logger.error "WM: Signal to children error: #{e}"
+          end
+        end
+        # TODO: Sleep for the shutdown timeout and kill any remaining workers
+        reap_workers
+      end
+
+      # Return all the worker sockets
+      def worker_sockets
+        @socket_to_worker.keys
+      end
+
+      # Return the workers associated with a given array of sockets.
+      # +block+:: A block can be provided to perform tasks on the workers
+      # associated with the sockets given
+      def ready_workers(sockets = [], &block)
+        workers = @socket_to_worker.values_at(*sockets)
+        yield workers if block_given?
+        workers
+      end
+
+      private
+
+      # Creates worker processes until we have the number of workers defined
+      # by the configuration. Initializes and starts a worker instance in each
+      # of the new processes.
+      # +block+:: Block can be provided to run tasks on the number of newly
+      # created worker processes.
+      def create_workers(&block)
+        num_created_workers = 0
+
+        while @pid_to_worker.size < Chore.config.num_workers
+          pid = fork do
+            run_worker_instance
+          end
+
+          Chore.logger.info "WM: created_worker #{pid}"
+          # Keep track of the new worker process
+          @pid_to_worker[pid] = WorkerInfo.new(pid)
+          num_created_workers += 1
+        end
+
+        raise 'WM: Not enough workers' if inconsistent_worker_number
+        Chore.logger.info "WM: created #{num_created_workers} workers"
+        yield num_created_workers if block_given?
+        num_created_workers
+      end
+
+      # Check that the number of workers registered in the master matches the config
+      def inconsistent_worker_number
+        Chore.config.num_workers != @pid_to_worker.size
+      end
+
+      # Initialize and start a new worker instance
+      def run_worker_instance
+        PreforkedWorker.new.start_worker(@master_socket)
+      ensure
+        exit(true)
+      end
+
+      # Creates individual sockets for each worker to use and attaches them to
+      # the correct worker
+      def attach_workers(num)
+        Chore.logger.info "WM: Started attaching #{num} workers"
+
+        create_worker_sockets(num).each do |socket|
+          begin
+            readable, _, _ = select_sockets(socket, nil, 2)
+
+            if readable.nil?
+              Chore.logger.info "WM: #{socket} timeout waiting for a worker"
+              socket.close
+              next
+            end
+
+            r_socket = readable.first
+            reported_pid = read_msg(r_socket)
+
+            assigned_worker = @pid_to_worker[reported_pid]
+            assigned_worker.socket = socket
+            @socket_to_worker[socket] = assigned_worker
+
+            Chore.logger.info "WM: Connected #{reported_pid} with #{r_socket}"
+          rescue Errno::ECONNRESET
+            Chore.logger.info "WM: A worker failed to connect to #{socket}"
+            socket.close
+            next
+          end
+        end
+
+        # If the connection from a worker times out, we are unable to associate
+        # the process with a connection and so we kill the worker process
+        kill_unattached_workers
+        Chore.logger.info 'WM: Finished attaching workers'
+      end
+
+      # Create num sockets that are available for worker connections
+      def create_worker_sockets(num)
+        Array.new(num) do
+          add_worker_socket
+        end
+      end
+
+      # Kill workers that failed to connect to the master
+      def kill_unattached_workers
+        @pid_to_worker.each do |pid, worker|
+          next unless worker.socket.nil?
+          Chore.logger.info "WM: kill_unattached_workers #{pid}"
+          Process.kill('KILL', pid)
+        end
+      end
+
+      # Wait for terminated workers to die and remove their references from
+      # master
+      def reap_workers
+        Chore.logger.info "WM: reaping workers.."
+        dead_workers = @pid_to_worker.select do |pid, worker|
+          reap_process(pid)
+        end
+
+        dead_workers.each do |pid, worker|
+          dead_worker = @pid_to_worker.delete(pid)
+          dead_worker.socket.close
+          @socket_to_worker.delete(dead_worker.socket)
+          Chore.logger.info "WM: Removed preforked worker:#{worker.pid} - #{worker.socket}"
+        end
+      end
+
+      # Non-blocking wait for a process to die. Returns whether it stopped
+      def reap_process(pid)
+        status = Process.wait(pid, Process::WNOHANG)
+        case status
+        when nil # Process is still running
+          return false
+        when pid # Collected status of this pid
+          return true
+        end
+      rescue Errno::ECHILD
+        # Child process has already terminated
+        true
+      end
+
+      def fork(&block)
+        Kernel.fork(&block)
+      end
+    end
+  end
+end
data/lib/chore/strategies/worker/preforked_worker_strategy.rb (new file)
@@ -0,0 +1,150 @@
+require 'chore/signal'
+require 'socket'
+require 'chore/strategies/worker/helpers/ipc'
+require 'chore/strategies/worker/helpers/preforked_worker'
+require 'chore/strategies/worker/helpers/worker_manager'
+require 'chore/strategies/worker/helpers/work_distributor'
+
+module Chore
+  module Strategy
+    class PreForkedWorkerStrategy #:nodoc:
+      include Ipc
+
+      NUM_TO_SIGNAL = { '1' => :CHLD,
+                        '2' => :INT,
+                        '3' => :QUIT,
+                        '4' => :TERM,
+                        '5' => :USR1 }.freeze
+
+      def initialize(manager, opts = {})
+        @options = opts
+        @manager = manager
+        @self_read, @self_write = IO.pipe
+        trap_signals(NUM_TO_SIGNAL, @self_write)
+        @worker_manager = WorkerManager.new(create_master_socket)
+        at_exit { delete_socket_file }
+        @running = true
+      end
+
+      def start
+        Chore.logger.info "PWS: Starting up worker strategy: #{self.class.name}"
+        Chore.run_hooks_for(:before_first_fork)
+        @worker_manager.create_and_attach_workers
+        worker_assignment_thread
+      end
+
+      def stop!
+        Chore.logger.info "PWS: Stopping worker strategy: #{self.class.name}"
+        @running = false
+      end
+
+      private
+
+      def worker_assignment_thread
+        Thread.new do
+          begin
+            worker_assignment_loop
+          rescue Chore::TerribleMistake => e
+            Chore.logger.error 'PWS: Terrible mistake, shutting down Chore'
+            Chore.logger.error e.message
+            Chore.logger.error e.backtrace
+            @manager.shutdown!
+          ensure
+            Chore.logger.info 'PWS: worker_assignment_thread ending'
+            # The worker-assignment thread is independent of the main thread.
+            # The main thread is waiting on the consumer threads to join.
+            # Due to some weird SQS behaviour, it's possible that these threads
+            # may not join, and the assignment thread always exits, since it's
+            # nonblocking. This will ensure that the master process exits.
+            Process.exit(true)
+          end
+        end
+      end
+
+      def worker_assignment_loop
+        while running?
+          w_sockets = @worker_manager.worker_sockets
+
+          # select_sockets returns a list of readable sockets.
+          # This would include worker connections and the read end
+          # of the self-pipe.
+          #
+          # Note this not only returns sockets from live workers
+          # that are readable, but it also returns sockets from
+          # *dead* workers. If the worker hasn't already been reaped,
+          # then we might get a socket for a dead worker that will
+          # fail on write.
+          readables, = select_sockets(w_sockets, @self_read)
+
+          # If select timed out, retry
+          if readables.nil?
+            Chore.logger.debug 'PWS: All sockets busy.. retry'
+            next
+          end
+
+          # Handle the signal from the self-pipe
+          if readables.include?(@self_read)
+            handle_signal
+            next
+          end
+
+          # Confirm they're actually alive! A socket will be readable even
+          # if the worker has died but not yet been reaped by the master. We
+          # need to confirm that the "Ready" flag has actually been written by
+          # the worker and readable by the master.
+          readables.reject! { |readable| readable.eof? }
+
+          # Check again to see if there are still sockets available
+          if readables.empty?
+            Chore.logger.debug 'PWS: All sockets busy.. retry'
+            next
+          end
+
+          # Fetch and assign work for the readable worker connections
+          @worker_manager.ready_workers(readables) do |workers|
+            WorkDistributor.fetch_and_assign_jobs(workers, @manager)
+          end
+        end
+        Chore.logger.info 'PWS: worker_assignment_loop ending'
+      end
+
+      # Wrapper around @running to help with writing specs for worker_assignment_loop
+      def running?
+        @running
+      end
+
+      def handle_signal
+        signal = NUM_TO_SIGNAL[@self_read.read_nonblock(1)]
+        Chore.logger.info "PWS: recv #{signal}"
+
+        case signal
+        when :CHLD
+          @worker_manager.respawn_terminated_workers!
+        when :INT, :QUIT, :TERM
+          Signal.reset
+          @worker_manager.stop_workers(signal)
+          @manager.shutdown!
+        when :USR1
+          Chore.reopen_logs
+          Chore.logger.info 'PWS: Master process reopened log'
+        end
+      end
+
+      # Wrapper around fork for specs.
+      def fork(&block)
+        Kernel.fork(&block)
+      end
+
+      # In the event of a trapped signal, write to the self-pipe
+      def trap_signals(signal_hash, write_end)
+        Signal.reset
+
+        signal_hash.each do |sig_num, signal|
+          Signal.trap(signal) do
+            write_end.write(sig_num)
+          end
+        end
+      end
+    end
+  end
+end