chore-core 1.8.2 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +1 -1
  3. data/README.md +173 -150
  4. data/chore-core.gemspec +3 -3
  5. data/lib/chore.rb +31 -5
  6. data/lib/chore/cli.rb +22 -4
  7. data/lib/chore/configuration.rb +1 -1
  8. data/lib/chore/consumer.rb +54 -12
  9. data/lib/chore/fetcher.rb +12 -7
  10. data/lib/chore/hooks.rb +2 -1
  11. data/lib/chore/job.rb +19 -0
  12. data/lib/chore/manager.rb +18 -2
  13. data/lib/chore/publisher.rb +18 -2
  14. data/lib/chore/queues/filesystem/consumer.rb +126 -64
  15. data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
  16. data/lib/chore/queues/filesystem/publisher.rb +13 -19
  17. data/lib/chore/queues/sqs.rb +22 -13
  18. data/lib/chore/queues/sqs/consumer.rb +64 -51
  19. data/lib/chore/queues/sqs/publisher.rb +26 -17
  20. data/lib/chore/strategies/consumer/batcher.rb +14 -15
  21. data/lib/chore/strategies/consumer/single_consumer_strategy.rb +5 -5
  22. data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +9 -7
  23. data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +120 -0
  24. data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
  25. data/lib/chore/strategies/worker/helpers/ipc.rb +87 -0
  26. data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
  27. data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
  28. data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
  29. data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
  30. data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
  31. data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
  32. data/lib/chore/strategies/worker/single_worker_strategy.rb +35 -13
  33. data/lib/chore/unit_of_work.rb +10 -1
  34. data/lib/chore/util.rb +5 -1
  35. data/lib/chore/version.rb +3 -3
  36. data/lib/chore/worker.rb +32 -3
  37. data/spec/chore/cli_spec.rb +2 -2
  38. data/spec/chore/consumer_spec.rb +1 -5
  39. data/spec/chore/duplicate_detector_spec.rb +17 -5
  40. data/spec/chore/fetcher_spec.rb +0 -11
  41. data/spec/chore/manager_spec.rb +7 -0
  42. data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +74 -16
  43. data/spec/chore/queues/sqs/consumer_spec.rb +117 -78
  44. data/spec/chore/queues/sqs/publisher_spec.rb +49 -60
  45. data/spec/chore/queues/sqs_spec.rb +32 -41
  46. data/spec/chore/strategies/consumer/batcher_spec.rb +50 -0
  47. data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +3 -3
  48. data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +7 -6
  49. data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
  50. data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +17 -2
  51. data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
  52. data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
  53. data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
  54. data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
  55. data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
  56. data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
  57. data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
  58. data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +25 -0
  59. data/spec/chore/worker_spec.rb +82 -14
  60. data/spec/spec_helper.rb +1 -1
  61. data/spec/support/queues/sqs/fake_objects.rb +18 -0
  62. metadata +39 -15
@@ -0,0 +1,163 @@
1
+ require 'chore/signal'
2
+ require 'socket'
3
+ require 'timeout'
4
+ require 'chore/strategies/worker/helpers/ipc'
5
+
6
+ module Chore
7
+ module Strategy
8
+ class PreforkedWorker #:nodoc:
9
+ include Util
10
+ include Ipc
11
+
12
# Records the parent (manager) pid, starts with an empty per-queue consumer
# cache, marks the worker as running and then runs the post-fork setup.
# +_opts+:: accepted but not used by this method.
def initialize(_opts = {})
  Chore.logger.info("PFW: #{Process.pid} initializing")
  @running = true
  @consumer_cache = {}
  @manager_pid = Process.ppid
  post_fork_setup
end
19
+
20
# Entry point of a preforked worker: connects to the master through
# +master_socket+ and hands the resulting connection to the work loop.
# Any StandardError (including the missing-socket check below) is logged
# and then re-raised to the caller.
def start_worker(master_socket)
  Chore.logger.info('PFW: Worker starting')
  raise 'PFW: Did not get master_socket' unless master_socket

  worker(connect_to_master(master_socket))
rescue => error
  Chore.logger.error("PFW: Shutting down #{error.message} #{error.backtrace}")
  raise error
end
29
+
30
+ private
31
+
32
# Main work loop. While running? is true it waits (up to the configured
# shutdown timeout) for the master connection to become readable, reads a
# message, processes it, runs the WorkerKiller checks and then signals the
# master that it is ready for more work.
#
# The ensure clause always calls exit(true), so this method never returns
# to its caller and the process exits with a success status even when an
# unexpected error ended the loop.
def worker(connection)
  killer = WorkerKiller.new

  while running?
    # Select on the connection to the master
    ready, = select_sockets(connection, nil, Chore.config.shutdown_timeout)
    next if ready.nil? # timeout: loop around and re-check running?

    master = ready.first
    work = read_msg(master)

    if work.nil?
      # When the manager process dies the socket becomes readable but
      # carries no data. Only stop if the manager is really gone;
      # otherwise go back to waiting.
      next unless is_orphan?

      Chore.logger.info "PFW: Manager no longer alive; Shutting down"
      break
    end

    process_work(work)

    killer.check_requests
    killer.check_memory

    # Alert master that worker is ready to receive more work
    signal_ready(master)
  end
rescue Errno::ECONNRESET, Errno::EPIPE
  Chore.logger.info "PFW: Worker-#{Process.pid} lost connection to master, shutting down"
ensure
  Chore.logger.info "PFW: Worker process terminating"
  exit(true)
end
72
+
73
# Tells the work loop whether it should keep going. Reading @running
# through a method (instead of directly) makes it easier to write specs.
def running?
  @running
end
77
+
78
# Performs the connect protocol with the master: opens the child side of
# +master_socket+, sends this process's pid, sends a ready-for-work signal,
# and returns the connection.
def connect_to_master(master_socket)
  Chore.logger.info('PFW: connect protocol started')
  connection = child_connection(master_socket)
  send_msg(connection, Process.pid)
  signal_ready(connection)
  Chore.logger.info('PFW: connect protocol completed')
  connection
end
88
+
89
# Setup executed in the freshly forked worker process. The steps run in a
# fixed order: rename the process, reset the logger, reset queue
# connections, install signal handlers, and only then run the application's
# :after_fork hooks.
def post_fork_setup
  # Swap out the process name right away so the worker does not look like
  # the master process
  procline("#{Chore.config.worker_procline}:Started:#{Time.now}")

  # The logger has to be reset after fork. This fixes a longstanding bug
  # where workers would hang around and never die
  Chore.logger = nil

  settings = Chore.config
  # After a fork the consumer and publisher need their connections reset.
  # What that means is queue dependent and may be a noop.
  settings.consumer.reset_connection!
  # The publisher can be nil due to configuration woes with chore
  settings.publisher.reset_connection! if Chore.config.publisher

  # All signals must be handled before a hook is handed to the application.
  trap_signals

  Chore.run_hooks_for(:after_fork, self)
end
111
+
112
# Runs each unit of work through a Worker, bounded by the item's
# queue_timeout. +work+ may be a single item or an Array of items. Each
# item gets its (cached) consumer assigned before it is started.
# A Timeout::Error is logged and re-raised, so the remaining items of the
# batch are not processed.
def process_work(work)
  items = work.is_a?(Array) ? work : [work]

  items.each do |item|
    item.consumer = consumer(item.queue_name)
    begin
      Timeout.timeout(item.queue_timeout) { Worker.new(item).start }
    rescue Timeout::Error => timeout_error
      Chore.logger.info "PFW: Worker #{Process.pid} timed out"
      Chore.logger.info "PFW: Worker time out set at #{item.queue_timeout} seconds"
      raise timeout_error
    end
  end
end
128
+
129
# Returns the Consumer for +queue+, creating and caching it on first use.
# Consumer objects are reused because it takes 500ms to recreate each one.
# Raises Chore::TerribleMistake when a new consumer would be needed although
# the cache already holds as many entries as there are configured queues.
def consumer(queue)
  @consumer_cache.fetch(queue) do
    raise Chore::TerribleMistake if @consumer_cache.size >= Chore.config.queues.size

    @consumer_cache[queue] = Chore.config.consumer.new(queue)
  end
end
138
+
139
# Installs the worker's signal handlers through Signal (chore/signal is
# required at the top of this file), after resetting any existing ones.
# INT/QUIT/TERM:: stop the work loop, sleep for the configured shutdown
#                 timeout, then exit with a success status.
# USR1::          reopen the log files.
def trap_signals
  Signal.reset

  %i[INT QUIT TERM].each do |name|
    Signal.trap(name) do
      Chore.logger.info "PFW: received signal: #{name}"
      @running = false
      sleep(Chore.config.shutdown_timeout)
      Chore.logger.info "PFW: Worker process terminating"
      exit(true)
    end
  end

  Signal.trap(:USR1) do
    Chore.reopen_logs
    Chore.logger.info "PFW: Worker process reopened log"
  end
end
157
+
158
# True when the current parent pid is no longer the manager pid that was
# recorded when this worker was initialized.
def is_orphan?
  @manager_pid != Process.ppid
end
161
+ end
162
+ end
163
+ end
@@ -0,0 +1,65 @@
1
+ require 'chore/strategies/worker/helpers/ipc'
2
+
3
+ module Chore
4
+ module Strategy
5
+ class WorkDistributor #:nodoc:
6
+ class << self
7
+ include Ipc
8
+
9
# Fetches up to workers.size jobs from +manager+, pushes them to the given
# +workers+ and hands every job that could not be pushed back to the
# manager via return_work.
# Raises a RuntimeError when the manager does not return an Array.
# Returns nil (after a short sleep) when there was nothing to assign.
def fetch_and_assign_jobs(workers, manager)
  fetched = manager.fetch_work(workers.size)
  raise "DW: jobs needs to be a list got #{fetched.class}" unless fetched.is_a?(Array)

  unless fetched.empty?
    return manager.return_work(assign_jobs(fetched, workers))
  end

  # This condition is due to the internal consumer queue being empty.
  # Assuming that the consumer has to fetch from an external queue,
  # returning immediately would create a tight loop that uses up a lot of
  # the CPU's time. To prevent that, we wait for the consumer queue to be
  # populated by sleeping.
  sleep(0.1)
  nil
end
24
+
25
+ private
26
+
27
# Pairs jobs[i] with workers[i] and pushes each job to its worker.
# Returns the jobs whose push failed so they can be handed back.
# Raises a RuntimeError when +workers+ is empty, or as soon as a job has no
# worker at its position (jobs before that position were already pushed).
def assign_jobs(jobs, workers)
  raise 'DW: assign_jobs got 0 workers' if workers.empty?

  jobs.zip(workers).each_with_object([]) do |(job, target), unassigned|
    raise 'DW: More Jobs than Sockets' if target.nil?

    unassigned << job unless push_job_to_worker(job, target)
  end
end
39
+
40
# Runs the :before_send_to_worker hooks, clears the worker's ready signal
# and sends +job+ over the worker's socket.
# Returns true on success. Any StandardError is logged and turned into
# false so that the caller can re-assign the job.
def push_job_to_worker(job, worker)
  Chore.run_hooks_for(:before_send_to_worker, job)
  clear_ready(worker.socket)
  send_msg(worker.socket, job)
  true
rescue => error
  trace = error.backtrace.join("\n")
  Chore.logger.error "DW: Could not assign job #{job.inspect} (worker: #{worker.pid})\nException #{error.message} #{trace}"

  # We generally shouldn't get into this situation, since we've already
  # tested that we can read/write to the Worker's socket. However, the
  # Worker could still fail between that check and pushing the job, so the
  # work must be allowed to be re-assigned to handle that edge case.
  false
end
55
+
56
+ private
57
+
58
# Thin wrapper around Kernel.sleep; it exists so unit tests have a method
# to stub.
def sleep(n)
  Kernel.sleep(n)
end
62
+ end
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,13 @@
1
module Chore
  module Strategy
    # Holds meta information about a worker: its pid and its connection
    # socket.
    class WorkerInfo
      attr_accessor :pid, :socket

      # +pid+:: process id of the worker. The socket starts out as nil and
      #         is assigned later through the accessor.
      def initialize(pid)
        @socket = nil
        @pid = pid
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'get_process_mem'
2
+
3
module Chore
  module Strategy
    # Terminates the current worker process once it has exceeded the
    # configured memory limit or request limit. Both checks are meant to be
    # called once per processed unit of work.
    class WorkerKiller #:nodoc:
      # Number of check_memory calls between two memory samples when
      # Chore.config.worker_check_cycle is not set.
      DEFAULT_CHECK_CYCLE = 16

      # Reads the limits from Chore.config. A limit of nil or 0 disables the
      # corresponding check.
      def initialize
        @memory_limit = Chore.config.memory_limit_bytes
        @request_limit = Chore.config.request_limit
        @check_cycle = Chore.config.worker_check_cycle || DEFAULT_CHECK_CYCLE
        @check_count = 0
        @current_requests = 0
      end

      # Samples the process memory on every @check_cycle-th call and exits
      # the process (after running the :worker_mem_kill hooks) when it is
      # above the memory limit.
      def check_memory
        return if @memory_limit.nil? || (@memory_limit == 0)

        @check_count += 1
        # ">=" rather than "==": with a check cycle of 0 (or less) an
        # equality test would never match, which would silently disable the
        # memory check and let the counter grow without bound.
        return if @check_count < @check_cycle

        rss = GetProcessMem.new.bytes.to_i
        if rss > @memory_limit
          Chore.logger.info "WK: (pid: #{Process.pid}) exceeded memory limit (#{rss} bytes > #{@memory_limit} bytes)"
          Chore.run_hooks_for(:worker_mem_kill)
          exit(true)
        end
        @check_count = 0
      end

      # Counts one processed request and exits the process (after running
      # the :worker_req_kill hooks) once the request limit is reached.
      def check_requests
        return if @request_limit.nil? || (@request_limit == 0)

        @current_requests += 1
        return if @current_requests < @request_limit

        Chore.logger.info "WK: (pid: #{Process.pid}) exceeded max number of requests (limit: #{@request_limit})"
        Chore.run_hooks_for(:worker_req_kill)
        exit(true)
      end
    end
  end
end
@@ -0,0 +1,183 @@
1
+ require 'chore/strategies/worker/helpers/ipc'
2
+
3
+ module Chore
4
+ module Strategy
5
+ class WorkerManager #:nodoc:
6
+ include Ipc
7
+
8
# +master_socket+:: socket that is handed to every worker this manager
#                   starts. The manager begins with no known workers.
def initialize(master_socket)
  @socket_to_worker = {}
  @pid_to_worker = {}
  @master_socket = master_socket
end
13
+
14
# Creates the missing worker processes and then attaches a socket to each
# newly created worker for the master.
def create_and_attach_workers
  create_workers { |num_workers| attach_workers(num_workers) }
end
21
+
22
# Reaps the workers that have died and creates (and attaches) new ones to
# take their place.
def respawn_terminated_workers!
  Chore.logger.info('WM: Respawning terminated workers')
  reap_workers
  create_and_attach_workers
end
28
+
29
# Sends +sig+ to every known worker process and then reaps the workers.
# A worker that no longer exists (Errno::ESRCH) is logged and skipped.
# Note that reap_workers is called right after signalling; there is no
# waiting period here (see the TODO below).
def stop_workers(sig)
  @pid_to_worker.each_key do |pid|
    begin
      Chore.logger.info { "WM: Sending #{sig} to: #{pid}" }
      Process.kill(sig, pid)
    rescue Errno::ESRCH => error
      Chore.logger.error "WM: Signal to children error: #{error}"
    end
  end

  # TODO: Sleep for the shutdown timeout and kill any remaining workers
  reap_workers
end
42
+
43
# Returns the sockets of all attached workers.
def worker_sockets
  @socket_to_worker.keys
end
47
+
48
# Returns the workers associated with the given array of sockets, in the
# same order; a socket without an attached worker yields nil at its
# position.
# +block+:: optional; receives the array of workers before it is returned.
def ready_workers(sockets = [], &block)
  @socket_to_worker.values_at(*sockets).tap do |workers|
    yield workers if block_given?
  end
end
56
+
57
+ private
58
+
59
# Forks worker processes until as many workers are registered as the
# configuration asks for. Each new process initializes and starts a worker
# instance. Raises a RuntimeError when the number of registered workers
# does not match the configuration afterwards.
# +block+:: optional; receives the number of newly created workers.
# Returns the number of newly created workers.
def create_workers(&block)
  created = 0

  until @pid_to_worker.size >= Chore.config.num_workers
    pid = fork { run_worker_instance }

    Chore.logger.info "WM: created_worker #{pid}"
    # Keep track of the new worker process
    @pid_to_worker[pid] = WorkerInfo.new(pid)
    created += 1
  end

  raise 'WM: Not enough workers' if inconsistent_worker_number

  Chore.logger.info "WM: created #{created} workers"
  yield created if block_given?
  created
end
83
+
84
# True when the number of workers registered in the master differs from
# the configured number of workers.
def inconsistent_worker_number
  @pid_to_worker.size != Chore.config.num_workers
end
88
+
89
# Initializes and starts a new worker instance on the master socket.
# The ensure clause exits the process with a success status no matter how
# the worker finished, so this method does not return.
def run_worker_instance
  instance = PreforkedWorker.new
  instance.start_worker(@master_socket)
ensure
  exit(true)
end
95
+
96
# Creates +num+ sockets and attaches each one to the worker that reports
# its pid over it.
#
# A socket is closed and skipped when
# * no worker reports on it within 2 seconds,
# * the reported pid does not belong to a known worker (this includes a
#   nil message), or
# * the connection is reset.
#
# Workers that are still without a socket afterwards are killed.
def attach_workers(num)
  Chore.logger.info "WM: Started attaching #{num} workers"

  create_worker_sockets(num).each do |socket|
    begin
      readable, = select_sockets(socket, nil, 2)

      if readable.nil?
        Chore.logger.info "WM: #{socket} timeout waiting for a worker"
        socket.close
        next
      end

      r_socket = readable.first
      reported_pid = read_msg(r_socket)
      assigned_worker = @pid_to_worker[reported_pid]

      # Without this guard an unknown or missing pid would raise
      # NoMethodError on nil in the master process.
      if assigned_worker.nil?
        Chore.logger.error "WM: #{socket} reported unknown worker pid #{reported_pid.inspect}"
        socket.close
        next
      end

      assigned_worker.socket = socket
      @socket_to_worker[socket] = assigned_worker

      Chore.logger.info "WM: Connected #{reported_pid} with #{r_socket}"
    rescue Errno::ECONNRESET
      Chore.logger.info "WM: A worker failed to connect to #{socket}"
      socket.close
      next
    end
  end

  # If the connection from a worker times out, we are unable to associate
  # the process with a connection and so we kill the worker process
  kill_unattached_workers
  Chore.logger.info 'WM: Finished attaching workers'
end
131
+
132
# Builds an array of +num+ sockets (one add_worker_socket call each) that
# are available for worker connections.
def create_worker_sockets(num)
  Array.new(num) { add_worker_socket }
end
138
+
139
# Sends KILL to every registered worker that has no socket, i.e. that
# failed to connect to the master.
# A worker that has already exited (Errno::ESRCH) is logged and skipped so
# that the remaining unattached workers are still killed and the error
# does not propagate into the master.
def kill_unattached_workers
  @pid_to_worker.each do |pid, worker|
    next unless worker.socket.nil?

    Chore.logger.info "WM: kill_unattached_workers #{pid}"
    begin
      Process.kill('KILL', pid)
    rescue Errno::ESRCH => e
      Chore.logger.error "WM: Signal to children error: #{e}"
    end
  end
end
147
+
148
# Collects the workers that have terminated and removes their references
# from the master: the pid entry, and (if the worker had one) its socket,
# which is closed first.
# Returns the Hash of reaped workers keyed by pid.
def reap_workers
  Chore.logger.info "WM: reaping workers.."
  dead_workers = @pid_to_worker.select { |pid, _worker| reap_process(pid) }

  dead_workers.each do |pid, worker|
    @pid_to_worker.delete(pid)

    # A worker that never attached has no socket; calling close on nil
    # would raise NoMethodError here.
    socket = worker.socket
    unless socket.nil?
      socket.close
      @socket_to_worker.delete(socket)
    end

    Chore.logger.info "WM: Removed preforked worker:#{worker.pid} - #{worker.socket}"
  end
end
163
+
164
# Non-blocking check on whether process +pid+ has stopped.
# Returns false while it is still running, and true when its status was
# collected or when there is no such child (Errno::ECHILD).
def reap_process(pid)
  reaped = Process.wait(pid, Process::WNOHANG)
  return false if reaped.nil? # Process is still running

  true if pid == reaped # Collected status of this pid
rescue Errno::ECHILD
  # Child process has already terminated
  true
end
177
+
178
# Thin wrapper that forwards the given block to Kernel.fork.
def fork(&block)
  Kernel.fork(&block)
end
181
+ end
182
+ end
183
+ end