chore-core 1.8.2 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +1 -1
  3. data/README.md +173 -150
  4. data/chore-core.gemspec +3 -3
  5. data/lib/chore.rb +31 -5
  6. data/lib/chore/cli.rb +22 -4
  7. data/lib/chore/configuration.rb +1 -1
  8. data/lib/chore/consumer.rb +54 -12
  9. data/lib/chore/fetcher.rb +12 -7
  10. data/lib/chore/hooks.rb +2 -1
  11. data/lib/chore/job.rb +19 -0
  12. data/lib/chore/manager.rb +18 -2
  13. data/lib/chore/publisher.rb +18 -2
  14. data/lib/chore/queues/filesystem/consumer.rb +126 -64
  15. data/lib/chore/queues/filesystem/filesystem_queue.rb +19 -0
  16. data/lib/chore/queues/filesystem/publisher.rb +13 -19
  17. data/lib/chore/queues/sqs.rb +22 -13
  18. data/lib/chore/queues/sqs/consumer.rb +64 -51
  19. data/lib/chore/queues/sqs/publisher.rb +26 -17
  20. data/lib/chore/strategies/consumer/batcher.rb +14 -15
  21. data/lib/chore/strategies/consumer/single_consumer_strategy.rb +5 -5
  22. data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +9 -7
  23. data/lib/chore/strategies/consumer/throttled_consumer_strategy.rb +120 -0
  24. data/lib/chore/strategies/worker/forked_worker_strategy.rb +5 -6
  25. data/lib/chore/strategies/worker/helpers/ipc.rb +87 -0
  26. data/lib/chore/strategies/worker/helpers/preforked_worker.rb +163 -0
  27. data/lib/chore/strategies/worker/helpers/work_distributor.rb +65 -0
  28. data/lib/chore/strategies/worker/helpers/worker_info.rb +13 -0
  29. data/lib/chore/strategies/worker/helpers/worker_killer.rb +40 -0
  30. data/lib/chore/strategies/worker/helpers/worker_manager.rb +183 -0
  31. data/lib/chore/strategies/worker/preforked_worker_strategy.rb +150 -0
  32. data/lib/chore/strategies/worker/single_worker_strategy.rb +35 -13
  33. data/lib/chore/unit_of_work.rb +10 -1
  34. data/lib/chore/util.rb +5 -1
  35. data/lib/chore/version.rb +3 -3
  36. data/lib/chore/worker.rb +32 -3
  37. data/spec/chore/cli_spec.rb +2 -2
  38. data/spec/chore/consumer_spec.rb +1 -5
  39. data/spec/chore/duplicate_detector_spec.rb +17 -5
  40. data/spec/chore/fetcher_spec.rb +0 -11
  41. data/spec/chore/manager_spec.rb +7 -0
  42. data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +74 -16
  43. data/spec/chore/queues/sqs/consumer_spec.rb +117 -78
  44. data/spec/chore/queues/sqs/publisher_spec.rb +49 -60
  45. data/spec/chore/queues/sqs_spec.rb +32 -41
  46. data/spec/chore/strategies/consumer/batcher_spec.rb +50 -0
  47. data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +3 -3
  48. data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +7 -6
  49. data/spec/chore/strategies/consumer/throttled_consumer_strategy_spec.rb +165 -0
  50. data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +17 -2
  51. data/spec/chore/strategies/worker/helpers/ipc_spec.rb +127 -0
  52. data/spec/chore/strategies/worker/helpers/preforked_worker_spec.rb +236 -0
  53. data/spec/chore/strategies/worker/helpers/work_distributor_spec.rb +131 -0
  54. data/spec/chore/strategies/worker/helpers/worker_info_spec.rb +14 -0
  55. data/spec/chore/strategies/worker/helpers/worker_killer_spec.rb +97 -0
  56. data/spec/chore/strategies/worker/helpers/worker_manager_spec.rb +304 -0
  57. data/spec/chore/strategies/worker/preforked_worker_strategy_spec.rb +183 -0
  58. data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +25 -0
  59. data/spec/chore/worker_spec.rb +82 -14
  60. data/spec/spec_helper.rb +1 -1
  61. data/spec/support/queues/sqs/fake_objects.rb +18 -0
  62. metadata +39 -15
@@ -0,0 +1,163 @@
1
+ require 'chore/signal'
2
+ require 'socket'
3
+ require 'timeout'
4
+ require 'chore/strategies/worker/helpers/ipc'
5
+
6
+ module Chore
7
+ module Strategy
8
# A worker that runs inside a process forked by the manager. It connects
# back to the manager over a socket, reports its pid, and then loops:
# wait for work, run it, and signal that it is ready for more.
class PreforkedWorker #:nodoc:
  include Util
  include Ipc

  # Records the parent pid (used later by +is_orphan?+), prepares the
  # per-queue consumer cache and runs the post-fork setup.
  # +_opts+:: accepted for interface compatibility; not used here.
  def initialize(_opts = {})
    Chore.logger.info "PFW: #{Process.pid} initializing"
    @manager_pid = Process.ppid
    @consumer_cache = {}
    @running = true
    post_fork_setup
  end

  # Connects to the manager through +master_socket+ and enters the work
  # loop. Raises a RuntimeError when +master_socket+ is nil. Any
  # StandardError is logged and then re-raised to the caller.
  def start_worker(master_socket)
    Chore.logger.info 'PFW: Worker starting'
    raise 'PFW: Did not get master_socket' unless master_socket
    connection = connect_to_master(master_socket)
    worker(connection)
  rescue => e
    Chore.logger.error "PFW: Shutting down #{e.message} #{e.backtrace}"
    raise
  end

  private

  # The work loop. Runs until +running?+ turns false, the manager is found
  # to be dead, or the connection breaks. Always ends the process with
  # exit(true).
  def worker(connection)
    worker_killer = WorkerKiller.new
    while running?
      # Wait for the connection to the master to become readable
      readables, _, _ = select_sockets(connection, nil, Chore.config.shutdown_timeout)

      next if readables.nil? # timeout

      read_socket = readables.first

      # Get the work from the connection to master
      work = read_msg(read_socket)

      # When the Master (manager process) dies, the sockets are set to
      # readable, but there is no data in the socket. In this case we check
      # to see if the manager is actually dead, and in that case, we exit.
      if work.nil?
        next unless is_orphan?
        Chore.logger.info "PFW: Manager no longer alive; Shutting down"
        break
      end

      # Do the work
      process_work(work)

      worker_killer.check_requests
      worker_killer.check_memory

      # Alert master that worker is ready to receive more work
      signal_ready(read_socket)
    end
  rescue Errno::ECONNRESET, Errno::EPIPE
    Chore.logger.info "PFW: Worker-#{Process.pid} lost connection to master, shutting down"
  rescue => e
    # The exit in the ensure clause below raises SystemExit, which replaces
    # this exception before any caller can log it. Record it here so that
    # unexpected failures are not lost.
    Chore.logger.error "PFW: Worker-#{Process.pid} shutting down after error: #{e.message} #{e.backtrace}"
    raise
  ensure
    Chore.logger.info "PFW: Worker process terminating"
    exit(true)
  end

  # Method wrapper around @running makes it easier to write specs
  def running?
    @running
  end

  # Connects to the master socket, sends its PID, sends a ready-for-work
  # message, and returns the connection
  def connect_to_master(master_socket)
    Chore.logger.info 'PFW: connect protocol started'
    child_connection(master_socket).tap do |conn|
      send_msg(conn, Process.pid)
      signal_ready(conn)
      Chore.logger.info 'PFW: connect protocol completed'
    end
  end

  # Per-process initialisation that has to happen right after the fork:
  # process title, logger, queue connections, signal handlers and the
  # application's :after_fork hooks, in that order.
  def post_fork_setup
    # Immediately swap out the process name so that it doesn't look like
    # the master process
    procline("#{Chore.config.worker_procline}:Started:#{Time.now}")

    # We need to reset the logger after fork. This fixes a longstanding bug
    # where workers would hang around and never die
    Chore.logger = nil

    config = Chore.config
    # When we fork, the consumers/publishers need their connections reset.
    # The specifics of this are queue dependent, and may result in a noop.
    config.consumer.reset_connection!
    # It is possible for this to be nil due to configuration woes with chore
    config.publisher.reset_connection! if Chore.config.publisher

    # Ensure that all signals are handled before we hand off a hook to the
    # application.
    trap_signals

    Chore.run_hooks_for(:after_fork, self)
  end

  # Runs each unit of work under its queue timeout. A single item is
  # wrapped into a one-element list. A Timeout::Error is logged and then
  # re-raised.
  def process_work(work)
    work = [work] unless work.is_a?(Array)
    work.each do |item|
      item.consumer = consumer(item.queue_name)
      begin
        Timeout.timeout(item.queue_timeout) do
          Worker.new(item).start
        end
      rescue Timeout::Error
        Chore.logger.info "PFW: Worker #{Process.pid} timed out"
        Chore.logger.info "PFW: Worker time out set at #{item.queue_timeout} seconds"
        raise
      end
    end
  end

  # We need to reuse Consumer objects because it takes 500ms to recreate
  # each one. Raises Chore::TerribleMistake when a new consumer would make
  # the cache larger than the number of configured queues.
  def consumer(queue)
    unless @consumer_cache.key?(queue)
      raise Chore::TerribleMistake if @consumer_cache.size >= Chore.config.queues.size
      @consumer_cache[queue] = Chore.config.consumer.new(queue)
    end
    @consumer_cache[queue]
  end

  # Installs this worker's signal handlers: INT/QUIT/TERM stop the loop,
  # wait for the shutdown timeout and exit; USR1 reopens the logs.
  def trap_signals
    Signal.reset

    [:INT, :QUIT, :TERM].each do |signal|
      Signal.trap(signal) do
        Chore.logger.info "PFW: received signal: #{signal}"
        @running = false
        sleep(Chore.config.shutdown_timeout)
        Chore.logger.info "PFW: Worker process terminating"
        exit(true)
      end
    end

    Signal.trap(:USR1) do
      Chore.reopen_logs
      Chore.logger.info "PFW: Worker process reopened log"
    end
  end

  # True when the current parent pid is no longer the pid recorded at
  # initialisation.
  def is_orphan?
    Process.ppid != @manager_pid
  end
end
162
+ end
163
+ end
@@ -0,0 +1,65 @@
1
+ require 'chore/strategies/worker/helpers/ipc'
2
+
3
+ module Chore
4
+ module Strategy
5
# Hands jobs fetched by the manager to the workers that are ready for
# them, and gives back to the manager any job that could not be sent.
class WorkDistributor #:nodoc:
  class << self
    include Ipc

    # Fetches up to workers.size jobs from +manager+ and sends one to each
    # worker. Jobs that could not be sent are passed to
    # manager.return_work. Raises when the manager does not return an Array.
    def fetch_and_assign_jobs(workers, manager)
      jobs = manager.fetch_work(workers.size)
      raise "DW: jobs needs to be a list got #{jobs.class}" unless jobs.is_a?(Array)

      if jobs.empty?
        # This condition is due to the internal consumer queue being empty.
        # Assuming that the consumer has to fetch from an external queue,
        # returning straight away would create a tight loop that uses up a
        # lot of CPU time. To prevent that, we sleep and give the consumer
        # queue a chance to be populated.
        sleep(0.1)
        nil
      else
        manager.return_work(assign_jobs(jobs, workers))
      end
    end

    private

    # Sends the i-th job to the i-th worker and returns the list of jobs
    # that could not be sent. Raises when there are no workers at all or
    # when a job has no worker at its position.
    def assign_jobs(jobs, workers)
      raise 'DW: assign_jobs got 0 workers' if workers.empty?

      unsent = jobs.each_with_index.reject do |job, position|
        target = workers[position]
        raise 'DW: More Jobs than Sockets' if target.nil?
        push_job_to_worker(job, target)
      end

      unsent.map(&:first)
    end

    # Runs the :before_send_to_worker hooks, clears the worker's ready
    # flag and writes the job to its socket. Returns true on success and
    # false (after logging) when any StandardError is raised.
    def push_job_to_worker(job, worker)
      Chore.run_hooks_for(:before_send_to_worker, job)
      clear_ready(worker.socket)
      send_msg(worker.socket, job)
      true
    rescue => error
      Chore.logger.error "DW: Could not assign job #{job.inspect} (worker: #{worker.pid})\nException #{error.message} #{error.backtrace.join("\n")}"

      # We generally shouldn't get into this situation since we've already
      # tested that we can read/write to the Worker's socket. However, the
      # Worker could still fail between that check and pushing the job, so
      # the work has to stay available for re-assignment.
      false
    end

    # Used for unit tests
    def sleep(n)
      Kernel.sleep(n)
    end
  end
end
64
+ end
65
+ end
@@ -0,0 +1,13 @@
1
+ module Chore
2
+ module Strategy
3
# Holds meta information about a worker: its pid and, once one has been
# assigned, its connection socket.
class WorkerInfo
  attr_accessor :pid
  attr_accessor :socket

  # +pid+:: process id of the worker. The socket starts out as nil.
  def initialize(pid)
    @socket = nil
    @pid = pid
  end
end
12
+ end
13
+ end
@@ -0,0 +1,40 @@
1
+ require 'get_process_mem'
2
+
3
+ module Chore
4
+ module Strategy
5
# Ends the current process when it has gone over its configured memory
# limit or has handled its configured number of requests.
class WorkerKiller #:nodoc:
  # Reads the limits from Chore.config. The check cycle falls back to 16
  # when it is not configured.
  def initialize
    settings = Chore.config
    @memory_limit = settings.memory_limit_bytes
    @request_limit = settings.request_limit
    @check_cycle = settings.worker_check_cycle || 16
    @check_count = 0
    @current_requests = 0
  end

  # Counts calls and, each time the count reaches the check cycle, compares
  # the process memory reported by GetProcessMem against the limit. Over
  # the limit: logs, runs the :worker_mem_kill hooks and exits. Does
  # nothing when the limit is nil or 0.
  def check_memory
    return if @memory_limit.nil? || (@memory_limit == 0)

    @check_count += 1
    return unless @check_count == @check_cycle

    rss = GetProcessMem.new.bytes.to_i
    if rss > @memory_limit
      Chore.logger.info "WK: (pid: #{Process.pid}) exceeded memory limit (#{rss} bytes > #{@memory_limit} bytes)"
      Chore.run_hooks_for(:worker_mem_kill)
      exit(true)
    end
    @check_count = 0
  end

  # Counts one request. Once the count reaches the limit: logs, runs the
  # :worker_req_kill hooks and exits. Does nothing when the limit is nil
  # or 0.
  def check_requests
    return if @request_limit.nil? || (@request_limit == 0)

    @current_requests += 1
    return if @current_requests < @request_limit

    Chore.logger.info "WK: (pid: #{Process.pid}) exceeded max number of requests (limit: #{@request_limit})"
    Chore.run_hooks_for(:worker_req_kill)
    exit(true)
  end
end
39
+ end
40
+ end
@@ -0,0 +1,183 @@
1
+ require 'chore/strategies/worker/helpers/ipc'
2
+
3
+ module Chore
4
+ module Strategy
5
# Owns the preforked worker processes on behalf of the master: forks them,
# pairs each one with a socket, signals them to stop and reaps the ones
# that have terminated.
class WorkerManager #:nodoc:
  include Ipc

  # +master_socket+:: the socket that new workers connect back to.
  def initialize(master_socket)
    @master_socket = master_socket
    @pid_to_worker = {}
    @socket_to_worker = {}
  end

  # Create num of missing workers and sockets and attach them for the
  # master
  def create_and_attach_workers
    create_workers do |num_workers|
      attach_workers(num_workers)
    end
  end

  # Reap dead workers and create new ones to replace them
  def respawn_terminated_workers!
    Chore.logger.info 'WM: Respawning terminated workers'
    reap_workers
    create_and_attach_workers
  end

  # Send the given kill signal to every known child, then reap the ones
  # that have already terminated. A child that no longer exists is logged
  # and skipped.
  def stop_workers(sig)
    @pid_to_worker.each_key do |pid|
      begin
        Chore.logger.info { "WM: Sending #{sig} to: #{pid}" }
        Process.kill(sig, pid)
      rescue Errno::ESRCH => e
        Chore.logger.error "WM: Signal to children error: #{e}"
      end
    end
    # TODO: Sleep for the shutdown timeout and kill any remaining workers
    reap_workers
  end

  # Return all the worker sockets
  def worker_sockets
    @socket_to_worker.keys
  end

  # Return the workers associated with a given array of sockets.
  # +block+:: A block can be provided to perform tasks on the workers
  #           associated with the sockets given
  def ready_workers(sockets = [], &block)
    workers = @socket_to_worker.values_at(*sockets)
    yield workers if block_given?
    workers
  end

  private

  # Creates worker processes until we have the number of workers defined
  # by the configuration. Initializes and starts a worker instance in each
  # of the new processes. Returns the number of workers created.
  # +block+:: Block can be provided to run tasks on the number of newly
  #           created worker processes.
  def create_workers(&block)
    num_created_workers = 0

    while @pid_to_worker.size < Chore.config.num_workers
      pid = fork do
        run_worker_instance
      end

      Chore.logger.info "WM: created_worker #{pid}"
      # Keep track of the new worker process
      @pid_to_worker[pid] = WorkerInfo.new(pid)
      num_created_workers += 1
    end

    raise 'WM: Not enough workers' if inconsistent_worker_number
    Chore.logger.info "WM: created #{num_created_workers} workers"
    yield num_created_workers if block_given?
    num_created_workers
  end

  # Check that number of workers registered in master match the config
  def inconsistent_worker_number
    Chore.config.num_workers != @pid_to_worker.size
  end

  # Initialize and start a new worker instance. The process always exits
  # afterwards, whether or not the worker raised.
  def run_worker_instance
    PreforkedWorker.new.start_worker(@master_socket)
  ensure
    exit(true)
  end

  # Creates individual sockets for each worker to use and attaches them to
  # the correct worker. A socket is closed and skipped when no worker
  # reports on it in time, when the connection is reset, or when the
  # reported pid does not belong to a known worker.
  def attach_workers(num)
    Chore.logger.info "WM: Started attaching #{num} workers"

    create_worker_sockets(num).each do |socket|
      begin
        readable, _, _ = select_sockets(socket, nil, 2)

        if readable.nil?
          Chore.logger.info "WM: #{socket} timeout waiting for a worker"
          socket.close
          next
        end

        r_socket = readable.first
        reported_pid = read_msg(r_socket)
        assigned_worker = @pid_to_worker[reported_pid]

        # The message can be missing or can name a pid we never forked.
        # Calling a method on the nil lookup result would take the manager
        # down, so drop the socket instead.
        if assigned_worker.nil?
          Chore.logger.error "WM: #{socket} reported unknown worker pid #{reported_pid.inspect}"
          socket.close
          next
        end

        assigned_worker.socket = socket
        @socket_to_worker[socket] = assigned_worker

        Chore.logger.info "WM: Connected #{reported_pid} with #{r_socket}"
      rescue Errno::ECONNRESET
        Chore.logger.info "WM: A worker failed to connect to #{socket}"
        socket.close
        next
      end
    end

    # If the connection from a worker times out, we are unable to associate
    # the process with a connection and so we kill the worker process
    kill_unattached_workers
    Chore.logger.info 'WM: Finished attaching workers'
  end

  # Create num amount of sockets that are available for worker connections
  def create_worker_sockets(num)
    Array.new(num) do
      add_worker_socket
    end
  end

  # Kill workers that failed to connect to the master. A worker that no
  # longer exists is logged and skipped, matching stop_workers.
  def kill_unattached_workers
    @pid_to_worker.each do |pid, worker|
      next unless worker.socket.nil?
      Chore.logger.info "WM: kill_unattached_workers #{pid}"
      begin
        Process.kill('KILL', pid)
      rescue Errno::ESRCH => e
        Chore.logger.error "WM: Signal to children error: #{e}"
      end
    end
  end

  # Collect terminated workers and remove their references from master
  def reap_workers
    Chore.logger.info "WM: reaping workers.."
    dead_workers = @pid_to_worker.select do |pid, _worker|
      reap_process(pid)
    end

    dead_workers.each_key do |pid|
      dead_worker = @pid_to_worker.delete(pid)
      socket = dead_worker.socket

      # Workers killed by kill_unattached_workers never got a socket, so
      # there is nothing to close or unregister for them.
      if socket
        socket.close
        @socket_to_worker.delete(socket)
      end
      Chore.logger.info "WM: Removed preforked worker:#{dead_worker.pid} - #{socket}"
    end
  end

  # Non-blocking wait for process to die. Returns whether it stopped
  def reap_process(pid)
    status = Process.wait(pid, Process::WNOHANG)
    case status
    when nil # Process is still running
      return false
    when pid # Collected status of this pid
      return true
    end
  rescue Errno::ECHILD
    # Child process has already terminated
    true
  end

  # Thin wrapper around Kernel.fork.
  def fork(&block)
    Kernel.fork(&block)
  end
end
182
+ end
183
+ end