qless-pool 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Changelog.md +3 -0
- data/LICENSE.txt +20 -0
- data/README.md +159 -0
- data/Rakefile +30 -0
- data/bin/qless-pool +7 -0
- data/features/basic_daemon_config.feature +68 -0
- data/features/step_definitions/daemon_steps.rb +33 -0
- data/features/step_definitions/qless-pool_steps.rb +156 -0
- data/features/support/aruba_daemon_support.rb +76 -0
- data/features/support/env.rb +1 -0
- data/lib/qless/pool.rb +415 -0
- data/lib/qless/pool/cli.rb +136 -0
- data/lib/qless/pool/logging.rb +65 -0
- data/lib/qless/pool/pool_factory.rb +43 -0
- data/lib/qless/pool/pooled_worker.rb +21 -0
- data/lib/qless/pool/tasks.rb +20 -0
- data/lib/qless/pool/version.rb +5 -0
- data/man/qless-pool.1 +88 -0
- data/man/qless-pool.1.ronn +92 -0
- data/man/qless-pool.yml.5 +46 -0
- data/man/qless-pool.yml.5.ronn +41 -0
- data/spec/mock_config.rb +6 -0
- data/spec/qless-pool-custom.yml.erb +1 -0
- data/spec/qless-pool.yml +13 -0
- data/spec/qless_pool_spec.rb +166 -0
- data/spec/spec_helper.rb +3 -0
- metadata +213 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'aruba/cucumber'
|
2
|
+
require 'aruba/api'
|
3
|
+
require 'aruba/process'
|
4
|
+
|
5
|
+
module Aruba

  module Api

    # Repeatedly run the given block until it stops raising an RSpec
    # expectation failure, sleeping one second between attempts. Gives up
    # and re-raises after `timeout` attempts. (Self-described "horrible
    # hack" to let an async daemon catch up before the next step.)
    def keep_trying(timeout=10, tries=0)
      puts "Try: #{tries}" if @announce_env
      yield
    rescue RSpec::Expectations::ExpectationNotMetError
      raise if tries >= timeout
      sleep 1
      tries += 1
      retry
    end

    # Launch +cmd+ via aruba's #run and remember it for later pid lookups.
    def run_background(cmd)
      @background = run(cmd)
    end

    # Deliver +signal+ to the registered aruba process for +cmd+.
    def send_signal(cmd, signal)
      announce_or_puts "$ kill -#{signal} #{processes[cmd].pid}" if @announce_env
      processes[cmd].send_signal signal
    end

    # Pid of the backgrounded process, preferring one read from a pidfile.
    def background_pid
      @pid_from_pidfile || @background.pid
    end

    # Like all_stdout, but doesn't stop processes first.
    def interactive_stdout
      only_processes.inject("") { |buffer, child| buffer << child.stdout(@aruba_keep_ansi) }
    end

    # Like all_stderr, but doesn't stop processes first.
    def interactive_stderr
      only_processes.inject("") { |buffer, child| buffer << child.stderr(@aruba_keep_ansi) }
    end

    # Like all_output, but doesn't stop processes first.
    def interactive_output
      interactive_stdout << interactive_stderr
    end

    # Replace the literal token '$PID' in +string+ with the background pid.
    def interpolate_background_pid(string)
      interpolated = string.gsub('$PID', background_pid.to_s)
      announce_or_puts interpolated if @announce_env
      interpolated
    end

    # NOTE(review): intentionally a no-op — the original cleanup was
    # commented out and is preserved here unchanged.
    def kill_all_processes!
      # stop_processes!
      #rescue
      #  processes.each {|cmd,process| send_signal(cmd, 'KILL') }
      #  raise
    end

  end

  # Expose pid / signal delivery on aruba's Process wrapper by delegating
  # to the underlying ChildProcess instance.
  class Process
    def pid
      @process.pid
    end

    def send_signal signal
      @process.send :send_signal, signal
    end
  end

end
|
@@ -0,0 +1 @@
|
|
1
|
+
ENV["RAILS_ENV"] = "test"  # force the test environment for the cucumber suite
|
data/lib/qless/pool.rb
ADDED
@@ -0,0 +1,415 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
require 'erb'
require 'fcntl'
require 'socket'
require 'yaml'
require 'qless'
require 'qless/worker'
require 'qless/pool/version'
require 'qless/pool/logging'
require 'qless/pool/pooled_worker'
require 'qless/pool/pool_factory'
|
11
|
+
|
12
|
+
module Qless
  # Preforking master that spawns and supervises one qless worker process
  # per configured queue slot, resque-pool style. The master traps the
  # signals in QUEUE_SIGS, queues them, and handles them in #join.
  class Pool
    SIG_QUEUE_MAX_SIZE = 5
    DEFAULT_WORKER_INTERVAL = 5
    QUEUE_SIGS = [ :QUIT, :INT, :TERM, :USR1, :USR2, :CONT, :HUP, :WINCH, ]
    CHUNK_SIZE = (16 * 1024)

    include Logging
    extend Logging
    attr_reader :config
    attr_reader :workers

    # config may be a Hash, a path to a YAML file, or nil (auto-detect).
    def initialize(config)
      init_config(config)
      # queues-string => { pid => worker }
      @workers = Hash.new { |workers, queues| workers[queues] = {} }
      procline "(initialized)"
    end

    # Shared factory used to build clients and workers; lazily created.
    def self.pool_factory
      @pool_factory ||= Qless::PoolFactory.new
    end

    def pool_factory
      self.class.pool_factory
    end

    def self.pool_factory=(factory)
      @pool_factory = factory
    end

    # Config: after_prefork {{{

    # The `after_prefork` hook will be run in workers if you are using the
    # preforking master worker to save memory. Use this hook to reload
    # database connections and so forth to ensure that they're not shared
    # among workers.
    #
    # Call with a block to set the hook.
    # Call with no arguments to return the hook.
    def self.after_prefork(&block)
      block ? (@after_prefork = block) : @after_prefork
    end

    # Set the after_prefork proc.
    def self.after_prefork=(after_prefork)
      @after_prefork = after_prefork
    end

    # Invoke the hook (if any) inside a freshly forked worker.
    def call_after_prefork!
      self.class.after_prefork && self.class.after_prefork.call
    end

    # }}}
    # Config: class methods to start up the pool using the default config {{{

    @config_files = ["qless-pool.yml", "config/qless-pool.yml"]
    class << self; attr_accessor :config_files, :app_name; end

    # Defaults to the current directory's basename.
    def self.app_name
      @app_name ||= File.basename(Dir.pwd)
    end

    # When true, SIGWINCH gracefully stops all workers (see #handle_sig_queue!).
    def self.handle_winch?
      @handle_winch ||= false
    end

    def self.handle_winch=(bool)
      @handle_winch = bool
    end

    # QLESS_POOL_CONFIG wins; otherwise the first existing default file.
    def self.choose_config_file
      if ENV["QLESS_POOL_CONFIG"]
        ENV["QLESS_POOL_CONFIG"]
      else
        @config_files.detect { |f| File.exist?(f) }
      end
    end

    # Boot a pool from the default config and block until it shuts down.
    def self.run
      if GC.respond_to?(:copy_on_write_friendly=)
        GC.copy_on_write_friendly = true
      end
      Qless::Pool.new(choose_config_file).start.join
    end

    # }}}
    # Config: load config and config file {{{

    # Explicit file if given; otherwise auto-detect, but only when no
    # literal config hash was supplied.
    def config_file
      @config_file || (!@config && ::Qless::Pool.choose_config_file)
    end

    def init_config(config)
      case config
      when String, nil
        @config_file = config
      else
        @config = config.dup
      end
      load_config
    end

    # (Re)load the config: YAML+ERB from file, overlay the current
    # environment's section, then drop any remaining per-env sub-hashes so
    # only queues => worker-count pairs remain.
    def load_config
      if config_file
        @config = YAML.load(ERB.new(IO.read(config_file)).result)
      else
        @config ||= {}
      end
      environment and @config[environment] and config.merge!(@config[environment])
      config.delete_if {|key, value| value.is_a? Hash }
    end

    # Rails env if available, else RACK_ENV/RAILS_ENV/QLESS_ENV (may be nil).
    def environment
      if defined?(Rails) && Rails.respond_to?(:env)
        Rails.env
      elsif defined? RAILS_ENV
        RAILS_ENV
      else
        ENV['RACK_ENV'] || ENV['RAILS_ENV'] || ENV['QLESS_ENV']
      end
    end

    # }}}

    # Sig handlers and self pipe management {{{

    def self_pipe; @self_pipe ||= [] end
    def sig_queue; @sig_queue ||= [] end

    # (Re)create the wakeup pipe; FD_CLOEXEC keeps it out of exec'd children.
    def init_self_pipe!
      self_pipe.each { |io| io.close rescue nil }
      self_pipe.replace(IO.pipe)
      self_pipe.each { |io| io.fcntl(Fcntl::F_SETFD, Fcntl::FD_CLOEXEC) }
    end

    def init_sig_handlers!
      QUEUE_SIGS.each { |sig| trap_deferred(sig) }
      trap(:CHLD) { |_| awaken_master }
    end

    def awaken_master
      begin
        self_pipe.last.write_nonblock('.') # wakeup master process from select
      rescue Errno::EAGAIN, Errno::EINTR
        # pipe is full, master should wake up anyways
        # NOTE(review): retrying on EAGAIN can spin while the pipe stays
        # full — confirm this matches the upstream resque-pool behavior.
        retry
      end
    end

    # Inherits from Exception (not StandardError) so it punches through
    # ordinary rescues and aborts a blocking waitpid.
    class QuitNowException < Exception; end

    # defer a signal for later processing in #join (master process)
    def trap_deferred(signal)
      trap(signal) do |sig_nr|
        if @waiting_for_reaper && [:INT, :TERM].include?(signal)
          log "Received #{signal}: short circuiting QUIT waitpid"
          raise QuitNowException
        end
        if sig_queue.size < SIG_QUEUE_MAX_SIZE
          sig_queue << signal
          awaken_master
        else
          log "ignoring SIG#{signal}, queue=#{sig_queue.inspect}"
        end
      end
    end

    def reset_sig_handlers!
      QUEUE_SIGS.each {|sig| trap(sig, "DEFAULT") }
    end

    # Pop one deferred signal and act on it; returns :break when the
    # master loop should exit.
    def handle_sig_queue!
      case signal = sig_queue.shift
      when :USR1, :USR2, :CONT
        log "#{signal}: sending to all workers"
        signal_all_workers(signal)
      when :HUP
        log "HUP: reload config file and reload logfiles"
        load_config
        Logging.reopen_logs!
        log "HUP: gracefully shutdown old children (which have old logfiles open)"
        signal_all_workers(:QUIT)
        log "HUP: new children will inherit new logfiles"
        maintain_worker_count
      when :WINCH
        if self.class.handle_winch?
          log "WINCH: gracefully stopping all workers"
          @config = {}
          maintain_worker_count
        end
      when :QUIT
        graceful_worker_shutdown_and_wait!(signal)
      when :INT
        graceful_worker_shutdown!(signal)
      when :TERM
        case self.class.term_behavior
        when "graceful_worker_shutdown_and_wait"
          graceful_worker_shutdown_and_wait!(signal)
        when "graceful_worker_shutdown"
          graceful_worker_shutdown!(signal)
        else
          shutdown_everything_now!(signal)
        end
      end
    end

    class << self
      attr_accessor :term_behavior
    end

    def graceful_worker_shutdown_and_wait!(signal)
      log "#{signal}: graceful shutdown, waiting for children"
      signal_all_workers(:QUIT)
      reap_all_workers(0) # will hang until all workers are shutdown
      :break
    end

    def graceful_worker_shutdown!(signal)
      log "#{signal}: immediate shutdown (graceful worker shutdown)"
      signal_all_workers(:QUIT)
      :break
    end

    def shutdown_everything_now!(signal)
      log "#{signal}: immediate shutdown (and immediate worker shutdown)"
      signal_all_workers(:TERM)
      :break
    end

    # }}}
    # start, join, and master sleep {{{

    # Install handlers, spawn the initial workers, and return self.
    def start
      procline("(starting)")
      init_self_pipe!
      init_sig_handlers!
      maintain_worker_count
      procline("(started)")
      log "started manager"
      report_worker_pool_pids
      self
    end

    def report_worker_pool_pids
      if workers.empty?
        log "Pool is empty"
      else
        log "Pool contains worker PIDs: #{all_pids.inspect}"
      end
    end

    # Master loop: reap dead children, handle signals, keep the pool full.
    def join
      loop do
        reap_all_workers
        break if handle_sig_queue! == :break
        if sig_queue.empty?
          master_sleep
          maintain_worker_count
        end
        procline("managing #{all_pids.inspect}")
      end
      procline("(shutting down)")
      #stop # gracefully shutdown all workers on our way out
      log "manager finished"
      #unlink_pid_safe(pid) if pid
    end

    # Sleep up to 1s, waking early if the self-pipe is written; drain it.
    def master_sleep
      begin
        ready = IO.select([self_pipe.first], nil, nil, 1) or return
        ready.first && ready.first.first or return
        loop { self_pipe.first.read_nonblock(CHUNK_SIZE) }
      rescue Errno::EAGAIN, Errno::EINTR
      end
    end

    # }}}
    # worker process management {{{

    # Collect exited children. With flags 0 this blocks until every child
    # is gone; QuitNowException (raised from the trap) short-circuits it.
    def reap_all_workers(waitpid_flags=Process::WNOHANG)
      @waiting_for_reaper = waitpid_flags == 0
      begin
        loop do
          # -1, wait for any child process
          wpid, status = Process.waitpid2(-1, waitpid_flags)
          break unless wpid

          if worker = delete_worker(wpid)
            log "Reaped qless worker[#{status.pid}] (status: #{status.exitstatus}) queues: #{worker.job_reserver.queues.collect(&:name).join(",")}"
          else
            # this died before it could be killed, so it's not going to have any extra info
            log "Tried to reap worker [#{status.pid}], but it had already died. (status: #{status.exitstatus})"
          end
        end
      rescue Errno::EINTR
        retry
      rescue Errno::ECHILD, QuitNowException
      end
    end

    # TODO: close any file descriptors connected to worker, if any
    # Remove and return the worker registered under +pid+ (nil if unknown).
    def delete_worker(pid)
      worker = nil
      workers.detect do |queues, pid_to_worker|
        worker = pid_to_worker.delete(pid)
      end
      worker
    end

    def all_pids
      workers.map {|q, pid_to_worker| pid_to_worker.keys }.flatten
    end

    def signal_all_workers(signal)
      all_pids.each do |pid|
        Process.kill signal, pid
      end
    end

    # }}}
    # ???: maintain_worker_count, all_known_queues {{{

    # Spawn or QUIT workers per queue-group until counts match config.
    def maintain_worker_count
      all_known_queues.each do |queues|
        delta = worker_delta_for(queues)
        spawn_missing_workers_for(queues, delta) if delta > 0
        quit_excess_workers_for(queues, delta.abs) if delta < 0
      end
    end

    def all_known_queues
      config.keys | workers.keys
    end

    # }}}
    # methods that operate on a single grouping of queues {{{
    # perhaps this means a class is waiting to be extracted

    def spawn_missing_workers_for(queues, delta)
      delta.times { spawn_worker!(queues) }
    end

    def quit_excess_workers_for(queues, delta)
      pids_for(queues)[0...delta].each do |pid|
        Process.kill("QUIT", pid)
      end
    end

    # use qless to get a number for currently running workers on
    # a machine so we don't double up after a restart with long
    # running jobs still active
    def running_worker_count
      # may want to do a zcard on ql:workers instead
      count = 0
      machine_hostname = Socket.gethostname
      worker_info = pool_factory.client.workers.counts
      worker_info.each do |worker|
        # Worker names are "<hostname>-<pid>"; split from the right so
        # hostnames containing dashes are not truncated.
        hostname, _sep, pid = worker['name'].rpartition('-')
        count += 1 if machine_hostname == hostname
      end
      count
    end

    # Total worker slots in the config. Seeded with 0 so an empty config
    # yields 0 instead of nil (which would break the comparison in
    # #worker_delta_for).
    def configured_worker_count
      config.values.inject(0) {|sum, x| sum + x }
    end

    # Positive => spawn, negative => quit. Suppress spawning when qless
    # already reports more live workers on this machine than configured.
    def worker_delta_for(queues)
      delta = config.fetch(queues, 0) - workers.fetch(queues, []).size
      delta = 0 if delta > 0 && running_worker_count > configured_worker_count
      delta
    end

    def pids_for(queues)
      workers[queues].keys
    end

    # Fork one worker for +queues+ and register it in the pool.
    def spawn_worker!(queues)
      worker = create_worker(queues)
      pid = fork do
        # This var gets cached, so need to clear it out in forks
        # so that workers report the correct name to qless
        Qless.instance_variable_set(:@worker_name, nil)
        # forked children must not share the parent's redis connection
        pool_factory.client.redis.client.reconnect
        log_worker "Starting worker #{worker}"
        call_after_prefork!
        reset_sig_handlers!
        #self_pipe.each {|io| io.close }
        begin
          # NOTE(review): ENV['INTERVAL'] is a String here — presumably
          # worker.work coerces it; confirm against the qless worker API.
          worker.work(ENV['INTERVAL'] || DEFAULT_WORKER_INTERVAL) # interval, will block
        rescue Errno::EINTR
          log "Caught interrupted system call Errno::EINTR. Retrying."
          retry
        end
      end
      workers[queues][pid] = worker
    end

    def create_worker(queues)
      pool_factory.worker(queues)
    end

    # }}}

  end
end
|