server-starter 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile +3 -0
- data/LICENSE +22 -0
- data/README.md +103 -0
- data/Rakefile +18 -0
- data/bin/start_server.rb +292 -0
- data/example/Gemfile +6 -0
- data/example/config/unicorn.conf +46 -0
- data/example/config.ru +11 -0
- data/example/env/TEST +1 -0
- data/example/log/.gitkeep +0 -0
- data/example/restart_server +2 -0
- data/example/start_server +16 -0
- data/lib/server/starter/helper.rb +52 -0
- data/lib/server/starter/version.rb +5 -0
- data/lib/server/starter.rb +846 -0
- data/server-starter.gemspec +22 -0
- metadata +77 -0
@@ -0,0 +1,846 @@
|
|
1
|
+
require 'socket'
|
2
|
+
require 'fcntl'
|
3
|
+
require 'timeout'
|
4
|
+
require 'server/starter/version'
|
5
|
+
require 'server/starter/helper'
|
6
|
+
|
7
|
+
class Server::Starter
|
8
|
+
include Helper
|
9
|
+
|
10
|
+
# Sets up the bookkeeping shared by the signal handlers and the main loop
# of #start_server.
def initialize
  # names of signals ('HUP', 'TERM', ...) queued by the trap handlers
  @signals_received = []
  # pid of the worker of the current generation, nil when there is none
  @current_worker = nil
  # pid => generation (String) of workers that are being phased out
  @old_workers = {}
  # Time of the most recent worker start; set by #start_server before it is
  # ever read, so start from nil instead of an unrelated placeholder type
  @last_restart_time = nil
  # thread blocked in waitpid; the trap handlers kill it to wake the main loop
  @signal_wait_thread = nil
end
|
16
|
+
|
17
|
+
# Runs the superdaemon: binds the requested listening sockets, spawns the
# worker command and keeps supervising it until INT/TERM is received.
#
# Recognised options (keys may be Strings or Symbols):
#   :port                  - "PORT" / "HOST:PORT" (or an Integer port), or an Array of them
#   :path                  - unix socket path, or an Array of them
#   :exec                  - Array holding the worker command line (mandatory)
#   :dir                   - directory the worker chdir()s into before exec
#   :interval              - seconds to wait before deciding a new worker is up (default 1)
#   :signal_on_hup         - signal sent to old workers after a restart (default TERM)
#   :signal_on_term        - signal sent to workers when TERM is received (default TERM)
#   :backlog               - listen(2) backlog (default Socket::SOMAXCONN)
#   :envdir                - directory re-read on every loop iteration (see #_reload_env)
#   :enable_auto_restart, :auto_restart_interval, :kill_old_delay
#   :pid_file, :status_file, :log_file
#
# At least one of :port / :path must be given. Returns after all workers
# have exited in response to INT/TERM.
def start_server(opts)
  # symbolize keys
  opts = opts.map { |k, v| [k.to_sym, v] }.to_h
  opts[:interval] ||= 1
  opts[:signal_on_hup] ||= 'TERM'
  opts[:signal_on_term] ||= 'TERM'
  opts[:backlog] ||= Socket::SOMAXCONN
  [:signal_on_hup, :signal_on_term].each do |key|
    # normalize to the one that can be passed to kill; build a new string so
    # the caller's (possibly frozen) value is never mutated
    opts[key] = opts[key].to_s.tr("a-z", "A-Z").sub(/\ASIG/, "")
  end

  # prepare args (ports are matched against regexps below, so force Strings)
  ports = Array(opts[:port]).map(&:to_s)
  paths = Array(opts[:path])
  if ports.empty? && paths.empty?
    croak "either of ``port'' or ``path'' option is mandatory"
  end
  unless opts[:exec].is_a?(Array)
    croak "mandatory option ``exec'' is missing or not an array"
  end

  # set envs
  ENV['ENVDIR'] = opts[:envdir] if opts[:envdir]
  ENV['ENABLE_AUTO_RESTART'] = opts[:enable_auto_restart] ? '1' : nil
  ENV['KILL_OLD_DELAY'] = opts[:kill_old_delay].to_s if opts[:kill_old_delay]
  ENV['AUTO_RESTART_INTERVAL'] = opts[:auto_restart_interval].to_s if opts[:auto_restart_interval]

  # at_exit hooks are inherited by forked children; only the superdaemon
  # itself may remove its pid/status/socket files
  starter_pid = $$
  unlink_quietly = lambda do |file|
    begin
      File.unlink(file)
    rescue StandardError
      nil
    end
  end
  # make the descriptor survive exec(2) so that workers inherit it
  clear_cloexec = lambda do |sock|
    begin
      sock.fcntl(Fcntl::F_SETFD, 0)
    rescue
      die $!, "fcntl(F_SETFD, 0) failed"
    end
  end

  # open pid file
  if opts[:pid_file]
    begin
      File.open(opts[:pid_file], "w") { |fh| fh.puts $$ }
    rescue
      die $!, "failed to open file:#{opts[:pid_file]}"
    end
    at_exit { unlink_quietly.call(opts[:pid_file]) if $$ == starter_pid }
  end

  # open log file
  if opts[:log_file]
    File.open(opts[:log_file], "a") do |fh|
      $stdout.flush
      $stderr.flush
      begin
        $stdout.reopen(fh)
      rescue
        die $!, "failed to reopen STDOUT to file"
      end
      begin
        $stderr.reopen(fh)
      rescue
        die $!, "failed to reopen STDERR to file"
      end
    end
  end

  # create guard that removes the status file
  if opts[:status_file]
    at_exit { unlink_quietly.call(opts[:status_file]) if $$ == starter_pid }
  end

  $stderr.puts "start_server (pid:#{$$}) starting now..."

  # start listening, setup envvar
  socks = []
  sockenvs = []
  ports.each do |port|
    sock = nil
    begin
      if port =~ /^\s*(\d+)\s*$/
        sock = TCPServer.new("0.0.0.0", $1.to_i)
        sock.setsockopt(:SOCKET, :REUSEADDR, true)
        sock.listen(opts[:backlog])
      elsif port =~ /^\s*(.*)\s*:\s*(\d+)\s*$/
        _bind, _port = $1.strip, $2.to_i
        sock = TCPServer.new(_bind, _port)
        sock.setsockopt(:SOCKET, :REUSEADDR, true)
        sock.listen(opts[:backlog])
      else
        croak "invalid ``port'' value:#{port}"
      end
    rescue
      die $!, "failed to listen to port"
    end
    clear_cloexec.call(sock)
    sockenvs.push "#{port}=#{sock.fileno}"
    socks.push sock
  end

  # remove the unix socket files on exit
  at_exit do
    if $$ == starter_pid
      paths.each do |path|
        unlink_quietly.call(path) if File.socket?(path)
      end
    end
  end
  paths.each do |path|
    if File.socket?(path)
      warn "removing existing socket file:#{path}"
      begin
        File.unlink(path)
      rescue
        die $!, "failed to remove existing socket file:#{path}"
      end
    end
    unlink_quietly.call(path)
    # create the socket file world-accessible, then put the umask back
    saved_umask = File.umask(0)
    begin
      sock = UNIXServer.new(path)
      sock.listen(opts[:backlog])
    rescue
      die $!, "failed to listen to file #{path}"
    ensure
      File.umask(saved_umask)
    end
    clear_cloexec.call(sock)
    sockenvs.push "#{path}=#{sock.fileno}"
    socks.push sock
  end
  ENV['SERVER_STARTER_PORT'] = sockenvs.join(";")
  ENV['SERVER_STARTER_GENERATION'] = "0"

  # setup signal handlers: queue the signal and wake up the waiting thread
  %w(INT TERM HUP ALRM).each do |signal|
    Signal.trap(signal) {
      @signals_received.push(signal)
      @signal_wait_thread.kill if @signal_wait_thread
    }
  end
  Signal.trap('PIPE', 'IGNORE')

  # setup status monitor: writes "generation:pid" lines, one per live worker,
  # to a temporary file and renames it over the status file
  update_status =
    if opts[:status_file]
      lambda do
        tmpfn = "#{opts[:status_file]}.#{$$}"
        begin
          File.open(tmpfn, "w") do |tmpfh|
            gen_pids = @current_worker ? {ENV['SERVER_STARTER_GENERATION'] => @current_worker} : {}
            @old_workers.each { |pid, gen| gen_pids[gen] = pid }
            gen_pids.keys.map(&:to_i).sort.each { |gen| tmpfh.puts "#{gen}:#{gen_pids[gen.to_s]}" }
          end
        rescue
          die $!, "failed to create temporary file:#{tmpfn}"
        end
        begin
          File.rename(tmpfn, opts[:status_file])
        rescue
          die $!, "failed to rename #{tmpfn} to #{opts[:status_file]}"
        end
      end
    else
      lambda {}
    end

  # setup the start_worker function: fork+exec the worker, retrying while it
  # exits within :interval seconds
  start_worker = lambda do
    pid = nil
    while true
      ENV['SERVER_STARTER_GENERATION'] = (ENV['SERVER_STARTER_GENERATION'].to_i + 1).to_s
      begin
        pid = fork
      rescue
        die $!, "fork(2) failed"
      end
      if pid.nil? # child process
        args = Array(opts[:exec]).dup
        if opts[:dir]
          begin
            Dir.chdir opts[:dir]
          rescue
            die $!, "failed to chdir"
          end
        end
        begin
          bundler_with_clean_env do
            # keep the listening sockets open across exec
            args << {:close_others => false}
            exec(*args)
          end
        rescue
          $stderr.puts "failed to exec #{args[0]}:#{$!.class} #{$!.message}"
          exit(255)
        end
      end
      $stderr.puts "starting new worker #{pid}"
      sleep opts[:interval]
      # stop retrying when asked to terminate (signals are queued as Strings)
      break if (@signals_received - ['HUP']).size > 0
      break if Process.waitpid(pid, Process::WNOHANG).nil?
      $stderr.puts "new worker #{pid} seems to have failed to start, exit status:#{$?.exitstatus}"
    end
    # ready, update the environment
    @current_worker = pid
    @last_restart_time = Time.now
    update_status.call
  end

  # setup the wait function: returns [pid, exitstatus] of a reaped child, or
  # nil when woken up by a signal or by the auto-restart timeout.
  #
  # waitpid can not get EINTR on receiving signal, so create a thread,
  # and kill the thread on receiving signal to exit blocking.
  # (Process.wait3 from the proc-wait3 gem would raise EINTR instead, but it
  # requires a C compiler to install.)
  wait = lambda do
    flags = @signals_received.empty? ? 0 : Process::WNOHANG
    r = nil
    @signal_wait_thread = Thread.start do
      if flags == 0 && ENV['ENABLE_AUTO_RESTART']
        # blocking wait: wake up every second to check the restart interval
        begin
          Timeout.timeout(1) do
            pid = Process.waitpid(-1, flags)
            r = [pid, $?.exitstatus] if pid
          end
        rescue Timeout::Error
          Thread.exit
        end
      else
        pid = Process.waitpid(-1, flags)
        r = [pid, $?.exitstatus] if pid
      end
    end
    @signal_wait_thread.join
    @signal_wait_thread = nil
    r
  end

  # setup the cleanup function: signal every worker and wait for all of them
  cleanup = lambda do |sig|
    term_signal = sig == 'TERM' ? opts[:signal_on_term] : 'TERM'
    @old_workers[@current_worker] = ENV['SERVER_STARTER_GENERATION']
    @current_worker = nil
    $stderr.print "received #{sig}, sending #{term_signal} to all workers:",
                  @old_workers.keys.sort.join(','), "\n"
    @old_workers.keys.sort.each { |pid| Process.kill(term_signal, pid) }
    until @old_workers.empty?
      begin
        # block until a child exits instead of spinning on WNOHANG
        died_worker = Process.waitpid(-1)
      rescue Errno::ECHILD
        break # no children left to wait for
      end
      $stderr.puts "worker #{died_worker} died, status:#{$?.exitstatus}"
      @old_workers.delete(died_worker)
      update_status.call
    end
    $stderr.puts "exiting"
  end

  # the main loop
  start_worker.call
  while true
    # wait for next signal (or when auto-restart becomes necessary)
    r = wait.call
    # reload env if necessary
    loaded_env = _reload_env
    ENV['AUTO_RESTART_INTERVAL'] ||= "360" if ENV['ENABLE_AUTO_RESTART']
    with_local_env(loaded_env) do
      # restart if worker died
      if r
        died_worker, status = r
        if died_worker == @current_worker
          $stderr.puts "worker #{died_worker} died unexpectedly with status:#{status}, restarting"
          start_worker.call
        else
          $stderr.puts "old worker #{died_worker} died, status:#{status}"
          @old_workers.delete(died_worker)
          update_status.call
        end
      end
      # handle signals
      restart = nil
      until @signals_received.empty?
        sig = @signals_received.shift
        if sig == 'HUP'
          $stderr.puts "received HUP, spawning a new worker"
          restart = true
          break
        elsif sig == 'ALRM'
          # skip
        else
          return cleanup.call(sig)
        end
      end
      if !restart && ENV['ENABLE_AUTO_RESTART']
        auto_restart_interval = ENV['AUTO_RESTART_INTERVAL'].to_i
        elapsed_since_restart = Time.now - @last_restart_time
        if elapsed_since_restart >= auto_restart_interval && @old_workers.empty?
          $stderr.puts "autorestart triggered (interval=#{auto_restart_interval})"
          restart = true
        elsif elapsed_since_restart >= auto_restart_interval * 2
          $stderr.puts "autorestart triggered (forced, interval=#{auto_restart_interval})"
          restart = true
        end
      end
      # restart if requested
      if restart
        @old_workers[@current_worker] = ENV['SERVER_STARTER_GENERATION']
        start_worker.call
        $stderr.print "new worker is now running, sending #{opts[:signal_on_hup]} to old workers:"
        if !@old_workers.empty?
          $stderr.puts @old_workers.keys.sort.join(',')
        else
          $stderr.puts "none"
        end
        kill_old_delay =
          if ENV['KILL_OLD_DELAY']
            ENV['KILL_OLD_DELAY'].to_i
          elsif ENV['ENABLE_AUTO_RESTART']
            5
          else
            0
          end
        if kill_old_delay != 0
          $stderr.puts "sleeping #{kill_old_delay} secs before killing old workers"
          sleep kill_old_delay
        end
        $stderr.puts "killing old workers"
        @old_workers.keys.sort.each { |pid| Process.kill(opts[:signal_on_hup], pid) }
      end
    end
  end

  die "unreachable"
end
|
323
|
+
|
324
|
+
# Asks a running superdaemon to restart its worker and blocks until the new
# generation is the only one listed in the status file.
#
# opts must contain :pid_file and :status_file (String or Symbol keys), the
# same files that were given to the running #start_server.
def restart_server(opts)
  # symbolize keys, as start_server does
  opts = opts.map { |k, v| [k.to_sym, v] }.to_h
  unless opts[:pid_file] && opts[:status_file]
    die "--restart option requires --pid-file and --status-file to be set as well"
  end

  # get pid: the first line of the pid file
  pid_line =
    begin
      File.open(opts[:pid_file]) { |fh| fh.gets }
    rescue
      die $!, "failed to open file:#{opts[:pid_file]}"
    end
  pid = pid_line.to_s.strip
  # never fall through to pid 0: kill(2) would signal our own process group
  unless pid =~ /\A[1-9]\d*\z/
    die "invalid pid in file:#{opts[:pid_file]}"
  end

  # function that returns a list of active generations in sorted order
  get_generations = lambda do
    begin
      File.readlines(opts[:status_file]).map do |line|
        line =~ /^(\d+):/ ? $1 : nil
      end.compact.map(&:to_i).sort.uniq
    rescue
      die $!, "failed to open file:#{opts[:status_file]}"
    end
  end

  # wait for this generation: one past the newest generation currently running
  gens = get_generations.call
  die "no active process found in the status file" if gens.empty?
  wait_for = gens.last + 1

  # send HUP
  begin
    Process.kill('HUP', pid.to_i)
  rescue
    die $!, "failed to send SIGHUP to the server process"
  end

  # wait for the generation
  while true
    gens = get_generations.call
    break if gens.size == 1 && gens[0] == wait_for
    sleep 1
  end
end
|
369
|
+
|
370
|
+
# Parses SERVER_STARTER_PORT ("name=fd;name=fd;...") into a Hash that maps
# each "port", "host:port" or unix socket path to its file descriptor.
# Both keys and values are Strings.
def server_ports
  die "no environment variable SERVER_STARTER_PORT. Did you start the process using server_starter?" unless ENV['SERVER_STARTER_PORT']
  ENV['SERVER_STARTER_PORT'].split(';').map do |entry|
    # the descriptor follows the LAST '='; a unix socket path may contain '='
    name, sep, fd = entry.rpartition('=')
    die "malformed SERVER_STARTER_PORT entry:#{entry}" if sep.empty?
    [name, fd]
  end.to_h
end
|
376
|
+
|
377
|
+
# Reads the directory named by ENV['ENVDIR'] and returns a Hash of
# file name => first line of that file (without the line terminator).
#
# Dot files, non-regular entries, unreadable files and empty files are
# skipped. Returns {} when ENVDIR is unset or is not a directory.
def _reload_env
  dn = ENV['ENVDIR']
  return {} if dn.nil? || !File.directory?(dn)
  env = {}
  begin
    Dir.foreach(dn) do |n|
      next if n.start_with?('.')
      file = File.join(dn, n)
      next unless File.file?(file)
      first_line =
        begin
          File.open(file) { |fh| fh.gets }
        rescue SystemCallError
          nil # unreadable entry: skip it rather than abort the reload
        end
      # gets returns nil for an empty file; leave such variables unset
      env[n] = first_line.chomp if first_line
    end
  rescue SystemCallError
    # directory became unreadable; keep whatever was collected so far
  end
  env
end
|
392
|
+
end
|
393
|
+
|
394
|
+
__END__
|
395
|
+
|
396
|
+
package Server::Starter;
|
397
|
+
|
398
|
+
use 5.008;
|
399
|
+
use strict;
|
400
|
+
use warnings;
|
401
|
+
use Carp;
|
402
|
+
use Fcntl;
|
403
|
+
use IO::Handle;
|
404
|
+
use IO::Socket::INET;
|
405
|
+
use IO::Socket::UNIX;
|
406
|
+
use List::MoreUtils qw(uniq);
|
407
|
+
use POSIX qw(:sys_wait_h);
|
408
|
+
use Proc::Wait3;
|
409
|
+
use Scope::Guard;
|
410
|
+
|
411
|
+
use Exporter qw(import);
|
412
|
+
|
413
|
+
our $VERSION = '0.19';
|
414
|
+
our @EXPORT_OK = qw(start_server restart_server server_ports);
|
415
|
+
|
416
|
+
my @signals_received;
|
417
|
+
|
418
|
+
sub start_server {
|
419
|
+
my $opts = {
|
420
|
+
(@_ == 1 ? @$_[0] : @_),
|
421
|
+
};
|
422
|
+
$opts->{interval} = 1
|
423
|
+
if not defined $opts->{interval};
|
424
|
+
$opts->{signal_on_hup} ||= 'TERM';
|
425
|
+
$opts->{signal_on_term} ||= 'TERM';
|
426
|
+
$opts->{backlog} ||= Socket::SOMAXCONN();
|
427
|
+
for ($opts->{signal_on_hup}, $opts->{signal_on_term}) {
|
428
|
+
# normalize to the one that can be passed to kill
|
429
|
+
tr/a-z/A-Z/;
|
430
|
+
s/^SIG//i;
|
431
|
+
}
|
432
|
+
|
433
|
+
# prepare args
|
434
|
+
my $ports = $opts->{port};
|
435
|
+
my $paths = $opts->{path};
|
436
|
+
croak "either of ``port'' or ``path'' option is mandatory\n"
|
437
|
+
unless $ports || $paths;
|
438
|
+
$ports = [ $ports ]
|
439
|
+
if ! ref $ports && defined $ports;
|
440
|
+
$paths = [ $paths ]
|
441
|
+
if ! ref $paths && defined $paths;
|
442
|
+
croak "mandatory option ``exec'' is missing or is not an arrayref\n"
|
443
|
+
unless $opts->{exec} && ref $opts->{exec} eq 'ARRAY';
|
444
|
+
|
445
|
+
# set envs
|
446
|
+
$ENV{ENVDIR} = $opts->{envdir}
|
447
|
+
if defined $opts->{envdir};
|
448
|
+
$ENV{ENABLE_AUTO_RESTART} = $opts->{enable_auto_restart}
|
449
|
+
if defined $opts->{enable_auto_restart};
|
450
|
+
$ENV{KILL_OLD_DELAY} = $opts->{kill_old_delay}
|
451
|
+
if defined $opts->{kill_old_delay};
|
452
|
+
$ENV{AUTO_RESTART_INTERVAL} = $opts->{auto_restart_interval}
|
453
|
+
if defined $opts->{auto_restart_interval};
|
454
|
+
|
455
|
+
# open pid file
|
456
|
+
my $pid_file_guard = sub {
|
457
|
+
return unless $opts->{pid_file};
|
458
|
+
open my $fh, '>', $opts->{pid_file}
|
459
|
+
or die "failed to open file:$opts->{pid_file}: $!";
|
460
|
+
print $fh "$$\n";
|
461
|
+
close $fh;
|
462
|
+
return Scope::Guard->new(
|
463
|
+
sub {
|
464
|
+
unlink $opts->{pid_file};
|
465
|
+
},
|
466
|
+
);
|
467
|
+
}->();
|
468
|
+
|
469
|
+
# open log file
|
470
|
+
if ($opts->{log_file}) {
|
471
|
+
open my $fh, '>>', $opts->{log_file}
|
472
|
+
or die "failed to open log file:$opts->{log_file}: $!";
|
473
|
+
STDOUT->flush;
|
474
|
+
STDERR->flush;
|
475
|
+
open STDOUT, '>&', $fh
|
476
|
+
or die "failed to dup STDOUT to file: $!";
|
477
|
+
open STDERR, '>&', $fh
|
478
|
+
or die "failed to dup STDERR to file: $!";
|
479
|
+
close $fh;
|
480
|
+
}
|
481
|
+
|
482
|
+
# create guard that removes the status file
|
483
|
+
my $status_file_guard = $opts->{status_file} && Scope::Guard->new(
|
484
|
+
sub {
|
485
|
+
unlink $opts->{status_file};
|
486
|
+
},
|
487
|
+
);
|
488
|
+
|
489
|
+
print STDERR "start_server (pid:$$) starting now...\n";
|
490
|
+
|
491
|
+
# start listening, setup envvar
|
492
|
+
my @sock;
|
493
|
+
my @sockenv;
|
494
|
+
for my $port (@$ports) {
|
495
|
+
my $sock;
|
496
|
+
if ($port =~ /^\s*(\d+)\s*$/) {
|
497
|
+
$sock = IO::Socket::INET->new(
|
498
|
+
Listen => $opts->{backlog},
|
499
|
+
LocalPort => $port,
|
500
|
+
Proto => 'tcp',
|
501
|
+
ReuseAddr => 1,
|
502
|
+
);
|
503
|
+
} elsif ($port =~ /^\s*(.*)\s*:\s*(\d+)\s*$/) {
|
504
|
+
$port = "$1:$2";
|
505
|
+
$sock = IO::Socket::INET->new(
|
506
|
+
Listen => $opts->{backlog},
|
507
|
+
LocalAddr => $port,
|
508
|
+
Proto => 'tcp',
|
509
|
+
ReuseAddr => 1,
|
510
|
+
);
|
511
|
+
} else {
|
512
|
+
croak "invalid ``port'' value:$port\n"
|
513
|
+
}
|
514
|
+
die "failed to listen to $port:$!"
|
515
|
+
unless $sock;
|
516
|
+
fcntl($sock, F_SETFD, my $flags = '')
|
517
|
+
or die "fcntl(F_SETFD, 0) failed:$!";
|
518
|
+
push @sockenv, "$port=" . $sock->fileno;
|
519
|
+
push @sock, $sock;
|
520
|
+
}
|
521
|
+
my $path_remove_guard = Scope::Guard->new(
|
522
|
+
sub {
|
523
|
+
-S $_ and unlink $_
|
524
|
+
for @$paths;
|
525
|
+
},
|
526
|
+
);
|
527
|
+
for my $path (@$paths) {
|
528
|
+
if (-S $path) {
|
529
|
+
warn "removing existing socket file:$path";
|
530
|
+
unlink $path
|
531
|
+
or die "failed to remove existing socket file:$path:$!";
|
532
|
+
}
|
533
|
+
unlink $path;
|
534
|
+
my $saved_umask = umask(0);
|
535
|
+
my $sock = IO::Socket::UNIX->new(
|
536
|
+
Listen => $opts->{backlog},
|
537
|
+
Local => $path,
|
538
|
+
) or die "failed to listen to file $path:$!";
|
539
|
+
umask($saved_umask);
|
540
|
+
fcntl($sock, F_SETFD, my $flags = '')
|
541
|
+
or die "fcntl(F_SETFD, 0) failed:$!";
|
542
|
+
push @sockenv, "$path=" . $sock->fileno;
|
543
|
+
push @sock, $sock;
|
544
|
+
}
|
545
|
+
$ENV{SERVER_STARTER_PORT} = join ";", @sockenv;
|
546
|
+
$ENV{SERVER_STARTER_GENERATION} = 0;
|
547
|
+
|
548
|
+
# setup signal handlers
|
549
|
+
$SIG{$_} = sub {
|
550
|
+
push @signals_received, $_[0];
|
551
|
+
} for (qw/INT TERM HUP ALRM/);
|
552
|
+
$SIG{PIPE} = 'IGNORE';
|
553
|
+
|
554
|
+
# setup status monitor
|
555
|
+
my ($current_worker, %old_workers, $last_restart_time);
|
556
|
+
my $update_status = $opts->{status_file}
|
557
|
+
? sub {
|
558
|
+
my $tmpfn = "$opts->{status_file}.$$";
|
559
|
+
open my $tmpfh, '>', $tmpfn
|
560
|
+
or die "failed to create temporary file:$tmpfn:$!";
|
561
|
+
my %gen_pid = (
|
562
|
+
($current_worker
|
563
|
+
? ($ENV{SERVER_STARTER_GENERATION} => $current_worker)
|
564
|
+
: ()),
|
565
|
+
map { $old_workers{$_} => $_ } keys %old_workers,
|
566
|
+
);
|
567
|
+
print $tmpfh "$_:$gen_pid{$_}\n"
|
568
|
+
for sort keys %gen_pid;
|
569
|
+
close $tmpfh;
|
570
|
+
rename $tmpfn, $opts->{status_file}
|
571
|
+
or die "failed to rename $tmpfn to $opts->{status_file}:$!";
|
572
|
+
} : sub {
|
573
|
+
};
|
574
|
+
|
575
|
+
# setup the start_worker function
|
576
|
+
my $start_worker = sub {
|
577
|
+
my $pid;
|
578
|
+
while (1) {
|
579
|
+
$ENV{SERVER_STARTER_GENERATION}++;
|
580
|
+
$pid = fork;
|
581
|
+
die "fork(2) failed:$!"
|
582
|
+
unless defined $pid;
|
583
|
+
if ($pid == 0) {
|
584
|
+
my @args = @{$opts->{exec}};
|
585
|
+
# child process
|
586
|
+
if (defined $opts->{dir}) {
|
587
|
+
chdir $opts->{dir} or die "failed to chdir:$!";
|
588
|
+
}
|
589
|
+
{ exec { $args[0] } @args };
|
590
|
+
print STDERR "failed to exec $args[0]$!";
|
591
|
+
exit(255);
|
592
|
+
}
|
593
|
+
print STDERR "starting new worker $pid\n";
|
594
|
+
sleep $opts->{interval};
|
595
|
+
if ((grep { $_ ne 'HUP' } @signals_received)
|
596
|
+
|| waitpid($pid, WNOHANG) <= 0) {
|
597
|
+
last;
|
598
|
+
}
|
599
|
+
print STDERR "new worker $pid seems to have failed to start, exit status:$?\n";
|
600
|
+
}
|
601
|
+
# ready, update the environment
|
602
|
+
$current_worker = $pid;
|
603
|
+
$last_restart_time = time;
|
604
|
+
$update_status->();
|
605
|
+
};
|
606
|
+
|
607
|
+
# setup the wait function
|
608
|
+
my $wait = sub {
|
609
|
+
my $block = @signals_received == 0;
|
610
|
+
my @r;
|
611
|
+
if ($block && $ENV{ENABLE_AUTO_RESTART}) {
|
612
|
+
alarm(1);
|
613
|
+
@r = wait3($block);
|
614
|
+
alarm(0);
|
615
|
+
} else {
|
616
|
+
@r = wait3($block);
|
617
|
+
}
|
618
|
+
return @r;
|
619
|
+
};
|
620
|
+
|
621
|
+
# setup the cleanup function
|
622
|
+
my $cleanup = sub {
|
623
|
+
my $sig = shift;
|
624
|
+
my $term_signal = $sig eq 'TERM' ? $opts->{signal_on_term} : 'TERM';
|
625
|
+
$old_workers{$current_worker} = $ENV{SERVER_STARTER_GENERATION};
|
626
|
+
undef $current_worker;
|
627
|
+
print STDERR "received $sig, sending $term_signal to all workers:",
|
628
|
+
join(',', sort keys %old_workers), "\n";
|
629
|
+
kill $term_signal, $_
|
630
|
+
for sort keys %old_workers;
|
631
|
+
while (%old_workers) {
|
632
|
+
if (my @r = wait3(1)) {
|
633
|
+
my ($died_worker, $status) = @r;
|
634
|
+
print STDERR "worker $died_worker died, status:$status\n";
|
635
|
+
delete $old_workers{$died_worker};
|
636
|
+
$update_status->();
|
637
|
+
}
|
638
|
+
}
|
639
|
+
print STDERR "exiting\n";
|
640
|
+
};
|
641
|
+
|
642
|
+
# the main loop
|
643
|
+
$start_worker->();
|
644
|
+
while (1) {
|
645
|
+
# wait for next signal (or when auto-restart becomes necessary)
|
646
|
+
my @r = $wait->();
|
647
|
+
# reload env if necessary
|
648
|
+
my %loaded_env = _reload_env();
|
649
|
+
my @loaded_env_keys = keys %loaded_env;
|
650
|
+
local @ENV{@loaded_env_keys} = map { $loaded_env{$_} } (@loaded_env_keys);
|
651
|
+
$ENV{AUTO_RESTART_INTERVAL} ||= 360
|
652
|
+
if $ENV{ENABLE_AUTO_RESTART};
|
653
|
+
# restart if worker died
|
654
|
+
if (@r) {
|
655
|
+
my ($died_worker, $status) = @r;
|
656
|
+
if ($died_worker == $current_worker) {
|
657
|
+
print STDERR "worker $died_worker died unexpectedly with status:$status, restarting\n";
|
658
|
+
$start_worker->();
|
659
|
+
} else {
|
660
|
+
print STDERR "old worker $died_worker died, status:$status\n";
|
661
|
+
delete $old_workers{$died_worker};
|
662
|
+
$update_status->();
|
663
|
+
}
|
664
|
+
}
|
665
|
+
# handle signals
|
666
|
+
my $restart;
|
667
|
+
while (@signals_received) {
|
668
|
+
my $sig = shift @signals_received;
|
669
|
+
if ($sig eq 'HUP') {
|
670
|
+
print STDERR "received HUP, spawning a new worker\n";
|
671
|
+
$restart = 1;
|
672
|
+
last;
|
673
|
+
} elsif ($sig eq 'ALRM') {
|
674
|
+
# skip
|
675
|
+
} else {
|
676
|
+
return $cleanup->($sig);
|
677
|
+
}
|
678
|
+
}
|
679
|
+
if (! $restart && $ENV{ENABLE_AUTO_RESTART}) {
|
680
|
+
my $auto_restart_interval = $ENV{AUTO_RESTART_INTERVAL};
|
681
|
+
my $elapsed_since_restart = time - $last_restart_time;
|
682
|
+
if ($elapsed_since_restart >= $auto_restart_interval && ! %old_workers) {
|
683
|
+
print STDERR "autorestart triggered (interval=$auto_restart_interval)\n";
|
684
|
+
$restart = 1;
|
685
|
+
} elsif ($elapsed_since_restart >= $auto_restart_interval * 2) {
|
686
|
+
print STDERR "autorestart triggered (forced, interval=$auto_restart_interval)\n";
|
687
|
+
$restart = 1;
|
688
|
+
}
|
689
|
+
}
|
690
|
+
# restart if requested
|
691
|
+
if ($restart) {
|
692
|
+
$old_workers{$current_worker} = $ENV{SERVER_STARTER_GENERATION};
|
693
|
+
$start_worker->();
|
694
|
+
print STDERR "new worker is now running, sending $opts->{signal_on_hup} to old workers:";
|
695
|
+
if (%old_workers) {
|
696
|
+
print STDERR join(',', sort keys %old_workers), "\n";
|
697
|
+
} else {
|
698
|
+
print STDERR "none\n";
|
699
|
+
}
|
700
|
+
my $kill_old_delay = defined $ENV{KILL_OLD_DELAY} ? $ENV{KILL_OLD_DELAY} : $ENV{ENABLE_AUTO_RESTART} ? 5 : 0;
|
701
|
+
if ($kill_old_delay != 0) {
|
702
|
+
print STDERR "sleeping $kill_old_delay secs before killing old workers\n";
|
703
|
+
sleep $kill_old_delay;
|
704
|
+
}
|
705
|
+
print STDERR "killing old workers\n";
|
706
|
+
kill $opts->{signal_on_hup}, $_
|
707
|
+
for sort keys %old_workers;
|
708
|
+
}
|
709
|
+
}
|
710
|
+
|
711
|
+
die "unreachable";
|
712
|
+
}
|
713
|
+
|
714
|
+
sub restart_server {
|
715
|
+
my $opts = {
|
716
|
+
(@_ == 1 ? @$_[0] : @_),
|
717
|
+
};
|
718
|
+
die "--restart option requires --pid-file and --status-file to be set as well\n"
|
719
|
+
unless $opts->{pid_file} && $opts->{status_file};
|
720
|
+
|
721
|
+
# get pid
|
722
|
+
my $pid = do {
|
723
|
+
open my $fh, '<', $opts->{pid_file}
|
724
|
+
or die "failed to open file:$opts->{pid_file}:$!";
|
725
|
+
my $line = <$fh>;
|
726
|
+
chomp $line;
|
727
|
+
$line;
|
728
|
+
};
|
729
|
+
|
730
|
+
# function that returns a list of active generations in sorted order
|
731
|
+
my $get_generations = sub {
|
732
|
+
open my $fh, '<', $opts->{status_file}
|
733
|
+
or die "failed to open file:$opts->{status_file}:$!";
|
734
|
+
uniq sort { $a <=> $b } map { /^(\d+):/ ? ($1) : () } <$fh>;
|
735
|
+
};
|
736
|
+
|
737
|
+
# wait for this generation
|
738
|
+
my $wait_for = do {
|
739
|
+
my @gens = $get_generations->()
|
740
|
+
or die "no active process found in the status file";
|
741
|
+
pop(@gens) + 1;
|
742
|
+
};
|
743
|
+
|
744
|
+
# send HUP
|
745
|
+
kill 'HUP', $pid
|
746
|
+
or die "failed to send SIGHUP to the server process:$!";
|
747
|
+
|
748
|
+
# wait for the generation
|
749
|
+
while (1) {
|
750
|
+
my @gens = $get_generations->();
|
751
|
+
last if scalar(@gens) == 1 && $gens[0] == $wait_for;
|
752
|
+
sleep 1;
|
753
|
+
}
|
754
|
+
}
|
755
|
+
|
756
|
+
sub server_ports {
|
757
|
+
die "no environment variable SERVER_STARTER_PORT. Did you start the process using server_starter?",
|
758
|
+
unless $ENV{SERVER_STARTER_PORT};
|
759
|
+
my %ports = map {
|
760
|
+
+(split /=/, $_, 2)
|
761
|
+
} split /;/, $ENV{SERVER_STARTER_PORT};
|
762
|
+
\%ports;
|
763
|
+
}
|
764
|
+
|
765
|
+
sub _reload_env {
|
766
|
+
my $dn = $ENV{ENVDIR};
|
767
|
+
return if !defined $dn or !-d $dn;
|
768
|
+
my $d;
|
769
|
+
opendir($d, $dn) or return;
|
770
|
+
my %env;
|
771
|
+
while (my $n = readdir($d)) {
|
772
|
+
next if $n =~ /^\./;
|
773
|
+
open my $fh, '<', "$dn/$n" or next;
|
774
|
+
chomp(my $v = <$fh>);
|
775
|
+
$env{$n} = $v if defined $v;
|
776
|
+
}
|
777
|
+
return %env;
|
778
|
+
}
|
779
|
+
|
780
|
+
1;
|
781
|
+
__END__
|
782
|
+
|
783
|
+
=head1 NAME
|
784
|
+
|
785
|
+
Server::Starter - a superdaemon for hot-deploying server programs
|
786
|
+
|
787
|
+
=head1 SYNOPSIS
|
788
|
+
|
789
|
+
# from command line
|
790
|
+
% start_server --port=80 my_httpd
|
791
|
+
|
792
|
+
# in my_httpd
|
793
|
+
use Server::Starter qw(server_ports);
|
794
|
+
|
795
|
+
my $listen_sock = IO::Socket::INET->new(
|
796
|
+
Proto => 'tcp',
|
797
|
+
);
|
798
|
+
$listen_sock->fdopen((values %{server_ports()})[0], 'w')
|
799
|
+
or die "failed to bind to listening socket:$!";
|
800
|
+
|
801
|
+
while (1) {
|
802
|
+
if (my $conn = $listen_sock->accept) {
|
803
|
+
....
|
804
|
+
}
|
805
|
+
}
|
806
|
+
|
807
|
+
=head1 DESCRIPTION
|
808
|
+
|
809
|
+
It is often a pain to write a server program that supports graceful restarts, with no resource leaks. L<Server::Starter> solves the problem by splitting the task into two. One is L<start_server>, a script provided as a part of the module, which works as a superdaemon that binds to zero or more TCP ports or unix sockets, and repeatedly spawns the server program that actually handles the necessary tasks (for example, responding to incoming connections). The spawned server programs under L<Server::Starter> call accept(2) and handle the requests.
|
810
|
+
|
811
|
+
To gracefully restart the server program, send SIGHUP to the superdaemon. The superdaemon spawns a new server program, and if (and only if) it starts up successfully, sends SIGTERM to the old server program.
|
812
|
+
|
813
|
+
By using L<Server::Starter> it is much easier to write a hot-deployable server. Following are the only requirements a server program to be run under L<Server::Starter> should conform to:
|
814
|
+
|
815
|
+
- receive file descriptors to listen to through an environment variable
|
816
|
+
- perform a graceful shutdown when receiving SIGTERM
|
817
|
+
|
818
|
+
A Net::Server personality that can be run under L<Server::Starter> exists under the name L<Net::Server::SS::PreFork>.
|
819
|
+
|
820
|
+
=head1 METHODS
|
821
|
+
|
822
|
+
=over 4
|
823
|
+
|
824
|
+
=item server_ports
|
825
|
+
|
826
|
+
Returns zero or more file descriptors on which the server program should call accept(2) in a hashref. Each element of the hashref is: (host:port|port|path_of_unix_socket) => file_descriptor.
|
827
|
+
|
828
|
+
=item start_server
|
829
|
+
|
830
|
+
Starts the superdaemon. Used by the C<start_server> script.
|
831
|
+
|
832
|
+
=back
|
833
|
+
|
834
|
+
=head1 AUTHOR
|
835
|
+
|
836
|
+
Kazuho Oku
|
837
|
+
|
838
|
+
=head1 SEE ALSO
|
839
|
+
|
840
|
+
L<Net::Server::SS::PreFork>
|
841
|
+
|
842
|
+
=head1 LICENSE
|
843
|
+
|
844
|
+
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
845
|
+
|
846
|
+
=cut
|