pazuzu 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/Gemfile +2 -0
- data/Pazuzu.gemspec +29 -0
- data/README.md +171 -0
- data/Rakefile +21 -0
- data/bin/pazuzu +6 -0
- data/bin/pazuzud +6 -0
- data/lib/pazuzu.rb +37 -0
- data/lib/pazuzu/application.rb +114 -0
- data/lib/pazuzu/cgroup.rb +73 -0
- data/lib/pazuzu/command_line/controller.rb +152 -0
- data/lib/pazuzu/control/protocol.rb +304 -0
- data/lib/pazuzu/control/socket_client.rb +30 -0
- data/lib/pazuzu/control/socket_server.rb +75 -0
- data/lib/pazuzu/instance.rb +218 -0
- data/lib/pazuzu/procfiles.rb +16 -0
- data/lib/pazuzu/supervisor.rb +201 -0
- data/lib/pazuzu/supervisor_runner.rb +95 -0
- data/lib/pazuzu/utility/annotated_logger.rb +46 -0
- data/lib/pazuzu/utility/output_tailer.rb +60 -0
- data/lib/pazuzu/utility/process_spawning.rb +40 -0
- data/lib/pazuzu/utility/rate_limiter.rb +68 -0
- data/lib/pazuzu/utility/runnable.rb +120 -0
- data/lib/pazuzu/utility/runnable_pool.rb +62 -0
- data/lib/pazuzu/version.rb +3 -0
- data/lib/pazuzu/worker.rb +173 -0
- metadata +193 -0
@@ -0,0 +1,30 @@
|
|
1
|
+
module Pazuzu
  module Control

    # Client for the supervisor's control socket.
    #
    # Commands are sent as a single space-separated line. Each response is
    # read as a fixed-width hexadecimal length header followed by that many
    # bytes of JSON.
    class SocketClient

      # Number of characters in the hexadecimal length header of a response.
      RESPONSE_HEADER_LENGTH = 10

      # Connects to the control socket.
      #
      # @param path [String] file system path of the UNIX domain socket.
      # @raise [SystemCallError] if the socket cannot be connected to.
      def initialize(path)
        @path = path
        @socket = UNIXSocket.new(@path)
      end

      # Sends a command and waits for the response.
      #
      # @param command [#to_s] the command name.
      # @param arguments [Array<#to_s>] arguments appended to the command,
      #   separated by single spaces.
      # @return [Object, nil] the parsed JSON payload, or nil when the
      #   response header announces an empty payload.
      # @raise [EOFError] if the peer closes the connection before a complete
      #   response has been received.
      # @raise [JSON::ParserError] if the payload is not valid JSON.
      def execute(command, *arguments)
        command_line = [command, *arguments].join(' ')
        command_line << "\n"

        @socket.write(command_line)

        # IO#read(length) returns nil at end of file, so the peer hanging up
        # must be detected before the header is parsed.
        header = @socket.read(RESPONSE_HEADER_LENGTH)
        if header.nil?
          raise EOFError, "Control socket #{@path} was closed before a response was received"
        end

        num_bytes = header.to_i(16)
        return nil unless num_bytes > 0

        data = @socket.read(num_bytes)
        if data.nil? || data.bytesize < num_bytes
          raise EOFError, "Control socket #{@path} was closed in the middle of a response"
        end
        JSON.parse(data)
      end

      attr_reader :path

    end

  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
module Pazuzu
  module Control

    # Listens on a UNIX domain socket and runs a control Protocol against the
    # supervisor for every accepted connection, one thread per connection.
    class SocketServer

      include Utility::Runnable

      # @param supervisor the supervisor that commands are executed against;
      #   its logger is wrapped for this server's log output.
      # @param path [String] file system path of the socket to listen on.
      def initialize(supervisor, path)
        @supervisor = supervisor
        @path = path
        @logger = Utility::AnnotatedLogger.new(
          supervisor.logger, 'Command socket server')
        super()
      end

      attr_reader :path
      attr_reader :supervisor

      protected

        # Opens the listening socket unless it is already open. A stale
        # socket file left at the path is removed first.
        def listen
          unless @server
            File.unlink(@path) if File.exist?(@path) && File.socket?(@path)
            @server = UNIXServer.new(@path)
          end
        end

        # Accept loop. Each connection is served on its own thread; errors
        # are logged and the loop keeps going for as long as the server
        # socket is set.
        def run_protocol
          while @server
            begin
              socket = @server.accept
              if socket
                Thread.start {
                  begin
                    protocol = Protocol.new(socket, @supervisor)
                    protocol.run!
                  rescue Errno::EPIPE, Errno::ECONNRESET
                    # The client went away; nothing to report.
                  rescue Exception => e
                    @logger.error "Exception during command protocol exchange: #{e.class}: #{e}"
                  ensure
                    socket.close
                  end
                }
              end
            rescue Exception => e
              @logger.error "Socket server exception: #{e.class}: #{e}"
              # Back off briefly so a persistent accept failure cannot spin.
              sleep(0.1)
            end
          end
        end

        def on_starting
          listen
          @thread = Thread.start { run_protocol }
          runnable_state.started!
        end

        # Stops accepting connections and releases the listening socket.
        def on_stopping
          # Stop the accept loop first so that closing the socket below does
          # not surface as an error inside it.
          if @thread
            @thread.terminate
            @thread = nil
          end
          if @server
            begin
              @server.close
            rescue IOError, SystemCallError
              # Already closed or otherwise unusable; nothing left to release.
            end
            @server = nil
          end
          runnable_state.stopped!
        end

    end

  end
end
|
@@ -0,0 +1,218 @@
|
|
1
|
+
module Pazuzu

  # One supervised process slot of a worker. The processes belonging to the
  # instance are tracked through the cgroup that the supervisor hands out for
  # it, and a monitor thread polls that cgroup to drive state transitions.
  class Instance

    # Time that we give processes to shut down normally on SIGTERM.
    SHUTDOWN_GRACE_TIME = 10.seconds

    include Utility::Runnable

    # @param worker the owning worker; also provides access to the
    #   application and supervisor.
    # @param index the position of this instance within the worker.
    # @param root_path [String] directory the command is run in.
    # @param command_line [String] the command to execute.
    def initialize(worker, index, root_path, command_line)
      @worker = worker
      @index = index
      @logger = Utility::AnnotatedLogger.new(
        @worker.application.supervisor.logger, qname)
      @root_path = root_path
      @command_line = command_line
      @tailer = Utility::OutputTailer.new(
        :limit => 5000,
        :on_line => proc { |_timestamp, line| @logger.info("[output] #{line}") })
      @flap_limiter = Utility::RateLimiter.new(0.1)
      @recovery_count = 0
      @cgroup = @worker.application.supervisor.cgroup_for_instance(self)
      super()
    end

    # Does this instance have any unattached PIDs running?
    def attachable?
      run_state == :stopped && @cgroup.pids.any?
    end

    # Qualified name: the worker's qualified name and the index, joined by
    # a dot.
    def qname
      [@worker.qname, @index].join('.')
    end

    # @return [Array<Array>] the tailer's entries as
    #   [qualified name, time, message] triples.
    def log_entries
      source = qname
      @tailer.entries.map { |(time, message)| [source, time, message] }
    end

    attr_reader :command_line
    attr_reader :worker
    attr_reader :index
    attr_reader :root_path
    # NOTE(review): @pid is never assigned anywhere in this class, so this
    # reader always returns nil. Kept for interface compatibility.
    attr_reader :pid
    attr_reader :recovery_count

    private

      def on_starting
        start_process!
      end

      def on_started
        @logger.info 'Started'
      end

      # Asks the processes to terminate and arms the grace-period deadline.
      # The monitor thread escalates to SIGKILL once the deadline passes.
      def on_stopping
        pids = @cgroup.pids
        if pids.any?
          if signal!('TERM')
            @termination_deadline = Time.now + SHUTDOWN_GRACE_TIME
          else
            process_lost!
          end
        else
          runnable_state.stopped!
        end
      end

      def on_stopped
        @recovery_count = 0
        @logger.info "Stopped"
      end

      def on_failed
      end

      # Sends a signal to every PID currently listed by the cgroup.
      #
      # @param signal_name [String] signal name without the SIG prefix.
      # @return [Boolean] true if at least one process received the signal.
      def signal!(signal_name)
        any_signaled = false
        @cgroup.pids.each do |pid|
          begin
            Process.kill(signal_name, pid)
          rescue Errno::ESRCH
            # The process disappeared between listing and signalling.
          else
            @logger.info "Signaled instance PID=#{pid} with SIG#{signal_name}"
            any_signaled = true
          end
        end
        any_signaled
      end

      # Spawns the process unless the cgroup already lists PIDs, and makes
      # sure the monitor thread is running.
      def start_process!
        if @cgroup.pids.empty?
          unless @flap_limiter.count!
            return unless [:starting, :running].include?(run_state)
            @logger.warn "Restarting too fast, process may be flapping"
          end

          reset_process!

          @logger.info "Spawning process with command (in #{@root_path}): #{@command_line}"

          stream = @tailer.open
          pid = Process.fork do
            exec!(stream)
          end
          stream.close

          @logger.info "Spawned process with PID=#{pid}"
        end

        @spawned_at = Time.now
        @monitor_thread ||= Thread.start { monitor_process }
      end

      # Runs in the forked child: detaches from the supervisor and executes
      # the command through the cgroup.
      #
      # NOTE(review): io_out is currently unused; output goes to a file under
      # /tmp instead (see the TODO below).
      def exec!(io_out)
        @worker.setup_spawned_process!
        Process.setsid

        # Double fork to detach and avoid zombies
        Process.fork do
          Process.setsid

          Dir.chdir(@root_path)

          # TODO: Log to file so we can tail
          $stderr.reopen("/tmp/#{qname}")
          $stdout.reopen("/tmp/#{qname}")
          #$stderr.reopen(io_out)
          #$stdout.reopen(io_out)

          begin
            @cgroup.exec(@command_line)
          rescue Errno::ENOENT => e
            abort e.message
          end
          Process.exit(0)
        end

        # The intermediate process ends here as soon as the inner fork has
        # been started; only the grandchild lives on.
        Process.exit(1)
      end

      def process_lost!
        @logger.info "Child process lost, assuming it stopped on its own"
        process_stopped!
      end

      # Handles the processes having gone away: respawns when the instance is
      # supposed to be up, or completes the stop when it is being stopped.
      def process_stopped!
        reset_process!
        case run_state
          when :running, :starting
            @recovery_count += 1
            @logger.info "Respawning after failure"
            if run_state == :running
              runnable_state.starting!
            else
              start_process!
            end
          when :stopping
            runnable_state.stopped!
        end
      end

      # Called by the monitor whenever the cgroup still lists PIDs. Escalates
      # to SIGKILL once the termination deadline has expired; otherwise
      # completes a pending start.
      def process_confirmed_running!
        if @termination_deadline && @termination_deadline <= Time.now
          # Signal through the cgroup: no single PID is tracked here, and the
          # PID returned by fork belongs to the short-lived intermediate.
          if signal!('KILL')
            @logger.info "Child did not terminate in time, forcibly killing with SIGKILL"
            @termination_deadline = Time.now + SHUTDOWN_GRACE_TIME
          else
            process_lost!
          end
        elsif run_state == :starting
          @logger.info "Child seems to be running"
          runnable_state.started!
        end
      end

      def reset_process!
        @spawned_at = nil
        @tailer.close
        @termination_deadline = nil
      end

      # Body of the monitor thread: polls the cgroup every two seconds for as
      # long as the instance is starting, running or stopping.
      def monitor_process
        while [:starting, :running, :stopping].include?(run_state)
          begin
            case run_state
              when :starting
                process_confirmed_running! if @cgroup.pids.any?
              when :running
                process_lost! if @cgroup.pids.empty?
              when :stopping
                if @cgroup.pids.empty?
                  runnable_state.stopped!
                else
                  # Still alive: enforce the SIGTERM grace period.
                  process_confirmed_running!
                end
            end
          rescue Exception => e
            # Deliberately broad: the monitor must survive any failure.
            @logger.error(e.message)
          end
          # Sleep outside the rescued section so that a recurring error
          # cannot turn this loop into a busy spin.
          sleep(2)
        end
      ensure
        @monitor_thread = nil
      end

  end

end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Pazuzu

  # Helpers for locating Procfiles.
  module Procfiles

    # Resolves a user-supplied path to an absolute Procfile path.
    #
    # The path is expanded to an absolute one. When it names a directory that
    # contains a file called "Procfile", the path of that file is returned;
    # in every other case the expanded path itself is returned.
    #
    # @param path [String] a file or directory path.
    # @return [String] the absolute path.
    def self.normalize_procfile_path(path)
      expanded = File.expand_path(path)
      return expanded unless File.directory?(expanded)

      candidate = File.join(expanded, 'Procfile')
      File.exist?(candidate) ? candidate : expanded
    end

  end

end
|
@@ -0,0 +1,201 @@
|
|
1
|
+
module Pazuzu

  # Raised when the configuration cannot be found or read.
  class ConfigurationError < StandardError; end

  # The supervisor controls the different applications registered with it.
  class Supervisor

    DEFAULT_SOCKET_PATH = '/var/run/pazuzud.socket'

    # Values used for cgroup settings that the configuration leaves out.
    DEFAULT_CGROUP_HIERARCHY_ROOT = 'pazuzu'
    DEFAULT_CGROUP_SUBSYSTEMS = %w(memory cpu cpuacct blkio)
    DEFAULT_CGROUPS_FS_ROOT = '/sys/fs/cgroup'

    include Utility::Runnable

    # @param options [Hash] only :config_path is accepted: the path of the
    #   YAML configuration file.
    def initialize(options)
      options.assert_valid_keys(:config_path)
      @logger = Logger.new($stderr)
      @logger.level = Logger::WARN
      @applications = Utility::RunnablePool.new
      @config_path = options[:config_path]
      Thread.start {
        # Wait for PIDs, otherwise we will get zombies
        loop do
          begin
            Process.waitpid
          rescue Errno::ECHILD
            # No children at the moment; check again shortly.
            sleep(1)
          rescue
            # Ignore
          end
        end
      }
      super()
    end

    # Loads the configuration, starts the supervisor and blocks for as long
    # as it is starting or running.
    def run!
      load_configuration!
      start!
      while [:starting, :running].include?(run_state)
        sleep(1)
      end
    end

    # (Re)reads the configuration file and applies it.
    #
    # @raise [ConfigurationError] if the file cannot be found or read.
    def load_configuration!
      configure!(load_config_from_yaml(@config_path))
    end

    # Applies a configuration hash. May be called again with a changed
    # configuration: applications are added, reconfigured and removed, while
    # the socket path and the cgroup settings keep the values they were first
    # given (a warning is logged when a later configuration differs).
    #
    # @param configuration [Hash] string-keyed settings; the keys read are
    #   'log_path', 'socket_path', 'cgroups' and 'applications'.
    def configure!(configuration)
      configure_logger(configuration['log_path'])
      configure_socket(configuration['socket_path'])
      # The cgroup settings are resolved before the applications so that they
      # are in place for any cgroup_for_instance call made while applications
      # are being configured.
      # NOTE(review): confirm that Application#configure! creates instances.
      configure_cgroups(configuration['cgroups'] || {})
      configure_applications(configuration['applications'] || {})
    end

    def applications
      @applications.children
    end

    # Returns a +Cgroup+ object given an instance.
    def cgroup_for_instance(instance)
      path = [
        @cgroup_hieararchy_root,
        instance.worker.application.name,
        instance.worker.name,
        instance.index
      ].join('/')
      Cgroup.new(@cgroups_fs_root_path, path, @cgroup_subsystems)
    end

    attr_reader :logger

    protected

      def on_starting
        @applications.start!
        @socket_server.start!
        runnable_state.started!
      end

      def on_started
        @logger.info "Started"
      end

      # Stops the applications, waits up to ten seconds for them to finish,
      # then stops the control socket server.
      def on_stopping
        @applications.stop!
        @logger.info "Waiting for applications to stop"
        begin
          # Timeout.timeout replaces the bare Kernel#timeout, which newer
          # Ruby versions no longer provide.
          Timeout.timeout(10) do
            loop do
              break if @applications.run_state == :stopped
              sleep(0.5)
            end
          end
        rescue Timeout::Error
          @logger.error "Timed out waiting for applications to stop normally, giving up"
        rescue SignalException
          @logger.warn "Interrupted while waiting for applications to stop"
        end
        @socket_server.stop!
        runnable_state.stopped!
      end

      def on_stopped
        @logger.info "Stopped"
      end

      # Reads a YAML file into +target+ and then, recursively, every file
      # matched by the glob pattern(s) under its 'include' key. Later files
      # overwrite top-level keys of earlier ones.
      #
      # @param file_name [String] path of the file to read.
      # @param target [Hash] hash that the settings are merged into.
      # @return [Hash] +target+.
      # @raise [ConfigurationError] if a file is missing or cannot be read.
      def load_config_from_yaml(file_name, target = {})
        File.open(file_name, 'r:utf-8') do |file|
          # NOTE(review): YAML.load can instantiate arbitrary objects; this
          # is acceptable only while configuration files are trusted.
          loaded = YAML.load(file)

          target.merge!(loaded.except('include'))

          if (includes = loaded['include'])
            includes = [includes] unless includes.is_a?(Array)
            includes.flat_map { |s| Dir.glob(s) }.uniq.each do |included_file|
              load_config_from_yaml(included_file, target)
            end
          end
        end
        target
      rescue ConfigurationError
        raise
      rescue Errno::ENOENT
        raise ConfigurationError, "Configuration file #{file_name} not found"
      rescue => e
        raise ConfigurationError, "Could not read configuration file: #{e}"
      end

      # NOTE(review): this builds a copy without the 'include' key but then
      # returns the configuration it was given, so it has no effect. Nothing
      # in this file calls it; behavior is left untouched.
      def expand_includes(configuration)
        if configuration['include']
          new_config = {}
          new_config.merge!(configuration)
          new_config.delete('include')
        end
        configuration
      end

    private

      # Selects the log destination: 'syslog', standard error when no path
      # is configured, or the file at the given path.
      def configure_logger(log_path)
        case log_path
          when 'syslog'
            @logger = SyslogLogger.new('pazuzu')
          when nil
            @logger = Logger.new($stderr)
          else
            @logger = Logger.new(log_path)
        end
        @logger.level = Logger::DEBUG
      end

      # Fixes the control socket path on first use and creates the socket
      # server once.
      def configure_socket(socket_path)
        new_socket_path = socket_path || DEFAULT_SOCKET_PATH
        if @socket_path && new_socket_path != @socket_path
          @logger.warn("Cannot change socket path after start")
        else
          @socket_path = new_socket_path
        end

        @socket_server ||= Control::SocketServer.new(self, @socket_path)
      end

      # Resolves hierarchy root, subsystems and file system root for cgroups.
      # Each setting is fixed the first time it is configured.
      def configure_cgroups(cgroups_config)
        # 'hieararchy_root' is the historical, misspelled key; it is still
        # honoured next to the correctly spelled one.
        new_hierarchy_root = cgroups_config['hierarchy_root'] ||
          cgroups_config['hieararchy_root'] ||
          DEFAULT_CGROUP_HIERARCHY_ROOT
        # Non-mutating gsub so that neither the loaded configuration nor the
        # default constant is modified in place.
        new_hierarchy_root = new_hierarchy_root.gsub(/^\/+/, '').gsub(/\/$/, '')
        if @cgroup_hieararchy_root && new_hierarchy_root != @cgroup_hieararchy_root
          @logger.warn("Cannot change cgroups hiearchy root after start")
        else
          @cgroup_hieararchy_root = new_hierarchy_root
        end

        # flatten.compact always yields an array, so fall back to the
        # defaults on emptiness rather than on nil.
        new_subsystems = [cgroups_config['subsystems']].flatten.compact
        new_subsystems = DEFAULT_CGROUP_SUBSYSTEMS.dup if new_subsystems.empty?
        if @cgroup_subsystems && new_subsystems != @cgroup_subsystems
          @logger.warn("Cannot change cgroups subsystems after start")
        else
          @cgroup_subsystems = new_subsystems
        end

        new_fs_root = (cgroups_config['fs_root'] || DEFAULT_CGROUPS_FS_ROOT).gsub(/\/$/, '')
        if @cgroups_fs_root_path && @cgroups_fs_root_path != new_fs_root
          @logger.warn("Cannot change cgroups root after start")
        else
          @cgroups_fs_root_path = new_fs_root
        end
      end

      # Registers newly configured applications, reconfigures known ones and
      # unregisters those that no longer have a configuration entry.
      def configure_applications(applications_config)
        leftover_applications = @applications.children.dup
        applications_config.each do |name, app_config|
          next unless app_config

          name = name.to_s
          application = @applications.children.find { |a| a.name == name }
          if application
            leftover_applications.delete(application)
          else
            @logger.info("Adding application #{name}")
            application = Application.new(self, name)
            @applications.register(application)
          end
          application.configure!(app_config['procfile'], app_config)
        end
        leftover_applications.each do |application|
          @logger.info("Removing application #{application.name}")
          @applications.unregister(application)
        end
      end

  end

end
|