eye 0.5.1 → 0.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGES.md +6 -0
- data/README.md +37 -35
- data/examples/puma.eye +1 -1
- data/examples/test.eye +32 -30
- data/examples/unicorn.eye +1 -1
- data/lib/eye.rb +1 -1
- data/lib/eye/checker.rb +24 -1
- data/lib/eye/checker/cpu.rb +4 -14
- data/lib/eye/checker/cputime.rb +2 -12
- data/lib/eye/checker/file_ctime.rb +2 -3
- data/lib/eye/checker/file_size.rb +8 -8
- data/lib/eye/checker/http.rb +2 -2
- data/lib/eye/checker/memory.rb +4 -14
- data/lib/eye/checker/runtime.rb +2 -12
- data/lib/eye/checker/socket.rb +1 -1
- data/lib/eye/cli.rb +6 -0
- data/lib/eye/cli/commands.rb +2 -3
- data/lib/eye/cli/render.rb +4 -4
- data/lib/eye/cli/server.rb +4 -4
- data/lib/eye/controller.rb +1 -1
- data/lib/eye/controller/load.rb +14 -13
- data/lib/eye/controller/send_command.rb +4 -4
- data/lib/eye/controller/status.rb +2 -1
- data/lib/eye/dsl.rb +1 -1
- data/lib/eye/dsl/child_process_opts.rb +2 -2
- data/lib/eye/dsl/opts.rb +5 -1
- data/lib/eye/dsl/validation.rb +2 -2
- data/lib/eye/group/chain.rb +2 -2
- data/lib/eye/notify.rb +3 -3
- data/lib/eye/process.rb +7 -7
- data/lib/eye/process/children.rb +60 -0
- data/lib/eye/process/commands.rb +40 -37
- data/lib/eye/process/config.rb +5 -5
- data/lib/eye/process/controller.rb +8 -8
- data/lib/eye/process/data.rb +4 -4
- data/lib/eye/process/monitor.rb +17 -17
- data/lib/eye/process/scheduler.rb +1 -1
- data/lib/eye/process/states.rb +3 -3
- data/lib/eye/process/system.rb +3 -3
- data/lib/eye/process/validate.rb +1 -1
- data/lib/eye/process/watchers.rb +6 -6
- data/lib/eye/server.rb +1 -1
- data/lib/eye/system.rb +4 -4
- data/lib/eye/system_resources.rb +3 -3
- data/lib/eye/trigger.rb +4 -6
- data/lib/eye/trigger/flapping.rb +2 -2
- data/lib/eye/trigger/stop_children.rb +14 -0
- metadata +4 -4
- data/lib/eye/process/child.rb +0 -60
- data/lib/eye/trigger/stop_childs.rb +0 -10
@@ -0,0 +1,60 @@
|
|
1
|
+
# Tracking of the child processes of a monitored process.
#
# The including object is expected to provide (none of these are defined here):
# `self[...]` config access, `up?`, `pid`, `children` (a Hash keyed by child pid),
# `logger`, and the `warn` / `debug` logging helpers.
module Eye::Process::Children

  # Entry point used when children tracking is (re)started; delegates to
  # #add_or_update_children.
  def add_children
    add_or_update_children
  end

  # Synchronizes `children` with the child pids currently reported by
  # Eye::SystemResources.children for this process' pid.
  #
  # Does nothing (returns nil) when :monitor_children is not configured, when
  # the process is not up, when another update is already in progress, or when
  # no pid is set.
  #
  # Returns a Hash with the counts of :new, :removed and :exists children.
  def add_or_update_children
    return unless self[:monitor_children]
    return unless self.up?
    return if @updating_children

    @updating_children = true

    begin
      unless self.pid
        warn "can't add children; pid not set"
        return
      end

      now_children = Eye::SystemResources.children(self.pid)

      # Split the currently reported pids into already-tracked and unseen ones.
      exist_children, new_children = now_children.partition { |child_pid| self.children[child_pid] }

      # Tracked pids that are no longer reported for this process.
      removed_children = self.children.keys - now_children

      new_children.each do |child_pid|
        self.children[child_pid] = Eye::ChildProcess.new(child_pid, self[:monitor_children], logger.prefix)
      end

      removed_children.each { |child_pid| remove_child(child_pid) }

      h = { :new => new_children.size, :removed => removed_children.size, :exists => exist_children.size }
      debug "children info: #{ h.inspect }"

      h
    ensure
      # Always release the in-progress flag. Without this, the early return
      # above (pid not set) or any exception would leave the flag set and
      # every later call would bail out at the guard, freezing the children list.
      @updating_children = false
    end
  end

  # Stops tracking every known child (see #remove_child).
  def remove_children
    if children.present?
      children.keys.each { |child_pid| remove_child(child_pid) }
    end
  end

  # Removes the child with the given pid from `children` and destroys the
  # child object if it is still alive. Unknown pids are ignored.
  def remove_child(child_pid)
    child = self.children.delete(child_pid)
    child.destroy if child && child.alive?
  end

end
|
data/lib/eye/process/commands.rb
CHANGED
@@ -6,7 +6,7 @@ module Eye::Process::Commands
|
|
6
6
|
switch :starting
|
7
7
|
|
8
8
|
unless self[:start_command]
|
9
|
-
warn 'no
|
9
|
+
warn 'no :start_command found, unmonitoring'
|
10
10
|
switch :unmonitoring, Eye::Reason.new(:no_start_command)
|
11
11
|
return :no_start_command
|
12
12
|
end
|
@@ -14,12 +14,13 @@ module Eye::Process::Commands
|
|
14
14
|
result = self[:daemonize] ? daemonize_process : execute_process
|
15
15
|
|
16
16
|
if !result[:error]
|
17
|
-
debug "process
|
17
|
+
debug "process <#{self.pid}> started successfully"
|
18
18
|
switch :started
|
19
19
|
else
|
20
|
-
error "process
|
21
|
-
|
22
|
-
|
20
|
+
error "process <#{self.pid}> failed to start (#{result[:error].inspect})"
|
21
|
+
|
22
|
+
if process_really_running?
|
23
|
+
warn "killing <#{self.pid}> due to error"
|
23
24
|
send_signal(:KILL)
|
24
25
|
sleep 0.2 # little grace
|
25
26
|
end
|
@@ -43,8 +44,8 @@ module Eye::Process::Commands
|
|
43
44
|
|
44
45
|
kill_process
|
45
46
|
|
46
|
-
if
|
47
|
-
warn
|
47
|
+
if process_really_running?
|
48
|
+
warn "process <#{self.pid}> was not stopped; try checking your command/signals or tuning the stop_timeout/stop_grace values"
|
48
49
|
|
49
50
|
switch :unmonitoring, Eye::Reason.new(:'not stopped (soft command)')
|
50
51
|
nil
|
@@ -88,20 +89,21 @@ private
|
|
88
89
|
|
89
90
|
def kill_process
|
90
91
|
unless self.pid
|
91
|
-
error '
|
92
|
+
error 'cannot stop a process without a pid'
|
92
93
|
return
|
93
94
|
end
|
94
95
|
|
95
96
|
if self[:stop_command]
|
96
97
|
cmd = prepare_command(self[:stop_command])
|
97
|
-
res = execute(cmd, config.merge(:timeout => self[:stop_timeout]))
|
98
98
|
info "executing: `#{cmd}` with stop_timeout: #{self[:stop_timeout].to_f}s and stop_grace: #{self[:stop_grace].to_f}s"
|
99
|
+
res = execute(cmd, config.merge(:timeout => self[:stop_timeout]))
|
99
100
|
|
100
101
|
if res[:error]
|
101
|
-
error "raised with #{res[:error].inspect}"
|
102
102
|
|
103
103
|
if res[:error].class == Timeout::Error
|
104
|
-
error
|
104
|
+
error "stop_command failed with #{res[:error].inspect}; try tuning the stop_timeout value"
|
105
|
+
else
|
106
|
+
error "stop_command failed with #{res[:error].inspect}"
|
105
107
|
end
|
106
108
|
end
|
107
109
|
|
@@ -118,7 +120,7 @@ private
|
|
118
120
|
delay = stop_signals.shift
|
119
121
|
signal = stop_signals.shift
|
120
122
|
|
121
|
-
if wait_for_condition(delay.to_f, 0.3){ !
|
123
|
+
if wait_for_condition(delay.to_f, 0.3){ !process_really_running? }
|
122
124
|
info 'has terminated'
|
123
125
|
break
|
124
126
|
end
|
@@ -135,8 +137,8 @@ private
|
|
135
137
|
sleep_grace(:stop_grace)
|
136
138
|
|
137
139
|
# if process not die here, by default we force kill it
|
138
|
-
if
|
139
|
-
warn "process not die after TERM
|
140
|
+
if process_really_running?
|
141
|
+
warn "process <#{self.pid}> did not die after TERM, sending KILL"
|
140
142
|
send_signal(:KILL)
|
141
143
|
sleep 0.1 # little grace
|
142
144
|
end
|
@@ -145,7 +147,7 @@ private
|
|
145
147
|
|
146
148
|
def execute_restart_command
|
147
149
|
unless self.pid
|
148
|
-
error '
|
150
|
+
error 'cannot restart a process without a pid'
|
149
151
|
return
|
150
152
|
end
|
151
153
|
|
@@ -155,10 +157,11 @@ private
|
|
155
157
|
res = execute(cmd, config.merge(:timeout => self[:restart_timeout]))
|
156
158
|
|
157
159
|
if res[:error]
|
158
|
-
error "restart raised with #{res[:error].inspect}"
|
159
160
|
|
160
161
|
if res[:error].class == Timeout::Error
|
161
|
-
error
|
162
|
+
error "restart_command failed with #{res[:error].inspect}; try tuning the restart_timeout value"
|
163
|
+
else
|
164
|
+
error "restart_command failed with #{res[:error].inspect}"
|
162
165
|
end
|
163
166
|
end
|
164
167
|
|
@@ -170,13 +173,14 @@ private
|
|
170
173
|
res = Eye::System.daemonize(self[:start_command], config)
|
171
174
|
start_time = Time.now - time_before
|
172
175
|
|
173
|
-
info "daemonizing: `#{self[:start_command]}` with start_grace: #{self[:start_grace].to_f}s, env: #{self[:environment].inspect}, working_dir: #{self[:working_dir]}
|
176
|
+
info "daemonizing: `#{self[:start_command]}` with start_grace: #{self[:start_grace].to_f}s, env: #{self[:environment].inspect}, working_dir: #{self[:working_dir]}, <#{res[:pid]}>"
|
174
177
|
|
175
178
|
if res[:error]
|
179
|
+
|
176
180
|
if res[:error].message == 'Permission denied - open'
|
177
|
-
error "
|
181
|
+
error "daemonize failed with #{res[:error].inspect}; make sure #{[self[:stdout], self[:stderr]]} are writable"
|
178
182
|
else
|
179
|
-
error "
|
183
|
+
error "daemonize failed with #{res[:error].inspect}"
|
180
184
|
end
|
181
185
|
|
182
186
|
return {:error => res[:error].inspect}
|
@@ -185,15 +189,15 @@ private
|
|
185
189
|
self.pid = res[:pid]
|
186
190
|
|
187
191
|
unless self.pid
|
188
|
-
error '
|
192
|
+
error 'no pid was returned'
|
189
193
|
return {:error => :empty_pid}
|
190
194
|
end
|
191
195
|
|
192
196
|
sleep_grace(:start_grace)
|
193
197
|
|
194
|
-
unless
|
195
|
-
error "process
|
196
|
-
return {:error => :
|
198
|
+
unless process_really_running?
|
199
|
+
error "process <#{self.pid}> not found, it may have crashed (#{check_logs_str})"
|
200
|
+
return {:error => :not_really_running}
|
197
201
|
end
|
198
202
|
|
199
203
|
unless failsafe_save_pid
|
@@ -211,14 +215,13 @@ private
|
|
211
215
|
start_time = Time.now - time_before
|
212
216
|
|
213
217
|
if res[:error]
|
218
|
+
|
214
219
|
if res[:error].message == 'Permission denied - open'
|
215
|
-
error "
|
220
|
+
error "execution failed with #{res[:error].inspect}; ensure that #{[self[:stdout], self[:stderr]]} are writable"
|
221
|
+
elsif res[:error].class == Timeout::Error
|
222
|
+
error "execution failed with #{res[:error].inspect}; try increasing the start_timeout value (the current value of #{self[:start_timeout]}s seems too short)"
|
216
223
|
else
|
217
|
-
error "
|
218
|
-
end
|
219
|
-
|
220
|
-
if res[:error].class == Timeout::Error
|
221
|
-
error "try to increase start_timeout interval (current #{self[:start_timeout]} seems too small, for process self-daemonization)"
|
224
|
+
error "execution failed with #{res[:error].inspect}"
|
222
225
|
end
|
223
226
|
|
224
227
|
return {:error => res[:error].inspect}
|
@@ -227,25 +230,25 @@ private
|
|
227
230
|
sleep_grace(:start_grace)
|
228
231
|
|
229
232
|
unless set_pid_from_file
|
230
|
-
error "exit status #{res[:exitstatus]}, pid_file(#{self[:pid_file_ex]})
|
233
|
+
error "exit status #{res[:exitstatus]}, pid_file (#{self[:pid_file_ex]}) did not appear within the start_grace period (#{self[:start_grace].to_f}s); check your start_command, or tune the start_grace value (eye expect process to create pid_file in self-daemonization mode)"
|
231
234
|
return {:error => :pid_not_found}
|
232
235
|
end
|
233
236
|
|
234
|
-
unless
|
235
|
-
error "exit status #{res[:exitstatus]}, process
|
236
|
-
return {:error => :
|
237
|
+
unless process_really_running?
|
238
|
+
error "exit status #{res[:exitstatus]}, process <#{self.pid}> (from #{self[:pid_file_ex]}) was not found; ensure that the pid_file is being updated correctly (#{check_logs_str})"
|
239
|
+
return {:error => :not_really_running}
|
237
240
|
end
|
238
241
|
|
239
242
|
res[:pid] = self.pid
|
240
|
-
info "
|
243
|
+
info "exit status #{res[:exitstatus]}, process <#{res[:pid]}> (from #{self[:pid_file_ex]}) was found"
|
241
244
|
res
|
242
245
|
end
|
243
246
|
|
244
247
|
def check_logs_str
|
245
248
|
if !self[:stdout] && !self[:stderr]
|
246
|
-
'
|
249
|
+
'you may want to configure stdout/err/all logs for this process'
|
247
250
|
else
|
248
|
-
"check
|
251
|
+
"you should check the process logs #{[self[:stdout], self[:stderr]]}"
|
249
252
|
end
|
250
253
|
end
|
251
254
|
|
data/lib/eye/process/config.rb
CHANGED
@@ -15,7 +15,7 @@ module Eye::Process::Config
|
|
15
15
|
:daemonize => false,
|
16
16
|
:auto_start => true, # auto start on monitor action
|
17
17
|
|
18
|
-
:
|
18
|
+
:children_update_period => 30.seconds,
|
19
19
|
:clear_pid => true # by default clear pid on stop
|
20
20
|
}
|
21
21
|
|
@@ -24,7 +24,7 @@ module Eye::Process::Config
|
|
24
24
|
h[:pid_file_ex] = Eye::System.normalized_file(h[:pid_file], h[:working_dir]) if h[:pid_file]
|
25
25
|
h[:checks] = {} if h[:checks].blank?
|
26
26
|
h[:triggers] = {} if h[:triggers].blank?
|
27
|
-
h[:
|
27
|
+
h[:children_update_period] = h[:monitor_children][:children_update_period] if h[:monitor_children] && h[:monitor_children][:children_update_period]
|
28
28
|
|
29
29
|
# check speedy flapping by default
|
30
30
|
if h[:triggers].blank? || !h[:triggers][:flapping]
|
@@ -52,7 +52,7 @@ module Eye::Process::Config
|
|
52
52
|
@full_name = nil
|
53
53
|
@logger = nil
|
54
54
|
|
55
|
-
debug "
|
55
|
+
debug "updating config to: #{@config.inspect}"
|
56
56
|
|
57
57
|
remove_triggers
|
58
58
|
add_triggers
|
@@ -60,10 +60,10 @@ module Eye::Process::Config
|
|
60
60
|
if up?
|
61
61
|
# rebuild checks for this process
|
62
62
|
remove_watchers
|
63
|
-
|
63
|
+
remove_children
|
64
64
|
|
65
65
|
add_watchers
|
66
|
-
|
66
|
+
add_children
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
@@ -6,16 +6,16 @@ module Eye::Process::Controller
|
|
6
6
|
|
7
7
|
def start
|
8
8
|
res = if set_pid_from_file
|
9
|
-
if
|
10
|
-
info "process
|
9
|
+
if process_really_running?
|
10
|
+
info "process <#{self.pid}> from pid_file is already running"
|
11
11
|
switch :already_running
|
12
12
|
:ok
|
13
13
|
else
|
14
|
-
info "pid_file found, but process
|
14
|
+
info "pid_file found, but process <#{self.pid}> is down, starting..."
|
15
15
|
start_process
|
16
16
|
end
|
17
17
|
else
|
18
|
-
info 'pid_file not found,
|
18
|
+
info 'pid_file not found, starting...'
|
19
19
|
start_process
|
20
20
|
end
|
21
21
|
|
@@ -40,10 +40,10 @@ module Eye::Process::Controller
|
|
40
40
|
start
|
41
41
|
else
|
42
42
|
if try_update_pid_from_file
|
43
|
-
info "process
|
43
|
+
info "process <#{self.pid}> from pid_file is already running"
|
44
44
|
switch :already_running
|
45
45
|
else
|
46
|
-
warn 'process not found,
|
46
|
+
warn 'process not found, unmonitoring'
|
47
47
|
schedule :unmonitor, Eye::Reason.new(:'not found')
|
48
48
|
end
|
49
49
|
end
|
@@ -60,7 +60,7 @@ module Eye::Process::Controller
|
|
60
60
|
end
|
61
61
|
|
62
62
|
remove_watchers
|
63
|
-
|
63
|
+
remove_children
|
64
64
|
remove_triggers
|
65
65
|
|
66
66
|
terminate
|
@@ -70,4 +70,4 @@ module Eye::Process::Controller
|
|
70
70
|
send_signal(sig) if self.pid
|
71
71
|
end
|
72
72
|
|
73
|
-
end
|
73
|
+
end
|
data/lib/eye/process/data.rb
CHANGED
@@ -19,10 +19,10 @@ module Eye::Process::Data
|
|
19
19
|
def status_data(debug = false)
|
20
20
|
p_st = self_status_data(debug)
|
21
21
|
|
22
|
-
if
|
23
|
-
p_st.merge(:subtree => Eye::Utils::AliveArray.new(
|
22
|
+
if children.present?
|
23
|
+
p_st.merge(:subtree => Eye::Utils::AliveArray.new(children.values).map{|c| c.status_data(debug) } )
|
24
24
|
elsif self[:monitor_children] && self.up?
|
25
|
-
p_st.merge(:subtree => [{name: '=loading
|
25
|
+
p_st.merge(:subtree => [{name: '=loading children='}])
|
26
26
|
else
|
27
27
|
# common state
|
28
28
|
p_st
|
@@ -51,7 +51,7 @@ module Eye::Process::Data
|
|
51
51
|
|
52
52
|
def sub_object?(obj)
|
53
53
|
return false if self.class == Eye::ChildProcess
|
54
|
-
self.
|
54
|
+
self.children.each { |_, child| return true if child == obj }
|
55
55
|
false
|
56
56
|
end
|
57
57
|
|
data/lib/eye/process/monitor.rb
CHANGED
@@ -3,30 +3,30 @@ module Eye::Process::Monitor
|
|
3
3
|
private
|
4
4
|
|
5
5
|
def check_alive_with_refresh_pid_if_needed
|
6
|
-
if
|
6
|
+
if process_really_running?
|
7
7
|
return true
|
8
8
|
|
9
9
|
else
|
10
|
-
warn 'process not
|
10
|
+
warn 'process not really running'
|
11
11
|
try_update_pid_from_file
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
15
|
def try_update_pid_from_file
|
16
|
-
# if pid file was
|
16
|
+
# if pid file was rewritten
|
17
17
|
newpid = load_pid_from_file
|
18
18
|
if newpid != self.pid
|
19
|
-
info "process changed pid to
|
19
|
+
info "process <#{self.pid}> changed pid to <#{newpid}>, updating..." if self.pid
|
20
20
|
self.pid = newpid
|
21
21
|
|
22
|
-
if
|
22
|
+
if process_really_running?
|
23
23
|
return true
|
24
24
|
else
|
25
|
-
warn "process
|
25
|
+
warn "process <#{newpid}> was not found"
|
26
26
|
return false
|
27
27
|
end
|
28
28
|
else
|
29
|
-
debug 'process not found'
|
29
|
+
debug 'process was not found'
|
30
30
|
return false
|
31
31
|
end
|
32
32
|
end
|
@@ -37,8 +37,8 @@ private
|
|
37
37
|
if up?
|
38
38
|
|
39
39
|
# check that process runned
|
40
|
-
unless
|
41
|
-
warn "check_alive: process
|
40
|
+
unless process_really_running?
|
41
|
+
warn "check_alive: process <#{self.pid}> not found"
|
42
42
|
notify :info, 'crashed!'
|
43
43
|
clear_pid_file if control_pid? && self.pid && load_pid_from_file == self.pid
|
44
44
|
|
@@ -48,25 +48,25 @@ private
|
|
48
48
|
ppid = failsafe_load_pid
|
49
49
|
|
50
50
|
if ppid != self.pid
|
51
|
-
msg = "check_alive: pid_file(#{self[:pid_file]})
|
51
|
+
msg = "check_alive: pid_file (#{self[:pid_file]}) changed by itself (<#{self.pid}> => <#{ppid}>)"
|
52
52
|
if control_pid?
|
53
|
-
msg += ",
|
53
|
+
msg += ", reverting to <#{self.pid}> (the pid_file is controlled by eye)"
|
54
54
|
unless failsafe_save_pid
|
55
|
-
msg +=
|
55
|
+
msg += ", pid_file write failed! O_o"
|
56
56
|
end
|
57
57
|
else
|
58
58
|
if ppid == nil
|
59
|
-
msg +=
|
59
|
+
msg += ", reverting to <#{self.pid}> (the pid_file is empty)"
|
60
60
|
unless failsafe_save_pid
|
61
|
-
msg +=
|
61
|
+
msg += ", pid_file write failed! O_o"
|
62
62
|
end
|
63
63
|
elsif (Time.now - pid_file_ctime > REWRITE_FACKUP_PIDFILE_PERIOD)
|
64
|
-
msg += "
|
64
|
+
msg += " over #{REWRITE_FACKUP_PIDFILE_PERIOD}s ago, reverting to <#{self.pid}>"
|
65
65
|
unless failsafe_save_pid
|
66
|
-
msg +=
|
66
|
+
msg += ", pid_file write failed! O_o"
|
67
67
|
end
|
68
68
|
else
|
69
|
-
msg += ',
|
69
|
+
msg += ', ignoring self-managed pid change'
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|