eye 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGES.md +6 -0
- data/README.md +37 -35
- data/examples/puma.eye +1 -1
- data/examples/test.eye +32 -30
- data/examples/unicorn.eye +1 -1
- data/lib/eye.rb +1 -1
- data/lib/eye/checker.rb +24 -1
- data/lib/eye/checker/cpu.rb +4 -14
- data/lib/eye/checker/cputime.rb +2 -12
- data/lib/eye/checker/file_ctime.rb +2 -3
- data/lib/eye/checker/file_size.rb +8 -8
- data/lib/eye/checker/http.rb +2 -2
- data/lib/eye/checker/memory.rb +4 -14
- data/lib/eye/checker/runtime.rb +2 -12
- data/lib/eye/checker/socket.rb +1 -1
- data/lib/eye/cli.rb +6 -0
- data/lib/eye/cli/commands.rb +2 -3
- data/lib/eye/cli/render.rb +4 -4
- data/lib/eye/cli/server.rb +4 -4
- data/lib/eye/controller.rb +1 -1
- data/lib/eye/controller/load.rb +14 -13
- data/lib/eye/controller/send_command.rb +4 -4
- data/lib/eye/controller/status.rb +2 -1
- data/lib/eye/dsl.rb +1 -1
- data/lib/eye/dsl/child_process_opts.rb +2 -2
- data/lib/eye/dsl/opts.rb +5 -1
- data/lib/eye/dsl/validation.rb +2 -2
- data/lib/eye/group/chain.rb +2 -2
- data/lib/eye/notify.rb +3 -3
- data/lib/eye/process.rb +7 -7
- data/lib/eye/process/children.rb +60 -0
- data/lib/eye/process/commands.rb +40 -37
- data/lib/eye/process/config.rb +5 -5
- data/lib/eye/process/controller.rb +8 -8
- data/lib/eye/process/data.rb +4 -4
- data/lib/eye/process/monitor.rb +17 -17
- data/lib/eye/process/scheduler.rb +1 -1
- data/lib/eye/process/states.rb +3 -3
- data/lib/eye/process/system.rb +3 -3
- data/lib/eye/process/validate.rb +1 -1
- data/lib/eye/process/watchers.rb +6 -6
- data/lib/eye/server.rb +1 -1
- data/lib/eye/system.rb +4 -4
- data/lib/eye/system_resources.rb +3 -3
- data/lib/eye/trigger.rb +4 -6
- data/lib/eye/trigger/flapping.rb +2 -2
- data/lib/eye/trigger/stop_children.rb +14 -0
- metadata +4 -4
- data/lib/eye/process/child.rb +0 -60
- data/lib/eye/trigger/stop_childs.rb +0 -10
@@ -0,0 +1,60 @@
|
|
1
|
+
module Eye::Process::Children
|
2
|
+
|
3
|
+
def add_children
|
4
|
+
add_or_update_children
|
5
|
+
end
|
6
|
+
|
7
|
+
def add_or_update_children
|
8
|
+
return unless self[:monitor_children]
|
9
|
+
return unless self.up?
|
10
|
+
return if @updating_children
|
11
|
+
@updating_children = true
|
12
|
+
|
13
|
+
unless self.pid
|
14
|
+
warn "can't add children; pid not set"
|
15
|
+
return
|
16
|
+
end
|
17
|
+
|
18
|
+
now_children = Eye::SystemResources.children(self.pid)
|
19
|
+
new_children = []
|
20
|
+
exist_children = []
|
21
|
+
|
22
|
+
now_children.each do |child_pid|
|
23
|
+
if self.children[child_pid]
|
24
|
+
exist_children << child_pid
|
25
|
+
else
|
26
|
+
new_children << child_pid
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
removed_children = self.children.keys - now_children
|
31
|
+
|
32
|
+
if new_children.present?
|
33
|
+
new_children.each do |child_pid|
|
34
|
+
self.children[child_pid] = Eye::ChildProcess.new(child_pid, self[:monitor_children], logger.prefix)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
if removed_children.present?
|
39
|
+
removed_children.each{|child_pid| remove_child(child_pid) }
|
40
|
+
end
|
41
|
+
|
42
|
+
h = {:new => new_children.size, :removed => removed_children.size, :exists => exist_children.size }
|
43
|
+
debug "children info: #{ h.inspect }"
|
44
|
+
|
45
|
+
@updating_children = false
|
46
|
+
h
|
47
|
+
end
|
48
|
+
|
49
|
+
def remove_children
|
50
|
+
if children.present?
|
51
|
+
children.keys.each{|child_pid| remove_child(child_pid) }
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def remove_child(child_pid)
|
56
|
+
child = self.children.delete(child_pid)
|
57
|
+
child.destroy if child && child.alive?
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
data/lib/eye/process/commands.rb
CHANGED
@@ -6,7 +6,7 @@ module Eye::Process::Commands
|
|
6
6
|
switch :starting
|
7
7
|
|
8
8
|
unless self[:start_command]
|
9
|
-
warn 'no
|
9
|
+
warn 'no :start_command found, unmonitoring'
|
10
10
|
switch :unmonitoring, Eye::Reason.new(:no_start_command)
|
11
11
|
return :no_start_command
|
12
12
|
end
|
@@ -14,12 +14,13 @@ module Eye::Process::Commands
|
|
14
14
|
result = self[:daemonize] ? daemonize_process : execute_process
|
15
15
|
|
16
16
|
if !result[:error]
|
17
|
-
debug "process
|
17
|
+
debug "process <#{self.pid}> started successfully"
|
18
18
|
switch :started
|
19
19
|
else
|
20
|
-
error "process
|
21
|
-
|
22
|
-
|
20
|
+
error "process <#{self.pid}> failed to start (#{result[:error].inspect})"
|
21
|
+
|
22
|
+
if process_really_running?
|
23
|
+
warn "killing <#{self.pid}> due to error"
|
23
24
|
send_signal(:KILL)
|
24
25
|
sleep 0.2 # little grace
|
25
26
|
end
|
@@ -43,8 +44,8 @@ module Eye::Process::Commands
|
|
43
44
|
|
44
45
|
kill_process
|
45
46
|
|
46
|
-
if
|
47
|
-
warn
|
47
|
+
if process_really_running?
|
48
|
+
warn "process <#{self.pid}> was not stopped; try checking your command/signals or tuning the stop_timeout/stop_grace values"
|
48
49
|
|
49
50
|
switch :unmonitoring, Eye::Reason.new(:'not stopped (soft command)')
|
50
51
|
nil
|
@@ -88,20 +89,21 @@ private
|
|
88
89
|
|
89
90
|
def kill_process
|
90
91
|
unless self.pid
|
91
|
-
error '
|
92
|
+
error 'cannot stop a process without a pid'
|
92
93
|
return
|
93
94
|
end
|
94
95
|
|
95
96
|
if self[:stop_command]
|
96
97
|
cmd = prepare_command(self[:stop_command])
|
97
|
-
res = execute(cmd, config.merge(:timeout => self[:stop_timeout]))
|
98
98
|
info "executing: `#{cmd}` with stop_timeout: #{self[:stop_timeout].to_f}s and stop_grace: #{self[:stop_grace].to_f}s"
|
99
|
+
res = execute(cmd, config.merge(:timeout => self[:stop_timeout]))
|
99
100
|
|
100
101
|
if res[:error]
|
101
|
-
error "raised with #{res[:error].inspect}"
|
102
102
|
|
103
103
|
if res[:error].class == Timeout::Error
|
104
|
-
error
|
104
|
+
error "stop_command failed with #{res[:error].inspect}; try tuning the stop_timeout value"
|
105
|
+
else
|
106
|
+
error "stop_command failed with #{res[:error].inspect}"
|
105
107
|
end
|
106
108
|
end
|
107
109
|
|
@@ -118,7 +120,7 @@ private
|
|
118
120
|
delay = stop_signals.shift
|
119
121
|
signal = stop_signals.shift
|
120
122
|
|
121
|
-
if wait_for_condition(delay.to_f, 0.3){ !
|
123
|
+
if wait_for_condition(delay.to_f, 0.3){ !process_really_running? }
|
122
124
|
info 'has terminated'
|
123
125
|
break
|
124
126
|
end
|
@@ -135,8 +137,8 @@ private
|
|
135
137
|
sleep_grace(:stop_grace)
|
136
138
|
|
137
139
|
# if process not die here, by default we force kill it
|
138
|
-
if
|
139
|
-
warn "process not die after TERM
|
140
|
+
if process_really_running?
|
141
|
+
warn "process <#{self.pid}> did not die after TERM, sending KILL"
|
140
142
|
send_signal(:KILL)
|
141
143
|
sleep 0.1 # little grace
|
142
144
|
end
|
@@ -145,7 +147,7 @@ private
|
|
145
147
|
|
146
148
|
def execute_restart_command
|
147
149
|
unless self.pid
|
148
|
-
error '
|
150
|
+
error 'cannot restart a process without a pid'
|
149
151
|
return
|
150
152
|
end
|
151
153
|
|
@@ -155,10 +157,11 @@ private
|
|
155
157
|
res = execute(cmd, config.merge(:timeout => self[:restart_timeout]))
|
156
158
|
|
157
159
|
if res[:error]
|
158
|
-
error "restart raised with #{res[:error].inspect}"
|
159
160
|
|
160
161
|
if res[:error].class == Timeout::Error
|
161
|
-
error
|
162
|
+
error "restart_command failed with #{res[:error].inspect}; try tuning the restart_timeout value"
|
163
|
+
else
|
164
|
+
error "restart_command failed with #{res[:error].inspect}"
|
162
165
|
end
|
163
166
|
end
|
164
167
|
|
@@ -170,13 +173,14 @@ private
|
|
170
173
|
res = Eye::System.daemonize(self[:start_command], config)
|
171
174
|
start_time = Time.now - time_before
|
172
175
|
|
173
|
-
info "daemonizing: `#{self[:start_command]}` with start_grace: #{self[:start_grace].to_f}s, env: #{self[:environment].inspect}, working_dir: #{self[:working_dir]}
|
176
|
+
info "daemonizing: `#{self[:start_command]}` with start_grace: #{self[:start_grace].to_f}s, env: #{self[:environment].inspect}, working_dir: #{self[:working_dir]}, <#{res[:pid]}>"
|
174
177
|
|
175
178
|
if res[:error]
|
179
|
+
|
176
180
|
if res[:error].message == 'Permission denied - open'
|
177
|
-
error "
|
181
|
+
error "daemonize failed with #{res[:error].inspect}; make sure #{[self[:stdout], self[:stderr]]} are writable"
|
178
182
|
else
|
179
|
-
error "
|
183
|
+
error "daemonize failed with #{res[:error].inspect}"
|
180
184
|
end
|
181
185
|
|
182
186
|
return {:error => res[:error].inspect}
|
@@ -185,15 +189,15 @@ private
|
|
185
189
|
self.pid = res[:pid]
|
186
190
|
|
187
191
|
unless self.pid
|
188
|
-
error '
|
192
|
+
error 'no pid was returned'
|
189
193
|
return {:error => :empty_pid}
|
190
194
|
end
|
191
195
|
|
192
196
|
sleep_grace(:start_grace)
|
193
197
|
|
194
|
-
unless
|
195
|
-
error "process
|
196
|
-
return {:error => :
|
198
|
+
unless process_really_running?
|
199
|
+
error "process <#{self.pid}> not found, it may have crashed (#{check_logs_str})"
|
200
|
+
return {:error => :not_really_running}
|
197
201
|
end
|
198
202
|
|
199
203
|
unless failsafe_save_pid
|
@@ -211,14 +215,13 @@ private
|
|
211
215
|
start_time = Time.now - time_before
|
212
216
|
|
213
217
|
if res[:error]
|
218
|
+
|
214
219
|
if res[:error].message == 'Permission denied - open'
|
215
|
-
error "
|
220
|
+
error "execution failed with #{res[:error].inspect}; ensure that #{[self[:stdout], self[:stderr]]} are writable"
|
221
|
+
elsif res[:error].class == Timeout::Error
|
222
|
+
error "execution failed with #{res[:error].inspect}; try increasing the start_timeout value (the current value of #{self[:start_timeout]}s seems too short)"
|
216
223
|
else
|
217
|
-
error "
|
218
|
-
end
|
219
|
-
|
220
|
-
if res[:error].class == Timeout::Error
|
221
|
-
error "try to increase start_timeout interval (current #{self[:start_timeout]} seems too small, for process self-daemonization)"
|
224
|
+
error "execution failed with #{res[:error].inspect}"
|
222
225
|
end
|
223
226
|
|
224
227
|
return {:error => res[:error].inspect}
|
@@ -227,25 +230,25 @@ private
|
|
227
230
|
sleep_grace(:start_grace)
|
228
231
|
|
229
232
|
unless set_pid_from_file
|
230
|
-
error "exit status #{res[:exitstatus]}, pid_file(#{self[:pid_file_ex]})
|
233
|
+
error "exit status #{res[:exitstatus]}, pid_file (#{self[:pid_file_ex]}) did not appear within the start_grace period (#{self[:start_grace].to_f}s); check your start_command, or tune the start_grace value (eye expect process to create pid_file in self-daemonization mode)"
|
231
234
|
return {:error => :pid_not_found}
|
232
235
|
end
|
233
236
|
|
234
|
-
unless
|
235
|
-
error "exit status #{res[:exitstatus]}, process
|
236
|
-
return {:error => :
|
237
|
+
unless process_really_running?
|
238
|
+
error "exit status #{res[:exitstatus]}, process <#{self.pid}> (from #{self[:pid_file_ex]}) was not found; ensure that the pid_file is being updated correctly (#{check_logs_str})"
|
239
|
+
return {:error => :not_really_running}
|
237
240
|
end
|
238
241
|
|
239
242
|
res[:pid] = self.pid
|
240
|
-
info "
|
243
|
+
info "exit status #{res[:exitstatus]}, process <#{res[:pid]}> (from #{self[:pid_file_ex]}) was found"
|
241
244
|
res
|
242
245
|
end
|
243
246
|
|
244
247
|
def check_logs_str
|
245
248
|
if !self[:stdout] && !self[:stderr]
|
246
|
-
'
|
249
|
+
'you may want to configure stdout/err/all logs for this process'
|
247
250
|
else
|
248
|
-
"check
|
251
|
+
"you should check the process logs #{[self[:stdout], self[:stderr]]}"
|
249
252
|
end
|
250
253
|
end
|
251
254
|
|
data/lib/eye/process/config.rb
CHANGED
@@ -15,7 +15,7 @@ module Eye::Process::Config
|
|
15
15
|
:daemonize => false,
|
16
16
|
:auto_start => true, # auto start on monitor action
|
17
17
|
|
18
|
-
:
|
18
|
+
:children_update_period => 30.seconds,
|
19
19
|
:clear_pid => true # by default clear pid on stop
|
20
20
|
}
|
21
21
|
|
@@ -24,7 +24,7 @@ module Eye::Process::Config
|
|
24
24
|
h[:pid_file_ex] = Eye::System.normalized_file(h[:pid_file], h[:working_dir]) if h[:pid_file]
|
25
25
|
h[:checks] = {} if h[:checks].blank?
|
26
26
|
h[:triggers] = {} if h[:triggers].blank?
|
27
|
-
h[:
|
27
|
+
h[:children_update_period] = h[:monitor_children][:children_update_period] if h[:monitor_children] && h[:monitor_children][:children_update_period]
|
28
28
|
|
29
29
|
# check speedy flapping by default
|
30
30
|
if h[:triggers].blank? || !h[:triggers][:flapping]
|
@@ -52,7 +52,7 @@ module Eye::Process::Config
|
|
52
52
|
@full_name = nil
|
53
53
|
@logger = nil
|
54
54
|
|
55
|
-
debug "
|
55
|
+
debug "updating config to: #{@config.inspect}"
|
56
56
|
|
57
57
|
remove_triggers
|
58
58
|
add_triggers
|
@@ -60,10 +60,10 @@ module Eye::Process::Config
|
|
60
60
|
if up?
|
61
61
|
# rebuild checks for this process
|
62
62
|
remove_watchers
|
63
|
-
|
63
|
+
remove_children
|
64
64
|
|
65
65
|
add_watchers
|
66
|
-
|
66
|
+
add_children
|
67
67
|
end
|
68
68
|
end
|
69
69
|
|
@@ -6,16 +6,16 @@ module Eye::Process::Controller
|
|
6
6
|
|
7
7
|
def start
|
8
8
|
res = if set_pid_from_file
|
9
|
-
if
|
10
|
-
info "process
|
9
|
+
if process_really_running?
|
10
|
+
info "process <#{self.pid}> from pid_file is already running"
|
11
11
|
switch :already_running
|
12
12
|
:ok
|
13
13
|
else
|
14
|
-
info "pid_file found, but process
|
14
|
+
info "pid_file found, but process <#{self.pid}> is down, starting..."
|
15
15
|
start_process
|
16
16
|
end
|
17
17
|
else
|
18
|
-
info 'pid_file not found,
|
18
|
+
info 'pid_file not found, starting...'
|
19
19
|
start_process
|
20
20
|
end
|
21
21
|
|
@@ -40,10 +40,10 @@ module Eye::Process::Controller
|
|
40
40
|
start
|
41
41
|
else
|
42
42
|
if try_update_pid_from_file
|
43
|
-
info "process
|
43
|
+
info "process <#{self.pid}> from pid_file is already running"
|
44
44
|
switch :already_running
|
45
45
|
else
|
46
|
-
warn 'process not found,
|
46
|
+
warn 'process not found, unmonitoring'
|
47
47
|
schedule :unmonitor, Eye::Reason.new(:'not found')
|
48
48
|
end
|
49
49
|
end
|
@@ -60,7 +60,7 @@ module Eye::Process::Controller
|
|
60
60
|
end
|
61
61
|
|
62
62
|
remove_watchers
|
63
|
-
|
63
|
+
remove_children
|
64
64
|
remove_triggers
|
65
65
|
|
66
66
|
terminate
|
@@ -70,4 +70,4 @@ module Eye::Process::Controller
|
|
70
70
|
send_signal(sig) if self.pid
|
71
71
|
end
|
72
72
|
|
73
|
-
end
|
73
|
+
end
|
data/lib/eye/process/data.rb
CHANGED
@@ -19,10 +19,10 @@ module Eye::Process::Data
|
|
19
19
|
def status_data(debug = false)
|
20
20
|
p_st = self_status_data(debug)
|
21
21
|
|
22
|
-
if
|
23
|
-
p_st.merge(:subtree => Eye::Utils::AliveArray.new(
|
22
|
+
if children.present?
|
23
|
+
p_st.merge(:subtree => Eye::Utils::AliveArray.new(children.values).map{|c| c.status_data(debug) } )
|
24
24
|
elsif self[:monitor_children] && self.up?
|
25
|
-
p_st.merge(:subtree => [{name: '=loading
|
25
|
+
p_st.merge(:subtree => [{name: '=loading children='}])
|
26
26
|
else
|
27
27
|
# common state
|
28
28
|
p_st
|
@@ -51,7 +51,7 @@ module Eye::Process::Data
|
|
51
51
|
|
52
52
|
def sub_object?(obj)
|
53
53
|
return false if self.class == Eye::ChildProcess
|
54
|
-
self.
|
54
|
+
self.children.each { |_, child| return true if child == obj }
|
55
55
|
false
|
56
56
|
end
|
57
57
|
|
data/lib/eye/process/monitor.rb
CHANGED
@@ -3,30 +3,30 @@ module Eye::Process::Monitor
|
|
3
3
|
private
|
4
4
|
|
5
5
|
def check_alive_with_refresh_pid_if_needed
|
6
|
-
if
|
6
|
+
if process_really_running?
|
7
7
|
return true
|
8
8
|
|
9
9
|
else
|
10
|
-
warn 'process not
|
10
|
+
warn 'process not really running'
|
11
11
|
try_update_pid_from_file
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
15
|
def try_update_pid_from_file
|
16
|
-
# if pid file was
|
16
|
+
# if pid file was rewritten
|
17
17
|
newpid = load_pid_from_file
|
18
18
|
if newpid != self.pid
|
19
|
-
info "process changed pid to
|
19
|
+
info "process <#{self.pid}> changed pid to <#{newpid}>, updating..." if self.pid
|
20
20
|
self.pid = newpid
|
21
21
|
|
22
|
-
if
|
22
|
+
if process_really_running?
|
23
23
|
return true
|
24
24
|
else
|
25
|
-
warn "process
|
25
|
+
warn "process <#{newpid}> was not found"
|
26
26
|
return false
|
27
27
|
end
|
28
28
|
else
|
29
|
-
debug 'process not found'
|
29
|
+
debug 'process was not found'
|
30
30
|
return false
|
31
31
|
end
|
32
32
|
end
|
@@ -37,8 +37,8 @@ private
|
|
37
37
|
if up?
|
38
38
|
|
39
39
|
# check that process runned
|
40
|
-
unless
|
41
|
-
warn "check_alive: process
|
40
|
+
unless process_really_running?
|
41
|
+
warn "check_alive: process <#{self.pid}> not found"
|
42
42
|
notify :info, 'crashed!'
|
43
43
|
clear_pid_file if control_pid? && self.pid && load_pid_from_file == self.pid
|
44
44
|
|
@@ -48,25 +48,25 @@ private
|
|
48
48
|
ppid = failsafe_load_pid
|
49
49
|
|
50
50
|
if ppid != self.pid
|
51
|
-
msg = "check_alive: pid_file(#{self[:pid_file]})
|
51
|
+
msg = "check_alive: pid_file (#{self[:pid_file]}) changed by itself (<#{self.pid}> => <#{ppid}>)"
|
52
52
|
if control_pid?
|
53
|
-
msg += ",
|
53
|
+
msg += ", reverting to <#{self.pid}> (the pid_file is controlled by eye)"
|
54
54
|
unless failsafe_save_pid
|
55
|
-
msg +=
|
55
|
+
msg += ", pid_file write failed! O_o"
|
56
56
|
end
|
57
57
|
else
|
58
58
|
if ppid == nil
|
59
|
-
msg +=
|
59
|
+
msg += ", reverting to <#{self.pid}> (the pid_file is empty)"
|
60
60
|
unless failsafe_save_pid
|
61
|
-
msg +=
|
61
|
+
msg += ", pid_file write failed! O_o"
|
62
62
|
end
|
63
63
|
elsif (Time.now - pid_file_ctime > REWRITE_FACKUP_PIDFILE_PERIOD)
|
64
|
-
msg += "
|
64
|
+
msg += " over #{REWRITE_FACKUP_PIDFILE_PERIOD}s ago, reverting to <#{self.pid}>"
|
65
65
|
unless failsafe_save_pid
|
66
|
-
msg +=
|
66
|
+
msg += ", pid_file write failed! O_o"
|
67
67
|
end
|
68
68
|
else
|
69
|
-
msg += ',
|
69
|
+
msg += ', ignoring self-managed pid change'
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|