eye 0.8.celluloid15 → 0.8.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -5
- data/CHANGES.md +3 -7
- data/README.md +2 -5
- data/Rakefile +6 -6
- data/bin/leye +4 -9
- data/bin/loader_eye +15 -14
- data/examples/delayed_job.eye +3 -3
- data/examples/dependency.eye +11 -10
- data/examples/notify.eye +4 -3
- data/examples/plugin/main.eye +5 -5
- data/examples/plugin/plugin.rb +2 -10
- data/examples/process_thin.rb +8 -8
- data/examples/processes/em.rb +12 -18
- data/examples/processes/forking.rb +5 -5
- data/examples/processes/sample.rb +44 -46
- data/examples/puma.eye +8 -9
- data/examples/rbenv.eye +5 -5
- data/examples/sidekiq.eye +3 -3
- data/examples/stress_test.eye +4 -4
- data/examples/syslog.eye +1 -1
- data/examples/test.eye +2 -1
- data/examples/thin-farm.eye +8 -7
- data/examples/triggers.eye +15 -13
- data/examples/unicorn.eye +13 -12
- data/eye.gemspec +14 -16
- data/lib/eye.rb +3 -2
- data/lib/eye/application.rb +6 -5
- data/lib/eye/checker.rb +25 -44
- data/lib/eye/checker/children_count.rb +1 -1
- data/lib/eye/checker/file_ctime.rb +1 -1
- data/lib/eye/checker/http.rb +15 -13
- data/lib/eye/checker/nop.rb +0 -1
- data/lib/eye/checker/socket.rb +63 -60
- data/lib/eye/checker/ssl_socket.rb +5 -5
- data/lib/eye/child_process.rb +4 -6
- data/lib/eye/cli.rb +46 -74
- data/lib/eye/cli/commands.rb +5 -4
- data/lib/eye/cli/render.rb +41 -61
- data/lib/eye/cli/server.rb +16 -19
- data/lib/eye/client.rb +0 -1
- data/lib/eye/config.rb +33 -36
- data/lib/eye/controller.rb +3 -2
- data/lib/eye/controller/commands.rb +1 -1
- data/lib/eye/controller/helpers.rb +2 -2
- data/lib/eye/controller/load.rb +17 -19
- data/lib/eye/controller/options.rb +5 -1
- data/lib/eye/controller/send_command.rb +23 -21
- data/lib/eye/controller/status.rb +15 -17
- data/lib/eye/dsl.rb +1 -6
- data/lib/eye/dsl/application_opts.rb +3 -4
- data/lib/eye/dsl/chain.rb +2 -2
- data/lib/eye/dsl/child_process_opts.rb +3 -3
- data/lib/eye/dsl/config_opts.rb +7 -7
- data/lib/eye/dsl/group_opts.rb +3 -3
- data/lib/eye/dsl/helpers.rb +1 -1
- data/lib/eye/dsl/main.rb +3 -4
- data/lib/eye/dsl/opts.rb +28 -31
- data/lib/eye/dsl/process_opts.rb +7 -13
- data/lib/eye/dsl/pure_opts.rb +9 -13
- data/lib/eye/dsl/validation.rb +35 -48
- data/lib/eye/group.rb +8 -23
- data/lib/eye/group/chain.rb +6 -6
- data/lib/eye/loader.rb +3 -3
- data/lib/eye/local.rb +4 -9
- data/lib/eye/logger.rb +4 -11
- data/lib/eye/notify.rb +6 -10
- data/lib/eye/notify/jabber.rb +1 -1
- data/lib/eye/notify/mail.rb +2 -2
- data/lib/eye/notify/slack.rb +3 -4
- data/lib/eye/process.rb +0 -2
- data/lib/eye/process/children.rb +4 -4
- data/lib/eye/process/commands.rb +39 -38
- data/lib/eye/process/config.rb +16 -22
- data/lib/eye/process/controller.rb +19 -5
- data/lib/eye/process/data.rb +9 -11
- data/lib/eye/process/monitor.rb +76 -86
- data/lib/eye/process/notify.rb +10 -10
- data/lib/eye/process/scheduler.rb +31 -36
- data/lib/eye/process/states.rb +5 -7
- data/lib/eye/process/states_history.rb +3 -9
- data/lib/eye/process/system.rb +20 -35
- data/lib/eye/process/trigger.rb +5 -1
- data/lib/eye/process/watchers.rb +9 -12
- data/lib/eye/reason.rb +1 -4
- data/lib/eye/server.rb +1 -2
- data/lib/eye/system.rb +15 -22
- data/lib/eye/system_resources.rb +9 -18
- data/lib/eye/trigger.rb +16 -18
- data/lib/eye/trigger/check_dependency.rb +4 -7
- data/lib/eye/trigger/flapping.rb +7 -24
- data/lib/eye/trigger/starting_guard.rb +6 -7
- data/lib/eye/trigger/stop_children.rb +2 -2
- data/lib/eye/trigger/transition.rb +1 -1
- data/lib/eye/trigger/wait_dependency.rb +2 -3
- data/lib/eye/utils.rb +3 -4
- data/lib/eye/utils/alive_array.rb +4 -9
- data/lib/eye/utils/celluloid_chain.rb +10 -12
- data/lib/eye/utils/leak_19.rb +10 -0
- data/lib/eye/utils/mini_active_support.rb +16 -16
- data/lib/eye/utils/pmap.rb +0 -2
- data/lib/eye/utils/tail.rb +2 -2
- metadata +8 -39
- data/.rubocop.yml +0 -141
- data/examples/custom_check.eye +0 -24
- data/examples/custom_trigger.eye +0 -30
- data/examples/leye_example/Eyefile +0 -10
data/lib/eye/notify/slack.rb
CHANGED
@@ -8,8 +8,8 @@ class Eye::Notify::Slack < Eye::Notify
|
|
8
8
|
# end
|
9
9
|
|
10
10
|
param :webhook_url, String, true
|
11
|
-
param :channel, String, nil,
|
12
|
-
param :username, String, nil,
|
11
|
+
param :channel, String, nil, "#default"
|
12
|
+
param :username, String, nil, "eye"
|
13
13
|
|
14
14
|
param :icon, String
|
15
15
|
|
@@ -35,5 +35,4 @@ class Eye::Notify::Slack < Eye::Notify
|
|
35
35
|
payload << "> #{msg_message}"
|
36
36
|
payload
|
37
37
|
end
|
38
|
-
|
39
|
-
end
|
38
|
+
end
|
data/lib/eye/process.rb
CHANGED
data/lib/eye/process/children.rb
CHANGED
@@ -31,17 +31,17 @@ module Eye::Process::Children
|
|
31
31
|
|
32
32
|
if new_children.present?
|
33
33
|
new_children.each do |child_pid|
|
34
|
-
cfg = self[:monitor_children].try :update, notify
|
34
|
+
cfg = self[:monitor_children].try :update, :notify => self[:notify]
|
35
35
|
self.children[child_pid] = Eye::ChildProcess.new(child_pid, cfg, logger.prefix, current_actor)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
39
|
if removed_children.present?
|
40
|
-
removed_children.each
|
40
|
+
removed_children.each{|child_pid| remove_child(child_pid) }
|
41
41
|
end
|
42
42
|
|
43
|
-
h = {
|
44
|
-
debug { "children info: #{h.inspect}" }
|
43
|
+
h = {:new => new_children.size, :removed => removed_children.size, :exists => exist_children.size }
|
44
|
+
debug { "children info: #{ h.inspect }" }
|
45
45
|
|
46
46
|
@updating_children = false
|
47
47
|
h
|
data/lib/eye/process/commands.rb
CHANGED
@@ -25,6 +25,7 @@ module Eye::Process::Commands
|
|
25
25
|
sleep 0.2 # little grace
|
26
26
|
end
|
27
27
|
|
28
|
+
self.pid = nil
|
28
29
|
switch :crashed
|
29
30
|
end
|
30
31
|
|
@@ -41,8 +42,6 @@ module Eye::Process::Commands
|
|
41
42
|
|
42
43
|
switch :stopping
|
43
44
|
|
44
|
-
return unless check_identity
|
45
|
-
|
46
45
|
kill_process
|
47
46
|
|
48
47
|
if process_really_running?
|
@@ -70,11 +69,9 @@ module Eye::Process::Commands
|
|
70
69
|
switch :restarting
|
71
70
|
|
72
71
|
if self[:restart_command]
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
end
|
77
|
-
result = process_really_running? || (load_external_pid_file == :ok)
|
72
|
+
execute_restart_command
|
73
|
+
sleep_grace(:restart_grace)
|
74
|
+
result = check_alive_with_refresh_pid_if_needed
|
78
75
|
switch(result ? :restarted : :crashed)
|
79
76
|
else
|
80
77
|
stop_process
|
@@ -99,9 +96,10 @@ private
|
|
99
96
|
if self[:stop_command]
|
100
97
|
cmd = prepare_command(self[:stop_command])
|
101
98
|
info "executing: `#{cmd}` with stop_timeout: #{self[:stop_timeout].to_f}s and stop_grace: #{self[:stop_grace].to_f}s"
|
102
|
-
res = execute(cmd, config.merge(timeout
|
99
|
+
res = execute(cmd, config.merge(:timeout => self[:stop_timeout]))
|
103
100
|
|
104
101
|
if res[:error]
|
102
|
+
|
105
103
|
if res[:error].class == Timeout::Error
|
106
104
|
error "stop_command failed with #{res[:error].inspect}; try tuning the stop_timeout value"
|
107
105
|
else
|
@@ -114,6 +112,7 @@ private
|
|
114
112
|
elsif self[:stop_signals]
|
115
113
|
info "executing stop_signals #{self[:stop_signals].inspect}"
|
116
114
|
stop_signals = self[:stop_signals].clone
|
115
|
+
|
117
116
|
signal = stop_signals.shift
|
118
117
|
send_signal(signal)
|
119
118
|
|
@@ -121,7 +120,7 @@ private
|
|
121
120
|
delay = stop_signals.shift
|
122
121
|
signal = stop_signals.shift
|
123
122
|
|
124
|
-
if wait_for_condition(delay.to_f, 0.3)
|
123
|
+
if wait_for_condition(delay.to_f, 0.3){ !process_really_running? }
|
125
124
|
info 'has terminated'
|
126
125
|
break
|
127
126
|
end
|
@@ -134,6 +133,7 @@ private
|
|
134
133
|
else # default command
|
135
134
|
debug { "executing: `kill -TERM #{self.pid}` with stop_grace: #{self[:stop_grace].to_f}s" }
|
136
135
|
send_signal(:TERM)
|
136
|
+
|
137
137
|
sleep_grace(:stop_grace)
|
138
138
|
|
139
139
|
# if process not die here, by default we force kill it
|
@@ -154,7 +154,7 @@ private
|
|
154
154
|
cmd = prepare_command(self[:restart_command])
|
155
155
|
info "executing: `#{cmd}` with restart_timeout: #{self[:restart_timeout].to_f}s and restart_grace: #{self[:restart_grace].to_f}s"
|
156
156
|
|
157
|
-
res = execute(cmd, config.merge(timeout
|
157
|
+
res = execute(cmd, config.merge(:timeout => self[:restart_timeout]))
|
158
158
|
|
159
159
|
if res[:error]
|
160
160
|
|
@@ -169,9 +169,11 @@ private
|
|
169
169
|
end
|
170
170
|
|
171
171
|
def daemonize_process
|
172
|
+
time_before = Time.now
|
172
173
|
res = daemonize(self[:start_command], config)
|
173
|
-
|
174
|
-
|
174
|
+
start_time = Time.now - time_before
|
175
|
+
|
176
|
+
info "daemonizing: `#{self[:start_command]}` with start_grace: #{self[:start_grace].to_f}s, env: '#{environment_string}', <#{res[:pid]}> (in #{self[:working_dir]})"
|
175
177
|
|
176
178
|
if res[:error]
|
177
179
|
|
@@ -181,21 +183,21 @@ private
|
|
181
183
|
error "daemonize failed with #{res[:error].inspect}"
|
182
184
|
end
|
183
185
|
|
184
|
-
return {
|
186
|
+
return {:error => res[:error].inspect}
|
185
187
|
end
|
186
188
|
|
187
189
|
self.pid = res[:pid]
|
188
190
|
|
189
191
|
unless self.pid
|
190
192
|
error 'no pid was returned'
|
191
|
-
return {
|
193
|
+
return {:error => :empty_pid}
|
192
194
|
end
|
193
195
|
|
194
196
|
sleep_grace(:start_grace)
|
195
197
|
|
196
198
|
unless process_really_running?
|
197
199
|
error "process <#{self.pid}> not found, it may have crashed (#{check_logs_str})"
|
198
|
-
return {
|
200
|
+
return {:error => :not_really_running}
|
199
201
|
end
|
200
202
|
|
201
203
|
# if we using leaf child stratedy, pid should be used as last child process
|
@@ -210,46 +212,47 @@ private
|
|
210
212
|
end
|
211
213
|
|
212
214
|
if control_pid? && !failsafe_save_pid
|
213
|
-
return {
|
215
|
+
return {:error => :cant_write_pid}
|
214
216
|
end
|
215
217
|
|
216
218
|
res
|
217
219
|
end
|
218
220
|
|
219
221
|
def execute_process
|
220
|
-
info "executing: `#{self[:start_command]}` with start_timeout: #{config[:start_timeout].to_f}s"
|
221
|
-
|
222
|
-
|
222
|
+
info "executing: `#{self[:start_command]}` with start_timeout: #{config[:start_timeout].to_f}s, start_grace: #{self[:start_grace].to_f}s, env: '#{environment_string}' (in #{self[:working_dir]})"
|
223
|
+
time_before = Time.now
|
224
|
+
|
225
|
+
res = execute(self[:start_command], config.merge(:timeout => config[:start_timeout]))
|
226
|
+
start_time = Time.now - time_before
|
223
227
|
|
224
228
|
if res[:error]
|
225
229
|
|
226
230
|
if res[:error].message == 'Permission denied - open'
|
227
231
|
error "execution failed with #{res[:error].inspect}; ensure that #{[self[:stdout], self[:stderr]]} are writable"
|
228
232
|
elsif res[:error].class == Timeout::Error
|
229
|
-
error "execution failed with #{res[:error].inspect}; try increasing the start_timeout value"
|
230
|
-
"(the current value of #{self[:start_timeout]}s seems too short)"
|
233
|
+
error "execution failed with #{res[:error].inspect}; try increasing the start_timeout value (the current value of #{self[:start_timeout]}s seems too short)"
|
231
234
|
else
|
232
235
|
error "execution failed with #{res[:error].inspect}"
|
233
236
|
end
|
234
237
|
|
235
|
-
return {
|
238
|
+
return {:error => res[:error].inspect}
|
236
239
|
end
|
237
240
|
|
238
241
|
sleep_grace(:start_grace)
|
239
242
|
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
when :no_pid_file
|
244
|
-
error "exit status #{res[:exitstatus]}, pid_file (#{self[:pid_file_ex]}) did not appear within the " \
|
245
|
-
"start_grace period (#{self[:start_grace].to_f}s); check your start_command, or tune the start_grace " \
|
246
|
-
'value (eye expect process to create pid_file in self-daemonization mode)'
|
247
|
-
{ error: :pid_not_found }
|
248
|
-
when :not_running
|
249
|
-
error "exit status #{res[:exitstatus]}, process <#{@last_loaded_pid}> (from #{self[:pid_file_ex]}) was not found; " \
|
250
|
-
"ensure that the pid_file is being updated correctly (#{check_logs_str})"
|
251
|
-
{ error: :not_really_running }
|
243
|
+
unless set_pid_from_file
|
244
|
+
error "exit status #{res[:exitstatus]}, pid_file (#{self[:pid_file_ex]}) did not appear within the start_grace period (#{self[:start_grace].to_f}s); check your start_command, or tune the start_grace value (eye expect process to create pid_file in self-daemonization mode)"
|
245
|
+
return {:error => :pid_not_found}
|
252
246
|
end
|
247
|
+
|
248
|
+
unless process_really_running?
|
249
|
+
error "exit status #{res[:exitstatus]}, process <#{self.pid}> (from #{self[:pid_file_ex]}) was not found; ensure that the pid_file is being updated correctly (#{check_logs_str})"
|
250
|
+
return {:error => :not_really_running}
|
251
|
+
end
|
252
|
+
|
253
|
+
res[:pid] = self.pid
|
254
|
+
info "exit status #{res[:exitstatus]}, process <#{res[:pid]}> (from #{self[:pid_file_ex]}) was found"
|
255
|
+
res
|
253
256
|
end
|
254
257
|
|
255
258
|
def check_logs_str
|
@@ -275,14 +278,12 @@ private
|
|
275
278
|
end
|
276
279
|
|
277
280
|
def execute_user_command(name, cmd)
|
278
|
-
return unless check_identity
|
279
|
-
|
280
281
|
info "executing user command #{name} #{cmd.inspect}"
|
281
282
|
|
282
283
|
# cmd is string, or array of signals
|
283
284
|
if cmd.is_a?(String)
|
284
285
|
cmd = prepare_command(cmd)
|
285
|
-
res = execute(cmd, config.merge(timeout
|
286
|
+
res = execute(cmd, config.merge(:timeout => 120))
|
286
287
|
error "cmd #{cmd} error #{res.inspect}" if res[:error]
|
287
288
|
elsif cmd.is_a?(Array)
|
288
289
|
signals = cmd.clone
|
@@ -292,7 +293,7 @@ private
|
|
292
293
|
while signals.present?
|
293
294
|
delay = signals.shift
|
294
295
|
signal = signals.shift
|
295
|
-
if wait_for_condition(delay.to_f, 0.3)
|
296
|
+
if wait_for_condition(delay.to_f, 0.3){ !process_really_running? }
|
296
297
|
info 'has terminated'
|
297
298
|
break
|
298
299
|
end
|
data/lib/eye/process/config.rb
CHANGED
@@ -1,29 +1,25 @@
|
|
1
1
|
module Eye::Process::Config
|
2
2
|
|
3
3
|
DEFAULTS = {
|
4
|
-
keep_alive
|
5
|
-
check_alive_period
|
4
|
+
:keep_alive => true, # restart when crashed
|
5
|
+
:check_alive_period => 5.seconds,
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
7
|
+
:start_timeout => 15.seconds,
|
8
|
+
:stop_timeout => 10.seconds,
|
9
|
+
:restart_timeout => 10.seconds,
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
:start_grace => 2.5.seconds,
|
12
|
+
:stop_grace => 0.5.seconds,
|
13
|
+
:restart_grace => 1.second,
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
restart_grace: 1.second,
|
15
|
+
:daemonize => false,
|
16
|
+
:auto_start => true, # auto start on monitor action
|
18
17
|
|
19
|
-
|
20
|
-
|
18
|
+
:children_update_period => 30.seconds,
|
19
|
+
:clear_pid => true, # by default clear pid on stop
|
21
20
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
auto_update_pidfile_grace: 30.seconds,
|
26
|
-
revert_fuckup_pidfile_grace: 120.seconds
|
21
|
+
:auto_update_pidfile_grace => 30.seconds,
|
22
|
+
:revert_fuckup_pidfile_grace => 120.seconds,
|
27
23
|
}
|
28
24
|
|
29
25
|
def prepare_config(new_config)
|
@@ -31,14 +27,12 @@ module Eye::Process::Config
|
|
31
27
|
h[:pid_file_ex] = Eye::System.normalized_file(h[:pid_file], h[:working_dir]) if h[:pid_file]
|
32
28
|
h[:checks] = {} if h[:checks].blank?
|
33
29
|
h[:triggers] = {} if h[:triggers].blank?
|
34
|
-
|
35
|
-
h[:children_update_period] = upd
|
36
|
-
end
|
30
|
+
h[:children_update_period] = h[:monitor_children][:children_update_period] if h[:monitor_children] && h[:monitor_children][:children_update_period]
|
37
31
|
|
38
32
|
# check speedy flapping by default
|
39
33
|
if h[:triggers].blank? || !h[:triggers][:flapping]
|
40
34
|
h[:triggers] ||= {}
|
41
|
-
h[:triggers][:flapping] = {
|
35
|
+
h[:triggers][:flapping] = {:type => :flapping, :times => 10, :within => 10.seconds}
|
42
36
|
end
|
43
37
|
|
44
38
|
h[:stdout] = Eye::System.normalized_file(h[:stdout], h[:working_dir]) if h[:stdout]
|
@@ -5,12 +5,21 @@ module Eye::Process::Controller
|
|
5
5
|
end
|
6
6
|
|
7
7
|
def start
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
res = if set_pid_from_file
|
9
|
+
if process_really_running?
|
10
|
+
info "process <#{self.pid}> from pid_file is already running"
|
11
|
+
switch :already_running
|
12
|
+
:ok
|
13
|
+
else
|
14
|
+
info "pid_file found, but process <#{self.pid}> is down, starting..."
|
15
|
+
start_process
|
16
|
+
end
|
11
17
|
else
|
18
|
+
info 'pid_file not found, starting...'
|
12
19
|
start_process
|
13
20
|
end
|
21
|
+
|
22
|
+
res
|
14
23
|
end
|
15
24
|
|
16
25
|
def stop
|
@@ -19,7 +28,10 @@ module Eye::Process::Controller
|
|
19
28
|
end
|
20
29
|
|
21
30
|
def restart
|
22
|
-
|
31
|
+
unless pid # unmonitored case
|
32
|
+
try_update_pid_from_file
|
33
|
+
end
|
34
|
+
|
23
35
|
restart_process
|
24
36
|
end
|
25
37
|
|
@@ -27,9 +39,11 @@ module Eye::Process::Controller
|
|
27
39
|
if self[:auto_start]
|
28
40
|
start
|
29
41
|
else
|
30
|
-
if
|
42
|
+
if try_update_pid_from_file
|
43
|
+
info "process <#{self.pid}> from pid_file is already running"
|
31
44
|
switch :already_running
|
32
45
|
else
|
46
|
+
warn 'process not found, unmonitoring'
|
33
47
|
schedule :unmonitor, Eye::Reason.new(:'not found')
|
34
48
|
end
|
35
49
|
end
|
data/lib/eye/process/data.rb
CHANGED
@@ -20,32 +20,30 @@ module Eye::Process::Data
|
|
20
20
|
@full_name ||= [app_name, group_name, self[:name]].compact.join(':')
|
21
21
|
end
|
22
22
|
|
23
|
-
def status_data(
|
24
|
-
p_st = self_status_data(
|
23
|
+
def status_data(debug = false)
|
24
|
+
p_st = self_status_data(debug)
|
25
25
|
|
26
26
|
if children.present?
|
27
|
-
p_st.merge(subtree
|
27
|
+
p_st.merge(:subtree => Eye::Utils::AliveArray.new(children.values).map{|c| c.status_data(debug) } )
|
28
28
|
elsif self[:monitor_children] && self.up?
|
29
|
-
p_st.merge(subtree
|
29
|
+
p_st.merge(:subtree => [{name: '=loading children='}])
|
30
30
|
else
|
31
31
|
# common state
|
32
32
|
p_st
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
-
def self_status_data(
|
37
|
-
h = { name: name,
|
38
|
-
state: state,
|
36
|
+
def self_status_data(debug = false)
|
37
|
+
h = { name: name, state: state,
|
39
38
|
type: (self.class == Eye::ChildProcess ? :child_process : :process),
|
40
39
|
resources: Eye::SystemResources.resources(pid) }
|
41
40
|
|
42
41
|
if @states_history
|
43
|
-
h
|
44
|
-
|
42
|
+
h.merge!( state_changed_at: @states_history.last_state_changed_at.to_i,
|
43
|
+
state_reason: @states_history.last_reason.to_s )
|
45
44
|
end
|
46
45
|
|
47
|
-
h[:debug] = debug_data if
|
48
|
-
h[:procline] = Eye::SystemResources.args(self.pid) if opts[:procline]
|
46
|
+
h[:debug] = debug_data if debug
|
49
47
|
h[:current_command] = current_scheduled_command if current_scheduled_command
|
50
48
|
|
51
49
|
h
|
data/lib/eye/process/monitor.rb
CHANGED
@@ -2,112 +2,102 @@ module Eye::Process::Monitor
|
|
2
2
|
|
3
3
|
private
|
4
4
|
|
5
|
-
def
|
6
|
-
newpid = failsafe_load_pid
|
7
|
-
|
8
|
-
if !newpid
|
9
|
-
self.pid = nil
|
10
|
-
info 'load_external_pid_file: pid_file not found'
|
11
|
-
:no_pid_file
|
12
|
-
elsif process_pid_running?(newpid)
|
13
|
-
self.pid = newpid
|
14
|
-
res = compare_identity
|
15
|
-
if res == :fail
|
16
|
-
warn "load_external_pid_file: process <#{self.pid}> from pid_file failed check_identity"
|
17
|
-
:bad_identity
|
18
|
-
else
|
19
|
-
args = Eye::SystemResources.args(self.pid)
|
20
|
-
info "load_external_pid_file: process <#{self.pid}> from pid_file found and running (identity: #{res}) (#{args})"
|
21
|
-
:ok
|
22
|
-
end
|
23
|
-
else
|
24
|
-
@last_loaded_pid = newpid
|
25
|
-
self.pid = nil
|
26
|
-
info "load_external_pid_file: pid_file found, but process <#{newpid}> not found"
|
27
|
-
:not_running
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
def check_alive
|
32
|
-
return unless up?
|
33
|
-
|
34
|
-
# check that process runned
|
5
|
+
def check_alive_with_refresh_pid_if_needed
|
35
6
|
if process_really_running?
|
36
|
-
|
37
|
-
else
|
38
|
-
warn "check_alive: process <#{self.pid}> not found"
|
39
|
-
notify :info, 'crashed!'
|
40
|
-
clear_pid_file(true) if control_pid?
|
7
|
+
return true
|
41
8
|
|
42
|
-
|
9
|
+
else
|
10
|
+
warn 'process not really running'
|
11
|
+
try_update_pid_from_file
|
43
12
|
end
|
44
13
|
end
|
45
14
|
|
46
|
-
def
|
47
|
-
|
48
|
-
|
15
|
+
def try_update_pid_from_file
|
16
|
+
# if pid file was rewritten
|
17
|
+
newpid = load_pid_from_file
|
18
|
+
if newpid != self.pid
|
19
|
+
info "process <#{self.pid}> changed pid to <#{newpid}>, updating..." if self.pid
|
20
|
+
self.pid = newpid
|
49
21
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
22
|
+
if process_really_running?
|
23
|
+
return true
|
24
|
+
else
|
25
|
+
warn "process <#{newpid}> was not found"
|
26
|
+
return false
|
55
27
|
end
|
56
28
|
else
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
unless failsafe_save_pid
|
62
|
-
msg += ', pid_file write failed! O_o'
|
63
|
-
end
|
29
|
+
debug { 'process was not found' }
|
30
|
+
return false
|
31
|
+
end
|
32
|
+
end
|
64
33
|
|
65
|
-
|
66
|
-
|
67
|
-
self.pid = ppid
|
34
|
+
def check_alive
|
35
|
+
if up?
|
68
36
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
37
|
+
# check that process runned
|
38
|
+
unless process_really_running?
|
39
|
+
warn "check_alive: process <#{self.pid}> not found"
|
40
|
+
notify :info, 'crashed!'
|
41
|
+
clear_pid_file if control_pid? && self.pid && load_pid_from_file == self.pid
|
74
42
|
|
43
|
+
switch :crashed, Eye::Reason.new(:crashed)
|
75
44
|
else
|
76
|
-
|
45
|
+
# check that pid_file still here
|
46
|
+
ppid = failsafe_load_pid
|
47
|
+
|
48
|
+
if ppid != self.pid
|
49
|
+
msg = "check_alive: pid_file (#{self[:pid_file]}) changed by itself (<#{self.pid}> => <#{ppid}>)"
|
50
|
+
if control_pid?
|
51
|
+
msg += ", reverting to <#{self.pid}> (the pid_file is controlled by eye)"
|
52
|
+
unless failsafe_save_pid
|
53
|
+
msg += ", pid_file write failed! O_o"
|
54
|
+
end
|
55
|
+
else
|
56
|
+
changed_ago_s = Time.now - pid_file_ctime
|
57
|
+
|
58
|
+
if ppid == nil
|
59
|
+
msg += ", reverting to <#{self.pid}> (the pid_file is empty)"
|
60
|
+
unless failsafe_save_pid
|
61
|
+
msg += ", pid_file write failed! O_o"
|
62
|
+
end
|
63
|
+
|
64
|
+
elsif (changed_ago_s > self[:auto_update_pidfile_grace]) && process_pid_running?(ppid)
|
65
|
+
msg += ", trusting this change, and now monitor <#{ppid}>"
|
66
|
+
self.pid = ppid
|
67
|
+
|
68
|
+
elsif (changed_ago_s > self[:revert_fuckup_pidfile_grace])
|
69
|
+
msg += " over #{self[:revert_fuckup_pidfile_grace]}s ago, reverting to <#{self.pid}>, because <#{ppid}> not alive"
|
70
|
+
unless failsafe_save_pid
|
71
|
+
msg += ", pid_file write failed! O_o"
|
72
|
+
end
|
73
|
+
|
74
|
+
else
|
75
|
+
msg += ', ignoring self-managed pid change'
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
warn msg
|
80
|
+
end
|
77
81
|
end
|
78
82
|
end
|
79
|
-
|
80
|
-
warn msg
|
81
|
-
end
|
82
|
-
|
83
|
-
def check_identity
|
84
|
-
if compare_identity == :fail
|
85
|
-
notify :info, 'crashed by identity!'
|
86
|
-
switch :crashed, Eye::Reason.new(:crashed_by_identity)
|
87
|
-
clear_pid_file if self[:clear_pid]
|
88
|
-
false
|
89
|
-
else
|
90
|
-
true
|
91
|
-
end
|
92
83
|
end
|
93
84
|
|
94
85
|
def check_crash
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
schedule_in self[:restore_in].to_f, :restore, Eye::Reason.new(:crashed)
|
86
|
+
if down?
|
87
|
+
if self[:keep_alive]
|
88
|
+
warn 'check crashed: process is down'
|
89
|
+
|
90
|
+
if self[:restore_in]
|
91
|
+
schedule_in self[:restore_in].to_f, :restore, Eye::Reason.new(:crashed)
|
92
|
+
else
|
93
|
+
schedule :restore, Eye::Reason.new(:crashed)
|
94
|
+
end
|
105
95
|
else
|
106
|
-
|
96
|
+
warn 'check crashed: process without keep_alive'
|
97
|
+
schedule :unmonitor, Eye::Reason.new(:crashed)
|
107
98
|
end
|
108
99
|
else
|
109
|
-
|
110
|
-
schedule :unmonitor, Eye::Reason.new(:crashed)
|
100
|
+
debug { 'check crashed: skipped, process is not in down' }
|
111
101
|
end
|
112
102
|
end
|
113
103
|
|