scout_agent 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +4 -0
- data/CHANGELOG +3 -0
- data/COPYING +340 -0
- data/INSTALL +17 -0
- data/LICENSE +6 -0
- data/README +3 -0
- data/Rakefile +123 -0
- data/TODO +3 -0
- data/bin/scout_agent +11 -0
- data/lib/scout_agent.rb +73 -0
- data/lib/scout_agent/agent.rb +42 -0
- data/lib/scout_agent/agent/communication_agent.rb +85 -0
- data/lib/scout_agent/agent/master_agent.rb +301 -0
- data/lib/scout_agent/api.rb +241 -0
- data/lib/scout_agent/assignment.rb +105 -0
- data/lib/scout_agent/assignment/configuration.rb +30 -0
- data/lib/scout_agent/assignment/identify.rb +110 -0
- data/lib/scout_agent/assignment/queue.rb +95 -0
- data/lib/scout_agent/assignment/reset.rb +91 -0
- data/lib/scout_agent/assignment/snapshot.rb +92 -0
- data/lib/scout_agent/assignment/start.rb +149 -0
- data/lib/scout_agent/assignment/status.rb +44 -0
- data/lib/scout_agent/assignment/stop.rb +60 -0
- data/lib/scout_agent/assignment/upload_log.rb +61 -0
- data/lib/scout_agent/core_extensions.rb +260 -0
- data/lib/scout_agent/database.rb +386 -0
- data/lib/scout_agent/database/mission_log.rb +282 -0
- data/lib/scout_agent/database/queue.rb +126 -0
- data/lib/scout_agent/database/snapshots.rb +187 -0
- data/lib/scout_agent/database/statuses.rb +65 -0
- data/lib/scout_agent/dispatcher.rb +157 -0
- data/lib/scout_agent/id_card.rb +143 -0
- data/lib/scout_agent/lifeline.rb +243 -0
- data/lib/scout_agent/mission.rb +212 -0
- data/lib/scout_agent/order.rb +58 -0
- data/lib/scout_agent/order/check_in_order.rb +32 -0
- data/lib/scout_agent/order/snapshot_order.rb +33 -0
- data/lib/scout_agent/plan.rb +306 -0
- data/lib/scout_agent/server.rb +123 -0
- data/lib/scout_agent/tracked.rb +59 -0
- data/lib/scout_agent/wire_tap.rb +513 -0
- data/setup.rb +1360 -0
- data/test/tc_core_extensions.rb +89 -0
- data/test/tc_id_card.rb +115 -0
- data/test/tc_plan.rb +285 -0
- data/test/test_helper.rb +22 -0
- data/test/ts_all.rb +7 -0
- metadata +171 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
|
3
|
+
module ScoutAgent
  class Database
    #
    # Stores one status row per named agent process in the +statuses+ table.
    # Every method here logs SQLite failures instead of raising them.
    #
    class Statuses < Database
      #
      # Returns the SQL that builds the initial schema when +version+ is 0.
      # No migration is defined for any other version, so +nil+ is returned.
      #
      def update_schema(version = schema_version)
        return unless version == 0

        <<-END_INITIAL_SCHEMA.trim
        CREATE TABLE statuses (
          name            TEXT NOT NULL PRIMARY KEY
                          CHECK( name IN ( 'lifeline', 'master', 'mission',
                                           'communication', 'queue', 'snapshot' ) ),
          pid             INTEGER NOT NULL,
          status          REQUIRED_TEXT_TYPE,
          last_updated_at DATETIME_TYPE
        );
        DEFAULT_LOCALTIME_TRIGGER statuses last_updated_at
        END_INITIAL_SCHEMA
      end

      #
      # Records +status+ for the process called +name+ (the current IDCard's
      # process name by default), replacing any row already stored under that
      # name.  The current PID is saved alongside it.
      #
      def update_status(status, name = IDCard.me && IDCard.me.process_name)
        upsert = <<-END_UPDATE_STATUS.trim
        INSERT OR REPLACE INTO statuses(name, pid, status, last_updated_at)
        VALUES( ?, ?, ?, null)
        END_UPDATE_STATUS
        write_to_sqlite { |sqlite| sqlite.execute(upsert, name, Process.pid, status) }
      rescue Amalgalite::SQLite3::Error => error  # failed to update status
        # nothing else to do:  a later update will try again
        log.error("Database status update error:  #{error.message}.")
      end

      #
      # Deletes the status row for +name+ (the current IDCard's process name by
      # default).
      #
      def clear_status(name = IDCard.me && IDCard.me.process_name)
        write_to_sqlite { |sqlite|
          sqlite.execute("DELETE FROM statuses WHERE name = ?", name)
        }
      rescue Amalgalite::SQLite3::Error => error  # failed to delete status
        # nothing else to do:  a new process will replace the row
        log.error("Database status clearing error:  #{error.message}.")
      end

      #
      # Returns every stored status row, in ROWID order.  An empty Array is
      # returned if the lookup fails.
      #
      def current_statuses
        find_all = <<-END_FIND_STATUSES.trim
        SELECT name, pid, status, last_updated_at FROM statuses ORDER BY ROWID
        END_FIND_STATUSES
        query(find_all)
      rescue Amalgalite::SQLite3::Error => error  # failed to find statuses
        log.error("Database statuses error:  #{error.message}.")
        []  # return empty results
      end

      #
      # Returns the stored status text for +name+ (the current IDCard's process
      # name by default), or +nil+ if the lookup fails.
      #
      def current_status(name = IDCard.me && IDCard.me.process_name)
        read_from_sqlite do |sqlite|
          sqlite.first_value_from(<<-END_FIND_STATUS, name)
          SELECT status FROM statuses WHERE name = ?
          END_FIND_STATUS
        end
      rescue Amalgalite::SQLite3::Error => error  # failed to find status
        log.error("Database current status error:  #{error.message}.")
        nil  # return no results
      end
    end
  end
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
|
3
|
+
module ScoutAgent
  #
  # Command-line entry point:  parses switches, works out which assignment
  # (command) was requested, locates its code, and runs it.
  #
  module Dispatcher
    module_function

    #
    # Runs a full dispatch cycle for +args+ (ARGV by default).  The command
    # name is shifted off of +args+ and whatever remains is handed to the
    # assignment.
    #
    def dispatch(args = ARGV)
      switches   = parse_switches(args)
      assignment = parse_assignment(args)
      code       = load_assignment(assignment)
      execute_assignment(assignment, code, switches, args)
    end

    #
    # Declares and parses the command-line switches found in +args+, applies
    # them to Plan, and returns them as a Hash.  Usage is printed and the
    # process exits for <tt>--help</tt> and for unparsable options;
    # <tt>--version</tt> prints the version and exits.
    #
    # NOTE(review):  +args+ must respond to options(), presumably provided by
    # OptionParser::Arguable (as for ARGV once optparse is loaded) -- confirm.
    #
    def parse_switches(args)
      switches = { }

      args.options do |opts|
        opts.banner = <<-END_USAGE.trim
        Usage:

          [sudo] #{ScoutAgent.agent_name} [OPTIONS] COMMAND

        Use the commands identify, start, and stop to prepare, launch, and
        shutdown the agent respectively. Those require super user privileges.
        You can also use the status command to check in on a running agent.

        END_USAGE

        opts.separator "Basic Options:"
        opts.on( "-s", "--server URL", String,
                 "The URL for the server to report to." ) do |url|
          switches[:server_url] = url
        end
        opts.on( "-d", "--[no-]daemon",
                 "Run in the background as a daemon." ) do |boolean|
          switches[:run_as_daemon] = boolean
        end
        opts.on( "-l", "--logging-level LEVEL", %w[DEBUG INFO WARN ERROR FATAL],
                 "The minimum level of log message to record." ) do |level|
          switches[:logging_level] = level
        end
        opts.on( "-t", "--[no-]test-mode",
                 "Used in agent development." ) do |boolean|
          switches[:test_mode] = boolean
          if boolean  # test mode implies a local server and no daemon
            switches[:server_url]    = "http://localhost:4567"
            switches[:run_as_daemon] = false
          end
        end

        opts.separator "Expert Options:"
        opts.on( "--users NAME1,NAME2,...", Array,
                 "A list of users to try switching to." ) do |users|
          switches[:user_choices] = users
        end
        opts.on( "--groups NAME1,NAME2,...", Array,
                 "A list of groups to try switching to." ) do |groups|
          switches[:group_choices] = groups
        end
        opts.on( "--prefix PATH", String,
                 "A prefix path prepended to all other paths." ) do |path|
          switches[:prefix_path] = path
        end
        [ %w[os_config_path configuration],
          %w[os_db_path     databases],
          %w[os_pid_path    PID\ files],
          %w[os_log_path    log\ files] ].each do |name, used_for|
          opts.on( "--#{name.tr('_', '-')} PATH", String,
                   "The path your OS uses for #{used_for}." ) do |path|
            switches[name.to_sym] = path
          end
        end

        opts.separator "Application Options:"
        opts.on( "-h", "--help",
                 "Show this message." ) do
          puts opts  # show usage
          exit
        end
        opts.on( "-v", "--version",
                 "Display the current version." ) do
          puts "#{ScoutAgent.proper_agent_name} v#{ScoutAgent::VERSION}"
          exit
        end

        begin
          opts.parse!
        rescue OptionParser::ParseError  # failed to parse options
          puts opts  # show usage
          exit
        end
      end

      # apply switches so paths will be set correctly for load checks
      Plan.update_from_switches(switches)

      switches
    end

    #
    # Shifts the command name off of +args+ and returns it downcased.  When no
    # command was given a default is chosen:  "identify" if no Plan is present,
    # "status" if the lifeline PID file exists, and "start" otherwise.  Aborts
    # for names containing anything other than word characters.
    #
    def parse_assignment(args)
      assignment = args.shift.to_s.downcase
      if assignment.empty?
        return "identify" unless Plan.present?
        return IDCard.new(:lifeline).pid_file.exist? ? "status" : "start"
      end
      abort_with_unknown_assignment(assignment) unless assignment =~ /\A\w+\z/
      assignment
    end

    #
    # Finds the single file in LIB_DIR/assignment whose name matches
    # +assignment+ and returns its path.  Aborts when nothing matches or when
    # more than one file does.
    #
    # NOTE(review):  the pattern is not anchored at the start of the file name,
    # so "op" resolves to stop.rb.  Prefix-only abbreviations may have been the
    # intent -- confirm before tightening.
    #
    def load_assignment(assignment)
      dir     = LIB_DIR + "assignment"
      pattern = /#{Regexp.escape(assignment)}\w*\.rb\z/
      # Match on the String form:  a Regexp never matches a Pathname itself on
      # Ruby 1.9+, which made every command look unknown.  Sorting keeps the
      # ambiguity message stable regardless of directory order.
      matches = dir.entries.select { |entry| entry.to_s =~ pattern }.sort
      if matches.size > 1
        abort_with_ambiguous_assignment(assignment, matches)
      elsif matches.first and (code = dir + matches.first).exist?
        return code
      else
        abort_with_unknown_assignment(assignment)
      end
    end

    #
    # Requires +code+, looks up the Assignment class named after the file, and
    # runs it with +switches+ and +other_args+.  Aborts if the file did not
    # define the expected class.
    #
    def execute_assignment(assignment, code, switches, other_args)
      require code
      class_name = code.basename(".rb").to_s.CamelCase
      begin
        loaded = Assignment.const_get(class_name)
      rescue NameError  # can't load module
        abort_with_missing_code(class_name)
      end
      loaded.new(switches, other_args).prepare_and_execute
    end

    #
    # Aborts with a message listing the commands +assignment+ could refer to.
    # +matches+ holds the candidate file names (Strings or Pathnames).
    #
    def abort_with_ambiguous_assignment(assignment, matches)
      choices = matches.map { |m| "'#{File.basename(m.to_s, '.rb')}'" }
      # "'a' or 'b'" for two choices, "'a', 'b', or 'c'" for more
      joiner           = choices.size > 2 ? ", or " : " or "
      choices[-2..-1]  = choices[-2..-1].join(joiner)
      abort <<-END_AMBIGUOUS
      Ambiguous command '#{assignment}'. Did you mean #{choices.join(', ')}?
      END_AMBIGUOUS
    end

    # Aborts, reporting that +assignment+ is not a known command.
    def abort_with_unknown_assignment(assignment)
      abort "Unknown command '#{assignment}'."
    end

    # Aborts, reporting that +class_name+ was not defined by the loaded code.
    def abort_with_missing_code(class_name)
      abort "Failed to load '#{class_name}'."
    end
  end
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
|
3
|
+
module ScoutAgent
  #
  # This class encapsulates a named process.  It is used to ensure exclusive
  # execution and to signal other processes.
  #
  class IDCard
    class << self
      #
      # This global attribute should contain the name of the current process.
      # It is set during a successful authorization.
      #
      # <b>Warning:</b>  Be sure to clear this attribute immediately after a
      # fork() so you don't keep the parent's identity.
      #
      attr_accessor :me
    end

    #
    # Pass in the +process_name+ of the process you want to signal() or the
    # +process_name+ you wish to authorize() for yourself.
    #
    def initialize(process_name)
      @process_name = process_name
    end

    # The name this card identifies a process by.
    attr_reader :process_name

    # A String representation of this process, with PID.
    def to_s
      "#{@process_name} (#{pid || 'unauthorized'})"
    end

    #
    # Returns the path to the unique PID file for this process, based on the
    # current Plan.
    #
    def pid_file
      Plan.pid_dir + "#{@process_name}.pid"
    end

    #
    # Returns the PID for the named process, or +nil+ if it cannot be read.
    #
    # A file that is empty or does not begin with a positive number also yields
    # +nil+.  Reading such a file as PID 0 would make signal() target the
    # caller's entire process group.
    #
    def pid
      id = pid_file.read[/\A\s*(\d+)/, 1].to_i  # nil.to_i is 0 when no match
      id > 0 ? id : nil
    rescue StandardError  # unreadable PID file
      nil
    end

    #
    # Tries to send +message+ as a signal to the process represented by this
    # instance.  You can pass any message Process.kill() would understand.
    #
    # Returns +true+ if the signal was sent, or +false+ if the PID file could
    # not be read.  Any Exception raised during the send, such as Errno::ESRCH
    # for a missing process, will bubble up to the calling code.
    #
    def signal(message)
      id = pid
      return false unless id

      Process.kill(message, id)
      true
    end

    #
    # Claims this identity for this process.  This method is multiprocess-safe
    # and will fail if another process has claimed this identity.  However,
    # stale claims are ignored and replaced, if possible.
    #
    # An optional block is called while the new PID file is locked and before
    # the PID is written (allowing for daemonization).  A false result from the
    # block abandons the claim.
    #
    # This method returns +true+ if the claim succeeded and +false+ if it could
    # not happen for any reason.  A return of +true+ indicates that me() has
    # been updated and an exit handler has been installed to revoke() this
    # claim as the process ends.
    #
    def authorize(&block)
      File.open(pid_file, File::CREAT | File::EXCL | File::WRONLY) do |pid|
        pid.flock(File::LOCK_EX)
        if not block_given? or yield  # allows for daemonization
          pid.puts Process.pid
        else
          pid.flock(File::LOCK_UN)
          revoke  # remove this file if anything went wrong
          return false
        end
        pid.flock(File::LOCK_UN)
      end

      self.class.me = self

      at_my_exit { revoke }
      true
    rescue Errno::EEXIST  # pid_file already exists
      File.open(pid_file) do |pid|
        # give up if we can't grab a lock to verify the existing PID file
        return false unless pid.flock(File::LOCK_EX | File::LOCK_NB)

        if pid.read =~ /\A\d+/
          begin
            signal(0)  # probe:  raises if the owner is gone
          rescue Errno::ESRCH  # no such process, so the claim is stale
            if revoke
              pid.flock(File::LOCK_UN)  # release the lock before we recurse
              # forward the block so a retried claim still daemonizes
              return authorize(&block)
            end
          rescue Errno::EPERM, Errno::EACCES  # don't have permission
            # the owner is alive but belongs to someone else so give up
          end
        end
        pid.flock(File::LOCK_UN)  # release the lock
      end
      false
    end

    #
    # Releases a held claim on a process name.  Returns +true+ if successful or
    # +false+ if the PID file didn't exist or couldn't be destroyed.
    #
    def revoke
      pid_file.unlink
      true
    rescue StandardError  # missing file or no permission
      false
    end
  end
end
|
@@ -0,0 +1,243 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
|
3
|
+
module ScoutAgent
  #
  # Launches an agent in a forked child process and keeps it running.  The
  # child reports in over a pipe; when the reports stop or are malformed the
  # parent stops the child and launches it again.
  #
  class Lifeline
    include Tracked

    # Seconds the parent waits for a check-in before giving up on the child.
    NO_CONTACT_TIMEOUT   = 3
    # Seconds between check-ins from the child.
    CHECK_IN_FREQUENCY   = 0.99  # gives us three check ins before a cutoff
    # Pause handed to Process.term_or_kill() when stopping a child.
    TERM_TO_KILL_PAUSE   = 1
    # Minimum minutes between launches, indexed by consecutive relaunches.
    RELAUNCH_FREQUENCIES = [0, 1, 1, 2, 3, 5, 8, 13]

    #################
    ### Interface ###
    #################

    #
    # Prepares to manage the agent named +agent+, reporting to +log+.  The
    # creating process is remembered as the parent and its status is cleared
    # on exit.
    #
    def initialize(agent, log = WireTap.new(nil))
      @agent      = agent
      @log        = log
      @parent_pid = Process.pid
      @child_pid  = nil

      @reader = @writer = nil

      @launch_and_monitor_thread   = nil
      @check_in_with_parent_thread = nil

      @code           = nil
      @last_launch    = nil
      @relaunch_index = 0

      at_my_exit { clear_status }
    end

    attr_reader :log

    #
    # Starts a background Thread that repeatedly launches the child, watches
    # its check-ins, and stops it once they fail so it can be launched again.
    #
    def launch_and_monitor
      @launch_and_monitor_thread = Thread.new {
        Thread.current.abort_on_exception = true
        loop {
          wait_for_launch
          prepare_pipe
          launch_child
          close_writer
          monitor_child
          restart_child
        }
      }
    end

    #
    # In the parent only:  ends the monitoring Thread and sends TERM to the
    # agent process.  A process that is already gone is not an error.
    #
    def terminate
      return unless Process.pid == @parent_pid

      # stop monitoring
      log.info("Stopping the monitoring for '#{@agent}'.")
      @launch_and_monitor_thread.exit if @launch_and_monitor_thread
      # ask child process to exit
      log.info("Asking '#{@agent}' to stop.")
      IDCard.new(@agent).signal("TERM")
    rescue Errno::ESRCH  # no such process
      # it has already exited, so we are fine
    end

    # In the parent only:  waits on the monitoring Thread, if one was started.
    def join
      return unless Process.pid == @parent_pid
      return unless @launch_and_monitor_thread

      @launch_and_monitor_thread.join
    end

    #######
    private
    #######

    ##############
    ### Parent ###
    ##############

    #
    # Throttles relaunches.  Does nothing before the first launch.  Afterwards
    # it sleeps out whatever remains of the current relaunch interval and
    # advances to the next interval, or resets to the first interval when the
    # child ran for longer than the following interval.
    #
    def wait_for_launch
      return unless @last_launch

      elapsed = Time.now - @last_launch
      delay   = RELAUNCH_FREQUENCIES[@relaunch_index] * 60 - elapsed

      if delay > 0
        log.info("Waiting #{delay} seconds before relaunching '#{@agent}'.")
        sleep delay
      else
        next_minutes = RELAUNCH_FREQUENCIES[@relaunch_index + 1] ||
                       RELAUNCH_FREQUENCIES.last
        if delay.abs > next_minutes * 60
          @relaunch_index = 0
          return
        end
      end

      final_index      = RELAUNCH_FREQUENCIES.size - 1
      @relaunch_index += 1 unless @relaunch_index == final_index
    end

    # Opens a fresh pipe for the next child's check-ins.
    def prepare_pipe
      @reader, @writer = IO.pipe
    end

    #
    # Forks the child.  The child resets its environment, loads and authorizes
    # the agent code, begins checking in, and then runs the agent.
    #
    def launch_child
      log.info("Launching '#{@agent}'.")
      status(@agent)
      @last_launch = Time.now
      @child_pid   = fork {
        reset_environment
        close_reader
        load_code
        authorize_code
        check_in_with_parent
        run_code
      }
    end

    # Closes the write end of the pipe, if it is open.
    def close_writer
      @writer.close
    rescue IOError, NoMethodError
      # already closed or never set, so there's nothing to close
    end

    #
    # Reads check-ins from the child until one is late, missing, or malformed.
    # Each failure is logged before returning.
    #
    def monitor_child
      expected = /\A#{@child_pid}: \d{4}-\d{2}-\d{2} \d{2}:\d{2}\Z/
      loop do
        check_in = nil
        begin
          Timeout.timeout(NO_CONTACT_TIMEOUT) { check_in = @reader.gets }
          log.error("'#{@agent}' monitor channel has closed.") if check_in.nil?
        rescue Timeout::Error
          # check_in will stay nil
          log.error("'#{@agent}' failed to check-in in time.")
        end

        next if check_in.to_s =~ expected

        log.error("'#{@agent}' check-in was malformed.") unless check_in.nil?
        break
      end
    end

    # Marks the child as restarting, closes our reader, and stops the child.
    def restart_child
      log.info("Stopping '#{@agent}'.")
      status(@agent, :restarting)
      close_reader
      Process.term_or_kill(@child_pid, TERM_TO_KILL_PAUSE)
    end

    #
    # Updates the stored status to reflect that +process+ is (when
    # +restarting+ is true) or is no longer restarting.  The current status is
    # read so that other processes already listed as restarting are kept.
    # Nothing happens without a status database.
    #
    def status(process, restarting = false)
      db = status_database
      return unless db

      db.write_to_sqlite do |sqlite|
        previous = db.current_status
        pending  = previous =~ /\ARestarting (.+)\z/ ? $1.split(" and ") : [ ]
        if restarting
          pending << process unless pending.include? process
        else
          pending.delete(process)
        end

        if pending.empty?
          super("Monitoring processes")
        else
          super("Restarting #{pending.join(' and ')}")
        end
      end
    end

    #############
    ### Child ###
    #############

    # Replaces the signal handlers and identity inherited from the parent.
    def reset_environment
      # swap out our parent's signal handlers
      install_shutdown_handler { finish_code }
      trap("ALRM") { alert_code }

      # clear the parent's identity
      IDCard.me = nil
    end

    # Closes the read end of the pipe, if it is open.
    def close_reader
      @reader.close
    rescue IOError, NoMethodError
      # already closed or never set, so there's nothing to close
    end

    # Requires the agent's code and builds an instance of its class.
    def load_code
      require LIB_DIR + "agent"
      require LIB_DIR + "agent/#{@agent}_agent"
      agent_class = ScoutAgent::Agent.const_get("#{@agent.CamelCase}Agent")
      @code       = agent_class.new
    end

    # Asks the loaded agent to authorize itself.
    def authorize_code
      @code.authorize
    end

    #
    # Starts a Thread that writes a PID and timestamp line to the parent every
    # CHECK_IN_FREQUENCY seconds.  The agent is finished once the pipe can no
    # longer be written.
    #
    def check_in_with_parent
      @check_in_with_parent_thread = Thread.new {
        Thread.current.abort_on_exception = true
        loop {
          begin
            stamp = Time.now.strftime('%Y-%m-%d %H:%M')
            @writer.puts "#{Process.pid}: #{stamp}"
          rescue Errno::EPIPE, IOError  # parent closed our pipe or exited
            break finish_code
          end
          sleep CHECK_IN_FREQUENCY
        }
      }
    end

    # Runs the loaded agent.
    def run_code
      @code.run
    end

    #
    # Closes our end of the pipe and asks the agent to finish.  Exits directly
    # if no agent code has been loaded yet.
    #
    def finish_code
      close_writer
      exit unless @code

      @code.finish
    end

    # Tells the loaded agent, if any, to notice changes.
    def alert_code
      @code.notice_changes if @code
    end
  end
end
|