scout_agent 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +4 -0
- data/CHANGELOG +3 -0
- data/COPYING +340 -0
- data/INSTALL +17 -0
- data/LICENSE +6 -0
- data/README +3 -0
- data/Rakefile +123 -0
- data/TODO +3 -0
- data/bin/scout_agent +11 -0
- data/lib/scout_agent.rb +73 -0
- data/lib/scout_agent/agent.rb +42 -0
- data/lib/scout_agent/agent/communication_agent.rb +85 -0
- data/lib/scout_agent/agent/master_agent.rb +301 -0
- data/lib/scout_agent/api.rb +241 -0
- data/lib/scout_agent/assignment.rb +105 -0
- data/lib/scout_agent/assignment/configuration.rb +30 -0
- data/lib/scout_agent/assignment/identify.rb +110 -0
- data/lib/scout_agent/assignment/queue.rb +95 -0
- data/lib/scout_agent/assignment/reset.rb +91 -0
- data/lib/scout_agent/assignment/snapshot.rb +92 -0
- data/lib/scout_agent/assignment/start.rb +149 -0
- data/lib/scout_agent/assignment/status.rb +44 -0
- data/lib/scout_agent/assignment/stop.rb +60 -0
- data/lib/scout_agent/assignment/upload_log.rb +61 -0
- data/lib/scout_agent/core_extensions.rb +260 -0
- data/lib/scout_agent/database.rb +386 -0
- data/lib/scout_agent/database/mission_log.rb +282 -0
- data/lib/scout_agent/database/queue.rb +126 -0
- data/lib/scout_agent/database/snapshots.rb +187 -0
- data/lib/scout_agent/database/statuses.rb +65 -0
- data/lib/scout_agent/dispatcher.rb +157 -0
- data/lib/scout_agent/id_card.rb +143 -0
- data/lib/scout_agent/lifeline.rb +243 -0
- data/lib/scout_agent/mission.rb +212 -0
- data/lib/scout_agent/order.rb +58 -0
- data/lib/scout_agent/order/check_in_order.rb +32 -0
- data/lib/scout_agent/order/snapshot_order.rb +33 -0
- data/lib/scout_agent/plan.rb +306 -0
- data/lib/scout_agent/server.rb +123 -0
- data/lib/scout_agent/tracked.rb +59 -0
- data/lib/scout_agent/wire_tap.rb +513 -0
- data/setup.rb +1360 -0
- data/test/tc_core_extensions.rb +89 -0
- data/test/tc_id_card.rb +115 -0
- data/test/tc_plan.rb +285 -0
- data/test/test_helper.rb +22 -0
- data/test/ts_all.rb +7 -0
- metadata +171 -0
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
|
3
|
+
module ScoutAgent
|
4
|
+
class Database
|
5
|
+
class Statuses < Database
|
6
|
+
#
# Returns the SQL that upgrades the statuses database from schema +version+,
# or +nil+ when +version+ is not one this class knows how to upgrade.  Only
# +version+ 0 is handled, which creates the initial +statuses+ table.
#
# NOTE(review): REQUIRED_TEXT_TYPE, DATETIME_TYPE and
# DEFAULT_LOCALTIME_TRIGGER look like placeholders expanded elsewhere
# (presumably by Database) -- confirm before editing them.
#
def update_schema(version = schema_version)
  return nil unless 0 === version

  <<-END_INITIAL_SCHEMA.trim
  CREATE TABLE statuses (
    name TEXT NOT NULL PRIMARY KEY
    CHECK( name IN ( 'lifeline', 'master', 'mission',
    'communication', 'queue', 'snapshot' ) ),
    pid INTEGER NOT NULL,
    status REQUIRED_TEXT_TYPE,
    last_updated_at DATETIME_TYPE
  );
  DEFAULT_LOCALTIME_TRIGGER statuses last_updated_at
  END_INITIAL_SCHEMA
end
|
22
|
+
|
23
|
+
#
# Stores +status+ for the process called +name+, along with the PID of the
# calling process, replacing any row already recorded under that +name+.
# +name+ defaults to the process name of IDCard.me(), or +nil+ when no
# identity has been claimed.
#
# +last_updated_at+ is written as null (presumably so the trigger created in
# update_schema() can fill it in -- confirm).  A database error is logged and
# otherwise ignored, so a later update can try again.
#
def update_status(status, name = IDCard.me && IDCard.me.process_name)
  replace_sql = <<-END_UPDATE_STATUS.trim
  INSERT OR REPLACE INTO statuses(name, pid, status, last_updated_at)
  VALUES( ?, ?, ?, null)
  END_UPDATE_STATUS

  begin
    write_to_sqlite do |sqlite|
      sqlite.execute(replace_sql, name, Process.pid, status)
    end
  rescue Amalgalite::SQLite3::Error => error  # failed to update status
    # do nothing:  try again later
    log.error("Database status update error:  #{error.message}.")
  end
end
|
34
|
+
|
35
|
+
#
# Deletes the status row stored under +name+, which defaults to the process
# name of IDCard.me() (or +nil+ when no identity has been claimed).  A
# database error is logged and otherwise ignored.
#
def clear_status(name = IDCard.me && IDCard.me.process_name)
  begin
    write_to_sqlite do |sqlite|
      sqlite.execute("DELETE FROM statuses WHERE name = ?", name)
    end
  rescue Amalgalite::SQLite3::Error => error  # failed to delete status
    # do nothing:  new process will replace
    log.error("Database status clearing error:  #{error.message}.")
  end
end
|
43
|
+
|
44
|
+
#
# Runs a query for the name, pid, status, and last_updated_at of every row in
# the statuses table, ordered by ROWID, and returns whatever query() returns.
# If the database raises an error it is logged and an empty Array is returned
# instead.
#
def current_statuses
  find_sql = <<-END_FIND_STATUSES.trim
  SELECT name, pid, status, last_updated_at FROM statuses ORDER BY ROWID
  END_FIND_STATUSES

  begin
    query(find_sql)
  rescue Amalgalite::SQLite3::Error => error  # failed to find statuses
    log.error("Database statuses error:  #{error.message}.")
    []  # return empty results
  end
end
|
52
|
+
|
53
|
+
#
# Looks up the +status+ column of the row stored under +name+ and returns the
# result of the read.  +name+ defaults to the process name of IDCard.me() (or
# +nil+ when no identity has been claimed).  If the database raises an error
# it is logged and +nil+ is returned.
#
def current_status(name = IDCard.me && IDCard.me.process_name)
  find_sql = <<-END_FIND_STATUS
  SELECT status FROM statuses WHERE name = ?
  END_FIND_STATUS

  begin
    read_from_sqlite { |sqlite| sqlite.first_value_from(find_sql, name) }
  rescue Amalgalite::SQLite3::Error => error  # failed to find status
    log.error("Database current status error:  #{error.message}.")
    nil  # return no results
  end
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
|
3
|
+
module ScoutAgent
|
4
|
+
module Dispatcher
|
5
|
+
module_function
|
6
|
+
|
7
|
+
#
# Entry point for the command-line:  parses the switches out of +args+ (ARGV
# by default), works out which assignment was requested, locates the code for
# it, and executes it with the switches and whatever is left in +args+.
#
def dispatch(args = ARGV)
  options = parse_switches(args)
  command = parse_assignment(args)
  execute_assignment(command, load_assignment(command), options, args)
end
|
13
|
+
|
14
|
+
#
# Declares every supported command-line switch, parses them destructively out
# of +args+, and returns a Hash of the switches that were given (keyed by
# Symbol).  The same Hash is passed to Plan.update_from_switches() before
# returning.
#
# +args+ must respond to options() the way ARGV does (optparse's
# OptionParser::Arguable).
#
# <tt>--help</tt> and <tt>--version</tt> print to <tt>$stdout</tt> and exit
# successfully.  A switch that cannot be parsed is reported on
# <tt>$stderr</tt>, followed by the usage, and the process exits with a
# failing status of 1.
#
def parse_switches(args)
  switches = { }

  args.options do |opts|
    opts.banner = <<-END_USAGE.trim
    Usage:

      [sudo] #{ScoutAgent.agent_name} [OPTIONS] COMMAND

    Use the commands identify, start, and stop to prepare, launch, and
    shutdown the agent respectively. Those require super user privileges.
    You can also use the status command to check in on a running agent.

    END_USAGE

    opts.separator "Basic Options:"
    opts.on( "-s", "--server URL", String,
             "The URL for the server to report to." ) do |url|
      switches[:server_url] = url
    end
    opts.on( "-d", "--[no-]daemon",
             "Run in the background as a daemon." ) do |boolean|
      switches[:run_as_daemon] = boolean
    end
    opts.on( "-l", "--logging-level LEVEL", %w[DEBUG INFO WARN ERROR FATAL],
             "The minimum level of log message to record." ) do |level|
      switches[:logging_level] = level
    end
    opts.on( "-t", "--[no-]test-mode",
             "Used in agent development." ) do |boolean|
      switches[:test_mode] = boolean
      if boolean  # test mode forces a local server and foreground execution
        switches[:server_url]    = "http://localhost:4567"
        switches[:run_as_daemon] = false
      end
    end

    opts.separator "Expert Options:"
    opts.on( "--users NAME1,NAME2,...", Array,
             "A list of users to try switching to." ) do |users|
      switches[:user_choices] = users
    end
    opts.on( "--groups NAME1,NAME2,...", Array,
             "A list of groups to try switching to." ) do |groups|
      switches[:group_choices] = groups
    end
    opts.on( "--prefix PATH", String,
             "A prefix path prepended to all other paths." ) do |path|
      switches[:prefix_path] = path
    end
    # one --os-*-path switch per OS specific directory
    [ %w[os_config_path configuration],
      %w[os_db_path     databases],
      %w[os_pid_path    PID\ files],
      %w[os_log_path    log\ files] ].each do |name, used_for|
      opts.on( "--#{name.tr('_', '-')} PATH", String,
               "The path your OS uses for #{used_for}." ) do |path|
        switches[name.to_sym] = path
      end
    end

    opts.separator "Application Options:"
    opts.on( "-h", "--help",
             "Show this message." ) do
      puts opts  # show usage
      exit
    end
    opts.on( "-v", "--version",
             "Display the current version." ) do
      puts "#{ScoutAgent.proper_agent_name} v#{ScoutAgent::VERSION}"
      exit
    end

    begin
      opts.parse!
    rescue OptionParser::ParseError => error  # failed to parse options
      warn error.message  # say which switch was rejected and why
      puts opts           # show usage
      exit 1              # a rejected command-line is a failure
    end
  end

  # apply switches so paths will be set correctly for load checks
  Plan.update_from_switches(switches)

  switches
end
|
98
|
+
|
99
|
+
#
# Removes the first element of +args+ and returns it, downcased, as the name
# of the requested assignment.
#
# When no assignment was given, one is chosen:  "identify" if Plan.present?()
# is false, otherwise "status" when the lifeline PID file exists and "start"
# when it does not.  A name made of anything other than word characters is
# handed to abort_with_unknown_assignment().
#
def parse_assignment(args)
  command = args.shift.to_s.downcase

  if command.empty?
    return "identify" unless Plan.present?
    return IDCard.new(:lifeline).pid_file.exist? ? "status" : "start"
  end

  abort_with_unknown_assignment(command) unless command =~ /\A\w+\z/
  command
end
|
117
|
+
|
118
|
+
#
# Finds the file under <tt>LIB_DIR + "assignment"</tt> that implements
# +assignment+ and returns the path to it.  +assignment+ may be any leading
# abbreviation of a file's name ("sto" finds <tt>stop.rb</tt>).
#
# If the abbreviation fits more than one file,
# abort_with_ambiguous_assignment() is called with the candidates.  If it
# fits none, abort_with_unknown_assignment() is called.
#
def load_assignment(assignment)
  dir = LIB_DIR + "assignment"
  # anchored at both ends so only a true prefix of the file name can match
  pattern = /\A#{Regexp.escape(assignment)}\w*\.rb\z/
  # compare as Strings:  a Pathname only matches a Regexp directly on Rubies
  # where Pathname converts itself with to_str()
  matches = dir.entries.select { |entry| entry.to_s =~ pattern }
  if matches.size > 1
    abort_with_ambiguous_assignment(assignment, matches)
  elsif matches.first and (code = dir + matches.first).exist?
    return code
  else
    abort_with_unknown_assignment(assignment)
  end
end
|
129
|
+
|
130
|
+
#
# Requires the file at +code+, looks up the constant named after that file
# (its base name, CamelCased) inside Assignment, builds an instance of it
# from +switches+ and +other_args+, and calls prepare_and_execute() on that
# instance.  If the constant cannot be found, abort_with_missing_code() is
# called.  +assignment+ itself is not used here.
#
def execute_assignment(assignment, code, switches, other_args)
  require code

  class_name = code.basename(".rb").to_s.CamelCase
  handler    = nil
  begin
    handler = Assignment.const_get(class_name)
  rescue NameError  # can't load module
    abort_with_missing_code(class_name)
  end

  handler.new(switches, other_args).prepare_and_execute
end
|
140
|
+
|
141
|
+
#
# Aborts the program with a message listing the commands that +assignment+
# could have meant.  +matches+ holds the candidate file names (Pathname or
# String);  each is shown quoted, with its <tt>.rb</tt> extension removed.
#
def abort_with_ambiguous_assignment(assignment, matches)
  choices = matches.map { |m| "'#{File.basename(m.to_s, '.rb')}'" }
  # fold the last two choices into "a, or b" (nothing to fold with just one)
  choices[-2..-1] = choices[-2..-1].join(", or ") if choices.size > 1
  # a plain String, so no source indentation leaks into what the user sees
  abort "Ambiguous command '#{assignment}'. " +
        "Did you mean #{choices.join(', ')}?"
end
|
148
|
+
|
149
|
+
# Aborts the program, reporting +assignment+ as a command that isn't known.
def abort_with_unknown_assignment(assignment)
  message = format("Unknown command '%s'.", assignment)
  abort message
end
|
152
|
+
|
153
|
+
# Aborts the program, reporting that the class +class_name+ failed to load.
def abort_with_missing_code(class_name)
  message = format("Failed to load '%s'.", class_name)
  abort message
end
|
156
|
+
end
|
157
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
|
3
|
+
module ScoutAgent
|
4
|
+
#
|
5
|
+
# This class encapsulates a named process.  It is used to ensure exclusive
|
6
|
+
# execution and to signal other processes.
|
7
|
+
#
|
8
|
+
class IDCard
|
9
|
+
class << self
  #
  # The identity held by the current process, kept as a class-wide
  # attribute.  authorize() stores the claiming instance here when a claim
  # succeeds.
  #
  # <b>Warning:</b>  Reset this to +nil+ straight after a fork() or the
  # child will carry on under its parent's identity.
  #
  attr_reader :me
  attr_writer :me
end
|
19
|
+
|
20
|
+
#
|
21
|
+
# Pass in the +process_name+ of the process you want to signal() or the
|
22
|
+
# +process_name+ you wish to authorize() for yourself.
|
23
|
+
#
|
24
|
+
def initialize(process_name)
  # the name only selects a PID file;  nothing is claimed or read until
  # authorize(), signal(), or pid() is called
  @process_name = process_name
end
|
27
|
+
|
28
|
+
attr_reader :process_name
|
29
|
+
|
30
|
+
# A String representation of this process, with PID.
|
31
|
+
def to_s
|
32
|
+
"#{@process_name} (#{pid || 'unauthorized'})"
|
33
|
+
end
|
34
|
+
|
35
|
+
#
|
36
|
+
# Returns the path to the unique PID file for this process, based on the
|
37
|
+
# current Plan.
|
38
|
+
#
|
39
|
+
def pid_file
  # the file is named after the process and lives in the Plan's PID directory
  file_name = "#{@process_name}.pid"
  Plan.pid_dir + file_name
end
|
42
|
+
|
43
|
+
# Returns the PID for the named process, or +nil+ if it cannot be read.
|
44
|
+
def pid
  # only leading digits count as a PID:  an empty file (a claim still being
  # written), garbage, zero, or a negative number must read as "no PID"
  # because signal() would pass 0 or a negative value to Process.kill(),
  # which addresses process groups rather than a single process
  digits = pid_file.read[/\A\s*(\d+)/, 1]
  id     = digits.to_i  # nil.to_i is 0
  id > 0 ? id : nil
rescue StandardError  # missing or unreadable file
  nil
end
|
49
|
+
|
50
|
+
#
|
51
|
+
# Tries to send +message+ as a signal to the process represented by this
|
52
|
+
# instance. You can pass any message Process.kill() would understand.
|
53
|
+
#
|
54
|
+
# Returns +true+ if the signal was sent, or +false+ if the PID file could
|
55
|
+
# not be read. Any Exception raised during the send, such as Errno::ESRCH
|
56
|
+
# for a missing process, will bubble up to the calling code.
|
57
|
+
#
|
58
|
+
def signal(message)
  target = pid
  return false unless target  # no PID could be read

  Process.kill(message, target)  # errors from the send are not rescued here
  true
end
|
66
|
+
|
67
|
+
#
|
68
|
+
# Claims this identity for this process. This process is multiprocess-safe
|
69
|
+
# and will fail if another process has claimed this identity. However,
|
70
|
+
# stale claims are ignored and replaced, if possible.
|
71
|
+
#
|
72
|
+
# This method returns +true+ if the claim succeeded and +false+ if it could
|
73
|
+
# not happen for any reason. A return of +true+ indicates that me() has
|
74
|
+
# been updated and an exit handler has been installed to revoke() this claim
|
75
|
+
# as the process ends.
|
76
|
+
#
|
77
|
+
# If a block is given, it is called while the freshly created PID file is
# locked (this is the hook used for daemonization).  A false or +nil+ result
# abandons the claim.  The same block is used again if a stale PID file has
# to be cleared and the claim retried.
def authorize(&block)
  # CREAT | EXCL makes the create fail with EEXIST if the file already exists
  File.open(pid_file, File::CREAT | File::EXCL | File::WRONLY) do |file|
    file.flock(File::LOCK_EX)
    if !block_given? || yield  # allows for daemonization
      file.puts Process.pid
    else
      file.flock(File::LOCK_UN)
      revoke  # remove this file if anything went wrong
      return false
    end
    file.flock(File::LOCK_UN)
  end

  self.class.me = self

  at_my_exit do
    revoke  # best effort:  nothing more can be done if the unlink fails
  end
  true
rescue Errno::EEXIST  # pid_file already exists
  File.open(pid_file) do |file|
    # no lock means another process is busy with this file
    return false unless file.flock(File::LOCK_EX | File::LOCK_NB)

    if file.read =~ /\A\d+/
      begin
        signal(0)  # probe only:  raises if the process can't be signalled
      rescue Errno::ESRCH  # no such process:  the PID file is stale
        if revoke
          file.flock(File::LOCK_UN)  # release the lock before we recurse
          return authorize(&block)   # try again, keeping the caller's block
        end
      rescue Errno::EPERM, Errno::EACCES  # don't have permission
        # Process.kill() raises EPERM for a live process we aren't allowed to
        # signal:  the identity is taken so there's nothing we can do
      end
    end
    file.flock(File::LOCK_UN)  # release the lock
  end
  false
end
|
131
|
+
|
132
|
+
#
|
133
|
+
# Releases a held claim on a process name. Returns +true+ if successful or
|
134
|
+
# +false+ if the PID file didn't exist or couldn't be destroyed.
|
135
|
+
#
|
136
|
+
def revoke
  pid_file.unlink
  true
rescue StandardError  # no such file, no permission, ...
  # deliberately not Exception:  an Interrupt or SystemExit raised while
  # cleaning up must keep propagating instead of being reported as +false+
  false
end
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,243 @@
|
|
1
|
+
#!/usr/bin/env ruby -wKU
|
2
|
+
|
3
|
+
module ScoutAgent
|
4
|
+
class Lifeline
|
5
|
+
NO_CONTACT_TIMEOUT = 3
|
6
|
+
CHECK_IN_FREQUENCY = 0.99 # gives us three check ins before a cutoff
|
7
|
+
TERM_TO_KILL_PAUSE = 1
|
8
|
+
RELAUNCH_FREQUENCIES = [0, 1, 1, 2, 3, 5, 8, 13]
|
9
|
+
|
10
|
+
#################
|
11
|
+
### Interface ###
|
12
|
+
#################
|
13
|
+
|
14
|
+
#
# Prepares a lifeline for the process named +agent+, recording the current
# process as the parent.  Nothing is launched until launch_and_monitor() is
# called.  Messages are sent to +log+.
#
# An at_my_exit() hook is registered that calls clear_status().
#
def initialize(agent, log = WireTap.new(nil))
  @agent, @log                = agent, log
  @parent_pid                 = Process.pid
  @child_pid                  = nil
  @reader = @writer           = nil  # pipe ends, see prepare_pipe()
  @launch_and_monitor_thread  = @check_in_with_parent_thread = nil
  @code = @last_launch        = nil
  @relaunch_index             = 0    # position in RELAUNCH_FREQUENCIES

  at_my_exit { clear_status }
end
|
31
|
+
|
32
|
+
include Tracked
|
33
|
+
|
34
|
+
attr_reader :log
|
35
|
+
|
36
|
+
#
# Starts, and returns, a background Thread that repeats one cycle forever:
# wait out any relaunch delay, open a pipe, launch the child, close our copy
# of the write end, monitor the child, and stop it once monitoring ends.
#
# An exception inside the Thread aborts the whole process.
#
def launch_and_monitor
  cycle = [ :wait_for_launch, :prepare_pipe, :launch_child,
            :close_writer,    :monitor_child, :restart_child ]

  @launch_and_monitor_thread = Thread.new do
    Thread.current.abort_on_exception = true
    loop { cycle.each { |step| send(step) } }
  end
end
|
49
|
+
|
50
|
+
#
# Stops the monitoring Thread, if there is one, and sends TERM to the
# process named by the agent.  This does nothing unless called from the
# process that created this lifeline.
#
def terminate
  return unless Process.pid == @parent_pid

  # stop monitoring
  log.info("Stopping the monitoring for '#{@agent}'.")
  @launch_and_monitor_thread.exit if @launch_and_monitor_thread

  # ask child process to exit
  log.info("Asking '#{@agent}' to stop.")
  IDCard.new(@agent).signal("TERM")
rescue Errno::ESRCH  # no such process
  # it has already exited, so we are fine
end
|
62
|
+
|
63
|
+
#
# Waits for the monitoring Thread to finish.  Does nothing unless called
# from the process that created this lifeline after launch_and_monitor().
#
def join
  in_parent = Process.pid == @parent_pid
  @launch_and_monitor_thread.join if in_parent && @launch_and_monitor_thread
end
|
68
|
+
|
69
|
+
#######
|
70
|
+
private
|
71
|
+
#######
|
72
|
+
|
73
|
+
##############
|
74
|
+
### Parent ###
|
75
|
+
##############
|
76
|
+
|
77
|
+
#
# Paces relaunches.  The first launch is never delayed.  After that, the
# current entry of RELAUNCH_FREQUENCIES (in minutes) is the minimum spacing
# between launches:  any time still owed is slept away and the index moves
# to the next, longer, entry (stopping at the last one).
#
# If the child instead ran for longer than the next entry allows past that
# spacing, the index is reset to the start and nothing else happens.
#
def wait_for_launch
  return unless @last_launch

  elapsed = Time.now - @last_launch
  owed    = RELAUNCH_FREQUENCIES[@relaunch_index] * 60 - elapsed

  if owed > 0
    log.info("Waiting #{owed} seconds before relaunching '#{@agent}'.")
    sleep owed
  else
    upcoming = RELAUNCH_FREQUENCIES[@relaunch_index + 1] ||
               RELAUNCH_FREQUENCIES.last
    if owed.abs > upcoming * 60
      @relaunch_index = 0
      return
    end
  end

  @relaunch_index += 1 if @relaunch_index != RELAUNCH_FREQUENCIES.size - 1
end
|
97
|
+
|
98
|
+
# Opens a fresh pipe, keeping the read end in @reader and the write end in
# @writer.
def prepare_pipe
  ends = IO.pipe
  @reader, @writer = ends.first, ends.last
  ends
end
|
101
|
+
|
102
|
+
def launch_child
|
103
|
+
log.info("Launching '#{@agent}'.")
|
104
|
+
status(@agent)
|
105
|
+
@last_launch = Time.now
|
106
|
+
@child_pid = fork do
|
107
|
+
reset_environment
|
108
|
+
close_reader
|
109
|
+
load_code
|
110
|
+
authorize_code
|
111
|
+
check_in_with_parent
|
112
|
+
run_code
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
#
# Closes the write end of the pipe.  It is safe to call this before a pipe
# exists or after the write end has already been closed.
#
def close_writer
  # an explicit check, so a genuine NoMethodError is never mistaken for
  # "there was no pipe"
  @writer.close if @writer and not @writer.closed?
  nil
rescue IOError  # already closed
  # it's closed so we're OK
end
|
123
|
+
|
124
|
+
#
# Reads check-ins from the child until something is wrong, then returns.
# Each check-in must arrive within NO_CONTACT_TIMEOUT seconds and must be a
# line of the form "PID: YYYY-MM-DD HH:MM" carrying the child's PID.  A
# timeout, a closed pipe, or a malformed line is logged as an error and ends
# the monitoring.
#
def monitor_child
  expected = /\A#{@child_pid}: \d{4}-\d{2}-\d{2} \d{2}:\d{2}\Z/

  loop do
    line = nil
    begin
      line = Timeout.timeout(NO_CONTACT_TIMEOUT) { @reader.gets }
      log.error("'#{@agent}' monitor channel has closed.") unless line
    rescue Timeout::Error
      # line will stay nil
      log.error("'#{@agent}' failed to check-in in time.")
    end

    next if line.to_s =~ expected

    log.error("'#{@agent}' check-in was malformed.") if line
    break
  end
end
|
141
|
+
|
142
|
+
#
# Stops the child so the monitoring loop can launch it again:  marks the
# agent as restarting, closes the read end of the pipe, and hands the
# child's PID to Process.term_or_kill() with TERM_TO_KILL_PAUSE.
#
def restart_child
  log.info "Stopping '#{@agent}'."
  status @agent, :restarting
  close_reader
  Process.term_or_kill @child_pid, TERM_TO_KILL_PAUSE
end
|
148
|
+
|
149
|
+
#
# Keeps the stored status in step with which processes are being restarted.
# The stored status is either "Monitoring processes" or "Restarting " plus
# the process names joined by " and ".
#
# With +restarting+ set, +process+ is added to that list if it isn't on it
# already.  Without it, +process+ is removed.  The resulting message is
# passed to the inherited status().  Nothing happens when no status database
# is available.
#
def status(process, restarting = false)
  db = status_database
  return unless db

  db.write_to_sqlite do |_sqlite|
    listed    = db.current_status.to_s[/\ARestarting (.+)\z/, 1]
    processes = listed ? listed.split(" and ") : [ ]

    if restarting
      processes << process unless processes.include? process
    else
      processes.delete(process)
    end

    if processes.empty?
      super("Monitoring processes")
    else
      super("Restarting #{processes.join(' and ')}")
    end
  end
end
|
176
|
+
|
177
|
+
#############
|
178
|
+
### Child ###
|
179
|
+
#############
|
180
|
+
|
181
|
+
#
# Run first in the child:  swaps the handlers inherited from the parent for
# the child's own (shutdown finishes the agent code, ALRM alerts it) and
# drops the parent's identity.
#
def reset_environment
  # swap out our parent's signal handlers
  install_shutdown_handler do
    finish_code
  end
  Signal.trap("ALRM") do
    alert_code
  end

  # clear the parent's identity
  IDCard.me = nil
end
|
189
|
+
|
190
|
+
#
# Closes the read end of the pipe.  It is safe to call this before a pipe
# exists or after the read end has already been closed.
#
def close_reader
  # an explicit check, so a genuine NoMethodError is never mistaken for
  # "there was no pipe"
  @reader.close if @reader and not @reader.closed?
  nil
rescue IOError  # already closed
  # it's closed so we're OK
end
|
197
|
+
|
198
|
+
#
# Requires the base agent code and the file for this particular agent, then
# stores a new instance of the agent's class (the CamelCased agent name plus
# "Agent", looked up inside ScoutAgent::Agent) in @code.
#
def load_code
  base_file  = LIB_DIR + "agent"
  agent_file = LIB_DIR + "agent/#{@agent}_agent"
  require base_file
  require agent_file

  agent_class = ScoutAgent::Agent.const_get("#{@agent.CamelCase}Agent")
  @code       = agent_class.new
end
|
203
|
+
|
204
|
+
# Asks the loaded agent code to authorize itself.  The outcome is not
# checked here.
def authorize_code
  @code.authorize
end
|
207
|
+
|
208
|
+
#
# Starts, and returns, a background Thread that writes a
# "PID: YYYY-MM-DD HH:MM" line to the pipe every CHECK_IN_FREQUENCY seconds.
# When the write fails because the pipe has been closed, the agent code is
# finished and the Thread ends.
#
# An exception inside the Thread aborts the whole process.
#
def check_in_with_parent
  @check_in_with_parent_thread = Thread.new do
    Thread.current.abort_on_exception = true
    loop do
      begin
        check_in = "#{Process.pid}: #{Time.now.strftime('%Y-%m-%d %H:%M')}"
        @writer.puts check_in
      rescue Errno::EPIPE, IOError  # parent closed our pipe or exited
        break finish_code
      end
      sleep CHECK_IN_FREQUENCY
    end
  end
end
|
222
|
+
|
223
|
+
# Hands control to the loaded agent code by calling its run().
def run_code
  @code.run
end
|
226
|
+
|
227
|
+
#
# Shuts the child down:  closes the write end of the pipe, then asks the
# agent code to finish.  If no agent code has been loaded yet, the process
# simply exits.
#
def finish_code
  close_writer

  return @code.finish if @code
  exit
end
|
236
|
+
|
237
|
+
# Tells the agent code, if any has been loaded, to notice changes.
def alert_code
  @code.notice_changes if @code
end
|
242
|
+
end
|
243
|
+
end
|