right_link 5.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/actors/agent_manager.rb +88 -0
- data/actors/instance_scheduler.rb +321 -0
- data/actors/instance_services.rb +64 -0
- data/actors/instance_setup.rb +567 -0
- data/bin/cloud +25 -0
- data/bin/cook_runner +44 -0
- data/bin/deploy +120 -0
- data/bin/enroll +385 -0
- data/bin/rad +32 -0
- data/bin/rchk +29 -0
- data/bin/rnac +39 -0
- data/bin/rs_connect +33 -0
- data/bin/rs_log_level +31 -0
- data/bin/rs_ohai +28 -0
- data/bin/rs_reenroll +31 -0
- data/bin/rs_run_recipe +34 -0
- data/bin/rs_run_right_script +34 -0
- data/bin/rs_shutdown +33 -0
- data/bin/rs_tag +33 -0
- data/bin/rs_thunk +33 -0
- data/bin/rstat +31 -0
- data/bin/system +16 -0
- data/ext/Rakefile +18 -0
- data/init/config.yml +5 -0
- data/init/init.rb +79 -0
- data/lib/chef/ohai_setup.rb +51 -0
- data/lib/chef/plugins/cloud.rb +91 -0
- data/lib/chef/plugins/cloudstack.rb +23 -0
- data/lib/chef/plugins/ec2.rb +23 -0
- data/lib/chef/plugins/linux/block_device2.rb +24 -0
- data/lib/chef/plugins/rackspace.rb +23 -0
- data/lib/chef/plugins/rightscale.rb +125 -0
- data/lib/chef/plugins/windows/network.rb +114 -0
- data/lib/chef/plugins.rb +74 -0
- data/lib/chef/providers/dns_dnsmadeeasy_provider.rb +81 -0
- data/lib/chef/providers/dns_resource.rb +100 -0
- data/lib/chef/providers/executable_schedule_provider.rb +70 -0
- data/lib/chef/providers/executable_schedule_resource.rb +144 -0
- data/lib/chef/providers/remote_recipe_provider.rb +86 -0
- data/lib/chef/providers/remote_recipe_resource.rb +101 -0
- data/lib/chef/providers/right_link_tag_provider.rb +73 -0
- data/lib/chef/providers/right_link_tag_resource.rb +59 -0
- data/lib/chef/providers/right_script_provider.rb +190 -0
- data/lib/chef/providers/right_script_resource.rb +113 -0
- data/lib/chef/providers/rs_shutdown_provider.rb +75 -0
- data/lib/chef/providers/rs_shutdown_resource.rb +55 -0
- data/lib/chef/providers/server_collection_provider.rb +66 -0
- data/lib/chef/providers/server_collection_resource.rb +93 -0
- data/lib/chef/providers/windows/powershell_provider.rb +151 -0
- data/lib/chef/providers/windows/powershell_resource.rb +111 -0
- data/lib/chef/providers/windows/unsupported_provider.rb +51 -0
- data/lib/chef/right_providers.rb +55 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ChefNodeCmdlet.csproj +104 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ChefNodeCmdlet.dll-Help.xml +141 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Exceptions.cs +182 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeCommand.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeResponse.cs +45 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceCommand.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceResponse.cs +45 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceCommand.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceResponse.cs +45 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionCommand.cs +178 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionRequest.cs +67 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionResponse.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueCommandBase.cs +142 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueRequestBase.cs +64 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueResponseBase.cs +69 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/JsonTransport.cs +110 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/PipeClient.cs +158 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/PipeServer.cs +142 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Properties/AssemblyInfo.cs +16 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ProtocolConstants.cs +55 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ProtocolUtilities.cs +77 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ReadMe.txt +53 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeCommand.cs +59 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeResponse.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceCommand.cs +59 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceResponse.cs +40 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceCommand.cs +59 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceResponse.cs +40 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueCommandBase.cs +293 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueRequestBase.cs +75 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueResponseBase.cs +45 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Transport.cs +91 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet.sln +35 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/Program.cs +374 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/Properties/AssemblyInfo.cs +16 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/TestChefNodeCmdlet.csproj +65 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/Program.cs +136 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/Properties/AssemblyInfo.cs +36 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/ReadMe.txt +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/TestNextActionCmdlet.csproj +68 -0
- data/lib/chef/windows/bin/Newtonsoft.Json.dll +0 -0
- data/lib/chef/windows/chef_node_server.rb +463 -0
- data/lib/chef/windows/dynamic_powershell_provider.rb +296 -0
- data/lib/chef/windows/pipe_server.rb +283 -0
- data/lib/chef/windows/powershell_host.rb +285 -0
- data/lib/chef/windows/powershell_pipe_server.rb +136 -0
- data/lib/chef/windows/powershell_provider_base.rb +92 -0
- data/lib/chef/windows/scripts/run_loop.ps1 +105 -0
- data/lib/clouds/cloud.rb +557 -0
- data/lib/clouds/cloud_factory.rb +250 -0
- data/lib/clouds/cloud_utilities.rb +244 -0
- data/lib/clouds/clouds/azure.rb +106 -0
- data/lib/clouds/clouds/cloudstack.rb +114 -0
- data/lib/clouds/clouds/ec2.rb +113 -0
- data/lib/clouds/clouds/eucalyptus.rb +46 -0
- data/lib/clouds/clouds/google.rb +102 -0
- data/lib/clouds/clouds/none.rb +76 -0
- data/lib/clouds/clouds/openstack.rb +30 -0
- data/lib/clouds/clouds/rackspace-ng.rb +54 -0
- data/lib/clouds/clouds/rackspace.rb +78 -0
- data/lib/clouds/clouds/softlayer.rb +91 -0
- data/lib/clouds/metadata_formatter.rb +108 -0
- data/lib/clouds/metadata_provider.rb +128 -0
- data/lib/clouds/metadata_source.rb +87 -0
- data/lib/clouds/metadata_sources/certificate_metadata_source.rb +207 -0
- data/lib/clouds/metadata_sources/config_drive_metadata_source.rb +129 -0
- data/lib/clouds/metadata_sources/file_metadata_source.rb +74 -0
- data/lib/clouds/metadata_sources/http_metadata_source.rb +277 -0
- data/lib/clouds/metadata_sources/selective_metadata_source.rb +122 -0
- data/lib/clouds/metadata_tree_climber.rb +144 -0
- data/lib/clouds/metadata_writer.rb +155 -0
- data/lib/clouds/metadata_writers/dictionary_metadata_writer.rb +72 -0
- data/lib/clouds/metadata_writers/ruby_metadata_writer.rb +76 -0
- data/lib/clouds/metadata_writers/shell_metadata_writer.rb +121 -0
- data/lib/clouds/register_clouds.rb +34 -0
- data/lib/clouds.rb +32 -0
- data/lib/gem_dependencies.rb +83 -0
- data/lib/git_hooks/commit-msg.rb +7 -0
- data/lib/instance/agent_config.rb +168 -0
- data/lib/instance/agent_watcher.rb +233 -0
- data/lib/instance/audit_cook_stub.rb +104 -0
- data/lib/instance/audit_proxy.rb +247 -0
- data/lib/instance/bundle_queue.rb +104 -0
- data/lib/instance/cook/agent_connection.rb +109 -0
- data/lib/instance/cook/audit_logger.rb +165 -0
- data/lib/instance/cook/audit_stub.rb +142 -0
- data/lib/instance/cook/ca-bundle.crt +2794 -0
- data/lib/instance/cook/chef_state.rb +211 -0
- data/lib/instance/cook/cook.rb +306 -0
- data/lib/instance/cook/cook_state.rb +298 -0
- data/lib/instance/cook/cookbook_path_mapping.rb +66 -0
- data/lib/instance/cook/cookbook_repo_retriever.rb +190 -0
- data/lib/instance/cook/executable_sequence.rb +765 -0
- data/lib/instance/cook/external_parameter_gatherer.rb +190 -0
- data/lib/instance/cook/repose_downloader.rb +349 -0
- data/lib/instance/cook/shutdown_request_proxy.rb +121 -0
- data/lib/instance/cook.rb +41 -0
- data/lib/instance/downloader.rb +208 -0
- data/lib/instance/duplicable.rb +67 -0
- data/lib/instance/exceptions.rb +49 -0
- data/lib/instance/executable_sequence_proxy.rb +278 -0
- data/lib/instance/instance_commands.rb +577 -0
- data/lib/instance/instance_state.rb +633 -0
- data/lib/instance/json_utilities.rb +102 -0
- data/lib/instance/login_manager.rb +533 -0
- data/lib/instance/login_user_manager.rb +522 -0
- data/lib/instance/message_encoder.rb +118 -0
- data/lib/instance/multi_thread_bundle_queue.rb +232 -0
- data/lib/instance/operation_context.rb +60 -0
- data/lib/instance/options_bag.rb +65 -0
- data/lib/instance/payload_formatter.rb +46 -0
- data/lib/instance/policy.rb +53 -0
- data/lib/instance/policy_audit.rb +100 -0
- data/lib/instance/policy_manager.rb +146 -0
- data/lib/instance/reenroll_manager.rb +104 -0
- data/lib/instance/right_scripts_cookbook.rb +181 -0
- data/lib/instance/shutdown_request.rb +221 -0
- data/lib/instance/single_thread_bundle_queue.rb +189 -0
- data/lib/instance/volume_management.rb +450 -0
- data/lib/instance.rb +50 -0
- data/lib/repo_conf_generators/apt_conf_generators.rb +106 -0
- data/lib/repo_conf_generators/gem_conf_generators.rb +80 -0
- data/lib/repo_conf_generators/rightscale_conf_generators.rb +254 -0
- data/lib/repo_conf_generators/rightscale_key.pub +17 -0
- data/lib/repo_conf_generators/yum_conf_generators.rb +225 -0
- data/lib/repo_conf_generators.rb +30 -0
- data/lib/run_shell.rb +28 -0
- data/scripts/agent_checker.rb +571 -0
- data/scripts/agent_controller.rb +247 -0
- data/scripts/agent_deployer.rb +148 -0
- data/scripts/bundle_runner.rb +336 -0
- data/scripts/cloud_controller.rb +176 -0
- data/scripts/log_level_manager.rb +142 -0
- data/scripts/ohai_runner.rb +33 -0
- data/scripts/reenroller.rb +193 -0
- data/scripts/server_importer.rb +293 -0
- data/scripts/shutdown_client.rb +183 -0
- data/scripts/system_configurator.rb +367 -0
- data/scripts/tagger.rb +381 -0
- data/scripts/thunker.rb +356 -0
- metadata +418 -0
@@ -0,0 +1,571 @@
|
|
1
|
+
# === Synopsis:
|
2
|
+
# RightScale Agent Checker (rchk) - (c) 2010-2011 RightScale Inc
|
3
|
+
#
|
4
|
+
# Checks the agent to see if it is actively communicating with RightNet and if not
|
5
|
+
# triggers it to re-enroll and exits.
|
6
|
+
#
|
7
|
+
# Alternatively runs as a daemon and performs this communication check periodically.
|
8
|
+
#
|
9
|
+
# === Usage
|
10
|
+
# rchk
|
11
|
+
#
|
12
|
+
# Options:
|
13
|
+
# --time-limit, -t SEC Override the default time limit since last communication for
|
14
|
+
# check to pass (also the interval for daemon to run these checks),
|
15
|
+
# ignored if less than 1
|
16
|
+
# --attempts, -a N Override the default number of communication check attempts
|
17
|
+
# before triggering re-enroll, ignored if less than 1
|
18
|
+
# --retry-interval, -r SEC Override the default interval for retrying communication check,
|
19
|
+
# reset to time-limit if it exceeds it, ignored if less than 1
|
20
|
+
# --start Run as a daemon process that checks agent communication after the
|
21
|
+
# configured time limit and repeatedly thereafter on that interval
|
22
|
+
# (the checker does an immediate one-time check if --start is not specified)
|
23
|
+
# --stop Stop the currently running daemon started with --start and then exit
|
24
|
+
# --ping, -p Try communicating now regardless of whether have communicated within
|
25
|
+
# the configured time limit, does not apply if running as a daemon
|
26
|
+
# --verbose, -v Display debug information
|
27
|
+
# --version Display version information
|
28
|
+
# --help Display help
|
29
|
+
#
|
30
|
+
|
31
|
+
require 'rubygems'
|
32
|
+
require 'eventmachine'
|
33
|
+
require 'trollop'
|
34
|
+
require 'right_agent'
|
35
|
+
require 'right_agent/scripts/usage'
|
36
|
+
require 'right_agent/scripts/common_parser'
|
37
|
+
|
38
|
+
require File.normalize_path(File.join(File.dirname(__FILE__), '..', 'lib', 'instance', 'agent_watcher'))
|
39
|
+
require File.normalize_path(File.join(File.dirname(__FILE__), '..', 'lib', 'instance', 'agent_config'))
|
40
|
+
|
41
|
+
module RightScale
|
42
|
+
|
43
|
+
# Commands exposed by the instance agent checker over its command channel
class AgentCheckerCommands

  # Build the table of commands served by the checker
  #
  # === Parameters
  # checker(AgentChecker):: Agent checker executing commands
  #
  # === Return
  # (Hash):: Command blocks keyed by command names
  def self.get(checker)
    handler = new(checker)
    terminate = lambda do |opts, conn|
      opts[:conn] = conn
      # terminate_command is protected, hence the dispatch through send
      handler.send("terminate_command", opts)
    end
    {:terminate => terminate}
  end

  # Remember the agent checker that commands act upon
  #
  # === Parameter
  # checker(AgentChecker):: Agent checker
  def initialize(checker)
    @checker = checker
  end

  protected

  # Terminate command: acknowledge the request, then stop the checker
  #
  # === Parameters
  # opts[:conn](EM::Connection):: Connection used to send reply
  #
  # === Return
  # (Object):: Whatever EM.next_tick returns (NOTE(review): earlier doc claimed true; not shown by the code)
  def terminate_command(opts)
    CommandIO.instance.reply(opts[:conn], "Checker terminating")
    # Terminate on the next reactor tick to give the reply a chance to be sent
    EM.next_tick { @checker.terminate }
  end

end # AgentCheckerCommands
|
82
|
+
|
83
|
+
class AgentChecker
|
84
|
+
|
85
|
+
include DaemonizeHelper
|
86
|
+
|
87
|
+
VERSION = [0, 1]

# Time constants, all expressed in seconds
MINUTE = 60
HOUR = MINUTE * 60
DAY = HOUR * 24

# Default maximum number of seconds since last communication for instance to still be
# considered connected
# Only used if --time-limit not specified and :ping_interval option not specified for agent
DEFAULT_TIME_LIMIT = HOUR * 12

# Multiplier of agent's mapper ping interval to get daemon's last communication time limit
PING_INTERVAL_MULTIPLIER = 3

# Default maximum number of seconds between checks for recent communication if first check fails
DEFAULT_RETRY_INTERVAL = MINUTE * 5

# Default maximum number of attempts to check communication before triggering re-enroll
DEFAULT_MAX_ATTEMPTS = 3

# Maximum number of seconds to wait for a CommandIO response from the instance agent
COMMAND_IO_TIMEOUT = MINUTE * 2
|
109
|
+
|
110
|
+
# Create a checker, parse the command line and run it
#
# Exits with status 2 and a hint about privileges if access is denied
#
# === Return
# (Object):: Result of AgentChecker#start
def self.run
  checker = AgentChecker.new
  options = checker.parse_args
  checker.start(options)
rescue Errno::EACCES => e
  STDERR.puts e.message
  STDERR.puts "Try elevating privilege (sudo/runas) before invoking this command."
  exit(2)
end
|
122
|
+
|
123
|
+
# Create AgentWatcher (once) and have it monitor the instance agent process
#
# === Return
# (Object):: Result of AgentWatcher#start_watching
def setup_agent_watcher
  # Watcher output is funneled through the checker's own logging
  logger = lambda { |message| info(message) }
  @agent_watcher ||= AgentWatcher.new(logger, @agent[:pid_dir])
  @agent_watcher.watch_agent(@agent[:identity], '/opt/rightscale/bin/rnac', '--start instance', '--stop instance')
  @agent_watcher.start_watching
end
|
132
|
+
|
133
|
+
# Stop AgentWatcher from monitoring agent processes
#
# === Return
# (Object):: Result of AgentWatcher#stop_watching
def stop_agent_watcher
  watcher = @agent_watcher
  watcher.stop_agent(@agent[:identity])
  watcher.stop_watching
end
|
141
|
+
|
142
|
+
# Run daemon or run one agent communication check
# If running as a daemon, store pid in same location as agent except suffix the
# agent identity with '-rchk'.
#
# === Parameters
# options(Hash):: Run options (modified in place to hold the effective values)
#   :time_limit(Integer):: Time limit for last communication and interval for daemon checks,
#     defaults to PING_INTERVAL_MULTIPLIER times agent's ping interval or to DEFAULT_TIME_LIMIT,
#     ignored if less than 1
#   :max_attempts(Integer):: Maximum number of communication check attempts,
#     defaults to DEFAULT_MAX_ATTEMPTS when missing or less than 1
#   :retry_interval(Integer):: Number of seconds to wait before retrying communication check,
#     defaults to DEFAULT_RETRY_INTERVAL when missing or less than 1, reset to :time_limit if exceeds it
#   :daemon(Boolean):: Whether to run as a daemon rather than do a one-time communication check
#   :log_path(String):: Log file directory, defaults to one used by agent
#   :stop(Boolean):: Whether to stop the currently running daemon and then exit
#   :ping(Boolean):: Try communicating now regardless of whether have communicated within
#     the configured time limit, ignored if :daemon true
#   :verbose(Boolean):: Whether to display debug information
#
# === Return
# true:: Always return true
def start(options)
  begin
    setup_traps
    @command_serializer = Serializer.new
    @state_serializer = Serializer.new(:json)

    # Retrieve instance agent configuration options
    @agent = AgentConfig.agent_options('instance')
    error("No instance agent configured", nil, true) if @agent.empty?

    # Apply agent's ping interval if needed and adjust options to make them consistent
    @options = options
    # A time limit below 1 is documented as ignored; left in place it would break the
    # attempts computation below (division by a zero or negative retry interval)
    @options.delete(:time_limit) if @options[:time_limit] && @options[:time_limit] < 1
    unless @options[:time_limit]
      if @agent[:ping_interval]
        @options[:time_limit] = @agent[:ping_interval] * PING_INTERVAL_MULTIPLIER
      else
        @options[:time_limit] = DEFAULT_TIME_LIMIT
      end
    end

    # Missing or non-positive values fall back to the defaults instead of being compared as nil
    retry_interval = @options[:retry_interval]
    retry_interval = DEFAULT_RETRY_INTERVAL unless retry_interval && retry_interval >= 1
    max_attempts = @options[:max_attempts]
    max_attempts = DEFAULT_MAX_ATTEMPTS unless max_attempts && max_attempts >= 1

    @options[:retry_interval] = [retry_interval, @options[:time_limit]].min
    @options[:max_attempts] = [max_attempts, @options[:time_limit] / @options[:retry_interval]].min
    @options[:log_path] ||= RightScale::Platform.filesystem.log_dir

    # Attach to log used by instance agent
    Log.program_name = 'RightLink'
    Log.facility = 'user'
    Log.log_to_file_only(@agent[:log_to_file_only])
    Log.init(@agent[:identity], @options[:log_path], :print => true)
    Log.level = :debug if @options[:verbose]
    @logging_enabled = true

    # Catch any egregious eventmachine failures, especially failure to connect to agent with CommandIO
    # Exit even if running as daemon since no longer can trust EM and should get restarted automatically
    EM.error_handler do |e|
      if e.class == RuntimeError && e.message =~ /no connection/
        error("Failed to connect to agent for communication check", nil, false)
        @command_io_failures = (@command_io_failures || 0) + 1
        reenroll! if @command_io_failures > @options[:max_attempts]
      else
        error("Internal checker failure", e, true)
      end
    end

    # note that our Windows service monitors rnac and rchk processes
    # externally and restarts them if they die, so no need to roll our
    # own cross-monitoring on that platform.
    use_agent_watcher = !RightScale::Platform.windows?
    EM.run do
      check
      setup_agent_watcher if use_agent_watcher
    end
    stop_agent_watcher if use_agent_watcher

  rescue SystemExit => e
    # exit requested by error(..., abort = true) must not be reported as a failure
    raise e
  rescue Exception => e
    error("Failed to run", e, true)
  end
  true
end
|
223
|
+
|
224
|
+
# Terminate the checker: stop the command runner (if one was started) and the
# EventMachine reactor, ignoring standard errors raised by either
#
# === Return
# true:: Always return true
def terminate
  if @command_runner
    begin
      CommandRunner.stop
    rescue StandardError
      nil
    end
  end

  begin
    EM.stop
  rescue StandardError
    nil
  end

  true
end
|
233
|
+
|
234
|
+
# Create options hash from command line arguments
#
# Values documented as "ignored if less than 1" are normalized here: a non-positive
# time limit is removed so that the caller applies its own default, while non-positive
# attempts and retry interval are replaced by their defaults
#
# Prints usage or version and exits when --help or --version is given; reports
# command line errors through error() and aborts
#
# === Return
# options(Hash):: Command line options
def parse_args
  parser = Trollop::Parser.new do
    opt :max_attempts, "", :default => DEFAULT_MAX_ATTEMPTS, :long => "--attempts", :short => "-a"
    opt :retry_interval, "", :default => DEFAULT_RETRY_INTERVAL
    opt :time_limit, "", :type => :int
    opt :daemon, "", :long => "--start"
    opt :stop
    opt :ping
    opt :verbose
    opt :state_path, "", :type => String
    version ""
  end

  begin
    options = parser.parse
    options[:max_attempts] = DEFAULT_MAX_ATTEMPTS unless options[:max_attempts] > 0
    # :time_limit has no default, so it is nil unless given on the command line
    options.delete(:time_limit) if options[:time_limit] && options[:time_limit] <= 0
    options[:retry_interval] = DEFAULT_RETRY_INTERVAL unless options[:retry_interval] > 0
    options
  rescue Trollop::HelpNeeded
    puts Usage.scan(__FILE__)
    exit
  rescue Trollop::CommandlineError => e
    error("#{e}\nUse --help for additional information", nil, true)
  rescue Trollop::VersionNeeded
    puts version
    exit
  end
end
|
269
|
+
|
270
|
+
protected
|
271
|
+
|
272
|
+
# Perform required checks: stop a running daemon, become the daemon, or do a
# single communication check, depending on the :stop and :daemon options
#
# === Return
# true:: Always return true
def check
  begin
    checker_identity = "#{@agent[:identity]}-rchk"
    pid_file = PidFile.new(checker_identity, @agent[:pid_dir])

    if @options[:stop]
      stop_checker_daemon(pid_file)
    elsif @options[:daemon]
      run_checker_daemon(checker_identity, pid_file)
    else
      # Perform one check
      check_communication(0, @options[:ping])
    end
  rescue SystemExit => e
    raise e
  rescue Exception => e
    error("Internal checker failure", e, true)
  end
  true
end

# Stop the checker daemon recorded in the pid file, if any, then terminate
#
# === Parameters
# pid_file(PidFile):: Pid file of the checker daemon
def stop_checker_daemon(pid_file)
  pid_data = pid_file.read_pid
  return terminate unless pid_data[:pid]

  info("Stopping checker daemon")
  unless RightScale::Platform.windows?
    Process.kill('TERM', pid_data[:pid])
    return terminate
  end

  # On Windows the daemon is asked to terminate through its command channel
  begin
    client = CommandClient.new(pid_data[:listen_port], pid_data[:cookie])
    client.send_command({:name => :terminate}, @options[:verbose], 30) do |r|
      info(r)
      terminate
    end
  rescue Exception => e
    error("Failed stopping checker daemon, confirm it is still running", e, true)
  end
end

# Run checker as daemon: record pid, serve commands and check communication
# periodically at the :time_limit interval
#
# === Parameters
# checker_identity(String):: Identity under which the checker daemon runs
# pid_file(PidFile):: Pid file of the checker daemon
def run_checker_daemon(checker_identity, pid_file)
  begin
    pid_file.check
  rescue StandardError
    error("Cannot start checker daemon because already running", nil, true)
  end
  daemonize(checker_identity, @options) unless RightScale::Platform.windows?
  pid_file.write
  at_exit { pid_file.remove }

  listen_port = CommandConstants::BASE_INSTANCE_AGENT_CHECKER_SOCKET_PORT
  @command_runner = CommandRunner.start(listen_port, checker_identity, AgentCheckerCommands.get(self))

  info("Checker daemon options:")
  @options.map { |k, v| "- #{k}: #{v}" }.each { |line| info(line, false, true) }

  # The same limit serves as polling interval and as last communication limit
  limit = elapsed(@options[:time_limit])
  info("Starting checker daemon with #{limit} polling and #{limit} last communication limit")

  iteration = 0
  EM.add_periodic_timer(@options[:time_limit]) do
    iteration += 1
    debug("Checker iteration #{iteration}")
    check_communication(0)
  end
end
|
337
|
+
|
338
|
+
# Check communication, repeatedly if necessary
#
# Passes if the agent communicated within the time limit; otherwise asks the agent
# to communicate and arms a retry timer, and triggers re-enroll once the attempts
# are exhausted
#
# === Parameters
# attempt(Integer):: Number of attempts thus far
# must_try(Boolean):: Try communicating regardless of whether required based on time limit
#
# === Return
# true:: Always return true
def check_communication(attempt, must_try = false)
  attempt += 1
  begin
    if !must_try && (idle = time_since_last_communication) < @options[:time_limit]
      @retry_timer.cancel if @retry_timer
      recency = elapsed(idle)
      info("Passed communication check with activity as recently as #{recency} ago", !@options[:daemon])
      terminate unless @options[:daemon]
    elsif attempt <= @options[:max_attempts]
      suffix = attempt > 1 ? ", attempt #{attempt}" : ""
      debug("Trying communication" + suffix)
      try_communicating(attempt)
      # A successful reply cancels this timer; otherwise it drives the next attempt
      @retry_timer = EM::Timer.new(@options[:retry_interval]) do
        error("Communication attempt #{attempt} timed out after #{elapsed(@options[:retry_interval])}")
        @agent = AgentConfig.agent_options('instance') # Reload in case not using right cookie
        check_communication(attempt)
      end
    else
      reenroll!
    end
  rescue SystemExit => e
    raise e
  rescue Exception => e
    fatal = !@options[:daemon] && (attempt > @options[:max_attempts])
    error("Failed communication check", e, fatal)
    check_communication(attempt)
  end
  true
end
|
374
|
+
|
375
|
+
# Get elapsed time since last communication
#
# Reads the agent state file (:state_path option, or 'state.js' in the agent state
# directory) and uses its "last_communication" timestamp. When the file is missing,
# or holds no such timestamp, the time limit is reported as exceeded so that a
# communication attempt is made.
#
# === Return
# (Integer):: Elapsed time in seconds, or :time_limit + 1 if it cannot be determined
def time_since_last_communication
  state_file = @options[:state_path] || File.join(AgentConfig.agent_state_dir, 'state.js')
  state = @state_serializer.load(File.read(state_file)) if File.file?(state_file)
  last_communication = state["last_communication"] if state.is_a?(Hash)
  # Without a timestamp the subtraction would raise, so treat it as never having communicated
  return @options[:time_limit] + 1 unless last_communication
  Time.now.to_i - last_communication
end
|
384
|
+
|
385
|
+
# Ask instance agent to try to communicate
#
# Sends a "check_connectivity" command to the agent. On a successful reply the retry
# timer is cancelled and the communication check is repeated; on an unsuccessful
# reply the failure is logged and the existing retry timer controls the next attempt.
#
# === Parameters
# attempt(Integer):: Number of attempts thus far
#
# === Return
# true:: Always return true
def try_communicating(attempt)
  begin
    listen_port = @agent[:listen_port]
    client = CommandClient.new(listen_port, @agent[:cookie])
    client.send_command({:name => "check_connectivity"}, @options[:verbose], COMMAND_IO_TIMEOUT) do |r|
      @command_io_failures = 0
      # An undecodable reply is handled like an unsuccessful one
      res = OperationResult.from_results(@command_serializer.load(r)) rescue nil
      if res && res.success?
        info("Successful agent communication" + (attempt > 1 ? " on attempt #{attempt}" : ""))
        @retry_timer.cancel if @retry_timer
        check_communication(attempt)
      else
        reason = (res && res.content) || "<unknown error>"
        error("Failed agent communication attempt", reason, false)
        # Let existing timer control next attempt
      end
    end
  rescue Exception => e
    error("Failed to access agent for communication check", e, false)
  end
  true
end
|
414
|
+
|
415
|
+
# Trigger re-enroll by running rs_reenroll, unless a re-enroll is already in progress
# This will normally cause the checker to exit
#
# === Return
# true:: Always return true
def reenroll!
  return true if @reenrolling

  @reenrolling = true
  begin
    info("Triggering re-enroll after unsuccessful communication check", true)
    on_windows = RightScale::Platform.windows?
    command = "rs_reenroll"
    command += " -v" if @options[:verbose]
    command += '&' unless on_windows
    # Windows relies on the command protocol to terminate properly.
    # If rchk terminates itself, then rchk --stop will hang trying
    # to connect to this rchk.
    terminate unless on_windows
    system(command)
    # Wait around until rs_reenroll has a chance to stop the checker
    # otherwise we may restart it
    sleep(5)
  rescue Exception => e
    error("Failed re-enroll after unsuccessful communication check", e, true)
  end
  @reenrolling = false
  true
end
|
443
|
+
|
444
|
+
# Setup signal traps so that INT and TERM terminate the checker from within the reactor
#
# === Return
# true:: Always return true
def setup_traps
  shutdown = lambda do
    EM.next_tick do
      terminate
      EM.stop
    end
  end
  %w(INT TERM).each { |signal| trap(signal) { shutdown.call } }
  true
end
|
459
|
+
|
460
|
+
# Log debug information, but only when the :verbose option is set
#
# === Parameters
# info(String):: Information to be logged
#
# === Return
# (Object):: Result of #info when verbose, nil otherwise
def debug(info)
  return unless @options[:verbose]
  info(info)
end
|
470
|
+
|
471
|
+
# Log information and optionally echo it to the console
#
# === Parameters
# info(String):: Information to be logged
# to_console(Boolean):: Whether to also display to console even if :verbose is false
# no_check(Boolean):: Whether to omit '[check]' prefix in logged info
#
# === Return
# nil:: Result of puts, or nil when nothing is displayed
def info(info, to_console = false, no_check = false)
  prefix = no_check ? '' : '[check] '
  Log.info("#{prefix}#{info}")
  puts(info) if @options[:verbose] || to_console
end
|
484
|
+
|
485
|
+
# Handle error by logging message (once logging is enabled), displaying it on the
# console and optionally aborting execution with exit status 1
#
# === Parameters
# description(String):: Description of context where error occurred
# error(Exception|String):: Exception or error message
# abort(Boolean):: Whether to abort execution
#
# === Return
# true:: If do not abort
def error(description, error = nil, abort = false)
  if @logging_enabled
    entry = "[check] #{description}"
    entry += ", aborting" if abort
    entry = Log.format(entry, error, :trace) if error
    Log.error(entry)
  end

  console = error ? "#{description}: #{error}" : description
  puts "** #{console}"

  return true unless abort

  terminate
  exit(1)
end
|
512
|
+
|
513
|
+
# Convert elapsed time in seconds to displayable format
# The units shown depend on the magnitude: seconds only up to a minute, minutes and
# seconds up to an hour, hours and minutes up to a day, otherwise days, hours and minutes
#
# === Parameters
# time(Integer|Float):: Elapsed time
#
# === Return
# (String):: Display string
def elapsed(time)
  time = time.to_i
  return "#{time} sec" if time <= MINUTE

  if time <= HOUR
    minutes, seconds = time.divmod(MINUTE)
    return "#{minutes} min #{seconds} sec"
  end

  if time <= DAY
    hours, remainder = time.divmod(HOUR)
    return "#{hours} hr #{remainder / MINUTE} min"
  end

  days, remainder = time.divmod(DAY)
  hours, remainder = remainder.divmod(HOUR)
  "#{days} day#{days == 1 ? '' : 's'} #{hours} hr #{remainder / MINUTE} min"
end
|
539
|
+
|
540
|
+
# Version information
#
# === Return
# (String):: Version banner built from VERSION
def version
  number = VERSION.join('.')
  "rchk #{number} - RightScale Agent Checker (c) 2010 RightScale"
end
|
547
|
+
|
548
|
+
end # AgentChecker
|
549
|
+
|
550
|
+
end # RightScale
|
551
|
+
|
552
|
+
# Copyright (c) 2010-2011 RightScale Inc
|
553
|
+
#
|
554
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
555
|
+
# a copy of this software and associated documentation files (the
|
556
|
+
# "Software"), to deal in the Software without restriction, including
|
557
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
558
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
559
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
560
|
+
# the following conditions:
|
561
|
+
#
|
562
|
+
# The above copyright notice and this permission notice shall be
|
563
|
+
# included in all copies or substantial portions of the Software.
|
564
|
+
#
|
565
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
566
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
567
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
568
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
569
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
570
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
571
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|