right_link 5.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/actors/agent_manager.rb +88 -0
- data/actors/instance_scheduler.rb +321 -0
- data/actors/instance_services.rb +64 -0
- data/actors/instance_setup.rb +567 -0
- data/bin/cloud +25 -0
- data/bin/cook_runner +44 -0
- data/bin/deploy +120 -0
- data/bin/enroll +385 -0
- data/bin/rad +32 -0
- data/bin/rchk +29 -0
- data/bin/rnac +39 -0
- data/bin/rs_connect +33 -0
- data/bin/rs_log_level +31 -0
- data/bin/rs_ohai +28 -0
- data/bin/rs_reenroll +31 -0
- data/bin/rs_run_recipe +34 -0
- data/bin/rs_run_right_script +34 -0
- data/bin/rs_shutdown +33 -0
- data/bin/rs_tag +33 -0
- data/bin/rs_thunk +33 -0
- data/bin/rstat +31 -0
- data/bin/system +16 -0
- data/ext/Rakefile +18 -0
- data/init/config.yml +5 -0
- data/init/init.rb +79 -0
- data/lib/chef/ohai_setup.rb +51 -0
- data/lib/chef/plugins/cloud.rb +91 -0
- data/lib/chef/plugins/cloudstack.rb +23 -0
- data/lib/chef/plugins/ec2.rb +23 -0
- data/lib/chef/plugins/linux/block_device2.rb +24 -0
- data/lib/chef/plugins/rackspace.rb +23 -0
- data/lib/chef/plugins/rightscale.rb +125 -0
- data/lib/chef/plugins/windows/network.rb +114 -0
- data/lib/chef/plugins.rb +74 -0
- data/lib/chef/providers/dns_dnsmadeeasy_provider.rb +81 -0
- data/lib/chef/providers/dns_resource.rb +100 -0
- data/lib/chef/providers/executable_schedule_provider.rb +70 -0
- data/lib/chef/providers/executable_schedule_resource.rb +144 -0
- data/lib/chef/providers/remote_recipe_provider.rb +86 -0
- data/lib/chef/providers/remote_recipe_resource.rb +101 -0
- data/lib/chef/providers/right_link_tag_provider.rb +73 -0
- data/lib/chef/providers/right_link_tag_resource.rb +59 -0
- data/lib/chef/providers/right_script_provider.rb +190 -0
- data/lib/chef/providers/right_script_resource.rb +113 -0
- data/lib/chef/providers/rs_shutdown_provider.rb +75 -0
- data/lib/chef/providers/rs_shutdown_resource.rb +55 -0
- data/lib/chef/providers/server_collection_provider.rb +66 -0
- data/lib/chef/providers/server_collection_resource.rb +93 -0
- data/lib/chef/providers/windows/powershell_provider.rb +151 -0
- data/lib/chef/providers/windows/powershell_resource.rb +111 -0
- data/lib/chef/providers/windows/unsupported_provider.rb +51 -0
- data/lib/chef/right_providers.rb +55 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ChefNodeCmdlet.csproj +104 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ChefNodeCmdlet.dll-Help.xml +141 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Exceptions.cs +182 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeCommand.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeResponse.cs +45 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceCommand.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceResponse.cs +45 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceCommand.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceResponse.cs +45 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionCommand.cs +178 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionRequest.cs +67 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionResponse.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueCommandBase.cs +142 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueRequestBase.cs +64 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueResponseBase.cs +69 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/JsonTransport.cs +110 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/PipeClient.cs +158 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/PipeServer.cs +142 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Properties/AssemblyInfo.cs +16 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ProtocolConstants.cs +55 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ProtocolUtilities.cs +77 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ReadMe.txt +53 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeCommand.cs +59 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeResponse.cs +58 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceCommand.cs +59 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceResponse.cs +40 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceCommand.cs +59 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceRequest.cs +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceResponse.cs +40 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueCommandBase.cs +293 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueRequestBase.cs +75 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueResponseBase.cs +45 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Transport.cs +91 -0
- data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet.sln +35 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/Program.cs +374 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/Properties/AssemblyInfo.cs +16 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/TestChefNodeCmdlet.csproj +65 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/Program.cs +136 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/Properties/AssemblyInfo.cs +36 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/ReadMe.txt +46 -0
- data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/TestNextActionCmdlet.csproj +68 -0
- data/lib/chef/windows/bin/Newtonsoft.Json.dll +0 -0
- data/lib/chef/windows/chef_node_server.rb +463 -0
- data/lib/chef/windows/dynamic_powershell_provider.rb +296 -0
- data/lib/chef/windows/pipe_server.rb +283 -0
- data/lib/chef/windows/powershell_host.rb +285 -0
- data/lib/chef/windows/powershell_pipe_server.rb +136 -0
- data/lib/chef/windows/powershell_provider_base.rb +92 -0
- data/lib/chef/windows/scripts/run_loop.ps1 +105 -0
- data/lib/clouds/cloud.rb +557 -0
- data/lib/clouds/cloud_factory.rb +250 -0
- data/lib/clouds/cloud_utilities.rb +244 -0
- data/lib/clouds/clouds/azure.rb +106 -0
- data/lib/clouds/clouds/cloudstack.rb +114 -0
- data/lib/clouds/clouds/ec2.rb +113 -0
- data/lib/clouds/clouds/eucalyptus.rb +46 -0
- data/lib/clouds/clouds/google.rb +102 -0
- data/lib/clouds/clouds/none.rb +76 -0
- data/lib/clouds/clouds/openstack.rb +30 -0
- data/lib/clouds/clouds/rackspace-ng.rb +54 -0
- data/lib/clouds/clouds/rackspace.rb +78 -0
- data/lib/clouds/clouds/softlayer.rb +91 -0
- data/lib/clouds/metadata_formatter.rb +108 -0
- data/lib/clouds/metadata_provider.rb +128 -0
- data/lib/clouds/metadata_source.rb +87 -0
- data/lib/clouds/metadata_sources/certificate_metadata_source.rb +207 -0
- data/lib/clouds/metadata_sources/config_drive_metadata_source.rb +129 -0
- data/lib/clouds/metadata_sources/file_metadata_source.rb +74 -0
- data/lib/clouds/metadata_sources/http_metadata_source.rb +277 -0
- data/lib/clouds/metadata_sources/selective_metadata_source.rb +122 -0
- data/lib/clouds/metadata_tree_climber.rb +144 -0
- data/lib/clouds/metadata_writer.rb +155 -0
- data/lib/clouds/metadata_writers/dictionary_metadata_writer.rb +72 -0
- data/lib/clouds/metadata_writers/ruby_metadata_writer.rb +76 -0
- data/lib/clouds/metadata_writers/shell_metadata_writer.rb +121 -0
- data/lib/clouds/register_clouds.rb +34 -0
- data/lib/clouds.rb +32 -0
- data/lib/gem_dependencies.rb +83 -0
- data/lib/git_hooks/commit-msg.rb +7 -0
- data/lib/instance/agent_config.rb +168 -0
- data/lib/instance/agent_watcher.rb +233 -0
- data/lib/instance/audit_cook_stub.rb +104 -0
- data/lib/instance/audit_proxy.rb +247 -0
- data/lib/instance/bundle_queue.rb +104 -0
- data/lib/instance/cook/agent_connection.rb +109 -0
- data/lib/instance/cook/audit_logger.rb +165 -0
- data/lib/instance/cook/audit_stub.rb +142 -0
- data/lib/instance/cook/ca-bundle.crt +2794 -0
- data/lib/instance/cook/chef_state.rb +211 -0
- data/lib/instance/cook/cook.rb +306 -0
- data/lib/instance/cook/cook_state.rb +298 -0
- data/lib/instance/cook/cookbook_path_mapping.rb +66 -0
- data/lib/instance/cook/cookbook_repo_retriever.rb +190 -0
- data/lib/instance/cook/executable_sequence.rb +765 -0
- data/lib/instance/cook/external_parameter_gatherer.rb +190 -0
- data/lib/instance/cook/repose_downloader.rb +349 -0
- data/lib/instance/cook/shutdown_request_proxy.rb +121 -0
- data/lib/instance/cook.rb +41 -0
- data/lib/instance/downloader.rb +208 -0
- data/lib/instance/duplicable.rb +67 -0
- data/lib/instance/exceptions.rb +49 -0
- data/lib/instance/executable_sequence_proxy.rb +278 -0
- data/lib/instance/instance_commands.rb +577 -0
- data/lib/instance/instance_state.rb +633 -0
- data/lib/instance/json_utilities.rb +102 -0
- data/lib/instance/login_manager.rb +533 -0
- data/lib/instance/login_user_manager.rb +522 -0
- data/lib/instance/message_encoder.rb +118 -0
- data/lib/instance/multi_thread_bundle_queue.rb +232 -0
- data/lib/instance/operation_context.rb +60 -0
- data/lib/instance/options_bag.rb +65 -0
- data/lib/instance/payload_formatter.rb +46 -0
- data/lib/instance/policy.rb +53 -0
- data/lib/instance/policy_audit.rb +100 -0
- data/lib/instance/policy_manager.rb +146 -0
- data/lib/instance/reenroll_manager.rb +104 -0
- data/lib/instance/right_scripts_cookbook.rb +181 -0
- data/lib/instance/shutdown_request.rb +221 -0
- data/lib/instance/single_thread_bundle_queue.rb +189 -0
- data/lib/instance/volume_management.rb +450 -0
- data/lib/instance.rb +50 -0
- data/lib/repo_conf_generators/apt_conf_generators.rb +106 -0
- data/lib/repo_conf_generators/gem_conf_generators.rb +80 -0
- data/lib/repo_conf_generators/rightscale_conf_generators.rb +254 -0
- data/lib/repo_conf_generators/rightscale_key.pub +17 -0
- data/lib/repo_conf_generators/yum_conf_generators.rb +225 -0
- data/lib/repo_conf_generators.rb +30 -0
- data/lib/run_shell.rb +28 -0
- data/scripts/agent_checker.rb +571 -0
- data/scripts/agent_controller.rb +247 -0
- data/scripts/agent_deployer.rb +148 -0
- data/scripts/bundle_runner.rb +336 -0
- data/scripts/cloud_controller.rb +176 -0
- data/scripts/log_level_manager.rb +142 -0
- data/scripts/ohai_runner.rb +33 -0
- data/scripts/reenroller.rb +193 -0
- data/scripts/server_importer.rb +293 -0
- data/scripts/shutdown_client.rb +183 -0
- data/scripts/system_configurator.rb +367 -0
- data/scripts/tagger.rb +381 -0
- data/scripts/thunker.rb +356 -0
- metadata +418 -0
@@ -0,0 +1,571 @@
|
|
1
|
+
# === Synopsis:
|
2
|
+
# RightScale Agent Checker (rchk) - (c) 2010-2011 RightScale Inc
|
3
|
+
#
|
4
|
+
# Checks the agent to see if it is actively communicating with RightNet and if not
|
5
|
+
# triggers it to re-enroll and exits.
|
6
|
+
#
|
7
|
+
# Alternatively runs as a daemon and performs this communication check periodically.
|
8
|
+
#
|
9
|
+
# === Usage
|
10
|
+
# rchk
|
11
|
+
#
|
12
|
+
# Options:
|
13
|
+
# --time-limit, -t SEC Override the default time limit since last communication for
|
14
|
+
# check to pass (also the interval for daemon to run these checks),
|
15
|
+
# ignored if less than 1
|
16
|
+
# --attempts, -a N Override the default number of communication check attempts
|
17
|
+
# before triggering re-enroll, ignored if less than 1
|
18
|
+
# --retry-interval, -r SEC Override the default interval for retrying communication check,
|
19
|
+
# reset to time-limit if greater than it, ignored if less than 1
|
20
|
+
# --start Run as a daemon process that checks agent communication after the
|
21
|
+
# configured time limit and repeatedly thereafter on that interval
|
22
|
+
# (the checker does an immediate one-time check if --start is not specified)
|
23
|
+
# --stop Stop the currently running daemon started with --start and then exit
|
24
|
+
# --ping, -p Try communicating now regardless of whether have communicated within
|
25
|
+
# the configured time limit, does not apply if running as a daemon
|
26
|
+
# --verbose, -v Display debug information
|
27
|
+
# --version Display version information
|
28
|
+
# --help Display help
|
29
|
+
#
|
30
|
+
|
31
|
+
require 'rubygems'
|
32
|
+
require 'eventmachine'
|
33
|
+
require 'trollop'
|
34
|
+
require 'right_agent'
|
35
|
+
require 'right_agent/scripts/usage'
|
36
|
+
require 'right_agent/scripts/common_parser'
|
37
|
+
|
38
|
+
require File.normalize_path(File.join(File.dirname(__FILE__), '..', 'lib', 'instance', 'agent_watcher'))
|
39
|
+
require File.normalize_path(File.join(File.dirname(__FILE__), '..', 'lib', 'instance', 'agent_config'))
|
40
|
+
|
41
|
+
module RightScale
|
42
|
+
|
43
|
+
# Commands exposed by instance agent checker
class AgentCheckerCommands

  # Build hash of commands associating command names with block
  #
  # === Parameters
  # checker(AgentChecker):: Agent checker executing commands
  #
  # === Return
  # (Hash):: Command blocks keyed by command names
  def self.get(checker)
    target = new(checker)
    {
      :terminate => lambda do |opts, conn|
        # The command reads its reply connection from the options hash
        opts[:conn] = conn
        # send is required because the command methods are protected
        target.send(:terminate_command, opts)
      end
    }
  end

  # Set agent checker for executing commands
  #
  # === Parameter
  # checker(AgentChecker):: Agent checker
  def initialize(checker)
    @checker = checker
  end

  protected

  # Terminate command
  #
  # === Parameters
  # opts[:conn](EM::Connection):: Connection used to send reply
  #
  # === Return
  # true:: Always return true
  def terminate_command(opts)
    CommandIO.instance.reply(opts[:conn], "Checker terminating")
    # Delay terminate a bit to give reply a chance to be sent
    EM.next_tick { @checker.terminate }
    # Return the documented value rather than whatever EM.next_tick returns
    true
  end

end # AgentCheckerCommands
|
82
|
+
|
83
|
+
class AgentChecker
|
84
|
+
|
85
|
+
include DaemonizeHelper
|
86
|
+
|
87
|
+
VERSION = [0, 1]
|
88
|
+
|
89
|
+
# Time constants
|
90
|
+
MINUTE = 60
|
91
|
+
HOUR = 60 * MINUTE
|
92
|
+
DAY = 24 * HOUR
|
93
|
+
|
94
|
+
# Default minimum seconds since last communication for instance to be considered connected
|
95
|
+
# Only used if --time-limit not specified and :ping_interval option not specified for agent
|
96
|
+
DEFAULT_TIME_LIMIT = 12 * HOUR
|
97
|
+
|
98
|
+
# Multiplier of agent's mapper ping interval to get daemon's last communication time limit
|
99
|
+
PING_INTERVAL_MULTIPLIER = 3
|
100
|
+
|
101
|
+
# Default maximum number of seconds between checks for recent communication if first check fails
|
102
|
+
DEFAULT_RETRY_INTERVAL = 5 * MINUTE
|
103
|
+
|
104
|
+
# Default maximum number of attempts to check communication before trigger re-enroll
|
105
|
+
DEFAULT_MAX_ATTEMPTS = 3
|
106
|
+
|
107
|
+
# Maximum number of seconds to wait for a CommandIO response from the instance agent
|
108
|
+
COMMAND_IO_TIMEOUT = 2 * MINUTE
|
109
|
+
|
110
|
+
# Build a checker, parse the command line with it and start it
#
# An Errno::EACCES raised along the way is reported on STDERR together with
# a hint to elevate privilege, and the process exits with status 2
#
# === Return
# (Object):: Whatever AgentChecker#start returns
def self.run
  checker = AgentChecker.new
  options = checker.parse_args
  checker.start(options)
rescue Errno::EACCES => e
  STDERR.puts(e.message, "Try elevating privilege (sudo/runas) before invoking this command.")
  exit(2)
end
|
122
|
+
|
123
|
+
# Create the AgentWatcher on first use, register the instance agent with it
# and start watching
#
# The watcher is given a lambda that forwards its messages to #info and the
# agent's pid directory.
#
# === Return
# (Object):: Whatever AgentWatcher#start_watching returns
def setup_agent_watcher()
  unless @agent_watcher
    forward_to_log = lambda { |s| self.info(s) }
    @agent_watcher = AgentWatcher.new(forward_to_log, @agent[:pid_dir])
  end
  watcher = @agent_watcher
  watcher.watch_agent(@agent[:identity], '/opt/rightscale/bin/rnac', '--start instance', '--stop instance')
  watcher.start_watching()
end
|
132
|
+
|
133
|
+
# Ask the AgentWatcher to stop the instance agent, then stop watching
#
# === Return
# (Object):: Whatever AgentWatcher#stop_watching returns
def stop_agent_watcher()
  watcher = @agent_watcher
  watcher.stop_agent(@agent[:identity])
  watcher.stop_watching()
end
|
141
|
+
|
142
|
+
# Run daemon or run one agent communication check
# If running as a daemon, store pid in same location as agent except suffix the
# agent identity with '-rchk'.
#
# === Parameters
# options(Hash):: Run options
#   :time_limit(Integer):: Time limit for last communication and interval for daemon checks,
#     defaults to PING_INTERVAL_MULTIPLIER times agent's ping interval or to DEFAULT_TIME_LIMIT,
#     ignored if less than 1
#   :max_attempts(Integer):: Maximum number of communication check attempts,
#     defaults to DEFAULT_MAX_ATTEMPTS, ignored if less than 1
#   :retry_interval(Integer):: Number of seconds to wait before retrying communication check,
#     defaults to DEFAULT_RETRY_INTERVAL, reset to :time_limit if exceeds it, ignored if less than 1
#   :daemon(Boolean):: Whether to run as a daemon rather than do a one-time communication check
#   :log_path(String):: Log file directory, defaults to one used by agent
#   :stop(Boolean):: Whether to stop the currently running daemon and then exit
#   :ping(Boolean):: Try communicating now regardless of whether have communicated within
#     the configured time limit, ignored if :daemon true
#   :verbose(Boolean):: Whether to display debug information
#
# === Return
# true:: Always return true
def start(options)
  begin
    setup_traps
    @command_serializer = Serializer.new
    @state_serializer = Serializer.new(:json)

    # Retrieve instance agent configuration options
    @agent = AgentConfig.agent_options('instance')
    error("No instance agent configured", nil, abort = true) if @agent.empty?

    # Apply agent's ping interval if needed and adjust options to make them consistent
    @options = options
    # A missing or non-positive time limit is treated as not specified
    unless @options[:time_limit] && @options[:time_limit] > 0
      if @agent[:ping_interval]
        @options[:time_limit] = @agent[:ping_interval] * PING_INTERVAL_MULTIPLIER
      else
        @options[:time_limit] = DEFAULT_TIME_LIMIT
      end
    end
    # Apply the documented defaults so that the comparisons below never see nil or
    # a non-positive value (which would raise or divide by zero)
    unless @options[:retry_interval] && @options[:retry_interval] > 0
      @options[:retry_interval] = DEFAULT_RETRY_INTERVAL
    end
    unless @options[:max_attempts] && @options[:max_attempts] > 0
      @options[:max_attempts] = DEFAULT_MAX_ATTEMPTS
    end
    @options[:retry_interval] = [@options[:retry_interval], @options[:time_limit]].min
    @options[:max_attempts] = [@options[:max_attempts], @options[:time_limit] / @options[:retry_interval]].min
    @options[:log_path] ||= RightScale::Platform.filesystem.log_dir

    # Attach to log used by instance agent
    Log.program_name = 'RightLink'
    Log.facility = 'user'
    Log.log_to_file_only(@agent[:log_to_file_only])
    Log.init(@agent[:identity], @options[:log_path], :print => true)
    Log.level = :debug if @options[:verbose]
    @logging_enabled = true

    # Catch any egregious eventmachine failures, especially failure to connect to agent with CommandIO
    # Exit even if running as daemon since no longer can trust EM and should get restarted automatically
    EM.error_handler do |e|
      if e.class == RuntimeError && e.message =~ /no connection/
        error("Failed to connect to agent for communication check", nil, abort = false)
        @command_io_failures = (@command_io_failures || 0) + 1
        reenroll! if @command_io_failures > @options[:max_attempts]
      else
        error("Internal checker failure", e, abort = true)
      end
    end

    # note that our Windows service monitors rnac and rchk processes
    # externally and restarts them if they die, so no need to roll our
    # own cross-monitoring on that platform.
    use_agent_watcher = !RightScale::Platform.windows?
    EM.run do
      check
      setup_agent_watcher if use_agent_watcher
    end
    stop_agent_watcher if use_agent_watcher

  rescue SystemExit => e
    raise e
  rescue Exception => e
    error("Failed to run", e, abort = true)
  end
  true
end
|
223
|
+
|
224
|
+
# Shut the checker down
#
# Stops the command runner if one was started and then stops EventMachine;
# a StandardError raised by either stop call is discarded
#
# === Return
# true:: Always return true
def terminate
  if @command_runner
    begin
      CommandRunner.stop
    rescue StandardError
      nil
    end
  end
  begin
    EM.stop
  rescue StandardError
    nil
  end
  true
end
|
233
|
+
|
234
|
+
# Create options hash from command line arguments
#
# Values documented as "ignored if less than 1" are neutralized here:
# :max_attempts and :retry_interval fall back to their defaults and
# :time_limit is removed so that the caller can derive it
#
# === Return
# options(Hash):: Command line options
def parse_args
  parser = Trollop::Parser.new do
    opt :max_attempts, "", :default => DEFAULT_MAX_ATTEMPTS, :long => "--attempts", :short => "-a"
    opt :retry_interval, "", :default => DEFAULT_RETRY_INTERVAL
    opt :time_limit, "", :type => :int
    opt :daemon, "", :long => "--start"
    opt :stop
    opt :ping
    opt :verbose
    opt :state_path, "", :type => String
    version ""
  end

  begin
    options = parser.parse
    # Restore defaults rather than deleting the keys, since these values are
    # later compared numerically
    options[:max_attempts] = DEFAULT_MAX_ATTEMPTS unless options[:max_attempts] > 0
    options[:retry_interval] = DEFAULT_RETRY_INTERVAL unless options[:retry_interval] > 0
    # :time_limit has no default so it is nil when not given on the command line
    options.delete(:time_limit) if options[:time_limit] && options[:time_limit] <= 0
    options
  rescue Trollop::HelpNeeded
    puts Usage.scan(__FILE__)
    exit
  rescue Trollop::CommandlineError => e
    error("#{e}\nUse --help for additional information", nil, abort = true)
  rescue Trollop::VersionNeeded
    puts version
    exit
  end
end
|
269
|
+
|
270
|
+
protected
|
271
|
+
|
272
|
+
# Dispatch on the run options: stop a running daemon, become the daemon,
# or perform a single communication check
#
# The checker's pid file is named after the agent identity suffixed with '-rchk'.
# Any exception other than SystemExit is reported through #error with abort set.
#
# === Return
# true:: Always return true
def check
  identity = "#{@agent[:identity]}-rchk"
  pid_file = PidFile.new(identity, @agent[:pid_dir])

  if @options[:stop]
    # Stop checker
    pid_data = pid_file.read_pid
    if pid_data[:pid]
      info("Stopping checker daemon")
      if RightScale::Platform.windows?
        # On Windows the daemon is asked to terminate over its command socket
        begin
          client = CommandClient.new(pid_data[:listen_port], pid_data[:cookie])
          client.send_command({:name => :terminate}, verbose = @options[:verbose], timeout = 30) do |reply|
            info(reply)
            terminate
          end
        rescue Exception => e
          error("Failed stopping checker daemon, confirm it is still running", e, abort = true)
        end
      else
        Process.kill('TERM', pid_data[:pid])
        terminate
      end
    else
      terminate
    end
  elsif @options[:daemon]
    # Run checker as daemon
    begin
      pid_file.check
    rescue StandardError
      error("Cannot start checker daemon because already running", nil, abort = true)
    end
    daemonize(identity, @options) unless RightScale::Platform.windows?
    pid_file.write
    at_exit { pid_file.remove }

    port = CommandConstants::BASE_INSTANCE_AGENT_CHECKER_SOCKET_PORT
    @command_runner = CommandRunner.start(port, identity, AgentCheckerCommands.get(self))

    info("Checker daemon options:")
    option_lines = @options.map { |key, value| "- #{key}: #{value}" }
    option_lines.each { |line| info(line, to_console = false, no_check = true) }

    # The same time limit drives both the polling period and the communication limit
    limit = elapsed(@options[:time_limit])
    info("Starting checker daemon with #{limit} polling and #{limit} last communication limit")

    iteration = 0
    EM.add_periodic_timer(@options[:time_limit]) do
      iteration += 1
      debug("Checker iteration #{iteration}")
      check_communication(0)
    end
  else
    # Perform one check
    check_communication(0, @options[:ping])
  end
  true
rescue SystemExit
  raise
rescue Exception => e
  error("Internal checker failure", e, abort = true)
  true
end
|
337
|
+
|
338
|
+
# Check communication, repeatedly if necessary
#
# Passes if the agent communicated within the time limit. Otherwise asks the
# agent to communicate and arms a retry timer, or triggers re-enroll once the
# attempts are used up.
#
# === Parameters
# attempt(Integer):: Number of attempts thus far
# must_try(Boolean):: Try communicating regardless of whether required based on time limit
#
# === Return
# true:: Always return true
def check_communication(attempt, must_try = false)
  attempt += 1
  begin
    if !must_try && (time = time_since_last_communication) < @options[:time_limit]
      @retry_timer.cancel if @retry_timer
      info("Passed communication check with activity as recently as #{elapsed(time)} ago", !@options[:daemon])
      terminate unless @options[:daemon]
    elsif attempt <= @options[:max_attempts]
      debug("Trying communication" + (attempt > 1 ? ", attempt #{attempt}" : ""))
      try_communicating(attempt)
      # If the agent does not respond before this timer fires, count it as a failed attempt
      @retry_timer = EM::Timer.new(@options[:retry_interval]) do
        error("Communication attempt #{attempt} timed out after #{elapsed(@options[:retry_interval])}")
        @agent = AgentConfig.agent_options('instance') # Reload in case not using right cookie
        check_communication(attempt)
      end
    else
      reenroll!
    end
  rescue SystemExit
    raise
  rescue Exception => e
    exhausted = attempt > @options[:max_attempts]
    error("Failed communication check", e, !@options[:daemon] && exhausted)
    # Retry immediately only while attempts remain; an unconditional retry would
    # recurse without bound in daemon mode when the failure is persistent.
    # In daemon mode the next check comes from the caller's periodic timer.
    check_communication(attempt) unless exhausted
  end
  true
end
|
374
|
+
|
375
|
+
# Get elapsed time since last communication
#
# The time of last communication is read from the "last_communication" entry
# of the state file (:state_path option, or 'state.js' in the agent state
# directory). If the file is missing, or holds no usable numeric entry, the
# result is one second more than the time limit so that the check fails.
#
# === Return
# (Integer):: Elapsed time in seconds
def time_since_last_communication
  state_file = @options[:state_path] || File.join(AgentConfig.agent_state_dir, 'state.js')
  state = @state_serializer.load(File.read(state_file)) if File.file?(state_file)
  # Guard against a state file that loads to something other than a hash or
  # lacks the entry, which would otherwise raise on the subtraction
  last_communication = state.is_a?(Hash) ? state["last_communication"] : nil
  if last_communication.is_a?(Numeric)
    Time.now.to_i - last_communication
  else
    @options[:time_limit] + 1
  end
end
|
384
|
+
|
385
|
+
# Ask instance agent to try to communicate
#
# Sends the "check_connectivity" command to the agent. On a successful result
# the retry timer is cancelled and the communication check is run again; on a
# failed result the failure is logged and the existing timer drives the retry.
#
# === Parameters
# attempt(Integer):: Number of attempts thus far
#
# === Return
# true:: Always return true
def try_communicating(attempt)
  begin
    listen_port = @agent[:listen_port]
    client = CommandClient.new(listen_port, @agent[:cookie])
    client.send_command({:name => "check_connectivity"}, @options[:verbose], COMMAND_IO_TIMEOUT) do |r|
      @command_io_failures = 0
      # A response that cannot be decoded is treated the same as a failed result
      res = OperationResult.from_results(@command_serializer.load(r)) rescue nil
      if res && res.success?
        info("Successful agent communication" + (attempt > 1 ? " on attempt #{attempt}" : ""))
        @retry_timer.cancel if @retry_timer
        check_communication(attempt)
      else
        reason = (res && res.content) || "<unknown error>"
        error("Failed agent communication attempt", reason, false)
        # Let existing timer control next attempt
      end
    end
  rescue Exception => e
    error("Failed to access agent for communication check", e, false)
  end
  true
end
|
414
|
+
|
415
|
+
# Launch rs_reenroll unless a re-enroll is already in progress
#
# The command gets " -v" when verbose and a trailing '&' when not on Windows.
# Any exception is reported through #error with abort set.
#
# === Return
# true:: Always return true
def reenroll!
  return true if @reenrolling

  @reenrolling = true
  begin
    info("Triggering re-enroll after unsuccessful communication check", to_console = true)
    parts = ["rs_reenroll"]
    parts << " -v" if @options[:verbose]
    parts << '&' unless RightScale::Platform.windows?
    cmd = parts.join
    # Windows relies on the command protocol to terminate properly.
    # If rchk terminates itself, then rchk --stop will hang trying
    # to connect to this rchk.
    terminate unless RightScale::Platform.windows?
    system(cmd)
    # Wait around until rs_reenroll has a chance to stop the checker
    # otherwise we may restart it
    sleep(5)
  rescue Exception => e
    error("Failed re-enroll after unsuccessful communication check", e, abort = true)
  end
  @reenrolling = false
  true
end
|
443
|
+
|
444
|
+
# Install handlers for the INT and TERM signals
#
# Each handler schedules, on the next EventMachine tick, a call to #terminate
# followed by EM.stop
#
# === Return
# true:: Always return true
def setup_traps
  shutdown = proc do
    EM.next_tick do
      terminate
      EM.stop
    end
  end
  %w[INT TERM].each { |signal_name| trap(signal_name, &shutdown) }
  true
end
|
459
|
+
|
460
|
+
# Log a message through #info, but only when the :verbose option is set
#
# === Parameters
# info(String):: Information to be logged
#
# === Return
# (Object):: Result of #info when verbose, nil otherwise
def debug(info)
  return unless @options[:verbose]
  info(info)
end
|
470
|
+
|
471
|
+
# Write a message to the log and optionally echo it to the console
#
# === Parameters
# info(String):: Information to be logged
# to_console(Boolean):: Whether to also display to console even if :verbose is false
# no_check(Boolean):: Whether to omit '[check]' prefix in logged info
#
# === Return
# nil:: Result of the trailing conditional puts
def info(info, to_console = false, no_check = false)
  prefix = no_check ? '' : '[check] '
  Log.info("#{prefix}#{info}")
  echo = @options[:verbose] || to_console
  puts(info) if echo
end
|
484
|
+
|
485
|
+
# Report an error to the log (once logging is enabled) and to the console,
# then optionally terminate the checker and exit with status 1
#
# === Parameters
# description(String):: Description of context where error occurred
# error(Exception|String):: Exception or error message
# abort(Boolean):: Whether to abort execution
#
# === Return
# true:: If do not abort
def error(description, error = nil, abort = false)
  if @logging_enabled
    entry = "[check] #{description}"
    entry = "#{entry}, aborting" if abort
    entry = Log.format(entry, error, :trace) if error
    Log.error(entry)
  end

  console = error ? "#{description}: #{error}" : description
  puts "** #{console}"

  return true unless abort

  terminate
  exit(1)
end
|
512
|
+
|
513
|
+
# Render a number of seconds as a short human readable duration
#
# The unit pair depends on the magnitude: seconds up to a minute,
# minutes and seconds up to an hour, hours and minutes up to a day,
# and days, hours and minutes beyond that (upper bounds inclusive)
#
# === Parameters
# time(Integer|Float):: Elapsed time in seconds, truncated to an integer
#
# === Return
# (String):: Display string
def elapsed(time)
  total = time.to_i
  if total <= MINUTE
    "#{total} sec"
  elsif total <= HOUR
    minutes, seconds = total.divmod(MINUTE)
    "#{minutes} min #{seconds} sec"
  elsif total <= DAY
    hours, remainder = total.divmod(HOUR)
    "#{hours} hr #{remainder / MINUTE} min"
  else
    days, remainder = total.divmod(DAY)
    hours, remainder = remainder.divmod(HOUR)
    plural = days == 1 ? '' : 's'
    "#{days} day#{plural} #{hours} hr #{remainder / MINUTE} min"
  end
end
|
539
|
+
|
540
|
+
# Build the version banner from the VERSION constant
#
# === Return
# (String):: Version information
def version
  number = VERSION.join('.')
  "rchk #{number} - RightScale Agent Checker (c) 2010 RightScale"
end
|
547
|
+
|
548
|
+
end # AgentChecker
|
549
|
+
|
550
|
+
end # RightScale
|
551
|
+
|
552
|
+
# Copyright (c) 2010-2011 RightScale Inc
|
553
|
+
#
|
554
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
555
|
+
# a copy of this software and associated documentation files (the
|
556
|
+
# "Software"), to deal in the Software without restriction, including
|
557
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
558
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
559
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
560
|
+
# the following conditions:
|
561
|
+
#
|
562
|
+
# The above copyright notice and this permission notice shall be
|
563
|
+
# included in all copies or substantial portions of the Software.
|
564
|
+
#
|
565
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
566
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
567
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
568
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
569
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
570
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
571
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|