right_link 5.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (199) hide show
  1. data/actors/agent_manager.rb +88 -0
  2. data/actors/instance_scheduler.rb +321 -0
  3. data/actors/instance_services.rb +64 -0
  4. data/actors/instance_setup.rb +567 -0
  5. data/bin/cloud +25 -0
  6. data/bin/cook_runner +44 -0
  7. data/bin/deploy +120 -0
  8. data/bin/enroll +385 -0
  9. data/bin/rad +32 -0
  10. data/bin/rchk +29 -0
  11. data/bin/rnac +39 -0
  12. data/bin/rs_connect +33 -0
  13. data/bin/rs_log_level +31 -0
  14. data/bin/rs_ohai +28 -0
  15. data/bin/rs_reenroll +31 -0
  16. data/bin/rs_run_recipe +34 -0
  17. data/bin/rs_run_right_script +34 -0
  18. data/bin/rs_shutdown +33 -0
  19. data/bin/rs_tag +33 -0
  20. data/bin/rs_thunk +33 -0
  21. data/bin/rstat +31 -0
  22. data/bin/system +16 -0
  23. data/ext/Rakefile +18 -0
  24. data/init/config.yml +5 -0
  25. data/init/init.rb +79 -0
  26. data/lib/chef/ohai_setup.rb +51 -0
  27. data/lib/chef/plugins/cloud.rb +91 -0
  28. data/lib/chef/plugins/cloudstack.rb +23 -0
  29. data/lib/chef/plugins/ec2.rb +23 -0
  30. data/lib/chef/plugins/linux/block_device2.rb +24 -0
  31. data/lib/chef/plugins/rackspace.rb +23 -0
  32. data/lib/chef/plugins/rightscale.rb +125 -0
  33. data/lib/chef/plugins/windows/network.rb +114 -0
  34. data/lib/chef/plugins.rb +74 -0
  35. data/lib/chef/providers/dns_dnsmadeeasy_provider.rb +81 -0
  36. data/lib/chef/providers/dns_resource.rb +100 -0
  37. data/lib/chef/providers/executable_schedule_provider.rb +70 -0
  38. data/lib/chef/providers/executable_schedule_resource.rb +144 -0
  39. data/lib/chef/providers/remote_recipe_provider.rb +86 -0
  40. data/lib/chef/providers/remote_recipe_resource.rb +101 -0
  41. data/lib/chef/providers/right_link_tag_provider.rb +73 -0
  42. data/lib/chef/providers/right_link_tag_resource.rb +59 -0
  43. data/lib/chef/providers/right_script_provider.rb +190 -0
  44. data/lib/chef/providers/right_script_resource.rb +113 -0
  45. data/lib/chef/providers/rs_shutdown_provider.rb +75 -0
  46. data/lib/chef/providers/rs_shutdown_resource.rb +55 -0
  47. data/lib/chef/providers/server_collection_provider.rb +66 -0
  48. data/lib/chef/providers/server_collection_resource.rb +93 -0
  49. data/lib/chef/providers/windows/powershell_provider.rb +151 -0
  50. data/lib/chef/providers/windows/powershell_resource.rb +111 -0
  51. data/lib/chef/providers/windows/unsupported_provider.rb +51 -0
  52. data/lib/chef/right_providers.rb +55 -0
  53. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ChefNodeCmdlet.csproj +104 -0
  54. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ChefNodeCmdlet.dll-Help.xml +141 -0
  55. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Exceptions.cs +182 -0
  56. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeCommand.cs +58 -0
  57. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeRequest.cs +46 -0
  58. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeResponse.cs +45 -0
  59. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceCommand.cs +58 -0
  60. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceRequest.cs +46 -0
  61. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceResponse.cs +45 -0
  62. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceCommand.cs +58 -0
  63. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceRequest.cs +46 -0
  64. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceResponse.cs +45 -0
  65. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionCommand.cs +178 -0
  66. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionRequest.cs +67 -0
  67. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionResponse.cs +58 -0
  68. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueCommandBase.cs +142 -0
  69. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueRequestBase.cs +64 -0
  70. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueResponseBase.cs +69 -0
  71. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/JsonTransport.cs +110 -0
  72. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/PipeClient.cs +158 -0
  73. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/PipeServer.cs +142 -0
  74. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Properties/AssemblyInfo.cs +16 -0
  75. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ProtocolConstants.cs +55 -0
  76. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ProtocolUtilities.cs +77 -0
  77. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ReadMe.txt +53 -0
  78. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeCommand.cs +59 -0
  79. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeRequest.cs +46 -0
  80. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeResponse.cs +58 -0
  81. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceCommand.cs +59 -0
  82. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceRequest.cs +46 -0
  83. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceResponse.cs +40 -0
  84. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceCommand.cs +59 -0
  85. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceRequest.cs +46 -0
  86. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceResponse.cs +40 -0
  87. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueCommandBase.cs +293 -0
  88. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueRequestBase.cs +75 -0
  89. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueResponseBase.cs +45 -0
  90. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Transport.cs +91 -0
  91. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet.sln +35 -0
  92. data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/Program.cs +374 -0
  93. data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/Properties/AssemblyInfo.cs +16 -0
  94. data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/TestChefNodeCmdlet.csproj +65 -0
  95. data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/Program.cs +136 -0
  96. data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/Properties/AssemblyInfo.cs +36 -0
  97. data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/ReadMe.txt +46 -0
  98. data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/TestNextActionCmdlet.csproj +68 -0
  99. data/lib/chef/windows/bin/Newtonsoft.Json.dll +0 -0
  100. data/lib/chef/windows/chef_node_server.rb +463 -0
  101. data/lib/chef/windows/dynamic_powershell_provider.rb +296 -0
  102. data/lib/chef/windows/pipe_server.rb +283 -0
  103. data/lib/chef/windows/powershell_host.rb +285 -0
  104. data/lib/chef/windows/powershell_pipe_server.rb +136 -0
  105. data/lib/chef/windows/powershell_provider_base.rb +92 -0
  106. data/lib/chef/windows/scripts/run_loop.ps1 +105 -0
  107. data/lib/clouds/cloud.rb +557 -0
  108. data/lib/clouds/cloud_factory.rb +250 -0
  109. data/lib/clouds/cloud_utilities.rb +244 -0
  110. data/lib/clouds/clouds/azure.rb +106 -0
  111. data/lib/clouds/clouds/cloudstack.rb +114 -0
  112. data/lib/clouds/clouds/ec2.rb +113 -0
  113. data/lib/clouds/clouds/eucalyptus.rb +46 -0
  114. data/lib/clouds/clouds/google.rb +102 -0
  115. data/lib/clouds/clouds/none.rb +76 -0
  116. data/lib/clouds/clouds/openstack.rb +30 -0
  117. data/lib/clouds/clouds/rackspace-ng.rb +54 -0
  118. data/lib/clouds/clouds/rackspace.rb +78 -0
  119. data/lib/clouds/clouds/softlayer.rb +91 -0
  120. data/lib/clouds/metadata_formatter.rb +108 -0
  121. data/lib/clouds/metadata_provider.rb +128 -0
  122. data/lib/clouds/metadata_source.rb +87 -0
  123. data/lib/clouds/metadata_sources/certificate_metadata_source.rb +207 -0
  124. data/lib/clouds/metadata_sources/config_drive_metadata_source.rb +129 -0
  125. data/lib/clouds/metadata_sources/file_metadata_source.rb +74 -0
  126. data/lib/clouds/metadata_sources/http_metadata_source.rb +277 -0
  127. data/lib/clouds/metadata_sources/selective_metadata_source.rb +122 -0
  128. data/lib/clouds/metadata_tree_climber.rb +144 -0
  129. data/lib/clouds/metadata_writer.rb +155 -0
  130. data/lib/clouds/metadata_writers/dictionary_metadata_writer.rb +72 -0
  131. data/lib/clouds/metadata_writers/ruby_metadata_writer.rb +76 -0
  132. data/lib/clouds/metadata_writers/shell_metadata_writer.rb +121 -0
  133. data/lib/clouds/register_clouds.rb +34 -0
  134. data/lib/clouds.rb +32 -0
  135. data/lib/gem_dependencies.rb +83 -0
  136. data/lib/git_hooks/commit-msg.rb +7 -0
  137. data/lib/instance/agent_config.rb +168 -0
  138. data/lib/instance/agent_watcher.rb +233 -0
  139. data/lib/instance/audit_cook_stub.rb +104 -0
  140. data/lib/instance/audit_proxy.rb +247 -0
  141. data/lib/instance/bundle_queue.rb +104 -0
  142. data/lib/instance/cook/agent_connection.rb +109 -0
  143. data/lib/instance/cook/audit_logger.rb +165 -0
  144. data/lib/instance/cook/audit_stub.rb +142 -0
  145. data/lib/instance/cook/ca-bundle.crt +2794 -0
  146. data/lib/instance/cook/chef_state.rb +211 -0
  147. data/lib/instance/cook/cook.rb +306 -0
  148. data/lib/instance/cook/cook_state.rb +298 -0
  149. data/lib/instance/cook/cookbook_path_mapping.rb +66 -0
  150. data/lib/instance/cook/cookbook_repo_retriever.rb +190 -0
  151. data/lib/instance/cook/executable_sequence.rb +765 -0
  152. data/lib/instance/cook/external_parameter_gatherer.rb +190 -0
  153. data/lib/instance/cook/repose_downloader.rb +349 -0
  154. data/lib/instance/cook/shutdown_request_proxy.rb +121 -0
  155. data/lib/instance/cook.rb +41 -0
  156. data/lib/instance/downloader.rb +208 -0
  157. data/lib/instance/duplicable.rb +67 -0
  158. data/lib/instance/exceptions.rb +49 -0
  159. data/lib/instance/executable_sequence_proxy.rb +278 -0
  160. data/lib/instance/instance_commands.rb +577 -0
  161. data/lib/instance/instance_state.rb +633 -0
  162. data/lib/instance/json_utilities.rb +102 -0
  163. data/lib/instance/login_manager.rb +533 -0
  164. data/lib/instance/login_user_manager.rb +522 -0
  165. data/lib/instance/message_encoder.rb +118 -0
  166. data/lib/instance/multi_thread_bundle_queue.rb +232 -0
  167. data/lib/instance/operation_context.rb +60 -0
  168. data/lib/instance/options_bag.rb +65 -0
  169. data/lib/instance/payload_formatter.rb +46 -0
  170. data/lib/instance/policy.rb +53 -0
  171. data/lib/instance/policy_audit.rb +100 -0
  172. data/lib/instance/policy_manager.rb +146 -0
  173. data/lib/instance/reenroll_manager.rb +104 -0
  174. data/lib/instance/right_scripts_cookbook.rb +181 -0
  175. data/lib/instance/shutdown_request.rb +221 -0
  176. data/lib/instance/single_thread_bundle_queue.rb +189 -0
  177. data/lib/instance/volume_management.rb +450 -0
  178. data/lib/instance.rb +50 -0
  179. data/lib/repo_conf_generators/apt_conf_generators.rb +106 -0
  180. data/lib/repo_conf_generators/gem_conf_generators.rb +80 -0
  181. data/lib/repo_conf_generators/rightscale_conf_generators.rb +254 -0
  182. data/lib/repo_conf_generators/rightscale_key.pub +17 -0
  183. data/lib/repo_conf_generators/yum_conf_generators.rb +225 -0
  184. data/lib/repo_conf_generators.rb +30 -0
  185. data/lib/run_shell.rb +28 -0
  186. data/scripts/agent_checker.rb +571 -0
  187. data/scripts/agent_controller.rb +247 -0
  188. data/scripts/agent_deployer.rb +148 -0
  189. data/scripts/bundle_runner.rb +336 -0
  190. data/scripts/cloud_controller.rb +176 -0
  191. data/scripts/log_level_manager.rb +142 -0
  192. data/scripts/ohai_runner.rb +33 -0
  193. data/scripts/reenroller.rb +193 -0
  194. data/scripts/server_importer.rb +293 -0
  195. data/scripts/shutdown_client.rb +183 -0
  196. data/scripts/system_configurator.rb +367 -0
  197. data/scripts/tagger.rb +381 -0
  198. data/scripts/thunker.rb +356 -0
  199. metadata +418 -0
@@ -0,0 +1,571 @@
1
+ # === Synopsis:
2
+ # RightScale Agent Checker (rchk) - (c) 2010-2011 RightScale Inc
3
+ #
4
+ # Checks the agent to see if it is actively communicating with RightNet and if not
5
+ # triggers it to re-enroll and exits.
6
+ #
7
+ # Alternatively runs as a daemon and performs this communication check periodically.
8
+ #
9
+ # === Usage
10
+ # rchk
11
+ #
12
+ # Options:
13
+ # --time-limit, -t SEC Override the default time limit since last communication for
14
+ # check to pass (also the interval for daemon to run these checks),
15
+ # ignored if less than 1
16
+ # --attempts, -a N Override the default number of communication check attempts
17
+ # before triggering re-enroll, ignored if less than 1
18
+ # --retry-interval, -r SEC Override the default interval for retrying communication check,
19
+ # reset to time-limit if greater than it, ignored if less than 1
20
+ # --start Run as a daemon process that checks agent communication after the
21
+ # configured time limit and repeatedly thereafter on that interval
22
+ # (the checker does an immediate one-time check if --start is not specified)
23
+ # --stop Stop the currently running daemon started with --start and then exit
24
+ # --ping, -p Try communicating now regardless of whether have communicated within
25
+ # the configured time limit, does not apply if running as a daemon
26
+ # --verbose, -v Display debug information
27
+ # --version Display version information
28
+ # --help Display help
29
+ #
30
+
31
+ require 'rubygems'
32
+ require 'eventmachine'
33
+ require 'trollop'
34
+ require 'right_agent'
35
+ require 'right_agent/scripts/usage'
36
+ require 'right_agent/scripts/common_parser'
37
+
38
+ require File.normalize_path(File.join(File.dirname(__FILE__), '..', 'lib', 'instance', 'agent_watcher'))
39
+ require File.normalize_path(File.join(File.dirname(__FILE__), '..', 'lib', 'instance', 'agent_config'))
40
+
41
+ module RightScale
42
+
43
+ # Commands exposed by instance agent checker
44
class AgentCheckerCommands

  # Build the table of commands served by the checker
  #
  # === Parameters
  # checker(AgentChecker):: Agent checker executing commands
  #
  # === Return
  # (Hash):: Command blocks keyed by command names
  def self.get(checker)
    handler = new(checker)
    terminate = lambda do |opts, conn|
      opts[:conn] = conn
      handler.send("terminate_command", opts)
    end
    { :terminate => terminate }
  end

  # Remember the agent checker that commands operate on
  #
  # === Parameter
  # checker(AgentChecker):: Agent checker
  def initialize(checker)
    @checker = checker
  end

  protected

  # Terminate command: acknowledge the request, then stop the checker
  #
  # === Parameters
  # opts[:conn](EM::Connection):: Connection used to send reply
  #
  # === Return
  # Whatever EM.next_tick returns
  def terminate_command(opts)
    connection = opts[:conn]
    CommandIO.instance.reply(connection, "Checker terminating")
    # Defer the actual terminate so that the reply has a chance to be sent
    EM.next_tick do
      @checker.terminate
    end
  end

end # AgentCheckerCommands
82
+
83
+ class AgentChecker
84
+
85
+ include DaemonizeHelper
86
+
87
+ VERSION = [0, 1]
88
+
89
+ # Time constants
90
+ MINUTE = 60
91
+ HOUR = 60 * MINUTE
92
+ DAY = 24 * HOUR
93
+
94
+ # Default maximum seconds since last communication for instance to be considered connected
95
+ # Only used if --time-limit not specified and :ping_interval option not specified for agent
96
+ DEFAULT_TIME_LIMIT = 12 * HOUR
97
+
98
+ # Multiplier of agent's mapper ping interval to get daemon's last communication time limit
99
+ PING_INTERVAL_MULTIPLIER = 3
100
+
101
+ # Default maximum number of seconds between checks for recent communication if first check fails
102
+ DEFAULT_RETRY_INTERVAL = 5 * MINUTE
103
+
104
+ # Default maximum number of attempts to check communication before triggering re-enroll
105
+ DEFAULT_MAX_ATTEMPTS = 3
106
+
107
+ # Maximum number of seconds to wait for a CommandIO response from the instance agent
108
+ COMMAND_IO_TIMEOUT = 2 * MINUTE
109
+
110
+ # Create and run checker
111
+ #
112
+ # === Return
113
+ # true:: Always return true
114
def self.run
  # Parse the command line first, then hand the options to the checker;
  # a permission failure in either step is reported with a hint and exit code 2
  checker = AgentChecker.new
  options = checker.parse_args
  checker.start(options)
rescue Errno::EACCES => e
  [e.message, "Try elevating privilege (sudo/runas) before invoking this command."].each do |line|
    STDERR.puts line
  end
  exit(2)
end
122
+
123
+ # Create AgentWatcher to monitor agent processes
124
+ #
125
+ # === Return
126
+ # nil
127
def setup_agent_watcher()
  # The watcher reports through this checker's own info logging
  logger = lambda { |message| self.info(message) }
  @agent_watcher ||= AgentWatcher.new(logger, @agent[:pid_dir])
  identity = @agent[:identity]
  @agent_watcher.watch_agent(identity, '/opt/rightscale/bin/rnac', '--start instance', '--stop instance')
  @agent_watcher.start_watching
end
132
+
133
+ # Stop AgentWatcher from monitoring agent processes
134
+ #
135
+ # === Return
136
+ # nil
137
def stop_agent_watcher()
  # Ask the watcher to stop the agent it tracks, then end the watch itself
  watcher = @agent_watcher
  watcher.stop_agent(@agent[:identity])
  watcher.stop_watching
end
141
+
142
+ # Run daemon or run one agent communication check
143
+ # If running as a daemon, store pid in same location as agent except suffix the
144
+ # agent identity with '-rchk'.
145
+ #
146
+ # === Parameters
147
+ # options(Hash):: Run options
148
+ # :time_limit(Integer):: Time limit for last communication and interval for daemon checks,
149
+ # defaults to PING_INTERVAL_MULTIPLIER times agent's ping interval or to DEFAULT_TIME_LIMIT
150
+ # :max_attempts(Integer):: Maximum number of communication check attempts,
151
+ # defaults to DEFAULT_MAX_ATTEMPTS
152
+ # :retry_interval(Integer):: Number of seconds to wait before retrying communication check,
153
+ # defaults to DEFAULT_RETRY_INTERVAL, reset to :time_limit if exceeds it
154
+ # :daemon(Boolean):: Whether to run as a daemon rather than do a one-time communication check
155
+ # :log_path(String):: Log file directory, defaults to one used by agent
156
+ # :stop(Boolean):: Whether to stop the currently running daemon and then exit
157
+ # :ping(Boolean):: Try communicating now regardless of whether have communicated within
158
+ # the configured time limit, ignored if :daemon true
159
+ # :verbose(Boolean):: Whether to display debug information
160
+ #
161
+ # === Return
162
+ # true:: Always return true
163
def start(options)
  begin
    setup_traps
    @command_serializer = Serializer.new
    @state_serializer = Serializer.new(:json)

    # Retrieve instance agent configuration options
    @agent = AgentConfig.agent_options('instance')
    error("No instance agent configured", nil, true) if @agent.empty?

    # Apply agent's ping interval if needed and adjust options to make them consistent
    @options = options
    # A missing or non-positive time limit is ignored in favor of the agent's
    # ping interval (or the built-in default); a zero limit would otherwise
    # lead to a division by zero below
    unless @options[:time_limit] && @options[:time_limit] > 0
      @options[:time_limit] = if @agent[:ping_interval]
        @agent[:ping_interval] * PING_INTERVAL_MULTIPLIER
      else
        DEFAULT_TIME_LIMIT
      end
    end
    # parse_args removes these keys when the user supplies a value less than 1,
    # so restore the documented defaults instead of comparing against nil
    @options[:retry_interval] ||= DEFAULT_RETRY_INTERVAL
    @options[:max_attempts] ||= DEFAULT_MAX_ATTEMPTS
    @options[:retry_interval] = [@options[:retry_interval], @options[:time_limit]].min
    @options[:max_attempts] = [@options[:max_attempts], @options[:time_limit] / @options[:retry_interval]].min
    @options[:log_path] ||= RightScale::Platform.filesystem.log_dir

    # Attach to log used by instance agent
    Log.program_name = 'RightLink'
    Log.facility = 'user'
    Log.log_to_file_only(@agent[:log_to_file_only])
    Log.init(@agent[:identity], @options[:log_path], :print => true)
    Log.level = :debug if @options[:verbose]
    @logging_enabled = true

    # Catch any egregious eventmachine failures, especially failure to connect to agent with CommandIO
    # Exit even if running as daemon since no longer can trust EM and should get restarted automatically
    EM.error_handler do |e|
      if e.class == RuntimeError && e.message =~ /no connection/
        error("Failed to connect to agent for communication check", nil, false)
        @command_io_failures = (@command_io_failures || 0) + 1
        reenroll! if @command_io_failures > @options[:max_attempts]
      else
        error("Internal checker failure", e, true)
      end
    end

    # note that our Windows service monitors rnac and rchk processes
    # externally and restarts them if they die, so no need to roll our
    # own cross-monitoring on that platform.
    use_agent_watcher = !RightScale::Platform.windows?
    EM.run do
      check
      setup_agent_watcher if use_agent_watcher
    end
    stop_agent_watcher if use_agent_watcher

  rescue SystemExit => e
    # error(..., abort = true) exits; let that propagate untouched
    raise e
  rescue Exception => e
    error("Failed to run", e, true)
  end
  true
end
223
+
224
+ # Terminate the checker
225
+ #
226
+ # === Return
227
+ # true:: Always return true
228
def terminate
  # Both shutdown steps are best effort: failures are deliberately ignored
  if @command_runner
    begin
      CommandRunner.stop
    rescue StandardError
      nil
    end
  end
  begin
    EM.stop
  rescue StandardError
    nil
  end
  true
end
233
+
234
+ # Create options hash from command line arguments
235
+ #
236
+ # === Return
237
+ # options(Hash):: Command line options
238
def parse_args
  parser = Trollop::Parser.new do
    opt :max_attempts, "", :default => DEFAULT_MAX_ATTEMPTS, :long => "--attempts", :short => "-a"
    opt :retry_interval, "", :default => DEFAULT_RETRY_INTERVAL
    opt :time_limit, "", :type => :int
    opt :daemon, "", :long => "--start"
    opt :stop
    opt :ping
    opt :verbose
    opt :state_path, "", :type => String
    version ""
  end

  begin
    options = parser.parse
    # Values less than 1 are ignored, as the usage text promises, by removing
    # them from the options. :time_limit has no default, so it may be nil
    # when not given on the command line.
    options.delete(:max_attempts) unless options[:max_attempts] > 0
    options.delete(:time_limit) if options[:time_limit] && options[:time_limit] <= 0
    options.delete(:retry_interval) unless options[:retry_interval] > 0
    options
  rescue Trollop::HelpNeeded
    puts Usage.scan(__FILE__)
    exit
  rescue Trollop::CommandlineError => e
    error("#{e}\nUse --help for additional information", nil, true)
  rescue Trollop::VersionNeeded
    puts version
    exit
  end
end
269
+
270
+ protected
271
+
272
+ # Perform required checks
273
+ #
274
+ # === Return
275
+ # true:: Always return true
276
def check
  begin
    identity = "#{@agent[:identity]}-rchk"
    pid_file = PidFile.new(identity, @agent[:pid_dir])

    if @options[:stop]
      # Stop a running checker daemon, if there is one, then shut down
      daemon = pid_file.read_pid
      if daemon[:pid]
        info("Stopping checker daemon")
        if RightScale::Platform.windows?
          # On Windows the daemon is asked to terminate over its command port
          begin
            client = CommandClient.new(daemon[:listen_port], daemon[:cookie])
            client.send_command({:name => :terminate}, @options[:verbose], 30) do |reply|
              info(reply)
              terminate
            end
          rescue Exception => e
            error("Failed stopping checker daemon, confirm it is still running", e, true)
          end
        else
          Process.kill('TERM', daemon[:pid])
          terminate
        end
      else
        terminate
      end
    elsif @options[:daemon]
      # Refuse to start a second daemon, then detach and record our pid
      begin
        pid_file.check
      rescue StandardError
        error("Cannot start checker daemon because already running", nil, true)
      end
      daemonize(identity, @options) unless RightScale::Platform.windows?
      pid_file.write
      at_exit { pid_file.remove }

      port = CommandConstants::BASE_INSTANCE_AGENT_CHECKER_SOCKET_PORT
      @command_runner = CommandRunner.start(port, identity, AgentCheckerCommands.get(self))

      info("Checker daemon options:")
      @options.map { |k, v| "- #{k}: #{v}" }.each { |line| info(line, false, true) }

      # The same time limit serves as polling interval and communication limit
      limit = elapsed(@options[:time_limit])
      info("Starting checker daemon with #{limit} polling " +
           "and #{limit} last communication limit")

      iteration = 0
      EM.add_periodic_timer(@options[:time_limit]) do
        iteration += 1
        debug("Checker iteration #{iteration}")
        check_communication(0)
      end
    else
      # One-time check, forced when --ping was given
      check_communication(0, @options[:ping])
    end
  rescue SystemExit
    raise
  rescue Exception => e
    error("Internal checker failure", e, true)
  end
  true
end
337
+
338
+ # Check communication, repeatedly if necessary
339
+ #
340
+ # === Parameters
341
+ # attempt(Integer):: Number of attempts thus far
342
+ # must_try(Boolean):: Try communicating regardless of whether required based on time limit
343
+ #
344
+ # === Return
345
+ # true:: Always return true
346
def check_communication(attempt, must_try = false)
  attempt += 1
  begin
    daemon = @options[:daemon]
    if !must_try && (since_last = time_since_last_communication) < @options[:time_limit]
      # Recent enough activity: no retry needed
      @retry_timer.cancel if @retry_timer
      ago = elapsed(since_last)
      info("Passed communication check with activity as recently as #{ago} ago", !daemon)
      terminate unless daemon
    elsif attempt <= @options[:max_attempts]
      suffix = attempt > 1 ? ", attempt #{attempt}" : ""
      debug("Trying communication" + suffix)
      try_communicating(attempt)
      # If the agent does not answer within the retry interval, check again
      @retry_timer = EM::Timer.new(@options[:retry_interval]) do
        error("Communication attempt #{attempt} timed out after #{elapsed(@options[:retry_interval])}")
        @agent = AgentConfig.agent_options('instance') # Reload in case not using right cookie
        check_communication(attempt)
      end
    else
      reenroll!
    end
  rescue SystemExit
    raise
  rescue Exception => e
    # Only a one-time check that ran out of attempts aborts; otherwise retry
    give_up = !@options[:daemon] && (attempt > @options[:max_attempts])
    error("Failed communication check", e, give_up)
    check_communication(attempt)
  end
  true
end
374
+
375
+ # Get elapsed time since last communication
376
+ #
377
+ # === Return
378
+ # (Integer):: Elapsed time
379
def time_since_last_communication
  state_file = @options[:state_path] || File.join(AgentConfig.agent_state_dir, 'state.js')
  state = @state_serializer.load(File.read(state_file)) if File.file?(state_file)
  # A state file that carries no timestamp is treated like a missing state
  # file rather than raising on "Integer - nil"
  last_communication = state && state["last_communication"]
  # Without a timestamp, report a value just past the limit so the check fails
  return @options[:time_limit] + 1 if last_communication.nil?
  Time.now.to_i - last_communication
end
384
+
385
+ # Ask instance agent to try to communicate
386
+ #
387
+ # === Parameters
388
+ # attempt(Integer):: Number of attempts thus far
389
+ #
390
+ # === Return
391
+ # true:: Always return true
392
def try_communicating(attempt)
  begin
    listen_port = @agent[:listen_port]
    client = CommandClient.new(listen_port, @agent[:cookie])
    client.send_command({:name => "check_connectivity"}, @options[:verbose], COMMAND_IO_TIMEOUT) do |r|
      # Getting any reply means the command connection itself works
      @command_io_failures = 0
      res = begin
        OperationResult.from_results(@command_serializer.load(r))
      rescue StandardError
        nil # An undecodable reply is reported as an unknown error below
      end
      if res && res.success?
        info("Successful agent communication" + (attempt > 1 ? " on attempt #{attempt}" : ""))
        @retry_timer.cancel if @retry_timer
        check_communication(attempt)
      else
        # Report the failure content carried by the decoded result, if any
        reason = (res && res.content) || "<unknown error>"
        error("Failed agent communication attempt", reason, false)
        # Let existing timer control next attempt
      end
    end
  rescue Exception => e
    error("Failed to access agent for communication check", e, false)
  end
  true
end
414
+
415
+ # Trigger re-enroll
416
+ # This will normally cause the checker to exit
417
+ #
418
+ # === Return
419
+ # true:: Always return true
420
def reenroll!
  # Ignore the request while a re-enroll is already being triggered
  return true if @reenrolling

  @reenrolling = true
  begin
    info("Triggering re-enroll after unsuccessful communication check", true)
    on_windows = RightScale::Platform.windows?
    # Elsewhere than Windows the command is backgrounded with a trailing '&'
    command = ["rs_reenroll", (@options[:verbose] ? " -v" : ""), (on_windows ? "" : "&")].join
    # Windows relies on the command protocol to terminate properly.
    # If rchk terminates itself, then rchk --stop will hang trying
    # to connect to this rchk.
    terminate unless on_windows
    system(command)
    # Wait around until rs_reenroll has a chance to stop the checker
    # otherwise we may restart it
    sleep(5)
  rescue Exception => e
    error("Failed re-enroll after unsuccessful communication check", e, true)
  end
  @reenrolling = false
  true
end
443
+
444
+ # Setup signal traps
445
+ #
446
+ # === Return
447
+ # true:: Always return true
448
def setup_traps
  # INT and TERM both shut the checker down on the next reactor tick
  %w[INT TERM].each do |signal_name|
    trap(signal_name) do
      EM.next_tick { terminate; EM.stop }
    end
  end
  true
end
459
+
460
+ # Log debug information
461
+ #
462
+ # === Parameters
463
+ # info(String):: Information to be logged
464
+ #
465
+ # === Return
466
+ # true:: Always return true
467
def debug(info)
  # Debug output is plain info output that only appears in verbose mode
  return unless @options[:verbose]
  info(info)
end
470
+
471
+ # Log information
472
+ #
473
+ # === Parameters
474
+ # info(String):: Information to be logged
475
+ # to_console(Boolean):: Whether to also display to console even if :verbose is false
476
+ # no_check(Boolean):: Whether to omit '[check]' prefix in logged info
477
+ #
478
+ # === Return
479
+ # true:: Always return true
480
def info(info, to_console = false, no_check = false)
  # Everything goes to the log; the console only sees it on request or when verbose
  prefix = no_check ? '' : '[check] '
  Log.info("#{prefix}#{info}")
  puts(info) if to_console || @options[:verbose]
end
484
+
485
+ # Handle error by logging message and optionally aborting execution
486
+ #
487
+ # === Parameters
488
+ # description(String):: Description of context where error occurred
489
+ # error(Exception|String):: Exception or error message
490
+ # abort(Boolean):: Whether to abort execution
491
+ #
492
+ # === Return
493
+ # true:: If do not abort
494
def error(description, error = nil, abort = false)
  # Log only once logging has been attached
  if @logging_enabled
    log_msg = "[check] #{description}"
    log_msg += ", aborting" if abort
    log_msg = Log.format(log_msg, error, :trace) if error
    Log.error(log_msg)
  end

  # Always report on the console as well
  console_msg = error ? "#{description}: #{error}" : description
  puts "** #{console_msg}"

  return true unless abort

  terminate
  exit(1)
end
512
+
513
+ # Convert elapsed time in seconds to displayable format
514
+ #
515
+ # === Parameters
516
+ # time(Integer|Float):: Elapsed time
517
+ #
518
+ # === Return
519
+ # (String):: Display string
520
def elapsed(time)
  secs = time.to_i
  # Pick the coarsest pair of units that fits; boundaries are inclusive
  return "#{secs} sec" if secs <= MINUTE

  if secs <= HOUR
    mins, rest = secs.divmod(MINUTE)
    return "#{mins} min #{rest} sec"
  end

  if secs <= DAY
    hrs, rest = secs.divmod(HOUR)
    return "#{hrs} hr #{rest / MINUTE} min"
  end

  days, rest = secs.divmod(DAY)
  hrs, rest = rest.divmod(HOUR)
  plural = days == 1 ? '' : 's'
  "#{days} day#{plural} #{hrs} hr #{rest / MINUTE} min"
end
539
+
540
+ # Version information
541
+ #
542
+ # === Return
543
+ # ver(String):: Version information
544
def version
  # VERSION components are joined with dots for display
  number = VERSION.join('.')
  "rchk #{number} - RightScale Agent Checker (c) 2010 RightScale"
end
547
+
548
+ end # AgentChecker
549
+
550
+ end # RightScale
551
+
552
+ # Copyright (c) 2010-2011 RightScale Inc
553
+ #
554
+ # Permission is hereby granted, free of charge, to any person obtaining
555
+ # a copy of this software and associated documentation files (the
556
+ # "Software"), to deal in the Software without restriction, including
557
+ # without limitation the rights to use, copy, modify, merge, publish,
558
+ # distribute, sublicense, and/or sell copies of the Software, and to
559
+ # permit persons to whom the Software is furnished to do so, subject to
560
+ # the following conditions:
561
+ #
562
+ # The above copyright notice and this permission notice shall be
563
+ # included in all copies or substantial portions of the Software.
564
+ #
565
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
566
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
567
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
568
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
569
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
570
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
571
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.