right_link 5.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199) hide show
  1. data/actors/agent_manager.rb +88 -0
  2. data/actors/instance_scheduler.rb +321 -0
  3. data/actors/instance_services.rb +64 -0
  4. data/actors/instance_setup.rb +567 -0
  5. data/bin/cloud +25 -0
  6. data/bin/cook_runner +44 -0
  7. data/bin/deploy +120 -0
  8. data/bin/enroll +385 -0
  9. data/bin/rad +32 -0
  10. data/bin/rchk +29 -0
  11. data/bin/rnac +39 -0
  12. data/bin/rs_connect +33 -0
  13. data/bin/rs_log_level +31 -0
  14. data/bin/rs_ohai +28 -0
  15. data/bin/rs_reenroll +31 -0
  16. data/bin/rs_run_recipe +34 -0
  17. data/bin/rs_run_right_script +34 -0
  18. data/bin/rs_shutdown +33 -0
  19. data/bin/rs_tag +33 -0
  20. data/bin/rs_thunk +33 -0
  21. data/bin/rstat +31 -0
  22. data/bin/system +16 -0
  23. data/ext/Rakefile +18 -0
  24. data/init/config.yml +5 -0
  25. data/init/init.rb +79 -0
  26. data/lib/chef/ohai_setup.rb +51 -0
  27. data/lib/chef/plugins/cloud.rb +91 -0
  28. data/lib/chef/plugins/cloudstack.rb +23 -0
  29. data/lib/chef/plugins/ec2.rb +23 -0
  30. data/lib/chef/plugins/linux/block_device2.rb +24 -0
  31. data/lib/chef/plugins/rackspace.rb +23 -0
  32. data/lib/chef/plugins/rightscale.rb +125 -0
  33. data/lib/chef/plugins/windows/network.rb +114 -0
  34. data/lib/chef/plugins.rb +74 -0
  35. data/lib/chef/providers/dns_dnsmadeeasy_provider.rb +81 -0
  36. data/lib/chef/providers/dns_resource.rb +100 -0
  37. data/lib/chef/providers/executable_schedule_provider.rb +70 -0
  38. data/lib/chef/providers/executable_schedule_resource.rb +144 -0
  39. data/lib/chef/providers/remote_recipe_provider.rb +86 -0
  40. data/lib/chef/providers/remote_recipe_resource.rb +101 -0
  41. data/lib/chef/providers/right_link_tag_provider.rb +73 -0
  42. data/lib/chef/providers/right_link_tag_resource.rb +59 -0
  43. data/lib/chef/providers/right_script_provider.rb +190 -0
  44. data/lib/chef/providers/right_script_resource.rb +113 -0
  45. data/lib/chef/providers/rs_shutdown_provider.rb +75 -0
  46. data/lib/chef/providers/rs_shutdown_resource.rb +55 -0
  47. data/lib/chef/providers/server_collection_provider.rb +66 -0
  48. data/lib/chef/providers/server_collection_resource.rb +93 -0
  49. data/lib/chef/providers/windows/powershell_provider.rb +151 -0
  50. data/lib/chef/providers/windows/powershell_resource.rb +111 -0
  51. data/lib/chef/providers/windows/unsupported_provider.rb +51 -0
  52. data/lib/chef/right_providers.rb +55 -0
  53. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ChefNodeCmdlet.csproj +104 -0
  54. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ChefNodeCmdlet.dll-Help.xml +141 -0
  55. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Exceptions.cs +182 -0
  56. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeCommand.cs +58 -0
  57. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeRequest.cs +46 -0
  58. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetChefNodeResponse.cs +45 -0
  59. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceCommand.cs +58 -0
  60. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceRequest.cs +46 -0
  61. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetCurrentResourceResponse.cs +45 -0
  62. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceCommand.cs +58 -0
  63. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceRequest.cs +46 -0
  64. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNewResourceResponse.cs +45 -0
  65. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionCommand.cs +178 -0
  66. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionRequest.cs +67 -0
  67. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNextActionResponse.cs +58 -0
  68. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueCommandBase.cs +142 -0
  69. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueRequestBase.cs +64 -0
  70. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/GetNodeValueResponseBase.cs +69 -0
  71. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/JsonTransport.cs +110 -0
  72. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/PipeClient.cs +158 -0
  73. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/PipeServer.cs +142 -0
  74. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Properties/AssemblyInfo.cs +16 -0
  75. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ProtocolConstants.cs +55 -0
  76. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ProtocolUtilities.cs +77 -0
  77. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/ReadMe.txt +53 -0
  78. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeCommand.cs +59 -0
  79. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeRequest.cs +46 -0
  80. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetChefNodeResponse.cs +58 -0
  81. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceCommand.cs +59 -0
  82. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceRequest.cs +46 -0
  83. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetCurrentResourceResponse.cs +40 -0
  84. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceCommand.cs +59 -0
  85. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceRequest.cs +46 -0
  86. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNewResourceResponse.cs +40 -0
  87. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueCommandBase.cs +293 -0
  88. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueRequestBase.cs +75 -0
  89. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/SetNodeValueResponseBase.cs +45 -0
  90. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet/Transport.cs +91 -0
  91. data/lib/chef/windows/ChefNodeCmdlet/ChefNodeCmdlet.sln +35 -0
  92. data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/Program.cs +374 -0
  93. data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/Properties/AssemblyInfo.cs +16 -0
  94. data/lib/chef/windows/ChefNodeCmdlet/TestChefNodeCmdlet/TestChefNodeCmdlet.csproj +65 -0
  95. data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/Program.cs +136 -0
  96. data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/Properties/AssemblyInfo.cs +36 -0
  97. data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/ReadMe.txt +46 -0
  98. data/lib/chef/windows/ChefNodeCmdlet/TestNextActionCmdlet/TestNextActionCmdlet.csproj +68 -0
  99. data/lib/chef/windows/bin/Newtonsoft.Json.dll +0 -0
  100. data/lib/chef/windows/chef_node_server.rb +463 -0
  101. data/lib/chef/windows/dynamic_powershell_provider.rb +296 -0
  102. data/lib/chef/windows/pipe_server.rb +283 -0
  103. data/lib/chef/windows/powershell_host.rb +285 -0
  104. data/lib/chef/windows/powershell_pipe_server.rb +136 -0
  105. data/lib/chef/windows/powershell_provider_base.rb +92 -0
  106. data/lib/chef/windows/scripts/run_loop.ps1 +105 -0
  107. data/lib/clouds/cloud.rb +557 -0
  108. data/lib/clouds/cloud_factory.rb +250 -0
  109. data/lib/clouds/cloud_utilities.rb +244 -0
  110. data/lib/clouds/clouds/azure.rb +106 -0
  111. data/lib/clouds/clouds/cloudstack.rb +114 -0
  112. data/lib/clouds/clouds/ec2.rb +113 -0
  113. data/lib/clouds/clouds/eucalyptus.rb +46 -0
  114. data/lib/clouds/clouds/google.rb +102 -0
  115. data/lib/clouds/clouds/none.rb +76 -0
  116. data/lib/clouds/clouds/openstack.rb +30 -0
  117. data/lib/clouds/clouds/rackspace-ng.rb +54 -0
  118. data/lib/clouds/clouds/rackspace.rb +78 -0
  119. data/lib/clouds/clouds/softlayer.rb +91 -0
  120. data/lib/clouds/metadata_formatter.rb +108 -0
  121. data/lib/clouds/metadata_provider.rb +128 -0
  122. data/lib/clouds/metadata_source.rb +87 -0
  123. data/lib/clouds/metadata_sources/certificate_metadata_source.rb +207 -0
  124. data/lib/clouds/metadata_sources/config_drive_metadata_source.rb +129 -0
  125. data/lib/clouds/metadata_sources/file_metadata_source.rb +74 -0
  126. data/lib/clouds/metadata_sources/http_metadata_source.rb +277 -0
  127. data/lib/clouds/metadata_sources/selective_metadata_source.rb +122 -0
  128. data/lib/clouds/metadata_tree_climber.rb +144 -0
  129. data/lib/clouds/metadata_writer.rb +155 -0
  130. data/lib/clouds/metadata_writers/dictionary_metadata_writer.rb +72 -0
  131. data/lib/clouds/metadata_writers/ruby_metadata_writer.rb +76 -0
  132. data/lib/clouds/metadata_writers/shell_metadata_writer.rb +121 -0
  133. data/lib/clouds/register_clouds.rb +34 -0
  134. data/lib/clouds.rb +32 -0
  135. data/lib/gem_dependencies.rb +83 -0
  136. data/lib/git_hooks/commit-msg.rb +7 -0
  137. data/lib/instance/agent_config.rb +168 -0
  138. data/lib/instance/agent_watcher.rb +233 -0
  139. data/lib/instance/audit_cook_stub.rb +104 -0
  140. data/lib/instance/audit_proxy.rb +247 -0
  141. data/lib/instance/bundle_queue.rb +104 -0
  142. data/lib/instance/cook/agent_connection.rb +109 -0
  143. data/lib/instance/cook/audit_logger.rb +165 -0
  144. data/lib/instance/cook/audit_stub.rb +142 -0
  145. data/lib/instance/cook/ca-bundle.crt +2794 -0
  146. data/lib/instance/cook/chef_state.rb +211 -0
  147. data/lib/instance/cook/cook.rb +306 -0
  148. data/lib/instance/cook/cook_state.rb +298 -0
  149. data/lib/instance/cook/cookbook_path_mapping.rb +66 -0
  150. data/lib/instance/cook/cookbook_repo_retriever.rb +190 -0
  151. data/lib/instance/cook/executable_sequence.rb +765 -0
  152. data/lib/instance/cook/external_parameter_gatherer.rb +190 -0
  153. data/lib/instance/cook/repose_downloader.rb +349 -0
  154. data/lib/instance/cook/shutdown_request_proxy.rb +121 -0
  155. data/lib/instance/cook.rb +41 -0
  156. data/lib/instance/downloader.rb +208 -0
  157. data/lib/instance/duplicable.rb +67 -0
  158. data/lib/instance/exceptions.rb +49 -0
  159. data/lib/instance/executable_sequence_proxy.rb +278 -0
  160. data/lib/instance/instance_commands.rb +577 -0
  161. data/lib/instance/instance_state.rb +633 -0
  162. data/lib/instance/json_utilities.rb +102 -0
  163. data/lib/instance/login_manager.rb +533 -0
  164. data/lib/instance/login_user_manager.rb +522 -0
  165. data/lib/instance/message_encoder.rb +118 -0
  166. data/lib/instance/multi_thread_bundle_queue.rb +232 -0
  167. data/lib/instance/operation_context.rb +60 -0
  168. data/lib/instance/options_bag.rb +65 -0
  169. data/lib/instance/payload_formatter.rb +46 -0
  170. data/lib/instance/policy.rb +53 -0
  171. data/lib/instance/policy_audit.rb +100 -0
  172. data/lib/instance/policy_manager.rb +146 -0
  173. data/lib/instance/reenroll_manager.rb +104 -0
  174. data/lib/instance/right_scripts_cookbook.rb +181 -0
  175. data/lib/instance/shutdown_request.rb +221 -0
  176. data/lib/instance/single_thread_bundle_queue.rb +189 -0
  177. data/lib/instance/volume_management.rb +450 -0
  178. data/lib/instance.rb +50 -0
  179. data/lib/repo_conf_generators/apt_conf_generators.rb +106 -0
  180. data/lib/repo_conf_generators/gem_conf_generators.rb +80 -0
  181. data/lib/repo_conf_generators/rightscale_conf_generators.rb +254 -0
  182. data/lib/repo_conf_generators/rightscale_key.pub +17 -0
  183. data/lib/repo_conf_generators/yum_conf_generators.rb +225 -0
  184. data/lib/repo_conf_generators.rb +30 -0
  185. data/lib/run_shell.rb +28 -0
  186. data/scripts/agent_checker.rb +571 -0
  187. data/scripts/agent_controller.rb +247 -0
  188. data/scripts/agent_deployer.rb +148 -0
  189. data/scripts/bundle_runner.rb +336 -0
  190. data/scripts/cloud_controller.rb +176 -0
  191. data/scripts/log_level_manager.rb +142 -0
  192. data/scripts/ohai_runner.rb +33 -0
  193. data/scripts/reenroller.rb +193 -0
  194. data/scripts/server_importer.rb +293 -0
  195. data/scripts/shutdown_client.rb +183 -0
  196. data/scripts/system_configurator.rb +367 -0
  197. data/scripts/tagger.rb +381 -0
  198. data/scripts/thunker.rb +356 -0
  199. metadata +418 -0
@@ -0,0 +1,571 @@
1
+ # === Synopsis:
2
+ # RightScale Agent Checker (rchk) - (c) 2010-2011 RightScale Inc
3
+ #
4
+ # Checks the agent to see if it is actively communicating with RightNet and if not
5
+ # triggers it to re-enroll and exits.
6
+ #
7
+ # Alternatively runs as a daemon and performs this communication check periodically.
8
+ #
9
+ # === Usage
10
+ # rchk
11
+ #
12
+ # Options:
13
+ # --time-limit, -t SEC Override the default time limit since last communication for
14
+ # check to pass (also the interval for daemon to run these checks),
15
+ # ignored if less than 1
16
+ # --attempts, -a N Override the default number of communication check attempts
17
+ # before trigger re-enroll, ignored if less than 1
18
+ # --retry-interval, -r SEC Override the default interval for retrying communication check,
19
+ # reset to time-limit if greater than it, ignored if less than 1
20
+ # --start Run as a daemon process that checks agent communication after the
21
+ # configured time limit and repeatedly thereafter on that interval
22
+ # (the checker does an immediate one-time check if --start is not specified)
23
+ # --stop Stop the currently running daemon started with --start and then exit
24
+ # --ping, -p Try communicating now regardless of whether have communicated within
25
+ # the configured time limit, does not apply if running as a daemon
26
+ # --verbose, -v Display debug information
27
+ # --version Display version information
28
+ # --help Display help
29
+ #
30
+
31
+ require 'rubygems'
32
+ require 'eventmachine'
33
+ require 'trollop'
34
+ require 'right_agent'
35
+ require 'right_agent/scripts/usage'
36
+ require 'right_agent/scripts/common_parser'
37
+
38
+ require File.normalize_path(File.join(File.dirname(__FILE__), '..', 'lib', 'instance', 'agent_watcher'))
39
+ require File.normalize_path(File.join(File.dirname(__FILE__), '..', 'lib', 'instance', 'agent_config'))
40
+
41
+ module RightScale
42
+
43
+ # Commands exposed by instance agent checker
44
class AgentCheckerCommands

  # Build the table of commands served by the checker daemon
  #
  # === Parameters
  # checker(AgentChecker):: Agent checker executing commands
  #
  # === Return
  # (Hash):: Command blocks keyed by command names
  def self.get(checker)
    handler = new(checker)
    terminate = lambda do |opts, conn|
      # Stash the connection in the options so the handler can reply on it
      opts[:conn] = conn
      handler.send("terminate_command", opts)
    end
    {:terminate => terminate}
  end

  # Remember the agent checker on whose behalf commands are executed
  #
  # === Parameter
  # checker(AgentChecker):: Agent checker
  def initialize(checker)
    @checker = checker
  end

  protected

  # Terminate command: acknowledge the request, then shut the checker down
  #
  # === Parameters
  # opts[:conn](EM::Connection):: Connection used to send reply
  #
  # === Return
  # (Object):: Whatever EM.next_tick returns
  def terminate_command(opts)
    CommandIO.instance.reply(opts[:conn], "Checker terminating")
    # Terminate on the next reactor tick to give the reply a chance to be sent
    EM.next_tick do
      @checker.terminate
    end
  end

end # AgentCheckerCommands
82
+
83
+ class AgentChecker
84
+
85
+ include DaemonizeHelper
86
+
87
+ VERSION = [0, 1]
88
+
89
+ # Time constants
90
+ MINUTE = 60
91
+ HOUR = 60 * MINUTE
92
+ DAY = 24 * HOUR
93
+
94
+ # Default maximum seconds since last communication for instance to be considered connected
95
+ # Only used if --time-limit not specified and :ping_interval option not specified for agent
96
+ DEFAULT_TIME_LIMIT = 12 * HOUR
97
+
98
+ # Multiplier of agent's mapper ping interval to get daemon's last communication time limit
99
+ PING_INTERVAL_MULTIPLIER = 3
100
+
101
+ # Default maximum number of seconds between checks for recent communication if first check fails
102
+ DEFAULT_RETRY_INTERVAL = 5 * MINUTE
103
+
104
+ # Default maximum number of attempts to check communication before trigger re-enroll
105
+ DEFAULT_MAX_ATTEMPTS = 3
106
+
107
+ # Maximum number of seconds to wait for a CommandIO response from the instance agent
108
+ COMMAND_IO_TIMEOUT = 2 * MINUTE
109
+
110
+ # Create and run checker
111
+ #
112
+ # === Return
113
+ # true:: Always return true
114
def self.run
  # Parse the command line first, then hand the resulting options to the checker
  checker = AgentChecker.new
  options = checker.parse_args
  checker.start(options)
rescue Errno::EACCES => e
  # Permission failure: report it with a hint and exit with status 2
  STDERR.puts(e.message, "Try elevating privilege (sudo/runas) before invoking this command.")
  exit(2)
end
122
+
123
+ # Create AgentWatcher to monitor agent processes
124
+ #
125
+ # === Return
126
+ # nil
127
def setup_agent_watcher()
  # Lazily create the watcher; it reports through this checker's info logger
  unless @agent_watcher
    logger = lambda { |s| self.info(s) }
    @agent_watcher = AgentWatcher.new(logger, @agent[:pid_dir])
  end
  @agent_watcher.watch_agent(
    @agent[:identity],
    '/opt/rightscale/bin/rnac',
    '--start instance',
    '--stop instance'
  )
  @agent_watcher.start_watching()
end
132
+
133
+ # Stop AgentWatcher from monitoring agent processes
134
+ #
135
+ # === Return
136
+ # nil
137
def stop_agent_watcher()
  # Tell the watcher to stop the instance agent, then stop watching altogether
  watcher = @agent_watcher
  watcher.stop_agent(@agent[:identity])
  watcher.stop_watching()
end
141
+
142
+ # Run daemon or run one agent communication check
143
+ # If running as a daemon, store pid in same location as agent except suffix the
144
+ # agent identity with '-rchk'.
145
+ #
146
+ # === Parameters
147
+ # options(Hash):: Run options
148
+ # :time_limit(Integer):: Time limit for last communication and interval for daemon checks,
149
+ # defaults to PING_INTERVAL_MULTIPLIER times agent's ping interval or to DEFAULT_TIME_LIMIT
150
+ # :max_attempts(Integer):: Maximum number of communication check attempts,
151
+ # defaults to DEFAULT_MAX_ATTEMPTS
152
+ # :retry_interval(Integer):: Number of seconds to wait before retrying communication check,
153
+ # defaults to DEFAULT_RETRY_INTERVAL, reset to :time_limit if exceeds it
154
+ # :daemon(Boolean):: Whether to run as a daemon rather than do a one-time communication check
155
+ # :log_path(String):: Log file directory, defaults to one used by agent
156
+ # :stop(Boolean):: Whether to stop the currently running daemon and then exit
157
+ # :ping(Boolean):: Try communicating now regardless of whether have communicated within
158
+ # the configured time limit, ignored if :daemon true
159
+ # :verbose(Boolean):: Whether to display debug information
160
+ #
161
+ # === Return
162
+ # true:: Always return true
163
def start(options)
  # Runs either the checker daemon or a single communication check inside an
  # EventMachine reactor. Missing or non-positive :time_limit, :retry_interval
  # and :max_attempts values are replaced by their defaults before use, so the
  # consistency adjustments below never compare against nil or divide by zero.
  begin
    setup_traps
    @command_serializer = Serializer.new
    @state_serializer = Serializer.new(:json)

    # Retrieve instance agent configuration options
    @agent = AgentConfig.agent_options('instance')
    error("No instance agent configured", nil, true) if @agent.empty?

    # Apply agent's ping interval if needed and adjust options to make them consistent
    @options = options
    time_limit = @options[:time_limit]
    unless time_limit && time_limit > 0
      @options[:time_limit] = if @agent[:ping_interval]
        @agent[:ping_interval] * PING_INTERVAL_MULTIPLIER
      else
        DEFAULT_TIME_LIMIT
      end
    end

    retry_interval = @options[:retry_interval]
    retry_interval = DEFAULT_RETRY_INTERVAL unless retry_interval && retry_interval > 0
    max_attempts = @options[:max_attempts]
    max_attempts = DEFAULT_MAX_ATTEMPTS unless max_attempts && max_attempts > 0

    # Retry interval may not exceed the time limit, and all attempts must fit within it
    @options[:retry_interval] = [retry_interval, @options[:time_limit]].min
    @options[:max_attempts] = [max_attempts, @options[:time_limit] / @options[:retry_interval]].min
    @options[:log_path] ||= RightScale::Platform.filesystem.log_dir

    # Attach to log used by instance agent
    Log.program_name = 'RightLink'
    Log.facility = 'user'
    Log.log_to_file_only(@agent[:log_to_file_only])
    Log.init(@agent[:identity], @options[:log_path], :print => true)
    Log.level = :debug if @options[:verbose]
    @logging_enabled = true

    # Catch any egregious eventmachine failures, especially failure to connect to agent with CommandIO
    # Exit even if running as daemon since no longer can trust EM and should get restarted automatically
    EM.error_handler do |e|
      if e.class == RuntimeError && e.message =~ /no connection/
        error("Failed to connect to agent for communication check", nil, false)
        @command_io_failures = (@command_io_failures || 0) + 1
        reenroll! if @command_io_failures > @options[:max_attempts]
      else
        error("Internal checker failure", e, true)
      end
    end

    # note that our Windows service monitors rnac and rchk processes
    # externally and restarts them if they die, so no need to roll our
    # own cross-monitoring on that platform.
    use_agent_watcher = !RightScale::Platform.windows?
    EM.run do
      check
      setup_agent_watcher if use_agent_watcher
    end
    stop_agent_watcher if use_agent_watcher

  rescue SystemExit => e
    # Deliberate exits (e.g. from error with abort) must not be reported as failures
    raise e
  rescue Exception => e
    error("Failed to run", e, true)
  end
  true
end
223
+
224
+ # Terminate the checker
225
+ #
226
+ # === Return
227
+ # true:: Always return true
228
def terminate
  # Best effort: failures while stopping the command runner or reactor are ignored
  if @command_runner
    begin
      CommandRunner.stop
    rescue
      nil
    end
  end
  begin
    EM.stop
  rescue
    nil
  end
  true
end
233
+
234
+ # Create options hash from command line arguments
235
+ #
236
+ # === Return
237
+ # options(Hash):: Command line options
238
def parse_args
  # Parses the command line with Trollop and normalizes numeric options:
  # values less than 1 are ignored, i.e. attempts and retry interval fall back
  # to their defaults and a non-positive time limit is dropped so that the
  # caller derives it from the agent configuration.
  parser = Trollop::Parser.new do
    opt :max_attempts, "", :default => DEFAULT_MAX_ATTEMPTS, :long => "--attempts", :short => "-a"
    opt :retry_interval, "", :default => DEFAULT_RETRY_INTERVAL
    opt :time_limit, "", :type => :int
    opt :daemon, "", :long => "--start"
    opt :stop
    opt :ping
    opt :verbose
    opt :state_path, "", :type => String
    version ""
  end

  begin
    options = parser.parse
    options[:max_attempts] = DEFAULT_MAX_ATTEMPTS unless options[:max_attempts].to_i > 0
    options[:retry_interval] = DEFAULT_RETRY_INTERVAL unless options[:retry_interval].to_i > 0
    # :time_limit has no default, so it is nil unless given on the command line
    if options[:time_limit]
      options.delete(:time_limit) unless options[:time_limit] > 0
    end
    options
  rescue Trollop::HelpNeeded
    puts Usage.scan(__FILE__)
    exit
  rescue Trollop::CommandlineError => e
    error("#{e}\nUse --help for additional information", nil, true)
  rescue Trollop::VersionNeeded
    puts version
    exit
  end
end
269
+
270
+ protected
271
+
272
+ # Perform required checks
273
+ #
274
+ # === Return
275
+ # true:: Always return true
276
def check
  # Dispatches on the run options: stop a running daemon, become the daemon,
  # or perform a single communication check.
  begin
    checker_identity = "#{@agent[:identity]}-rchk"
    pid_file = PidFile.new(checker_identity, @agent[:pid_dir])

    if @options[:stop]
      # Stop checker
      pid_data = pid_file.read_pid
      if !pid_data[:pid]
        # No pid recorded for a checker daemon, so just stop this process
        terminate
      else
        info("Stopping checker daemon")
        if RightScale::Platform.windows?
          begin
            client = CommandClient.new(pid_data[:listen_port], pid_data[:cookie])
            client.send_command({:name => :terminate}, @options[:verbose], 30) do |r|
              info(r)
              terminate
            end
          rescue Exception => e
            error("Failed stopping checker daemon, confirm it is still running", e, true)
          end
        else
          Process.kill('TERM', pid_data[:pid])
          terminate
        end
      end
    elsif @options[:daemon]
      # Run checker as daemon
      begin
        pid_file.check
      rescue
        error("Cannot start checker daemon because already running", nil, true)
      end
      daemonize(checker_identity, @options) unless RightScale::Platform.windows?
      pid_file.write
      at_exit { pid_file.remove }

      listen_port = CommandConstants::BASE_INSTANCE_AGENT_CHECKER_SOCKET_PORT
      commands = AgentCheckerCommands.get(self)
      @command_runner = CommandRunner.start(listen_port, checker_identity, commands)

      info("Checker daemon options:")
      @options.map { |k, v| "- #{k}: #{v}" }.each do |line|
        info(line, false, true)
      end

      # The time limit doubles as the polling interval
      info("Starting checker daemon with #{elapsed(@options[:time_limit])} polling " +
           "and #{elapsed(@options[:time_limit])} last communication limit")

      iteration = 0
      EM.add_periodic_timer(@options[:time_limit]) do
        iteration += 1
        debug("Checker iteration #{iteration}")
        check_communication(0)
      end
    else
      # Perform one check
      check_communication(0, @options[:ping])
    end
  rescue SystemExit
    raise
  rescue Exception => e
    error("Internal checker failure", e, true)
  end
  true
end
337
+
338
+ # Check communication, repeatedly if necessary
339
+ #
340
+ # === Parameters
341
+ # attempt(Integer):: Number of attempts thus far
342
+ # must_try(Boolean):: Try communicating regardless of whether required based on time limit
343
+ #
344
+ # === Return
345
+ # true:: Always return true
346
def check_communication(attempt, must_try = false)
  # Passes if the agent communicated within the time limit; otherwise asks the
  # agent to communicate, retrying on a timer, and finally triggers re-enroll.
  attempt += 1
  begin
    recently_active = false
    unless must_try
      since = time_since_last_communication
      recently_active = since < @options[:time_limit]
    end

    if recently_active
      @retry_timer.cancel if @retry_timer
      ago = elapsed(since)
      info("Passed communication check with activity as recently as #{ago} ago", !@options[:daemon])
      terminate unless @options[:daemon]
    elsif attempt > @options[:max_attempts]
      reenroll!
    else
      debug("Trying communication" + (attempt > 1 ? ", attempt #{attempt}" : ""))
      try_communicating(attempt)
      # If the agent does not respond in time, reload its configuration and try again
      @retry_timer = EM::Timer.new(@options[:retry_interval]) do
        error("Communication attempt #{attempt} timed out after #{elapsed(@options[:retry_interval])}")
        @agent = AgentConfig.agent_options('instance') # Reload in case not using right cookie
        check_communication(attempt)
      end
    end
  rescue SystemExit
    raise
  rescue Exception => e
    # Only a one-time check that has exhausted its attempts aborts; otherwise retry
    give_up = !@options[:daemon] && (attempt > @options[:max_attempts])
    error("Failed communication check", e, give_up)
    check_communication(attempt)
  end
  true
end
374
+
375
+ # Get elapsed time since last communication
376
+ #
377
+ # === Return
378
+ # (Integer):: Elapsed time
379
def time_since_last_communication
  # Reads the agent state file (:state_path option, else state.js in the agent
  # state directory) and returns the seconds elapsed since the recorded
  # "last_communication" time. When the file is missing, or holds no numeric
  # "last_communication" value, returns one second more than the time limit so
  # that the caller treats the agent as not having communicated recently.
  state_file = @options[:state_path] || File.join(AgentConfig.agent_state_dir, 'state.js')
  state = @state_serializer.load(File.read(state_file)) if File.file?(state_file)
  last_communication = state["last_communication"] if state.is_a?(Hash)
  return @options[:time_limit] + 1 unless last_communication.is_a?(Numeric)
  Time.now.to_i - last_communication
end
384
+
385
+ # Ask instance agent to try to communicate
386
+ #
387
+ # === Parameters
388
+ # attempt(Integer):: Number of attempts thus far
389
+ #
390
+ # === Return
391
+ # true:: Always return true
392
def try_communicating(attempt)
  # Sends "check_connectivity" to the instance agent over its command port.
  # On a successful reply the retry timer is cancelled and the communication
  # check is re-run; on a failed reply the failure is reported and the existing
  # retry timer is left to drive the next attempt.
  begin
    listen_port = @agent[:listen_port]
    client = CommandClient.new(listen_port, @agent[:cookie])
    client.send_command({:name => "check_connectivity"}, @options[:verbose], COMMAND_IO_TIMEOUT) do |r|
      # Any reply at all means the command connection to the agent works
      @command_io_failures = 0
      res = begin
        OperationResult.from_results(@command_serializer.load(r))
      rescue
        nil
      end
      if res && res.success?
        info("Successful agent communication" + (attempt > 1 ? " on attempt #{attempt}" : ""))
        @retry_timer.cancel if @retry_timer
        check_communication(attempt)
      else
        reason = (res && res.content) || "<unknown error>"
        error("Failed agent communication attempt", reason, false)
        # Let existing timer control next attempt
      end
    end
  rescue Exception => e
    error("Failed to access agent for communication check", e, false)
  end
  true
end
414
+
415
+ # Trigger re-enroll
416
+ # This will normally cause the checker to exit
417
+ #
418
+ # === Return
419
+ # true:: Always return true
420
def reenroll!
  # Guard against re-entrance while a re-enroll is already being triggered
  return true if @reenrolling

  @reenrolling = true
  begin
    info("Triggering re-enroll after unsuccessful communication check", true)
    cmd = "rs_reenroll"
    cmd += " -v" if @options[:verbose]
    # Run in the background except on Windows
    cmd += '&' unless RightScale::Platform.windows?
    # Windows relies on the command protocol to terminate properly.
    # If rchk terminates itself, then rchk --stop will hang trying
    # to connect to this rchk.
    terminate unless RightScale::Platform.windows?
    system(cmd)
    # Wait around until rs_reenroll has a chance to stop the checker
    # otherwise we may restart it
    sleep(5)
  rescue Exception => e
    error("Failed re-enroll after unsuccessful communication check", e, true)
  end
  @reenrolling = false
  true
end
443
+
444
+ # Setup signal traps
445
+ #
446
+ # === Return
447
+ # true:: Always return true
448
def setup_traps
  # On INT or TERM, shut down from within the reactor on its next tick
  shutdown = lambda do
    terminate
    EM.stop
  end
  %w[INT TERM].each do |sig|
    trap(sig) { EM.next_tick { shutdown.call } }
  end
  true
end
459
+
460
+ # Log debug information
461
+ #
462
+ # === Parameters
463
+ # info(String):: Information to be logged
464
+ #
465
+ # === Return
466
+ # true:: Always return true
467
def debug(info)
  # Debug output is only produced when the :verbose option is set
  return unless @options[:verbose]
  info(info)
end
470
+
471
+ # Log information
472
+ #
473
+ # === Parameters
474
+ # info(String):: Information to be logged
475
+ # to_console(Boolean):: Whether to also display to console even if :verbose is false
476
+ # no_check(Boolean):: Whether to omit '[check]' prefix in logged info
477
+ #
478
+ # === Return
479
+ # true:: Always return true
480
def info(info, to_console = false, no_check = false)
  # Always log; echo to the console only when requested or when verbose
  prefix = no_check ? '' : '[check] '
  Log.info("#{prefix}#{info}")
  puts(info) if to_console || @options[:verbose]
end
484
+
485
+ # Handle error by logging message and optionally aborting execution
486
+ #
487
+ # === Parameters
488
+ # description(String):: Description of context where error occurred
489
+ # error(Exception|String):: Exception or error message
490
+ # abort(Boolean):: Whether to abort execution
491
+ #
492
+ # === Return
493
+ # true:: If do not abort
494
def error(description, error = nil, abort = false)
  # Log only once logging has been initialized (see @logging_enabled)
  if @logging_enabled
    log_msg = "[check] #{description}"
    log_msg += ", aborting" if abort
    log_msg = Log.format(log_msg, error, :trace) if error
    Log.error(log_msg)
  end

  # Always report on the console, appending the error when there is one
  console_msg = error ? "#{description}: #{error}" : description
  puts "** #{console_msg}"

  return true unless abort

  # Aborting: stop the checker and exit with a failure status
  terminate
  exit(1)
end
512
+
513
+ # Convert elapsed time in seconds to displayable format
514
+ #
515
+ # === Parameters
516
+ # time(Integer|Float):: Elapsed time
517
+ #
518
+ # === Return
519
+ # (String):: Display string
520
def elapsed(time)
  # Formats a duration in seconds using the largest fitting unit. Boundaries
  # are exclusive so that exactly one minute, hour or day rolls over to the
  # next unit ("1 min 0 sec", "1 hr 0 min", "1 day 0 hr 0 min").
  time = time.to_i
  if time < MINUTE
    "#{time} sec"
  elsif time < HOUR
    minutes, seconds = time.divmod(MINUTE)
    "#{minutes} min #{seconds} sec"
  elsif time < DAY
    hours, remainder = time.divmod(HOUR)
    "#{hours} hr #{remainder / MINUTE} min"
  else
    days, remainder = time.divmod(DAY)
    hours, remainder = remainder.divmod(HOUR)
    "#{days} day#{days == 1 ? '' : 's'} #{hours} hr #{remainder / MINUTE} min"
  end
end
539
+
540
+ # Version information
541
+ #
542
+ # === Return
543
+ # ver(String):: Version information
544
def version
  # VERSION is an array of numeric components, joined with dots for display
  number = VERSION.join('.')
  "rchk #{number} - RightScale Agent Checker (c) 2010 RightScale"
end
547
+
548
+ end # AgentChecker
549
+
550
+ end # RightScale
551
+
552
+ # Copyright (c) 2010-2011 RightScale Inc
553
+ #
554
+ # Permission is hereby granted, free of charge, to any person obtaining
555
+ # a copy of this software and associated documentation files (the
556
+ # "Software"), to deal in the Software without restriction, including
557
+ # without limitation the rights to use, copy, modify, merge, publish,
558
+ # distribute, sublicense, and/or sell copies of the Software, and to
559
+ # permit persons to whom the Software is furnished to do so, subject to
560
+ # the following conditions:
561
+ #
562
+ # The above copyright notice and this permission notice shall be
563
+ # included in all copies or substantial portions of the Software.
564
+ #
565
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
566
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
567
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
568
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
569
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
570
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
571
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.