bosh_agent 1.5.0.pre.1113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +0 -0
- data/bin/bosh_agent +102 -0
- data/lib/bosh_agent/alert.rb +191 -0
- data/lib/bosh_agent/alert_processor.rb +96 -0
- data/lib/bosh_agent/apply_plan/helpers.rb +30 -0
- data/lib/bosh_agent/apply_plan/job.rb +235 -0
- data/lib/bosh_agent/apply_plan/package.rb +58 -0
- data/lib/bosh_agent/apply_plan/plan.rb +96 -0
- data/lib/bosh_agent/bootstrap.rb +341 -0
- data/lib/bosh_agent/config.rb +5 -0
- data/lib/bosh_agent/configuration.rb +102 -0
- data/lib/bosh_agent/disk_util.rb +103 -0
- data/lib/bosh_agent/errors.rb +25 -0
- data/lib/bosh_agent/ext.rb +48 -0
- data/lib/bosh_agent/file_aggregator.rb +78 -0
- data/lib/bosh_agent/file_matcher.rb +45 -0
- data/lib/bosh_agent/handler.rb +440 -0
- data/lib/bosh_agent/heartbeat.rb +74 -0
- data/lib/bosh_agent/heartbeat_processor.rb +45 -0
- data/lib/bosh_agent/http_handler.rb +135 -0
- data/lib/bosh_agent/infrastructure/aws/registry.rb +177 -0
- data/lib/bosh_agent/infrastructure/aws/settings.rb +59 -0
- data/lib/bosh_agent/infrastructure/aws.rb +17 -0
- data/lib/bosh_agent/infrastructure/dummy.rb +24 -0
- data/lib/bosh_agent/infrastructure/openstack/registry.rb +220 -0
- data/lib/bosh_agent/infrastructure/openstack/settings.rb +76 -0
- data/lib/bosh_agent/infrastructure/openstack.rb +17 -0
- data/lib/bosh_agent/infrastructure/vsphere/settings.rb +135 -0
- data/lib/bosh_agent/infrastructure/vsphere.rb +16 -0
- data/lib/bosh_agent/infrastructure.rb +25 -0
- data/lib/bosh_agent/message/apply.rb +184 -0
- data/lib/bosh_agent/message/base.rb +38 -0
- data/lib/bosh_agent/message/compile_package.rb +250 -0
- data/lib/bosh_agent/message/drain.rb +195 -0
- data/lib/bosh_agent/message/list_disk.rb +25 -0
- data/lib/bosh_agent/message/logs.rb +108 -0
- data/lib/bosh_agent/message/migrate_disk.rb +55 -0
- data/lib/bosh_agent/message/mount_disk.rb +102 -0
- data/lib/bosh_agent/message/ssh.rb +109 -0
- data/lib/bosh_agent/message/state.rb +47 -0
- data/lib/bosh_agent/message/unmount_disk.rb +29 -0
- data/lib/bosh_agent/monit.rb +354 -0
- data/lib/bosh_agent/monit_client.rb +158 -0
- data/lib/bosh_agent/mounter.rb +42 -0
- data/lib/bosh_agent/ntp.rb +32 -0
- data/lib/bosh_agent/platform/centos/disk.rb +27 -0
- data/lib/bosh_agent/platform/centos/network.rb +39 -0
- data/lib/bosh_agent/platform/centos/templates/centos-ifcfg.erb +9 -0
- data/lib/bosh_agent/platform/centos/templates/dhclient_conf.erb +56 -0
- data/lib/bosh_agent/platform/centos/templates/logrotate.erb +8 -0
- data/lib/bosh_agent/platform/centos.rb +4 -0
- data/lib/bosh_agent/platform/dummy/templates/dummy_template.erb +1 -0
- data/lib/bosh_agent/platform/linux/adapter.rb +36 -0
- data/lib/bosh_agent/platform/linux/disk.rb +121 -0
- data/lib/bosh_agent/platform/linux/logrotate.rb +32 -0
- data/lib/bosh_agent/platform/linux/network.rb +124 -0
- data/lib/bosh_agent/platform/linux/password.rb +22 -0
- data/lib/bosh_agent/platform/linux.rb +4 -0
- data/lib/bosh_agent/platform/ubuntu/network.rb +59 -0
- data/lib/bosh_agent/platform/ubuntu/templates/dhclient_conf.erb +56 -0
- data/lib/bosh_agent/platform/ubuntu/templates/interfaces.erb +14 -0
- data/lib/bosh_agent/platform/ubuntu/templates/logrotate.erb +8 -0
- data/lib/bosh_agent/platform/ubuntu.rb +4 -0
- data/lib/bosh_agent/platform.rb +26 -0
- data/lib/bosh_agent/remote_exception.rb +62 -0
- data/lib/bosh_agent/runner.rb +36 -0
- data/lib/bosh_agent/settings.rb +61 -0
- data/lib/bosh_agent/sigar_box.rb +26 -0
- data/lib/bosh_agent/smtp_server.rb +96 -0
- data/lib/bosh_agent/state.rb +100 -0
- data/lib/bosh_agent/syslog_monitor.rb +53 -0
- data/lib/bosh_agent/template.rb +50 -0
- data/lib/bosh_agent/util.rb +190 -0
- data/lib/bosh_agent/version.rb +8 -0
- data/lib/bosh_agent.rb +92 -0
- metadata +332 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
2
|
+
|
3
|
+
module Bosh::Agent

  # Root of the agent's own exception hierarchy.
  class Error < StandardError
  end

  # Direct subclasses of Error.
  class FatalError < Error
  end

  class StateError < Error
  end

  # Subclasses of FatalError.
  class UnimplementedMethod < FatalError
  end

  class DiskNotFoundError < FatalError
  end

  # Error raised by message handlers; may carry an extra +blob+ of data
  # alongside the human-readable message.
  class MessageHandlerError < Error
    attr_reader :blob

    # @param message [String] exception message
    # @param blob [Object, nil] optional payload exposed through #blob
    def initialize(message, blob=nil)
      super(message)
      @blob = blob
    end
  end

  class UnknownMessage < Error
  end

  class LoadSettingsError < Error
  end

  # NOTE: inherits from StandardError directly, not from Bosh::Agent::Error,
  # so `rescue Bosh::Agent::Error` does not catch it.
  class HeartbeatError < StandardError
  end

end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
2
|
+
|
3
|
+
class Object
  # Default conversion: a plain object is its own "openstruct" form.
  # Array and Hash override this to convert their contents.
  def to_openstruct
    self
  end

  # An object is blank when its string representation is blank
  # (delegates to String#blank?).
  def blank?
    to_s.blank?
  end
end
|
12
|
+
|
13
|
+
class String
  # True-ish when the string is empty or consists only of whitespace.
  #
  # Anchored with \A / \z so the WHOLE string must be whitespace. The
  # previous ^ / $ anchors matched per line, which made any string that
  # merely contained an empty line (e.g. "a\n\nb") count as blank.
  #
  # @return [Integer, nil] 0 when blank, nil otherwise (same truthiness
  #   contract as before)
  def blank?
    self =~ /\A\s*\z/
  end
end
|
18
|
+
|
19
|
+
class Array
  # Returns a new array in which every element has been converted with
  # its own #to_openstruct.
  def to_openstruct
    map(&:to_openstruct)
  end
end
|
24
|
+
|
25
|
+
class Hash
  # Deep-merges +other+ into this hash in place.
  #
  # When both the existing and the incoming value for a key are hashes they
  # are merged recursively; in every other case the incoming value wins.
  # (Previously a hash being overwritten by a non-hash value, e.g. nil,
  # raised TypeError because the recursion was attempted anyway.)
  #
  # @param other [Hash] hash to merge in
  # @return [Hash] self
  def recursive_merge!(other)
    merge!(other) do |_key, old_value, new_value|
      if old_value.is_a?(Hash) && new_value.is_a?(Hash)
        old_value.recursive_merge!(new_value)
      else
        new_value
      end
    end
    self
  end

  # Builds an OpenStruct whose fields are this hash's keys; each value is
  # converted with its own #to_openstruct first.
  #
  # @return [OpenStruct]
  def to_openstruct
    mapped = {}
    each { |key, value| mapped[key] = value.to_openstruct }
    OpenStruct.new(mapped)
  end
end
|
43
|
+
|
44
|
+
class Logger
  # Overrides the stdlib formatter hook. Each entry is rendered as
  # "#[<pid>] <SEVERITY>: <message>\n"; +timestamp+ and +progname+ are
  # accepted but not used.
  def format_message(severity, timestamp, progname, msg)
    level = severity.upcase
    "#[#{$$}] #{level}: #{msg}\n"
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
2
|
+
|
3
|
+
module Bosh::Agent
|
4
|
+
class FileAggregator

  class Error < StandardError; end
  class DirectoryNotFound < Error; end
  class PackagingError < Error; end

  # Object responding to #base_dir and #globs; selects the files to collect.
  attr_accessor :matcher

  def initialize
    # Output directories handed out by #generate_tarball, removed by #cleanup.
    @used_dirs = []
  end

  # Generates a tarball including all the requested entries.
  # The staging directory is always removed; the directory holding the
  # tarball is kept until #cleanup is called.
  #
  # @return [String] tarball path
  # @raise [PackagingError] when tar exits with a non-zero status
  def generate_tarball
    tmpdir = Dir.mktmpdir
    out_dir = Dir.mktmpdir
    @used_dirs << out_dir

    copy_files(tmpdir)
    tarball_path = File.join(out_dir, "files.tgz")

    Dir.chdir(tmpdir) do
      tar_out = `tar -czf #{tarball_path} . 2>&1`
      raise PackagingError, "Cannot create tarball: #{tar_out}" unless $?.exitstatus == 0
    end

    tarball_path
  ensure
    FileUtils.rm_rf(tmpdir) if tmpdir && File.directory?(tmpdir)
  end

  # Removes every output directory created by #generate_tarball so far.
  def cleanup
    @used_dirs.each do |dir|
      FileUtils.rm_rf(dir) if File.directory?(dir)
    end
  end

  # Copies every regular file matched by the matcher's globs from the
  # matcher's base directory into +dst_directory+, keeping relative paths.
  # Matches that resolve outside the base directory are skipped.
  #
  # @param dst_directory [String] destination root
  # @return [Integer] number of files copied
  # @raise [Error] when no matcher has been assigned
  # @raise [DirectoryNotFound] when the matcher's base directory is missing
  def copy_files(dst_directory)
    raise Error, "no matcher provided" unless @matcher

    unless File.directory?(@matcher.base_dir)
      raise DirectoryNotFound, "Base directory #{@matcher.base_dir} not found"
    end

    copied = 0
    base_dir = realpath(@matcher.base_dir)

    Dir.chdir(base_dir) do
      @matcher.globs.each do |glob|
        Dir[glob].each do |file|
          path = File.expand_path(file)

          next unless File.file?(file)
          next unless inside_base?(path, base_dir)

          dst_filename = File.join(dst_directory, path[base_dir.length..-1])
          FileUtils.mkdir_p(File.dirname(dst_filename))
          FileUtils.cp(realpath(path), dst_filename, :preserve => true)
          copied += 1
        end
      end
    end

    copied
  end

  private

  def realpath(path)
    Pathname.new(path).realpath.to_s
  end

  # True when +path+ lies underneath +base_dir+. The comparison includes the
  # trailing separator: a bare string-prefix check would also accept sibling
  # directories that merely share the prefix (base "/x/log" vs "/x/log2/f").
  def inside_base?(path, base_dir)
    prefix = base_dir.end_with?(File::SEPARATOR) ? base_dir : base_dir + File::SEPARATOR
    path.start_with?(prefix)
  end

end
|
78
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
2
|
+
|
3
|
+
module Bosh::Agent
|
4
|
+
|
5
|
+
class FileMatcher
  # Directory the globs are evaluated against (subclasses override this).
  attr_reader :base_dir

  # Explicit glob list; when never assigned, #globs falls back to
  # #default_globs.
  attr_writer :globs

  # @param base_dir [String] root directory for matching
  def initialize(base_dir)
    @base_dir = base_dir
  end

  # @return [Array<String>] assigned globs, or the defaults when none set
  def globs
    @globs || default_globs
  end

  # @return [Array<String>] empty in the base class
  def default_globs
    []
  end
end
|
24
|
+
|
25
|
+
class AgentLogMatcher < FileMatcher
  # @return [String] "<base_dir>/bosh/log"
  def base_dir
    File.join(@base_dir, *%w[bosh log])
  end

  # @return [Array<String>] a single glob matching everything, recursively
  def default_globs
    %w[**/*]
  end
end
|
34
|
+
|
35
|
+
class JobLogMatcher < FileMatcher
  # @return [String] "<base_dir>/sys/log"
  def base_dir
    File.join(@base_dir, *%w[sys log])
  end

  # @return [Array<String>] a single glob matching "*.log" files, recursively
  def default_globs
    %w[**/*.log]
  end
end
|
44
|
+
|
45
|
+
end
|
@@ -0,0 +1,440 @@
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
2
|
+
|
3
|
+
module Bosh::Agent
|
4
|
+
|
5
|
+
class Handler
|
6
|
+
include Bosh::Exec
|
7
|
+
|
8
|
+
attr_accessor :nats
|
9
|
+
attr_reader :processors
|
10
|
+
|
11
|
+
# Convenience entry point: builds a handler and starts it.
def self.start
  handler = new
  handler.start
end
|
14
|
+
|
15
|
+
MAX_NATS_RETRIES = 10
|
16
|
+
NATS_RECONNECT_SLEEP = 0.5
|
17
|
+
|
18
|
+
# Seconds until we kill the agent so it can be restarted:
|
19
|
+
KILL_AGENT_THREAD_TIMEOUT_ON_ERRORS = 15 # When there's an unexpected error
|
20
|
+
KILL_AGENT_THREAD_TIMEOUT_ON_RESTART = 1 # When we force a restart
|
21
|
+
|
22
|
+
# Snapshots the settings the handler needs from Config, prepares the
# bookkeeping state and discovers the available message processors.
def initialize
  @agent_id, @logger = Config.agent_id, Config.logger
  @nats_uri, @base_dir = Config.mbus, Config.base_dir

  # Alert processing
  @process_alerts = Config.process_alerts
  @smtp_user, @smtp_password, @smtp_port =
    Config.smtp_user, Config.smtp_password, Config.smtp_port

  @hbp = Bosh::Agent::HeartbeatProcessor.new

  # Guards the single long-running task slot (see process_in_thread).
  @lock = Mutex.new

  # @results holds [timestamp, agent_task_id, result] triples;
  # @long_running_agent_task is [] or [agent_task_id].
  @results = []
  @long_running_agent_task = []
  @restarting_agent = false

  @nats_fail_count = 0

  # Message encryption is used only when credentials are configured.
  @credentials = Config.credentials
  @sessions = {}
  @session_reply_map = {}

  find_message_processors
end
|
50
|
+
|
51
|
+
# Builds @processors: every constant under Bosh::Agent::Message that
# responds to .process is registered under its snake_case name
# (e.g. PrepareNetworkChange -> "prepare_network_change").
def find_message_processors
  namespace_constants = Bosh::Agent::Message.constants
  @processors = {}

  namespace_constants.each do |const_name|
    candidate = Bosh::Agent::Message.const_get(const_name)
    next unless candidate.respond_to?(:process)

    # CamelCase -> under_score -> downcased
    key = const_name.to_s.gsub(/(.)([A-Z])/,'\1_\2').downcase
    @processors[key] = candidate
  end

  @logger.info("Message processors: #{@processors.inspect}")
end
|
64
|
+
|
65
|
+
# Returns the processor registered under the given method name, or nil.
def lookup(method)
  registry = @processors
  registry[method]
end
|
68
|
+
|
69
|
+
# Installs signal handlers, then runs the EventMachine reactor: connects to
# NATS, enables heartbeats and, when alert processing is configured, starts
# the alert processor and the syslog monitor.
#
# A NATS::ConnectError restarts the whole sequence up to MAX_NATS_RETRIES
# times; after that a fatal message is logged and the method returns.
def start
  ['TERM', 'INT', 'QUIT'].each { |s| trap(s) { shutdown } }

  EM.run do
    begin
      @nats = NATS.connect(:uri => @nats_uri, :autostart => false) { on_connect }
      Config.nats = @nats
    rescue Errno::ENETUNREACH, Timeout::Error => e
      # NOTE(review): this inner retry is unbounded and sleeps inside the
      # reactor block - kept as-is, confirm it is intended.
      @logger.info("Unable to talk to nats - retry (#{e.inspect})")
      sleep 0.1
      retry
    end

    setup_heartbeats

    if @process_alerts
      if (@smtp_port.nil? || @smtp_user.nil? || @smtp_password.nil?)
        @logger.error "Cannot start alert processor without having SMTP port, user and password configured"
        @logger.error "Agent will be running but alerts will NOT be properly processed"
      else
        # The SMTP password must never be written to the log; only
        # non-secret connection details are reported here.
        @logger.debug("SMTP: starting alert processor on port #{@smtp_port} (password redacted)")
        @processor = Bosh::Agent::AlertProcessor.start("127.0.0.1", @smtp_port, @smtp_user, @smtp_password)
        setup_syslog_monitor
      end
    end
  end
rescue NATS::ConnectError => e
  @nats_fail_count += 1
  @logger.error("NATS connection error: #{e.message}")
  sleep NATS_RECONNECT_SLEEP
  # only retry a few times and then exit which lets the agent recover if we change credentials
  retry if @nats_fail_count < MAX_NATS_RETRIES
  @logger.fatal("Unable to reconnect to NATS after #{MAX_NATS_RETRIES} retries, exiting...")
end
|
103
|
+
|
104
|
+
# Logs the exit, then stops NATS; once NATS has stopped the reactor is
# stopped and the process exits.
def shutdown
  @logger.info("Exit")
  NATS.stop do
    EM.stop
    exit
  end
end
|
108
|
+
|
109
|
+
# NATS connect callback: subscribes to this agent's subject
# ("agent.<agent_id>") and resets the connection failure counter.
def on_connect
  @nats.subscribe("agent.#{@agent_id}") do |raw_msg|
    handle_message(raw_msg)
  end
  @nats_fail_count = 0
end
|
114
|
+
|
115
|
+
# Enables the heartbeat processor when Config.heartbeat_interval converts
# (via #to_i) to a positive number of seconds; otherwise only logs a warning.
def setup_heartbeats
  seconds = Config.heartbeat_interval.to_i

  if seconds <= 0
    @logger.warn("Heartbeats are disabled")
  else
    @hbp.enable(seconds)
    @logger.info("Heartbeats are enabled and will be sent every #{seconds} seconds")
  end
end
|
124
|
+
|
125
|
+
# Starts the syslog monitor, handing it the NATS client and the agent id.
def setup_syslog_monitor
  monitor = Bosh::Agent::SyslogMonitor
  monitor.start(@nats, @agent_id)
end
|
128
|
+
|
129
|
+
# Entry point for every raw message received on the agent subject.
#
# Parses the JSON, optionally decrypts it, then dispatches on "method":
# a registered processor runs on the EM thread pool, "get_task" and
# "shutdown" are handled inline, anything else is answered with a
# RemoteException. Messages that are not JSON objects or lack "reply_to"
# are logged and dropped; JSON parse errors are logged.
#
# @param json [String] raw message body
def handle_message(json)
  msg = Yajl::Parser.new.parse(json)

  # Valid JSON is not necessarily an object: an array or scalar body would
  # otherwise raise on msg["reply_to"] inside the subscription callback.
  unless msg.is_a?(Hash) && msg["reply_to"]
    @logger.info("Missing reply_to in: #{msg}")
    return
  end

  @logger.info("Message: #{msg.inspect}")

  if @credentials
    msg = decrypt(msg)
    return if msg.nil?
  end

  reply_to = msg['reply_to']
  method = msg['method']
  args = msg['arguments']

  # "get_state" is served by the processor registered as "state".
  method = "state" if method == "get_state"

  processor = lookup(method)
  if processor
    EM.defer do
      process_in_thread(processor, reply_to, method, args)
    end
  elsif method == "get_task"
    # Array() tolerates a missing "arguments" field (nil) instead of
    # raising NoMethodError; the task is then reported as unknown.
    handle_get_task(reply_to, Array(args).first)
  elsif method == "shutdown"
    handle_shutdown(reply_to)
  else
    re = RemoteException.new("unknown message #{msg.inspect}")
    publish(reply_to, re.to_hash)
  end
rescue Yajl::ParseError => e
  @logger.info("Failed to parse message: #{json}: #{e.inspect}: #{e.backtrace}")
end
|
168
|
+
|
169
|
+
# Runs one request and publishes its reply.
#
# Processors that respond to :long_running? are executed one at a time
# under @lock (a concurrent request gets an "already running" exception)
# and are refused while the agent is restarting. All other processors are
# executed directly; for 'prepare_network_change' on a configured agent the
# post-change hook runs from the publish callback.
def process_in_thread(processor, reply_to, method, args)
  if !processor.respond_to?(:long_running?)
    payload = process(processor, args)

    if Config.configure && method == 'prepare_network_change'
      publish(reply_to, payload) { post_prepare_network_change }
    else
      publish(reply_to, payload)
    end
  elsif @restarting_agent
    refusal = RemoteException.new("restarting agent")
    publish(reply_to, refusal.to_hash)
  else
    @lock.synchronize do
      if @long_running_agent_task.empty?
        process_long_running(reply_to, processor, args)
      else
        busy = RemoteException.new("already running long running task")
        publish(reply_to, busy.to_hash)
      end
    end
  end
rescue => e
  # since this is running in a thread we're going to be nice and
  # log an error as this would otherwise be lost
  @logger.error("#{processor.to_s}: #{e.message}\n#{e.backtrace.join("\n")}")
end
|
201
|
+
|
202
|
+
# Replies with the status of a long-running task: "running" while it is
# the current task, its stored result once finished, or an
# "unknown agent_task_id" exception when it is not known.
def handle_get_task(reply_to, agent_task_id)
  if @long_running_agent_task == [agent_task_id]
    return publish(reply_to, {"value" => {"state" => "running", "agent_task_id" => agent_task_id}})
  end

  # @results entries are [time, task_id, result] triples.
  finished = @results.find { |_time, task_id, _result| task_id == agent_task_id }
  reply = finished ? finished[2] : {"exception" => "unknown agent_task_id" }
  publish(reply_to, reply)
end
|
215
|
+
|
216
|
+
NATS_MAX_PAYLOAD_SIZE = 1024 * 1024
|
217
|
+
|
218
|
+
# Publishes +payload+ to +reply_to+ on the next reactor tick.
#
# The payload is encrypted first when credentials are configured, then
# JSON-encoded. If the encoded message does not fit into
# NATS_MAX_PAYLOAD_SIZE, a RemoteException carrying the original payload as
# its blob is sent instead.
#
# The replacement message goes through the same encrypt + encode steps as a
# normal reply. Previously the raw Hash from RemoteException#to_hash was
# handed to @nats.publish, so the receiver got neither JSON nor (with
# credentials) an encrypted envelope.
#
# @param reply_to [String] NATS subject to publish to
# @param payload [Object] JSON-encodable reply
# @param blk [Proc] optional callback passed through to @nats.publish
def publish(reply_to, payload, &blk)
  @logger.info("reply_to: #{reply_to}: payload: #{payload.inspect}")

  unencrypted = payload
  if @credentials
    payload = encrypt(reply_to, payload)
  end

  json = Yajl::Encoder.encode(payload)

  unless json.bytesize < NATS_MAX_PAYLOAD_SIZE
    msg = "message > NATS_MAX_PAYLOAD, stored in blobstore"
    exception = RemoteException.new(msg, nil, unencrypted)
    @logger.fatal(msg)

    error_payload = exception.to_hash
    error_payload = encrypt(reply_to, error_payload) if @credentials
    json = Yajl::Encoder.encode(error_payload)
  end

  EM.next_tick do
    @nats.publish(reply_to, json, &blk)
  end
end
|
241
|
+
|
242
|
+
# Runs a long-running processor: marks a freshly generated task id as the
# current task, immediately replies with a "running" status, executes the
# processor, records [timestamp, task id, result] in @results and finally
# clears the current-task marker.
def process_long_running(reply_to, processor, args)
  task_id = generate_agent_task_id
  @long_running_agent_task = [task_id]

  publish(reply_to, {:value => {:state => "running", :agent_task_id => task_id}})

  outcome = process(processor, args)
  @results << [Time.now.to_i, task_id, outcome]
  @long_running_agent_task = []
end
|
254
|
+
|
255
|
+
# Flags the agent as restarting and spawns a thread that terminates the
# main thread after +seconds+.
#
# @return [Thread] the timer thread
def kill_main_thread_in(seconds)
  @restarting_agent = true
  Thread.new { sleep(seconds); Thread.main.terminate }
end
|
262
|
+
|
263
|
+
# Invokes processor.process(args) and wraps the outcome in a reply hash.
#
# * success               -> {:value => result}
# * Bosh::Agent::Error    -> logged, RemoteException.from(e).to_hash
# * any other exception   -> schedules an agent kill, logs, and returns
#                            {:exception => "<inspect>: <backtrace>"}
def process(processor, args)
  {:value => processor.process(args)}
rescue Bosh::Agent::Error => e
  @logger.info("#{e.inspect}: #{e.backtrace}")
  RemoteException.from(e).to_hash
rescue Exception => e
  # Deliberately broad: anything unexpected schedules an agent restart.
  kill_main_thread_in(KILL_AGENT_THREAD_TIMEOUT_ON_ERRORS)
  @logger.error("#{e.inspect}: #{e.backtrace}")
  {:exception => "#{e.inspect}: #{e.backtrace}"}
end
|
276
|
+
|
277
|
+
# @return [String] a fresh random UUID used to identify a long-running task
def generate_agent_task_id
  task_id = SecureRandom.uuid
  task_id
end
|
280
|
+
|
281
|
+
##
|
282
|
+
# When there's a network change on an existing vm, director sends a prepare_network_change message to the vm
|
283
|
+
# agent. After agent replies to director with a `true` message, the post_prepare_network_change method is called
|
284
|
+
# (via EM callback).
|
285
|
+
#
|
286
|
+
# The post_prepare_network_change method will delete the udev network persistent rules, delete the agent settings
|
287
|
+
# and then it should restart the agent to get the new agent settings (set by director-cpi). For a simple network
|
288
|
+
# change (i.e. dns changes) this is enough, as when the agent is restarted it will apply the new network settings.
|
289
|
+
# But for other network changes (i.e. IP change), the CPI will be responsible to reboot or recreate the vm if needed.
|
290
|
+
# On a configured agent: deletes the udev persistent-net rules file (when
# present) and the agent settings file. In all cases the agent restart is
# then scheduled via kill_main_thread_in.
def post_prepare_network_change
  config = Bosh::Agent::Config

  if config.configure
    rules_path = '/etc/udev/rules.d/70-persistent-net.rules'
    if File.exist?(rules_path)
      @logger.info("deleting 70-persistent-net.rules - again")
      File.delete(rules_path)
    end

    @logger.info("Removing settings.json")
    File.delete(config.settings_file)
  end

  @logger.info("Restarting agent to prepare for a network change")
  kill_main_thread_in(KILL_AGENT_THREAD_TIMEOUT_ON_RESTART)
end
|
305
|
+
|
306
|
+
# Replies {:value => "shutdown"} and shuts the handler down from the
# publish callback. On a configured agent an at_exit hook additionally
# runs `sv stop agent`.
def handle_shutdown(reply_to)
  scheme = URI.parse(Config.mbus).scheme.upcase
  @logger.info("Shutting down #{scheme} connection")
  payload = {:value => "shutdown"}

  # We should never come back up again
  at_exit { `sv stop agent` } if Bosh::Agent::Config.configure

  publish(reply_to, payload) { shutdown }
end
|
319
|
+
|
320
|
+
# Finds the encryption handler for a message.
#
# @param arg [Hash] either {:session_id => id} - returns (creating and
#   caching on first use) the handler for that session - or
#   {:reply_to => subject} - returns the handler remembered for that reply
#   subject. :session_id takes precedence.
# @return [Object, nil] the handler, or nil when nothing matches
def lookup_encryption_handler(arg)
  session_id = arg[:session_id]
  if session_id
    return @sessions[session_id] ||= Bosh::Core::EncryptionHandler.new(@agent_id, @credentials)
  end

  reply_to = arg[:reply_to]
  @session_reply_map[reply_to] if reply_to
end
|
331
|
+
|
332
|
+
# Decrypts an incoming envelope.
#
# The envelope must carry "session_id" and "encrypted_data"; the session's
# handler is remembered per reply subject so the reply can be encrypted
# with it. The envelope's "reply_to" is copied onto the decrypted message.
#
# @return [Hash, nil] decrypted message, or nil when a field is missing or
#   decryption fails (both cases are only logged)
def decrypt(msg)
  absent = [ "session_id", "encrypted_data" ].find { |key| !msg.key?(key) }
  if absent
    @logger.info("Missing #{absent} in #{msg}")
    return
  end

  reply_to = msg["reply_to"]
  encryption_handler = lookup_encryption_handler(:session_id => msg["session_id"])

  # save message handler for the reply
  @session_reply_map[reply_to] = encryption_handler

  # Log exceptions from the EncryptionHandler, but stay quiet on the wire.
  begin
    msg = encryption_handler.decrypt(msg["encrypted_data"])
  rescue Bosh::Core::EncryptionHandler::CryptError => e
    log_encryption_error(e)
    return
  end

  msg["reply_to"] = reply_to

  @logger.info("Decrypted Message: #{msg}")
  msg
end
|
361
|
+
|
362
|
+
# Logs an encryption failure with its backtrace, one frame per line.
#
# The separator is a double-quoted "\n": the previous single-quoted '\n'
# joined the frames with a literal backslash followed by "n".
#
# @param e [Exception] error raised by the encryption handler
def log_encryption_error(e)
  frames = (e.backtrace || []).join("\n")
  @logger.info("Encrypton Error: #{e.inspect} #{frames}")
end
|
365
|
+
|
366
|
+
# Wraps +payload+ in an encrypted envelope using the handler remembered
# for +reply_to+.
#
# @return [Hash] {"session_id" => ..., "encrypted_data" => ...}
def encrypt(reply_to, payload)
  handler = lookup_encryption_handler(:reply_to => reply_to)
  sid = handler.session_id

  { "session_id" => sid, "encrypted_data" => handler.encrypt(payload) }
end
|
377
|
+
|
378
|
+
end
|
379
|
+
|
380
|
+
# Built-in message handlers
|
381
|
+
module Message
|
382
|
+
|
383
|
+
class Ping
  class << self
    # Liveness check: always answers "pong"; +args+ is ignored.
    def process(args)
      "pong"
    end
  end
end
|
388
|
+
|
389
|
+
class Noop
  class << self
    # Does nothing and answers "nope"; +args+ is ignored.
    def process(args)
      "nope"
    end
  end
end
|
394
|
+
|
395
|
+
class Start
  # Starts the Monit services when the agent is configured and answers
  # "started"; +args+ is ignored.
  #
  # @raise [Bosh::Agent::MessageHandlerError] wrapping any StandardError
  def self.process(args)
    Bosh::Agent::Monit.start_services if Config.configure

    "started"
  rescue => e
    raise Bosh::Agent::MessageHandlerError, "Cannot start job: #{e}"
  end
end
|
408
|
+
|
409
|
+
# FIXME: temporary stop method
|
410
|
+
class Stop
  # Handler treats this processor as a long-running task.
  def self.long_running?
    true
  end

  # Stops the Monit services when the agent is configured and answers
  # "stopped"; +args+ is ignored.
  #
  # @raise [Bosh::Agent::MessageHandlerError] wrapping any StandardError
  def self.process(args)
    Bosh::Agent::Monit.stop_services if Config.configure

    "stopped"
  rescue => e
    # Monit retry logic should make it really hard to get here but if it happens we should yell.
    # One potential problem is that drain process might have unmonitored and killed processes
    # already but Monit became really unresponsive. In that case it might be a fake alert:
    # however this is not common and can be handled on case-by-case basis.
    raise Bosh::Agent::MessageHandlerError, "Cannot stop job: #{e}"
  end
end
|
431
|
+
|
432
|
+
class PrepareNetworkChange
  class << self
    # Acknowledges the request with true; +args+ is ignored.
    def process(args)
      true
    end
  end
end
|
437
|
+
|
438
|
+
end
|
439
|
+
|
440
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
2
|
+
|
3
|
+
module Bosh::Agent
|
4
|
+
|
5
|
+
class Heartbeat

  # Mostly for tests so we can override these without touching Config
  attr_accessor :logger, :nats, :agent_id, :state

  # Takes logger, NATS client, agent id and state from Config.
  def initialize
    @logger, @nats = Config.logger, Config.nats
    @agent_id, @state = Config.agent_id, Config.state
  end

  # Publishes one heartbeat on "hm.agent.heartbeat.<agent_id>".
  #
  # Without agent state nothing is sent (an error is logged). An optional
  # block is called from the publish callback.
  #
  # @raise [Bosh::Agent::HeartbeatError] when no NATS client is set
  def send_via_mbus
    if @state.nil?
      @logger.error("Unable to send heartbeat: agent state unknown")
      return
    end

    unless @nats
      raise Bosh::Agent::HeartbeatError, "NATS should be initialized in order to send heartbeats"
    end

    subject = "hm.agent.heartbeat.#{@agent_id}"
    @nats.publish(subject, heartbeat_payload) do
      yield if block_given?
      @logger.debug("Heartbeat delivered")
    end
    @logger.info("Heartbeat sent")
  end

  # Heartbeat payload example (values are illustrative):
  # {
  #   "job": "cloud_controller",
  #   "index": 3,
  #   "job_state": "running",
  #   "vitals": {
  #     "load": ["0.09","0.04","0.01"],
  #     "cpu": {"user":"0.0","sys":"0.0","wait":"0.4"},
  #     "mem": {"percent":"3.5","kb":"145996"},
  #     "swap": {"percent":"0.0","kb":"0"},
  #     "disk": {
  #       "system": {"percent":"82"},
  #       "ephemeral": {"percent":"5"},
  #       "persistent": {"percent":"94"}
  #     }
  #   },
  #   "ntp": {
  #     "offset": "-0.06423",
  #     "timestamp": "14 Oct 11:13:19"
  #   }
  # }
  #
  # @return [String] JSON-encoded heartbeat
  def heartbeat_payload
    job_state = Bosh::Agent::Monit.service_group_state
    # Monit vitals plus disk usage under the "disk" key.
    vitals = Bosh::Agent::Monit.get_vitals.merge("disk" => Bosh::Agent::DiskUtil.get_usage)

    job_name = (@state["job"]["name"] if @state["job"])
    index = @state["index"]

    Yajl::Encoder.encode(
      "job" => job_name,
      "index" => index,
      "job_state" => job_state,
      "vitals" => vitals,
      "ntp" => Bosh::Agent::NTP.offset
    )
  end

end
|
74
|
+
end
|