bosh_agent 1.2411.0 → 1.2416.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,10 +1,9 @@
1
- # Copyright (c) 2009-2012 VMware, Inc.
2
-
3
1
  module Bosh::Agent
4
2
 
5
3
  class Handler
6
4
  include Bosh::Exec
7
5
 
6
+ attr_accessor :current_long_running_task
8
7
  attr_accessor :nats
9
8
  attr_reader :processors
10
9
 
@@ -17,7 +16,7 @@ module Bosh::Agent
17
16
 
18
17
  # Seconds until we kill the agent so it can be restarted:
19
18
  KILL_AGENT_THREAD_TIMEOUT_ON_ERRORS = 15 # When there's an unexpected error
20
- KILL_AGENT_THREAD_TIMEOUT_ON_RESTART = 1 # When we force a restart
19
+ KILL_AGENT_THREAD_TIMEOUT_ON_RESTART = 1 # When we force a restart
21
20
 
22
21
  def initialize
23
22
  @agent_id = Config.agent_id
@@ -36,7 +35,7 @@ module Bosh::Agent
36
35
  @lock = Mutex.new
37
36
 
38
37
  @results = []
39
- @long_running_agent_task = []
38
+ self.current_long_running_task = {}
40
39
  @restarting_agent = false
41
40
 
42
41
  @nats_fail_count = 0
@@ -55,7 +54,7 @@ module Bosh::Agent
55
54
  klazz = Bosh::Agent::Message.const_get(c)
56
55
  if klazz.respond_to?(:process)
57
56
  # CamelCase -> under_score -> downcased
58
- processor_key = c.to_s.gsub(/(.)([A-Z])/,'\1_\2').downcase
57
+ processor_key = c.to_s.gsub(/(.)([A-Z])/, '\1_\2').downcase
59
58
  @processors[processor_key] = klazz
60
59
  end
61
60
  end
@@ -66,12 +65,13 @@ module Bosh::Agent
66
65
  @processors[method]
67
66
  end
68
67
 
68
+ # rubocop:disable MethodLength
69
69
  def start
70
- ['TERM', 'INT', 'QUIT'].each { |s| trap(s) { shutdown } }
70
+ %w(TERM INT QUIT).each { |s| trap(s) { shutdown } }
71
71
 
72
72
  EM.run do
73
73
  begin
74
- @nats = NATS.connect(:uri => @nats_uri, :autostart => false) { on_connect }
74
+ @nats = NATS.connect(uri: @nats_uri, autostart: false) { on_connect }
75
75
  Config.nats = @nats
76
76
  rescue Errno::ENETUNREACH, Timeout::Error => e
77
77
  @logger.info("Unable to talk to nats - retry (#{e.inspect})")
@@ -82,12 +82,12 @@ module Bosh::Agent
82
82
  setup_heartbeats
83
83
 
84
84
  if @process_alerts
85
- if (@smtp_port.nil? || @smtp_user.nil? || @smtp_password.nil?)
86
- @logger.error "Cannot start alert processor without having SMTP port, user and password configured"
87
- @logger.error "Agent will be running but alerts will NOT be properly processed"
85
+ if @smtp_port.nil? || @smtp_user.nil? || @smtp_password.nil?
86
+ @logger.error 'Cannot start alert processor without having SMTP port, user and password configured'
87
+ @logger.error 'Agent will be running but alerts will NOT be properly processed'
88
88
  else
89
89
  @logger.debug("SMTP: #{@smtp_password}")
90
- @processor = Bosh::Agent::AlertProcessor.start("127.0.0.1", @smtp_port, @smtp_user, @smtp_password)
90
+ @processor = Bosh::Agent::AlertProcessor.start('127.0.0.1', @smtp_port, @smtp_user, @smtp_password)
91
91
  setup_syslog_monitor
92
92
  end
93
93
  end
@@ -100,10 +100,14 @@ module Bosh::Agent
100
100
  retry if @nats_fail_count < MAX_NATS_RETRIES
101
101
  @logger.fatal("Unable to reconnect to NATS after #{MAX_NATS_RETRIES} retries, exiting...")
102
102
  end
103
+ # rubocop:enable MethodLength
103
104
 
104
105
  def shutdown
105
- @logger.info("Exit")
106
- NATS.stop { EM.stop; exit }
106
+ @logger.info('Exit')
107
+ NATS.stop do
108
+ EM.stop
109
+ exit
110
+ end
107
111
  end
108
112
 
109
113
  def on_connect
@@ -118,7 +122,7 @@ module Bosh::Agent
118
122
  @hbp.enable(interval)
119
123
  @logger.info("Heartbeats are enabled and will be sent every #{interval} seconds")
120
124
  else
121
- @logger.warn("Heartbeats are disabled")
125
+ @logger.warn('Heartbeats are disabled')
122
126
  end
123
127
  end
124
128
 
@@ -126,10 +130,11 @@ module Bosh::Agent
126
130
  Bosh::Agent::SyslogMonitor.start(@nats, @agent_id)
127
131
  end
128
132
 
133
+ # rubocop:disable MethodLength
129
134
  def handle_message(json)
130
135
  msg = Yajl::Parser.new.parse(json)
131
136
 
132
- unless msg["reply_to"]
137
+ unless msg['reply_to']
133
138
  @logger.info("Missing reply_to in: #{msg}")
134
139
  return
135
140
  end
@@ -145,8 +150,8 @@ module Bosh::Agent
145
150
  method = msg['method']
146
151
  args = msg['arguments']
147
152
 
148
- if method == "get_state"
149
- method = "state"
153
+ if method == 'get_state'
154
+ method = 'state'
150
155
  end
151
156
 
152
157
  processor = lookup(method)
@@ -154,9 +159,11 @@ module Bosh::Agent
154
159
  EM.defer do
155
160
  process_in_thread(processor, reply_to, method, args)
156
161
  end
157
- elsif method == "get_task"
162
+ elsif method == 'cancel_task'
163
+ handle_cancel_task(reply_to, args.first)
164
+ elsif method == 'get_task'
158
165
  handle_get_task(reply_to, args.first)
159
- elsif method == "shutdown"
166
+ elsif method == 'shutdown'
160
167
  handle_shutdown(reply_to)
161
168
  else
162
169
  re = RemoteException.new("unknown message #{msg.inspect}")
@@ -165,19 +172,21 @@ module Bosh::Agent
165
172
  rescue Yajl::ParseError => e
166
173
  @logger.info("Failed to parse message: #{json}: #{e.inspect}: #{e.backtrace}")
167
174
  end
175
+ # rubocop:enable MethodLength
168
176
 
177
+ # rubocop:disable MethodLength
169
178
  def process_in_thread(processor, reply_to, method, args)
170
179
  if processor.respond_to?(:long_running?)
171
180
  if @restarting_agent
172
- exception = RemoteException.new("restarting agent")
181
+ exception = RemoteException.new('restarting agent')
173
182
  publish(reply_to, exception.to_hash)
174
183
  else
175
184
  @lock.synchronize do
176
- if @long_running_agent_task.empty?
177
- process_long_running(reply_to, processor, args)
178
- else
179
- exception = RemoteException.new("already running long running task")
185
+ if current_long_running_task[:task_id]
186
+ exception = RemoteException.new('already running long running task')
180
187
  publish(reply_to, exception.to_hash)
188
+ else
189
+ process_long_running(reply_to, processor, args)
181
190
  end
182
191
  end
183
192
  end
@@ -198,17 +207,32 @@ module Bosh::Agent
198
207
  # log an error as this would otherwise be lost
199
208
  @logger.error("#{processor.to_s}: #{e.message}\n#{e.backtrace.join("\n")}")
200
209
  end
210
+ # rubocop:enable MethodLength
211
+
212
+ def handle_cancel_task(reply_to, agent_task_id)
213
+ if current_long_running_task?(agent_task_id)
214
+ if current_long_running_task[:processor].respond_to?(:cancel)
215
+ current_long_running_task[:processor].cancel
216
+ publish(reply_to, { 'value' => 'canceled' })
217
+ self.current_long_running_task = {}
218
+ else
219
+ publish(reply_to, { 'exception' => "could not cancel task #{agent_task_id}" })
220
+ end
221
+ else
222
+ publish(reply_to, { 'exception' => 'unknown agent_task_id' })
223
+ end
224
+ end
201
225
 
202
226
  def handle_get_task(reply_to, agent_task_id)
203
- if @long_running_agent_task == [agent_task_id]
204
- publish(reply_to, {"value" => {"state" => "running", "agent_task_id" => agent_task_id}})
227
+ if current_long_running_task?(agent_task_id)
228
+ publish(reply_to, { 'value' => { 'state' => 'running', 'agent_task_id' => agent_task_id } })
205
229
  else
206
230
  rs = @results.find { |time, task_id, result| task_id == agent_task_id }
207
231
  if rs
208
- time, task_id, result = rs
232
+ _, _, result = rs
209
233
  publish(reply_to, result)
210
234
  else
211
- publish(reply_to, {"exception" => "unknown agent_task_id" })
235
+ publish(reply_to, { 'exception' => 'unknown agent_task_id' })
212
236
  end
213
237
  end
214
238
  end
@@ -230,7 +254,7 @@ module Bosh::Agent
230
254
  @nats.publish(reply_to, json, &blk)
231
255
  end
232
256
  else
233
- msg = "message > NATS_MAX_PAYLOAD, stored in blobstore"
257
+ msg = 'message > NATS_MAX_PAYLOAD, stored in blobstore'
234
258
  exception = RemoteException.new(msg, nil, unencrypted)
235
259
  @logger.fatal(msg)
236
260
  EM.next_tick do
@@ -242,14 +266,14 @@ module Bosh::Agent
242
266
  def process_long_running(reply_to, processor, args)
243
267
  agent_task_id = generate_agent_task_id
244
268
 
245
- @long_running_agent_task = [agent_task_id]
269
+ self.current_long_running_task = { task_id: agent_task_id, processor: processor }
246
270
 
247
- payload = {:value => {:state => "running", :agent_task_id => agent_task_id}}
271
+ payload = { value: { state: 'running', agent_task_id: agent_task_id } }
248
272
  publish(reply_to, payload)
249
273
 
250
274
  result = process(processor, args)
251
275
  @results << [Time.now.to_i, agent_task_id, result]
252
- @long_running_agent_task = []
276
+ self.current_long_running_task = {}
253
277
  end
254
278
 
255
279
  def kill_main_thread_in(seconds)
@@ -261,17 +285,17 @@ module Bosh::Agent
261
285
  end
262
286
 
263
287
  def process(processor, args)
264
- begin
265
- result = processor.process(args)
266
- return {:value => result}
267
- rescue Bosh::Agent::Error => e
268
- @logger.info("#{e.inspect}: #{e.backtrace}")
269
- return RemoteException.from(e).to_hash
270
- rescue Exception => e
271
- kill_main_thread_in(KILL_AGENT_THREAD_TIMEOUT_ON_ERRORS)
272
- @logger.error("#{e.inspect}: #{e.backtrace}")
273
- return {:exception => "#{e.inspect}: #{e.backtrace}"}
274
- end
288
+ result = processor.process(args)
289
+ return { value: result }
290
+ rescue Bosh::Agent::Error => e
291
+ @logger.info("#{e.inspect}: #{e.backtrace}")
292
+ return RemoteException.from(e).to_hash
293
+ # rubocop:disable RescueException
294
+ rescue Exception => e
295
+ # rubocop:enable RescueException
296
+ kill_main_thread_in(KILL_AGENT_THREAD_TIMEOUT_ON_ERRORS)
297
+ @logger.error("#{e.inspect}: #{e.backtrace}")
298
+ return { exception: "#{e.inspect}: #{e.backtrace}" }
275
299
  end
276
300
 
277
301
  def generate_agent_task_id
@@ -279,33 +303,34 @@ module Bosh::Agent
279
303
  end
280
304
 
281
305
  ##
282
- # When there's a network change on an existing vm, director sends a prepare_network_change message to the vm
306
+ # When there's a network change on an existing vm, director sends a prepare_network_change message to the vm
283
307
  # agent. After agent replies to director with a `true` message, the post_prepare_network_change method is called
284
308
  # (via EM callback).
285
309
  #
286
- # The post_prepare_network_change method will delete the udev network persistent rules, delete the agent settings
287
- # and then it should restart the agent to get the new agent settings (set by director-cpi). For a simple network
288
- # change (i.e. dns changes) this is enough, as when the agent is restarted it will apply the new network settings.
289
- # But for other network changes (i.e. IP change), the CPI will be responsible to reboot or recreate the vm if needed.
310
+ # The post_prepare_network_change method will delete the udev network persistent rules, delete the agent settings
311
+ # and then it should restart the agent to get the new agent settings (set by director-cpi). For a simple network
312
+ # change (i.e. dns changes) this is enough, as when the agent is restarted it will apply the new network settings.
313
+ # But for other network changes (i.e. IP change), the CPI will be responsible to reboot or recreate the vm if
314
+ # needed.
290
315
  def post_prepare_network_change
291
316
  if Bosh::Agent::Config.configure
292
317
  udev_file = '/etc/udev/rules.d/70-persistent-net.rules'
293
318
  if File.exist?(udev_file)
294
- @logger.info("deleting 70-persistent-net.rules - again")
319
+ @logger.info('deleting 70-persistent-net.rules - again')
295
320
  File.delete(udev_file)
296
321
  end
297
- @logger.info("Removing settings.json")
322
+ @logger.info('Removing settings.json')
298
323
  settings_file = Bosh::Agent::Config.settings_file
299
324
  File.delete(settings_file)
300
325
  end
301
326
 
302
- @logger.info("Restarting agent to prepare for a network change")
327
+ @logger.info('Restarting agent to prepare for a network change')
303
328
  kill_main_thread_in(KILL_AGENT_THREAD_TIMEOUT_ON_RESTART)
304
329
  end
305
330
 
306
331
  def handle_shutdown(reply_to)
307
332
  @logger.info("Shutting down #{URI.parse(Config.mbus).scheme.upcase} connection")
308
- payload = {:value => "shutdown"}
333
+ payload = { value: 'shutdown' }
309
334
 
310
335
  if Bosh::Agent::Config.configure
311
336
  # We should never come back up again
@@ -330,30 +355,30 @@ module Bosh::Agent
330
355
  end
331
356
 
332
357
  def decrypt(msg)
333
- [ "session_id", "encrypted_data" ].each do |key|
358
+ %w(session_id encrypted_data).each do |key|
334
359
  unless msg.key?(key)
335
360
  @logger.info("Missing #{key} in #{msg}")
336
361
  return
337
362
  end
338
363
  end
339
364
 
340
- message_session_id = msg["session_id"]
341
- reply_to = msg["reply_to"]
365
+ message_session_id = msg['session_id']
366
+ reply_to = msg['reply_to']
342
367
 
343
- encryption_handler = lookup_encryption_handler(:session_id => message_session_id)
368
+ encryption_handler = lookup_encryption_handler(session_id: message_session_id)
344
369
 
345
370
  # save message handler for the reply
346
371
  @session_reply_map[reply_to] = encryption_handler
347
372
 
348
373
  # Log exceptions from the EncryptionHandler, but stay quiet on the wire.
349
374
  begin
350
- msg = encryption_handler.decrypt(msg["encrypted_data"])
375
+ msg = encryption_handler.decrypt(msg['encrypted_data'])
351
376
  rescue Bosh::Core::EncryptionHandler::CryptError => e
352
377
  log_encryption_error(e)
353
378
  return
354
379
  end
355
380
 
356
- msg["reply_to"] = reply_to
381
+ msg['reply_to'] = reply_to
357
382
 
358
383
  @logger.info("Decrypted Message: #{msg}")
359
384
  msg
@@ -364,17 +389,21 @@ module Bosh::Agent
364
389
  end
365
390
 
366
391
  def encrypt(reply_to, payload)
367
- encryption_handler = lookup_encryption_handler(:reply_to => reply_to)
392
+ encryption_handler = lookup_encryption_handler(reply_to: reply_to)
368
393
  session_id = encryption_handler.session_id
369
394
 
370
395
  payload = {
371
- "session_id" => session_id,
372
- "encrypted_data" => encryption_handler.encrypt(payload)
396
+ 'session_id' => session_id,
397
+ 'encrypted_data' => encryption_handler.encrypt(payload)
373
398
  }
374
399
 
375
400
  payload
376
401
  end
377
402
 
403
+ def current_long_running_task?(agent_task_id)
404
+ current_long_running_task[:task_id] == agent_task_id
405
+ end
406
+
378
407
  end
379
408
 
380
409
  # Built-in message handlers
@@ -382,13 +411,13 @@ module Bosh::Agent
382
411
 
383
412
  class Ping
384
413
  def self.process(args)
385
- "pong"
414
+ 'pong'
386
415
  end
387
416
  end
388
417
 
389
418
  class Noop
390
419
  def self.process(args)
391
- "nope"
420
+ 'nope'
392
421
  end
393
422
  end
394
423
 
@@ -399,7 +428,7 @@ module Bosh::Agent
399
428
  Bosh::Agent::Monit.start_services
400
429
  end
401
430
 
402
- "started"
431
+ 'started'
403
432
 
404
433
  rescue => e
405
434
  raise Bosh::Agent::MessageHandlerError, "Cannot start job: #{e}"
@@ -418,7 +447,7 @@ module Bosh::Agent
418
447
  Bosh::Agent::Monit.stop_services
419
448
  end
420
449
 
421
- "stopped"
450
+ 'stopped'
422
451
 
423
452
  rescue => e
424
453
  # Monit retry logic should make it really hard to get here but if it happens we should yell.
@@ -3,10 +3,23 @@ require 'open3'
3
3
  module Bosh::Agent
4
4
  module Message
5
5
  class RunErrand
6
+ CANCEL_GRACE_PERIOD_SECONDS = 30
7
+
6
8
  def self.process(args)
7
9
  self.new(args).start
8
10
  end
9
11
 
12
+ def self.cancel
13
+ pid = running_errand_pid
14
+ Process.kill('-TERM', pid) if errand_running?
15
+ CANCEL_GRACE_PERIOD_SECONDS.times do
16
+ break unless errand_running?
17
+ sleep 1
18
+ end
19
+ Process.kill('-KILL', pid) if errand_running?
20
+ rescue Errno::ESRCH
21
+ end
22
+
10
23
  def self.long_running?
11
24
  true
12
25
  end
@@ -27,9 +40,9 @@ module Bosh::Agent
27
40
 
28
41
  job_template_name = job_templates.first.fetch('name')
29
42
 
30
- env = { 'PATH' => '/usr/sbin:/usr/bin:/sbin:/bin' }
31
- cmd = "#{@base_dir}/jobs/#{job_template_name}/bin/run"
32
- opts = { :unsetenv_others => true }
43
+ env = { 'PATH' => '/usr/sbin:/usr/bin:/sbin:/bin', 'TMPDIR' => ENV['TMPDIR'] }
44
+ cmd = "#{@base_dir}/jobs/#{job_template_name}/bin/run"
45
+ opts = { unsetenv_others: true, pgroup: true }
33
46
 
34
47
  unless File.executable?(cmd)
35
48
  raise Bosh::Agent::MessageHandlerError,
@@ -37,9 +50,20 @@ module Bosh::Agent
37
50
  end
38
51
 
39
52
  begin
40
- stdout, stderr, status = Open3.capture3(env, cmd, opts)
53
+ stdout, stderr, status = Open3.popen3(env, cmd, opts) { |i, o, e, t|
54
+ self.class.running_errand_pid = t.pid
55
+
56
+ out_reader = Thread.new { o.read }
57
+ err_reader = Thread.new { e.read }
58
+
59
+ i.close
60
+
61
+ [out_reader.value, err_reader.value, t.value]
62
+ }
63
+ self.class.running_errand_pid = nil
64
+
41
65
  {
42
- 'exit_code' => status.exitstatus,
66
+ 'exit_code' => extract_status_code(status),
43
67
  'stdout' => stdout,
44
68
  'stderr' => stderr,
45
69
  }
@@ -48,6 +72,22 @@ module Bosh::Agent
48
72
  raise Bosh::Agent::MessageHandlerError, e.inspect
49
73
  end
50
74
  end
75
+
76
+ def extract_status_code(status)
77
+ status.exitstatus || (status.termsig + 128)
78
+ end
79
+
80
+ class << self
81
+ attr_accessor :running_errand_pid
82
+ end
83
+
84
+ def self.errand_running?
85
+ return false unless running_errand_pid
86
+ Process.kill(0, running_errand_pid)
87
+ true
88
+ rescue Errno::ESRCH
89
+ false
90
+ end
51
91
  end
52
92
  end
53
93
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bosh
4
4
  module Agent
5
- VERSION = '1.2411.0'
5
+ VERSION = '1.2416.0'
6
6
  BOSH_PROTOCOL = "1"
7
7
  end
8
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bosh_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2411.0
4
+ version: 1.2416.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-04-16 00:00:00.000000000 Z
12
+ date: 2014-04-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: netaddr
@@ -162,7 +162,7 @@ dependencies:
162
162
  requirements:
163
163
  - - ~>
164
164
  - !ruby/object:Gem::Version
165
- version: 1.2411.0
165
+ version: 1.2416.0
166
166
  type: :runtime
167
167
  prerelease: false
168
168
  version_requirements: !ruby/object:Gem::Requirement
@@ -170,7 +170,7 @@ dependencies:
170
170
  requirements:
171
171
  - - ~>
172
172
  - !ruby/object:Gem::Version
173
- version: 1.2411.0
173
+ version: 1.2416.0
174
174
  - !ruby/object:Gem::Dependency
175
175
  name: bosh_common
176
176
  requirement: !ruby/object:Gem::Requirement
@@ -178,7 +178,7 @@ dependencies:
178
178
  requirements:
179
179
  - - ~>
180
180
  - !ruby/object:Gem::Version
181
- version: 1.2411.0
181
+ version: 1.2416.0
182
182
  type: :runtime
183
183
  prerelease: false
184
184
  version_requirements: !ruby/object:Gem::Requirement
@@ -186,7 +186,7 @@ dependencies:
186
186
  requirements:
187
187
  - - ~>
188
188
  - !ruby/object:Gem::Version
189
- version: 1.2411.0
189
+ version: 1.2416.0
190
190
  - !ruby/object:Gem::Dependency
191
191
  name: blobstore_client
192
192
  requirement: !ruby/object:Gem::Requirement
@@ -194,7 +194,7 @@ dependencies:
194
194
  requirements:
195
195
  - - ~>
196
196
  - !ruby/object:Gem::Version
197
- version: 1.2411.0
197
+ version: 1.2416.0
198
198
  type: :runtime
199
199
  prerelease: false
200
200
  version_requirements: !ruby/object:Gem::Requirement
@@ -202,7 +202,7 @@ dependencies:
202
202
  requirements:
203
203
  - - ~>
204
204
  - !ruby/object:Gem::Version
205
- version: 1.2411.0
205
+ version: 1.2416.0
206
206
  - !ruby/object:Gem::Dependency
207
207
  name: rspec
208
208
  requirement: !ruby/object:Gem::Requirement
@@ -238,7 +238,7 @@ dependencies:
238
238
  description: ! 'This agent listens for instructions from the bosh director on each
239
239
  server that bosh manages.
240
240
 
241
- a126f7'
241
+ 7ca225'
242
242
  email: support@cloudfoundry.com
243
243
  executables:
244
244
  - bosh_agent
@@ -344,7 +344,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
344
344
  version: '0'
345
345
  segments:
346
346
  - 0
347
- hash: 19318725305148884
347
+ hash: 3410278131475347007
348
348
  requirements: []
349
349
  rubyforge_project:
350
350
  rubygems_version: 1.8.23