bosh_agent 1.2411.0 → 1.2416.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,9 @@
1
- # Copyright (c) 2009-2012 VMware, Inc.
2
-
3
1
  module Bosh::Agent
4
2
 
5
3
  class Handler
6
4
  include Bosh::Exec
7
5
 
6
+ attr_accessor :current_long_running_task
8
7
  attr_accessor :nats
9
8
  attr_reader :processors
10
9
 
@@ -17,7 +16,7 @@ module Bosh::Agent
17
16
 
18
17
  # Seconds until we kill the agent so it can be restarted:
19
18
  KILL_AGENT_THREAD_TIMEOUT_ON_ERRORS = 15 # When there's an unexpected error
20
- KILL_AGENT_THREAD_TIMEOUT_ON_RESTART = 1 # When we force a restart
19
+ KILL_AGENT_THREAD_TIMEOUT_ON_RESTART = 1 # When we force a restart
21
20
 
22
21
  def initialize
23
22
  @agent_id = Config.agent_id
@@ -36,7 +35,7 @@ module Bosh::Agent
36
35
  @lock = Mutex.new
37
36
 
38
37
  @results = []
39
- @long_running_agent_task = []
38
+ self.current_long_running_task = {}
40
39
  @restarting_agent = false
41
40
 
42
41
  @nats_fail_count = 0
@@ -55,7 +54,7 @@ module Bosh::Agent
55
54
  klazz = Bosh::Agent::Message.const_get(c)
56
55
  if klazz.respond_to?(:process)
57
56
  # CamelCase -> under_score -> downcased
58
- processor_key = c.to_s.gsub(/(.)([A-Z])/,'\1_\2').downcase
57
+ processor_key = c.to_s.gsub(/(.)([A-Z])/, '\1_\2').downcase
59
58
  @processors[processor_key] = klazz
60
59
  end
61
60
  end
@@ -66,12 +65,13 @@ module Bosh::Agent
66
65
  @processors[method]
67
66
  end
68
67
 
68
+ # rubocop:disable MethodLength
69
69
  def start
70
- ['TERM', 'INT', 'QUIT'].each { |s| trap(s) { shutdown } }
70
+ %w(TERM INT QUIT).each { |s| trap(s) { shutdown } }
71
71
 
72
72
  EM.run do
73
73
  begin
74
- @nats = NATS.connect(:uri => @nats_uri, :autostart => false) { on_connect }
74
+ @nats = NATS.connect(uri: @nats_uri, autostart: false) { on_connect }
75
75
  Config.nats = @nats
76
76
  rescue Errno::ENETUNREACH, Timeout::Error => e
77
77
  @logger.info("Unable to talk to nats - retry (#{e.inspect})")
@@ -82,12 +82,12 @@ module Bosh::Agent
82
82
  setup_heartbeats
83
83
 
84
84
  if @process_alerts
85
- if (@smtp_port.nil? || @smtp_user.nil? || @smtp_password.nil?)
86
- @logger.error "Cannot start alert processor without having SMTP port, user and password configured"
87
- @logger.error "Agent will be running but alerts will NOT be properly processed"
85
+ if @smtp_port.nil? || @smtp_user.nil? || @smtp_password.nil?
86
+ @logger.error 'Cannot start alert processor without having SMTP port, user and password configured'
87
+ @logger.error 'Agent will be running but alerts will NOT be properly processed'
88
88
  else
89
89
  @logger.debug("SMTP: #{@smtp_password}")
90
- @processor = Bosh::Agent::AlertProcessor.start("127.0.0.1", @smtp_port, @smtp_user, @smtp_password)
90
+ @processor = Bosh::Agent::AlertProcessor.start('127.0.0.1', @smtp_port, @smtp_user, @smtp_password)
91
91
  setup_syslog_monitor
92
92
  end
93
93
  end
@@ -100,10 +100,14 @@ module Bosh::Agent
100
100
  retry if @nats_fail_count < MAX_NATS_RETRIES
101
101
  @logger.fatal("Unable to reconnect to NATS after #{MAX_NATS_RETRIES} retries, exiting...")
102
102
  end
103
+ # rubocop:enable MethodLength
103
104
 
104
105
  def shutdown
105
- @logger.info("Exit")
106
- NATS.stop { EM.stop; exit }
106
+ @logger.info('Exit')
107
+ NATS.stop do
108
+ EM.stop
109
+ exit
110
+ end
107
111
  end
108
112
 
109
113
  def on_connect
@@ -118,7 +122,7 @@ module Bosh::Agent
118
122
  @hbp.enable(interval)
119
123
  @logger.info("Heartbeats are enabled and will be sent every #{interval} seconds")
120
124
  else
121
- @logger.warn("Heartbeats are disabled")
125
+ @logger.warn('Heartbeats are disabled')
122
126
  end
123
127
  end
124
128
 
@@ -126,10 +130,11 @@ module Bosh::Agent
126
130
  Bosh::Agent::SyslogMonitor.start(@nats, @agent_id)
127
131
  end
128
132
 
133
+ # rubocop:disable MethodLength
129
134
  def handle_message(json)
130
135
  msg = Yajl::Parser.new.parse(json)
131
136
 
132
- unless msg["reply_to"]
137
+ unless msg['reply_to']
133
138
  @logger.info("Missing reply_to in: #{msg}")
134
139
  return
135
140
  end
@@ -145,8 +150,8 @@ module Bosh::Agent
145
150
  method = msg['method']
146
151
  args = msg['arguments']
147
152
 
148
- if method == "get_state"
149
- method = "state"
153
+ if method == 'get_state'
154
+ method = 'state'
150
155
  end
151
156
 
152
157
  processor = lookup(method)
@@ -154,9 +159,11 @@ module Bosh::Agent
154
159
  EM.defer do
155
160
  process_in_thread(processor, reply_to, method, args)
156
161
  end
157
- elsif method == "get_task"
162
+ elsif method == 'cancel_task'
163
+ handle_cancel_task(reply_to, args.first)
164
+ elsif method == 'get_task'
158
165
  handle_get_task(reply_to, args.first)
159
- elsif method == "shutdown"
166
+ elsif method == 'shutdown'
160
167
  handle_shutdown(reply_to)
161
168
  else
162
169
  re = RemoteException.new("unknown message #{msg.inspect}")
@@ -165,19 +172,21 @@ module Bosh::Agent
165
172
  rescue Yajl::ParseError => e
166
173
  @logger.info("Failed to parse message: #{json}: #{e.inspect}: #{e.backtrace}")
167
174
  end
175
+ # rubocop:enable MethodLength
168
176
 
177
+ # rubocop:disable MethodLength
169
178
  def process_in_thread(processor, reply_to, method, args)
170
179
  if processor.respond_to?(:long_running?)
171
180
  if @restarting_agent
172
- exception = RemoteException.new("restarting agent")
181
+ exception = RemoteException.new('restarting agent')
173
182
  publish(reply_to, exception.to_hash)
174
183
  else
175
184
  @lock.synchronize do
176
- if @long_running_agent_task.empty?
177
- process_long_running(reply_to, processor, args)
178
- else
179
- exception = RemoteException.new("already running long running task")
185
+ if current_long_running_task[:task_id]
186
+ exception = RemoteException.new('already running long running task')
180
187
  publish(reply_to, exception.to_hash)
188
+ else
189
+ process_long_running(reply_to, processor, args)
181
190
  end
182
191
  end
183
192
  end
@@ -198,17 +207,32 @@ module Bosh::Agent
198
207
  # log an error as this would otherwise be lost
199
208
  @logger.error("#{processor.to_s}: #{e.message}\n#{e.backtrace.join("\n")}")
200
209
  end
210
+ # rubocop:enable MethodLength
211
+
212
# Handles a 'cancel_task' request aimed at the long-running task that is
# tracked in current_long_running_task.
#
# Exactly one reply is published to reply_to:
# * { 'value' => 'canceled' } once the tracked processor's +cancel+ has returned
# * { 'exception' => "could not cancel task <id>" } when the tracked processor
#   does not respond to +cancel+
# * { 'exception' => 'unknown agent_task_id' } when agent_task_id is not the
#   tracked task
#
# @param reply_to reply subject, passed straight through to publish
# @param agent_task_id id of the task the caller wants cancelled
def handle_cancel_task(reply_to, agent_task_id)
  unless current_long_running_task?(agent_task_id)
    publish(reply_to, { 'exception' => 'unknown agent_task_id' })
    return
  end

  processor = current_long_running_task[:processor]
  unless processor.respond_to?(:cancel)
    publish(reply_to, { 'exception' => "could not cancel task #{agent_task_id}" })
    return
  end

  processor.cancel
  publish(reply_to, { 'value' => 'canceled' })

  # cancel can take a while. If the tracked entry was replaced by another task
  # in the meantime, leave it alone; only clear the entry we actually cancelled.
  self.current_long_running_task = {} if current_long_running_task?(agent_task_id)
end
201
225
 
202
226
  def handle_get_task(reply_to, agent_task_id)
203
- if @long_running_agent_task == [agent_task_id]
204
- publish(reply_to, {"value" => {"state" => "running", "agent_task_id" => agent_task_id}})
227
+ if current_long_running_task?(agent_task_id)
228
+ publish(reply_to, { 'value' => { 'state' => 'running', 'agent_task_id' => agent_task_id } })
205
229
  else
206
230
  rs = @results.find { |time, task_id, result| task_id == agent_task_id }
207
231
  if rs
208
- time, task_id, result = rs
232
+ _, _, result = rs
209
233
  publish(reply_to, result)
210
234
  else
211
- publish(reply_to, {"exception" => "unknown agent_task_id" })
235
+ publish(reply_to, { 'exception' => 'unknown agent_task_id' })
212
236
  end
213
237
  end
214
238
  end
@@ -230,7 +254,7 @@ module Bosh::Agent
230
254
  @nats.publish(reply_to, json, &blk)
231
255
  end
232
256
  else
233
- msg = "message > NATS_MAX_PAYLOAD, stored in blobstore"
257
+ msg = 'message > NATS_MAX_PAYLOAD, stored in blobstore'
234
258
  exception = RemoteException.new(msg, nil, unencrypted)
235
259
  @logger.fatal(msg)
236
260
  EM.next_tick do
@@ -242,14 +266,14 @@ module Bosh::Agent
242
266
  def process_long_running(reply_to, processor, args)
243
267
  agent_task_id = generate_agent_task_id
244
268
 
245
- @long_running_agent_task = [agent_task_id]
269
+ self.current_long_running_task = { task_id: agent_task_id, processor: processor }
246
270
 
247
- payload = {:value => {:state => "running", :agent_task_id => agent_task_id}}
271
+ payload = { value: { state: 'running', agent_task_id: agent_task_id } }
248
272
  publish(reply_to, payload)
249
273
 
250
274
  result = process(processor, args)
251
275
  @results << [Time.now.to_i, agent_task_id, result]
252
- @long_running_agent_task = []
276
+ self.current_long_running_task = {}
253
277
  end
254
278
 
255
279
  def kill_main_thread_in(seconds)
@@ -261,17 +285,17 @@ module Bosh::Agent
261
285
  end
262
286
 
263
287
  def process(processor, args)
264
- begin
265
- result = processor.process(args)
266
- return {:value => result}
267
- rescue Bosh::Agent::Error => e
268
- @logger.info("#{e.inspect}: #{e.backtrace}")
269
- return RemoteException.from(e).to_hash
270
- rescue Exception => e
271
- kill_main_thread_in(KILL_AGENT_THREAD_TIMEOUT_ON_ERRORS)
272
- @logger.error("#{e.inspect}: #{e.backtrace}")
273
- return {:exception => "#{e.inspect}: #{e.backtrace}"}
274
- end
288
+ result = processor.process(args)
289
+ return { value: result }
290
+ rescue Bosh::Agent::Error => e
291
+ @logger.info("#{e.inspect}: #{e.backtrace}")
292
+ return RemoteException.from(e).to_hash
293
+ # rubocop:disable RescueException
294
+ rescue Exception => e
295
+ # rubocop:enable RescueException
296
+ kill_main_thread_in(KILL_AGENT_THREAD_TIMEOUT_ON_ERRORS)
297
+ @logger.error("#{e.inspect}: #{e.backtrace}")
298
+ return { exception: "#{e.inspect}: #{e.backtrace}" }
275
299
  end
276
300
 
277
301
  def generate_agent_task_id
@@ -279,33 +303,34 @@ module Bosh::Agent
279
303
  end
280
304
 
281
305
  ##
282
- # When there's a network change on an existing vm, director sends a prepare_network_change message to the vm
306
+ # When there's a network change on an existing vm, director sends a prepare_network_change message to the vm
283
307
  # agent. After agent replies to director with a `true` message, the post_prepare_network_change method is called
284
308
  # (via EM callback).
285
309
  #
286
- # The post_prepare_network_change method will delete the udev network persistent rules, delete the agent settings
287
- # and then it should restart the agent to get the new agent settings (set by director-cpi). For a simple network
288
- # change (i.e. dns changes) this is enough, as when the agent is restarted it will apply the new network settings.
289
- # But for other network changes (i.e. IP change), the CPI will be responsible to reboot or recreate the vm if needed.
310
+ # The post_prepare_network_change method will delete the udev network persistent rules, delete the agent settings
311
+ # and then it should restart the agent to get the new agent settings (set by director-cpi). For a simple network
312
+ # change (i.e. dns changes) this is enough, as when the agent is restarted it will apply the new network settings.
313
+ # But for other network changes (i.e. IP change), the CPI will be responsible to reboot or recreate the vm if
314
+ # needed.
290
315
  def post_prepare_network_change
291
316
  if Bosh::Agent::Config.configure
292
317
  udev_file = '/etc/udev/rules.d/70-persistent-net.rules'
293
318
  if File.exist?(udev_file)
294
- @logger.info("deleting 70-persistent-net.rules - again")
319
+ @logger.info('deleting 70-persistent-net.rules - again')
295
320
  File.delete(udev_file)
296
321
  end
297
- @logger.info("Removing settings.json")
322
+ @logger.info('Removing settings.json')
298
323
  settings_file = Bosh::Agent::Config.settings_file
299
324
  File.delete(settings_file)
300
325
  end
301
326
 
302
- @logger.info("Restarting agent to prepare for a network change")
327
+ @logger.info('Restarting agent to prepare for a network change')
303
328
  kill_main_thread_in(KILL_AGENT_THREAD_TIMEOUT_ON_RESTART)
304
329
  end
305
330
 
306
331
  def handle_shutdown(reply_to)
307
332
  @logger.info("Shutting down #{URI.parse(Config.mbus).scheme.upcase} connection")
308
- payload = {:value => "shutdown"}
333
+ payload = { value: 'shutdown' }
309
334
 
310
335
  if Bosh::Agent::Config.configure
311
336
  # We should never come back up again
@@ -330,30 +355,30 @@ module Bosh::Agent
330
355
  end
331
356
 
332
357
  def decrypt(msg)
333
- [ "session_id", "encrypted_data" ].each do |key|
358
+ %w(session_id encrypted_data).each do |key|
334
359
  unless msg.key?(key)
335
360
  @logger.info("Missing #{key} in #{msg}")
336
361
  return
337
362
  end
338
363
  end
339
364
 
340
- message_session_id = msg["session_id"]
341
- reply_to = msg["reply_to"]
365
+ message_session_id = msg['session_id']
366
+ reply_to = msg['reply_to']
342
367
 
343
- encryption_handler = lookup_encryption_handler(:session_id => message_session_id)
368
+ encryption_handler = lookup_encryption_handler(session_id: message_session_id)
344
369
 
345
370
  # save message handler for the reply
346
371
  @session_reply_map[reply_to] = encryption_handler
347
372
 
348
373
  # Log exceptions from the EncryptionHandler, but stay quiet on the wire.
349
374
  begin
350
- msg = encryption_handler.decrypt(msg["encrypted_data"])
375
+ msg = encryption_handler.decrypt(msg['encrypted_data'])
351
376
  rescue Bosh::Core::EncryptionHandler::CryptError => e
352
377
  log_encryption_error(e)
353
378
  return
354
379
  end
355
380
 
356
- msg["reply_to"] = reply_to
381
+ msg['reply_to'] = reply_to
357
382
 
358
383
  @logger.info("Decrypted Message: #{msg}")
359
384
  msg
@@ -364,17 +389,21 @@ module Bosh::Agent
364
389
  end
365
390
 
366
391
  def encrypt(reply_to, payload)
367
- encryption_handler = lookup_encryption_handler(:reply_to => reply_to)
392
+ encryption_handler = lookup_encryption_handler(reply_to: reply_to)
368
393
  session_id = encryption_handler.session_id
369
394
 
370
395
  payload = {
371
- "session_id" => session_id,
372
- "encrypted_data" => encryption_handler.encrypt(payload)
396
+ 'session_id' => session_id,
397
+ 'encrypted_data' => encryption_handler.encrypt(payload)
373
398
  }
374
399
 
375
400
  payload
376
401
  end
377
402
 
403
# Tells whether agent_task_id identifies the long-running task currently
# tracked in current_long_running_task.
#
# A nil agent_task_id never matches. While no task is tracked the stored
# :task_id is nil too, and a plain nil == nil comparison would report the
# idle agent as running that "task".
#
# @param agent_task_id id to compare against the tracked task's :task_id
# @return [Boolean]
def current_long_running_task?(agent_task_id)
  return false if agent_task_id.nil?

  current_long_running_task[:task_id] == agent_task_id
end
406
+
378
407
  end
379
408
 
380
409
  # Built-in message handlers
@@ -382,13 +411,13 @@ module Bosh::Agent
382
411
 
383
412
  class Ping
384
413
  def self.process(args)
385
- "pong"
414
+ 'pong'
386
415
  end
387
416
  end
388
417
 
389
418
  class Noop
390
419
  def self.process(args)
391
- "nope"
420
+ 'nope'
392
421
  end
393
422
  end
394
423
 
@@ -399,7 +428,7 @@ module Bosh::Agent
399
428
  Bosh::Agent::Monit.start_services
400
429
  end
401
430
 
402
- "started"
431
+ 'started'
403
432
 
404
433
  rescue => e
405
434
  raise Bosh::Agent::MessageHandlerError, "Cannot start job: #{e}"
@@ -418,7 +447,7 @@ module Bosh::Agent
418
447
  Bosh::Agent::Monit.stop_services
419
448
  end
420
449
 
421
- "stopped"
450
+ 'stopped'
422
451
 
423
452
  rescue => e
424
453
  # Monit retry logic should make it really hard to get here but if it happens we should yell.
@@ -3,10 +3,23 @@ require 'open3'
3
3
  module Bosh::Agent
4
4
  module Message
5
5
  class RunErrand
6
+ CANCEL_GRACE_PERIOD_SECONDS = 30
7
+
6
8
  def self.process(args)
7
9
  self.new(args).start
8
10
  end
9
11
 
12
# Stops the errand process recorded in running_errand_pid, if there is one.
#
# TERM is sent first. errand_running? is then polled once per second for up
# to CANCEL_GRACE_PERIOD_SECONDS seconds, and KILL is sent if the errand is
# still running afterwards. A process that vanishes in between
# (Errno::ESRCH) is treated as already stopped.
#
# The return value is not meaningful.
def self.cancel
  pid = running_errand_pid
  # Nothing recorded. Also guarantees nil never reaches Process.kill, which
  # would raise a TypeError that the rescue below does not cover.
  return unless pid

  # A signal name with a leading '-' is delivered to the process group.
  Process.kill('-TERM', pid) if errand_running?
  CANCEL_GRACE_PERIOD_SECONDS.times do
    break unless errand_running?
    sleep 1
  end
  Process.kill('-KILL', pid) if errand_running?
rescue Errno::ESRCH
  # The process is already gone; nothing left to stop.
end
22
+
10
23
  def self.long_running?
11
24
  true
12
25
  end
@@ -27,9 +40,9 @@ module Bosh::Agent
27
40
 
28
41
  job_template_name = job_templates.first.fetch('name')
29
42
 
30
- env = { 'PATH' => '/usr/sbin:/usr/bin:/sbin:/bin' }
31
- cmd = "#{@base_dir}/jobs/#{job_template_name}/bin/run"
32
- opts = { :unsetenv_others => true }
43
+ env = { 'PATH' => '/usr/sbin:/usr/bin:/sbin:/bin', 'TMPDIR' => ENV['TMPDIR'] }
44
+ cmd = "#{@base_dir}/jobs/#{job_template_name}/bin/run"
45
+ opts = { unsetenv_others: true, pgroup: true }
33
46
 
34
47
  unless File.executable?(cmd)
35
48
  raise Bosh::Agent::MessageHandlerError,
@@ -37,9 +50,20 @@ module Bosh::Agent
37
50
  end
38
51
 
39
52
  begin
40
- stdout, stderr, status = Open3.capture3(env, cmd, opts)
53
+ stdout, stderr, status = Open3.popen3(env, cmd, opts) { |i, o, e, t|
54
+ self.class.running_errand_pid = t.pid
55
+
56
+ out_reader = Thread.new { o.read }
57
+ err_reader = Thread.new { e.read }
58
+
59
+ i.close
60
+
61
+ [out_reader.value, err_reader.value, t.value]
62
+ }
63
+ self.class.running_errand_pid = nil
64
+
41
65
  {
42
- 'exit_code' => status.exitstatus,
66
+ 'exit_code' => extract_status_code(status),
43
67
  'stdout' => stdout,
44
68
  'stderr' => stderr,
45
69
  }
@@ -48,6 +72,22 @@ module Bosh::Agent
48
72
  raise Bosh::Agent::MessageHandlerError, e.inspect
49
73
  end
50
74
  end
75
+
76
# Reduces a finished process' status to a single exit code.
#
# @param status [#exitstatus, #termsig] status object of the errand process
# @return [Integer, nil] exitstatus when it is set; otherwise 128 + termsig;
#   nil when the status carries neither value
def extract_status_code(status)
  exit_code = status.exitstatus
  return exit_code if exit_code

  signal = status.termsig
  signal && signal + 128
end
79
+
80
class << self
  # Pid of the errand process that is currently recorded, or nil.
  attr_accessor :running_errand_pid
end

# Reports whether the process recorded in running_errand_pid still exists.
#
# @return [Boolean] false when no pid is recorded or the process is gone
def self.errand_running?
  # Read the accessor once so the value cannot turn nil between the check
  # and the probe below.
  pid = running_errand_pid
  return false unless pid

  # Signal 0 delivers nothing; it only checks that the pid can be signalled.
  Process.kill(0, pid)
  true
rescue Errno::ESRCH
  false
rescue Errno::EPERM
  # The process exists but this user may not signal it: still running.
  true
end
51
91
  end
52
92
  end
53
93
  end
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Bosh
4
4
  module Agent
5
- VERSION = '1.2411.0'
5
+ VERSION = '1.2416.0'
6
6
  BOSH_PROTOCOL = "1"
7
7
  end
8
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bosh_agent
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2411.0
4
+ version: 1.2416.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-04-16 00:00:00.000000000 Z
12
+ date: 2014-04-17 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: netaddr
@@ -162,7 +162,7 @@ dependencies:
162
162
  requirements:
163
163
  - - ~>
164
164
  - !ruby/object:Gem::Version
165
- version: 1.2411.0
165
+ version: 1.2416.0
166
166
  type: :runtime
167
167
  prerelease: false
168
168
  version_requirements: !ruby/object:Gem::Requirement
@@ -170,7 +170,7 @@ dependencies:
170
170
  requirements:
171
171
  - - ~>
172
172
  - !ruby/object:Gem::Version
173
- version: 1.2411.0
173
+ version: 1.2416.0
174
174
  - !ruby/object:Gem::Dependency
175
175
  name: bosh_common
176
176
  requirement: !ruby/object:Gem::Requirement
@@ -178,7 +178,7 @@ dependencies:
178
178
  requirements:
179
179
  - - ~>
180
180
  - !ruby/object:Gem::Version
181
- version: 1.2411.0
181
+ version: 1.2416.0
182
182
  type: :runtime
183
183
  prerelease: false
184
184
  version_requirements: !ruby/object:Gem::Requirement
@@ -186,7 +186,7 @@ dependencies:
186
186
  requirements:
187
187
  - - ~>
188
188
  - !ruby/object:Gem::Version
189
- version: 1.2411.0
189
+ version: 1.2416.0
190
190
  - !ruby/object:Gem::Dependency
191
191
  name: blobstore_client
192
192
  requirement: !ruby/object:Gem::Requirement
@@ -194,7 +194,7 @@ dependencies:
194
194
  requirements:
195
195
  - - ~>
196
196
  - !ruby/object:Gem::Version
197
- version: 1.2411.0
197
+ version: 1.2416.0
198
198
  type: :runtime
199
199
  prerelease: false
200
200
  version_requirements: !ruby/object:Gem::Requirement
@@ -202,7 +202,7 @@ dependencies:
202
202
  requirements:
203
203
  - - ~>
204
204
  - !ruby/object:Gem::Version
205
- version: 1.2411.0
205
+ version: 1.2416.0
206
206
  - !ruby/object:Gem::Dependency
207
207
  name: rspec
208
208
  requirement: !ruby/object:Gem::Requirement
@@ -238,7 +238,7 @@ dependencies:
238
238
  description: ! 'This agent listens for instructions from the bosh director on each
239
239
  server that bosh manages.
240
240
 
241
- a126f7'
241
+ 7ca225'
242
242
  email: support@cloudfoundry.com
243
243
  executables:
244
244
  - bosh_agent
@@ -344,7 +344,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
344
344
  version: '0'
345
345
  segments:
346
346
  - 0
347
- hash: 19318725305148884
347
+ hash: 3410278131475347007
348
348
  requirements: []
349
349
  rubyforge_project:
350
350
  rubygems_version: 1.8.23