bosh-director 1.2682.1.0 → 1.2685.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. checksums.yaml +4 -4
  2. data/bin/bosh-director +55 -2
  3. data/lib/bosh/director.rb +15 -2
  4. data/lib/bosh/director/api/controllers/backups_controller.rb +2 -2
  5. data/lib/bosh/director/api/controllers/base_controller.rb +1 -1
  6. data/lib/bosh/director/api/controllers/compiled_packages_controller.rb +2 -2
  7. data/lib/bosh/director/api/controllers/deployments_controller.rb +40 -26
  8. data/lib/bosh/director/api/controllers/info_controller.rb +1 -1
  9. data/lib/bosh/director/api/controllers/locks_controller.rb +1 -1
  10. data/lib/bosh/director/api/controllers/packages_controller.rb +1 -1
  11. data/lib/bosh/director/api/controllers/releases_controller.rb +5 -5
  12. data/lib/bosh/director/api/controllers/resources_controller.rb +1 -1
  13. data/lib/bosh/director/api/controllers/resurrection_controller.rb +1 -1
  14. data/lib/bosh/director/api/controllers/stemcells_controller.rb +4 -4
  15. data/lib/bosh/director/api/controllers/task_controller.rb +20 -0
  16. data/lib/bosh/director/api/controllers/tasks_controller.rb +3 -18
  17. data/lib/bosh/director/api/controllers/users_controller.rb +3 -3
  18. data/lib/bosh/director/api/deployment_lookup.rb +1 -1
  19. data/lib/bosh/director/api/resource_manager.rb +1 -1
  20. data/lib/bosh/director/instance_updater/vm_updater.rb +2 -1
  21. data/lib/bosh/director/jobs/cloud_check/scan.rb +1 -1
  22. data/lib/bosh/director/jobs/cloud_check/scan_and_fix.rb +1 -1
  23. data/lib/bosh/director/problem_handlers/missing_disk.rb +74 -0
  24. data/lib/bosh/director/problem_scanner/disk_scan_stage.rb +80 -0
  25. data/lib/bosh/director/problem_scanner/problem_register.rb +55 -0
  26. data/lib/bosh/director/problem_scanner/scanner.rb +86 -0
  27. data/lib/bosh/director/problem_scanner/vm_scan_stage.rb +134 -0
  28. data/lib/bosh/director/version.rb +1 -1
  29. data/lib/cloud/dummy.rb +9 -0
  30. metadata +29 -26
  31. data/lib/bosh/director/api/controller.rb +0 -33
  32. data/lib/bosh/director/api/controllers/errands_controller.rb +0 -26
  33. data/lib/bosh/director/problem_scanner.rb +0 -268
@@ -3,7 +3,7 @@ require 'bosh/director/api/controllers/base_controller'
3
3
  module Bosh::Director
4
4
  module Api::Controllers
5
5
  class ResourcesController < BaseController
6
- get '/resources/:id' do
6
+ get '/:id' do
7
7
  tmp_file = @resource_manager.get_resource_path(params[:id])
8
8
  send_disposable_file(tmp_file, :type => 'application/x-gzip')
9
9
  end
@@ -3,7 +3,7 @@ require 'bosh/director/api/controllers/base_controller'
3
3
  module Bosh::Director
4
4
  module Api::Controllers
5
5
  class ResurrectionController < BaseController
6
- put '/resurrection', consumes: :json do
6
+ put '/', consumes: :json do
7
7
  payload = json_decode(request.body)
8
8
 
9
9
  @resurrector_manager.set_pause_for_all(payload['resurrection_paused'])
@@ -3,18 +3,18 @@ require 'bosh/director/api/controllers/base_controller'
3
3
  module Bosh::Director
4
4
  module Api::Controllers
5
5
  class StemcellsController < BaseController
6
- post '/stemcells', :consumes => :json do
6
+ post '/', :consumes => :json do
7
7
  payload = json_decode(request.body)
8
8
  task = @stemcell_manager.create_stemcell_from_url(@user, payload['location'])
9
9
  redirect "/tasks/#{task.id}"
10
10
  end
11
11
 
12
- post '/stemcells', :consumes => :multipart do
12
+ post '/', :consumes => :multipart do
13
13
  task = @stemcell_manager.create_stemcell_from_file_path(@user, params[:nginx_upload_path])
14
14
  redirect "/tasks/#{task.id}"
15
15
  end
16
16
 
17
- get '/stemcells' do
17
+ get '/' do
18
18
  stemcells = Models::Stemcell.order_by(:name.asc).map do |stemcell|
19
19
  {
20
20
  'name' => stemcell.name,
@@ -26,7 +26,7 @@ module Bosh::Director
26
26
  json_encode(stemcells)
27
27
  end
28
28
 
29
- delete '/stemcells/:name/:version' do
29
+ delete '/:name/:version' do
30
30
  name, version = params[:name], params[:version]
31
31
  options = {}
32
32
  options['force'] = true if params['force'] == 'true'
@@ -0,0 +1,20 @@
1
+ require 'bosh/director/api/controllers/base_controller'
2
+
3
+ module Bosh::Director
4
+ module Api::Controllers
5
# Controller with a single route used to request cancellation of a task.
class TaskController < BaseController
  # DELETE /:id
  #
  # Looks the task up via @task_manager. A task whose state is
  # 'processing' or 'queued' is flagged as :cancelling, saved, and the
  # response status is set to 204. Any other state produces a 400 whose
  # body names the offending state.
  delete '/:id' do
    id = params[:id]
    task = @task_manager.find_task(id)

    if task.state == 'processing' || task.state == 'queued'
      task.state = :cancelling
      task.save
      status(204)
    else
      status(400)
      body("Cannot cancel task #{id}: invalid state (#{task.state})")
    end
  end
end
19
+ end
20
+ end
@@ -3,7 +3,7 @@ require 'bosh/director/api/controllers/base_controller'
3
3
  module Bosh::Director
4
4
  module Api::Controllers
5
5
  class TasksController < BaseController
6
- get '/tasks' do
6
+ get '/' do
7
7
  dataset = Models::Task.dataset
8
8
  limit = params['limit']
9
9
  if limit
@@ -45,7 +45,7 @@ module Bosh::Director
45
45
  json_encode(tasks)
46
46
  end
47
47
 
48
- get '/tasks/:id' do
48
+ get '/:id' do
49
49
  task = @task_manager.find_task(params[:id])
50
50
  if task_timeout?(task)
51
51
  task.state = :timeout
@@ -59,7 +59,7 @@ module Bosh::Director
59
59
  # Sends back output of given task id and params[:type]
60
60
  # Example: `get /tasks/5/output?type=event` will send back the file
61
61
  # at /var/vcap/store/director/tasks/5/event
62
- get '/tasks/:id/output' do
62
+ get '/:id/output' do
63
63
  log_type = params[:type] || 'debug'
64
64
  task = @task_manager.find_task(params[:id])
65
65
 
@@ -75,21 +75,6 @@ module Bosh::Director
75
75
  status(204)
76
76
  end
77
77
  end
78
-
79
- delete '/task/:id' do
80
- task_id = params[:id]
81
- task = @task_manager.find_task(task_id)
82
-
83
- if task.state != 'processing' && task.state != 'queued'
84
- status(400)
85
- body("Cannot cancel task #{task_id}: invalid state (#{task.state})")
86
- else
87
- task.state = :cancelling
88
- task.save
89
- status(204)
90
- body("Cancelling task #{task_id}")
91
- end
92
- end
93
78
  end
94
79
  end
95
80
  end
@@ -3,14 +3,14 @@ require 'bosh/director/api/controllers/base_controller'
3
3
  module Bosh::Director
4
4
  module Api::Controllers
5
5
  class UsersController < BaseController
6
- post '/users', :consumes => [:json] do
6
+ post '/', :consumes => [:json] do
7
7
  user = @user_manager.get_user_from_request(request)
8
8
  @user_manager.create_user(user)
9
9
  status(204)
10
10
  nil
11
11
  end
12
12
 
13
- put '/users/:username', :consumes => [:json] do
13
+ put '/:username', :consumes => [:json] do
14
14
  user = @user_manager.get_user_from_request(request)
15
15
  if user.username != params[:username]
16
16
  raise UserImmutableUsername, 'The username is immutable'
@@ -20,7 +20,7 @@ module Bosh::Director
20
20
  nil
21
21
  end
22
22
 
23
- delete '/users/:username' do
23
+ delete '/:username' do
24
24
  @user_manager.delete_user(params[:username])
25
25
  status(204)
26
26
  nil
@@ -10,4 +10,4 @@ module Bosh::Director
10
10
  end
11
11
  end
12
12
  end
13
- end
13
+ end
@@ -66,4 +66,4 @@ module Bosh::Director
66
66
  end
67
67
  end
68
68
  end
69
- end
69
+ end
@@ -168,7 +168,8 @@ module Bosh::Director
168
168
  end
169
169
 
170
170
  def detach
171
- unless @instance.disk_currently_attached?
171
+ disk_list = @agent_client.list_disk
172
+ if disk_list.empty?
172
173
  @logger.info('Skipping disk detaching')
173
174
  return
174
175
  end
@@ -21,7 +21,7 @@ module Bosh::Director
21
21
  def perform
22
22
  begin
23
23
  with_deployment_lock(@deployment, :timeout => 0) do
24
- scanner = ProblemScanner.new(@deployment)
24
+ scanner = ProblemScanner::Scanner.new(@deployment)
25
25
  scanner.reset
26
26
  scanner.scan_vms
27
27
  scanner.scan_disks
@@ -29,7 +29,7 @@ module Bosh::Director
29
29
  begin
30
30
  with_deployment_lock(@deployment, :timeout => 0) do
31
31
 
32
- scanner = ProblemScanner.new(@deployment)
32
+ scanner = ProblemScanner::Scanner.new(@deployment)
33
33
  scanner.reset(jobs)
34
34
  scanner.scan_vms(jobs)
35
35
 
@@ -0,0 +1,74 @@
1
+ module Bosh::Director
2
+ module ProblemHandlers
3
# Problem handler registered as :missing_disk. It describes a persistent
# disk record whose disk is reported missing and offers two resolutions:
# ignore the problem (the automatic one) or delete the disk reference.
class MissingDisk < Base

  register_as :missing_disk
  auto_resolution :ignore

  # disk_id - id used to look up the Models::PersistentDisk record
  # data    - extra problem data, stored untouched in @data
  #
  # Reports a handler error when the disk record or its instance cannot
  # be found.
  # NOTE(review): the nil checks below only protect the following lines if
  # handler_error raises -- confirm against Base.
  def initialize(disk_id, data)
    super
    @disk_id = disk_id
    @data = data
    @disk = Models::PersistentDisk[@disk_id]

    if @disk.nil?
      handler_error("Disk `#{@disk_id}' is no longer in the database")
    end

    @instance = @disk.instance
    if @instance.nil?
      handler_error("Cannot find instance for disk `#{@disk.disk_cid}'")
    end

    # May be nil when the instance currently has no VM record.
    @vm = @instance.vm
  end

  # Returns a one-line, human readable description of the problem,
  # including the disk CID, the owning job/index and the disk size.
  def description
    job = @instance.job || "unknown job"
    index = @instance.index || "unknown index"
    disk_label = "`#{@disk.disk_cid}' (#{job}/#{index}, #{@disk.size.to_i}M)"
    "Disk #{disk_label} is missing"
  end

  resolution :ignore do
    plan { 'Ignore problem' }
    action { }
  end

  resolution :delete_disk_reference do
    plan { 'Delete disk reference (DANGEROUS!)' }
    action { delete_disk_reference }
  end

  # Marks the disk inactive, asks the agent to unmount it and the cloud to
  # detach it (only when the instance has a VM), then deletes the disk's
  # snapshots and destroys the disk record.
  def delete_disk_reference
    @disk.db.transaction do
      @disk.update(active: false)
    end

    # Without a VM there is no agent to talk to and nothing to detach from;
    # previously this path failed with NoMethodError on nil.
    if @vm
      begin
        agent = agent_client(@vm)
        if agent.list_disk.include?(@disk.disk_cid)
          agent.unmount_disk(@disk.disk_cid)
        end
      rescue Bosh::Director::RpcTimeout
        handler_error('Cannot unmount disk, agent is not responding')
      rescue Bosh::Director::RpcRemoteException => e
        handler_error("Cannot unmount disk, #{e.message}")
      end

      begin
        cloud.detach_disk(@vm.cid, @disk.disk_cid) if @vm.cid
      rescue Bosh::Clouds::DiskNotAttached
        # Deliberately ignored: the disk is already detached.
      end
    end

    Api::SnapshotManager.delete_snapshots(@disk.snapshots)

    @disk.destroy
  end
end
73
+ end
74
+ end
@@ -0,0 +1,80 @@
1
+ module Bosh::Director::ProblemScanner
2
# Scan stage that inspects the persistent disks of one deployment and
# reports every anomaly to the problem register.
class DiskScanStage
  # disk_owners      - hash of disk CID => array of VM CIDs the disk is mounted on
  # problem_register - receives problem_found(type, disk[, data])
  # cloud            - queried with has_disk?(disk_cid)
  # deployment_id    - only disks of instances in this deployment are scanned
  # event_logger     - receives begin_stage / track_and_log
  # logger           - receives info / warn
  def initialize(disk_owners, problem_register, cloud, deployment_id, event_logger, logger)
    @disk_owners = disk_owners
    @problem_register = problem_register
    @cloud = cloud
    @deployment_id = deployment_id
    @event_logger = event_logger
    @logger = logger
  end

  # Scans every persistent disk whose instance belongs to the deployment,
  # then emits a summary line with the number of disks per outcome.
  def scan
    deployment_disks = Bosh::Director::Models::PersistentDisk.eager(:instance).all.select do |candidate|
      owner = candidate.instance
      owner && owner.deployment_id == @deployment_id
    end

    # Missing keys read as 0 so outcomes that never occurred still print.
    tally = Hash.new(0)

    @event_logger.begin_stage("Scanning #{deployment_disks.size} persistent disks", 2)

    @event_logger.track_and_log('Looking for inactive disks') do
      deployment_disks.each { |entry| tally[scan_disk(entry)] += 1 }
    end

    summary = [
      "#{tally[:ok]} OK",
      "#{tally[:missing]} missing",
      "#{tally[:inactive]} inactive",
      "#{tally[:mount_info_mismatch]} mount-info mismatch"
    ].join(', ')

    @event_logger.track_and_log(summary)
  end

  private

  # Classifies a single disk and registers a problem for anything that is
  # not healthy. Returns :missing, :inactive, :mount_info_mismatch or :ok.
  def scan_disk(disk)
    begin
      if !@cloud.has_disk?(disk.disk_cid)
        @logger.info("Found missing disk: #{disk.id}")
        @problem_register.problem_found(:missing_disk, disk)
        return :missing
      end
    rescue Bosh::Clouds::NotImplemented
      # The presence check is optional; carry on with the remaining checks.
      @logger.info('Ignored check for disk presence, CPI does not implement has_disk? method')
    end

    # Inactive disks
    if !disk.active
      @logger.info("Found inactive disk: #{disk.id}")
      @problem_register.problem_found(:inactive_disk, disk)
      return :inactive
    end

    cid = disk.disk_cid
    recorded_vm = disk.instance && disk.instance.vm
    vm_cid = recorded_vm ? recorded_vm.cid : nil

    if vm_cid.nil?
      # With the db dependencies this should not happen.
      @logger.warn("Disk #{cid} is not associated to any VM. Skipping scan")
      return :ok
    end

    mounted_on = @disk_owners[cid] || []

    # Healthy only when the disk is mounted exactly once, on the VM that
    # the database record points to.
    return :ok if mounted_on.size == 1 && mounted_on.first == vm_cid

    # Active disk is not mounted, mounted more than once, or mounted on a
    # VM that is different from the record.
    @logger.info("Found problem in mount info: active disk #{cid} mounted on #{mounted_on.join(', ')}")
    @problem_register.problem_found(:mount_info_mismatch, disk, owner_vms: mounted_on)
    :mount_info_mismatch
  end
end
80
+ end
@@ -0,0 +1,55 @@
1
+ module Bosh::Director::ProblemScanner
2
# Records problems found during a scan as DeploymentProblem rows,
# serialising every access through a single mutex.
class ProblemRegister
  # deployment - object whose #id scopes the problems
  # logger     - receives info messages about created/updated problems
  def initialize(deployment, logger)
    @deployment = deployment
    @logger = logger

    @problem_lock = Mutex.new
  end

  # Registers a problem of the given type for a resource.
  #
  # Creates a new open problem when none exists for this deployment, type
  # and resource; otherwise refreshes the existing one and bumps its
  # counter. Raises CloudcheckTooManySimilarProblems when more than one
  # matching open problem is already stored.
  def problem_found(type, resource, data = {})
    @problem_lock.synchronize do
      open_matches = Bosh::Director::Models::DeploymentProblem.filter(
        deployment_id: @deployment.id,
        type: type.to_s,
        resource_id: resource.id,
        state: 'open'
      ).all

      if open_matches.size > 1
        raise Bosh::Director::CloudcheckTooManySimilarProblems,
              "More than one problem of type `#{type}' exists for resource #{type} #{resource.id}"
      end

      if open_matches.empty?
        record_new_problem(type, resource, data)
      else
        refresh_problem(open_matches[0], data)
      end
    end
  end

  # Returns [instance, persistent_disk_cid] for the VM. Both entries are
  # nil when the VM has no instance.
  def get_vm_instance_and_disk(vm)
    @problem_lock.synchronize do
      owner = vm.instance
      [owner, owner ? owner.persistent_disk_cid : nil]
    end
  end

  private

  # Stores a brand new open problem with a counter of 1.
  def record_new_problem(type, resource, data)
    created = Bosh::Director::Models::DeploymentProblem.create(
      type: type.to_s,
      resource_id: resource.id,
      state: 'open',
      deployment_id: @deployment.id,
      data: data,
      counter: 1
    )

    @logger.info("Created problem #{created.id} (#{created.type})")
  end

  # Updates an already known problem.
  # This assumes we are running with deployment lock acquired,
  # so there is no possible update conflict.
  def refresh_problem(known, data)
    known.data = data
    known.last_seen_at = Time.now
    known.counter += 1
    known.save

    @logger.info("Updated problem #{known.id} (#{known.type}), count is now #{known.counter}")
  end
end
55
+ end
@@ -0,0 +1,86 @@
1
+ require 'bosh/director/problem_scanner/problem_register'
2
+ require 'bosh/director/problem_scanner/disk_scan_stage'
3
+ require 'bosh/director/problem_scanner/vm_scan_stage'
4
+
5
+ module Bosh::Director
6
+ module ProblemScanner
7
# Entry point of the problem scanner for one deployment: closes previously
# recorded problems and runs the VM and disk scan stages.
class Scanner
  # deployment - the deployment whose VMs and disks are scanned
  def initialize(deployment)
    @deployment = deployment
    # Filled in by scan_vms and handed to the disk stage by scan_disks.
    @agent_disks = {}

    @instance_manager = Api::InstanceManager.new

    @logger = Config.logger
    @event_logger = EventLogger.new(Config.event_log, @logger)

    @problem_register = ProblemRegister.new(deployment, @logger)
  end

  # Closes open problems for the deployment.
  #
  # vms - optional list of [job, index] pairs; when given, only open
  #       problems whose resource_id equals the id of that instance's VM
  #       are closed. When omitted, every open problem of the deployment
  #       is closed.
  def reset(vms=nil)
    unless vms
      return close_problems(state: 'open', deployment: @deployment)
    end

    vms.each do |job, index|
      instance = @instance_manager.find_by_name(@deployment.name, job, index)
      close_problems(deployment: @deployment, resource_id: instance.vm.id, state: 'open')
    end
  end

  # Runs the VM scan stage (optionally limited to the given vms) and keeps
  # the agent disk information it collected for the disk scan.
  def scan_vms(vms=nil)
    stage = VmScanStage.new(
      @instance_manager, @problem_register, Config.cloud,
      @deployment, @event_logger, @logger
    )
    stage.scan(vms)

    @agent_disks = stage.agent_disks
  end

  # Runs the disk scan stage using the disk ownership collected by scan_vms.
  def scan_disks
    DiskScanStage.new(
      @agent_disks, @problem_register, Config.cloud,
      @deployment.id, @event_logger, @logger
    ).scan
  end

  private

  # Marks every DeploymentProblem matching the conditions as closed.
  def close_problems(conditions)
    Models::DeploymentProblem.where(conditions).update(state: 'closed')
  end
end
66
+
67
# Thin wrapper that forwards progress to an event log and mirrors the
# same messages to a regular logger.
class EventLogger
  # event_log - receives begin_stage(name, n_steps) and track(task) { |ticker| }
  # logger    - receives info(message)
  def initialize(event_log, logger)
    @event_log = event_log
    @logger = logger
  end

  # Opens a new stage in the event log, then logs the stage name.
  def begin_stage(stage_name, n_steps)
    @event_log.begin_stage(stage_name, n_steps)
    @logger.info(stage_name)
  end

  # Tracks a task in the event log.
  #
  # task - task description, also written to the logger unless log is false
  # log  - pass false to skip the logger line
  #
  # The caller's block, when present, is invoked with the ticker yielded
  # by the event log.
  def track_and_log(task, log = true)
    @event_log.track(task) do |ticker|
      if log
        @logger.info(task)
      end

      block_given? ? yield(ticker) : nil
    end
  end
end
85
+ end
86
+ end