bosh-director 1.5.0.pre.1113
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +34 -0
- data/bin/bosh-director +36 -0
- data/bin/bosh-director-console +84 -0
- data/bin/bosh-director-drain-workers +42 -0
- data/bin/bosh-director-migrate +58 -0
- data/bin/bosh-director-scheduler +27 -0
- data/bin/bosh-director-worker +76 -0
- data/db/migrations/README +1 -0
- data/db/migrations/director/20110209010747_initial.rb +118 -0
- data/db/migrations/director/20110406055800_add_task_user.rb +9 -0
- data/db/migrations/director/20110518225809_remove_cid_constrain.rb +13 -0
- data/db/migrations/director/20110617211923_add_deployments_release_versions.rb +32 -0
- data/db/migrations/director/20110622212607_add_task_checkpoint_timestamp.rb +9 -0
- data/db/migrations/director/20110628023039_add_state_to_instances.rb +21 -0
- data/db/migrations/director/20110709012332_add_disk_size_to_instances.rb +9 -0
- data/db/migrations/director/20110906183441_add_log_bundles.rb +11 -0
- data/db/migrations/director/20110907194830_add_logs_json_to_templates.rb +9 -0
- data/db/migrations/director/20110915205610_add_persistent_disks.rb +51 -0
- data/db/migrations/director/20111005180929_add_properties.rb +14 -0
- data/db/migrations/director/20111110024617_add_deployment_problems.rb +24 -0
- data/db/migrations/director/20111216214145_recreate_support_for_vms.rb +9 -0
- data/db/migrations/director/20120102084027_add_credentials_to_vms.rb +7 -0
- data/db/migrations/director/20120427235217_allow_multiple_releases_per_deployment.rb +36 -0
- data/db/migrations/director/20120524175805_add_task_type.rb +44 -0
- data/db/migrations/director/20120614001930_delete_redundant_deployment_release_relation.rb +34 -0
- data/db/migrations/director/20120822004528_add_fingerprint_to_templates_and_packages.rb +17 -0
- data/db/migrations/director/20120830191244_add_properties_to_templates.rb +9 -0
- data/db/migrations/director/20121106190739_persist_vm_env.rb +9 -0
- data/db/migrations/director/20130222232131_add_sha1_to_stemcells.rb +9 -0
- data/db/migrations/director/20130312211407_add_commit_hash_to_release_versions.rb +19 -0
- data/db/migrations/director/20130409235338_snapshot.rb +15 -0
- data/db/migrations/director/20130530164918_add_paused_flag_to_instance.rb +14 -0
- data/db/migrations/director/20130531172604_add_director_attributes.rb +13 -0
- data/db/migrations/dns/20120123234908_initial.rb +27 -0
- data/lib/bosh/director.rb +133 -0
- data/lib/bosh/director/agent_client.rb +78 -0
- data/lib/bosh/director/api.rb +29 -0
- data/lib/bosh/director/api/api_helper.rb +81 -0
- data/lib/bosh/director/api/backup_manager.rb +15 -0
- data/lib/bosh/director/api/controller.rb +639 -0
- data/lib/bosh/director/api/controller_helpers.rb +34 -0
- data/lib/bosh/director/api/deployment_lookup.rb +13 -0
- data/lib/bosh/director/api/deployment_manager.rb +60 -0
- data/lib/bosh/director/api/http_constants.rb +16 -0
- data/lib/bosh/director/api/instance_lookup.rb +44 -0
- data/lib/bosh/director/api/instance_manager.rb +63 -0
- data/lib/bosh/director/api/problem_manager.rb +40 -0
- data/lib/bosh/director/api/property_manager.rb +69 -0
- data/lib/bosh/director/api/release_manager.rb +59 -0
- data/lib/bosh/director/api/resource_manager.rb +69 -0
- data/lib/bosh/director/api/resurrector_manager.rb +15 -0
- data/lib/bosh/director/api/snapshot_manager.rb +94 -0
- data/lib/bosh/director/api/stemcell_manager.rb +50 -0
- data/lib/bosh/director/api/task_helper.rb +46 -0
- data/lib/bosh/director/api/task_manager.rb +64 -0
- data/lib/bosh/director/api/user_manager.rb +72 -0
- data/lib/bosh/director/api/vm_state_manager.rb +11 -0
- data/lib/bosh/director/app.rb +35 -0
- data/lib/bosh/director/blob_util.rb +87 -0
- data/lib/bosh/director/blobstores.rb +29 -0
- data/lib/bosh/director/client.rb +156 -0
- data/lib/bosh/director/cloudcheck_helper.rb +204 -0
- data/lib/bosh/director/compile_task.rb +157 -0
- data/lib/bosh/director/config.rb +370 -0
- data/lib/bosh/director/configuration_hasher.rb +114 -0
- data/lib/bosh/director/cycle_helper.rb +36 -0
- data/lib/bosh/director/db_backup.rb +22 -0
- data/lib/bosh/director/db_backup/adapter.rb +3 -0
- data/lib/bosh/director/db_backup/adapter/mysql2.rb +27 -0
- data/lib/bosh/director/db_backup/adapter/postgres.rb +36 -0
- data/lib/bosh/director/db_backup/adapter/sqlite.rb +17 -0
- data/lib/bosh/director/db_backup/error.rb +10 -0
- data/lib/bosh/director/deployment_plan.rb +26 -0
- data/lib/bosh/director/deployment_plan/assembler.rb +430 -0
- data/lib/bosh/director/deployment_plan/compilation_config.rb +54 -0
- data/lib/bosh/director/deployment_plan/compiled_package.rb +35 -0
- data/lib/bosh/director/deployment_plan/dynamic_network.rb +91 -0
- data/lib/bosh/director/deployment_plan/idle_vm.rb +109 -0
- data/lib/bosh/director/deployment_plan/instance.rb +413 -0
- data/lib/bosh/director/deployment_plan/job.rb +470 -0
- data/lib/bosh/director/deployment_plan/manual_network.rb +137 -0
- data/lib/bosh/director/deployment_plan/network.rb +74 -0
- data/lib/bosh/director/deployment_plan/network_subnet.rb +167 -0
- data/lib/bosh/director/deployment_plan/planner.rb +288 -0
- data/lib/bosh/director/deployment_plan/preparer.rb +52 -0
- data/lib/bosh/director/deployment_plan/release.rb +126 -0
- data/lib/bosh/director/deployment_plan/resource_pool.rb +143 -0
- data/lib/bosh/director/deployment_plan/resource_pools.rb +68 -0
- data/lib/bosh/director/deployment_plan/stemcell.rb +56 -0
- data/lib/bosh/director/deployment_plan/template.rb +94 -0
- data/lib/bosh/director/deployment_plan/update_config.rb +80 -0
- data/lib/bosh/director/deployment_plan/updater.rb +55 -0
- data/lib/bosh/director/deployment_plan/vip_network.rb +79 -0
- data/lib/bosh/director/dns_helper.rb +204 -0
- data/lib/bosh/director/download_helper.rb +44 -0
- data/lib/bosh/director/duration.rb +36 -0
- data/lib/bosh/director/encryption_helper.rb +10 -0
- data/lib/bosh/director/errors.rb +198 -0
- data/lib/bosh/director/event_log.rb +136 -0
- data/lib/bosh/director/ext.rb +64 -0
- data/lib/bosh/director/hash_string_vals.rb +13 -0
- data/lib/bosh/director/instance_deleter.rb +109 -0
- data/lib/bosh/director/instance_updater.rb +506 -0
- data/lib/bosh/director/ip_util.rb +67 -0
- data/lib/bosh/director/job_queue.rb +16 -0
- data/lib/bosh/director/job_runner.rb +162 -0
- data/lib/bosh/director/job_updater.rb +121 -0
- data/lib/bosh/director/jobs/backup.rb +86 -0
- data/lib/bosh/director/jobs/base_job.rb +66 -0
- data/lib/bosh/director/jobs/cloud_check/apply_resolutions.rb +46 -0
- data/lib/bosh/director/jobs/cloud_check/scan.rb +38 -0
- data/lib/bosh/director/jobs/cloud_check/scan_and_fix.rb +73 -0
- data/lib/bosh/director/jobs/create_snapshot.rb +23 -0
- data/lib/bosh/director/jobs/delete_deployment.rb +183 -0
- data/lib/bosh/director/jobs/delete_deployment_snapshots.rb +34 -0
- data/lib/bosh/director/jobs/delete_release.rb +219 -0
- data/lib/bosh/director/jobs/delete_snapshots.rb +23 -0
- data/lib/bosh/director/jobs/delete_stemcell.rb +102 -0
- data/lib/bosh/director/jobs/fetch_logs.rb +99 -0
- data/lib/bosh/director/jobs/scheduled_backup.rb +38 -0
- data/lib/bosh/director/jobs/snapshot_deployment.rb +61 -0
- data/lib/bosh/director/jobs/snapshot_deployments.rb +23 -0
- data/lib/bosh/director/jobs/snapshot_self.rb +43 -0
- data/lib/bosh/director/jobs/ssh.rb +59 -0
- data/lib/bosh/director/jobs/update_deployment.rb +110 -0
- data/lib/bosh/director/jobs/update_release.rb +672 -0
- data/lib/bosh/director/jobs/update_stemcell.rb +109 -0
- data/lib/bosh/director/jobs/vm_state.rb +89 -0
- data/lib/bosh/director/lock.rb +133 -0
- data/lib/bosh/director/lock_helper.rb +92 -0
- data/lib/bosh/director/models.rb +29 -0
- data/lib/bosh/director/models/compiled_package.rb +33 -0
- data/lib/bosh/director/models/deployment.rb +22 -0
- data/lib/bosh/director/models/deployment_problem.rb +49 -0
- data/lib/bosh/director/models/deployment_property.rb +21 -0
- data/lib/bosh/director/models/director_attribute.rb +9 -0
- data/lib/bosh/director/models/dns.rb +9 -0
- data/lib/bosh/director/models/dns/domain.rb +9 -0
- data/lib/bosh/director/models/dns/record.rb +7 -0
- data/lib/bosh/director/models/helpers/model_helper.rb +7 -0
- data/lib/bosh/director/models/instance.rb +28 -0
- data/lib/bosh/director/models/log_bundle.rb +10 -0
- data/lib/bosh/director/models/package.rb +30 -0
- data/lib/bosh/director/models/persistent_disk.rb +13 -0
- data/lib/bosh/director/models/release.rb +17 -0
- data/lib/bosh/director/models/release_version.rb +16 -0
- data/lib/bosh/director/models/snapshot.rb +13 -0
- data/lib/bosh/director/models/stemcell.rb +18 -0
- data/lib/bosh/director/models/task.rb +10 -0
- data/lib/bosh/director/models/template.rb +44 -0
- data/lib/bosh/director/models/user.rb +11 -0
- data/lib/bosh/director/models/vm.rb +42 -0
- data/lib/bosh/director/nats_rpc.rb +54 -0
- data/lib/bosh/director/network_reservation.rb +121 -0
- data/lib/bosh/director/next_rebase_version.rb +20 -0
- data/lib/bosh/director/package_compiler.rb +423 -0
- data/lib/bosh/director/problem_handlers/base.rb +153 -0
- data/lib/bosh/director/problem_handlers/inactive_disk.rb +112 -0
- data/lib/bosh/director/problem_handlers/invalid_problem.rb +28 -0
- data/lib/bosh/director/problem_handlers/missing_vm.rb +34 -0
- data/lib/bosh/director/problem_handlers/mount_info_mismatch.rb +62 -0
- data/lib/bosh/director/problem_handlers/out_of_sync_vm.rb +64 -0
- data/lib/bosh/director/problem_handlers/unbound_instance_vm.rb +85 -0
- data/lib/bosh/director/problem_handlers/unresponsive_agent.rb +78 -0
- data/lib/bosh/director/problem_resolver.rb +103 -0
- data/lib/bosh/director/problem_scanner.rb +268 -0
- data/lib/bosh/director/resource_pool_updater.rb +216 -0
- data/lib/bosh/director/scheduler.rb +57 -0
- data/lib/bosh/director/sequel.rb +13 -0
- data/lib/bosh/director/tar_gzipper.rb +47 -0
- data/lib/bosh/director/task_result_file.rb +19 -0
- data/lib/bosh/director/thread_pool.rb +8 -0
- data/lib/bosh/director/validation_helper.rb +55 -0
- data/lib/bosh/director/version.rb +7 -0
- data/lib/bosh/director/vm_creator.rb +80 -0
- data/lib/bosh/director/vm_data.rb +63 -0
- data/lib/bosh/director/vm_metadata_updater.rb +29 -0
- data/lib/bosh/director/vm_reuser.rb +63 -0
- data/lib/cloud/dummy.rb +149 -0
- metadata +664 -0
@@ -0,0 +1,109 @@
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
2
|
+
|
3
|
+
module Bosh::Director
|
4
|
+
# Coordinates the safe deletion of an instance and all associated resources.
|
5
|
+
class InstanceDeleter
|
6
|
+
include DnsHelper
|
7
|
+
|
8
|
+
# Stores the deployment plan and picks up the shared collaborators
# (cloud, logger, event log) from Config.
# @param deployment_plan plan used later for DNS names and the DNS domain
def initialize(deployment_plan)
  @cloud, @logger, @event_log = Config.cloud, Config.logger, Config.event_log
  @deployment_plan = deployment_plan
end
|
14
|
+
|
15
|
+
# Deletes a list of instances
|
16
|
+
# @param [Array<Models::Instance>] instances list of instances to delete
|
17
|
+
# @param [Hash] options optional list of options controlling concurrency
|
18
|
+
# @return [void]
|
19
|
+
def delete_instances(instances, options = {})
  # An explicit :max_threads option wins over the director-wide setting.
  thread_count = options[:max_threads] || Config.max_threads
  pool_options = { :max_threads => thread_count }

  # Every instance is handed to the pool as its own unit of work.
  ThreadPool.new(pool_options).wrap do |pool|
    instances.each do |instance|
      pool.process { delete_instance(instance) }
    end
  end
end
|
27
|
+
|
28
|
+
# Deletes a single instance and attached persistent disks
|
29
|
+
# @param [Models::Instance] instance instance to delete
|
30
|
+
# @return [void]
|
31
|
+
def delete_instance(instance)
  vm = instance.vm

  @event_log.track(vm.cid) do
    @logger.info("Delete unneeded instance: #{vm.cid}")

    # Tear down everything outside the database first:
    # drain the agent, delete the VM, then snapshots, disks and DNS records.
    drain(vm.agent_id)
    @cloud.delete_vm(vm.cid)
    delete_snapshots(instance)
    delete_persistent_disks(instance.persistent_disks)
    delete_dns(instance.job, instance.index)

    # The instance row goes before the VM row, both inside one transaction.
    vm.db.transaction do
      [instance, vm].each { |record| record.destroy }
    end
  end
end
|
48
|
+
|
49
|
+
# Drain the instance
|
50
|
+
# @param [String] agent_id agent id
|
51
|
+
# @return [void]
|
52
|
+
def drain(agent_id)
  agent = AgentClient.new(agent_id)
  drain_time = agent.drain("shutdown")

  # Drain protocol, as used by InstanceUpdater#wait_for_dynamic_drain:
  # a negative value means "wait that many seconds, then ask for status
  # again"; a non-negative value is the final wait before stopping.
  # The previous `begin ... end while drain_time > 0` loop did the opposite:
  # it gave up after the first negative status and re-polled after a final one.
  while drain_time < 0
    begin
      # Presumably raises TaskCancelled when the task was cancelled
      # (the rescue below re-raises exactly that error) -- TODO confirm.
      Config.job_cancelled?
      wait_time = drain_time.abs
      @logger.info("Drain - check back in #{wait_time} seconds")
      sleep(wait_time)
      drain_time = agent.drain("status")
    rescue => e
      @logger.warn("Failed to check drain-status: #{e.inspect}")
      raise if e.kind_of?(Bosh::Director::TaskCancelled)
      # Status polling stays best-effort: any other failure ends the wait
      # and the agent is stopped right away.
      drain_time = 0
    end
  end

  sleep(drain_time) if drain_time > 0
  agent.stop
end
|
73
|
+
|
74
|
+
# Deletes the snapshots of every persistent disk that belongs to the instance.
# @param instance object exposing persistent_disks, each exposing snapshots
def delete_snapshots(instance)
  per_disk = instance.persistent_disks.map(&:snapshots)
  Bosh::Director::Api::SnapshotManager.delete_snapshots(per_disk.flatten)
end
|
78
|
+
|
79
|
+
# Delete persistent disks
|
80
|
+
# @param [Array<Models::PersistentDisk>] persistent_disks disks
|
81
|
+
# @return [void]
|
82
|
+
def delete_persistent_disks(persistent_disks)
  persistent_disks.each do |disk|
    activity = disk.active ? "active" : "inactive"
    @logger.info("Deleting disk: `#{disk.disk_cid}', #{activity}")

    begin
      @cloud.delete_disk(disk.disk_cid)
    rescue Bosh::Clouds::DiskNotFound
      @logger.warn("Disk not found: #{disk.disk_cid}")
      # A missing inactive disk is tolerated; a missing active disk is re-raised.
      raise if disk.active
    end

    # The database record is removed only after the cloud call is settled.
    disk.destroy
  end
end
|
95
|
+
|
96
|
+
# Deletes the DNS records
|
97
|
+
# @param [String] job job name
|
98
|
+
# @param [Numeric] index job index
|
99
|
+
# @return [void]
|
100
|
+
def delete_dns(job, index)
  return unless Config.dns_enabled?

  # Pattern: <index>.<canonical job>.%.<canonical deployment>.<dns domain name>
  labels = [index, canonical(job), "%", @deployment_plan.canonical_name, dns_domain_name]
  delete_dns_records(labels.join("."), @deployment_plan.dns_domain.id)
end
|
107
|
+
|
108
|
+
end
|
109
|
+
end
|
@@ -0,0 +1,506 @@
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
2
|
+
|
3
|
+
module Bosh::Director
|
4
|
+
class InstanceUpdater
|
5
|
+
include DnsHelper
|
6
|
+
|
7
|
+
MAX_ATTACH_DISK_TRIES = 3
|
8
|
+
UPDATE_STEPS = 7
|
9
|
+
WATCH_INTERVALS = 10
|
10
|
+
|
11
|
+
attr_reader :current_state
|
12
|
+
|
13
|
+
# @param [DeploymentPlan::Instance] instance
|
14
|
+
def initialize(instance, event_ticker = nil)
  # Shared collaborators; the ticker is optional (see report_progress).
  @cloud, @logger = Config.cloud, Config.logger
  @ticker = event_ticker

  # The instance being updated and what is derived from it.
  @instance = instance
  @job = instance.job
  @target_state = @instance.state
  @vm = @instance.model.vm

  # Settings taken from the job.
  @deployment_plan = @job.deployment
  @resource_pool_spec = @job.resource_pool
  @update_config = @job.update

  # Last agent state seen by wait_until_running; empty until then.
  @current_state = {}
end
|
32
|
+
|
33
|
+
# Identifier used in log and error messages, in "<job name>/<index>" form.
# @return [String]
def instance_name
  format("%s/%s", @job.name, @instance.index)
end
|
36
|
+
|
37
|
+
# Runs the given block as a single update step and then reports progress.
# @yield the work that makes up the step
# @return whatever report_progress returns
def step
  yield
  report_progress
end
|
41
|
+
|
42
|
+
# Advances the ticker by one step's share of 100 percent.
# Does nothing when no ticker was supplied.
def report_progress
  return unless @ticker

  @ticker.advance(100.0 / update_steps)
end
|
45
|
+
|
46
|
+
# Number of steps used to scale progress reporting: UPDATE_STEPS, plus one
# when the job or its packages changed.
# @return [Integer]
def update_steps
  extra_step = @instance.job_changed? || @instance.packages_changed?
  extra_step ? UPDATE_STEPS + 1 : UPDATE_STEPS
end
|
49
|
+
|
50
|
+
# Brings the instance to its target state.
# @param [Hash] options :canary (default false) selects the canary watch times
# @return [void]
# @raise [AgentJobNotRunning] target is "started" but the job is not running
# @raise [AgentJobNotStopped] target is "stopped" but the job is still running
def update(options = {})
  @canary = options.fetch(:canary, false)

  @logger.info("Updating instance #{@instance}, changes: #{@instance.changes.to_a.join(', ')}")

  # Optimization to only update DNS if nothing else changed.
  if dns_change_only?
    update_dns
    return
  end

  step { stop }
  step { take_snapshot }

  # A detached instance gives up its disk and VM; nothing else is updated.
  if @target_state == "detached"
    detach_disk
    delete_vm
    @resource_pool_spec.add_idle_vm
    return
  end

  step { update_resource_pool }
  step { update_networks }
  step { update_dns }
  step { update_persistent_disk }

  VmMetadataUpdater.build.update(@vm, {})

  step { apply_state(@instance.spec) }

  start! if need_start?

  step { wait_until_running }

  # Verify the job ended up in the state that was asked for.
  case @target_state
  when "started"
    if current_state["job_state"] != "running"
      raise AgentJobNotRunning, "`#{instance_name}' is not running after update"
    end
  when "stopped"
    if current_state["job_state"] == "running"
      raise AgentJobNotStopped, "`#{instance_name}' is still running despite the stop command"
    end
  end
end
|
92
|
+
|
93
|
+
# Watch times don't include the get_state roundtrip time, so effective
|
94
|
+
# max watch time is roughly:
|
95
|
+
# max_watch_time + WATCH_INTERVALS * avg_roundtrip_time
|
96
|
+
def wait_until_running
  watch_schedule(min_watch_time, max_watch_time).each do |watch_time|
    # The schedule is in milliseconds; sleep takes seconds.
    seconds = watch_time.to_f / 1000
    @logger.info("Waiting for #{seconds} seconds to check #{instance_name} status")
    sleep(seconds)
    @logger.info("Checking if #{instance_name} has been updated after #{seconds} seconds")

    @current_state = agent.get_state

    # Stop polling as soon as the job matches the target state.
    case @target_state
    when "started"
      break if current_state["job_state"] == "running"
    when "stopped"
      break if current_state["job_state"] != "running"
    end
  end
end
|
112
|
+
|
113
|
+
# Asks the agent to start the job. A RuntimeError from the agent is logged
# and swallowed; other exceptions propagate.
def start!
  begin
    agent.start
  rescue RuntimeError => error
    # FIXME: compatibility workaround. Director and agent cannot negotiate
    # the BOSH protocol version (yet), so updating from an agent that does not
    # support the 'start' RPC to one that does might be hard. For now the
    # exception is simply swallowed.
    # This needs to be removed in one of the following cases:
    # 1. BOSH protocol handshake gets implemented
    # 2. All agents updated to support 'start' RPC
    #    and we no longer care about backward compatibility.
    @logger.warn("Agent start raised an exception: #{error.inspect}, ignoring for compatibility")
  end
end
|
127
|
+
|
128
|
+
# @return [Boolean] true only when the target state is "started"
def need_start?
  @target_state == "started"
end
|
131
|
+
|
132
|
+
# @return [Boolean] true when :dns is the one and only pending change
def dns_change_only?
  pending = @instance.changes
  pending.include?(:dns) && pending.size == 1
end
|
135
|
+
|
136
|
+
# Drains the job and then stops it through the agent.
def stop
  drain_time =
    if shutting_down?
      agent.drain("shutdown")
    else
      agent.drain("update", @instance.spec)
    end

  # A positive value is a plain wait in seconds; zero or negative goes
  # through the dynamic-drain polling loop.
  if drain_time > 0
    sleep(drain_time)
  else
    wait_for_dynamic_drain(drain_time)
  end

  agent.stop
end
|
147
|
+
|
148
|
+
# Sleeps and polls the agent's drain status until it reports a non-negative
# drain time.
# @param [Numeric] initial_drain_time first value returned by the drain call
def wait_for_dynamic_drain(initial_drain_time)
  reported = initial_drain_time

  loop do
    # This could go on forever if drain script is broken, canceling the task is a way out.
    Config.task_checkpoint

    pause = reported.abs
    if pause > 0
      @logger.info("`#{@instance}' is draining: checking back in #{pause}s")
      sleep(pause)
    end

    # Positive number always means last drain call:
    break if reported >= 0

    # Errors from drain status are deliberately not rescued: agents are
    # expected to support the status call, and swallowing real errors
    # here could mask potential problems.
    reported = agent.drain("status")
  end
end
|
170
|
+
|
171
|
+
# Takes a snapshot of the instance through the snapshot manager,
# passing the :clean option as true.
def take_snapshot
  snapshot_options = { :clean => true }
  Api::SnapshotManager.take_snapshot(@instance.model, snapshot_options)
end
|
174
|
+
|
175
|
+
# Deletes all snapshots of the given disk through the snapshot manager.
# @param disk object exposing snapshots
def delete_snapshots(disk)
  disk_snapshots = disk.snapshots
  Api::SnapshotManager.delete_snapshots(disk_snapshots)
end
|
178
|
+
|
179
|
+
# Unmounts the persistent disk on the agent and detaches it from the VM.
# Does nothing when the instance reports no attached disk.
# @raise [AgentUnexpectedDisk] a disk is attached but the director has no record of it
def detach_disk
  return unless @instance.disk_currently_attached?

  disk_cid = @instance.model.persistent_disk_cid
  if disk_cid.nil?
    raise AgentUnexpectedDisk,
          "`#{instance_name}' VM has disk attached but it's not reflected in director DB"
  end

  # Unmount on the agent first, then detach through the cloud.
  agent.unmount_disk(disk_cid)
  @cloud.detach_disk(@vm.cid, disk_cid)
end
|
191
|
+
|
192
|
+
# Attaches the instance's persistent disk to the VM and mounts it on the agent.
# Does nothing when the director has no persistent disk on record.
def attach_disk
  disk_cid = @instance.model.persistent_disk_cid
  return if disk_cid.nil?

  # Attach through the cloud first, then mount on the agent.
  @cloud.attach_disk(@vm.cid, disk_cid)
  agent.mount_disk(disk_cid)
end
|
198
|
+
|
199
|
+
# Deletes the VM in the cloud, then clears the instance's VM reference and
# destroys the VM record in one database transaction.
def delete_vm
  @cloud.delete_vm(@vm.cid)

  instance_model = @instance.model
  instance_model.db.transaction do
    # Clear the reference on the instance before destroying the VM record.
    instance_model.vm = nil
    instance_model.save
    @vm.destroy
  end
end
|
208
|
+
|
209
|
+
# Creates a new VM for the instance, records it on the instance model and
# waits for its agent to become ready.
# @param new_disk_id CID of an additional disk to pass to the VM creator, or nil
# @raise re-raises any error after attempting to delete the VM held in @vm
def create_vm(new_disk_id)
  begin
    stemcell = @resource_pool_spec.stemcell
    # Existing persistent disk (if any) plus the new one (if any).
    disk_cids = [@instance.model.persistent_disk_cid, new_disk_id].compact

    @vm = VmCreator.create(
      @deployment_plan.model,
      stemcell.model,
      @resource_pool_spec.cloud_properties,
      @instance.network_settings,
      disk_cids,
      @resource_pool_spec.env
    )

    @instance.model.vm = @vm
    @instance.model.save

    agent.wait_until_ready
  rescue => error
    # NOTE(review): if VmCreator.create itself raised, @vm still holds
    # whatever it held before this call -- confirm that cleanup target is intended.
    if @vm
      @logger.error("error during create_vm(), deleting vm #{@vm.cid}")
      delete_vm
    end
    raise error
  end
end
|
228
|
+
|
229
|
+
# Stores the spec on the VM record as :apply_spec, then sends it to the agent.
# @param state spec to persist and apply
# @return whatever the agent's apply call returns
def apply_state(state)
  vm_attributes = { :apply_spec => state }
  @vm.update(vm_attributes)
  agent.apply(state)
end
|
233
|
+
|
234
|
+
# Retrieve list of mounted disks from the agent
|
235
|
+
# @return [Array<String>] list of disk CIDs
|
236
|
+
def disk_info
  # A list reported by the agent is cached for the lifetime of this updater.
  return @disk_list if @disk_list

  begin
    @disk_list = agent.list_disk
  rescue RuntimeError
    # old agents don't support list_disk rpc
    # Fall back to the director's record, read through the instance model like
    # every other persistent_disk_cid access in this class (the previous
    # `@instance.persistent_disk_cid` bypassed the model). The fallback is
    # not cached, so the agent is asked again on the next call.
    [@instance.model.persistent_disk_cid]
  end
end
|
246
|
+
|
247
|
+
# Unmounts, detaches and deletes a persistent disk together with its
# snapshots, then destroys its database record.
# @param disk disk record exposing disk_cid, active, snapshots and destroy
# @param vm_cid CID of the VM to detach from; detaching is skipped when nil
# @raise [CloudDiskNotAttached] an active disk is not attached according to the CPI
# @raise [CloudDiskMissing] an active disk is missing according to the CPI
def delete_disk(disk, vm_cid)
  disk_cid = disk.disk_cid

  # Unmount the disk only if disk is known by the agent
  agent.unmount_disk(disk_cid) if agent && disk_info.include?(disk_cid)

  begin
    @cloud.detach_disk(vm_cid, disk_cid) if vm_cid
  rescue Bosh::Clouds::DiskNotAttached
    # Only an active disk is required to be attached.
    if disk.active
      raise CloudDiskNotAttached,
            "`#{instance_name}' VM should have persistent disk attached but it doesn't (according to CPI)"
    end
  end

  delete_snapshots(disk)

  begin
    @cloud.delete_disk(disk_cid)
  rescue Bosh::Clouds::DiskNotFound
    # Only an active disk is required to exist.
    if disk.active
      raise CloudDiskMissing,
            "Disk `#{disk_cid}' is missing according to CPI but marked as active in DB"
    end
  end

  disk.destroy
end
|
278
|
+
|
279
|
+
# Updates the A and PTR records for every DNS name of the instance.
# Does nothing when the instance reports no DNS change.
def update_dns
  return unless @instance.dns_changed?

  dns_domain = @deployment_plan.dns_domain
  @instance.dns_record_info.each do |name, address|
    @logger.info("Updating DNS for: #{name} to #{address}")
    update_dns_a_record(dns_domain, name, address)
    update_dns_ptr_record(name, address)
  end
end
|
289
|
+
|
290
|
+
# Recreates the VM when the resource pool changed or a new disk has to be
# attached at creation time, then applies a basic spec to the new VM.
# @param new_disk_cid CID of a disk to hand to create_vm, or nil
# @raise [CloudNotEnoughDiskSpace] the cloud keeps reporting NoDiskSpace
def update_resource_pool(new_disk_cid = nil)
  return unless @instance.resource_pool_changed? || new_disk_cid

  detach_disk

  attempts = 0
  begin
    delete_vm
    create_vm(new_disk_cid)
    attach_disk
  rescue Bosh::Clouds::NoDiskSpace => error
    # Retry while the cloud says it is worth it and the retry budget lasts.
    if error.ok_to_retry && attempts < MAX_ATTACH_DISK_TRIES
      attempts += 1
      @logger.warn("Retrying attach disk operation #{attempts}")
      retry
    end
    @logger.warn("Giving up on attach disk operation")
    error.ok_to_retry = false
    raise CloudNotEnoughDiskSpace,
          "Not enough disk space to update `#{instance_name}'"
  end

  state = {
    "deployment" => @deployment_plan.name,
    "networks" => @instance.network_settings,
    "resource_pool" => @job.resource_pool.spec,
    "job" => @job.spec,
    "index" => @instance.index,
    "release" => @job.release.spec
  }
  state["persistent_disk"] = @instance.disk_size if @instance.disk_size > 0

  # if we have a failure above the new VM doesn't get any state,
  # which makes it impossible to recreate it
  apply_state(state)
  @instance.current_state = agent.get_state
end
|
329
|
+
|
330
|
+
# Attaches the persistent disk when the director has one on record but the
# instance reports none attached. When the cloud raises NoDiskSpace, the VM
# is updated through update_resource_pool with that disk instead.
def attach_missing_disk
  begin
    disk_missing = @instance.model.persistent_disk_cid &&
                   !@instance.disk_currently_attached?
    attach_disk if disk_missing
  rescue Bosh::Clouds::NoDiskSpace
    update_resource_pool(@instance.model.persistent_disk_cid)
  end
end
|
338
|
+
|
339
|
+
# Synchronizes persistent_disks with the agent.
|
340
|
+
#
|
341
|
+
# NOTE: Currently assumes that we only have 1 persistent disk.
|
342
|
+
# @return [void]
|
343
|
+
def check_persistent_disk
  return if @instance.model.persistent_disks.empty?

  # Only the first disk reported by the agent is compared (single-disk assumption).
  agent_disk_cid = disk_info.first
  unless agent_disk_cid == @instance.model.persistent_disk_cid
    raise AgentDiskOutOfSync,
          "`#{instance_name}' has invalid disks: agent reports " \
          "`#{agent_disk_cid}' while director record shows " \
          "`#{@instance.model.persistent_disk_cid}'"
  end

  # Inactive disks are only reported, not acted upon.
  @instance.model.persistent_disks.each do |disk|
    @logger.warn("`#{instance_name}' has inactive disk #{disk.disk_cid}") unless disk.active
  end
end
|
360
|
+
|
361
|
+
# Replaces the instance's persistent disk when its configuration changed:
# creates and attaches a new disk, migrates data from the old one, swaps the
# active flags and finally deletes the old disk.
def update_persistent_disk
  # CLEANUP FIXME
  # [olegs] Error cleanup should be performed AFTER logic cleanup, I can't
  # even comprehend this method.
  attach_missing_disk
  check_persistent_disk

  return unless @instance.persistent_disk_changed?

  new_disk_cid = nil
  new_disk = nil
  old_disk = @instance.model.persistent_disk

  # A job disk size of zero (or less) means no new disk is created; the old
  # disk is then only deactivated and deleted further down.
  if @job.persistent_disk > 0
    @instance.model.db.transaction do
      new_disk_cid = @cloud.create_disk(@job.persistent_disk, @vm.cid)
      new_disk = Models::PersistentDisk.create(
        :disk_cid => new_disk_cid,
        :active => false,
        :instance_id => @instance.model.id,
        :size => @job.persistent_disk
      )
    end

    begin
      @cloud.attach_disk(@vm.cid, new_disk_cid)
    rescue Bosh::Clouds::NoDiskSpace => error
      unless error.ok_to_retry
        # Not retryable: drop the freshly created disk and give up.
        @cloud.delete_disk(new_disk_cid)
        new_disk.destroy
        raise
      end

      @logger.warn("Retrying attach disk operation after persistent disk update failed")
      # Recreate the vm
      update_resource_pool(new_disk_cid)
      begin
        @cloud.attach_disk(@vm.cid, new_disk_cid)
      rescue
        @cloud.delete_disk(new_disk_cid)
        new_disk.destroy
        raise
      end
    end

    begin
      agent.mount_disk(new_disk_cid)
      agent.migrate_disk(old_disk.disk_cid, new_disk_cid) if old_disk
    rescue
      delete_disk(new_disk, @vm.cid)
      raise
    end
  end

  # Swap the active flags in one transaction.
  @instance.model.db.transaction do
    old_disk.update(:active => false) if old_disk
    new_disk.update(:active => true) if new_disk
  end

  delete_disk(old_disk, @vm.cid) if old_disk
end
|
422
|
+
|
423
|
+
# Applies changed network settings to the VM, recreating the VM when the
# cloud cannot reconfigure networks in place.
def update_networks
  return unless @instance.networks_changed?

  settings = @instance.network_settings

  begin
    # If configure_networks can't configure the network as
    # requested, e.g. when the security groups change on AWS,
    # configure_networks() will raise an exception and we'll
    # recreate the VM to work around it
    @cloud.configure_networks(@vm.cid, settings)
  rescue Bosh::Clouds::NotSupported => error
    @logger.info("configure_networks not supported: #{error.message}")
    @instance.recreate = true
    update_resource_pool
    return
  end

  # Once CPI has configured the vm and stored the new network settings at the registry,
  # we restart the agent via a 'prepare_network_change' message in order for the agent
  # to pick up the new network settings.
  agent.prepare_network_change(settings)

  # Give some time to the agent to restart before pinging if it's ready (race condition)
  sleep(5)

  agent.wait_until_ready
end
|
451
|
+
|
452
|
+
# Returns an agent client for the current VM. The cached client is reused
# while its id matches the VM's agent id; otherwise a new one is built.
# @raise [VmAgentIdMissing] the VM has no agent id
def agent
  return @agent if @agent && @agent.id == @vm.agent_id

  raise VmAgentIdMissing, "VM #{@vm.id} is missing agent id" if @vm.agent_id.nil?

  @agent = AgentClient.new(@vm.agent_id)
end
|
462
|
+
|
463
|
+
# @return [String] a freshly generated random UUID to use as an agent id
def generate_agent_id
  SecureRandom.uuid
end
|
466
|
+
|
467
|
+
# Returns an array of wait times distributed
|
468
|
+
# on the [min_watch_time..max_watch_time] interval.
|
469
|
+
#
|
470
|
+
# Tries to respect intervals but doesn't allow an interval to
|
471
|
+
# fall under 1 second.
|
472
|
+
# All times are in milliseconds.
|
473
|
+
# @param [Numeric] min_watch_time minimum time to watch the jobs
|
474
|
+
# @param [Numeric] max_watch_time maximum time to watch the jobs
|
475
|
+
# @param [Numeric] intervals number of intervals between polling
|
476
|
+
# the state of the jobs
|
477
|
+
# @return [Array<Numeric>] watch schedule
|
478
|
+
def watch_schedule(min_watch_time, max_watch_time, intervals = WATCH_INTERVALS)
  delta = (max_watch_time - min_watch_time).to_f

  # An empty or inverted window leaves only the initial wait. Without this
  # guard an inverted window raised ArgumentError (negative array repeat),
  # and an empty window with intervals == 1 raised ArgumentError as well
  # (0.0 / 0 is NaN, which cannot be compared inside max).
  return [min_watch_time] if delta <= 0

  # Never poll more often than once per second (all times in milliseconds).
  step = [1000, delta / (intervals - 1)].max

  [min_watch_time] + ([step] * (delta / step).floor)
end
|
484
|
+
|
485
|
+
# @return [Boolean] Is instance shutting down for this update?
|
486
|
+
def shutting_down?
  # Any of these changes, or a stopped/detached target state, counts as
  # shutting down.
  infrastructure_changed =
    @instance.resource_pool_changed? ||
    @instance.persistent_disk_changed? ||
    @instance.networks_changed?

  infrastructure_changed || %w[stopped detached].include?(@target_state)
end
|
493
|
+
|
494
|
+
# Minimum watch time from the update config: the canary value for a canary
# update, the regular update value otherwise.
def min_watch_time
  return @update_config.min_canary_watch_time if canary?

  @update_config.min_update_watch_time
end
|
497
|
+
|
498
|
+
# Maximum watch time from the update config: the canary value for a canary
# update, the regular update value otherwise.
def max_watch_time
  return @update_config.max_canary_watch_time if canary?

  @update_config.max_update_watch_time
end
|
501
|
+
|
502
|
+
# @return the :canary option given to the most recent update call
#   (nil before update has been called)
def canary?
  @canary
end
|
505
|
+
end
|
506
|
+
end
|