bosh-director 1.5.0.pre.1113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +34 -0
- data/bin/bosh-director +36 -0
- data/bin/bosh-director-console +84 -0
- data/bin/bosh-director-drain-workers +42 -0
- data/bin/bosh-director-migrate +58 -0
- data/bin/bosh-director-scheduler +27 -0
- data/bin/bosh-director-worker +76 -0
- data/db/migrations/README +1 -0
- data/db/migrations/director/20110209010747_initial.rb +118 -0
- data/db/migrations/director/20110406055800_add_task_user.rb +9 -0
- data/db/migrations/director/20110518225809_remove_cid_constrain.rb +13 -0
- data/db/migrations/director/20110617211923_add_deployments_release_versions.rb +32 -0
- data/db/migrations/director/20110622212607_add_task_checkpoint_timestamp.rb +9 -0
- data/db/migrations/director/20110628023039_add_state_to_instances.rb +21 -0
- data/db/migrations/director/20110709012332_add_disk_size_to_instances.rb +9 -0
- data/db/migrations/director/20110906183441_add_log_bundles.rb +11 -0
- data/db/migrations/director/20110907194830_add_logs_json_to_templates.rb +9 -0
- data/db/migrations/director/20110915205610_add_persistent_disks.rb +51 -0
- data/db/migrations/director/20111005180929_add_properties.rb +14 -0
- data/db/migrations/director/20111110024617_add_deployment_problems.rb +24 -0
- data/db/migrations/director/20111216214145_recreate_support_for_vms.rb +9 -0
- data/db/migrations/director/20120102084027_add_credentials_to_vms.rb +7 -0
- data/db/migrations/director/20120427235217_allow_multiple_releases_per_deployment.rb +36 -0
- data/db/migrations/director/20120524175805_add_task_type.rb +44 -0
- data/db/migrations/director/20120614001930_delete_redundant_deployment_release_relation.rb +34 -0
- data/db/migrations/director/20120822004528_add_fingerprint_to_templates_and_packages.rb +17 -0
- data/db/migrations/director/20120830191244_add_properties_to_templates.rb +9 -0
- data/db/migrations/director/20121106190739_persist_vm_env.rb +9 -0
- data/db/migrations/director/20130222232131_add_sha1_to_stemcells.rb +9 -0
- data/db/migrations/director/20130312211407_add_commit_hash_to_release_versions.rb +19 -0
- data/db/migrations/director/20130409235338_snapshot.rb +15 -0
- data/db/migrations/director/20130530164918_add_paused_flag_to_instance.rb +14 -0
- data/db/migrations/director/20130531172604_add_director_attributes.rb +13 -0
- data/db/migrations/dns/20120123234908_initial.rb +27 -0
- data/lib/bosh/director.rb +133 -0
- data/lib/bosh/director/agent_client.rb +78 -0
- data/lib/bosh/director/api.rb +29 -0
- data/lib/bosh/director/api/api_helper.rb +81 -0
- data/lib/bosh/director/api/backup_manager.rb +15 -0
- data/lib/bosh/director/api/controller.rb +639 -0
- data/lib/bosh/director/api/controller_helpers.rb +34 -0
- data/lib/bosh/director/api/deployment_lookup.rb +13 -0
- data/lib/bosh/director/api/deployment_manager.rb +60 -0
- data/lib/bosh/director/api/http_constants.rb +16 -0
- data/lib/bosh/director/api/instance_lookup.rb +44 -0
- data/lib/bosh/director/api/instance_manager.rb +63 -0
- data/lib/bosh/director/api/problem_manager.rb +40 -0
- data/lib/bosh/director/api/property_manager.rb +69 -0
- data/lib/bosh/director/api/release_manager.rb +59 -0
- data/lib/bosh/director/api/resource_manager.rb +69 -0
- data/lib/bosh/director/api/resurrector_manager.rb +15 -0
- data/lib/bosh/director/api/snapshot_manager.rb +94 -0
- data/lib/bosh/director/api/stemcell_manager.rb +50 -0
- data/lib/bosh/director/api/task_helper.rb +46 -0
- data/lib/bosh/director/api/task_manager.rb +64 -0
- data/lib/bosh/director/api/user_manager.rb +72 -0
- data/lib/bosh/director/api/vm_state_manager.rb +11 -0
- data/lib/bosh/director/app.rb +35 -0
- data/lib/bosh/director/blob_util.rb +87 -0
- data/lib/bosh/director/blobstores.rb +29 -0
- data/lib/bosh/director/client.rb +156 -0
- data/lib/bosh/director/cloudcheck_helper.rb +204 -0
- data/lib/bosh/director/compile_task.rb +157 -0
- data/lib/bosh/director/config.rb +370 -0
- data/lib/bosh/director/configuration_hasher.rb +114 -0
- data/lib/bosh/director/cycle_helper.rb +36 -0
- data/lib/bosh/director/db_backup.rb +22 -0
- data/lib/bosh/director/db_backup/adapter.rb +3 -0
- data/lib/bosh/director/db_backup/adapter/mysql2.rb +27 -0
- data/lib/bosh/director/db_backup/adapter/postgres.rb +36 -0
- data/lib/bosh/director/db_backup/adapter/sqlite.rb +17 -0
- data/lib/bosh/director/db_backup/error.rb +10 -0
- data/lib/bosh/director/deployment_plan.rb +26 -0
- data/lib/bosh/director/deployment_plan/assembler.rb +430 -0
- data/lib/bosh/director/deployment_plan/compilation_config.rb +54 -0
- data/lib/bosh/director/deployment_plan/compiled_package.rb +35 -0
- data/lib/bosh/director/deployment_plan/dynamic_network.rb +91 -0
- data/lib/bosh/director/deployment_plan/idle_vm.rb +109 -0
- data/lib/bosh/director/deployment_plan/instance.rb +413 -0
- data/lib/bosh/director/deployment_plan/job.rb +470 -0
- data/lib/bosh/director/deployment_plan/manual_network.rb +137 -0
- data/lib/bosh/director/deployment_plan/network.rb +74 -0
- data/lib/bosh/director/deployment_plan/network_subnet.rb +167 -0
- data/lib/bosh/director/deployment_plan/planner.rb +288 -0
- data/lib/bosh/director/deployment_plan/preparer.rb +52 -0
- data/lib/bosh/director/deployment_plan/release.rb +126 -0
- data/lib/bosh/director/deployment_plan/resource_pool.rb +143 -0
- data/lib/bosh/director/deployment_plan/resource_pools.rb +68 -0
- data/lib/bosh/director/deployment_plan/stemcell.rb +56 -0
- data/lib/bosh/director/deployment_plan/template.rb +94 -0
- data/lib/bosh/director/deployment_plan/update_config.rb +80 -0
- data/lib/bosh/director/deployment_plan/updater.rb +55 -0
- data/lib/bosh/director/deployment_plan/vip_network.rb +79 -0
- data/lib/bosh/director/dns_helper.rb +204 -0
- data/lib/bosh/director/download_helper.rb +44 -0
- data/lib/bosh/director/duration.rb +36 -0
- data/lib/bosh/director/encryption_helper.rb +10 -0
- data/lib/bosh/director/errors.rb +198 -0
- data/lib/bosh/director/event_log.rb +136 -0
- data/lib/bosh/director/ext.rb +64 -0
- data/lib/bosh/director/hash_string_vals.rb +13 -0
- data/lib/bosh/director/instance_deleter.rb +109 -0
- data/lib/bosh/director/instance_updater.rb +506 -0
- data/lib/bosh/director/ip_util.rb +67 -0
- data/lib/bosh/director/job_queue.rb +16 -0
- data/lib/bosh/director/job_runner.rb +162 -0
- data/lib/bosh/director/job_updater.rb +121 -0
- data/lib/bosh/director/jobs/backup.rb +86 -0
- data/lib/bosh/director/jobs/base_job.rb +66 -0
- data/lib/bosh/director/jobs/cloud_check/apply_resolutions.rb +46 -0
- data/lib/bosh/director/jobs/cloud_check/scan.rb +38 -0
- data/lib/bosh/director/jobs/cloud_check/scan_and_fix.rb +73 -0
- data/lib/bosh/director/jobs/create_snapshot.rb +23 -0
- data/lib/bosh/director/jobs/delete_deployment.rb +183 -0
- data/lib/bosh/director/jobs/delete_deployment_snapshots.rb +34 -0
- data/lib/bosh/director/jobs/delete_release.rb +219 -0
- data/lib/bosh/director/jobs/delete_snapshots.rb +23 -0
- data/lib/bosh/director/jobs/delete_stemcell.rb +102 -0
- data/lib/bosh/director/jobs/fetch_logs.rb +99 -0
- data/lib/bosh/director/jobs/scheduled_backup.rb +38 -0
- data/lib/bosh/director/jobs/snapshot_deployment.rb +61 -0
- data/lib/bosh/director/jobs/snapshot_deployments.rb +23 -0
- data/lib/bosh/director/jobs/snapshot_self.rb +43 -0
- data/lib/bosh/director/jobs/ssh.rb +59 -0
- data/lib/bosh/director/jobs/update_deployment.rb +110 -0
- data/lib/bosh/director/jobs/update_release.rb +672 -0
- data/lib/bosh/director/jobs/update_stemcell.rb +109 -0
- data/lib/bosh/director/jobs/vm_state.rb +89 -0
- data/lib/bosh/director/lock.rb +133 -0
- data/lib/bosh/director/lock_helper.rb +92 -0
- data/lib/bosh/director/models.rb +29 -0
- data/lib/bosh/director/models/compiled_package.rb +33 -0
- data/lib/bosh/director/models/deployment.rb +22 -0
- data/lib/bosh/director/models/deployment_problem.rb +49 -0
- data/lib/bosh/director/models/deployment_property.rb +21 -0
- data/lib/bosh/director/models/director_attribute.rb +9 -0
- data/lib/bosh/director/models/dns.rb +9 -0
- data/lib/bosh/director/models/dns/domain.rb +9 -0
- data/lib/bosh/director/models/dns/record.rb +7 -0
- data/lib/bosh/director/models/helpers/model_helper.rb +7 -0
- data/lib/bosh/director/models/instance.rb +28 -0
- data/lib/bosh/director/models/log_bundle.rb +10 -0
- data/lib/bosh/director/models/package.rb +30 -0
- data/lib/bosh/director/models/persistent_disk.rb +13 -0
- data/lib/bosh/director/models/release.rb +17 -0
- data/lib/bosh/director/models/release_version.rb +16 -0
- data/lib/bosh/director/models/snapshot.rb +13 -0
- data/lib/bosh/director/models/stemcell.rb +18 -0
- data/lib/bosh/director/models/task.rb +10 -0
- data/lib/bosh/director/models/template.rb +44 -0
- data/lib/bosh/director/models/user.rb +11 -0
- data/lib/bosh/director/models/vm.rb +42 -0
- data/lib/bosh/director/nats_rpc.rb +54 -0
- data/lib/bosh/director/network_reservation.rb +121 -0
- data/lib/bosh/director/next_rebase_version.rb +20 -0
- data/lib/bosh/director/package_compiler.rb +423 -0
- data/lib/bosh/director/problem_handlers/base.rb +153 -0
- data/lib/bosh/director/problem_handlers/inactive_disk.rb +112 -0
- data/lib/bosh/director/problem_handlers/invalid_problem.rb +28 -0
- data/lib/bosh/director/problem_handlers/missing_vm.rb +34 -0
- data/lib/bosh/director/problem_handlers/mount_info_mismatch.rb +62 -0
- data/lib/bosh/director/problem_handlers/out_of_sync_vm.rb +64 -0
- data/lib/bosh/director/problem_handlers/unbound_instance_vm.rb +85 -0
- data/lib/bosh/director/problem_handlers/unresponsive_agent.rb +78 -0
- data/lib/bosh/director/problem_resolver.rb +103 -0
- data/lib/bosh/director/problem_scanner.rb +268 -0
- data/lib/bosh/director/resource_pool_updater.rb +216 -0
- data/lib/bosh/director/scheduler.rb +57 -0
- data/lib/bosh/director/sequel.rb +13 -0
- data/lib/bosh/director/tar_gzipper.rb +47 -0
- data/lib/bosh/director/task_result_file.rb +19 -0
- data/lib/bosh/director/thread_pool.rb +8 -0
- data/lib/bosh/director/validation_helper.rb +55 -0
- data/lib/bosh/director/version.rb +7 -0
- data/lib/bosh/director/vm_creator.rb +80 -0
- data/lib/bosh/director/vm_data.rb +63 -0
- data/lib/bosh/director/vm_metadata_updater.rb +29 -0
- data/lib/bosh/director/vm_reuser.rb +63 -0
- data/lib/cloud/dummy.rb +149 -0
- metadata +664 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
|
2
|
+
|
|
3
|
+
module Bosh::Director
|
|
4
|
+
# Coordinates the safe deletion of an instance and all associated resources.
|
|
5
|
+
class InstanceDeleter
|
|
6
|
+
include DnsHelper
|
|
7
|
+
|
|
8
|
+
# Builds a deleter bound to the given deployment plan.
# The cloud, logger and event log handles are read from the director Config.
# @param deployment_plan plan whose canonical name and DNS domain are used
#   when DNS records are removed
def initialize(deployment_plan)
  @deployment_plan = deployment_plan
  @cloud, @logger, @event_log = Config.cloud, Config.logger, Config.event_log
end
|
|
14
|
+
|
|
15
|
+
# Deletes every instance in the list, submitting one deletion per instance
# to a thread pool.
# @param [Array<Models::Instance>] instances list of instances to delete
# @param [Hash] options optional settings; :max_threads overrides
#   Config.max_threads as the pool size
# @return [void]
def delete_instances(instances, options = {})
  pool_size = options[:max_threads] || Config.max_threads

  ThreadPool.new(:max_threads => pool_size).wrap do |pool|
    instances.each do |instance|
      pool.process do
        delete_instance(instance)
      end
    end
  end
end
|
|
27
|
+
|
|
28
|
+
# Deletes a single instance together with its VM, snapshots, persistent
# disks and DNS records. The whole operation is tracked in the event log
# under the VM's cid.
# @param [Models::Instance] instance instance to delete
# @return [void]
def delete_instance(instance)
  vm = instance.vm

  @event_log.track(vm.cid) do
    @logger.info("Delete unneeded instance: #{vm.cid}")

    # External resources are released first, in this order: agent, VM,
    # snapshots, disks, DNS.
    drain(vm.agent_id)
    @cloud.delete_vm(vm.cid)
    delete_snapshots(instance)
    delete_persistent_disks(instance.persistent_disks)
    delete_dns(instance.job, instance.index)

    # The database rows go last, both inside a single transaction.
    vm.db.transaction do
      instance.destroy
      vm.destroy
    end
  end
end
|
|
48
|
+
|
|
49
|
+
# Drains the jobs on the agent and then stops it.
#
# The agent's reply to "shutdown" is interpreted as follows:
# * zero or positive: static drain, wait that many seconds, then stop;
# * negative: dynamic drain, wait abs(value) seconds and ask the agent
#   for "status"; keep polling while the reply stays negative. The first
#   non-negative reply is the final wait before stopping.
#
# A failure while polling is logged and ends the drain early (the agent is
# still stopped), except for Bosh::Director::TaskCancelled, which is
# re-raised so a cancelled task aborts the deletion.
#
# @param [String] agent_id agent id
# @return [void]
# @raise [Bosh::Director::TaskCancelled] when raised during polling
def drain(agent_id)
  agent = AgentClient.new(agent_id)
  drain_time = agent.drain("shutdown")

  # Poll for as long as the agent answers with a negative number. The
  # previous `begin ... end while drain_time > 0` form stopped polling as
  # soon as the agent said "still draining" and polled again after the
  # final wait, which is the opposite of the protocol.
  while drain_time < 0
    wait_time = drain_time.abs
    begin
      # NOTE(review): presumably raises TaskCancelled when the task was
      # cancelled - confirm against Config.
      Config.job_cancelled?
      @logger.info("Drain - check back in #{wait_time} seconds")
      sleep(wait_time)
      drain_time = agent.drain("status")
    rescue => e
      @logger.warn("Failed to check drain-status: #{e.inspect}")
      raise if e.kind_of?(Bosh::Director::TaskCancelled)
      # Best effort: give up on draining, no further waiting.
      drain_time = 0
      break
    end
  end

  sleep(drain_time) if drain_time > 0
  agent.stop
end
|
|
73
|
+
|
|
74
|
+
# Deletes the snapshots of every persistent disk of the instance.
# @param [Models::Instance] instance instance whose disk snapshots are removed
def delete_snapshots(instance)
  per_disk_snapshots = instance.persistent_disks.map(&:snapshots)
  Bosh::Director::Api::SnapshotManager.delete_snapshots(per_disk_snapshots.flatten)
end
|
|
78
|
+
|
|
79
|
+
# Deletes each persistent disk from the cloud and then destroys its record.
# A disk the cloud reports as not found is tolerated when the record is
# inactive; for an active disk the error is re-raised.
# @param [Array<Models::PersistentDisk>] persistent_disks disks
# @return [void]
def delete_persistent_disks(persistent_disks)
  persistent_disks.each do |disk|
    @logger.info("Deleting disk: `#{disk.disk_cid}', #{disk.active ? "active" : "inactive"}")

    begin
      @cloud.delete_disk(disk.disk_cid)
    rescue Bosh::Clouds::DiskNotFound
      @logger.warn("Disk not found: #{disk.disk_cid}")
      raise if disk.active
    end

    disk.destroy
  end
end
|
|
95
|
+
|
|
96
|
+
# Deletes the DNS records of one job instance. Does nothing when DNS is
# disabled in the director Config.
# The record pattern is "<index>.<canonical job>.%.<deployment>.<domain>".
# @param [String] job job name
# @param [Numeric] index job index
# @return [void]
def delete_dns(job, index)
  return unless Config.dns_enabled?

  pattern_parts = [index, canonical(job), "%",
                   @deployment_plan.canonical_name, dns_domain_name]
  delete_dns_records(pattern_parts.join("."), @deployment_plan.dns_domain.id)
end
|
|
107
|
+
|
|
108
|
+
end
|
|
109
|
+
end
|
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
# Copyright (c) 2009-2012 VMware, Inc.
|
|
2
|
+
|
|
3
|
+
module Bosh::Director
|
|
4
|
+
class InstanceUpdater
|
|
5
|
+
include DnsHelper
|
|
6
|
+
|
|
7
|
+
MAX_ATTACH_DISK_TRIES = 3
|
|
8
|
+
UPDATE_STEPS = 7
|
|
9
|
+
WATCH_INTERVALS = 10
|
|
10
|
+
|
|
11
|
+
attr_reader :current_state
|
|
12
|
+
|
|
13
|
+
# @param [DeploymentPlan::Instance] instance instance to update
# @param event_ticker optional ticker; when given it is advanced after
#   every update step
def initialize(instance, event_ticker = nil)
  # Director-wide collaborators
  @cloud = Config.cloud
  @logger = Config.logger
  @ticker = event_ticker

  # The instance and what the plan says about it
  @instance = instance
  @job = instance.job
  @target_state = @instance.state

  @deployment_plan = @job.deployment
  @resource_pool_spec = @job.resource_pool
  @update_config = @job.update

  # VM record currently bound to the instance
  @vm = @instance.model.vm

  # Last agent state seen by wait_until_running
  @current_state = {}
end
|
|
32
|
+
|
|
33
|
+
# @return [String] "<job name>/<index>" label used in log and error messages
def instance_name
  format('%s/%s', @job.name, @instance.index)
end
|
|
36
|
+
|
|
37
|
+
# Runs the given block as one update step and reports progress afterwards.
# Progress is not reported when the block raises.
# @yield the work of this step
def step
  yield
  report_progress
end
|
|
41
|
+
|
|
42
|
+
# Advances the event ticker by one step's share of 100%.
# Does nothing when no ticker was supplied.
def report_progress
  return unless @ticker

  @ticker.advance(100.0 / update_steps)
end
|
|
45
|
+
|
|
46
|
+
# @return [Integer] number of steps used to scale progress reporting:
#   UPDATE_STEPS, plus one when the job or its packages changed
def update_steps
  job_or_packages_changed = @instance.job_changed? || @instance.packages_changed?
  job_or_packages_changed ? UPDATE_STEPS + 1 : UPDATE_STEPS
end
|
|
49
|
+
|
|
50
|
+
# Brings the instance to its target state.
#
# Flow, as implemented below:
# * a DNS-only change updates DNS and returns;
# * otherwise the job is stopped and a snapshot is taken;
# * target state "detached": the disk is detached, the VM deleted, an idle
#   VM is added to the resource pool, and the method returns;
# * otherwise resource pool, networks, DNS and persistent disk are updated,
#   VM metadata is refreshed, the instance spec is applied, the job is
#   started when required, and the job state is watched.
#
# @param [Hash] options :canary (default false) selects the canary watch times
# @raise [AgentJobNotRunning] target is "started" but the job is not running
# @raise [AgentJobNotStopped] target is "stopped" but the job is still running
def update(options = {})
  @canary = options.fetch(:canary, false)

  @logger.info("Updating instance #{@instance}, changes: #{@instance.changes.to_a.join(', ')}")

  # Optimization to only update DNS if nothing else changed.
  if dns_change_only?
    update_dns
    return
  end

  step { stop }
  step { take_snapshot }

  if @target_state == "detached"
    detach_disk
    delete_vm
    @resource_pool_spec.add_idle_vm
    return
  end

  step { update_resource_pool }
  step { update_networks }
  step { update_dns }
  step { update_persistent_disk }

  VmMetadataUpdater.build.update(@vm, {})

  step { apply_state(@instance.spec) }

  start! if need_start?

  step { wait_until_running }

  # Verify that the watch period ended in the requested state.
  case @target_state
  when "started"
    if current_state["job_state"] != "running"
      raise AgentJobNotRunning, "`#{instance_name}' is not running after update"
    end
  when "stopped"
    if current_state["job_state"] == "running"
      raise AgentJobNotStopped, "`#{instance_name}' is still running despite the stop command"
    end
  end
end
|
|
92
|
+
|
|
93
|
+
# Polls the agent state along the watch schedule until the job reaches the
# target state or the schedule is exhausted. The last state read is kept
# in @current_state.
#
# Watch times don't include the get_state roundtrip time, so effective
# max watch time is roughly:
# max_watch_time + N_WATCH_INTERVALS * avg_roundtrip_time
def wait_until_running
  watch_schedule(min_watch_time, max_watch_time).each do |watch_time|
    # Schedule entries are milliseconds; sleep takes seconds.
    seconds = watch_time.to_f / 1000
    @logger.info("Waiting for #{seconds} seconds to check #{instance_name} status")
    sleep(seconds)
    @logger.info("Checking if #{instance_name} has been updated after #{seconds} seconds")

    @current_state = agent.get_state

    case @target_state
    when "started"
      break if current_state["job_state"] == "running"
    when "stopped"
      break if current_state["job_state"] != "running"
    end
  end
end
|
|
112
|
+
|
|
113
|
+
# Asks the agent to start the job. A RuntimeError from the agent is logged
# and swallowed.
def start!
  begin
    agent.start
  rescue RuntimeError => e
    # FIXME: this is a stopgap. There is no way (yet) to negotiate the
    # BOSH protocol between director and agent, so updating from an agent
    # version that doesn't support the 'start' RPC to one that does might
    # be hard. For now the exception is deliberately swallowed.
    # This needs to be removed in one of the following cases:
    # 1. BOSH protocol handshake gets implemented
    # 2. All agents updated to support 'start' RPC
    #    and we no longer care about backward compatibility.
    @logger.warn("Agent start raised an exception: #{e.inspect}, ignoring for compatibility")
  end
end
|
|
127
|
+
|
|
128
|
+
# @return [Boolean] true when the target state of the instance is 'started'
def need_start?
  @target_state == 'started'
end
|
|
131
|
+
|
|
132
|
+
# @return [Boolean] true when :dns is the one and only pending change
def dns_change_only?
  pending = @instance.changes
  pending.include?(:dns) && pending.size == 1
end
|
|
135
|
+
|
|
136
|
+
# Drains the job and then stops it on the agent.
# The agent is asked to drain for "shutdown" when the instance is shutting
# down for this update, otherwise for "update" with the new instance spec.
# A positive reply is slept for directly; any other reply is handed to
# wait_for_dynamic_drain.
def stop
  drain_time =
    if shutting_down?
      agent.drain("shutdown")
    else
      agent.drain("update", @instance.spec)
    end

  drain_time > 0 ? sleep(drain_time) : wait_for_dynamic_drain(drain_time)

  agent.stop
end
|
|
147
|
+
|
|
148
|
+
# Waits out a dynamic drain. Each round sleeps for the absolute value of
# the current drain time; a negative value means the agent is asked for
# "status" again afterwards, a non-negative value ends the wait.
# @param [Numeric] initial_drain_time first drain time reported by the agent
def wait_for_dynamic_drain(initial_drain_time)
  drain_time = initial_drain_time

  loop do
    # This could go on forever if drain script is broken, canceling the task is a way out.
    Config.task_checkpoint

    wait_time = drain_time.abs
    if wait_time > 0
      @logger.info("`#{@instance}' is draining: checking back in #{wait_time}s")
      sleep(wait_time)
    end

    # Positive number always means last drain call:
    break if drain_time >= 0

    # Errors from drain status are intentionally not rescued here:
    # swallowing them could mask real problems.
    drain_time = agent.drain("status")
  end
end
|
|
170
|
+
|
|
171
|
+
# Takes a snapshot of the instance through the snapshot manager, passing
# the :clean option as true.
def take_snapshot
  Api::SnapshotManager.take_snapshot(@instance.model, :clean => true)
end
|
|
174
|
+
|
|
175
|
+
# Deletes all snapshots of the given persistent disk.
# @param disk persistent disk record whose snapshots are removed
def delete_snapshots(disk)
  Api::SnapshotManager.delete_snapshots(disk.snapshots)
end
|
|
178
|
+
|
|
179
|
+
# Unmounts the persistent disk on the agent and detaches it from the VM.
# Does nothing when the instance reports no disk currently attached.
# @raise [AgentUnexpectedDisk] a disk is attached but the director DB has
#   no persistent disk cid for the instance
def detach_disk
  return unless @instance.disk_currently_attached?

  disk_cid = @instance.model.persistent_disk_cid
  if disk_cid.nil?
    raise AgentUnexpectedDisk,
          "`#{instance_name}' VM has disk attached but it's not reflected in director DB"
  end

  agent.unmount_disk(disk_cid)
  @cloud.detach_disk(@vm.cid, disk_cid)
end
|
|
191
|
+
|
|
192
|
+
# Attaches the instance's persistent disk to the VM and mounts it on the
# agent. Does nothing when the instance has no persistent disk cid.
def attach_disk
  disk_cid = @instance.model.persistent_disk_cid
  return if disk_cid.nil?

  @cloud.attach_disk(@vm.cid, disk_cid)
  agent.mount_disk(disk_cid)
end
|
|
198
|
+
|
|
199
|
+
# Deletes the VM from the cloud, then in one transaction unlinks it from
# the instance record and destroys the VM record.
# NOTE(review): @vm keeps pointing at the destroyed record afterwards -
# confirm callers do not rely on it being cleared.
def delete_vm
  @cloud.delete_vm(@vm.cid)

  instance_model = @instance.model
  instance_model.db.transaction do
    @instance.model.vm = nil
    @instance.model.save
    @vm.destroy
  end
end
|
|
208
|
+
|
|
209
|
+
# Creates a VM for the instance, links it to the instance record and waits
# for its agent to become ready. On any error the VM held in @vm is
# deleted again and the error is re-raised.
# NOTE(review): when VmCreator.create itself fails, @vm may still hold the
# previous VM record - verify that deleting it in the rescue is intended.
# @param new_disk_id cid of an additional disk to pass to the VM creator,
#   or nil
def create_vm(new_disk_id)
  begin
    stemcell = @resource_pool_spec.stemcell
    # Existing persistent disk plus the new one, without nils.
    disks = [@instance.model.persistent_disk_cid, new_disk_id].compact

    @vm = VmCreator.create(
      @deployment_plan.model,
      stemcell.model,
      @resource_pool_spec.cloud_properties,
      @instance.network_settings,
      disks,
      @resource_pool_spec.env
    )
    @instance.model.vm = @vm
    @instance.model.save

    agent.wait_until_ready
  rescue => e
    if @vm
      @logger.error("error during create_vm(), deleting vm #{@vm.cid}")
      delete_vm
    end
    raise e
  end
end
|
|
228
|
+
|
|
229
|
+
# Stores the state as the VM record's apply_spec and then sends it to the
# agent to apply.
# @param [Hash] state spec to persist and apply
def apply_state(state)
  @vm.update(:apply_spec => state)
  agent.apply(state)
end
|
|
233
|
+
|
|
234
|
+
# Retrieve list of mounted disks from the agent.
# The agent's answer is memoized in @disk_list. When the agent raises a
# RuntimeError (old agents don't support the list_disk RPC) the director's
# own record of the persistent disk is returned instead; that fallback is
# not memoized.
# @return [Array<String>] list of disk CIDs
def disk_info
  return @disk_list if @disk_list

  begin
    @disk_list = agent.list_disk
  rescue RuntimeError
    # old agents don't support list_disk rpc
    # The cid is read from the instance model, as everywhere else in this
    # class (previously it was read from the plan instance directly).
    [@instance.model.persistent_disk_cid]
  end
end
|
|
246
|
+
|
|
247
|
+
# Removes a persistent disk: unmounts it when the agent knows it, detaches
# it from the VM, deletes its snapshots, deletes it from the cloud and
# destroys its record.
# Cloud errors about a disk that is not attached or not found are ignored
# for inactive disks and turned into director errors for active ones.
# @param disk persistent disk record to delete
# @param vm_cid cid of the VM to detach from; detaching is skipped when nil
# @raise [CloudDiskNotAttached] active disk is not attached according to CPI
# @raise [CloudDiskMissing] active disk is missing according to CPI
def delete_disk(disk, vm_cid)
  disk_cid = disk.disk_cid

  # Unmount the disk only if disk is known by the agent
  agent.unmount_disk(disk_cid) if agent && disk_info.include?(disk_cid)

  begin
    @cloud.detach_disk(vm_cid, disk_cid) if vm_cid
  rescue Bosh::Clouds::DiskNotAttached
    if disk.active
      raise CloudDiskNotAttached,
            "`#{instance_name}' VM should have persistent disk attached but it doesn't (according to CPI)"
    end
  end

  delete_snapshots(disk)

  begin
    @cloud.delete_disk(disk_cid)
  rescue Bosh::Clouds::DiskNotFound
    if disk.active
      raise CloudDiskMissing,
            "Disk `#{disk_cid}' is missing according to CPI but marked as active in DB"
    end
  end

  disk.destroy
end
|
|
278
|
+
|
|
279
|
+
# Updates the A and PTR records for every DNS name of the instance.
# Does nothing unless the instance reports a DNS change.
def update_dns
  return unless @instance.dns_changed?

  dns_domain = @deployment_plan.dns_domain

  @instance.dns_record_info.each do |record_name, ip_address|
    @logger.info("Updating DNS for: #{record_name} to #{ip_address}")
    update_dns_a_record(dns_domain, record_name, ip_address)
    update_dns_ptr_record(record_name, ip_address)
  end
end
|
|
289
|
+
|
|
290
|
+
# Recreates the VM: detaches the disk, deletes the VM, creates a new one,
# re-attaches the disk and applies a basic state to the new agent.
# Runs only when the resource pool changed or a new disk cid is given.
# When the cloud reports NoDiskSpace with ok_to_retry set, the
# delete/create/attach sequence is retried up to MAX_ATTACH_DISK_TRIES
# times.
# @param new_disk_cid cid of a new disk to pass to create_vm, or nil
# @raise [CloudNotEnoughDiskSpace] when the retries are exhausted or the
#   cloud error is not retryable
def update_resource_pool(new_disk_cid = nil)
  return unless @instance.resource_pool_changed? || new_disk_cid

  detach_disk

  retries_so_far = 0
  begin
    delete_vm
    create_vm(new_disk_cid)
    attach_disk
  rescue Bosh::Clouds::NoDiskSpace => e
    if e.ok_to_retry && retries_so_far < MAX_ATTACH_DISK_TRIES
      retries_so_far += 1
      @logger.warn("Retrying attach disk operation #{retries_so_far}")
      retry
    end

    @logger.warn("Giving up on attach disk operation")
    e.ok_to_retry = false
    raise CloudNotEnoughDiskSpace,
          "Not enough disk space to update `#{instance_name}'"
  end

  agent_state = {
    "deployment" => @deployment_plan.name,
    "networks" => @instance.network_settings,
    "resource_pool" => @job.resource_pool.spec,
    "job" => @job.spec,
    "index" => @instance.index,
    "release" => @job.release.spec
  }
  agent_state["persistent_disk"] = @instance.disk_size if @instance.disk_size > 0

  # if we have a failure above the new VM doesn't get any state,
  # which makes it impossible to recreate it
  apply_state(agent_state)
  @instance.current_state = agent.get_state
end
|
|
329
|
+
|
|
330
|
+
# Attaches the persistent disk when the director has a disk cid for the
# instance but the instance reports no disk currently attached. When the
# cloud reports NoDiskSpace, the VM is recreated via update_resource_pool
# with that disk cid.
def attach_missing_disk
  begin
    attach_disk if @instance.model.persistent_disk_cid && !@instance.disk_currently_attached?
  rescue Bosh::Clouds::NoDiskSpace
    update_resource_pool(@instance.model.persistent_disk_cid)
  end
end
|
|
338
|
+
|
|
339
|
+
# Verifies that the persistent disk reported by the agent matches the
# director's record, and logs a warning for every inactive disk record.
# Does nothing when the instance has no persistent disk records.
#
# NOTE: Currently assumes that we only have 1 persistent disk.
# @return [void]
# @raise [AgentDiskOutOfSync] when the agent's first disk cid differs from
#   the director's persistent disk cid
def check_persistent_disk
  return if @instance.model.persistent_disks.empty?

  agent_disk_cid = disk_info.first

  unless agent_disk_cid == @instance.model.persistent_disk_cid
    raise AgentDiskOutOfSync,
          "`#{instance_name}' has invalid disks: agent reports " \
          "`#{agent_disk_cid}' while director record shows " \
          "`#{@instance.model.persistent_disk_cid}'"
  end

  @instance.model.persistent_disks.each do |disk|
    next if disk.active

    @logger.warn("`#{instance_name}' has inactive disk #{disk.disk_cid}")
  end
end
|
|
360
|
+
|
|
361
|
+
# Replaces the instance's persistent disk when its size changed.
#
# Steps, as implemented below:
# 1. attach a disk that is recorded but not attached, and check that agent
#    and director agree on the disk;
# 2. return unless the instance reports a persistent disk change;
# 3. when the job asks for a disk (size > 0): create the disk and its
#    (inactive) record, attach it (recreating the VM once when the cloud
#    reports a retryable NoDiskSpace), mount it and migrate data from the
#    old disk; the new disk is cleaned up when any of this fails;
# 4. flip the active flags in one transaction and delete the old disk.
#
# FIXME(cleanup): the control flow of this method should be simplified
# before its error handling is cleaned up.
def update_persistent_disk
  attach_missing_disk
  check_persistent_disk

  # Declared up front so the transaction block below can assign them.
  disk_cid = nil
  disk = nil
  return unless @instance.persistent_disk_changed?

  old_disk = @instance.model.persistent_disk

  if @job.persistent_disk > 0
    @instance.model.db.transaction do
      disk_cid = @cloud.create_disk(@job.persistent_disk, @vm.cid)
      disk = Models::PersistentDisk.create(
        :disk_cid => disk_cid,
        :active => false,
        :instance_id => @instance.model.id,
        :size => @job.persistent_disk
      )
    end

    begin
      @cloud.attach_disk(@vm.cid, disk_cid)
    rescue Bosh::Clouds::NoDiskSpace => e
      unless e.ok_to_retry
        @cloud.delete_disk(disk_cid)
        disk.destroy
        raise
      end

      @logger.warn("Retrying attach disk operation after persistent disk update failed")
      # Recreate the vm
      update_resource_pool(disk_cid)
      begin
        @cloud.attach_disk(@vm.cid, disk_cid)
      rescue
        @cloud.delete_disk(disk_cid)
        disk.destroy
        raise
      end
    end

    begin
      agent.mount_disk(disk_cid)
      agent.migrate_disk(old_disk.disk_cid, disk_cid) if old_disk
    rescue
      delete_disk(disk, @vm.cid)
      raise
    end
  end

  @instance.model.db.transaction do
    old_disk.update(:active => false) if old_disk
    disk.update(:active => true) if disk
  end

  delete_disk(old_disk, @vm.cid) if old_disk
end
|
|
422
|
+
|
|
423
|
+
# Applies changed network settings to the VM. Does nothing unless the
# instance reports a network change. When the cloud does not support
# reconfiguring networks, the instance is flagged for recreation and the
# VM is rebuilt through update_resource_pool instead.
def update_networks
  return unless @instance.networks_changed?

  settings = @instance.network_settings

  begin
    # If configure_networks can't configure the network as
    # requested, e.g. when the security groups change on AWS,
    # configure_networks() will raise an exception and we'll
    # recreate the VM to work around it
    @cloud.configure_networks(@vm.cid, settings)
  rescue Bosh::Clouds::NotSupported => unsupported
    @logger.info("configure_networks not supported: #{unsupported.message}")
    @instance.recreate = true
    update_resource_pool
    return
  end

  # Once CPI has configured the vm and stored the new network settings at the registry,
  # we restart the agent via a 'prepare_network_change' message in order for the agent
  # to pick up the new network settings.
  agent.prepare_network_change(settings)

  # Give some time to the agent to restart before pinging if it's ready (race condition)
  sleep(5)

  agent.wait_until_ready
end
|
|
451
|
+
|
|
452
|
+
# Returns an agent client for the current VM. The cached client is reused
# while its id matches the VM's agent id; otherwise a new client is built
# and cached.
# @raise [VmAgentIdMissing] when the VM has no agent id
def agent
  return @agent if @agent && @agent.id == @vm.agent_id

  raise VmAgentIdMissing, "VM #{@vm.id} is missing agent id" if @vm.agent_id.nil?

  @agent = AgentClient.new(@vm.agent_id)
end
|
|
462
|
+
|
|
463
|
+
# @return [String] a freshly generated random UUID
def generate_agent_id
  SecureRandom.uuid
end
|
|
466
|
+
|
|
467
|
+
# Returns an array of wait times distributed
# on the [min_watch_time..max_watch_time] interval.
#
# The first entry is min_watch_time itself; the remaining entries are
# equal-sized waits that fill the rest of the interval. An interval is
# never allowed to fall under 1 second, so fewer entries are produced when
# the interval is short.
# All times are in milliseconds.
# @param [Numeric] min_watch_time minimum time to watch the jobs
# @param [Numeric] max_watch_time maximum time to watch the jobs
# @param [Numeric] intervals number of intervals between polling
#   the state of the jobs
# @return [Array<Numeric>] watch schedule
def watch_schedule(min_watch_time, max_watch_time, intervals = WATCH_INTERVALS)
  span = (max_watch_time - min_watch_time).to_f
  interval_length = [1000, span / (intervals - 1)].max
  follow_up_waits = (span / interval_length).floor

  [min_watch_time] + ([interval_length] * follow_up_waits)
end
|
|
484
|
+
|
|
485
|
+
# True when the resource pool, persistent disk or networks changed, or
# when the target state is "stopped" or "detached".
# @return [Boolean] Is instance shutting down for this update?
def shutting_down?
  return true if @target_state == "stopped" && !infrastructure_changed_for_shutdown?
  infrastructure_changed_for_shutdown? || @target_state == "detached"
end

# Evaluates the three change predicates in their original order and
# returns the first truthy result (or the last falsy one).
def infrastructure_changed_for_shutdown?
  @instance.resource_pool_changed? ||
    @instance.persistent_disk_changed? ||
    @instance.networks_changed?
end
|
|
493
|
+
|
|
494
|
+
# @return minimum watch time from the update config: the canary value for
#   a canary update, the regular update value otherwise
def min_watch_time
  if canary?
    @update_config.min_canary_watch_time
  else
    @update_config.min_update_watch_time
  end
end
|
|
497
|
+
|
|
498
|
+
# @return maximum watch time from the update config: the canary value for
#   a canary update, the regular update value otherwise
def max_watch_time
  if canary?
    @update_config.max_canary_watch_time
  else
    @update_config.max_update_watch_time
  end
end
|
|
501
|
+
|
|
502
|
+
# @return the :canary option given to the most recent #update call
#   (nil before #update has been called)
def canary?
  @canary
end
|
|
505
|
+
end
|
|
506
|
+
end
|