bosh-director 1.5.0.pre.1113

Sign up to get free protection for your applications and to get access to all the features.
Files changed (180) hide show
  1. data/CHANGELOG +34 -0
  2. data/bin/bosh-director +36 -0
  3. data/bin/bosh-director-console +84 -0
  4. data/bin/bosh-director-drain-workers +42 -0
  5. data/bin/bosh-director-migrate +58 -0
  6. data/bin/bosh-director-scheduler +27 -0
  7. data/bin/bosh-director-worker +76 -0
  8. data/db/migrations/README +1 -0
  9. data/db/migrations/director/20110209010747_initial.rb +118 -0
  10. data/db/migrations/director/20110406055800_add_task_user.rb +9 -0
  11. data/db/migrations/director/20110518225809_remove_cid_constrain.rb +13 -0
  12. data/db/migrations/director/20110617211923_add_deployments_release_versions.rb +32 -0
  13. data/db/migrations/director/20110622212607_add_task_checkpoint_timestamp.rb +9 -0
  14. data/db/migrations/director/20110628023039_add_state_to_instances.rb +21 -0
  15. data/db/migrations/director/20110709012332_add_disk_size_to_instances.rb +9 -0
  16. data/db/migrations/director/20110906183441_add_log_bundles.rb +11 -0
  17. data/db/migrations/director/20110907194830_add_logs_json_to_templates.rb +9 -0
  18. data/db/migrations/director/20110915205610_add_persistent_disks.rb +51 -0
  19. data/db/migrations/director/20111005180929_add_properties.rb +14 -0
  20. data/db/migrations/director/20111110024617_add_deployment_problems.rb +24 -0
  21. data/db/migrations/director/20111216214145_recreate_support_for_vms.rb +9 -0
  22. data/db/migrations/director/20120102084027_add_credentials_to_vms.rb +7 -0
  23. data/db/migrations/director/20120427235217_allow_multiple_releases_per_deployment.rb +36 -0
  24. data/db/migrations/director/20120524175805_add_task_type.rb +44 -0
  25. data/db/migrations/director/20120614001930_delete_redundant_deployment_release_relation.rb +34 -0
  26. data/db/migrations/director/20120822004528_add_fingerprint_to_templates_and_packages.rb +17 -0
  27. data/db/migrations/director/20120830191244_add_properties_to_templates.rb +9 -0
  28. data/db/migrations/director/20121106190739_persist_vm_env.rb +9 -0
  29. data/db/migrations/director/20130222232131_add_sha1_to_stemcells.rb +9 -0
  30. data/db/migrations/director/20130312211407_add_commit_hash_to_release_versions.rb +19 -0
  31. data/db/migrations/director/20130409235338_snapshot.rb +15 -0
  32. data/db/migrations/director/20130530164918_add_paused_flag_to_instance.rb +14 -0
  33. data/db/migrations/director/20130531172604_add_director_attributes.rb +13 -0
  34. data/db/migrations/dns/20120123234908_initial.rb +27 -0
  35. data/lib/bosh/director.rb +133 -0
  36. data/lib/bosh/director/agent_client.rb +78 -0
  37. data/lib/bosh/director/api.rb +29 -0
  38. data/lib/bosh/director/api/api_helper.rb +81 -0
  39. data/lib/bosh/director/api/backup_manager.rb +15 -0
  40. data/lib/bosh/director/api/controller.rb +639 -0
  41. data/lib/bosh/director/api/controller_helpers.rb +34 -0
  42. data/lib/bosh/director/api/deployment_lookup.rb +13 -0
  43. data/lib/bosh/director/api/deployment_manager.rb +60 -0
  44. data/lib/bosh/director/api/http_constants.rb +16 -0
  45. data/lib/bosh/director/api/instance_lookup.rb +44 -0
  46. data/lib/bosh/director/api/instance_manager.rb +63 -0
  47. data/lib/bosh/director/api/problem_manager.rb +40 -0
  48. data/lib/bosh/director/api/property_manager.rb +69 -0
  49. data/lib/bosh/director/api/release_manager.rb +59 -0
  50. data/lib/bosh/director/api/resource_manager.rb +69 -0
  51. data/lib/bosh/director/api/resurrector_manager.rb +15 -0
  52. data/lib/bosh/director/api/snapshot_manager.rb +94 -0
  53. data/lib/bosh/director/api/stemcell_manager.rb +50 -0
  54. data/lib/bosh/director/api/task_helper.rb +46 -0
  55. data/lib/bosh/director/api/task_manager.rb +64 -0
  56. data/lib/bosh/director/api/user_manager.rb +72 -0
  57. data/lib/bosh/director/api/vm_state_manager.rb +11 -0
  58. data/lib/bosh/director/app.rb +35 -0
  59. data/lib/bosh/director/blob_util.rb +87 -0
  60. data/lib/bosh/director/blobstores.rb +29 -0
  61. data/lib/bosh/director/client.rb +156 -0
  62. data/lib/bosh/director/cloudcheck_helper.rb +204 -0
  63. data/lib/bosh/director/compile_task.rb +157 -0
  64. data/lib/bosh/director/config.rb +370 -0
  65. data/lib/bosh/director/configuration_hasher.rb +114 -0
  66. data/lib/bosh/director/cycle_helper.rb +36 -0
  67. data/lib/bosh/director/db_backup.rb +22 -0
  68. data/lib/bosh/director/db_backup/adapter.rb +3 -0
  69. data/lib/bosh/director/db_backup/adapter/mysql2.rb +27 -0
  70. data/lib/bosh/director/db_backup/adapter/postgres.rb +36 -0
  71. data/lib/bosh/director/db_backup/adapter/sqlite.rb +17 -0
  72. data/lib/bosh/director/db_backup/error.rb +10 -0
  73. data/lib/bosh/director/deployment_plan.rb +26 -0
  74. data/lib/bosh/director/deployment_plan/assembler.rb +430 -0
  75. data/lib/bosh/director/deployment_plan/compilation_config.rb +54 -0
  76. data/lib/bosh/director/deployment_plan/compiled_package.rb +35 -0
  77. data/lib/bosh/director/deployment_plan/dynamic_network.rb +91 -0
  78. data/lib/bosh/director/deployment_plan/idle_vm.rb +109 -0
  79. data/lib/bosh/director/deployment_plan/instance.rb +413 -0
  80. data/lib/bosh/director/deployment_plan/job.rb +470 -0
  81. data/lib/bosh/director/deployment_plan/manual_network.rb +137 -0
  82. data/lib/bosh/director/deployment_plan/network.rb +74 -0
  83. data/lib/bosh/director/deployment_plan/network_subnet.rb +167 -0
  84. data/lib/bosh/director/deployment_plan/planner.rb +288 -0
  85. data/lib/bosh/director/deployment_plan/preparer.rb +52 -0
  86. data/lib/bosh/director/deployment_plan/release.rb +126 -0
  87. data/lib/bosh/director/deployment_plan/resource_pool.rb +143 -0
  88. data/lib/bosh/director/deployment_plan/resource_pools.rb +68 -0
  89. data/lib/bosh/director/deployment_plan/stemcell.rb +56 -0
  90. data/lib/bosh/director/deployment_plan/template.rb +94 -0
  91. data/lib/bosh/director/deployment_plan/update_config.rb +80 -0
  92. data/lib/bosh/director/deployment_plan/updater.rb +55 -0
  93. data/lib/bosh/director/deployment_plan/vip_network.rb +79 -0
  94. data/lib/bosh/director/dns_helper.rb +204 -0
  95. data/lib/bosh/director/download_helper.rb +44 -0
  96. data/lib/bosh/director/duration.rb +36 -0
  97. data/lib/bosh/director/encryption_helper.rb +10 -0
  98. data/lib/bosh/director/errors.rb +198 -0
  99. data/lib/bosh/director/event_log.rb +136 -0
  100. data/lib/bosh/director/ext.rb +64 -0
  101. data/lib/bosh/director/hash_string_vals.rb +13 -0
  102. data/lib/bosh/director/instance_deleter.rb +109 -0
  103. data/lib/bosh/director/instance_updater.rb +506 -0
  104. data/lib/bosh/director/ip_util.rb +67 -0
  105. data/lib/bosh/director/job_queue.rb +16 -0
  106. data/lib/bosh/director/job_runner.rb +162 -0
  107. data/lib/bosh/director/job_updater.rb +121 -0
  108. data/lib/bosh/director/jobs/backup.rb +86 -0
  109. data/lib/bosh/director/jobs/base_job.rb +66 -0
  110. data/lib/bosh/director/jobs/cloud_check/apply_resolutions.rb +46 -0
  111. data/lib/bosh/director/jobs/cloud_check/scan.rb +38 -0
  112. data/lib/bosh/director/jobs/cloud_check/scan_and_fix.rb +73 -0
  113. data/lib/bosh/director/jobs/create_snapshot.rb +23 -0
  114. data/lib/bosh/director/jobs/delete_deployment.rb +183 -0
  115. data/lib/bosh/director/jobs/delete_deployment_snapshots.rb +34 -0
  116. data/lib/bosh/director/jobs/delete_release.rb +219 -0
  117. data/lib/bosh/director/jobs/delete_snapshots.rb +23 -0
  118. data/lib/bosh/director/jobs/delete_stemcell.rb +102 -0
  119. data/lib/bosh/director/jobs/fetch_logs.rb +99 -0
  120. data/lib/bosh/director/jobs/scheduled_backup.rb +38 -0
  121. data/lib/bosh/director/jobs/snapshot_deployment.rb +61 -0
  122. data/lib/bosh/director/jobs/snapshot_deployments.rb +23 -0
  123. data/lib/bosh/director/jobs/snapshot_self.rb +43 -0
  124. data/lib/bosh/director/jobs/ssh.rb +59 -0
  125. data/lib/bosh/director/jobs/update_deployment.rb +110 -0
  126. data/lib/bosh/director/jobs/update_release.rb +672 -0
  127. data/lib/bosh/director/jobs/update_stemcell.rb +109 -0
  128. data/lib/bosh/director/jobs/vm_state.rb +89 -0
  129. data/lib/bosh/director/lock.rb +133 -0
  130. data/lib/bosh/director/lock_helper.rb +92 -0
  131. data/lib/bosh/director/models.rb +29 -0
  132. data/lib/bosh/director/models/compiled_package.rb +33 -0
  133. data/lib/bosh/director/models/deployment.rb +22 -0
  134. data/lib/bosh/director/models/deployment_problem.rb +49 -0
  135. data/lib/bosh/director/models/deployment_property.rb +21 -0
  136. data/lib/bosh/director/models/director_attribute.rb +9 -0
  137. data/lib/bosh/director/models/dns.rb +9 -0
  138. data/lib/bosh/director/models/dns/domain.rb +9 -0
  139. data/lib/bosh/director/models/dns/record.rb +7 -0
  140. data/lib/bosh/director/models/helpers/model_helper.rb +7 -0
  141. data/lib/bosh/director/models/instance.rb +28 -0
  142. data/lib/bosh/director/models/log_bundle.rb +10 -0
  143. data/lib/bosh/director/models/package.rb +30 -0
  144. data/lib/bosh/director/models/persistent_disk.rb +13 -0
  145. data/lib/bosh/director/models/release.rb +17 -0
  146. data/lib/bosh/director/models/release_version.rb +16 -0
  147. data/lib/bosh/director/models/snapshot.rb +13 -0
  148. data/lib/bosh/director/models/stemcell.rb +18 -0
  149. data/lib/bosh/director/models/task.rb +10 -0
  150. data/lib/bosh/director/models/template.rb +44 -0
  151. data/lib/bosh/director/models/user.rb +11 -0
  152. data/lib/bosh/director/models/vm.rb +42 -0
  153. data/lib/bosh/director/nats_rpc.rb +54 -0
  154. data/lib/bosh/director/network_reservation.rb +121 -0
  155. data/lib/bosh/director/next_rebase_version.rb +20 -0
  156. data/lib/bosh/director/package_compiler.rb +423 -0
  157. data/lib/bosh/director/problem_handlers/base.rb +153 -0
  158. data/lib/bosh/director/problem_handlers/inactive_disk.rb +112 -0
  159. data/lib/bosh/director/problem_handlers/invalid_problem.rb +28 -0
  160. data/lib/bosh/director/problem_handlers/missing_vm.rb +34 -0
  161. data/lib/bosh/director/problem_handlers/mount_info_mismatch.rb +62 -0
  162. data/lib/bosh/director/problem_handlers/out_of_sync_vm.rb +64 -0
  163. data/lib/bosh/director/problem_handlers/unbound_instance_vm.rb +85 -0
  164. data/lib/bosh/director/problem_handlers/unresponsive_agent.rb +78 -0
  165. data/lib/bosh/director/problem_resolver.rb +103 -0
  166. data/lib/bosh/director/problem_scanner.rb +268 -0
  167. data/lib/bosh/director/resource_pool_updater.rb +216 -0
  168. data/lib/bosh/director/scheduler.rb +57 -0
  169. data/lib/bosh/director/sequel.rb +13 -0
  170. data/lib/bosh/director/tar_gzipper.rb +47 -0
  171. data/lib/bosh/director/task_result_file.rb +19 -0
  172. data/lib/bosh/director/thread_pool.rb +8 -0
  173. data/lib/bosh/director/validation_helper.rb +55 -0
  174. data/lib/bosh/director/version.rb +7 -0
  175. data/lib/bosh/director/vm_creator.rb +80 -0
  176. data/lib/bosh/director/vm_data.rb +63 -0
  177. data/lib/bosh/director/vm_metadata_updater.rb +29 -0
  178. data/lib/bosh/director/vm_reuser.rb +63 -0
  179. data/lib/cloud/dummy.rb +149 -0
  180. metadata +664 -0
@@ -0,0 +1,13 @@
1
+ module Bosh::Director
2
+
3
+ module_function
4
+
5
# Replaces, in place, the values stored under the given keys with the
# result of calling `to_s` on them.
#
# A key that is absent from the hash is looked up like any other, so it
# ends up stored with whatever `h[key].to_s` yields.
#
# @param [Hash] h hash to modify
# @param [Array] keys keys whose values should be stringified
# @return [Hash] the same hash object that was passed in
def hash_string_vals(h, *keys)
  keys.each { |key| h[key] = h[key].to_s }
  h
end
12
+
13
+ end
@@ -0,0 +1,109 @@
1
+ # Copyright (c) 2009-2012 VMware, Inc.
2
+
3
+ module Bosh::Director
4
+ # Coordinates the safe deletion of an instance and all associated resources.
5
+ class InstanceDeleter
6
+ include DnsHelper
7
+
8
# @param deployment_plan deployment plan; later asked for its
#   canonical_name and dns_domain when DNS records are removed
def initialize(deployment_plan)
  @deployment_plan = deployment_plan
  # Collaborators are taken from the director-wide Config.
  @cloud, @logger, @event_log = Config.cloud, Config.logger, Config.event_log
end
14
+
15
# Deletes a list of instances, handing each one to a thread pool.
# @param [Array<Models::Instance>] instances list of instances to delete
# @param [Hash] options optional settings; :max_threads overrides
#   Config.max_threads as the pool size
# @return [void]
def delete_instances(instances, options = {})
  thread_limit = options[:max_threads] || Config.max_threads

  ThreadPool.new(:max_threads => thread_limit).wrap do |pool|
    instances.each do |doomed|
      pool.process { delete_instance(doomed) }
    end
  end
end
27
+
28
# Deletes a single instance together with its VM, snapshots, attached
# persistent disks and DNS records.
# @param [Models::Instance] instance instance to delete
# @return [void]
def delete_instance(instance)
  doomed_vm = instance.vm

  @event_log.track(doomed_vm.cid) do
    @logger.info("Delete unneeded instance: #{doomed_vm.cid}")

    # Agent and cloud-side cleanup runs first; the DB rows are removed only
    # after every one of these calls has returned.
    drain(doomed_vm.agent_id)
    @cloud.delete_vm(doomed_vm.cid)
    delete_snapshots(instance)
    delete_persistent_disks(instance.persistent_disks)
    delete_dns(instance.job, instance.index)

    doomed_vm.db.transaction do
      instance.destroy
      doomed_vm.destroy
    end
  end
end
48
+
49
# Drains the instance's job and then stops it.
#
# The agent answers the "shutdown" drain request with a number:
# * zero or positive - static drain: wait that many seconds, then stop;
# * negative         - dynamic drain: wait abs(value) seconds, ask for
#                      "status" and repeat for as long as the answer stays
#                      negative; the first non-negative answer is the final
#                      wait before stopping.
#
# Failures while polling the drain status are logged and end the polling
# (best effort), except task cancellation, which is re-raised.
#
# @param [String] agent_id agent id
# @return [void]
def drain(agent_id)
  agent = AgentClient.new(agent_id)

  drain_time = agent.drain("shutdown")
  while drain_time < 0
    begin
      # Gives a cancelled task the chance to abort a drain that never ends
      # (presumably raises TaskCancelled - see the rescue below).
      Config.job_cancelled?
      wait_time = drain_time.abs
      @logger.info("Drain - check back in #{wait_time} seconds")
      sleep(wait_time)
      drain_time = agent.drain("status")
    rescue => e
      @logger.warn("Failed to check drain-status: #{e.inspect}")
      raise if e.kind_of?(Bosh::Director::TaskCancelled)
      # Stop polling and go straight to agent.stop.
      drain_time = 0
    end
  end

  sleep(drain_time) if drain_time > 0
  agent.stop
end
73
+
74
# Deletes every snapshot of every persistent disk of the instance.
# @param instance instance whose persistent_disks are inspected
def delete_snapshots(instance)
  per_disk = instance.persistent_disks.map(&:snapshots)
  Bosh::Director::Api::SnapshotManager.delete_snapshots(per_disk.flatten)
end
78
+
79
# Deletes persistent disks from the cloud and removes their records.
#
# A disk the cloud reports as not found is tolerated when it is inactive;
# for an active disk the error is re-raised and its record is kept.
#
# @param [Array<Model::PersistentDisk>] persistent_disks disks
# @return [void]
def delete_persistent_disks(persistent_disks)
  persistent_disks.each do |disk|
    status = disk.active ? "active" : "inactive"
    @logger.info("Deleting disk: `#{disk.disk_cid}', #{status}")

    begin
      @cloud.delete_disk(disk.disk_cid)
    rescue Bosh::Clouds::DiskNotFound
      @logger.warn("Disk not found: #{disk.disk_cid}")
      raise if disk.active
    end

    disk.destroy
  end
end
95
+
96
# Deletes the DNS records of a job instance. Does nothing when DNS is
# disabled in Config.
# @param [String] job job name
# @param [Numeric] index job index
# @return [void]
def delete_dns(job, index)
  return unless Config.dns_enabled?

  # "%" is placed where a further name component would go, so the pattern
  # is not tied to one value there (presumably a SQL LIKE wildcard -
  # confirm against DnsHelper#delete_dns_records).
  labels = [index, canonical(job), "%",
            @deployment_plan.canonical_name, dns_domain_name]
  delete_dns_records(labels.join("."), @deployment_plan.dns_domain.id)
end
107
+
108
+ end
109
+ end
@@ -0,0 +1,506 @@
1
+ # Copyright (c) 2009-2012 VMware, Inc.
2
+
3
+ module Bosh::Director
4
+ class InstanceUpdater
5
+ include DnsHelper
6
+
7
+ MAX_ATTACH_DISK_TRIES = 3
8
+ UPDATE_STEPS = 7
9
+ WATCH_INTERVALS = 10
10
+
11
+ attr_reader :current_state
12
+
13
# @param [DeploymentPlan::Instance] instance instance to update
# @param event_ticker optional ticker; when given, `advance` is called on
#   it after each completed update step
def initialize(instance, event_ticker = nil)
  @cloud = Config.cloud
  @logger = Config.logger
  @ticker = event_ticker

  @instance = instance
  @job = instance.job
  @target_state = @instance.state

  # Frequently used parts of the job definition.
  @deployment_plan = @job.deployment
  @resource_pool_spec = @job.resource_pool
  @update_config = @job.update

  @vm = @instance.model.vm

  # Filled with the agent's reply by wait_until_running.
  @current_state = {}
end
32
+
33
# @return [String] "<job name>/<instance index>", used in log and error
#   messages
def instance_name
  format("%s/%s", @job.name, @instance.index)
end
36
+
37
# Runs the given block as one update step, then reports progress.
# Progress is not reported if the block raises.
# @yield the work making up the step
def step
  yield
  report_progress
end
41
+
42
# Advances the event ticker by one step's share of 100%.
# Does nothing when no ticker was supplied.
def report_progress
  return unless @ticker

  @ticker.advance(100.0 / update_steps)
end
45
+
46
# @return [Integer] number of steps used to scale progress reports:
#   UPDATE_STEPS, plus one when the job or its packages changed
def update_steps
  extra_steps = (@instance.job_changed? || @instance.packages_changed?) ? 1 : 0
  UPDATE_STEPS + extra_steps
end
49
+
50
# Brings the instance to its target state.
#
# Order of work: stop the job, take a snapshot, then either tear the VM
# down (target state "detached") or update resource pool, networks, DNS and
# persistent disk, apply the new spec, start the job if required and wait
# for it to settle.
#
# @param [Hash] options :canary (default false) selects the canary watch
#   times instead of the regular update watch times
# @raise [AgentJobNotRunning] target state is "started" but the job is not
#   running after the wait
# @raise [AgentJobNotStopped] target state is "stopped" but the job is
#   still running after the wait
def update(options = {})
  @canary = options.fetch(:canary, false)

  @logger.info("Updating instance #{@instance}, changes: #{@instance.changes.to_a.join(', ')}")

  # Optimization to only update DNS if nothing else changed.
  if dns_change_only?
    update_dns
    return
  end

  step { stop }
  step { take_snapshot }

  if @target_state == "detached"
    # A detached instance keeps no VM; the freed slot goes back to the pool.
    detach_disk
    delete_vm
    @resource_pool_spec.add_idle_vm
    return
  end

  step { update_resource_pool }
  step { update_networks }
  step { update_dns }
  step { update_persistent_disk }

  VmMetadataUpdater.build.update(@vm, {})

  step { apply_state(@instance.spec) }

  start! if need_start?

  step { wait_until_running }

  case @target_state
  when "started"
    unless current_state["job_state"] == "running"
      raise AgentJobNotRunning, "`#{instance_name}' is not running after update"
    end
  when "stopped"
    if current_state["job_state"] == "running"
      raise AgentJobNotStopped, "`#{instance_name}' is still running despite the stop command"
    end
  end
end
92
+
93
# Polls the agent according to the watch schedule until the job reaches the
# target state or the schedule is exhausted. The last agent reply is kept
# in @current_state.
#
# Watch times don't include the get_state roundtrip time, so effective
# max watch time is roughly:
# max_watch_time + N_WATCH_INTERVALS * avg_roundtrip_time
def wait_until_running
  watch_schedule(min_watch_time, max_watch_time).each do |watch_time|
    # The schedule is in milliseconds, Kernel#sleep takes seconds.
    seconds = watch_time.to_f / 1000
    @logger.info("Waiting for #{seconds} seconds to check #{instance_name} status")
    sleep(seconds)
    @logger.info("Checking if #{instance_name} has been updated after #{seconds} seconds")

    @current_state = agent.get_state

    case @target_state
    when "started"
      break if current_state["job_state"] == "running"
    when "stopped"
      break unless current_state["job_state"] == "running"
    end
  end
end
112
+
113
# Asks the agent to start the job. A RuntimeError raised by the call is
# logged as a warning and deliberately not propagated.
def start!
  agent.start
rescue RuntimeError => e
  # FIXME: this is a stopgap: we don't have a good way to
  # negotiate on BOSH protocol between director and agent (yet),
  # so updating from agent version that doesn't support 'start' RPC
  # to the one that does might be hard. Right now we decided to
  # just swallow the exception.
  # This needs to be removed in one of the following cases:
  # 1. BOSH protocol handshake gets implemented
  # 2. All agents updated to support 'start' RPC
  #    and we no longer care about backward compatibility.
  @logger.warn("Agent start raised an exception: #{e.inspect}, ignoring for compatibility")
end
127
+
128
# @return [Boolean] true when the instance's target state is 'started'
def need_start?
  @target_state == 'started'
end
131
+
132
# @return [Boolean] true when :dns is the one and only pending change
def dns_change_only?
  return false unless @instance.changes.include?(:dns)

  @instance.changes.size == 1
end
135
+
136
# Drains the job and then stops it.
#
# The drain request is "shutdown" when the instance is shutting down for
# this update and "update" (with the new spec) otherwise. A positive reply
# is slept on directly; zero or a negative reply is handed to
# wait_for_dynamic_drain.
def stop
  drain_time =
    if shutting_down?
      agent.drain("shutdown")
    else
      agent.drain("update", @instance.spec)
    end

  if drain_time > 0
    sleep(drain_time)
  else
    wait_for_dynamic_drain(drain_time)
  end

  agent.stop
end
147
+
148
# Waits for a dynamic drain to finish.
#
# Each round sleeps for the absolute value of the last reply. A negative
# reply means "ask again afterwards"; a non-negative one ends the loop.
#
# @param [Numeric] initial_drain_time the agent's first drain reply
def wait_for_dynamic_drain(initial_drain_time)
  remaining = initial_drain_time

  loop do
    # This could go on forever if drain script is broken, canceling the task is a way out.
    Config.task_checkpoint

    pause = remaining.abs
    if pause > 0
      @logger.info("`#{@instance}' is draining: checking back in #{pause}s")
      sleep(pause)
    end

    # Positive number always means last drain call:
    break unless remaining < 0

    # We used to ignore exceptions from drain status for compatibility
    # with older agents but it doesn't need to happen anymore, as
    # realistically speaking, all agents have already been updated
    # to support drain status mechanism and swallowing real errors
    # would be bad here, as it could mask potential problems.
    remaining = agent.drain("status")
  end
end
170
+
171
# Takes a snapshot of the instance through the snapshot manager, passing
# the :clean option as true.
def take_snapshot
  Api::SnapshotManager.take_snapshot(@instance.model, :clean => true)
end
174
+
175
# Deletes all snapshots of the given disk.
# @param disk persistent disk record whose `snapshots` are removed
def delete_snapshots(disk)
  doomed_snapshots = disk.snapshots
  Api::SnapshotManager.delete_snapshots(doomed_snapshots)
end
178
+
179
# Unmounts the persistent disk in the agent and detaches it from the VM.
# Does nothing when no disk is currently attached.
# @raise [AgentUnexpectedDisk] a disk is attached but the instance record
#   has no persistent disk CID
def detach_disk
  return unless @instance.disk_currently_attached?

  disk_cid = @instance.model.persistent_disk_cid
  if disk_cid.nil?
    raise AgentUnexpectedDisk,
          "`#{instance_name}' VM has disk attached " \
          "but it's not reflected in director DB"
  end

  agent.unmount_disk(disk_cid)
  @cloud.detach_disk(@vm.cid, disk_cid)
end
191
+
192
# Attaches the instance's persistent disk to the VM and mounts it through
# the agent. Does nothing when the instance record has no persistent disk.
def attach_disk
  disk_cid = @instance.model.persistent_disk_cid
  return if disk_cid.nil?

  @cloud.attach_disk(@vm.cid, disk_cid)
  agent.mount_disk(disk_cid)
end
198
+
199
# Deletes the VM in the cloud, then, in one DB transaction, unlinks it from
# the instance record and destroys the VM record.
# @vm itself is left pointing at the destroyed record.
def delete_vm
  @cloud.delete_vm(@vm.cid)

  record = @instance.model
  record.db.transaction do
    record.vm = nil
    record.save
    @vm.destroy
  end
end
208
+
209
# Creates a new VM for the instance, links it to the instance record and
# waits for its agent to respond.
#
# If anything fails after the VM has been created, that VM is deleted
# again and the error is re-raised. A failure inside VmCreator.create is
# propagated without cleanup here: at that point @vm still refers to the
# previous VM (if any), and deleting it a second time would be wrong.
#
# @param new_disk_id CID of an additional disk to pass to the VM creator,
#   or nil
def create_vm(new_disk_id)
  stemcell = @resource_pool_spec.stemcell
  disks = [@instance.model.persistent_disk_cid, new_disk_id].compact

  @vm = VmCreator.create(@deployment_plan.model, stemcell.model,
                         @resource_pool_spec.cloud_properties,
                         @instance.network_settings, disks,
                         @resource_pool_spec.env)

  begin
    @instance.model.vm = @vm
    @instance.model.save

    agent.wait_until_ready
  rescue
    @logger.error("error during create_vm(), deleting vm #{@vm.cid}")
    delete_vm
    raise
  end
end
228
+
229
# Records the spec on the VM record, then sends it to the agent.
# @param state spec to store as the VM's apply_spec and apply on the agent
def apply_state(state)
  @vm.update(:apply_spec => state)
  agent.apply(state)
end
233
+
234
# Retrieve list of mounted disks from the agent.
#
# A successful agent reply is cached for later calls. If the agent call
# raises a RuntimeError, the persistent disk CID from the instance record
# is returned instead and nothing is cached, so the agent is asked again
# next time.
#
# @return [Array<String>] list of disk CIDs
def disk_info
  return @disk_list if @disk_list

  begin
    @disk_list = agent.list_disk
  rescue RuntimeError
    # old agents don't support list_disk rpc
    # The CID is read from the instance model, as everywhere else in this
    # class.
    [@instance.model.persistent_disk_cid]
  end
end
246
+
247
# Unmounts, detaches and deletes a persistent disk together with its
# snapshots, then destroys its record.
#
# "Not attached" / "not found" answers from the cloud are tolerated for an
# inactive disk and turned into director errors for an active one.
#
# @param disk persistent disk record
# @param vm_cid CID of the VM to detach from; detaching is skipped when nil
# @raise [CloudDiskNotAttached] active disk is not attached according to CPI
# @raise [CloudDiskMissing] active disk is missing according to CPI
def delete_disk(disk, vm_cid)
  cid = disk.disk_cid

  # Unmount the disk only if disk is known by the agent
  agent.unmount_disk(cid) if agent && disk_info.include?(cid)

  begin
    @cloud.detach_disk(vm_cid, cid) if vm_cid
  rescue Bosh::Clouds::DiskNotAttached
    if disk.active
      raise CloudDiskNotAttached,
            "`#{instance_name}' VM should have persistent disk attached " \
            "but it doesn't (according to CPI)"
    end
  end

  delete_snapshots(disk)

  begin
    @cloud.delete_disk(cid)
  rescue Bosh::Clouds::DiskNotFound
    if disk.active
      raise CloudDiskMissing,
            "Disk `#{cid}' is missing according to CPI but marked " \
            "as active in DB"
    end
  end

  disk.destroy
end
278
+
279
# Updates the A and PTR records for each of the instance's DNS names.
# Does nothing unless the instance reports a DNS change.
def update_dns
  return unless @instance.dns_changed?

  dns_domain = @deployment_plan.dns_domain
  @instance.dns_record_info.each do |name, ip|
    @logger.info("Updating DNS for: #{name} to #{ip}")
    update_dns_a_record(dns_domain, name, ip)
    update_dns_ptr_record(name, ip)
  end
end
289
+
290
# Recreates the VM: detaches the disk, deletes the VM, creates a new one,
# re-attaches the disk and applies a basic state to the new agent.
#
# Runs only when the resource pool changed or a new disk CID is given.
# When the cloud reports NoDiskSpace with ok_to_retry set, the
# delete/create/attach sequence is retried up to MAX_ATTACH_DISK_TRIES
# times.
#
# @param new_disk_cid CID of a not yet attached disk to pass to create_vm,
#   or nil
# @raise [CloudNotEnoughDiskSpace] retries are exhausted or not allowed
def update_resource_pool(new_disk_cid = nil)
  return unless @instance.resource_pool_changed? || new_disk_cid

  detach_disk
  attempts = 0
  begin
    delete_vm
    create_vm(new_disk_cid)
    attach_disk
  rescue Bosh::Clouds::NoDiskSpace => e
    if e.ok_to_retry && attempts < MAX_ATTACH_DISK_TRIES
      attempts += 1
      @logger.warn("Retrying attach disk operation #{attempts}")
      retry
    end
    @logger.warn("Giving up on attach disk operation")
    e.ok_to_retry = false
    raise CloudNotEnoughDiskSpace,
          "Not enough disk space to update `#{instance_name}'"
  end

  state = {
    "deployment" => @deployment_plan.name,
    "networks" => @instance.network_settings,
    "resource_pool" => @job.resource_pool.spec,
    "job" => @job.spec,
    "index" => @instance.index,
    "release" => @job.release.spec
  }
  state["persistent_disk"] = @instance.disk_size if @instance.disk_size > 0

  # if we have a failure above the new VM doesn't get any state,
  # which makes it impossible to recreate it
  apply_state(state)
  @instance.current_state = agent.get_state
end
329
+
330
# Attaches the persistent disk when the instance record has one but it is
# not currently attached. If the cloud reports NoDiskSpace, the VM is
# recreated via update_resource_pool instead.
def attach_missing_disk
  has_disk = @instance.model.persistent_disk_cid
  attach_disk if has_disk && !@instance.disk_currently_attached?
rescue Bosh::Clouds::NoDiskSpace
  update_resource_pool(@instance.model.persistent_disk_cid)
end
338
+
339
# Checks that the persistent disk reported by the agent matches the
# director's record, and logs a warning for every inactive disk.
# Does nothing when the instance has no persistent disks.
#
# NOTE: Currently assumes that we only have 1 persistent disk.
# @raise [AgentDiskOutOfSync] the agent's first disk CID differs from the
#   instance record's persistent disk CID
# @return [void]
def check_persistent_disk
  return if @instance.model.persistent_disks.empty?

  reported_cid = disk_info.first
  if reported_cid != @instance.model.persistent_disk_cid
    raise AgentDiskOutOfSync,
          "`#{instance_name}' has invalid disks: agent reports " \
          "`#{reported_cid}' while director record shows " \
          "`#{@instance.model.persistent_disk_cid}'"
  end

  @instance.model.persistent_disks.each do |disk|
    @logger.warn("`#{instance_name}' has inactive disk #{disk.disk_cid}") unless disk.active
  end
end
360
+
361
# Replaces the instance's persistent disk when its definition changed.
#
# When the job asks for a disk (size > 0): create the new disk, attach it
# (recreating the VM once if the cloud reports a retryable NoDiskSpace),
# mount it and migrate data from the old disk. Afterwards the active flag
# moves from the old disk to the new one and the old disk is deleted.
# On attach or mount/migrate failure the new disk is removed again and the
# error is re-raised.
def update_persistent_disk
  # CLEANUP FIXME
  # [olegs] Error cleanup should be performed AFTER logic cleanup, I can't
  # even comprehend this method.
  attach_missing_disk
  check_persistent_disk

  return unless @instance.persistent_disk_changed?

  new_disk_cid = nil
  new_disk = nil
  old_disk = @instance.model.persistent_disk

  if @job.persistent_disk > 0
    @instance.model.db.transaction do
      new_disk_cid = @cloud.create_disk(@job.persistent_disk, @vm.cid)
      # The new disk stays inactive until the data has been migrated.
      new_disk = Models::PersistentDisk.create(:disk_cid => new_disk_cid,
                                               :active => false,
                                               :instance_id => @instance.model.id,
                                               :size => @job.persistent_disk)
    end

    begin
      @cloud.attach_disk(@vm.cid, new_disk_cid)
    rescue Bosh::Clouds::NoDiskSpace => e
      unless e.ok_to_retry
        @cloud.delete_disk(new_disk_cid)
        new_disk.destroy
        raise
      end

      @logger.warn("Retrying attach disk operation " \
                   "after persistent disk update failed")
      # Recreate the vm
      update_resource_pool(new_disk_cid)
      begin
        @cloud.attach_disk(@vm.cid, new_disk_cid)
      rescue
        @cloud.delete_disk(new_disk_cid)
        new_disk.destroy
        raise
      end
    end

    begin
      agent.mount_disk(new_disk_cid)
      agent.migrate_disk(old_disk.disk_cid, new_disk_cid) if old_disk
    rescue
      delete_disk(new_disk, @vm.cid)
      raise
    end
  end

  @instance.model.db.transaction do
    old_disk.update(:active => false) if old_disk
    new_disk.update(:active => true) if new_disk
  end

  delete_disk(old_disk, @vm.cid) if old_disk
end
422
+
423
# Applies changed network settings to the VM. Does nothing unless the
# instance reports a network change.
#
# When the CPI cannot reconfigure networks in place, the instance is marked
# for recreation and the VM is rebuilt through update_resource_pool.
def update_networks
  return unless @instance.networks_changed?

  settings = @instance.network_settings

  begin
    # If configure_networks can't configure the network as
    # requested, e.g. when the security groups change on AWS,
    # configure_networks() will raise an exception and we'll
    # recreate the VM to work around it
    @cloud.configure_networks(@vm.cid, settings)
  rescue Bosh::Clouds::NotSupported => e
    @logger.info("configure_networks not supported: #{e.message}")
    @instance.recreate = true
    update_resource_pool
    return
  end

  # Once CPI has configured the vm and stored the new network settings at the registry,
  # we restart the agent via a 'prepare_network_change' message in order for the agent
  # to pick up the new network settings.
  agent.prepare_network_change(settings)

  # Give some time to the agent to restart before pinging if it's ready (race condition)
  sleep(5)

  agent.wait_until_ready
end
451
+
452
# Returns a client for the current VM's agent.
#
# The cached client is reused only while its id matches the VM's agent id;
# otherwise (e.g. after @vm was replaced) a new client is built.
#
# @raise [VmAgentIdMissing] the VM record has no agent id
def agent
  return @agent if @agent && @agent.id == @vm.agent_id

  if @vm.agent_id.nil?
    raise VmAgentIdMissing, "VM #{@vm.id} is missing agent id"
  end

  @agent = AgentClient.new(@vm.agent_id)
end
462
+
463
# @return [String] a freshly generated random UUID
def generate_agent_id
  SecureRandom.uuid
end
466
+
467
# Returns an array of wait times distributed
# on the [min_watch_time..max_watch_time] interval.
#
# The first entry is min_watch_time itself; every following entry is one
# step, so the entries add up to at most max_watch_time.
#
# Tries to respect intervals but doesn't allow an interval to
# fall under 1 second.
# All times are in milliseconds.
#
# When max_watch_time is not greater than min_watch_time, or when only a
# single interval is requested, the schedule is just [min_watch_time].
#
# @param [Numeric] min_watch_time minimum time to watch the jobs
# @param [Numeric] max_watch_time maximum time to watch the jobs
# @param [Numeric] intervals number of intervals between polling
#   the state of the jobs
# @return [Array<Numeric>] watch schedule
def watch_schedule(min_watch_time, max_watch_time, intervals = WATCH_INTERVALS)
  delta = (max_watch_time - min_watch_time).to_f

  # Guards against a negative repeat count (max < min) and against the
  # division by zero below (intervals == 1).
  return [min_watch_time] if delta <= 0 || intervals == 1

  step = [1000, delta / (intervals - 1)].max

  [min_watch_time] + ([step] * (delta / step).floor)
end
484
+
485
# @return [Boolean] Is instance shutting down for this update? True when
#   the resource pool, persistent disk or networks changed, or when the
#   target state is "stopped" or "detached".
def shutting_down?
  @instance.resource_pool_changed? ||
    @instance.persistent_disk_changed? ||
    @instance.networks_changed? ||
    %w(stopped detached).include?(@target_state)
end
493
+
494
# @return minimum watch time from the update config: the canary value for
#   a canary update, the regular update value otherwise
def min_watch_time
  if canary?
    @update_config.min_canary_watch_time
  else
    @update_config.min_update_watch_time
  end
end
497
+
498
# @return maximum watch time from the update config: the canary value for
#   a canary update, the regular update value otherwise
def max_watch_time
  if canary?
    @update_config.max_canary_watch_time
  else
    @update_config.max_update_watch_time
  end
end
501
+
502
# @return the :canary option given to the most recent `update` call
#   (nil before the first call)
def canary?
  @canary
end
505
+ end
506
+ end