bosh-director 1.5.0.pre.1113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180) hide show
  1. data/CHANGELOG +34 -0
  2. data/bin/bosh-director +36 -0
  3. data/bin/bosh-director-console +84 -0
  4. data/bin/bosh-director-drain-workers +42 -0
  5. data/bin/bosh-director-migrate +58 -0
  6. data/bin/bosh-director-scheduler +27 -0
  7. data/bin/bosh-director-worker +76 -0
  8. data/db/migrations/README +1 -0
  9. data/db/migrations/director/20110209010747_initial.rb +118 -0
  10. data/db/migrations/director/20110406055800_add_task_user.rb +9 -0
  11. data/db/migrations/director/20110518225809_remove_cid_constrain.rb +13 -0
  12. data/db/migrations/director/20110617211923_add_deployments_release_versions.rb +32 -0
  13. data/db/migrations/director/20110622212607_add_task_checkpoint_timestamp.rb +9 -0
  14. data/db/migrations/director/20110628023039_add_state_to_instances.rb +21 -0
  15. data/db/migrations/director/20110709012332_add_disk_size_to_instances.rb +9 -0
  16. data/db/migrations/director/20110906183441_add_log_bundles.rb +11 -0
  17. data/db/migrations/director/20110907194830_add_logs_json_to_templates.rb +9 -0
  18. data/db/migrations/director/20110915205610_add_persistent_disks.rb +51 -0
  19. data/db/migrations/director/20111005180929_add_properties.rb +14 -0
  20. data/db/migrations/director/20111110024617_add_deployment_problems.rb +24 -0
  21. data/db/migrations/director/20111216214145_recreate_support_for_vms.rb +9 -0
  22. data/db/migrations/director/20120102084027_add_credentials_to_vms.rb +7 -0
  23. data/db/migrations/director/20120427235217_allow_multiple_releases_per_deployment.rb +36 -0
  24. data/db/migrations/director/20120524175805_add_task_type.rb +44 -0
  25. data/db/migrations/director/20120614001930_delete_redundant_deployment_release_relation.rb +34 -0
  26. data/db/migrations/director/20120822004528_add_fingerprint_to_templates_and_packages.rb +17 -0
  27. data/db/migrations/director/20120830191244_add_properties_to_templates.rb +9 -0
  28. data/db/migrations/director/20121106190739_persist_vm_env.rb +9 -0
  29. data/db/migrations/director/20130222232131_add_sha1_to_stemcells.rb +9 -0
  30. data/db/migrations/director/20130312211407_add_commit_hash_to_release_versions.rb +19 -0
  31. data/db/migrations/director/20130409235338_snapshot.rb +15 -0
  32. data/db/migrations/director/20130530164918_add_paused_flag_to_instance.rb +14 -0
  33. data/db/migrations/director/20130531172604_add_director_attributes.rb +13 -0
  34. data/db/migrations/dns/20120123234908_initial.rb +27 -0
  35. data/lib/bosh/director.rb +133 -0
  36. data/lib/bosh/director/agent_client.rb +78 -0
  37. data/lib/bosh/director/api.rb +29 -0
  38. data/lib/bosh/director/api/api_helper.rb +81 -0
  39. data/lib/bosh/director/api/backup_manager.rb +15 -0
  40. data/lib/bosh/director/api/controller.rb +639 -0
  41. data/lib/bosh/director/api/controller_helpers.rb +34 -0
  42. data/lib/bosh/director/api/deployment_lookup.rb +13 -0
  43. data/lib/bosh/director/api/deployment_manager.rb +60 -0
  44. data/lib/bosh/director/api/http_constants.rb +16 -0
  45. data/lib/bosh/director/api/instance_lookup.rb +44 -0
  46. data/lib/bosh/director/api/instance_manager.rb +63 -0
  47. data/lib/bosh/director/api/problem_manager.rb +40 -0
  48. data/lib/bosh/director/api/property_manager.rb +69 -0
  49. data/lib/bosh/director/api/release_manager.rb +59 -0
  50. data/lib/bosh/director/api/resource_manager.rb +69 -0
  51. data/lib/bosh/director/api/resurrector_manager.rb +15 -0
  52. data/lib/bosh/director/api/snapshot_manager.rb +94 -0
  53. data/lib/bosh/director/api/stemcell_manager.rb +50 -0
  54. data/lib/bosh/director/api/task_helper.rb +46 -0
  55. data/lib/bosh/director/api/task_manager.rb +64 -0
  56. data/lib/bosh/director/api/user_manager.rb +72 -0
  57. data/lib/bosh/director/api/vm_state_manager.rb +11 -0
  58. data/lib/bosh/director/app.rb +35 -0
  59. data/lib/bosh/director/blob_util.rb +87 -0
  60. data/lib/bosh/director/blobstores.rb +29 -0
  61. data/lib/bosh/director/client.rb +156 -0
  62. data/lib/bosh/director/cloudcheck_helper.rb +204 -0
  63. data/lib/bosh/director/compile_task.rb +157 -0
  64. data/lib/bosh/director/config.rb +370 -0
  65. data/lib/bosh/director/configuration_hasher.rb +114 -0
  66. data/lib/bosh/director/cycle_helper.rb +36 -0
  67. data/lib/bosh/director/db_backup.rb +22 -0
  68. data/lib/bosh/director/db_backup/adapter.rb +3 -0
  69. data/lib/bosh/director/db_backup/adapter/mysql2.rb +27 -0
  70. data/lib/bosh/director/db_backup/adapter/postgres.rb +36 -0
  71. data/lib/bosh/director/db_backup/adapter/sqlite.rb +17 -0
  72. data/lib/bosh/director/db_backup/error.rb +10 -0
  73. data/lib/bosh/director/deployment_plan.rb +26 -0
  74. data/lib/bosh/director/deployment_plan/assembler.rb +430 -0
  75. data/lib/bosh/director/deployment_plan/compilation_config.rb +54 -0
  76. data/lib/bosh/director/deployment_plan/compiled_package.rb +35 -0
  77. data/lib/bosh/director/deployment_plan/dynamic_network.rb +91 -0
  78. data/lib/bosh/director/deployment_plan/idle_vm.rb +109 -0
  79. data/lib/bosh/director/deployment_plan/instance.rb +413 -0
  80. data/lib/bosh/director/deployment_plan/job.rb +470 -0
  81. data/lib/bosh/director/deployment_plan/manual_network.rb +137 -0
  82. data/lib/bosh/director/deployment_plan/network.rb +74 -0
  83. data/lib/bosh/director/deployment_plan/network_subnet.rb +167 -0
  84. data/lib/bosh/director/deployment_plan/planner.rb +288 -0
  85. data/lib/bosh/director/deployment_plan/preparer.rb +52 -0
  86. data/lib/bosh/director/deployment_plan/release.rb +126 -0
  87. data/lib/bosh/director/deployment_plan/resource_pool.rb +143 -0
  88. data/lib/bosh/director/deployment_plan/resource_pools.rb +68 -0
  89. data/lib/bosh/director/deployment_plan/stemcell.rb +56 -0
  90. data/lib/bosh/director/deployment_plan/template.rb +94 -0
  91. data/lib/bosh/director/deployment_plan/update_config.rb +80 -0
  92. data/lib/bosh/director/deployment_plan/updater.rb +55 -0
  93. data/lib/bosh/director/deployment_plan/vip_network.rb +79 -0
  94. data/lib/bosh/director/dns_helper.rb +204 -0
  95. data/lib/bosh/director/download_helper.rb +44 -0
  96. data/lib/bosh/director/duration.rb +36 -0
  97. data/lib/bosh/director/encryption_helper.rb +10 -0
  98. data/lib/bosh/director/errors.rb +198 -0
  99. data/lib/bosh/director/event_log.rb +136 -0
  100. data/lib/bosh/director/ext.rb +64 -0
  101. data/lib/bosh/director/hash_string_vals.rb +13 -0
  102. data/lib/bosh/director/instance_deleter.rb +109 -0
  103. data/lib/bosh/director/instance_updater.rb +506 -0
  104. data/lib/bosh/director/ip_util.rb +67 -0
  105. data/lib/bosh/director/job_queue.rb +16 -0
  106. data/lib/bosh/director/job_runner.rb +162 -0
  107. data/lib/bosh/director/job_updater.rb +121 -0
  108. data/lib/bosh/director/jobs/backup.rb +86 -0
  109. data/lib/bosh/director/jobs/base_job.rb +66 -0
  110. data/lib/bosh/director/jobs/cloud_check/apply_resolutions.rb +46 -0
  111. data/lib/bosh/director/jobs/cloud_check/scan.rb +38 -0
  112. data/lib/bosh/director/jobs/cloud_check/scan_and_fix.rb +73 -0
  113. data/lib/bosh/director/jobs/create_snapshot.rb +23 -0
  114. data/lib/bosh/director/jobs/delete_deployment.rb +183 -0
  115. data/lib/bosh/director/jobs/delete_deployment_snapshots.rb +34 -0
  116. data/lib/bosh/director/jobs/delete_release.rb +219 -0
  117. data/lib/bosh/director/jobs/delete_snapshots.rb +23 -0
  118. data/lib/bosh/director/jobs/delete_stemcell.rb +102 -0
  119. data/lib/bosh/director/jobs/fetch_logs.rb +99 -0
  120. data/lib/bosh/director/jobs/scheduled_backup.rb +38 -0
  121. data/lib/bosh/director/jobs/snapshot_deployment.rb +61 -0
  122. data/lib/bosh/director/jobs/snapshot_deployments.rb +23 -0
  123. data/lib/bosh/director/jobs/snapshot_self.rb +43 -0
  124. data/lib/bosh/director/jobs/ssh.rb +59 -0
  125. data/lib/bosh/director/jobs/update_deployment.rb +110 -0
  126. data/lib/bosh/director/jobs/update_release.rb +672 -0
  127. data/lib/bosh/director/jobs/update_stemcell.rb +109 -0
  128. data/lib/bosh/director/jobs/vm_state.rb +89 -0
  129. data/lib/bosh/director/lock.rb +133 -0
  130. data/lib/bosh/director/lock_helper.rb +92 -0
  131. data/lib/bosh/director/models.rb +29 -0
  132. data/lib/bosh/director/models/compiled_package.rb +33 -0
  133. data/lib/bosh/director/models/deployment.rb +22 -0
  134. data/lib/bosh/director/models/deployment_problem.rb +49 -0
  135. data/lib/bosh/director/models/deployment_property.rb +21 -0
  136. data/lib/bosh/director/models/director_attribute.rb +9 -0
  137. data/lib/bosh/director/models/dns.rb +9 -0
  138. data/lib/bosh/director/models/dns/domain.rb +9 -0
  139. data/lib/bosh/director/models/dns/record.rb +7 -0
  140. data/lib/bosh/director/models/helpers/model_helper.rb +7 -0
  141. data/lib/bosh/director/models/instance.rb +28 -0
  142. data/lib/bosh/director/models/log_bundle.rb +10 -0
  143. data/lib/bosh/director/models/package.rb +30 -0
  144. data/lib/bosh/director/models/persistent_disk.rb +13 -0
  145. data/lib/bosh/director/models/release.rb +17 -0
  146. data/lib/bosh/director/models/release_version.rb +16 -0
  147. data/lib/bosh/director/models/snapshot.rb +13 -0
  148. data/lib/bosh/director/models/stemcell.rb +18 -0
  149. data/lib/bosh/director/models/task.rb +10 -0
  150. data/lib/bosh/director/models/template.rb +44 -0
  151. data/lib/bosh/director/models/user.rb +11 -0
  152. data/lib/bosh/director/models/vm.rb +42 -0
  153. data/lib/bosh/director/nats_rpc.rb +54 -0
  154. data/lib/bosh/director/network_reservation.rb +121 -0
  155. data/lib/bosh/director/next_rebase_version.rb +20 -0
  156. data/lib/bosh/director/package_compiler.rb +423 -0
  157. data/lib/bosh/director/problem_handlers/base.rb +153 -0
  158. data/lib/bosh/director/problem_handlers/inactive_disk.rb +112 -0
  159. data/lib/bosh/director/problem_handlers/invalid_problem.rb +28 -0
  160. data/lib/bosh/director/problem_handlers/missing_vm.rb +34 -0
  161. data/lib/bosh/director/problem_handlers/mount_info_mismatch.rb +62 -0
  162. data/lib/bosh/director/problem_handlers/out_of_sync_vm.rb +64 -0
  163. data/lib/bosh/director/problem_handlers/unbound_instance_vm.rb +85 -0
  164. data/lib/bosh/director/problem_handlers/unresponsive_agent.rb +78 -0
  165. data/lib/bosh/director/problem_resolver.rb +103 -0
  166. data/lib/bosh/director/problem_scanner.rb +268 -0
  167. data/lib/bosh/director/resource_pool_updater.rb +216 -0
  168. data/lib/bosh/director/scheduler.rb +57 -0
  169. data/lib/bosh/director/sequel.rb +13 -0
  170. data/lib/bosh/director/tar_gzipper.rb +47 -0
  171. data/lib/bosh/director/task_result_file.rb +19 -0
  172. data/lib/bosh/director/thread_pool.rb +8 -0
  173. data/lib/bosh/director/validation_helper.rb +55 -0
  174. data/lib/bosh/director/version.rb +7 -0
  175. data/lib/bosh/director/vm_creator.rb +80 -0
  176. data/lib/bosh/director/vm_data.rb +63 -0
  177. data/lib/bosh/director/vm_metadata_updater.rb +29 -0
  178. data/lib/bosh/director/vm_reuser.rb +63 -0
  179. data/lib/cloud/dummy.rb +149 -0
  180. metadata +664 -0
@@ -0,0 +1,13 @@
1
+ module Bosh::Director
2
+
3
+ module_function
4
+
5
# Coerces the values stored under the given keys to strings, in place.
#
# @param [Hash] h hash to modify (it is mutated and returned)
# @param [Array] keys keys whose values are replaced by their +to_s+;
#   a key that is absent from +h+ ends up mapped to "" (nil.to_s)
# @return [Hash] the same hash instance that was passed in
def hash_string_vals(h, *keys)
  keys.each_with_object(h) { |key, hash| hash[key] = hash[key].to_s }
end
12
+
13
+ end
@@ -0,0 +1,109 @@
1
+ # Copyright (c) 2009-2012 VMware, Inc.
2
+
3
+ module Bosh::Director
4
+ # Coordinates the safe deletion of an instance and all associated resources.
5
+ class InstanceDeleter
6
+ include DnsHelper
7
+
8
# Captures the deployment plan plus the cloud, logger and event log that
# the director configuration currently exposes.
#
# @param deployment_plan plan the instances belong to; delete_dns reads
#   +canonical_name+ and +dns_domain+ from it
def initialize(deployment_plan)
  @cloud, @logger, @event_log = Config.cloud, Config.logger, Config.event_log
  @deployment_plan = deployment_plan
end
14
+
15
# Deletes a list of instances, handing each deletion to a thread pool.
#
# @param [Array<Models::Instance>] instances list of instances to delete
# @param [Hash] options optional settings; +:max_threads+ overrides the
#   pool size, which otherwise comes from Config.max_threads
# @return [void]
def delete_instances(instances, options = {})
  pool_size = options[:max_threads] || Config.max_threads

  ThreadPool.new(:max_threads => pool_size).wrap do |pool|
    instances.each { |instance| pool.process { delete_instance(instance) } }
  end
end
27
+
28
# Deletes a single instance together with its VM, disk snapshots,
# persistent disks and DNS records. The whole operation is tracked in the
# event log under the VM's cid.
#
# @param [Models::Instance] instance instance to delete
# @return [void]
def delete_instance(instance)
  vm = instance.vm

  @event_log.track(vm.cid) do
    @logger.info("Delete unneeded instance: #{vm.cid}")

    # Agent and cloud resources are released first; the database rows are
    # removed only after every step above has succeeded.
    drain(vm.agent_id)
    @cloud.delete_vm(vm.cid)
    delete_snapshots(instance)
    delete_persistent_disks(instance.persistent_disks)
    delete_dns(instance.job, instance.index)

    vm.db.transaction do
      instance.destroy
      vm.destroy
    end
  end
end
48
+
49
# Drains the agent's jobs ahead of VM deletion, then stops them.
#
# The number returned by the agent's drain call is interpreted the same way
# InstanceUpdater#wait_for_dynamic_drain does: a non-negative value is a
# final wait (in seconds), a negative value asks the director to check back
# with drain("status") after that many seconds. Polling therefore continues
# for as long as the agent keeps answering with a negative number.
#
# Errors raised while polling are logged and polling is abandoned (best
# effort) so the deletion can proceed; task cancellation is re-raised.
#
# @param [String] agent_id agent id
# @return [void]
# @raise [Bosh::Director::TaskCancelled] if the task is cancelled mid-drain
def drain(agent_id)
  agent = AgentClient.new(agent_id)

  drain_time = agent.drain("shutdown")
  while drain_time < 0
    drain_time = drain_time.abs
    begin
      Config.job_cancelled?
      @logger.info("Drain - check back in #{drain_time} seconds")
      sleep(drain_time)
      drain_time = agent.drain("status")
    rescue => e
      @logger.warn("Failed to check drain-status: #{e.inspect}")
      raise if e.kind_of?(Bosh::Director::TaskCancelled)
      # Give up on polling and go straight to stopping the jobs.
      drain_time = 0
      break
    end
  end

  # Whatever is left is the final, static wait requested by the agent.
  sleep(drain_time) if drain_time > 0
  agent.stop
end
73
+
74
# Deletes every snapshot belonging to any persistent disk of the instance.
#
# @param [Models::Instance] instance instance whose disk snapshots go away
# @return whatever Api::SnapshotManager.delete_snapshots returns
def delete_snapshots(instance)
  per_disk = instance.persistent_disks.map(&:snapshots)
  Bosh::Director::Api::SnapshotManager.delete_snapshots(per_disk.flatten)
end
78
+
79
# Deletes persistent disks from the cloud and then from the database.
#
# A disk the cloud no longer knows about is tolerated only when it is
# inactive; for an active disk the DiskNotFound error is re-raised and the
# record is kept.
#
# @param [Array<Model::PersistentDisk>] persistent_disks disks
# @return [void]
def delete_persistent_disks(persistent_disks)
  persistent_disks.each do |disk|
    @logger.info("Deleting disk: `#{disk.disk_cid}', #{disk.active ? 'active' : 'inactive'}")

    begin
      @cloud.delete_disk(disk.disk_cid)
    rescue Bosh::Clouds::DiskNotFound
      @logger.warn("Disk not found: #{disk.disk_cid}")
      raise if disk.active
    end

    disk.destroy
  end
end
95
+
96
# Deletes the DNS records matching the instance; does nothing when DNS is
# disabled in the director configuration.
#
# @param [String] job job name
# @param [Numeric] index job index
# @return [void]
def delete_dns(job, index)
  return unless Config.dns_enabled?

  # "%" stands in for the label between the job name and the deployment name.
  pattern_labels = [index, canonical(job), "%", @deployment_plan.canonical_name, dns_domain_name]
  delete_dns_records(pattern_labels.join("."), @deployment_plan.dns_domain.id)
end
107
+
108
+ end
109
+ end
@@ -0,0 +1,506 @@
1
+ # Copyright (c) 2009-2012 VMware, Inc.
2
+
3
+ module Bosh::Director
4
+ class InstanceUpdater
5
+ include DnsHelper
6
+
7
+ MAX_ATTACH_DISK_TRIES = 3
8
+ UPDATE_STEPS = 7
9
+ WATCH_INTERVALS = 10
10
+
11
+ attr_reader :current_state
12
+
13
# Collects everything an update needs from the planned instance and the
# director configuration.
#
# @param [DeploymentPlan::Instance] instance instance to update
# @param event_ticker optional ticker; when present, report_progress calls
#   +advance+ on it after each update step
def initialize(instance, event_ticker = nil)
  @cloud = Config.cloud
  @logger = Config.logger
  @ticker = event_ticker

  @instance = instance
  @target_state = instance.state
  @vm = instance.model.vm

  @job = instance.job
  @deployment_plan = @job.deployment
  @resource_pool_spec = @job.resource_pool
  @update_config = @job.update

  # Last state hash reported by the agent; filled in by wait_until_running.
  @current_state = {}
end
32
+
33
# @return [String] "<job name>/<index>", the label used in logs and errors
def instance_name
  [@job.name, @instance.index].join("/")
end
36
+
37
# Runs one update step (the given block) and then reports progress.
# Progress is not reported if the block raises.
#
# @yield the work that makes up this step
# @return the result of report_progress
def step
  yield
  report_progress
end
41
+
42
# Advances the event ticker by one step's share of 100%; a no-op when the
# updater was created without a ticker.
def report_progress
  return unless @ticker

  @ticker.advance(100.0 / update_steps)
end
45
+
46
# @return [Integer] number of steps progress is divided into: UPDATE_STEPS,
#   plus one when the instance's job or packages changed
def update_steps
  content_changed = @instance.job_changed? || @instance.packages_changed?
  content_changed ? UPDATE_STEPS + 1 : UPDATE_STEPS
end
49
+
50
# Drives the instance towards its target state.
#
# Sequence: stop the jobs, snapshot, then either tear the VM down (target
# state "detached") or update resource pool, networks, DNS and persistent
# disk, apply the new spec, start the jobs if required and wait for the
# agent to report the expected job state.
#
# @param [Hash] options +:canary+ (default false) selects the canary watch
#   times instead of the regular update watch times
# @return [void]
# @raise [AgentJobNotRunning] target state is "started" but the job is not
#   running once the watch schedule is exhausted
# @raise [AgentJobNotStopped] target state is "stopped" but the job is
#   still running
def update(options = {})
  @canary = options.fetch(:canary, false)

  @logger.info("Updating instance #{@instance}, changes: #{@instance.changes.to_a.join(', ')}")

  # Shortcut: when DNS is the only change there is no need to touch the VM.
  if dns_change_only?
    update_dns
    return
  end

  step { stop }
  step { take_snapshot }

  # A detached instance keeps its disk but loses its VM; an idle VM slot is
  # added to the resource pool in its place.
  if @target_state == "detached"
    detach_disk
    delete_vm
    @resource_pool_spec.add_idle_vm
    return
  end

  step { update_resource_pool }
  step { update_networks }
  step { update_dns }
  step { update_persistent_disk }

  VmMetadataUpdater.build.update(@vm, {})

  step { apply_state(@instance.spec) }

  start! if need_start?

  step { wait_until_running }

  job_running = current_state["job_state"] == "running"

  if @target_state == "started" && !job_running
    raise AgentJobNotRunning, "`#{instance_name}' is not running after update"
  end

  if @target_state == "stopped" && job_running
    raise AgentJobNotStopped, "`#{instance_name}' is still running despite the stop command"
  end
end
92
+
93
# Polls the agent along the watch schedule until the job reaches the state
# implied by the target state, or the schedule runs out. The last state
# reported by the agent is kept in @current_state.
#
# Watch times don't include the get_state roundtrip time, so effective
# max watch time is roughly:
# max_watch_time + N_WATCH_INTERVALS * avg_roundtrip_time
def wait_until_running
  watch_schedule(min_watch_time, max_watch_time).each do |watch_time|
    sleep_time = watch_time.to_f / 1000 # schedule is in milliseconds
    @logger.info("Waiting for #{sleep_time} seconds to check #{instance_name} status")
    sleep(sleep_time)
    @logger.info("Checking if #{instance_name} has been updated after #{sleep_time} seconds")

    @current_state = agent.get_state
    job_running = current_state["job_state"] == "running"

    case @target_state
    when "started" then break if job_running
    when "stopped" then break unless job_running
    end
  end
end
112
+
113
# Asks the agent to start its jobs. A RuntimeError from the agent is logged
# and otherwise ignored.
def start!
  begin
    agent.start
  rescue RuntimeError => e
    # FIXME: there is no way (yet) to negotiate the BOSH protocol between
    # director and agent, so updating from an agent version that doesn't
    # support the 'start' RPC to one that does might be hard. For now the
    # exception is deliberately swallowed.
    # This needs to be removed in one of the following cases:
    # 1. BOSH protocol handshake gets implemented
    # 2. All agents updated to support 'start' RPC
    #    and we no longer care about backward compatibility.
    @logger.warn("Agent start raised an exception: #{e.inspect}, ignoring for compatibility")
  end
end
127
+
128
# @return [Boolean] true when the target state is 'started'
def need_start?
  'started' == @target_state
end
131
+
132
# @return [Boolean] true when :dns is the one and only pending change
def dns_change_only?
  pending = @instance.changes
  pending.size == 1 && pending.include?(:dns)
end
135
+
136
# Drains the agent's jobs and stops them.
#
# The drain is a "shutdown" drain when the instance is going away for this
# update (see shutting_down?), otherwise an "update" drain that is handed
# the new spec. A positive drain time is slept on directly; zero or a
# negative value is handed to wait_for_dynamic_drain.
def stop
  drain_time =
    if shutting_down?
      agent.drain("shutdown")
    else
      agent.drain("update", @instance.spec)
    end

  if drain_time > 0
    sleep(drain_time)
  else
    wait_for_dynamic_drain(drain_time)
  end

  agent.stop
end
147
+
148
# Waits out a dynamic drain: sleeps for the absolute value of the current
# drain time, then asks the agent for drain("status") again for as long as
# the reported time is negative.
#
# @param [Numeric] initial_drain_time value returned by the first drain call
# @return [void]
def wait_for_dynamic_drain(initial_drain_time)
  remaining = initial_drain_time

  loop do
    # This could go on forever if drain script is broken, canceling the task is a way out.
    Config.task_checkpoint

    pause = remaining.abs
    if pause > 0
      @logger.info("`#{@instance}' is draining: checking back in #{pause}s")
      sleep(pause)
    end

    # Positive number always means last drain call:
    break if remaining >= 0

    # Errors from drain status are intentionally not rescued: swallowing
    # real errors here could mask potential problems, and compatibility
    # with agents lacking the status mechanism is no longer a concern.
    remaining = agent.drain("status")
  end
end
170
+
171
# Takes a snapshot of the instance through the snapshot manager, passing
# the +clean+ option as true.
def take_snapshot
  Api::SnapshotManager.take_snapshot(@instance.model, :clean => true)
end
174
+
175
# Deletes all snapshots of the given persistent disk.
#
# @param disk persistent disk record; only its +snapshots+ are read
def delete_snapshots(disk)
  disk_snapshots = disk.snapshots
  Api::SnapshotManager.delete_snapshots(disk_snapshots)
end
178
+
179
# Unmounts the persistent disk via the agent and detaches it from the VM.
# Does nothing when the instance reports no disk as currently attached.
#
# @raise [AgentUnexpectedDisk] a disk is attached but the director has no
#   persistent disk cid on record for the instance
def detach_disk
  return unless @instance.disk_currently_attached?

  disk_cid = @instance.model.persistent_disk_cid
  if disk_cid.nil?
    raise AgentUnexpectedDisk,
          "`#{instance_name}' VM has disk attached but it's not reflected in director DB"
  end

  agent.unmount_disk(disk_cid)
  @cloud.detach_disk(@vm.cid, disk_cid)
end
191
+
192
# Attaches the instance's persistent disk to the VM and has the agent mount
# it. Does nothing when the director has no persistent disk cid on record.
def attach_disk
  disk_cid = @instance.model.persistent_disk_cid
  return if disk_cid.nil?

  @cloud.attach_disk(@vm.cid, disk_cid)
  agent.mount_disk(disk_cid)
end
198
+
199
# Deletes the VM in the cloud, then, in one DB transaction, unlinks it from
# the instance record and destroys the VM record.
def delete_vm
  @cloud.delete_vm(@vm.cid)

  model = @instance.model
  model.db.transaction do
    model.vm = nil
    model.save
    @vm.destroy
  end
end
208
+
209
# Creates a replacement VM for the instance, links it to the instance
# record and waits for its agent to become ready.
#
# If anything fails and @vm is set, that VM is deleted again before the
# error is re-raised.
#
# @param new_disk_id cid of an additional disk to pass to the VM creator
#   alongside the current persistent disk (nil entries are dropped)
def create_vm(new_disk_id)
  begin
    stemcell = @resource_pool_spec.stemcell
    disk_cids = [@instance.model.persistent_disk_cid, new_disk_id].compact

    @vm = VmCreator.create(
      @deployment_plan.model,
      stemcell.model,
      @resource_pool_spec.cloud_properties,
      @instance.network_settings,
      disk_cids,
      @resource_pool_spec.env
    )
    @instance.model.vm = @vm
    @instance.model.save

    agent.wait_until_ready
  rescue => e
    if @vm
      @logger.error("error during create_vm(), deleting vm #{@vm.cid}")
      delete_vm
    end
    raise e
  end
end
228
+
229
# Records the spec on the VM record first, then sends it to the agent.
#
# @param [Hash] state spec to persist as +apply_spec+ and to apply
# @return the agent's response to +apply+
def apply_state(state)
  @vm.update(:apply_spec => state)
  agent.apply(state)
end
233
+
234
# Retrieve list of mounted disks from the agent. A successful answer is
# memoized for the lifetime of the updater.
#
# When the agent call raises RuntimeError (old agents don't support the
# list_disk RPC) the director's own record is used instead. That record is
# read from the instance model, like every other persistent disk lookup in
# this class, so check_persistent_disk compares like with like. The
# fallback is not memoized.
#
# @return [Array<String>] list of disk CIDs
def disk_info
  return @disk_list if @disk_list

  begin
    @disk_list = agent.list_disk
  rescue RuntimeError
    # old agents don't support list_disk rpc
    [@instance.model.persistent_disk_cid]
  end
end
246
+
247
# Removes a persistent disk completely: unmount (if the agent knows the
# disk), detach from the VM, delete its snapshots, delete it in the cloud
# and finally destroy the DB record.
#
# "Not attached" / "not found" answers from the CPI are tolerated for
# inactive disks and turned into director errors for active ones.
#
# @param disk persistent disk record to delete
# @param [String, nil] vm_cid VM to detach from; detaching is skipped when nil
# @raise [CloudDiskNotAttached] active disk is not attached according to CPI
# @raise [CloudDiskMissing] active disk is missing according to CPI
def delete_disk(disk, vm_cid)
  disk_cid = disk.disk_cid

  # Unmount the disk only if disk is known by the agent
  agent.unmount_disk(disk_cid) if agent && disk_info.include?(disk_cid)

  begin
    @cloud.detach_disk(vm_cid, disk_cid) if vm_cid
  rescue Bosh::Clouds::DiskNotAttached
    if disk.active
      raise CloudDiskNotAttached,
            "`#{instance_name}' VM should have persistent disk attached but it doesn't (according to CPI)"
    end
  end

  delete_snapshots(disk)

  begin
    @cloud.delete_disk(disk_cid)
  rescue Bosh::Clouds::DiskNotFound
    if disk.active
      raise CloudDiskMissing,
            "Disk `#{disk_cid}' is missing according to CPI but marked as active in DB"
    end
  end

  disk.destroy
end
278
+
279
# Rewrites the A and PTR records of the instance; skipped entirely unless
# the instance reports a DNS change.
def update_dns
  return unless @instance.dns_changed?

  dns_domain = @deployment_plan.dns_domain
  @instance.dns_record_info.each do |name, ip|
    @logger.info("Updating DNS for: #{name} to #{ip}")
    update_dns_a_record(dns_domain, name, ip)
    update_dns_ptr_record(name, ip)
  end
end
289
+
290
# Recreates the VM: detach disk, delete VM, create VM, re-attach disk, then
# apply a bootstrap state to the new VM.
#
# Runs only when the instance's resource pool changed or a new disk cid is
# given. A NoDiskSpace error flagged ok_to_retry restarts the
# delete/create/attach sequence, up to MAX_ATTACH_DISK_TRIES times.
#
# @param [String, nil] new_disk_cid extra disk cid handed to create_vm
# @raise [CloudNotEnoughDiskSpace] retries are exhausted or not permitted
def update_resource_pool(new_disk_cid = nil)
  return unless @instance.resource_pool_changed? || new_disk_cid

  detach_disk

  attempts = 0
  begin
    delete_vm
    create_vm(new_disk_cid)
    attach_disk
  rescue Bosh::Clouds::NoDiskSpace => e
    if e.ok_to_retry && attempts < MAX_ATTACH_DISK_TRIES
      attempts += 1
      @logger.warn("Retrying attach disk operation #{attempts}")
      retry
    end
    @logger.warn("Giving up on attach disk operation")
    e.ok_to_retry = false
    raise CloudNotEnoughDiskSpace, "Not enough disk space to update `#{instance_name}'"
  end

  state = {
    "deployment" => @deployment_plan.name,
    "networks" => @instance.network_settings,
    "resource_pool" => @job.resource_pool.spec,
    "job" => @job.spec,
    "index" => @instance.index,
    "release" => @job.release.spec
  }
  state["persistent_disk"] = @instance.disk_size if @instance.disk_size > 0

  # if we have a failure above the new VM doesn't get any state,
  # which makes it impossible to recreate it
  apply_state(state)
  @instance.current_state = agent.get_state
end
329
+
330
# Re-attaches the persistent disk when the director has one on record but
# the instance does not report it as attached. If the cloud answers with
# NoDiskSpace, the VM is recreated via update_resource_pool instead.
def attach_missing_disk
  disk_on_record = @instance.model.persistent_disk_cid
  attach_disk if disk_on_record && !@instance.disk_currently_attached?
rescue Bosh::Clouds::NoDiskSpace
  update_resource_pool(@instance.model.persistent_disk_cid)
end
338
+
339
# Verifies that the disk the agent reports matches the director's record
# and logs a warning for every inactive disk still on record. Skipped when
# the instance has no persistent disk records.
#
# NOTE: Currently assumes that we only have 1 persistent disk.
# @return [void]
# @raise [AgentDiskOutOfSync] agent and director disagree on the disk cid
def check_persistent_disk
  model = @instance.model
  return if model.persistent_disks.empty?

  agent_disk_cid = disk_info.first

  unless agent_disk_cid == model.persistent_disk_cid
    raise AgentDiskOutOfSync,
          "`#{instance_name}' has invalid disks: agent reports " \
          "`#{agent_disk_cid}' while director record shows " \
          "`#{model.persistent_disk_cid}'"
  end

  model.persistent_disks.each do |disk|
    next if disk.active
    @logger.warn("`#{instance_name}' has inactive disk #{disk.disk_cid}")
  end
end
360
+
361
# Replaces the instance's persistent disk when its configuration changed.
#
# Always first re-attaches a missing disk and checks agent/director
# consistency. If the disk changed and the job still wants one (size > 0):
# create the new disk (recorded as inactive), attach it (recreating the VM
# once on a retryable NoDiskSpace), mount it and migrate data from the old
# disk. Afterwards the active flags are flipped in one transaction and the
# old disk, if any, is deleted.
#
# CLEANUP FIXME
# [olegs] Error cleanup should be performed AFTER logic cleanup, I can't
# even comprehend this method.
def update_persistent_disk
  attach_missing_disk
  check_persistent_disk

  disk_cid = nil
  disk = nil
  return unless @instance.persistent_disk_changed?

  old_disk = @instance.model.persistent_disk

  # Removes the freshly created disk from the cloud and from the DB.
  discard_new_disk = lambda do
    @cloud.delete_disk(disk_cid)
    disk.destroy
  end

  if @job.persistent_disk > 0
    @instance.model.db.transaction do
      disk_cid = @cloud.create_disk(@job.persistent_disk, @vm.cid)
      disk = Models::PersistentDisk.create(
        :disk_cid => disk_cid,
        :active => false,
        :instance_id => @instance.model.id,
        :size => @job.persistent_disk
      )
    end

    begin
      @cloud.attach_disk(@vm.cid, disk_cid)
    rescue Bosh::Clouds::NoDiskSpace => e
      if e.ok_to_retry
        @logger.warn("Retrying attach disk operation after persistent disk update failed")
        # Recreate the vm
        update_resource_pool(disk_cid)
        begin
          @cloud.attach_disk(@vm.cid, disk_cid)
        rescue
          discard_new_disk.call
          raise
        end
      else
        discard_new_disk.call
        raise
      end
    end

    begin
      agent.mount_disk(disk_cid)
      agent.migrate_disk(old_disk.disk_cid, disk_cid) if old_disk
    rescue
      delete_disk(disk, @vm.cid)
      raise
    end
  end

  @instance.model.db.transaction do
    old_disk.update(:active => false) if old_disk
    disk.update(:active => true) if disk
  end

  delete_disk(old_disk, @vm.cid) if old_disk
end
422
+
423
# Applies changed network settings to the VM; skipped unless the instance
# reports a network change.
#
# When the CPI raises NotSupported the instance is flagged for recreation
# and update_resource_pool is invoked instead of reconfiguring in place.
def update_networks
  return unless @instance.networks_changed?

  settings = @instance.network_settings

  begin
    # If configure_networks can't configure the network as
    # requested, e.g. when the security groups change on AWS,
    # configure_networks() will raise an exception and we'll
    # recreate the VM to work around it
    @cloud.configure_networks(@vm.cid, settings)
  rescue Bosh::Clouds::NotSupported => e
    @logger.info("configure_networks not supported: #{e.message}")
    @instance.recreate = true
    update_resource_pool
    return
  end

  # Once CPI has configured the vm and stored the new network settings at the registry,
  # we restart the agent via a 'prepare_network_change' message in order for the agent
  # to pick up the new network settings.
  agent.prepare_network_change(settings)

  # Give some time to the agent to restart before pinging if it's ready (race condition)
  sleep(5)

  agent.wait_until_ready
end
451
+
452
# Returns an agent client for the current VM. The cached client is reused
# only while its id still matches the VM's agent id; otherwise a new client
# is built and cached.
#
# @return [AgentClient]
# @raise [VmAgentIdMissing] the VM record has no agent id
def agent
  return @agent if @agent && @agent.id == @vm.agent_id

  raise VmAgentIdMissing, "VM #{@vm.id} is missing agent id" if @vm.agent_id.nil?

  @agent = AgentClient.new(@vm.agent_id)
end
462
+
463
# @return [String] a freshly generated random UUID
def generate_agent_id
  SecureRandom.uuid
end
466
+
467
# Returns an array of wait times distributed
# on the [min_watch_time..max_watch_time] interval.
#
# The first entry is always min_watch_time; the following entries are equal
# steps that together cover the rest of the window. Tries to respect
# intervals but doesn't allow an interval to fall under 1 second.
# All times are in milliseconds.
#
# An empty or inverted window (max_watch_time <= min_watch_time) and a
# single interval both yield a schedule holding only min_watch_time,
# instead of failing on a negative repeat count or a division by zero.
#
# @param [Numeric] min_watch_time minimum time to watch the jobs
# @param [Numeric] max_watch_time maximum time to watch the jobs
# @param [Numeric] intervals number of intervals between polling
#   the state of the jobs
# @return [Array<Numeric>] watch schedule
def watch_schedule(min_watch_time, max_watch_time, intervals = WATCH_INTERVALS)
  delta = (max_watch_time - min_watch_time).to_f
  return [min_watch_time] if delta <= 0 || intervals == 1

  step = [1000, delta / (intervals - 1)].max

  [min_watch_time] + ([step] * (delta / step).floor)
end
484
+
485
# @return [Boolean] Is instance shutting down for this update? True when the
#   resource pool, persistent disk or networks changed, or when the target
#   state is "stopped" or "detached".
def shutting_down?
  vm_affected = @instance.resource_pool_changed? ||
                @instance.persistent_disk_changed? ||
                @instance.networks_changed?

  vm_affected || %w[stopped detached].include?(@target_state)
end
493
+
494
# @return minimum watch time from the update config: the canary value when
#   updating a canary, the regular update value otherwise
def min_watch_time
  return @update_config.min_canary_watch_time if canary?

  @update_config.min_update_watch_time
end
497
+
498
# @return maximum watch time from the update config: the canary value when
#   updating a canary, the regular update value otherwise
def max_watch_time
  return @update_config.max_canary_watch_time if canary?

  @update_config.max_update_watch_time
end
501
+
502
# @return the +:canary+ option given to the most recent #update call
#   (nil before #update has been called)
def canary?
  @canary
end
505
+ end
506
+ end