bosh_aws_cpi 0.7.0 → 1.5.0.pre.1113

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. data/README.md +22 -19
  2. data/bin/bosh_aws_console +1 -13
  3. data/lib/bosh_aws_cpi.rb +1 -1
  4. data/lib/cloud/aws/aki_picker.rb +7 -7
  5. data/lib/cloud/aws/availability_zone_selector.rb +40 -0
  6. data/lib/cloud/aws/cloud.rb +359 -476
  7. data/lib/cloud/aws/dynamic_network.rb +0 -6
  8. data/lib/cloud/aws/helpers.rb +10 -68
  9. data/lib/cloud/aws/instance_manager.rb +171 -0
  10. data/lib/cloud/aws/manual_network.rb +26 -0
  11. data/lib/cloud/aws/network_configurator.rb +33 -62
  12. data/lib/cloud/aws/resource_wait.rb +189 -0
  13. data/lib/cloud/aws/stemcell.rb +68 -0
  14. data/lib/cloud/aws/stemcell_creator.rb +114 -0
  15. data/lib/cloud/aws/tag_manager.rb +30 -0
  16. data/lib/cloud/aws/version.rb +1 -1
  17. data/lib/cloud/aws/vip_network.rb +9 -7
  18. data/lib/cloud/aws.rb +11 -2
  19. data/scripts/stemcell-copy.sh +37 -0
  20. metadata +45 -81
  21. data/Rakefile +0 -50
  22. data/lib/cloud/aws/registry_client.rb +0 -109
  23. data/spec/assets/stemcell-copy +0 -31
  24. data/spec/integration/cpi_test.rb +0 -78
  25. data/spec/spec_helper.rb +0 -121
  26. data/spec/unit/aki_picker_spec.rb +0 -29
  27. data/spec/unit/attach_disk_spec.rb +0 -143
  28. data/spec/unit/cloud_spec.rb +0 -32
  29. data/spec/unit/configure_networks_spec.rb +0 -113
  30. data/spec/unit/create_disk_spec.rb +0 -73
  31. data/spec/unit/create_stemcell_spec.rb +0 -113
  32. data/spec/unit/create_vm_spec.rb +0 -249
  33. data/spec/unit/delete_disk_spec.rb +0 -34
  34. data/spec/unit/delete_stemcell_spec.rb +0 -29
  35. data/spec/unit/delete_vm_spec.rb +0 -25
  36. data/spec/unit/detach_disk_spec.rb +0 -63
  37. data/spec/unit/helpers_spec.rb +0 -64
  38. data/spec/unit/network_configurator_spec.rb +0 -57
  39. data/spec/unit/reboot_vm_spec.rb +0 -38
  40. data/spec/unit/set_vm_metadata_spec.rb +0 -30
  41. data/spec/unit/validate_deployment_spec.rb +0 -16
@@ -14,14 +14,8 @@ module Bosh::AwsCloud
14
14
  super
15
15
  end
16
16
 
17
- ##
18
- # Configures EC2 dynamic network. Right now it's a no-op,
19
- # as dynamic networks are completely managed by EC2
20
- # @param [AWS:EC2] ec2 instance EC2 client
21
- # @param [AWS::EC2::Instance] instance EC2 instance to configure
22
17
  def configure(ec2, instance)
23
18
  end
24
-
25
19
  end
26
20
  end
27
21
 
@@ -4,8 +4,6 @@ module Bosh::AwsCloud
4
4
 
5
5
  module Helpers
6
6
 
7
- DEFAULT_TIMEOUT = 3600 # seconds
8
-
9
7
  ##
10
8
  # Raises CloudError exception
11
9
  #
@@ -16,72 +14,16 @@ module Bosh::AwsCloud
16
14
  raise Bosh::Clouds::CloudError, message
17
15
  end
18
16
 
19
- def wait_resource(resource, target_state, state_method = :status,
20
- timeout = DEFAULT_TIMEOUT)
21
-
22
- started_at = Time.now
23
- failures = 0
24
-
25
- # all resources but Attachment have id
26
- desc = resource.respond_to?(:id) ? resource.id : resource.to_s
27
-
28
- loop do
29
- task_checkpoint
30
-
31
- duration = Time.now - started_at
32
-
33
- if duration > timeout
34
- cloud_error("Timed out waiting for #{desc} to be #{target_state}")
35
- end
36
-
37
- if @logger
38
- @logger.debug("Waiting for #{desc} to be #{target_state} " \
39
- "(#{duration}s)")
40
- end
41
-
42
- state = get_state_for(resource, state_method) do |error|
43
- if block_given?
44
- yield error
45
- else
46
- @logger.error("#{error.message}: #{desc}")
47
- nil
48
- end
49
- end
50
-
51
- # This is not a very strong convention, but some resources
52
- # have 'error' and 'failed' states, we probably don't want to keep
53
- # waiting if we're in these states. Alternatively we could introduce a
54
- # set of 'loop breaker' states but that doesn't seem very helpful
55
- # at the moment
56
- if state == :error || state == :failed
57
- cloud_error("#{desc} state is #{state}, expected #{target_state}")
58
- end
59
-
60
- break if state == target_state
61
-
62
- sleep(1)
63
- end
64
-
65
- if @logger
66
- total = Time.now - started_at
67
- @logger.info("#{desc} is now #{target_state}, took #{total}s")
68
- end
69
- end
70
-
71
- private
72
-
73
- def get_state_for(resource, state_method)
74
- resource.send(state_method)
75
- rescue AWS::EC2::Errors::InvalidAMIID::NotFound,
76
- AWS::EC2::Errors::InvalidInstanceID::NotFound,
77
- AWS::Core::Resource::NotFound => e
78
- # ugly workaround for AWS race conditions:
79
- # 1) sometimes when we upload a stemcell and proceed to create a VM
80
- # from it, AWS reports that the AMI is missing
81
- # 2) sometimes when we create a new EC2 instance, AWS reports that
82
- # the instance it returns is missing
83
- # in both cases we just catch the exception, wait a little and retry...
84
- yield e
17
+ def extract_security_group_names(networks_spec)
18
+ networks_spec.
19
+ values.
20
+ select { |network_spec| network_spec.has_key? "cloud_properties" }.
21
+ map { |network_spec| network_spec["cloud_properties"] }.
22
+ select { |cloud_properties| cloud_properties.has_key? "security_groups" }.
23
+ map { |cloud_properties| Array(cloud_properties["security_groups"]) }.
24
+ flatten.
25
+ sort.
26
+ uniq
85
27
  end
86
28
  end
87
29
  end
@@ -0,0 +1,171 @@
1
+ require "common/common"
2
+
3
+ module Bosh::AwsCloud
4
+ class InstanceManager
5
+ include Helpers
6
+
7
+ attr_reader :instance
8
+ attr_reader :instance_params
9
+ attr_reader :elbs
10
+
11
+ def initialize(region, registry, az_selector=nil)
12
+ @region = region
13
+ @registry = registry
14
+ @logger = Bosh::Clouds::Config.logger
15
+ @az_selector = az_selector
16
+ @instance_params = {count: 1}
17
+ end
18
+
19
+ def create(agent_id, stemcell_id, resource_pool, networks_spec, disk_locality, environment, options)
20
+ @instance_params[:image_id] = stemcell_id
21
+ @instance_params[:instance_type] = resource_pool["instance_type"]
22
+ set_user_data_parameter(networks_spec)
23
+ set_key_name_parameter(resource_pool["key_name"], options["aws"]["default_key_name"])
24
+ set_security_groups_parameter(networks_spec, options["aws"]["default_security_groups"])
25
+ set_vpc_parameters(networks_spec)
26
+ set_availability_zone_parameter(
27
+ (disk_locality || []).map { |volume_id| @region.volumes[volume_id].availability_zone.to_s },
28
+ resource_pool["availability_zone"],
29
+ (@instance_params[:subnet].availability_zone_name if @instance_params[:subnet])
30
+ )
31
+
32
+ @logger.info("Creating new instance with: #{instance_params.inspect}")
33
+
34
+ # Retry the create instance operation a couple of times if we are told that the IP
35
+ # address is in use - it can happen when the director recreates a VM and AWS
36
+ # is too slow to update its state when we have released the IP address and want to
37
+ # reallocate it again.
38
+ errors = [AWS::EC2::Errors::InvalidIPAddress::InUse]
39
+ Bosh::Common.retryable(sleep: instance_create_wait_time, tries: 10, on: errors) do |tries, error|
40
+ @logger.warn("IP address was in use: #{error}") if tries > 0
41
+ @instance = @region.instances.create(instance_params)
42
+ end
43
+
44
+ # We need to wait here for the instance to be running, as if we are going to
45
+ # attach to a load balancer, the instance must be running.
46
+ # If we time out, it is because the instance never gets from state running to started,
47
+ # so we signal the director that it is ok to retry the operation. At the moment this
48
+ # is retried forever (until the operation is cancelled by the user).
49
+ begin
50
+ ResourceWait.for_instance(instance: instance, state: :running)
51
+ rescue Bosh::Common::RetryCountExceeded => e
52
+ @logger.warn("timed out waiting for #{instance.id} to be running")
53
+ raise Bosh::Clouds::VMCreationFailed.new(true)
54
+ end
55
+
56
+ @elbs = resource_pool['elbs']
57
+ attach_to_load_balancers if elbs
58
+
59
+ instance
60
+ end
61
+
62
+ def terminate(instance_id, fast=false)
63
+ @instance = @region.instances[instance_id]
64
+
65
+ remove_from_load_balancers
66
+
67
+ instance.terminate
68
+
69
+ @logger.info("Deleting instance settings for '#{instance.id}'")
70
+ @registry.delete_settings(instance.id)
71
+
72
+ if fast
73
+ TagManager.tag(instance, "Name", "to be deleted")
74
+ @logger.info("Instance #{instance_id} marked to deletion")
75
+ return
76
+ end
77
+
78
+ begin
79
+ @logger.info("Deleting instance '#{instance.id}'")
80
+ ResourceWait.for_instance(instance: instance, state: :terminated)
81
+ rescue AWS::EC2::Errors::InvalidInstanceID::NotFound
82
+ # It's OK, just means that instance has already been deleted
83
+ end
84
+ end
85
+
86
+ # Soft reboots EC2 instance
87
+ # @param [String] instance_id EC2 instance id
88
+ def reboot(instance_id)
89
+ instance = @region.instances[instance_id]
90
+
91
+ # There is no trackable status change for the instance being
92
+ # rebooted, so it's up to CPI client to keep track of agent
93
+ # being ready after reboot.
94
+ # Due to this, we can't deregister the instance from any load
95
+ # balancers it might be attached to, and reattach once the
96
+ # reboot is complete, so we just have to let the load balancers
97
+ # take the instance out of rotation, and put it back in once it
98
+ # is back up again.
99
+ instance.reboot
100
+ end
101
+
102
+ def attach_to_load_balancers
103
+ elb = AWS::ELB.new
104
+
105
+ elbs.each do |load_balancer|
106
+ lb = elb.load_balancers[load_balancer]
107
+ lb.instances.register(instance)
108
+ end
109
+ end
110
+
111
+ # Determines if the instance exists.
112
+ # @param [String] instance_id EC2 instance id
113
+ def has_instance?(instance_id)
114
+ instance = @region.instances[instance_id]
115
+
116
+ instance.exists? && instance.status != :terminated
117
+ end
118
+
119
+ def remove_from_load_balancers
120
+ elb = AWS::ELB.new
121
+
122
+ elb.load_balancers.each do |load_balancer|
123
+ begin
124
+ load_balancer.instances.deregister(instance)
125
+ rescue AWS::ELB::Errors::InvalidInstance
126
+ # ignore this, as it just means it wasn't registered
127
+ end
128
+ end
129
+ end
130
+
131
+ def set_key_name_parameter(resource_pool_key_name, default_aws_key_name)
132
+ key_name = resource_pool_key_name || default_aws_key_name
133
+ instance_params[:key_name] = key_name unless key_name.nil?
134
+ end
135
+
136
+ def set_security_groups_parameter(networks_spec, default_security_groups)
137
+ security_group_names = extract_security_group_names(networks_spec)
138
+ if security_group_names.empty?
139
+ instance_params[:security_groups] = default_security_groups
140
+ else
141
+ instance_params[:security_groups] = security_group_names
142
+ end
143
+ end
144
+
145
+ def set_vpc_parameters(network_spec)
146
+ manual_network_spec = network_spec.values.select { |spec| ["manual", nil].include? spec["type"] }.first
147
+ if manual_network_spec
148
+ instance_params[:subnet] = @region.subnets[manual_network_spec["cloud_properties"]["subnet"]]
149
+ instance_params[:private_ip_address] = manual_network_spec["ip"]
150
+ end
151
+ end
152
+
153
+ def set_availability_zone_parameter(volume_zones, resource_pool_zone, subnet_zone)
154
+ availability_zone = @az_selector.common_availability_zone(volume_zones, resource_pool_zone, subnet_zone)
155
+ instance_params[:availability_zone] = availability_zone if availability_zone
156
+ end
157
+
158
+ def set_user_data_parameter(networks_spec)
159
+ user_data = {registry: {endpoint: @registry.endpoint}}
160
+
161
+ spec_with_dns = networks_spec.values.select { |spec| spec.has_key? "dns" }.first
162
+ user_data[:dns] = {nameserver: spec_with_dns["dns"]} if spec_with_dns
163
+
164
+ @instance_params[:user_data] = Yajl::Encoder.encode(user_data)
165
+ end
166
+
167
+ private
168
+
169
+ def instance_create_wait_time; 30; end
170
+ end
171
+ end
@@ -0,0 +1,26 @@
1
+ module Bosh::AwsCloud
2
+ ##
3
+ #
4
+ class ManualNetwork < Network
5
+
6
+ attr_reader :subnet
7
+
8
+ # create manual network
9
+ # @param [String] name Network name
10
+ # @param [Hash] spec Raw network spec
11
+ def initialize(name, spec)
12
+ super
13
+ if @cloud_properties.nil? || !@cloud_properties.has_key?("subnet")
14
+ raise Bosh::Clouds::CloudError, "subnet required for manual network"
15
+ end
16
+ @subnet = @cloud_properties["subnet"]
17
+ end
18
+
19
+ def private_ip
20
+ @ip
21
+ end
22
+
23
+ def configure(ec2, instance)
24
+ end
25
+ end
26
+ end
@@ -12,11 +12,12 @@ module Bosh::AwsCloud
12
12
  class NetworkConfigurator
13
13
  include Helpers
14
14
 
15
+ attr_reader :vip_network, :network
16
+
15
17
  ##
16
18
  # Creates new network spec
17
19
  #
18
20
  # @param [Hash] spec raw network spec passed by director
19
- # TODO Add network configuration examples
20
21
  def initialize(spec)
21
22
  unless spec.is_a?(Hash)
22
23
  raise ArgumentError, "Invalid spec, Hash expected, " \
@@ -24,46 +25,52 @@ module Bosh::AwsCloud
24
25
  end
25
26
 
26
27
  @logger = Bosh::Clouds::Config.logger
27
- @dynamic_network = nil
28
+ @network = nil
28
29
  @vip_network = nil
29
- @security_groups = []
30
30
 
31
- spec.each_pair do |name, spec|
32
- network_type = spec["type"]
31
+ spec.each_pair do |name, network_spec|
32
+ network_type = network_spec["type"] || "manual"
33
33
 
34
34
  case network_type
35
- when "dynamic"
36
- if @dynamic_network
37
- cloud_error("More than one dynamic network for `#{name}'")
38
- else
39
- @dynamic_network = DynamicNetwork.new(name, spec)
40
- @security_groups += extract_security_groups(spec)
41
- end
42
- when "vip"
43
- if @vip_network
44
- cloud_error("More than one vip network for `#{name}'")
35
+ when "dynamic"
36
+ cloud_error("Must have exactly one dynamic or manual network per instance") if @network
37
+ @network = DynamicNetwork.new(name, network_spec)
38
+
39
+ when "manual"
40
+ cloud_error("Must have exactly one dynamic or manual network per instance") if @network
41
+ @network = ManualNetwork.new(name, network_spec)
42
+
43
+ when "vip"
44
+ cloud_error("More than one vip network for '#{name}'") if @vip_network
45
+ @vip_network = VipNetwork.new(name, network_spec)
46
+
45
47
  else
46
- @vip_network = VipNetwork.new(name, spec)
47
- @security_groups += extract_security_groups(spec)
48
- end
49
- else
50
- cloud_error("Invalid network type `#{network_type}': AWS CPI " \
51
- "can only handle `dynamic' and `vip' network types")
48
+ cloud_error("Invalid network type '#{network_type}' for AWS, " \
49
+ "can only handle 'dynamic', 'vip', or 'manual' network types")
52
50
  end
53
-
54
51
  end
55
52
 
56
- if @dynamic_network.nil?
57
- cloud_error("At least one dynamic network should be defined")
53
+ unless @network
54
+ cloud_error("Exactly one dynamic or manual network must be defined")
58
55
  end
59
56
  end
60
57
 
58
+ def subnet
59
+ @network.subnet
60
+ end
61
+
62
+ def private_ip
63
+ vpc? ? @network.private_ip : nil
64
+ end
65
+
66
+ def vpc?
67
+ @network.is_a? ManualNetwork
68
+ end
69
+
61
70
  # Applies network configuration to the vm
62
71
  # @param [AWS:EC2] ec2 instance EC2 client
63
72
  # @param [AWS::EC2::Instance] instance EC2 instance to configure
64
73
  def configure(ec2, instance)
65
- @dynamic_network.configure(ec2, instance)
66
-
67
74
  if @vip_network
68
75
  @vip_network.configure(ec2, instance)
69
76
  else
@@ -78,41 +85,5 @@ module Bosh::AwsCloud
78
85
  end
79
86
  end
80
87
  end
81
-
82
- ##
83
- # Returns the security groups for this network configuration, or
84
- # the default security groups if the configuration does not contain
85
- # security groups
86
- # @param [Array] default Default security groups
87
- # @return [Array] security groups
88
- def security_groups(default)
89
- if @security_groups.empty? && default
90
- default.sort
91
- else
92
- @security_groups.sort
93
- end
94
- end
95
-
96
- private
97
-
98
- ##
99
- # Extracts the security groups from the network configuration
100
- # @param [Hash] network_spec Network specification
101
- # @raise [ArgumentError] if the security groups in the network_spec
102
- # is not an Array
103
- def extract_security_groups(network_spec)
104
- if network_spec && network_spec["cloud_properties"]
105
- cloud_properties = network_spec["cloud_properties"]
106
- if cloud_properties && cloud_properties["security_groups"]
107
- unless cloud_properties["security_groups"].is_a?(Array)
108
- raise ArgumentError, "security groups must be an Array"
109
- end
110
- return cloud_properties["security_groups"]
111
- end
112
- end
113
- []
114
- end
115
-
116
88
  end
117
-
118
89
  end
@@ -0,0 +1,189 @@
1
+ require_relative 'helpers'
2
+
3
+ module Bosh::AwsCloud
4
+ class ResourceWait
5
+ include Helpers
6
+
7
+ DEFAULT_TRIES = 12 # a sane amount of retries on AWS (~25 minutes), as things can take anywhere between a minute and forevah
8
+ MAX_SLEEP_EXPONENT = 8
9
+
10
+ def self.for_instance(args)
11
+ raise ArgumentError, "args should be a Hash, but `#{args.class}' given" unless args.is_a?(Hash)
12
+ instance = args.fetch(:instance) { raise ArgumentError, 'instance object required' }
13
+ target_state = args.fetch(:state) { raise ArgumentError, 'state symbol required' }
14
+ valid_states = [:running, :terminated]
15
+ validate_states(valid_states, target_state)
16
+
17
+ ignored_errors = [
18
+ AWS::EC2::Errors::InvalidInstanceID::NotFound,
19
+ AWS::Core::Resource::NotFound
20
+ ]
21
+
22
+ new.for_resource(resource: instance, errors: ignored_errors, target_state: target_state) do |current_state|
23
+ if target_state == :running && current_state == :terminated
24
+ logger.error("instance #{instance.id} terminated while starting")
25
+ raise Bosh::Clouds::VMCreationFailed.new(true)
26
+ else
27
+ current_state == target_state
28
+ end
29
+ end
30
+ end
31
+
32
+ def self.for_attachment(args)
33
+ attachment = args.fetch(:attachment) { raise ArgumentError, 'attachment object required' }
34
+ target_state = args.fetch(:state) { raise ArgumentError, 'state symbol required' }
35
+ valid_states = [:attached, :detached]
36
+ validate_states(valid_states, target_state)
37
+
38
+ ignored_errors = []
39
+ if target_state == :attached
40
+ ignored_errors << AWS::Core::Resource::NotFound
41
+ end
42
+ description = "volume %s to be %s to instance %s as device %s" % [
43
+ attachment.volume.id, target_state, attachment.instance.id, attachment.device
44
+ ]
45
+
46
+ new.for_resource(resource: attachment, errors: ignored_errors, target_state: target_state, description: description) do |current_state|
47
+ current_state == target_state
48
+ end
49
+ rescue AWS::Core::Resource::NotFound
50
+ # if an attachment is detached, AWS can reap the object and the reference is no longer found,
51
+ # so consider this exception a success condition if we are detaching
52
+ raise unless target_state == :detached
53
+ end
54
+
55
+ def self.for_image(args)
56
+ image = args.fetch(:image) { raise ArgumentError, 'image object required' }
57
+ target_state = args.fetch(:state) { raise ArgumentError, 'state symbol required' }
58
+ valid_states = [:available, :deleted]
59
+ validate_states(valid_states, target_state)
60
+
61
+ ignored_errors = []
62
+ if target_state == :available
63
+ ignored_errors = [AWS::EC2::Errors::InvalidAMIID::NotFound]
64
+ end
65
+
66
+ new.for_resource(resource: image, errors: ignored_errors, target_state: target_state, state_method: :state) do |current_state|
67
+ current_state == target_state
68
+ end
69
+ end
70
+
71
+ def self.for_volume(args)
72
+ volume = args.fetch(:volume) { raise ArgumentError, 'volume object required' }
73
+ target_state = args.fetch(:state) { raise ArgumentError, 'state symbol required' }
74
+ valid_states = [:available, :deleted]
75
+ validate_states(valid_states, target_state)
76
+
77
+ new.for_resource(resource: volume, target_state: target_state) do |current_state|
78
+ current_state == target_state
79
+ end
80
+ rescue AWS::EC2::Errors::InvalidVolume::NotFound
81
+ # if a volume is deleted, AWS can reap the object and the reference is no longer found,
82
+ # so consider this exception a success condition if we are deleting
83
+ raise unless target_state == :deleted
84
+ end
85
+
86
+ def self.for_snapshot(args)
87
+ snapshot = args.fetch(:snapshot) { raise ArgumentError, 'snapshot object required' }
88
+ target_state = args.fetch(:state) { raise ArgumentError, 'state symbol required' }
89
+ valid_states = [:completed]
90
+ validate_states(valid_states, target_state)
91
+
92
+ new.for_resource(resource: snapshot, target_state: target_state, tries: 18) do |current_state|
93
+ current_state == target_state
94
+ end
95
+ end
96
+
97
+ def self.for_subnet(args)
98
+ subnet = args.fetch(:subnet) { raise ArgumentError, 'subnet object required' }
99
+ target_state = args.fetch(:state) { raise ArgumentError, 'state symbol required' }
100
+ valid_states = [:available]
101
+ validate_states(valid_states, target_state)
102
+
103
+ ignored_errors = [AWS::EC2::Errors::InvalidSubnetID::NotFound]
104
+
105
+ new.for_resource(resource: subnet, target_state: target_state, errors: ignored_errors, state_method: :state) do |current_state|
106
+ current_state == target_state
107
+ end
108
+ end
109
+
110
+ def self.for_sgroup(args)
111
+ sgroup = args.fetch(:sgroup) { raise ArgumentError, 'sgroup object required' }
112
+ target_state = args.fetch(:state) { raise ArgumentError, 'state symbol required' }
113
+ valid_states = [true, false]
114
+ validate_states(valid_states, target_state)
115
+
116
+ new.for_resource(resource: sgroup, target_state: true, state_method: :exists?) do |current_state|
117
+ current_state == target_state
118
+ end
119
+ end
120
+
121
+ def self.validate_states(valid_states, target_state)
122
+ unless valid_states.include?(target_state)
123
+ raise ArgumentError, "target state must be one of #{valid_states.join(', ')}, `#{target_state}' given"
124
+ end
125
+ end
126
+
127
+ def self.logger
128
+ Bosh::Clouds::Config.logger
129
+ end
130
+
131
+ def self.task_checkpoint
132
+ Bosh::Clouds::Config.task_checkpoint
133
+ end
134
+
135
+ def initialize
136
+ @started_at = Time.now
137
+ end
138
+
139
+ def for_resource(args)
140
+ resource = args.fetch(:resource)
141
+ state_method = args.fetch(:state_method, :status)
142
+ errors = args.fetch(:errors, [])
143
+ desc = args.fetch(:description) { resource.id }
144
+ tries = args.fetch(:tries, DEFAULT_TRIES).to_i
145
+ target_state = args.fetch(:target_state)
146
+
147
+ sleep_cb = self.class.sleep_callback("Waiting for #{desc} to be #{target_state}", tries)
148
+ errors << AWS::EC2::Errors::RequestLimitExceeded
149
+ ensure_cb = Proc.new do |retries|
150
+ cloud_error("Timed out waiting for #{desc} to be #{target_state}, took #{time_passed}s") if retries == tries
151
+ end
152
+
153
+ state = nil
154
+ Bosh::Common.retryable(tries: tries, sleep: sleep_cb, on: errors, ensure: ensure_cb ) do
155
+ Bosh::AwsCloud::ResourceWait.task_checkpoint
156
+
157
+ state = resource.method(state_method).call
158
+
159
+ if state == :error || state == :failed
160
+ raise Bosh::Clouds::CloudError, "#{desc} state is #{state}, expected #{target_state}, took #{time_passed}s"
161
+ end
162
+
163
+ # the yielded block should return true if we have reached the target state
164
+ yield state
165
+ end
166
+
167
+ Bosh::AwsCloud::ResourceWait.logger.info("#{desc} is now #{state}, took #{time_passed}s")
168
+ rescue Bosh::Common::RetryCountExceeded => e
169
+ Bosh::AwsCloud::ResourceWait.logger.error("Timed out waiting for #{desc} state is #{state}, expected to be #{target_state}, took #{time_passed}s")
170
+ raise e
171
+ end
172
+
173
+ def time_passed
174
+ Time.now - @started_at
175
+ end
176
+
177
+ private
178
+
179
+ def self.sleep_callback(description, tries)
180
+ lambda do |num_tries, error|
181
+ sleep_time = 2**[num_tries, MAX_SLEEP_EXPONENT].min # Exp backoff: 1, 2, 4, 8 ... up to max 256
182
+ Bosh::AwsCloud::ResourceWait.logger.debug("#{error.class}: `#{error.message}'") if error
183
+ Bosh::AwsCloud::ResourceWait.logger.debug("#{description}, retrying in #{sleep_time} seconds (#{num_tries}/#{tries})")
184
+ sleep_time
185
+ end
186
+ end
187
+ end
188
+ end
189
+
@@ -0,0 +1,68 @@
1
+ module Bosh::AwsCloud
2
+ class Stemcell
3
+ include Helpers
4
+
5
+ attr_reader :ami, :snapshots
6
+
7
+ def self.find(region, id)
8
+ image = region.images[id]
9
+ raise Bosh::Clouds::CloudError, "could not find AMI '#{id}'" unless image.exists?
10
+ new(region, image)
11
+ end
12
+
13
+ def initialize(region, image)
14
+ @region = region
15
+ @ami = image
16
+ @snapshots = []
17
+ end
18
+
19
+ def delete
20
+ memoize_snapshots
21
+
22
+ ami.deregister
23
+
24
+ # Wait for the AMI to be deregistered, or the snapshot deletion will fail,
25
+ # as the AMI is still in use.
26
+ ResourceWait.for_image(image: ami, state: :deleted)
27
+
28
+ delete_snapshots
29
+ logger.info("deleted stemcell '#{id}'")
30
+ rescue AWS::EC2::Errors::AuthFailure => e
31
+ # If we get an auth failure from the deregister call, it means we don't own the AMI
32
+ # and we were just faking it, so we can just return pretending that we deleted it.
33
+ logger.info("deleted fake stemcell '#{id}")
34
+ end
35
+
36
+ def id
37
+ ami.id
38
+ end
39
+
40
+ def root_device_name
41
+ ami.root_device_name
42
+ end
43
+
44
+ def memoize_snapshots
45
+ # .to_h is used as the AWS API documentation isn't trustworthy:
46
+ # it says block_device_mappings returns a Hash, but in reality it flattens it!
47
+ ami.block_device_mappings.to_h.each do |device, map|
48
+ snapshot_id = map[:snapshot_id]
49
+ if id
50
+ logger.debug("queuing snapshot '#{snapshot_id}' for deletion")
51
+ snapshots << snapshot_id
52
+ end
53
+ end
54
+ end
55
+
56
+ def delete_snapshots
57
+ snapshots.each do |id|
58
+ logger.info("cleaning up snapshot '#{id}'")
59
+ snapshot = @region.snapshots[id]
60
+ snapshot.delete
61
+ end
62
+ end
63
+
64
+ def logger
65
+ Bosh::Clouds::Config.logger
66
+ end
67
+ end
68
+ end