bosh_aws_cpi 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,3 @@
1
+ # Copyright (c) 2009-2012 VMware, Inc.
2
+
3
+ BOSH AWS Cloud Provider Interface
data/Rakefile ADDED
@@ -0,0 +1,50 @@
1
+ # Copyright (c) 2009-2012 VMware, Inc.
2
+
3
# Make the shared rake helpers (bundler_task, ci_task) loadable.
$LOAD_PATH.unshift(File.expand_path("../../rake", __FILE__))

# Use the Gemfile next to this Rakefile unless the caller already chose one.
ENV["BUNDLE_GEMFILE"] ||= File.expand_path("../Gemfile", __FILE__)

require "rubygems"
require "bundler"
Bundler.setup(:default, :test)

require "rake"
begin
  require "rspec/core/rake_task"
rescue LoadError
  # RSpec could not be loaded; the spec tasks below are guarded by
  # defined?(RSpec) and are simply not defined in that case.
end

require "bundler_task"
require "ci_task"

gem_helper = Bundler::GemHelper.new(Dir.pwd)

desc "Build CPI gem into the pkg directory"
task("build") { gem_helper.build_gem }

desc "Build and install CPI into system gems"
task("install") do
  Rake::Task["bundler:install"].invoke
  gem_helper.install_gem
end

BundlerTask.new

if defined?(RSpec)
  namespace :spec do
    desc "Run Unit Tests"
    unit_specs = RSpec::Core::RakeTask.new(:unit) do |t|
      t.pattern = "spec/unit/**/*_spec.rb"
      t.rspec_opts = %w(--format progress --colour)
    end

    # The CI task wraps the unit spec task defined above.
    CiTask.new { |ci| ci.rspec_task = unit_specs }
  end

  desc "Run tests"
  task :spec => %w(spec:unit)
end
@@ -0,0 +1,3 @@
1
+ # Copyright (c) 2009-2012 VMware, Inc.
2
+
3
+ require "cloud/aws"
@@ -0,0 +1,596 @@
1
+ # Copyright (c) 2009-2012 VMware, Inc.
2
+
3
+ module Bosh::AwsCloud
4
+
5
+ class Cloud < Bosh::Cloud
6
+ include Helpers
7
+
8
+ DEFAULT_MAX_RETRIES = 2
9
+ DEFAULT_AVAILABILITY_ZONE = "us-east-1a"
10
+ DEFAULT_EC2_ENDPOINT = "ec2.amazonaws.com"
11
+ METADATA_TIMEOUT = 5 # seconds
12
+ DEVICE_POLL_TIMEOUT = 60 # seconds
13
+
14
+ DEFAULT_AKI = "aki-825ea7eb"
15
+
16
+ # UBUNTU_10_04_32_BIT_US_EAST_EBS = "ami-3e9b4957"
17
+ # UBUNTU_10_04_32_BIT_US_EAST = "ami-809a48e9"
18
+
19
+ attr_reader :ec2
20
+ attr_reader :registry
21
+
22
##
# Sets up the BOSH AWS CPI: validates the options, then builds the EC2
# API handle and the registry client from them.
# @param [Hash] options CPI options; the "aws" and "registry" sections
#   are checked by validate_options, "agent" is optional
# @raise [ArgumentError] if validate_options rejects the options
def initialize(options)
  @options = options.dup

  validate_options

  @logger = Bosh::Clouds::Config.logger
  @aws_logger = @logger # TODO make configurable

  @agent_properties = @options["agent"] || {}
  @aws_properties = @options["aws"]
  @registry_properties = @options["registry"]

  @default_key_name = @aws_properties["default_key_name"]
  @default_security_groups = @aws_properties["default_security_groups"]

  # Endpoint and retry count fall back to the class defaults when the
  # "aws" section does not set them.
  endpoint = @aws_properties["ec2_endpoint"] || DEFAULT_EC2_ENDPOINT
  retries = @aws_properties["max_retries"] || DEFAULT_MAX_RETRIES

  # AWS Ruby SDK is threadsafe but Ruby autoload isn't,
  # so we need to trigger eager autoload while constructing CPI
  AWS.eager_autoload!
  @ec2 = AWS::EC2.new(
    :access_key_id => @aws_properties["access_key_id"],
    :secret_access_key => @aws_properties["secret_access_key"],
    :ec2_endpoint => endpoint,
    :max_retries => retries,
    :logger => @aws_logger
  )

  # Registry updates are not really atomic in relation to
  # EC2 API calls, so they might get out of sync. Cloudcheck
  # is supposed to fix that.
  @registry = RegistryClient.new(@registry_properties["endpoint"],
                                 @registry_properties["user"],
                                 @registry_properties["password"])

  # Guards the memoized instance id read in current_instance_id.
  @metadata_lock = Mutex.new
end
68
+
69
##
# Creates an EC2 instance, waits until it is running, applies the network
# configuration and stores the initial agent settings in the registry.
# @param [String] agent_id Agent id associated with new VM
# @param [String] stemcell_id AMI id used to boot the new instance
# @param [Hash] resource_pool Resource pool specification; the keys read
#   here are "key_name", "instance_type" and "availability_zone"
# @param [Hash] network_spec Network specification, if it contains
#   security groups they must be existing
# @param [optional, Array] disk_locality List of disks that might be
#   attached to this instance in the future (currently only logged)
# @param [optional, Hash] environment Data to be merged into
#   agent settings
#
# @return [String] created instance id
def create_vm(agent_id, stemcell_id, resource_pool,
              network_spec, disk_locality = nil, environment = nil)
  with_thread_name("create_vm(#{agent_id}, ...)") do
    configurator = NetworkConfigurator.new(network_spec)

    # User data passed to the instance only carries the registry endpoint.
    user_data = { "registry" => { "endpoint" => @registry.endpoint } }

    # TODO: use as hint for availability zones
    @logger.debug("Disk locality is ignored by AWS CPI") if disk_locality

    groups = configurator.security_groups(@default_security_groups)
    @logger.debug("using security groups: #{groups.join(', ')}")

    params = {
      :image_id => stemcell_id,
      :count => 1,
      :key_name => resource_pool["key_name"] || @default_key_name,
      :security_groups => groups,
      :instance_type => resource_pool["instance_type"],
      :user_data => Yajl::Encoder.encode(user_data)
    }

    # The availability zone is only sent when the resource pool names one.
    zone = resource_pool["availability_zone"]
    params[:availability_zone] = zone if zone

    @logger.info("Creating new instance...")
    instance = @ec2.instances.create(params)
    state = instance.status

    @logger.info("Creating new instance `#{instance.id}', state is `#{state}'")

    wait_resource(instance, state, :running)

    configurator.configure(@ec2, instance)

    agent_settings = initial_agent_settings(agent_id, network_spec, environment)
    @registry.update_settings(instance.id, agent_settings)

    instance.id
  end
end
137
+
138
##
# Terminates EC2 instance, removes its settings from the registry and
# waits until the instance reports as terminated.
# @param [String] instance_id Running instance id
def delete_vm(instance_id)
  with_thread_name("delete_vm(#{instance_id})") do
    instance = @ec2.instances[instance_id]

    instance.terminate
    status_after_terminate = instance.status

    # TODO: should this be done before or after deleting VM?
    @logger.info("Deleting instance settings for `#{instance.id}'")
    @registry.delete_settings(instance.id)

    @logger.info(
      "Deleting instance `#{instance.id}', state is `#{status_after_terminate}'"
    )

    wait_resource(instance, status_after_terminate, :terminated)
  end
end
158
+
159
##
# Reboots EC2 instance by delegating to soft_reboot.
# @param [String] instance_id Running instance id
def reboot_vm(instance_id)
  with_thread_name("reboot_vm(#{instance_id})") do
    soft_reboot(@ec2.instances[instance_id])
  end
end
168
+
169
##
# Creates a new EBS volume and waits until it becomes available.
# @param [Integer] size disk size in MiB; must be between 1 GiB (1024)
#   and 1 TiB (1024 * 1024) inclusive
# @param [optional, String] instance_id vm id of the VM that this disk
#   will be attached to; its availability zone is used for the volume,
#   otherwise DEFAULT_AVAILABILITY_ZONE is used
# @raise [ArgumentError] if size is not an Integer
# @return [String] created EBS volume id
def create_disk(size, instance_id = nil)
  with_thread_name("create_disk(#{size}, #{instance_id})") do
    unless size.kind_of?(Integer)
      raise ArgumentError, "disk size needs to be an integer"
    end

    cloud_error("AWS CPI minimum disk size is 1 GiB") if size < 1024

    # 1 TiB is 1024 GiB, i.e. 1024 * 1024 MiB. The previous bound of
    # 1024 * 1000 rejected sizes the error message claims are allowed.
    cloud_error("AWS CPI maximum disk size is 1 TiB") if size > 1024 * 1024

    availability_zone =
      if instance_id
        @ec2.instances[instance_id].availability_zone
      else
        DEFAULT_AVAILABILITY_ZONE
      end

    volume_params = {
      # Size is passed in whole GiB, rounded up.
      :size => (size / 1024.0).ceil,
      :availability_zone => availability_zone
    }

    volume = @ec2.volumes.create(volume_params)
    state = volume.state

    @logger.info("Creating volume `#{volume.id}', " \
                 "state is `#{state}'")

    wait_resource(volume, state, :available)

    volume.id
  end
end
212
+
213
##
# Deletes EBS volume and waits for it to reach the deleted state.
# @param [String] disk_id volume id
# @raise [Bosh::Clouds::CloudError] if disk is not in available state
# @return nil
def delete_disk(disk_id)
  with_thread_name("delete_disk(#{disk_id})") do
    volume = @ec2.volumes[disk_id]
    current = volume.state

    unless current == :available
      cloud_error("Cannot delete volume `#{volume.id}', state is #{current}")
    end

    volume.delete

    begin
      current = volume.state
      @logger.info("Deleting volume `#{volume.id}', state is `#{current}'")

      wait_resource(volume, current, :deleted)
    rescue AWS::EC2::Errors::InvalidVolume::NotFound
      # The volume is no longer known to EC2; treated as deleted.
    end

    @logger.info("Volume `#{disk_id}' has been deleted")
  end
end
241
+
242
##
# Attaches an EBS volume to an instance and records the resulting device
# name under the instance's persistent disks in the agent settings.
# @param [String] instance_id instance id
# @param [String] disk_id volume id
def attach_disk(instance_id, disk_id)
  with_thread_name("attach_disk(#{instance_id}, #{disk_id})") do
    instance = @ec2.instances[instance_id]
    volume = @ec2.volumes[disk_id]

    device_name = attach_ebs_volume(instance, volume)

    update_agent_settings(instance) do |settings|
      disks = (settings["disks"] ||= {})
      persistent = (disks["persistent"] ||= {})
      persistent[disk_id] = device_name
    end
  end
end
256
+
257
##
# Removes the disk from the instance's persistent disks in the agent
# settings, then detaches the EBS volume from the instance.
# @param [String] instance_id instance id
# @param [String] disk_id volume id
def detach_disk(instance_id, disk_id)
  with_thread_name("detach_disk(#{instance_id}, #{disk_id})") do
    instance = @ec2.instances[instance_id]
    volume = @ec2.volumes[disk_id]

    # Settings are updated first, the actual detach happens afterwards.
    update_agent_settings(instance) do |settings|
      disks = (settings["disks"] ||= {})
      (disks["persistent"] ||= {}).delete(disk_id)
    end

    detach_ebs_volume(instance, volume)

    @logger.info("Detached `#{disk_id}' from `#{instance_id}'")
  end
end
273
+
274
##
# Applies a network specification to an instance and stores it as the
# "networks" entry of the instance's agent settings.
# @param [String] instance_id instance id
# @param [Hash] network_spec network specification
def configure_networks(instance_id, network_spec)
  with_thread_name("configure_networks(#{instance_id}, ...)") do
    description = "Configuring `#{instance_id}' to use the following " \
                  "network settings: #{network_spec.pretty_inspect}"
    @logger.info(description)

    configurator = NetworkConfigurator.new(network_spec)
    instance = @ec2.instances[instance_id]
    configurator.configure(@ec2, instance)

    update_agent_settings(instance) { |settings| settings["networks"] = network_spec }
  end
end
289
+
290
##
# Creates a new AMI using stemcell image.
# This method can only be run on an EC2 instance, as image creation
# involves creating and mounting new EBS volume as local block device.
# The temporary volume is detached (if it was attached) and deleted
# afterwards, whether or not image creation succeeded.
# @param [String] image_path local filesystem path to a stemcell image
# @param [Hash] cloud_properties CPI-specific properties; "disk" (volume
#   size in MiB, default 2048) and "kernel_id" (default DEFAULT_AKI)
#   are read here
# @return [String] id of the created image
def create_stemcell(image_path, cloud_properties)
  # TODO: refactor into several smaller methods
  with_thread_name("create_stemcell(#{image_path}...)") do
    # State consulted by the cleanup in the 'ensure' clause
    instance = nil
    volume = nil
    attached = false
    failure = nil

    begin
      # 1. Create and mount new EBS volume (2GB default)
      disk_size = cloud_properties["disk"] || 2048
      volume_id = create_disk(disk_size, current_instance_id)
      volume = @ec2.volumes[volume_id]
      instance = @ec2.instances[current_instance_id]

      sd_name = attach_ebs_volume(instance, volume)
      attached = true
      ebs_volume = find_ebs_device(sd_name)

      # 2. Copy image to new EBS volume
      Dir.mktmpdir do |tmp_dir|
        @logger.info("Extracting stemcell to `#{tmp_dir}'")

        unpack_image(tmp_dir, image_path)
        copy_root_image(tmp_dir, ebs_volume)

        # 3. Create snapshot and then an image using this snapshot
        snapshot = volume.create_snapshot
        wait_resource(snapshot, snapshot.status, :completed)

        image_params = {
          :name => "BOSH-#{generate_unique_name}",
          :architecture => "x86_64",
          :kernel_id => cloud_properties["kernel_id"] || DEFAULT_AKI,
          :root_device_name => "/dev/sda",
          :block_device_mappings => {
            "/dev/sda" => { :snapshot_id => snapshot.id },
            "/dev/sdb" => "ephemeral0"
          }
        }

        image = @ec2.images.create(image_params)
        wait_resource(image, image.state, :available, :state)

        image.id
      end
    rescue => e
      # TODO: delete snapshot?
      failure = e
      @logger.error(e)
      raise
    ensure
      if volume
        begin
          # Only detach when the attach actually succeeded; otherwise
          # detach_ebs_volume would fail with "not attached" and the
          # volume would never be deleted.
          detach_ebs_volume(instance, volume) if attached
          delete_disk(volume.id)
        rescue => cleanup_error
          # A cleanup failure must not replace the error that caused it;
          # with no earlier error it is propagated as before.
          raise unless failure
          @logger.error(cleanup_error)
        end
      end
    end
  end
end
351
+
352
##
# Deregisters the image that backs a stemcell.
# @param [String] stemcell_id image id
def delete_stemcell(stemcell_id)
  with_thread_name("delete_stemcell(#{stemcell_id})") do
    @ec2.images[stemcell_id].deregister
  end
end
358
+
359
##
# Deployment validation is not supported by this CPI; the call is handed
# to not_implemented with this method's name.
# @param old_manifest unused
# @param new_manifest unused
def validate_deployment(old_manifest, new_manifest)
  # Not implemented in VSphere CPI as well
  not_implemented(__method__)
end
363
+
364
+ private
365
+
366
##
# Builds the initial agent settings hash that create_vm stores in the
# registry for a new instance. Disk conventions for amazon are:
# system disk: /dev/sda
# ephemeral disk: /dev/sdb
# EBS volumes can be configured to map to other device names later (sdf
# through sdp, also some kernels will remap sd* to xvd*).
#
# @param [String] agent_id Agent id (will be picked up by agent to
#   assume its identity)
# @param [Hash] network_spec Agent network spec
# @param [Hash] environment stored under "env" when present
# @return [Hash] settings merged with the configured agent properties
#   (agent properties win on key collisions)
def initial_agent_settings(agent_id, network_spec, environment)
  disk_layout = {
    "system" => "/dev/sda",
    "ephemeral" => "/dev/sdb",
    "persistent" => {}
  }

  base = {
    "vm" => { "name" => "vm-#{generate_unique_name}" },
    "agent_id" => agent_id,
    "networks" => network_spec,
    "disks" => disk_layout
  }
  base["env"] = environment if environment

  base.merge(@agent_properties)
end
397
+
398
##
# Reads the instance's settings from the registry, yields them to the
# block for in-place modification and writes them back.
# @param instance object responding to #id
# @yield [Hash] settings read from the registry
# @raise [ArgumentError] if no block is given
def update_agent_settings(instance)
  raise ArgumentError, "block is not provided" unless block_given?

  current = @registry.read_settings(instance.id)
  yield current
  @registry.update_settings(instance.id, current)
end
407
+
408
##
# @return [String] string form of a freshly created random UUID
def generate_unique_name
  uuid = UUIDTools::UUID.random_create
  uuid.to_s
end
411
+
412
##
# Reads current instance id from EC2 metadata. We are assuming
# instance id cannot change while current process is running
# and thus memoizing it. The lookup runs under @metadata_lock.
# @return [String] body of the metadata response
def current_instance_id
  @metadata_lock.synchronize do
    unless @current_instance_id
      http = HTTPClient.new
      http.connect_timeout = METADATA_TIMEOUT

      # Using 169.254.169.254 is an EC2 convention for getting
      # instance metadata
      reply = http.get("http://169.254.169.254/1.0/meta-data/instance-id/")

      if reply.status != 200
        cloud_error("Instance metadata endpoint returned " \
                    "HTTP #{reply.status}")
      end

      @current_instance_id = reply.body
    end

    @current_instance_id
  end
rescue HTTPClient::TimeoutError
  cloud_error("Timed out reading instance metadata, " \
              "please make sure CPI is running on EC2 instance")
end
439
+
440
##
# Attaches a volume to an instance under the first free device name in
# /dev/sdf../dev/sdp and waits for the attachment to complete.
# @param instance instance to attach to
# @param volume volume to attach
# @return device name reported by the new attachment
def attach_ebs_volume(instance, volume)
  device_names = Set.new(instance.block_device_mappings.keys)
  new_attachment = nil

  ("f".."p").each do |char| # f..p is what console suggests
    # Some kernels will remap sdX to xvdX, so agent needs
    # to lookup both (sd, then xvd)
    dev_name = "/dev/sd#{char}"
    if device_names.include?(dev_name)
      @logger.warn("`#{dev_name}' on `#{instance.id}' is taken")
      next
    end
    new_attachment = volume.attach_to(instance, dev_name)
    break
  end

  if new_attachment.nil?
    # TODO: better messaging?
    cloud_error("Instance has too many disks attached")
  end

  state = new_attachment.status

  # Quoting now matches the other log messages; the original left the
  # instance id unquoted and the quote around the state unbalanced.
  @logger.info("Attaching `#{volume.id}' to `#{instance.id}', " \
               "state is `#{state}'")

  wait_resource(new_attachment, state, :attached)
  device_name = new_attachment.device

  @logger.info("Attached `#{volume.id}' to `#{instance.id}', " \
               "device name is `#{device_name}'")

  device_name
end
474
+
475
##
# Detaches a volume from an instance and waits for the detachment.
# The device name is looked up from the instance's block device mappings.
# @param instance instance the volume is attached to
# @param volume volume to detach
def detach_ebs_volume(instance, volume)
  # Map volume id => device name for everything attached to the instance.
  device_map = {}
  instance.block_device_mappings.each do |device_name, attachment|
    device_map[attachment.volume.id] = device_name
  end

  if device_map[volume.id].nil?
    cloud_error("Disk `#{volume.id}' is not attached " \
                "to instance `#{instance.id}'")
  end

  attachment = volume.detach_from(instance, device_map[volume.id])
  state = attachment.status

  # The original message had an unbalanced quote around the state.
  @logger.info("Detaching `#{volume.id}' from `#{instance.id}', " \
               "state is `#{state}'")

  begin
    wait_resource(attachment, state, :detached)
  rescue AWS::Core::Resource::NotFound
    # It's OK, just means attachment is gone when we're asking for state
  end
end
500
+
501
##
# Extracts a gzipped stemcell tarball into a directory and checks that
# it contained a root.img file.
# @param [String] tmp_dir directory to extract into
# @param [String] image_path path to the stemcell archive
def unpack_image(tmp_dir, image_path)
  # tar is run with an argument vector rather than through a shell
  # string, so paths with spaces or shell metacharacters are passed
  # through verbatim. stderr is folded into the captured output.
  command = ["tar", "-C", tmp_dir.to_s, "-xzf", image_path.to_s]
  output = IO.popen(command, :err => [:child, :out]) { |io| io.read }

  if $?.exitstatus != 0
    cloud_error("Failed to unpack stemcell root image, " \
                "tar exit status #{$?.exitstatus}: #{output}")
  end

  root_image = File.join(tmp_dir, "root.img")
  unless File.exist?(root_image)
    cloud_error("Root image is missing from stemcell archive")
  end
end
513
+
514
##
# Copies root.img from a directory onto a device using dd.
# @param [String] dir directory containing root.img
# @param [String] ebs_volume path dd writes to
def copy_root_image(dir, ebs_volume)
  Dir.chdir(dir) do
    # dd's stderr is folded into the captured output for the error report.
    dd_out = %x(dd if=root.img of=#{ebs_volume} 2>&1)
    status = $?.exitstatus

    unless status == 0
      cloud_error("Unable to copy stemcell root image, " \
                  "dd exit status #{status}: " \
                  "#{dd_out}")
    end
  end
end
524
+
525
##
# Polls once per second, up to DEVICE_POLL_TIMEOUT times, for a block
# device at the given /dev/sd* path or its /dev/xvd* counterpart.
# @param [String] sd_name device name, e.g. /dev/sdf
# @return [String] whichever of the two paths is a block device
#   (the sd name is checked first)
def find_ebs_device(sd_name)
  candidates = [sd_name, sd_name.gsub(%r{^/dev/sd}, "/dev/xvd")]

  DEVICE_POLL_TIMEOUT.times do
    found = candidates.find { |device| File.blockdev?(device) }
    return found if found

    sleep(1)
  end

  cloud_error("Cannot find EBS volume on current instance")
end
539
+
540
##
# Soft reboots EC2 instance by calling its reboot action.
# @param [AWS::EC2::Instance] instance EC2 instance
def soft_reboot(instance)
  # The instance shows no trackable status change while rebooting, so
  # this does not wait; the CPI client has to track the agent coming
  # back after the reboot.
  instance.reboot
end
549
+
550
##
# Hard reboots EC2 instance: stops it, waits for :stopped, starts it
# again and waits for :running.
# @param [AWS::EC2::Instance] instance EC2 instance
def hard_reboot(instance)
  # N.B. This will only work with ebs-store instances,
  # as instance-store instances don't support stop/start.
  instance.stop
  status_after_stop = instance.status

  @logger.info(
    "Stopping instance `#{instance.id}', state is `#{status_after_stop}'"
  )
  wait_resource(instance, status_after_stop, :stopped)

  instance.start
  status_after_start = instance.status

  @logger.info(
    "Starting instance `#{instance.id}', state is `#{status_after_start}'"
  )
  wait_resource(instance, status_after_start, :running)
end
572
+
573
##
# Checks that the options passed to the CPI contain the sections and
# credentials needed to build the EC2 and registry clients.
# @raise [ArgumentError] if the "aws" section is missing, is not a Hash,
#   or lacks "access_key_id" / "secret_access_key"
# @raise [ArgumentError] if the "registry" section is missing, is not a
#   Hash, or lacks "endpoint" / "user" / "password"
def validate_options
  aws = @options["aws"] if @options.has_key?("aws")
  if !aws.is_a?(Hash) || !aws["access_key_id"] || !aws["secret_access_key"]
    raise ArgumentError, "Invalid AWS configuration parameters"
  end

  registry = @options["registry"] if @options.has_key?("registry")
  if !registry.is_a?(Hash) ||
     !registry["endpoint"] || !registry["user"] || !registry["password"]
    raise ArgumentError, "Invalid registry configuration parameters"
  end
end
593
+
594
+ end
595
+
596
+ end