staugaard-cloudmaster 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +1 -1
- data/app/active_set_factory.rb +16 -0
- data/app/active_set_none.rb +16 -0
- data/app/active_set_queue.rb +27 -0
- data/app/active_set_s3.rb +25 -0
- data/app/configuration.rb +85 -0
- data/app/default-config.ini +95 -0
- data/app/ec2_image_enumerator.rb +41 -0
- data/app/ec2_instance_enumerator.rb +25 -0
- data/app/instance.rb +146 -0
- data/app/instance_pool.rb +326 -0
- data/app/named_queue.rb +75 -0
- data/app/policy.rb +113 -0
- data/app/policy_daytime.rb +18 -0
- data/app/policy_factory.rb +16 -0
- data/app/policy_fixed.rb +19 -0
- data/app/policy_job.rb +54 -0
- data/app/policy_limit.rb +68 -0
- data/app/policy_manual.rb +36 -0
- data/app/policy_resource.rb +110 -0
- data/app/pool_configuration.rb +172 -0
- data/app/pool_manager.rb +239 -0
- data/app/pool_runner.rb +54 -0
- data/app/reporter.rb +81 -0
- data/app/status_parser_factory.rb +16 -0
- data/app/status_parser_lifeguard.rb +48 -0
- data/app/status_parser_std.rb +11 -0
- metadata +27 -1
data/VERSION.yml
CHANGED
@@ -0,0 +1,16 @@
|
|
1
|
+
# This is a factory for ActiveSet implementations.
|
2
|
+
require 'factory'
|
3
|
+
|
4
|
+
module Cloudmaster
  # Factory that builds the ActiveSet implementation named in the
  # configuration.
  class ActiveSetFactory
    include Factory

    # Create an ActiveSet object of the given type.
    #
    # type   -- implementation name (e.g. a symbol or string); nil selects
    #           the 'none' implementation.
    # params -- passed through to the implementation's constructor.
    #
    # The implementation file "active_set_<type>" is required on demand and
    # the class "ActiveSet<Type>" is instantiated through Factory.
    # Raises a RuntimeError when no object could be created.
    def self.create(type, *params)
      kind = type.nil? ? 'none' : type.to_s
      # Loaded lazily so only the selected implementation is pulled in.
      require 'active_set_' + kind.downcase
      class_name = 'ActiveSet' + kind.capitalize
      Factory.create_object_from_string(class_name, *params) ||
        raise("Bad configuration -- bad active_set #{class_name}")
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# This implementation of ActiveSet does nothing
|
2
|
+
# It is appropriate when you don't want the active set.
|
3
|
+
|
4
|
+
module Cloudmaster
  # Null implementation of ActiveSet: it accepts the same calls as the
  # other implementations but records nothing.
  class ActiveSetNone
    # The configuration is accepted for interface compatibility and ignored.
    def initialize(config); end

    # Always valid, since there is nothing to configure.
    def valid?
      true
    end

    # Discard the active set. Returns nil.
    def update(active_set)
      nil
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'aws_context'
|
2
|
+
|
3
|
+
# This implementation of ActiveSet writes the active set to a queue
|
4
|
+
|
5
|
+
module Cloudmaster
  # ActiveSet implementation that publishes the active set as a message
  # on a queue.
  class ActiveSetQueue
    # Placeholder body sent when the active set is empty.
    EMPTY_BODY = ' '

    # config -- pool configuration; must respond to [] and append_env.
    #
    # The queue name is read from :active_set_queue and, when that is not
    # set, from :active_set_queue_name (the key under which the section
    # defaults publish the conventional "<name>-active-set" queue).
    def initialize(config)
      @sqs = AwsContext.instance.sqs
      base_name = config[:active_set_queue] || config[:active_set_queue_name]
      active_set_queue_name = config.append_env(base_name)
      @active_set_queue = NamedQueue.new(active_set_queue_name)
    end

    # Return true if the queue object was created.
    def valid?
      !@active_set_queue.nil?
    end

    # Send the active set to the queue.
    # A nil or empty active set is sent as a single space.
    def update(active_set)
      body = active_set
      body = EMPTY_BODY if body.nil? || body.empty?
      @sqs.send_message(@active_set_queue, body)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'aws_context'
|
2
|
+
|
3
|
+
# This implementation of ActiveSet writes the active set to S3.
|
4
|
+
|
5
|
+
module Cloudmaster
  # ActiveSet implementation that stores the active set as an S3 object.
  class ActiveSetS3
    # config -- pool configuration; read for :active_set_bucket and
    #           :active_set_key, and used to append the environment to the key.
    def initialize(config)
      @config = config
      @s3 = AwsContext.instance.s3
    end

    # Truthy when both the bucket and the key are configured.
    # Returns the key value in that case, otherwise nil/false.
    def valid?
      bucket = @config[:active_set_bucket]
      bucket && @config[:active_set_key]
    end

    # Write the active set to the configured bucket, under the configured
    # key with the environment appended. Returns whatever the S3 call returns.
    def update(active_set)
      bucket = @config[:active_set_bucket]
      key = @config.append_env(@config[:active_set_key])
      @s3.create_object(bucket, key, :data => active_set)
    end
  end
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require 'inifile'
|
2
|
+
require 'basic_configuration'
|
3
|
+
require 'pp'
|
4
|
+
|
5
|
+
# Configuration
|
6
|
+
# Read an ini file and create a configuration.
|
7
|
+
# The configuration contains the aws configuration, the defaults, and all the pool sections.
|
8
|
+
|
9
|
+
module Cloudmaster

  # Configuration built from ini files.
  # Exposes the merged [default] section (default) and one configuration
  # hash per selected pool-<name> section (pools). The aws settings are
  # handled by the base class.
  class Configuration < BasicConfiguration
    attr_reader :default, :pools

    # Create the configuration by reading the given config files.
    #
    # config_files -- list of config file names, passed to the base class.
    # opts         -- pool names to keep; when empty, every pool section
    #                 is kept.
    #
    # The directory of this source file is also given to the base class as
    # a place to search for config files.
    def initialize(config_files = [], opts = [])
      @opts = opts
      @default = {}
      @pools = []
      super(config_files, [File.dirname(__FILE__)])
      # Expose the aws settings as the default user data.
      @default[:user_data] = {
        :aws_env => @aws[:aws_env],
        :aws_access_key => @aws[:aws_access_key],
        :aws_secret_key => @aws[:aws_secret_key]
      }
    end

    # Clear the defaults and pools, then let the base class refresh.
    # NOTE(review): the :user_data entry added by initialize is not re-added
    # here -- confirm whether that is intended.
    def refresh
      @default = {}
      @pools = []
      super
    end

    private

    # Read one config file.
    # Merges its [default] section into the defaults and collects every
    # selected pool section. Returns nil when the base class yields no ini.
    def read(config_file)
      ini = super(config_file)
      return nil unless ini

      @default.merge!(ini['default'])

      ini.each_section do |section|
        vals = ini[section]
        # Only sections of the form pool-<name> describe pools.
        next unless section.start_with?("pool-")

        pool_name = section[5..-1].to_sym
        vals[:name] = pool_name
        selected = @opts.empty? || @opts.include?(pool_name)
        @pools << section_config(vals) if selected
      end
    end

    # Supply the section defaults, derived from the aws config and the
    # section name. Returns nil when there is no name.
    # TODO Most of these should be handled by policy or its subclasses.
    def section_defaults(name)
      return nil unless name

      aws_user = @aws[:aws_user]
      {
        # generic
        :ami_name => "#{aws_user}-ami-#{name}",
        :security_groups => [name.to_s],
        :key_pair_name => "#{aws_user}-kp",
        # policy plugin
        :work_queue_name => "#{name}-work",
        :status_queue_name => "#{name}-status",
        :manual_queue_name => "#{name}-manual",
        # active_set plugin
        :active_set_bucket => "#{@aws[:aws_bucket]}",
        :active_set_key => "active-set/#{name}-instances",
        :active_set_item => "active-set-#{name}-instances",
        :active_set_queue_name => "#{name}-active-set"
      }
    end

    # Build the configuration for a single pool section: the name-derived
    # defaults, overridden by whatever the section itself specifies.
    # If the naming conventions for images, queues, and bucket are followed,
    # the section needs no further aws configuration.
    def section_config(config)
      defaults = section_defaults(config[:name])
      defaults.merge(config)
    end
  end
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#
|
2
|
+
# EC2 Instance Manager configuration default parameters
|
3
|
+
|
4
|
+
|
5
|
+
[default]
|
6
|
+
# defaults to name in section heading
|
7
|
+
# in this case <name> is default because section is Pool-default
|
8
|
+
name=nil
|
9
|
+
# The logger to use
|
10
|
+
logger=file
|
11
|
+
logfile=STDOUT
|
12
|
+
# must be supplied -- no default
|
13
|
+
# job, resource, or default
|
14
|
+
policy=nil
|
15
|
+
# how often policy is evaluated
|
16
|
+
policy_interval=60
|
17
|
+
# status parser
|
18
|
+
status_parser=std
|
19
|
+
# how often queue is monitored
|
20
|
+
poll_interval=5
|
21
|
+
# how many status and log messages received at once
|
22
|
+
receive_count=10
|
23
|
+
# work queue name
|
24
|
+
# default name comes from heading: <name>-work
|
25
|
+
work_queue_name=nil
|
26
|
+
# status queue name
|
27
|
+
# default name comes from heading: <name>-status
|
28
|
+
status_queue_name=nil
|
29
|
+
# store active-set updates in this S3 bucket
|
30
|
+
# default comes from aws_bucket in AWS section above
|
31
|
+
active_set_bucket=nil
|
32
|
+
# the key to use for storing active-set entries in S3
|
33
|
+
# the default is built from the section name: active-set/<name>-instances
|
34
|
+
active_set_key=nil
|
35
|
+
# if enabled, active set is written this often (minutes)
|
36
|
+
active_set_interval=1
|
37
|
+
# by default, active set is disabled
|
38
|
+
active_set_type=none
|
39
|
+
# the name of the image to start
|
40
|
+
# default is built from AWS entries: <aws_user>-ami-<name>-<aws-env>
|
41
|
+
ami_name=nil
|
42
|
+
# the keypair to use
|
43
|
+
# default is built from aws_user in the AWS section: <aws_user>-kp
|
44
|
+
key_pair_name=nil
|
45
|
+
security_groups=[ ]
|
46
|
+
instance_type="m1.small"
|
47
|
+
# the user data sent to the instance when it starts
|
48
|
+
# the default contains the aws_env, the aws_access_key and the
|
49
|
+
# aws_secret_key
|
50
|
+
# given in hash notation in quotes: "{:key1 => value1, :key2 => value2}"
|
51
|
+
user_data={}
|
52
|
+
# the time (in minutes) between checking instances
|
53
|
+
audit_instance_interval=1
|
54
|
+
# maximum number of instances allowed
|
55
|
+
maximum_number_of_instances=2
|
56
|
+
# minimum number of instances allowed
|
57
|
+
minimum_number_of_instances=0
|
58
|
+
# the maximum instances started in one policy interval
|
59
|
+
start_limit=1
|
60
|
+
# the minimum instance lifetime, in minutes
|
61
|
+
minimum_lifetime=55
|
62
|
+
# the minimum time a server stays active (in minutes)
|
63
|
+
minimum_active_time=10
|
64
|
+
# the maximum number of instances to stop in one policy interval
|
65
|
+
stop_limit=1
|
66
|
+
# if no status is received for this time (in minutes) then
|
67
|
+
# the instance is considered dead and will be stopped
|
68
|
+
watchdog_interval=10
|
69
|
+
# in job policy, if work queue size is greater than
|
70
|
+
# start_threshold * number of
|
71
|
+
start_threshold=2
|
72
|
+
#in job policy, if more than idle_threshold active instances
|
73
|
+
# with load 0 exist, stop some of them
|
74
|
+
idle_threshold=1
|
75
|
+
# in resource policy, the limits that it tries
|
76
|
+
# to keep servers within
|
77
|
+
target_upper_load=0.75
|
78
|
+
target_lower_load=0.25
|
79
|
+
# in resource policy, divide the queue depth by this
|
80
|
+
# factor to get the number of new servers needed
|
81
|
+
queue_load_factor=2
|
82
|
+
# in resource policy, the minimum load on shutdown
|
83
|
+
# server before it is stopped
|
84
|
+
shut_down_threshold=0
|
85
|
+
# how long to wait after shutdown before termination
|
86
|
+
# in minutes
|
87
|
+
shut_down_interval=60
|
88
|
+
# how often to give summary (depth/instances/load)
|
89
|
+
# default of 0 means give every poll_interval
|
90
|
+
summary_interval=0
|
91
|
+
# how often to give instance reports
|
92
|
+
instance_report_interval=60
|
93
|
+
# if set, it is a pathname to a directory where individual log files
|
94
|
+
# are written for each instance
|
95
|
+
instance_log=""
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'aws_context'
|
2
|
+
|
3
|
+
module Cloudmaster

  # Provides enumeration and lookup for EC2 images.
  # The image list is read once from EC2 when the object is created and
  # stored; it is then enumerated one image at a time, and the stored list
  # can be searched by name.
  class EC2ImageEnumerator
    include Enumerable

    # Create the enumerator. Fetch and store the complete image list.
    def initialize
      @images = AwsContext.instance.ec2.describe_images
    end

    # Enumerate each stored image.
    # Without a block, returns an Enumerator over the images.
    def each
      return enum_for(:each) unless block_given?
      @images.each { |image| yield image }
    end

    # Look for the image whose :location matches the given name.
    # image_name is used as a regular expression pattern.
    # Returns the image id if exactly one image matches.
    # Raises a RuntimeError if no image or more than one image matches.
    # Searches the list stored by the constructor; the fetch is slow, so it
    # is not repeated here.
    def find_image_id_by_name(image_name)
      # Build the pattern once rather than once per image.
      filter = Regexp.new(image_name)
      images = select { |image| image[:location] =~ filter }
      case images.length
      when 0
        raise "Bad Configuration -- image #{image_name} not found"
      when 1
        images[0][:id]
      else
        raise "Bad configuration -- multiple images #{image_name}"
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'aws_context'
|
2
|
+
|
3
|
+
module Cloudmaster

  # Provides an enumerator for EC2 instances.
  # The instances are queried once, when the object is created.
  # Handles all instances we own, or just the ones matching a list of ids.
  class EC2InstanceEnumerator
    include Enumerable

    # Fetch the instance descriptions from EC2.
    # ids -- optional instance ids, passed through to describe_instances.
    def initialize(*ids)
      @instances = AwsContext.instance.ec2.describe_instances(*ids)
    end

    # Yield every instance. Each entry of the stored result holds its
    # instances under :instances; the entries are walked in order.
    def each
      @instances.each do |entry|
        members = entry[:instances]
        members.each { |member| yield member }
      end
    end
  end
end
|
data/app/instance.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
module Cloudmaster

  # Holds information about a specific instance.
  # When we create an instance in EC2, we create one of these objects to hold
  # information about the instance. Instances are members of the InstancePool.
  #
  # Each instance holds the following information:
  # * id -- instance id
  # * public_dns -- the public dns name of the instance
  # * load_estimate -- last reported load
  # * state -- startup, active, shut_down
  # * start_time -- (local) time when the instance was started
  # * status_time -- (local) time when the last status was received
  # * state_change_time -- (local) time of the last state update
  # * timestamp -- (remote) timestamp of the last accepted report
  #
  # The state here differs from the EC2 state. We only track instances in
  # EC2 state pending or running. Our state is controlled by status messages
  # received and by stop policies.
  class Instance
    attr_reader :id, :state_change_time
    attr_accessor :public_dns
    attr_accessor :load_estimate, :state # writers are for testing only
    attr_reader :status_time, :timestamp # for testing only

    # Create an instance object, reflecting some instance that was started
    # or discovered running.
    #
    # id         -- the instance id
    # public_dns -- the public DNS name (may be filled in later)
    # config     -- configuration, read for :minimum_lifetime,
    #               :minimum_active_time and :watchdog_interval (minutes)
    #
    # A new instance is in state :startup with a load estimate of 0.
    def initialize(id, public_dns, config)
      @config = config
      @id = id
      @public_dns = public_dns
      @load_estimate = 0
      @start_time = @status_time = Clock.now
      # Time 0 means "not active" / "no report seen yet".
      @active_time = Clock.at(0)
      @state_change_time = Clock.now
      @timestamp = Clock.at(0)
      @state = :startup
    end

    # Return a report of the instance's state, load estimate, and time
    # since the last status message was received.
    def report
      "State: #{@state} Load: #{sprintf("%.2f", @load_estimate)} Time Since Status: #{time_since_status.round}"
    end

    # Move the instance to the given state and record when that happened.
    # Entering :active records the activation time; leaving :active resets
    # it to time 0.
    def update_state(state)
      old_state, @state = @state, state
      @state_change_time = Clock.now
      if old_state != :active && @state == :active
        @active_time = Clock.now
      elsif old_state == :active && @state != :active
        # Must compare the local old_state: there is no @old_state, so
        # testing it would never reset the activation time.
        @active_time = Clock.at(0)
      end
    end

    # Update the state and estimated load based on a status message.
    # Ignore the status message if it was sent earlier than
    # one we have already processed. This is important, because
    # SQS routinely delivers messages out of order.
    #
    # msg -- hash read for :timestamp, :state and :load_estimate.
    def update_status(msg)
      return unless message_more_recent?(msg[:timestamp])

      @timestamp = msg[:timestamp]
      @status_time = Clock.now
      update_state(msg[:state].to_sym) if msg[:state]
      @load_estimate = msg[:load_estimate] if msg[:load_estimate]
    end

    # Return true if the given timestamp is more recent than the last.
    # This calculation takes place with times from the <b>sender's</b>
    # clock. In other words, it is comparing values based on two timestamps
    # created by the message sender. A nil timestamp is never more recent.
    def message_more_recent?(timestamp)
      !timestamp.nil? && timestamp > @timestamp
    end

    # Return the number of seconds since the last message was received.
    # This uses local time only.
    def time_since_status
      Clock.now - @status_time
    end

    # Return the number of seconds since the state was last updated.
    # This uses local times only.
    def time_since_state_change
      Clock.now - @state_change_time
    end

    # Return the number of seconds since the instance was started.
    def time_since_startup
      Clock.now - @start_time
    end

    # Return the number of seconds since the instance became active.
    # When the instance is not active this is measured from time 0.
    def time_since_active
      Clock.now - @active_time
    end

    # Return true if the instance has lived longer than its minimum
    # lifetime. A minimum lifetime of 0 or less is always elapsed.
    def minimum_lifetime_elapsed?
      lifetime = @config[:minimum_lifetime].to_i * 60
      return true if lifetime <= 0
      time_since_startup > lifetime
    end

    # Return true if the instance has been active longer than its minimum
    # active time. A minimum active time of 0 or less is always elapsed.
    def minimum_active_time_elapsed?
      active_time = @config[:minimum_active_time].to_i * 60
      return true if active_time <= 0
      time_since_active > active_time
    end

    # Return true if the instance has lived and has been active for its
    # respective minimum times.
    def minimum_time_elapsed?
      minimum_lifetime_elapsed? && minimum_active_time_elapsed?
    end

    # Return true if the instance has not received a status message
    # in the watchdog interval. An interval of 0 or less disables the
    # watchdog.
    def watchdog_time_elapsed?
      interval = @config[:watchdog_interval].to_i * 60
      return false if interval <= 0
      time_since_status > interval
    end

    # Shut down an instance by putting it in the "shut_down" state.
    # After this it can either be activated again or stopped.
    def shutdown
      update_state(:shut_down)
    end

    # Make the instance active. This is usually done when an instance
    # that was shut down, but not yet stopped, needs to become active again.
    def activate
      update_state(:active)
    end
  end
end
|