staugaard-cloudmaster 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +1 -1
- data/app/active_set_factory.rb +16 -0
- data/app/active_set_none.rb +16 -0
- data/app/active_set_queue.rb +27 -0
- data/app/active_set_s3.rb +25 -0
- data/app/configuration.rb +85 -0
- data/app/default-config.ini +95 -0
- data/app/ec2_image_enumerator.rb +41 -0
- data/app/ec2_instance_enumerator.rb +25 -0
- data/app/instance.rb +146 -0
- data/app/instance_pool.rb +326 -0
- data/app/named_queue.rb +75 -0
- data/app/policy.rb +113 -0
- data/app/policy_daytime.rb +18 -0
- data/app/policy_factory.rb +16 -0
- data/app/policy_fixed.rb +19 -0
- data/app/policy_job.rb +54 -0
- data/app/policy_limit.rb +68 -0
- data/app/policy_manual.rb +36 -0
- data/app/policy_resource.rb +110 -0
- data/app/pool_configuration.rb +172 -0
- data/app/pool_manager.rb +239 -0
- data/app/pool_runner.rb +54 -0
- data/app/reporter.rb +81 -0
- data/app/status_parser_factory.rb +16 -0
- data/app/status_parser_lifeguard.rb +48 -0
- data/app/status_parser_std.rb +11 -0
- metadata +27 -1
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'policy'

module Cloudmaster

  # Manual policy.
  #
  # The pool size never changes on its own; it changes only when an
  # operator asks for it.  Requests arrive as YAML-encoded messages on a
  # dedicated "manual" queue, each carrying an :adjust delta.
  class PolicyManual < Policy
    # Wire up the manual queue named by config[:manual_queue_name]
    # (suffixed with the AWS environment, if any).
    def initialize(reporter, config, instances)
      super(reporter, config, instances)
      @config = config
      @sqs = AwsContext.instance.sqs
      queue_name = @config.append_env(config[:manual_queue_name])
      @manual_queue = NamedQueue.new(queue_name)
    end

    # Drain every pending message from the manual queue and return the
    # net requested adjustment (sum of all :adjust values).
    def adjust
      total = 0
      loop do
        batch = @manual_queue.read_messages(10)
        return total if batch.empty?
        batch.each do |message|
          request = YAML.load(message[:body])
          total += request[:adjust]
          @manual_queue.delete_message(message[:receipt_handle])
        end
      end
    end
  end

end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
|
2
|
+
require 'policy'

module Cloudmaster

  # Provides resource policy implementation.
  # Instances managed under a resource policy are expected to issue
  # periodic status messages, giving their estimated load (generally
  # between 0 and 1).
  class PolicyResource < Policy
    # Each policy object gets the configuration and the instance collection.
    def initialize(reporter, config, instances)
      super(reporter, config, instances)
      @config = config
    end

    # Activate the given number of shut_down instances.
    # We prefer those with highest load.
    # Return the number actually activated.
    def activate_shut_down_instances(number_to_activate)
      # Sort descending by load estimate.
      shutdown_instances = @instances.shut_down_instances.sort do |a, b|
        b.load_estimate - a.load_estimate
      end
      # FIX: use an exclusive range -- the original inclusive [0..n]
      # selected n+1 instances (one more than requested).
      shutdown_instances = shutdown_instances[0...number_to_activate]
      shutdown_instances.each { |i| i.activate }
      shutdown_instances.each { |i| @reporter.info("Activating instance ", i.id) }
      shutdown_instances.size
    end

    # Shut down the given instances, by changing their state to shut_down.
    # Returns the number actually shut down.
    def shut_down_instances(instances_to_shut_down)
      instances = @instances.shut_down(instances_to_shut_down)
      instances.each {|i| @reporter.info("Shutting down instance ", i.id) }
      instances.size
    end

    # Shut down the given number of instances.
    # Shut down the ones with the lowest load.
    def shut_down_n_instances(number_to_shut_down)
      # FIX: return 0 (not nil) so callers always get a count back,
      # consistent with the other paths through this method.
      return 0 if number_to_shut_down <= 0
      instances_with_lowest_load = @instances.sorted_by_lowest_load
      instances_to_shut_down = instances_with_lowest_load.find_all do |instance|
        # Don't stop instances before minimum_active_time
        instance.minimum_active_time_elapsed?
      end
      shut_down_instances(instances_to_shut_down[0...number_to_shut_down])
    end

    # Stop any shut down instances with load below threshold.
    # Also stop instances that have exceeded shut_down_interval.
    def clean_up_shut_down_instances
      idle_instances = @instances.shut_down_idle_instances
      timeout_instances = @instances.shut_down_timeout_instances
      stop_instances(idle_instances | timeout_instances)
    end

    # Adjust the instance pool up or down; returns the signed number of
    # instances to add (positive) or remove (negative).
    # If no instances are running, and there are requests in the work queue,
    # start some.
    # Additional instances are added if the load is too high.
    # Instances are shut down, and then stopped if the load is low.
    def adjust
      # NOTE(review): empty_queue presumably drains the work queue and
      # returns the number of entries removed (defined on NamedQueue,
      # not visible here) -- confirm against named_queue.rb.
      depth = @config[:work_queue].empty_queue
      if @instances.active_instances.size == 0
        # capacity consumed by new arrivals
        new_load = depth.to_f / @config[:queue_load_factor].to_f
        initial = (new_load / @config[:target_upper_load].to_f).ceil
        @reporter.info("Resource policy need initial #{initial} depth: #{depth} new_load #{new_load}") if initial > 0
        return initial
      end
      if depth > 0
        @reporter.info("Resource policy residual depth: #{depth}")
        return 0
      end
      # the total capacity remaining below the upper bound
      excess_capacity = @instances.excess_capacity
      if excess_capacity == 0
        # need this many more running at upper bound
        over_capacity = @instances.over_capacity
        additional = (over_capacity / @config[:target_upper_load].to_f).ceil
        @reporter.info("Resource policy need additional #{additional} depth: #{depth} over_capacity #{over_capacity}")
        return additional
      end
      # how many are needed to carry the total load at the lower bound
      needed = (@instances.total_load / @config[:target_lower_load].to_f).ceil
      if needed < @instances.size
        excess = @instances.size - needed
        @reporter.info("Resource policy need fewer #{excess} depth: #{depth} needed #{needed}")
        return -excess
      end
      return 0
    end

    # We are not using the default apply, because we want to:
    # * activate shut down instances, if possible, otherwise start
    # * shut down instances if fewer are needed
    # * stop inactive or expired shut_down instances
    def apply
      n = @limit_policy.adjust(adjust)
      case
      when n > 0
        # Re-use shut_down instances first; only start what remains.
        n -= activate_shut_down_instances(n)
        start_instances(n)
      when n < 0
        shut_down_n_instances(-n)
      end
      clean_up_shut_down_instances
    end
  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
require 'named_queue'
require 'ec2_image_enumerator'

module Cloudmaster

  # All configuration parameters passed in through the constructor.
  # Items with * must be defined
  #
  # ==aws_config==
  # aws_env -- used to form queue, instance, and s3 key names --
  #            typically development|test|production
  # *aws_access_key -- the AWS access key
  # *aws_secret_key -- the AWS secret key
  # *aws_user -- the user name, used to build the image name
  # *aws_bucket -- the bucket to use when storing the active set
  # *aws_keypair -- full path name of the keypair file to use for
  #            connecting to instances
  #
  # ==config==
  # ===GENERAL===
  # *name -- the name of this config
  # *policy -- :none, :job, :resource
  # ===QUEUES===
  # poll_interval -- how often to check work queue, etc (seconds)
  # receive_count -- how many status messages to receive at once
  # *work_queue -- name of work queue (aws_env)
  # *status_queue -- name of status queue (aws_env)
  # ===ACTIVE SET===
  # active_set_type -- which active set algorithm to use: :none, :s3, :queue
  # active_set_bucket -- the S3 bucket to use to store the active set
  # active_set_key -- the S3 key used to store the active set (aws_env)
  # active_set_interval -- how often to write active_set
  # ===INSTANCE CREATION PARAMETERS===
  # *ami_name -- the ami name to start and monitor (aws_env)
  # key_pair_name -- the name of the keypair to start the instance with
  # security_groups -- array of security group names to start the instance with
  # instance_type -- the instance type to create
  # user_data -- instance data made available to running instance
  #            through http://169.254.169.254/latest/user-data
  #            This is given as a hash, which is serialized by cloudmaster.
  #
  # ===INSTANCE MANAGEMENT POLICIES===
  # policy_interval -- how often to apply job or resource policy
  # audit_instance_interval -- how often (in minutes) to audit instances (-1 for never)
  # maximum_number_of_instances -- the max number to allow
  # minimum_number_of_instances -- the min number to allow
  # ===INSTANCE START POLICIES===
  # start_limit -- how many instances to start at one time
  # ===INSTANCE STOP POLICIES===
  # stop_limit -- how many to stop at one time
  # minimum_lifetime -- don't stop an instance unless it has run this long (minutes)
  # minimum_active_time -- the minimum amount of time (in minutes) that an instance
  #            may remain in the active state
  # watchdog_interval -- if a machine does not report status in this interval, it is
  #            considered to be hung, and is stopped
  # ===JOB POLICIES===
  # start_threshold -- if work queue size is greater than start_threshold * number of
  #            active instances, start more instances
  # idle_threshold -- if more than idle_threshold active instances with load 0
  #            exist, stop some of them
  # ===RESOURCE POLICIES===
  # target_upper_load -- try to keep instances below this load
  # target_lower_load -- try to keep instances above this load
  # queue_load_factor -- the portion of the load that a single queue entry represents.
  #            If a server can serve a maximum of 10 clients, then this is 10.
  # shut_down_threshold -- stop instances that have load_estimate below this value
  # shut_down_interval -- stop instances that have been in shut_down state for
  #            longer than this interval
  # ===MANUAL POLICIES===
  # manual_queue_name -- the name of the queue used to send manual instance adjustments
  # ===REPORTING===
  # summary_interval -- how often to give summary
  # instance_log -- if set, it is a pathname to a directory where individual log files
  #            are written for each instance
  # instance_report_interval -- how often to show instance reports

  # PoolConfiguration holds the configuration parameters for one pool.
  # It also stores aws parameters and defaults, providing a single lookup
  # mechanism for all.
  # If lookup fails, it raises an exception.

  class PoolConfiguration
    # Create a new PoolConfiguration.  The default parameters
    # are used if the desired parameter is not given.
    def initialize(aws_config, default, config)
      # These parameters merge the defaults and the given parameters;
      # merged parameters are also eval'ed (see #[] below).
      @merge_params = [:user_data]
      @aws_config = aws_config
      @default = default
      @config = config
    end

    # Get a parameter, either from aws_config, config or default.
    # Don't raise an exception if there is no value.
    def get(param)
      @aws_config[param] || @config[param] || @default[param]
    end

    # Get a parameter, either from config or from default.
    # Raise an exception if there is none.
    # For params in @merge_params, the default and configured values are
    # eval'ed (when strings) and merged, config winning over default.
    def [](param)
      if @default.nil?
        raise "Missing defaults"
      end
      config_param = @aws_config[param] || @config[param]
      if (res = config_param || @default[param]).nil?
        raise "Missing config: #{param}"
      end
      begin
        if @merge_params.include?(param)
          # fix up default param if needed -- it must be a hash
          @default[param] = {} if @default[param].nil?
          # NOTE(review): eval on configuration strings executes arbitrary
          # Ruby code -- safe only if config files are fully trusted.
          # Also note this caches the eval'ed value back into @default.
          @default[param] = eval(@default[param]) if @default[param].is_a?(String)
          if config_param
            # Non-destructive merge: returns the combined hash without
            # modifying the stored default.
            @default[param].merge(eval(config_param))
          else
            @default[param]
          end
        else
          res
        end
      rescue
        # Any parse/eval failure is reported as a bad-format config error.
        raise "Config bad format: #{param} #{config_param} #{$!}"
      end
    end

    # Store (create or replace) a parameter.
    def []=(param, val)
      @config[param] = val
    end

    # Append the aws_env suffix to a name (e.g. "queue" -> "queue-test").
    # Returns the name unchanged when aws_env is nil or empty.
    def append_env(name)
      aws_env = @aws_config[:aws_env]
      aws_env.nil? || aws_env == '' ? name : "#{name}-#{aws_env}"
    end

    # Test to see that the derived parameters are valid.
    def valid?
      @config[:ami_id] &&
      @config[:work_queue] && @config[:work_queue].valid? &&
      @config[:status_queue] && @config[:status_queue].valid?
    end

    # Looks up a queue given its name.
    # Stores the result in config under the given key (if given).
    # Returns the queue.
    # Raises an exception if none found.
    def setup_queue(key, name)
      return nil unless name
      name = append_env(@config[name])
      queue = NamedQueue.new(name)
      # NOTE(review): NamedQueue.new presumably returns nil/false on
      # failure rather than raising -- confirm against named_queue.rb.
      raise "Bad configuration -- no queue #{name}" if !queue
      @config[key] = queue if key
      queue
    end

    # Looks up the image, given its name.
    # Stores the result in config under the given key (if given).
    # Returns the image.
    # Raises an exception if none found.
    def setup_image(key, name)
      return nil unless name
      name = append_env(@config[name])
      image = EC2ImageEnumerator.new.find_image_id_by_name(name)
      raise "Bad configuration -- no image #{name}" if !image
      @config[key] = image if key
      image
    end

  end
end
|
data/app/pool_manager.rb
ADDED
@@ -0,0 +1,239 @@
|
|
1
|
+
require 'periodic'
require 'pp'
require 'logger'
require 'reporter'
require 'instance_pool'
require 'aws_context'
require 'policy_factory'
require 'active_set_factory'
require 'status_parser_factory'
require 'logger_factory'
require 'policy'

module Cloudmaster

  # PoolManager
  #
  # Manages one InstancePool, which is a collection of EC2 instances
  # running the same image.
  # The PoolManager is responsible for starting and terminating
  # instances.
  # Its policies are meant to balance acceptable performance while
  # minimizing cost.
  # To help achieve this goal, the PoolManager receives
  # status reports from instances, through a status queue.
  #
  # Two classes of policies are defined: job and resource.
  # These roughly correspond to stateless and stateful services.
  #
  # ==Job Policy==
  # In the job policy, instances are assigned work through a work queue.
  # * Each request is stateless, and can be serviced by any instance.
  # * Each instance processes one request at a time.
  # * Each instance is either starting_up or active.
  # * Once it is active, it is either busy (load 1.0) or idle (load 0.0).
  # At startup, the instance reports when it is ready to begin processing, and
  # enters the active state.
  # Each instance reports the load through the status queue when it
  # starts/stops processing a job.
  #
  # The job policy aims to keep the work queue to a reasonable size while not
  # maintaining an excessive number of idle instances.
  #
  # ==Resource Policy==
  # Instances managed by the resource policy have stateful associations with
  # clients, and provide them services on demand.
  # * Each instance processes requests made by clients as requested.
  # * An external entity (the allocator) assigns clients to instances
  #   based on an instance report, which lists the active instances
  #   and their associated load.
  # * The instance report (called the active set) is stored in
  #   S3, at a configurable bucket and key.
  # * The allocator assigns clients to instances, and also creates a
  #   work-queue entry each time it assigns a new client.
  # * The allocator is expected to assign clients only to those instances
  #   listed in the active set.
  # * The work queue is emptied by cloudmaster.
  # * Each instance may be starting_up, active, or shutting_down.
  # * At startup, the instance reports when it is ready to begin processing,
  #   and enters the active state.
  # * The policy decides when to shut down an instance.
  #   It puts it in the shut_down state, but does not stop
  #   it immediately (to avoid disturbing existing clients).
  #   Instances in shutting_down state with zero load, or who have
  #   remained in this state for an excessive time are stopped.
  # * Active instances are available to accept new clients;
  #   shutting_down instances are not.
  # During any given time period, each instance can be partially busy (load
  # between 0.0 and 1.0)
  # Each instance periodically reports its load estimate for that period through
  # the status queue.
  # The resource policy seeks to maintain a load between an
  # upper threshold and a lower threshold.
  # It starts instances or stops them to achieve this.

  class PoolManager
    attr_reader :instances, :logger # for testing only

    # Set up PoolManager.
    # Creates objects used to access SQS and EC2.
    # Creates instance pool, policy classes, reporter, and queues.
    # Actual processing does not start until "run" is called.
    def initialize(config)
      # set up AWS access objects
      keys = [ config[:aws_access_key], config[:aws_secret_key]]
      aws = AwsContext.instance
      @ec2 = aws.ec2(*keys)
      @sqs = aws.sqs(*keys)
      @s3 = aws.s3(*keys)
      @config = config

      # set up reporter
      @logger = LoggerFactory.create(@config[:logger], @config[:logfile])
      @reporter = Reporter.setup(@config[:name], @logger)

      # Create instance pool.
      # Used to keep track of instances in the pool.
      @instances = InstancePool.new(@reporter, @config)

      # Create a policy class
      @policy = PolicyFactory.create(@config[:policy], @reporter, @config, @instances)

      # Create ActiveSet
      @active_set = ActiveSetFactory.create(@config[:active_set_type], @config)

      # Create StatusParser
      @status_parser = StatusParserFactory.create(@config[:status_parser])

      # NOTE(review): assumes :instance_log is always a string -- a nil
      # value would raise NoMethodError on empty? -- verify config defaults.
      unless @config[:instance_log].empty?
        @reporter.log_instances(@config[:instance_log])
      end

      # Look up the work queues and the image from their names.
      # Have policy do most of the work.
      @work_queue = @config.setup_queue(:work_queue, :work_queue_name)
      @status_queue = @config.setup_queue(:status_queue, :status_queue_name)
      @ami_id = @config.setup_image(:ami_id, :ami_name)

      @keep_running = true
    end

    # Main loop of cloudmaster
    #
    # * Reads and processes status messages.
    # * Starts and stops instances according to policies
    # * Detects hung instances, and stops them.
    # * Displays periodic reports.
    #
    # The optional end_time bounds how long the loop runs (used by tests);
    # each Periodic gates how often its corresponding action fires.
    def run(end_time = nil)
      summary_period = Periodic.new(@config[:summary_interval].to_i)
      instance_report_period = Periodic.new(@config[:instance_report_interval].to_i)
      policy_period = Periodic.new(@config[:policy_interval].to_i)
      active_set_period = Periodic.new(@config[:active_set_interval].to_i * 60)
      audit_instances_period = Periodic.new(@config[:audit_instance_interval].to_i * 60)

      # loop reading messages from the status queue
      while keep_running(end_time) do
        # update instance list and get queue depth
        audit_instances_period.check do
          @instances.audit_existing_instances
        end

        @work_queue.read_queue_depth
        # @keep_running is re-checked after each potentially slow step so
        # shutdown takes effect promptly.
        break unless @keep_running

        # start first instance, if necessary, and ensure the
        # number of running instances stays between maximum and minimum
        @policy.ensure_limits
        break unless @keep_running

        # handle status and log messages
        process_messages(@config[:receive_count].to_i)

        # update public dns (for new instances) and show summary reports
        @instances.update_public_dns_all
        summary_period.check do
          @reporter.info("Instances: #{@instances.size} Queue Depth: #{@work_queue.queue_depth}")
        end
        instance_report_period.check do
          @reporter.info("---Instance Summary---")
          @instances.each do |instance|
            @reporter.info(" #{instance.id} #{instance.report}\n")
          end
          @reporter.info("----------------------")
        end
        break unless @keep_running

        # Based on queue depth and load_estimate, make a decision on
        # whether to start or stop servers.
        policy_period.check { @policy.apply }

        active_set_period.check { update_active_set }

        # Stop instances that have not given recent status.
        @policy.stop_hung_instances
        break unless @keep_running

        Clock.sleep @config[:poll_interval].to_i
      end
    end

    # Shut down the manager.
    # This may take a little time (the run loop notices the flag at its
    # next checkpoint).
    def shutdown
      @keep_running = false
    end

    private

    # Process a batch of status and log messages.
    # Status messages update the instance usage information, and
    # log messages are just logged.
    # Observed behavior is that only one message is returned per call
    # to SQS, no matter how many are requested.
    # Returns the number of messages processed.
    def process_message_batch(count)
      # read some messages
      messages = @status_queue.read_messages(count)
      messages.each do |message|
        # parse message
        msg = @status_parser.parse_message(message[:body])
        case msg[:type]
        when "status"
          # save the status and load_estimate
          @instances.update_status(msg)
        when "log"
          # just log the message
          @reporter.info(msg[:message], msg[:instance_id])
        end
        # delete the message once it has been processed
        @status_queue.delete_message(message[:receipt_handle])
      end
      messages.size
    end

    # Process messages (up to count)
    # Continue until there are no messages remaining.
    def process_messages(count)
      n_remaining = count
      while n_remaining > 0
        n = process_message_batch(n_remaining)
        break if n == 0
        n_remaining -= n
      end
    end

    # Write active set if it has changed since the last write.
    def update_active_set
      @active_set.update(@instances.active_set)
    end

    # Returns true if the manager should keep running.
    # False when the optional end_time has passed or shutdown was requested.
    def keep_running(end_time)
      if end_time && Clock.now > end_time
        false
      else
        @keep_running
      end
    end

  end
end
|
data/app/pool_runner.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'pool_configuration'
require 'pool_manager'

module Cloudmaster

  # PoolRunner
  #
  # Drives a set of PoolManagers, one per configured pool, each running
  # in its own thread.
  #
  # Knows how to start (run) and stop (shutdown) the pools.
  #
  # The configuration (normally an InifileConfig) describes every pool:
  # how many there are and each one's characteristics.  An INT signal
  # triggers an orderly shutdown of all managers.
  class PoolRunner
    attr_reader :pool_managers # for testing only

    # Remember the configuration and install the INT handler.
    # Managers are built lazily, on the first call to run.
    def initialize(config)
      @config = config
      @pool_managers = []
      Signal.trap("INT") { shutdown }
    end

    # Build a PoolManager for each pool described in the configuration
    # (first call only -- tests may call run repeatedly), then run them
    # all in parallel threads and wait for every one to finish.
    # The optional limit bounds the run time, for testing purposes only.
    def run(limit = nil)
      if @pool_managers.empty?
        @config.pools.each do |pool_config|
          # Wrap pool config parameters up with defaults.
          full_config = PoolConfiguration.new(@config.aws, @config.default, pool_config)
          @pool_managers << PoolManager.new(full_config)
        end
      end
      threads = @pool_managers.map do |manager|
        Thread.new(manager) { |mgr| mgr.run(limit) }
      end
      threads.each(&:join)
    end

    # Ask every pool manager to stop.
    def shutdown
      @pool_managers.each(&:shutdown)
    end
  end
end
|
data/app/reporter.rb
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'instance_logger'
|
2
|
+
|
3
|
+
module Cloudmaster

  # Creates and outputs log messages.
  # These are formatted with a timestamp and an instance name.
  # This remembers the log device, which is anything with puts.
  # This is treated as a global.  It is initialized by calling "Reporter.setup"
  # and then anyone can get a copy by calling "Reporter.instance".
  class Reporter
    # Current verbosity; messages are emitted only when @level is at or
    # above the level of the reporting method used.
    attr_accessor :level

    NONE = 0
    ERROR = 1
    WARNING = 2
    INFO = 3
    TRACE = 4
    DEBUG = 5
    ALL = 10

    # Reporter displays the given name on every line.
    # Reports go to the given log (an IO); nil means STDOUT.
    def initialize(name, log)
      @level = ALL
      @name = name
      @log = log || STDOUT
      @instance_logger = nil
    end

    # Factory entry point; equivalent to Reporter.new.
    def Reporter.setup(name, log)
      new(name, log)
    end

    # Additionally mirror messages to per-instance log files under dir.
    def log_instances(dir)
      @instance_logger = InstanceLogger.new(dir)
    end

    # Log a message unconditionally (note: unlike info, this is not
    # filtered by @level).
    def log(message, *opts)
      send_to_log("INFO:", message, *opts)
    end

    def err(msg, *opts)
      send_to_log("ERROR:", msg, *opts) if @level >= ERROR
    end
    alias error err

    def warning(msg, *opts)
      send_to_log("WARNING:", msg, *opts) if @level >= WARNING
    end

    def info(msg, *opts)
      send_to_log("INFO:", msg, *opts) if @level >= INFO
    end

    def trace(msg, *opts)
      send_to_log("TRACE:", msg, *opts) if @level >= TRACE
    end

    def debug(msg, *opts)
      send_to_log("DEBUG:", msg, *opts) if @level >= DEBUG
    end

    private

    # Build "TYPE timestamp name [instance_id] message", write it to the
    # main log, and mirror it to the per-instance log when one is active.
    def send_to_log(type, message, instance_id = nil)
      msg = [type, format_timestamp(Clock.now), @name]
      msg << instance_id if instance_id
      msg << message
      message = msg.join(' ')
      @log.puts(message)
      if instance_id && @instance_logger
        @instance_logger.puts(instance_id, message)
      end
    end

    # Format the given timestamp for log output.
    # FIX: the original ignored its +ts+ argument and always formatted
    # Clock.now; it now formats the timestamp it is given.
    def format_timestamp(ts)
      ts.strftime("%m-%d-%y %H:%M:%S")
    end
  end
end
|