RubyGems - staugaard-cloudmaster - Versions diffs - 0.1.3 → 0.1.4 - Mend

staugaard-cloudmaster 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

data/VERSION.yml +1 -1
data/app/active_set_factory.rb +16 -0
data/app/active_set_none.rb +16 -0
data/app/active_set_queue.rb +27 -0
data/app/active_set_s3.rb +25 -0
data/app/configuration.rb +85 -0
data/app/default-config.ini +95 -0
data/app/ec2_image_enumerator.rb +41 -0
data/app/ec2_instance_enumerator.rb +25 -0
data/app/instance.rb +146 -0
data/app/instance_pool.rb +326 -0
data/app/named_queue.rb +75 -0
data/app/policy.rb +113 -0
data/app/policy_daytime.rb +18 -0
data/app/policy_factory.rb +16 -0
data/app/policy_fixed.rb +19 -0
data/app/policy_job.rb +54 -0
data/app/policy_limit.rb +68 -0
data/app/policy_manual.rb +36 -0
data/app/policy_resource.rb +110 -0
data/app/pool_configuration.rb +172 -0
data/app/pool_manager.rb +239 -0
data/app/pool_runner.rb +54 -0
data/app/reporter.rb +81 -0
data/app/status_parser_factory.rb +16 -0
data/app/status_parser_lifeguard.rb +48 -0
data/app/status_parser_std.rb +11 -0
metadata +27 -1

data/app/instance_pool.rb ADDED Viewed

@@ -0,0 +1,326 @@
+require 'aws_context'
+require 'ec2_instance_enumerator'
+require 'instance'
+require 'yaml'
+module Cloudmaster
+  #  Stores and operates on a collection of instances
+  #
+  #  Internally, instances are stored as an array of Instance objects.
+  class InstancePool
+    include Enumerable
+    # Create an instance pool.
+    # This class knows how to start and stop instances, and how to detect
+    # that new instance have come about, or existing ones have gone away.
+    # The constructor takes:
+    #   [config] describes configurable instance properties
+    def initialize(reporter, config)
+      @ec2 = AwsContext.instance.ec2
+      @reporter = reporter
+      @config = config
+      @state_change_time = Clock.now
+      @instances = []         # holds Instance objects
+    end
+#    private
+    # Create and return options, in a way that is acceptable to EC2.
+    def start_opts
+      groups = @config[:security_groups]
+      # this can throw an exception if groups is not formetteed properly
+      begin
+        groups = eval(groups) if groups.kind_of?(String)
+      rescue
+        groups = [:default]
+      end
+      {:key_name => @config[:key_pair_name],
+          :user_data => YAML.dump(@config[:user_data]),
+          :security_groups => groups,
+          :instance_type => @config[:instance_type]}
+    end
+    def max2(a, b)
+      a > b ? a : b
+    end
+    # Allows iteration through instances.
+    # So enumeration on InstancePool is implicitly enumeration
+    #  on @instances.
+    def each
+      @instances.each {|i| yield i}
+    end
+    # return first instance
+    def first
+      @instances.first
+    end
+    # Return the number of instances in the pool.
+    def size
+      @instances.size
+    end
+    # Create an instance and add to the list.
+    # Return the newly created instance.
+    def add(id, public_dns)
+      new_instance =  Instance.new(id, public_dns, @config)
+      @instances << new_instance
+      new_instance
+    end
+    # Delete the instance from the list
+    def delete(instance)
+      @instances.delete(instance)
+    end
+    # Find an instance given its instance id.
+    def find_by_id(id)
+      find {|i| i.id == id}
+    end
+    # Return a list of all instance ids.
+    def id_list
+      map {|i| i.id}
+    end
+    # Return the maximum number of instances allowed.
+    def maximum
+      @config[:maximum_number_of_instances].to_i
+    end
+    # Return the minimum number of instances allowed.
+    def minimum
+      @config[:minimum_number_of_instances].to_i
+    end
+    # Return true if the number of instances is less than the minimum.
+    def less_than_minimum?
+      size < minimum
+    end
+    # return true if number of instances is more than maximum
+    def greater_than_maximum?
+      size > maximum
+    end
+    # Return count of instance below minimum
+    def below_minimum_count
+      less_than_minimum? ? minimum - size : 0
+    end
+    # Return count of instance above maximum
+    def above_maximum_count
+      greater_than_maximum? ? size - maximum : 0
+    end
+    # Return a list of instances missing public_dns.
+    def missing_public_dns_instances
+      find_all {|i| i.public_dns.nil? || i.public_dns.empty? }
+    end
+    # Return ids of all instances missing a public_dns.
+    def missing_public_dns_ids
+      missing_public_dns_instances.collect {|i| i.id}
+    end
+    # Find the instance identified by id and update its public_dns
+    # If there is no dns information, then skip it.
+    def update_public_dns(id, public_dns)
+      return if public_dns.nil? || public_dns.empty?
+      i = find_by_id(id)
+      i.public_dns = public_dns if i
+    end
+    # Return instances that have not seen status in watchdog_interval
+    def hung_instances
+      find_all {|i| i.watchdog_time_elapsed?}
+    end
+    # Return all instances in active state
+    def active_instances
+      find_all {|i| i.state == :active}
+    end
+    # Return all instances in shut_down state
+    def shut_down_instances
+      find_all {|i| i.state == :shut_down}
+    end
+    # Return instances that are active and have load <= target_load
+    def active_idle_instances
+      target_load = 0
+      active_instances.find_all {|i| i.load_estimate <= target_load}
+    end
+    # Shut down all instances who have a load below the target.
+    # Shut down is not the same as stop -- the instances continue to
+    # provide service, but are no longer allocated new clients.
+    def shut_down_idle_instances
+      target_load = @config[:shut_down_threshold].to_i
+      shut_down_instances.find_all {|i| i.load_estimate <= target_load}
+    end
+    # Return instances that are shut_down and have
+    # time_since_state_change > shut_down_interval.
+    def shut_down_timeout_instances
+      shut_down_interval = @config[:shut_down_interval].to_i * 60
+      shut_down_instances.find_all {|i| i.time_since_state_change > shut_down_interval}
+    end
+    # Return the latest time since any state change of any instance.
+    def state_change_time
+      @state_change_time = inject(@state_change_time) do |latest, instance|
+        max2(latest, instance.state_change_time)
+      end
+    end
+    # Return the sum of all the extra capacity of active instances
+    # that have excess capacity (load less than target load).
+    def excess_capacity
+      target_load = @config[:target_upper_load].to_f
+      active_instances.inject(0) do |sum, instance|
+        sum + max2(target_load - instance.load_estimate, 0)
+      end
+    end
+    # Return the sum of capacity in excess of the target upper load
+    def over_capacity
+      target_load = @config[:target_upper_load].to_f
+      active_instances.inject(0) do |sum, instance|
+        sum + max2(instance.load_estimate - target_load, 0)
+      end
+    end
+    # Return the total load of all active instances
+    def total_load
+      active_instances.inject(0) do |sum, instance|
+        sum + instance.load_estimate
+      end
+    end
+    # Update the status of an instance using the contents of the status message.
+    def update_status(msg)
+      id = msg[:instance_id]
+      if instance = find_by_id(id)
+        instance.update_status(msg)
+      else
+        @reporter.error("Received status message from unknown instance: #{id}") unless id == 'unknown'
+      end
+    end
+    # Return a YAML encoded representation of the active set.
+    # The active set describes the id, public DNS, and load average of
+    # each active instance in the pool.
+    def active_set
+      message = active_instances.collect do |instance|
+        { :id => instance.id,
+          :public_dns => instance.public_dns,
+          :load_estimate => instance.load_estimate }
+      end
+      YAML.dump(message)
+    end
+    # Return all the instance, sortd by lowest load estimate.
+    def sorted_by_lowest_load
+      @instances.sort do |a,b|
+        # Compare the elapsed lifetime status. If the status differs, instances
+        # that have lived beyond the minimum lifetime will be sorted earlier.
+        if a.minimum_lifetime_elapsed? != b.minimum_lifetime_elapsed?
+          if a.minimum_lifetime_elapsed?
+            -1   # This instance has lived long enough, the other hasn't
+          else
+            1    # The other instance has lived long enough, this one hasn't
+          end
+        else
+          a.load_estimate - b.load_estimate
+        end
+      end
+    end
+    # Find all instances for which we don't have a public_dns,
+    # For each one,see of EC2 now has the public DNS.  If so, store it.
+    def update_public_dns_all
+      missing_ids = missing_public_dns_ids
+      return if missing_ids.size == 0
+      EC2InstanceEnumerator.new(missing_ids).each do |instance|
+        update_public_dns(instance[:id], instance[:public_dns])
+      end
+    end
+    # Return instances that match our ami_id that are either pending
+    # or running.
+    # These instances are as returned by ec2: containing fields such as
+    # id and :public_dns.
+    def our_running_instances
+      EC2InstanceEnumerator.new.find_all do |instance|
+        instance[:image_id] == @config[:ami_id] &&
+          %w[pending running].include?(instance[:state])
+      end
+    end
+    # Audit the list of instances based on what is currently known to EC2.
+    # In other words, bring our list of instances into agreement with the instances
+    # EC2 knows about by
+    # (1) adding instances that EC2 knows but that we do not, and
+    # (2) deleting instance that EC2 no longer knows about.
+    # This is used initially to build the instance list, and
+    # periodically thereafter to catch instances started or stopped
+    # outside cloudmaster.
+    def audit_existing_instances
+      running_instances = our_running_instances
+      # add running instances that we don't have
+      running_instances.each do |running|
+        if ! find_by_id(running[:id])
+          add(running[:id], running[:public_dns])
+          @reporter.info("Instance discovered #{running[:public_dns]}", running[:id])
+        end
+      end
+      # delete instances that are no longer running
+      each do |instance|
+        if ! running_instances.find {|running| running[:id] == instance.id}
+          delete(instance)
+          @reporter.info("Instance disappeared #{instance.public_dns}", instance.id)
+        end
+      end
+    end
+    # Start the given number of instances.
+    # Remember started instances by creating an Instance object and storing it
+    # in the pool.
+    # Return an array of the ones we just started.
+    def start_n_instances(number_to_start)
+      return [] if number_to_start <= 0
+      started_instances = @ec2.run_instances(@config[:ami_id], 1,
+                               number_to_start, start_opts)[:instances]
+      started_instances.collect do |started_instance|
+        # the public dns is not available yet
+        add(started_instance[:id], nil)
+      end
+    end
+    # Stop the given set of instances.
+    # Remove stopped instance from the pool.
+    # Return an array of stopped instances.
+    def stop_instances(instances_to_stop)
+      instances_to_stop.collect do |instance|
+        @ec2.terminate_instances(instance.id.to_s)
+        delete(instance)
+        instance
+      end
+    end
+    # Shut down the given set of instances.
+    # Set the state to shut_down
+    # Return an array of shut down instances.
+    def shut_down(instances_to_shut_down)
+      instances_to_shut_down.collect do |instance|
+        instance.shutdown
+        instance
+      end
+    end
+  end
+end

data/app/named_queue.rb ADDED Viewed

@@ -0,0 +1,75 @@
+require 'aws_context'
+module Cloudmaster
+  # Implements a queue specified by name.
+  # The queue name is used to look up the actual SQS queue.
+  # The queue name is used in a substring match, so possibly more than one
+  # string may match.
+  # If no queue is found for given name, or if there are
+  # multiple, the NamedQueue throws an exception.
+  # Implements the read and delete operations.
+  # It also caches queue_depth, so it need not query SQS every time
+  # it is needed.
+  class NamedQueue
+    attr_reader :queue_depth
+    # Create a queue given the queue name.
+    # A SQS interface must be supplied: it is used to interact with Amazon.
+    # The queue name is given, and is used to look up the actual queue.
+    # Raises an exception if there is more than one queue matching the given
+    # name.
+    # If there is no queue with the given name, create one.
+    def initialize(queue_name)
+      @sqs = AwsContext.instance.sqs
+      queues = @sqs.list_queues(queue_name)
+      @queue_depth = 0
+      @queue = case queues.length
+        when 0
+	  queue = @sqs.create_queue(queue_name)
+          raise "Bad Configuration -- no queue: #{queue_name}" if queue.nil?
+	  queue
+        when 1
+          queues.first
+        else
+          raise "Bad configuration -- multiple queues match #{queue_name}"
+        end
+    end
+    # Read some messages off a queue.
+    # For some reason, we never receive more than 1.
+    # But we are prepared for more.
+    # return an array of the messages that were read.
+    def read_messages(count = 1)
+      @sqs.receive_messages(@queue, count)
+    end
+    # Delete a message from the queue given its id.
+    def delete_message(receipt_handle)
+      @sqs.delete_message(@queue, receipt_handle)
+    end
+    # Read and discard all messages on the queue.
+    # Return the number read.
+    def empty_queue
+      n = 0
+      while true
+        msgs = read_messages
+        break if msgs.size == 0
+        n += 1
+        msgs.each {|msg| delete_message(msg[:receipt_handle])}
+      end
+      n
+    end
+    # Get qeue depth and store it.
+    # In case of failure, keep the old value.
+    # The last value read is available through the queue_depth attribute.
+    def read_queue_depth
+      attr = 'ApproximateNumberOfMessages'
+      attrs = @sqs.get_queue_attributes(@queue, attr)
+      @queue_depth = attrs[attr] if attrs.has_key?(attr)
+      @queue_depth
+    end
+  end
+end

data/app/policy.rb ADDED Viewed

@@ -0,0 +1,113 @@
+require 'policy_limit'
+module Cloudmaster
+  # Provides the common data and behaviors for policies.
+  # This includes storing the configuration and instance collection.
+  # It also includes implementing essential methods such as ensure_limits and
+  # stop_hung_instances.
+  # Finally, this implements helpers that start and stop instances,
+  # which might be desirable to override.
+  class Policy
+    # Store the reporter, configuration and instance collection, and
+    # create the PolicyLimit that bounds every adjustment.
+    def initialize(reporter, config, instances)
+      @reporter = reporter
+      @config = config
+      @instances = instances
+      @limit_policy = PolicyLimit.new(reporter, config, instances)
+    end
+    # Make sure there are at least the minimum instances running.
+    # Also make sure there are no more than maximum number of instances.
+    # If not, start or stop enough to conform.
+    # These actions bypass the creation and termination limits normally in place.
+    # Returns nil when no change is needed.
+    def ensure_limits
+      n = @limit_policy.adjust_limits
+      if n > 0
+        start_instances(n)
+      elsif n < 0
+        stop_instances(-n)
+      end
+    end
+    # If instances have not sent status in a long time, they
+    # are probably hung, and should be stopped.
+    def stop_hung_instances
+      # Query the pool once so the logged count and the stopped list agree.
+      hung = @instances.hung_instances
+      if hung.size > 0
+        @reporter.info("Stopping hung instances #{hung.size}")
+        stop_instances(hung)
+      end
+    end
+    # Default policy application function.
+    # Calculate the number to adjust by, pass it through the limit
+    # policy, and then start or stop instances accordingly.
+    # Returns 0 when there is nothing to do.
+    def apply
+      n = @limit_policy.adjust(adjust)
+      if n > 0
+        start_instances(n)
+      elsif n < 0
+        stop_instances(-n)
+      else
+        0
+      end
+    end
+    # Returns the number of additional instances to start (positive)
+    # or the number to stop (negative).
+    # This must be implemented by the derived class; the base version
+    # raises NotImplementedError.
+    def adjust
+      raise NotImplementedError, "#{self.class.name}#adjust is not implemented."
+    end
+    protected
+    # Start the given number of instances.
+    # Report an error if fewer were started than requested.
+    # Return the number started.
+    def start_instances(number_to_start)
+      started_instances = @instances.start_n_instances(number_to_start)
+      if started_instances.size < number_to_start
+        @reporter.error("Failed to start requested number of instances. (#{started_instances.size} instead of #{number_to_start})")
+      end
+      started_instances.each { |started| @reporter.info("Started instance #{started.id}")}
+      started_instances.size
+    end
+    # Stop the given list of instances.
+    # Don't stop ones whose minimum lifetime has not elapsed, unless
+    # force is true.
+    # Returns the number stopped.
+    def stop_instances_list(instances_to_stop, force = false)
+      # NOTE(review): InstancePool#sorted_by_lowest_load calls
+      # minimum_lifetime_elapsed? while this calls minimum_time_elapsed? --
+      # confirm which of the two Instance actually defines.
+      stoppable = instances_to_stop.select do |instance|
+        force || instance.minimum_time_elapsed?
+      end
+      stopped = @instances.stop_instances(stoppable)
+      stopped.each {|i| @reporter.info("Terminating instance ", i.id) }
+      stopped.size
+    end
+    # Stop a given number of instances, choosing the ones with the
+    # lowest load estimate.
+    # Return the number stopped (0 when asked to stop none).
+    def stop_n_instances(number_to_stop)
+      return 0 if number_to_stop <= 0
+      instances_with_lowest_load = @instances.sorted_by_lowest_load
+      stop_instances_list(instances_with_lowest_load[0...number_to_stop])
+    end
+    # Stop instances.
+    # If passed a number, it stops that number of instances.
+    # If passed a list of instances, it stops those instances; extra
+    # params are forwarded to stop_instances_list.
+    # Raises for any other argument type.
+    def stop_instances(to_stop, *params)
+      case to_stop
+      when Integer
+        # Integer instead of Fixnum: Fixnum is gone from current Ruby.
+        stop_n_instances(to_stop)
+      when Array
+        stop_instances_list(to_stop, *params)
+      else
+        raise "Bad call -- stop_instances #{to_stop.class}"
+      end
+    end
+  end
+end

data/app/policy_daytime.rb ADDED Viewed

@@ -0,0 +1,18 @@
+require 'policy'
+module Cloudmaster
+  # Provide example daytime policy
+  # This increases the pool size during the day.
+  # This is provided as part of an example on how to create custom policies.
+  class PolicyDaytime < Policy
+    # Define the adjust function increase the pool in the
+    #  daytime if it is below daytime minimum
+    def adjust
+      hour = Clock.hour
+      return 0 if hour < 10 || hour > 17
+      return 0 if @instances.size >= @config[:minimum_number_of_instances_daytime].to_i
+      return 1
+    end
+  end
+end

data/app/policy_factory.rb ADDED Viewed

@@ -0,0 +1,16 @@
+# This is a factory for Policy implementations.
+require 'factory'
+module Cloudmaster
+  class PolicyFactory
+    include Factory
+    def PolicyFactory.create(policy_name, *params)
+      name = policy_name.nil? ? 'default' : policy_name.to_s
+      require 'policy_' + name.downcase
+      class_name = 'Policy' +name.capitalize
+      policy = Factory.create_object_from_string(class_name, *params)
+      raise "Bad configuration -- no policy #{class_name}" unless policy
+      policy
+    end
+  end
+end

data/app/policy_fixed.rb ADDED Viewed

@@ -0,0 +1,19 @@
+require 'policy'
+module Cloudmaster
+  # Provide fixed policy.
+  # This never adjusts the size of the pool.
+  # PolicyFixed is still useful, because it still ensures that the number
+  # of instances stay beteen the maximum and minimum (because this is
+  # enforced in the base class apply).
+  class PolicyFixed < Policy
+    # Use everything from the base class.
+    # Adjust never changes instances.
+    def adjust
+      0
+    end
+  end
+end

data/app/policy_job.rb ADDED Viewed

@@ -0,0 +1,54 @@
+require 'policy'
+module Cloudmaster
+  # Provides job policy
+  # Instances managed by the job policy take work from a queue and report
+  # status thrugh another queue.
+  class PolicyJob < Policy
+    # Initialize the policy by giving it access to the configuration and
+    # the collection of instances.
+    def initialize(reporter, config, instances)
+      super(reporter, config, instances)
+      @config = config
+    end
+    # Adjust the pool size.
+    # Add instances if the queue is getting backed up.
+    # Delete instances if the queue is empty and instances are idle.
+    def adjust
+      depth = @config[:work_queue].queue_depth
+      # See if we should add instances
+      if (depth > 0 && @instances.size == 0) || depth >= @config[:start_threshold].to_i
+        additional = 0
+        # need this many more to service the queued work
+        needed = (depth.to_f / @config[:start_threshold].to_f).floor
+	needed = 1 if needed < 1
+        if needed > @instances.size
+          additional = needed - @instances.size
+        end
+       @reporter.info("Job policy need additional #{additional}  depth: #{depth} needed #{needed}")
+       return additional
+      end
+      # Queue not empty -- don't stop any
+      return 0 if depth > 0
+      # See if we should stop some
+      # count how many are active idle and active
+      idle = @instances.active_idle_instances
+      active = @instances.active_instances
+      if idle.size > 0
+        # stop some fraction of the idle instances
+        excess = (idle.size / @config[:idle_threshold].to_f).round
+        @reporter.info("Job policy need fewer #{excess}  idle: #{idle.size} active #{active.size}")
+	return -excess
+      end
+      0
+    end
+    # uses the default apply
+  end
+end

data/app/policy_limit.rb ADDED Viewed

@@ -0,0 +1,68 @@
+module Cloudmaster
+  # This enforces the limits on starting and stopping instances.
+  # It can be used by other policies to make sure thrie own adjust method
+  # does not increase or decrease by more than the configurable start limit or stop limit.
+  # This also provides an exaplanation for its actions.
+  class PolicyLimit
+    def initialize(reporter, config, instances)
+      @reporter = reporter
+      @config = config
+      @instances = instances
+    end
+    def max2(a, b)
+      a > b ? a : b
+    end
+    # Make sure there are at least the minimum instances running.
+    # Also make sure there are no more than maximum number of instances.
+    # Return the number to start (positive) or stop (negative) to stay within the limits.
+    # Do not enforce start_limit and stop_limit.
+    def adjust_limits
+      if @instances.less_than_minimum?
+        number_to_start = @instances.below_minimum_count
+	@reporter.info("Less than minimum -- start more #{number_to_start}")
+        number_to_start
+      elsif @instances.greater_than_maximum?
+        number_to_stop = @instances.above_maximum_count
+	@reporter.info("Greater than maximum -- stop some #{number_to_stop}")
+        -number_to_stop
+      else
+        0
+      end
+    end
+    # After other policies have computed a value for adjust, then this one possibly
+    # modifies the value by ensuring that the start_limit and stop_limit constraints are
+    # honored.  It also makes sure that the adjustment makes the instance count
+    # stay between the minimum and maximum.
+    def adjust(n)
+      case
+      when n > 0
+        start_limit = @config[:start_limit].to_i
+	if n > start_limit
+	  @reporter.info("Limit start -- requested: #{n} limit: #{start_limit}")
+	  n = start_limit
+	end
+	remaining = max2(@instances.maximum - @instances.size, 0)
+	if n > remaining
+	  @reporter.info("Limit start -- requested: #{n} remaining: #{remaining}")
+	  n = remaining
+	end
+      when n < 0
+        stop_limit = @config[:stop_limit].to_i
+	if -n > stop_limit
+	  @reporter.info("Limit stop -- requested: #{-n} limit: #{stop_limit}")
+	  n = -stop_limit
+	end
+	remaining = max2(@instances.size - @instances.minimum, 0)
+	if -n > remaining
+	  @reporter.info("Limit stop -- requested: #{n} remaining: #{remaining}")
+	  n = -remaining
+	end
+      end
+      n
+    end
+  end
+end