RubyGems - dynamo-autoscale - Versions diffs - 0.1 - Mend

dynamo-autoscale 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

data/.gitignore +4 -0
data/Gemfile +13 -0
data/Gemfile.lock +58 -0
data/LICENSE +21 -0
data/README.md +400 -0
data/Rakefile +9 -0
data/aws.sample.yml +16 -0
data/bin/dynamo-autoscale +131 -0
data/config/environment/common.rb +114 -0
data/config/environment/console.rb +2 -0
data/config/environment/test.rb +3 -0
data/config/logger.yml +11 -0
data/config/services/aws.rb +20 -0
data/config/services/logger.rb +35 -0
data/data/.gitkeep +0 -0
data/dynamo-autoscale.gemspec +29 -0
data/lib/dynamo-autoscale/actioner.rb +265 -0
data/lib/dynamo-autoscale/cw_poller.rb +49 -0
data/lib/dynamo-autoscale/dispatcher.rb +39 -0
data/lib/dynamo-autoscale/dynamo_actioner.rb +59 -0
data/lib/dynamo-autoscale/ext/active_support/duration.rb +7 -0
data/lib/dynamo-autoscale/local_actioner.rb +39 -0
data/lib/dynamo-autoscale/local_data_poll.rb +51 -0
data/lib/dynamo-autoscale/logger.rb +15 -0
data/lib/dynamo-autoscale/metrics.rb +192 -0
data/lib/dynamo-autoscale/poller.rb +41 -0
data/lib/dynamo-autoscale/pretty_formatter.rb +27 -0
data/lib/dynamo-autoscale/rule.rb +180 -0
data/lib/dynamo-autoscale/rule_set.rb +69 -0
data/lib/dynamo-autoscale/table_tracker.rb +329 -0
data/lib/dynamo-autoscale/unit_cost.rb +41 -0
data/lib/dynamo-autoscale/version.rb +3 -0
data/lib/dynamo-autoscale.rb +1 -0
data/rlib/dynamodb_graph.r +15 -0
data/rlib/dynamodb_scatterplot.r +13 -0
data/rulesets/default.rb +5 -0
data/rulesets/erroneous.rb +1 -0
data/rulesets/gradual_tail.rb +11 -0
data/rulesets/none.rb +0 -0
data/script/console +3 -0
data/script/historic_data +46 -0
data/script/hourly_wastage +40 -0
data/script/monitor +55 -0
data/script/simulator +40 -0
data/script/test +52 -0
data/script/validate_ruleset +20 -0
data/spec/actioner_spec.rb +244 -0
data/spec/rule_set_spec.rb +89 -0
data/spec/rule_spec.rb +491 -0
data/spec/spec_helper.rb +4 -0
data/spec/table_tracker_spec.rb +256 -0
metadata +178 -0

data/lib/dynamo-autoscale/local_data_poll.rb ADDED Viewed

@@ -0,0 +1,51 @@
+module DynamoAutoscale
+  class LocalDataPoll < Poller
+    def initialize *args
+      super(*args)
+      @cache = Hash.new { |h, k| h[k] = {} }
+    end
+    def poll tables, &block
+      if tables.nil?
+        tables = ["*"]
+      end
+      tables.each do |table_name|
+        unless @cache[table_name].empty?
+          @cache[table_name].each do |day, table_day_data|
+            block.call(table_name, table_day_data)
+          end
+        else
+          file = "#{table_name}.json"
+          Dir[File.join(DynamoAutoscale.data_dir, '*')].each do |day_path|
+            Dir[File.join(day_path, file)].each do |table_path|
+              data = JSON.parse(File.read(table_path)).symbolize_keys
+              if data[:consumed_writes].nil? or data[:consumed_reads].nil?
+                logger.warn "Lacking data for table #{table_name}. Skipping."
+                next
+              end
+              # All this monstrosity below is doing is parsing the time keys in
+              # the nested hash from strings into Time objects. Hash mapping
+              # semantics are weird, hence why this looks ridiculous.
+              data = Hash[data.map do |key, ts|
+                [
+                  key,
+                  Hash[ts.map do |t, d|
+                    [Time.parse(t), d]
+                  end],
+                ]
+              end]
+              @cache[table_name][day_path] = data
+              block.call(table_name, data)
+            end
+          end
+        end
+      end
+    end
+  end
+end

data/lib/dynamo-autoscale/logger.rb ADDED Viewed

@@ -0,0 +1,15 @@
+module DynamoAutoscale
+  module Logger
+    def self.logger= new_logger
+      @@logger = new_logger
+    end
+    def self.logger
+      @@logger
+    end
+    def logger
+      DynamoAutoscale::Logger.logger
+    end
+  end
+end

data/lib/dynamo-autoscale/metrics.rb ADDED Viewed

@@ -0,0 +1,192 @@
+module DynamoAutoscale
+  class Metrics
+    extend DynamoAutoscale::Logger
+    DEFAULT_OPTS = {
+      namespace:   'AWS/DynamoDB',
+      period:      300,
+      # metric_name: metric,
+      # start_time:  (NOW - 3600).iso8601,
+      # end_time:    NOW.iso8601,
+      # dimensions:  [{
+      #   name: "TableName", value: TABLE_NAME,
+      # }],
+    }
+    # Returns a CloudWatch client object for a given region. If no region
+    # exists, the region defaults to whatever is in
+    # DynamoAutoscale::DEFAULT_AWS_REGION.
+    #
+    # CloudWatch client documentation:
+    #   https://github.com/aws/aws-sdk-ruby/blob/master/lib/aws/cloud_watch/client.rb
+    def self.client region = nil
+      @client ||= Hash.new do |hash, _region|
+        hash[_region] = AWS::CloudWatch.new({
+          cloud_watch_endpoint: "monitoring.#{_region}.amazonaws.com",
+        }).client
+      end
+      @client[region || DEFAULT_AWS_REGION]
+    end
+    # Returns a hash of timeseries data. Looks a bit like this:
+    #
+    #   {
+    #     provisioned_reads:  { date => value... },
+    #     provisioned_writes: { date => value... },
+    #     consumed_reads:     { date => value... },
+    #     consumed_writes:    { date => value... },
+    #   }
+    def self.all_metrics table_name, opts = {}
+      data = Hash.new { |h, k| h[k] = {} }
+      pr = provisioned_reads(table_name, opts).sort_by do |datum|
+        datum[:timestamp]
+      end
+      pr.each do |timeslice|
+        data[:provisioned_reads][timeslice[:timestamp]] = timeslice[:average]
+      end
+      cr = consumed_reads(table_name, opts).sort_by do |datum|
+        datum[:timestamp]
+      end
+      cr.each do |timeslice|
+        data[:consumed_reads][timeslice[:timestamp]] = timeslice[:sum]
+      end
+      pw = provisioned_writes(table_name, opts).sort_by do |datum|
+        datum[:timestamp]
+      end
+      pw.each do |timeslice|
+        data[:provisioned_writes][timeslice[:timestamp]] = timeslice[:average]
+      end
+      cw = consumed_writes(table_name, opts).sort_by do |datum|
+        datum[:timestamp]
+      end
+      cw.each do |timeslice|
+        data[:consumed_writes][timeslice[:timestamp]] = timeslice[:sum]
+      end
+      data
+    end
+    # Returns provisioned througput reads for a table in DynamoDB. Works on
+    # moving averages.
+    #
+    # Example:
+    #
+    #   pp DynamoAutoscale::Metrics.provisioned_reads("table_name")
+    #   #=> [{:timestamp=>2013-06-18 15:25:00 UTC, :unit=>"Count", :average=>800.0},
+    #        {:timestamp=>2013-06-18 15:05:00 UTC, :unit=>"Count", :average=>800.0},
+    #        ...
+    #       ]
+    def self.provisioned_reads table_name, opts = {}
+      opts[:metric_name] = "ProvisionedReadCapacityUnits"
+      provisioned_metric_statistics(table_name, opts)
+    end
+    # Returns provisioned througput writes for a table in DynamoDB. Works on
+    # moving averages.
+    #
+    # Example:
+    #
+    #   pp DynamoAutoscale::Metrics.provisioned_writes("table_name")
+    #   #=> [{:timestamp=>2013-06-18 15:25:00 UTC, :unit=>"Count", :average=>600.0},
+    #        {:timestamp=>2013-06-18 15:05:00 UTC, :unit=>"Count", :average=>600.0},
+    #        ...
+    #       ]
+    def self.provisioned_writes table_name, opts = {}
+      opts[:metric_name] = "ProvisionedWriteCapacityUnits"
+      provisioned_metric_statistics(table_name, opts)
+    end
+    # Returns consumed througput reads for a table in DynamoDB. Works on
+    # moving averages.
+    #
+    # Example:
+    #
+    #   pp DynamoAutoscale::Metrics.consumed_reads("table_name")
+    #   #=> [{:timestamp=>2013-06-18 15:53:00 UTC,
+    #         :unit=>"Count",
+    #         :average=>1.2111202996546189},
+    #        {:timestamp=>2013-06-18 15:18:00 UTC,
+    #         :unit=>"Count",
+    #         :average=>1.5604973943552964},
+    #         ...
+    #       ]
+    def self.consumed_reads table_name, opts = {}
+      opts[:metric_name] = "ConsumedReadCapacityUnits"
+      opts[:statistics]  = ["Sum"]
+      consumed_metric_statistics(table_name, opts)
+    end
+    # Returns consumed througput writes for a table in DynamoDB. Works on
+    # moving averages.
+    #
+    # Example:
+    #
+    #   pp DynamoAutoscale::Metrics.consumed_writes("table_name")
+    #   #=> [{:timestamp=>2013-06-18 15:39:00 UTC,
+    #         :unit=>"Count",
+    #         :average=>1.6882725354235755},
+    #        {:timestamp=>2013-06-18 15:24:00 UTC,
+    #         :unit=>"Count",
+    #         :average=>1.7701510393300435},
+    #         ...
+    #       ]
+    def self.consumed_writes table_name, opts = {}
+      opts[:metric_name] = "ConsumedWriteCapacityUnits"
+      opts[:statistics]  = ["Sum"]
+      consumed_metric_statistics(table_name, opts)
+    end
+    private
+    # Because there's a difference to how consumed and provisioned statistics
+    # are gathered for DynamoDB, the two metrics are not comparable without a
+    # little bit of modification.
+    #
+    # Relevant forum post:
+    #   https://forums.aws.amazon.com/thread.jspa?threadID=119523
+    def self.consumed_metric_statistics table_name, opts = {}
+      opts[:statistics] = ["Sum"]
+      data = metric_statistics(table_name, opts)
+      data.map do |datum|
+        datum[:sum] = datum[:sum] / (opts[:period] || DEFAULT_OPTS[:period])
+        datum
+      end
+    end
+    def self.provisioned_metric_statistics table_name, opts = {}
+      opts[:statistics] = ["Average"]
+      metric_statistics(table_name, opts)
+    end
+    # A base method that gets called by wrapper methods defined above. Makes a
+    # call to CloudWatch, getting statistics on whatever metric is given.
+    def self.metric_statistics table_name, opts = {}
+      region = opts.delete :region
+      opts   = DEFAULT_OPTS.merge({
+        dimensions:  [{ name: "TableName", value: table_name }],
+        start_time:  1.hour.ago,
+        end_time:    Time.now,
+      }).merge(opts)
+      if opts[:start_time] and opts[:start_time].respond_to? :iso8601
+        opts[:start_time] = opts[:start_time].iso8601
+      end
+      if opts[:end_time] and opts[:end_time].respond_to? :iso8601
+        opts[:end_time] = opts[:end_time].iso8601
+      end
+      client(region).get_metric_statistics(opts)[:datapoints]
+    end
+  end
+end

data/lib/dynamo-autoscale/poller.rb ADDED Viewed

@@ -0,0 +1,41 @@
+module DynamoAutoscale
+  class Poller
+    # The poller constructor accepts a hash of options. The following arguments
+    # are valid but optional:
+    #
+    #   - :tables  - An array of the tables you would like to poll.
+    #   - :filters - This is primarily for working with local data but there
+    #   could maybe be a production use for it. Locally, it is used to modify
+    #   each datum before it gets sent to the dispatcher. It helps fake setting
+    #   provisioned throughput.
+    def initialize opts = {}
+      @opts = opts
+    end
+    def run &block
+      poll(@opts[:tables]) do |table_name, data|
+        logger.debug "[poller] Got data: #{data}"
+        table = DynamoAutoscale.tables[table_name]
+        times = data.inject([]) do |memo, (_, timeseries)|
+          memo += timeseries.keys
+        end.sort!.uniq!
+        times.each do |time|
+          datum = {
+            provisioned_writes: data[:provisioned_writes][time],
+            provisioned_reads:  data[:provisioned_reads][time],
+            consumed_writes:    data[:consumed_writes][time],
+            consumed_reads:     data[:consumed_reads][time],
+          }
+          if @opts[:filters]
+            @opts[:filters].each { |filter| filter.call(table, time, datum) }
+          end
+          DynamoAutoscale.dispatcher.dispatch(table, time, datum, &block)
+        end
+      end
+    end
+  end
+end

data/lib/dynamo-autoscale/pretty_formatter.rb ADDED Viewed

@@ -0,0 +1,27 @@
+module DynamoAutoscale
+  class PrettyFormatter
+    def call(severity, time, progname, msg)
+      table = DynamoAutoscale.current_table.name rescue "no table"
+      "[#{time.utc.to_s.cyan}][#{severity_color(severity)}][#{table.green}] " +
+        "#{String === msg ? msg : msg.inspect}\n"
+    end
+    def severity_color(severity)
+      case severity
+      when "DEBUG"
+        "#{severity}".blue
+      when "INFO"
+        "#{severity}".white
+      when "WARN"
+        "#{severity}".yellow
+      when "ERROR"
+        "#{severity}".red
+      when "FATAL"
+        "#{severity}".red
+      else
+        "#{severity}"
+      end
+    end
+  end
+end

data/lib/dynamo-autoscale/rule.rb ADDED Viewed

@@ -0,0 +1,180 @@
+module DynamoAutoscale
+  class Rule
+    attr_accessor :metric, :opts
+    CONDITIONS = {
+      greater_than: Proc.new { |a, b| a > b },
+      less_than:    Proc.new { |a, b| a < b },
+    }
+    def initialize metric, opts, &block
+      @metric = metric
+      @opts   = opts
+      @block  = block
+      @count  = Hash.new(0)
+      unless [:consumed_reads, :consumed_writes].include? @metric
+        raise ArgumentError.new("Invalid metric: #{@metric}. Must be either " +
+                                ":consumed_reads or :consumed_writes.")
+      end
+      if @opts[:greater_than].nil? and @opts[:less_than].nil?
+        raise ArgumentError.new("Must specify at least one of greater_than " +
+          "or less_than")
+      end
+      if @opts[:for].nil? and @opts[:last].nil?
+        raise ArgumentError.new("Need to specify at least one of :for and :last.")
+      end
+      if @opts[:greater_than] and @opts[:less_than] and
+        @opts[:greater_than].to_f >= @opts[:less_than].to_f
+        raise ArgumentError.new("greater_than cannot be greater than or " +
+                                "equal to less_than. Rule will never trigger.")
+      end
+      if @opts[:greater_than] and @opts[:greater_than].to_f <= 0
+        raise ArgumentError.new("greater_than cannot be less than or equal to 0")
+      end
+      if @opts[:less_than] and @opts[:less_than].to_f <= 0
+        raise ArgumentError.new("less_than cannot be less than or equal to 0")
+      end
+      if @opts[:min] and @opts[:min] <= 0
+        raise ArgumentError.new("min cannot be less than or equal to 0")
+      end
+      if @opts[:max] and @opts[:max] <= 0
+        raise ArgumentError.new("max cannot be less than or equal to 0")
+      end
+      if @opts[:count] and @opts[:count] <= 0
+        raise ArgumentError.new("count cannot be less than or equal to 0")
+      end
+      if @opts[:scale].nil? and block.nil?
+        raise ArgumentError.new("no :scale option or block specified. Rule has no action.")
+      end
+      if @opts[:scale] and !@opts[:scale].is_a? Hash
+        raise ArgumentError.new(":scale option expects to be a hash.")
+      end
+      if @opts[:scale] and @opts[:scale][:on].nil? and @opts[:scale][:by].nil?
+        raise ArgumentError.new(":scale option expects :on and :by options.")
+      end
+      if @opts[:scale] and ![:consumed, :provisioned].include?(@opts[:scale][:on])
+        raise ArgumentError.new(":scale { :on } needs to be either :consumed " +
+          "or :provisioned")
+      end
+      if @opts[:scale] and @opts[:scale][:by] <= 0
+        raise ArgumentError.new(":scale { :by } needs to be greater than 0")
+      end
+    end
+    def test table
+      last_provisioned = table.last_provisioned_for(@metric)
+      CONDITIONS.each do |key, comparator|
+        if @opts[key]
+          value = @opts[key].to_f
+          # Get the value as a percentage of the last amount provisioned for
+          # this metric if it is a string that ends with a percent symbol.
+          if @opts[key].is_a? String and @opts[key].end_with? "%"
+            # If we don't have a provisioned value yet, we have to move along.
+            # We don't know what the headroom is and we can't trigger an
+            # alarm.
+            next if last_provisioned.nil?
+            value = (value / 100.0) * last_provisioned
+          end
+          duration = @opts[:for] || @opts[:last]
+          data     = table.last(duration, @metric)
+          # If a specific number of points have been specified to look at,
+          # make sure we have exactly that number of points before continuing.
+          if !duration.is_a? ActiveSupport::Duration and data.length != duration
+            return false
+          end
+          if @opts[:max]
+            data = data.take(@opts[:max])
+          end
+          if @opts[:min]
+            return false unless data.length >= @opts[:min]
+          end
+          if data.all? { |datum| comparator.call(datum, value) }
+            @count[table.name] += 1
+            if @opts[:times].nil? or @count[table.name] == @opts[:times]
+              @count[table.name] = 0
+              if scale = @opts[:scale]
+                new_val = table.send("last_#{scale[:on]}_for", @metric) * scale[:by]
+                DynamoAutoscale.actioners[table].set(@metric, new_val)
+              end
+              if @block
+                @block.call(table, self, DynamoAutoscale.actioners[table])
+              end
+              return true
+            else
+              return false
+            end
+          else
+            @count[table.name] = 0
+          end
+        end
+      end
+      false
+    end
+    def to_english
+      message = "#{@metric} "
+      if @opts[:greater_than]
+        message << "were greater than " << @opts[:greater_than] << " "
+      end
+      if @opts[:less_than]
+        message << "and " if @opts[:greater_than]
+        message << "were less than " << @opts[:less_than] << " "
+      end
+      if @opts[:for] or @opts[:last]
+        val = @opts[:for] || @opts[:last]
+        if val.is_a? ActiveSupport::Duration
+          message << "for #{val.inspect} "
+        else
+          message << "for #{val} data points "
+        end
+      end
+      if @opts[:min]
+        message << "with a minimum of #{@opts[:min]} data points "
+      end
+      if @opts[:max]
+        message << "and " if @opts[:min]
+        message << "with a maximum of #{@opts[:max]} data points "
+      end
+      message
+    end
+    def serialize
+      metric = @metric == :consumed_reads ? "reads" : "writes"
+      "#{metric}(#{@opts})"
+    end
+  end
+end

data/lib/dynamo-autoscale/rule_set.rb ADDED Viewed

@@ -0,0 +1,69 @@
+module DynamoAutoscale
+  class RuleSet
+    attr_accessor :rules
+    def initialize path = nil, &block
+      @rules = Hash.new { |h, k| h[k] = [] }
+      @current_table = :all
+      if path
+        instance_eval(File.read(path))
+      elsif block
+        instance_eval(&block)
+      end
+    end
+    def for table_name
+      return @rules[:all] if table_name == :all
+      @rules[table_name] + @rules[:all]
+    end
+    def test table
+      self.for(table.name).each do |rule|
+        return true if rule.test(table)
+      end
+    end
+    def table table_name, &block
+      @current_table = table_name
+      instance_eval(&block)
+      @current_table = :all
+    end
+    def writes opts, &block
+      @rules[@current_table] << Rule.new(:consumed_writes, opts, &block)
+    end
+    def reads opts, &block
+      @rules[@current_table] << Rule.new(:consumed_reads, opts, &block)
+    end
+    def serialize
+      @rules.inject("") do |memo, (table_name, rules)|
+        memo += "table #{table_name.inspect} do\n"
+        rules.each do |rule|
+          memo += "  #{rule.serialize}\n"
+        end
+        memo += "end\n"
+      end
+    end
+    def checksum
+      Digest::MD5.hexdigest(self.serialize)
+    end
+    def deep_dup
+      duplicate = RuleSet.new
+      new_rules = Hash.new { |h, k| h[k] = [] }
+      @rules.each do |table_name, rules|
+        rules.each do |rule|
+          new_rules[table_name] << Rule.new(rule.metric, rule.opts)
+        end
+      end
+      duplicate.rules = new_rules
+      duplicate
+    end
+  end
+end