dynamo-autoscale 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +13 -0
  3. data/Gemfile.lock +58 -0
  4. data/LICENSE +21 -0
  5. data/README.md +400 -0
  6. data/Rakefile +9 -0
  7. data/aws.sample.yml +16 -0
  8. data/bin/dynamo-autoscale +131 -0
  9. data/config/environment/common.rb +114 -0
  10. data/config/environment/console.rb +2 -0
  11. data/config/environment/test.rb +3 -0
  12. data/config/logger.yml +11 -0
  13. data/config/services/aws.rb +20 -0
  14. data/config/services/logger.rb +35 -0
  15. data/data/.gitkeep +0 -0
  16. data/dynamo-autoscale.gemspec +29 -0
  17. data/lib/dynamo-autoscale/actioner.rb +265 -0
  18. data/lib/dynamo-autoscale/cw_poller.rb +49 -0
  19. data/lib/dynamo-autoscale/dispatcher.rb +39 -0
  20. data/lib/dynamo-autoscale/dynamo_actioner.rb +59 -0
  21. data/lib/dynamo-autoscale/ext/active_support/duration.rb +7 -0
  22. data/lib/dynamo-autoscale/local_actioner.rb +39 -0
  23. data/lib/dynamo-autoscale/local_data_poll.rb +51 -0
  24. data/lib/dynamo-autoscale/logger.rb +15 -0
  25. data/lib/dynamo-autoscale/metrics.rb +192 -0
  26. data/lib/dynamo-autoscale/poller.rb +41 -0
  27. data/lib/dynamo-autoscale/pretty_formatter.rb +27 -0
  28. data/lib/dynamo-autoscale/rule.rb +180 -0
  29. data/lib/dynamo-autoscale/rule_set.rb +69 -0
  30. data/lib/dynamo-autoscale/table_tracker.rb +329 -0
  31. data/lib/dynamo-autoscale/unit_cost.rb +41 -0
  32. data/lib/dynamo-autoscale/version.rb +3 -0
  33. data/lib/dynamo-autoscale.rb +1 -0
  34. data/rlib/dynamodb_graph.r +15 -0
  35. data/rlib/dynamodb_scatterplot.r +13 -0
  36. data/rulesets/default.rb +5 -0
  37. data/rulesets/erroneous.rb +1 -0
  38. data/rulesets/gradual_tail.rb +11 -0
  39. data/rulesets/none.rb +0 -0
  40. data/script/console +3 -0
  41. data/script/historic_data +46 -0
  42. data/script/hourly_wastage +40 -0
  43. data/script/monitor +55 -0
  44. data/script/simulator +40 -0
  45. data/script/test +52 -0
  46. data/script/validate_ruleset +20 -0
  47. data/spec/actioner_spec.rb +244 -0
  48. data/spec/rule_set_spec.rb +89 -0
  49. data/spec/rule_spec.rb +491 -0
  50. data/spec/spec_helper.rb +4 -0
  51. data/spec/table_tracker_spec.rb +256 -0
  52. metadata +178 -0
@@ -0,0 +1,51 @@
1
module DynamoAutoscale
  # A poller that reads table metrics from JSON files found under
  # DynamoAutoscale.data_dir (one sub-directory per entry in that directory,
  # one "<table name>.json" file per table) instead of asking a remote
  # service. Parsed files are kept in memory so later polls replay them
  # without touching the disk again.
  class LocalDataPoll < Poller
    def initialize *args
      super(*args)

      # table name (or glob pattern) => { data file path => parsed data }
      @cache = Hash.new { |h, k| h[k] = {} }
    end

    # Calls the given block with (table_name, data) once for every data file
    # that belongs to each of the requested tables.
    #
    # tables - An array of table names. When nil, the single glob pattern "*"
    #          is used, which matches every JSON file in each day directory.
    #
    # `data` is a hash of metric name (symbol) => { Time => value }.
    def poll tables, &block
      tables = ["*"] if tables.nil?

      tables.each do |table_name|
        if @cache[table_name].empty?
          load_from_disk(table_name, &block)
        else
          @cache[table_name].each_value do |table_day_data|
            block.call(table_name, table_day_data)
          end
        end
      end
    end

    private

    # Reads, parses, caches and yields every data file matching table_name.
    def load_from_disk table_name, &block
      file = "#{table_name}.json"

      Dir[File.join(DynamoAutoscale.data_dir, '*')].each do |day_path|
        Dir[File.join(day_path, file)].each do |table_path|
          data = JSON.parse(File.read(table_path)).symbolize_keys

          if data[:consumed_writes].nil? or data[:consumed_reads].nil?
            logger.warn "Lacking data for table #{table_name}. Skipping."
            next
          end

          data = parse_time_keys(data)

          # Keyed by the file path rather than the day directory: a glob such
          # as "*" can match several files inside one day directory, and
          # keying by directory made each of them overwrite the previous one,
          # so cached replays silently lost data.
          @cache[table_name][table_path] = data

          block.call(table_name, data)
        end
      end
    end

    # Converts the string timestamps used as keys of each nested time series
    # into Time objects, leaving the outer metric keys untouched.
    def parse_time_keys data
      data.each_with_object({}) do |(key, timeseries), parsed|
        parsed[key] = timeseries.each_with_object({}) do |(time, value), series|
          series[Time.parse(time)] = value
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module DynamoAutoscale
  # Mixin that gives any class or object access to one shared logger.
  #
  # The logger is assigned once through DynamoAutoscale::Logger.logger= and is
  # then reachable through the #logger method of anything that includes or
  # extends this module.
  module Logger
    class << self
      # The shared logger object. Stored in an instance variable of this
      # module rather than a class variable: a class variable defined in a
      # mixin is visible in (and can be clobbered by) every class that
      # includes it, and reading it before assignment raises NameError.
      # Reading the logger before one has been assigned now returns nil.
      attr_accessor :logger
    end

    # Returns the shared logger assigned via DynamoAutoscale::Logger.logger=.
    def logger
      DynamoAutoscale::Logger.logger
    end
  end
end
@@ -0,0 +1,192 @@
1
module DynamoAutoscale
  # Fetches DynamoDB throughput statistics from CloudWatch.
  #
  # None of the methods below modify the `opts` hash they are given. The
  # previous implementation wrote :metric_name / :statistics into it and
  # deleted :region from it, so a caller re-using one hash for several calls
  # (as all_metrics does) silently lost its :region after the first request.
  class Metrics
    extend DynamoAutoscale::Logger

    DEFAULT_OPTS = {
      namespace: 'AWS/DynamoDB',
      period: 300,
      # metric_name: metric,
      # start_time: (NOW - 3600).iso8601,
      # end_time: NOW.iso8601,
      # dimensions: [{
      #   name: "TableName", value: TABLE_NAME,
      # }],
    }

    # Returns a CloudWatch client object for a given region. If no region
    # is given, the region defaults to whatever is in
    # DynamoAutoscale::DEFAULT_AWS_REGION. Clients are created lazily and
    # memoised per region.
    #
    # CloudWatch client documentation:
    #   https://github.com/aws/aws-sdk-ruby/blob/master/lib/aws/cloud_watch/client.rb
    def self.client region = nil
      @client ||= Hash.new do |clients, region_name|
        clients[region_name] = AWS::CloudWatch.new({
          cloud_watch_endpoint: "monitoring.#{region_name}.amazonaws.com",
        }).client
      end

      @client[region || DEFAULT_AWS_REGION]
    end

    # Returns a hash of timeseries data. Looks a bit like this:
    #
    #   {
    #     provisioned_reads:  { date => value... },
    #     consumed_reads:     { date => value... },
    #     provisioned_writes: { date => value... },
    #     consumed_writes:    { date => value... },
    #   }
    #
    # A metric only appears as a key once at least one datapoint was returned
    # for it; reading a missing metric yields (and stores) an empty hash.
    #
    # opts - forwarded unchanged to each of the four queries, so options such
    #        as :region, :start_time, :end_time and :period apply to all.
    def self.all_metrics table_name, opts = {}
      data = Hash.new { |h, k| h[k] = {} }

      # [metric / method name, key of the datapoint holding its value]
      [
        [:provisioned_reads,  :average],
        [:consumed_reads,     :sum],
        [:provisioned_writes, :average],
        [:consumed_writes,    :sum],
      ].each do |metric, statistic|
        datapoints = send(metric, table_name, opts).sort_by do |datum|
          datum[:timestamp]
        end

        datapoints.each do |timeslice|
          data[metric][timeslice[:timestamp]] = timeslice[statistic]
        end
      end

      data
    end

    # Returns provisioned throughput reads for a table in DynamoDB, using the
    # "Average" statistic.
    #
    # Example:
    #
    #   pp DynamoAutoscale::Metrics.provisioned_reads("table_name")
    #   #=> [{:timestamp=>2013-06-18 15:25:00 UTC, :unit=>"Count", :average=>800.0},
    #        {:timestamp=>2013-06-18 15:05:00 UTC, :unit=>"Count", :average=>800.0},
    #        ...
    #       ]
    def self.provisioned_reads table_name, opts = {}
      provisioned_metric_statistics(
        table_name, opts.merge(metric_name: "ProvisionedReadCapacityUnits"))
    end

    # Returns provisioned throughput writes for a table in DynamoDB, using the
    # "Average" statistic.
    #
    # Example:
    #
    #   pp DynamoAutoscale::Metrics.provisioned_writes("table_name")
    #   #=> [{:timestamp=>2013-06-18 15:25:00 UTC, :unit=>"Count", :average=>600.0},
    #        {:timestamp=>2013-06-18 15:05:00 UTC, :unit=>"Count", :average=>600.0},
    #        ...
    #       ]
    def self.provisioned_writes table_name, opts = {}
      provisioned_metric_statistics(
        table_name, opts.merge(metric_name: "ProvisionedWriteCapacityUnits"))
    end

    # Returns consumed throughput reads for a table in DynamoDB. The "Sum"
    # statistic is requested and each datapoint's :sum is then divided by the
    # period, so :sum holds the average over the period rather than the total.
    #
    # Example:
    #
    #   pp DynamoAutoscale::Metrics.consumed_reads("table_name")
    #   #=> [{:timestamp=>2013-06-18 15:53:00 UTC,
    #         :unit=>"Count",
    #         :sum=>1.2111202996546189},
    #        {:timestamp=>2013-06-18 15:18:00 UTC,
    #         :unit=>"Count",
    #         :sum=>1.5604973943552964},
    #        ...
    #       ]
    def self.consumed_reads table_name, opts = {}
      consumed_metric_statistics(table_name, opts.merge(
        metric_name: "ConsumedReadCapacityUnits",
        statistics:  ["Sum"],
      ))
    end

    # Returns consumed throughput writes for a table in DynamoDB. The "Sum"
    # statistic is requested and each datapoint's :sum is then divided by the
    # period, so :sum holds the average over the period rather than the total.
    #
    # Example:
    #
    #   pp DynamoAutoscale::Metrics.consumed_writes("table_name")
    #   #=> [{:timestamp=>2013-06-18 15:39:00 UTC,
    #         :unit=>"Count",
    #         :sum=>1.6882725354235755},
    #        {:timestamp=>2013-06-18 15:24:00 UTC,
    #         :unit=>"Count",
    #         :sum=>1.7701510393300435},
    #        ...
    #       ]
    def self.consumed_writes table_name, opts = {}
      consumed_metric_statistics(table_name, opts.merge(
        metric_name: "ConsumedWriteCapacityUnits",
        statistics:  ["Sum"],
      ))
    end

    # NOTE: a bare `private` has no effect on methods defined with
    # `def self.`, so the helpers below remain publicly callable. That is
    # left as-is here so existing callers keep working; treat them as
    # internal.
    private

    # Because there's a difference to how consumed and provisioned statistics
    # are gathered for DynamoDB, the two metrics are not comparable without a
    # little bit of modification: the summed consumption is divided by the
    # length of the period (opts[:period], or DEFAULT_OPTS[:period]).
    #
    # Relevant forum post:
    #   https://forums.aws.amazon.com/thread.jspa?threadID=119523
    def self.consumed_metric_statistics table_name, opts = {}
      opts   = opts.merge(statistics: ["Sum"])
      period = opts[:period] || DEFAULT_OPTS[:period]

      metric_statistics(table_name, opts).map do |datum|
        datum[:sum] = datum[:sum] / period
        datum
      end
    end

    # Fetches a provisioned-capacity metric using the "Average" statistic.
    def self.provisioned_metric_statistics table_name, opts = {}
      metric_statistics(table_name, opts.merge(statistics: ["Average"]))
    end

    # A base method that gets called by wrapper methods defined above. Makes a
    # call to CloudWatch, getting statistics on whatever metric is given.
    #
    # opts - CloudWatch request options layered over DEFAULT_OPTS, a
    #        TableName dimension and a default window of the last hour. The
    #        extra :region key selects the client and is not sent to
    #        CloudWatch.
    #
    # Returns the :datapoints of the CloudWatch response.
    def self.metric_statistics table_name, opts = {}
      region  = opts[:region]
      request = DEFAULT_OPTS.merge({
        dimensions: [{ name: "TableName", value: table_name }],
        start_time: 1.hour.ago,
        end_time:   Time.now,
      }).merge(opts.reject { |key, _| key == :region })

      # Time-like values are converted to ISO 8601 strings; anything that
      # does not respond to #iso8601 (e.g. an already formatted string) is
      # passed through untouched.
      [:start_time, :end_time].each do |boundary|
        value = request[boundary]
        request[boundary] = value.iso8601 if value and value.respond_to? :iso8601
      end

      client(region).get_metric_statistics(request)[:datapoints]
    end
  end
end
@@ -0,0 +1,41 @@
1
module DynamoAutoscale
  # Base class for pollers. A subclass provides #poll, which yields
  # (table_name, data) pairs where data maps a metric name to a
  # { time => value } hash; #run turns those into one datum per timestamp and
  # hands each of them to the dispatcher.
  class Poller
    # The poller constructor accepts a hash of options. The following arguments
    # are valid but optional:
    #
    #   - :tables  - An array of the tables you would like to poll.
    #   - :filters - This is primarily for working with local data but there
    #     could maybe be a production use for it. Locally, it is used to modify
    #     each datum before it gets sent to the dispatcher. It helps fake setting
    #     provisioned throughput.
    def initialize opts = {}
      @opts = opts
    end

    # Polls the configured tables and dispatches one datum per distinct
    # timestamp, in ascending time order. Each filter is called with
    # (table, time, datum) before the datum is dispatched. The given block is
    # forwarded to the dispatcher.
    def run &block
      poll(@opts[:tables]) do |table_name, data|
        logger.debug "[poller] Got data: #{data}"
        table = DynamoAutoscale.tables[table_name]

        # The non-bang forms are required here: Array#uniq! returns nil when
        # the array contains no duplicates, which used to make the loop below
        # blow up whenever every timestamp happened to be distinct.
        times = data.values.flat_map(&:keys).uniq.sort

        times.each do |time|
          datum = datum_at(data, time)

          if @opts[:filters]
            @opts[:filters].each { |filter| filter.call(table, time, datum) }
          end

          DynamoAutoscale.dispatcher.dispatch(table, time, datum, &block)
        end
      end
    end

    private

    # Builds the datum for a single timestamp. A metric whose series is
    # missing altogether yields nil, exactly like a series with no value at
    # that time.
    def datum_at data, time
      metrics = [
        :provisioned_writes,
        :provisioned_reads,
        :consumed_writes,
        :consumed_reads,
      ]

      metrics.each_with_object({}) do |metric, datum|
        series = data[metric]
        datum[metric] = series && series[time]
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module DynamoAutoscale
  # Log formatter that renders each entry on one line as
  # "[time in UTC][severity][table name] message", colouring the bracketed
  # parts through the String colour helpers (cyan, green, blue, ...).
  class PrettyFormatter
    # Builds the formatted line for one log entry.
    #
    # severity - the severity label, e.g. "INFO"
    # time     - the entry's time; it is switched to UTC before printing
    # progname - accepted but not used
    # msg      - the message; anything that is not a String is shown through
    #            #inspect
    def call(severity, time, progname, msg)
      # Best effort: whenever the current table's name cannot be obtained,
      # for any StandardError, a placeholder is printed instead.
      table_label =
        begin
          DynamoAutoscale.current_table.name
        rescue StandardError
          "no table"
        end

      stamp = time.utc.to_s.cyan
      level = severity_color(severity)
      where = table_label.green
      text  = String === msg ? msg : msg.inspect

      "[#{stamp}][#{level}][#{where}] #{text}\n"
    end

    # Returns the severity label in its colour; labels that are not
    # recognised are returned as an uncoloured string.
    def severity_color(severity)
      label = "#{severity}"

      case severity
      when "DEBUG"          then label.blue
      when "INFO"           then label.white
      when "WARN"           then label.yellow
      when "ERROR", "FATAL" then label.red
      else label
      end
    end
  end
end
@@ -0,0 +1,180 @@
1
module DynamoAutoscale
  # A single scaling rule: a threshold on consumed reads or writes, evaluated
  # over the most recent data of a table, together with the action to take
  # when the threshold is met (a :scale option, a block, or both).
  class Rule
    attr_accessor :metric, :opts

    # Option key => comparison applied as comparator.call(datum, threshold).
    CONDITIONS = {
      greater_than: Proc.new { |a, b| a > b },
      less_than: Proc.new { |a, b| a < b },
    }

    # metric - :consumed_reads or :consumed_writes.
    # opts   - Hash of options:
    #          :greater_than / :less_than - threshold, either a number or a
    #            string ending in "%" (a percentage of the last provisioned
    #            value). At least one is required.
    #          :for / :last - how much recent data to look at: a duration or
    #            a number of data points. At least one is required.
    #          :min / :max  - minimum number of points required / maximum
    #            number of points considered.
    #          :times       - number of consecutive matches needed before the
    #            rule triggers.
    #          :scale       - { on: :consumed | :provisioned, by: factor }.
    # block  - optional action, called with (table, rule, actioner) when the
    #          rule triggers.
    #
    # NOTE(review): :count is validated here but #test reads :times — confirm
    # which key rulesets are expected to use.
    #
    # Raises ArgumentError when the options are invalid.
    def initialize metric, opts, &block
      @metric = metric
      @opts = opts
      @block = block
      @count = Hash.new(0)

      validate_metric!
      validate_thresholds!
      validate_limits!
      validate_action!
    end

    # Evaluates the rule against a table and runs its action if it triggers.
    #
    # Returns true if the rule triggered, false otherwise.
    def test table
      last_provisioned = table.last_provisioned_for(@metric)

      CONDITIONS.each do |key, comparator|
        next unless @opts[key]

        value = @opts[key].to_f

        # Get the value as a percentage of the last amount provisioned for
        # this metric if it is a string that ends with a percent symbol.
        if @opts[key].is_a?(String) && @opts[key].end_with?("%")
          # If we don't have a provisioned value yet, we have to move along.
          # We don't know what the headroom is and we can't trigger an
          # alarm.
          next if last_provisioned.nil?

          value = (value / 100.0) * last_provisioned
        end

        duration = @opts[:for] || @opts[:last]
        data = table.last(duration, @metric)

        # If a specific number of points have been specified to look at,
        # make sure we have exactly that number of points before continuing.
        if !duration.is_a?(ActiveSupport::Duration) && data.length != duration
          return false
        end

        data = data.take(@opts[:max]) if @opts[:max]
        return false if @opts[:min] && !(data.length >= @opts[:min])

        unless data.all? { |datum| comparator.call(datum, value) }
          # The streak of consecutive matches is broken.
          @count[table.name] = 0
          next
        end

        @count[table.name] += 1

        # With :times set, only the match that completes the streak triggers.
        unless @opts[:times].nil? || @count[table.name] == @opts[:times]
          return false
        end

        @count[table.name] = 0

        if scale = @opts[:scale]
          new_val = table.send("last_#{scale[:on]}_for", @metric) * scale[:by]
          DynamoAutoscale.actioners[table].set(@metric, new_val)
        end

        @block.call(table, self, DynamoAutoscale.actioners[table]) if @block

        return true
      end

      false
    end

    # Returns a human readable description of the rule's condition.
    def to_english
      message = "#{@metric} "

      # Thresholds are interpolated rather than appended with String#<<,
      # which treats an Integer as a codepoint (90 became "Z") and raises
      # TypeError for a Float.
      if @opts[:greater_than]
        message << "were greater than #{@opts[:greater_than]} "
      end

      if @opts[:less_than]
        message << "and " if @opts[:greater_than]
        message << "were less than #{@opts[:less_than]} "
      end

      if @opts[:for] or @opts[:last]
        val = @opts[:for] || @opts[:last]

        if val.is_a? ActiveSupport::Duration
          message << "for #{val.inspect} "
        else
          message << "for #{val} data points "
        end
      end

      if @opts[:min]
        message << "with a minimum of #{@opts[:min]} data points "
      end

      if @opts[:max]
        message << "and " if @opts[:min]
        message << "with a maximum of #{@opts[:max]} data points "
      end

      message
    end

    # Returns the rule as a string of the form "reads({...})" or
    # "writes({...})", where the braces hold the options hash.
    def serialize
      metric = @metric == :consumed_reads ? "reads" : "writes"

      "#{metric}(#{@opts})"
    end

    private

    # Ensures the rule is about a metric it knows how to evaluate.
    def validate_metric!
      unless [:consumed_reads, :consumed_writes].include? @metric
        raise ArgumentError.new("Invalid metric: #{@metric}. Must be either " +
          ":consumed_reads or :consumed_writes.")
      end
    end

    # Ensures a threshold and a window are present and that the thresholds
    # are positive and do not contradict each other.
    def validate_thresholds!
      if @opts[:greater_than].nil? and @opts[:less_than].nil?
        raise ArgumentError.new("Must specify at least one of greater_than " +
          "or less_than")
      end

      if @opts[:for].nil? and @opts[:last].nil?
        raise ArgumentError.new("Need to specify at least one of :for and :last.")
      end

      if @opts[:greater_than] and @opts[:less_than] and
        @opts[:greater_than].to_f >= @opts[:less_than].to_f
        raise ArgumentError.new("greater_than cannot be greater than or " +
          "equal to less_than. Rule will never trigger.")
      end

      if @opts[:greater_than] and @opts[:greater_than].to_f <= 0
        raise ArgumentError.new("greater_than cannot be less than or equal to 0")
      end

      if @opts[:less_than] and @opts[:less_than].to_f <= 0
        raise ArgumentError.new("less_than cannot be less than or equal to 0")
      end
    end

    # Ensures the optional numeric limits are positive when given.
    def validate_limits!
      [:min, :max, :count].each do |limit|
        if @opts[limit] and @opts[limit] <= 0
          raise ArgumentError.new("#{limit} cannot be less than or equal to 0")
        end
      end
    end

    # Ensures the rule has an action and that a :scale option is well formed.
    def validate_action!
      if @opts[:scale].nil? and @block.nil?
        raise ArgumentError.new("no :scale option or block specified. Rule has no action.")
      end

      scale = @opts[:scale]
      return unless scale

      unless scale.is_a? Hash
        raise ArgumentError.new(":scale option expects to be a hash.")
      end

      # Both keys are required. This used to be checked with `and`, which let
      # a hash lacking only :by through to the `<= 0` comparison below, where
      # it failed with NoMethodError instead of ArgumentError.
      if scale[:on].nil? or scale[:by].nil?
        raise ArgumentError.new(":scale option expects :on and :by options.")
      end

      unless [:consumed, :provisioned].include?(scale[:on])
        raise ArgumentError.new(":scale { :on } needs to be either :consumed " +
          "or :provisioned")
      end

      if scale[:by] <= 0
        raise ArgumentError.new(":scale { :by } needs to be greater than 0")
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module DynamoAutoscale
  # A collection of rules, grouped per table, built with a small DSL:
  #
  #   reads  greater_than: "90%", for: 2, scale: { on: :consumed, by: 2 }
  #
  #   table "some_table" do
  #     writes less_than: "30%", for: 2, scale: { on: :consumed, by: 2 }
  #   end
  #
  # Rules declared outside of a `table` block are stored under :all and apply
  # to every table.
  class RuleSet
    attr_accessor :rules

    # path  - optional path of a ruleset file. NOTE: the file is executed as
    #         Ruby code in the context of this object, so only load rulesets
    #         from trusted locations.
    # block - optional block of DSL calls, used when no path is given.
    def initialize path = nil, &block
      @rules = Hash.new { |h, k| h[k] = [] }
      @current_table = :all

      if path
        # The path is handed to instance_eval so that errors raised from the
        # ruleset point at the file rather than at "(eval)".
        instance_eval(File.read(path), path.to_s)
      elsif block
        instance_eval(&block)
      end
    end

    # Returns the rules that apply to table_name: the table's own rules
    # followed by the global (:all) ones.
    #
    # Uses fetch so that merely asking about a table does not add an empty
    # entry to @rules, which used to change #serialize and #checksum.
    def for table_name
      global_rules = @rules.fetch(:all, [])
      return global_rules if table_name == :all

      @rules.fetch(table_name, []) + global_rules
    end

    # Tests the rules that apply to the given table, in order, stopping at
    # the first one that triggers.
    #
    # Returns true if a rule triggered and false otherwise (it used to return
    # the array of rules, which is truthy, when nothing triggered).
    def test table
      self.for(table.name).any? { |rule| rule.test(table) }
    end

    # DSL: the rules declared inside the block apply to table_name only.
    def table table_name, &block
      @current_table = table_name

      begin
        instance_eval(&block)
      ensure
        # Always fall back to the global scope, even when the block raises,
        # so that later rules are not attributed to this table.
        @current_table = :all
      end

      @current_table
    end

    # DSL: declares a rule on consumed writes. See Rule for the options.
    def writes opts, &block
      @rules[@current_table] << Rule.new(:consumed_writes, opts, &block)
    end

    # DSL: declares a rule on consumed reads. See Rule for the options.
    def reads opts, &block
      @rules[@current_table] << Rule.new(:consumed_reads, opts, &block)
    end

    # Returns a textual form of the ruleset: one `table ... do`/`end` section
    # per table, listing each rule's #serialize output.
    def serialize
      sections = @rules.map do |table_name, rules|
        body = rules.map { |rule| " #{rule.serialize}\n" }.join
        "table #{table_name.inspect} do\n#{body}end\n"
      end

      sections.join
    end

    # Returns the MD5 hex digest of #serialize.
    def checksum
      Digest::MD5.hexdigest(self.serialize)
    end

    # Returns a new RuleSet holding fresh Rule objects equivalent to the ones
    # in this set.
    def deep_dup
      duplicate = RuleSet.new
      new_rules = Hash.new { |h, k| h[k] = [] }

      @rules.each do |table_name, rules|
        rules.each do |rule|
          # Rule does not expose its block, so it is read directly. Without
          # it, a rule whose only action is a block could not be re-created
          # (Rule.new raises ArgumentError when given neither :scale nor a
          # block) and a rule with both would silently lose its block.
          action = rule.instance_variable_get(:@block)
          new_rules[table_name] << Rule.new(rule.metric, rule.opts.dup, &action)
        end
      end

      duplicate.rules = new_rules
      duplicate
    end
  end
end