dynamo-autoscale 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. data/.gitignore +4 -0
  2. data/Gemfile +13 -0
  3. data/Gemfile.lock +58 -0
  4. data/LICENSE +21 -0
  5. data/README.md +400 -0
  6. data/Rakefile +9 -0
  7. data/aws.sample.yml +16 -0
  8. data/bin/dynamo-autoscale +131 -0
  9. data/config/environment/common.rb +114 -0
  10. data/config/environment/console.rb +2 -0
  11. data/config/environment/test.rb +3 -0
  12. data/config/logger.yml +11 -0
  13. data/config/services/aws.rb +20 -0
  14. data/config/services/logger.rb +35 -0
  15. data/data/.gitkeep +0 -0
  16. data/dynamo-autoscale.gemspec +29 -0
  17. data/lib/dynamo-autoscale/actioner.rb +265 -0
  18. data/lib/dynamo-autoscale/cw_poller.rb +49 -0
  19. data/lib/dynamo-autoscale/dispatcher.rb +39 -0
  20. data/lib/dynamo-autoscale/dynamo_actioner.rb +59 -0
  21. data/lib/dynamo-autoscale/ext/active_support/duration.rb +7 -0
  22. data/lib/dynamo-autoscale/local_actioner.rb +39 -0
  23. data/lib/dynamo-autoscale/local_data_poll.rb +51 -0
  24. data/lib/dynamo-autoscale/logger.rb +15 -0
  25. data/lib/dynamo-autoscale/metrics.rb +192 -0
  26. data/lib/dynamo-autoscale/poller.rb +41 -0
  27. data/lib/dynamo-autoscale/pretty_formatter.rb +27 -0
  28. data/lib/dynamo-autoscale/rule.rb +180 -0
  29. data/lib/dynamo-autoscale/rule_set.rb +69 -0
  30. data/lib/dynamo-autoscale/table_tracker.rb +329 -0
  31. data/lib/dynamo-autoscale/unit_cost.rb +41 -0
  32. data/lib/dynamo-autoscale/version.rb +3 -0
  33. data/lib/dynamo-autoscale.rb +1 -0
  34. data/rlib/dynamodb_graph.r +15 -0
  35. data/rlib/dynamodb_scatterplot.r +13 -0
  36. data/rulesets/default.rb +5 -0
  37. data/rulesets/erroneous.rb +1 -0
  38. data/rulesets/gradual_tail.rb +11 -0
  39. data/rulesets/none.rb +0 -0
  40. data/script/console +3 -0
  41. data/script/historic_data +46 -0
  42. data/script/hourly_wastage +40 -0
  43. data/script/monitor +55 -0
  44. data/script/simulator +40 -0
  45. data/script/test +52 -0
  46. data/script/validate_ruleset +20 -0
  47. data/spec/actioner_spec.rb +244 -0
  48. data/spec/rule_set_spec.rb +89 -0
  49. data/spec/rule_spec.rb +491 -0
  50. data/spec/spec_helper.rb +4 -0
  51. data/spec/table_tracker_spec.rb +256 -0
  52. metadata +178 -0
@@ -0,0 +1,51 @@
1
module DynamoAutoscale
  # Poller that reads table metrics from JSON files on disk. Files are looked
  # up as <DynamoAutoscale.data_dir>/<any directory>/<table name>.json, parsed
  # once and then replayed from an in-memory cache on subsequent polls.
  class LocalDataPoll < Poller
    def initialize *args
      super(*args)

      # table name (as passed to #poll) => { file path => parsed data }
      @cache = Hash.new { |h, k| h[k] = {} }
    end

    # Yields (table_name, data) to the block once per data file found for
    # each entry in +tables+. When +tables+ is nil the wildcard "*" is used,
    # which matches every JSON file in every day directory; note that the
    # block then receives "*" as the table name.
    #
    # +data+ is a hash of metric name (Symbol) => { Time => value }.
    def poll tables, &block
      tables = ["*"] if tables.nil?

      tables.each do |table_name|
        if @cache[table_name].empty?
          load_from_disk(table_name, &block)
        else
          @cache[table_name].each_value do |cached_data|
            block.call(table_name, cached_data)
          end
        end
      end
    end

    private

    # Reads every matching file from disk, caches the parsed result and
    # yields it. Files without both consumed metrics are skipped with a
    # warning and are not cached.
    def load_from_disk table_name, &block
      file = "#{table_name}.json"

      Dir[File.join(DynamoAutoscale.data_dir, '*')].each do |day_path|
        Dir[File.join(day_path, file)].each do |table_path|
          data = JSON.parse(File.read(table_path)).symbolize_keys

          if data[:consumed_writes].nil? || data[:consumed_reads].nil?
            logger.warn "Lacking data for table #{table_name}. Skipping."
            next
          end

          data = parse_time_keys(data)

          # Keyed by file rather than by day directory: with the "*"
          # wildcard one directory can hold several files, and keying by
          # directory made later files overwrite earlier ones, so a cached
          # replay yielded fewer data sets than the first pass.
          @cache[table_name][table_path] = data

          block.call(table_name, data)
        end
      end
    end

    # Converts the string timestamps used as keys of each nested timeseries
    # hash into Time objects, leaving the metric names and values untouched.
    def parse_time_keys data
      Hash[data.map do |metric, timeseries|
        [metric, Hash[timeseries.map { |time, value| [Time.parse(time), value] }]]
      end]
    end
  end
end
@@ -0,0 +1,15 @@
1
module DynamoAutoscale
  # Mixin that gives any class or object a +logger+ method returning the
  # single logger registered with DynamoAutoscale::Logger.logger=.
  module Logger
    class << self
      # The shared logger. Stored in a module-level instance variable rather
      # than a class variable: a class variable defined on a mixin is shared
      # with every class that includes it, and reading it before assignment
      # raises NameError. This accessor returns nil until a logger is set.
      attr_accessor :logger
    end

    # Returns the shared logger (nil if none has been assigned yet).
    def logger
      DynamoAutoscale::Logger.logger
    end
  end
end
@@ -0,0 +1,192 @@
1
module DynamoAutoscale
  # Fetches DynamoDB throughput metrics for a table from CloudWatch.
  #
  # Every public method accepts an +opts+ hash that is merged over
  # DEFAULT_OPTS and passed to CloudWatch's get_metric_statistics. The extra
  # key :region selects which regional client is used. None of the methods
  # modify the hash passed in by the caller.
  class Metrics
    extend DynamoAutoscale::Logger

    DEFAULT_OPTS = {
      namespace:   'AWS/DynamoDB',
      period:      300,
      # metric_name: metric,
      # start_time:  (NOW - 3600).iso8601,
      # end_time:    NOW.iso8601,
      # dimensions:  [{
      #   name: "TableName", value: TABLE_NAME,
      # }],
    }

    # Returns a CloudWatch client object for a given region. If no region is
    # given, the region defaults to DynamoAutoscale::DEFAULT_AWS_REGION.
    # Clients are created on first use and cached per region.
    #
    # CloudWatch client documentation:
    #   https://github.com/aws/aws-sdk-ruby/blob/master/lib/aws/cloud_watch/client.rb
    def self.client region = nil
      @client ||= Hash.new do |hash, _region|
        hash[_region] = AWS::CloudWatch.new({
          cloud_watch_endpoint: "monitoring.#{_region}.amazonaws.com",
        }).client
      end

      @client[region || DEFAULT_AWS_REGION]
    end

    # Returns a hash of timeseries data. Looks a bit like this:
    #
    #   {
    #     provisioned_reads:  { date => value... },
    #     provisioned_writes: { date => value... },
    #     consumed_reads:     { date => value... },
    #     consumed_writes:    { date => value... },
    #   }
    #
    # Each series is inserted in ascending timestamp order. A metric for
    # which CloudWatch returns no datapoints has no key in the result; the
    # returned hash creates an empty series on first access to such a key.
    def self.all_metrics table_name, opts = {}
      data = Hash.new { |h, k| h[k] = {} }

      # metric fetcher => field of each datapoint holding its value
      {
        provisioned_reads:  :average,
        consumed_reads:     :sum,
        provisioned_writes: :average,
        consumed_writes:    :sum,
      }.each do |metric, statistic|
        datapoints = public_send(metric, table_name, opts).sort_by do |datum|
          datum[:timestamp]
        end

        datapoints.each do |timeslice|
          data[metric][timeslice[:timestamp]] = timeslice[statistic]
        end
      end

      data
    end

    # Returns provisioned throughput reads for a table in DynamoDB, using the
    # "Average" statistic of each period.
    #
    # Example:
    #
    #   pp DynamoAutoscale::Metrics.provisioned_reads("table_name")
    #   #=> [{:timestamp=>2013-06-18 15:25:00 UTC, :unit=>"Count", :average=>800.0},
    #        {:timestamp=>2013-06-18 15:05:00 UTC, :unit=>"Count", :average=>800.0},
    #        ...
    #       ]
    def self.provisioned_reads table_name, opts = {}
      provisioned_metric_statistics(table_name,
        opts.merge(metric_name: "ProvisionedReadCapacityUnits"))
    end

    # Returns provisioned throughput writes for a table in DynamoDB, using
    # the "Average" statistic of each period.
    #
    # Example:
    #
    #   pp DynamoAutoscale::Metrics.provisioned_writes("table_name")
    #   #=> [{:timestamp=>2013-06-18 15:25:00 UTC, :unit=>"Count", :average=>600.0},
    #        {:timestamp=>2013-06-18 15:05:00 UTC, :unit=>"Count", :average=>600.0},
    #        ...
    #       ]
    def self.provisioned_writes table_name, opts = {}
      provisioned_metric_statistics(table_name,
        opts.merge(metric_name: "ProvisionedWriteCapacityUnits"))
    end

    # Returns consumed throughput reads for a table in DynamoDB. Each
    # datapoint's :sum is the CloudWatch "Sum" statistic divided by the
    # period, which makes it comparable to the provisioned figures.
    #
    # Example (values are illustrative):
    #
    #   pp DynamoAutoscale::Metrics.consumed_reads("table_name")
    #   #=> [{:timestamp=>2013-06-18 15:53:00 UTC,
    #         :unit=>"Count",
    #         :sum=>1.2111202996546189},
    #        {:timestamp=>2013-06-18 15:18:00 UTC,
    #         :unit=>"Count",
    #         :sum=>1.5604973943552964},
    #        ...
    #       ]
    def self.consumed_reads table_name, opts = {}
      consumed_metric_statistics(table_name,
        opts.merge(metric_name: "ConsumedReadCapacityUnits", statistics: ["Sum"]))
    end

    # Returns consumed throughput writes for a table in DynamoDB. Each
    # datapoint's :sum is the CloudWatch "Sum" statistic divided by the
    # period, which makes it comparable to the provisioned figures.
    #
    # Example (values are illustrative):
    #
    #   pp DynamoAutoscale::Metrics.consumed_writes("table_name")
    #   #=> [{:timestamp=>2013-06-18 15:39:00 UTC,
    #         :unit=>"Count",
    #         :sum=>1.6882725354235755},
    #        {:timestamp=>2013-06-18 15:24:00 UTC,
    #         :unit=>"Count",
    #         :sum=>1.7701510393300435},
    #        ...
    #       ]
    def self.consumed_writes table_name, opts = {}
      consumed_metric_statistics(table_name,
        opts.merge(metric_name: "ConsumedWriteCapacityUnits", statistics: ["Sum"]))
    end

    # The methods below are internal helpers. They were originally placed
    # under a bare `private`, which has no effect on `def self.` methods, so
    # they have always been publicly callable. They are left public here so
    # that any existing external caller keeps working.

    # Because there's a difference to how consumed and provisioned statistics
    # are gathered for DynamoDB, the two metrics are not comparable without a
    # little bit of modification: the summed value of each datapoint is
    # divided by the period it covers.
    #
    # Relevant forum post:
    #   https://forums.aws.amazon.com/thread.jspa?threadID=119523
    def self.consumed_metric_statistics table_name, opts = {}
      opts   = opts.merge(statistics: ["Sum"])
      period = opts[:period] || DEFAULT_OPTS[:period]

      metric_statistics(table_name, opts).map do |datum|
        datum[:sum] = datum[:sum] / period
        datum
      end
    end

    # Fetches a metric using the "Average" statistic.
    def self.provisioned_metric_statistics table_name, opts = {}
      metric_statistics(table_name, opts.merge(statistics: ["Average"]))
    end

    # A base method that gets called by wrapper methods defined above. Makes a
    # call to CloudWatch, getting statistics on whatever metric is given, and
    # returns the :datapoints of the response. Defaults to the last hour of
    # data for the given table.
    def self.metric_statistics table_name, opts = {}
      # Work on a copy: removing :region from the caller's hash made every
      # later request built from that same hash (as all_metrics does) fall
      # back to the default region.
      opts   = opts.dup
      region = opts.delete :region

      opts = DEFAULT_OPTS.merge({
        dimensions: [{ name: "TableName", value: table_name }],
        start_time: 1.hour.ago,
        end_time:   Time.now,
      }).merge(opts)

      # Time-like values are sent as ISO 8601 strings; anything else (for
      # example an already formatted string) is passed through untouched.
      [:start_time, :end_time].each do |key|
        opts[key] = opts[key].iso8601 if opts[key].respond_to? :iso8601
      end

      client(region).get_metric_statistics(opts)[:datapoints]
    end
  end
end
@@ -0,0 +1,41 @@
1
module DynamoAutoscale
  # Base class for pollers. Subclasses implement +poll(tables, &block)+,
  # yielding (table_name, data) where data maps a metric name to a
  # { time => value } hash; #run turns that into one dispatch per timestamp.
  class Poller
    # Keys of the datum hash handed to filters and to the dispatcher.
    DATUM_KEYS = [
      :provisioned_writes,
      :provisioned_reads,
      :consumed_writes,
      :consumed_reads,
    ].freeze

    # The poller constructor accepts a hash of options. The following arguments
    # are valid but optional:
    #
    #   - :tables - An array of the tables you would like to poll.
    #   - :filters - This is primarily for working with local data but there
    #   could maybe be a production use for it. Locally, it is used to modify
    #   each datum before it gets sent to the dispatcher. It helps fake setting
    #   provisioned throughput.
    def initialize opts = {}
      @opts = opts
    end

    # Polls the configured tables and, for every distinct timestamp in the
    # returned data (in ascending order), builds a datum with the four
    # DATUM_KEYS, runs it through the :filters (each called with table, time,
    # datum) and hands it to DynamoAutoscale.dispatcher together with the
    # given block. A metric with no value at a timestamp is nil in the datum.
    def run &block
      poll(@opts[:tables]) do |table_name, data|
        logger.debug "[poller] Got data: #{data}"
        table = DynamoAutoscale.tables[table_name]

        # Non-destructive uniq/sort: the previous `sort!.uniq!` chain
        # evaluated to nil whenever there were no duplicate timestamps,
        # because uniq! returns nil when it removes nothing.
        times = data.values.flat_map(&:keys).uniq.sort

        times.each do |time|
          datum = DATUM_KEYS.each_with_object({}) do |key, memo|
            # A series may be absent altogether (e.g. local data that only
            # contains consumed metrics); treat that as "no value".
            series    = data[key]
            memo[key] = series && series[time]
          end

          if @opts[:filters]
            @opts[:filters].each { |filter| filter.call(table, time, datum) }
          end

          DynamoAutoscale.dispatcher.dispatch(table, time, datum, &block)
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module DynamoAutoscale
  # Log formatter producing one coloured line per message:
  #
  #   [<UTC time>][<severity>][<current table name>] <message>
  #
  # Relies on the colour methods (cyan, green, blue, ...) being available on
  # String.
  class PrettyFormatter
    # Formats a single log entry. +progname+ is accepted for compatibility
    # with the formatter interface but is not used. Non-string messages are
    # rendered with #inspect.
    def call(severity, time, progname, msg)
      table_name =
        begin
          DynamoAutoscale.current_table.name
        rescue StandardError
          # Any failure to work out the current table falls back to this.
          "no table"
        end

      text = msg.is_a?(String) ? msg : msg.inspect

      "[#{time.utc.to_s.cyan}][#{severity_color(severity)}][#{table_name.green}] #{text}\n"
    end

    # Returns the severity label wrapped in the colour used for that level.
    # Unknown severities are returned uncoloured.
    def severity_color(severity)
      label = "#{severity}"

      case severity
      when "DEBUG"          then label.blue
      when "INFO"           then label.white
      when "WARN"           then label.yellow
      when "ERROR", "FATAL" then label.red
      else label
      end
    end
  end
end
@@ -0,0 +1,180 @@
1
module DynamoAutoscale
  # A single scaling rule: a threshold on a consumed metric, evaluated over
  # the most recent data of a table, with an action (a :scale option and/or a
  # block) to perform when the threshold is met.
  #
  # Options read by this class:
  #
  #   :greater_than / :less_than - threshold; a number, or a string ending
  #                                in "%" meaning a percentage of the last
  #                                provisioned value for the metric.
  #   :for / :last               - how much data to look at; either an
  #                                ActiveSupport::Duration or an exact number
  #                                of data points.
  #   :min / :max                - minimum required / maximum considered
  #                                number of data points.
  #   :times                     - number of consecutive matching tests
  #                                needed before the rule fires.
  #   :scale                     - { on: :consumed | :provisioned, by: n }.
  class Rule
    attr_accessor :metric, :opts

    CONDITIONS = {
      greater_than: Proc.new { |a, b| a > b },
      less_than:    Proc.new { |a, b| a < b },
    }

    # Builds a rule for +metric+ (:consumed_reads or :consumed_writes).
    # Raises ArgumentError when the options are inconsistent or when neither
    # a :scale option nor a block is given.
    def initialize metric, opts, &block
      @metric = metric
      @opts   = opts
      @block  = block
      @count  = Hash.new(0) # consecutive matches, per table name

      validate_options!
    end

    # Evaluates the rule against +table+. Returns true when the rule fired
    # (its :scale action and/or block were run) and false otherwise.
    #
    # Conditions are tried in CONDITIONS order and the first one that fires
    # ends the evaluation.
    def test table
      last_provisioned = table.last_provisioned_for(@metric)

      CONDITIONS.each do |key, comparator|
        next unless @opts[key]

        value = @opts[key].to_f

        # Get the value as a percentage of the last amount provisioned for
        # this metric if it is a string that ends with a percent symbol.
        if @opts[key].is_a?(String) && @opts[key].end_with?("%")
          # If we don't have a provisioned value yet, we have to move along.
          # We don't know what the headroom is and we can't trigger an
          # alarm.
          next if last_provisioned.nil?

          value = (value / 100.0) * last_provisioned
        end

        duration = @opts[:for] || @opts[:last]
        data     = table.last(duration, @metric)

        # If a specific number of points have been specified to look at,
        # make sure we have exactly that number of points before continuing.
        if !duration.is_a?(ActiveSupport::Duration) && data.length != duration
          return false
        end

        data = data.take(@opts[:max]) if @opts[:max]

        return false if @opts[:min] && data.length < @opts[:min]

        unless data.all? { |datum| comparator.call(datum, value) }
          # A miss breaks the run of consecutive matches.
          @count[table.name] = 0
          next
        end

        @count[table.name] += 1

        # With :times set, only fire on the Nth consecutive match.
        unless @opts[:times].nil? || @count[table.name] == @opts[:times]
          return false
        end

        @count[table.name] = 0

        if scale = @opts[:scale]
          new_val = table.send("last_#{scale[:on]}_for", @metric) * scale[:by]
          DynamoAutoscale.actioners[table].set(@metric, new_val)
        end

        if @block
          @block.call(table, self, DynamoAutoscale.actioners[table])
        end

        return true
      end

      false
    end

    # Returns a human readable description of the rule's conditions. The
    # string ends with a trailing space.
    def to_english
      message = "#{@metric} "

      # Thresholds are interpolated rather than appended with <<: appending
      # an Integer with String#<< adds the character with that codepoint,
      # and appending a Float raises TypeError.
      if @opts[:greater_than]
        message << "were greater than #{@opts[:greater_than]} "
      end

      if @opts[:less_than]
        message << "and " if @opts[:greater_than]
        message << "were less than #{@opts[:less_than]} "
      end

      if @opts[:for] || @opts[:last]
        val = @opts[:for] || @opts[:last]

        if val.is_a? ActiveSupport::Duration
          message << "for #{val.inspect} "
        else
          message << "for #{val} data points "
        end
      end

      if @opts[:min]
        message << "with a minimum of #{@opts[:min]} data points "
      end

      if @opts[:max]
        message << "and " if @opts[:min]
        message << "with a maximum of #{@opts[:max]} data points "
      end

      message
    end

    # Returns the rule as text: "reads(<opts>)" or "writes(<opts>)". The
    # block, if any, is not part of the output.
    def serialize
      metric = @metric == :consumed_reads ? "reads" : "writes"

      "#{metric}(#{@opts})"
    end

    private

    # Checks @metric, @opts and @block, raising ArgumentError on the first
    # problem found.
    def validate_options!
      unless [:consumed_reads, :consumed_writes].include? @metric
        raise ArgumentError, "Invalid metric: #{@metric}. Must be either " +
          ":consumed_reads or :consumed_writes."
      end

      if @opts[:greater_than].nil? && @opts[:less_than].nil?
        raise ArgumentError, "Must specify at least one of greater_than " +
          "or less_than"
      end

      if @opts[:for].nil? && @opts[:last].nil?
        raise ArgumentError, "Need to specify at least one of :for and :last."
      end

      if @opts[:greater_than] && @opts[:less_than] &&
          @opts[:greater_than].to_f >= @opts[:less_than].to_f
        raise ArgumentError, "greater_than cannot be greater than or " +
          "equal to less_than. Rule will never trigger."
      end

      [:greater_than, :less_than].each do |key|
        if @opts[key] && @opts[key].to_f <= 0
          raise ArgumentError, "#{key} cannot be less than or equal to 0"
        end
      end

      # NOTE(review): :count is validated here but #test reads :times; one of
      # the two names is probably a leftover. Left as is to keep behaviour.
      [:min, :max, :count].each do |key|
        if @opts[key] && @opts[key] <= 0
          raise ArgumentError, "#{key} cannot be less than or equal to 0"
        end
      end

      scale = @opts[:scale]

      if scale.nil? && @block.nil?
        raise ArgumentError, "no :scale option or block specified. Rule has no action."
      end

      return unless scale

      unless scale.is_a? Hash
        raise ArgumentError, ":scale option expects to be a hash."
      end

      # Both keys are required. This used to raise only when both were
      # missing, so a missing :by fell through to the `<= 0` comparison below
      # and failed with NoMethodError on nil.
      if scale[:on].nil? || scale[:by].nil?
        raise ArgumentError, ":scale option expects :on and :by options."
      end

      unless [:consumed, :provisioned].include?(scale[:on])
        raise ArgumentError, ":scale { :on } needs to be either :consumed " +
          "or :provisioned"
      end

      if scale[:by] <= 0
        raise ArgumentError, ":scale { :by } needs to be greater than 0"
      end
    end
  end
end
@@ -0,0 +1,69 @@
1
module DynamoAutoscale
  # A collection of rules grouped by table name, built with a small DSL:
  #
  #   reads  opts, &block        - adds a :consumed_reads rule
  #   writes opts, &block        - adds a :consumed_writes rule
  #   table "name" do ... end    - scopes the rules inside to one table
  #
  # Rules declared outside a +table+ block are stored under :all and apply
  # to every table.
  class RuleSet
    attr_accessor :rules

    # Builds a rule set from the Ruby DSL file at +path+, or from the given
    # block when no path is supplied. With neither, the set is empty.
    #
    # NOTE: the file is evaluated as Ruby code in the context of this
    # object, so it must come from a trusted source.
    def initialize path = nil, &block
      @rules = Hash.new { |h, k| h[k] = [] }
      @current_table = :all

      if path
        # Passing the path makes errors in the rule file report the file
        # name and line instead of "(eval)".
        instance_eval(File.read(path), path.to_s)
      elsif block
        instance_eval(&block)
      end
    end

    # Returns the rules that apply to +table_name+: its own rules followed
    # by the :all rules. For :all, returns the stored :all rules.
    def for table_name
      return @rules[:all] if table_name == :all

      # fetch instead of []: plain lookups go through the default proc and
      # insert an empty entry, which changed #serialize and #checksum merely
      # by asking about a table.
      @rules.fetch(table_name, []) + @rules.fetch(:all, [])
    end

    # Tests the rules that apply to +table+ in order, stopping at the first
    # one that fires. Returns true if a rule fired and false otherwise.
    def test table
      self.for(table.name).any? { |rule| rule.test(table) }
    end

    # DSL: rules declared inside the block are attached to +table_name+. The
    # scope returns to :all afterwards, even if the block raises.
    def table table_name, &block
      @current_table = table_name
      instance_eval(&block)
    ensure
      @current_table = :all
    end

    # DSL: adds a rule on consumed writes to the current table scope.
    def writes opts, &block
      @rules[@current_table] << Rule.new(:consumed_writes, opts, &block)
    end

    # DSL: adds a rule on consumed reads to the current table scope.
    def reads opts, &block
      @rules[@current_table] << Rule.new(:consumed_reads, opts, &block)
    end

    # Returns the rule set as text, one "table <name> do ... end" section
    # per stored table with one serialized rule per line.
    def serialize
      @rules.map do |table_name, rules|
        body = rules.map { |rule| " #{rule.serialize}\n" }.join
        "table #{table_name.inspect} do\n#{body}end\n"
      end.join
    end

    # MD5 hex digest of #serialize.
    def checksum
      Digest::MD5.hexdigest(serialize)
    end

    # Returns a new RuleSet containing a fresh Rule for every rule in this
    # one. Each copy gets its own (one level deep) copy of the options and
    # the same block as the original.
    def deep_dup
      duplicate = RuleSet.new
      new_rules = Hash.new { |h, k| h[k] = [] }

      @rules.each do |table_name, rules|
        rules.each do |rule|
          # Rule exposes no reader for its block, so it is read directly.
          # Without it, copying a rule whose only action is a block raised
          # ArgumentError in Rule#initialize.
          action = rule.instance_variable_get(:@block)
          new_rules[table_name] << Rule.new(rule.metric, rule.opts.dup, &action)
        end
      end

      duplicate.rules = new_rules
      duplicate
    end
  end
end