dynamo-autoscale 0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/Gemfile +13 -0
- data/Gemfile.lock +58 -0
- data/LICENSE +21 -0
- data/README.md +400 -0
- data/Rakefile +9 -0
- data/aws.sample.yml +16 -0
- data/bin/dynamo-autoscale +131 -0
- data/config/environment/common.rb +114 -0
- data/config/environment/console.rb +2 -0
- data/config/environment/test.rb +3 -0
- data/config/logger.yml +11 -0
- data/config/services/aws.rb +20 -0
- data/config/services/logger.rb +35 -0
- data/data/.gitkeep +0 -0
- data/dynamo-autoscale.gemspec +29 -0
- data/lib/dynamo-autoscale/actioner.rb +265 -0
- data/lib/dynamo-autoscale/cw_poller.rb +49 -0
- data/lib/dynamo-autoscale/dispatcher.rb +39 -0
- data/lib/dynamo-autoscale/dynamo_actioner.rb +59 -0
- data/lib/dynamo-autoscale/ext/active_support/duration.rb +7 -0
- data/lib/dynamo-autoscale/local_actioner.rb +39 -0
- data/lib/dynamo-autoscale/local_data_poll.rb +51 -0
- data/lib/dynamo-autoscale/logger.rb +15 -0
- data/lib/dynamo-autoscale/metrics.rb +192 -0
- data/lib/dynamo-autoscale/poller.rb +41 -0
- data/lib/dynamo-autoscale/pretty_formatter.rb +27 -0
- data/lib/dynamo-autoscale/rule.rb +180 -0
- data/lib/dynamo-autoscale/rule_set.rb +69 -0
- data/lib/dynamo-autoscale/table_tracker.rb +329 -0
- data/lib/dynamo-autoscale/unit_cost.rb +41 -0
- data/lib/dynamo-autoscale/version.rb +3 -0
- data/lib/dynamo-autoscale.rb +1 -0
- data/rlib/dynamodb_graph.r +15 -0
- data/rlib/dynamodb_scatterplot.r +13 -0
- data/rulesets/default.rb +5 -0
- data/rulesets/erroneous.rb +1 -0
- data/rulesets/gradual_tail.rb +11 -0
- data/rulesets/none.rb +0 -0
- data/script/console +3 -0
- data/script/historic_data +46 -0
- data/script/hourly_wastage +40 -0
- data/script/monitor +55 -0
- data/script/simulator +40 -0
- data/script/test +52 -0
- data/script/validate_ruleset +20 -0
- data/spec/actioner_spec.rb +244 -0
- data/spec/rule_set_spec.rb +89 -0
- data/spec/rule_spec.rb +491 -0
- data/spec/spec_helper.rb +4 -0
- data/spec/table_tracker_spec.rb +256 -0
- metadata +178 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
dynamo-autoscale (0.1)
|
5
|
+
aws-sdk
|
6
|
+
colored
|
7
|
+
rbtree
|
8
|
+
ruby-prof
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
activesupport (3.2.13)
|
14
|
+
i18n (= 0.6.1)
|
15
|
+
multi_json (~> 1.0)
|
16
|
+
aws-sdk (1.11.2)
|
17
|
+
json (~> 1.4)
|
18
|
+
nokogiri (< 1.6.0)
|
19
|
+
uuidtools (~> 2.1)
|
20
|
+
bond (0.4.3)
|
21
|
+
coderay (1.0.9)
|
22
|
+
colored (1.2)
|
23
|
+
diff-lcs (1.2.4)
|
24
|
+
i18n (0.6.1)
|
25
|
+
json (1.8.0)
|
26
|
+
method_source (0.8.1)
|
27
|
+
multi_json (1.7.6)
|
28
|
+
nokogiri (1.5.10)
|
29
|
+
pry (0.9.12.2)
|
30
|
+
coderay (~> 1.0.5)
|
31
|
+
method_source (~> 0.8)
|
32
|
+
slop (~> 3.4)
|
33
|
+
rbtree (0.4.1)
|
34
|
+
ripl (0.7.0)
|
35
|
+
bond (~> 0.4.2)
|
36
|
+
rspec (2.13.0)
|
37
|
+
rspec-core (~> 2.13.0)
|
38
|
+
rspec-expectations (~> 2.13.0)
|
39
|
+
rspec-mocks (~> 2.13.0)
|
40
|
+
rspec-core (2.13.1)
|
41
|
+
rspec-expectations (2.13.0)
|
42
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
43
|
+
rspec-mocks (2.13.1)
|
44
|
+
ruby-prof (0.13.0)
|
45
|
+
slop (3.4.5)
|
46
|
+
timecop (0.6.1)
|
47
|
+
uuidtools (2.1.4)
|
48
|
+
|
49
|
+
PLATFORMS
|
50
|
+
ruby
|
51
|
+
|
52
|
+
DEPENDENCIES
|
53
|
+
activesupport
|
54
|
+
dynamo-autoscale!
|
55
|
+
pry
|
56
|
+
ripl
|
57
|
+
rspec
|
58
|
+
timecop
|
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 InvisibleHand Software Ltd
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,400 @@
|
|
1
|
+
# DynamoDB Autoscaling
|
2
|
+
|
3
|
+
**IMPORTANT**: It's highly recommended that you read this README before
|
4
|
+
continuing. This project, if used incorrectly, has a lot of potential to cost
|
5
|
+
you huge amounts of money. Proceeding with caution is paramount, as we cannot be
|
6
|
+
held responsible for misuse that leads to excessive cost on your part.
|
7
|
+
|
8
|
+
There are tools and flags in place that will allow you to dry-run the project
|
9
|
+
before actually allowing it to change your provisioned throughputs and it is
|
10
|
+
highly recommended that you first try running the project as a dry-run and
|
11
|
+
inspecting the log output to make sure it is doing what you expect.
|
12
|
+
|
13
|
+
It is also worth noting that this project is very much in its infancy.
|
14
|
+
|
15
|
+
You have been warned.
|
16
|
+
|
17
|
+
## Rules of the game
|
18
|
+
|
19
|
+
Welcome to the delightful mini-game that is DynamoDB provisioned throughputs.
|
20
|
+
Here are the rules of the game:
|
21
|
+
|
22
|
+
- In a single API call, you can only change your throughput by up to 100% in
|
23
|
+
either direction. In other words, you can decrease as much as you want but
|
24
|
+
you can only increase to up to double what the current throughput is.
|
25
|
+
|
26
|
+
- You may scale up as many times per day as you like, however you may only
|
27
|
+
scale down 4 times per day per table. (If you scale both reads and writes
|
28
|
+
down in the same request, that only counts as 1 downscale used)
|
29
|
+
|
30
|
+
- Scaling is not an instantaneous event. It can take up to 5 minutes for a
|
31
|
+
table's throughput to be updated.
|
32
|
+
|
33
|
+
- Small spikes over your threshold are tolerated but the exact amount of time
|
34
|
+
they are tolerated for seems to vary.
|
35
|
+
|
36
|
+
This project aims to take all of this into consideration and automatically scale
|
37
|
+
your throughputs to enable you to deal with spikes and save money where
|
38
|
+
possible.
|
39
|
+
|
40
|
+
# Configuration
|
41
|
+
|
42
|
+
This library requires AWS keys that have access to both CloudWatch and DynamoDB,
|
43
|
+
for retriving data and sending scaling requests. Using IAM, create a new user, and
|
44
|
+
assign the 'CloudWatch Read Only Access' policy template. In addition, you will
|
45
|
+
need to use the Policy Generator to add at least the following Amazon DynamoDB actions:
|
46
|
+
|
47
|
+
- "dynamodb:DescribeTable"
|
48
|
+
- "dynamodb:ListTables"
|
49
|
+
- "dynamodb:UpdateTable"
|
50
|
+
|
51
|
+
The ARN for the custom policy can be specified as '\*' to allow access to all tables,
|
52
|
+
or alternatively you can refer to the IAM documentation to limit access to specific
|
53
|
+
tables only.
|
54
|
+
|
55
|
+
The project will look for a YAML file in the following locations on start up:
|
56
|
+
|
57
|
+
- ./aws.yml
|
58
|
+
- ENV['AWS_CONFIG']
|
59
|
+
|
60
|
+
If it doesn't find an AWS YAML config in any of those locations, the process
|
61
|
+
prints an error and exits.
|
62
|
+
|
63
|
+
**A sample config can be found in the project root directory.**
|
64
|
+
|
65
|
+
# Usage
|
66
|
+
|
67
|
+
First of all, you'll need to install this project as a gem:
|
68
|
+
|
69
|
+
$ gem install dynamo-autoscale
|
70
|
+
|
71
|
+
This will give you access to the `dynamo-autoscale` executable. For some
|
72
|
+
internal documentation on the executable, you can run:
|
73
|
+
|
74
|
+
$ dynamo-autoscale -h
|
75
|
+
|
76
|
+
This should tell you what flags you can set and what arguments the command
|
77
|
+
expects.
|
78
|
+
|
79
|
+
## Logging
|
80
|
+
|
81
|
+
By default, not a whole lot will be logged at first. If you want to be sure that
|
82
|
+
the gem is working and doing things, you can run with the `DEBUG` environment
|
83
|
+
variable set to `true`:
|
84
|
+
|
85
|
+
$ DEBUG=true dynamo-autoscale <args...>
|
86
|
+
|
87
|
+
Also, if you want pretty coloured logging, you can set the `PRETTY_LOG`
|
88
|
+
environment variable to `true`:
|
89
|
+
|
90
|
+
$ PRETTY_LOG=true DEBUG=true dynamo-autoscale <args...>
|
91
|
+
|
92
|
+
## Rulesets
|
93
|
+
|
94
|
+
One of the first things you'll notice upon looking into the `--help` on the
|
95
|
+
executable is that it's looking for a "rule set". What on earth is a rule set?
|
96
|
+
|
97
|
+
A rule set is the primary user input for dynamo-autoscale. It is a DSL for
|
98
|
+
specifying when to increase and decrease your provisioned throughputs. Here is a
|
99
|
+
very basic rule set:
|
100
|
+
|
101
|
+
``` ruby
|
102
|
+
reads last: 1, greater_than: "90%", scale: { on: :consumed, by: 2 }
|
103
|
+
writes last: 1, greater_than: "90%", scale: { on: :consumed, by: 2 }
|
104
|
+
|
105
|
+
reads for: 2.hours, less_than: "50%", min: 2, scale: { on: :consumed, by: 2 }
|
106
|
+
writes for: 2.hours, less_than: "50%", min: 2, scale: { on: :consumed, by: 2 }
|
107
|
+
```
|
108
|
+
|
109
|
+
You would put this ruleset in a file and then pass that file in as the first
|
110
|
+
argument to `dynamo-autoscale` on the command line.
|
111
|
+
|
112
|
+
The first two rules are designed to deal with spikes. They are saying that if
|
113
|
+
the consumed capacity units is greater than 90% of the provisioned throughput
|
114
|
+
for a single data point, scale the provisioned throughput up by the last
|
115
|
+
consumed units multiplied by two.
|
116
|
+
|
117
|
+
For example, if we had a provisioned reads of 100 and a consumed units of
|
118
|
+
95 comes through, that will trigger that rule and the table will be scaled up to
|
119
|
+
have a provisioned reads of 190.
|
120
|
+
|
121
|
+
The last two rules are controlling downscaling. Because downscaling can only
|
122
|
+
happen 4 times per day per table, the rules are far less aggressive. Those rules
|
123
|
+
are saying: if the consumed capacity is less than 50% of the provisioned for a
|
124
|
+
whole two hours, with a minimum of 2 data points, scale the provisioned
|
125
|
+
throughput to the consumed units multiplied by 2.
|
126
|
+
|
127
|
+
### The :last and :for options
|
128
|
+
|
129
|
+
These options declare how many points or what time range you want to examine.
|
130
|
+
They're aliases of each other and if you specify both, one will be ignored. If
|
131
|
+
you don't specify a `:min` or `:max` option, they will just get as many points
|
132
|
+
as they can and evaluate the rest of the rule even if they don't get a full 2
|
133
|
+
hours of data, or a full 6 points of data. This only affects the start of the
|
134
|
+
process's lifetime, eventually it will have enough data to always get the full
|
135
|
+
range of points you're asking for.
|
136
|
+
|
137
|
+
### The :min and :max options
|
138
|
+
|
139
|
+
If you're not keen on asking for 2 hours of data and not receiving the full
|
140
|
+
range before evaluating the rest of the rule, you can specify a minimum or
|
141
|
+
maximum number of points to evaluate. Currently, this only supports a numeric
|
142
|
+
value. So you can ask for at least 20 points to be present like so:
|
143
|
+
|
144
|
+
``` ruby
|
145
|
+
reads for: 2.hours, less_than: "50%", min: 20, scale: { on: :consumed, by: 2 }
|
146
|
+
```
|
147
|
+
|
148
|
+
### The :greater_than and :less_than options
|
149
|
+
|
150
|
+
You must specify at least one of these options for the rule to actually validate
|
151
|
+
without throwing an error. Having neither makes no sense.
|
152
|
+
|
153
|
+
You can specify either an absolute value or a percentage specified as a string.
|
154
|
+
The percentage will calculate the percentage consumed against the amount
|
155
|
+
provisioned.
|
156
|
+
|
157
|
+
Examples:
|
158
|
+
|
159
|
+
``` ruby
|
160
|
+
reads for: 2.hours, less_than: 10, scale: { on: :consumed, by: 2 }
|
161
|
+
|
162
|
+
reads for: 2, less_than: "20%", scale: { on: :consumed, by: 2 }
|
163
|
+
```
|
164
|
+
|
165
|
+
### The :scale option
|
166
|
+
|
167
|
+
The `:scale` option is a way of doing a simple change to the provisioned
|
168
|
+
throughput without having to specify repetitive stuff in a block. `:scale`
|
169
|
+
expects to be a hash and it expects to have two keys in the hash: `:on` and
|
170
|
+
`:by`.
|
171
|
+
|
172
|
+
`:on` specifies what part of the metric you want to scale on. It can either be
|
173
|
+
`:provisioned` or `:consumed`. In most cases, `:consumed` makes a lot more sense
|
174
|
+
than `:provisioned`.
|
175
|
+
|
176
|
+
`:by` specifies the scale factor. If you want to double the provisioned capacity
|
177
|
+
when a rule triggers, you would write something like this:
|
178
|
+
|
179
|
+
``` ruby
|
180
|
+
reads for: 2.hours, less_than: "30%", scale: { on: :provisioned, by: 0.5 }
|
181
|
+
```
|
182
|
+
|
183
|
+
And that would halve the provisioned throughput for reads if the consumed is
|
184
|
+
less than 30% of the provisioned for 2 hours.
|
185
|
+
|
186
|
+
### Passing a block
|
187
|
+
|
188
|
+
If you want to do something a little bit more complicated with your rules, you
|
189
|
+
can pass a block to them. The block will get passed three things: the table the
|
190
|
+
rule was triggered for, the rule object that triggered and the actioner for that
|
191
|
+
table.
|
192
|
+
|
193
|
+
An actioner is an abstraction of communication with Dynamo and it allows
|
194
|
+
communication to be faked if you want to do a dry run. It exposes a very simple
|
195
|
+
interface. Here's an example:
|
196
|
+
|
197
|
+
``` ruby
|
198
|
+
writes for: 2.hours, greater_than: 200 do |table, rule, actioner|
|
199
|
+
actioner.set(:writes, 300)
|
200
|
+
end
|
201
|
+
```
|
202
|
+
|
203
|
+
This rule will set the provisioned write throughput to 300 if the consumed
|
204
|
+
writes are greater than 200 for 2 hours. The actioner handles a tonne of things
|
205
|
+
under the hood, such as making sure you don't scale up more than you're allowed
|
206
|
+
to in a single call and making sure you don't try to change a table when it's in
|
207
|
+
the updating state.
|
208
|
+
|
209
|
+
It also handles the grouping of downscales, which we will talk about in a later
|
210
|
+
section of the README.
|
211
|
+
|
212
|
+
The `table` argument is a `TableTracker` object. For a run down of what
|
213
|
+
information is available to you I advise checking out the source code in
|
214
|
+
`lib/dynamo-autoscale/table_tracker.rb`.
|
215
|
+
|
216
|
+
### The :count option
|
217
|
+
|
218
|
+
The `:count` option allows you to specify that a rule must be triggered a set
|
219
|
+
number of times in a row before its action is executed.
|
220
|
+
|
221
|
+
Example:
|
222
|
+
|
223
|
+
``` ruby
|
224
|
+
writes for: 10.minutes, greater_than: "90%", count: 3, scale: { on: :consumed, by: 1.5 }
|
225
|
+
```
|
226
|
+
|
227
|
+
This says that if writes are greater than 90% for 10 minutes three checks in a
|
228
|
+
row, scale by the amount consumed multiplied by 1.5. A new check will only
|
229
|
+
happen when the table receives new data from cloud watch, which means that the
|
230
|
+
10 minute windows could potentially overlap.
|
231
|
+
|
232
|
+
## Downscale grouping
|
233
|
+
|
234
|
+
You can downscale reads or writes individually and this will cost you one of
|
235
|
+
your four downscales for the current day. Or, you can downscale reads and writes
|
236
|
+
at the same time and this also costs you one of your four. (Reference:
|
237
|
+
http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html)
|
238
|
+
|
239
|
+
Because of this, the actioner can handle the grouping up of downscales. Let's
|
240
|
+
say you passed in the following options in at the command line:
|
241
|
+
|
242
|
+
$ dynamo-autoscale some/ruleset.rb some_table --group-downscales --flush-after 300
|
243
|
+
|
244
|
+
What this is saying is that if a write downscale came in, the actioner wouldn't
|
245
|
+
fire it off immediately. It would wait 300 seconds, or 5 minutes, to see if a
|
246
|
+
corresponding read downscale was triggered and would run them both at the same
|
247
|
+
time. If no corresponding read came in, after 5 minutes the pending write
|
248
|
+
downscale would get "flushed" and applied without a read downscale.
|
249
|
+
|
250
|
+
This technique helps to save downscales on tables that may have unpredictable
|
251
|
+
consumption. You may need to tweak the `--flush-after` value to match your own
|
252
|
+
situation. By default, there is no `--flush-after` and downscales will wait
|
253
|
+
indefinitely, this may not be desirable.
|
254
|
+
|
255
|
+
## Signaling
|
256
|
+
|
257
|
+
The `dynamo-autoscale` process responds to the SIGUSR1 and SIGUSR2 signals. What
|
258
|
+
we've done may be a dramatic bastardisation of what signals are intended for or
|
259
|
+
how they work, but here's what each does.
|
260
|
+
|
261
|
+
### USR1
|
262
|
+
|
263
|
+
If you send SIGUSR1 to the process as it's running, the process will dump all of
|
264
|
+
the data it has collected on all of the tables it is collecting for into CSV
|
265
|
+
files in the directory it was run in.
|
266
|
+
|
267
|
+
Example:
|
268
|
+
|
269
|
+
$ dynamo-autoscale some/ruleset.rb some_table
|
270
|
+
# Runs as PID 1234. Wait for some time to pass...
|
271
|
+
$ kill -USR1 1234
|
272
|
+
$ cat some_table.csv
|
273
|
+
|
274
|
+
The CSV is in the following format:
|
275
|
+
|
276
|
+
time,provisioned_reads,provisioned_writes,consumed_reads,consumed_writes
|
277
|
+
2013-07-02T10:48:00Z,800.0,600.0,390.93666666666667,30.54
|
278
|
+
2013-07-02T10:49:00Z,800.0,600.0,390.93666666666667,30.54
|
279
|
+
2013-07-02T10:53:00Z,800.0,600.0,386.4533333333333,95.26666666666667
|
280
|
+
2013-07-02T10:54:00Z,800.0,600.0,386.4533333333333,95.26666666666667
|
281
|
+
2013-07-02T10:58:00Z,800.0,600.0,110.275,25.406666666666666
|
282
|
+
2013-07-02T10:59:00Z,800.0,600.0,246.12,54.92
|
283
|
+
|
284
|
+
### USR2
|
285
|
+
|
286
|
+
If you send SIGUSR2 to the process as it's running, the process will take all of
|
287
|
+
the data it has on all of its tables and generate a graph for each table using R
|
288
|
+
(see the Graphs section below). This is handy for visualising what the process
|
289
|
+
is doing, especially after doing a few hours of a `--dry-run`.
|
290
|
+
|
291
|
+
# Developers / Tooling
|
292
|
+
|
293
|
+
Everything below this part of the README is intended for people that want to
|
294
|
+
work on the dynamo-autoscale codebase or use the internal tools that we use for
|
295
|
+
testing new rulesets.
|
296
|
+
|
297
|
+
## Technical details
|
298
|
+
|
299
|
+
The code has a set number of moving parts that are globally available and must
|
300
|
+
implement certain interfaces (for exact details, you would need to study the
|
301
|
+
code):
|
302
|
+
|
303
|
+
- `DynamoAutoscale.poller`: This component is responsible for pulling data
|
304
|
+
from a data source (CloudWatch or Local at the moment) and piping it into
|
305
|
+
the next stage in the pipeline.
|
306
|
+
|
307
|
+
- `DynamoAutoscale.dispatcher`: The dispatcher takes data from the poller and
|
308
|
+
populates a hash table of `TableTracker` objects, as well as checking to see
|
309
|
+
if any of the tables have triggered any rules.
|
310
|
+
|
311
|
+
- `DynamoAutoscale.rules`: The ruleset contains an array of `Rule` objects
|
312
|
+
inside a hash table keyed by table name. The ruleset initializer takes a
|
313
|
+
file path as an argument, or a block, either of these needs to contain a set
|
314
|
+
of rules (examples can be found in the `rulesets/` directory).
|
315
|
+
|
316
|
+
- `DynamoAutoscale.actioners`: The actioners are what perform provision scaling.
|
317
|
+
Locally this is faked, in production it makes API calls to DynamoDB.
|
318
|
+
|
319
|
+
- `DynamoAutoscale.tables`: This is a hash table of `TableTracker` objects,
|
320
|
+
keyed on the table name.
|
321
|
+
|
322
|
+
All of these components are globally available because most of them need access
|
323
|
+
to each other and it was a pain to pass instances of them around to everybody
|
324
|
+
that needed them.
|
325
|
+
|
326
|
+
They're also completely swappable. As long as they implement the right methods
|
327
|
+
you can get your data from anywhere, dispatch your data to anywhere and send
|
328
|
+
your actions to whatever you want. The defaults all work on local data gathered
|
329
|
+
with the `script/historic_data` executable.
|
330
|
+
|
331
|
+
## Testing rules locally
|
332
|
+
|
333
|
+
If you want to test rules on your local machine without having to query
|
334
|
+
CloudWatch or hit DynamoDB, there are tools that facilitate that nicely.
|
335
|
+
|
336
|
+
The first thing you would need to do is gather some historic data. There's a
|
337
|
+
script called `script/historic_data` that you can run to gather data on a
|
338
|
+
specific table and store it into the `data/` directory in a format that all of
|
339
|
+
the other scripts are familiar with.
|
340
|
+
|
341
|
+
Next there are a couple of things you can do.
|
342
|
+
|
343
|
+
### Running a test
|
344
|
+
|
345
|
+
You can run a big batch of data all in one go with the `script/test` script.
|
346
|
+
This script can be invoked like this:
|
347
|
+
|
348
|
+
$ script/test rulesets/default.rb table_name
|
349
|
+
|
350
|
+
Substituting `table_name` with the name of a table that exists in your DynamoDB.
|
351
|
+
This will run through all of the data for that table in time order, logging
|
352
|
+
along the way and triggering rules from the rule set if any were defined.
|
353
|
+
|
354
|
+
At the end, it shows you a report on the amount of wasted, used and lost units.
|
355
|
+
|
356
|
+
#### Graphs
|
357
|
+
|
358
|
+
If you felt so inclined, you could add the `--graph` flag to the above command
|
359
|
+
and the script will generate a graph for you at the end. This will shell out to
|
360
|
+
an R process to generate the graph, so you will need to ensure that you have R
|
361
|
+
installed on your system with the `ggplot2` and `reshape` packages installed.
|
362
|
+
|
363
|
+
Personally, I use a Mac and I attempted to install R through Homebrew but had
|
364
|
+
troubles with compiling packages. I had far more success when I installed R
|
365
|
+
straight from the R website, http://cran.r-project.org/bin/macosx/, and used
|
366
|
+
their GUI R.app to install the packages.
|
367
|
+
|
368
|
+
None of this is required to run the `dynamo-autoscale` executable in production.
|
369
|
+
|
370
|
+
### Simulating data coming in
|
371
|
+
|
372
|
+
There's a script called `script/simulator` that allows you to step through data
|
373
|
+
as it arrives. It takes the exact same arguments as the `script/test` script but
|
374
|
+
instead of running all the way through the data and generating a report,
|
375
|
+
`script/simulator` will pause after each round of new data and drop you into a
|
376
|
+
REPL. This is very handy for debugging tricky situations with your rules or the
|
377
|
+
codebase.
|
378
|
+
|
379
|
+
The simulator does not hit CloudWatch or DynamoDB at any point.
|
380
|
+
|
381
|
+
## Contributing
|
382
|
+
|
383
|
+
Report Issues/Feature requests on
|
384
|
+
[GitHub Issues](https://github.com/invisiblehand/dynamo-autoscale/issues).
|
385
|
+
|
386
|
+
#### Note on Patches/Pull Requests
|
387
|
+
|
388
|
+
* Fork the project.
|
389
|
+
* Make your feature addition or bug fix.
|
390
|
+
* Add tests for it. This is important so we don't break it in a
|
391
|
+
future version unintentionally.
|
392
|
+
* Commit, do not modify the rakefile, version, or history.
|
393
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself so it can be ignored when we pull)
|
394
|
+
* Send a pull request. Bonus points for topic branches.
|
395
|
+
|
396
|
+
### Copyright
|
397
|
+
|
398
|
+
Copyright (c) 2013 InvisibleHand Software Ltd. See
|
399
|
+
[LICENSE](https://github.com/invisiblehand/dynamo-autoscale/blob/master/LICENSE)
|
400
|
+
for details.
|
data/Rakefile
ADDED
data/aws.sample.yml
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
default: &default
|
2
|
+
:access_key_id: your_id
|
3
|
+
:secret_access_key: your_key
|
4
|
+
|
5
|
+
development:
|
6
|
+
<<: *default
|
7
|
+
:dynamo_db_endpoint: dynamodb.us-east-1.amazonaws.com
|
8
|
+
|
9
|
+
test:
|
10
|
+
<<: *default
|
11
|
+
:dynamo_db_endpoint: localhost
|
12
|
+
:dynamo_db_port: 4568
|
13
|
+
|
14
|
+
production:
|
15
|
+
<<: *default
|
16
|
+
:dynamo_db_endpoint: dynamodb.us-east-1.amazonaws.com
|
@@ -0,0 +1,131 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require 'optparse'
|
5
|
+
require 'active_support/all'
|
6
|
+
|
7
|
+
# Force this script into production mode as it's the only thing that will
|
8
|
+
# actually hit DynamoDB in the entire project.
|
9
|
+
ENV['RACK_ENV'] = "production"
|
10
|
+
|
11
|
+
actioner_opts = {}
|
12
|
+
general_opts = {}
|
13
|
+
|
14
|
+
OptionParser.new do |opts|
|
15
|
+
opts.banner = "Usage: dynamo-autoscale ruleset_path table_name [more table names] [options]"
|
16
|
+
|
17
|
+
doc = 'Makes read and write downscales happen at the same time to save ' +
|
18
|
+
'downscales per day.'
|
19
|
+
|
20
|
+
opts.on('-g', '--group-downscales', doc) do
|
21
|
+
actioner_opts[:group_downscales] = true
|
22
|
+
end
|
23
|
+
|
24
|
+
doc = 'Only works in conjunction with --group-downscales. Sets a maximum ' +
|
25
|
+
'amount of time for an operation to be pending before it gets applied to Dynamo'
|
26
|
+
|
27
|
+
opts.on('--flush-after SECONDS', Integer, doc) do |seconds|
|
28
|
+
actioner_opts[:flush_after] = seconds.to_i.seconds
|
29
|
+
end
|
30
|
+
|
31
|
+
doc = 'Stops dynamo-autoscale from talking to DynamoDB. Instead, it just ' +
|
32
|
+
'tracks the changes it would have made locally.'
|
33
|
+
|
34
|
+
opts.on('--dry-run', doc) do
|
35
|
+
general_opts[:dry_run] = true
|
36
|
+
end
|
37
|
+
|
38
|
+
doc = "Sets a minimum value for throughputs to be set to. " +
|
39
|
+
"Defaults to 10."
|
40
|
+
|
41
|
+
opts.on('--minimum-throughput VALUE', Float, doc) do |value|
|
42
|
+
if value < 1.0
|
43
|
+
STDERR.puts "Cannot set minimum throughput to less than 1."
|
44
|
+
exit 1
|
45
|
+
end
|
46
|
+
|
47
|
+
general_opts[:minimum_throughput] = value
|
48
|
+
end
|
49
|
+
|
50
|
+
doc = "Sets a maximum value for throughputs to be set to. " +
|
51
|
+
"Defaults to 20,000."
|
52
|
+
|
53
|
+
opts.on('--maximum-throughput VALUE', Float, doc) do |value|
|
54
|
+
general_opts[:maximum_throughput] = value
|
55
|
+
end
|
56
|
+
|
57
|
+
opts.on( '-h', '--help', 'Display this screen' ) do
|
58
|
+
puts opts
|
59
|
+
exit
|
60
|
+
end
|
61
|
+
end.parse!
|
62
|
+
|
63
|
+
ruleset = ARGV.shift
|
64
|
+
tables = ARGV
|
65
|
+
|
66
|
+
if tables.empty? or ruleset.nil?
|
67
|
+
STDERR.puts "Usage: dynamo-autoscale ruleset table_name [another_table_name ... ]"
|
68
|
+
exit 1
|
69
|
+
end
|
70
|
+
|
71
|
+
if actioner_opts[:flush_after] and actioner_opts[:group_downscales].nil?
|
72
|
+
STDERR.puts "Cannot specify a flush_after value with setting --group-downscales."
|
73
|
+
exit 1
|
74
|
+
end
|
75
|
+
|
76
|
+
require_relative '../config/environment/common'
|
77
|
+
include DynamoAutoscale
|
78
|
+
extend DynamoAutoscale
|
79
|
+
|
80
|
+
dynamo = AWS::DynamoDB.new
|
81
|
+
tables.select! do |table_name|
|
82
|
+
if dynamo.tables[table_name].exists?
|
83
|
+
true
|
84
|
+
else
|
85
|
+
logger.error "Table #{table_name} does not exist inside your DynamoDB."
|
86
|
+
false
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
if tables.empty?
|
91
|
+
STDERR.puts "No valid tables specified."
|
92
|
+
exit 1
|
93
|
+
end
|
94
|
+
|
95
|
+
poller_opts = { tables: tables }
|
96
|
+
|
97
|
+
if general_opts[:dry_run]
|
98
|
+
poller_opts[:filters] = LocalActioner.faux_provisioning_filters
|
99
|
+
end
|
100
|
+
|
101
|
+
DynamoAutoscale.rules = RuleSet.new(ruleset)
|
102
|
+
DynamoAutoscale.dispatcher = Dispatcher.new
|
103
|
+
DynamoAutoscale.poller = CWPoller.new(poller_opts)
|
104
|
+
DynamoAutoscale.actioner_class = general_opts[:dry_run] ? LocalActioner : DynamoActioner
|
105
|
+
DynamoAutoscale.actioner_opts = actioner_opts
|
106
|
+
|
107
|
+
if general_opts[:minimum_throughput]
|
108
|
+
Actioner.minimum_throughput = general_opts[:minimum_throughput]
|
109
|
+
end
|
110
|
+
|
111
|
+
if general_opts[:maximum_throughput]
|
112
|
+
Actioner.maximum_throughput = general_opts[:maximum_throughput]
|
113
|
+
end
|
114
|
+
|
115
|
+
Signal.trap("USR1") do
|
116
|
+
logger.info "[signal] Caught SIGUSR1. Dumping CSV for all tables in #{Dir.pwd}"
|
117
|
+
|
118
|
+
DynamoAutoscale.tables.each do |name, table|
|
119
|
+
table.to_csv! path: File.join(Dir.pwd, "#{table.name}.csv")
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
Signal.trap("USR2") do
|
124
|
+
logger.info "[signal] Caught SIGUSR2. Dumping graphs for all tables in #{Dir.pwd}"
|
125
|
+
|
126
|
+
DynamoAutoscale.tables.each do |name, table|
|
127
|
+
table.graph! path: File.join(Dir.pwd, "#{table.name}.png")
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
DynamoAutoscale.poller.run
|