moe 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.markdown +66 -0
- data/Rakefile +69 -0
- data/lib/moe.rb +29 -0
- data/lib/moe/config.rb +21 -0
- data/lib/moe/dyna.rb +107 -0
- data/lib/moe/sequence.rb +23 -0
- data/lib/moe/sequence/collection.rb +56 -0
- data/lib/moe/sequence/collector.rb +58 -0
- data/lib/moe/sequence/item_fetcher.rb +44 -0
- data/lib/moe/sequence/locksmith.rb +25 -0
- data/lib/moe/sequence/metadata_item.rb +21 -0
- data/lib/moe/table_manager.rb +104 -0
- data/lib/moe/version.rb +3 -0
- data/moe.gemspec +31 -0
- data/spec/lib/config_spec.rb +26 -0
- data/spec/lib/dyna_spec.rb +113 -0
- data/spec/lib/moe_spec.rb +19 -0
- data/spec/lib/sequence/collection_spec.rb +21 -0
- data/spec/lib/sequence/collector_spec.rb +72 -0
- data/spec/lib/sequence/locksmith_spec.rb +19 -0
- data/spec/lib/sequence/metadata_item_spec.rb +24 -0
- data/spec/lib/sequence_spec.rb +27 -0
- data/spec/lib/table_manager_spec.rb +127 -0
- data/spec/spec_helper.rb +28 -0
- metadata +210 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: bc6f10b8fa23a7f93c206d40edffd642321b389d
|
4
|
+
data.tar.gz: 83e093fe27c117a65e63de5fb85797c0bea852d2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 160f2712f8ecf52074e43c7573562962fedc8927db12afae9320632a52f675e133b28d2164dc23e625f2ce175d7f497f3ed0ae6d1f3415e2eda59735b38d8b72
|
7
|
+
data.tar.gz: d0157eda063695b14915b1f1867739c3c1cef272983102adb4378750f7b4a69dcdad8d646ceb84021e3ed0d7804e4fc244bed292202386e62ad59e175c8235a8
|
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
2.1.0
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Geezeo
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/fuzz/moe.png?branch=master)](https://travis-ci.org/fuzz/moe)
|
2
|
+
[![Code Climate](https://codeclimate.com/github/fuzz/moe.png)](https://codeclimate.com/github/fuzz/moe)
|
3
|
+
[![Coverage Status](https://coveralls.io/repos/fuzz/moe/badge.png)](https://coveralls.io/r/fuzz/moe)
|
4
|
+
|
5
|
+
# Moe
|
6
|
+
|
7
|
+
A toolkit for working with DynamoDB at scale
|
8
|
+
|
9
|
+
## Features
|
10
|
+
|
11
|
+
### Multitable support
|
12
|
+
|
13
|
+
Moe provides a table manager to create and rotate tables. Tables may be
|
14
|
+
mirrored (each write goes to two or more tables) and/or split (reads happen
|
15
|
+
across multiple tables).
|
16
|
+
|
17
|
+
Currently mirroring uses a table with the same capacity as the primary table.
|
18
|
+
In the future we may add Kinesis support for the mirror table so the mirrored
|
19
|
+
writes can go to Kinesis first and then trickle into the mirror table so we can
|
20
|
+
minimize the capacity required. This will also allow us to do Big Data type
|
21
|
+
realtime processing as it goes through Kinesis should that be desired.
|
22
|
+
|
23
|
+
There are different approaches to splitting based on the use case. The most
|
24
|
+
straightforward mechanism is to shut everything down, push out a table change,
|
25
|
+
start everything back up. This ensures that a set of data will not be written
|
26
|
+
across multiple tables thus providing the most safety when expiring old tables.
|
27
|
+
At the other end of the spectrum we can simply write the metadata item as the
|
28
|
+
first in the sequence so knowledge of the sequence will disappear when the
|
29
|
+
table its metadata is written on does. At this time the splitting strategy is
|
30
|
+
determined by the persistence strategy though it would be good to be able to
|
31
|
+
mix and match.
|
32
|
+
|
33
|
+
### Persistence strategies
|
34
|
+
|
35
|
+
DynamoDB presents a unique and interesting set of constraints. By designing
|
36
|
+
persistence strategies around these constraints we can take advantage of useful
|
37
|
+
patterns that fall out of them. Thus unlike a generalized ORM moe provides a
|
38
|
+
number of different persistence strategies depending on your use cases and
|
39
|
+
patterns. Cannot find a pattern that is a good match for your workload? Add
|
40
|
+
one!
|
41
|
+
|
42
|
+
The first persistence strategy available is the one described at
|
43
|
+
http://fuzz.github.io -- it provides a model where an "owner" has many things
|
44
|
+
that each have many things using a single DynamoDB table with no indexes. By
|
45
|
+
comparison a more ruby-typical-but-dynamo-naive implementation might use two
|
46
|
+
tables plus a GSI at ~3x the cost. Not to mention the additional complexity as
|
47
|
+
capacity and other operational concerns have to be managed on each table and
|
48
|
+
the GSI.
|
49
|
+
|
50
|
+
### The Future: Partitioning
|
51
|
+
|
52
|
+
DynamoDB capacity is evenly divided among a number of partitions that
|
53
|
+
increase/decrease along with capacity. We do not know the number of partitions;
|
54
|
+
we only know (if we pay attention) when our primary hash key strategy is
|
55
|
+
insufficiently random to spread evenly across all partitions. Moe will provide a
|
56
|
+
number of partitioning strategies and instrumentation so you can compare your
|
57
|
+
actual overall throughput to what AWS reports in order to determine the
|
58
|
+
effectiveness of and tune your key randomization strategy.
|
59
|
+
|
60
|
+
## JRuby
|
61
|
+
|
62
|
+
To run the tests you will need to start fake_dynamo manually and
|
63
|
+
|
64
|
+
```
|
65
|
+
bundle exec rake rspec # use rspec instead of spec to bypass auto fake_dynamo
|
66
|
+
```
|
data/Rakefile
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
require "aws-sdk-core"
require "bundler/setup"
require "fileutils" # used by the at_exit cleanup below; do not rely on rake loading it
require "rspec/core/rake_task"

# Point the AWS SDK at a local fake_dynamo instance; credentials are dummies.
Aws.config = {
  access_key_id: "xxx",
  secret_access_key: "xxx",
  dynamodb: {
    api_version: "2012-08-10",
    endpoint: "http://localhost:4567"
  },
  region: "us-east-1"
}

desc "Start fake_dynamo, run tests, cleanup"
task :spec do |t|
  require "tmpdir"
  require "socket"

  dir       = Dir.mktmpdir
  data_file = File.join(dir, "data.fdb")

  # Launch fake_dynamo with its output silenced
  pid = Process.spawn "fake_dynamo", "-d", data_file, err: "/dev/null", out: "/dev/null"

  # Cleanup: stop fake_dynamo and remove its scratch directory
  at_exit {
    Process.kill("TERM", pid)
    FileUtils.rmtree(dir)
  }

  # Wait for fake_dynamo to start taking requests
  40.downto(0) do |count| # 40 tries x 0.1s: wait up to 4 seconds
    begin
      s = TCPSocket.new "localhost", 4567
      s.close
      break
    rescue Errno::ECONNREFUSED
      raise if count == 0 # give up once the retries are exhausted
      sleep 0.1
    end
  end

  # Clean out old data
  Rake::Task["dbdrop"].invoke

  # Run specs
  Rake::Task["rspec"].invoke
end

spec_tasks = Dir["spec/*/"].map { |d| File.basename(d) }

# One spec:<folder> task per directory under spec/
spec_tasks.each do |folder|
  RSpec::Core::RakeTask.new("spec:#{folder}") do |t|
    t.pattern = "./spec/#{folder}/**/*_spec.rb"
    t.rspec_opts = %w(-fs --color)
  end
end

desc "Drop fake_dynamo"
task :dbdrop do
  sh "curl -X DELETE http://localhost:4567"
end

desc "Run specs"
task rspec: spec_tasks.map { |f| "spec:#{f}" }

desc "Default task"
task :default => [:spec]
|
data/lib/moe.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require "aws-sdk-core"
require "multi_json"
require "securerandom"

require "moe/config"
require "moe/dyna"
require "moe/sequence"
require "moe/sequence/collection"
require "moe/sequence/collector"
require "moe/sequence/item_fetcher"
require "moe/sequence/locksmith"
require "moe/sequence/metadata_item"
require "moe/table_manager"
require "moe/version"

module Moe

  # Convenience constructors exposed on the Moe module itself.
  module ModuleFunctions
    # Build a read-side Sequence::Collection for the given sequence
    # name and owner.
    def collection(name, owner_id)
      Sequence::Collection.new name, owner_id
    end

    # Build a write-side Sequence::Collector for the given sequence
    # name and owner.
    def collector(name, owner_id)
      Sequence::Collector.new name, owner_id
    end
  end
  extend ModuleFunctions

end
|
data/lib/moe/config.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module Moe

  # Functions mixed into the Moe module itself (see `extend` below).
  module ModuleFunctions
    attr_accessor :config

    # Yield the memoized Config instance so callers can adjust settings:
    #
    #   Moe.configure { |c| c.batch_limit = 20 }
    def configure
      self.config ||= Config.new
      yield config
    end
  end
  extend ModuleFunctions

  # Library-wide settings with their defaults.
  class Config
    attr_accessor :batch_limit, :tables

    def initialize
      @batch_limit = 15
      @tables      = {}
    end
  end
end
|
data/lib/moe/dyna.rb
ADDED
@@ -0,0 +1,107 @@
|
|
1
|
+
module Moe
  # Thin wrapper around the AWS DynamoDB client that adds multi-table
  # (mirrored and/or split) reads and writes plus conversion between plain
  # string-valued hashes and DynamoDB's attribute-value wire format.
  class Dyna

    attr_accessor :dynamodb

    def initialize
      @dynamodb = Aws.dynamodb
    end

    # Write every item to every table in +write_tables+, one
    # batch_write_item call per table.
    #
    # NOTE(review): DynamoDB caps batch_write_item at 25 items per request
    # and may return unprocessed_items; neither is handled here — presumably
    # callers stay under Moe.config.batch_limit. Confirm.
    def batch_write_item(write_tables, items)
      explosion = items.map { |item| explode item }

      write_tables.each do |table_name|
        dynamodb.batch_write_item request_items: {
          table_name => batchify(explosion)
        }
      end
    end

    # Create +copies+ tables named "#{name}_1" .. "#{name}_#{copies}" and
    # return the list of created table names.
    def create_table(name, copies=1, hash_key="hash", range_key=nil, read_capacity=5, write_capacity=10)
      [].tap do |tables|
        1.upto(copies) do |copy|
          # The response is not needed; we only track the table names.
          dynamodb.create_table table_schema("#{name}_#{copy}",
                                             hash_key,
                                             range_key,
                                             read_capacity,
                                             write_capacity)

          tables << "#{name}_#{copy}"
        end
      end
    end

    # Convert a plain hash of string values into DynamoDB wire format:
    #   { "k" => "v" }  =>  { "k" => { s: "v" } }
    # Returns a new hash; the input is left untouched.
    def explode(item)
      item.each_with_object({}) do |(key, value), exploded|
        exploded[key] = { s: value }
      end
    end

    # Return the item stored under +key+ from the first table in
    # +read_tables+ that has it, or nil when no table does.
    def get_item(read_tables, key)
      item = nil
      read_tables.each do |table_name|
        item = dynamodb.get_item(table_name: table_name, key: key).item
        break if item
      end
      item
    end

    # Inverse of #explode: unwrap { key => { s: value } } pairs in place.
    def implode(item)
      item.each_key { |key| item[key] = item[key][:s] }
    end

    # Put +item+ (a plain hash, exploded on the way in) into every table
    # in +write_tables+.
    def put_item(write_tables, item)
      write_tables.each do |table_name|
        dynamodb.put_item table_name: table_name, item: explode(item)
      end
    end

    # Return the table description when +name+ exists, otherwise false.
    def find(name)
      if dynamodb.list_tables.table_names.include? name
        dynamodb.describe_table table_name: name
      else
        false
      end
    end

    private

    # Wrap pre-exploded items as put requests for batch_write_item.
    def batchify(items)
      items.map do |item|
        { put_request:
          { item: item }
        }
      end
    end

    # Build a create_table request hash. The range key is appended to the
    # key schema and attribute definitions only when one is given.
    def table_schema(name, hash_key, range_key, read_capacity, write_capacity)
      { table_name: name,
        key_schema: [
          attribute_name: hash_key,
          key_type: "HASH"
        ],
        attribute_definitions: [
          {
            attribute_name: hash_key,
            attribute_type: "S"
          }
        ],
        provisioned_throughput: {
          read_capacity_units: read_capacity,
          write_capacity_units: write_capacity
        }
      }.tap do |table|
        if range_key
          table[:key_schema] << { attribute_name: range_key,
                                  key_type: "RANGE" }
          table[:attribute_definitions] << { attribute_name: range_key,
                                             attribute_type: "S" }
        end
      end
    end
  end
end
|
data/lib/moe/sequence.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Moe
  module Sequence

    # Functions mixed into the Sequence module itself (see `extend` below).
    module ModuleFunctions
      # Create the backing tables for a named sequence and register them in
      # Moe.config. When the name is already configured, no tables are built
      # and an explanatory string is returned instead.
      def setup(name, copies, read_capacity, write_capacity)
        return "#{name} already exists in config" if Moe.config.tables[name]

        tables = TableManager.new.build name,
                                        copies,
                                        "hash",
                                        "range",
                                        read_capacity,
                                        write_capacity

        Moe.config.tables[name] = tables
      end
    end
    extend ModuleFunctions

  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Moe
  module Sequence
    # Read side of a sequence: queries the configured read tables for an
    # owner's metadata items.
    class Collection
      attr_accessor :dyna
      attr_reader :owner_id, :read_tables

      def initialize(name, owner_id)
        @dyna        = Dyna.new
        @owner_id    = owner_id
        @read_tables = Moe.config.tables[name].first
      end

      # Query each read table for this owner's metadata rows (range keys
      # beginning with "0") and return them as MetadataItem objects.
      def metadata_items
        results = []

        read_tables.each do |table_name|
          response = dyna.dynamodb.query(request table_name)

          response.items.each do |item|
            results << process(table_name, item)
          end
        end

        results
      end

      private

      # Turn a raw DynamoDB item into a MetadataItem: remove "0." from the
      # range key, coerce the count, and decode the JSON payload.
      def process(table_name, item)
        MetadataItem.new( table_name,
                          owner_id,
                          item["range"].s.gsub(/0\./, ""),
                          item["count"].s.to_i,
                          MultiJson.load(item["payload"].s) )
      end

      # Query request: hash key equals owner_id AND range key BEGINS_WITH "0".
      def request(table_name)
        {
          table_name: table_name,
          key_conditions: {
            hash: {
              attribute_value_list: [{ s: owner_id }],
              comparison_operator: "EQ"
            },
            range: {
              attribute_value_list: [{ s: "0" }],
              comparison_operator: "BEGINS_WITH"
            }
          }
        }
      end
    end
  end
end
|