moe 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +19 -0
- data/.ruby-version +1 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.markdown +66 -0
- data/Rakefile +69 -0
- data/lib/moe.rb +29 -0
- data/lib/moe/config.rb +21 -0
- data/lib/moe/dyna.rb +107 -0
- data/lib/moe/sequence.rb +23 -0
- data/lib/moe/sequence/collection.rb +56 -0
- data/lib/moe/sequence/collector.rb +58 -0
- data/lib/moe/sequence/item_fetcher.rb +44 -0
- data/lib/moe/sequence/locksmith.rb +25 -0
- data/lib/moe/sequence/metadata_item.rb +21 -0
- data/lib/moe/table_manager.rb +104 -0
- data/lib/moe/version.rb +3 -0
- data/moe.gemspec +31 -0
- data/spec/lib/config_spec.rb +26 -0
- data/spec/lib/dyna_spec.rb +113 -0
- data/spec/lib/moe_spec.rb +19 -0
- data/spec/lib/sequence/collection_spec.rb +21 -0
- data/spec/lib/sequence/collector_spec.rb +72 -0
- data/spec/lib/sequence/locksmith_spec.rb +19 -0
- data/spec/lib/sequence/metadata_item_spec.rb +24 -0
- data/spec/lib/sequence_spec.rb +27 -0
- data/spec/lib/table_manager_spec.rb +127 -0
- data/spec/spec_helper.rb +28 -0
- metadata +210 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: bc6f10b8fa23a7f93c206d40edffd642321b389d
+  data.tar.gz: 83e093fe27c117a65e63de5fb85797c0bea852d2
+SHA512:
+  metadata.gz: 160f2712f8ecf52074e43c7573562962fedc8927db12afae9320632a52f675e133b28d2164dc23e625f2ce175d7f497f3ed0ae6d1f3415e2eda59735b38d8b72
+  data.tar.gz: d0157eda063695b14915b1f1867739c3c1cef272983102adb4378750f7b4a69dcdad8d646ceb84021e3ed0d7804e4fc244bed292202386e62ad59e175c8235a8
data/.gitignore
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
+2.1.0
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
+Copyright (c) 2014 Geezeo
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown
ADDED
@@ -0,0 +1,66 @@
+[](https://travis-ci.org/fuzz/moe)
+[](https://codeclimate.com/github/fuzz/moe)
+[](https://coveralls.io/r/fuzz/moe)
+
+# Moe
+
+A toolkit for working with DynamoDB at scale
+
+## Features
+
+### Multitable support
+
+Moe provides a table manager to create and rotate tables. Tables may be
+mirrored (each write goes to two or more tables) and/or split (reads happen
+across multiple tables).
+
+Currently mirroring uses a table with the same capacity as the primary table.
+In the future we may add Kinesis support for the mirror table, so that mirrored
+writes go to Kinesis first and then trickle into the mirror table, letting us
+minimize the capacity required. This would also allow us to do Big Data-style
+realtime processing as the data flows through Kinesis, should that be desired.
+
+There are different approaches to splitting based on the use case. The most
+straightforward mechanism is to shut everything down, push out a table change,
+and start everything back up. This ensures that a set of data will not be written
+across multiple tables, providing the most safety when expiring old tables.
+At the other end of the spectrum we can simply write the metadata item as the
+first in the sequence, so knowledge of the sequence disappears when the
+table its metadata is written on does. At this time the splitting strategy is
+determined by the persistence strategy, though it would be good to be able to
+mix and match.
+
+### Persistence strategies
+
+DynamoDB presents a unique and interesting set of constraints. By designing
+persistence strategies around these constraints we can take advantage of useful
+patterns that fall out of them. Thus, unlike a generalized ORM, Moe provides a
+number of different persistence strategies depending on your use cases and
+patterns. Can't find a pattern that is a good match for your workload? Add
+one!
+
+The first persistence strategy available is the one described at
+http://fuzz.github.io -- it provides a model where an "owner" has many things
+that each have many things, using a single DynamoDB table with no indexes. By
+comparison, a more Ruby-typical-but-Dynamo-naive implementation might use two
+tables plus a GSI at ~3x the cost, not to mention the additional complexity as
+capacity and other operational concerns have to be managed on each table and
+the GSI.
+
+### The Future: Partitioning
+
+DynamoDB capacity is evenly divided among a number of partitions that
+increase/decrease along with capacity. We do not know the number of partitions;
+we only know (if we pay attention) when our primary hash key strategy is
+insufficiently random to spread evenly across all partitions. Moe will provide a
+number of partitioning strategies and instrumentation so you can compare your
+actual overall throughput to what AWS reports, in order to determine the
+effectiveness of your key randomization strategy and tune it.
+
+## JRuby
+
+To run the tests you will need to start fake_dynamo manually and then run:
+
+```
+bundle exec rake rspec # use rspec instead of spec to bypass auto fake_dynamo
+```
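The mirrored-write / split-read model described in this README maps onto the `Dyna` helper that appears later in this diff: `create_table` builds N copies of a table, `put_item` writes an item to every table in a list, and `get_item` walks the list until it finds the item. Below is a minimal sketch of that flow, assuming fake_dynamo is running on localhost:4567 (as in the Rakefile that follows) and the same aws-sdk-core preview API this gem targets; the table and item names are arbitrary.

```
require "aws-sdk-core"
require "moe"

# Point the SDK at a local fake_dynamo, exactly as the Rakefile does for the specs.
Aws.config = {
  access_key_id: "xxx",
  secret_access_key: "xxx",
  dynamodb: { api_version: "2012-08-10", endpoint: "http://localhost:4567" },
  region: "us-east-1"
}

dyna = Moe::Dyna.new

# Two mirrored copies: "widgets_1" and "widgets_2".
tables = dyna.create_table "widgets", 2

# Every write goes to both copies...
dyna.put_item tables, "hash" => "owner_1", "payload" => "{}"

# ...and a read checks each copy in order until it finds the item.
dyna.get_item tables, "hash" => { s: "owner_1" }
```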
data/Rakefile
ADDED
@@ -0,0 +1,69 @@
+require "aws-sdk-core"
+require "bundler/setup"
+require "rspec/core/rake_task"
+
+Aws.config = {
+  access_key_id: "xxx",
+  secret_access_key: "xxx",
+  dynamodb: {
+    api_version: "2012-08-10",
+    endpoint: "http://localhost:4567"
+  },
+  region: "us-east-1"
+}
+
+desc "Start fake_dynamo, run tests, cleanup"
+task :spec do |t|
+  require "tmpdir"
+  require "socket"
+
+  dir = Dir.mktmpdir
+  data_file = File.join(dir, "data.fdb")
+
+  # Launch fake_dynamo
+  pid = Process.spawn "fake_dynamo", "-d", data_file, err: "/dev/null", out: "/dev/null"
+
+  # Cleanup
+  at_exit {
+    Process.kill("TERM", pid)
+    FileUtils.rmtree(dir)
+  }
+
+  # Wait for fake_dynamo to start taking requests
+  40.downto(0) do |count| # wait up to ~4 seconds
+    begin
+      s = TCPSocket.new "localhost", 4567
+      s.close
+      break
+    rescue Errno::ECONNREFUSED
+      raise if(count == 0)
+      sleep 0.1
+    end
+  end
+
+  # Clean out old data
+  Rake::Task["dbdrop"].invoke
+
+  # Run specs
+  Rake::Task["rspec"].invoke
+end
+
+spec_tasks = Dir["spec/*/"].map { |d| File.basename(d) }
+
+spec_tasks.each do |folder|
+  RSpec::Core::RakeTask.new("spec:#{folder}") do |t|
+    t.pattern = "./spec/#{folder}/**/*_spec.rb"
+    t.rspec_opts = %w(-fs --color)
+  end
+end
+
+desc "Drop fake_dynamo"
+task :dbdrop do
+  sh "curl -X DELETE http://localhost:4567"
+end
+
+desc "Run specs"
+task rspec: spec_tasks.map { |f| "spec:#{f}" }
+
+desc "Default task"
+task :default => [:spec]
data/lib/moe.rb
ADDED
@@ -0,0 +1,29 @@
+require "aws-sdk-core"
+require "multi_json"
+require "securerandom"
+
+require "moe/config"
+require "moe/dyna"
+require "moe/sequence"
+require "moe/sequence/collection"
+require "moe/sequence/collector"
+require "moe/sequence/item_fetcher"
+require "moe/sequence/locksmith"
+require "moe/sequence/metadata_item"
+require "moe/table_manager"
+require "moe/version"
+
+module Moe
+
+  module ModuleFunctions
+    def collection(name, owner_id)
+      Sequence::Collection.new name, owner_id
+    end
+
+    def collector(name, owner_id)
+      Sequence::Collector.new name, owner_id
+    end
+  end
+  extend ModuleFunctions
+
+end
data/lib/moe/config.rb
ADDED
@@ -0,0 +1,21 @@
+module Moe
+
+  module ModuleFunctions
+    attr_accessor :config
+
+    def configure
+      self.config ||= Config.new
+      yield(config)
+    end
+  end
+  extend ModuleFunctions
+
+  class Config
+    attr_accessor :batch_limit, :tables
+
+    def initialize
+      @batch_limit = 15
+      @tables = {}
+    end
+  end
+end
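Config is the only gem-wide state: a batch_limit (default 15) and a tables registry keyed by sequence name. A small usage sketch of the configure block defined above; the value 25 is arbitrary.

```
require "moe"

Moe.configure do |config|
  config.batch_limit = 25   # defaults to 15
end

Moe.config.batch_limit # => 25
Moe.config.tables      # => {} until a sequence is set up
```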
data/lib/moe/dyna.rb
ADDED
@@ -0,0 +1,107 @@
+module Moe
+  class Dyna
+
+    attr_accessor :dynamodb
+
+    def initialize
+      @dynamodb = Aws.dynamodb
+    end
+
+    def batch_write_item(write_tables, items)
+      explosion = items.map { |item| explode item }
+
+      write_tables.each do |table_name|
+        dynamodb.batch_write_item request_items: {
+          table_name => batchify(explosion)
+        }
+      end
+    end
+
+    def create_table(name, copies=1, hash_key="hash", range_key=nil, read_capacity=5, write_capacity=10)
+      [].tap do |tables|
+        1.upto(copies).each do |copy|
+          schema = table_schema "#{name}_#{copy}",
+                                hash_key,
+                                range_key,
+                                read_capacity,
+                                write_capacity
+
+          table = dynamodb.create_table schema
+
+          tables << "#{name}_#{copy}"
+        end
+      end
+    end
+
+    def explode(item)
+      clone = item.clone
+
+      clone.each do |key, value|
+        clone[key] = { s: value }
+      end
+    end
+
+    def get_item(read_tables, key)
+      item = nil
+      read_tables.each do |table_name|
+        item = dynamodb.get_item(table_name: table_name, key: key).item
+        break if item
+      end
+      item
+    end
+
+    def implode(item)
+      item.each_key { |key| item[key] = item[key][:s] }
+    end
+
+    def put_item(write_tables, item)
+      write_tables.each do |table_name|
+        dynamodb.put_item table_name: table_name, item: explode(item)
+      end
+    end
+
+    def find(name)
+      if dynamodb.list_tables.table_names.include? name
+        dynamodb.describe_table table_name: name
+      else
+        false
+      end
+    end
+
+    private
+
+    def batchify(items)
+      items.map do |item|
+        { put_request:
+          { item: item }
+        }
+      end
+    end
+
+    def table_schema(name, hash_key, range_key, read_capacity, write_capacity)
+      { table_name: name,
+        key_schema: [
+          attribute_name: hash_key,
+          key_type: "HASH"
+        ],
+        attribute_definitions: [
+          {
+            attribute_name: hash_key,
+            attribute_type: "S"
+          }
+        ],
+        provisioned_throughput: {
+          read_capacity_units: read_capacity,
+          write_capacity_units: write_capacity
+        }
+      }.tap do |table|
+        if range_key
+          table[:key_schema] << { attribute_name: range_key,
+                                  key_type: "RANGE" }
+          table[:attribute_definitions] << { attribute_name: range_key,
+                                             attribute_type: "S" }
+        end
+      end
+    end
+  end
+end
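explode and implode convert between plain Ruby hashes and the string-typed attribute format the low-level DynamoDB client expects. A quick illustration of the round trip; the values are arbitrary, and building a Dyna assumes Aws.config has already been set, as in the Rakefile.

```
dyna = Moe::Dyna.new

item = { "hash" => "owner_1", "payload" => '{"name":"widget"}' }

exploded = dyna.explode(item)
# => { "hash" => { s: "owner_1" }, "payload" => { s: '{"name":"widget"}' } }

dyna.implode(exploded)
# => { "hash" => "owner_1", "payload" => '{"name":"widget"}' }
```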
data/lib/moe/sequence.rb
ADDED
@@ -0,0 +1,23 @@
+module Moe
+  module Sequence
+
+    module ModuleFunctions
+      def setup(name, copies, read_capacity, write_capacity)
+        return "#{name} already exists in config" if Moe.config.tables[name]
+
+        table_manager = TableManager.new
+
+        tables = table_manager.build name,
+                                     copies,
+                                     "hash",
+                                     "range",
+                                     read_capacity,
+                                     write_capacity
+
+        Moe.config.tables[name] = tables
+      end
+    end
+    extend ModuleFunctions
+
+  end
+end
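Sequence.setup is the wiring step: it asks TableManager (table_manager.rb, not expanded in this excerpt) to build the backing tables for a named sequence and records the result in Moe.config.tables. A hedged sketch of the call, with arbitrary names and capacities.

```
require "moe"

# Build the tables for a "widgets" sequence: 2 copies,
# 10 read / 20 write capacity units, with the "hash"/"range" keys above.
Moe::Sequence.setup "widgets", 2, 10, 20

Moe.config.tables["widgets"] # => whatever TableManager#build returned

# A second call for the same name is guarded:
Moe::Sequence.setup "widgets", 2, 10, 20
# => "widgets already exists in config"
```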
data/lib/moe/sequence/collection.rb
ADDED
@@ -0,0 +1,56 @@
+module Moe
+  module Sequence
+    class Collection
+      attr_accessor :dyna
+      attr_reader :owner_id, :read_tables
+
+      def initialize(name, owner_id)
+        @dyna = Dyna.new
+        @owner_id = owner_id
+        @read_tables = Moe.config.tables[name].first
+      end
+
+      def metadata_items
+        [].tap do |results|
+          read_tables.each do |table_name|
+
+            dyna.dynamodb.query(request table_name).items.each do |item|
+              results << process(table_name, item)
+            end
+
+          end
+        end
+      end
+
+      private
+
+      def process(table_name, item)
+        MetadataItem.new( table_name,
+                          owner_id,
+                          item["range"].s.gsub(/0\./, ""),
+                          item["count"].s.to_i,
+                          MultiJson.load(item["payload"].s) )
+      end
+
+      def request(table_name)
+        {
+          table_name: table_name,
+          key_conditions: {
+            hash: {
+              attribute_value_list: [
+                { s: owner_id }
+              ],
+              comparison_operator: "EQ"
+            },
+            range: {
+              attribute_value_list: [
+                { s: "0" }
+              ],
+              comparison_operator: "BEGINS_WITH"
+            }
+          }
+        }
+      end
+    end
+  end
+end
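Collection is the read side: Moe.collection (from lib/moe.rb) builds one for an owner, and metadata_items queries each read table for items whose range key begins with "0", wrapping each hit in a MetadataItem built from the table name, owner id, range-derived name, count, and the MultiJson-decoded payload. A brief, hedged usage sketch, assuming a "widgets" sequence has already been set up and written to.

```
collection = Moe.collection "widgets", "owner_1"

collection.read_tables
# => the first element of Moe.config.tables["widgets"]

collection.metadata_items.each do |metadata_item|
  # one MetadataItem per "0."-prefixed row found for this owner
end
```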