mitchellh-hash_ring 0.1

Sign up to get free protection for your applications and to get access to all the features.
data/CREDITS ADDED
@@ -0,0 +1,9 @@
1
+ ======================================
2
+ = hash_ring credits =
3
+ ======================================
4
+
5
+ Original Author (Python Implementation):
6
+ Amir Salihefendic http://amix.dk
7
+
8
+ Ported to Ruby by:
9
+ Mitchell Hashimoto http://mitchellhashimoto.com
data/LICENSE ADDED
@@ -0,0 +1,10 @@
1
+ Copyright (c) 2009, Mitchell Hashimoto
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
5
+
6
+ * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
7
+ * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
8
+ * Neither the name of the owner nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
9
+
10
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README.rdoc ADDED
@@ -0,0 +1,24 @@
1
+ = hash_ring
2
+
3
+ == Background
4
+
5
+ hash_ring is a pure Ruby implementation of consistent hashing.
6
+ hash_ring is based on the original Python code written by
7
+ Amir Salihefendic. A comprehensive blog post detailing the methods
8
+ and reasoning for such a library can be viewed by visiting the following
9
+ URL:
10
+
11
+ http://amix.dk/blog/viewEntry/19367
12
+
13
+ == Usage
14
+
15
+ memcache_servers = ['192.168.0.111:14107',
16
+ '192.168.0.112:14107',
17
+ '192.168.0.113:14108']
18
+
19
+ # Since server 1 has double the RAM, let's weight it
20
+ # twice as much to get twice the keys. This is optional
21
+ weights = { '192.168.0.111:14107' => 2 }
22
+
23
+ ring = HashRing.new(memcache_servers, weights)
24
+ server = ring.get_node('my_key')
data/Rakefile ADDED
@@ -0,0 +1,92 @@
1
+ require 'rake'
2
+ require 'rake/clean'
3
+ require 'rake/packagetask'
4
+ require 'rake/gempackagetask'
5
+ require 'rake/rdoctask'
6
+ require 'spec/rake/spectask'
7
+ require 'fileutils'
8
+
9
+ load 'hash_ring.gemspec'
10
+
11
+ ###################################
12
+ # Clean & Default Task
13
+ ###################################
14
# Build artefact directories removed by `rake clean`.
CLEAN.include('dist','tmp','rdoc')

# Default: wipe previous build output, then build the packages.
# This used to depend on :repackage, but no task of that name is defined in
# this Rakefile (no GemPackageTask is instantiated), so it now depends on the
# :package task defined in the packaging section. Because :clean already
# removes dist/, the packages are always rebuilt from scratch.
task :default => [:clean, :package]
16
+
17
+ ###################################
18
+ # Specs
19
+ ###################################
20
+ desc "Run all specs for hash_ring"
21
+ Spec::Rake::SpecTask.new('spec') do |t|
22
+ t.spec_files = FileList['spec/**/*.rb']
23
+ end
24
+
25
+ ###################################
26
+ # Docs
27
+ ###################################
28
+ Rake::RDocTask.new do |rd|
29
+ rd.main = 'README.rdoc'
30
+
31
+ rd.rdoc_dir = 'doc'
32
+
33
+ rd.rdoc_files.include(
34
+ 'README.rdoc',
35
+ 'LICENSE',
36
+ 'CREDITS',
37
+ 'lib/**/*.rb')
38
+
39
+ rd.title = 'hash_ring'
40
+
41
+ rd.options << '-N' # line numbers
42
+ rd.options << '-S' # inline source
43
+ end
44
+
45
+ ###################################
46
+ # Packaging - Thank you Sinatra
47
+ ###################################
48
# Loads the gem specification from hash_ring.gemspec (relative to the current
# directory) and memoizes it for subsequent calls.
#
# The previous implementation evaluated the file in a thread after setting
# $SAFE = 3; that mechanism is no longer available in current Ruby versions,
# so RubyGems' own loader is used instead.
#
# Returns the Gem::Specification described by the gemspec.
# Raises RuntimeError if the file is missing or does not evaluate to a
# Gem::Specification (Gem::Specification.load returns nil in those cases).
def spec
  @spec ||=
    begin
      require 'rubygems'
      Gem::Specification.load('hash_ring.gemspec') ||
        raise('hash_ring.gemspec could not be loaded as a Gem::Specification')
    end
end
59
+
60
# Path, under dist/, of the build artefact for the current gem version.
#
# ext - file extension to append, including the leading dot
#       (for example '.gem' or '.tar.gz'). Defaults to no extension.
def package(ext='')
  stem = "dist/hash_ring-#{spec.version}"
  stem + ext
end
63
+
64
+ desc 'Build packages'
65
+ task :package => %w[.gem .tar.gz].map {|e| package(e)}
66
+
67
+ desc 'Build and install as local gem'
68
+ task :install => package('.gem') do
69
+ sh "gem install #{package('.gem')}"
70
+ end
71
+
72
+ directory 'dist/'
73
+ CLOBBER.include('dist')
74
+
75
+ file package('.gem') => %w[dist/ hash_ring.gemspec] + spec.files do |f|
76
+ sh "gem build hash_ring.gemspec"
77
+ mv File.basename(f.name), f.name
78
+ end
79
+
80
+ file package('.tar.gz') => %w[dist/] + spec.files do |f|
81
+ sh <<-SH
82
+ git archive \
83
+ --prefix=hash_ring-#{source_version}/ \
84
+ --format=tar \
85
+ HEAD | gzip > #{f.name}
86
+ SH
87
+ end
88
+
89
# Reads the version string straight from the library source, i.e. the value
# assigned to the VERSION constant in lib/hash_ring.rb.
#
# Accepts either single- or double-quoted version strings.
#
# Returns the version as a String (for example "0.1").
# Raises RuntimeError when no quoted VERSION assignment can be found, instead
# of failing with an opaque NoMethodError on nil.
def source_version
  source = File.read('lib/hash_ring.rb')
  found = source.match(/^\s*VERSION\s*=\s*(['"])(.*?)\1/)
  raise 'VERSION constant not found in lib/hash_ring.rb' unless found

  found[2]
end
data/lib/hash_ring.rb ADDED
@@ -0,0 +1,187 @@
1
+ ######################################
2
+ # hash_ring
3
+ # Code ported from Python version written by Amir Salihefendic
4
+ ######################################
5
+ # Copyright (c) 2009, Mitchell Hashimoto, mitchell.hashimoto@gmail.com
6
+ #
7
+
8
+ require 'digest/md5'
9
+
10
+ # = HashRing Class
11
+ #
12
+ # == Background
13
+ #
14
+ # Implements consistent hashing that can be used when
15
+ # the number of server nodes can increase or decrease (like in memcached).
16
+ #
17
+ # Consistent hashing is a scheme that provides a hash table functionality
18
+ # in a way that the adding or removing of one slot
19
+ # does not significantly change the mapping of keys to slots.
20
+ #
21
+ # More information about consistent hashing can be read in these articles:
22
+ #
23
+ # "Web Caching with Consistent Hashing":
24
+ # http://www8.org/w8-papers/2a-webserver/caching/paper2.html
25
+ #
26
+ # "Consistent hashing and random trees:
27
+ # Distributed caching protocols for relieving hot spots on the World Wide Web (1997)":
28
+ # http://citeseerx.ist.psu.edu/legacymapper?did=38148
29
+ #
30
+ # == Usage
31
+ #
32
+ # memcache_servers = ['192.168.0.111:14107',
33
+ # '192.168.0.112:14107',
34
+ # '192.168.0.113:14108']
35
+ #
36
+ # # Since server 1 has double the RAM, let's weight it
37
+ # # twice as much to get twice the keys. This is optional
38
+ # weights = { '192.168.0.111:14107' => 2 }
39
+ #
40
+ # ring = HashRing.new(memcache_servers, weights)
41
+ # server = ring.get_node('my_key')
42
+ #
43
class HashRing
  VERSION = '0.1'

  #
  # Creates a HashRing instance and builds the ring immediately.
  #
  # == parameters
  #
  # * nodes - A list of objects which have a proper to_s representation.
  #   nil (the default) is treated as an empty list and yields an empty ring.
  # * weights - A hash (dictionary, not to be mixed up with HashRing)
  #   which sets weights to the nodes. Keys must be the node objects exactly
  #   as they appear in +nodes+. Nodes without an entry get a weight of 1,
  #   so by default all nodes have equal weight.
  def initialize(nodes=nil, weights=nil)
    # A nil node list used to crash in _generate_circle; an empty ring is a
    # documented, valid state (get_node returns nil for it).
    @nodes = nodes || []
    @weights = weights || {}

    _generate_circle
  end

  #
  # Generates the circle: fills the point => node map and the sorted list of
  # points. Any previously generated ring is discarded first.
  #
  # Each node receives floor(40 * node_count * weight / total_weight) digests,
  # and every digest contributes three 32-bit points (taken from digest bytes
  # 0-3, 4-7 and 8-11).
  def _generate_circle
    @ring = {}
    @_sorted_keys = []

    total_weight = @nodes.inject(0) { |sum, node| sum + (@weights[node] || 1) }

    @nodes.each do |node|
      weight = @weights[node] || 1
      factor = ((40 * @nodes.length * weight) / total_weight.to_f).floor.to_i

      factor.times do |j|
        b_key = _hash_digest("#{node}-#{j}")

        3.times do |i|
          key = _hash_val(b_key) { |x| x + (i * 4) }
          @ring[key] = node
          @_sorted_keys.push(key)
        end
      end
    end

    @_sorted_keys.sort!
  end

  #
  # Given a string key a corresponding node is returned. If the
  # ring is empty, nil is returned.
  def get_node(string_key)
    pos = get_node_pos(string_key)
    return nil if pos.nil?

    @ring[@_sorted_keys[pos]]
  end

  #
  # Given a string key a corresponding node's position in the ring
  # is returned. Nil is returned if the ring is empty.
  #
  # The position is the index of the first ring point greater than the
  # key's hash; a key that hashes past the last point wraps around to 0.
  def get_node_pos(string_key)
    return nil if @ring.empty?

    pos = bisect(@_sorted_keys, gen_key(string_key))
    pos == @_sorted_keys.length ? 0 : pos
  end

  #
  # Returns an array of nodes where the key could be stored, starting
  # at the correct position. Each node appears once, in the order it is
  # first met when walking the ring from that position and wrapping around.
  # An empty ring yields an empty array.
  def iterate_nodes(string_key)
    pos = get_node_pos(string_key)
    return [] if pos.nil?

    walk = @_sorted_keys[pos..-1] + @_sorted_keys[0...pos]
    walk.map { |ring_key| @ring[ring_key] }.uniq
  end

  #
  # Given a string key this returns a long value. This long value
  # represents a location on the ring.
  #
  # MD5 is used currently.
  def gen_key(string_key)
    b_key = _hash_digest(string_key)
    _hash_val(b_key) { |x| x }
  end

  #
  # Combines four bytes of a raw digest into one unsigned 32-bit integer.
  #
  # The block maps the offsets 0..3 to the byte indexes to read; the byte at
  # block.call(0) becomes the least significant byte and the one at
  # block.call(3) the most significant.
  #
  # getbyte is used because String#[] returns a one-character String (not an
  # Integer) on Ruby 1.9 and later, which cannot be shifted or OR-ed.
  def _hash_val(b_key, &block)
    (b_key.getbyte(block.call(3)) << 24) |
      (b_key.getbyte(block.call(2)) << 16) |
      (b_key.getbyte(block.call(1)) << 8) |
      b_key.getbyte(block.call(0))
  end

  #
  # Returns raw MD5 digest given a key, as a 16-byte binary String.
  def _hash_digest(key)
    m = Digest::MD5.new
    m.update(key)
    m.digest
  end

  #
  # Bisect an array.
  #
  # arr must be sorted in ascending order. Returns the index of the first
  # element strictly greater than key (the insertion point to the right of
  # any equal elements), or arr.length when no element is greater.
  #
  # Implemented as a binary search so lookups stay cheap on large rings.
  def bisect(arr, key)
    low = 0
    high = arr.length

    while low < high
      mid = (low + high) / 2
      if key < arr[mid]
        high = mid
      else
        low = mid + 1
      end
    end

    low
  end

  def sorted_keys #:nodoc:
    @_sorted_keys
  end
end
@@ -0,0 +1,179 @@
1
+ require File.join(File.dirname(__FILE__), 'spec_base')
2
+
3
+ # Basic constants
4
+ UNWEIGHTED_RUNS = 1000
5
+ UNWEIGHTED_ERROR_BOUND = 0.05
6
+ WEIGHTED_RUNS = 1000
7
+ WEIGHTED_ERROR_BOUND = 0.05
8
+
9
+ describe HashRing do
10
+ include HashRingHelpers
11
+
12
+ describe "bisection" do
13
+ before do
14
+ @ring = HashRing.new(['a'])
15
+ @test_array = [10,20,30]
16
+ end
17
+
18
+ it "should return 0 if it less than the first element" do
19
+ @ring.bisect(@test_array, 5).should eql(0)
20
+ end
21
+
22
+ it "should return the index it should go into to maintain order" do
23
+ @ring.bisect(@test_array, 15).should eql(1)
24
+ end
25
+
26
+ it "should return the final index if greater than all items" do
27
+ @ring.bisect(@test_array, 40).should eql(3)
28
+ end
29
+ end
30
+
31
+ describe "iterating nodes" do
32
+ before do
33
+ @ring = HashRing.new(['a','b','c'])
34
+ end
35
+
36
+ it "should return correct values based on python" do
37
+ a_iterate = @ring.iterate_nodes('a')
38
+ b_iterate = @ring.iterate_nodes('b')
39
+ c_iterate = @ring.iterate_nodes('ccccccccc')
40
+
41
+ a_python = ["a","c","b"]
42
+ b_python = ["b","c","a"]
43
+ c_python = ["c","a","b"]
44
+
45
+ (a_iterate - a_python).should be_empty
46
+ (b_iterate - b_python).should be_empty
47
+ (c_iterate - c_python).should be_empty
48
+ end
49
+ end
50
+
51
+ describe "getting nodes" do
52
+ def check_consistent_assigns
53
+ first_node = @ring.get_node(@consistent_key)
54
+
55
+ 100.times do
56
+ @ring.get_node(@consistent_key).should eql(first_node)
57
+ end
58
+ end
59
+
60
+
61
+ def check_distribution
62
+ # Keys chosen specifically from trying on Python code
63
+ first_node = @ring.get_node('a')
64
+ second_node = @ring.get_node('b')
65
+
66
+ first_node.should_not eql(second_node)
67
+ end
68
+
69
# Draws run_count random keys from @ring and fails if any node receives a
# larger share of them than its allowed upper bound.
#
# run_count   - number of random keys to look up.
# error_bound - slack added to the ideal per-node share.
# weights     - optional node => weight hash; a node's bound is multiplied
#               by its weight (nodes without an entry use 1).
#
# The first hit for a node used to be recorded as 0 instead of 1, so every
# node was under-counted by one; a zero-default hash counts each hit.
def check_probability(run_count, error_bound, weights={})
  counts = Hash.new(0)

  run_count.times do
    counts[@ring.get_node(random_string)] += 1
  end

  total_keys = counts.keys.length

  # Should be bounded, hopefully by 1/total_keys (give or take an error bound)
  ideal_probability = (1.0 / total_keys) + error_bound

  counts.each do |node, count|
    weight = weights[node] || 1
    probability = count / run_count.to_f
    weighted_probability = ideal_probability * weight

    if probability >= weighted_probability
      fail "#{node} has probability: #{probability}"
    end
  end
end
99
+
100
+ describe "without explicit weights" do
101
+ before do
102
+ @ring = HashRing.new(['a','b','c'])
103
+ @consistent_key = 'Hello, World'
104
+ end
105
+
106
+ it "should consistently assign nodes" do
107
+ check_consistent_assigns
108
+ end
109
+
110
+ it "should distribute keys to different buckets" do
111
+ check_distribution
112
+ end
113
+
114
+ it "should assign keys fairly randomly" do
115
+ check_probability(UNWEIGHTED_RUNS, UNWEIGHTED_ERROR_BOUND)
116
+ end
117
+ end
118
+
119
+ describe "with explicit weights" do
120
+ before do
121
+ # Create a hash ring with 'a' having a 2:1 weight
122
+ @weights = { 'a' => 2 }
123
+ @ring = HashRing.new(['a','b','c'], @weights)
124
+ @consistent_key = 'Hello, World'
125
+ end
126
+
127
+ it "should consistently assign nodes" do
128
+ check_consistent_assigns
129
+ end
130
+
131
+ it "should distribute keys to different buckets" do
132
+ check_distribution
133
+ end
134
+
135
+ it "should assign keys fairly randomly, but according to weights" do
136
+ check_probability(WEIGHTED_RUNS, WEIGHTED_ERROR_BOUND, @weights)
137
+ end
138
+ end
139
+ end
140
+
141
+ describe "hashing methods" do
142
+ before do
143
+ @ring = HashRing.new(['a'])
144
+ end
145
+
146
+ it "should return the raw digest for _hash_digest" do
147
+ random_string = 'some random string'
148
+
149
+ m = Digest::MD5.new
150
+ m.update(random_string)
151
+
152
+ @ring._hash_digest(random_string).should eql(m.digest)
153
+ end
154
+
155
+ it "should match the python output for _hash_val" do
156
+ # This output was taken directly from the python library
157
+ py_output = 2830561728
158
+ ruby_output = @ring._hash_val(@ring._hash_digest('a')) { |x| x+4 }
159
+
160
+ ruby_output.should eql(py_output)
161
+ end
162
+ end
163
+
164
+ # THIS IS A VERY DIRTY WAY TO SPEC THIS
165
+ # But given its "random" nature, I figured comparing the two libraries'
166
+ # (one of which is in production on a huge site) output should be
167
+ # "safe enough"
168
+ describe "ring generation" do
169
+ it "should generate the same ring as python, given the same inputs" do
170
+ # Yeah... I know... terrible.
171
+ py_output = [3747649, 3747649, 35374473, 35374473, 61840307, 61840307, 82169324, 82169324, 99513906, 99513906, 171267966, 171267966, 189092589, 189092589, 211562723, 211562723, 274168570, 274168570, 309884358, 309884358, 337859634, 337859634, 359487305, 359487305, 437877875, 437877875, 440532511, 440532511, 441427647, 441427647, 540691923, 540691923, 561744136, 561744136, 566640950, 566640950, 573631360, 573631360, 593354384, 593354384, 616375601, 616375601, 653401705, 653401705, 658933707, 658933707, 711407824, 711407824, 717967565, 717967565, 791654246, 791654246, 815230777, 815230777, 836319689, 836319689, 943387296, 943387296, 948212432, 948212432, 954761114, 954761114, 983151602, 983151602, 1041951938, 1041951938, 1044903177, 1044903177, 1109542669, 1109542669, 1215807553, 1215807553, 1234529376, 1234529376, 1240978794, 1240978794, 1241570279, 1241570279, 1245440929, 1245440929, 1295496069, 1295496069, 1359345465, 1359345465, 1371916815, 1371916815, 1440228341, 1440228341, 1463589668, 1463589668, 1542595588, 1542595588, 1571041323, 1571041323, 1580821462, 1580821462, 1609040193, 1609040193, 1663806909, 1663806909, 1673418579, 1673418579, 1725587406, 1725587406, 1743807106, 1743807106, 1745454947, 1745454947, 1770079607, 1770079607, 1816647406, 1816647406, 1823214399, 1823214399, 1858099396, 1858099396, 1889941457, 1889941457, 1903777629, 1903777629, 1956489818, 1956489818, 1981836821, 1981836821, 2027012493, 2027012493, 2036573472, 2036573472, 2063971870, 2063971870, 2113406442, 2113406442, 2203084188, 2203084188, 2245550483, 2245550483, 2369128516, 2369128516, 2401481896, 2401481896, 2405232024, 2405232024, 2439876819, 2439876819, 2498655628, 2498655628, 2666618195, 2666618195, 2709250454, 2709250454, 2725462545, 2725462545, 2761971368, 2761971368, 2820158560, 2820158560, 2847935782, 2847935782, 2873909817, 2873909817, 2960677255, 2960677255, 2970346521, 2970346521, 3065786853, 3065786853, 3173507458, 3173507458, 3187067483, 3187067483, 3189484171, 
3189484171, 3196179889, 3196179889, 3200322582, 3200322582, 3234564840, 3234564840, 3262283799, 3262283799, 3310202261, 3310202261, 3326019031, 3326019031, 3332298302, 3332298302, 3347538539, 3347538539, 3365852132, 3365852132, 3378546819, 3378546819, 3430078214, 3430078214, 3453809654, 3453809654, 3467283568, 3467283568, 3469681976, 3469681976, 3494401641, 3494401641, 3522127265, 3522127265, 3523123410, 3523123410, 3555788439, 3555788439, 3585259232, 3585259232, 3587218875, 3587218875, 3587230532, 3587230532, 3627100732, 3627100732, 3642352831, 3642352831, 3670553958, 3670553958, 3721827301, 3721827301, 3746479890, 3746479890, 3836178086, 3836178086, 3887780209, 3887780209, 3927215372, 3927215372, 3953297430, 3953297430, 3967308270, 3967308270, 4025490138, 4025490138, 4045625605, 4045625605, 4094112530, 4094112530]
172
+
173
+ ruby_output = HashRing.new(['a'])
174
+
175
+ # Calculate the difference of the array, since ordering may be different
176
+ (ruby_output.sorted_keys - py_output).should be_empty
177
+ end
178
+ end
179
+ end
data/spec/spec_base.rb ADDED
@@ -0,0 +1,9 @@
1
+ # Include the hash_ring library
2
+ require File.expand_path(File.dirname(__FILE__) + '/../lib/hash_ring')
3
+
4
+ # Helpers
5
module HashRingHelpers
  # Returns a string of +length+ characters (50 by default), each picked at
  # random from the lowercase letters 'a'..'z'.
  def random_string(length=50)
    # Build the alphabet once per call rather than once per character.
    alphabet = ('a'..'z').to_a
    (0...length).map { alphabet[rand(26)] }.join
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mitchellh-hash_ring
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.1"
5
+ platform: ruby
6
+ authors:
7
+ - Mitchell Hashimoto
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-03-04 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: hash_ring implementation in Ruby
17
+ email: mitchell.hashimoto@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - README.rdoc
24
+ - LICENSE
25
+ files:
26
+ - CREDITS
27
+ - LICENSE
28
+ - README.rdoc
29
+ - Rakefile
30
+ - lib/hash_ring.rb
31
+ - spec/spec_base.rb
32
+ - spec/hash_ring_spec.rb
33
+ has_rdoc: true
34
+ homepage: http://github.com/mitchellh/hash_ring/
35
+ post_install_message:
36
+ rdoc_options:
37
+ - --line-numbers
38
+ - --inline-source
39
+ - --title
40
+ - hash_ring
41
+ - --main
42
+ - README.rdoc
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: "0"
50
+ version:
51
+ required_rubygems_version: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: "0"
56
+ version:
57
+ requirements: []
58
+
59
+ rubyforge_project:
60
+ rubygems_version: 1.2.0
61
+ signing_key:
62
+ specification_version: 2
63
+ summary: Consistent hashing implemented in Ruby
64
+ test_files: []
65
+