hamster 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +36 -2
- data/lib/hamster/entry.rb +1 -1
- data/lib/hamster/trie.rb +39 -24
- data/lib/hamster/version.rb +1 -1
- data/spec/hamster/trie_spec.rb +13 -12
- metadata +2 -2
data/README.rdoc
CHANGED
@@ -1,5 +1,39 @@
|
|
1
1
|
= Hamster
|
2
2
|
|
3
|
-
Hash Array Mapped Tries (HAMT) for Ruby.
|
3
|
+
Hash Array Mapped Tries (HAMT) for Ruby (See http://lamp.epfl.ch/papers/idealhashtrees.pdf).
|
4
4
|
|
5
|
-
|
5
|
+
Why do you care?
|
6
|
+
|
7
|
+
HAMTs are hash tables with one really neat property: their structure enables you to perform very efficient copy-on-write. For example:
|
8
|
+
|
9
|
+
trie = Hamster::Trie.new
|
10
|
+
|
11
|
+
trie.put("Name", "Simon")
|
12
|
+
trie.get("Name") # => nil
|
13
|
+
|
14
|
+
Huh? That's not much use!
|
15
|
+
|
16
|
+
Remember, each instance of a trie is immutable. #put creates an efficient copy containing the modifications. So, let's try that again:
|
17
|
+
|
18
|
+
trie = Hamster::Trie.new
|
19
|
+
|
20
|
+
trie = trie.put("Name", "Simon")
|
21
|
+
trie.get("Name") # => "Simon"
|
22
|
+
|
23
|
+
The same goes for remove:
|
24
|
+
|
25
|
+
trie = Hamster::Trie.new
|
26
|
+
|
27
|
+
trie = trie.put("Name", "Simon")
|
28
|
+
trie = trie.put("Gender", "Male")
|
29
|
+
trie = trie.remove("Name")
|
30
|
+
trie.get("Name") # => nil
|
31
|
+
trie.get("Gender") # => "Male"
|
32
|
+
|
33
|
+
So tell me again why I care?
|
34
|
+
|
35
|
+
As mentioned earlier, HAMTs perform a copy whenever they are modified, which means that there is never a chance that two threads could be modifying the same instance at any one time. And the fact that they are very efficient copies means you don't need to worry about using up gobs of heap space.
|
36
|
+
|
37
|
+
So what's the downside?
|
38
|
+
|
39
|
+
The downside is that, because the implementation is pure Ruby, MRI's built-in, native, hand-crafted C-code implementation of Hash is 10 times faster!
|
data/lib/hamster/entry.rb
CHANGED
data/lib/hamster/trie.rb
CHANGED
@@ -4,10 +4,10 @@ module Hamster
|
|
4
4
|
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
-
def initialize(significant_bits = 0)
|
7
|
+
def initialize(significant_bits = 0, entries = [], children = [])
|
8
8
|
@significant_bits = significant_bits
|
9
|
-
@entries =
|
10
|
-
@children =
|
9
|
+
@entries = entries
|
10
|
+
@children = children
|
11
11
|
end
|
12
12
|
|
13
13
|
# Returns the number of key-value pairs in the trie.
|
@@ -37,26 +37,28 @@ module Hamster
|
|
37
37
|
self
|
38
38
|
end
|
39
39
|
|
40
|
-
# Returns a copy of <tt>self</tt> with given value associated with the key.
|
40
|
+
# Returns a copy of <tt>self</tt> with the given value associated with the key.
|
41
41
|
def put(key, value)
|
42
|
-
dup.put!(key, value)
|
43
|
-
end
|
44
|
-
|
45
|
-
# Associates the given value with the key and returns <tt>self</tt>
|
46
|
-
def put!(key, value)
|
47
42
|
index = index_for(key)
|
48
43
|
entry = @entries[index]
|
44
|
+
|
49
45
|
if entry && !entry.has_key?(key)
|
50
|
-
|
51
|
-
|
46
|
+
children = @children.dup
|
47
|
+
child = children[index]
|
48
|
+
|
49
|
+
children[index] = if child
|
52
50
|
child.put(key, value)
|
53
51
|
else
|
54
52
|
self.class.new(@significant_bits + 5).put!(key, value)
|
55
53
|
end
|
54
|
+
|
55
|
+
self.class.new(@significant_bits, @entries, children)
|
56
56
|
else
|
57
|
-
|
57
|
+
entries = @entries.dup
|
58
|
+
entries[index] = Entry.new(key, value)
|
59
|
+
|
60
|
+
self.class.new(@significant_bits, entries, @children)
|
58
61
|
end
|
59
|
-
self
|
60
62
|
end
|
61
63
|
|
62
64
|
# Retrieves the value corresponding to the given key. If not found, returns <tt>nil</tt>.
|
@@ -75,25 +77,38 @@ module Hamster
|
|
75
77
|
|
76
78
|
# Returns a copy of <tt>self</tt> with the given key/value pair removed. If not found, returns <tt>self</tt>.
|
77
79
|
def remove(key)
|
78
|
-
|
79
|
-
|
80
|
+
index = index_for(key)
|
81
|
+
entry = @entries[index]
|
82
|
+
child = @children[index]
|
83
|
+
if entry && entry.has_key?(key)
|
84
|
+
# TODO: Probably should "pull up" a child entry
|
85
|
+
entries = @entries.dup
|
86
|
+
entries[index] = nil
|
87
|
+
self.class.new(@significant_bits, entries, @children)
|
88
|
+
elsif child
|
89
|
+
new_child = child.remove(key)
|
90
|
+
if new_child != child
|
91
|
+
# TODO: Probably should "prune" empty children
|
92
|
+
children = @children.dup
|
93
|
+
children[index] = new_child
|
94
|
+
self.class.new(@significant_bits, @entries, children)
|
95
|
+
end
|
96
|
+
end || self
|
80
97
|
end
|
81
98
|
|
82
|
-
|
83
|
-
|
99
|
+
protected
|
100
|
+
|
101
|
+
def put!(key, value)
|
102
|
+
@entries[index_for(key)] = Entry.new(key, value)
|
84
103
|
self
|
85
104
|
end
|
86
105
|
|
87
106
|
private
|
88
107
|
|
89
|
-
def initialize_copy(other)
|
90
|
-
@significant_bits = other.instance_eval{@significant_bits}
|
91
|
-
@entries = other.instance_eval{@entries}.dup
|
92
|
-
@children = other.instance_eval{@children}.dup
|
93
|
-
end
|
94
|
-
|
95
108
|
def index_for(key)
|
96
|
-
|
109
|
+
key.hash.abs & 31
|
110
|
+
# puts "#{key}##{key.object_id}:#{key.hash}"
|
111
|
+
# (key.hash.abs >> @significant_bits) & 31
|
97
112
|
end
|
98
113
|
|
99
114
|
end
|
data/lib/hamster/version.rb
CHANGED
data/spec/hamster/trie_spec.rb
CHANGED
@@ -13,12 +13,6 @@ module Hamster
|
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
it "returns values associated with existing keys" do
|
17
|
-
@expected_pairs.each do |key, value|
|
18
|
-
@trie.get(key).should == value
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
16
|
it "is Enumerable" do
|
23
17
|
Trie.is_a?(Enumerable)
|
24
18
|
end
|
@@ -112,13 +106,13 @@ module Hamster
|
|
112
106
|
|
113
107
|
describe "the original" do
|
114
108
|
|
115
|
-
it "
|
109
|
+
it "still has the original key/value pairs" do
|
116
110
|
@expected_pairs.each do |key, value|
|
117
111
|
@trie.get(key).should == value
|
118
112
|
end
|
119
113
|
end
|
120
114
|
|
121
|
-
it "has the original size" do
|
115
|
+
it "still has the original size" do
|
122
116
|
@trie.size.should == @expected_pairs.size
|
123
117
|
end
|
124
118
|
|
@@ -150,7 +144,7 @@ module Hamster
|
|
150
144
|
|
151
145
|
describe "the original" do
|
152
146
|
|
153
|
-
it "
|
147
|
+
it "still has the original key/value pairs" do
|
154
148
|
@expected_pairs.each do |key, value|
|
155
149
|
@trie.get(key).should == value
|
156
150
|
end
|
@@ -160,7 +154,7 @@ module Hamster
|
|
160
154
|
@trie.has_key?("missing").should be_false
|
161
155
|
end
|
162
156
|
|
163
|
-
it "has the original size" do
|
157
|
+
it "still has the original size" do
|
164
158
|
@trie.size.should == @expected_pairs.size
|
165
159
|
end
|
166
160
|
|
@@ -188,6 +182,13 @@ module Hamster
|
|
188
182
|
|
189
183
|
describe "#remove" do
|
190
184
|
|
185
|
+
it "can be used successively to remove all key/value pairs" do
|
186
|
+
@expected_pairs.each do |key, value|
|
187
|
+
@trie = @trie.remove(key)
|
188
|
+
end
|
189
|
+
@trie.should be_empty
|
190
|
+
end
|
191
|
+
|
191
192
|
describe "with existing keys" do
|
192
193
|
|
193
194
|
before do
|
@@ -200,13 +201,13 @@ module Hamster
|
|
200
201
|
|
201
202
|
describe "the original" do
|
202
203
|
|
203
|
-
it "
|
204
|
+
it "still has the original key/value pairs" do
|
204
205
|
@expected_pairs.each do |key, value|
|
205
206
|
@trie.get(key).should == value
|
206
207
|
end
|
207
208
|
end
|
208
209
|
|
209
|
-
it "has the original size" do
|
210
|
+
it "still has the original size" do
|
210
211
|
@trie.size.should == @expected_pairs.size
|
211
212
|
end
|
212
213
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hamster
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Simon Harris
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-10-
|
12
|
+
date: 2009-10-24 00:00:00 +11:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|