hamster 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,5 +1,39 @@
1
1
  = Hamster
2
2
 
3
- Hash Array Mapped Tries (HAMT) for Ruby.
3
+ Hash Array Mapped Tries (HAMT) for Ruby (See http://lamp.epfl.ch/papers/idealhashtrees.pdf).
4
4
 
5
- See http://lamp.epfl.ch/papers/idealhashtrees.pdf
5
+ Why do you care?
6
+
7
+ HAMTs are hash tables with one really neat property: their structure enables you to perform very efficient copy-on-write. For example:
8
+
9
+ trie = Hamster::Trie.new
10
+
11
+ trie.put("Name", "Simon")
12
+ trie.get("Name") # => nil
13
+
14
+ Huh? That's not much use!
15
+
16
+ Remember, each instance of a trie is immutable. #put creates an efficient copy containing the modifications. So, let's try that again:
17
+
18
+ trie = Hamster::Trie.new
19
+
20
+ trie = trie.put("Name", "Simon")
21
+ trie.get("Name") # => "Simon"
22
+
23
+ The same goes for remove:
24
+
25
+ trie = Hamster::Trie.new
26
+
27
+ trie = trie.put("Name", "Simon")
28
+ trie = trie.put("Gender", "Male")
29
+ trie = trie.remove("Name")
30
+ trie.get("Name") # => nil
31
+ trie.get("Gender") # => "Male"
32
+
33
+ So tell me again why I care?
34
+
35
+ As mentioned earlier, the fact that HAMTs perform a copy whenever they are modified means that there is never a chance that two threads could be modifying the same instance at any one time. And the fact that they are very efficient copies means you don't need to worry about using up gobs of heap space.
36
+
37
+ So what's the downside?
38
+
39
+ The downside is that because the implementation is pure Ruby, MRI's built-in, native, hand-crafted C implementation of Hash is 10 times faster!
data/lib/hamster/entry.rb CHANGED
@@ -10,7 +10,7 @@ module Hamster
10
10
  end
11
11
 
12
12
  def has_key?(key)
13
- @key == key
13
+ @key.eql?(key)
14
14
  end
15
15
 
16
16
  end
data/lib/hamster/trie.rb CHANGED
@@ -4,10 +4,10 @@ module Hamster
4
4
 
5
5
  include Enumerable
6
6
 
7
- def initialize(significant_bits = 0)
7
+ def initialize(significant_bits = 0, entries = [], children = [])
8
8
  @significant_bits = significant_bits
9
- @entries = []
10
- @children = []
9
+ @entries = entries
10
+ @children = children
11
11
  end
12
12
 
13
13
  # Returns the number of key-value pairs in the trie.
@@ -37,26 +37,28 @@ module Hamster
37
37
  self
38
38
  end
39
39
 
40
- # Returns a copy of <tt>self</tt> with given value associated with the key.
40
+ # Returns a copy of <tt>self</tt> with the given value associated with the key.
41
41
  def put(key, value)
42
- dup.put!(key, value)
43
- end
44
-
45
- # Associates the given value with the key and returns <tt>self</tt>
46
- def put!(key, value)
47
42
  index = index_for(key)
48
43
  entry = @entries[index]
44
+
49
45
  if entry && !entry.has_key?(key)
50
- child = @children[index]
51
- @children[index] = if child
46
+ children = @children.dup
47
+ child = children[index]
48
+
49
+ children[index] = if child
52
50
  child.put(key, value)
53
51
  else
54
52
  self.class.new(@significant_bits + 5).put!(key, value)
55
53
  end
54
+
55
+ self.class.new(@significant_bits, @entries, children)
56
56
  else
57
- @entries[index] = Entry.new(key, value)
57
+ entries = @entries.dup
58
+ entries[index] = Entry.new(key, value)
59
+
60
+ self.class.new(@significant_bits, entries, @children)
58
61
  end
59
- self
60
62
  end
61
63
 
62
64
  # Retrieves the value corresponding to the given key. If not found, returns <tt>nil</tt>.
@@ -75,25 +77,38 @@ module Hamster
75
77
 
76
78
  # Returns a copy of <tt>self</tt> with the given key/value pair removed. If not found, returns <tt>self</tt>.
77
79
  def remove(key)
78
- has_key?(key) or return self
79
- dup.remove!(key)
80
+ index = index_for(key)
81
+ entry = @entries[index]
82
+ child = @children[index]
83
+ if entry && entry.has_key?(key)
84
+ # TODO: Probably should "pull up" a child entry
85
+ entries = @entries.dup
86
+ entries[index] = nil
87
+ self.class.new(@significant_bits, entries, @children)
88
+ elsif child
89
+ new_child = child.remove(key)
90
+ if new_child != child
91
+ # TODO: Probably should "prune" empty children
92
+ children = @children.dup
93
+ children[index] = new_child
94
+ self.class.new(@significant_bits, @entries, children)
95
+ end
96
+ end || self
80
97
  end
81
98
 
82
- # Removes the given key/value pair and returns <tt>self</tt>
83
- def remove!(key)
99
+ protected
100
+
101
+ def put!(key, value)
102
+ @entries[index_for(key)] = Entry.new(key, value)
84
103
  self
85
104
  end
86
105
 
87
106
  private
88
107
 
89
- def initialize_copy(other)
90
- @significant_bits = other.instance_eval{@significant_bits}
91
- @entries = other.instance_eval{@entries}.dup
92
- @children = other.instance_eval{@children}.dup
93
- end
94
-
95
108
  def index_for(key)
96
- (key.hash.abs >> @significant_bits) & 31
109
+ key.hash.abs & 31
110
+ # puts "#{key}##{key.object_id}:#{key.hash}"
111
+ # (key.hash.abs >> @significant_bits) & 31
97
112
  end
98
113
 
99
114
  end
@@ -1,5 +1,5 @@
1
1
  module Hamster
2
2
 
3
- VERSION = "0.1.0".freeze
3
+ VERSION = "0.1.1".freeze
4
4
 
5
5
  end
@@ -13,12 +13,6 @@ module Hamster
13
13
  end
14
14
  end
15
15
 
16
- it "returns values associated with existing keys" do
17
- @expected_pairs.each do |key, value|
18
- @trie.get(key).should == value
19
- end
20
- end
21
-
22
16
  it "is Enumerable" do
23
17
  Trie.is_a?(Enumerable)
24
18
  end
@@ -112,13 +106,13 @@ module Hamster
112
106
 
113
107
  describe "the original" do
114
108
 
115
- it "returns values associated with existing keys" do
109
+ it "still has the original key/value pairs" do
116
110
  @expected_pairs.each do |key, value|
117
111
  @trie.get(key).should == value
118
112
  end
119
113
  end
120
114
 
121
- it "has the original size" do
115
+ it "still has the original size" do
122
116
  @trie.size.should == @expected_pairs.size
123
117
  end
124
118
 
@@ -150,7 +144,7 @@ module Hamster
150
144
 
151
145
  describe "the original" do
152
146
 
153
- it "returns values associated with existing keys" do
147
+ it "still has the original key/value pairs" do
154
148
  @expected_pairs.each do |key, value|
155
149
  @trie.get(key).should == value
156
150
  end
@@ -160,7 +154,7 @@ module Hamster
160
154
  @trie.has_key?("missing").should be_false
161
155
  end
162
156
 
163
- it "has the original size" do
157
+ it "still has the original size" do
164
158
  @trie.size.should == @expected_pairs.size
165
159
  end
166
160
 
@@ -188,6 +182,13 @@ module Hamster
188
182
 
189
183
  describe "#remove" do
190
184
 
185
+ it "can be used successively to remove all key/value pairs" do
186
+ @expected_pairs.each do |key, value|
187
+ @trie = @trie.remove(key)
188
+ end
189
+ @trie.should be_empty
190
+ end
191
+
191
192
  describe "with existing keys" do
192
193
 
193
194
  before do
@@ -200,13 +201,13 @@ module Hamster
200
201
 
201
202
  describe "the original" do
202
203
 
203
- it "returns values associated with existing keys" do
204
+ it "still has the original key/value pairs" do
204
205
  @expected_pairs.each do |key, value|
205
206
  @trie.get(key).should == value
206
207
  end
207
208
  end
208
209
 
209
- it "has the original size" do
210
+ it "still has the original size" do
210
211
  @trie.size.should == @expected_pairs.size
211
212
  end
212
213
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hamster
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simon Harris
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-10-23 00:00:00 +11:00
12
+ date: 2009-10-24 00:00:00 +11:00
13
13
  default_executable:
14
14
  dependencies: []
15
15