trie-file 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8bc2e8641b2afc2c19a2e519a6af559efdf08a97
4
+ data.tar.gz: 85c0da8ef2d170d9588525adcccb6edad71ebad6
5
+ SHA512:
6
+ metadata.gz: 3bab7cec0c62f28a54fce93b5e8f9eabc05390fd67c077317e0477663bcbe5bc9aa0b4feb15c50528c2fdc241183b8e4290faeea6d79aed6d04e2b5019645891
7
+ data.tar.gz: 55ec9c2d427f8a399a4470905dfb1c78ac2a37d799af20531b5525128e4da0707f6256db5e05531e37aaff7dc2b3612a106009d35a1017479ad54239ddc93d72
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
4
+
5
+ group :development, :test do
6
+ gem 'pry-nav'
7
+ gem 'rake'
8
+ end
9
+
10
+ group :test do
11
+ gem 'rspec'
12
+ gem 'rr'
13
+ end
@@ -0,0 +1,3 @@
1
+ == 1.0.0
2
+
3
+ * Birthday!
@@ -0,0 +1,77 @@
1
+ trie-file
2
+ =========
3
+
4
+ Memory-efficient cached trie and trie storage.
5
+
6
+ ## Installation
7
+
8
+ `gem install trie-file`
9
+
10
+ Then, somewhere in your code:
11
+
12
+ ```ruby
13
+ require 'trie-file'
14
+ ```
15
+
16
+ ## Rationale
17
+
18
+ trie-file contains two things: an implementation of the [trie data structure](http://en.wikipedia.org/wiki/Trie), and a way to write tries to disk and read them back again. It tries (ha!) to do this in a memory-efficient way by packing the trie structure into a specialized binary format. This packing means the trie can be searched entirely _on disk_, without loading the whole structure into memory: a lookup visits one node per character of the key. Each key you look up is cached, so subsequent lookups of the same key are even faster (constant time). trie-file is also capable of reading and writing entire trie structures.
19
+
20
+ Because tries (also known as prefix trees) rely on keys having common prefixes, you're required to use string keys. An in-memory trie accepts any object as a value, but values must be strings (or `nil`) if the trie is going to be written to disk.
21
+
22
+ ## What's a Trie?
23
+
24
+ For an in-depth explanation, see the Wikipedia link above. Essentially, tries are key-value data structures that work similarly to Ruby hashes. You add a key and a value to the trie and can later retrieve the value using the same key.
25
+
26
+ ## Basic Usage
27
+
28
+ Create a trie and write it to disk:
29
+
30
+ ```ruby
31
+ trie = TrieFile::Trie.new
32
+ trie.add('foo', 'bar')
33
+
34
+ TrieFile::File.open('/path/to/file', 'wb') do |f|
35
+ f.write_trie(trie)
36
+ end
37
+ ```
38
+
39
+ Open a file handle to a trie and search it _on disk_:
40
+
41
+ ```ruby
42
+ trie_file = TrieFile::File.open('/path/to/file', 'rb')
43
+ trie_file.find('foo') # => 'bar'
44
+ ```
45
+
46
+ To read an entire trie into memory, use `TrieFile::File.read` instead of `TrieFile::File.open`:
47
+
48
+ ```ruby
49
+ trie = TrieFile::File.read('/path/to/file')
50
+ ```
51
+
52
+ ## Choosing a Hash Method
53
+
54
+ By default, trie-file does not hash your keys. Instead, it iterates over each character in the key and constructs the internal trie structure. trie-file also supports hashing keys with the md5 or sha1 algorithms to minimize your search space:
55
+
56
+ ```ruby
57
+ trie = TrieFile::Trie.new(nil, :sha1)
58
+ ```
59
+
60
+ If you wrote a trie to disk that was hashed using sha1, you'll need to supply the hash mode as an additional argument to `TrieFile::File.open` and `TrieFile::File.read`:
61
+
62
+ ```ruby
63
+ trie_file = TrieFile::File.open('/path/to/file', 'rb', :sha1)
64
+ trie = TrieFile::File.read('/path/to/file', :sha1)
65
+ ```
66
+
67
+ ## Requirements
68
+
69
+ No external requirements.
70
+
71
+ ## Running Tests
72
+
73
+ `bundle exec rspec` should do the trick :)
74
+
75
+ ## Authors
76
+
77
+ * Cameron C. Dutro: http://github.com/camertron
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'rubygems' unless ENV['NO_RUBYGEMS']
4
+
5
+ require 'bundler'
6
+ require 'rspec/core/rake_task'
7
+ require 'rubygems/package_task'
8
+
9
+ require './lib/trie-file'
10
+
11
+ Bundler::GemHelper.install_tasks
12
+
13
+ task :default => :spec
14
+
15
+ desc 'Run specs'
16
+ RSpec::Core::RakeTask.new do |t|
17
+ t.pattern = './spec/**/*_spec.rb'
18
+ end
@@ -0,0 +1,5 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'trie-file/trie'
4
+ require 'trie-file/node'
5
+ require 'trie-file/file'
@@ -0,0 +1,232 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'thread'
4
+
5
+ # root:
6
+ # 2b value length
7
+ # nb value
8
+ # 2b (number of children)
9
+ # children metadata:
10
+ # 3b letter
11
+ # 3b child location
12
+ # children:
13
+ # node:
14
+ # 2b value length
15
+ # ...
16
+
17
module TrieFile
  # Reads and writes tries in trie-file's packed binary format.
  #
  # Each node is stored as:
  #   2b value length, nb value, 2b number of children,
  #   then for every child: 3b letter, 3b absolute byte position of the child.
  # The child nodes follow their parent in depth-first order. All integers
  # are big-endian; letters are left-padded with zero bytes to 3 bytes.
  #
  # An instance wraps an open IO handle and can look keys up directly on
  # disk (#find) or serialize a whole trie (#write_trie).
  class File
    BYTE_LENGTH = 8
    LETTER_FIELD_LENGTH = 3
    POSITION_FIELD_LENGTH = 3
    # Number of distinct positions a 3-byte field can hold; the largest
    # writable position is therefore POSITION_MAX - 1.
    POSITION_MAX = 2 ** (BYTE_LENGTH * POSITION_FIELD_LENGTH)
    VALUE_FIELD_LENGTH = 2
    CHILD_COUNT_FIELD_LENGTH = 2

    attr_reader :handle, :hash_mode

    # Opens +path+ and wraps the handle in a TrieFile::File.
    #
    # With a block, the file is yielded and the handle is closed afterwards,
    # even if the block raises. The (closed) file is still returned.
    #
    # @param path [String] location of the trie file
    # @param mode [String] a binary open mode such as 'rb' or 'wb'
    # @param hash_mode [Symbol] :none, :md5 or :sha1; must match the mode
    #   the trie was written with
    # @return [TrieFile::File]
    # @raise [ArgumentError] if +mode+ is not a binary mode
    def self.open(path, mode, hash_mode = :none)
      handle = ::File.open(path, mode)

      unless handle.binmode?
        # Don't leak the descriptor when rejecting the mode.
        handle.close
        raise ArgumentError, 'TrieFile must be opened in binary mode.'
      end

      file = new(handle, hash_mode)
      return file unless block_given?

      begin
        yield file
      ensure
        handle.close unless handle.closed?
      end

      file
    end

    # Loads the complete trie stored at +path+ into memory.
    #
    # @param path [String] location of the trie file
    # @param hash_mode [Symbol] hash mode the trie was written with
    # @return [TrieFile::Trie]
    def self.read(path, hash_mode = :none)
      root = ::File.open(path, 'rb') { |io| read_node(io) }
      Trie.new(root, hash_mode)
    end

    # @param handle [IO] an open, binary-mode IO object
    # @param hash_mode [Symbol] :none, :md5 or :sha1
    def initialize(handle, hash_mode)
      @handle = handle
      @semaphore = Mutex.new
      @hash_mode = hash_mode
    end

    # Serializes +trie+ to the handle, starting at the current position.
    # Byte positions are computed first so that each parent can record
    # where its children will end up.
    def write_trie(trie)
      mark(trie)
      self.class.write_node(trie.root, handle)
    end

    # Looks +key+ up directly on disk, following one child pointer per
    # character. Successful lookups are cached per key.
    #
    # Note that a node which exists but carries no value reads back as an
    # empty string, because the format stores only a zero value length.
    #
    # @param key [String]
    # @return [String, nil] the stored value, or nil when the key's path
    #   does not exist in the file
    # @raise [IOError] if the underlying handle has been closed
    def find(key)
      raise IOError, 'file is not currently open.' if closed?

      @semaphore.synchronize do
        key = hash_key(key)

        cache.fetch(key) do
          handle.seek(0, IO::SEEK_SET)

          key.each_char do |char|
            _, child_metadata = self.class.read_node_header(handle)
            metadata = child_metadata.find { |letter, _| letter == char }

            # Misses are not cached; the mutex is released on the way out.
            return nil unless metadata
            handle.seek(metadata.last, IO::SEEK_SET)
          end

          cache[key] = self.class.read_value(handle)
        end
      end
    end

    def closed?
      @handle.closed?
    end

    def close
      handle.close
    end

    # The class-level helpers below are public: instances reach them through
    # self.class, and a bare `private` does not apply to `def self.` methods.

    # Reads a length-prefixed value (2b length, nb value) from +io+.
    def self.read_value(io)
      io.read(read_int(io, VALUE_FIELD_LENGTH))
    end

    # Reads a node's value and child table, leaving +io+ positioned at the
    # first byte after the table.
    #
    # @return [Array] [value, [[letter, child_position], ...]]
    def self.read_node_header(io)
      value = read_value(io)
      number_of_children = read_int(io, CHILD_COUNT_FIELD_LENGTH)

      child_metadata = Array.new(number_of_children) do
        # 3b letter, then 3b child location
        letter = read_bytes(io, LETTER_FIELD_LENGTH)
        [letter, read_int(io, POSITION_FIELD_LENGTH)]
      end

      [value, child_metadata]
    end

    # Recursively reads a node and all of its descendants from +io+. Relies
    # on children being stored depth-first right after their parent, so no
    # seeking is required.
    def self.read_node(io)
      value, child_metadata = read_node_header(io)
      node = Node.new(value)

      child_metadata.each do |letter, _position|
        node.add_child(letter, read_node(io))
      end

      node
    end

    # Recursively writes +node+ and its descendants to +io+. Every node's
    # byte_pos must already have been assigned (see #write_trie).
    #
    # @raise [RuntimeError] if a letter needs more than 3 bytes or a child
    #   position does not fit in the 3-byte position field
    # @raise [RangeError] if a value length or child count overflows its field
    def self.write_node(node, io)
      write_int(io, node.value_bytesize, VALUE_FIELD_LENGTH)
      write_bytes(io, node.value_bytes.to_a)
      write_int(io, node.children.size, CHILD_COUNT_FIELD_LENGTH)

      node.children.each_pair do |letter, child_node|
        # 3b letter
        if letter.bytesize > LETTER_FIELD_LENGTH
          raise "Letter #{letter} is larger than #{LETTER_FIELD_LENGTH} bytes."
        end
        write_bytes(io, letter.bytes.to_a, LETTER_FIELD_LENGTH)

        # 3b child location; POSITION_MAX itself already needs a 4th byte.
        if child_node.byte_pos >= POSITION_MAX
          raise "Encountered write position greater than #{POSITION_FIELD_LENGTH} bytes."
        end
        write_int(io, child_node.byte_pos, POSITION_FIELD_LENGTH)
      end

      node.children.each_value { |child_node| write_node(child_node, io) }
    end

    # Writes +int+ big-endian, left-padded with zero bytes to +bytesize+.
    #
    # @raise [RangeError] if +int+ needs more than +bytesize+ bytes; writing
    #   it anyway would shift every following field
    def self.write_int(io, int, bytesize = int_bytesize(int))
      if int_bytesize(int) > bytesize
        raise RangeError, "Integer #{int} does not fit in #{bytesize} bytes."
      end

      (bytesize - 1).downto(0) do |i|
        io.putc((int >> (i * BYTE_LENGTH)) & 0xFF)
      end
    end

    # Minimum number of bytes needed to represent +int+ (0 for zero).
    # Uses integer shifts only, so the result is exact for any magnitude.
    def self.int_bytesize(int)
      size = 0

      while int > 0
        int >>= BYTE_LENGTH
        size += 1
      end

      size
    end

    # Writes +bytes+ left-padded with zero bytes up to +bytesize+.
    def self.write_bytes(io, bytes, bytesize = bytes.size)
      (bytesize - bytes.size).times { io.putc("\0") }
      bytes.each { |byte| io.putc(byte) }
    end

    # Reads a big-endian unsigned integer of +bytesize+ bytes.
    def self.read_int(io, bytesize)
      bytesize.times.inject(0) do |sum, _|
        (sum << BYTE_LENGTH) | io.readbyte
      end
    end

    # Reads a zero-padded letter field and returns it as a UTF-8 string.
    # The bytes are the letter's raw encoded bytes, so they are packed as
    # bytes ('C*') rather than as code points. An all-zero field decodes to
    # the NUL character.
    def self.read_bytes(io, bytesize)
      bytes = Array.new(bytesize) { io.readbyte }
      significant = bytes.drop_while { |byte| byte == 0 }
      significant = [0] if significant.empty?
      significant.pack('C*').force_encoding(Encoding::UTF_8)
    end

    private

    def hash_key(key)
      Trie.hash_key(key, hash_mode)
    end

    def cache
      @cache ||= {}
    end

    def mark(trie)
      mark_node(trie.root, 0)
    end

    # Assigns byte positions depth-first, mirroring the order used by
    # write_node. Returns the total size of all of +node+'s descendants.
    def mark_node(node, byte_pos)
      node.byte_pos = byte_pos
      total_child_size = 0

      node.children.each_value do |child|
        offset = mark_node(child, byte_pos + node.bytesize + total_child_size)
        total_child_size += child.bytesize + offset
      end

      total_child_size
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # encoding: UTF-8
2
+
3
module TrieFile
  # A single trie node: an optional value plus child nodes keyed by character.
  class Node
    # Bytes one child entry adds to a node's serialized size.
    CHILD_FIELDS_LENGTH = 6
    # Fixed number of bytes every serialized node contributes.
    HEADER_FIELD_LENGTH = 4

    attr_reader :children
    attr_accessor :value, :byte_pos

    # @param value [Object, nil] value stored at this node, if any
    def initialize(value = nil)
      @children = {}
      @byte_pos = 0
      @value = value
    end

    # @return [Boolean] whether a child is registered under +char+
    def has_child?(char)
      children.key?(char)
    end

    # @return [Node, nil] the child registered under +char+
    def child_at(char)
      children[char]
    end

    # Registers +node+ under +char+, replacing any existing child.
    #
    # @return [Node] the node that was added
    def add_child(char, node)
      children.store(char, node)
    end

    # Serialized size of this node alone (descendants are not included).
    def bytesize
      child_table_size = children.size * CHILD_FIELDS_LENGTH
      HEADER_FIELD_LENGTH + child_table_size + value_bytesize
    end

    # @return [Integer] byte length of the value, or 0 without a value
    def value_bytesize
      return 0 unless value

      value.bytesize
    end

    # @return [#to_a] the value's bytes, or an empty array without a value
    def value_bytes
      return [] unless value

      value.bytes
    end
  end
end
File without changes
@@ -0,0 +1,54 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'digest/md5'
4
+ require 'digest/sha1'
5
+
6
module TrieFile
  # In-memory trie mapping string keys to values. Keys may optionally be
  # replaced by their md5 or sha1 hex digest before being stored or looked up.
  class Trie
    attr_reader :root, :hash_mode

    # @param root [Node, nil] existing root node; a fresh one is made if nil
    # @param hash_mode [Symbol] :md5, :sha1, or anything else for no hashing
    def initialize(root = nil, hash_mode = :none)
      @hash_mode = hash_mode
      @root = root || Node.new
    end

    # Stores +value+ under +str+, creating intermediate nodes as needed.
    #
    # @return [Object] the value that was stored
    def add(str, value)
      leaf = hash_key(str).each_char.inject(root) do |node, char|
        if node.has_child?(char)
          node.child_at(char)
        else
          node.add_child(char, Node.new)
        end
      end

      leaf.value = value
    end

    # @return [Object, nil] the value stored under +key+, or nil if the
    #   key's path does not exist
    def find(key)
      current = root

      hash_key(key).each_char do |char|
        current = current.child_at(char)
        break unless current
      end

      current ? current.value : nil
    end

    # Applies this trie's hash mode to +key+.
    def hash_key(key)
      self.class.hash_key(key, hash_mode)
    end

    # Returns the hex digest of +key+ for :md5 / :sha1, or +key+ itself
    # for any other mode.
    def self.hash_key(key, hash_mode)
      digest = { :md5 => Digest::MD5, :sha1 => Digest::SHA1 }[hash_mode]
      digest ? digest.hexdigest(key) : key
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # encoding: UTF-8
2
+
3
+ module TrieFile
4
+ VERSION = '1.0.0'
5
+ end
@@ -0,0 +1,154 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe TrieFile::File do
6
+ def file
7
+ TrieFile::File
8
+ end
9
+
10
+ def trie
11
+ TrieFile::Trie
12
+ end
13
+
14
+ let(:tmpdir) { './' }
15
+ let(:tmpfile) { File.join(tmpdir, 'test.txt') }
16
+
17
+ after(:each) do
18
+ File.unlink(tmpfile) if File.exist?(tmpfile)
19
+ end
20
+
21
+ describe 'self#open' do
22
+ it "raises an exception if the file isn't opened in binary mode" do
23
+ proc = lambda { file.open(tmpfile, 'w') }
24
+ expect(proc).to raise_error(ArgumentError, 'TrieFile must be opened in binary mode.')
25
+ end
26
+
27
+ it 'yields the file when a block is given and closes then returns it afterwards' do
28
+ f = file.open(tmpfile, 'wb') do |f|
29
+ expect(f).to be_a(file)
30
+ expect(f).to respond_to(:write_trie)
31
+ expect(f).to_not be_closed
32
+ end
33
+
34
+ expect(f).to be_a(file)
35
+ expect(f).to respond_to(:write_trie)
36
+ expect(f).to be_closed
37
+ end
38
+
39
+ it 'returns the open file when a block is not given' do
40
+ file.open(tmpfile, 'wb').tap do |f|
41
+ expect(f).to be_a(file)
42
+ expect(f).to_not be_closed
43
+ end
44
+ end
45
+
46
+ it 'uses the given hash mode when passed' do
47
+ file.open(tmpfile, 'wb') do |f|
48
+ f.write_trie(trie.new(nil, :md5).tap { |t| t.add('foo', 'bar') })
49
+ end
50
+
51
+ f = file.open(tmpfile, 'rb', :md5)
52
+ expect(f.find('foo')).to eq('bar')
53
+ f.close
54
+ end
55
+ end
56
+
57
+ describe 'self#read' do
58
+ let(:bytes) do
59
+ [
60
+ 0, 0, 0, 1, 0, 0, 102, 0, 0, 10, 0, 0, 0, 1, 0, 0, 111, 0, 0,
61
+ 20, 0, 0, 0, 1, 0, 0, 111, 0, 0, 30, 0, 3, 98, 97, 114, 0, 0
62
+ ]
63
+ end
64
+
65
+ let(:sha1_bytes) do
66
+ [
67
+ 0, 0, 0, 1, 0, 0, 48, 0, 0, 10, 0, 0, 0, 1, 0, 0, 98, 0, 0, 20,
68
+ 0, 0, 0, 1, 0, 0, 101, 0, 0, 30, 0, 0, 0, 1, 0, 0, 101, 0, 0, 40,
69
+ 0, 0, 0, 1, 0, 0, 99, 0, 0, 50, 0, 0, 0, 1, 0, 0, 55, 0, 0, 60,
70
+ 0, 0, 0, 1, 0, 0, 98, 0, 0, 70, 0, 0, 0, 1, 0, 0, 53, 0, 0, 80,
71
+ 0, 0, 0, 1, 0, 0, 101, 0, 0, 90, 0, 0, 0, 1, 0, 0, 97, 0, 0, 100,
72
+ 0, 0, 0, 1, 0, 0, 51, 0, 0, 110, 0, 0, 0, 1, 0, 0, 102, 0, 0, 120,
73
+ 0, 0, 0, 1, 0, 0, 48, 0, 0, 130, 0, 0, 0, 1, 0, 0, 102, 0, 0, 140,
74
+ 0, 0, 0, 1, 0, 0, 100, 0, 0, 150, 0, 0, 0, 1, 0, 0, 98, 0, 0, 160,
75
+ 0, 0, 0, 1, 0, 0, 99, 0, 0, 170, 0, 0, 0, 1, 0, 0, 57, 0, 0, 180,
76
+ 0, 0, 0, 1, 0, 0, 53, 0, 0, 190, 0, 0, 0, 1, 0, 0, 100, 0, 0, 200,
77
+ 0, 0, 0, 1, 0, 0, 48, 0, 0, 210, 0, 0, 0, 1, 0, 0, 100, 0, 0, 220,
78
+ 0, 0, 0, 1, 0, 0, 100, 0, 0, 230, 0, 0, 0, 1, 0, 0, 52, 0, 0, 240,
79
+ 0, 0, 0, 1, 0, 0, 55, 0, 0, 250, 0, 0, 0, 1, 0, 0, 102, 0, 1, 4, 0,
80
+ 0, 0, 1, 0, 0, 51, 0, 1, 14, 0, 0, 0, 1, 0, 0, 99, 0, 1, 24, 0, 0,
81
+ 0, 1, 0, 0, 53, 0, 1, 34, 0, 0, 0, 1, 0, 0, 98, 0, 1, 44, 0, 0, 0,
82
+ 1, 0, 0, 99, 0, 1, 54, 0, 0, 0, 1, 0, 0, 50, 0, 1, 64, 0, 0, 0, 1,
83
+ 0, 0, 55, 0, 1, 74, 0, 0, 0, 1, 0, 0, 53, 0, 1, 84, 0, 0, 0, 1, 0,
84
+ 0, 100, 0, 1, 94, 0, 0, 0, 1, 0, 0, 97, 0, 1, 104, 0, 0, 0, 1, 0,
85
+ 0, 56, 0, 1, 114, 0, 0, 0, 1, 0, 0, 97, 0, 1, 124, 0, 0, 0, 1, 0,
86
+ 0, 51, 0, 1, 134, 0, 0, 0, 1, 0, 0, 51, 0, 1, 144, 0, 3, 98, 97,
87
+ 114, 0, 0
88
+ ]
89
+ end
90
+
91
+ it 'reads a trie from disk' do
92
+ File.open(tmpfile, 'wb') do |f|
93
+ bytes.each { |byte| f.putc(byte) }
94
+ end
95
+
96
+ t = file.read(tmpfile)
97
+ check_trie(t.root, 'foo', 'bar')
98
+ expect(t.find('foo')).to eq('bar')
99
+ end
100
+
101
+ it 'reads a sha1-hashed trie from disk' do
102
+ File.open(tmpfile, 'wb') do |f|
103
+ sha1_bytes.each { |byte| f.putc(byte) }
104
+ end
105
+
106
+ t = file.read(tmpfile)
107
+ expect(t.find('foo')).to be_nil
108
+
109
+ t = file.read(tmpfile, :sha1)
110
+ check_trie(t.root, Digest::SHA1.hexdigest('foo'), 'bar')
111
+ expect(t.find('foo')).to eq('bar')
112
+ end
113
+ end
114
+
115
+ describe '#write_trie' do
116
+ it 'should write the trie to disk' do
117
+ file.open(tmpfile, 'wb') do |f|
118
+ f.write_trie(trie.new.tap { |t| t.add('foo', 'bar') })
119
+ end
120
+
121
+ t = file.read(tmpfile)
122
+ check_trie(t.root, 'foo', 'bar')
123
+ expect(t.find('foo')).to eq('bar')
124
+ end
125
+
126
+ it 'uses the given hash mode when passed' do
127
+ file.open(tmpfile, 'wb') do |f|
128
+ f.write_trie(trie.new(nil, :md5).tap { |t| t.add('foo', 'bar') })
129
+ end
130
+
131
+ t = file.read(tmpfile)
132
+ check_trie(t.root, Digest::MD5.hexdigest('foo'), 'bar')
133
+ end
134
+ end
135
+
136
+ describe '#find' do
137
+ it 'should traverse the file on disk and find the value' do
138
+ file.open(tmpfile, 'wb') do |f|
139
+ f.write_trie(trie.new.tap { |t| t.add('foo', 'bar') })
140
+ end
141
+
142
+ # notice we're calling 'open' instead of 'read'
143
+ f = file.open(tmpfile, 'rb')
144
+ expect(f.find('foo')).to eq('bar')
145
+ f.close
146
+ end
147
+
148
+ it 'raises an error if the file is already closed, eg. if open is called with a block' do
149
+ File.open(tmpfile, 'w+') { |f| f.write('test') }
150
+ f = file.open(tmpfile, 'rb') {}
151
+ expect(lambda { f.find('foo') }).to raise_error(IOError, 'file is not currently open.')
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,98 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe TrieFile::Node do
6
+ def create_node(value = nil)
7
+ node = TrieFile::Node.new(value)
8
+ yield node if block_given?
9
+ node
10
+ end
11
+
12
+ def header_field_length
13
+ TrieFile::Node::HEADER_FIELD_LENGTH
14
+ end
15
+
16
+ def child_fields_length
17
+ TrieFile::Node::CHILD_FIELDS_LENGTH
18
+ end
19
+
20
+ describe '#has_child?' do
21
+ it 'returns true if the node contains the child, false otherwise' do
22
+ node = create_node do |node|
23
+ node.add_child('a', create_node('foo'))
24
+ end
25
+
26
+ expect(node.has_child?('a')).to be(true)
27
+ expect(node.has_child?('b')).to be(false)
28
+ end
29
+ end
30
+
31
+ describe '#child_at' do
32
+ it 'returns the child at the given letter, nil otherwise' do
33
+ child = create_node('foo')
34
+ node = create_node do |node|
35
+ node.add_child('a', child)
36
+ end
37
+
38
+ expect(node.child_at('a')).to be(child)
39
+ expect(node.child_at('b')).to be(nil)
40
+ end
41
+ end
42
+
43
+ describe '#add_child' do
44
+ it 'should add the child at the given letter' do
45
+ node = create_node
46
+ node.add_child('a', create_node('foo'))
47
+ expect(node.children).to include('a')
48
+ expect(node.children['a'].value).to eq('foo')
49
+ end
50
+ end
51
+
52
+ describe '#bytesize' do
53
+ it 'when no children and no value, returns just the header size' do
54
+ expect(create_node.bytesize).to eq(header_field_length)
55
+ end
56
+
57
+ it 'when no children and a value, returns the header size plus the size of the value' do
58
+ expect(create_node('foo').bytesize).to eq(header_field_length + 3)
59
+ end
60
+
61
+ it 'when a child and a value, returns the header size plus the size of the children plus the size of the value' do
62
+ expect(
63
+ create_node('foo') do |node|
64
+ node.add_child('a', create_node('foo'))
65
+ end.bytesize
66
+ ).to eq(header_field_length + 3 + child_fields_length)
67
+ end
68
+
69
+ it 'when multiple children and a value, returns the header size plus the size of the children plus the size of the value' do
70
+ expect(
71
+ create_node('foo') do |node|
72
+ node.add_child('a', create_node('foo'))
73
+ node.add_child('b', create_node('bar'))
74
+ end.bytesize
75
+ ).to eq(header_field_length + 3 + child_fields_length * 2)
76
+ end
77
+ end
78
+
79
+ describe '#value_bytesize' do
80
+ it 'returns the number of bytes in the value' do
81
+ expect(create_node('foo').value_bytesize).to eq(3)
82
+ end
83
+
84
+ it 'returns zero if the value is nil' do
85
+ expect(create_node.value_bytesize).to eq(0)
86
+ end
87
+ end
88
+
89
+ describe '#value_bytes' do
90
+ it 'returns an enumerator of the bytes in the value' do
91
+ expect(create_node('foo').value_bytes.to_a).to eq([102, 111, 111])
92
+ end
93
+
94
+ it 'returns an empty array if the value is nil' do
95
+ expect(create_node.value_bytes).to eq([])
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,19 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'rspec'
4
+ require 'trie-file'
5
+ require 'pry-nav'
6
+
7
+ RSpec.configure do |config|
8
+ config.mock_with :rr
9
+ end
10
+
11
# Spec helper: walks +root+ along the characters of +key+, expecting every
# node on the way to have exactly one child (the next character), and
# finally expects the node reached to hold +val+.
def check_trie(root, key, val)
  leaf = key.each_char.inject(root) do |node, char|
    expect(node.children.size).to eq(1)
    expect(node.children).to include(char)
    node.children[char]
  end

  expect(leaf.value).to eq(val)
end
@@ -0,0 +1,59 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe TrieFile::Trie do
6
+ def trie
7
+ TrieFile::Trie
8
+ end
9
+
10
+ describe '#add' do
11
+ it 'should add the item' do
12
+ trie.new.tap do |t|
13
+ t.add('foo', 'bar')
14
+ check_trie(t.root, 'foo', 'bar')
15
+ end
16
+ end
17
+
18
+ it 'should hash the key with md5 if asked' do
19
+ trie.new(nil, :md5).tap do |t|
20
+ t.add('foo', 'bar')
21
+ check_trie(
22
+ t.root, Digest::MD5.hexdigest('foo'), 'bar'
23
+ )
24
+ end
25
+ end
26
+
27
+ it 'should hash the key with sha1 if asked' do
28
+ trie.new(nil, :sha1).tap do |t|
29
+ t.add('foo', 'bar')
30
+ check_trie(
31
+ t.root, Digest::SHA1.hexdigest('foo'), 'bar'
32
+ )
33
+ end
34
+ end
35
+ end
36
+
37
+ describe '#find' do
38
+ it 'should be able to find the item' do
39
+ trie.new.tap do |t|
40
+ t.add('foo', 'bar')
41
+ expect(t.find('foo')).to eq('bar')
42
+ end
43
+ end
44
+
45
+ it 'should be able to find the item using the md5 hash mode' do
46
+ trie.new(nil, :md5).tap do |t|
47
+ t.add('foo', 'bar')
48
+ expect(t.find('foo')).to eq('bar')
49
+ end
50
+ end
51
+
52
+ it 'should be able to find the item using the sha1 hash mode' do
53
+ trie.new(nil, :sha1).tap do |t|
54
+ t.add('foo', 'bar')
55
+ expect(t.find('foo')).to eq('bar')
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,18 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), 'lib')
2
+ require 'trie-file/version'
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "trie-file"
6
+ s.version = ::TrieFile::VERSION
7
+ s.authors = ["Cameron Dutro"]
8
+ s.email = ["camertron@gmail.com"]
9
+ s.homepage = "http://github.com/camertron"
10
+
11
+ s.description = s.summary = "Memory-efficient cached trie and trie storage."
12
+
13
+ s.platform = Gem::Platform::RUBY
14
+ s.has_rdoc = true
15
+
16
+ s.require_path = 'lib'
17
+ s.files = Dir["{lib,spec}/**/*", "Gemfile", "History.txt", "README.md", "Rakefile", "trie-file.gemspec"]
18
+ end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: trie-file
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Cameron Dutro
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Memory-efficient cached trie and trie storage.
14
+ email:
15
+ - camertron@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - Gemfile
21
+ - History.txt
22
+ - README.md
23
+ - Rakefile
24
+ - lib/trie-file.rb
25
+ - lib/trie-file/file.rb
26
+ - lib/trie-file/node.rb
27
+ - lib/trie-file/trie-file.rb
28
+ - lib/trie-file/trie.rb
29
+ - lib/trie-file/version.rb
30
+ - spec/file_spec.rb
31
+ - spec/node_spec.rb
32
+ - spec/spec_helper.rb
33
+ - spec/trie_spec.rb
34
+ - trie-file.gemspec
35
+ homepage: http://github.com/camertron
36
+ licenses: []
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 2.2.2
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: Memory-efficient cached trie and trie storage.
58
+ test_files: []