trie-file 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 8bc2e8641b2afc2c19a2e519a6af559efdf08a97
4
+ data.tar.gz: 85c0da8ef2d170d9588525adcccb6edad71ebad6
5
+ SHA512:
6
+ metadata.gz: 3bab7cec0c62f28a54fce93b5e8f9eabc05390fd67c077317e0477663bcbe5bc9aa0b4feb15c50528c2fdc241183b8e4290faeea6d79aed6d04e2b5019645891
7
+ data.tar.gz: 55ec9c2d427f8a399a4470905dfb1c78ac2a37d799af20531b5525128e4da0707f6256db5e05531e37aaff7dc2b3612a106009d35a1017479ad54239ddc93d72
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
4
+
5
+ group :development, :test do
6
+ gem 'pry-nav'
7
+ gem 'rake'
8
+ end
9
+
10
+ group :test do
11
+ gem 'rspec'
12
+ gem 'rr'
13
+ end
@@ -0,0 +1,3 @@
1
+ == 1.0.0
2
+
3
+ * Birthday!
@@ -0,0 +1,77 @@
1
+ trie-file
2
+ =========
3
+
4
+ Memory-efficient cached trie and trie storage.
5
+
6
+ ## Installation
7
+
8
+ `gem install trie-file`
9
+
10
+ Then, somewhere in your code:
11
+
12
+ ```ruby
13
+ require 'trie-file'
14
+ ```
15
+
16
+ ## Rationale
17
+
18
+ trie-file contains two things: an implementation of the [trie data structure](http://en.wikipedia.org/wiki/Trie), and a way to write them to disk and read them back again. It tries (ha!) to do this in a memory-efficient way by packing the trie structure in a specialized binary form. This special packing method means the trie can be searched entirely _on disk_ without needing to load the whole structure into memory (linear time). Each key you look up is cached so subsequent accesses are even faster (constant time). trie-file is also capable of reading and writing entire trie structures.
19
+
20
+ Because tries (also known as prefix trees) rely on keys having common prefixes, you're required to use string keys. There are no type restrictions on values.
21
+
22
+ ## What's a Trie?
23
+
24
+ For an in-depth explanation, see the Wikipedia link above. Essentially, tries are key-value data structures that work similarly to Ruby hashes. You add a key and a value to the trie and can later retrieve the value using the same key.
25
+
26
+ ## Basic Usage
27
+
28
+ Create a trie and write it to disk:
29
+
30
+ ```ruby
31
+ trie = TrieFile::Trie.new
32
+ trie.add('foo', 'bar')
33
+
34
+ TrieFile::File.open('/path/to/file', 'wb') do |f|
35
+ f.write_trie(trie)
36
+ end
37
+ ```
38
+
39
+ Open a file handle to a trie and search it _on disk_:
40
+
41
+ ```ruby
42
+ trie_file = TrieFile::File.open('/path/to/file', 'rb')
43
+ trie_file.find('foo') # => 'bar'
44
+ ```
45
+
46
+ To read an entire trie into memory, use the `TrieFile::File.read` class method instead of `TrieFile::File.open`:
47
+
48
+ ```ruby
49
+ trie = TrieFile::File.read('/path/to/file')
50
+ ```
51
+
52
+ ## Choosing a Hash Method
53
+
54
+ By default, trie-file does not hash your keys. Instead, it iterates over each character in the key and constructs the internal trie structure. trie-file also supports hashing keys with the md5 or sha1 algorithms to minimize your search space:
55
+
56
+ ```ruby
57
+ trie = TrieFile::Trie.new(nil, :sha1)
58
+ ```
59
+
60
+ If you wrote a trie to disk that was hashed using sha1, you'll need to supply an additional argument to `#open` and `#read`:
61
+
62
+ ```ruby
63
+ trie_file = TrieFile::File.open('/path/to/file', 'rb', :sha1)
64
+ trie = TrieFile::File.read('/path/to/file', :sha1)
65
+ ```
66
+
67
+ ## Requirements
68
+
69
+ No external requirements.
70
+
71
+ ## Running Tests
72
+
73
+ `bundle exec rspec` should do the trick :)
74
+
75
+ ## Authors
76
+
77
+ * Cameron C. Dutro: http://github.com/camertron
@@ -0,0 +1,18 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'rubygems' unless ENV['NO_RUBYGEMS']
4
+
5
+ require 'bundler'
6
+ require 'rspec/core/rake_task'
7
+ require 'rubygems/package_task'
8
+
9
+ require './lib/trie-file'
10
+
11
+ Bundler::GemHelper.install_tasks
12
+
13
+ task :default => :spec
14
+
15
+ desc 'Run specs'
16
+ RSpec::Core::RakeTask.new do |t|
17
+ t.pattern = './spec/**/*_spec.rb'
18
+ end
@@ -0,0 +1,5 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'trie-file/trie'
4
+ require 'trie-file/node'
5
+ require 'trie-file/file'
@@ -0,0 +1,232 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'thread'
4
+
5
+ # root:
6
+ # 2b value length
7
+ # nb value
8
+ # 2b (number of children)
9
+ # children metadata:
10
+ # 3b letter
11
+ # 3b child location
12
+ # children:
13
+ # node:
14
+ # 2b value length
15
+ # ...
16
+
17
module TrieFile
  # Reads and writes tries in a packed binary layout and can look up a single
  # key directly against an open file handle.
  #
  # Every node is serialized as:
  #   2 bytes  value length
  #   n bytes  value
  #   2 bytes  number of children
  #   per child:
  #     3 bytes  letter (left-padded with zero bytes)
  #     3 bytes  absolute byte position of the child node
  # followed by the child nodes themselves, written depth first.
  class File
    BYTE_LENGTH = 8
    LETTER_FIELD_LENGTH = 3
    POSITION_FIELD_LENGTH = 3
    # Number of distinct values a position field can hold; the largest
    # position that fits is therefore POSITION_MAX - 1.
    POSITION_MAX = 2 ** (BYTE_LENGTH * POSITION_FIELD_LENGTH)
    VALUE_FIELD_LENGTH = 2
    CHILD_COUNT_FIELD_LENGTH = 2

    attr_reader :handle, :hash_mode

    # Opens +path+ with the given +mode+ and wraps the handle in a
    # TrieFile::File.
    #
    # When a block is given the wrapper is yielded and the underlying handle
    # is closed afterwards, even if the block raises. The wrapper is returned
    # in both cases.
    #
    # Raises ArgumentError if +mode+ does not open the file in binary mode;
    # the handle is closed before raising so it is not leaked.
    def self.open(path, mode, hash_mode = :none)
      handle = ::File.open(path, mode)

      unless handle.binmode?
        handle.close
        raise ArgumentError, 'TrieFile must be opened in binary mode.'
      end

      file = new(handle, hash_mode)
      return file unless block_given?

      begin
        yield file
      ensure
        # The block may already have closed the handle itself.
        handle.close unless handle.closed?
      end

      file
    end

    # Reads the whole file at +path+ into memory and returns it as a Trie
    # that uses the given +hash_mode+.
    def self.read(path, hash_mode = :none)
      root = ::File.open(path, 'rb') { |io| read_node(io) }
      Trie.new(root, hash_mode)
    end

    # Reads only the value portion of a node: a 2-byte length followed by
    # that many bytes. A zero length yields an empty string.
    def self.read_value(io)
      io.read(read_int(io, VALUE_FIELD_LENGTH))
    end

    # Reads a node's value and its child table.
    #
    # Returns [value, child_metadata] where child_metadata is an array of
    # [letter, child_byte_position] pairs.
    def self.read_node_header(io)
      value = read_value(io)
      number_of_children = read_int(io, CHILD_COUNT_FIELD_LENGTH)

      child_metadata = Array.new(number_of_children) do
        # 3b letter, then 3b child location
        letter = read_bytes(io, LETTER_FIELD_LENGTH)
        [letter, read_int(io, POSITION_FIELD_LENGTH)]
      end

      [value, child_metadata]
    end

    # Recursively reads a node and all of its descendants from +io+.
    # Children are read sequentially in child-table order, which matches the
    # depth-first order produced by write_node.
    def self.read_node(io)
      value, child_metadata = read_node_header(io)
      node = Node.new(value)

      child_metadata.each do |(letter, _position)|
        node.add_child(letter, read_node(io))
      end

      node
    end

    # Recursively writes +node+ and its descendants to +io+. Every node's
    # byte_pos must already have been assigned (see #write_trie).
    #
    # Raises RuntimeError if a letter is wider than the letter field or a
    # child position cannot be represented in the position field.
    def self.write_node(node, io)
      # 2b value length
      write_int(io, node.value_bytesize, VALUE_FIELD_LENGTH)

      # nb value
      write_bytes(io, node.value_bytes.to_a)

      # 2b number of children
      write_int(io, node.children.size, CHILD_COUNT_FIELD_LENGTH)

      # child table
      node.children.each_pair do |letter, child_node|
        # 3b letter
        if letter.bytesize > LETTER_FIELD_LENGTH
          raise "Letter #{letter} is larger than #{LETTER_FIELD_LENGTH} bytes."
        end
        write_bytes(io, letter.bytes.to_a, LETTER_FIELD_LENGTH)

        # 3b child location; POSITION_MAX itself already needs one more byte,
        # so it must be rejected as well.
        if child_node.byte_pos >= POSITION_MAX
          raise "Encountered write position greater than #{POSITION_FIELD_LENGTH} bytes."
        end
        write_int(io, child_node.byte_pos, POSITION_FIELD_LENGTH)
      end

      node.children.each_value { |child_node| write_node(child_node, io) }
    end

    # Writes +int+ big-endian into a field of +bytesize+ bytes, left-padding
    # with zero bytes.
    #
    # Raises ArgumentError when +int+ needs more than +bytesize+ bytes, since
    # writing it anyway would shift every following field.
    def self.write_int(io, int, bytesize = int_bytesize(int))
      actual_bytesize = int_bytesize(int)

      if actual_bytesize > bytesize
        raise ArgumentError, "Integer #{int} does not fit in #{bytesize} bytes."
      end

      (bytesize - actual_bytesize).times { io.putc("\0") }

      (actual_bytesize - 1).downto(0) do |shift|
        # emit one byte at a time, most significant first
        io.putc((int >> (shift * BYTE_LENGTH)) & 0xFF)
      end
    end

    # Number of bytes needed to represent the non-negative integer +int+
    # (zero needs no bytes at all).
    def self.int_bytesize(int)
      return 0 if int == 0
      (Math.log2(int) / BYTE_LENGTH).to_i + 1
    end

    # Writes +bytes+ into a field of +bytesize+ bytes, left-padding with zero
    # bytes when fewer bytes are supplied.
    def self.write_bytes(io, bytes, bytesize = bytes.size)
      (bytesize - bytes.size).times { io.putc("\0") }
      bytes.each { |byte| io.putc(byte) }
    end

    # Reads a big-endian unsigned integer occupying +bytesize+ bytes.
    def self.read_int(io, bytesize)
      bytesize.times.inject(0) do |acc, _|
        (acc << BYTE_LENGTH) | io.readbyte
      end
    end

    # Reads a zero-padded letter field of +bytesize+ bytes and returns the
    # letter as a UTF-8 string.
    #
    # The raw bytes are the UTF-8 encoding of the letter, so they are
    # reassembled byte-for-byte rather than being treated as codepoints.
    # A field consisting only of zero bytes decodes to "\0".
    def self.read_bytes(io, bytesize)
      bytes = Array.new(bytesize) { io.readbyte }

      # remove leading zero bytes (padding), keeping at least one byte
      significant = bytes.drop_while { |byte| byte == 0 }
      significant = [0] if significant.empty?

      significant.pack('C*').force_encoding(Encoding::UTF_8)
    end

    # handle    - an IO opened in binary mode
    # hash_mode - the key hashing mode passed on to Trie.hash_key
    def initialize(handle, hash_mode)
      @handle = handle
      @semaphore = Mutex.new
      @hash_mode = hash_mode
    end

    # Serializes +trie+ to the handle: first assigns every node its byte
    # position, then writes the nodes depth first starting at the root.
    def write_trie(trie)
      mark(trie)
      self.class.write_node(trie.root, handle)
    end

    # Looks +key+ up by walking the file on disk, one node header per
    # character of the (possibly hashed) key.
    #
    # Returns the stored value, or nil when the path for the key does not
    # exist. Successful lookups are cached per key; misses are not. The whole
    # lookup runs under a mutex because it repositions the shared handle.
    #
    # Raises IOError if the underlying handle is closed.
    def find(key)
      raise IOError, 'file is not currently open.' if closed?

      @semaphore.synchronize do
        key = hash_key(key)

        cache.fetch(key) do
          handle.seek(0, IO::SEEK_SET)

          key.each_char do |char|
            _value, child_metadata = self.class.read_node_header(handle)
            metadata = child_metadata.find { |data| data.first == char }

            return nil unless metadata
            handle.seek(metadata.last, IO::SEEK_SET)
          end

          cache[key] = self.class.read_value(handle)
        end
      end
    end

    # True when the underlying handle has been closed.
    def closed?
      @handle.closed?
    end

    # Closes the underlying handle.
    def close
      handle.close
    end

    private

    # Applies this file's hash mode to +key+.
    def hash_key(key)
      Trie.hash_key(key, hash_mode)
    end

    # Lazily created key => value cache used by #find.
    def cache
      @cache ||= {}
    end

    # Assigns byte positions to every node of +trie+, root at offset 0.
    def mark(trie)
      mark_node(trie.root, 0)
    end

    # Records +byte_pos+ on +node+ and lays its children out directly behind
    # it, each child followed by its own descendants.
    #
    # Returns the combined size in bytes of all of +node+'s descendants.
    def mark_node(node, byte_pos)
      node.byte_pos = byte_pos

      node.children.each_value.inject(0) do |descendants_size, child|
        child_start = byte_pos + node.bytesize + descendants_size
        descendants_size + child.bytesize + mark_node(child, child_start)
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
+ # encoding: UTF-8
2
+
3
module TrieFile
  # A single trie node: an optional value plus a letter => Node table of
  # children. byte_pos holds the node's offset once it has been laid out for
  # writing.
  class Node
    # Serialized size of one child-table entry.
    CHILD_FIELDS_LENGTH = 6
    # Serialized size of the fixed per-node fields.
    HEADER_FIELD_LENGTH = 4

    attr_reader :children
    attr_accessor :value, :byte_pos

    def initialize(value = nil)
      @byte_pos = 0
      @children = {}
      @value = value
    end

    # True when a child is registered under +char+.
    def has_child?(char)
      children.key?(char)
    end

    # The child registered under +char+, or nil.
    def child_at(char)
      children[char]
    end

    # Registers +node+ under +char+ and returns +node+.
    def add_child(char, node)
      children.store(char, node)
    end

    # Size of this node alone: fixed header, one table entry per child, and
    # the value bytes.
    def bytesize
      child_table_size = CHILD_FIELDS_LENGTH * children.size
      HEADER_FIELD_LENGTH + child_table_size + value_bytesize
    end

    # Byte length of the value; zero when there is no value.
    def value_bytesize
      return 0 unless value
      value.bytesize
    end

    # Bytes of the value; an empty array when there is no value.
    def value_bytes
      return [] unless value
      value.bytes
    end
  end
end
File without changes
@@ -0,0 +1,54 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'digest/md5'
4
+ require 'digest/sha1'
5
+
6
module TrieFile
  # In-memory trie keyed by strings. Keys may optionally be hashed (md5 or
  # sha1 hex digest) before being split into characters.
  class Trie
    attr_reader :root, :hash_mode

    # root      - an existing root Node, or nil to start with an empty trie
    # hash_mode - :md5, :sha1, or anything else for unhashed keys
    def initialize(root = nil, hash_mode = :none)
      @hash_mode = hash_mode
      @root = root || Node.new
    end

    # Stores +value+ under +str+, creating any missing nodes along the path.
    # Returns +value+.
    def add(str, value)
      leaf = hash_key(str).each_char.inject(root) do |node, char|
        node.has_child?(char) ? node.child_at(char) : node.add_child(char, Node.new)
      end

      leaf.value = value
    end

    # Returns the value stored under +key+, or nil when no node exists for
    # the full key.
    def find(key)
      current = root

      hash_key(key).each_char do |char|
        current = current.child_at(char)
        break unless current
      end

      current && current.value
    end

    # Applies this trie's hash mode to +key+.
    def hash_key(key)
      self.class.hash_key(key, hash_mode)
    end

    # Returns the md5 or sha1 hex digest of +key+ for the matching
    # +hash_mode+, and +key+ itself for any other mode.
    def self.hash_key(key, hash_mode)
      digest = { :md5 => Digest::MD5, :sha1 => Digest::SHA1 }[hash_mode]
      digest ? digest.hexdigest(key) : key
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # encoding: UTF-8
2
+
3
module TrieFile
  # Gem version; frozen so the constant cannot be mutated in place.
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,154 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe TrieFile::File do
6
+ def file
7
+ TrieFile::File
8
+ end
9
+
10
+ def trie
11
+ TrieFile::Trie
12
+ end
13
+
14
+ let(:tmpdir) { './' }
15
+ let(:tmpfile) { File.join(tmpdir, 'test.txt') }
16
+
17
+ after(:each) do
18
+ File.unlink(tmpfile) if File.exist?(tmpfile)
19
+ end
20
+
21
+ describe 'self#open' do
22
+ it "raises an exception if the file isn't opened in binary mode" do
23
+ proc = lambda { file.open(tmpfile, 'w') }
24
+ expect(proc).to raise_error(ArgumentError, 'TrieFile must be opened in binary mode.')
25
+ end
26
+
27
+ it 'yields the file when a block is given and closes then returns it afterwards' do
28
+ f = file.open(tmpfile, 'wb') do |f|
29
+ expect(f).to be_a(file)
30
+ expect(f).to respond_to(:write_trie)
31
+ expect(f).to_not be_closed
32
+ end
33
+
34
+ expect(f).to be_a(file)
35
+ expect(f).to respond_to(:write_trie)
36
+ expect(f).to be_closed
37
+ end
38
+
39
+ it 'returns the open file when a block is not given' do
40
+ file.open(tmpfile, 'wb').tap do |f|
41
+ expect(f).to be_a(file)
42
+ expect(f).to_not be_closed
43
+ end
44
+ end
45
+
46
+ it 'uses the given hash mode when passed' do
47
+ file.open(tmpfile, 'wb') do |f|
48
+ f.write_trie(trie.new(nil, :md5).tap { |t| t.add('foo', 'bar') })
49
+ end
50
+
51
+ f = file.open(tmpfile, 'rb', :md5)
52
+ expect(f.find('foo')).to eq('bar')
53
+ f.close
54
+ end
55
+ end
56
+
57
+ describe 'self#read' do
58
+ let(:bytes) do
59
+ [
60
+ 0, 0, 0, 1, 0, 0, 102, 0, 0, 10, 0, 0, 0, 1, 0, 0, 111, 0, 0,
61
+ 20, 0, 0, 0, 1, 0, 0, 111, 0, 0, 30, 0, 3, 98, 97, 114, 0, 0
62
+ ]
63
+ end
64
+
65
+ let(:sha1_bytes) do
66
+ [
67
+ 0, 0, 0, 1, 0, 0, 48, 0, 0, 10, 0, 0, 0, 1, 0, 0, 98, 0, 0, 20,
68
+ 0, 0, 0, 1, 0, 0, 101, 0, 0, 30, 0, 0, 0, 1, 0, 0, 101, 0, 0, 40,
69
+ 0, 0, 0, 1, 0, 0, 99, 0, 0, 50, 0, 0, 0, 1, 0, 0, 55, 0, 0, 60,
70
+ 0, 0, 0, 1, 0, 0, 98, 0, 0, 70, 0, 0, 0, 1, 0, 0, 53, 0, 0, 80,
71
+ 0, 0, 0, 1, 0, 0, 101, 0, 0, 90, 0, 0, 0, 1, 0, 0, 97, 0, 0, 100,
72
+ 0, 0, 0, 1, 0, 0, 51, 0, 0, 110, 0, 0, 0, 1, 0, 0, 102, 0, 0, 120,
73
+ 0, 0, 0, 1, 0, 0, 48, 0, 0, 130, 0, 0, 0, 1, 0, 0, 102, 0, 0, 140,
74
+ 0, 0, 0, 1, 0, 0, 100, 0, 0, 150, 0, 0, 0, 1, 0, 0, 98, 0, 0, 160,
75
+ 0, 0, 0, 1, 0, 0, 99, 0, 0, 170, 0, 0, 0, 1, 0, 0, 57, 0, 0, 180,
76
+ 0, 0, 0, 1, 0, 0, 53, 0, 0, 190, 0, 0, 0, 1, 0, 0, 100, 0, 0, 200,
77
+ 0, 0, 0, 1, 0, 0, 48, 0, 0, 210, 0, 0, 0, 1, 0, 0, 100, 0, 0, 220,
78
+ 0, 0, 0, 1, 0, 0, 100, 0, 0, 230, 0, 0, 0, 1, 0, 0, 52, 0, 0, 240,
79
+ 0, 0, 0, 1, 0, 0, 55, 0, 0, 250, 0, 0, 0, 1, 0, 0, 102, 0, 1, 4, 0,
80
+ 0, 0, 1, 0, 0, 51, 0, 1, 14, 0, 0, 0, 1, 0, 0, 99, 0, 1, 24, 0, 0,
81
+ 0, 1, 0, 0, 53, 0, 1, 34, 0, 0, 0, 1, 0, 0, 98, 0, 1, 44, 0, 0, 0,
82
+ 1, 0, 0, 99, 0, 1, 54, 0, 0, 0, 1, 0, 0, 50, 0, 1, 64, 0, 0, 0, 1,
83
+ 0, 0, 55, 0, 1, 74, 0, 0, 0, 1, 0, 0, 53, 0, 1, 84, 0, 0, 0, 1, 0,
84
+ 0, 100, 0, 1, 94, 0, 0, 0, 1, 0, 0, 97, 0, 1, 104, 0, 0, 0, 1, 0,
85
+ 0, 56, 0, 1, 114, 0, 0, 0, 1, 0, 0, 97, 0, 1, 124, 0, 0, 0, 1, 0,
86
+ 0, 51, 0, 1, 134, 0, 0, 0, 1, 0, 0, 51, 0, 1, 144, 0, 3, 98, 97,
87
+ 114, 0, 0
88
+ ]
89
+ end
90
+
91
+ it 'reads a trie from disk' do
92
+ File.open(tmpfile, 'wb') do |f|
93
+ bytes.each { |byte| f.putc(byte) }
94
+ end
95
+
96
+ t = file.read(tmpfile)
97
+ check_trie(t.root, 'foo', 'bar')
98
+ expect(t.find('foo')).to eq('bar')
99
+ end
100
+
101
+ it 'reads a sha1-hashed trie from disk' do
102
+ File.open(tmpfile, 'wb') do |f|
103
+ sha1_bytes.each { |byte| f.putc(byte) }
104
+ end
105
+
106
+ t = file.read(tmpfile)
107
+ expect(t.find('foo')).to be_nil
108
+
109
+ t = file.read(tmpfile, :sha1)
110
+ check_trie(t.root, Digest::SHA1.hexdigest('foo'), 'bar')
111
+ expect(t.find('foo')).to eq('bar')
112
+ end
113
+ end
114
+
115
+ describe '#write_trie' do
116
+ it 'should write the trie to disk' do
117
+ file.open(tmpfile, 'wb') do |f|
118
+ f.write_trie(trie.new.tap { |t| t.add('foo', 'bar') })
119
+ end
120
+
121
+ t = file.read(tmpfile)
122
+ check_trie(t.root, 'foo', 'bar')
123
+ expect(t.find('foo')).to eq('bar')
124
+ end
125
+
126
+ it 'uses the given hash mode when passed' do
127
+ file.open(tmpfile, 'wb') do |f|
128
+ f.write_trie(trie.new(nil, :md5).tap { |t| t.add('foo', 'bar') })
129
+ end
130
+
131
+ t = file.read(tmpfile)
132
+ check_trie(t.root, Digest::MD5.hexdigest('foo'), 'bar')
133
+ end
134
+ end
135
+
136
+ describe '#find' do
137
+ it 'should traverse the file on disk and find the value' do
138
+ file.open(tmpfile, 'wb') do |f|
139
+ f.write_trie(trie.new.tap { |t| t.add('foo', 'bar') })
140
+ end
141
+
142
+ # notice we're calling 'open' instead of 'read'
143
+ f = file.open(tmpfile, 'rb')
144
+ expect(f.find('foo')).to eq('bar')
145
+ f.close
146
+ end
147
+
148
+ it 'raises an error if the file is already closed, eg. if open is called with a block' do
149
+ File.open(tmpfile, 'w+') { |f| f.write('test') }
150
+ f = file.open(tmpfile, 'rb') {}
151
+ expect(lambda { f.find('foo') }).to raise_error(IOError, 'file is not currently open.')
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,98 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe TrieFile::Node do
6
+ def create_node(value = nil)
7
+ node = TrieFile::Node.new(value)
8
+ yield node if block_given?
9
+ node
10
+ end
11
+
12
+ def header_field_length
13
+ TrieFile::Node::HEADER_FIELD_LENGTH
14
+ end
15
+
16
+ def child_fields_length
17
+ TrieFile::Node::CHILD_FIELDS_LENGTH
18
+ end
19
+
20
+ describe '#has_child?' do
21
+ it 'returns true if the node contains the child, false otherwise' do
22
+ node = create_node do |node|
23
+ node.add_child('a', create_node('foo'))
24
+ end
25
+
26
+ expect(node.has_child?('a')).to be(true)
27
+ expect(node.has_child?('b')).to be(false)
28
+ end
29
+ end
30
+
31
+ describe '#child_at' do
32
+ it 'returns the child at the given letter, nil otherwise' do
33
+ child = create_node('foo')
34
+ node = create_node do |node|
35
+ node.add_child('a', child)
36
+ end
37
+
38
+ expect(node.child_at('a')).to be(child)
39
+ expect(node.child_at('b')).to be(nil)
40
+ end
41
+ end
42
+
43
+ describe '#add_child' do
44
+ it 'should add the child at the given letter' do
45
+ node = create_node
46
+ node.add_child('a', create_node('foo'))
47
+ expect(node.children).to include('a')
48
+ expect(node.children['a'].value).to eq('foo')
49
+ end
50
+ end
51
+
52
+ describe '#bytesize' do
53
+ it 'when no children and no value, returns just the header size' do
54
+ expect(create_node.bytesize).to eq(header_field_length)
55
+ end
56
+
57
+ it 'when no children and a value, returns the header size plus the size of the value' do
58
+ expect(create_node('foo').bytesize).to eq(header_field_length + 3)
59
+ end
60
+
61
+ it 'when a child and a value, returns the header size plus the size of the children plus the size of the value' do
62
+ expect(
63
+ create_node('foo') do |node|
64
+ node.add_child('a', create_node('foo'))
65
+ end.bytesize
66
+ ).to eq(header_field_length + 3 + child_fields_length)
67
+ end
68
+
69
+ it 'when multiple children and a value, returns the header size plus the size of the children plus the size of the value' do
70
+ expect(
71
+ create_node('foo') do |node|
72
+ node.add_child('a', create_node('foo'))
73
+ node.add_child('b', create_node('bar'))
74
+ end.bytesize
75
+ ).to eq(header_field_length + 3 + child_fields_length * 2)
76
+ end
77
+ end
78
+
79
+ describe '#value_bytesize' do
80
+ it 'returns the number of bytes in the value' do
81
+ expect(create_node('foo').value_bytesize).to eq(3)
82
+ end
83
+
84
+ it 'returns zero if the value is nil' do
85
+ expect(create_node.value_bytesize).to eq(0)
86
+ end
87
+ end
88
+
89
+ describe '#value_bytes' do
90
+ it 'returns an enumerator of the bytes in the value' do
91
+ expect(create_node('foo').value_bytes.to_a).to eq([102, 111, 111])
92
+ end
93
+
94
+ it 'returns an empty array if the value is nil' do
95
+ expect(create_node.value_bytes).to eq([])
96
+ end
97
+ end
98
+ end
@@ -0,0 +1,19 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'rspec'
4
+ require 'trie-file'
5
+ require 'pry-nav'
6
+
7
+ RSpec.configure do |config|
8
+ config.mock_with :rr
9
+ end
10
+
11
# Spec helper: walks +root+ along the characters of +key+, expecting every
# node on the way to have exactly one child (the next character), and expects
# the node reached at the end to hold +val+.
def check_trie(root, key, val)
  leaf = key.each_char.inject(root) do |current, char|
    expect(current.children.size).to eq(1)
    expect(current.children).to include(char)
    current.children[char]
  end

  expect(leaf.value).to eq(val)
end
@@ -0,0 +1,59 @@
1
+ # encoding: UTF-8
2
+
3
+ require 'spec_helper'
4
+
5
+ describe TrieFile::Trie do
6
+ def trie
7
+ TrieFile::Trie
8
+ end
9
+
10
+ describe '#add' do
11
+ it 'should add the item' do
12
+ trie.new.tap do |t|
13
+ t.add('foo', 'bar')
14
+ check_trie(t.root, 'foo', 'bar')
15
+ end
16
+ end
17
+
18
+ it 'should hash the key with md5 if asked' do
19
+ trie.new(nil, :md5).tap do |t|
20
+ t.add('foo', 'bar')
21
+ check_trie(
22
+ t.root, Digest::MD5.hexdigest('foo'), 'bar'
23
+ )
24
+ end
25
+ end
26
+
27
+ it 'should hash the key with sha1 if asked' do
28
+ trie.new(nil, :sha1).tap do |t|
29
+ t.add('foo', 'bar')
30
+ check_trie(
31
+ t.root, Digest::SHA1.hexdigest('foo'), 'bar'
32
+ )
33
+ end
34
+ end
35
+ end
36
+
37
+ describe '#find' do
38
+ it 'should be able to find the item' do
39
+ trie.new.tap do |t|
40
+ t.add('foo', 'bar')
41
+ expect(t.find('foo')).to eq('bar')
42
+ end
43
+ end
44
+
45
+ it 'should be able to find the item using the md5 hash mode' do
46
+ trie.new(nil, :md5).tap do |t|
47
+ t.add('foo', 'bar')
48
+ expect(t.find('foo')).to eq('bar')
49
+ end
50
+ end
51
+
52
+ it 'should be able to find the item using the sha1 hash mode' do
53
+ trie.new(nil, :sha1).tap do |t|
54
+ t.add('foo', 'bar')
55
+ expect(t.find('foo')).to eq('bar')
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,18 @@
1
+ $:.unshift File.join(File.dirname(__FILE__), 'lib')
2
+ require 'trie-file/version'
3
+
4
# Gem specification for trie-file.
Gem::Specification.new do |s|
  s.name     = "trie-file"
  s.version  = ::TrieFile::VERSION
  s.authors  = ["Cameron Dutro"]
  s.email    = ["camertron@gmail.com"]
  s.homepage = "http://github.com/camertron"

  s.description = s.summary = "Memory-efficient cached trie and trie storage."

  s.platform = Gem::Platform::RUBY
  # NOTE: `s.has_rdoc = true` was dropped. RubyGems deprecated
  # Gem::Specification#has_rdoc= with no replacement, and it had no effect
  # on the built gem.

  s.require_path = 'lib'
  s.files = Dir["{lib,spec}/**/*", "Gemfile", "History.txt", "README.md", "Rakefile", "trie-file.gemspec"]
end
metadata ADDED
@@ -0,0 +1,58 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: trie-file
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Cameron Dutro
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-28 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Memory-efficient cached trie and trie storage.
14
+ email:
15
+ - camertron@gmail.com
16
+ executables: []
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - Gemfile
21
+ - History.txt
22
+ - README.md
23
+ - Rakefile
24
+ - lib/trie-file.rb
25
+ - lib/trie-file/file.rb
26
+ - lib/trie-file/node.rb
27
+ - lib/trie-file/trie-file.rb
28
+ - lib/trie-file/trie.rb
29
+ - lib/trie-file/version.rb
30
+ - spec/file_spec.rb
31
+ - spec/node_spec.rb
32
+ - spec/spec_helper.rb
33
+ - spec/trie_spec.rb
34
+ - trie-file.gemspec
35
+ homepage: http://github.com/camertron
36
+ licenses: []
37
+ metadata: {}
38
+ post_install_message:
39
+ rdoc_options: []
40
+ require_paths:
41
+ - lib
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - '>='
50
+ - !ruby/object:Gem::Version
51
+ version: '0'
52
+ requirements: []
53
+ rubyforge_project:
54
+ rubygems_version: 2.2.2
55
+ signing_key:
56
+ specification_version: 4
57
+ summary: Memory-efficient cached trie and trie storage.
58
+ test_files: []