distributed-trie 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +1 -0
- data/COPYING +33 -0
- data/README.md +59 -0
- data/Rakefile +72 -0
- data/VERSION.yml +4 -0
- data/lib/distributedtrie.rb +38 -0
- data/lib/distributedtrie/kvs/base.rb +55 -0
- data/lib/distributedtrie/kvs/dbm.rb +45 -0
- data/lib/distributedtrie/kvs/dynamodb.rb +81 -0
- data/lib/distributedtrie/kvs/memcache.rb +52 -0
- data/lib/distributedtrie/kvs/redis.rb +46 -0
- data/lib/distributedtrie/kvs/simpledb.rb +86 -0
- data/lib/distributedtrie/kvs/tokyocabinet.rb +52 -0
- data/lib/distributedtrie/kvsif.rb +69 -0
- data/lib/distributedtrie/trie.rb +220 -0
- data/test/bigdata_spec.rb +85 -0
- data/test/internal_spec.rb +241 -0
- data/test/rspec_formatter_for_emacs.rb +26 -0
- data/test/usecase_spec.rb +126 -0
- metadata +130 -0
@@ -0,0 +1,85 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- encoding: utf-8 -*-
|
3
|
+
#
|
4
|
+
# bigdata_spec.rb - "RSpec file for bigdata test-case"
|
5
|
+
#
|
6
|
+
# Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
7
|
+
#
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
9
|
+
# modification, are permitted provided that the following conditions
|
10
|
+
# are met:
|
11
|
+
#
|
12
|
+
# 1. Redistributions of source code must retain the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer.
|
14
|
+
#
|
15
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
16
|
+
# notice, this list of conditions and the following disclaimer in the
|
17
|
+
# documentation and/or other materials provided with the distribution.
|
18
|
+
#
|
19
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
20
|
+
# may be used to endorse or promote products derived from this
|
21
|
+
# software without specific prior written permission.
|
22
|
+
#
|
23
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
24
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
25
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
26
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
27
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
28
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
29
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
30
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
31
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
32
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
33
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
34
|
+
#
|
35
|
+
require 'distributedtrie'
|
36
|
+
include DistributedTrie
|
37
|
+
|
38
|
+
|
39
|
+
# Bulk-load spec: registers every three-character key over [0-9a-z] plus a
# handful of short keys, commits them, and then exercises each search API.
describe Trie, "when initialized as '()" do
  before do
    @kvs  = DistributedTrie::KvsIf.new
    @trie = Trie.new( @kvs, "TEST::" )
    @arr  = "0123456789abcdefghijklmnopqrstuvwxyz".split(//)
  end

  it "should" do
    # Every ordered triple of characters, visited in the same order as three nested loops.
    @arr.product( @arr, @arr ).each { |chars| @trie.addKey!( chars.join ) }

    [ "0", "1", "AA", "BB" ].each { |key| @trie.addKey!( key ) }

    @trie._getInternal( :work ).size.should == 1335
    @trie.commit!()

    # [ query, expected exactMatchSearch result ]
    [
      [ "0",   ["0"]   ],
      [ "1",   ["1"]   ],
      [ "2",   []      ],
      [ "AA",  ["AA"]  ],
      [ "BB",  ["BB"]  ],
      [ "CC",  []      ],
      [ "aa",  []      ],
      [ "bb",  []      ],
      [ "aaa", ["aaa"] ],
      [ "aaa", ["aaa"] ],
      [ "zzz", ["zzz"] ],
      [ "012", ["012"] ]
    ].each do |query, expected|
      @trie.exactMatchSearch( query ).should == expected
    end

    triple_count = @arr.size * @arr.size * @arr.size
    @trie.commonPrefixSearch( '00' ).should == @arr.map { |c| "00" + c }
    @trie.commonPrefixSearch( '' ).size.should == triple_count + 4

    # Predicate searches: the block decides which candidate keys are kept.
    @trie.search( 'ab' ) { |key| ( 'aba' <= key ) && ( key <= 'abe' ) }.should == ["aba", "abb", "abc", "abd", "abe"]
    @trie.search( 'zz' ) { |key| key.match( /zz[7-9]/ ) }.should == ["zz7", "zz8", "zz9"]

    @trie.search( '' ) { |key| 1 == key.size }.should == ["0", "1"]
    @trie.search( '' ) { |key| 2 >= key.size }.should == ["0", "1", "AA", "BB"]
  end
end
|
@@ -0,0 +1,241 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- encoding: utf-8 -*-
|
3
|
+
#
|
4
|
+
# internal_spec.rb - "RSpec file for trie internal behavior"
|
5
|
+
#
|
6
|
+
# Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
7
|
+
#
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
9
|
+
# modification, are permitted provided that the following conditions
|
10
|
+
# are met:
|
11
|
+
#
|
12
|
+
# 1. Redistributions of source code must retain the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer.
|
14
|
+
#
|
15
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
16
|
+
# notice, this list of conditions and the following disclaimer in the
|
17
|
+
# documentation and/or other materials provided with the distribution.
|
18
|
+
#
|
19
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
20
|
+
# may be used to endorse or promote products derived from this
|
21
|
+
# software without specific prior written permission.
|
22
|
+
#
|
23
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
24
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
25
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
26
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
27
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
28
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
29
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
30
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
31
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
32
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
33
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
34
|
+
#
|
35
|
+
require 'distributedtrie'
|
36
|
+
include DistributedTrie
|
37
|
+
|
38
|
+
# Guard spec: the suite must not be running on a Ruby 1.8.x interpreter.
describe Trie, "Ruby version " do
  it "should" do
    legacy_ruby = RUBY_VERSION.match( /^1[.]8/ )
    legacy_ruby.should_not be_true
  end
end
|
43
|
+
|
44
|
+
# Checks the string that _mergeIndex returns for a series of
# space-separated index strings.
describe Trie, "when _mergeIndex is called " do
  before do
    @kvs  = DistributedTrie::KvsIf.new
    @trie = Trie.new( @kvs, "TEST::" )
  end

  it "should" do
    # [ input index string, expected merged index string ]
    [
      [ "a$ a",               "a$"                ],
      [ " a$",                "a$"                ],
      [ "a$ b",               "a$ b"              ],
      [ "a$ a$ a$ a$",        "a$"                ],
      [ "a$ a a a a a a a",   "a$"                ],
      [ "b b b b b b ",       "b"                 ],
      [ "a b c d e f g",      "a b c d e f g"     ],
      [ "a$ b c$ d e$ f g$",  "a$ c$ e$ g$ b d f" ]
    ].each do |index_str, merged|
      @trie._mergeIndex( index_str ).should == merged
    end
  end
end
|
60
|
+
|
61
|
+
# Checks the uncommitted work tree after each successive addKey! call.
describe Trie, "when _createTree is called " do
  before do
    @kvs  = DistributedTrie::KvsIf.new
    @trie = Trie.new( @kvs, "TEST::" )
  end

  it "should" do
    # [ key to add, expected work tree right after adding it ]
    [
      [ "a",  { '' => 'a$' }                             ],
      [ "ab", { '' => 'a$', 'a' => 'b$' }                ],
      [ "in", { '' => 'a$ i', 'a' => 'b$', 'i' => 'n$' } ]
    ].each do |key, work_tree|
      @trie.addKey!( key )
      @trie._getInternal( :work ).should == work_tree
    end
  end
end
|
76
|
+
|
77
|
+
# Adds three nested words, commits them, and checks both the records
# written to the KVS and the results of the read APIs.
describe Trie, "when _commit is called " do
  before do
    @kvs  = DistributedTrie::KvsIf.new
    @trie = Trie.new( @kvs, "TEST::" )
  end

  it "should" do
    # [ key to add, expected work tree right after adding it ]
    [
      [ "app",
        { ""=>"a", "a"=>"p", "ap"=>"p$" } ],
      [ "apple",
        { ""=>"a", "a"=>"p", "ap"=>"p$", "app"=>"l", "appl"=>"e$" } ],
      [ "application",
        { ""=>"a", "a"=>"p", "ap"=>"p$", "app"=>"l", "appl"=>"e$ i",
          "appli"=>"c", "applic"=>"a", "applica"=>"t", "applicat"=>"i",
          "applicati"=>"o", "applicatio"=>"n$" } ]
    ].each do |key, work_tree|
      @trie.addKey!( key )
      @trie._getInternal( :work ).should == work_tree
    end

    @trie.commit!()
    @trie._getInternal( :work ).should == {}

    stored = [
      ["TEST::",           "a"],
      ["TEST::a",          "p"],
      ["TEST::ap",         "p$"],
      ["TEST::app",        "l"],
      ["TEST::appl",       "e$ i"],
      ["TEST::appli",      "c"],
      ["TEST::applic",     "a"],
      ["TEST::applica",    "t"],
      ["TEST::applicat",   "i"],
      ["TEST::applicati",  "o"],
      ["TEST::applicatio", "n$"]
    ]
    @kvs._getInternal( ).should == stored

    # [ prefix, expected listChilds result ]
    [
      [ "",      ["app", "apple", "application"] ],
      [ "ap",    ["app", "apple", "application"] ],
      [ "app",   ["apple", "application"]        ],
      [ "appl",  ["apple", "application"]        ],
      [ "appli", ["application"]                 ]
    ].each do |prefix, children|
      @trie.listChilds( prefix ).should == children
    end

    # [ key, expected exactMatchSearch result ]
    [
      [ "",      []        ],
      [ "ap",    []        ],
      [ "app",   ["app"]   ],
      [ "appl",  []        ],
      [ "appli", []        ],
      [ "apple", ["apple"] ]
    ].each do |key, hits|
      @trie.exactMatchSearch( key ).should == hits
    end

    # [ prefix, expected commonPrefixSearch result ]
    [
      [ "",      ["app", "apple", "application"] ],
      [ "ap",    ["app", "apple", "application"] ],
      [ "app",   ["app", "apple", "application"] ],
      [ "appl",  ["apple", "application"]        ],
      [ "appli", ["application"]                 ]
    ].each do |prefix, hits|
      @trie.commonPrefixSearch( prefix ).should == hits
    end
  end
end
|
124
|
+
|
125
|
+
|
126
|
+
# Walks through the public API: incremental addKey!, commit!, and then the
# exact / predicate / prefix / range searches over the committed keys.
describe Trie, "when api methods are called " do
  before do
    @kvs  = DistributedTrie::KvsIf.new
    @trie = Trie.new( @kvs, "TEST::" )
  end

  it "should" do
    # [ key to add, expected work tree right after adding it ]
    [
      [ "i",    { ""=>"i$" } ],
      [ "in",   { ""=>"i$", "i"=>"n$" } ],
      [ "ab1",  { ""=>"i$ a", "i"=>"n$", "a"=>"b", "ab"=>"1$" } ],
      [ "ab2",  { ""=>"i$ a", "i"=>"n$", "a"=>"b", "ab"=>"1$ 2$" } ],
      [ "ab3",  { ""=>"i$ a", "i"=>"n$", "a"=>"b", "ab"=>"1$ 2$ 3$" } ],
      [ "abc4", { ""=>"i$ a", "i"=>"n$", "a"=>"b", "ab"=>"1$ 2$ 3$ c", "abc"=>"4$" } ]
    ].each do |key, work_tree|
      @trie.addKey!( key )
      @trie._getInternal( :work ).should == work_tree
    end

    @trie.commit!()
    @trie._getInternal( :work ).should == {}

    stored = [
      ["TEST::",    "i$ a"],
      ["TEST::i",   "n$"],
      ["TEST::a",   "b"],
      ["TEST::ab",  "1$ 2$ 3$ c"],
      ["TEST::abc", "4$"]
    ]
    @kvs._getInternal( ).should == stored

    # [ key, expected exactMatchSearch result ]
    [
      [ "",      []       ],
      [ "a",     []       ],
      [ "ab1",   ["ab1"]  ],
      [ "ab3",   ["ab3"]  ],
      [ "xxx",   []       ],
      [ "abc4",  ["abc4"] ],
      [ "abc4A", []       ]
    ].each do |key, hits|
      @trie.exactMatchSearch( key ).should == hits
    end

    # Predicate searches filtered by key length.
    @trie.search( '' )   { |key| key.size == 0 }.should == []
    @trie.search( '' )   { |key| key.size == 1 }.should == ["i"]
    @trie.search( '' )   { |key| key.size <= 2 }.should == ["in", "i"]
    @trie.search( '' )   { |key| key.size <  4 }.should == ["in", "i", "ab1", "ab2", "ab3"]
    @trie.search( 'ab' ) { |key| true }.should == ["ab1", "ab2", "ab3", "abc4"]

    # [ prefix, expected commonPrefixSearch result ]
    [
      [ 'ab', ["ab1", "ab2", "ab3", "abc4"] ],
      [ 'i',  ["i", "in"]                   ],
      [ 'in', ["in"]                        ]
    ].each do |prefix, hits|
      @trie.commonPrefixSearch( prefix ).should == hits
    end

    # Predicate searches filtered by key value.
    @trie.search( '' )   { |key| key == "ab1" }.should == []
    @trie.search( 'ab' ) { |key| key == "ab1" }.should == ["ab1"]
    @trie.search( 'ab' ) { |key| ( 'ab1' < key ) && ( key < 'ab4' ) }.should == ["ab2", "ab3"]
    @trie.search( '' )   { |key| ( '0' < key ) && ( key < 'z' ) }.should == ["in", "i", "ab1", "ab2", "ab3", "abc4"]

    @trie.search( '' )   { |key| ( 'a' <= key ) && ( key < 'b' ) }.should == ["ab1", "ab2", "ab3", "abc4"]

    @trie.rangeSearch( 'ab1', 'ab3' ).should == ["ab1", "ab2", "ab3"]
    @trie.rangeSearch( 'ab2', 'abd' ).should == ["ab2", "ab3", "abc4"]
  end
end
|
181
|
+
|
182
|
+
|
183
|
+
# Repeated add/commit cycles: before each commit! the KVS must still hold the
# previous snapshot, and afterwards the work tree must be empty and the KVS
# must hold the expected records.
describe Trie, "when commit! are called " do
  before do
    @kvs  = DistributedTrie::KvsIf.new
    @trie = Trie.new( @kvs, "TEST::" )
  end

  it "should" do
    root_a  = [[ "TEST::", "a$" ]]
    root_ab = [[ "TEST::", "a$ b$" ]]

    # first key
    @trie.addKey!( "a" )
    @trie._getInternal( :work ).should == { ""=>"a$" }
    @kvs._getInternal( ).should == []
    @trie.commit!
    @trie._getInternal( :work ).should == {}
    @kvs._getInternal( ).should == root_a

    # second key under the same root node
    @trie.addKey!( "b" )
    @trie._getInternal( :work ).should == { ""=>"b$" }
    @kvs._getInternal( ).should == root_a
    @trie.commit!
    @trie._getInternal( :work ).should == {}
    @kvs._getInternal( ).should == root_ab

    # re-adding keys that are already stored
    @trie.addKey!( "a" )
    @trie.addKey!( "b" )
    @trie._getInternal( :work ).should == { ""=>"a$ b$" }
    @kvs._getInternal( ).should == root_ab
    @trie.commit!
    @trie._getInternal( :work ).should == {}
    @kvs._getInternal( ).should == root_ab

    # a longer key
    @trie.addKey!( "01234" )
    @trie._getInternal( :work ).should == { ""=>"0", "0"=>"1", "01"=>"2", "012"=>"3", "0123"=>"4$" }
    @kvs._getInternal( ).should == root_ab
    @trie.commit!
    @trie._getInternal( :work ).should == {}
    with_01234 = [
      ["TEST::",     "a$ b$ 0"],
      ["TEST::0",    "1"],
      ["TEST::01",   "2"],
      ["TEST::012",  "3"],
      ["TEST::0123", "4$"]
    ]
    @kvs._getInternal( ).should == with_01234

    # a sibling key sharing the "0123" prefix
    @trie.addKey!( "0123Z" )
    @trie._getInternal( :work ).should == { ""=>"0", "0"=>"1", "01"=>"2", "012"=>"3", "0123"=>"Z$" }
    @kvs._getInternal( ).should == with_01234
    @trie.commit!
    @trie._getInternal( :work ).should == {}
    @kvs._getInternal( ).should == [
      ["TEST::",     "a$ b$ 0"],
      ["TEST::0",    "1"],
      ["TEST::01",   "2"],
      ["TEST::012",  "3"],
      ["TEST::0123", "4$ Z$"]
    ]
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'rspec/core/formatters/progress_formatter'
|
2
|
+
|
3
|
+
# Example of a formatter with custom backtrace printing. Run me with:
|
4
|
+
# ruby bin/spec xxxxx.rb -r ./test/rspec_formatter_for_emacs.rb -f CustomFormatter
|
5
|
+
# Progress formatter that rewrites backtrace lines so that each `.rb` path
# is expanded to an absolute path before it is printed.
class CustomFormatter < RSpec::Core::Formatters::ProgressFormatter
  # Rewrites one backtrace line.
  #
  # Returns nil when the configuration reports the line as cleaned from the
  # backtrace. Otherwise every "<path>.rb:<lineno>:in " fragment gets its
  # path expanded; when the path mentions both "lib" and "rspec" the
  # ":in " marker is replaced by " IN ".
  def backtrace_line(line)
    return nil if configuration.cleaned_from_backtrace?(line)

    line.gsub(/([^:]*\.rb):([0-9]+):in /) do
      # Capture both groups before the match calls below overwrite $1 / $2.
      file   = $1.to_s
      number = $2.to_s
      marker = ( file.match( /lib/ ) && file.match( /rspec/ ) ) ? " IN " : ":in "
      "#{File.expand_path(file)}:#{number}#{marker}"
    end
  end

  # Prints the formatted backtrace of the example's exception, one line at a
  # time, wrapped in the formatter's cyan helper.
  def dump_backtrace(example)
    raw_trace = example.execution_result[:exception].backtrace
    format_backtrace(raw_trace, example).each do |backtrace_info|
      output.puts cyan(" #{backtrace_info}")
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# -*- encoding: utf-8 -*-
|
3
|
+
#
|
4
|
+
# usecase_spec.rb - "RSpec file for ordinary usecase"
|
5
|
+
#
|
6
|
+
# Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
7
|
+
#
|
8
|
+
# Redistribution and use in source and binary forms, with or without
|
9
|
+
# modification, are permitted provided that the following conditions
|
10
|
+
# are met:
|
11
|
+
#
|
12
|
+
# 1. Redistributions of source code must retain the above copyright
|
13
|
+
# notice, this list of conditions and the following disclaimer.
|
14
|
+
#
|
15
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
16
|
+
# notice, this list of conditions and the following disclaimer in the
|
17
|
+
# documentation and/or other materials provided with the distribution.
|
18
|
+
#
|
19
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
20
|
+
# may be used to endorse or promote products derived from this
|
21
|
+
# software without specific prior written permission.
|
22
|
+
#
|
23
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
24
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
25
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
26
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
27
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
28
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
29
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
30
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
31
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
32
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
33
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
34
|
+
#
|
35
|
+
require 'distributedtrie'
|
36
|
+
include DistributedTrie
|
37
|
+
|
38
|
+
# Auto-complete use case: after committing a small dictionary, a prefix
# search returns every registered word that starts with the typed text.
describe Trie, "when you create auto complete application " do
  before do
    @kvs  = DistributedTrie::KvsIf.new
    @trie = Trie.new( @kvs, "TEST::" )
  end

  it "should" do
    dictionary = [
      "i", "in", "inn",
      "communication", "command", "come", "coming", "code",
      "copy", "copyright"
    ]
    dictionary.each { |word| @trie.addKey!( word ) }
    @trie.commit!

    # [ typed prefix, expected completions ]
    [
      [ "i",    ["i", "in", "inn"] ],
      [ "in",   ["in", "inn"] ],
      [ "c",    ["come", "communication", "command", "coming", "code", "copy", "copyright"] ],
      [ "co",   ["come", "communication", "command", "coming", "code", "copy", "copyright"] ],
      [ "comm", ["communication", "command"] ],
      [ "cod",  ["code"] ],
      [ "cop",  ["copy", "copyright"] ]
    ].each do |prefix, completions|
      @trie.commonPrefixSearch( prefix ).should == completions
    end

    @trie.exactMatchSearch( "copy" ).should == ["copy"]
  end
end
|
68
|
+
|
69
|
+
# Rounds the score of every [score, word] pair so that floating-point
# results can be compared against literal expectations.
#
# Only the first two elements of each pair are kept; the second one is
# passed through untouched.
#
# @param arr    [Array<Array>] pairs whose first element is numeric
# @param digits [Integer]      non-negative number of decimal places to keep;
#                              the default of 3 matches the factor of 1000
#                              that used to be hard-coded here
# @return [Array<Array>] a new array of [rounded_score, word] pairs
def _roundDistance( arr, digits = 3 )
  scale = 10 ** digits
  arr.map { |score, word|
    # Scale up, round to an integer, then scale back down as a Float.
    [ (score * scale).round / scale.to_f, word ]
  }
end
|
75
|
+
|
76
|
+
# Fuzzy-search use case: fuzzySearch results (rounded through
# _roundDistance) are checked against literal [score, word] lists, followed
# by direct checks that filter the word lists with the Jaro-Winkler matcher.
describe Trie, "when you create fuzzy-string-search application " do
  before do
    @kvs    = DistributedTrie::KvsIf.new
    @trie   = Trie.new( @kvs, "TEST::" )
    @words  = [ "communication", "community", "command", "comedy",
                "coming", "code", "copy", "copyright" ]
    @words2 = [ "henkan", "henkann", "henka", "hennka", "henkaq" ]
  end

  it "should" do
    ( @words + @words2 ).each { |word| @trie.addKey!( word ) }
    @trie.commit!

    # [ fuzzySearch arguments, expected rounded result ]
    [
      [ ["come"],                [[0.933, "comedy"]] ],
      [ ["come", 0.85],          [[0.933, "comedy"], [0.867, "code"]] ],
      [ ["come", 0.82],          [[0.933, "comedy"], [0.867, "code"], [0.825, "coming"]] ],
      [ ["come", 0.80],          [[0.933, "comedy"], [0.867, "code"], [0.825, "coming"], [0.808, "command"]] ],

      [ ["comm"],                [[0.914, "command"]] ],
      [ ["communication", 0.92], [[1.0, "communication"], [0.924, "community"]] ],

      [ ["copylight"],           [[0.956, "copyright"]] ],
      [ ["copyrigh", 0.99],      [[0.993, "copyright"]] ],
      [ ["copyleft"],            [[0.9, "copy"]] ],

      [ ["henkan", 0.94],        [[1.0, "henkan"], [0.981, "henkann"], [0.972, "henka"], [0.944, "henkaq"]] ]
    ].each do |args, expected|
      _roundDistance( @trie.fuzzySearch( *args )).should == expected
    end

    jarow = FuzzyStringMatch::JaroWinkler.create( )
    @words.select { |w| 0.85 <= jarow.getDistance( w, "come" ) }.should == ["comedy", "code"]
    @words.select { |w| 0.90 <= jarow.getDistance( w, "copylight" ) }.should == ["copyright"]
    @words.select { |w| 0.92 <= jarow.getDistance( w, "communication" ) }.should == ["communication", "community"]
    @words.select { |w| 0.90 <= jarow.getDistance( w, "copyleft" ) }.should == ["copy"]

    @words2.select { |w| 0.94 < jarow.getDistance( w, "henkan" ) }.should == ["henkan", "henkann", "henka", "hennka", "henkaq"]
  end
end
|