distributed-trie 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,46 @@
1
+ #
2
+ # Distributed Trie / KvsRedis
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ require 'redis'
37
+ module DistributedTrie
38
+
39
# Redis implementation of the key-value store backend.
#
# Only the connection is set up here; the storage operations are presumably
# provided by KvsBase (distributedtrie/kvs/base) on top of @db -- verify there.
class KvsRedis < KvsBase
  # The underlying Redis client.
  attr_reader :db

  # hostname ... host name of the Redis server (default: "localhost").
  def initialize( hostname = "localhost" )
    @db = Redis.new( :host => hostname )
  end
end
46
+ end
@@ -0,0 +1,86 @@
1
+ #
2
+ # Distributed Trie / KvsSdb
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ module DistributedTrie
37
+
38
# AWS SimpleDB implementation.
#
# The aws-sdk gem is optional: when it cannot be loaded, a disabled stand-in
# class with the same constructor is defined instead, so callers can check
# enabled?() before using the backend.
begin
  require 'aws-sdk'
  class KvsSdb < KvsBase
    # The AWS::SimpleDB handle created in initialize.
    attr_reader :db

    # domainName ... name of the SimpleDB domain; it is passed to domains.create.
    #
    # Credentials are read from the AMAZON_ACCESS_KEY_ID and
    # AMAZON_SECRET_ACCESS_KEY environment variables.
    def initialize( domainName )
      printf( "Amazon SimpleDB access_key_id: %s\n", ENV['AMAZON_ACCESS_KEY_ID'])
      # Never echo the secret itself; only report whether it is configured.
      printf( "Amazon SimpleDB secret_access_key: %s\n",
              ENV['AMAZON_SECRET_ACCESS_KEY'] ? "(set)" : "(not set)" )
      @domainName = domainName
      # NOTE(review): :use_ssl => false sends requests over plain HTTP.
      # Kept as in the original; confirm this is intended.
      @db = AWS::SimpleDB.new(
        :access_key_id      => ENV['AMAZON_ACCESS_KEY_ID'],
        :secret_access_key  => ENV['AMAZON_SECRET_ACCESS_KEY'],
        :simple_db_endpoint => 'sdb.ap-northeast-1.amazonaws.com',
        :use_ssl            => false )
      @domain = @db.domains.create( domainName )
    end

    # Stores value in the 'val' attribute of the item named key.
    # timeout is accepted but not used by this backend.
    def put!( key, value, timeout = 0 )
      item = @domain.items[ key ]
      # force_encoding mutates its receiver, so tag a copy instead of the
      # caller's string (which may also be frozen).
      item.attributes[ 'val' ] = value.dup.force_encoding("ASCII-8BIT")
      puts "simpleDB put: " + key
    end

    # Returns the 'val' attribute of the item named key tagged as UTF-8,
    # or fallback when the item has no such attribute.
    def get( key, fallback = false )
      res = @db.client.get_attributes(
        :domain_name     => @domainName,
        :item_name       => key,
        :attribute_names => ['val'],
        :consistent_read => false
      )
      val = nil
      # When several attributes come back, the last one wins.
      res.attributes.each { |x| val = x.value }
      return fallback unless val

      # Re-tag before building the log line: concatenating a binary-tagged
      # value with a non-ASCII key raises Encoding::CompatibilityError.
      val.force_encoding("UTF-8")
      puts "simpleDB get: " + key + "," + val
      val
    end

    def enabled?() true end
  end
rescue LoadError
  # aws-sdk is not installed: define a stand-in that reports itself disabled.
  class KvsSdb < KvsBase
    def initialize( domainName ) end
    def enabled?() false end
  end
end
86
+ end
@@ -0,0 +1,52 @@
1
+ #
2
+ # Distributed Trie / KvsTc
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ require 'tokyocabinet'
37
+ module DistributedTrie
38
+
39
# Tokyo Cabinet (hash database) implementation.
class KvsTc < KvsBase
  # The TokyoCabinet::HDB handle opened in initialize.
  attr_reader :db

  # dbFilename ... path of the database file; it must end in ".tch".
  #
  # Raises ArgumentError for any other suffix. The file is opened in
  # writer mode and created when missing (OWRITER | OCREAT).
  def initialize( dbFilename )
    # \z anchors at the real end of the string; `$` would also accept a name
    # such as "x.tch\nfoo" because it matches at every end of line.
    unless dbFilename.match( /[.]tch\z/ )
      raise ArgumentError, "Info KvsTc.new() method get only '*.tch' suffix"
    end
    @db = TokyoCabinet::HDB.new( )
    # NOTE(review): the result of open is not checked here -- confirm how
    # open failures are expected to surface.
    @db.open( dbFilename, TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT )
  end
end
52
+ end
@@ -0,0 +1,69 @@
1
+ #
2
+ # Distributed Trie / KvsIF
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ module DistributedTrie
36
+
37
# Example of Key-Value Store Interface, backed by an in-memory Hash.
# Please implement your version like this.
class KvsIf
  def initialize()
    @data = Hash.new
  end

  # Stores value under key and returns value.
  # timeout is part of the interface but is ignored by this implementation.
  def put!( key, value, timeout = 0 )
    @data[key] = value
  end

  # Returns the value stored under key, or fallback when there is none
  # (a stored nil/false also yields fallback).
  def get( key, fallback = false )
    @data[key] || fallback
  end

  # Removes key from the store.
  # Returns the removed value, or nil when key was not present.
  def delete( key )
    @data.delete( key )
  end

  # Returns every stored entry as [ [key, value], ... ] in insertion order.
  def _getInternal( )
    @data.to_a
  end
end
68
+
69
+ end
@@ -0,0 +1,220 @@
1
+ #
2
+ # Distributed Trie / Trie
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'fuzzystringmatch'
36
+ module DistributedTrie
37
+
38
# Trie whose nodes live in an external key-value store.
#
# A node is stored under the KVS key (prefixString + path from the root). Its
# value is a space separated index of the letters that may follow that path:
#   "a"  ... non-terminal letter: only longer keys continue through it
#   "a$" ... terminal letter: path + "a" is itself a registered key
#
# Keys passed to addKey! are buffered in memory until commit! writes them.
#
# NOTE: index entries are split on spaces and classified by their length
# (1 or 2), so a key containing a space cannot be represented.
class Trie

  # kvsif        ... Please implement like DistributedTrie::KvsIf class and specify instance of it.
  # prefixString ... string prepended to every key read from / written to kvsif.
  def initialize( kvsif, prefixString )
    @kvsif        = kvsif
    @req          = Hash.new
    @prefixString = prefixString
    @key_hash     = Hash.new
  end

  # Buffers key for the next commit!. Returns the pending node table.
  def addKey!( key )
    _createTree( key )
  end

  # Not implemented: currently does nothing.
  def deleteKey!( key )
  end

  # Merges every buffered node index into the stored one and clears the buffer.
  def commit!()
    @key_hash.each do |node, pending|
      stored = @kvsif.get( @prefixString + node, "" )
      @kvsif.put!( @prefixString + node, _mergeIndex( stored + " " + pending ))
    end
    @key_hash = Hash.new
  end

  # Discards everything buffered since the last commit!.
  def cancel()
    @key_hash = Hash.new
  end

  # Returns every committed key strictly below key (key itself excluded).
  def listChilds( key )
    (term, nonTerm) = _getNextLetters( key )
    result = term.map { |x| key + x }
    # Terminal letters can have descendants too, so walk both lists.
    (term + nonTerm).each { |x|
      result += listChilds( key + x )
    }
    result
  end

  # Returns key (when it is registered) followed by every key below it.
  def commonPrefixSearch( key )
    exactMatchSearch( key ) + listChilds( key )
  end

  # Returns [key] when key is registered, [] otherwise.
  def exactMatchSearch( key )
    # Look at the parent node and test whether the last letter is terminal there.
    (term, _nonTerm) = _getNextLetters( key[0...(key.size-1)] )
    term.include?( key[-1] ) ? [key] : []
  end

  # Depth-first walk below key, steered by block.
  #
  # block is called as block.call( candidate, termFlag ):
  #   termFlag == true  ... "should this registered key be collected?"
  #   termFlag == false ... "should the walk descend below this path?"
  # A collected key is always descended into as well.
  def _searchWith( key, &block )
    result = []
    (term, nonTerm) = _getNextLetters( key )
    term.each { |x|
      arg = key + x
      if block.call( arg, true )
        result += _searchWith( arg, &block )
        result << arg
      elsif block.call( arg, false )
        result += _searchWith( arg, &block )
      end
    }
    nonTerm.each { |x|
      arg = key + x
      if block.call( arg, false )
        result += _searchWith( arg, &block )
      end
    }
    result
  end

  # Public entry point of _searchWith; entryNode is the path to start from.
  def search( entryNode, &block )
    _searchWith( entryNode, &block )
  end

  # Returns every registered key k with from <= k <= to.
  def rangeSearch( from, to )
    search( '' ) { |x,termFlag|
      # Inner paths are compared against equally long heads of the bounds so
      # that the walk can still descend towards keys inside the range.
      # A registered key must satisfy the complete lower bound, otherwise a
      # proper prefix of `from` (e.g. "b" for "bc") would be reported.
      _from = termFlag ? from : from[0...x.size]
      _to   = to[0...x.size]
      ( _from <= x ) && ( x <= _to )
    }
  end

  # return: [ [distance, keyword], [distance, keyword], ... ]
  #         ordered by descending distance value.
  # Only keywords whose Jaro-Winkler value against searchWord reaches
  # threshold are returned.
  def fuzzySearch( searchWord, threshold = 0.90 )
    jarow = FuzzyStringMatch::JaroWinkler.create( )
    matched = search( '' ) { |x,termFlag|
      _word = searchWord
      if !termFlag && (x.size < searchWord.size)
        # An inner path shorter than searchWord is compared against the
        # equally long head of searchWord, both padded with spaces to the
        # full length of searchWord.
        _word = searchWord[0...x.size].ljust( searchWord.size )
        x     = x.ljust( searchWord.size )
      end
      threshold <= jarow.getDistance( x, _word )
    }
    matched.map { |k| [ jarow.getDistance( searchWord, k ), k ] }.sort_by {|item| 1.0 - item[0]}
  end

  # Returns [ terminalLetters, nonTerminalLetters ] stored for node,
  # or [ [], [] ] when the node does not exist.
  def _getNextLetters( node )
    str = @kvsif.get( @prefixString + node )
    str ? _parseIndex( str ) : [ [], [] ]
  end

  # Normalizes an index string: terminal entries first, duplicates removed,
  # and a non-terminal letter dropped when the same letter is terminal.
  #   "a$ a" => "a$"    # merge into terminal
  #   " a$"  => "a$"    # strip spaces
  #   "a$ b" => "a$ b"  # already merged
  def _mergeIndex( indexStr )
    (term, nonTerm) = _parseIndex( indexStr )
    term = term.uniq
    arr  = term.map { |x| x + '$' }
    arr += nonTerm.uniq.reject { |x| term.include?( x ) }
    arr.join( ' ' )
  end

  # Splits an index string into [ terminalLetters, nonTerminalLetters ].
  # Entries of any other length (including empty ones) are ignored.
  def _parseIndex( indexStr )
    term    = []
    nonTerm = []
    indexStr.split( /[ ]+/ ).each { |entry|
      case entry.size
      when 1
        nonTerm << entry
      when 2
        term << entry[0...1]
      end
    }
    [ term, nonTerm ]
  end

  # Adds one index entry per letter of key to the pending node table:
  # the node for each prefix gets the following letter, marked with '$'
  # when it is the last letter of key. Returns the pending node table.
  def _createTree( key )
    prefix = ''
    key.split( // ).each { |c|
      entry   = (prefix.size == (key.size-1)) ? c + '$' : c
      pending = @key_hash.fetch( prefix, '' )
      @key_hash[ prefix ] = _mergeIndex( pending + ' ' + entry )
      prefix += c
    }
    @key_hash
  end

  # Returns the pending (uncommitted) node table. type is not used.
  def _getInternal( type = :work )
    @key_hash
  end
end
220
+ end