distributed-trie 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,46 @@
1
+ #
2
+ # Distributed Trie / KvsRedis
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ require 'redis'
37
+ module DistributedTrie
38
+
39
+ # Redis implementation
40
# Key-value store backend that keeps its data in a Redis server.
#
# This class only creates the connection handle; it defines no storage
# methods of its own (presumably they are provided by KvsBase, which is
# defined in 'distributedtrie/kvs/base' -- confirm there).
class KvsRedis < KvsBase
  # Underlying Redis client, exposed the same way KvsTc and KvsSdb
  # expose their handles.
  attr_reader :db

  # hostname ... host name of the Redis server (default: "localhost")
  def initialize( hostname = "localhost" )
    @db = Redis.new( :host => hostname )
  end
end
46
+ end
@@ -0,0 +1,86 @@
1
+ #
2
+ # Distributed Trie / KvsSdb
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ module DistributedTrie
37
+
38
+ # AWS SimpleDB implementation
39
begin
  require 'aws-sdk'

  # Key-value store backend on top of AWS SimpleDB.
  #
  # Credentials are read from the environment variables
  # AMAZON_ACCESS_KEY_ID and AMAZON_SECRET_ACCESS_KEY.
  # This definition is used only when the 'aws-sdk' library can be
  # loaded; otherwise the disabled stand-in in the rescue clause below
  # is defined instead.
  class KvsSdb < KvsBase
    attr_reader :db

    # domainName ... name of the SimpleDB domain that holds the items.
    #                The domain is created through domains.create.
    def initialize( domainName )
      printf( "Amazon SimpleDB access_key_id: %s\n", ENV['AMAZON_ACCESS_KEY_ID'])
      # The secret access key is intentionally never printed: writing it
      # to stdout leaks the credential into terminals and log files.
      @domainName = domainName
      @db = AWS::SimpleDB.new(
        :access_key_id      => ENV['AMAZON_ACCESS_KEY_ID'],
        :secret_access_key  => ENV['AMAZON_SECRET_ACCESS_KEY'],
        :simple_db_endpoint => 'sdb.ap-northeast-1.amazonaws.com',
        # NOTE(review): requests are sent without SSL. Kept as-is to
        # preserve existing behavior; consider enabling SSL.
        :use_ssl            => false )
      @domain = @db.domains.create( domainName )
    end

    # Stores value in attribute 'val' of the item named key.
    # timeout is accepted for interface compatibility and is not used.
    def put!( key, value, timeout = 0 )
      item = @domain.items[ key ]
      # Re-tag a copy as binary so that the caller's string is neither
      # modified nor required to be unfrozen.
      item.attributes[ 'val' ] = value.dup.force_encoding("ASCII-8BIT")
      puts "simpleDB put: " + key
    end

    # Reads attribute 'val' of the item named key (non-consistent read).
    # Returns the value tagged as UTF-8, or fallback when the item has
    # no such attribute.
    def get( key, fallback = false )
      res = @db.client.get_attributes(
        :domain_name     => @domainName,
        :item_name       => key,
        :attribute_names => ['val'],
        :consistent_read => false
      )
      val = nil
      # When several attributes come back, the last one wins.
      res.attributes.each { |attr| val = attr.value }
      if val
        puts "simpleDB get: " + key + "," + val
        val.force_encoding("UTF-8")
      else
        fallback
      end
    end

    # true: the SimpleDB backend is usable (aws-sdk was loaded).
    def enabled?() true end
  end
rescue LoadError
  # Stand-in used when 'aws-sdk' is not installed: it can be
  # instantiated but reports itself as disabled.
  class KvsSdb < KvsBase
    def initialize( domainName ) end
    def enabled?() false end
  end
end
86
+ end
@@ -0,0 +1,52 @@
1
+ #
2
+ # Distributed Trie / KvsTc
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'distributedtrie/kvs/base'
36
+ require 'tokyocabinet'
37
+ module DistributedTrie
38
+
39
+ # Tokyo Cabinet implementation
40
# Key-value store backend that keeps its data in a Tokyo Cabinet
# hash database file.
class KvsTc < KvsBase
  attr_reader :db

  # dbFilename ... path of the hash database; it must end with '.tch'.
  #                The file is opened as writer and created if missing.
  #
  # Raises ArgumentError when the suffix is not '.tch' and IOError when
  # Tokyo Cabinet cannot open the file.
  def initialize( dbFilename )
    # \z anchors at the very end of the string; '$' would also accept
    # names such as "x.tch\nfoo" because it matches before a newline.
    unless dbFilename.match( /[.]tch\z/ )
      raise ArgumentError, "Info KvsTc.new() method get only '*.tch' suffix"
    end
    @db = TokyoCabinet::HDB.new( )
    # HDB#open reports failure through its return value; surface it here
    # instead of handing out an unopened database.
    unless @db.open( dbFilename, TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT )
      raise IOError, "KvsTc: cannot open '#{dbFilename}': #{@db.errmsg( @db.ecode )}"
    end
  end
end
52
+ end
@@ -0,0 +1,69 @@
1
+ #
2
+ # Distributed Trie / KvsIF
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ module DistributedTrie
36
+
37
+ # Example of Key-Value Store Interface
38
+ # Please implement your version like this.
39
# In-memory reference implementation of the key-value store interface
# expected by DistributedTrie::Trie. Data lives in a plain Hash.
class KvsIf
  def initialize()
    @data = {}
  end

  # Stores value under key and returns value.
  # timeout is part of the interface but is not used by this
  # in-memory implementation.
  def put!( key, value, timeout = 0 )
    @data[key] = value
  end

  # Returns the value stored under key, or fallback when there is no
  # value (a stored nil/false also yields fallback).
  def get( key, fallback = false )
    @data[key] || fallback
  end

  # Removes key from the store.
  # Returns the removed value, or nil when key was not present.
  def delete( key )
    @data.delete( key )
  end

  # Returns the whole content as an array of [key, value] pairs.
  def _getInternal( )
    @data.to_a
  end
end
68
+
69
+ end
@@ -0,0 +1,220 @@
1
+ #
2
+ # Distributed Trie / Trie
3
+ #
4
+ #
5
+ # Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
6
+ #
7
+ # Redistribution and use in source and binary forms, with or without
8
+ # modification, are permitted provided that the following conditions
9
+ # are met:
10
+ #
11
+ # 1. Redistributions of source code must retain the above copyright
12
+ # notice, this list of conditions and the following disclaimer.
13
+ #
14
+ # 2. Redistributions in binary form must reproduce the above copyright
15
+ # notice, this list of conditions and the following disclaimer in the
16
+ # documentation and/or other materials provided with the distribution.
17
+ #
18
+ # 3. Neither the name of the authors nor the names of its contributors
19
+ # may be used to endorse or promote products derived from this
20
+ # software without specific prior written permission.
21
+ #
22
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23
+ # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24
+ # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25
+ # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26
+ # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
28
+ # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
29
+ # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
30
+ # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
31
+ # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
32
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33
+ #
34
+ #
35
+ require 'fuzzystringmatch'
36
+ module DistributedTrie
37
+
38
# Trie whose nodes are stored in an external key-value store.
#
# Every node is stored under the KVS key (prefixString + node path).
# Its value is a space separated list of entries describing the next
# letters below that node:
#   "x"   ... letter x continues to longer keys only (non-terminal)
#   "x$"  ... a registered key ends with letter x (terminal); longer
#             keys may still continue below it
# Keys added with addKey! are staged in memory until commit! is called.
class Trie

  # kvsif        ... key-value store; implement it like the
  #                  DistributedTrie::KvsIf class and pass an instance.
  #                  It must respond to get( key, fallback ) and
  #                  put!( key, value ).
  # prefixString ... string prepended to every key written to the store.
  def initialize( kvsif, prefixString )
    @kvsif        = kvsif
    @req          = Hash.new
    @prefixString = prefixString
    @key_hash     = Hash.new   # staged index entries: node path => index string
  end

  # Stages key for insertion; nothing is written until commit!.
  # Returns the staged entries.
  def addKey!( key )
    _createTree( key )
  end

  # Not implemented: currently does nothing.
  def deleteKey!( key )
  end

  # Merges every staged entry into the index already present in the
  # store, writes it back, and clears the staging area.
  def commit!()
    @key_hash.each { |node, staged|
      storeKey = @prefixString + node
      current  = @kvsif.get( storeKey, "" )
      @kvsif.put!( storeKey, _mergeIndex( current + " " + staged ))
    }
    @key_hash = Hash.new
  end

  # Discards all staged entries.
  def cancel()
    @key_hash = Hash.new
  end

  # Returns every registered key that is strictly longer than key and
  # starts with key.
  def listChilds( key )
    (term, nonTerm) = _getNextLetters( key )
    result = term.map { |x| key + x }
    # Terminal letters are descended as well: longer keys may continue
    # below a terminal.
    (term + nonTerm).each { |x|
      result += listChilds( key + x )
    }
    result
  end

  # Returns key itself (when registered) followed by all registered
  # keys starting with key.
  def commonPrefixSearch( key )
    exactMatchSearch( key ) + listChilds( key )
  end

  # Returns [key] when key is registered, [] otherwise.
  def exactMatchSearch( key )
    # Look at the parent node and check that the last letter of key is
    # marked as terminal there.
    (term, _nonTerm) = _getNextLetters( key[0...(key.size-1)] )
    term.include?( key[-1] ) ? [key] : []
  end

  # Depth-first walk below node key.
  # block is called as block.call( candidate, termFlag ); termFlag is
  # true when candidate is tested as a complete key, false when it is
  # tested only as a path worth descending into. A candidate is
  # collected when the block accepts it with termFlag == true.
  def _searchWith( key, &block )
    result = []
    (term, nonTerm) = _getNextLetters( key )
    term.each { |x|
      arg = key + x
      if block.call( arg, true )
        result += _searchWith( arg, &block )
        result << arg
      elsif block.call( arg, false )
        result += _searchWith( arg, &block )
      end
    }
    nonTerm.each { |x|
      arg = key + x
      if block.call( arg, false )
        result += _searchWith( arg, &block )
      end
    }
    result
  end

  # Generic search starting at entryNode; see _searchWith for the
  # block contract.
  def search( entryNode, &block )
    _searchWith( entryNode, &block )
  end

  # Returns the registered keys k with from <= k <= to. While
  # descending, candidates are compared against from/to truncated to
  # the candidate's length.
  def rangeSearch( from, to )
    search( '' ) { |x, _termFlag|
      _from = from[0...x.size]
      _to   = to  [0...x.size]
      ( _from <= x ) && ( x <= _to )
    }
  end

  # Jaro-Winkler search.
  # return: [ [distance, keyword], [distance, keyword], ... ]
  # sorted by descending distance; only candidates whose distance is at
  # least threshold are returned.
  def fuzzySearch( searchWord, threshold = 0.90 )
    jarow = FuzzyStringMatch::JaroWinkler.create( )
    hits = search( '' ) { |x, termFlag|
      _word = searchWord
      if !termFlag && (x.size < searchWord.size)
        # Partial path: compare only the part of searchWord seen so far
        # and pad both strings with spaces up to the full length.
        padding = ' ' * (searchWord.size - x.size)
        _word   = searchWord[0...x.size] + padding
        x      += padding
      end
      threshold <= jarow.getDistance( x, _word )
    }
    hits.map { |k| [ jarow.getDistance( searchWord, k ), k ] }.sort_by { |item| 1.0 - item[0] }
  end

  # Reads the index stored for node and returns
  # [ terminal letters, non-terminal letters ].
  # Returns [ [], [] ] when the store has nothing for node.
  def _getNextLetters( node )
    str = @kvsif.get( @prefixString + node )
    return [ [], [] ] unless str

    term    = []
    nonTerm = []
    str.split( /[ ]+/ ).each { |x|
      case x.size
      when 1
        nonTerm << x
      when 2
        term << x[0...1]
      end
    }
    [ term, nonTerm ]
  end

  # Normalizes an index string:
  #   "a$ a" => "a$"    # merge into terminal
  #   " a$"  => "a$"    # strip spaces
  #   "a$ b" => "a$ b"  # already merged
  # Terminal entries come first, then the non-terminal ones.
  def _mergeIndex( indexStr )
    term    = []
    nonTerm = []
    indexStr.split( /[ ]+/ ).each { |entry|
      case entry.size
      when 1
        nonTerm << entry
      when 2
        term << entry[0...1]
      end
    }
    arr  = term.uniq.map { |x| x + '$' }
    arr += nonTerm.uniq.reject { |x| term.include?( x ) }
    arr.join( ' ' )
  end

  # Stages the index entries needed for key: one entry per letter,
  # filed under the path leading to that letter; the last letter is
  # marked terminal with '$'. Returns the staged entries.
  def _createTree( key )
    path      = ''
    lastIndex = key.size - 1
    key.split( // ).each_with_index { |c, i|
      entry = (i == lastIndex) ? c + '$' : c
      @key_hash[ path ] = _mergeIndex( @key_hash.fetch( path, '' ) + ' ' + entry )
      path += c
    }
    @key_hash
  end

  # Returns the staged (uncommitted) entries. type is not used.
  def _getInternal( type = :work )
    @key_hash
  end
end
220
+ end