distributed-trie 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gemtest +1 -0
- data/COPYING +33 -0
- data/README.md +59 -0
- data/Rakefile +72 -0
- data/VERSION.yml +4 -0
- data/lib/distributedtrie.rb +38 -0
- data/lib/distributedtrie/kvs/base.rb +55 -0
- data/lib/distributedtrie/kvs/dbm.rb +45 -0
- data/lib/distributedtrie/kvs/dynamodb.rb +81 -0
- data/lib/distributedtrie/kvs/memcache.rb +52 -0
- data/lib/distributedtrie/kvs/redis.rb +46 -0
- data/lib/distributedtrie/kvs/simpledb.rb +86 -0
- data/lib/distributedtrie/kvs/tokyocabinet.rb +52 -0
- data/lib/distributedtrie/kvsif.rb +69 -0
- data/lib/distributedtrie/trie.rb +220 -0
- data/test/bigdata_spec.rb +85 -0
- data/test/internal_spec.rb +241 -0
- data/test/rspec_formatter_for_emacs.rb +26 -0
- data/test/usecase_spec.rb +126 -0
- metadata +130 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
#
|
2
|
+
# Distributed Trie / KvsRedis
|
3
|
+
#
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions
|
9
|
+
# are met:
|
10
|
+
#
|
11
|
+
# 1. Redistributions of source code must retain the above copyright
|
12
|
+
# notice, this list of conditions and the following disclaimer.
|
13
|
+
#
|
14
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
#
|
18
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
# may be used to endorse or promote products derived from this
|
20
|
+
# software without specific prior written permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
#
|
34
|
+
#
|
35
|
+
require 'distributedtrie/kvs/base'
|
36
|
+
require 'redis'
|
37
|
+
module DistributedTrie
|
38
|
+
|
39
|
+
# Redis implementation
|
40
|
+
class KvsRedis < KvsBase
  # Creates a Redis client for the given host and keeps it in @db.
  #
  # hostname ... host name handed to Redis.new as the :host option
  #              (defaults to "localhost").
  def initialize( hostname = "localhost" )
    @db = Redis.new( host: hostname )
  end
end
|
46
|
+
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#
|
2
|
+
# Distributed Trie / KvsSdb
|
3
|
+
#
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions
|
9
|
+
# are met:
|
10
|
+
#
|
11
|
+
# 1. Redistributions of source code must retain the above copyright
|
12
|
+
# notice, this list of conditions and the following disclaimer.
|
13
|
+
#
|
14
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
#
|
18
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
# may be used to endorse or promote products derived from this
|
20
|
+
# software without specific prior written permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
#
|
34
|
+
#
|
35
|
+
require 'distributedtrie/kvs/base'
|
36
|
+
module DistributedTrie
|
37
|
+
|
38
|
+
# AWS SimpleDB implementation
|
39
|
+
begin
|
40
|
+
require 'aws-sdk'
|
41
|
+
class KvsSdb < KvsBase
  attr_reader :db

  # Builds an AWS::SimpleDB client from the AMAZON_ACCESS_KEY_ID and
  # AMAZON_SECRET_ACCESS_KEY environment variables and calls
  # domains.create for the given domain.
  #
  # domainName ... name of the SimpleDB domain used by put! and get.
  def initialize( domainName )
    printf( "Amazon SimpleDB access_key_id: %s\n", ENV['AMAZON_ACCESS_KEY_ID'])
    # Never echo the secret itself: only report whether it is configured.
    printf( "Amazon SimpleDB secret_access_key: %s\n",
            ENV['AMAZON_SECRET_ACCESS_KEY'] ? "(set)" : "(not set)" )
    @domainName = domainName
    # NOTE(review): the endpoint is hard-coded and :use_ssl is false, so
    # requests are sent without TLS -- confirm this is intended.
    @db = AWS::SimpleDB.new(
      :access_key_id      => ENV['AMAZON_ACCESS_KEY_ID'],
      :secret_access_key  => ENV['AMAZON_SECRET_ACCESS_KEY'],
      :simple_db_endpoint => 'sdb.ap-northeast-1.amazonaws.com',
      :use_ssl            => false )
    @domain = @db.domains.create( domainName )
  end

  # Stores value in the 'val' attribute of the item named key.
  #
  # key     ... item name (String).
  # value   ... String to store; it is written tagged as ASCII-8BIT.
  # timeout ... accepted for interface compatibility; not used here.
  def put!( key, value, timeout = 0 )
    item = @domain.items[ key ]
    # Work on a copy: force_encoding changes the receiver in place, which
    # would retag the caller's string and fail on a frozen one.
    item.attributes[ 'val' ] = value.dup.force_encoding("ASCII-8BIT")
    puts "simpleDB put: " + key
  end

  # Reads the 'val' attribute of the item named key.
  #
  # Returns the value tagged as UTF-8, or fallback when the response
  # carries no attribute value.
  def get( key, fallback = false )
    res = @db.client.get_attributes(
      :domain_name     => @domainName,
      :item_name       => key,
      :attribute_names => ['val'],
      :consistent_read => false
    )
    # If several attributes come back, the last one wins.
    val = nil
    res.attributes.each { |x| val = x.value }
    return fallback unless val

    puts "simpleDB get: " + key + "," + val
    val.force_encoding("UTF-8")
  end

  # This variant is only defined when 'aws-sdk' could be required.
  def enabled?() true end
end
|
80
|
+
rescue LoadError
|
81
|
+
class KvsSdb < KvsBase
  # Placeholder defined in the LoadError branch, i.e. when 'aws-sdk'
  # could not be required. It accepts the same constructor argument
  # but does nothing with it.
  def initialize( domainName )
  end

  # Always false for this placeholder.
  def enabled?
    false
  end
end
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
#
|
2
|
+
# Distributed Trie / KvsTc
|
3
|
+
#
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions
|
9
|
+
# are met:
|
10
|
+
#
|
11
|
+
# 1. Redistributions of source code must retain the above copyright
|
12
|
+
# notice, this list of conditions and the following disclaimer.
|
13
|
+
#
|
14
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
#
|
18
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
# may be used to endorse or promote products derived from this
|
20
|
+
# software without specific prior written permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
#
|
34
|
+
#
|
35
|
+
require 'distributedtrie/kvs/base'
|
36
|
+
require 'tokyocabinet'
|
37
|
+
module DistributedTrie
|
38
|
+
|
39
|
+
# Tokyo Cabinet implementation
|
40
|
+
class KvsTc < KvsBase
  attr_reader :db

  # Opens the Tokyo Cabinet hash database stored in dbFilename with the
  # OWRITER | OCREAT flags and keeps the handle in @db.
  #
  # dbFilename ... path of the database file; it must end in ".tch".
  #
  # Raises ArgumentError when the name does not end in ".tch".
  def initialize( dbFilename )
    # \z anchors at the very end of the string. The former `$` also matched
    # before a line break, so names such as "x.tch\nfoo" slipped through.
    unless dbFilename.match( /[.]tch\z/ )
      raise ArgumentError, "Info KvsTc.new() method get only '*.tch' suffix"
    end
    @db = TokyoCabinet::HDB.new( )
    @db.open( dbFilename, TokyoCabinet::HDB::OWRITER | TokyoCabinet::HDB::OCREAT )
  end
end
|
52
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#
|
2
|
+
# Distributed Trie / KvsIF
|
3
|
+
#
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions
|
9
|
+
# are met:
|
10
|
+
#
|
11
|
+
# 1. Redistributions of source code must retain the above copyright
|
12
|
+
# notice, this list of conditions and the following disclaimer.
|
13
|
+
#
|
14
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
#
|
18
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
# may be used to endorse or promote products derived from this
|
20
|
+
# software without specific prior written permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
#
|
34
|
+
#
|
35
|
+
module DistributedTrie
|
36
|
+
|
37
|
+
# Example of Key-Value Store Interface
|
38
|
+
# Please implement your version like this.
|
39
|
+
class KvsIf
  # Creates an empty store backed by an in-memory Hash.
  def initialize()
    @data = Hash.new
  end

  # Stores value under key.
  #
  # timeout ... accepted for interface compatibility; not used here.
  def put!( key, value, timeout = 0 )
    @data[key] = value
  end

  # Returns the value stored under key, or fallback when nothing truthy
  # is stored there (a stored nil or false also yields fallback).
  def get( key, fallback = false )
    @data[key] || fallback
  end

  # Removes key from the store. Returns the removed value, or nil when the
  # key was absent. (This method used to be an empty stub, so entries
  # could never be removed.)
  def delete( key )
    @data.delete( key )
  end

  # Returns the whole content as an array of [key, value] pairs.
  def _getInternal( )
    @data.to_a
  end
end
|
68
|
+
|
69
|
+
end
|
@@ -0,0 +1,220 @@
|
|
1
|
+
#
|
2
|
+
# Distributed Trie / Trie
|
3
|
+
#
|
4
|
+
#
|
5
|
+
# Copyright (c) 2012 Kiyoka Nishiyama <kiyoka@sumibi.org>
|
6
|
+
#
|
7
|
+
# Redistribution and use in source and binary forms, with or without
|
8
|
+
# modification, are permitted provided that the following conditions
|
9
|
+
# are met:
|
10
|
+
#
|
11
|
+
# 1. Redistributions of source code must retain the above copyright
|
12
|
+
# notice, this list of conditions and the following disclaimer.
|
13
|
+
#
|
14
|
+
# 2. Redistributions in binary form must reproduce the above copyright
|
15
|
+
# notice, this list of conditions and the following disclaimer in the
|
16
|
+
# documentation and/or other materials provided with the distribution.
|
17
|
+
#
|
18
|
+
# 3. Neither the name of the authors nor the names of its contributors
|
19
|
+
# may be used to endorse or promote products derived from this
|
20
|
+
# software without specific prior written permission.
|
21
|
+
#
|
22
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
23
|
+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
24
|
+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
25
|
+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
26
|
+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
|
28
|
+
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
29
|
+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
30
|
+
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
31
|
+
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
32
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
33
|
+
#
|
34
|
+
#
|
35
|
+
require 'fuzzystringmatch'
|
36
|
+
module DistributedTrie
|
37
|
+
|
38
|
+
class Trie

  # kvsif        ... key-value store object. This class calls
  #                  get( key, fallback ) and put!( key, value ) on it;
  #                  see DistributedTrie::KvsIf for an example.
  # prefixString ... string prepended to every key read from or written
  #                  to the store.
  def initialize( kvsif, prefixString )
    @kvsif        = kvsif
    @req          = Hash.new
    @prefixString = prefixString
    @key_hash     = Hash.new   # pending (uncommitted) index entries
  end

  # Queues key for insertion. Nothing reaches the store until commit!.
  # Returns the pending index hash.
  def addKey!( key )
    _createTree( key )
  end

  # Currently a no-op: deletion is not implemented.
  def deleteKey!( key )
  end

  # Merges every pending index entry with the entry already stored for the
  # same node, writes the result back, then clears the pending entries.
  def commit!()
    @key_hash.each { |node, pending|
      storeKey = @prefixString + node
      stored   = @kvsif.get( storeKey, "" )
      @kvsif.put!( storeKey, _mergeIndex( stored + " " + pending ))
    }
    @key_hash = Hash.new
  end

  # Drops all pending entries without writing them.
  def cancel()
    @key_hash = Hash.new
  end

  # Returns every stored key that is strictly longer than key and starts
  # with it. Direct children come first, then the descendants of each child.
  def listChilds( key )
    (term, nonTerm) = _getNextLetters( key )
    result = term.map { |x| key + x }
    (term + nonTerm).each { |x|
      result += listChilds( key + x )
    }
    result
  end

  # Returns key itself (when stored) followed by listChilds( key ).
  def commonPrefixSearch( key )
    exactMatchSearch( key ) + listChilds( key )
  end

  # Returns [key] when key is stored as a complete word, otherwise [].
  def exactMatchSearch( key )
    # Look at the parent node and check whether the last letter of key
    # is registered there as a terminal.
    (term, _nonTerm) = _getNextLetters( key[0...(key.size-1)] )
    term.include?( key[-1] ) ? [key] : []
  end

  # Walks the trie below key. For every child the block is called with
  # ( candidate, termFlag ); a truthy answer means "descend".
  #  - terminal child:     asked with true first; if accepted, the candidate
  #                        is collected after its descendants. Otherwise it
  #                        is asked again with false to decide on descending.
  #  - non-terminal child: asked with false only.
  def _searchWith( key, &block )
    result = []
    (term, nonTerm) = _getNextLetters( key )
    term.each { |x|
      arg = key + x
      if block.call( arg, true )
        result.concat( _searchWith( key + x, &block ))
        result << arg
      elsif block.call( arg, false )
        result.concat( _searchWith( key + x, &block ))
      end
    }
    nonTerm.each { |x|
      arg = key + x
      if block.call( arg, false )
        result.concat( _searchWith( key + x, &block ))
      end
    }
    result
  end

  # Public entry point of _searchWith, starting at entryNode.
  def search( entryNode, &block )
    _searchWith( entryNode, &block )
  end

  # Collects keys whose text lies between from and to, where each candidate
  # is compared against from/to truncated to the candidate's length.
  # NOTE(review): the terminal flag is not consulted, so a stored key that
  # is a proper prefix of `from` is reported as well -- confirm intended.
  def rangeSearch( from, to )
    search( '' ) { |x,termFlag|
      _from = from[0...x.size]
      _to   = to  [0...x.size]
      ( _from <= x ) && ( x <= _to )
    }
  end

  # return: [ [distance, keyword], [distance, keyword], ... ]
  # ordered by descending distance value. A keyword is kept when
  # getDistance( keyword, searchWord ) is at least threshold.
  def fuzzySearch( searchWord, threshold = 0.90 )
    jarow = FuzzyStringMatch::JaroWinkler.create( )
    matched = search( '' ) { |candidate,termFlag|
      word = searchWord
      if !termFlag && (candidate.size < searchWord.size)
        # Partial path: compare only the part of searchWord seen so far,
        # padding both sides with spaces up to the length of searchWord.
        padding   = ' ' * (searchWord.size - candidate.size)
        word      = searchWord[0...candidate.size] + padding
        candidate = candidate + padding
      end
      threshold <= jarow.getDistance( candidate, word )
    }
    matched.map { |k| [ jarow.getDistance( searchWord, k ), k ] }.sort_by {|item| 1.0 - item[0]}
  end

  # Reads the index entry of node from the store.
  # Returns [ terminalLetters, nonTerminalLetters ], or [ [], [] ] when
  # the store has no entry for node.
  def _getNextLetters( node )
    str = @kvsif.get( @prefixString + node )
    str ? _parseIndex( str ) : [ [], [] ]
  end

  # Splits an index string on spaces into [ terminalLetters,
  # nonTerminalLetters ]. One-character tokens are non-terminal letters,
  # two-character tokens contribute their first character as a terminal
  # letter, and tokens of any other length are ignored.
  def _parseIndex( indexStr )
    term    = []
    nonTerm = []
    indexStr.split( /[ ]+/ ).each { |entry|
      case entry.size
      when 1
        nonTerm << entry
      when 2
        term << entry[0...1]
      end
    }
    [ term, nonTerm ]
  end

  # Normalizes an index string: terminals first, duplicates removed, and a
  # letter present in both forms kept as terminal only.
  #  "a$ a"  => "a$"    # merge into terminal
  #  " a$"   => "a$"    # strip spaces
  #  "a$ b"  => "a$ b"  # already merged
  def _mergeIndex( indexStr )
    (term, nonTerm) = _parseIndex( indexStr )
    arr  = term.uniq.map { |x| x + '$' }
    arr += nonTerm.uniq.reject { |x| term.include?( x ) }
    arr.join( ' ' )
  end

  # Adds one index entry per prefix of key to the pending hash: the prefix
  # maps to the next letter, with '$' appended for the last letter of key.
  # Returns the pending hash.
  def _createTree( key )
    prefix = ''
    key.split( // ).each { |c|
      entry = (prefix.size == (key.size-1)) ? c + '$' : c
      @key_hash[ prefix ] = _mergeIndex( @key_hash.fetch( prefix, '' ) + ' ' + entry )
      prefix += c
    }
    @key_hash
  end

  # Returns the pending index hash; the type argument is not used.
  def _getInternal( type = :work )
    @key_hash
  end
end
|
220
|
+
end
|