fts_lite 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/fts_lite/index.rb +1 -1
- data/lib/fts_lite/tokenizer.rb +20 -11
- data/lib/fts_lite/version.rb +1 -1
- data/test/fts_lite_test.rb +14 -0
- metadata +3 -3
data/lib/fts_lite/index.rb
CHANGED
@@ -91,7 +91,7 @@ module FtsLite
|
|
91
91
|
sql += sprintf(" LIMIT %d", limit)
|
92
92
|
end
|
93
93
|
sql += ";"
|
94
|
-
@db.execute(sql, [@tokenizer.query(text)]).flatten
|
94
|
+
@db.execute(sql, [@tokenizer.query(text, options)]).flatten
|
95
95
|
end
|
96
96
|
def count
|
97
97
|
@db.execute("SELECT COUNT(*) FROM #{@table_name} ;").first.first
|
data/lib/fts_lite/tokenizer.rb
CHANGED
@@ -6,7 +6,8 @@ module FtsLite
|
|
6
6
|
module Tokenizer
|
7
7
|
QUERY_DELIMITER = /[\s ]+/
|
8
8
|
SIMPLE_DELIMITER = /[\s \.\*"',\?!;\(\)。、.,?!「」『』()]+/
|
9
|
-
|
9
|
+
NEAR0 = " NEAR/0 "
|
10
|
+
NEAR2 = " NEAR/2 "
|
10
11
|
|
11
12
|
def self.create(name)
|
12
13
|
case name.to_sym
|
@@ -28,7 +29,7 @@ module FtsLite
|
|
28
29
|
NKF::nkf('-wZX', text).downcase
|
29
30
|
end
|
30
31
|
class Simple
|
31
|
-
def query(text)
|
32
|
+
def query(text, options)
|
32
33
|
vector(text)
|
33
34
|
end
|
34
35
|
def vector(text)
|
@@ -39,12 +40,14 @@ module FtsLite
|
|
39
40
|
end
|
40
41
|
end
|
41
42
|
class Bigram
|
42
|
-
def query(text)
|
43
|
+
def query(text, options = {})
|
44
|
+
fuzzy = options.key?(:fuzzy) ? options[:fuzzy] : false
|
45
|
+
near = fuzzy ? NEAR2 : NEAR0
|
43
46
|
text = Tokenizer.normalize(text)
|
44
47
|
text.split(QUERY_DELIMITER).map {|segment|
|
45
48
|
segment.split(SIMPLE_DELIMITER).map {|word|
|
46
49
|
0.upto(word.size - 2).map {|i| word[i, 2] }
|
47
|
-
}.join(
|
50
|
+
}.join(near)
|
48
51
|
}.flatten.join(" ")
|
49
52
|
end
|
50
53
|
def vector(text)
|
@@ -58,12 +61,14 @@ module FtsLite
|
|
58
61
|
end
|
59
62
|
end
|
60
63
|
class Trigram
|
61
|
-
def query(text)
|
64
|
+
def query(text, options = {})
|
65
|
+
fuzzy = options.key?(:fuzzy) ? options[:fuzzy] : false
|
66
|
+
near = fuzzy ? NEAR2 : NEAR0
|
62
67
|
text = Tokenizer.normalize(text)
|
63
68
|
text.split(QUERY_DELIMITER).map {|segment|
|
64
69
|
segment.split(SIMPLE_DELIMITER).map {|word|
|
65
70
|
0.upto(word.size - 3).map {|i| word[i, 3] }
|
66
|
-
}.join(
|
71
|
+
}.join(near)
|
67
72
|
}.flatten.join(" ")
|
68
73
|
end
|
69
74
|
def vector(text)
|
@@ -77,12 +82,14 @@ module FtsLite
|
|
77
82
|
end
|
78
83
|
end
|
79
84
|
class Wakachi
|
80
|
-
def query(text)
|
85
|
+
def query(text, options = {})
|
86
|
+
fuzzy = options.key?(:fuzzy) ? options[:fuzzy] : false
|
87
|
+
near = fuzzy ? NEAR2 : NEAR0
|
81
88
|
text = Tokenizer.normalize(text)
|
82
89
|
text.split(QUERY_DELIMITER).map {|segment|
|
83
90
|
BimyouSegmenter.segment(segment,
|
84
91
|
:white_space => false,
|
85
|
-
:symbol => false).join(
|
92
|
+
:symbol => false).join(near)
|
86
93
|
}.join(" ")
|
87
94
|
end
|
88
95
|
def vector(text)
|
@@ -95,7 +102,9 @@ module FtsLite
|
|
95
102
|
end
|
96
103
|
end
|
97
104
|
class WakachiBigram
|
98
|
-
def query(text)
|
105
|
+
def query(text, options = {})
|
106
|
+
fuzzy = options.key?(:fuzzy) ? options[:fuzzy] : false
|
107
|
+
near = fuzzy ? NEAR2 : NEAR0
|
99
108
|
text = Tokenizer.normalize(text)
|
100
109
|
text.split(QUERY_DELIMITER).map {|segment|
|
101
110
|
BimyouSegmenter.segment(segment,
|
@@ -104,9 +113,9 @@ module FtsLite
|
|
104
113
|
if (word.size == 1)
|
105
114
|
word
|
106
115
|
else
|
107
|
-
0.upto(word.size - 2).map {|i| word[i, 2] }.join(
|
116
|
+
0.upto(word.size - 2).map {|i| word[i, 2] }.join(near)
|
108
117
|
end
|
109
|
-
}.flatten.join(
|
118
|
+
}.flatten.join(near)
|
110
119
|
}.join(" ")
|
111
120
|
end
|
112
121
|
def vector(text)
|
data/lib/fts_lite/version.rb
CHANGED
data/test/fts_lite_test.rb
CHANGED
@@ -181,6 +181,20 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
181
181
|
assert_equal db.search("ナポリタン", :order => :asc)[1], 2
|
182
182
|
end
|
183
183
|
end
|
184
|
+
def test_fuzzy
|
185
|
+
db = FtsLite::Index.open(DB_FILE, :tokenizer => :bigram)
|
186
|
+
db.transaction do
|
187
|
+
db.delete_all
|
188
|
+
db.set(1, "あいいいう")
|
189
|
+
db.set(2, "あいいう")
|
190
|
+
assert_equal db.search("あいい").size, 2
|
191
|
+
assert_equal db.search("あいう").size, 0
|
192
|
+
assert_equal db.search("あいい", :fuzzy => true).size, 2
|
193
|
+
assert_equal db.search("あいう", :fuzzy => true).size, 2
|
194
|
+
assert_equal db.search("あいい", :fuzzy => false).size, 2
|
195
|
+
assert_equal db.search("あいう", :fuzzy => false).size, 0
|
196
|
+
end
|
197
|
+
end
|
184
198
|
def test_create
|
185
199
|
db = FtsLite::Index.open(DB_FILE)
|
186
200
|
db.drop_table!
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fts_lite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-06-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bimyou_segmenter
|
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
82
|
version: '0'
|
83
83
|
requirements: []
|
84
84
|
rubyforge_project:
|
85
|
-
rubygems_version: 1.8.
|
85
|
+
rubygems_version: 1.8.23
|
86
86
|
signing_key:
|
87
87
|
specification_version: 3
|
88
88
|
summary: simple full text search engine
|