fts_lite 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/fts_lite/index.rb +1 -1
- data/lib/fts_lite/tokenizer.rb +45 -2
- data/lib/fts_lite/version.rb +1 -1
- data/test/fts_lite_test.rb +41 -1
- metadata +1 -1
data/lib/fts_lite/index.rb
CHANGED
@@ -90,7 +90,7 @@ module FtsLite
|
|
90
90
|
sql += sprintf(" LIMIT %d", limit)
|
91
91
|
end
|
92
92
|
sql += ";"
|
93
|
-
@db.execute(sql, [@tokenizer.vector(text)]).flatten
|
93
|
+
@db.execute(sql, [@tokenizer.query(text)]).flatten
|
94
94
|
end
|
95
95
|
def count
|
96
96
|
@db.execute("SELECT COUNT(*) FROM #{@table_name} ;").first.first
|
data/lib/fts_lite/tokenizer.rb
CHANGED
@@ -4,7 +4,9 @@ require 'bimyou_segmenter'
|
|
4
4
|
|
5
5
|
module FtsLite
|
6
6
|
module Tokenizer
|
7
|
-
|
7
|
+
QUERY_DELIMITER = /[\s ]+/
|
8
|
+
SIMPLE_DELIMITER = /[\s \.\*"',\?!;\(\)。、.,?!「」『』()]+/
|
9
|
+
NEAR = " NEAR/2 "
|
8
10
|
|
9
11
|
def self.create(name)
|
10
12
|
case name.to_sym
|
@@ -26,14 +28,25 @@ module FtsLite
|
|
26
28
|
NKF::nkf('-wZX', text).downcase
|
27
29
|
end
|
28
30
|
class Simple
|
31
|
+
def query(text)
|
32
|
+
vector(text)
|
33
|
+
end
|
29
34
|
def vector(text)
|
30
35
|
split(text).join(" ")
|
31
36
|
end
|
32
37
|
def split(text)
|
33
|
-
Tokenizer.normalize(text).
|
38
|
+
Tokenizer.normalize(text).split(SIMPLE_DELIMITER)
|
34
39
|
end
|
35
40
|
end
|
36
41
|
class Bigram
|
42
|
+
def query(text)
|
43
|
+
text = Tokenizer.normalize(text)
|
44
|
+
text.split(QUERY_DELIMITER).map {|segment|
|
45
|
+
segment.split(SIMPLE_DELIMITER).map {|word|
|
46
|
+
0.upto(word.size - 2).map {|i| word[i, 2] }
|
47
|
+
}.join(NEAR)
|
48
|
+
}.flatten.join(" ")
|
49
|
+
end
|
37
50
|
def vector(text)
|
38
51
|
split(text).join(" ")
|
39
52
|
end
|
@@ -45,6 +58,14 @@ module FtsLite
|
|
45
58
|
end
|
46
59
|
end
|
47
60
|
class Trigram
|
61
|
+
def query(text)
|
62
|
+
text = Tokenizer.normalize(text)
|
63
|
+
text.split(QUERY_DELIMITER).map {|segment|
|
64
|
+
segment.split(SIMPLE_DELIMITER).map {|word|
|
65
|
+
0.upto(word.size - 3).map {|i| word[i, 3] }
|
66
|
+
}.join(NEAR)
|
67
|
+
}.flatten.join(" ")
|
68
|
+
end
|
48
69
|
def vector(text)
|
49
70
|
split(text).join(" ")
|
50
71
|
end
|
@@ -56,6 +77,14 @@ module FtsLite
|
|
56
77
|
end
|
57
78
|
end
|
58
79
|
class Wakachi
|
80
|
+
def query(text)
|
81
|
+
text = Tokenizer.normalize(text)
|
82
|
+
text.split(QUERY_DELIMITER).map {|segment|
|
83
|
+
BimyouSegmenter.segment(segment,
|
84
|
+
:white_space => false,
|
85
|
+
:symbol => false).join(NEAR)
|
86
|
+
}.join(" ")
|
87
|
+
end
|
59
88
|
def vector(text)
|
60
89
|
split(text).join(" ")
|
61
90
|
end
|
@@ -66,6 +95,20 @@ module FtsLite
|
|
66
95
|
end
|
67
96
|
end
|
68
97
|
class WakachiBigram
|
98
|
+
def query(text)
|
99
|
+
text = Tokenizer.normalize(text)
|
100
|
+
text.split(QUERY_DELIMITER).map {|segment|
|
101
|
+
BimyouSegmenter.segment(segment,
|
102
|
+
:white_space => false,
|
103
|
+
:symbol => false).map {|word|
|
104
|
+
if (word.size == 1)
|
105
|
+
word
|
106
|
+
else
|
107
|
+
0.upto(word.size - 2).map {|i| word[i, 2] }.join(NEAR)
|
108
|
+
end
|
109
|
+
}.flatten.join(NEAR)
|
110
|
+
}.join(" ")
|
111
|
+
end
|
69
112
|
def vector(text)
|
70
113
|
split(text).join(" ")
|
71
114
|
end
|
data/lib/fts_lite/version.rb
CHANGED
data/test/fts_lite_test.rb
CHANGED
@@ -26,6 +26,7 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
26
26
|
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
27
27
|
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|
28
28
|
assert_equal db.search("ナポリタン", :order => :desc)[1], 2
|
29
|
+
assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
|
29
30
|
|
30
31
|
db.set(1, "なぜナポリタンは青いのだろうか ?", 0)
|
31
32
|
assert_equal db.search("赤い").size, 0
|
@@ -51,6 +52,7 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
51
52
|
assert_equal db.search("ナポリタン").size, 2
|
52
53
|
assert_equal db.search("ナポリタン")[0], 1
|
53
54
|
assert_equal db.search("ナポリタン")[1], 2
|
55
|
+
assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
|
54
56
|
|
55
57
|
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
56
58
|
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|
@@ -80,11 +82,12 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
80
82
|
db.set(1, "なぜナポリタンは赤いのだろうか ?", 2)
|
81
83
|
db.set(2, "昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。 ", 1)
|
82
84
|
|
83
|
-
assert_equal db.search("
|
85
|
+
assert_equal db.search("赤いの").size, 1
|
84
86
|
|
85
87
|
assert_equal db.search("ナポリタン").size, 2
|
86
88
|
assert_equal db.search("ナポリタン")[0], 1
|
87
89
|
assert_equal db.search("ナポリタン")[1], 2
|
90
|
+
assert_equal db.search("赤いの ナポリタン", :order => :desc).size, 1
|
88
91
|
|
89
92
|
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
90
93
|
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|
@@ -120,6 +123,43 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
120
123
|
assert_equal db.search("ナポリタン").size, 2
|
121
124
|
assert_equal db.search("ナポリタン")[0], 1
|
122
125
|
assert_equal db.search("ナポリタン")[1], 2
|
126
|
+
assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
|
127
|
+
|
128
|
+
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
129
|
+
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|
130
|
+
assert_equal db.search("ナポリタン", :order => :desc)[1], 2
|
131
|
+
|
132
|
+
assert_equal db.search("ナポリタン", :order => :asc).size, 2
|
133
|
+
assert_equal db.search("ナポリタン", :order => :asc)[0], 2
|
134
|
+
assert_equal db.search("ナポリタン", :order => :asc)[1], 1
|
135
|
+
|
136
|
+
db.update_sort_value(1, 1)
|
137
|
+
db.update_sort_value(2, 2)
|
138
|
+
|
139
|
+
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
140
|
+
assert_equal db.search("ナポリタン", :order => :desc)[0], 2
|
141
|
+
assert_equal db.search("ナポリタン", :order => :desc)[1], 1
|
142
|
+
|
143
|
+
assert_equal db.search("ナポリタン", :order => :asc).size, 2
|
144
|
+
assert_equal db.search("ナポリタン", :order => :asc)[0], 1
|
145
|
+
assert_equal db.search("ナポリタン", :order => :asc)[1], 2
|
146
|
+
end
|
147
|
+
end
|
148
|
+
def test_simple
|
149
|
+
db = FtsLite::Index.open(DB_FILE, :tokenizer => :simple)
|
150
|
+
db.transaction do
|
151
|
+
db.delete_all
|
152
|
+
p db.tokenize("なぜ ナポリタン は 赤い の だろ う か ?")
|
153
|
+
db.set(1, "なぜ ナポリタン は 赤い の だろ う か ?", 2)
|
154
|
+
db.set(2, "昼飯 の スパゲティ ナポリタン を 眺め ながら 、 積年 の 疑問 を 考え て い た", 1)
|
155
|
+
|
156
|
+
assert_equal db.search("赤い").size, 1
|
157
|
+
assert_equal db.search("赤い")[0], 1
|
158
|
+
|
159
|
+
assert_equal db.search("ナポリタン").size, 2
|
160
|
+
assert_equal db.search("ナポリタン")[0], 1
|
161
|
+
assert_equal db.search("ナポリタン")[1], 2
|
162
|
+
assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
|
123
163
|
|
124
164
|
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
125
165
|
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|