fts_lite 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/fts_lite/index.rb +1 -1
- data/lib/fts_lite/tokenizer.rb +45 -2
- data/lib/fts_lite/version.rb +1 -1
- data/test/fts_lite_test.rb +41 -1
- metadata +1 -1
data/lib/fts_lite/index.rb
CHANGED
@@ -90,7 +90,7 @@ module FtsLite
|
|
90
90
|
sql += sprintf(" LIMIT %d", limit)
|
91
91
|
end
|
92
92
|
sql += ";"
|
93
|
-
@db.execute(sql, [@tokenizer.
|
93
|
+
@db.execute(sql, [@tokenizer.query(text)]).flatten
|
94
94
|
end
|
95
95
|
def count
|
96
96
|
@db.execute("SELECT COUNT(*) FROM #{@table_name} ;").first.first
|
data/lib/fts_lite/tokenizer.rb
CHANGED
@@ -4,7 +4,9 @@ require 'bimyou_segmenter'
|
|
4
4
|
|
5
5
|
module FtsLite
|
6
6
|
module Tokenizer
|
7
|
-
|
7
|
+
QUERY_DELIMITER = /[\s ]+/
|
8
|
+
SIMPLE_DELIMITER = /[\s \.\*"',\?!;\(\)。、.,?!「」『』()]+/
|
9
|
+
NEAR = " NEAR/2 "
|
8
10
|
|
9
11
|
def self.create(name)
|
10
12
|
case name.to_sym
|
@@ -26,14 +28,25 @@ module FtsLite
|
|
26
28
|
NKF::nkf('-wZX', text).downcase
|
27
29
|
end
|
28
30
|
class Simple
|
31
|
+
def query(text)
|
32
|
+
vector(text)
|
33
|
+
end
|
29
34
|
def vector(text)
|
30
35
|
split(text).join(" ")
|
31
36
|
end
|
32
37
|
def split(text)
|
33
|
-
Tokenizer.normalize(text).
|
38
|
+
Tokenizer.normalize(text).split(SIMPLE_DELIMITER)
|
34
39
|
end
|
35
40
|
end
|
36
41
|
class Bigram
|
42
|
+
def query(text)
|
43
|
+
text = Tokenizer.normalize(text)
|
44
|
+
text.split(QUERY_DELIMITER).map {|segment|
|
45
|
+
segment.split(SIMPLE_DELIMITER).map {|word|
|
46
|
+
0.upto(word.size - 2).map {|i| word[i, 2] }
|
47
|
+
}.join(NEAR)
|
48
|
+
}.flatten.join(" ")
|
49
|
+
end
|
37
50
|
def vector(text)
|
38
51
|
split(text).join(" ")
|
39
52
|
end
|
@@ -45,6 +58,14 @@ module FtsLite
|
|
45
58
|
end
|
46
59
|
end
|
47
60
|
class Trigram
|
61
|
+
def query(text)
|
62
|
+
text = Tokenizer.normalize(text)
|
63
|
+
text.split(QUERY_DELIMITER).map {|segment|
|
64
|
+
segment.split(SIMPLE_DELIMITER).map {|word|
|
65
|
+
0.upto(word.size - 3).map {|i| word[i, 3] }
|
66
|
+
}.join(NEAR)
|
67
|
+
}.flatten.join(" ")
|
68
|
+
end
|
48
69
|
def vector(text)
|
49
70
|
split(text).join(" ")
|
50
71
|
end
|
@@ -56,6 +77,14 @@ module FtsLite
|
|
56
77
|
end
|
57
78
|
end
|
58
79
|
class Wakachi
|
80
|
+
def query(text)
|
81
|
+
text = Tokenizer.normalize(text)
|
82
|
+
text.split(QUERY_DELIMITER).map {|segment|
|
83
|
+
BimyouSegmenter.segment(segment,
|
84
|
+
:white_space => false,
|
85
|
+
:symbol => false).join(NEAR)
|
86
|
+
}.join(" ")
|
87
|
+
end
|
59
88
|
def vector(text)
|
60
89
|
split(text).join(" ")
|
61
90
|
end
|
@@ -66,6 +95,20 @@ module FtsLite
|
|
66
95
|
end
|
67
96
|
end
|
68
97
|
class WakachiBigram
|
98
|
+
def query(text)
|
99
|
+
text = Tokenizer.normalize(text)
|
100
|
+
text.split(QUERY_DELIMITER).map {|segment|
|
101
|
+
BimyouSegmenter.segment(segment,
|
102
|
+
:white_space => false,
|
103
|
+
:symbol => false).map {|word|
|
104
|
+
if (word.size == 1)
|
105
|
+
word
|
106
|
+
else
|
107
|
+
0.upto(word.size - 2).map {|i| word[i, 2] }.join(NEAR)
|
108
|
+
end
|
109
|
+
}.flatten.join(NEAR)
|
110
|
+
}.join(" ")
|
111
|
+
end
|
69
112
|
def vector(text)
|
70
113
|
split(text).join(" ")
|
71
114
|
end
|
data/lib/fts_lite/version.rb
CHANGED
data/test/fts_lite_test.rb
CHANGED
@@ -26,6 +26,7 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
26
26
|
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
27
27
|
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|
28
28
|
assert_equal db.search("ナポリタン", :order => :desc)[1], 2
|
29
|
+
assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
|
29
30
|
|
30
31
|
db.set(1, "なぜナポリタンは青いのだろうか ?", 0)
|
31
32
|
assert_equal db.search("赤い").size, 0
|
@@ -51,6 +52,7 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
51
52
|
assert_equal db.search("ナポリタン").size, 2
|
52
53
|
assert_equal db.search("ナポリタン")[0], 1
|
53
54
|
assert_equal db.search("ナポリタン")[1], 2
|
55
|
+
assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
|
54
56
|
|
55
57
|
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
56
58
|
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|
@@ -80,11 +82,12 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
80
82
|
db.set(1, "なぜナポリタンは赤いのだろうか ?", 2)
|
81
83
|
db.set(2, "昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。 ", 1)
|
82
84
|
|
83
|
-
assert_equal db.search("
|
85
|
+
assert_equal db.search("赤いの").size, 1
|
84
86
|
|
85
87
|
assert_equal db.search("ナポリタン").size, 2
|
86
88
|
assert_equal db.search("ナポリタン")[0], 1
|
87
89
|
assert_equal db.search("ナポリタン")[1], 2
|
90
|
+
assert_equal db.search("赤いの ナポリタン", :order => :desc).size, 1
|
88
91
|
|
89
92
|
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
90
93
|
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|
@@ -120,6 +123,43 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
120
123
|
assert_equal db.search("ナポリタン").size, 2
|
121
124
|
assert_equal db.search("ナポリタン")[0], 1
|
122
125
|
assert_equal db.search("ナポリタン")[1], 2
|
126
|
+
assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
|
127
|
+
|
128
|
+
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
129
|
+
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|
130
|
+
assert_equal db.search("ナポリタン", :order => :desc)[1], 2
|
131
|
+
|
132
|
+
assert_equal db.search("ナポリタン", :order => :asc).size, 2
|
133
|
+
assert_equal db.search("ナポリタン", :order => :asc)[0], 2
|
134
|
+
assert_equal db.search("ナポリタン", :order => :asc)[1], 1
|
135
|
+
|
136
|
+
db.update_sort_value(1, 1)
|
137
|
+
db.update_sort_value(2, 2)
|
138
|
+
|
139
|
+
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
140
|
+
assert_equal db.search("ナポリタン", :order => :desc)[0], 2
|
141
|
+
assert_equal db.search("ナポリタン", :order => :desc)[1], 1
|
142
|
+
|
143
|
+
assert_equal db.search("ナポリタン", :order => :asc).size, 2
|
144
|
+
assert_equal db.search("ナポリタン", :order => :asc)[0], 1
|
145
|
+
assert_equal db.search("ナポリタン", :order => :asc)[1], 2
|
146
|
+
end
|
147
|
+
end
|
148
|
+
def test_simple
|
149
|
+
db = FtsLite::Index.open(DB_FILE, :tokenizer => :simple)
|
150
|
+
db.transaction do
|
151
|
+
db.delete_all
|
152
|
+
p db.tokenize("なぜ ナポリタン は 赤い の だろ う か ?")
|
153
|
+
db.set(1, "なぜ ナポリタン は 赤い の だろ う か ?", 2)
|
154
|
+
db.set(2, "昼飯 の スパゲティ ナポリタン を 眺め ながら 、 積年 の 疑問 を 考え て い た", 1)
|
155
|
+
|
156
|
+
assert_equal db.search("赤い").size, 1
|
157
|
+
assert_equal db.search("赤い")[0], 1
|
158
|
+
|
159
|
+
assert_equal db.search("ナポリタン").size, 2
|
160
|
+
assert_equal db.search("ナポリタン")[0], 1
|
161
|
+
assert_equal db.search("ナポリタン")[1], 2
|
162
|
+
assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
|
123
163
|
|
124
164
|
assert_equal db.search("ナポリタン", :order => :desc).size, 2
|
125
165
|
assert_equal db.search("ナポリタン", :order => :desc)[0], 1
|