fts_lite 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/fts_lite/index.rb +1 -1
- data/lib/fts_lite/tokenizer.rb +20 -11
- data/lib/fts_lite/version.rb +1 -1
- data/test/fts_lite_test.rb +14 -0
- metadata +3 -3
data/lib/fts_lite/index.rb
CHANGED
@@ -91,7 +91,7 @@ module FtsLite
|
|
91
91
|
sql += sprintf(" LIMIT %d", limit)
|
92
92
|
end
|
93
93
|
sql += ";"
|
94
|
-
@db.execute(sql, [@tokenizer.query(text)]).flatten
|
94
|
+
@db.execute(sql, [@tokenizer.query(text, options)]).flatten
|
95
95
|
end
|
96
96
|
def count
|
97
97
|
@db.execute("SELECT COUNT(*) FROM #{@table_name} ;").first.first
|
data/lib/fts_lite/tokenizer.rb
CHANGED
@@ -6,7 +6,8 @@ module FtsLite
|
|
6
6
|
module Tokenizer
|
7
7
|
QUERY_DELIMITER = /[\s ]+/
|
8
8
|
SIMPLE_DELIMITER = /[\s \.\*"',\?!;\(\)。、.,?!「」『』()]+/
|
9
|
-
|
9
|
+
NEAR0 = " NEAR/0 "
|
10
|
+
NEAR2 = " NEAR/2 "
|
10
11
|
|
11
12
|
def self.create(name)
|
12
13
|
case name.to_sym
|
@@ -28,7 +29,7 @@ module FtsLite
|
|
28
29
|
NKF::nkf('-wZX', text).downcase
|
29
30
|
end
|
30
31
|
class Simple
|
31
|
-
def query(text)
|
32
|
+
def query(text, options)
|
32
33
|
vector(text)
|
33
34
|
end
|
34
35
|
def vector(text)
|
@@ -39,12 +40,14 @@ module FtsLite
|
|
39
40
|
end
|
40
41
|
end
|
41
42
|
class Bigram
|
42
|
-
def query(text)
|
43
|
+
def query(text, options = {})
|
44
|
+
fuzzy = options.key?(:fuzzy) ? options[:fuzzy] : false
|
45
|
+
near = fuzzy ? NEAR2 : NEAR0
|
43
46
|
text = Tokenizer.normalize(text)
|
44
47
|
text.split(QUERY_DELIMITER).map {|segment|
|
45
48
|
segment.split(SIMPLE_DELIMITER).map {|word|
|
46
49
|
0.upto(word.size - 2).map {|i| word[i, 2] }
|
47
|
-
}.join(
|
50
|
+
}.join(near)
|
48
51
|
}.flatten.join(" ")
|
49
52
|
end
|
50
53
|
def vector(text)
|
@@ -58,12 +61,14 @@ module FtsLite
|
|
58
61
|
end
|
59
62
|
end
|
60
63
|
class Trigram
|
61
|
-
def query(text)
|
64
|
+
def query(text, options = {})
|
65
|
+
fuzzy = options.key?(:fuzzy) ? options[:fuzzy] : false
|
66
|
+
near = fuzzy ? NEAR2 : NEAR0
|
62
67
|
text = Tokenizer.normalize(text)
|
63
68
|
text.split(QUERY_DELIMITER).map {|segment|
|
64
69
|
segment.split(SIMPLE_DELIMITER).map {|word|
|
65
70
|
0.upto(word.size - 3).map {|i| word[i, 3] }
|
66
|
-
}.join(
|
71
|
+
}.join(near)
|
67
72
|
}.flatten.join(" ")
|
68
73
|
end
|
69
74
|
def vector(text)
|
@@ -77,12 +82,14 @@ module FtsLite
|
|
77
82
|
end
|
78
83
|
end
|
79
84
|
class Wakachi
|
80
|
-
def query(text)
|
85
|
+
def query(text, options = {})
|
86
|
+
fuzzy = options.key?(:fuzzy) ? options[:fuzzy] : false
|
87
|
+
near = fuzzy ? NEAR2 : NEAR0
|
81
88
|
text = Tokenizer.normalize(text)
|
82
89
|
text.split(QUERY_DELIMITER).map {|segment|
|
83
90
|
BimyouSegmenter.segment(segment,
|
84
91
|
:white_space => false,
|
85
|
-
:symbol => false).join(
|
92
|
+
:symbol => false).join(near)
|
86
93
|
}.join(" ")
|
87
94
|
end
|
88
95
|
def vector(text)
|
@@ -95,7 +102,9 @@ module FtsLite
|
|
95
102
|
end
|
96
103
|
end
|
97
104
|
class WakachiBigram
|
98
|
-
def query(text)
|
105
|
+
def query(text, options = {})
|
106
|
+
fuzzy = options.key?(:fuzzy) ? options[:fuzzy] : false
|
107
|
+
near = fuzzy ? NEAR2 : NEAR0
|
99
108
|
text = Tokenizer.normalize(text)
|
100
109
|
text.split(QUERY_DELIMITER).map {|segment|
|
101
110
|
BimyouSegmenter.segment(segment,
|
@@ -104,9 +113,9 @@ module FtsLite
|
|
104
113
|
if (word.size == 1)
|
105
114
|
word
|
106
115
|
else
|
107
|
-
0.upto(word.size - 2).map {|i| word[i, 2] }.join(
|
116
|
+
0.upto(word.size - 2).map {|i| word[i, 2] }.join(near)
|
108
117
|
end
|
109
|
-
}.flatten.join(
|
118
|
+
}.flatten.join(near)
|
110
119
|
}.join(" ")
|
111
120
|
end
|
112
121
|
def vector(text)
|
data/lib/fts_lite/version.rb
CHANGED
data/test/fts_lite_test.rb
CHANGED
@@ -181,6 +181,20 @@ class FtsLiteTest < Test::Unit::TestCase
|
|
181
181
|
assert_equal db.search("ナポリタン", :order => :asc)[1], 2
|
182
182
|
end
|
183
183
|
end
|
184
|
+
def test_fuzzy
|
185
|
+
db = FtsLite::Index.open(DB_FILE, :tokenizer => :bigram)
|
186
|
+
db.transaction do
|
187
|
+
db.delete_all
|
188
|
+
db.set(1, "あいいいう")
|
189
|
+
db.set(2, "あいいう")
|
190
|
+
assert_equal db.search("あいい").size, 2
|
191
|
+
assert_equal db.search("あいう").size, 0
|
192
|
+
assert_equal db.search("あいい", :fuzzy => true).size, 2
|
193
|
+
assert_equal db.search("あいう", :fuzzy => true).size, 2
|
194
|
+
assert_equal db.search("あいい", :fuzzy => false).size, 2
|
195
|
+
assert_equal db.search("あいう", :fuzzy => false).size, 0
|
196
|
+
end
|
197
|
+
end
|
184
198
|
def test_create
|
185
199
|
db = FtsLite::Index.open(DB_FILE)
|
186
200
|
db.drop_table!
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fts_lite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-06-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bimyou_segmenter
|
@@ -82,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
82
|
version: '0'
|
83
83
|
requirements: []
|
84
84
|
rubyforge_project:
|
85
|
-
rubygems_version: 1.8.
|
85
|
+
rubygems_version: 1.8.23
|
86
86
|
signing_key:
|
87
87
|
specification_version: 3
|
88
88
|
summary: simple full text search engine
|