fts_lite 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -90,7 +90,7 @@ module FtsLite
90
90
  sql += sprintf(" LIMIT %d", limit)
91
91
  end
92
92
  sql += ";"
93
- @db.execute(sql, [@tokenizer.vector(text)]).flatten
93
+ @db.execute(sql, [@tokenizer.query(text)]).flatten
94
94
  end
95
95
  def count
96
96
  @db.execute("SELECT COUNT(*) FROM #{@table_name} ;").first.first
@@ -4,7 +4,9 @@ require 'bimyou_segmenter'
4
4
 
5
5
  module FtsLite
6
6
  module Tokenizer
7
- SIMPLE_DELIMITER = /[\s\.,\?!;\(\)。、.,?!「」『』()]+/
7
+ QUERY_DELIMITER = /[\s ]+/
8
+ SIMPLE_DELIMITER = /[\s \.\*"',\?!;\(\)。、.,?!「」『』()]+/
9
+ NEAR = " NEAR/2 "
8
10
 
9
11
  def self.create(name)
10
12
  case name.to_sym
@@ -26,14 +28,25 @@ module FtsLite
26
28
  NKF::nkf('-wZX', text).downcase
27
29
  end
28
30
  class Simple
31
+ def query(text)
32
+ vector(text)
33
+ end
29
34
  def vector(text)
30
35
  split(text).join(" ")
31
36
  end
32
37
  def split(text)
33
- Tokenizer.normalize(text).gsub(/[\.,\?!;:]/, ' ').split(SIMPLE_DELIMITER)
38
+ Tokenizer.normalize(text).split(SIMPLE_DELIMITER)
34
39
  end
35
40
  end
36
41
  class Bigram
42
# Builds the search expression for a bigram index.
#
# The normalized text is cut into segments on QUERY_DELIMITER. Each segment is
# cut into words on SIMPLE_DELIMITER and every word is expanded into its
# overlapping two-character slices. All slices of one segment are joined with
# NEAR; the segments are then joined with a single space.
#
# Words shorter than two characters (including the empty string that
# String#split yields for a leading delimiter) produce no slices. They are
# flattened away before joining so they cannot leave a dangling NEAR operator,
# and segments that end up without any slice are dropped.
#
# @param text [String] raw search text
# @return [String] search expression; empty when no slice could be produced
def query(text)
  segments = Tokenizer.normalize(text).split(QUERY_DELIMITER).map { |segment|
    segment.split(SIMPLE_DELIMITER).flat_map { |word|
      0.upto(word.size - 2).map { |i| word[i, 2] }
    }.join(NEAR)
  }
  segments.reject(&:empty?).join(" ")
end
37
50
  def vector(text)
38
51
  split(text).join(" ")
39
52
  end
@@ -45,6 +58,14 @@ module FtsLite
45
58
  end
46
59
  end
47
60
  class Trigram
61
# Builds the search expression for a trigram index.
#
# The normalized text is cut into segments on QUERY_DELIMITER. Each segment is
# cut into words on SIMPLE_DELIMITER and every word is expanded into its
# overlapping three-character slices. All slices of one segment are joined with
# NEAR; the segments are then joined with a single space.
#
# Words shorter than three characters (including the empty string that
# String#split yields for a leading delimiter) produce no slices. They are
# flattened away before joining so they cannot leave a dangling NEAR operator,
# and segments that end up without any slice are dropped.
#
# @param text [String] raw search text
# @return [String] search expression; empty when no slice could be produced
def query(text)
  segments = Tokenizer.normalize(text).split(QUERY_DELIMITER).map { |segment|
    segment.split(SIMPLE_DELIMITER).flat_map { |word|
      0.upto(word.size - 3).map { |i| word[i, 3] }
    }.join(NEAR)
  }
  segments.reject(&:empty?).join(" ")
end
48
69
  def vector(text)
49
70
  split(text).join(" ")
50
71
  end
@@ -56,6 +77,14 @@ module FtsLite
56
77
  end
57
78
  end
58
79
  class Wakachi
80
# Builds the search expression for a word-segmented index.
#
# The normalized text is cut into segments on QUERY_DELIMITER. Each segment is
# handed to BimyouSegmenter.segment (with :white_space and :symbol set to
# false) and the words it returns are joined with NEAR. The segments are then
# joined with a single space.
#
# Segments for which no word comes back (for example the empty string that
# String#split yields when the text starts with a delimiter) are dropped, so
# the result carries no stray blanks.
#
# @param text [String] raw search text
# @return [String] search expression; empty when no word could be produced
def query(text)
  segments = Tokenizer.normalize(text).split(QUERY_DELIMITER).map { |segment|
    BimyouSegmenter.segment(segment,
                            :white_space => false,
                            :symbol => false).join(NEAR)
  }
  segments.reject(&:empty?).join(" ")
end
59
88
  def vector(text)
60
89
  split(text).join(" ")
61
90
  end
@@ -66,6 +95,20 @@ module FtsLite
66
95
  end
67
96
  end
68
97
  class WakachiBigram
98
# Builds the search expression for a word-segmented bigram index.
#
# The normalized text is cut into segments on QUERY_DELIMITER. Each segment is
# handed to BimyouSegmenter.segment (with :white_space and :symbol set to
# false). A one-character word is kept as is; any other word is expanded into
# its overlapping two-character slices. All terms of one segment are joined
# with NEAR; the segments are then joined with a single space.
#
# Terms are collected into one flat list before joining, so a word that yields
# no term cannot leave a dangling NEAR operator, and segments without any term
# are dropped.
#
# @param text [String] raw search text
# @return [String] search expression; empty when no term could be produced
def query(text)
  segments = Tokenizer.normalize(text).split(QUERY_DELIMITER).map { |segment|
    words = BimyouSegmenter.segment(segment,
                                    :white_space => false,
                                    :symbol => false)
    words.flat_map { |word|
      word.size == 1 ? [word] : 0.upto(word.size - 2).map { |i| word[i, 2] }
    }.join(NEAR)
  }
  segments.reject(&:empty?).join(" ")
end
69
112
  def vector(text)
70
113
  split(text).join(" ")
71
114
  end
@@ -1,3 +1,3 @@
1
1
  module FtsLite
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -26,6 +26,7 @@ class FtsLiteTest < Test::Unit::TestCase
26
26
  assert_equal db.search("ナポリタン", :order => :desc).size, 2
27
27
  assert_equal db.search("ナポリタン", :order => :desc)[0], 1
28
28
  assert_equal db.search("ナポリタン", :order => :desc)[1], 2
29
+ assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
29
30
 
30
31
  db.set(1, "なぜナポリタンは青いのだろうか ?", 0)
31
32
  assert_equal db.search("赤い").size, 0
@@ -51,6 +52,7 @@ class FtsLiteTest < Test::Unit::TestCase
51
52
  assert_equal db.search("ナポリタン").size, 2
52
53
  assert_equal db.search("ナポリタン")[0], 1
53
54
  assert_equal db.search("ナポリタン")[1], 2
55
+ assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
54
56
 
55
57
  assert_equal db.search("ナポリタン", :order => :desc).size, 2
56
58
  assert_equal db.search("ナポリタン", :order => :desc)[0], 1
@@ -80,11 +82,12 @@ class FtsLiteTest < Test::Unit::TestCase
80
82
  db.set(1, "なぜナポリタンは赤いのだろうか ?", 2)
81
83
  db.set(2, "昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。 ", 1)
82
84
 
83
- assert_equal db.search("赤い").size, 0
85
+ assert_equal db.search("赤いの").size, 1
84
86
 
85
87
  assert_equal db.search("ナポリタン").size, 2
86
88
  assert_equal db.search("ナポリタン")[0], 1
87
89
  assert_equal db.search("ナポリタン")[1], 2
90
+ assert_equal db.search("赤いの ナポリタン", :order => :desc).size, 1
88
91
 
89
92
  assert_equal db.search("ナポリタン", :order => :desc).size, 2
90
93
  assert_equal db.search("ナポリタン", :order => :desc)[0], 1
@@ -120,6 +123,43 @@ class FtsLiteTest < Test::Unit::TestCase
120
123
  assert_equal db.search("ナポリタン").size, 2
121
124
  assert_equal db.search("ナポリタン")[0], 1
122
125
  assert_equal db.search("ナポリタン")[1], 2
126
+ assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
127
+
128
+ assert_equal db.search("ナポリタン", :order => :desc).size, 2
129
+ assert_equal db.search("ナポリタン", :order => :desc)[0], 1
130
+ assert_equal db.search("ナポリタン", :order => :desc)[1], 2
131
+
132
+ assert_equal db.search("ナポリタン", :order => :asc).size, 2
133
+ assert_equal db.search("ナポリタン", :order => :asc)[0], 2
134
+ assert_equal db.search("ナポリタン", :order => :asc)[1], 1
135
+
136
+ db.update_sort_value(1, 1)
137
+ db.update_sort_value(2, 2)
138
+
139
+ assert_equal db.search("ナポリタン", :order => :desc).size, 2
140
+ assert_equal db.search("ナポリタン", :order => :desc)[0], 2
141
+ assert_equal db.search("ナポリタン", :order => :desc)[1], 1
142
+
143
+ assert_equal db.search("ナポリタン", :order => :asc).size, 2
144
+ assert_equal db.search("ナポリタン", :order => :asc)[0], 1
145
+ assert_equal db.search("ナポリタン", :order => :asc)[1], 2
146
+ end
147
+ end
148
+ def test_simple
149
+ db = FtsLite::Index.open(DB_FILE, :tokenizer => :simple)
150
+ db.transaction do
151
+ db.delete_all
152
+ p db.tokenize("なぜ ナポリタン は 赤い の だろ う か ?")
153
+ db.set(1, "なぜ ナポリタン は 赤い の だろ う か ?", 2)
154
+ db.set(2, "昼飯 の スパゲティ ナポリタン を 眺め ながら 、 積年 の 疑問 を 考え て い た", 1)
155
+
156
+ assert_equal db.search("赤い").size, 1
157
+ assert_equal db.search("赤い")[0], 1
158
+
159
+ assert_equal db.search("ナポリタン").size, 2
160
+ assert_equal db.search("ナポリタン")[0], 1
161
+ assert_equal db.search("ナポリタン")[1], 2
162
+ assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
123
163
 
124
164
  assert_equal db.search("ナポリタン", :order => :desc).size, 2
125
165
  assert_equal db.search("ナポリタン", :order => :desc)[0], 1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fts_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: