fts_lite 0.1.0 → 0.1.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -90,7 +90,7 @@ module FtsLite
90
90
  sql += sprintf(" LIMIT %d", limit)
91
91
  end
92
92
  sql += ";"
93
- @db.execute(sql, [@tokenizer.vector(text)]).flatten
93
+ @db.execute(sql, [@tokenizer.query(text)]).flatten
94
94
  end
95
95
  def count
96
96
  @db.execute("SELECT COUNT(*) FROM #{@table_name} ;").first.first
@@ -4,7 +4,9 @@ require 'bimyou_segmenter'
4
4
 
5
5
  module FtsLite
6
6
  module Tokenizer
7
- SIMPLE_DELIMITER = /[\s\.,\?!;\(\)。、.,?!「」『』()]+/
7
+ QUERY_DELIMITER = /[\s ]+/
8
+ SIMPLE_DELIMITER = /[\s \.\*"',\?!;\(\)。、.,?!「」『』()]+/
9
+ NEAR = " NEAR/2 "
8
10
 
9
11
  def self.create(name)
10
12
  case name.to_sym
@@ -26,14 +28,25 @@ module FtsLite
26
28
  NKF::nkf('-wZX', text).downcase
27
29
  end
28
30
  class Simple
31
+ def query(text)
32
+ vector(text)
33
+ end
29
34
  def vector(text)
30
35
  split(text).join(" ")
31
36
  end
32
37
  def split(text)
33
- Tokenizer.normalize(text).gsub(/[\.,\?!;:]/, ' ').split(SIMPLE_DELIMITER)
38
+ Tokenizer.normalize(text).split(SIMPLE_DELIMITER)
34
39
  end
35
40
  end
36
41
  class Bigram
42
+ def query(text)
43
+ text = Tokenizer.normalize(text)
44
+ text.split(QUERY_DELIMITER).map {|segment|
45
+ segment.split(SIMPLE_DELIMITER).map {|word|
46
+ 0.upto(word.size - 2).map {|i| word[i, 2] }
47
+ }.join(NEAR)
48
+ }.flatten.join(" ")
49
+ end
37
50
  def vector(text)
38
51
  split(text).join(" ")
39
52
  end
@@ -45,6 +58,14 @@ module FtsLite
45
58
  end
46
59
  end
47
60
  class Trigram
61
+ def query(text)
62
+ text = Tokenizer.normalize(text)
63
+ text.split(QUERY_DELIMITER).map {|segment|
64
+ segment.split(SIMPLE_DELIMITER).map {|word|
65
+ 0.upto(word.size - 3).map {|i| word[i, 3] }
66
+ }.join(NEAR)
67
+ }.flatten.join(" ")
68
+ end
48
69
  def vector(text)
49
70
  split(text).join(" ")
50
71
  end
@@ -56,6 +77,14 @@ module FtsLite
56
77
  end
57
78
  end
58
79
  class Wakachi
80
+ def query(text)
81
+ text = Tokenizer.normalize(text)
82
+ text.split(QUERY_DELIMITER).map {|segment|
83
+ BimyouSegmenter.segment(segment,
84
+ :white_space => false,
85
+ :symbol => false).join(NEAR)
86
+ }.join(" ")
87
+ end
59
88
  def vector(text)
60
89
  split(text).join(" ")
61
90
  end
@@ -66,6 +95,20 @@ module FtsLite
66
95
  end
67
96
  end
68
97
  class WakachiBigram
98
+ def query(text)
99
+ text = Tokenizer.normalize(text)
100
+ text.split(QUERY_DELIMITER).map {|segment|
101
+ BimyouSegmenter.segment(segment,
102
+ :white_space => false,
103
+ :symbol => false).map {|word|
104
+ if (word.size == 1)
105
+ word
106
+ else
107
+ 0.upto(word.size - 2).map {|i| word[i, 2] }.join(NEAR)
108
+ end
109
+ }.flatten.join(NEAR)
110
+ }.join(" ")
111
+ end
69
112
  def vector(text)
70
113
  split(text).join(" ")
71
114
  end
@@ -1,3 +1,3 @@
1
1
  module FtsLite
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
@@ -26,6 +26,7 @@ class FtsLiteTest < Test::Unit::TestCase
26
26
  assert_equal db.search("ナポリタン", :order => :desc).size, 2
27
27
  assert_equal db.search("ナポリタン", :order => :desc)[0], 1
28
28
  assert_equal db.search("ナポリタン", :order => :desc)[1], 2
29
+ assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
29
30
 
30
31
  db.set(1, "なぜナポリタンは青いのだろうか ?", 0)
31
32
  assert_equal db.search("赤い").size, 0
@@ -51,6 +52,7 @@ class FtsLiteTest < Test::Unit::TestCase
51
52
  assert_equal db.search("ナポリタン").size, 2
52
53
  assert_equal db.search("ナポリタン")[0], 1
53
54
  assert_equal db.search("ナポリタン")[1], 2
55
+ assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
54
56
 
55
57
  assert_equal db.search("ナポリタン", :order => :desc).size, 2
56
58
  assert_equal db.search("ナポリタン", :order => :desc)[0], 1
@@ -80,11 +82,12 @@ class FtsLiteTest < Test::Unit::TestCase
80
82
  db.set(1, "なぜナポリタンは赤いのだろうか ?", 2)
81
83
  db.set(2, "昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。 ", 1)
82
84
 
83
- assert_equal db.search("赤い").size, 0
85
+ assert_equal db.search("赤いの").size, 1
84
86
 
85
87
  assert_equal db.search("ナポリタン").size, 2
86
88
  assert_equal db.search("ナポリタン")[0], 1
87
89
  assert_equal db.search("ナポリタン")[1], 2
90
+ assert_equal db.search("赤いの ナポリタン", :order => :desc).size, 1
88
91
 
89
92
  assert_equal db.search("ナポリタン", :order => :desc).size, 2
90
93
  assert_equal db.search("ナポリタン", :order => :desc)[0], 1
@@ -120,6 +123,43 @@ class FtsLiteTest < Test::Unit::TestCase
120
123
  assert_equal db.search("ナポリタン").size, 2
121
124
  assert_equal db.search("ナポリタン")[0], 1
122
125
  assert_equal db.search("ナポリタン")[1], 2
126
+ assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
127
+
128
+ assert_equal db.search("ナポリタン", :order => :desc).size, 2
129
+ assert_equal db.search("ナポリタン", :order => :desc)[0], 1
130
+ assert_equal db.search("ナポリタン", :order => :desc)[1], 2
131
+
132
+ assert_equal db.search("ナポリタン", :order => :asc).size, 2
133
+ assert_equal db.search("ナポリタン", :order => :asc)[0], 2
134
+ assert_equal db.search("ナポリタン", :order => :asc)[1], 1
135
+
136
+ db.update_sort_value(1, 1)
137
+ db.update_sort_value(2, 2)
138
+
139
+ assert_equal db.search("ナポリタン", :order => :desc).size, 2
140
+ assert_equal db.search("ナポリタン", :order => :desc)[0], 2
141
+ assert_equal db.search("ナポリタン", :order => :desc)[1], 1
142
+
143
+ assert_equal db.search("ナポリタン", :order => :asc).size, 2
144
+ assert_equal db.search("ナポリタン", :order => :asc)[0], 1
145
+ assert_equal db.search("ナポリタン", :order => :asc)[1], 2
146
+ end
147
+ end
148
+ def test_simple
149
+ db = FtsLite::Index.open(DB_FILE, :tokenizer => :simple)
150
+ db.transaction do
151
+ db.delete_all
152
+ p db.tokenize("なぜ ナポリタン は 赤い の だろ う か ?")
153
+ db.set(1, "なぜ ナポリタン は 赤い の だろ う か ?", 2)
154
+ db.set(2, "昼飯 の スパゲティ ナポリタン を 眺め ながら 、 積年 の 疑問 を 考え て い た", 1)
155
+
156
+ assert_equal db.search("赤い").size, 1
157
+ assert_equal db.search("赤い")[0], 1
158
+
159
+ assert_equal db.search("ナポリタン").size, 2
160
+ assert_equal db.search("ナポリタン")[0], 1
161
+ assert_equal db.search("ナポリタン")[1], 2
162
+ assert_equal db.search("赤い ナポリタン", :order => :desc).size, 1
123
163
 
124
164
  assert_equal db.search("ナポリタン", :order => :desc).size, 2
125
165
  assert_equal db.search("ナポリタン", :order => :desc)[0], 1
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fts_lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: