fts_lite 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.sqlite3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fts_lite.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 nagadomi@nurs.or.jp
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # FtsLite
2
+
3
+ A simple full-text search index built on SQLite FTS4.
4
+
5
+ ## Dependency
6
+
7
+ - Ruby >= 1.9.2
8
+ - SQLite3 >= 3.7.7 (FTS4 REPLACE support)
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'fts_lite'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install fts_lite
23
+
24
+ ## Usage
25
+
26
+ require 'fts_lite'
27
+
28
+ db = FtsLite::Database.new("./db.sqlite3", :tokenizer => :bigram, :cache_size => 64000)
29
+
30
+ docid = 1
31
+ text = "hoge piyo"
32
+ sort_value = "2012-08-01"
33
+
34
+ db.transaction do
35
+ db.insert_or_replace(docid, text, sort_value)
36
+ db.batch_insert_or_replace([
37
+ {:docid => 30, :text => "hoge hoge", :sort_value => '2012-08-01'},
38
+ {:docid => 40, :text => "piyo piyo", :sort_value => '2012-08-02'}
39
+ ])
40
+ end
41
+
42
+ db.search('piyo', :order => :desc, :limit => 10).each do |docid|
43
+ p docid
44
+ end
45
+
46
+
47
+ db.batch_update_sort_value([
48
+ {:docid => 30, :sort_value => '2012-07-01'},
49
+ {:docid => 40, :sort_value => '2012-07-02'}
50
+ ])
51
+
52
+ db.search('piyo', :order => :desc, :limit => 10).each do |docid|
53
+ p docid
54
+ end
55
+
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rake/testtask'
5
# Defines `rake test`: runs every test/**/*_test.rb file with the test
# directory on the load path, verbose output and Ruby warnings enabled.
Rake::TestTask.new(:test) do |task|
  task.libs.push('test')
  task.test_files = Dir["test/**/*_test.rb"]
  task.verbose = true
  task.warning = true
end
data/fts_lite.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ # -*- coding: utf-8 -*-
2
+ require File.expand_path('../lib/fts_lite/version', __FILE__)
3
+
4
# Gem specification for fts_lite. FtsLite::VERSION comes from
# lib/fts_lite/version, which is required just above this block.
Gem::Specification.new do |gem|
  gem.name          = "fts_lite"
  gem.version       = FtsLite::VERSION
  gem.authors       = ["nagadomi"]
  gem.email         = ["nagadomi@nurs.or.jp"]
  gem.description   = %q{simple full text search engine}
  gem.summary       = %q{simple full text search engine}
  gem.homepage      = "https://github.com/nagadomi/fts_lite"
  # The bundled LICENSE file is the MIT License.
  gem.license       = "MIT"

  # Split on newlines only. The previous `split($\)` used the output record
  # separator, which is nil by default, so the list was split on any
  # whitespace and file names containing spaces were broken apart.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency 'bimyou_segmenter', '>= 1.2.0'
  # NOTE(review): the library does `require 'sqlite3'`; 'sqlite3-ruby' looks
  # like the legacy name of the 'sqlite3' gem - confirm before renaming.
  gem.add_dependency 'sqlite3-ruby'
end
@@ -0,0 +1,124 @@
1
+ require 'sqlite3'
2
+
3
module FtsLite
  # Thin wrapper around a SQLite FTS4 virtual table.
  #
  # Text is tokenized in Ruby (see Tokenizer) and the space-joined token
  # vector is what gets stored in, and matched against, the +text+ column.
  # The FTS4 table itself is always created with SQLite's +simple+ tokenizer.
  class Database
    DEFAULT_TOKENIZER = :bigram
    DEFAULT_JOURNAL_MODE = "MEMORY"
    # Original (misspelled) constant name, kept so existing references still resolve.
    DEFAULT_JURNAL_MODE = DEFAULT_JOURNAL_MODE
    DEFAULT_TEMP_STORE = "MEMORY"
    DEFAULT_CACHE_SIZE = 32000

    class RuntimeError < ::RuntimeError
    end

    # Opens (or creates) the database file and makes sure the index table exists.
    #
    # @param path [String] path handed to SQLite3::Database.new
    # @param options [Hash] :table_name, :tokenizer, :journal_mode,
    #   :temp_store and :cache_size; each falls back to a default
    # @raise [ArgumentError] when :tokenizer names an unknown tokenizer
    def initialize(path, options = {})
      # Build the tokenizer first so that an invalid name fails before the
      # database file is opened and the table is created.
      @tokenizer = Tokenizer.create(options[:tokenizer] || DEFAULT_TOKENIZER)
      @db = SQLite3::Database.new(path)
      @table_name = options[:table_name] || "fts_lite"
      create_table!(options)
      set_db_param(options)
    end

    # @return [Array<String>] the tokens the configured tokenizer produces for +text+
    def tokenize(text)
      @tokenizer.vector(text).split(" ")
    end

    # Closes the underlying database handle.
    def close
      @db.close
    end

    # Runs the given block inside a database transaction.
    def transaction(&block)
      @db.transaction do
        block.call
      end
    end

    # Inserts a document, replacing any existing row with the same docid.
    def insert_or_replace(docid, text, sort_value = nil)
      @db.execute("INSERT OR REPLACE INTO #{@table_name} (docid, text, sort_value) VALUES(?, ?, ?);",
                  [docid, @tokenizer.vector(text), sort_value])
    end

    # Changes only the sort key of an existing document.
    def update_sort_value(docid, sort_value)
      @db.execute("UPDATE #{@table_name} SET sort_value = ? WHERE docid = ?;",
                  [sort_value, docid])
    end

    # Removes a single document.
    def delete(docid)
      @db.execute("DELETE FROM #{@table_name} WHERE docid = ?;", [docid])
    end

    # Finds the docids whose stored token vector matches the tokenized query.
    #
    # @param text [String] query text; tokenized the same way as documents
    # @param options [Hash] :order (:asc / :desc, sorts by sort_value;
    #   anything else sorts by docid ascending) and :limit
    # @return [Array] matching docids
    def search(text, options = {})
      sql = "SELECT docid FROM #{@table_name} WHERE text MATCH ?"
      sql << order_clause(options[:order])
      sql << sprintf(" LIMIT %d", options[:limit]) if options[:limit]
      sql << ";"
      @db.execute(sql, [@tokenizer.vector(text)]).flatten
    end

    # @return [Integer] number of rows in the index table
    def count
      @db.execute("SELECT COUNT(*) FROM #{@table_name} ;").first.first
    end

    # Removes every document but keeps the table.
    def delete_all
      @db.execute("DELETE FROM #{@table_name} ;")
    end

    # Inserts many documents through one prepared statement.
    #
    # @param records [Enumerable<Hash>] hashes with :docid, :text, :sort_value
    def batch_insert(records)
      batch_store("INSERT", records)
    end

    # Same as #batch_insert, but rows with an existing docid are replaced.
    def batch_insert_or_replace(records)
      batch_store("INSERT OR REPLACE", records)
    end

    # Updates the sort key of many documents through one prepared statement.
    #
    # @param records [Enumerable<Hash>] hashes with :docid and :sort_value
    def batch_update_sort_value(records)
      @db.prepare("UPDATE #{@table_name} SET sort_value = ? WHERE docid = ?;") do |stmt|
        records.each do |rec|
          stmt.execute([rec[:sort_value], rec[:docid]])
        end
      end
    end

    # Drops the index table when it exists.
    def drop_table!
      @db.execute("DROP TABLE #{@table_name};") if table_exist?
    end

    private

    # Shared body of the two batch insert variants; +verb+ is the SQL verb.
    def batch_store(verb, records)
      @db.prepare("#{verb} INTO #{@table_name} (docid, text, sort_value) VALUES(?, ?, ?);") do |stmt|
        records.each do |rec|
          stmt.execute([rec[:docid], @tokenizer.vector(rec[:text]), rec[:sort_value]])
        end
      end
    end

    # Builds the ORDER BY fragment for #search.
    def order_clause(order)
      case order && order.to_sym
      when :desc then " ORDER BY sort_value DESC"
      when :asc  then " ORDER BY sort_value ASC"
      else " ORDER BY docid ASC"
      end
    end

    # Creates the FTS4 table unless it already exists.
    #
    # @return [Boolean] true when the table was created by this call
    def create_table!(options)
      created = false
      @db.transaction do
        unless table_exist?
          @db.execute("CREATE VIRTUAL TABLE #{@table_name} USING FTS4(text, sort_value, tokenize=simple);")
          created = true
        end
      end
      created
    end

    def table_exist?
      @db.execute("SELECT name FROM sqlite_master WHERE type = 'table' AND name = ?;",
                  [@table_name]).size == 1
    end

    # Applies connection-level PRAGMAs.
    #
    # These run outside any transaction on purpose: SQLite documents that
    # journal_mode cannot be changed while a transaction is active.
    # NOTE(review): the option values are interpolated into the statement
    # text, so they must come from trusted configuration.
    def set_db_param(options)
      @db.execute("PRAGMA journal_mode=#{options[:journal_mode] || DEFAULT_JOURNAL_MODE};")
      @db.execute("PRAGMA temp_store=#{options[:temp_store] || DEFAULT_TEMP_STORE};")
      @db.execute("PRAGMA cache_size=#{options[:cache_size] || DEFAULT_CACHE_SIZE};")
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # coding: utf-8
2
+ require 'nkf'
3
+ require 'bimyou_segmenter'
4
+
5
module FtsLite
  # Ruby-side tokenizers. Every tokenizer responds to +split+ (text to an
  # array of tokens) and +vector+ (the same tokens joined with single spaces).
  module Tokenizer
    SIMPLE_DELIMITER = /[\s\.,\?!;\(\)。、.,?!「」『』()]+/

    # Returns a tokenizer instance for the given name.
    #
    # @param name [Symbol, String] :simple, :bigram, :trigram, :wakachi or
    #   :wakachi_bigram
    # @raise [ArgumentError] when the name is not one of the above
    def self.create(name)
      key = name.respond_to?(:to_sym) ? name.to_sym : name
      case key
      when :simple         then Simple.new
      when :bigram         then Bigram.new
      when :trigram        then Trigram.new
      when :wakachi        then Wakachi.new
      when :wakachi_bigram then WakachiBigram.new
      else
        raise ArgumentError, "unknown tokenizer: #{name.inspect}"
      end
    end

    # Normalizes text with NKF ('-wZX') and lowercases it.
    # NOTE(review): per the NKF documentation -w selects UTF-8 output and
    # -Z / -X fold full-width alphanumerics and half-width kana - confirm.
    def self.normalize(text)
      NKF::nkf('-wZX', text).downcase
    end

    # Splits normalized text on SIMPLE_DELIMITER and returns every run of
    # +size+ consecutive characters of each word. Words shorter than +size+
    # yield nothing.
    def self.ngrams(text, size)
      normalize(text).split(SIMPLE_DELIMITER).flat_map do |word|
        0.upto(word.size - size).map { |i| word[i, size] }
      end
    end

    # Shared +vector+ implementation: the tokens of +split+ joined by spaces.
    module Vectorizer
      def vector(text)
        split(text).join(" ")
      end
    end

    # Delimiter-separated words.
    class Simple
      include Vectorizer

      def split(text)
        # String#split keeps a leading "" when the text starts with a
        # delimiter; drop it so only real tokens are returned.
        Tokenizer.normalize(text).gsub(/[\.,\?!;:]/, ' ').split(SIMPLE_DELIMITER).reject { |token| token.empty? }
      end
    end

    # Character bigrams of each delimiter-separated word.
    class Bigram
      include Vectorizer

      def split(text)
        Tokenizer.ngrams(text, 2)
      end
    end

    # Character trigrams of each delimiter-separated word.
    class Trigram
      include Vectorizer

      def split(text)
        Tokenizer.ngrams(text, 3)
      end
    end

    # Segments produced by BimyouSegmenter, without whitespace and symbols.
    class Wakachi
      include Vectorizer

      def split(text)
        BimyouSegmenter.segment(Tokenizer.normalize(text),
                                :white_space => false,
                                :symbol => false)
      end
    end

    # BimyouSegmenter segments broken into character bigrams; one-character
    # segments are kept whole.
    class WakachiBigram
      include Vectorizer

      def split(text)
        segments = BimyouSegmenter.segment(Tokenizer.normalize(text),
                                           :white_space => false,
                                           :symbol => false)
        segments.flat_map do |word|
          if word.size == 1
            word
          else
            0.upto(word.size - 2).map { |i| word[i, 2] }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Version of the fts_lite gem; read by the gemspec.
module FtsLite
  VERSION = '0.0.1'
end
data/lib/fts_lite.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "fts_lite/version"
2
+ require "fts_lite/tokenizer"
3
+ require "fts_lite/database"
4
+
5
+ module FtsLite
6
+ end
@@ -0,0 +1,131 @@
1
+ # coding: utf-8
2
+ require 'test_helper'
3
+
4
# Integration tests: each test builds a real index in a scratch SQLite file.
class FtsLiteTest < Test::Unit::TestCase
  DB_FILE = File.expand_path(File.join(File.dirname(__FILE__), "test.sqlite3"))
  # Document 1 and document 2 of the shared fixture.
  QUESTION = "なぜナポリタンは赤いのだろうか ?"
  MUSING = "昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。 "

  def setup
    remove_db_file
  end

  def teardown
    remove_db_file
  end

  def test_bigram
    with_db(:tokenizer => :bigram) do |db|
      db.transaction do
        db.delete_all
        assert !db.tokenize(QUESTION).empty?
        db.insert_or_replace(1, QUESTION, 2)
        db.insert_or_replace(2, MUSING, 1)

        assert_equal [1], db.search("赤い")
        assert_sort_value_ordering(db)
      end
    end
  end

  def test_trigram
    with_db(:tokenizer => :trigram) do |db|
      db.transaction do
        db.delete_all
        assert !db.tokenize(QUESTION).empty?
        db.insert_or_replace(1, QUESTION, 2)
        db.insert_or_replace(2, MUSING, 1)

        # A two-character query produces no trigram, so nothing matches.
        assert_equal [], db.search("赤い")
        assert_sort_value_ordering(db)
      end
    end
  end

  def test_wakachi_bigram
    with_db(:tokenizer => :wakachi_bigram) do |db|
      db.transaction do
        db.delete_all
        assert !db.tokenize(QUESTION).empty?
        db.batch_insert([{ :docid => 1, :text => QUESTION, :sort_value => 2 },
                         { :docid => 2, :text => MUSING, :sort_value => 1 }])

        assert_equal [1], db.search("赤い")
        assert_sort_value_ordering(db)
      end
    end
  end

  def test_create
    [{}, { :table_name => "hogehgoe" }].each do |options|
      with_db(options) do |db|
        # A freshly created table is empty.
        assert_equal 0, db.count
        db.drop_table!
      end
    end
  end

  private

  def remove_db_file
    File.unlink(DB_FILE) if File.exist?(DB_FILE)
  end

  # Opens the scratch database, yields it, and always closes the handle.
  def with_db(options = {})
    db = FtsLite::Database.new(DB_FILE, options)
    yield db
  ensure
    db.close if db
  end

  # Expects document 1 (sort_value 2) and document 2 (sort_value 1) to be
  # indexed, checks every ordering, swaps the sort values and checks again.
  def assert_sort_value_ordering(db)
    assert_equal [1, 2], db.search("ナポリタン")
    assert_equal [1, 2], db.search("ナポリタン", :order => :desc)
    assert_equal [2, 1], db.search("ナポリタン", :order => :asc)

    db.update_sort_value(1, 1)
    db.update_sort_value(2, 2)

    assert_equal [2, 1], db.search("ナポリタン", :order => :desc)
    assert_equal [1, 2], db.search("ナポリタン", :order => :asc)
  end
end
@@ -0,0 +1,13 @@
1
# Test bootstrap: puts the gem's lib directory and this test directory on the
# load path, then loads the library under test.
if RUBY_VERSION < "1.9.0"
  # Ruby 1.8 needs UTF-8 source handling switched on and rubygems loaded by hand.
  $KCODE = 'u'
  begin
    require 'rubygems'
  rescue LoadError
  end
end
require 'test/unit'

# Resolve both directories relative to this file. The lib path was previously
# resolved against the current working directory, so it pointed at the right
# place only when the tests were started from inside the test directory.
$LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))

require 'fts_lite'
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fts_lite
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - nagadomi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bimyou_segmenter
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.2.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.2.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: sqlite3-ruby
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: simple full text search engine
47
+ email:
48
+ - nagadomi@nurs.or.jp
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - LICENSE
56
+ - README.md
57
+ - Rakefile
58
+ - fts_lite.gemspec
59
+ - lib/fts_lite.rb
60
+ - lib/fts_lite/database.rb
61
+ - lib/fts_lite/tokenizer.rb
62
+ - lib/fts_lite/version.rb
63
+ - test/fts_lite_test.rb
64
+ - test/test_helper.rb
65
+ homepage: https://github.com/nagadomi/fts_lite
66
+ licenses: []
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 1.8.24
86
+ signing_key:
87
+ specification_version: 3
88
+ summary: simple full text search engine
89
+ test_files:
90
+ - test/fts_lite_test.rb
91
+ - test/test_helper.rb