fts_lite 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.sqlite3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fts_lite.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 nagadomi@nurs.or.jp
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # FtsLite
2
+
3
+ A simple full-text search index built on SQLite3 FTS4.
4
+
5
+ ## Dependencies
6
+
7
+ * Ruby >= 1.9.2
8
+ * SQLite3 >= 3.7.7 (FTS4 REPLACE support)
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ gem 'fts_lite'
15
+
16
+ And then execute:
17
+
18
+ $ bundle
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install fts_lite
23
+
24
+ ## Usage
25
+
26
+ require 'fts_lite'
27
+
28
+ db = FtsLite::Database.new("./db.sqlite3", :tokenizer => :bigram, :cache_size => 64000)
29
+
30
+ docid = 1
31
+ text = "hoge piyo"
32
+ sort_value = "2012-08-01"
33
+
34
+ db.transaction do
35
+ db.insert_or_replace(docid, text, sort_value)
36
+ db.batch_insert_or_replace([
37
+ {:docid => 30, :text => "hoge hoge", :sort_value => '2012-08-01'},
38
+ {:docid => 40, :text => "piyo piyo", :sort_value => '2012-08-02'}
39
+ ])
40
+ end
41
+
42
+ db.search('piyo', :order => :desc, :limit => 10).each do |docid|
43
+ p docid
44
+ end
45
+
46
+
47
+ db.batch_update_sort_value([
48
+ {:docid => 30, :sort_value => '2012-07-01'},
49
+ {:docid => 40, :sort_value => '2012-07-02'}
50
+ ])
51
+
52
+ db.search('piyo', :order => :desc, :limit => 10).each do |docid|
53
+ p docid
54
+ end
55
+
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rake/testtask'
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs << 'test'
7
+ test.test_files = Dir.glob("test/**/*_test.rb")
8
+ test.verbose = true
9
+ test.warning = true
10
+ end
data/fts_lite.gemspec ADDED
@@ -0,0 +1,20 @@
1
# -*- coding: utf-8 -*-
require File.expand_path('../lib/fts_lite/version', __FILE__)

# Gem specification for fts_lite.
Gem::Specification.new do |gem|
  gem.name          = "fts_lite"
  gem.version       = FtsLite::VERSION
  gem.authors       = ["nagadomi"]
  gem.email         = ["nagadomi@nurs.or.jp"]
  gem.description   = %q{simple full text search engine}
  gem.summary       = %q{simple full text search engine}
  gem.homepage      = "https://github.com/nagadomi/fts_lite"
  # The bundled LICENSE file is the MIT License.
  gem.license       = "MIT"

  # Split on the input record separator ($/). The output record separator
  # ($\) is nil by default, which makes String#split fall back to splitting
  # on any whitespace and breaks file names that contain spaces.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_dependency 'bimyou_segmenter', '>= 1.2.0'
  # The gem formerly published as `sqlite3-ruby` was renamed to `sqlite3`.
  gem.add_dependency 'sqlite3'
end
@@ -0,0 +1,124 @@
1
+ require 'sqlite3'
2
+
3
module FtsLite
  # Wrapper around a single SQLite3 FTS4 virtual table with two columns:
  # `text` (the tokenized document) and `sort_value` (an arbitrary value used
  # for ordering search results).
  #
  # Text is tokenized in Ruby by the configured tokenizer before it is stored
  # or matched, so the FTS table itself always uses SQLite's `simple`
  # tokenizer.
  #
  # NOTE: the table name and the PRAGMA option values are interpolated into
  # SQL text. They must come from trusted code, never from end-user input.
  class Database
    DEFAULT_TOKENIZER = :bigram
    DEFAULT_JOURNAL_MODE = "MEMORY"
    # Historical misspelling, kept so existing references keep working.
    DEFAULT_JURNAL_MODE = DEFAULT_JOURNAL_MODE
    DEFAULT_TEMP_STORE = "MEMORY"
    DEFAULT_CACHE_SIZE = 32000

    class RuntimeError < ::RuntimeError
    end

    # Opens (or creates) the database at +path+ and makes sure the FTS table
    # exists.
    #
    # Recognized options:
    #   :table_name   - FTS table name (default "fts_lite")
    #   :tokenizer    - tokenizer name passed to Tokenizer.create
    #   :journal_mode - value for PRAGMA journal_mode
    #   :temp_store   - value for PRAGMA temp_store
    #   :cache_size   - value for PRAGMA cache_size
    def initialize(path, options = {})
      # Build the tokenizer first so a bad tokenizer name is reported before
      # the database file is opened or modified.
      @tokenizer = Tokenizer.create(options[:tokenizer] || DEFAULT_TOKENIZER)
      @db = SQLite3::Database.new(path)
      @table_name = options[:table_name] || "fts_lite"
      create_table!(options)
      set_db_param(options)
    end

    # Returns the tokens the configured tokenizer produces for +text+.
    def tokenize(text)
      @tokenizer.vector(text).split(" ")
    end

    # Closes the underlying SQLite connection.
    def close
      @db.close
    end

    # Runs the given block inside a database transaction.
    def transaction(&block)
      @db.transaction { block.call }
    end

    # Stores +text+ under +docid+, replacing any existing row with that id.
    def insert_or_replace(docid, text, sort_value = nil)
      @db.execute("INSERT OR REPLACE INTO #{@table_name} (docid, text, sort_value) VALUES(?, ?, ?);",
                  [docid, @tokenizer.vector(text), sort_value])
    end

    # Changes only the sort value of the row identified by +docid+.
    def update_sort_value(docid, sort_value)
      @db.execute("UPDATE #{@table_name} SET sort_value = ? WHERE docid = ?;",
                  [sort_value, docid])
    end

    # Removes the row identified by +docid+.
    def delete(docid)
      @db.execute("DELETE FROM #{@table_name} WHERE docid = ?;", [docid])
    end

    # Returns the docids of the rows matching +text+.
    #
    # Options:
    #   :order - :asc or :desc to sort by sort_value; anything else (or
    #            nothing) sorts by docid ascending
    #   :limit - maximum number of docids to return
    def search(text, options = {})
      requested_order = options[:order] && options[:order].to_sym
      order_clause =
        case requested_order
        when :desc then " ORDER BY sort_value DESC"
        when :asc  then " ORDER BY sort_value ASC"
        else            " ORDER BY docid ASC"
        end

      sql = "SELECT docid FROM #{@table_name} WHERE text MATCH ?" + order_clause
      # %d forces the limit to be rendered as an integer.
      sql += sprintf(" LIMIT %d", options[:limit]) if options[:limit]
      sql += ";"
      @db.execute(sql, [@tokenizer.vector(text)]).flatten
    end

    # Returns the number of rows in the table.
    def count
      @db.execute("SELECT COUNT(*) FROM #{@table_name} ;").first.first
    end

    # Removes every row from the table.
    def delete_all
      @db.execute("DELETE FROM #{@table_name} ;")
    end

    # Inserts many records with one prepared statement. Each record is a hash
    # with :docid, :text and :sort_value keys.
    def batch_insert(records)
      insert_records("INSERT", records)
    end

    # Same as batch_insert, but replaces rows whose docid already exists.
    def batch_insert_or_replace(records)
      insert_records("INSERT OR REPLACE", records)
    end

    # Updates the sort value of many rows with one prepared statement. Each
    # record is a hash with :docid and :sort_value keys.
    def batch_update_sort_value(records)
      @db.prepare("UPDATE #{@table_name} SET sort_value = ? WHERE docid = ?;") do |stmt|
        records.each do |rec|
          stmt.execute([rec[:sort_value], rec[:docid]])
        end
      end
    end

    # Drops the FTS table if it exists.
    def drop_table!
      @db.execute("DROP TABLE #{@table_name};") if table_exist?
    end

    private

    # Shared body of batch_insert / batch_insert_or_replace; +verb+ is the
    # leading SQL keyword(s).
    def insert_records(verb, records)
      @db.prepare("#{verb} INTO #{@table_name} (docid, text, sort_value) VALUES(?, ?, ?);") do |stmt|
        records.each do |rec|
          stmt.execute([rec[:docid], @tokenizer.vector(rec[:text]), rec[:sort_value]])
        end
      end
    end

    # Creates the FTS table when it is missing. Returns true if the table was
    # created by this call, false if it already existed.
    def create_table!(options)
      created = false
      @db.transaction do
        unless table_exist?
          @db.execute("CREATE VIRTUAL TABLE #{@table_name} USING FTS4(text, sort_value, tokenize=simple);")
          created = true
        end
      end
      created
    end

    # True when sqlite_master lists a table named @table_name.
    def table_exist?
      @db.execute("SELECT name FROM sqlite_master WHERE type = 'table' AND name = ?;",
                  [@table_name]).size == 1
    end

    # Applies the connection-level PRAGMAs. They are deliberately executed
    # outside of a transaction: SQLite does not allow journal_mode to be
    # changed while a transaction is active.
    def set_db_param(options)
      @db.execute("PRAGMA journal_mode=#{options[:journal_mode] || DEFAULT_JOURNAL_MODE};")
      @db.execute("PRAGMA temp_store=#{options[:temp_store] || DEFAULT_TEMP_STORE};")
      @db.execute("PRAGMA cache_size=#{options[:cache_size] || DEFAULT_CACHE_SIZE};")
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # coding: utf-8
2
+ require 'nkf'
3
+ require 'bimyou_segmenter'
4
+
5
module FtsLite
  # Tokenizers that turn raw text into a list of tokens (+split+) or into a
  # single space-separated token string (+vector+).
  module Tokenizer
    SIMPLE_DELIMITER = /[\s\.,\?!;\(\)。、.,?!「」『』()]+/

    # Builds the tokenizer registered under +name+ (a Symbol or String).
    # Raises ArgumentError for any name that is not one of :simple, :bigram,
    # :trigram, :wakachi or :wakachi_bigram.
    def self.create(name)
      key = name.respond_to?(:to_sym) ? name.to_sym : name
      case key
      when :simple         then Simple.new
      when :bigram         then Bigram.new
      when :trigram        then Trigram.new
      when :wakachi        then Wakachi.new
      when :wakachi_bigram then WakachiBigram.new
      else
        raise ArgumentError, "unknown tokenizer: #{name.inspect}"
      end
    end

    # Runs +text+ through NKF with the '-wZX' flags and lower-cases the
    # result.
    def self.normalize(text)
      NKF.nkf('-wZX', text).downcase
    end

    # Returns every run of +size+ consecutive characters in +word+; a word
    # shorter than +size+ yields an empty array.
    def self.ngrams(word, size)
      word.each_char.each_cons(size).map(&:join)
    end

    # Common behaviour: subclasses implement +split+, and +vector+ joins the
    # resulting tokens with single spaces.
    class Base
      def vector(text)
        split(text).join(" ")
      end
    end

    # Splits normalized text on punctuation and whitespace.
    class Simple < Base
      def split(text)
        Tokenizer.normalize(text).gsub(/[\.,\?!;:]/, ' ').split(SIMPLE_DELIMITER)
      end
    end

    # Two-character n-grams of each delimiter-separated word. Words shorter
    # than two characters produce no tokens.
    class Bigram < Base
      def split(text)
        Tokenizer.normalize(text).split(SIMPLE_DELIMITER).flat_map do |word|
          Tokenizer.ngrams(word, 2)
        end
      end
    end

    # Three-character n-grams of each delimiter-separated word. Words shorter
    # than three characters produce no tokens.
    class Trigram < Base
      def split(text)
        Tokenizer.normalize(text).split(SIMPLE_DELIMITER).flat_map do |word|
          Tokenizer.ngrams(word, 3)
        end
      end
    end

    # Word segmentation delegated to BimyouSegmenter.
    class Wakachi < Base
      def split(text)
        BimyouSegmenter.segment(Tokenizer.normalize(text),
                                :white_space => false,
                                :symbol => false)
      end
    end

    # BimyouSegmenter segmentation followed by bigrams of each segment;
    # single-character segments are kept as they are.
    class WakachiBigram < Base
      def split(text)
        segments = BimyouSegmenter.segment(Tokenizer.normalize(text),
                                           :white_space => false,
                                           :symbol => false)
        segments.flat_map do |word|
          word.size == 1 ? [word] : Tokenizer.ngrams(word, 2)
        end
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module FtsLite
  # Gem version string; frozen so it cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
data/lib/fts_lite.rb ADDED
@@ -0,0 +1,6 @@
1
+ require "fts_lite/version"
2
+ require "fts_lite/tokenizer"
3
+ require "fts_lite/database"
4
+
5
+ module FtsLite
6
+ end
@@ -0,0 +1,131 @@
1
+ # coding: utf-8
2
+ require 'test_helper'
3
+
4
# End-to-end tests that index two Japanese sentences with each tokenizer and
# check matching and sort_value ordering.
class FtsLiteTest < Test::Unit::TestCase
  DB_FILE = File.expand_path(File.join(File.dirname(__FILE__), "test.sqlite3"))
  QUESTION = "なぜナポリタンは赤いのだろうか ?"
  MUSING = "昼飯のスパゲティナポリタンを眺めながら、積年の疑問を考えていた。 "

  # Every test starts from a fresh database file.
  def setup
    @db = nil
    File.unlink(DB_FILE) if File.exist?(DB_FILE)
  end

  # Closes the handle opened through open_db, if any.
  def teardown
    @db.close if @db
    @db = nil
  end

  def test_bigram
    db = open_db(:tokenizer => :bigram)
    db.transaction do
      db.delete_all
      assert !db.tokenize(QUESTION).empty?
      db.insert_or_replace(1, QUESTION, 2)
      db.insert_or_replace(2, MUSING, 1)

      assert_equal [1], db.search("赤い")
      assert_sort_value_ordering(db)
    end
  end

  def test_trigram
    db = open_db(:tokenizer => :trigram)
    db.transaction do
      db.delete_all
      assert !db.tokenize(QUESTION).empty?
      db.insert_or_replace(1, QUESTION, 2)
      db.insert_or_replace(2, MUSING, 1)

      # A two-character query produces no trigrams, so nothing matches.
      assert_equal [], db.search("赤い")
      assert_sort_value_ordering(db)
    end
  end

  def test_wakachi_bigram
    db = open_db(:tokenizer => :wakachi_bigram)
    db.transaction do
      db.delete_all
      assert !db.tokenize(QUESTION).empty?
      db.batch_insert([{ :docid => 1, :text => QUESTION, :sort_value => 2 },
                       { :docid => 2, :text => MUSING, :sort_value => 1 }])

      assert_equal [1], db.search("赤い")
      assert_sort_value_ordering(db)
    end
  end

  def test_create
    db = FtsLite::Database.new(DB_FILE)
    db.drop_table!
    db.close
    db = FtsLite::Database.new(DB_FILE, :table_name => "hogehgoe")
    db.drop_table!
    db.close
  end

  private

  # Opens the test database and remembers the handle so teardown closes it.
  def open_db(options = {})
    @db = FtsLite::Database.new(DB_FILE, options)
  end

  # Expects doc 1 (sort_value 2) and doc 2 (sort_value 1) to both match
  # "ナポリタン", checks the default/desc/asc orderings, swaps the sort values
  # and checks that the orderings follow.
  def assert_sort_value_ordering(db)
    assert_equal [1, 2], db.search("ナポリタン")
    assert_equal [1, 2], db.search("ナポリタン", :order => :desc)
    assert_equal [2, 1], db.search("ナポリタン", :order => :asc)

    db.update_sort_value(1, 1)
    db.update_sort_value(2, 2)

    assert_equal [2, 1], db.search("ナポリタン", :order => :desc)
    assert_equal [1, 2], db.search("ナポリタン", :order => :asc)
  end
end
@@ -0,0 +1,13 @@
1
# Ruby 1.8 only: switch to UTF-8 and load RubyGems explicitly.
if RUBY_VERSION < "1.9.0"
  $KCODE = 'u'
  begin
    require 'rubygems'
  rescue LoadError
    # RubyGems is optional here; carry on without it.
  end
end
require 'test/unit'

# Resolve lib/ relative to this file rather than the current working
# directory, so the tests load the local sources wherever they are run from.
$LOAD_PATH.unshift(File.expand_path(File.join('..', '..', 'lib'), __FILE__))
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))

require 'fts_lite'
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fts_lite
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - nagadomi
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-08-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bimyou_segmenter
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 1.2.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.2.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: sqlite3-ruby
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description: simple full text search engine
47
+ email:
48
+ - nagadomi@nurs.or.jp
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - Gemfile
55
+ - LICENSE
56
+ - README.md
57
+ - Rakefile
58
+ - fts_lite.gemspec
59
+ - lib/fts_lite.rb
60
+ - lib/fts_lite/database.rb
61
+ - lib/fts_lite/tokenizer.rb
62
+ - lib/fts_lite/version.rb
63
+ - test/fts_lite_test.rb
64
+ - test/test_helper.rb
65
+ homepage: https://github.com/nagadomi/fts_lite
66
+ licenses: []
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 1.8.24
86
+ signing_key:
87
+ specification_version: 3
88
+ summary: simple full text search engine
89
+ test_files:
90
+ - test/fts_lite_test.rb
91
+ - test/test_helper.rb