xapian_db 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +15 -0
- data/examples/basic.rb +59 -0
- data/lib/xapian_db/adapters/active_record_adapter.rb +74 -0
- data/lib/xapian_db/adapters/datamapper_adapter.rb +62 -0
- data/lib/xapian_db/adapters/generic_adapter.rb +41 -0
- data/lib/xapian_db/config.rb +82 -0
- data/lib/xapian_db/database.rb +102 -0
- data/lib/xapian_db/document_blueprint.rb +121 -0
- data/lib/xapian_db/index_writers/direct_writer.rb +52 -0
- data/lib/xapian_db/indexer.rb +75 -0
- data/lib/xapian_db/query_parser.rb +34 -0
- data/lib/xapian_db/railtie.rb +43 -0
- data/lib/xapian_db/resultset.rb +51 -0
- data/lib/xapian_db.rb +57 -0
- metadata +97 -0
data/CHANGELOG
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
*0.1.0* (November 23rd, 2010)
|
2
|
+
|
3
|
+
* Proof of concept, not really useful for real world usage
|
4
|
+
|
5
|
+
*0.2.0* (December 1st, 2010)
|
6
|
+
|
7
|
+
* Blueprint configuration extended
|
8
|
+
* Adapter for Datamapper
|
9
|
+
* Search by attribute names
|
10
|
+
* Search with wildcards
|
11
|
+
* Document attributes can carry anything that is serializable by YAML
|
12
|
+
|
13
|
+
*0.3.0* (December 4th, 2010)
|
14
|
+
|
15
|
+
* Rails integration with configuration file (config/xapian_db.yml) and automatic setup
|
data/examples/basic.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This example shows the most basic way to use xapian_db
|
4
|
+
# To run the example, please install the xapian_db gem first
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'xapian_db'
|
8
|
+
|
9
|
+
puts "Setting up the demo..."
|
10
|
+
|
11
|
+
# 1: Open an in memory database
|
12
|
+
db = XapianDb.create_db
|
13
|
+
|
14
|
+
# 2: Define a class which should get indexed; we define a class that
|
15
|
+
# could be an ActiveRecord or Datamapper Domain class
|
16
|
+
class People
|
17
|
+
|
18
|
+
attr_accessor :id, :name, :first_name
|
19
|
+
|
20
|
+
def initialize(data)
|
21
|
+
@id, @name, @first_name = data[:id], data[:name], data[:first_name]
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
# 3: Configure the generic adapter with a unique key expression
|
27
|
+
XapianDb::Adapters::GenericAdapter.unique_key do
|
28
|
+
"#{self.class}-#{self.id}"
|
29
|
+
end
|
30
|
+
|
31
|
+
# 4: Define a document blueprint for our class; the blueprint describes
|
32
|
+
# the structure of all documents for our class. Attribute values can
|
33
|
+
# be accessed later for each retrieved doc. Attributes are indexed
|
34
|
+
# by default.
|
35
|
+
XapianDb::DocumentBlueprint.setup(People) do |blueprint|
|
36
|
+
blueprint.attribute :name
|
37
|
+
blueprint.attribute :first_name
|
38
|
+
end
|
39
|
+
|
40
|
+
# 5: Let's create some objects
|
41
|
+
person_1 = People.new(:id => 1, :name => "Kogler", :first_name => "Gernot")
|
42
|
+
person_2 = People.new(:id => 2, :name => "Frey", :first_name => "Daniel")
|
43
|
+
person_3 = People.new(:id => 3, :name => "Garaio", :first_name => "Thomas")
|
44
|
+
|
45
|
+
# 6: Now add them to the database
|
46
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for(People)
|
47
|
+
db.store_doc(blueprint.indexer.build_document_for(person_1))
|
48
|
+
db.store_doc(blueprint.indexer.build_document_for(person_2))
|
49
|
+
db.store_doc(blueprint.indexer.build_document_for(person_3))
|
50
|
+
|
51
|
+
# 7: Now find the gem author ;-)
|
52
|
+
puts "Searching for Gernot..."
|
53
|
+
results = db.search("Gernot")
|
54
|
+
puts "We found #{results.size} documents"
|
55
|
+
puts "And the first document looks like this:"
|
56
|
+
page = results.paginate(:page => 1)
|
57
|
+
doc = page.first
|
58
|
+
puts "name: #{doc.name}"
|
59
|
+
puts "first name: #{doc.first_name}"
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Adapter for ActiveRecord. To use it, simply set it as the
|
4
|
+
# default for any DocumentBlueprint or a specific DocumentBlueprint
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
module Adapters
|
8
|
+
|
9
|
+
class ActiveRecordAdapter
|
10
|
+
|
11
|
+
class << self
|
12
|
+
|
13
|
+
# Implement the class helper methods
|
14
|
+
def add_class_helper_methods_to(klass)
|
15
|
+
|
16
|
+
klass.instance_eval do
|
17
|
+
# define the method to retrieve a unique key
|
18
|
+
define_method(:xapian_id) do
|
19
|
+
"#{self.class}-#{self.id}"
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
klass.class_eval do
|
25
|
+
|
26
|
+
# add the after save logic
|
27
|
+
after_save do
|
28
|
+
XapianDb::Config.writer.index(self)
|
29
|
+
end
|
30
|
+
|
31
|
+
# add the after destroy logic
|
32
|
+
after_destroy do
|
33
|
+
XapianDb::Config.writer.unindex(self)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Add a method to reindex all models of this class
|
37
|
+
define_singleton_method(:rebuild_xapian_index) do
|
38
|
+
# db = XapianDb::Adapters::ActiveRecordAdapter.database
|
39
|
+
# # First, delete all docs of this class
|
40
|
+
# db.delete_docs_of_class(klass)
|
41
|
+
# obj_count = klass.count
|
42
|
+
# puts "Reindexing #{obj_count} objects..."
|
43
|
+
# pbar = ProgressBar.new("Status", obj_count)
|
44
|
+
# klass.all.each do |obj|
|
45
|
+
# doc = @@blueprint.indexer.build_document_for(obj)
|
46
|
+
# db.store_doc(doc)
|
47
|
+
# pbar.inc
|
48
|
+
# end
|
49
|
+
# db.commit
|
50
|
+
XapianDb::Config.writer.reindex_class(klass)
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
# Implement the document helper methods
|
57
|
+
def add_doc_helper_methods_to(a_module)
|
58
|
+
a_module.instance_eval do
|
59
|
+
# Implement access to the indexed object
|
60
|
+
define_method :indexed_object do
|
61
|
+
return @indexed_object unless @indexed_object.nil?
|
62
|
+
# retrieve the object id from data
|
63
|
+
klass_name, id = data.split("-")
|
64
|
+
klass = Kernel.const_get(klass_name)
|
65
|
+
@indexed_object = klass.find(id.to_i)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Adapter for datamapper. To use it, simply set it as the
|
4
|
+
# default for any DocumentBlueprint or a specific DocumentBlueprint
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
module Adapters
|
8
|
+
|
9
|
+
class DatamapperAdapter
|
10
|
+
|
11
|
+
class << self
|
12
|
+
|
13
|
+
# Implement the class helper methods
|
14
|
+
def add_class_helper_methods_to(klass)
|
15
|
+
|
16
|
+
klass.instance_eval do
|
17
|
+
# define the method to retrieve a unique key
|
18
|
+
define_method(:xapian_id) do
|
19
|
+
"#{self.class}-#{self.id}"
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
klass.class_eval do
|
25
|
+
|
26
|
+
# add the after save logic
|
27
|
+
after :save do
|
28
|
+
XapianDb::Config.writer.index(self)
|
29
|
+
end
|
30
|
+
|
31
|
+
# add the after destroy logic
|
32
|
+
after :destroy do
|
33
|
+
XapianDb::Config.writer.unindex(self)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Add a method to reindex all models of this class
|
37
|
+
define_singleton_method(:rebuild_xapian_index) do
|
38
|
+
XapianDb::Config.writer.reindex_class(self)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
# Implement the document helper methods
|
45
|
+
def add_doc_helper_methods_to(a_module)
|
46
|
+
a_module.instance_eval do
|
47
|
+
# Implement access to the indexed object
|
48
|
+
define_method :indexed_object do
|
49
|
+
return @indexed_object unless @indexed_object.nil?
|
50
|
+
# retrieve the object id from data
|
51
|
+
klass_name, id = data.split("-")
|
52
|
+
klass = Kernel.const_get(klass_name)
|
53
|
+
@indexed_object = klass.get(id.to_i)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# The generic adapter is a universal adapter that can be used for any
|
4
|
+
# ruby class. To use the generic adapter (which is the default),
|
5
|
+
# configure the expression that generates a unique key from your objects
|
6
|
+
# using the method 'unique_key'.
|
7
|
+
module XapianDb
|
8
|
+
module Adapters
|
9
|
+
|
10
|
+
class GenericAdapter
|
11
|
+
|
12
|
+
class << self
|
13
|
+
|
14
|
+
# Define the unique key expression
|
15
|
+
def unique_key(&block)
|
16
|
+
@unique_key_block = block
|
17
|
+
end
|
18
|
+
|
19
|
+
# Implement the class helper methods
|
20
|
+
def add_class_helper_methods_to(klass)
|
21
|
+
raise "Unique key is not configured for generic adapter!" if @unique_key_block.nil?
|
22
|
+
expression = @unique_key_block
|
23
|
+
klass.instance_eval do
|
24
|
+
define_method(:xapian_id) do
|
25
|
+
instance_eval &expression
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Implement the document helper methods
|
31
|
+
def add_doc_helper_methods_to(obj)
|
32
|
+
# We have none so far
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Global configuration for XapianDb
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
|
8
|
+
class Config
|
9
|
+
|
10
|
+
# ---------------------------------------------------------------------------------
|
11
|
+
# Singleton methods
|
12
|
+
# ---------------------------------------------------------------------------------
|
13
|
+
class << self
|
14
|
+
|
15
|
+
def setup(&block)
|
16
|
+
@config ||= Config.new
|
17
|
+
yield @config if block_given?
|
18
|
+
end
|
19
|
+
|
20
|
+
# Install delegates for the config instance variables
|
21
|
+
[:database, :adapter, :writer].each do |attr|
|
22
|
+
define_method attr do
|
23
|
+
@config.nil? ? nil : @config.instance_variable_get("@_#{attr}")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# ---------------------------------------------------------------------------------
|
29
|
+
# DSL methods
|
30
|
+
# ---------------------------------------------------------------------------------
|
31
|
+
attr_reader :_database, :_adapter, :_writer
|
32
|
+
|
33
|
+
# Set the database; either pass a path to the file system or
|
34
|
+
# the symbolic name "memory"
|
35
|
+
def database(path)
|
36
|
+
|
37
|
+
# If the current database is a persistent database, we must release the
|
38
|
+
# database and run the garbage collector to remove the write lock
|
39
|
+
if @_database.is_a?(XapianDb::PersistentDatabase)
|
40
|
+
@_database = nil
|
41
|
+
GC.start
|
42
|
+
end
|
43
|
+
|
44
|
+
if path.to_sym == :memory
|
45
|
+
@_database = XapianDb.create_db
|
46
|
+
else
|
47
|
+
if File.exist?(path)
|
48
|
+
@_database = XapianDb.open_db :path => path
|
49
|
+
else
|
50
|
+
# Database does not exist; create it
|
51
|
+
@_database = XapianDb.create_db :path => path
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
# Define the adapter to use; the following adapters are available:
|
57
|
+
# - :generic
|
58
|
+
# - :active_record
|
59
|
+
# - :datamapper
|
60
|
+
def adapter(type)
|
61
|
+
# We try to guess the adapter name
|
62
|
+
@_adapter = XapianDb::Adapters.const_get("#{camelize(type.to_s)}Adapter")
|
63
|
+
end
|
64
|
+
|
65
|
+
# Define the writer to use; the following writers are available:
|
66
|
+
# - :direct
|
67
|
+
# More to come in a future release :-)
|
68
|
+
def writer(type)
|
69
|
+
# We try to guess the writer name
|
70
|
+
@_writer = XapianDb::IndexWriters.const_get("#{camelize(type.to_s)}Writer")
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
|
75
|
+
# TODO: move this to a helper module
|
76
|
+
def camelize(string)
|
77
|
+
string.split(/[^a-z0-9]/i).map{|w| w.capitalize}.join
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Singleton class representing a Xapian database.
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
|
8
|
+
# Base class for a Xapian database.
|
9
|
+
class Database
|
10
|
+
attr_reader :reader
|
11
|
+
|
12
|
+
# Size of the database (number of docs)
|
13
|
+
def size
|
14
|
+
reader.doccount
|
15
|
+
end
|
16
|
+
|
17
|
+
# Store a Xapian document
|
18
|
+
def store_doc(doc)
|
19
|
+
# We always replace; Xapian adds the document automatically if
|
20
|
+
# it is not found
|
21
|
+
writer.replace_document("Q#{doc.data}", doc)
|
22
|
+
end
|
23
|
+
|
24
|
+
# Delete a document by a unique term; this method is used by the
|
25
|
+
# orm adapters
|
26
|
+
def delete_doc_with_unique_term(term)
|
27
|
+
writer.delete_document("Q#{term}")
|
28
|
+
true
|
29
|
+
end
|
30
|
+
|
31
|
+
# Delete all docs of a specific class
|
32
|
+
def delete_docs_of_class(klass)
|
33
|
+
writer.delete_document("C#{klass}")
|
34
|
+
true
|
35
|
+
end
|
36
|
+
|
37
|
+
# Perform a search
|
38
|
+
def search(expression)
|
39
|
+
@query_parser ||= QueryParser.new(self)
|
40
|
+
query = @query_parser.parse(expression)
|
41
|
+
enquiry = Xapian::Enquire.new(reader)
|
42
|
+
enquiry.query = query
|
43
|
+
Resultset.new(enquiry)
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
47
|
+
|
48
|
+
# In Memory database
|
49
|
+
class InMemoryDatabase < Database
|
50
|
+
|
51
|
+
def initialize
|
52
|
+
@writer ||= Xapian::inmemory_open
|
53
|
+
@reader = @writer
|
54
|
+
end
|
55
|
+
|
56
|
+
def writer
|
57
|
+
@writer
|
58
|
+
end
|
59
|
+
|
60
|
+
# Commit all pending changes
|
61
|
+
def commit
|
62
|
+
# Nothing to do for an in memory database
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
# Persistent database on disk
|
68
|
+
class PersistentDatabase < Database
|
69
|
+
|
70
|
+
def initialize(options)
|
71
|
+
@path = options[:path]
|
72
|
+
@db_flag = options[:create] ? Xapian::DB_CREATE_OR_OVERWRITE : Xapian::DB_OPEN
|
73
|
+
if options[:create]
|
74
|
+
# make sure the path exists; Xapian will not create the necessary directories
|
75
|
+
FileUtils.makedirs @path
|
76
|
+
@writer = Xapian::WritableDatabase.new(@path, @db_flag)
|
77
|
+
end
|
78
|
+
@reader = Xapian::Database.new(@path)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Get the readable instance of the database
|
82
|
+
def reader
|
83
|
+
# Always reopen the readable database so we get live index data
|
84
|
+
# TODO: make this configurable
|
85
|
+
@reader.reopen
|
86
|
+
@reader
|
87
|
+
end
|
88
|
+
|
89
|
+
# The writer is instantiated lazily to avoid a permanent write lock on the database
|
90
|
+
def writer
|
91
|
+
@writer ||= Xapian::WritableDatabase.new(@path, @db_flag)
|
92
|
+
end
|
93
|
+
|
94
|
+
# Commit all pending changes
|
95
|
+
def commit
|
96
|
+
writer.commit
|
97
|
+
reader.reopen
|
98
|
+
end
|
99
|
+
|
100
|
+
end
|
101
|
+
|
102
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# A document blueprint describes the mapping of an object to a Xapian document
|
4
|
+
# for a given class.
|
5
|
+
# @author Gernot Kogler
|
6
|
+
|
7
|
+
module XapianDb
|
8
|
+
|
9
|
+
class DocumentBlueprint
|
10
|
+
|
11
|
+
# ---------------------------------------------------------------------------------
|
12
|
+
# Singleton methods
|
13
|
+
# ---------------------------------------------------------------------------------
|
14
|
+
class << self
|
15
|
+
|
16
|
+
# Configure the blueprint for a class
|
17
|
+
def setup(klass, &block)
|
18
|
+
@blueprints ||= {}
|
19
|
+
blueprint = DocumentBlueprint.new
|
20
|
+
blueprint.indexer = Indexer.new(blueprint)
|
21
|
+
yield blueprint if block_given? # configure the blueprint through the block
|
22
|
+
@blueprints[klass] = blueprint
|
23
|
+
@adapter = blueprint.adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
|
24
|
+
@adapter.add_class_helper_methods_to klass
|
25
|
+
@searchable_prefixes = nil # force rebuild of the searchable prefixes
|
26
|
+
end
|
27
|
+
|
28
|
+
# Get the blueprint for a class
|
29
|
+
def blueprint_for(klass)
|
30
|
+
@blueprints[klass] if @blueprints
|
31
|
+
end
|
32
|
+
|
33
|
+
# Return an array of all configured text methods in any blueprint
|
34
|
+
def searchable_prefixes
|
35
|
+
return [] unless @blueprints
|
36
|
+
return @searchable_prefixes unless @searchable_prefixes.nil?
|
37
|
+
prefixes = []
|
38
|
+
@blueprints.each do |klass, blueprint|
|
39
|
+
prefixes << blueprint.searchable_prefixes
|
40
|
+
end
|
41
|
+
@searchable_prefixes = prefixes.flatten.compact.uniq
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
# ---------------------------------------------------------------------------------
|
47
|
+
# Instance methods
|
48
|
+
# ---------------------------------------------------------------------------------
|
49
|
+
attr_accessor :indexer
|
50
|
+
|
51
|
+
# Return an array of all configured text methods in this blueprint
|
52
|
+
def searchable_prefixes
|
53
|
+
@prefixes ||= indexed_methods.map{|method_name, options| method_name}
|
54
|
+
end
|
55
|
+
|
56
|
+
# Lazily build and return a module that implements accessors for each field
|
57
|
+
# Lazily build and return a module that implements accessors for each field.
# The module is meant to be mixed into a retrieved document (an object that
# responds to 'values'); it is built once and memoized.
#
# The module provides:
# - 'domain_class': the content of value slot 0
# - one reader per configured attribute: the YAML-decoded content of the
#   value slot at (attribute position + 1)
# - whatever document helper methods the adapter adds
#
# @return [Module] the accessors module for this blueprint
def accessors_module
  return @accessors_module unless @accessors_module.nil?

  accessors = Module.new do
    define_method :domain_class do
      self.values[0].value
    end
  end

  @attributes.each_with_index do |field, index|
    # Slot 0 is taken by the class name, so the attributes start at slot 1
    accessors.send(:define_method, field) do
      YAML::load(self.values[index + 1].value)
    end
  end

  # Let the adapter add its document helper methods (if any). An adapter set on
  # this blueprint takes precedence over the globally configured adapter, the
  # same order that is used when the class helper methods are installed.
  adapter = @adapter || XapianDb::Config.adapter || XapianDb::Adapters::GenericAdapter
  adapter.add_doc_helper_methods_to(accessors)

  # Memoize only after the module is complete so that a failure above does not
  # leave a half-built module behind
  @accessors_module = accessors
end
|
79
|
+
|
80
|
+
# ---------------------------------------------------------------------------------
|
81
|
+
# Blueprint DSL methods
|
82
|
+
# ---------------------------------------------------------------------------------
|
83
|
+
attr_reader :adapter, :attributes, :indexed_methods
|
84
|
+
|
85
|
+
# Construct the blueprint
|
86
|
+
def initialize
|
87
|
+
@attributes = []
|
88
|
+
@indexed_methods = {}
|
89
|
+
end
|
90
|
+
|
91
|
+
# Set a custom adapter for this blueprint
|
92
|
+
def adapter=(adapter)
|
93
|
+
@adapter = adapter
|
94
|
+
end
|
95
|
+
|
96
|
+
# Add an attribute to the list
|
97
|
+
# TODO: Make sure the name does not collide with a method name of Xapian::Document since
|
98
|
+
# we generate methods in the documents for all defined fields
|
99
|
+
def attribute(name, options={})
|
100
|
+
opts = {:index => true}.merge(options)
|
101
|
+
@attributes << name
|
102
|
+
self.index(name, opts) if opts[:index]
|
103
|
+
end
|
104
|
+
|
105
|
+
# Add an indexed value to the list
|
106
|
+
def index(name, options={})
|
107
|
+
@indexed_methods[name] = IndexOptions.new(options)
|
108
|
+
end
|
109
|
+
|
110
|
+
# Options for an indexed text
|
111
|
+
class IndexOptions
|
112
|
+
attr_accessor :weight
|
113
|
+
|
114
|
+
def initialize(options)
|
115
|
+
@weight = options[:weight] || 1
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
120
|
+
|
121
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This writer writes changes directly to the open database.
|
4
|
+
# Use the direct writer only for single process environments
|
5
|
+
# (one single rails app server, e.g. one mongrel).
|
6
|
+
# For multi process environments you should use a writer that
|
7
|
+
# processes index changes through a queue.
|
8
|
+
# @author Gernot Kogler
|
9
|
+
|
10
|
+
module XapianDb
|
11
|
+
module IndexWriters
|
12
|
+
|
13
|
+
class DirectWriter
|
14
|
+
|
15
|
+
class << self
|
16
|
+
|
17
|
+
# Update an object in the index
|
18
|
+
def index(obj)
|
19
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for(obj.class)
|
20
|
+
doc = blueprint.indexer.build_document_for(obj)
|
21
|
+
XapianDb.database.store_doc(doc)
|
22
|
+
XapianDb.database.commit
|
23
|
+
end
|
24
|
+
|
25
|
+
# Remove an object from the index
|
26
|
+
def unindex(obj)
|
27
|
+
XapianDb.database.delete_doc_with_unique_term(obj.xapian_id)
|
28
|
+
XapianDb.database.commit
|
29
|
+
end
|
30
|
+
|
31
|
+
# Reindex all objects of a given class
|
32
|
+
def reindex_class(klass)
|
33
|
+
# First, delete all docs of this class
|
34
|
+
XapianDb.database.delete_docs_of_class(klass)
|
35
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
|
36
|
+
obj_count = klass.count
|
37
|
+
puts "Reindexing #{obj_count} objects..."
|
38
|
+
pbar = ProgressBar.new("Status", obj_count)
|
39
|
+
klass.all.each do |obj|
|
40
|
+
doc = blueprint.indexer.build_document_for(obj)
|
41
|
+
XapianDb.database.store_doc(doc)
|
42
|
+
pbar.inc
|
43
|
+
end
|
44
|
+
XapianDb.database.commit
|
45
|
+
end
|
46
|
+
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# The indexer creates a Xapian::Document from a configured object
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
|
8
|
+
class Indexer
|
9
|
+
|
10
|
+
def initialize(document_blueprint)
|
11
|
+
@document_blueprint = document_blueprint
|
12
|
+
end
|
13
|
+
|
14
|
+
# Build the doc for an object. The object must respond to 'xapian_id'.
|
15
|
+
# The configured adapter should implement this method.
|
16
|
+
def build_document_for(obj)
|
17
|
+
@obj = obj
|
18
|
+
@blueprint = DocumentBlueprint.blueprint_for(@obj.class)
|
19
|
+
@xapian_doc = Xapian::Document.new
|
20
|
+
@xapian_doc.data = @obj.xapian_id
|
21
|
+
store_fields
|
22
|
+
index_text
|
23
|
+
@xapian_doc
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
# Store all configured fields
|
29
|
+
def store_fields
|
30
|
+
|
31
|
+
# We store the class name of the object at position 0
|
32
|
+
@xapian_doc.add_value(0, @obj.class.name)
|
33
|
+
|
34
|
+
pos = 1
|
35
|
+
@blueprint.attributes.each do |attribute, options|
|
36
|
+
value = @obj.send(attribute)
|
37
|
+
@xapian_doc.add_value(pos, value.to_yaml)
|
38
|
+
pos += 1
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
# Index all configured text methods
|
43
|
+
def index_text
|
44
|
+
term_generator = Xapian::TermGenerator.new()
|
45
|
+
term_generator.document = @xapian_doc
|
46
|
+
# TODO: make this configurable globally and per document
|
47
|
+
# (retrieve the language from the object, if configured)
|
48
|
+
stemmer = Xapian::Stem.new("english")
|
49
|
+
term_generator.stemmer = stemmer
|
50
|
+
# TODO: Configure and enable these features
|
51
|
+
# tg.stopper = stopper if stopper
|
52
|
+
# tg.stemmer = stemmer
|
53
|
+
# tg.set_flags Xapian::TermGenerator::FLAG_SPELLING if db.spelling
|
54
|
+
|
55
|
+
# Always index the class and the primary key
|
56
|
+
@xapian_doc.add_term("C#{@obj.class}")
|
57
|
+
@xapian_doc.add_term("Q#{@obj.xapian_id}")
|
58
|
+
|
59
|
+
@blueprint.indexed_methods.each do |method, options|
|
60
|
+
value = @obj.send(method)
|
61
|
+
unless value.nil?
|
62
|
+
values = value.is_a?(Array) ? value : [value]
|
63
|
+
values.each do |value|
|
64
|
+
# Add value with field name
|
65
|
+
term_generator.index_text(value.to_s.downcase, options.weight, "X#{method.upcase}")
|
66
|
+
# Add value without field name
|
67
|
+
term_generator.index_text(value.to_s.downcase)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Parse a query expression and convert it to Xapian Query arguments
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
|
8
|
+
class QueryParser
|
9
|
+
|
10
|
+
def initialize(database)
|
11
|
+
@db = database
|
12
|
+
|
13
|
+
# Set the parser options
|
14
|
+
@query_flags = 0
|
15
|
+
@query_flags |= Xapian::QueryParser::FLAG_WILDCARD # enable wildcards
|
16
|
+
@query_flags |= Xapian::QueryParser::FLAG_BOOLEAN
|
17
|
+
@query_flags |= Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
|
18
|
+
end
|
19
|
+
|
20
|
+
def parse(expression)
|
21
|
+
parser = Xapian::QueryParser.new
|
22
|
+
parser.database = @db.reader
|
23
|
+
parser.default_op = Xapian::Query::OP_AND # Could be made configurable
|
24
|
+
# TODO: Setup stopper, stemmer, defaults and fields
|
25
|
+
|
26
|
+
# Add the searchable prefixes to allow searches by field
|
27
|
+
# (like "name:Kogler")
|
28
|
+
XapianDb::DocumentBlueprint.searchable_prefixes.each{|prefix| parser.add_prefix(prefix.to_s.downcase, "X#{prefix.to_s.upcase}") }
|
29
|
+
parser.parse_query(expression, @query_flags)
|
30
|
+
end
|
31
|
+
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Configuration for a rails app
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
require 'xapian_db'
|
7
|
+
require 'rails'
|
8
|
+
|
9
|
+
module XapianDb
|
10
|
+
# Railtie that configures XapianDb when it is loaded inside a Rails app.
class Railtie < ::Rails::Railtie

  # Set up XapianDb before the Rails application gets configured. The settings
  # are read from the section of the current Rails environment in
  # config/xapian_db.yml if that file exists; otherwise defaults are used.
  config.before_configuration do

    # Read the database configuration file if there is one
    config_file_path = "#{Rails.root}/config/xapian_db.yml"
    if File.exist?(config_file_path)
      db_config     = YAML::load_file config_file_path
      # An empty file or a file without a section for the current environment
      # must not crash the boot process; use the per-option defaults instead
      env_config    = db_config.is_a?(Hash) ? db_config[Rails.env] : nil
      env_config  ||= {}
      database_path = env_config["database"] || ":memory:"
      adapter       = env_config["adapter"]  || :active_record
      writer        = env_config["writer"]   || :direct
    else
      # No config file, set the defaults
      database_path = Rails.env == "test" ? ":memory:" : "db/xapian_db/#{Rails.env}"
      adapter       = :active_record
      writer        = :direct
    end

    # Do the configuration
    XapianDb::Config.setup do |xapian_config|
      if database_path == ":memory:"
        xapian_config.database :memory
      else
        xapian_config.database database_path
      end
      xapian_config.adapter adapter.to_sym
      xapian_config.writer writer.to_sym
    end

  end

end
|
43
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# The resultset holds a Xapian::Enquire object and allows paged access
|
4
|
+
# to the found documents.
|
5
|
+
# @author Gernot Kogler
|
6
|
+
|
7
|
+
module XapianDb
|
8
|
+
|
9
|
+
class Resultset
|
10
|
+
|
11
|
+
attr_reader :size
|
12
|
+
|
13
|
+
# Constructor
|
14
|
+
# @param [Xapian::Enquire] a Xapian query result
|
15
|
+
def initialize(enquiry)
|
16
|
+
@enquiry = enquiry
|
17
|
+
# By passing 0 as the max parameter to the mset method,
|
18
|
+
# we only get statistics about the query, no results
|
19
|
+
@size = enquiry.mset(0, 0).matches_estimated
|
20
|
+
end
|
21
|
+
|
22
|
+
# Paginate the result
|
23
|
+
# Return one page of the found documents.
#
# @param opts [Hash] paging options
# @option opts [Integer, String] :page (1) the 1-based page number
# @option opts [Integer, String] :per_page (10) the number of documents per page
# @return [Array] the documents of the requested page; an empty array if the
#   page starts at or beyond the last match
def paginate(opts={})
  options  = {:page => 1, :per_page => 10}.merge(opts)
  # Paging values often arrive as strings (e.g. request parameters), so
  # convert them before doing arithmetic
  page     = options[:page].to_i
  per_page = options[:per_page].to_i
  offset   = (page - 1) * per_page
  # The offset is zero-based: an offset equal to the number of matches is
  # already past the last document
  return [] if offset >= @size
  build_page(offset, per_page)
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
# Build a page of Xapian documents
|
33
|
+
def build_page(offset, count)
|
34
|
+
docs = []
|
35
|
+
result_window = @enquiry.mset(offset, count)
|
36
|
+
result_window.matches.each do |match|
|
37
|
+
docs << decorate(match.document)
|
38
|
+
end
|
39
|
+
docs
|
40
|
+
end
|
41
|
+
|
42
|
+
# Decorate a Xapian document with field accessors
|
43
|
+
def decorate(document)
|
44
|
+
klass_name = document.values[0].value
|
45
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for(Kernel.const_get(klass_name))
|
46
|
+
document.extend blueprint.accessors_module
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
50
|
+
|
51
|
+
end
|
data/lib/xapian_db.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'digest/sha1'
|
2
|
+
require 'rubygems'
|
3
|
+
require 'xapian'
|
4
|
+
require 'yaml'
|
5
|
+
require 'progressbar'
|
6
|
+
|
7
|
+
module XapianDb
|
8
|
+
|
9
|
+
# Configure XapianDb
|
10
|
+
def self.setup(&block)
|
11
|
+
XapianDb::Config.setup(&block)
|
12
|
+
end
|
13
|
+
|
14
|
+
# Create a database. Overwrites an existing database on disk, if
|
15
|
+
# option :path is given; creates an in memory database otherwise.
|
16
|
+
def self.create_db(options = {})
|
17
|
+
if options[:path]
|
18
|
+
PersistentDatabase.new(:path => options[:path], :create => true)
|
19
|
+
else
|
20
|
+
InMemoryDatabase.new
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# Open a database.
|
25
|
+
def self.open_db(options = {})
|
26
|
+
if options[:path]
|
27
|
+
PersistentDatabase.new(:path => options[:path], :create => false)
|
28
|
+
else
|
29
|
+
InMemoryDatabase.new
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Access the configured database
|
34
|
+
def self.database
|
35
|
+
XapianDb::Config.database
|
36
|
+
end
|
37
|
+
|
38
|
+
# Query the database
|
39
|
+
def self.search(expression)
|
40
|
+
XapianDb::Config.database.search(expression)
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
require File.dirname(__FILE__) + '/xapian_db/config'
|
46
|
+
require File.dirname(__FILE__) + '/xapian_db/adapters/generic_adapter'
|
47
|
+
require File.dirname(__FILE__) + '/xapian_db/adapters/datamapper_adapter'
|
48
|
+
require File.dirname(__FILE__) + '/xapian_db/adapters/active_record_adapter'
|
49
|
+
require File.dirname(__FILE__) + '/xapian_db/index_writers/direct_writer'
|
50
|
+
require File.dirname(__FILE__) + '/xapian_db/database'
|
51
|
+
require File.dirname(__FILE__) + '/xapian_db/document_blueprint'
|
52
|
+
require File.dirname(__FILE__) + '/xapian_db/indexer'
|
53
|
+
require File.dirname(__FILE__) + '/xapian_db/query_parser'
|
54
|
+
require File.dirname(__FILE__) + '/xapian_db/resultset'
|
55
|
+
|
56
|
+
# Configure XapianDB if we are in a Rails app
|
57
|
+
require File.dirname(__FILE__) + '/xapian_db/railtie' if defined?(Rails)
|
metadata
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xapian_db
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 3
|
8
|
+
- 1
|
9
|
+
version: 0.3.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Gernot kogler
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-12-06 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: progressbar
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
- 9
|
31
|
+
- 0
|
32
|
+
version: 0.9.0
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Ruby library to use a Xapian db as a key/value store with high performance fulltext search
|
36
|
+
email: gernot.kogler (at) garaio (dot) com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- CHANGELOG
|
43
|
+
files:
|
44
|
+
- CHANGELOG
|
45
|
+
- lib/xapian_db.rb
|
46
|
+
- lib/xapian_db/railtie.rb
|
47
|
+
- lib/xapian_db/config.rb
|
48
|
+
- lib/xapian_db/index_writers/direct_writer.rb
|
49
|
+
- lib/xapian_db/database.rb
|
50
|
+
- lib/xapian_db/document_blueprint.rb
|
51
|
+
- lib/xapian_db/indexer.rb
|
52
|
+
- lib/xapian_db/adapters/generic_adapter.rb
|
53
|
+
- lib/xapian_db/adapters/datamapper_adapter.rb
|
54
|
+
- lib/xapian_db/adapters/active_record_adapter.rb
|
55
|
+
- lib/xapian_db/query_parser.rb
|
56
|
+
- lib/xapian_db/resultset.rb
|
57
|
+
- examples/basic.rb
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: https://github.com/garaio/xapian_db
|
60
|
+
licenses: []
|
61
|
+
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options:
|
64
|
+
- --line-numbers
|
65
|
+
- --inline-source
|
66
|
+
- --title
|
67
|
+
- Xapian-DB
|
68
|
+
- --main
|
69
|
+
- README.rdoc
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
segments:
|
86
|
+
- 1
|
87
|
+
- 2
|
88
|
+
version: "1.2"
|
89
|
+
requirements: []
|
90
|
+
|
91
|
+
rubyforge_project:
|
92
|
+
rubygems_version: 1.3.7
|
93
|
+
signing_key:
|
94
|
+
specification_version: 3
|
95
|
+
summary: Ruby library to use a Xapian db as a key/value store with high performance fulltext search
|
96
|
+
test_files: []
|
97
|
+
|