xapian_db 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +15 -0
- data/examples/basic.rb +59 -0
- data/lib/xapian_db/adapters/active_record_adapter.rb +74 -0
- data/lib/xapian_db/adapters/datamapper_adapter.rb +62 -0
- data/lib/xapian_db/adapters/generic_adapter.rb +41 -0
- data/lib/xapian_db/config.rb +82 -0
- data/lib/xapian_db/database.rb +102 -0
- data/lib/xapian_db/document_blueprint.rb +121 -0
- data/lib/xapian_db/index_writers/direct_writer.rb +52 -0
- data/lib/xapian_db/indexer.rb +75 -0
- data/lib/xapian_db/query_parser.rb +34 -0
- data/lib/xapian_db/railtie.rb +43 -0
- data/lib/xapian_db/resultset.rb +51 -0
- data/lib/xapian_db.rb +57 -0
- metadata +97 -0
data/CHANGELOG
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
*0.1.0* (November 23rd, 2010)
|
2
|
+
|
3
|
+
* Proof of concept, not really useful for real world usage
|
4
|
+
|
5
|
+
*0.2.0* (December 1st, 2010)
|
6
|
+
|
7
|
+
* Blueprint configuration extended
|
8
|
+
* Adapter for Datamapper
|
9
|
+
* Search by attribute names
|
10
|
+
* Search with wildcards
|
11
|
+
* Document attributes can carry anything that is serializable by YAML
|
12
|
+
|
13
|
+
*0.3.0* (December 4th, 2010)
|
14
|
+
|
15
|
+
* Rails integration with configuration file (config/xapian_db.yml) and automatic setup
|
data/examples/basic.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This example shows the most basic way to use xapian_db
|
4
|
+
# To run the example, please install the xapian_db gem first
|
5
|
+
|
6
|
+
require 'rubygems'
|
7
|
+
require 'xapian_db'
|
8
|
+
|
9
|
+
puts "Setting up the demo..."
|
10
|
+
|
11
|
+
# 1: Open an in memory database
|
12
|
+
db = XapianDb.create_db
|
13
|
+
|
14
|
+
# 2: Define a class which should get indexed; we define a class that
|
15
|
+
# could be an ActiveRecord or Datamapper Domain class
|
16
|
+
# Plain Ruby demo class that stands in for an ActiveRecord or Datamapper model.
class People

  attr_accessor :id, :name, :first_name

  # Populate the attributes from a hash with the keys :id, :name and :first_name.
  def initialize(data)
    @id         = data[:id]
    @name       = data[:name]
    @first_name = data[:first_name]
  end

end
|
25
|
+
|
26
|
+
# 3: Configure the generic adapter with a unique key expression
|
27
|
+
XapianDb::Adapters::GenericAdapter.unique_key do
|
28
|
+
"#{self.class}-#{self.id}"
|
29
|
+
end
|
30
|
+
|
31
|
+
# 4: Define a document blueprint for our class; the blueprint describes
|
32
|
+
# the structure of all documents for our class. Attribute values can
|
33
|
+
# be accessed later for each retrieved doc. Attributes are indexed
|
34
|
+
# by default.
|
35
|
+
XapianDb::DocumentBlueprint.setup(People) do |blueprint|
|
36
|
+
blueprint.attribute :name
|
37
|
+
blueprint.attribute :first_name
|
38
|
+
end
|
39
|
+
|
40
|
+
# 5: Let's create some objects
|
41
|
+
person_1 = People.new(:id => 1, :name => "Kogler", :first_name => "Gernot")
|
42
|
+
person_2 = People.new(:id => 2, :name => "Frey", :first_name => "Daniel")
|
43
|
+
person_3 = People.new(:id => 3, :name => "Garaio", :first_name => "Thomas")
|
44
|
+
|
45
|
+
# 6: Now add them to the database
|
46
|
+
blueprint = XapianDb::DocumentBlueprint.blueprint_for(People)
|
47
|
+
db.store_doc(blueprint.indexer.build_document_for(person_1))
|
48
|
+
db.store_doc(blueprint.indexer.build_document_for(person_2))
|
49
|
+
db.store_doc(blueprint.indexer.build_document_for(person_3))
|
50
|
+
|
51
|
+
# 7: Now find the gem author ;-)
|
52
|
+
puts "Searching for Gernot..."
|
53
|
+
results = db.search("Gernot")
|
54
|
+
puts "We found #{results.size} documents"
|
55
|
+
puts "And the first document looks like this:"
|
56
|
+
page = results.paginate(:page => 1)
|
57
|
+
doc = page.first
|
58
|
+
puts "name: #{doc.name}"
|
59
|
+
puts "first name: #{doc.first_name}"
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Adapter for ActiveRecord. To use it, simply set it as the
|
4
|
+
# default for any DocumentBlueprint or a specific DocumentBlueprint
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
module Adapters
|
8
|
+
|
9
|
+
class ActiveRecordAdapter

  class << self

    # Add the indexing helpers to a model class:
    # - an instance method 'xapian_id' returning "<class>-<id>"
    # - after_save / after_destroy callbacks that forward the object to the
    #   configured index writer
    # - a class method 'rebuild_xapian_index' that asks the writer to reindex
    #   all objects of the class
    def add_class_helper_methods_to(klass)
      klass.class_eval do
        # The unique key of an object inside the index
        define_method(:xapian_id) { "#{self.class}-#{self.id}" }

        # Keep the index in sync with the persisted object
        after_save    { XapianDb::Config.writer.index(self) }
        after_destroy { XapianDb::Config.writer.unindex(self) }

        # Reindex all models of this class
        define_singleton_method(:rebuild_xapian_index) do
          XapianDb::Config.writer.reindex_class(klass)
        end
      end
    end

    # Add the document helpers to the given module: 'indexed_object' loads
    # (and memoizes) the object a document was built from.
    def add_doc_helper_methods_to(a_module)
      a_module.send(:define_method, :indexed_object) do
        if @indexed_object.nil?
          # The document data holds the unique key "<class name>-<id>"
          klass_name, id = data.split("-")
          @indexed_object = Kernel.const_get(klass_name).find(id.to_i)
        end
        @indexed_object
      end
    end

  end

end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Adapter for datamapper. To use it, simply set it as the
|
4
|
+
# default for any DocumentBlueprint or a specific DocumentBlueprint
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
module Adapters
|
8
|
+
|
9
|
+
class DatamapperAdapter

  class << self

    # Add the indexing helpers to a resource class:
    # - an instance method 'xapian_id' returning "<class>-<id>"
    # - 'after :save' / 'after :destroy' hooks that forward the object to the
    #   configured index writer
    # - a class method 'rebuild_xapian_index' that asks the writer to reindex
    #   all objects of the class
    def add_class_helper_methods_to(klass)
      klass.class_eval do
        # The unique key of an object inside the index
        define_method(:xapian_id) { "#{self.class}-#{self.id}" }

        # Keep the index in sync with the persisted object
        after(:save)    { XapianDb::Config.writer.index(self) }
        after(:destroy) { XapianDb::Config.writer.unindex(self) }

        # Reindex all models of this class
        define_singleton_method(:rebuild_xapian_index) do
          XapianDb::Config.writer.reindex_class(self)
        end
      end
    end

    # Add the document helpers to the given module: 'indexed_object' loads
    # (and memoizes) the object a document was built from.
    def add_doc_helper_methods_to(a_module)
      a_module.send(:define_method, :indexed_object) do
        if @indexed_object.nil?
          # The document data holds the unique key "<class name>-<id>"
          klass_name, id = data.split("-")
          @indexed_object = Kernel.const_get(klass_name).get(id.to_i)
        end
        @indexed_object
      end
    end

  end

end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# The generic adapter is a universal adapter that can be used for any
|
4
|
+
# ruby class. To use the generic adapter (which is the default),
|
5
|
+
# configure the expression that generates a unique key from your objects
|
6
|
+
# using the method 'unique_key'.
|
7
|
+
module XapianDb
|
8
|
+
module Adapters
|
9
|
+
|
10
|
+
class GenericAdapter

  class << self

    # Remember the block that computes the unique key of an object.
    # The block is evaluated in the context of the object (see below).
    def unique_key(&block)
      @unique_key_block = block
    end

    # Add an instance method 'xapian_id' to the class; it evaluates the
    # configured unique key block against the instance.
    # Raises if no unique key block was configured.
    def add_class_helper_methods_to(klass)
      raise "Unique key is not configured for generic adapter!" if @unique_key_block.nil?
      # Copy the block into a local; inside define_method, self is the instance
      key_block = @unique_key_block
      klass.send(:define_method, :xapian_id) { instance_eval(&key_block) }
    end

    # The generic adapter has no document helper methods so far
    def add_doc_helper_methods_to(obj)
    end

  end

end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Global configuration for XapianDb
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
|
8
|
+
class Config

  # ---------------------------------------------------------------------------------
  # Singleton methods
  # ---------------------------------------------------------------------------------
  class << self

    # Create the shared config instance on first use and hand it to the
    # given block (if any) for configuration.
    def setup(&block)
      @config ||= Config.new
      yield @config if block_given?
    end

    # Class level readers for the configured values; they return nil as long
    # as setup has not been called.
    [:database, :adapter, :writer].each do |attr|
      define_method(attr) do
        @config ? @config.send("_#{attr}") : nil
      end
    end
  end

  # ---------------------------------------------------------------------------------
  # DSL methods
  # ---------------------------------------------------------------------------------
  attr_reader :_database, :_adapter, :_writer

  # Set the database; either pass a path to the file system or
  # the symbolic name "memory". An existing path is opened, a missing
  # path is created.
  def database(path)
    # If the current database is a persistent database, we must release the
    # database and run the garbage collector to remove the write lock
    if @_database.is_a?(XapianDb::PersistentDatabase)
      @_database = nil
      GC.start
    end

    @_database =
      if path.to_sym == :memory
        XapianDb.create_db
      elsif File.exist?(path)
        XapianDb.open_db :path => path
      else
        # Database does not exist; create it
        XapianDb.create_db :path => path
      end
  end

  # Define the adapter to use; the following adapters are available:
  # - :generic
  # - :active_record
  # - :datamapper
  def adapter(type)
    # The class name is derived from the type, e.g. :active_record => ActiveRecordAdapter
    adapter_name = "#{camelize(type.to_s)}Adapter"
    @_adapter = XapianDb::Adapters.const_get(adapter_name)
  end

  # Define the writer to use; the following writers are available:
  # - :direct
  # More to come in a future release :-)
  def writer(type)
    # The class name is derived from the type, e.g. :direct => DirectWriter
    writer_name = "#{camelize(type.to_s)}Writer"
    @_writer = XapianDb::IndexWriters.const_get(writer_name)
  end

  private

  # Convert a name like "active_record" to "ActiveRecord"
  # TODO: move this to a helper module
  def camelize(string)
    words = string.split(/[^a-z0-9]/i)
    words.map { |word| word.capitalize }.join
  end

end
|
81
|
+
|
82
|
+
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Classes representing a Xapian database (in memory or persistent on disk).
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
|
8
|
+
# Base class for a Xapian database.
|
9
|
+
class Database
  attr_reader :reader

  # Number of documents in the database
  def size
    reader.doccount
  end

  # Store a Xapian document under the unique term "Q<doc.data>".
  # We always replace; Xapian adds the document automatically if
  # it is not found
  def store_doc(doc)
    unique_term = "Q#{doc.data}"
    writer.replace_document(unique_term, doc)
  end

  # Delete the document with the given unique term; this method is used
  # by the orm adapters. Always returns true.
  def delete_doc_with_unique_term(term)
    unique_term = "Q#{term}"
    writer.delete_document(unique_term)
    true
  end

  # Delete all docs carrying the class term "C<klass>". Always returns true.
  def delete_docs_of_class(klass)
    class_term = "C#{klass}"
    writer.delete_document(class_term)
    true
  end

  # Parse the expression, run it against the readable database and
  # wrap the enquiry in a resultset
  def search(expression)
    @query_parser ||= QueryParser.new(self)
    query = @query_parser.parse(expression)
    enquiry = Xapian::Enquire.new(reader)
    enquiry.query = query
    Resultset.new(enquiry)
  end

end
|
47
|
+
|
48
|
+
# In Memory database
|
49
|
+
class InMemoryDatabase < Database

  # The in memory database is used for reading and writing
  attr_reader :writer

  # Open an in memory database and use the same instance as the reader
  def initialize
    @writer ||= Xapian::inmemory_open
    @reader = @writer
  end

  # Commit all pending changes
  def commit
    # Nothing to do for an in memory database
  end

end
|
66
|
+
|
67
|
+
# Persistent database on disk
|
68
|
+
class PersistentDatabase < Database

  # Open the database at options[:path]; if options[:create] is set, the
  # database is created (or overwritten) first.
  def initialize(options)
    @path    = options[:path]
    creating = options[:create]
    @db_flag = creating ? Xapian::DB_CREATE_OR_OVERWRITE : Xapian::DB_OPEN
    if creating
      # make sure the path exists; Xapian will not create the necessary directories
      FileUtils.makedirs @path
      @writer = Xapian::WritableDatabase.new(@path, @db_flag)
    end
    @reader = Xapian::Database.new(@path)
  end

  # Get the readable instance of the database
  def reader
    # Always reopen the readable database so we get live index data
    # TODO: make this configurable
    @reader.reopen
    @reader
  end

  # The writer is instantiated lazily to avoid a permanent write lock on the database
  def writer
    @writer ||= Xapian::WritableDatabase.new(@path, @db_flag)
  end

  # Commit all pending changes and reopen the reader
  def commit
    writer.commit
    reader.reopen
  end

end
|
101
|
+
|
102
|
+
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# A document blueprint describes the mapping of an object to a Xapian document
|
4
|
+
# for a given class.
|
5
|
+
# @author Gernot Kogler
|
6
|
+
|
7
|
+
module XapianDb
|
8
|
+
|
9
|
+
class DocumentBlueprint

  # ---------------------------------------------------------------------------------
  # Singleton methods
  # ---------------------------------------------------------------------------------
  class << self

    # Configure the blueprint for a class. The new blueprint is passed to the
    # block, registered for the class, and the effective adapter (blueprint
    # adapter, else the globally configured one, else the generic adapter)
    # adds its helper methods to the class.
    def setup(klass, &block)
      @blueprints ||= {}
      blueprint = DocumentBlueprint.new
      blueprint.indexer = Indexer.new(blueprint)
      yield blueprint if block_given? # configure the blueprint through the block
      @blueprints[klass] = blueprint
      @adapter = blueprint.adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
      @adapter.add_class_helper_methods_to klass
      @searchable_prefixes = nil # force rebuild of the searchable prefixes
    end

    # Get the blueprint for a class; nil if none is configured
    def blueprint_for(klass)
      @blueprints[klass] if @blueprints
    end

    # Return an array of all configured text methods in any blueprint.
    # The result is cached until the next call to setup.
    def searchable_prefixes
      return [] unless @blueprints
      @searchable_prefixes ||=
        @blueprints.values.map { |blueprint| blueprint.searchable_prefixes }.flatten.compact.uniq
    end

  end

  # ---------------------------------------------------------------------------------
  # Instance methods
  # ---------------------------------------------------------------------------------
  attr_accessor :indexer

  # Return an array of all configured text methods in this blueprint
  def searchable_prefixes
    @prefixes ||= indexed_methods.keys
  end

  # Lazily build and return a module that implements accessors for each field.
  # The module expects to be mixed into an object that responds to 'values'.
  def accessors_module
    return @accessors_module unless @accessors_module.nil?
    accessors = Module.new

    # The class name of the indexed object is stored at position 0
    accessors.send(:define_method, :domain_class) { values[0].value }

    # The attributes follow in the order they were declared, serialized as YAML.
    # NOTE(review): YAML::load deserializes whatever is stored in the index;
    # confirm the index is never fed from untrusted sources.
    @attributes.each_with_index do |field, index|
      accessors.send(:define_method, field) { YAML::load(values[index + 1].value) }
    end

    # Let the adapter add its document helper methods (if any). A custom
    # adapter set on this blueprint takes precedence, exactly like in setup.
    doc_adapter = @adapter || XapianDb::Config.adapter || XapianDb::Adapters::GenericAdapter
    doc_adapter.add_doc_helper_methods_to(accessors)
    @accessors_module = accessors
  end

  # ---------------------------------------------------------------------------------
  # Blueprint DSL methods
  # ---------------------------------------------------------------------------------
  attr_reader :adapter, :attributes, :indexed_methods

  # Construct the blueprint
  def initialize
    @attributes = []
    @indexed_methods = {}
  end

  # Set a custom adapter for this blueprint
  def adapter=(adapter)
    @adapter = adapter
  end

  # Add an attribute to the list; attributes are indexed unless :index => false
  # is passed.
  # TODO: Make sure the name does not collide with a method name of Xapian::Document since
  # we generate methods in the documents for all defined fields
  def attribute(name, options={})
    opts = {:index => true}.merge(options)
    @attributes << name
    self.index(name, opts) if opts[:index]
  end

  # Add an indexed value to the list
  def index(name, options={})
    @indexed_methods[name] = IndexOptions.new(options)
  end

  # Options for an indexed text
  class IndexOptions
    attr_accessor :weight

    # The weight defaults to 1
    def initialize(options)
      @weight = options[:weight] || 1
    end
  end

end
|
120
|
+
|
121
|
+
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# This writer writes changes directly to the open database.
|
4
|
+
# Use the direct writer only for single process environments
|
5
|
+
# (one single rails app server, e.g. one mongrel).
|
6
|
+
# For multi process environments you should use a writer that
|
7
|
+
# processes index changes through a queue.
|
8
|
+
# @author Gernot Kogler
|
9
|
+
|
10
|
+
module XapianDb
|
11
|
+
module IndexWriters
|
12
|
+
|
13
|
+
class DirectWriter

  class << self

    # Build the document for an object, store it and commit immediately
    def index(obj)
      indexer = XapianDb::DocumentBlueprint.blueprint_for(obj.class).indexer
      database = XapianDb.database
      database.store_doc(indexer.build_document_for(obj))
      database.commit
    end

    # Remove the document of an object from the index and commit immediately
    def unindex(obj)
      database = XapianDb.database
      database.delete_doc_with_unique_term(obj.xapian_id)
      database.commit
    end

    # Reindex all objects of a given class; the class must respond to
    # 'count' and 'all'. Progress is reported on stdout.
    def reindex_class(klass)
      # First, delete all docs of this class
      XapianDb.database.delete_docs_of_class(klass)
      indexer = XapianDb::DocumentBlueprint.blueprint_for(klass).indexer
      total = klass.count
      puts "Reindexing #{total} objects..."
      progress = ProgressBar.new("Status", total)
      klass.all.each do |obj|
        XapianDb.database.store_doc(indexer.build_document_for(obj))
        progress.inc
      end
      # One commit for the whole batch
      XapianDb.database.commit
    end

  end

end
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# The indexer creates a Xapian::Document from a configured object
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
|
8
|
+
class Indexer

  # @param document_blueprint the blueprint this indexer was created for
  def initialize(document_blueprint)
    @document_blueprint = document_blueprint
  end

  # Build the doc for an object. The object must respond to 'xapian_id'.
  # The configured adapter should implement this method.
  # The blueprint is looked up by the class of the object.
  def build_document_for(obj)
    @obj        = obj
    @blueprint  = DocumentBlueprint.blueprint_for(@obj.class)
    @xapian_doc = Xapian::Document.new
    @xapian_doc.data = @obj.xapian_id
    store_fields
    index_text
    @xapian_doc
  end

  private

  # Store all configured fields
  def store_fields
    # We store the class name of the object at position 0
    @xapian_doc.add_value(0, @obj.class.name)

    # The attributes follow at positions 1..n, serialized as YAML
    @blueprint.attributes.each_with_index do |attribute, index|
      @xapian_doc.add_value(index + 1, @obj.send(attribute).to_yaml)
    end
  end

  # Index all configured text methods
  def index_text
    term_generator = Xapian::TermGenerator.new
    term_generator.document = @xapian_doc
    # TODO: make this configurable globally and per document
    # (retrieve the language from the object, if configured)
    term_generator.stemmer = Xapian::Stem.new("english")
    # TODO: Configure and enable a stopper and spelling support

    # Always index the class and the primary key
    @xapian_doc.add_term("C#{@obj.class}")
    @xapian_doc.add_term("Q#{@obj.xapian_id}")

    @blueprint.indexed_methods.each do |method, options|
      value = @obj.send(method)
      next if value.nil?

      # Build the prefix from the string form of the name, the same way the
      # query parser does, so names given as symbol or string both match
      prefix = "X#{method.to_s.upcase}"
      texts  = value.is_a?(Array) ? value : [value]
      texts.each do |text|
        normalized = text.to_s.downcase
        # Add value with field name
        term_generator.index_text(normalized, options.weight, prefix)
        # Add value without field name
        term_generator.index_text(normalized)
      end
    end
  end

end
|
74
|
+
|
75
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Parse a query expression and convert it to Xapian Query arguments
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
module XapianDb
|
7
|
+
|
8
|
+
class QueryParser

  # @param database the database whose reader is used for parsing
  def initialize(database)
    @db = database

    # Parser options: wildcards and boolean operators (in any case)
    @query_flags = [
      Xapian::QueryParser::FLAG_WILDCARD,
      Xapian::QueryParser::FLAG_BOOLEAN,
      Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
    ].inject(0) { |flags, flag| flags | flag }
  end

  # Convert a query expression into a Xapian query. Terms are combined
  # with AND by default.
  def parse(expression)
    parser = Xapian::QueryParser.new
    parser.database = @db.reader
    parser.default_op = Xapian::Query::OP_AND # Could be made configurable
    # TODO: Setup stopper, stemmer, defaults and fields

    # Add the searchable prefixes to allow searches by field
    # (like "name:Kogler")
    XapianDb::DocumentBlueprint.searchable_prefixes.each do |prefix|
      field = prefix.to_s
      parser.add_prefix(field.downcase, "X#{field.upcase}")
    end
    parser.parse_query(expression, @query_flags)
  end

end
|
33
|
+
|
34
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Configuration for a rails app
|
4
|
+
# @author Gernot Kogler
|
5
|
+
|
6
|
+
require 'xapian_db'
|
7
|
+
require 'rails'
|
8
|
+
|
9
|
+
module XapianDb
|
10
|
+
class Railtie < ::Rails::Railtie

  # Configure XapianDb before the application configuration is evaluated
  config.before_configuration do

    # Read the database configuration file if there is one
    config_file_path = "#{Rails.root}/config/xapian_db.yml"
    if File.exist?(config_file_path)
      # An empty file or a missing section for the current environment must
      # not break the boot process; fall back to the defaults in that case
      db_config     = YAML::load_file(config_file_path) || {}
      env_config    = db_config[Rails.env] || {}
      database_path = env_config["database"] || ":memory:"
      adapter       = env_config["adapter"]  || :active_record
      writer        = env_config["writer"]   || :direct
    else
      # No config file, set the defaults
      database_path = Rails.env == "test" ? ":memory:" : "db/xapian_db/#{Rails.env}"
      adapter       = :active_record
      writer        = :direct
    end

    # Do the configuration
    XapianDb::Config.setup do |xapian_config|
      if database_path == ":memory:"
        xapian_config.database :memory
      else
        xapian_config.database database_path
      end
      xapian_config.adapter adapter.to_sym
      xapian_config.writer writer.to_sym
    end

  end

end
|
43
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# The resultset holds a Xapian::Query object and allows paged access
|
4
|
+
# to the found documents.
|
5
|
+
# @author Gernot Kogler
|
6
|
+
|
7
|
+
module XapianDb
|
8
|
+
|
9
|
+
class Resultset

  # Estimated number of matches for the query
  attr_reader :size

  # Constructor
  # @param [Xapian::Enquire] a Xapian query result
  def initialize(enquiry)
    @enquiry = enquiry
    # By passing 0 as the max parameter to the mset method,
    # we only get statistics about the query, no results
    statistics = enquiry.mset(0, 0)
    @size = statistics.matches_estimated
  end

  # Return one page of decorated documents.
  # Options: :page (default 1) and :per_page (default 10).
  # An empty array is returned if the page starts beyond the result size.
  def paginate(opts={})
    page     = opts.fetch(:page, 1)
    per_page = opts.fetch(:per_page, 10)
    offset   = (page - 1) * per_page
    offset > @size ? [] : build_page(offset, per_page)
  end

  private

  # Build a page of Xapian documents
  def build_page(offset, count)
    @enquiry.mset(offset, count).matches.map { |match| decorate(match.document) }
  end

  # Decorate a Xapian document with field accessors; the class name stored
  # at position 0 selects the blueprint that provides the accessors
  def decorate(document)
    domain_class = Kernel.const_get(document.values[0].value)
    blueprint = XapianDb::DocumentBlueprint.blueprint_for(domain_class)
    document.extend blueprint.accessors_module
  end

end
|
50
|
+
|
51
|
+
end
|
data/lib/xapian_db.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'digest/sha1'
require 'fileutils'
require 'rubygems'
require 'xapian'
require 'yaml'
require 'progressbar'
|
6
|
+
|
7
|
+
module XapianDb

  class << self

    # Configure XapianDb; the block is passed on to the global configuration
    def setup(&block)
      XapianDb::Config.setup(&block)
    end

    # Create a database. If option :path is given, a persistent database is
    # created at that path (an existing database is overwritten); otherwise
    # an in memory database is created.
    def create_db(options = {})
      path = options[:path]
      return InMemoryDatabase.new unless path
      PersistentDatabase.new(:path => path, :create => true)
    end

    # Open a database. If option :path is given, the persistent database at
    # that path is opened; otherwise a new in memory database is returned.
    def open_db(options = {})
      path = options[:path]
      return InMemoryDatabase.new unless path
      PersistentDatabase.new(:path => path, :create => false)
    end

    # Access the configured database
    def database
      XapianDb::Config.database
    end

    # Query the configured database
    def search(expression)
      XapianDb::Config.database.search(expression)
    end

  end

end
|
44
|
+
|
45
|
+
require File.dirname(__FILE__) + '/xapian_db/config'
|
46
|
+
require File.dirname(__FILE__) + '/xapian_db/adapters/generic_adapter'
|
47
|
+
require File.dirname(__FILE__) + '/xapian_db/adapters/datamapper_adapter'
|
48
|
+
require File.dirname(__FILE__) + '/xapian_db/adapters/active_record_adapter'
|
49
|
+
require File.dirname(__FILE__) + '/xapian_db/index_writers/direct_writer'
|
50
|
+
require File.dirname(__FILE__) + '/xapian_db/database'
|
51
|
+
require File.dirname(__FILE__) + '/xapian_db/document_blueprint'
|
52
|
+
require File.dirname(__FILE__) + '/xapian_db/indexer'
|
53
|
+
require File.dirname(__FILE__) + '/xapian_db/query_parser'
|
54
|
+
require File.dirname(__FILE__) + '/xapian_db/resultset'
|
55
|
+
|
56
|
+
# Configure XapianDB if we are in a Rails app
|
57
|
+
require File.dirname(__FILE__) + '/xapian_db/railtie' if defined?(Rails)
|
metadata
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: xapian_db
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 3
|
8
|
+
- 1
|
9
|
+
version: 0.3.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Gernot kogler
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-12-06 00:00:00 +01:00
|
18
|
+
default_executable:
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: progressbar
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
- 9
|
31
|
+
- 0
|
32
|
+
version: 0.9.0
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Ruby library to use a Xapian db as a key/value store with high performance fulltext search
|
36
|
+
email: gernot.kogler (at) garaio (dot) com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- CHANGELOG
|
43
|
+
files:
|
44
|
+
- CHANGELOG
|
45
|
+
- lib/xapian_db.rb
|
46
|
+
- lib/xapian_db/railtie.rb
|
47
|
+
- lib/xapian_db/config.rb
|
48
|
+
- lib/xapian_db/index_writers/direct_writer.rb
|
49
|
+
- lib/xapian_db/database.rb
|
50
|
+
- lib/xapian_db/document_blueprint.rb
|
51
|
+
- lib/xapian_db/indexer.rb
|
52
|
+
- lib/xapian_db/adapters/generic_adapter.rb
|
53
|
+
- lib/xapian_db/adapters/datamapper_adapter.rb
|
54
|
+
- lib/xapian_db/adapters/active_record_adapter.rb
|
55
|
+
- lib/xapian_db/query_parser.rb
|
56
|
+
- lib/xapian_db/resultset.rb
|
57
|
+
- examples/basic.rb
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: https://github.com/garaio/xapian_db
|
60
|
+
licenses: []
|
61
|
+
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options:
|
64
|
+
- --line-numbers
|
65
|
+
- --inline-source
|
66
|
+
- --title
|
67
|
+
- Xapian-DB
|
68
|
+
- --main
|
69
|
+
- README.rdoc
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
segments:
|
86
|
+
- 1
|
87
|
+
- 2
|
88
|
+
version: "1.2"
|
89
|
+
requirements: []
|
90
|
+
|
91
|
+
rubyforge_project:
|
92
|
+
rubygems_version: 1.3.7
|
93
|
+
signing_key:
|
94
|
+
specification_version: 3
|
95
|
+
summary: Ruby library to use a Xapian db as a key/value store with high performance fulltext search
|
96
|
+
test_files: []
|
97
|
+
|