xapian_db 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG ADDED
@@ -0,0 +1,15 @@
1
+ *0.1.0* (November 23rd, 2010)
2
+
3
+ * Proof of concept, not really useful for real world usage
4
+
5
+ *0.2.0* (December 1st, 2010)
6
+
7
+ * Blueprint configuration extended
8
+ * Adapter for Datamapper
9
+ * Search by attribute names
10
+ * Search with wildcards
11
+ * Document attributes can carry anything that is serializable by YAML
12
+
13
+ *0.3.0* (December 4th, 2010)
14
+
15
+ * Rails integration with configuration file (config/xapian_db.yml) and automatic setup
data/examples/basic.rb ADDED
@@ -0,0 +1,59 @@
1
+ # encoding: utf-8
2
+
3
+ # This example shows the most basic way to use xapian_db
4
+ # To run the example, please install the xapian_db gem first
5
+
6
+ require 'rubygems'
7
+ require 'xapian_db'
8
+
9
puts "Setting up the demo..."

# 1: Create a database; without a :path option we get an in memory database
db = XapianDb.create_db

# 2: The class whose objects get indexed. It is a plain ruby class here, but
#    it could just as well be an ActiveRecord or Datamapper domain class
class People

  attr_accessor :id, :name, :first_name

  # @param data [Hash] the values for :id, :name and :first_name
  def initialize(data)
    @id         = data[:id]
    @name       = data[:name]
    @first_name = data[:first_name]
  end

end

# 3: Tell the generic adapter how to derive a unique key from an object;
#    the block is evaluated in the context of the object
XapianDb::Adapters::GenericAdapter.unique_key { "#{self.class}-#{self.id}" }

# 4: Describe the structure of the documents for our class. Every attribute
#    can be read back from a retrieved document and is indexed by default.
XapianDb::DocumentBlueprint.setup(People) do |blueprint|
  [:name, :first_name].each { |field| blueprint.attribute field }
end

# 5: Create some objects
people = [
  People.new(:id => 1, :name => "Kogler", :first_name => "Gernot"),
  People.new(:id => 2, :name => "Frey",   :first_name => "Daniel"),
  People.new(:id => 3, :name => "Garaio", :first_name => "Thomas")
]

# 6: Build a document for each object and store it in the database
people_blueprint = XapianDb::DocumentBlueprint.blueprint_for(People)
people.each do |person|
  db.store_doc(people_blueprint.indexer.build_document_for(person))
end

# 7: Find the gem author ;-)
puts "Searching for Gernot..."
results = db.search("Gernot")
puts "We found #{results.size} documents"
puts "And the first document looks like this:"
doc = results.paginate(:page => 1).first
puts "name: #{doc.name}"
puts "first name: #{doc.first_name}"
@@ -0,0 +1,74 @@
1
+ # encoding: utf-8
2
+
3
+ # Adapter for ActiveRecord. To use it, simply set it as the
4
+ # default for any DocumentBlueprint or a specific DocumentBlueprint
5
+
6
module XapianDb
  module Adapters

    # Connects ActiveRecord model classes with the XapianDb index.
    class ActiveRecordAdapter

      # Add the helper methods and the index callbacks to a model class.
      #
      # Instances get a +xapian_id+ method, the class gets +after_save+ and
      # +after_destroy+ callbacks that forward to the configured writer and
      # a +rebuild_xapian_index+ class method.
      #
      # @param klass [Class] the class to extend; it must respond to
      #   +after_save+ and +after_destroy+
      def self.add_class_helper_methods_to(klass)
        klass.class_eval do
          # The unique key of an object is "<class>-<id>"
          define_method(:xapian_id) { "#{self.class}-#{self.id}" }

          # Keep the index in sync with the persisted objects
          after_save    { XapianDb::Config.writer.index(self) }
          after_destroy { XapianDb::Config.writer.unindex(self) }

          # Reindex all objects of the class this adapter was applied to
          define_singleton_method(:rebuild_xapian_index) do
            XapianDb::Config.writer.reindex_class(klass)
          end
        end
      end

      # Add the helper methods for retrieved documents to a module.
      #
      # The module gets an +indexed_object+ method. It splits the document
      # data ("<class>-<id>") at the dashes, looks up the class by name and
      # loads the object with +find+. The loaded object is cached.
      #
      # @param a_module [Module] the module to add the method to
      def self.add_doc_helper_methods_to(a_module)
        a_module.send(:define_method, :indexed_object) do
          if @indexed_object.nil?
            klass_name, id = data.split("-")
            @indexed_object = Kernel.const_get(klass_name).find(id.to_i)
          end
          @indexed_object
        end
      end

    end
  end
end
@@ -0,0 +1,62 @@
1
+ # encoding: utf-8
2
+
3
+ # Adapter for datamapper. To use it, simply set it as the
4
+ # default for any DocumentBlueprint or a specific DocumentBlueprint
5
+
6
module XapianDb
  module Adapters

    # Connects Datamapper resource classes with the XapianDb index.
    class DatamapperAdapter

      # Add the helper methods and the index hooks to a resource class.
      #
      # Instances get a +xapian_id+ method, the class gets +after :save+ and
      # +after :destroy+ hooks that forward to the configured writer and
      # a +rebuild_xapian_index+ class method.
      #
      # @param klass [Class] the class to extend; it must respond to +after+
      def self.add_class_helper_methods_to(klass)
        klass.class_eval do
          # The unique key of an object is "<class>-<id>"
          define_method(:xapian_id) { "#{self.class}-#{self.id}" }

          # Keep the index in sync with the persisted objects
          after(:save)    { XapianDb::Config.writer.index(self) }
          after(:destroy) { XapianDb::Config.writer.unindex(self) }

          # Reindex all objects of the class the method is called on
          define_singleton_method(:rebuild_xapian_index) do
            XapianDb::Config.writer.reindex_class(self)
          end
        end
      end

      # Add the helper methods for retrieved documents to a module.
      #
      # The module gets an +indexed_object+ method. It splits the document
      # data ("<class>-<id>") at the dashes, looks up the class by name and
      # loads the object with +get+. The loaded object is cached.
      #
      # @param a_module [Module] the module to add the method to
      def self.add_doc_helper_methods_to(a_module)
        a_module.send(:define_method, :indexed_object) do
          if @indexed_object.nil?
            klass_name, id = data.split("-")
            @indexed_object = Kernel.const_get(klass_name).get(id.to_i)
          end
          @indexed_object
        end
      end

    end
  end
end
@@ -0,0 +1,41 @@
1
+ # encoding: utf-8
2
+
3
+ # The generic adapter is a universal adapter that can be used for any
4
+ # ruby class. To use the generic adapter (which is the default),
5
+ # configure the expression that generates a unique key from your objects
6
+ # using the method 'unique_key'.
7
module XapianDb
  module Adapters

    # Adapter for plain ruby classes; the unique key of an object is
    # computed by a configurable block.
    class GenericAdapter

      # Remember the block that computes the unique key of an object.
      # The block is evaluated in the context of the indexed object.
      def self.unique_key(&block)
        @unique_key_block = block
      end

      # Add a +xapian_id+ method to a class that evaluates the configured
      # unique key block in the context of the receiving object.
      #
      # @param klass [Class] the class to extend
      # @raise [RuntimeError] if no unique key block is configured
      def self.add_class_helper_methods_to(klass)
        key_block = @unique_key_block
        raise "Unique key is not configured for generic adapter!" unless key_block
        klass.send(:define_method, :xapian_id) { instance_eval(&key_block) }
      end

      # The generic adapter has no document helper methods so far.
      def self.add_doc_helper_methods_to(obj)
      end

    end

  end

end
@@ -0,0 +1,82 @@
1
+ # encoding: utf-8
2
+
3
+ # Global configuration for XapianDb
4
+ # @author Gernot Kogler
5
+
6
module XapianDb

  # Holds the global settings: the database, the adapter and the writer.
  class Config

    # ---------------------------------------------------------------------------------
    # Singleton methods
    # ---------------------------------------------------------------------------------
    class << self

      # Create the config instance on first use and yield it to the
      # block (if one is given) so it can be configured with the DSL methods.
      def setup(&block)
        @config ||= Config.new
        yield @config if block_given?
      end

      # Class level readers for the configured values; they return nil
      # as long as setup has not been called
      [:database, :adapter, :writer].each do |setting|
        define_method(setting) do
          @config.instance_variable_get("@_#{setting}") unless @config.nil?
        end
      end
    end

    # ---------------------------------------------------------------------------------
    # DSL methods
    # ---------------------------------------------------------------------------------
    attr_reader :_database, :_adapter, :_writer

    # Set the database.
    # @param path [String, Symbol] a path in the file system or the symbolic
    #   name "memory". A database is created at the path if the path does
    #   not exist yet, otherwise the existing database is opened.
    def database(path)

      # A persistent database must be released and garbage collected
      # to remove its write lock before we switch to another database
      if @_database.is_a?(XapianDb::PersistentDatabase)
        @_database = nil
        GC.start
      end

      @_database =
        if path.to_sym == :memory
          XapianDb.create_db
        elsif File.exist?(path)
          XapianDb.open_db :path => path
        else
          # Database does not exist; create it
          XapianDb.create_db :path => path
        end
    end

    # Define the adapter to use; the following adapters are available:
    # - :generic
    # - :active_record
    # - :datamapper
    def adapter(type)
      # The class name is derived from the type, e.g. ActiveRecordAdapter
      adapter_name = "#{camelize(type.to_s)}Adapter"
      @_adapter = XapianDb::Adapters.const_get(adapter_name)
    end

    # Define the writer to use; the following writers are available:
    # - :direct
    # More to come in a future release :-)
    def writer(type)
      # The class name is derived from the type, e.g. DirectWriter
      writer_name = "#{camelize(type.to_s)}Writer"
      @_writer = XapianDb::IndexWriters.const_get(writer_name)
    end

    private

    # Convert a name like "active_record" to "ActiveRecord"
    # TODO: move this to a helper module
    def camelize(string)
      string.split(/[^a-z0-9]/i).map(&:capitalize).join
    end

  end

end
@@ -0,0 +1,102 @@
1
+ # encoding: utf-8
2
+
3
+ # Classes representing a Xapian database (base class, in memory and persistent variants).
4
+ # @author Gernot Kogler
5
+
6
require 'fileutils' # PersistentDatabase creates missing directories with FileUtils

module XapianDb

  # Base class for a Xapian database. Subclasses must provide a +writer+
  # method and set the @reader instance variable.
  class Database
    attr_reader :reader

    # Size of the database (number of docs)
    def size
      reader.doccount
    end

    # Store a Xapian document under the unique term "Q<document data>".
    # @param doc the document to store; must respond to +data+
    def store_doc(doc)
      # We always replace; Xapian adds the document automatically if
      # it is not found
      writer.replace_document("Q#{doc.data}", doc)
    end

    # Delete a document by a unique term; this method is used by the
    # orm adapters
    # @return [true]
    def delete_doc_with_unique_term(term)
      writer.delete_document("Q#{term}")
      true
    end

    # Delete all docs of a specific class (they share the term "C<class>")
    # @return [true]
    def delete_docs_of_class(klass)
      writer.delete_document("C#{klass}")
      true
    end

    # Perform a search
    # @param expression [String] the query expression
    # @return [Resultset] the (paginatable) result of the query
    def search(expression)
      @query_parser ||= QueryParser.new(self)
      enquiry       = Xapian::Enquire.new(reader)
      enquiry.query = @query_parser.parse(expression)
      Resultset.new(enquiry)
    end

  end

  # In Memory database; reader and writer are the same Xapian object
  class InMemoryDatabase < Database
    attr_reader :writer

    def initialize
      @writer = Xapian.inmemory_open
      @reader = @writer
    end

    # Commit all pending changes
    def commit
      # Nothing to do for an in memory database
    end

  end

  # Persistent database on disk
  class PersistentDatabase < Database

    # @param options [Hash] :path is the location of the database; if
    #   :create is truthy, the database is created (or overwritten)
    def initialize(options)
      @path    = options[:path]
      @db_flag = options[:create] ? Xapian::DB_CREATE_OR_OVERWRITE : Xapian::DB_OPEN
      if options[:create]
        # make sure the path exists; Xapian will not create the necessary directories
        FileUtils.makedirs @path
        @writer = Xapian::WritableDatabase.new(@path, @db_flag)
      end
      @reader = Xapian::Database.new(@path)
    end

    # Get the readable instance of the database
    def reader
      # Always reopen the readable database so we get live index data
      # TODO: make this configurable
      @reader.reopen
      @reader
    end

    # The writer is instantiated lazily to avoid a permanent write lock on the database
    def writer
      @writer ||= Xapian::WritableDatabase.new(@path, @db_flag)
    end

    # Commit all pending changes and refresh the reader
    def commit
      writer.commit
      # Reopen the reader directly; going through #reader would reopen it twice
      @reader.reopen
    end

  end

end
@@ -0,0 +1,121 @@
1
+ # encoding: utf-8
2
+
3
+ # A document blueprint describes the mapping of an object to a Xapian document
4
+ # for a given class.
5
+ # @author Gernot Kogler
6
+
7
module XapianDb

  # Describes how the objects of a class are mapped to Xapian documents:
  # which attributes are stored and which methods are indexed.
  class DocumentBlueprint

    # ---------------------------------------------------------------------------------
    # Singleton methods
    # ---------------------------------------------------------------------------------
    class << self

      # Configure the blueprint for a class. The blueprint is yielded to
      # the block (if given), registered for the class and the adapter
      # adds its helper methods to the class.
      #
      # The adapter is the one set on the blueprint, else the globally
      # configured one, else the generic adapter.
      #
      # @param klass [Class] the class to configure
      def setup(klass, &block)
        @blueprints ||= {}
        blueprint = DocumentBlueprint.new
        blueprint.indexer = Indexer.new(blueprint)
        yield blueprint if block_given? # configure the blueprint through the block
        @blueprints[klass] = blueprint
        @adapter = blueprint.adapter || XapianDb::Config.adapter || Adapters::GenericAdapter
        @adapter.add_class_helper_methods_to klass
        @searchable_prefixes = nil # force rebuild of the searchable prefixes
      end

      # Get the blueprint for a class
      # @return [DocumentBlueprint, nil] nil if the class is not configured
      def blueprint_for(klass)
        @blueprints[klass] if @blueprints
      end

      # Return an array of all configured text methods in any blueprint
      # (without duplicates); the result is cached until the next setup
      def searchable_prefixes
        return [] unless @blueprints
        @searchable_prefixes ||=
          @blueprints.values.map { |blueprint| blueprint.searchable_prefixes }.flatten.compact.uniq
      end

    end

    # ---------------------------------------------------------------------------------
    # Instance methods
    # ---------------------------------------------------------------------------------
    attr_accessor :indexer

    # Return an array of all configured text methods in this blueprint
    def searchable_prefixes
      @prefixes ||= indexed_methods.keys
    end

    # Lazily build and return a module that implements accessors for each field.
    # The module expects to be mixed into an object that responds to +values+.
    def accessors_module
      return @accessors_module unless @accessors_module.nil?

      field_names = @attributes
      mod = Module.new do
        # The class name is stored at position 0
        define_method(:domain_class) { self.values[0].value }

        # The attribute values follow at position 1..n in declaration order.
        # NOTE(review): YAML.load deserializes arbitrary objects; presumably the
        # values were written by this library's indexer - confirm the index
        # never contains untrusted data.
        field_names.each_with_index do |field, index|
          define_method(field) { YAML::load(self.values[index + 1].value) }
        end
      end

      # Let the adapter add its document helper methods (if any). A blueprint
      # specific adapter takes precedence, exactly as in DocumentBlueprint.setup
      doc_adapter = @adapter || XapianDb::Config.adapter || XapianDb::Adapters::GenericAdapter
      doc_adapter.add_doc_helper_methods_to(mod)

      # Memoize only after the module is complete
      @accessors_module = mod
    end

    # ---------------------------------------------------------------------------------
    # Blueprint DSL methods
    # ---------------------------------------------------------------------------------
    attr_reader :attributes, :indexed_methods

    # A custom adapter for this blueprint (nil means: use the default)
    attr_accessor :adapter

    # Construct the blueprint
    def initialize
      @attributes      = []
      @indexed_methods = {}
    end

    # Add an attribute to the list
    # TODO: Make sure the name does not collide with a method name of Xapian::Document since
    # we generate methods in the documents for all defined fields
    # @param name [Symbol] the name of the attribute
    # @param options [Hash] :index => false stores the attribute without
    #   indexing it; all options are passed on to #index otherwise
    def attribute(name, options={})
      opts = {:index => true}.merge(options)
      @attributes << name
      self.index(name, opts) if opts[:index]
    end

    # Add an indexed value to the list
    # @param name [Symbol] the name of the method to index
    # @param options [Hash] see IndexOptions
    def index(name, options={})
      @indexed_methods[name] = IndexOptions.new(options)
    end

    # Options for an indexed text
    class IndexOptions
      attr_accessor :weight

      # @param options [Hash] :weight is the weight of the text (default: 1)
      def initialize(options)
        @weight = options[:weight] || 1
      end
    end

  end

end
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+
3
+ # This writer writes changes directly to the open database.
4
+ # Use the direct writer only for single process environments
5
+ # (one single rails app server, e.g. one mongrel).
6
+ # For multi process environments you should use a writer that
7
+ # processes index changes through a queue.
8
+ # @author Gernot Kogler
9
+
10
module XapianDb
  module IndexWriters

    # Applies every index change immediately to the configured database
    # and commits it.
    class DirectWriter

      # Add an object to the index or update it, then commit
      # @param obj the object to index; a blueprint must be configured
      #   for its class
      def self.index(obj)
        indexer = XapianDb::DocumentBlueprint.blueprint_for(obj.class).indexer
        XapianDb.database.store_doc(indexer.build_document_for(obj))
        XapianDb.database.commit
      end

      # Remove an object from the index, then commit
      # @param obj the object to remove; must respond to +xapian_id+
      def self.unindex(obj)
        XapianDb.database.delete_doc_with_unique_term(obj.xapian_id)
        XapianDb.database.commit
      end

      # Reindex all objects of a given class. The class must respond to
      # +count+ and +all+; the progress is reported on the console.
      def self.reindex_class(klass)
        # Start from scratch: drop all docs of this class
        XapianDb.database.delete_docs_of_class(klass)
        class_blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
        total = klass.count
        puts "Reindexing #{total} objects..."
        progress = ProgressBar.new("Status", total)
        klass.all.each do |record|
          XapianDb.database.store_doc(class_blueprint.indexer.build_document_for(record))
          progress.inc
        end
        XapianDb.database.commit
      end

    end

  end
end
@@ -0,0 +1,75 @@
1
+ # encoding: utf-8
2
+
3
+ # The indexer creates a Xapian::Document from a configured object
4
+ # @author Gernot Kogler
5
+
6
module XapianDb

  # Builds Xapian documents for the objects of a configured class.
  class Indexer

    # @param document_blueprint [DocumentBlueprint] the blueprint that
    #   describes the documents built by this indexer
    def initialize(document_blueprint)
      @document_blueprint = document_blueprint
    end

    # Build the doc for an object. The object must respond to 'xapian_id'.
    # The configured adapter should implement this method.
    #
    # The blueprint passed to the constructor is used; the registered
    # blueprint of the object's class is only looked up if there is none.
    # No per-call state is kept in the indexer.
    #
    # @param obj the object to build the document for
    # @return [Xapian::Document] the document with data, values and terms
    def build_document_for(obj)
      blueprint  = @document_blueprint || DocumentBlueprint.blueprint_for(obj.class)
      xapian_doc = Xapian::Document.new
      xapian_doc.data = obj.xapian_id
      store_fields(xapian_doc, obj, blueprint)
      index_text(xapian_doc, obj, blueprint)
      xapian_doc
    end

    private

    # Store all configured fields as YAML serialized document values
    def store_fields(xapian_doc, obj, blueprint)
      # We store the class name of the object at position 0
      xapian_doc.add_value(0, obj.class.name)

      # The attributes follow at position 1..n in declaration order
      blueprint.attributes.each_with_index do |attribute, index|
        xapian_doc.add_value(index + 1, obj.send(attribute).to_yaml)
      end
    end

    # Index all configured text methods
    def index_text(xapian_doc, obj, blueprint)
      term_generator = Xapian::TermGenerator.new
      term_generator.document = xapian_doc
      # TODO: make this configurable globally and per document
      # (retrieve the language from the object, if configured)
      term_generator.stemmer = Xapian::Stem.new("english")
      # TODO: Configure and enable a stopper and the spelling flag

      # Always index the class and the primary key
      xapian_doc.add_term("C#{obj.class}")
      xapian_doc.add_term("Q#{obj.xapian_id}")

      blueprint.indexed_methods.each do |method, options|
        value = obj.send(method)
        next if value.nil?
        # The query parser registers the prefixes with to_s.upcase, too
        prefix = "X#{method.to_s.upcase}"
        texts  = value.is_a?(Array) ? value : [value]
        texts.each do |text|
          normalized = text.to_s.downcase
          # Add value with field name
          term_generator.index_text(normalized, options.weight, prefix)
          # Add value without field name
          term_generator.index_text(normalized)
        end
      end
    end

  end

end
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+
3
+ # Parse a query expression and convert it to Xapian Query arguments
4
+ # @author Gernot Kogler
5
+
6
module XapianDb

  # Turns a query expression into a Xapian query for a given database.
  class QueryParser

    # @param database the database to parse queries for; must respond
    #   to +reader+
    def initialize(database)
      @db = database

      # The parser options: wildcards and boolean operators in any case
      @query_flags = [
        Xapian::QueryParser::FLAG_WILDCARD,
        Xapian::QueryParser::FLAG_BOOLEAN,
        Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
      ].inject(0) { |flags, flag| flags | flag }
    end

    # Parse a query expression
    # @param expression [String] the expression to parse
    # @return the result of Xapian's parse_query for the expression
    def parse(expression)
      xapian_parser            = Xapian::QueryParser.new
      xapian_parser.database   = @db.reader
      xapian_parser.default_op = Xapian::Query::OP_AND # Could be made configurable
      # TODO: Setup stopper, stemmer, defaults and fields

      # Register the searchable prefixes to allow searches by field
      # (like "name:Kogler")
      XapianDb::DocumentBlueprint.searchable_prefixes.each do |prefix|
        field = prefix.to_s
        xapian_parser.add_prefix(field.downcase, "X#{field.upcase}")
      end
      xapian_parser.parse_query(expression, @query_flags)
    end

  end

end
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+
3
+ # Configuration for a rails app
4
+ # @author Gernot Kogler
5
+
6
+ require 'xapian_db'
7
+ require 'rails'
8
+
9
+ module XapianDb
10
+ class Railtie < ::Rails::Railtie
11
+
12
+ config.before_configuration do
13
+
14
+ # Read the database configuration file if there is one
15
+ config_file_path = "#{Rails.root}/config/xapian_db.yml"
16
+ if File.exist?(config_file_path)
17
+ db_config = YAML::load_file config_file_path
18
+ env_config = db_config[Rails.env]
19
+ database_path = env_config["database"] || ":memory:"
20
+ adapter = env_config["adapter"] || :active_record
21
+ writer = env_config["writer"] || :direct
22
+ else
23
+ # No config file, set the defaults
24
+ Rails.env == "test" ? database_path = ":memory:" : database_path = "db/xapian_db/#{Rails.env}"
25
+ adapter = :active_record
26
+ writer = :direct
27
+ end
28
+
29
+ # Do the configuration
30
+ XapianDb::Config.setup do |config|
31
+ if database_path == ":memory:"
32
+ config.database :memory
33
+ else
34
+ config.database database_path
35
+ end
36
+ config.adapter adapter.to_sym
37
+ config.writer writer.to_sym
38
+ end
39
+
40
+ end
41
+
42
+ end
43
+ end
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+
3
+ # The resultset holds a Xapian::Query object and allows paged access
4
+ # to the found documents.
5
+ # @author Gernot Kogler
6
+
7
module XapianDb

  # Wraps a Xapian enquiry and gives paged access to the found documents.
  class Resultset

    # The estimated number of matching documents
    attr_reader :size

    # Constructor
    # @param enquiry [Xapian::Enquire] a Xapian query result
    def initialize(enquiry)
      @enquiry = enquiry
      # By passing 0 as the max parameter to the mset method,
      # we only get statistics about the query, no results
      @size = enquiry.mset(0, 0).matches_estimated
    end

    # Paginate the result
    #
    # @param opts [Hash] :page is the page to retrieve (default: 1),
    #   :per_page the number of documents per page (default: 10). Both
    #   options may be nil (the default is used then) or anything that
    #   responds to +to_i+, so request parameters can be passed as they are.
    # @return [Array] the decorated documents of the page; an empty array
    #   if the page starts at or beyond the end of the result
    def paginate(opts={})
      page     = (opts[:page]     || 1).to_i
      per_page = (opts[:per_page] || 10).to_i
      offset   = (page - 1) * per_page
      # Offsets are zero based, so there is nothing to fetch at offset == size
      return [] if offset >= @size
      build_page(offset, per_page)
    end

    private

    # Build a page of Xapian documents
    def build_page(offset, count)
      @enquiry.mset(offset, count).matches.map { |match| decorate(match.document) }
    end

    # Decorate a Xapian document with field accessors; the class name
    # of the indexed object is read from the document value at position 0
    def decorate(document)
      klass_name = document.values[0].value
      blueprint  = XapianDb::DocumentBlueprint.blueprint_for(Kernel.const_get(klass_name))
      document.extend blueprint.accessors_module
    end

  end

end
data/lib/xapian_db.rb ADDED
@@ -0,0 +1,57 @@
1
+ require 'digest/sha1'
2
+ require 'rubygems'
3
+ require 'xapian'
4
+ require 'yaml'
5
+ require 'progressbar'
6
+
7
module XapianDb

  class << self

    # Configure XapianDb; the block is passed on to XapianDb::Config.setup
    def setup(&block)
      XapianDb::Config.setup(&block)
    end

    # Create a database. With the option :path, a persistent database is
    # created at this path (the database is opened with the :create flag);
    # without it, an in memory database is created.
    def create_db(options = {})
      return InMemoryDatabase.new unless options[:path]
      PersistentDatabase.new(:path => options[:path], :create => true)
    end

    # Open a database. With the option :path, the persistent database at
    # this path is opened; without it, a new in memory database is returned.
    def open_db(options = {})
      return InMemoryDatabase.new unless options[:path]
      PersistentDatabase.new(:path => options[:path], :create => false)
    end

    # Access the configured database
    def database
      XapianDb::Config.database
    end

    # Query the configured database
    def search(expression)
      XapianDb::Config.database.search(expression)
    end

  end

end
44
+
45
+ require File.dirname(__FILE__) + '/xapian_db/config'
46
+ require File.dirname(__FILE__) + '/xapian_db/adapters/generic_adapter'
47
+ require File.dirname(__FILE__) + '/xapian_db/adapters/datamapper_adapter'
48
+ require File.dirname(__FILE__) + '/xapian_db/adapters/active_record_adapter'
49
+ require File.dirname(__FILE__) + '/xapian_db/index_writers/direct_writer'
50
+ require File.dirname(__FILE__) + '/xapian_db/database'
51
+ require File.dirname(__FILE__) + '/xapian_db/document_blueprint'
52
+ require File.dirname(__FILE__) + '/xapian_db/indexer'
53
+ require File.dirname(__FILE__) + '/xapian_db/query_parser'
54
+ require File.dirname(__FILE__) + '/xapian_db/resultset'
55
+
56
+ # Configure XapianDB if we are in a Rails app
57
+ require File.dirname(__FILE__) + '/xapian_db/railtie' if defined?(Rails)
metadata ADDED
@@ -0,0 +1,97 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: xapian_db
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 3
8
+ - 1
9
+ version: 0.3.1
10
+ platform: ruby
11
+ authors:
12
+ - Gernot kogler
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-12-06 00:00:00 +01:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: progressbar
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 9
31
+ - 0
32
+ version: 0.9.0
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Ruby library to use a Xapian db as a key/value store with high performance fulltext search
36
+ email: gernot.kogler (at) garaio (dot) com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - CHANGELOG
43
+ files:
44
+ - CHANGELOG
45
+ - lib/xapian_db.rb
46
+ - lib/xapian_db/railtie.rb
47
+ - lib/xapian_db/config.rb
48
+ - lib/xapian_db/index_writers/direct_writer.rb
49
+ - lib/xapian_db/database.rb
50
+ - lib/xapian_db/document_blueprint.rb
51
+ - lib/xapian_db/indexer.rb
52
+ - lib/xapian_db/adapters/generic_adapter.rb
53
+ - lib/xapian_db/adapters/datamapper_adapter.rb
54
+ - lib/xapian_db/adapters/active_record_adapter.rb
55
+ - lib/xapian_db/query_parser.rb
56
+ - lib/xapian_db/resultset.rb
57
+ - examples/basic.rb
58
+ has_rdoc: true
59
+ homepage: https://github.com/garaio/xapian_db
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options:
64
+ - --line-numbers
65
+ - --inline-source
66
+ - --title
67
+ - Xapian-DB
68
+ - --main
69
+ - README.rdoc
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ segments:
78
+ - 0
79
+ version: "0"
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ">="
84
+ - !ruby/object:Gem::Version
85
+ segments:
86
+ - 1
87
+ - 2
88
+ version: "1.2"
89
+ requirements: []
90
+
91
+ rubyforge_project:
92
+ rubygems_version: 1.3.7
93
+ signing_key:
94
+ specification_version: 3
95
+ summary: Ruby library to use a Xapian db as a key/value store with high performance fulltext search
96
+ test_files: []
97
+