xapian_db 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,50 +1,69 @@
1
1
  # encoding: utf-8
2
2
 
3
- # Singleton class representing a Xapian database.
4
3
  # @author Gernot Kogler
5
4
 
6
5
  module XapianDb
7
6
 
8
- # Base class for a Xapian database.
7
+ # Base class for a Xapian database
9
8
  class Database
10
- attr_reader :reader
11
-
9
+
10
+ # A readable xapian database (see http://xapian.org/docs/apidoc/html/classXapian_1_1Database.html)
11
+ attr_reader :reader
12
+
12
13
  # Size of the database (number of docs)
14
+ # @return [Integer] The number of docs in the database
13
15
  def size
14
16
  reader.doccount
15
17
  end
16
-
18
+
17
19
  # Store a Xapian document
20
+ # @param [Xapian::Document] doc A Xapian document (see http://xapian.org/docs/sourcedoc/html/classXapian_1_1Document.html).
21
+ # While you can pass any valid xapian document, you might want to use the {XapianDb::Indexer} to build a xapian doc
18
22
  def store_doc(doc)
19
23
  # We always replace; Xapian adds the document automatically if
20
24
  # it is not found
21
25
  writer.replace_document("Q#{doc.data}", doc)
22
26
  end
23
27
 
24
- # Delete a document by a unique term; this method is used by the
28
+ # Delete a document identified by a unique term; this method is used by the
25
29
  # orm adapters
30
+ # @param [String] term A term that uniquely identifies a document
26
31
  def delete_doc_with_unique_term(term)
27
32
  writer.delete_document("Q#{term}")
28
33
  true
29
34
  end
30
35
 
31
- # Delete all docs of a specific class
36
+ # Delete all docs of a specific class
37
+ # @param [Class] klass A class that has a {XapianDb::DocumentBlueprint} configuration
32
38
  def delete_docs_of_class(klass)
33
39
  writer.delete_document("C#{klass}")
34
40
  true
35
41
  end
36
-
42
+
37
43
  # Perform a search
38
- def search(expression)
44
+ # @param [String] expression A valid search expression.
45
+ # @param [Hash] options
46
+ # @option options [Integer] :per_page (10) How many docs per page?
47
+ # @example Simple Query
48
+ # resultset = db.search("foo")
49
+ # @example Wildcard Query
50
+ # resultset = db.search("fo*")
51
+ # @example Boolean Query
52
+ # resultset = db.search("foo or baz")
53
+ # @example Field Query
54
+ # resultset = db.search("name:foo")
55
+ # @return [XapianDb::Resultset] The resultset
56
+ def search(expression, options={})
57
+ opts = {:per_page => 10}.merge(options)
39
58
  @query_parser ||= QueryParser.new(self)
40
- query = @query_parser.parse(expression)
41
- enquiry = Xapian::Enquire.new(reader)
59
+ query = @query_parser.parse(expression)
60
+ enquiry = Xapian::Enquire.new(reader)
42
61
  enquiry.query = query
43
- Resultset.new(enquiry)
62
+ Resultset.new(enquiry, opts)
44
63
  end
45
-
64
+
46
65
  end
47
-
66
+
48
67
  # In Memory database
49
68
  class InMemoryDatabase < Database
50
69
 
@@ -52,51 +71,65 @@ module XapianDb
52
71
  @writer ||= Xapian::inmemory_open
53
72
  @reader = @writer
54
73
  end
55
-
74
+
75
+ # Get the writer to write to the database
76
+ # @return [Xapian::WritableDatabase] A xapian database that is writable (see http://xapian.org/docs/apidoc/html/classXapian_1_1WritableDatabase.html)
56
77
  def writer
57
78
  @writer
58
79
  end
59
80
 
60
- # Commit all pending changes
81
+ # Commit all pending changes
61
82
  def commit
62
83
  # Nothing to do for an in memory database
63
84
  end
64
-
85
+
65
86
  end
66
87
 
67
88
  # Persistent database on disk
68
89
  class PersistentDatabase < Database
69
-
90
+
91
+ # Constructor
92
+ # @param [Hash] options Options for the persistent database
93
+ # @option options [String] :path A path to the file system
94
+ # @option options [Boolean] :create Should the database be created? <b>Will overwrite an existing database if true!</b>
95
+ # @example Force the creation of a database. Will overwrite an existing database
96
+ # db = XapianDb::PersistentDatabase.new(:path => "/tmp/mydb", :create => true)
97
+ # @example Open an existing database. The database must exist
98
+ # db = XapianDb::PersistentDatabase.new(:path => "/tmp/mydb", :create => false)
70
99
  def initialize(options)
71
100
  @path = options[:path]
72
101
  @db_flag = options[:create] ? Xapian::DB_CREATE_OR_OVERWRITE : Xapian::DB_OPEN
73
102
  if options[:create]
74
- # make sure the path exists; Xapian will not create the necessary directories
103
+ # make sure the path exists; Xapian will not create the necessary directories
75
104
  FileUtils.makedirs @path
76
105
  @writer = Xapian::WritableDatabase.new(@path, @db_flag)
77
106
  end
78
107
  @reader = Xapian::Database.new(@path)
79
108
  end
80
-
81
- # Get the readable instance of the database
109
+
110
+ # Get the readable instance of the database. On each access this method reopens the readable database
111
+ # to make sure you get the latest changes to the index
112
+ # @return [Xapian::Database] A readable xapian database (see http://xapian.org/docs/apidoc/html/classXapian_1_1Database.html)
82
113
  def reader
83
114
  # Always reopen the readable database so we get live index data
84
115
  # TODO: make this configurable
85
116
  @reader.reopen
86
117
  @reader
87
118
  end
88
-
89
- # The writer is instantiated layzily to avoid a permanent write lock on the database
119
+
120
+ # The writer is instantiated lazily to avoid a permanent write lock on the database. Please note that
121
+ # you will get locking exceptions if you open the same database multiple times and access the writer
122
+ # in more than one instance!
123
+ # @return [Xapian::WritableDatabase] A xapian database that is writable (see http://xapian.org/docs/apidoc/html/classXapian_1_1WritableDatabase.html)
90
124
  def writer
91
125
  @writer ||= Xapian::WritableDatabase.new(@path, @db_flag)
92
126
  end
93
-
94
- # Commit all pending changes
127
+
128
+ # Commit all pending changes
95
129
  def commit
96
130
  writer.commit
97
- reader.reopen
98
131
  end
99
-
132
+
100
133
  end
101
-
134
+
102
135
  end
@@ -1,19 +1,32 @@
1
1
  # encoding: utf-8
2
2
 
3
- # A document blueprint describes the mapping of an object to a Xapian document
4
- # for a given class.
5
- # @author Gernot Kogler
6
-
7
3
  module XapianDb
8
-
4
+
5
+ # A document blueprint describes the mapping of an object to a Xapian document
6
+ # for a given class.
7
+ # @example A simple document blueprint configuration for the class Person
8
+ # XapianDb::DocumentBlueprint.setup(Person) do |blueprint|
9
+ # # Our Person class has a method lang_cd. We use this method to
10
+ # # index each person with its language
11
+ # blueprint.language_method :lang_cd
12
+ # blueprint.attribute :name, :weight => 10
13
+ # blueprint.attribute :first_name
14
+ # blueprint.index :remarks
15
+ # end
16
+ # @author Gernot Kogler
9
17
  class DocumentBlueprint
10
18
 
11
- # ---------------------------------------------------------------------------------
19
+ # ---------------------------------------------------------------------------------
12
20
  # Singleton methods
13
- # ---------------------------------------------------------------------------------
21
+ # ---------------------------------------------------------------------------------
14
22
  class << self
15
23
 
16
- # Configure the blueprint for a class
24
+ # Configure the blueprint for a class.
25
+ # Available options:
26
+ # - language_method (see {#language_method} for details)
27
+ # - adapter (see {#adapter} for details)
28
+ # - attribute (see {#attribute} for details)
29
+ # - index (see {#index} for details)
17
30
  def setup(klass, &block)
18
31
  @blueprints ||= {}
19
32
  blueprint = DocumentBlueprint.new
@@ -24,46 +37,54 @@ module XapianDb
24
37
  @adapter.add_class_helper_methods_to klass
25
38
  @searchable_prefixes = nil # force rebuild of the searchable prefixes
26
39
  end
27
-
40
+
28
41
  # Get the blueprint for a class
42
+ # @return [DocumentBlueprint]
29
43
  def blueprint_for(klass)
30
44
  @blueprints[klass] if @blueprints
31
45
  end
32
46
 
33
47
  # Return an array of all configured text methods in any blueprint
48
+ # @return [Array<String>] All searchable prefixes
34
49
  def searchable_prefixes
35
50
  return [] unless @blueprints
36
51
  return @searchable_prefixes unless @searchable_prefixes.nil?
37
52
  prefixes = []
38
- @blueprints.each do |klass, blueprint|
53
+ @blueprints.values.each do |blueprint|
39
54
  prefixes << blueprint.searchable_prefixes
40
55
  end
41
56
  @searchable_prefixes = prefixes.flatten.compact.uniq
42
57
  end
43
-
58
+
44
59
  end
45
60
 
46
- # ---------------------------------------------------------------------------------
61
+ # ---------------------------------------------------------------------------------
47
62
  # Instance methods
48
- # ---------------------------------------------------------------------------------
63
+ # ---------------------------------------------------------------------------------
64
+
65
+ # Set / get the indexer
66
+ # @return [XapianDb::Indexer]
49
67
  attr_accessor :indexer
50
-
68
+
51
69
  # Return an array of all configured text methods in this blueprint
70
+ # @return [Array<String>] All searchable prefixes
52
71
  def searchable_prefixes
53
- @prefixes ||= indexed_methods.map{|method_name, options| method_name}
72
+ @prefixes ||= indexed_methods.keys
54
73
  end
55
-
74
+
56
75
  # Lazily build and return a module that implements accessors for each field
76
+ # @return [Module] A module containing all accessor methods
57
77
  def accessors_module
58
78
  return @accessors_module unless @accessors_module.nil?
59
79
  @accessors_module = Module.new
60
-
80
+
81
+ # Add the accessor for the indexed class
61
82
  @accessors_module.instance_eval do
62
83
  define_method :domain_class do
63
84
  self.values[0].value
64
85
  end
65
86
  end
66
-
87
+
67
88
  @attributes.each_with_index do |field, index|
68
89
  @accessors_module.instance_eval do
69
90
  define_method field do
@@ -72,50 +93,81 @@ module XapianDb
72
93
  end
73
94
  end
74
95
  # Let the adapter add its document helper methods (if any)
75
- adapter = XapianDb::Config.adapter || XapianDb::Adapters::GenericAdapter
96
+ adapter = @adapter || XapianDb::Config.adapter || XapianDb::Adapters::GenericAdapter
76
97
  adapter.add_doc_helper_methods_to(@accessors_module)
77
98
  @accessors_module
78
99
  end
79
-
80
- # ---------------------------------------------------------------------------------
100
+
101
+ # ---------------------------------------------------------------------------------
81
102
  # Blueprint DSL methods
82
- # ---------------------------------------------------------------------------------
83
- attr_reader :adapter, :attributes, :indexed_methods
84
-
103
+ # ---------------------------------------------------------------------------------
104
+
105
+ # The name of the method that returns a Xapian compliant language code. The
106
+ # configured class must implement this method.
107
+ attr_reader :lang_method
108
+
109
+ # Collection of the configured attribute methods
110
+ # @return [Array<Symbol>] The names of the configured attribute methods
111
+ attr_reader :attributes
112
+
113
+ # Collection of the configured index methods
114
+ # @return [Hash<Symbol, IndexOptions>] A hashtable containing all index methods as
115
+ # keys and IndexOptions as values
116
+ attr_reader :indexed_methods
117
+
118
+ # Set / read a custom adapter.
119
+ # Use this configuration option if you need a specific adapter for an indexed class.
120
+ # If set, it overrides the globally configured adapter (see also {Config#adapter})
121
+ attr_accessor :adapter
122
+
85
123
  # Construct the blueprint
86
124
  def initialize
87
125
  @attributes = []
88
126
  @indexed_methods = {}
89
127
  end
90
-
91
- # Set a custom adapter for this blueprint
92
- def adapter=(adapter)
93
- @adapter = adapter
128
+
129
+ # Set the name of the method to get the language for an indexed object
130
+ # @param [Symbol] lang The method name. The method must return a language supported
131
+ # by Xapian (see http://xapian.org/docs/apidoc/html/classXapian_1_1Stem.html for supported languages)
132
+ def language_method(lang)
133
+ @lang_method = lang
94
134
  end
95
-
96
- # Add an attribute to the list
97
- # TODO: Make sure the name does not collide with a method name of Xapian::Document since
98
- # we generate methods in the documents for all defined fields
135
+
136
+ # Add an attribute to the blueprint. Attributes will be stored in the xapian documents and can be
137
+ # accessed from a search result.
138
+ # @param [String] name The name of the method that delivers the value for the attribute
139
+ # @param [Hash] options
140
+ # @option options [Integer] :weight (1) The weight for this attribute.
141
+ # @option options [Boolean] :index (true) Should the attribute be indexed?
142
+ # @todo Make sure the name does not collide with a method name of Xapian::Document since
99
143
  def attribute(name, options={})
100
144
  opts = {:index => true}.merge(options)
101
145
  @attributes << name
102
146
  self.index(name, opts) if opts[:index]
103
147
  end
104
148
 
105
- # Add an indexed value to the list
149
+ # Add an indexed value to the blueprint. Indexed values are not accessible from a search result.
150
+ # @param [String] name The name of the method that delivers the value for the index
151
+ # @param [Hash] options
152
+ # @option options [Integer] :weight (1) The weight for this indexed value
106
153
  def index(name, options={})
107
154
  @indexed_methods[name] = IndexOptions.new(options)
108
155
  end
109
156
 
110
- # Options for an indexed text
111
- class IndexOptions
157
+ # Options for an indexed method
158
+ class IndexOptions
159
+
160
+ # The weight for the indexed value
112
161
  attr_accessor :weight
113
-
162
+
163
+ # Constructor
164
+ # @param [Hash] options
165
+ # @option options [Integer] :weight (1) The weight for the indexed value
114
166
  def initialize(options)
115
167
  @weight = options[:weight] || 1
116
168
  end
117
169
  end
118
-
170
+
119
171
  end
120
-
172
+
121
173
  end
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
 
3
- # This writer writes changes directly to the open database.
3
+ # This writer writes changes directly to the open database.
4
4
  # Use the direct writer only for single process environments
5
5
  # (one single rails app server, e.g. one mongrel).
6
6
  # For multi process environments you should use a writer that
@@ -9,12 +9,13 @@
9
9
 
10
10
  module XapianDb
11
11
  module IndexWriters
12
-
12
+
13
13
  class DirectWriter
14
-
14
+
15
15
  class << self
16
-
16
+
17
17
  # Update an object in the index
18
+ # @param [Object] obj An instance of a class with a blueprint configuration
18
19
  def index(obj)
19
20
  blueprint = XapianDb::DocumentBlueprint.blueprint_for(obj.class)
20
21
  doc = blueprint.indexer.build_document_for(obj)
@@ -23,30 +24,41 @@ module XapianDb
23
24
  end
24
25
 
25
26
  # Remove an object from the index
27
+ # @param [Object] obj An instance of a class with a blueprint configuration
26
28
  def unindex(obj)
27
29
  XapianDb.database.delete_doc_with_unique_term(obj.xapian_id)
28
30
  XapianDb.database.commit
29
31
  end
30
32
 
31
33
  # Reindex all objects of a given class
32
- def reindex_class(klass)
34
+ # @param [Class] klass The class to reindex
35
+ # @param [Hash] options Options for reindexing
36
+ # @option options [Boolean] :verbose (false) Should the reindexing give status information?
37
+ def reindex_class(klass, options={})
38
+ opts = {:verbose => false}.merge(options)
33
39
  # First, delete all docs of this class
34
40
  XapianDb.database.delete_docs_of_class(klass)
35
41
  blueprint = XapianDb::DocumentBlueprint.blueprint_for(klass)
36
- obj_count = klass.count
37
- puts "Reindexing #{obj_count} objects..."
38
- pbar = ProgressBar.new("Status", obj_count)
39
- klass.all.each do |obj|
42
+ show_progressbar = false
43
+ if opts[:verbose]
44
+ if defined?(ProgressBar)
45
+ show_progressbar = true
46
+ end
47
+ obj_count = klass.count
48
+ puts "Reindexing #{obj_count} objects..."
49
+ pbar = ProgressBar.new("Status", obj_count) if show_progressbar
50
+ end
51
+ klass.all.each do |obj|
40
52
  doc = blueprint.indexer.build_document_for(obj)
41
53
  XapianDb.database.store_doc(doc)
42
- pbar.inc
54
+ pbar.inc if show_progressbar
43
55
  end
44
56
  XapianDb.database.commit
45
57
  end
46
-
58
+
47
59
  end
48
-
49
- end
50
-
60
+
61
+ end
62
+
51
63
  end
52
64
  end
@@ -1,18 +1,39 @@
1
1
  # encoding: utf-8
2
2
 
3
- # The indexer creates a Xapian::Document from a configured object
4
- # @author Gernot Kogler
5
-
6
3
  module XapianDb
7
-
4
+
5
+ # The indexer creates a Xapian::Document from an object. The object must be an instance
6
+ # of a class that has a blueprint configuration.
7
+ # @author Gernot Kogler
8
8
  class Indexer
9
-
9
+
10
+ # Supported languages and mapping to the stemmer to use
11
+ LANGUAGE_MAP = {:da => :danish,
12
+ :nl => :dutch,
13
+ :en => :english,
14
+ :fi => :finnish,
15
+ :fr => :french,
16
+ :de => :german2, # Normalises umlauts and ß
17
+ :hu => :hungarian,
18
+ :it => :italian,
19
+ :nb => :norwegian,
20
+ :nn => :norwegian,
21
+ :no => :norwegian,
22
+ :pt => :portuguese,
23
+ :ro => :romanian,
24
+ :ru => :russian,
25
+ :es => :spanish,
26
+ :sv => :swedish,
27
+ :tr => :turkish}
28
+ # Constructor
29
+ # @param [XapianDb::DocumentBlueprint] document_blueprint The blueprint to use
10
30
  def initialize(document_blueprint)
11
31
  @document_blueprint = document_blueprint
12
32
  end
13
-
14
- # Build the doc for an object. The object must respond to 'xapian_id'.
33
+
34
+ # Build the document for an object. The object must respond to 'xapian_id'.
15
35
  # The configured adapter should implement this method.
36
+ # @return [Xapian::Document] The xapian document (see http://xapian.org/docs/sourcedoc/html/classXapian_1_1Document.html)
16
37
  def build_document_for(obj)
17
38
  @obj = obj
18
39
  @blueprint = DocumentBlueprint.blueprint_for(@obj.class)
@@ -22,15 +43,15 @@ module XapianDb
22
43
  index_text
23
44
  @xapian_doc
24
45
  end
25
-
46
+
26
47
  private
27
-
48
+
28
49
  # Store all configured fields
29
50
  def store_fields
30
51
 
31
52
  # We store the class name of the object at position 0
32
53
  @xapian_doc.add_value(0, @obj.class.name)
33
-
54
+
34
55
  pos = 1
35
56
  @blueprint.attributes.each do |attribute, options|
36
57
  value = @obj.send(attribute)
@@ -38,24 +59,20 @@ module XapianDb
38
59
  pos += 1
39
60
  end
40
61
  end
41
-
62
+
42
63
  # Index all configured text methods
43
64
  def index_text
44
- term_generator = Xapian::TermGenerator.new()
65
+ term_generator = Xapian::TermGenerator.new
45
66
  term_generator.document = @xapian_doc
46
- # TODO: make this configurable globally and per document
47
- # (retrieve the language from the object, if configured)
48
- stemmer = Xapian::Stem.new("english")
49
- term_generator.stemmer = stemmer
67
+ term_generator.stemmer = get_stemmer
50
68
  # TODO: Configure and enable these features
51
69
  # tg.stopper = stopper if stopper
52
- # tg.stemmer = stemmer
53
70
  # tg.set_flags Xapian::TermGenerator::FLAG_SPELLING if db.spelling
54
71
 
55
72
  # Always index the class and the primary key
56
73
  @xapian_doc.add_term("C#{@obj.class}")
57
74
  @xapian_doc.add_term("Q#{@obj.xapian_id}")
58
-
75
+
59
76
  @blueprint.indexed_methods.each do |method, options|
60
77
  value = @obj.send(method)
61
78
  unless value.nil?
@@ -69,7 +86,21 @@ module XapianDb
69
86
  end
70
87
  end
71
88
  end
72
-
89
+
90
+ private
91
+
92
+ # Configure the stemmer to use
93
+ def get_stemmer
94
+ # Do we have a language config on the blueprint?
95
+ if @blueprint.lang_method
96
+ lang = @obj.send(@blueprint.lang_method)
97
+ return Xapian::Stem.new(LANGUAGE_MAP[lang.to_sym].to_s) if lang && LANGUAGE_MAP.has_key?(lang.to_sym)
98
+ end
99
+ # Do we have a global stemmer?
100
+ return XapianDb::Config.stemmer if XapianDb::Config.stemmer
101
+ return Xapian::Stem.new("none") # No language config
102
+ end
103
+
73
104
  end
74
-
105
+
75
106
  end
@@ -1,34 +1,37 @@
1
1
  # encoding: utf-8
2
2
 
3
- # Parse a query expression and convert it to Xapian Query arguments
4
- # @author Gernot Kogler
5
-
6
3
  module XapianDb
7
-
4
+
5
+ # Parse a query expression and create a xapian query object
6
+ # @author Gernot Kogler
8
7
  class QueryParser
9
-
8
+
9
+ # Constructor
10
+ # @param [XapianDb::Database] database The database to query
10
11
  def initialize(database)
11
12
  @db = database
12
-
13
+
13
14
  # Set the parser options
14
15
  @query_flags = 0
15
- @query_flags |= Xapian::QueryParser::FLAG_WILDCARD # enable wildcards
16
- @query_flags |= Xapian::QueryParser::FLAG_BOOLEAN
17
- @query_flags |= Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE
16
+ @query_flags |= Xapian::QueryParser::FLAG_WILDCARD # enable wildcards
17
+ @query_flags |= Xapian::QueryParser::FLAG_BOOLEAN # enable boolean operators
18
+ @query_flags |= Xapian::QueryParser::FLAG_BOOLEAN_ANY_CASE # enable case insensitive boolean operators
18
19
  end
19
-
20
+
21
+ # Parse an expression
22
+ # @return [Xapian::Query] The query object (see http://xapian.org/docs/apidoc/html/classXapian_1_1Query.html)
20
23
  def parse(expression)
21
24
  parser = Xapian::QueryParser.new
22
25
  parser.database = @db.reader
23
26
  parser.default_op = Xapian::Query::OP_AND # Could be made configurable
24
27
  # TODO: Setup stopper, stemmer, defaults and fields
25
-
26
- # Add the searchable prefixes to allow searches by field
28
+
29
+ # Add the searchable prefixes to allow searches by field
27
30
  # (like "name:Kogler")
28
31
  XapianDb::DocumentBlueprint.searchable_prefixes.each{|prefix| parser.add_prefix(prefix.to_s.downcase, "X#{prefix.to_s.upcase}") }
29
32
  parser.parse_query(expression, @query_flags)
30
33
  end
31
-
34
+
32
35
  end
33
-
36
+
34
37
  end
@@ -1,12 +1,12 @@
1
1
  # encoding: utf-8
2
2
 
3
- # Configuration for a rails app
4
- # @author Gernot Kogler
5
-
6
3
  require 'xapian_db'
7
4
  require 'rails'
8
5
 
9
6
  module XapianDb
7
+
8
+ # Configuration for a rails app
9
+ # @author Gernot Kogler
10
10
  class Railtie < ::Rails::Railtie
11
11
 
12
12
  config.before_configuration do
@@ -25,7 +25,7 @@ module XapianDb
25
25
  adapter = :active_record
26
26
  writer = :direct
27
27
  end
28
-
28
+
29
29
  # Do the configuration
30
30
  XapianDb::Config.setup do |config|
31
31
  if database_path == ":memory:"
@@ -33,11 +33,12 @@ module XapianDb
33
33
  else
34
34
  config.database database_path
35
35
  end
36
- config.adapter adapter.to_sym
36
+ config.adapter adapter.to_sym
37
37
  config.writer writer.to_sym
38
+ config.language(env_config["language"]) if env_config["language"]
38
39
  end
39
-
40
+
40
41
  end
41
-
42
+
42
43
  end
43
44
  end