acts_as_xapian 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
+ module ActsAsXapian
2
+ class Index
3
+ @@db_path = nil
4
+ @@init_values = []
5
+
6
+ cattr_reader :config, :db_path, :stemmer
7
+
8
+ class <<self
9
+ ######################################################################
10
+ # Initialisation
11
# Remembers a model class and its options so they are available later,
# when the database is opened. Calls without a classname record nothing.
#
# classname - the class being registered (nil is ignored)
# options   - the options supplied alongside it
def init(classname = nil, options = nil)
  return if classname.nil?

  @@init_values << [classname, options]
end
15
+
16
# Reads config/xapian.yml (if any) and sets up the path to the database
# we'll be using. Does nothing once @@db_path has been set.
#
# Sets @@config, @@db_path and @@stemmer.
# Raises RuntimeError when the Rails environment cannot be determined.
def prepare_environment
  return unless @@db_path.nil?

  # barf if we can't figure out the environment. Referencing an undefined
  # RAILS_ENV constant would raise NameError before the explanatory error
  # below could be reached, so probe for it first.
  environment = ENV['RAILS_ENV'] || (defined?(RAILS_ENV) ? RAILS_ENV : nil)
  raise "Set RAILS_ENV, so acts_as_xapian can find the right Xapian database" unless environment

  # check for a config file; an empty file, or one without a section for
  # this environment, is treated the same as having no file at all
  config_file = File.join(RAILS_ROOT, 'config', 'xapian.yml')
  settings = File.exist?(config_file) ? YAML.load_file(config_file) : nil
  @@config = (settings && settings[environment]) || {}

  # figure out where the DBs should go
  relative_parent = @@config['base_db_path'] || File.join('db', 'xapiandbs')
  db_parent_path = File.join(RAILS_ROOT, relative_parent)

  # make the directory for the xapian databases to go in, including any
  # missing intermediate directories of a nested base_db_path
  require 'fileutils'
  FileUtils.mkdir_p(db_parent_path)

  @@db_path = File.join(db_parent_path, environment)

  # make some things that don't depend on the db
  # XXX this gets made once for each acts_as_xapian. Oh well.
  @@stemmer = Xapian::Stem.new('english')
end
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,159 @@
1
+ module ActsAsXapian
2
+ # Base class for Search and Similar below
3
+ class QueryBase
4
+ attr_accessor :offset, :limit, :query, :query_models, :runtime, :cached_results
5
+ @@unlimited = 1000000
6
+
7
# Delegates to the current query object and returns its description.
def description
  query.description
end
11
+
12
# Returns the mset for the query, running it on first use and caching the
# outcome in @matches.
#
# reload - when true, the cached mset is ignored and the query is re-run
#
# An IOError whose message mentions DatabaseModifiedError triggers one
# reset of the index and a retry; any other IOError, or a second one after
# @retried has been set, is re-raised.
def matches(reload = false)
  return @matches if !@matches.nil? && !reload

  begin
    self.runtime += Benchmark.realtime do
      # If using find_options conditions have Xapian return the entire match set
      # TODO Revisit. This is extremely inefficient for large indices
      first, max_items = @postpone_limit ? [0, @@unlimited] : [@offset, @limit]
      @matches = @index.enquire.mset(first, max_items, @check_at_least)
    end
    @matches
  rescue IOError => error
    stale_database = @retried.nil? && /DatabaseModifiedError/.match(error.message.to_s)
    raise error unless stale_database

    @retried = true
    @index.reset_enquire!
    initialize_enquire
    retry
  end
end
33
+
34
# Estimated total number of results: the value stored in @matches_estimated
# when there is one, otherwise the estimate reported by the mset.
# Note: Unreliable if using find_options with conditions or joins
def matches_estimated
  return @matches_estimated if @matches_estimated

  matches.matches_estimated
end
39
+
40
# Asks the index's query parser for its corrected query string.
# Returns that string, or nil when the parser hands back an empty one.
def spelling_correction
  suggestion = @index.query_parser.get_corrected_query_string
  return nil if suggestion.empty?

  suggestion
end
45
+
46
# Return array of models found.
#
# Each element is a hash with :data, :percent, :weight and :collapse_count
# taken from the Xapian match, plus :model holding the record looked up
# through the model class (nil when the lookup returned no record with
# that id). The array is memoised in cached_results.
#
# When @postpone_limit is set, the whole match set is first filtered by
# running the model finders with @find_options, and offset/limit are then
# applied in Ruby; @matches_estimated is set to the filtered count.
def results
  # If they've already pulled out the results, just return them.
  return cached_results unless cached_results.nil?

  docs = nil
  self.runtime += Benchmark::realtime do
    # Pull out all the results
    docs = matches.matches.map do |match|
      {:data => match.document.data, :percent => match.percent, :weight => match.weight, :collapse_count => match.collapse_count}
    end
  end

  # Log time taken, excluding database lookups below which will be displayed separately by ActiveRecord
  ActiveRecord::Base.logger.debug(" Xapian query (%.5fs) #{self.log_description.gsub('%','%%')}" % self.runtime) if ActiveRecord::Base.logger

  ids_by_model = group_ids_by_model(docs)

  if @postpone_limit
    # Fetch only the primary keys of the records that satisfy @find_options
    # and turn them back into document terms
    surviving_terms = ids_by_model.map do |(class_name, ids)|
      model = class_name.constantize # constantize is expensive do once
      model.with_xapian_scope(ids) { model.find(:all, @find_options.merge(:select => "#{model.table_name}.#{model.primary_key}")) }.map {|m| m.xapian_document_term }
    end.flatten

    self.runtime += Benchmark::realtime do
      # hash key searching is MUCH faster than an array sequential scan
      surviving = surviving_terms.inject({}) {|seen, term| seen[term] = true; seen }
      # Hash#delete removes the key, so each term keeps at most one doc
      docs.delete_if {|doc| !surviving.delete(doc[:data]) }

      @matches_estimated = docs.size

      docs = docs[@offset, @limit] || []

      ids_by_model = group_ids_by_model(docs)
    end
  end

  # for each class, look up the associated ids
  records_by_model = ids_by_model.inject({}) do |out, (class_name, ids)|
    model = class_name.constantize # constantize is expensive do once
    found = model.with_xapian_scope(ids) { model.find(:all, @find_options) }
    out[class_name] = found.inject({}) {|by_id, record| by_id[record.id] = record; by_id }
    out
  end

  # add the model to each doc
  docs.each do |doc|
    model_name, id = doc[:data].split('-', 2)
    doc[:model] = records_by_model[model_name][id.to_i]
  end

  self.cached_results = docs
end

# Groups the ids found in the docs' :data strings ("ModelName-id") by model
# name. Only the first dash separates name from id, so an id that itself
# contains a dash is kept whole.
def group_ids_by_model(docs)
  docs.inject({}) do |grouped, doc|
    model_name, id = doc[:data].split('-', 2)
    (grouped[model_name] ||= []) << id
    grouped
  end
end
private :group_ids_by_model
105
+
106
+ protected
107
+
108
# Resets the accumulated runtime to zero and fetches the readable index for
# the given models into @index.
#
# Raises RuntimeError when no index comes back.
def initialize_db(models)
  self.runtime = 0.0

  @index = ReadableIndex.index_for(models)
  return unless @index.nil?

  raise "ActsAsXapian::ReadableIndex not initialized"
end
115
+
116
# Copies the search options into instance state, clears any cached results
# and then configures the enquire object. Set self.query before calling this.
#
# options - hash read for :offset, :limit, :check_at_least, :sort_by_prefix,
#           :sort_by_ascending, :collapse_by_prefix and :find_options
#
# Returns whatever initialize_enquire returns.
def initialize_query(options)
  self.runtime += Benchmark.realtime do
    @offset = options[:offset].to_i
    @limit = (options[:limit] || @@unlimited).to_i
    @check_at_least = (options[:check_at_least] || 100).to_i
    @sort_by_prefix = options[:sort_by_prefix]
    ascending = options[:sort_by_ascending]
    @sort_by_ascending = ascending.nil? ? true : ascending
    @collapse_by_prefix = options[:collapse_by_prefix]
    @find_options = options[:find_options]
    # Limiting is postponed only when find_options carry conditions or joins
    @postpone_limit = !@find_options.blank? &&
      (!@find_options[:conditions].blank? || !@find_options[:joins].blank?)

    self.cached_results = nil
  end

  initialize_enquire
end
133
+
134
# Applies the current query, sort order and collapse key to the index's
# enquire object. Time spent is added to runtime.
#
# Raises RuntimeError when a sort or collapse prefix is not present in the
# index's values_by_prefix. Returns true.
def initialize_enquire
  self.runtime += Benchmark.realtime do
    enquire = @index.enquire
    enquire.query = query

    if @sort_by_prefix.nil?
      enquire.sort_by_relevance!
    else
      sort_slot = @index.values_by_prefix[@sort_by_prefix]
      raise "couldn't find prefix '#{@sort_by_prefix}'" if sort_slot.nil?
      # Xapian has inverted the meaning of ascending order to handle relevance sorting
      # "keys which sort higher by string compare are better"
      enquire.sort_by_value_then_relevance!(sort_slot, !@sort_by_ascending)
    end

    enquire.collapse_key =
      if @collapse_by_prefix.nil?
        Xapian.BAD_VALUENO
      else
        collapse_slot = @index.values_by_prefix[@collapse_by_prefix]
        raise "couldn't find prefix '#{@collapse_by_prefix}'" if collapse_slot.nil?
        collapse_slot
      end
  end
  true
end
158
+ end
159
+ end
@@ -0,0 +1,117 @@
1
+ module ActsAsXapian
2
+ class ReadableIndex < Index
3
+ @@available_indicies = {}
4
+
5
+ attr_reader :enquire, :query_parser, :values_by_prefix
6
+
7
# Takes an array of model classes and returns the index object to use for
# searching across exactly those models.
#
# One index is kept per distinct set of models (keyed on the sorted class
# names), which prevents query parser interaction across multiple models
# unless performing a multi model search. A cached index has its enquire
# reset before being handed back; otherwise a new one is built and stored.
def self.index_for(models)
  cache_key = models.map(&:to_s).sort.join('---')

  unless @@available_indicies.key?(cache_key)
    return @@available_indicies[cache_key] = new(models)
  end

  cached = @@available_indicies[cache_key]
  cached.reset_enquire!
  cached
end
24
+
25
# Opens the db for reading and builds the query parser for the given models.
#
# Raises NoXapianRubyBindingsError when the bindings are unavailable, and
# RuntimeError when no model has registered itself or the database cannot
# be opened.
def initialize(models)
  unless ActsAsXapian.bindings_available
    raise NoXapianRubyBindingsError.new("Xapian Ruby bindings not installed")
  end
  if @@init_values.empty?
    raise "acts_as_xapian hasn't been called in any models"
  end

  self.class.prepare_environment

  # basic Xapian objects
  begin
    @db = Xapian::Database.new(@@db_path)
    @enquire = Xapian::Enquire.new(@db)
  rescue IOError
    raise "Xapian database not opened; have you built it with rake xapian:rebuild_index ?"
  end

  init_query_parser(models)
end
42
+
43
# Creates a new search session: reopens the database, which grabs the latest
# db updates, and replaces @enquire with a fresh enquire object.
#
# Raises RuntimeError when either step fails with an IOError.
def reset_enquire!
  begin
    @db.reopen
    @enquire = Xapian::Enquire.new(@db)
  rescue IOError
    raise "Xapian database not opened; have you built it with rake xapian:rebuild_index ?"
  end
end
50
+
51
+ protected
52
+
53
# Make a new query parser for the given models and register their term
# prefixes and value ranges with it.
#
# models - classes responding to xapian_options; the :terms and :values
#          entries are read by position (index 1 = code/slot,
#          index 2 = prefix, index 3 = value type)
#
# Raises RuntimeError when a term code or value slot is malformed, reserved,
# or used inconsistently between models. Returns the frozen @values_by_prefix.
def init_query_parser(models)
  # for queries
  @query_parser = Xapian::QueryParser.new
  @query_parser.stemmer = @@stemmer
  @query_parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
  @query_parser.database = @db
  @query_parser.default_op = Xapian::Query::OP_AND

  @terms_by_capital = {}
  @values_by_number = {}
  @values_by_prefix = {}
  @value_ranges_store = []

  models.each do |klass|
    options = klass.xapian_options
    # go through the various field types, and tell query parser about them,
    # and error check them - i.e. check for consistency between models
    @query_parser.add_boolean_prefix("model", "M")
    @query_parser.add_boolean_prefix("modelid", "I")
    (options[:terms] || []).each do |term|
      code, prefix = term[1], term[2]
      # \A and \z anchor the whole string; ^ and $ would also accept a code
      # followed by a newline
      raise "Use up to 3 single capital letters for term code" unless code.match(/\A[A-Z]{1,3}\z/)
      raise "M and I are reserved for use as the model/id term" if code == "M" || code == "I"
      raise "model and modelid are reserved for use as the model/id prefixes" if prefix == "model" || prefix == "modelid"
      raise "Z is reserved for stemming terms" if code == "Z"
      raise "Already have code '#{code}' in another model but with different prefix '#{@terms_by_capital[code]}'" if @terms_by_capital.key?(code) && @terms_by_capital[code] != prefix
      @terms_by_capital[code] = prefix
      @query_parser.add_prefix(prefix, code)
    end

    # :number values are handled before all the others
    numeric, others = (options[:values] || []).partition {|i| i[3] == :number }
    (numeric + others).each do |value|
      slot, prefix, type = value[1], value[2], value[3]
      # Integer rather than Fixnum: the Fixnum constant no longer exists in
      # current Ruby, and Integer covers the same values
      raise "Value index '#{slot}' must be an integer, is #{slot.class}" unless slot.is_a?(Integer)
      raise "Already have value index '#{slot}' in another model but with different prefix '#{@values_by_number[slot]}'" if @values_by_number.key?(slot) && @values_by_number[slot] != prefix
      raise "Already have value prefix '#{prefix}' in another model but with different index '#{@values_by_prefix[prefix]}'" if type == :number && @values_by_prefix.key?(prefix) && @values_by_prefix[prefix] != slot

      # a range processor is only registered the first time a slot is seen
      if !@values_by_number.key?(slot)
        value_range = case type
        when :date
          Xapian::DateValueRangeProcessor.new(slot)
        when :string
          Xapian::StringValueRangeProcessor.new(slot)
        when :number
          Xapian::NumberValueRangeProcessor.new(slot, "#{prefix}:", true)
        else
          raise "Unknown value type '#{type}'"
        end

        @query_parser.add_valuerangeprocessor(value_range)

        # stop it being garbage collected, as
        # add_valuerangeprocessor ref is outside Ruby's GC
        @value_ranges_store.push(value_range)
      end

      @values_by_number[slot] = prefix
      @values_by_prefix[prefix] = slot
    end
  end

  @values_by_prefix.freeze # This can be read outside the instance. Make sure it can't be changed there
end
116
+ end
117
+ end
@@ -0,0 +1,67 @@
1
+ module ActsAsXapian
2
+ # Search for a query string, returns an array of hashes in result order.
3
+ # Each hash contains the actual Rails object in :model, and other detail
4
+ # about relevancy etc. in other keys.
5
+ class Search < QueryBase
6
+ attr_accessor :query_string
7
+
8
+ @@parse_query_flags = Xapian::QueryParser::FLAG_BOOLEAN | Xapian::QueryParser::FLAG_PHRASE |
9
+ Xapian::QueryParser::FLAG_LOVEHATE | Xapian::QueryParser::FLAG_WILDCARD |
10
+ Xapian::QueryParser::FLAG_SPELLING_CORRECTION
11
+
12
+ # Note that model_classes is not only sometimes useful here - it's
13
+ # essential to make sure the classes have been loaded, and thus
14
+ # acts_as_xapian called on them, so we know the fields for the query
15
+ # parser.
16
+
17
+ # model_classes - model classes to search within, e.g. [PublicBody,
18
+ # User]. Can take a single model class, or you can express the model
19
+ # class names in strings if you like.
20
+ # query_string - user-entered query string, with syntax much like Google Search
21
+ #
22
+ # options include
23
+ # - :limit - limit the number of records returned
24
+ # - :offset - start with this record number
25
+ # - :check_at_least - used for total match estimates. Set higher for greater accuracy at the cost of slower queries. default: 100
26
+ # - :sort_by_prefix - determines which data field to sort by. default: sort by relevance
27
+ # - :sort_by_ascending - determines which direction to sort. default: true (ascending sort)
28
+ # - :collapse_by_prefix - groups the return set by this prefix
29
+ # - :find_options - These options are passed through to the active record find. Be careful if searching against multiple model classes.
30
def initialize(model_classes, query_string, options = {})
  # Normalise the argument to an array of actual classes; strings are
  # converted with constantize, anything else must already be a Class
  model_classes = Array(model_classes).map do |candidate|
    klass = candidate.instance_of?(String) ? candidate.constantize : candidate
    unless klass.instance_of?(Class)
      raise "pass in the model class itself, or a string containing its name"
    end
    klass
  end

  # Set things up
  initialize_db(model_classes)

  # Case of a string, searching for a Google-like syntax query
  self.query_string = query_string

  # Construct query which only finds things from specified models
  model_terms = model_classes.map {|mc| "M#{mc}" }
  model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_terms)
  user_query = @index.query_parser.parse_query(self.query_string, @@parse_query_flags)
  self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, user_query)

  # Hand the remaining options to the shared query setup
  initialize_query(options)
end
52
+
53
# Return just normal words in the query i.e. Not operators, ones in
# date ranges or similar. Use this for cheap highlighting with
# TextHelper::highlight, and excerpt.
#
# Returns an array of strings; empty when the query string is nil or blank.
def words_to_highlight
  # [[:word:]] also matches non-ASCII letters and digits (and "_"), whereas
  # \w is ASCII-only and would break a word such as "café" into pieces.
  # Every other character except ":", "." and "/" becomes a separator.
  query_nopunc = query_string.to_s.gsub(%r{[^[:word:]:./]}, " ")
  # Split on whitespace and remove anything with a :, . or / in it or boolean operators
  query_nopunc.split.reject {|word| word =~ %r{[:./]|\A(?:AND|NOT|OR|XOR)\z} }
end
61
+
62
# Label used for this search in the log file: the raw query string with a
# "Search: " prefix.
def log_description
  "Search: #{query_string}"
end
66
+ end
67
+ end
@@ -0,0 +1,61 @@
1
+ module ActsAsXapian
2
+ # Search for models which contain the important terms taken from a specified
3
+ # list of models. i.e. Use to find documents similar to one (or more)
4
+ # documents, or use to refine searches.
5
+ class Similar < QueryBase
6
+ attr_accessor :query_models
7
+ attr_accessor :important_terms
8
+
9
# model_classes - model classes to search within, e.g. [PublicBody, User];
#                 a single class is also accepted
# query_models  - list of models you want to find things similar to; a
#                 single model is also accepted
# options       - passed on unchanged to initialize_query
def initialize(model_classes, query_models, options = {})
  # Accept a lone class / model as well as arrays, in line with Search
  model_classes = Array(model_classes)
  query_models = Array(query_models)

  self.initialize_db(model_classes)

  self.runtime += Benchmark::realtime do
    # Case of an array, searching for models similar to those models in the array
    self.query_models = query_models

    # Find the documents by their unique term
    input_models_query = Xapian::Query.new(Xapian::Query::OP_OR, query_models.map {|m| "I#{m.xapian_document_term}" })
    begin
      @index.enquire.query = input_models_query

      # Get set of relevant terms for those documents
      selection = Xapian::RSet.new()
      @index.enquire.mset(0, 100, 100).matches.each {|m| selection.add_document(m.docid) } # XXX so this whole method will only work with 100 docs

      # Bit weird that the function to make esets is part of the enquire
      # object. This explains what exactly it does, which is to exclude
      # terms in the existing query.
      # http://thread.gmane.org/gmane.comp.search.xapian.general/3673/focus=3681
      #
      # Do main search for them
      self.important_terms = @index.enquire.eset(40, selection).terms.map {|e| e.name }
    rescue IOError => e
      # Retry once, after resetting the index, when the message mentions
      # DatabaseModifiedError
      if @retried.nil? && /DatabaseModifiedError/.match(e.message.to_s)
        @retried = true
        @index.reset_enquire!
        retry
      end
      raise e
    end

    similar_query = Xapian::Query.new(Xapian::Query::OP_OR, self.important_terms)
    # Exclude original
    combined_query = Xapian::Query.new(Xapian::Query::OP_AND_NOT, similar_query, input_models_query)

    # Restrain to model classes
    model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_classes.map {|mc| "M#{mc}" })
    self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, combined_query)
  end

  # Hand the remaining options to the shared query setup
  self.initialize_query(options)
end
55
+
56
# Label used for this search in the log file: the query models,
# interpolated into a string with a "Similar: " prefix.
def log_description
  "Similar: #{query_models}"
end
60
+ end
61
+ end
@@ -0,0 +1,152 @@
1
+ module ActsAsXapian
2
+ class WriteableIndex < Index
3
+ @@writable_db = nil
4
+ @@writable_suffix = nil
5
+
6
+ cattr_reader :term_generator
7
+
8
+ class << self
9
# Forwards all arguments to delete_document on the shared writable database
# and returns its result.
def delete_document(*arguments)
  @@writable_db.delete_document(*arguments)
end
12
+
13
# Forwards all arguments to replace_document on the shared writable database
# and returns its result.
def replace_document(*arguments)
  @@writable_db.replace_document(*arguments)
end
16
+
17
# Opens (creating if necessary) the writable database at the configured db
# path plus suffix, and sets up the term generator used for indexing.
# Does nothing when a writable database is already open.
#
# suffix - string appended to the database path (default "")
#
# Raises NoXapianRubyBindingsError when the bindings are unavailable and
# RuntimeError when no model has registered itself.
def writable_init(suffix = "")
  unless ActsAsXapian.bindings_available
    raise NoXapianRubyBindingsError.new("Xapian Ruby bindings not installed")
  end
  raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty?

  # if DB is not nil, then we're already initialised, so don't do it again
  if @@writable_db.nil?
    prepare_environment

    new_path = @@db_path + suffix
    # NOTE(review): within this method @@writable_suffix is only assigned
    # after @@writable_db, so this check looks unreachable while the db is
    # still nil - confirm whether it was meant to run before the early exit.
    if @@writable_suffix && @@writable_suffix != suffix
      raise "writable_suffix/suffix inconsistency"
    end

    # for indexing
    @@writable_db = Xapian::WritableDatabase.new(new_path, Xapian::DB_CREATE_OR_OPEN)
    generator = @@term_generator = Xapian::TermGenerator.new
    generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0)
    generator.database = @@writable_db
    generator.stemmer = @@stemmer
    @@writable_suffix = suffix
  end
end
37
+
38
+ ######################################################################
39
+ # Index
40
+
41
# Update index with any changes needed, call this offline. Only call it
# from a script that exits - otherwise Xapian's writable database won't
# flush your changes. Specifying flush will reduce performance, but
# make sure that each index update is definitely saved to disk before
# logging in the database that it has been.
#
# flush   - when true, flush the writable database after every job
# verbose - when true, print one line per job to stdout
#
# A failure in one job is reported on STDERR and does not stop the others.
def update_index(flush = false, verbose = false)
  # Before calling writable_init we have to make sure every model class has been initialized.
  # i.e. has had its class code loaded, so acts_as_xapian has been called inside it, and
  # we have the info from acts_as_xapian.
  model_classes = ActsAsXapianJob.find(:all, :select => 'model', :group => 'model').map {|a| a.model.constantize }
  # If there are no models in the queue, then nothing to do
  return if model_classes.empty?

  self.writable_init

  ids_to_refresh = ActsAsXapianJob.find(:all, :select => 'id').map {|i| i.id }
  ids_to_refresh.each do |id|
    begin
      ActsAsXapianJob.transaction do
        job = ActsAsXapianJob.find(id, :lock =>true)
        puts "ActsAsXapian::WriteableIndex.update_index #{job.action} #{job.model} #{job.model_id.to_s}" if verbose
        begin
          case job.action
          when 'update'
            # XXX Index functions may reference other models, so we could eager load here too?
            model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include]
            model.xapian_index
          when 'destroy'
            # Make dummy model with right id, just for destruction
            model = job.model.constantize.new
            model.id = job.model_id
            model.xapian_destroy
          else
            raise "unknown ActsAsXapianJob action '#{job.action}'"
          end
        rescue ActiveRecord::RecordNotFound
          # The record has gone, so fall back to removing it from the index.
          # If the job is already a destroy, retrying could never succeed and
          # would loop forever, so let the error through instead.
          raise if job.action == 'destroy'
          job.action = 'destroy'
          retry
        end
        job.destroy

        @@writable_db.flush if flush
      end
    rescue => detail
      # print any error, and carry on so other things are indexed
      # XXX If item is later deleted, this should give up, and it
      # won't. It will keep trying (assuming update_index called from
      # regular cron job) and mayhap cause trouble.
      STDERR.puts("#{detail.backtrace.join("\n")}\nFAILED ActsAsXapian::WriteableIndex.update_index job #{id} #{detail}")
    end
  end
end
95
+
96
# You must specify *all* the models here, this totally rebuilds the Xapian database.
# You'll want any readers to reopen the database after this.
#
# The new index is built next to the live one in "<db_path>.new" and then
# moved into place; the previous database is moved to "<db_path>.tmp" and
# deleted. Jobs queued up to the start of the rebuild are removed afterwards.
#
# model_classes - every model class to index
# verbose       - when true, print progress to stdout
#
# Raises RuntimeError when a writable database is already open in this
# process, or when a directory that would be deleted does not look like a
# Xapian database.
def rebuild_index(model_classes, verbose = false)
  raise "when rebuilding all, please call as first and only thing done in process / task" unless @@writable_db.nil?

  prepare_environment

  # Delete any existing .new database, and open a new one
  new_path = "#{self.db_path}.new"
  if File.exist?(new_path)
    raise "found existing #{new_path} which is not Xapian flint database, please delete for me" unless xapian_database_dir?(new_path)
    FileUtils.rm_r(new_path)
  end
  self.writable_init(".new")

  # Index everything

  most_recent_job = ActsAsXapianJob.find(:first, :order => 'id DESC')
  batch_size = 1000
  model_classes.each do |model_class|
    all_ids = model_class.find(:all, :select => model_class.primary_key, :order => model_class.primary_key).map {|i| i.id }
    all_ids.each_slice(batch_size) do |ids|
      puts "ActsAsXapian::WriteableIndex: New batch. Including ids #{ids.first} to #{ids.last}" if verbose
      models = model_class.find(:all, :conditions => {model_class.primary_key => ids})
      models.each do |model|
        puts "ActsAsXapian::WriteableIndex.rebuild_index #{model_class} #{model.id}" if verbose
        model.xapian_index
      end
    end
  end

  @@writable_db.flush

  # Rename into place
  old_path = self.db_path
  temp_path = "#{old_path}.tmp"
  if File.exist?(temp_path)
    raise "temporary database found #{temp_path} which is not Xapian flint database, please delete for me" unless xapian_database_dir?(temp_path)
    FileUtils.rm_r(temp_path)
  end
  FileUtils.mv(old_path, temp_path) if File.exist?(old_path)
  FileUtils.mv(new_path, old_path)

  # Delete old database
  if File.exist?(temp_path)
    raise "old database now at #{temp_path} is not Xapian flint database, please delete for me" unless xapian_database_dir?(temp_path)
    FileUtils.rm_r(temp_path)
  end

  ActsAsXapianJob.delete_all ['id <= ?', most_recent_job.id] if most_recent_job

  # You'll want to restart your FastCGI or Mongrel processes after this,
  # so they get the new db
end

# True when the directory contains one of the "iam<backend>" marker files
# that Xapian's on-disk backends write. Used as a safety check before a
# directory is deleted recursively; checking only for "iamflint" would
# refuse databases created by the other backends.
def xapian_database_dir?(path)
  %w[iamflint iamchert iambrass iamglass iamhoney].any? do |marker|
    File.exist?(File.join(path, marker))
  end
end
private :xapian_database_dir?
150
+ end
151
+ end
152
+ end
@@ -0,0 +1,5 @@
1
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'base' )
2
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'query_base' )
3
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'search' )
4
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'similar' )
5
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'core_ext/array' )
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
describe "ActsAsXapian" do
  # Placeholder until real examples exist. Reported as pending rather than
  # failing, so the suite's result reflects actual regressions.
  it "needs real specs" do
    pending "hey buddy, you should probably rename this file and start specing for real"
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'acts_as_xapian'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end