acts_as_xapian 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,45 @@
1
require 'fileutils'
require 'yaml'

module ActsAsXapian
  # Base class for the readable and writable Xapian indexes. It keeps the
  # process-wide state they share: the list of models that declared
  # acts_as_xapian, the per-environment configuration, the database path and
  # the stemmer.
  class Index
    @@db_path = nil
    @@init_values = []

    cattr_reader :config, :db_path, :stemmer

    class << self
      ######################################################################
      # Initialisation

      # Remembers a model class and its options so they are available later,
      # when a database is actually opened. A nil classname is ignored.
      def init(classname = nil, options = nil)
        @@init_values.push([classname, options]) unless classname.nil?
      end

      # Reads config/xapian.yml (if present) and works out the path of the
      # database for the current Rails environment. Does nothing once the
      # path has been computed.
      #
      # Raises RuntimeError when the environment cannot be determined.
      def prepare_environment
        return unless @@db_path.nil?

        # Guard the constant lookup so a missing RAILS_ENV produces the
        # explanatory error below instead of a NameError.
        environment = ENV['RAILS_ENV'] || (defined?(RAILS_ENV) ? RAILS_ENV : nil)
        raise "Set RAILS_ENV, so acts_as_xapian can find the right Xapian database" unless environment

        # Check for a config file; an empty file or a file without a section
        # for this environment falls back to an empty configuration.
        config_file = File.join(RAILS_ROOT, 'config', 'xapian.yml')
        loaded = File.exist?(config_file) ? YAML.load_file(config_file) : nil
        @@config = (loaded.is_a?(Hash) && loaded[environment]) || {}

        # Figure out where the DBs should go
        db_parent_path =
          if @@config['base_db_path']
            File.join(RAILS_ROOT, @@config['base_db_path'])
          else
            File.join(RAILS_ROOT, 'db', 'xapiandbs')
          end

        # Make the directory for the Xapian databases to go in, including any
        # missing intermediate directories.
        FileUtils.mkdir_p(db_parent_path) unless File.exist?(db_parent_path)

        @@db_path = File.join(db_parent_path, environment)

        # Make some things that don't depend on the db
        @@stemmer = Xapian::Stem.new('english')
      end
    end
  end
end
@@ -0,0 +1,159 @@
1
module ActsAsXapian
  # Base class for Search and Similar. Subclasses build self.query and then
  # call initialize_query; this class runs the query against the index and
  # turns the matches into model objects.
  class QueryBase
    attr_accessor :offset, :limit, :query, :query_models, :runtime, :cached_results
    @@unlimited = 1000000

    # Return a description of the query
    def description
      self.query.description
    end

    # Returns the mset for the query. The result is memoised; pass
    # reload = true to run the query again.
    #
    # An IOError mentioning DatabaseModifiedError is retried once per call
    # after resetting the enquire object; any other IOError, or a second
    # failure, is re-raised.
    def matches(reload = false)
      return @matches unless @matches.nil? || reload

      # Local flag: a retry in one call must not disable retries in later calls.
      retried = false
      begin
        self.runtime += Benchmark::realtime do
          # If using find_options conditions have Xapian return the entire match set
          # TODO Revisit. This is extremely inefficient for large indices
          first = @postpone_limit ? 0 : @offset
          max_items = @postpone_limit ? @@unlimited : @limit
          @matches = @index.enquire.mset(first, max_items, @check_at_least)
        end
        @matches
      rescue IOError => e
        raise if retried || e.message.to_s !~ /DatabaseModifiedError/
        retried = true
        @index.reset_enquire!
        initialize_enquire
        retry
      end
    end

    # Estimate total number of results
    # Note: Unreliable if using find_options with conditions or joins
    def matches_estimated
      @matches_estimated || self.matches.matches_estimated
    end

    # Return query string with spelling correction, or nil when the query
    # parser has no correction to offer
    def spelling_correction
      correction = @index.query_parser.get_corrected_query_string
      correction.empty? ? nil : correction
    end

    # Return array of hashes, one per match, with the keys :data, :percent,
    # :weight, :collapse_count and :model. The array is cached after the
    # first call.
    def results
      # If they've already pulled out the results, just return them.
      return self.cached_results unless self.cached_results.nil?

      docs = nil
      self.runtime += Benchmark::realtime do
        # Pull out all the results
        docs = self.matches.matches.map do |match|
          {:data => match.document.data, :percent => match.percent, :weight => match.weight, :collapse_count => match.collapse_count}
        end
      end

      # Log time taken, excluding database lookups below which will be displayed separately by ActiveRecord
      logger = ActiveRecord::Base.logger
      logger.debug(" Xapian query (%.5fs) #{self.log_description.gsub('%','%%')}" % self.runtime) if logger

      ids_by_model = group_ids_by_model(docs)

      if @postpone_limit
        # Ask the database which of the matched records satisfy the
        # find_options, fetching only the primary key.
        found = ids_by_model.map do |class_name, ids|
          model = class_name.constantize # constantize is expensive do once
          key_only = @find_options.merge(:select => "#{model.table_name}.#{model.primary_key}")
          model.with_xapian_scope(ids) { model.find(:all, key_only) }.map {|m| m.xapian_document_term }
        end.flatten

        self.runtime += Benchmark::realtime do
          found = found.inject({}) {|s,i| s[i] = true; s } # hash key searching is MUCH faster than an array sequential scan
          docs.delete_if {|doc| !found.delete(doc[:data]) }

          @matches_estimated = docs.size

          # Apply the offset/limit that was withheld from Xapian
          docs = docs[@offset, @limit] || []

          ids_by_model = group_ids_by_model(docs)
        end
      end

      # For each class, look up the associated ids and index the records by id
      records = ids_by_model.inject({}) do |out, (class_name, ids)|
        model = class_name.constantize # constantize is expensive do once
        rows = model.with_xapian_scope(ids) { model.find(:all, @find_options) }
        out[class_name] = rows.inject({}) {|s,row| s[row.id] = row; s }
        out
      end

      # Add the model to each doc
      docs.each do |doc|
        model_name, id = doc[:data].split('-')
        doc[:model] = records[model_name][id.to_i]
      end

      self.cached_results = docs
    end

    protected

    # Resets the runtime counter and fetches the index for the given models.
    def initialize_db(models)
      self.runtime = 0.0

      @index = ReadableIndex.index_for(models)

      raise "ActsAsXapian::ReadableIndex not initialized" if @index.nil?
    end

    # Set self.query before calling this
    def initialize_query(options)
      self.runtime += Benchmark::realtime do
        @offset = options[:offset].to_i
        @limit = (options[:limit] || @@unlimited).to_i
        @check_at_least = (options[:check_at_least] || 100).to_i
        @sort_by_prefix = options[:sort_by_prefix]
        @sort_by_ascending = options[:sort_by_ascending].nil? ? true : options[:sort_by_ascending]
        @collapse_by_prefix = options[:collapse_by_prefix]
        @find_options = options[:find_options]
        # With conditions or joins the limit has to be applied after the
        # database has filtered the matches (see #results).
        @postpone_limit = !(@find_options.blank? || (@find_options[:conditions].blank? && @find_options[:joins].blank?))

        self.cached_results = nil
      end

      initialize_enquire
    end

    # Applies the query, sort order and collapse key to the enquire object.
    # Returns true.
    def initialize_enquire
      self.runtime += Benchmark::realtime do
        @index.enquire.query = self.query

        if @sort_by_prefix.nil?
          @index.enquire.sort_by_relevance!
        else
          # Xapian has inverted the meaning of ascending order to handle relevance sorting
          # "keys which sort higher by string compare are better"
          @index.enquire.sort_by_value_then_relevance!(value_for_prefix(@sort_by_prefix), !@sort_by_ascending)
        end

        if @collapse_by_prefix.nil?
          @index.enquire.collapse_key = Xapian.BAD_VALUENO
        else
          @index.enquire.collapse_key = value_for_prefix(@collapse_by_prefix)
        end
      end
      true
    end

    # Looks up the value slot registered for a prefix; raises if unknown.
    def value_for_prefix(prefix)
      value = @index.values_by_prefix[prefix]
      raise "couldn't find prefix '#{prefix}'" if value.nil?
      value
    end

    # Groups the ids found in the docs' :data ("ModelName-id") by model name.
    def group_ids_by_model(docs)
      docs.inject({}) do |grouped, doc|
        model_name, id = doc[:data].split('-')
        (grouped[model_name] ||= []) << id
        grouped
      end
    end
  end
end
@@ -0,0 +1,117 @@
1
module ActsAsXapian
  # A read-only handle on the Xapian database together with a query parser
  # configured for a particular set of models. Instances are cached per set
  # of models.
  class ReadableIndex < Index
    @@available_indicies = {}

    attr_reader :enquire, :query_parser, :values_by_prefix

    # Takes an array of model classes and returns an index object to be
    # used for searching across the given models
    #
    # Prevents query parser interaction across multiple models unless
    # performing a multi model search
    def self.index_for(models)
      index_key = models.map {|m| m.to_s }.sort.join('---')
      index = @@available_indicies[index_key]
      if index
        # Reuse the cached index, but start a fresh search session on it
        index.reset_enquire!
      else
        index = @@available_indicies[index_key] = new(models)
      end
      index
    end

    # Opens the db for reading and builds the query parser
    #
    # Raises NoXapianRubyBindingsError when the bindings are unavailable and
    # RuntimeError when no model has called acts_as_xapian or the database
    # cannot be opened.
    def initialize(models)
      raise NoXapianRubyBindingsError, "Xapian Ruby bindings not installed" unless ActsAsXapian.bindings_available
      raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty?

      self.class.prepare_environment

      # basic Xapian objects
      begin
        @db = Xapian::Database.new(@@db_path)
        @enquire = Xapian::Enquire.new(@db)
      rescue IOError
        raise "Xapian database not opened; have you built it with rake xapian:rebuild_index ?"
      end

      init_query_parser(models)
    end

    # Creates a new search session
    def reset_enquire!
      @db.reopen # This grabs the latest db updates
      @enquire = Xapian::Enquire.new(@db)
    rescue IOError
      raise "Xapian database not opened; have you built it with rake xapian:rebuild_index ?"
    end

    protected

    # Make a new query parser and tell it about the terms and values declared
    # by each of the given models, checking that the declarations of the
    # different models do not contradict each other.
    def init_query_parser(models)
      # for queries
      @query_parser = Xapian::QueryParser.new
      @query_parser.stemmer = @@stemmer
      @query_parser.stemming_strategy = Xapian::QueryParser::STEM_SOME
      @query_parser.database = @db
      @query_parser.default_op = Xapian::Query::OP_AND

      @terms_by_capital = {}
      @values_by_number = {}
      @values_by_prefix = {}
      @value_ranges_store = []

      # The model/id prefixes are the same for every model, so register them once
      @query_parser.add_boolean_prefix("model", "M")
      @query_parser.add_boolean_prefix("modelid", "I")

      models.each do |klass|
        options = klass.xapian_options
        register_terms(options)
        register_values(options)
      end

      @values_by_prefix.freeze # This can be read outside the instance. Make sure it can't be changed there
    end

    # Validates the :terms entries of one model's options and adds their
    # prefixes to the query parser. Each entry carries the term code at
    # index 1 and the prefix at index 2.
    def register_terms(options)
      (options[:terms] || []).each do |term|
        code, prefix = term[1], term[2]
        # \A and \z anchor the whole string, so a code with an embedded newline is rejected
        raise "Use up to 3 single capital letters for term code" unless code.match(/\A[A-Z]{1,3}\z/)
        raise "M and I are reserved for use as the model/id term" if code == "M" || code == "I"
        raise "model and modelid are reserved for use as the model/id prefixes" if prefix == "model" || prefix == "modelid"
        raise "Z is reserved for stemming terms" if code == "Z"
        raise "Already have code '#{code}' in another model but with different prefix '#{@terms_by_capital[code]}'" if @terms_by_capital.key?(code) && @terms_by_capital[code] != prefix
        @terms_by_capital[code] = prefix
        @query_parser.add_prefix(prefix, code)
      end
    end

    # Validates the :values entries of one model's options and registers a
    # value range processor for every value index not seen before. Each
    # entry carries the value index at index 1, the prefix at index 2 and
    # the type at index 3.
    def register_values(options)
      values = options[:values] || []
      # :number values are handled before the others, as in the original
      # ordering (presumably so their prefixed processors are registered
      # first -- TODO confirm).
      numeric, other = values.partition {|v| v[3] == :number }
      (numeric + other).each do |value|
        index, prefix, type = value[1], value[2], value[3]
        raise "Value index '#{index}' must be an integer, is #{index.class}" unless index.is_a?(Integer)
        raise "Already have value index '#{index}' in another model but with different prefix '#{@values_by_number[index]}'" if @values_by_number.key?(index) && @values_by_number[index] != prefix
        raise "Already have value prefix '#{prefix}' in another model but with different index '#{@values_by_prefix[prefix]}'" if type == :number && @values_by_prefix.key?(prefix) && @values_by_prefix[prefix] != index

        # Only the first model a value index is seen for registers a processor
        unless @values_by_number.key?(index)
          value_range = build_value_range_processor(index, prefix, type)
          @query_parser.add_valuerangeprocessor(value_range)

          # stop it being garbage collected, as
          # add_valuerangeprocessor ref is outside Ruby's GC
          @value_ranges_store.push(value_range)
        end

        @values_by_number[index] = prefix
        @values_by_prefix[prefix] = index
      end
    end

    # Builds the Xapian value range processor matching the value type.
    # Raises RuntimeError for an unknown type.
    def build_value_range_processor(index, prefix, type)
      case type
      when :date
        Xapian::DateValueRangeProcessor.new(index)
      when :string
        Xapian::StringValueRangeProcessor.new(index)
      when :number
        Xapian::NumberValueRangeProcessor.new(index, "#{prefix}:", true)
      else
        raise "Unknown value type '#{type}'"
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
module ActsAsXapian
  # Runs a user-supplied query string against the index. #results (inherited)
  # gives an array of hashes in result order; each hash holds the Rails
  # object under :model plus relevancy details under the other keys.
  class Search < QueryBase
    attr_accessor :query_string

    @@parse_query_flags = [
      Xapian::QueryParser::FLAG_BOOLEAN,
      Xapian::QueryParser::FLAG_PHRASE,
      Xapian::QueryParser::FLAG_LOVEHATE,
      Xapian::QueryParser::FLAG_WILDCARD,
      Xapian::QueryParser::FLAG_SPELLING_CORRECTION
    ].inject {|flags, flag| flags | flag }

    # Passing model_classes is not just a filter: it also guarantees the
    # classes are loaded, so acts_as_xapian has run on them and the query
    # parser knows their fields.
    #
    # model_classes - model classes to search within, e.g. [PublicBody,
    # User]. May be a single class, and class names may be given as strings.
    # query_string - user-entered query string, with syntax much like Google Search
    #
    # options include
    # - :limit - limit the number of records returned
    # - :offset - start with this record number
    # - :check_at_least - used for total match estimates. Set higher for greater accuracy at the cost of slower queries. default: 100
    # - :sort_by_prefix - determines which data field to sort by. default: sort by relevance
    # - :sort_by_ascending - determines which direction to sort. default: true (ascending sort)
    # - :collapse_by_prefix - groups the return set by this prefix
    # - :find_options - These options are passed through to the active record find. Be careful if searching against multiple model classes.
    def initialize(model_classes, query_string, options = {})
      # Normalise the argument into an array of actual classes
      resolved_classes = Array(model_classes).map do |candidate|
        klass = candidate.instance_of?(String) ? candidate.constantize : candidate
        unless klass.instance_of?(Class)
          raise "pass in the model class itself, or a string containing its name"
        end
        klass
      end

      initialize_db(resolved_classes)

      # Keep the raw, Google-like query text
      self.query_string = query_string

      # Restrict the parsed user query to documents of the requested models
      model_terms = resolved_classes.map {|klass| "M#{klass}" }
      restrict_to_models = Xapian::Query.new(Xapian::Query::OP_OR, model_terms)
      parsed = @index.query_parser.parse_query(self.query_string, @@parse_query_flags)
      self.query = Xapian::Query.new(Xapian::Query::OP_AND, restrict_to_models, parsed)

      # Hand over to the shared query setup
      initialize_query(options)
    end

    # Return just the plain words in the query, i.e. no operators and nothing
    # containing a :, . or /. Use this for cheap highlighting with
    # TextHelper::highlight, and excerpt.
    def words_to_highlight
      stripped = self.query_string.gsub(/[^\w:\.\/_]/i, " ")
      collapsed = stripped.gsub(/\s+/, " ")
      collapsed.split(" ").select do |word|
        !word.match(/(:|\.|\/)|^(AND|NOT|OR|XOR)$/)
      end
    end

    # Text for lines in log file
    def log_description
      "Search: #{self.query_string}"
    end
  end
end
@@ -0,0 +1,61 @@
1
module ActsAsXapian
  # Search for models which contain the important terms taken from a specified
  # list of models. i.e. Use to find documents similar to one (or more)
  # documents, or use to refine searches.
  class Similar < QueryBase
    attr_accessor :query_models
    attr_accessor :important_terms

    # model_classes - model classes to search within, e.g. [PublicBody, User]
    # query_models - list of models you want to find things similar to
    # options - passed on to initialize_query
    def initialize(model_classes, query_models, options = {})
      self.initialize_db(model_classes)

      self.runtime += Benchmark::realtime do
        # Case of an array, searching for models similar to those models in the array
        self.query_models = query_models

        # Find the documents by their unique term
        input_models_query = Xapian::Query.new(Xapian::Query::OP_OR, query_models.map {|m| "I#{m.xapian_document_term}" })

        # Local flag: retrying here must not use up the retry that the
        # inherited #matches performs on its own.
        retried = false
        begin
          @index.enquire.query = input_models_query

          # Get set of relevant terms for those documents
          selection = Xapian::RSet.new
          @index.enquire.mset(0, 100, 100).matches.each {|m| selection.add_document(m.docid) } # XXX so this whole method will only work with 100 docs

          # Bit weird that the function to make esets is part of the enquire
          # object. This explains what exactly it does, which is to exclude
          # terms in the existing query.
          # http://thread.gmane.org/gmane.comp.search.xapian.general/3673/focus=3681
          #
          # Do main search for them
          self.important_terms = @index.enquire.eset(40, selection).terms.map {|e| e.name }
        rescue IOError => e
          # Retry once if the database changed underneath us; otherwise re-raise
          raise if retried || e.message.to_s !~ /DatabaseModifiedError/
          retried = true
          @index.reset_enquire!
          retry
        end

        similar_query = Xapian::Query.new(Xapian::Query::OP_OR, self.important_terms)
        # Exclude original
        combined_query = Xapian::Query.new(Xapian::Query::OP_AND_NOT, similar_query, input_models_query)

        # Restrain to model classes
        model_query = Xapian::Query.new(Xapian::Query::OP_OR, model_classes.map {|mc| "M#{mc}" })
        self.query = Xapian::Query.new(Xapian::Query::OP_AND, model_query, combined_query)
      end

      # Call base class constructor
      self.initialize_query(options)
    end

    # Text for lines in log file
    def log_description
      "Similar: #{self.query_models}"
    end
  end
end
@@ -0,0 +1,152 @@
1
module ActsAsXapian
  # Process-wide handle on the writable Xapian database, plus the offline
  # tasks that update or rebuild the index.
  class WriteableIndex < Index
    @@writable_db = nil
    @@writable_suffix = nil

    cattr_reader :term_generator

    class << self
      # Forwards to the writable database. writable_init must have been
      # called first.
      def delete_document(*args)
        @@writable_db.delete_document(*args)
      end

      # Forwards to the writable database. writable_init must have been
      # called first.
      def replace_document(*args)
        @@writable_db.replace_document(*args)
      end

      # Opens (creating if necessary) the writable database at db_path +
      # suffix and sets up the term generator. Does nothing if a writable
      # database is already open.
      def writable_init(suffix = "")
        raise NoXapianRubyBindingsError, "Xapian Ruby bindings not installed" unless ActsAsXapian.bindings_available
        raise "acts_as_xapian hasn't been called in any models" if @@init_values.empty?

        # if DB is not nil, then we're already initialised, so don't do it again
        return unless @@writable_db.nil?

        prepare_environment

        new_path = @@db_path + suffix
        # NOTE(review): @@writable_suffix is only ever assigned together with
        # @@writable_db (below), so after the early return above this check
        # looks unreachable. Left in place; moving it before the return would
        # start raising for callers that currently succeed.
        raise "writable_suffix/suffix inconsistency" if @@writable_suffix && @@writable_suffix != suffix

        # for indexing
        @@writable_db = Xapian::WritableDatabase.new(new_path, Xapian::DB_CREATE_OR_OPEN)
        @@term_generator = Xapian::TermGenerator.new
        @@term_generator.set_flags(Xapian::TermGenerator::FLAG_SPELLING, 0)
        @@term_generator.database = @@writable_db
        @@term_generator.stemmer = @@stemmer
        @@writable_suffix = suffix
      end

      ######################################################################
      # Index

      # Update index with any changes needed, call this offline. Only call it
      # from a script that exits - otherwise Xapian's writable database won't
      # flush your changes. Specifying flush will reduce performance, but
      # make sure that each index update is definitely saved to disk before
      # logging in the database that it has been.
      def update_index(flush = false, verbose = false)
        # Before calling writable_init we have to make sure every model class has been initialized.
        # i.e. has had its class code loaded, so acts_as_xapian has been called inside it, and
        # we have the info from acts_as_xapian.
        model_classes = ActsAsXapianJob.find(:all, :select => 'model', :group => 'model').map {|a| a.model.constantize }
        # If there are no models in the queue, then nothing to do
        return if model_classes.empty?

        self.writable_init

        ids_to_refresh = ActsAsXapianJob.find(:all, :select => 'id').map {|i| i.id }
        ids_to_refresh.each do |id|
          begin
            ActsAsXapianJob.transaction do
              job = ActsAsXapianJob.find(id, :lock => true)
              puts "ActsAsXapian::WriteableIndex.update_index #{job.action} #{job.model} #{job.model_id}" if verbose
              run_job(job)
              job.destroy

              @@writable_db.flush if flush
            end
          rescue => detail
            # print any error, and carry on so other things are indexed
            # XXX If item is later deleted, this should give up, and it
            # won't. It will keep trying (assuming update_index called from
            # regular cron job) and mayhap cause trouble.
            STDERR.puts("#{detail.backtrace.join("\n")}\nFAILED ActsAsXapian::WriteableIndex.update_index job #{id} #{detail}")
          end
        end
      end

      # You must specify *all* the models here, this totally rebuilds the Xapian database.
      # You'll want any readers to reopen the database after this.
      def rebuild_index(model_classes, verbose = false)
        raise "when rebuilding all, please call as first and only thing done in process / task" unless @@writable_db.nil?

        prepare_environment

        # Delete any existing .new database, and open a new one
        new_path = "#{self.db_path}.new"
        remove_flint_database(new_path, "found existing #{new_path} which is not Xapian flint database, please delete for me")
        self.writable_init(".new")

        # Index everything. Remember the newest queued job first, so that
        # only jobs queued before the rebuild started are discarded at the end.
        most_recent_job = ActsAsXapianJob.find(:first, :order => 'id DESC')
        batch_size = 1000
        model_classes.each do |model_class|
          all_ids = model_class.find(:all, :select => model_class.primary_key, :order => model_class.primary_key).map {|i| i.id }
          all_ids.each_slice(batch_size) do |ids|
            puts "ActsAsXapian::WriteableIndex: New batch. Including ids #{ids.first} to #{ids.last}" if verbose
            models = model_class.find(:all, :conditions => {model_class.primary_key => ids})
            models.each do |model|
              puts "ActsAsXapian::WriteableIndex.rebuild_index #{model_class} #{model.id}" if verbose
              model.xapian_index
            end
          end
        end

        @@writable_db.flush

        # Rename into place
        old_path = self.db_path
        temp_path = "#{old_path}.tmp"
        remove_flint_database(temp_path, "temporary database found #{temp_path} which is not Xapian flint database, please delete for me")
        FileUtils.mv(old_path, temp_path) if File.exist?(old_path)
        FileUtils.mv(new_path, old_path)

        # Delete old database
        remove_flint_database(temp_path, "old database now at #{temp_path} is not Xapian flint database, please delete for me")

        # You'll want to restart your FastCGI or Mongrel processes after this,
        # so they get the new db
        ActsAsXapianJob.delete_all ['id <= ?', most_recent_job.id] if most_recent_job
      end

      private

      # Performs one queued job. An 'update' whose record no longer exists is
      # re-run once as a 'destroy'; a RecordNotFound during a destroy is
      # re-raised so the retry cannot loop forever.
      def run_job(job)
        case job.action
        when 'update'
          # XXX Index functions may reference other models, so we could eager load here too?
          model = job.model.constantize.find(job.model_id) # :include => cls.constantize.xapian_options[:include]
          model.xapian_index
        when 'destroy'
          # Make dummy model with right id, just for destruction
          model = job.model.constantize.new
          model.id = job.model_id
          model.xapian_destroy
        else
          raise "unknown ActsAsXapianJob action '#{job.action}'"
        end
      rescue ActiveRecord::RecordNotFound
        raise if job.action == 'destroy'
        job.action = 'destroy'
        retry
      end

      # Removes the directory at path if it exists, refusing (with the given
      # message) when it lacks the "iamflint" marker file.
      def remove_flint_database(path, refusal_message)
        return unless File.exist?(path)
        raise refusal_message unless File.exist?(File.join(path, "iamflint"))
        FileUtils.rm_r(path)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'base' )
2
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'query_base' )
3
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'search' )
4
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'similar' )
5
+ require File.join(File.dirname(__FILE__), 'acts_as_xapian', 'core_ext/array' )
@@ -0,0 +1,7 @@
1
require File.expand_path('spec_helper', File.dirname(__FILE__))

# Generated placeholder: its single example always fails, as a reminder that
# real specs still need to be written.
describe "ActsAsXapian" do
  it "fails" do
    raise "hey buddy, you should probably rename this file and start specing for real"
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
# Put the gem's lib directory first on the load path, followed by this spec
# directory, then load the library and the spec framework.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'), spec_dir)

require 'acts_as_xapian'
require 'spec'
require 'spec/autorun'

# No runner configuration yet
Spec::Runner.configure { |config| }