acts_as_ferret 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/class_methods.rb CHANGED
@@ -1,505 +1,245 @@
1
- module FerretMixin
2
- module Acts #:nodoc:
3
- module ARFerret #:nodoc:
4
-
5
- # declare the class level helper methods
6
- # which will load the relevant instance methods defined below when invoked
7
- module ClassMethods
1
+ module ActsAsFerret
8
2
 
9
- # helper that defines a method that adds the given field to a lucene
10
- # document instance
11
- def define_to_field_method(field, options = {})
12
- options = {
13
- :store => :no,
14
- :highlight => :yes,
15
- :index => :yes,
16
- :term_vector => :with_positions_offsets,
17
- :boost => 1.0 }.update(options)
18
- fields_for_ferret[field] = options
19
- define_method("#{field}_to_ferret".to_sym) do
20
- begin
21
- val = content_for_field_name(field)
22
- rescue
23
- logger.warn("Error retrieving value for field #{field}: #{$!}")
24
- val = ''
25
- end
26
- logger.debug("Adding field #{field} with value '#{val}' to index")
27
- val
28
- end
29
- end
30
-
31
- def add_fields(field_config)
32
- if field_config.respond_to?(:each_pair)
33
- field_config.each_pair do |key,val|
34
- define_to_field_method(key,val)
35
- end
36
- elsif field_config.respond_to?(:each)
37
- field_config.each do |field|
38
- define_to_field_method(field)
39
- end
40
- end
41
- end
42
-
43
- def reloadable?; false end
44
-
45
- @@ferret_indexes = Hash.new
46
- def ferret_indexes; @@ferret_indexes end
47
-
48
- @@multi_indexes = Hash.new
49
- def multi_indexes; @@multi_indexes end
50
-
51
- # declares a class as ferret-searchable.
52
- #
53
- # options are:
54
- #
55
- # fields:: names all fields to include in the index. If not given,
56
- # all attributes of the class will be indexed. You may also give
57
- # symbols pointing to instance methods of your model here, i.e.
58
- # to retrieve and index data from a related model.
59
- #
60
- # additional_fields:: names fields to include in the index, in addition
61
- # to those derived from the db scheme. use if you want to add
62
- # custom fields derived from methods to the db fields (which will be picked
63
- # by aaf). This option will be ignored when the fields option is given, in
64
- # that case additional fields get specified there.
65
- #
66
- # index_dir:: declares the directory where to put the index for this class.
67
- # The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
68
- # The index directory will be created if it doesn't exist.
69
- #
70
- # single_index:: set this to true to let this class use a Ferret
71
- # index that is shared by all classes having :single_index set to true.
72
- # :store_class_name is set to true implicitly, as well as index_dir, so
73
- # don't bother setting these when using this option. the shared index
74
- # will be located in index/<RAILS_ENV>/shared .
75
- #
76
- # store_class_name:: to make search across multiple models useful, set
77
- # this to true. the model class name will be stored in a keyword field
78
- # named class_name
79
- #
80
- # ferret_options may be:
81
- # or_default:: - whether query terms are required by
82
- # default (the default, false), or not (true)
83
- #
84
- # analyzer:: the analyzer to use for query parsing (default: nil,
85
- # which means the ferret StandardAnalyzer gets used)
86
- #
87
- def acts_as_ferret(options={}, ferret_options={})
88
- configuration = {
89
- :index_dir => "#{FerretMixin::Acts::ARFerret::index_dir}/#{self.name.underscore}",
90
- :store_class_name => false,
91
- :single_index => false,
92
- }
93
- ferret_configuration = {
94
- :or_default => false,
95
- :handle_parse_errors => true,
96
- :default_field => '*'
97
- #:max_clauses => 512,
98
- #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
99
- # :wild_card_downcase => true
100
- }
101
- configuration.update(options) if options.is_a?(Hash)
102
-
103
- # apply appropriate settings for shared index
104
- if configuration[:single_index]
105
- configuration[:index_dir] = "#{FerretMixin::Acts::ARFerret::index_dir}/shared"
106
- configuration[:store_class_name] = true
107
- end
108
- ferret_configuration.update(ferret_options) if ferret_options.is_a?(Hash)
109
- # these properties are somewhat vital to the plugin and shouldn't
110
- # be overwritten by the user:
111
- ferret_configuration.update(
112
-
113
- :key => (configuration[:single_index] ? [:id, :class_name] : :id),
114
- :path => configuration[:index_dir],
115
- :auto_flush => true,
116
- :create_if_missing => true
117
- )
118
-
119
- class_eval <<-EOV
120
- include FerretMixin::Acts::ARFerret::InstanceMethods
121
-
122
-
123
- after_create :ferret_create
124
- after_update :ferret_update
125
- after_destroy :ferret_destroy
126
-
127
- cattr_accessor :fields_for_ferret
128
- cattr_accessor :configuration
129
- cattr_accessor :ferret_configuration
130
-
131
- @@fields_for_ferret = Hash.new
132
- @@configuration = configuration
133
- @@ferret_configuration = ferret_configuration
134
-
135
- if configuration[:fields]
136
- add_fields(configuration[:fields])
137
- else
138
- add_fields(self.new.attributes.keys.map { |k| k.to_sym })
139
- add_fields(configuration[:additional_fields])
140
- end
141
-
142
- EOV
143
- FerretMixin::Acts::ARFerret::ensure_directory configuration[:index_dir]
3
+ module ClassMethods
4
+
5
+ # rebuild the index from all data stored for this model.
6
+ # This is called automatically when no index exists yet.
7
+ #
8
+ # When calling this method manually, you can give any additional
9
+ # model classes that should also go into this index as parameters.
10
+ # Useful when using the :single_index option.
11
+ # Note that attributes named the same in different models will share
12
+ # the same field options in the shared index.
13
+ def rebuild_index(*models)
14
+ models << self unless models.include?(self)
15
+ aaf_index.rebuild_index(models.map(&:to_s))
16
+ end
17
+
18
+ # Retrieve the index instance for this model class. This can either be a
19
+ # LocalIndex, or a RemoteIndex instance.
20
+ #
21
+ # Index instances are stored in a hash, using the index directory
22
+ # as the key. So model classes sharing a single index will share their
23
+ # Index object, too.
24
+ def aaf_index
25
+ ActsAsFerret::ferret_indexes[aaf_configuration[:index_dir]] ||= create_index_instance
26
+ end
27
+
28
+ # Finds instances by contents. Terms are ANDed by default, can be circumvented
29
+ # by using OR between terms.
30
+ # options:
31
+ # offset:: first hit to retrieve (useful for paging)
32
+ # limit:: number of hits to retrieve, or :all to retrieve
33
+ # all results
34
+ # lazy:: Array of field names whose contents should be read directly
35
+ # from the index. Those fields have to be marked
36
+ # :store => :yes in their field options. Give true to get all
37
+ # stored fields (if you have a shared index, you have to
38
+ # explicitly state the fields you want to fetch, true won't
39
+ # work)
40
+ # models:: only for single_index scenarios: an Array of other Model classes to
41
+ # include in this search. Use :all to query all models.
42
+ #
43
+ # find_options is a hash passed on to active_record's find when
44
+ # retrieving the data from db, useful e.g. to prefetch relationships.
45
+ #
46
+ # this method returns a SearchResults instance, which really is an Array that has
47
+ # been decorated with a total_hits accessor that delivers the total
48
+ # number of hits (including those not fetched because of a low :limit
49
+ # value).
50
+ # Please keep in mind that the number of total hits might be wrong if you specify
51
+ # both ferret options and active record find_options that somehow limit the result
52
+ # set (e.g. :limit and some :conditions).
53
+ def find_by_contents(q, options = {}, find_options = {})
54
+ total_hits, result = find_records_lazy_or_not q, options, find_options
55
+ logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
56
+ return SearchResults.new(result, total_hits)
57
+ end
58
+
59
+
60
+
61
+ # return the total number of hits for the given query
62
+ def total_hits(q, options={})
63
+ aaf_index.total_hits(q, options)
64
+ end
144
65
 
145
- # now that all fields have been added, we can initialize the default
146
- # field list to be used by the query parser.
147
- # It will include all content fields *not* marked as :untokenized.
148
- # This fixes the otherwise failing CommentTest#test_stopwords
149
- ferret_configuration[:default_field] = fields_for_ferret.keys.select do |f|
150
- fields_for_ferret[f][:index] != :untokenized
151
- end
152
- logger.debug "set default field list to #{ferret_configuration[:default_field].inspect}"
153
- end
154
-
155
- def class_index_dir
156
- configuration[:index_dir]
66
+ # Finds instance model name, ids and scores by contents.
67
+ # Useful e.g. if you want to search across models or do not want to fetch
68
+ # all result records (yet).
69
+ #
70
+ # Options are the same as for find_by_contents
71
+ #
72
+ # A block can be given too, it will be executed with every result:
73
+ # find_id_by_contents(q, options) do |model, id, score|
74
+ # id_array << id
75
+ # scores_by_id[id] = score
76
+ # end
77
+ # NOTE: in case a block is given, only the total_hits value will be returned
78
+ # instead of the [total_hits, results] array!
79
+ #
80
+ def find_id_by_contents(q, options = {}, &block)
81
+ deprecated_options_support(options)
82
+ aaf_index.find_id_by_contents(q, options, &block)
83
+ end
84
+
85
+ # requires the store_class_name option of acts_as_ferret to be true
86
+ # for all models queried this way.
87
+ def multi_search(query, additional_models = [], options = {}, find_options = {})
88
+ result = []
89
+
90
+ if options[:lazy]
91
+ logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
92
+ total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
93
+ result << FerretResult.new(model, id, score, data)
157
94
  end
158
-
159
- # rebuild the index from all data stored for this model.
160
- # This is called automatically when no index exists yet.
161
- #
162
- # TODO: the automatic index initialization only works if
163
- # every model class has it's
164
- # own index, otherwise the index will get populated only
165
- # with instances from the first model loaded
166
- #
167
- # When calling this method manually, you can give any additional
168
- # model classes that should also go into this index as parameters.
169
- # Useful when using the :single_index option.
170
- # Note that attributes named the same in different models will share
171
- # the same field options in the shared index.
172
- def rebuild_index(*models)
173
- models << self
174
- # default attributes for fields
175
- fi = Ferret::Index::FieldInfos.new(:store => :no,
176
- :index => :yes,
177
- :term_vector => :no,
178
- :boost => 1.0)
179
- # primary key
180
- fi.add_field(:id, :store => :yes, :index => :untokenized)
181
- # class_name
182
- if configuration[:store_class_name]
183
- fi.add_field(:class_name, :store => :yes, :index => :untokenized)
184
- end
185
- # collect field options from all models
186
- fields = {}
187
- models.each do |model|
188
- fields.update(model.fields_for_ferret)
189
- end
190
- logger.debug("class #{self.name}: fields for index: #{fields.keys.join(',')}")
191
- fields.each_pair do |field, options|
192
- fi.add_field(field, { :store => :no,
193
- :index => :yes }.update(options))
194
- end
195
- fi.create_index(ferret_configuration[:path])
196
-
197
- index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => false))
198
- #index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => true))
199
- batch_size = 1000
200
- models.each do |model|
201
- # index in batches of 1000 to limit memory consumption (fixes #24)
202
- model.transaction do
203
- 0.step(model.count, batch_size) do |i|
204
- model.find(:all, :limit => batch_size, :offset => i).each do |rec|
205
- index << rec.to_doc
206
- end
207
- end
208
- end
209
- end
210
- logger.debug("Created Ferret index in: #{class_index_dir}")
211
- index.flush
212
- index.optimize
213
- index.close
214
- # close combined index readers, just in case
215
- # this seems to fix a strange test failure that seems to relate to a
216
- # multi_index looking at an old version of the content_base index.
217
- @@multi_indexes.each_pair do |key, index|
218
- # puts "#{key} -- #{self.name}"
219
- # TODO only close those where necessary (watch inheritance, where
220
- # self.name is base class of a class where key is made from)
221
- index.close #if key =~ /#{self.name}/
222
- end
223
- @@multi_indexes = Hash.new
224
- end
225
-
226
- # Retrieve the Ferret::Index::Index instance for this model class.
227
- #
228
- # Index instances are stored in a hash, using the index directory
229
- # as the key. So model classes sharing a single index will share their
230
- # Index object, too.
231
- def ferret_index
232
- ferret_indexes[class_index_dir] ||= create_index_instance
233
- end
234
-
235
- # creates a new Index::Index instance. Before that, a check is done
236
- # to see if the index exists in the file system. If not, index rebuild
237
- # from all model data retrieved by find(:all) is triggered.
238
- def create_index_instance
239
- rebuild_index unless File.file? "#{class_index_dir}/segments"
240
- Ferret::Index::Index.new(ferret_configuration)
95
+ else
96
+ id_arrays = {}
97
+ rank = 0
98
+ total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
99
+ id_arrays[model] ||= {}
100
+ id_arrays[model][id] = [ rank += 1, score ]
241
101
  end
242
-
243
- # Finds instances by contents. Terms are ANDed by default, can be circumvented
244
- # by using OR between terms.
245
- # options:
246
- # offset:: first hit to retrieve (useful for paging)
247
- # limit:: number of hits to retrieve, or :all to retrieve
248
- # all results
249
- # models:: only for single_index scenarios: a list of other Model classes to
250
- # include in this search.
251
- #
252
- # find_options is a hash passed on to active_record's find when
253
- # retrieving the data from db, useful to i.e. prefetch relationships.
254
- #
255
- # this method returns a SearchResults instance, which really is an Array that has
256
- # been decorated with a total_hits accessor that delivers the total
257
- # number of hits (including those not fetched because of a low num_docs
258
- # value).
259
- # Please keep in mind that the number of total hits might be wrong if you specify
260
- # both ferret options and active record find_options that somehow limit the result
261
- # set (e.g. :num_docs and some :conditions).
262
- def find_by_contents(q, options = {}, find_options = {})
263
- # handle shared index
264
- return single_index_find_by_contents(q, options, find_options) if configuration[:single_index]
265
- results = {}
266
- total_hits = find_id_by_contents(q, options) do |model, id, score|
267
- # stores ids, index of each id for later ordering of
268
- # results, and score
269
- results[id] = [ results.size + 1, score ]
270
- end
271
- result = []
272
- begin
273
- # TODO: in case of STI AR will filter out hits from other
274
- # classes for us, but this
275
- # will lead to less results retrieved --> scoping of ferret query
276
- # to self.class is still needed.
277
- # from the ferret ML (thanks Curtis Hatter)
278
- # > I created a method in my base STI class so I can scope my query. For scoping
279
- # > I used something like the following line:
280
- # >
281
- # > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
282
- # >
283
- # > Though you could make it more generic by simply asking
284
- # > "self.descends_from_active_record?" which is how rails decides if it should
285
- # > scope your "find" query for STI models. You can check out "base.rb" in
286
- # > activerecord to see that.
287
- # but maybe better do the scoping in find_id_by_contents...
288
- if results.any?
289
- conditions = combine_conditions([ "#{table_name}.#{primary_key} in (?)", results.keys ],
290
- find_options[:conditions])
291
- result = self.find(:all,
292
- find_options.merge(:conditions => conditions))
293
- # correct result size if the user specified conditions
294
- total_hits = result.length if find_options[:conditions]
295
- end
296
- rescue ActiveRecord::RecordNotFound
297
- logger.warn "REBUILD YOUR INDEX! One of the id's in the index didn't have an associated record"
298
- end
102
+ result = retrieve_records(id_arrays, find_options)
103
+ end
299
104
 
300
- # order results as they were found by ferret, unless an AR :order
301
- # option was given
302
- unless find_options[:order]
303
- result.sort! { |a, b| results[a.id.to_s].first <=> results[b.id.to_s].first }
304
- end
305
- # set scores
306
- result.each { |r| r.ferret_score = results[r.id.to_s].last }
307
-
308
- logger.debug "Query: #{q}\nResult ids: #{results.keys.inspect},\nresult: #{result}"
309
- return SearchResults.new(result, total_hits)
310
- end
105
+ SearchResults.new(result, total_hits)
106
+ end
107
+
108
+ # returns an array of hashes, each containing :class_name,
109
+ # :id and :score for a hit.
110
+ #
111
+ # if a block is given, class_name, id and score of each hit will
112
+ # be yielded, and the total number of hits is returned.
113
+ def id_multi_search(query, additional_models = [], options = {}, &proc)
114
+ deprecated_options_support(options)
115
+ additional_models = [ additional_models ] unless additional_models.is_a? Array
116
+ additional_models << self
117
+ aaf_index.id_multi_search(query, additional_models.map(&:to_s), options, &proc)
118
+ end
119
+
311
120
 
312
- # determine all field names in the shared index
313
- def single_index_field_names(models)
314
- @single_index_field_names ||= (
315
- searcher = Ferret::Search::Searcher.new(class_index_dir)
316
- if searcher.reader.respond_to?(:get_field_names)
317
- (searcher.reader.send(:get_field_names) - ['id', 'class_name']).to_a
318
- else
319
- puts <<-END
320
- unable to retrieve field names for class #{self.name}, please
321
- consider naming all indexed fields in your call to acts_as_ferret!
322
- END
323
- models.map { |m| m.content_columns.map { |col| col.name } }.flatten
324
- end
325
- )
121
+ protected
326
122
 
327
- end
328
-
123
+ def find_records_lazy_or_not(q, options = {}, find_options = {})
124
+ if options[:lazy]
125
+ logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
126
+ lazy_find_by_contents q, options
127
+ else
128
+ ar_find_by_contents q, options, find_options
129
+ end
130
+ end
329
131
 
330
- # weiter: checken ob ferret-bug, dass wir die queries so selber bauen
331
- # muessen - liegt am downcasen des qparsers ? - gucken ob jetzt mit
332
- # ferret geht (content_cols) und dave um zugriff auf qp bitten, oder
333
- # auf reader
334
- # TODO: slow on large result sets - fetches result set objects one-by-one
335
- def single_index_find_by_contents(q, options = {}, find_options = {})
336
- result = []
132
+ def ar_find_by_contents(q, options = {}, find_options = {})
133
+ result_ids = {}
134
+ total_hits = find_id_by_contents(q, options) do |model, id, score, data|
135
+ # stores ids, index of each id for later ordering of
136
+ # results, and score
137
+ result_ids[id] = [ result_ids.size + 1, score ]
138
+ end
337
139
 
338
- unless options[:models] == :all # search needs to be restricted by one or more class names
339
- options[:models] ||= []
340
- # add this class to the list of given models
341
- options[:models] << self unless options[:models].include?(self)
342
- # keep original query
343
- original_query = q
344
-
345
- # work around ferret bug in #process_query (doesn't ensure the
346
- # reader is open)
347
- ferret_index.synchronize do
348
- ferret_index.send(:ensure_reader_open)
349
- original_query = ferret_index.process_query(q)
350
- end if q.is_a? String
140
+ result = retrieve_records( { self.name => result_ids }, find_options )
141
+ # correct result size if the user specified conditions
142
+ total_hits = result.length if find_options[:conditions]
351
143
 
352
- q = Ferret::Search::BooleanQuery.new
353
- q.add_query(original_query, :must)
354
- model_query = Ferret::Search::BooleanQuery.new
355
- options[:models].each do |model|
356
- model_query.add_query(Ferret::Search::TermQuery.new(:class_name, model.name), :should)
357
- end
358
- q.add_query(model_query, :must)
359
- end
360
- #puts q.to_s
361
- total_hits = find_id_by_contents(q, options) do |model, id, score|
362
- o = Object.const_get(model).find(id, find_options.dup)
363
- o.ferret_score = score
364
- result << o
365
- end
366
- return SearchResults.new(result, total_hits)
367
- end
368
- protected :single_index_find_by_contents
144
+ # order results as they were found by ferret, unless an AR :order
145
+ # option was given
146
+ result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
369
147
 
370
- # return the total number of hits for the given query
371
- def total_hits(q, options={})
372
- ferret_index.search(q, options).total_hits
373
- end
148
+ [ total_hits, result ]
149
+ end
374
150
 
375
- # Finds instance model name, ids and scores by contents.
376
- # Useful if you want to search across models
377
- # Terms are ANDed by default, can be circumvented by using OR between terms.
378
- #
379
- # Example controller code (not tested):
380
- # def multi_search(query)
381
- # result = []
382
- # result << (Model1.find_id_by_contents query)
383
- # result << (Model2.find_id_by_contents query)
384
- # result << (Model3.find_id_by_contents query)
385
- # result.flatten!
386
- # result.sort! {|element| element[:score]}
387
- # # Figure out for yourself how to retreive and present the data from modelname and id
388
- # end
389
- #
390
- # Note that the scores retrieved this way aren't normalized across
391
- # indexes, so that the order of results after sorting by score will
392
- # differ from the order you would get when running the same query
393
- # on a single index containing all the data from Model1, Model2
394
- # and Model
395
- #
396
- # options are:
397
- #
398
- # first_doc:: first hit to retrieve (useful for paging)
399
- # num_docs:: number of hits to retrieve, or :all to retrieve all
400
- # results.
401
- #
402
- # a block can be given too, it will be executed with every result:
403
- # find_id_by_contents(q, options) do |model, id, score|
404
- # id_array << id
405
- # scores_by_id[id] = score
406
- # end
407
- # NOTE: in case a block is given, the total_hits value will be returned
408
- # instead of the result list!
409
- #
410
- def find_id_by_contents(q, options = {})
411
- deprecated_options_support(options)
151
+ def lazy_find_by_contents(q, options = {})
152
+ result = []
153
+ total_hits = find_id_by_contents(q, options) do |model, id, score, data|
154
+ result << FerretResult.new(model, id, score, data)
155
+ end
156
+ [ total_hits, result ]
157
+ end
412
158
 
413
- result = []
414
- index = self.ferret_index
415
- # puts "query: #{index.process_query q}"
416
- total_hits = index.search_each(q, options) do |hit, score|
417
- # only collect result data if we intend to return it
418
- doc = index[hit]
419
- model = configuration[:store_class_name] ? doc[:class_name] : self.name
420
- if block_given?
421
- yield model, doc[:id], score
422
- else
423
- result << { :model => model, :id => doc[:id], :score => score }
424
- end
425
- end
426
- logger.debug "id_score_model array: #{result.inspect}"
427
- return block_given? ? total_hits : result
428
- end
429
-
430
- # requires the store_class_name option of acts_as_ferret to be true
431
- # for all models queried this way.
432
- #
433
- # TODO: not optimal as each instance is fetched in a db call for it's
434
- # own.
435
- def multi_search(query, additional_models = [], options = {})
436
- result = []
437
- total_hits = id_multi_search(query, additional_models, options) do |model, id, score|
438
- r = Object.const_get(model).find(id)
439
- r.ferret_score = score
440
- result << r
441
- end
442
- SearchResults.new(result, total_hits)
443
- end
444
-
445
- # returns an array of hashes, each containing :class_name,
446
- # :id and :score for a hit.
447
- #
448
- # if a block is given, class_name, id and score of each hit will
449
- # be yielded, and the total number of hits is returned.
450
- #
451
- def id_multi_search(query, additional_models = [], options = {})
452
- deprecated_options_support(options)
453
- additional_models = [ additional_models ] unless additional_models.is_a? Array
454
- additional_models << self
455
- searcher = multi_index(additional_models)
456
- result = []
457
- total_hits = searcher.search_each(query, options) do |hit, score|
458
- doc = searcher[hit]
459
- if block_given?
460
- yield doc[:class_name], doc[:id], score
461
- else
462
- result << { :model => doc[:class_name], :id => doc[:id], :score => score }
463
- end
464
- end
465
- return block_given? ? total_hits : result
466
- end
467
-
468
- # returns a MultiIndex instance operating on a MultiReader
469
- def multi_index(model_classes)
470
- model_classes.sort! { |a, b| a.name <=> b.name }
471
- key = model_classes.inject("") { |s, clazz| s << clazz.name }
472
- multi_config = ferret_configuration.dup
473
- multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching
474
- @@multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)
475
- end
476
159
 
477
- private
160
+ def model_find(model, id, find_options = {})
161
+ model.constantize.find(id, find_options)
162
+ end
478
163
 
479
- def deprecated_options_support(options)
480
- if options[:num_docs]
481
- logger.warn ":num_docs is deprecated, use :limit instead!"
482
- options[:limit] ||= options[:num_docs]
483
- end
484
- if options[:first_doc]
485
- logger.warn ":first_doc is deprecated, use :offset instead!"
486
- options[:offset] ||= options[:first_doc]
487
- end
164
+ # retrieves search result records from a data structure like this:
165
+ # { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }
166
+ #
167
+ # TODO: in case of STI AR will filter out hits from other
168
+ # classes for us, but this
169
+ # will lead to fewer results retrieved --> scoping of ferret query
170
+ # to self.class is still needed.
171
+ # from the ferret ML (thanks Curtis Hatter)
172
+ # > I created a method in my base STI class so I can scope my query. For scoping
173
+ # > I used something like the following line:
174
+ # >
175
+ # > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
176
+ # >
177
+ # > Though you could make it more generic by simply asking
178
+ # > "self.descends_from_active_record?" which is how rails decides if it should
179
+ # > scope your "find" query for STI models. You can check out "base.rb" in
180
+ # > activerecord to see that.
181
+ # but maybe better do the scoping in find_id_by_contents...
182
+ def retrieve_records(id_arrays, find_options = {})
183
+ result = []
184
+ # get objects for each model
185
+ id_arrays.each do |model, id_array|
186
+ next if id_array.empty?
187
+ begin
188
+ model = model.constantize
189
+ # merge conditions
190
+ conditions = combine_conditions([ "#{model.table_name}.#{primary_key} in (?)", id_array.keys ],
191
+ find_options[:conditions])
192
+ # fetch
193
+ tmp_result = model.find(:all, find_options.merge(:conditions => conditions))
194
+ # set scores and rank
195
+ tmp_result.each do |record|
196
+ record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
197
+ end
198
+ # merge with result array
199
+ result.concat tmp_result
200
+ rescue TypeError
201
+ raise "#{model} must use :store_class_name option if you want to use multi_search against it.\n#{$!}"
488
202
  end
203
+ end
204
+ return result
205
+ end
489
206
 
490
- # combine our conditions with those given by user, if any
491
- def combine_conditions(conditions, *additional_conditions)
492
- if additional_conditions.any?
493
- cust_opts = additional_conditions.dup.flatten
494
- conditions.first << " and " << cust_opts.shift
495
- conditions.concat(cust_opts)
496
- end
497
- conditions
498
- end
207
+ def deprecated_options_support(options)
208
+ if options[:num_docs]
209
+ logger.warn ":num_docs is deprecated, use :limit instead!"
210
+ options[:limit] ||= options[:num_docs]
211
+ end
212
+ if options[:first_doc]
213
+ logger.warn ":first_doc is deprecated, use :offset instead!"
214
+ options[:offset] ||= options[:first_doc]
215
+ end
216
+ end
499
217
 
218
+ # combine our conditions with those given by user, if any
219
+ def combine_conditions(conditions, *additional_conditions)
220
+ returning conditions do
221
+ if additional_conditions.any?
222
+ cust_opts = additional_conditions.dup.flatten
223
+ conditions.first << " and " << cust_opts.shift
224
+ conditions.concat(cust_opts)
225
+ end
500
226
  end
501
-
502
227
  end
228
+
229
+ # creates a new index instance: a RemoteIndex if :remote is configured, a
230
+ # SharedIndex if :single_index is configured, and a LocalIndex otherwise.
231
+ # This method itself does not check whether the index exists in the file system.
232
+ def create_index_instance
233
+ if aaf_configuration[:remote]
234
+ RemoteIndex
235
+ elsif aaf_configuration[:single_index]
236
+ SharedIndex
237
+ else
238
+ LocalIndex
239
+ end.new(aaf_configuration)
240
+ end
241
+
503
242
  end
243
+
504
244
  end
505
245