acts_as_ferret 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/class_methods.rb CHANGED
@@ -1,505 +1,245 @@
1
- module FerretMixin
2
- module Acts #:nodoc:
3
- module ARFerret #:nodoc:
4
-
5
- # declare the class level helper methods
6
- # which will load the relevant instance methods defined below when invoked
7
- module ClassMethods
1
+ module ActsAsFerret
8
2
 
9
- # helper that defines a method that adds the given field to a lucene
10
- # document instance
11
- def define_to_field_method(field, options = {})
12
- options = {
13
- :store => :no,
14
- :highlight => :yes,
15
- :index => :yes,
16
- :term_vector => :with_positions_offsets,
17
- :boost => 1.0 }.update(options)
18
- fields_for_ferret[field] = options
19
- define_method("#{field}_to_ferret".to_sym) do
20
- begin
21
- val = content_for_field_name(field)
22
- rescue
23
- logger.warn("Error retrieving value for field #{field}: #{$!}")
24
- val = ''
25
- end
26
- logger.debug("Adding field #{field} with value '#{val}' to index")
27
- val
28
- end
29
- end
30
-
31
- def add_fields(field_config)
32
- if field_config.respond_to?(:each_pair)
33
- field_config.each_pair do |key,val|
34
- define_to_field_method(key,val)
35
- end
36
- elsif field_config.respond_to?(:each)
37
- field_config.each do |field|
38
- define_to_field_method(field)
39
- end
40
- end
41
- end
42
-
43
- def reloadable?; false end
44
-
45
- @@ferret_indexes = Hash.new
46
- def ferret_indexes; @@ferret_indexes end
47
-
48
- @@multi_indexes = Hash.new
49
- def multi_indexes; @@multi_indexes end
50
-
51
- # declares a class as ferret-searchable.
52
- #
53
- # options are:
54
- #
55
- # fields:: names all fields to include in the index. If not given,
56
- # all attributes of the class will be indexed. You may also give
57
- # symbols pointing to instance methods of your model here, i.e.
58
- # to retrieve and index data from a related model.
59
- #
60
- # additional_fields:: names fields to include in the index, in addition
61
- # to those derived from the db scheme. use if you want to add
62
- # custom fields derived from methods to the db fields (which will be picked
63
- # by aaf). This option will be ignored when the fields option is given, in
64
- # that case additional fields get specified there.
65
- #
66
- # index_dir:: declares the directory where to put the index for this class.
67
- # The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
68
- # The index directory will be created if it doesn't exist.
69
- #
70
- # single_index:: set this to true to let this class use a Ferret
71
- # index that is shared by all classes having :single_index set to true.
72
- # :store_class_name is set to true implicitly, as well as index_dir, so
73
- # don't bother setting these when using this option. the shared index
74
- # will be located in index/<RAILS_ENV>/shared .
75
- #
76
- # store_class_name:: to make search across multiple models useful, set
77
- # this to true. the model class name will be stored in a keyword field
78
- # named class_name
79
- #
80
- # ferret_options may be:
81
- # or_default:: - whether query terms are required by
82
- # default (the default, false), or not (true)
83
- #
84
- # analyzer:: the analyzer to use for query parsing (default: nil,
85
- # which means the ferret StandardAnalyzer gets used)
86
- #
87
- def acts_as_ferret(options={}, ferret_options={})
88
- configuration = {
89
- :index_dir => "#{FerretMixin::Acts::ARFerret::index_dir}/#{self.name.underscore}",
90
- :store_class_name => false,
91
- :single_index => false,
92
- }
93
- ferret_configuration = {
94
- :or_default => false,
95
- :handle_parse_errors => true,
96
- :default_field => '*'
97
- #:max_clauses => 512,
98
- #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
99
- # :wild_card_downcase => true
100
- }
101
- configuration.update(options) if options.is_a?(Hash)
102
-
103
- # apply appropriate settings for shared index
104
- if configuration[:single_index]
105
- configuration[:index_dir] = "#{FerretMixin::Acts::ARFerret::index_dir}/shared"
106
- configuration[:store_class_name] = true
107
- end
108
- ferret_configuration.update(ferret_options) if ferret_options.is_a?(Hash)
109
- # these properties are somewhat vital to the plugin and shouldn't
110
- # be overwritten by the user:
111
- ferret_configuration.update(
112
-
113
- :key => (configuration[:single_index] ? [:id, :class_name] : :id),
114
- :path => configuration[:index_dir],
115
- :auto_flush => true,
116
- :create_if_missing => true
117
- )
118
-
119
- class_eval <<-EOV
120
- include FerretMixin::Acts::ARFerret::InstanceMethods
121
-
122
-
123
- after_create :ferret_create
124
- after_update :ferret_update
125
- after_destroy :ferret_destroy
126
-
127
- cattr_accessor :fields_for_ferret
128
- cattr_accessor :configuration
129
- cattr_accessor :ferret_configuration
130
-
131
- @@fields_for_ferret = Hash.new
132
- @@configuration = configuration
133
- @@ferret_configuration = ferret_configuration
134
-
135
- if configuration[:fields]
136
- add_fields(configuration[:fields])
137
- else
138
- add_fields(self.new.attributes.keys.map { |k| k.to_sym })
139
- add_fields(configuration[:additional_fields])
140
- end
141
-
142
- EOV
143
- FerretMixin::Acts::ARFerret::ensure_directory configuration[:index_dir]
3
+ module ClassMethods
4
+
5
+ # rebuild the index from all data stored for this model.
6
+ # This is called automatically when no index exists yet.
7
+ #
8
+ # When calling this method manually, you can give any additional
9
+ # model classes that should also go into this index as parameters.
10
+ # Useful when using the :single_index option.
11
+ # Note that attributes named the same in different models will share
12
+ # the same field options in the shared index.
13
+ def rebuild_index(*models)
14
+ models << self unless models.include?(self)
15
+ aaf_index.rebuild_index(models.map(&:to_s))
16
+ end
17
+
18
+ # Retrieve the index instance for this model class. This can either be a
19
+ # LocalIndex, or a RemoteIndex instance.
20
+ #
21
+ # Index instances are stored in a hash, using the index directory
22
+ # as the key. So model classes sharing a single index will share their
23
+ # Index object, too.
24
+ def aaf_index
25
+ ActsAsFerret::ferret_indexes[aaf_configuration[:index_dir]] ||= create_index_instance
26
+ end
27
+
28
+ # Finds instances by contents. Terms are ANDed by default, can be circumvented
29
+ # by using OR between terms.
30
+ # options:
31
+ # offset:: first hit to retrieve (useful for paging)
32
+ # limit:: number of hits to retrieve, or :all to retrieve
33
+ # all results
34
+ # lazy:: Array of field names whose contents should be read directly
35
+ # from the index. Those fields have to be marked
36
+ # :store => :yes in their field options. Give true to get all
37
+ # stored fields (if you have a shared index, you have to
38
+ # explicitly state the fields you want to fetch, true won't
39
+ # work)
40
+ # models:: only for single_index scenarios: an Array of other Model classes to
41
+ # include in this search. Use :all to query all models.
42
+ #
43
+ # find_options is a hash passed on to active_record's find when
44
+ # retrieving the data from db, useful e.g. to prefetch relationships.
45
+ #
46
+ # this method returns a SearchResults instance, which really is an Array that has
47
+ # been decorated with a total_hits accessor that delivers the total
48
+ # number of hits (including those not fetched because of a low :limit
49
+ # value).
50
+ # Please keep in mind that the number of total hits might be wrong if you specify
51
+ # both ferret options and active record find_options that somehow limit the result
52
+ # set (e.g. :limit and some :conditions).
53
+ def find_by_contents(q, options = {}, find_options = {})
54
+ total_hits, result = find_records_lazy_or_not q, options, find_options
55
+ logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
56
+ return SearchResults.new(result, total_hits)
57
+ end
58
+
59
+
60
+
61
+ # return the total number of hits for the given query
62
+ def total_hits(q, options={})
63
+ aaf_index.total_hits(q, options)
64
+ end
144
65
 
145
- # now that all fields have been added, we can initialize the default
146
- # field list to be used by the query parser.
147
- # It will include all content fields *not* marked as :untokenized.
148
- # This fixes the otherwise failing CommentTest#test_stopwords
149
- ferret_configuration[:default_field] = fields_for_ferret.keys.select do |f|
150
- fields_for_ferret[f][:index] != :untokenized
151
- end
152
- logger.debug "set default field list to #{ferret_configuration[:default_field].inspect}"
153
- end
154
-
155
- def class_index_dir
156
- configuration[:index_dir]
66
+ # Finds instance model name, ids and scores by contents.
67
+ # Useful e.g. if you want to search across models or do not want to fetch
68
+ # all result records (yet).
69
+ #
70
+ # Options are the same as for find_by_contents
71
+ #
72
+ # A block can be given too, it will be executed with every result:
73
+ # find_id_by_contents(q, options) do |model, id, score|
74
+ # id_array << id
75
+ # scores_by_id[id] = score
76
+ # end
77
+ # NOTE: in case a block is given, only the total_hits value will be returned
78
+ # instead of the [total_hits, results] array!
79
+ #
80
+ def find_id_by_contents(q, options = {}, &block)
81
+ deprecated_options_support(options)
82
+ aaf_index.find_id_by_contents(q, options, &block)
83
+ end
84
+
85
+ # requires the store_class_name option of acts_as_ferret to be true
86
+ # for all models queried this way.
87
+ def multi_search(query, additional_models = [], options = {}, find_options = {})
88
+ result = []
89
+
90
+ if options[:lazy]
91
+ logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
92
+ total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
93
+ result << FerretResult.new(model, id, score, data)
157
94
  end
158
-
159
- # rebuild the index from all data stored for this model.
160
- # This is called automatically when no index exists yet.
161
- #
162
- # TODO: the automatic index initialization only works if
163
- # every model class has its
164
- # own index, otherwise the index will get populated only
165
- # with instances from the first model loaded
166
- #
167
- # When calling this method manually, you can give any additional
168
- # model classes that should also go into this index as parameters.
169
- # Useful when using the :single_index option.
170
- # Note that attributes named the same in different models will share
171
- # the same field options in the shared index.
172
- def rebuild_index(*models)
173
- models << self
174
- # default attributes for fields
175
- fi = Ferret::Index::FieldInfos.new(:store => :no,
176
- :index => :yes,
177
- :term_vector => :no,
178
- :boost => 1.0)
179
- # primary key
180
- fi.add_field(:id, :store => :yes, :index => :untokenized)
181
- # class_name
182
- if configuration[:store_class_name]
183
- fi.add_field(:class_name, :store => :yes, :index => :untokenized)
184
- end
185
- # collect field options from all models
186
- fields = {}
187
- models.each do |model|
188
- fields.update(model.fields_for_ferret)
189
- end
190
- logger.debug("class #{self.name}: fields for index: #{fields.keys.join(',')}")
191
- fields.each_pair do |field, options|
192
- fi.add_field(field, { :store => :no,
193
- :index => :yes }.update(options))
194
- end
195
- fi.create_index(ferret_configuration[:path])
196
-
197
- index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => false))
198
- #index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => true))
199
- batch_size = 1000
200
- models.each do |model|
201
- # index in batches of 1000 to limit memory consumption (fixes #24)
202
- model.transaction do
203
- 0.step(model.count, batch_size) do |i|
204
- model.find(:all, :limit => batch_size, :offset => i).each do |rec|
205
- index << rec.to_doc
206
- end
207
- end
208
- end
209
- end
210
- logger.debug("Created Ferret index in: #{class_index_dir}")
211
- index.flush
212
- index.optimize
213
- index.close
214
- # close combined index readers, just in case
215
- # this seems to fix a strange test failure that seems to relate to a
216
- # multi_index looking at an old version of the content_base index.
217
- @@multi_indexes.each_pair do |key, index|
218
- # puts "#{key} -- #{self.name}"
219
- # TODO only close those where necessary (watch inheritance, where
220
- # self.name is base class of a class where key is made from)
221
- index.close #if key =~ /#{self.name}/
222
- end
223
- @@multi_indexes = Hash.new
224
- end
225
-
226
- # Retrieve the Ferret::Index::Index instance for this model class.
227
- #
228
- # Index instances are stored in a hash, using the index directory
229
- # as the key. So model classes sharing a single index will share their
230
- # Index object, too.
231
- def ferret_index
232
- ferret_indexes[class_index_dir] ||= create_index_instance
233
- end
234
-
235
- # creates a new Index::Index instance. Before that, a check is done
236
- # to see if the index exists in the file system. If not, index rebuild
237
- # from all model data retrieved by find(:all) is triggered.
238
- def create_index_instance
239
- rebuild_index unless File.file? "#{class_index_dir}/segments"
240
- Ferret::Index::Index.new(ferret_configuration)
95
+ else
96
+ id_arrays = {}
97
+ rank = 0
98
+ total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
99
+ id_arrays[model] ||= {}
100
+ id_arrays[model][id] = [ rank += 1, score ]
241
101
  end
242
-
243
- # Finds instances by contents. Terms are ANDed by default, can be circumvented
244
- # by using OR between terms.
245
- # options:
246
- # offset:: first hit to retrieve (useful for paging)
247
- # limit:: number of hits to retrieve, or :all to retrieve
248
- # all results
249
- # models:: only for single_index scenarios: a list of other Model classes to
250
- # include in this search.
251
- #
252
- # find_options is a hash passed on to active_record's find when
253
- # retrieving the data from db, useful to i.e. prefetch relationships.
254
- #
255
- # this method returns a SearchResults instance, which really is an Array that has
256
- # been decorated with a total_hits accessor that delivers the total
257
- # number of hits (including those not fetched because of a low num_docs
258
- # value).
259
- # Please keep in mind that the number of total hits might be wrong if you specify
260
- # both ferret options and active record find_options that somehow limit the result
261
- # set (e.g. :num_docs and some :conditions).
262
- def find_by_contents(q, options = {}, find_options = {})
263
- # handle shared index
264
- return single_index_find_by_contents(q, options, find_options) if configuration[:single_index]
265
- results = {}
266
- total_hits = find_id_by_contents(q, options) do |model, id, score|
267
- # stores ids, index of each id for later ordering of
268
- # results, and score
269
- results[id] = [ results.size + 1, score ]
270
- end
271
- result = []
272
- begin
273
- # TODO: in case of STI AR will filter out hits from other
274
- # classes for us, but this
275
- # will lead to less results retrieved --> scoping of ferret query
276
- # to self.class is still needed.
277
- # from the ferret ML (thanks Curtis Hatter)
278
- # > I created a method in my base STI class so I can scope my query. For scoping
279
- # > I used something like the following line:
280
- # >
281
- # > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
282
- # >
283
- # > Though you could make it more generic by simply asking
284
- # > "self.descends_from_active_record?" which is how rails decides if it should
285
- # > scope your "find" query for STI models. You can check out "base.rb" in
286
- # > activerecord to see that.
287
- # but maybe better do the scoping in find_id_by_contents...
288
- if results.any?
289
- conditions = combine_conditions([ "#{table_name}.#{primary_key} in (?)", results.keys ],
290
- find_options[:conditions])
291
- result = self.find(:all,
292
- find_options.merge(:conditions => conditions))
293
- # correct result size if the user specified conditions
294
- total_hits = result.length if find_options[:conditions]
295
- end
296
- rescue ActiveRecord::RecordNotFound
297
- logger.warn "REBUILD YOUR INDEX! One of the id's in the index didn't have an associated record"
298
- end
102
+ result = retrieve_records(id_arrays, find_options)
103
+ end
299
104
 
300
- # order results as they were found by ferret, unless an AR :order
301
- # option was given
302
- unless find_options[:order]
303
- result.sort! { |a, b| results[a.id.to_s].first <=> results[b.id.to_s].first }
304
- end
305
- # set scores
306
- result.each { |r| r.ferret_score = results[r.id.to_s].last }
307
-
308
- logger.debug "Query: #{q}\nResult ids: #{results.keys.inspect},\nresult: #{result}"
309
- return SearchResults.new(result, total_hits)
310
- end
105
+ SearchResults.new(result, total_hits)
106
+ end
107
+
108
+ # returns an array of hashes, each containing :class_name,
109
+ # :id and :score for a hit.
110
+ #
111
+ # if a block is given, class_name, id and score of each hit will
112
+ # be yielded, and the total number of hits is returned.
113
+ def id_multi_search(query, additional_models = [], options = {}, &proc)
114
+ deprecated_options_support(options)
115
+ additional_models = [ additional_models ] unless additional_models.is_a? Array
116
+ additional_models << self
117
+ aaf_index.id_multi_search(query, additional_models.map(&:to_s), options, &proc)
118
+ end
119
+
311
120
 
312
- # determine all field names in the shared index
313
- def single_index_field_names(models)
314
- @single_index_field_names ||= (
315
- searcher = Ferret::Search::Searcher.new(class_index_dir)
316
- if searcher.reader.respond_to?(:get_field_names)
317
- (searcher.reader.send(:get_field_names) - ['id', 'class_name']).to_a
318
- else
319
- puts <<-END
320
- unable to retrieve field names for class #{self.name}, please
321
- consider naming all indexed fields in your call to acts_as_ferret!
322
- END
323
- models.map { |m| m.content_columns.map { |col| col.name } }.flatten
324
- end
325
- )
121
+ protected
326
122
 
327
- end
328
-
123
+ def find_records_lazy_or_not(q, options = {}, find_options = {})
124
+ if options[:lazy]
125
+ logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
126
+ lazy_find_by_contents q, options
127
+ else
128
+ ar_find_by_contents q, options, find_options
129
+ end
130
+ end
329
131
 
330
- # next: check whether it is a ferret bug that we have to build the queries
331
- # ourselves like this - is it caused by the query parser's downcasing? - see whether it
332
- # now works with ferret (content_cols) and ask Dave for access to the query parser, or
333
- # to the reader
334
- # TODO: slow on large result sets - fetches result set objects one-by-one
335
- def single_index_find_by_contents(q, options = {}, find_options = {})
336
- result = []
132
+ def ar_find_by_contents(q, options = {}, find_options = {})
133
+ result_ids = {}
134
+ total_hits = find_id_by_contents(q, options) do |model, id, score, data|
135
+ # stores ids, index of each id for later ordering of
136
+ # results, and score
137
+ result_ids[id] = [ result_ids.size + 1, score ]
138
+ end
337
139
 
338
- unless options[:models] == :all # search needs to be restricted by one or more class names
339
- options[:models] ||= []
340
- # add this class to the list of given models
341
- options[:models] << self unless options[:models].include?(self)
342
- # keep original query
343
- original_query = q
344
-
345
- # work around ferret bug in #process_query (doesn't ensure the
346
- # reader is open)
347
- ferret_index.synchronize do
348
- ferret_index.send(:ensure_reader_open)
349
- original_query = ferret_index.process_query(q)
350
- end if q.is_a? String
140
+ result = retrieve_records( { self.name => result_ids }, find_options )
141
+ # correct result size if the user specified conditions
142
+ total_hits = result.length if find_options[:conditions]
351
143
 
352
- q = Ferret::Search::BooleanQuery.new
353
- q.add_query(original_query, :must)
354
- model_query = Ferret::Search::BooleanQuery.new
355
- options[:models].each do |model|
356
- model_query.add_query(Ferret::Search::TermQuery.new(:class_name, model.name), :should)
357
- end
358
- q.add_query(model_query, :must)
359
- end
360
- #puts q.to_s
361
- total_hits = find_id_by_contents(q, options) do |model, id, score|
362
- o = Object.const_get(model).find(id, find_options.dup)
363
- o.ferret_score = score
364
- result << o
365
- end
366
- return SearchResults.new(result, total_hits)
367
- end
368
- protected :single_index_find_by_contents
144
+ # order results as they were found by ferret, unless an AR :order
145
+ # option was given
146
+ result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
369
147
 
370
- # return the total number of hits for the given query
371
- def total_hits(q, options={})
372
- ferret_index.search(q, options).total_hits
373
- end
148
+ [ total_hits, result ]
149
+ end
374
150
 
375
- # Finds instance model name, ids and scores by contents.
376
- # Useful if you want to search across models
377
- # Terms are ANDed by default, can be circumvented by using OR between terms.
378
- #
379
- # Example controller code (not tested):
380
- # def multi_search(query)
381
- # result = []
382
- # result << (Model1.find_id_by_contents query)
383
- # result << (Model2.find_id_by_contents query)
384
- # result << (Model3.find_id_by_contents query)
385
- # result.flatten!
386
- # result.sort! {|element| element[:score]}
387
- # # Figure out for yourself how to retrieve and present the data from modelname and id
388
- # end
389
- #
390
- # Note that the scores retrieved this way aren't normalized across
391
- # indexes, so that the order of results after sorting by score will
392
- # differ from the order you would get when running the same query
393
- # on a single index containing all the data from Model1, Model2
394
- # and Model3
395
- #
396
- # options are:
397
- #
398
- # first_doc:: first hit to retrieve (useful for paging)
399
- # num_docs:: number of hits to retrieve, or :all to retrieve all
400
- # results.
401
- #
402
- # a block can be given too, it will be executed with every result:
403
- # find_id_by_contents(q, options) do |model, id, score|
404
- # id_array << id
405
- # scores_by_id[id] = score
406
- # end
407
- # NOTE: in case a block is given, the total_hits value will be returned
408
- # instead of the result list!
409
- #
410
- def find_id_by_contents(q, options = {})
411
- deprecated_options_support(options)
151
+ def lazy_find_by_contents(q, options = {})
152
+ result = []
153
+ total_hits = find_id_by_contents(q, options) do |model, id, score, data|
154
+ result << FerretResult.new(model, id, score, data)
155
+ end
156
+ [ total_hits, result ]
157
+ end
412
158
 
413
- result = []
414
- index = self.ferret_index
415
- # puts "query: #{index.process_query q}"
416
- total_hits = index.search_each(q, options) do |hit, score|
417
- # only collect result data if we intend to return it
418
- doc = index[hit]
419
- model = configuration[:store_class_name] ? doc[:class_name] : self.name
420
- if block_given?
421
- yield model, doc[:id], score
422
- else
423
- result << { :model => model, :id => doc[:id], :score => score }
424
- end
425
- end
426
- logger.debug "id_score_model array: #{result.inspect}"
427
- return block_given? ? total_hits : result
428
- end
429
-
430
- # requires the store_class_name option of acts_as_ferret to be true
431
- # for all models queried this way.
432
- #
433
- # TODO: not optimal as each instance is fetched in a db call for its
434
- # own.
435
- def multi_search(query, additional_models = [], options = {})
436
- result = []
437
- total_hits = id_multi_search(query, additional_models, options) do |model, id, score|
438
- r = Object.const_get(model).find(id)
439
- r.ferret_score = score
440
- result << r
441
- end
442
- SearchResults.new(result, total_hits)
443
- end
444
-
445
- # returns an array of hashes, each containing :class_name,
446
- # :id and :score for a hit.
447
- #
448
- # if a block is given, class_name, id and score of each hit will
449
- # be yielded, and the total number of hits is returned.
450
- #
451
- def id_multi_search(query, additional_models = [], options = {})
452
- deprecated_options_support(options)
453
- additional_models = [ additional_models ] unless additional_models.is_a? Array
454
- additional_models << self
455
- searcher = multi_index(additional_models)
456
- result = []
457
- total_hits = searcher.search_each(query, options) do |hit, score|
458
- doc = searcher[hit]
459
- if block_given?
460
- yield doc[:class_name], doc[:id], score
461
- else
462
- result << { :model => doc[:class_name], :id => doc[:id], :score => score }
463
- end
464
- end
465
- return block_given? ? total_hits : result
466
- end
467
-
468
- # returns a MultiIndex instance operating on a MultiReader
469
- def multi_index(model_classes)
470
- model_classes.sort! { |a, b| a.name <=> b.name }
471
- key = model_classes.inject("") { |s, clazz| s << clazz.name }
472
- multi_config = ferret_configuration.dup
473
- multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching
474
- @@multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)
475
- end
476
159
 
477
- private
160
+ def model_find(model, id, find_options = {})
161
+ model.constantize.find(id, find_options)
162
+ end
478
163
 
479
- def deprecated_options_support(options)
480
- if options[:num_docs]
481
- logger.warn ":num_docs is deprecated, use :limit instead!"
482
- options[:limit] ||= options[:num_docs]
483
- end
484
- if options[:first_doc]
485
- logger.warn ":first_doc is deprecated, use :offset instead!"
486
- options[:offset] ||= options[:first_doc]
487
- end
164
+ # retrieves search result records from a data structure like this:
165
+ # { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }
166
+ #
167
+ # TODO: in case of STI AR will filter out hits from other
168
+ # classes for us, but this
169
+ # will lead to fewer results retrieved --> scoping of ferret query
170
+ # to self.class is still needed.
171
+ # from the ferret ML (thanks Curtis Hatter)
172
+ # > I created a method in my base STI class so I can scope my query. For scoping
173
+ # > I used something like the following line:
174
+ # >
175
+ # > query << " role:#{self.class.eql?(Contents) ? '*' : self.class}"
176
+ # >
177
+ # > Though you could make it more generic by simply asking
178
+ # > "self.descends_from_active_record?" which is how rails decides if it should
179
+ # > scope your "find" query for STI models. You can check out "base.rb" in
180
+ # > activerecord to see that.
181
+ # but maybe better do the scoping in find_id_by_contents...
182
+ def retrieve_records(id_arrays, find_options = {})
183
+ result = []
184
+ # get objects for each model
185
+ id_arrays.each do |model, id_array|
186
+ next if id_array.empty?
187
+ begin
188
+ model = model.constantize
189
+ # merge conditions
190
+ conditions = combine_conditions([ "#{model.table_name}.#{primary_key} in (?)", id_array.keys ],
191
+ find_options[:conditions])
192
+ # fetch
193
+ tmp_result = model.find(:all, find_options.merge(:conditions => conditions))
194
+ # set scores and rank
195
+ tmp_result.each do |record|
196
+ record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
197
+ end
198
+ # merge with result array
199
+ result.concat tmp_result
200
+ rescue TypeError
201
+ raise "#{model} must use :store_class_name option if you want to use multi_search against it.\n#{$!}"
488
202
  end
203
+ end
204
+ return result
205
+ end
489
206
 
490
- # combine our conditions with those given by user, if any
491
- def combine_conditions(conditions, *additional_conditions)
492
- if additional_conditions.any?
493
- cust_opts = additional_conditions.dup.flatten
494
- conditions.first << " and " << cust_opts.shift
495
- conditions.concat(cust_opts)
496
- end
497
- conditions
498
- end
207
+ def deprecated_options_support(options)
208
+ if options[:num_docs]
209
+ logger.warn ":num_docs is deprecated, use :limit instead!"
210
+ options[:limit] ||= options[:num_docs]
211
+ end
212
+ if options[:first_doc]
213
+ logger.warn ":first_doc is deprecated, use :offset instead!"
214
+ options[:offset] ||= options[:first_doc]
215
+ end
216
+ end
499
217
 
218
+ # combine our conditions with those given by user, if any
219
+ def combine_conditions(conditions, *additional_conditions)
220
+ returning conditions do
221
+ if additional_conditions.any?
222
+ cust_opts = additional_conditions.dup.flatten
223
+ conditions.first << " and " << cust_opts.shift
224
+ conditions.concat(cust_opts)
225
+ end
500
226
  end
501
-
502
227
  end
228
+
229
+ # creates a new index instance (RemoteIndex, SharedIndex or LocalIndex, depending on the configuration). Before that, a check is done
230
+ # to see if the index exists in the file system. If not, index rebuild
231
+ # from all model data retrieved by find(:all) is triggered.
232
+ def create_index_instance
233
+ if aaf_configuration[:remote]
234
+ RemoteIndex
235
+ elsif aaf_configuration[:single_index]
236
+ SharedIndex
237
+ else
238
+ LocalIndex
239
+ end.new(aaf_configuration)
240
+ end
241
+
503
242
  end
243
+
504
244
  end
505
245