acts_as_ferret 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,505 @@
1
+ module FerretMixin
2
+ module Acts #:nodoc:
3
+ module ARFerret #:nodoc:
4
+
5
+ # declare the class level helper methods
6
+ # which will load the relevant instance methods defined below when invoked
7
+ module ClassMethods
8
+
9
+ # helper that defines a method that adds the given field to a lucene
10
+ # document instance
11
# Registers +field+ as an indexed field and defines the instance method
# "<field>_to_ferret" that delivers the value to put into the index.
#
# field::   name of the field
# options:: per-field index options; they are merged over the defaults
#           listed below and stored in fields_for_ferret[field]
#
# The generated method never raises because of a failing value lookup:
# the error is logged as a warning and an empty string is indexed instead.
def define_to_field_method(field, options = {})
  defaults = {
    :store       => :no,
    :highlight   => :yes,
    :index       => :yes,
    :term_vector => :with_positions_offsets,
    :boost       => 1.0
  }
  field_options = defaults.merge(options)
  fields_for_ferret[field] = field_options

  accessor_name = "#{field}_to_ferret".to_sym
  define_method(accessor_name) do
    content = begin
      content_for_field_name(field)
    rescue
      logger.warn("Error retrieving value for field #{field}: #{$!}")
      ''
    end
    logger.debug("Adding field #{field} with value '#{content}' to index")
    content
  end
end
30
+
31
# Declares every field named in +field_config+ via define_to_field_method.
#
# field_config:: either a hash-like object (field name => field options)
#                or a list-like object of plain field names. Anything that
#                responds to neither each_pair nor each (e.g. nil) is
#                silently ignored.
def add_fields(field_config)
  if field_config.respond_to?(:each_pair)
    # hash style: every field comes with its own options
    field_config.each_pair { |name, field_options| define_to_field_method(name, field_options) }
  elsif field_config.respond_to?(:each)
    # list style: default options for every field
    field_config.each { |name| define_to_field_method(name) }
  end
end
42
+
43
# Always answers false.
# NOTE(review): presumably consulted by Rails' class reloading so that this
# module is not reloaded between requests - confirm against the Rails
# version in use.
def reloadable?
  false
end
44
+
45
# Registry of index objects shared by all classes using this module;
# ferret_index stores them here keyed by index directory.
@@ferret_indexes = {}
def ferret_indexes
  @@ferret_indexes
end

# Registry of combined (multi model) indexes; multi_index stores them here
# keyed by the concatenated class names, rebuild_index empties it.
@@multi_indexes = {}
def multi_indexes
  @@multi_indexes
end
50
+
51
+ # declares a class as ferret-searchable.
52
+ #
53
+ # options are:
54
+ #
55
+ # fields:: names all fields to include in the index. If not given,
56
+ # all attributes of the class will be indexed. You may also give
57
+ # symbols pointing to instance methods of your model here, i.e.
58
+ # to retrieve and index data from a related model.
59
+ #
60
+ # additional_fields:: names fields to include in the index, in addition
61
+ # to those derived from the db scheme. use if you want to add
62
+ # custom fields derived from methods to the db fields (which will be picked
63
+ # by aaf). This option will be ignored when the fields option is given, in
64
+ # that case additional fields get specified there.
65
+ #
66
+ # index_dir:: declares the directory where to put the index for this class.
67
+ # The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
68
+ # The index directory will be created if it doesn't exist.
69
+ #
70
+ # single_index:: set this to true to let this class use a Ferret
71
+ # index that is shared by all classes having :single_index set to true.
72
+ # :store_class_name is set to true implicitly, as well as index_dir, so
73
+ # don't bother setting these when using this option. the shared index
74
+ # will be located in index/<RAILS_ENV>/shared .
75
+ #
76
+ # store_class_name:: to make search across multiple models useful, set
77
+ # this to true. the model class name will be stored in a keyword field
78
+ # named class_name
79
+ #
80
+ # ferret_options may be:
81
+ # or_default:: - whether query terms are required by
82
+ # default (the default, false), or not (true)
83
+ #
84
+ # analyzer:: the analyzer to use for query parsing (default: nil,
85
+ # which means the ferret StandardAnalyzer gets used)
86
+ #
87
# Makes the calling class searchable through Ferret.
#
# options::        plugin settings (:fields, :additional_fields, :index_dir,
#                  :single_index, :store_class_name); merged over the
#                  defaults built below. Ignored unless it is a Hash.
# ferret_options:: settings handed to Ferret itself; merged over the
#                  defaults built below. Ignored unless it is a Hash.
#
# Side effects: includes InstanceMethods into the class, registers the
# after_create / after_update / after_destroy callbacks, defines the class
# level accessors fields_for_ferret, configuration and
# ferret_configuration, declares the indexed fields and makes sure the
# index directory exists.
def acts_as_ferret(options={}, ferret_options={})
  # plugin defaults: one index directory per model class
  configuration = {
    :index_dir        => "#{FerretMixin::Acts::ARFerret::index_dir}/#{self.name.underscore}",
    :store_class_name => false,
    :single_index     => false
  }
  configuration.merge!(options) if options.is_a?(Hash)

  # a shared index always lives in the 'shared' directory and needs the
  # class name stored with every document
  if configuration[:single_index]
    configuration[:index_dir] = "#{FerretMixin::Acts::ARFerret::index_dir}/shared"
    configuration[:store_class_name] = true
  end

  # Ferret defaults, overridable through ferret_options
  ferret_configuration = {
    :or_default          => false,
    :handle_parse_errors => true,
    :default_field       => '*'
    #:max_clauses => 512,
    #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
    # :wild_card_downcase => true
  }
  ferret_configuration.merge!(ferret_options) if ferret_options.is_a?(Hash)

  # these properties are vital to the plugin, so they are set after the
  # user supplied options and always win
  ferret_configuration[:key]               = configuration[:single_index] ? [:id, :class_name] : :id
  ferret_configuration[:path]              = configuration[:index_dir]
  ferret_configuration[:auto_flush]        = true
  ferret_configuration[:create_if_missing] = true

  # NOTE: the evaluated source refers to the local variables
  # +configuration+ and +ferret_configuration+ of this method, so they
  # must keep exactly these names.
  class_eval <<-EOV
    include FerretMixin::Acts::ARFerret::InstanceMethods


    after_create :ferret_create
    after_update :ferret_update
    after_destroy :ferret_destroy

    cattr_accessor :fields_for_ferret
    cattr_accessor :configuration
    cattr_accessor :ferret_configuration

    @@fields_for_ferret = Hash.new
    @@configuration = configuration
    @@ferret_configuration = ferret_configuration

    if configuration[:fields]
      add_fields(configuration[:fields])
    else
      add_fields(self.new.attributes.keys.map { |k| k.to_sym })
      add_fields(configuration[:additional_fields])
    end

  EOV
  FerretMixin::Acts::ARFerret::ensure_directory(configuration[:index_dir])

  # now that all fields have been added, we can initialize the default
  # field list to be used by the query parser.
  # It will include all content fields *not* marked as :untokenized.
  # This fixes the otherwise failing CommentTest#test_stopwords
  tokenized_fields = fields_for_ferret.keys.reject do |field_name|
    fields_for_ferret[field_name][:index] == :untokenized
  end
  ferret_configuration[:default_field] = tokenized_fields
  logger.debug "set default field list to #{ferret_configuration[:default_field].inspect}"
end
154
+
155
# Directory holding the index of this class, as stored under :index_dir in
# the plugin configuration.
def class_index_dir
  index_directory = configuration[:index_dir]
  index_directory
end
158
+
159
+ # rebuild the index from all data stored for this model.
160
+ # This is called automatically when no index exists yet.
161
+ #
162
+ # TODO: the automatic index initialization only works if
163
+ # every model class has its
164
+ # own index, otherwise the index will get populated only
165
+ # with instances from the first model loaded
166
+ #
167
+ # When calling this method manually, you can give any additional
168
+ # model classes that should also go into this index as parameters.
169
+ # Useful when using the :single_index option.
170
+ # Note that attributes named the same in different models will share
171
+ # the same field options in the shared index.
172
# Rebuilds the index of this class from the records found in the database.
#
# models:: additional model classes whose records should go into the same
#          index (useful together with :single_index). The receiving
#          class is always included.
#
# The field definitions of all given models are merged; a field named the
# same in several models ends up with the options of the model processed
# last. Records are read in batches of 1000 to limit memory consumption
# (fixes #24). After the rebuild all cached multi indexes are closed and
# forgotten, as they might still look at the old index content.
def rebuild_index(*models)
  models << self unless models.include?(self)

  # default attributes for fields
  fi = Ferret::Index::FieldInfos.new(:store => :no,
                                     :index => :yes,
                                     :term_vector => :no,
                                     :boost => 1.0)
  # primary key
  fi.add_field(:id, :store => :yes, :index => :untokenized)
  # class_name
  if configuration[:store_class_name]
    fi.add_field(:class_name, :store => :yes, :index => :untokenized)
  end

  # collect field options from all models
  fields = {}
  models.each do |model|
    fields.update(model.fields_for_ferret)
  end
  logger.debug("class #{self.name}: fields for index: #{fields.keys.join(',')}")
  fields.each_pair do |field, options|
    fi.add_field(field, { :store => :no,
                          :index => :yes }.update(options))
  end
  fi.create_index(ferret_configuration[:path])

  # auto_flush is switched off for the bulk load; we flush once at the end
  index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => false))
  batch_size = 1000
  begin
    models.each do |model|
      # paging with :limit/:offset is only reliable with a stable sort
      # order, otherwise records may be skipped or indexed twice
      batch_order = "#{model.table_name}.#{model.primary_key}"
      model.transaction do
        0.step(model.count, batch_size) do |offset|
          batch = model.find(:all, :limit => batch_size, :offset => offset, :order => batch_order)
          batch.each do |rec|
            index << rec.to_doc
          end
        end
      end
    end
    logger.debug("Created Ferret index in: #{class_index_dir}")
    index.flush
    index.optimize
  ensure
    # release the index even if indexing failed half way through
    index.close
  end

  # close combined index readers, just in case
  # this seems to fix a strange test failure that seems to relate to a
  # multi_index looking at an old version of the content_base index.
  # TODO only close those where necessary (watch inheritance, where
  # self.name is base class of a class where key is made from)
  @@multi_indexes.each_pair do |key, combined_index|
    combined_index.close
  end
  @@multi_indexes = Hash.new
end
225
+
226
+ # Retrieve the Ferret::Index::Index instance for this model class.
227
+ #
228
+ # Index instances are stored in a hash, using the index directory
229
+ # as the key. So model classes sharing a single index will share their
230
+ # Index object, too.
231
# Returns the index object for this class, creating it through
# create_index_instance on first use. Instances are cached in
# ferret_indexes under the index directory.
def ferret_index
  directory = class_index_dir
  cached = ferret_indexes[directory]
  return cached if cached

  ferret_indexes[directory] = create_index_instance
end
234
+
235
+ # creates a new Index::Index instance. Before that, a check is done
236
+ # to see if the index exists in the file system. If not, index rebuild
237
+ # from all model data retrieved by find(:all) is triggered.
238
# Builds a new Ferret::Index::Index from ferret_configuration.
# If the index directory does not contain a 'segments' file yet, the index
# is (re)built from the database first.
def create_index_instance
  index_present = File.file?("#{class_index_dir}/segments")
  rebuild_index if !index_present
  Ferret::Index::Index.new(ferret_configuration)
end
242
+
243
+ # Finds instances by contents. Terms are ANDed by default, can be circumvented
244
+ # by using OR between terms.
245
+ # options:
246
+ # offset:: first hit to retrieve (useful for paging)
247
+ # limit:: number of hits to retrieve, or :all to retrieve
248
+ # all results
249
+ # models:: only for single_index scenarios: a list of other Model classes to
250
+ # include in this search.
251
+ #
252
+ # find_options is a hash passed on to active_record's find when
253
+ # retrieving the data from db, useful to i.e. prefetch relationships.
254
+ #
255
+ # this method returns a SearchResults instance, which really is an Array that has
256
+ # been decorated with a total_hits accessor that delivers the total
257
+ # number of hits (including those not fetched because of a low num_docs
258
+ # value).
259
+ # Please keep in mind that the number of total hits might be wrong if you specify
260
+ # both ferret options and active record find_options that somehow limit the result
261
+ # set (e.g. :num_docs and some :conditions).
262
# Full text search returning model instances.
#
# q::            the query
# options::      search options passed on to find_id_by_contents
# find_options:: passed on to the database find; its :conditions are
#                combined with the restriction to the ids found
#
# Returns a SearchResults built from the records and the total hit count.
# Records are ordered by search rank unless find_options[:order] is
# given; each record gets its score assigned to ferret_score. When
# find_options[:conditions] is given, the hit count is replaced by the
# number of records actually loaded.
def find_by_contents(q, options = {}, find_options = {})
  # classes living in a shared index are handled separately
  if configuration[:single_index]
    return single_index_find_by_contents(q, options, find_options)
  end

  # id => [ rank in the search result, score ]
  hits_by_id = {}
  total_hits = find_id_by_contents(q, options) do |model, id, score|
    hits_by_id[id] = [ hits_by_id.size + 1, score ]
  end

  records = []
  begin
    # TODO: in case of STI AR will filter out hits from other classes for
    # us, but this will lead to less results retrieved --> scoping of the
    # ferret query to self.class is still needed (e.g. by adding a class
    # restriction to the query depending on
    # descends_from_active_record?), maybe better done in
    # find_id_by_contents.
    unless hits_by_id.empty?
      id_restriction = [ "#{table_name}.#{primary_key} in (?)", hits_by_id.keys ]
      conditions = combine_conditions(id_restriction, find_options[:conditions])
      records = find(:all, find_options.merge(:conditions => conditions))
      # correct result size if the user specified conditions
      total_hits = records.length if find_options[:conditions]
    end
  rescue ActiveRecord::RecordNotFound
    logger.warn "REBUILD YOUR INDEX! One of the id's in the index didn't have an associated record"
  end

  # keep the order ferret delivered, unless the database was asked to sort
  unless find_options[:order]
    records = records.sort_by { |record| hits_by_id[record.id.to_s].first }
  end
  # set scores
  records.each { |record| record.ferret_score = hits_by_id[record.id.to_s].last }

  logger.debug "Query: #{q}\nResult ids: #{hits_by_id.keys.inspect},\nresult: #{records}"
  SearchResults.new(records, total_hits)
end
311
+
312
+ # determine all field names in the shared index
313
# Determines the names of all fields in the shared index.
#
# models:: model classes sharing the index; only used for the fallback
#          (content column names) when the index reader cannot report its
#          field names.
#
# The result is computed once and memoized in @single_index_field_names,
# so later calls ignore +models+. The searcher opened for the lookup is
# closed again before returning, even if the lookup fails.
def single_index_field_names(models)
  @single_index_field_names ||= begin
    searcher = Ferret::Search::Searcher.new(class_index_dir)
    begin
      reader = searcher.reader
      if reader.respond_to?(:get_field_names)
        (reader.send(:get_field_names) - ['id', 'class_name']).to_a
      else
        puts <<-END
  unable to retrieve field names for class #{self.name}, please
  consider naming all indexed fields in your call to acts_as_ferret!
        END
        models.map { |m| m.content_columns.map { |col| col.name } }.flatten
      end
    ensure
      # do not leak the searcher (and the reader behind it)
      searcher.close
    end
  end
end
328
+
329
+
330
+ # next steps: check whether it is a ferret bug that we have to build the
331
+ # queries ourselves like this - is it caused by the query parser's
332
+ # downcasing? - see whether it works with ferret now (content_cols) and
333
+ # ask Dave for access to the query parser, or to the reader
334
+ # TODO: slow on large result sets - fetches result set objects one-by-one
335
# find_by_contents for classes living in a shared index.
#
# q::            query string or query object
# options::      search options; :models names the other model classes to
#                include (this class is always included), or :all to
#                search the whole shared index without class restriction
# find_options:: passed to the find call loading each hit
#
# Unless :models is :all, the query is wrapped into a boolean query that
# additionally requires class_name to be one of the wanted classes.
# Neither +options+ nor the array given as :models is modified.
# Returns a SearchResults with the loaded records and the total hit count.
#
# TODO: slow on large result sets - fetches result set objects one-by-one
def single_index_find_by_contents(q, options = {}, find_options = {})
  result = []
  # work on copies so the caller's hash and model list stay untouched
  search_options = options.dup

  unless options[:models] == :all # search needs to be restricted by one or more class names
    requested = options[:models]
    models = requested.is_a?(Array) ? requested.dup : [requested].compact
    # add this class to the list of given models
    models << self unless models.include?(self)
    search_options[:models] = models

    # keep original query
    original_query = q

    # work around ferret bug in #process_query (doesn't ensure the
    # reader is open)
    if q.is_a?(String)
      ferret_index.synchronize do
        ferret_index.send(:ensure_reader_open)
        original_query = ferret_index.process_query(q)
      end
    end

    q = Ferret::Search::BooleanQuery.new
    q.add_query(original_query, :must)
    model_query = Ferret::Search::BooleanQuery.new
    models.each do |model|
      model_query.add_query(Ferret::Search::TermQuery.new(:class_name, model.name), :should)
    end
    q.add_query(model_query, :must)
  end

  total_hits = find_id_by_contents(q, search_options) do |model, id, score|
    # resolve the stored class name step by step so that namespaced
    # models ("Admin::User") are found, too
    klass = model.to_s.split('::').inject(Object) { |scope, const_name| scope.const_get(const_name) }
    record = klass.find(id, find_options.dup)
    record.ferret_score = score
    result << record
  end
  return SearchResults.new(result, total_hits)
end
368
+ protected :single_index_find_by_contents
369
+
370
+ # return the total number of hits for the given query
371
# Runs the query against the index of this class and returns only the
# total number of hits.
#
# q::       the query
# options:: passed on unchanged to the index search
def total_hits(q, options={})
  search_result = ferret_index.search(q, options)
  search_result.total_hits
end
374
+
375
+ # Finds instance model name, ids and scores by contents.
376
+ # Useful if you want to search across models
377
+ # Terms are ANDed by default, can be circumvented by using OR between terms.
378
+ #
379
+ # Example controller code (not tested):
380
+ # def multi_search(query)
381
+ # result = []
382
+ # result << (Model1.find_id_by_contents query)
383
+ # result << (Model2.find_id_by_contents query)
384
+ # result << (Model3.find_id_by_contents query)
385
+ # result.flatten!
386
+ # result.sort! {|element| element[:score]}
387
+ # # Figure out for yourself how to retreive and present the data from modelname and id
388
+ # end
389
+ #
390
+ # Note that the scores retrieved this way aren't normalized across
391
+ # indexes, so that the order of results after sorting by score will
392
+ # differ from the order you would get when running the same query
393
+ # on a single index containing all the data from Model1, Model2
394
+ # and Model3.
395
+ #
396
+ # options are:
397
+ #
398
+ # first_doc:: first hit to retrieve (useful for paging)
399
+ # num_docs:: number of hits to retrieve, or :all to retrieve all
400
+ # results.
401
+ #
402
+ # a block can be given too, it will be executed with every result:
403
+ # find_id_by_contents(q, options) do |model, id, score|
404
+ # id_array << id
405
+ # scores_by_id[id] = score
406
+ # end
407
+ # NOTE: in case a block is given, the total_hits value will be returned
408
+ # instead of the result list!
409
+ #
410
# Searches the index of this class and reports model name, id and score
# of every hit, without touching the database.
#
# q::       the query
# options:: search options; the deprecated :num_docs / :first_doc keys
#           are mapped to :limit / :offset first
#
# With a block, model name, id and score of each hit are yielded and the
# total number of hits is returned. Without a block an array of hashes
# with the keys :model, :id and :score is returned.
def find_id_by_contents(q, options = {})
  deprecated_options_support(options)

  collected = []
  searched_index = ferret_index
  hit_count = searched_index.search_each(q, options) do |hit, score|
    document = searched_index[hit]
    # the class name comes from the index if it is stored there,
    # otherwise every hit belongs to this class
    model_name = configuration[:store_class_name] ? document[:class_name] : self.name
    if block_given?
      yield model_name, document[:id], score
    else
      collected << { :model => model_name, :id => document[:id], :score => score }
    end
  end
  logger.debug "id_score_model array: #{collected.inspect}"
  block_given? ? hit_count : collected
end
429
+
430
+ # requires the store_class_name option of acts_as_ferret to be true
431
+ # for all models queried this way.
432
+ #
433
+ # TODO: not optimal as each instance is fetched in a db call of its
434
+ # own.
435
# Searches the indexes of this class and of +additional_models+ and loads
# the record behind every hit.
#
# query::             the query
# additional_models:: further model classes to search
# options::           search options passed on to id_multi_search
#
# Returns a SearchResults with the loaded records (each with ferret_score
# set) and the total number of hits. Every record is fetched with a find
# call of its own.
def multi_search(query, additional_models = [], options = {})
  result = []
  total_hits = id_multi_search(query, additional_models, options) do |model, id, score|
    # resolve the stored class name step by step so that namespaced
    # models ("Admin::User") are found, too
    klass = model.to_s.split('::').inject(Object) { |scope, const_name| scope.const_get(const_name) }
    record = klass.find(id)
    record.ferret_score = score
    result << record
  end
  SearchResults.new(result, total_hits)
end
444
+
445
+ # returns an array of hashes, each containing :class_name,
446
+ # :id and :score for a hit.
447
+ #
448
+ # if a block is given, class_name, id and score of each hit will
449
+ # be yielded, and the total number of hits is returned.
450
+ #
451
# Searches the combined index of this class and +additional_models+
# without loading any records.
#
# query::             the query
# additional_models:: a model class or an array of model classes; the
#                     array is not modified
# options::           search options; the deprecated :num_docs /
#                     :first_doc keys are mapped to :limit / :offset first
#
# With a block, class name, id and score of each hit are yielded and the
# total number of hits is returned. Without a block an array of hashes
# with the keys :model, :id and :score is returned.
def id_multi_search(query, additional_models = [], options = {})
  deprecated_options_support(options)

  # work on a private list: appending to the caller's array would make it
  # grow with every call and change the key the combined index is cached
  # under
  models = additional_models.is_a?(Array) ? additional_models.dup : [ additional_models ]
  models << self unless models.include?(self)

  searcher = multi_index(models)
  result = []
  total_hits = searcher.search_each(query, options) do |hit, score|
    doc = searcher[hit]
    if block_given?
      yield doc[:class_name], doc[:id], score
    else
      result << { :model => doc[:class_name], :id => doc[:id], :score => score }
    end
  end
  return block_given? ? total_hits : result
end
467
+
468
+ # returns a MultiIndex instance operating on a MultiReader
469
# Returns the combined index for the given model classes, creating it on
# first use.
#
# model_classes:: the classes whose indexes should be searched together;
#                 the array itself is left untouched
#
# Combined indexes are cached in @@multi_indexes under the concatenation
# of the class names in alphabetical order, so the order in which the
# classes are given does not matter.
def multi_index(model_classes)
  # sort a copy - the caller's array must not be reordered
  sorted_classes = model_classes.sort { |a, b| a.name <=> b.name }
  key = sorted_classes.map { |clazz| clazz.name }.join
  multi_config = ferret_configuration.dup
  multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching
  @@multi_indexes[key] ||= MultiIndex.new(sorted_classes, multi_config)
end
476
+
477
+ private
478
+
479
# Maps the deprecated search options :num_docs and :first_doc to their
# successors :limit and :offset, logging a warning for each deprecated
# key found. A value already given under the new key wins.
#
# options:: the search options hash; modified in place
def deprecated_options_support(options)
  legacy_limit = options[:num_docs]
  if legacy_limit
    logger.warn ":num_docs is deprecated, use :limit instead!"
    options[:limit] ||= legacy_limit
  end

  legacy_offset = options[:first_doc]
  if legacy_offset
    logger.warn ":first_doc is deprecated, use :offset instead!"
    options[:offset] ||= legacy_offset
  end
end
489
+
490
+ # combine our conditions with those given by user, if any
491
# Combines our own conditions with those given by the user, if any.
#
# conditions::            array style conditions: the SQL fragment followed
#                         by its bind values. Extended in place and
#                         returned.
# additional_conditions:: any number of user conditions, each either a SQL
#                         string or an array of SQL string plus bind
#                         values; nil / false entries are skipped.
#                         Hash style conditions are not supported.
#
# Every user fragment is ANDed in parentheses, so an OR inside it cannot
# cancel our own restriction. Bind values are appended as they are, i.e.
# an array given for an "in (?)" placeholder stays one array.
def combine_conditions(conditions, *additional_conditions)
  additional_conditions.each do |extra|
    next unless extra
    sql, *bind_values = extra.is_a?(Array) ? extra : [ extra ]
    next unless sql
    conditions.first << " and (" << sql << ")"
    conditions.concat(bind_values)
  end
  conditions
end
499
+
500
+ end
501
+
502
+ end
503
+ end
504
+ end
505
+
@@ -0,0 +1,157 @@
1
+ module FerretMixin
2
+ module Acts #:nodoc:
3
+ module ARFerret #:nodoc:
4
+
5
+ module InstanceMethods
6
+ include MoreLikeThis
7
+
8
+ # Returns an array of strings with the matches highlighted. The +query+ can
9
+ # be either a query String or a Ferret::Search::Query object.
10
+ #
11
+ # === Options
12
+ #
13
+ # field:: field to take the content from. This field has
14
+ # to have its content stored in the index
15
+ # (:store => :yes in your call to aaf). If not
16
+ # given, all stored fields are searched, and the
17
+ # highlighted content found in all of them is returned.
18
+ # set :highlight => :no in the field options to
19
+ # avoid highlighting of contents from a :stored field.
20
+ # excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
21
+ # terms will be in the centre of the excerpt.
22
+ # num_excerpts:: Default: 2. Number of excerpts to return.
23
+ # pre_tag:: Default: "<em>". Tag to place to the left of the
24
+ # match.
25
+ # post_tag:: Default: "</em>". This tag should close the
26
+ # +:pre_tag+.
27
+ # ellipsis:: Default: "...". This is the string that is appended
28
+ # at the beginning and end of excerpts (unless the
29
+ # excerpt hits the start or end of the field). You'll
30
+ # probably want to change this to a Unicode ellipsis
31
+ # character.
32
# Returns excerpts of this record's indexed content with the query matches
# highlighted.
#
# query::   query string or query object
# options:: highlighting options, merged over the defaults
#           :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>'.
#           With :field given only that field is highlighted; otherwise
#           all fields are used that are neither configured with
#           :store => :no nor with :highlight => :no.
#
# Raises (through document_number) if the record cannot be found in the
# index exactly once.
def highlight(query, options = {})
  settings = { :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>' }.merge(options)
  index = self.class.ferret_index
  excerpts = []

  index.synchronize do
    doc_num = document_number
    if settings[:field]
      excerpts << index.highlight(query, doc_num, settings)
    else
      parsed_query = index.process_query(query) # process only once
      fields_for_ferret.each_pair do |field, config|
        next if config[:store] == :no || config[:highlight] == :no
        settings[:field] = field
        excerpts << index.highlight(parsed_query, doc_num, settings)
      end
    end
  end

  # fields without a match deliver nil; the rest is cut down to the
  # requested number of excerpts
  last_position = settings[:num_excerpts] - 1
  excerpts.compact.flatten[0..last_position]
end
51
+
52
+ # re-enable ferret indexing after a call to #disable_ferret
53
# Clears the disabled marker set by disable_ferret.
def ferret_enable
  @ferret_disabled = nil
end
54
+
55
+ # returns true if ferret indexing is enabled
56
# True as long as no disabled marker (:once / :always) is set.
def ferret_enabled?
  @ferret_disabled.nil?
end
57
+
58
+ # Disable Ferret for a specified amount of time. ::once will disable
59
+ # Ferret for the next call to #save (this is the default), ::always will
60
+ # do so for all subsequent calls.
61
+ # To manually trigger reindexing of a record, you can call #ferret_update
62
+ # directly.
63
+ #
64
+ # When given a block, this will be executed without any ferret indexing of
65
+ # this object taking place. The optional argument in this case can be used
66
+ # to indicate if the object should be indexed after executing the block
67
+ # (::index_when_finished). Automatic Ferret indexing of this object will be
68
+ # turned on after the block has been executed. If passed ::index_when_true,
69
+ # the index will only be updated if the block evaluated not to false or nil.
70
# Disables automatic indexing of this object.
#
# Without a block:
# option:: :once (default) or :always; stored as the disabled marker.
#          Anything else raises ArgumentError.
#
# With a block: indexing is disabled while the block runs and re-enabled
# afterwards - also when the block raises, so a failing block cannot
# leave the object unindexable. The block's result is returned.
# option:: :index_when_finished updates the index after the block,
#          :index_when_true does so only if the block returned neither
#          false nor nil. The index is not updated if the block raises.
def disable_ferret(option = :once)
  if block_given?
    @ferret_disabled = :always
    begin
      result = yield
    ensure
      ferret_enable
    end
    ferret_update if option == :index_when_finished || (option == :index_when_true && result)
    result
  elsif [:once, :always].include?(option)
    @ferret_disabled = option
  else
    raise ArgumentError.new("Invalid Argument #{option}")
  end
end
83
+
84
+ # add to index
85
# Callback adding this record to the index of its class. Also used as
# ferret_update.
#
# While indexing is disabled nothing is written; a :once marker is
# cleared so the next save gets indexed again. Always returns true to
# signal success to the callback chain.
def ferret_create
  unless ferret_enabled?
    ferret_enable if @ferret_disabled == :once
    return true
  end

  logger.debug "ferret_create/update: #{self.class.name} : #{id}"
  self.class.ferret_index << to_doc
  true
end
alias ferret_update ferret_create
95
+
96
+
97
+ # remove from index
98
# Callback removing this record from the index of its class.
#
# Errors raised by the delete are logged as a warning and otherwise
# ignored; the method always returns true to signal success to the
# callback chain.
def ferret_destroy
  logger.debug "ferret_destroy: #{self.class.name} : #{id}"
  begin
    self.class.ferret_index.query_delete(query_for_self)
  rescue StandardError => error
    logger.warn("Could not find indexed value for this object: #{error}")
  end
  true
end
107
+
108
+ # convert instance to ferret document
109
# Builds the Ferret document representing this record.
#
# The document holds the id, the class name (only if :store_class_name is
# configured) and, for every entry of fields_for_ferret not flagged with
# :ignore, the value delivered by the matching "<field>_to_ferret" method.
def to_doc
  logger.debug "creating doc for class: #{self.class.name}, id: #{id}"
  document = Ferret::Document.new
  document[:id] = id
  document[:class_name] = self.class.name if configuration[:store_class_name]

  fields_for_ferret.each_pair do |field, config|
    next if config[:ignore]
    document[field] = send("#{field}_to_ferret")
  end
  document
end
127
+
128
+ # returns the ferret document number this record has.
129
# Returns the number of the index document belonging to this record.
# Raises a RuntimeError unless the index contains the record exactly once.
def document_number
  top_docs = self.class.ferret_index.search(query_for_self)
  unless top_docs.total_hits == 1
    raise "cannot determine document number from primary key: #{self}"
  end
  top_docs.hits.first.doc
end
134
+
135
+ # holds the score this record had when it was found via
136
+ # acts_as_ferret
137
+ attr_accessor :ferret_score
138
+
139
+ protected
140
+
141
+ # build a ferret query matching only this record
142
# Builds the query matching only this record: a term query on the id,
# which for classes in a shared index (:single_index) is combined with a
# required term query on the class name.
def query_for_self
  id_query = Ferret::Search::TermQuery.new(:id, id.to_s)
  return id_query unless self.class.configuration[:single_index]

  scoped_query = Ferret::Search::BooleanQuery.new
  scoped_query.add_query(id_query, :must)
  scoped_query.add_query(Ferret::Search::TermQuery.new(:class_name, self.class.name), :must)
  scoped_query
end
152
+
153
+ end
154
+
155
+ end
156
+ end
157
+ end