acts_as_ferret 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +29 -6
- data/config/ferret_server.yml +12 -0
- data/install.rb +19 -0
- data/lib/act_methods.rb +194 -0
- data/lib/acts_as_ferret.rb +74 -52
- data/lib/class_methods.rb +222 -482
- data/lib/ferret_result.rb +36 -0
- data/lib/ferret_server.rb +89 -0
- data/lib/index.rb +31 -0
- data/lib/instance_methods.rb +112 -143
- data/lib/local_index.rb +257 -0
- data/lib/more_like_this.rb +47 -41
- data/lib/multi_index.rb +8 -11
- data/lib/remote_index.rb +50 -0
- data/lib/shared_index.rb +14 -0
- data/lib/shared_index_class_methods.rb +90 -0
- data/rakefile +88 -147
- data/script/ferret_server +18 -0
- data/script/ferret_start +67 -0
- data/script/ferret_stop +22 -0
- metadata +23 -11
- data/.init.rb.swp +0 -0
- data/.rakefile.swp +0 -0
- data/lib/.acts_as_ferret.rb.swp +0 -0
- data/lib/.class_methods.rb.swo +0 -0
- data/lib/.class_methods.rb.swp +0 -0
data/lib/class_methods.rb
CHANGED
@@ -1,505 +1,245 @@
|
|
1
|
-
module
|
2
|
-
module Acts #:nodoc:
|
3
|
-
module ARFerret #:nodoc:
|
4
|
-
|
5
|
-
# declare the class level helper methods
|
6
|
-
# which will load the relevant instance methods defined below when invoked
|
7
|
-
module ClassMethods
|
1
|
+
module ActsAsFerret
|
8
2
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
# index that is shared by all classes having :single_index set to true.
|
72
|
-
# :store_class_name is set to true implicitly, as well as index_dir, so
|
73
|
-
# don't bother setting these when using this option. the shared index
|
74
|
-
# will be located in index/<RAILS_ENV>/shared .
|
75
|
-
#
|
76
|
-
# store_class_name:: to make search across multiple models useful, set
|
77
|
-
# this to true. the model class name will be stored in a keyword field
|
78
|
-
# named class_name
|
79
|
-
#
|
80
|
-
# ferret_options may be:
|
81
|
-
# or_default:: - whether query terms are required by
|
82
|
-
# default (the default, false), or not (true)
|
83
|
-
#
|
84
|
-
# analyzer:: the analyzer to use for query parsing (default: nil,
|
85
|
-
# which means the ferret StandardAnalyzer gets used)
|
86
|
-
#
|
87
|
-
def acts_as_ferret(options={}, ferret_options={})
|
88
|
-
configuration = {
|
89
|
-
:index_dir => "#{FerretMixin::Acts::ARFerret::index_dir}/#{self.name.underscore}",
|
90
|
-
:store_class_name => false,
|
91
|
-
:single_index => false,
|
92
|
-
}
|
93
|
-
ferret_configuration = {
|
94
|
-
:or_default => false,
|
95
|
-
:handle_parse_errors => true,
|
96
|
-
:default_field => '*'
|
97
|
-
#:max_clauses => 512,
|
98
|
-
#:analyzer => Ferret::Analysis::StandardAnalyzer.new,
|
99
|
-
# :wild_card_downcase => true
|
100
|
-
}
|
101
|
-
configuration.update(options) if options.is_a?(Hash)
|
102
|
-
|
103
|
-
# apply appropriate settings for shared index
|
104
|
-
if configuration[:single_index]
|
105
|
-
configuration[:index_dir] = "#{FerretMixin::Acts::ARFerret::index_dir}/shared"
|
106
|
-
configuration[:store_class_name] = true
|
107
|
-
end
|
108
|
-
ferret_configuration.update(ferret_options) if ferret_options.is_a?(Hash)
|
109
|
-
# these properties are somewhat vital to the plugin and shouldn't
|
110
|
-
# be overwritten by the user:
|
111
|
-
ferret_configuration.update(
|
112
|
-
|
113
|
-
:key => (configuration[:single_index] ? [:id, :class_name] : :id),
|
114
|
-
:path => configuration[:index_dir],
|
115
|
-
:auto_flush => true,
|
116
|
-
:create_if_missing => true
|
117
|
-
)
|
118
|
-
|
119
|
-
class_eval <<-EOV
|
120
|
-
include FerretMixin::Acts::ARFerret::InstanceMethods
|
121
|
-
|
122
|
-
|
123
|
-
after_create :ferret_create
|
124
|
-
after_update :ferret_update
|
125
|
-
after_destroy :ferret_destroy
|
126
|
-
|
127
|
-
cattr_accessor :fields_for_ferret
|
128
|
-
cattr_accessor :configuration
|
129
|
-
cattr_accessor :ferret_configuration
|
130
|
-
|
131
|
-
@@fields_for_ferret = Hash.new
|
132
|
-
@@configuration = configuration
|
133
|
-
@@ferret_configuration = ferret_configuration
|
134
|
-
|
135
|
-
if configuration[:fields]
|
136
|
-
add_fields(configuration[:fields])
|
137
|
-
else
|
138
|
-
add_fields(self.new.attributes.keys.map { |k| k.to_sym })
|
139
|
-
add_fields(configuration[:additional_fields])
|
140
|
-
end
|
141
|
-
|
142
|
-
EOV
|
143
|
-
FerretMixin::Acts::ARFerret::ensure_directory configuration[:index_dir]
|
3
|
+
module ClassMethods
|
4
|
+
|
5
|
+
# rebuild the index from all data stored for this model.
|
6
|
+
# This is called automatically when no index exists yet.
|
7
|
+
#
|
8
|
+
# When calling this method manually, you can give any additional
|
9
|
+
# model classes that should also go into this index as parameters.
|
10
|
+
# Useful when using the :single_index option.
|
11
|
+
# Note that attributes named the same in different models will share
|
12
|
+
# the same field options in the shared index.
|
13
|
+
def rebuild_index(*models)
|
14
|
+
models << self unless models.include?(self)
|
15
|
+
aaf_index.rebuild_index(models.map(&:to_s))
|
16
|
+
end
|
17
|
+
|
18
|
+
# Retrieve the index instance for this model class. This can either be a
|
19
|
+
# LocalIndex, or a RemoteIndex instance.
|
20
|
+
#
|
21
|
+
# Index instances are stored in a hash, using the index directory
|
22
|
+
# as the key. So model classes sharing a single index will share their
|
23
|
+
# Index object, too.
|
24
|
+
def aaf_index
|
25
|
+
ActsAsFerret::ferret_indexes[aaf_configuration[:index_dir]] ||= create_index_instance
|
26
|
+
end
|
27
|
+
|
28
|
+
# Finds instances by contents. Terms are ANDed by default, can be circumvented
|
29
|
+
# by using OR between terms.
|
30
|
+
# options:
|
31
|
+
# offset:: first hit to retrieve (useful for paging)
|
32
|
+
# limit:: number of hits to retrieve, or :all to retrieve
|
33
|
+
# all results
|
34
|
+
# lazy:: Array of field names whose contents should be read directly
|
35
|
+
# from the index. Those fields have to be marked
|
36
|
+
# :store => :yes in their field options. Give true to get all
|
37
|
+
# stored fields (if you have a shared index, you have to
|
38
|
+
# explicitly state the fields you want to fetch, true won't
|
39
|
+
# work)
|
40
|
+
# models:: only for single_index scenarios: an Array of other Model classes to
|
41
|
+
# include in this search. Use :all to query all models.
|
42
|
+
#
|
43
|
+
# find_options is a hash passed on to active_record's find when
|
44
|
+
# retrieving the data from db, useful e.g. to prefetch relationships.
|
45
|
+
#
|
46
|
+
# this method returns a SearchResults instance, which really is an Array that has
|
47
|
+
# been decorated with a total_hits accessor that delivers the total
|
48
|
+
# number of hits (including those not fetched because of a low :limit
|
49
|
+
# value).
|
50
|
+
# Please keep in mind that the number of total hits might be wrong if you specify
|
51
|
+
# both ferret options and active record find_options that somehow limit the result
|
52
|
+
# set (e.g. :limit and some :conditions).
|
53
|
+
def find_by_contents(q, options = {}, find_options = {})
|
54
|
+
total_hits, result = find_records_lazy_or_not q, options, find_options
|
55
|
+
logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
|
56
|
+
return SearchResults.new(result, total_hits)
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
# return the total number of hits for the given query
|
62
|
+
def total_hits(q, options={})
|
63
|
+
aaf_index.total_hits(q, options)
|
64
|
+
end
|
144
65
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
66
|
+
# Finds instance model name, ids and scores by contents.
|
67
|
+
# Useful e.g. if you want to search across models or do not want to fetch
|
68
|
+
# all result records (yet).
|
69
|
+
#
|
70
|
+
# Options are the same as for find_by_contents
|
71
|
+
#
|
72
|
+
# A block can be given too, it will be executed with every result:
|
73
|
+
# find_id_by_contents(q, options) do |model, id, score|
|
74
|
+
# id_array << id
|
75
|
+
# scores_by_id[id] = score
|
76
|
+
# end
|
77
|
+
# NOTE: in case a block is given, only the total_hits value will be returned
|
78
|
+
# instead of the [total_hits, results] array!
|
79
|
+
#
|
80
|
+
def find_id_by_contents(q, options = {}, &block)
|
81
|
+
deprecated_options_support(options)
|
82
|
+
aaf_index.find_id_by_contents(q, options, &block)
|
83
|
+
end
|
84
|
+
|
85
|
+
# requires the store_class_name option of acts_as_ferret to be true
|
86
|
+
# for all models queried this way.
|
87
|
+
def multi_search(query, additional_models = [], options = {}, find_options = {})
|
88
|
+
result = []
|
89
|
+
|
90
|
+
if options[:lazy]
|
91
|
+
logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
|
92
|
+
total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
|
93
|
+
result << FerretResult.new(model, id, score, data)
|
157
94
|
end
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
# own index, otherwise the index will get populated only
|
165
|
-
# with instances from the first model loaded
|
166
|
-
#
|
167
|
-
# When calling this method manually, you can give any additional
|
168
|
-
# model classes that should also go into this index as parameters.
|
169
|
-
# Useful when using the :single_index option.
|
170
|
-
# Note that attributes named the same in different models will share
|
171
|
-
# the same field options in the shared index.
|
172
|
-
def rebuild_index(*models)
|
173
|
-
models << self
|
174
|
-
# default attributes for fields
|
175
|
-
fi = Ferret::Index::FieldInfos.new(:store => :no,
|
176
|
-
:index => :yes,
|
177
|
-
:term_vector => :no,
|
178
|
-
:boost => 1.0)
|
179
|
-
# primary key
|
180
|
-
fi.add_field(:id, :store => :yes, :index => :untokenized)
|
181
|
-
# class_name
|
182
|
-
if configuration[:store_class_name]
|
183
|
-
fi.add_field(:class_name, :store => :yes, :index => :untokenized)
|
184
|
-
end
|
185
|
-
# collect field options from all models
|
186
|
-
fields = {}
|
187
|
-
models.each do |model|
|
188
|
-
fields.update(model.fields_for_ferret)
|
189
|
-
end
|
190
|
-
logger.debug("class #{self.name}: fields for index: #{fields.keys.join(',')}")
|
191
|
-
fields.each_pair do |field, options|
|
192
|
-
fi.add_field(field, { :store => :no,
|
193
|
-
:index => :yes }.update(options))
|
194
|
-
end
|
195
|
-
fi.create_index(ferret_configuration[:path])
|
196
|
-
|
197
|
-
index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => false))
|
198
|
-
#index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => true))
|
199
|
-
batch_size = 1000
|
200
|
-
models.each do |model|
|
201
|
-
# index in batches of 1000 to limit memory consumption (fixes #24)
|
202
|
-
model.transaction do
|
203
|
-
0.step(model.count, batch_size) do |i|
|
204
|
-
model.find(:all, :limit => batch_size, :offset => i).each do |rec|
|
205
|
-
index << rec.to_doc
|
206
|
-
end
|
207
|
-
end
|
208
|
-
end
|
209
|
-
end
|
210
|
-
logger.debug("Created Ferret index in: #{class_index_dir}")
|
211
|
-
index.flush
|
212
|
-
index.optimize
|
213
|
-
index.close
|
214
|
-
# close combined index readers, just in case
|
215
|
-
# this seems to fix a strange test failure that seems to relate to a
|
216
|
-
# multi_index looking at an old version of the content_base index.
|
217
|
-
@@multi_indexes.each_pair do |key, index|
|
218
|
-
# puts "#{key} -- #{self.name}"
|
219
|
-
# TODO only close those where necessary (watch inheritance, where
|
220
|
-
# self.name is base class of a class where key is made from)
|
221
|
-
index.close #if key =~ /#{self.name}/
|
222
|
-
end
|
223
|
-
@@multi_indexes = Hash.new
|
224
|
-
end
|
225
|
-
|
226
|
-
# Retrieve the Ferret::Index::Index instance for this model class.
|
227
|
-
#
|
228
|
-
# Index instances are stored in a hash, using the index directory
|
229
|
-
# as the key. So model classes sharing a single index will share their
|
230
|
-
# Index object, too.
|
231
|
-
def ferret_index
|
232
|
-
ferret_indexes[class_index_dir] ||= create_index_instance
|
233
|
-
end
|
234
|
-
|
235
|
-
# creates a new Index::Index instance. Before that, a check is done
|
236
|
-
# to see if the index exists in the file system. If not, index rebuild
|
237
|
-
# from all model data retrieved by find(:all) is triggered.
|
238
|
-
def create_index_instance
|
239
|
-
rebuild_index unless File.file? "#{class_index_dir}/segments"
|
240
|
-
Ferret::Index::Index.new(ferret_configuration)
|
95
|
+
else
|
96
|
+
id_arrays = {}
|
97
|
+
rank = 0
|
98
|
+
total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
|
99
|
+
id_arrays[model] ||= {}
|
100
|
+
id_arrays[model][id] = [ rank += 1, score ]
|
241
101
|
end
|
242
|
-
|
243
|
-
|
244
|
-
# by using OR between terms.
|
245
|
-
# options:
|
246
|
-
# offset:: first hit to retrieve (useful for paging)
|
247
|
-
# limit:: number of hits to retrieve, or :all to retrieve
|
248
|
-
# all results
|
249
|
-
# models:: only for single_index scenarios: a list of other Model classes to
|
250
|
-
# include in this search.
|
251
|
-
#
|
252
|
-
# find_options is a hash passed on to active_record's find when
|
253
|
-
# retrieving the data from db, useful e.g. to prefetch relationships.
|
254
|
-
#
|
255
|
-
# this method returns a SearchResults instance, which really is an Array that has
|
256
|
-
# been decorated with a total_hits accessor that delivers the total
|
257
|
-
# number of hits (including those not fetched because of a low num_docs
|
258
|
-
# value).
|
259
|
-
# Please keep in mind that the number of total hits might be wrong if you specify
|
260
|
-
# both ferret options and active record find_options that somehow limit the result
|
261
|
-
# set (e.g. :num_docs and some :conditions).
|
262
|
-
def find_by_contents(q, options = {}, find_options = {})
|
263
|
-
# handle shared index
|
264
|
-
return single_index_find_by_contents(q, options, find_options) if configuration[:single_index]
|
265
|
-
results = {}
|
266
|
-
total_hits = find_id_by_contents(q, options) do |model, id, score|
|
267
|
-
# stores ids, index of each id for later ordering of
|
268
|
-
# results, and score
|
269
|
-
results[id] = [ results.size + 1, score ]
|
270
|
-
end
|
271
|
-
result = []
|
272
|
-
begin
|
273
|
-
# TODO: in case of STI AR will filter out hits from other
|
274
|
-
# classes for us, but this
|
275
|
-
# will lead to less results retrieved --> scoping of ferret query
|
276
|
-
# to self.class is still needed.
|
277
|
-
# from the ferret ML (thanks Curtis Hatter)
|
278
|
-
# > I created a method in my base STI class so I can scope my query. For scoping
|
279
|
-
# > I used something like the following line:
|
280
|
-
# >
|
281
|
-
# > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
|
282
|
-
# >
|
283
|
-
# > Though you could make it more generic by simply asking
|
284
|
-
# > "self.descends_from_active_record?" which is how rails decides if it should
|
285
|
-
# > scope your "find" query for STI models. You can check out "base.rb" in
|
286
|
-
# > activerecord to see that.
|
287
|
-
# but maybe better do the scoping in find_id_by_contents...
|
288
|
-
if results.any?
|
289
|
-
conditions = combine_conditions([ "#{table_name}.#{primary_key} in (?)", results.keys ],
|
290
|
-
find_options[:conditions])
|
291
|
-
result = self.find(:all,
|
292
|
-
find_options.merge(:conditions => conditions))
|
293
|
-
# correct result size if the user specified conditions
|
294
|
-
total_hits = result.length if find_options[:conditions]
|
295
|
-
end
|
296
|
-
rescue ActiveRecord::RecordNotFound
|
297
|
-
logger.warn "REBUILD YOUR INDEX! One of the id's in the index didn't have an associated record"
|
298
|
-
end
|
102
|
+
result = retrieve_records(id_arrays, find_options)
|
103
|
+
end
|
299
104
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
105
|
+
SearchResults.new(result, total_hits)
|
106
|
+
end
|
107
|
+
|
108
|
+
# returns an array of hashes, each containing :class_name,
|
109
|
+
# :id and :score for a hit.
|
110
|
+
#
|
111
|
+
# if a block is given, class_name, id and score of each hit will
|
112
|
+
# be yielded, and the total number of hits is returned.
|
113
|
+
def id_multi_search(query, additional_models = [], options = {}, &proc)
|
114
|
+
deprecated_options_support(options)
|
115
|
+
additional_models = [ additional_models ] unless additional_models.is_a? Array
|
116
|
+
additional_models << self
|
117
|
+
aaf_index.id_multi_search(query, additional_models.map(&:to_s), options, &proc)
|
118
|
+
end
|
119
|
+
|
311
120
|
|
312
|
-
|
313
|
-
def single_index_field_names(models)
|
314
|
-
@single_index_field_names ||= (
|
315
|
-
searcher = Ferret::Search::Searcher.new(class_index_dir)
|
316
|
-
if searcher.reader.respond_to?(:get_field_names)
|
317
|
-
(searcher.reader.send(:get_field_names) - ['id', 'class_name']).to_a
|
318
|
-
else
|
319
|
-
puts <<-END
|
320
|
-
unable to retrieve field names for class #{self.name}, please
|
321
|
-
consider naming all indexed fields in your call to acts_as_ferret!
|
322
|
-
END
|
323
|
-
models.map { |m| m.content_columns.map { |col| col.name } }.flatten
|
324
|
-
end
|
325
|
-
)
|
121
|
+
protected
|
326
122
|
|
327
|
-
|
328
|
-
|
123
|
+
def find_records_lazy_or_not(q, options = {}, find_options = {})
|
124
|
+
if options[:lazy]
|
125
|
+
logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
|
126
|
+
lazy_find_by_contents q, options
|
127
|
+
else
|
128
|
+
ar_find_by_contents q, options, find_options
|
129
|
+
end
|
130
|
+
end
|
329
131
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
#
|
334
|
-
#
|
335
|
-
|
336
|
-
|
132
|
+
def ar_find_by_contents(q, options = {}, find_options = {})
|
133
|
+
result_ids = {}
|
134
|
+
total_hits = find_id_by_contents(q, options) do |model, id, score, data|
|
135
|
+
# stores ids, index of each id for later ordering of
|
136
|
+
# results, and score
|
137
|
+
result_ids[id] = [ result_ids.size + 1, score ]
|
138
|
+
end
|
337
139
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
options[:models] << self unless options[:models].include?(self)
|
342
|
-
# keep original query
|
343
|
-
original_query = q
|
344
|
-
|
345
|
-
# work around ferret bug in #process_query (doesn't ensure the
|
346
|
-
# reader is open)
|
347
|
-
ferret_index.synchronize do
|
348
|
-
ferret_index.send(:ensure_reader_open)
|
349
|
-
original_query = ferret_index.process_query(q)
|
350
|
-
end if q.is_a? String
|
140
|
+
result = retrieve_records( { self.name => result_ids }, find_options )
|
141
|
+
# correct result size if the user specified conditions
|
142
|
+
total_hits = result.length if find_options[:conditions]
|
351
143
|
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
options[:models].each do |model|
|
356
|
-
model_query.add_query(Ferret::Search::TermQuery.new(:class_name, model.name), :should)
|
357
|
-
end
|
358
|
-
q.add_query(model_query, :must)
|
359
|
-
end
|
360
|
-
#puts q.to_s
|
361
|
-
total_hits = find_id_by_contents(q, options) do |model, id, score|
|
362
|
-
o = Object.const_get(model).find(id, find_options.dup)
|
363
|
-
o.ferret_score = score
|
364
|
-
result << o
|
365
|
-
end
|
366
|
-
return SearchResults.new(result, total_hits)
|
367
|
-
end
|
368
|
-
protected :single_index_find_by_contents
|
144
|
+
# order results as they were found by ferret, unless an AR :order
|
145
|
+
# option was given
|
146
|
+
result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
|
369
147
|
|
370
|
-
|
371
|
-
|
372
|
-
ferret_index.search(q, options).total_hits
|
373
|
-
end
|
148
|
+
[ total_hits, result ]
|
149
|
+
end
|
374
150
|
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
# result << (Model1.find_id_by_contents query)
|
383
|
-
# result << (Model2.find_id_by_contents query)
|
384
|
-
# result << (Model3.find_id_by_contents query)
|
385
|
-
# result.flatten!
|
386
|
-
# result.sort! {|element| element[:score]}
|
387
|
-
# # Figure out for yourself how to retrieve and present the data from modelname and id
|
388
|
-
# end
|
389
|
-
#
|
390
|
-
# Note that the scores retrieved this way aren't normalized across
|
391
|
-
# indexes, so that the order of results after sorting by score will
|
392
|
-
# differ from the order you would get when running the same query
|
393
|
-
# on a single index containing all the data from Model1, Model2
|
394
|
-
# and Model
|
395
|
-
#
|
396
|
-
# options are:
|
397
|
-
#
|
398
|
-
# first_doc:: first hit to retrieve (useful for paging)
|
399
|
-
# num_docs:: number of hits to retrieve, or :all to retrieve all
|
400
|
-
# results.
|
401
|
-
#
|
402
|
-
# a block can be given too, it will be executed with every result:
|
403
|
-
# find_id_by_contents(q, options) do |model, id, score|
|
404
|
-
# id_array << id
|
405
|
-
# scores_by_id[id] = score
|
406
|
-
# end
|
407
|
-
# NOTE: in case a block is given, the total_hits value will be returned
|
408
|
-
# instead of the result list!
|
409
|
-
#
|
410
|
-
def find_id_by_contents(q, options = {})
|
411
|
-
deprecated_options_support(options)
|
151
|
+
def lazy_find_by_contents(q, options = {})
|
152
|
+
result = []
|
153
|
+
total_hits = find_id_by_contents(q, options) do |model, id, score, data|
|
154
|
+
result << FerretResult.new(model, id, score, data)
|
155
|
+
end
|
156
|
+
[ total_hits, result ]
|
157
|
+
end
|
412
158
|
|
413
|
-
result = []
|
414
|
-
index = self.ferret_index
|
415
|
-
# puts "query: #{index.process_query q}"
|
416
|
-
total_hits = index.search_each(q, options) do |hit, score|
|
417
|
-
# only collect result data if we intend to return it
|
418
|
-
doc = index[hit]
|
419
|
-
model = configuration[:store_class_name] ? doc[:class_name] : self.name
|
420
|
-
if block_given?
|
421
|
-
yield model, doc[:id], score
|
422
|
-
else
|
423
|
-
result << { :model => model, :id => doc[:id], :score => score }
|
424
|
-
end
|
425
|
-
end
|
426
|
-
logger.debug "id_score_model array: #{result.inspect}"
|
427
|
-
return block_given? ? total_hits : result
|
428
|
-
end
|
429
|
-
|
430
|
-
# requires the store_class_name option of acts_as_ferret to be true
|
431
|
-
# for all models queried this way.
|
432
|
-
#
|
433
|
-
# TODO: not optimal as each instance is fetched in a db call for its
|
434
|
-
# own.
|
435
|
-
def multi_search(query, additional_models = [], options = {})
|
436
|
-
result = []
|
437
|
-
total_hits = id_multi_search(query, additional_models, options) do |model, id, score|
|
438
|
-
r = Object.const_get(model).find(id)
|
439
|
-
r.ferret_score = score
|
440
|
-
result << r
|
441
|
-
end
|
442
|
-
SearchResults.new(result, total_hits)
|
443
|
-
end
|
444
|
-
|
445
|
-
# returns an array of hashes, each containing :class_name,
|
446
|
-
# :id and :score for a hit.
|
447
|
-
#
|
448
|
-
# if a block is given, class_name, id and score of each hit will
|
449
|
-
# be yielded, and the total number of hits is returned.
|
450
|
-
#
|
451
|
-
def id_multi_search(query, additional_models = [], options = {})
|
452
|
-
deprecated_options_support(options)
|
453
|
-
additional_models = [ additional_models ] unless additional_models.is_a? Array
|
454
|
-
additional_models << self
|
455
|
-
searcher = multi_index(additional_models)
|
456
|
-
result = []
|
457
|
-
total_hits = searcher.search_each(query, options) do |hit, score|
|
458
|
-
doc = searcher[hit]
|
459
|
-
if block_given?
|
460
|
-
yield doc[:class_name], doc[:id], score
|
461
|
-
else
|
462
|
-
result << { :model => doc[:class_name], :id => doc[:id], :score => score }
|
463
|
-
end
|
464
|
-
end
|
465
|
-
return block_given? ? total_hits : result
|
466
|
-
end
|
467
|
-
|
468
|
-
# returns a MultiIndex instance operating on a MultiReader
|
469
|
-
def multi_index(model_classes)
|
470
|
-
model_classes.sort! { |a, b| a.name <=> b.name }
|
471
|
-
key = model_classes.inject("") { |s, clazz| s << clazz.name }
|
472
|
-
multi_config = ferret_configuration.dup
|
473
|
-
multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching
|
474
|
-
@@multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)
|
475
|
-
end
|
476
159
|
|
477
|
-
|
160
|
+
def model_find(model, id, find_options = {})
|
161
|
+
model.constantize.find(id, find_options)
|
162
|
+
end
|
478
163
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
164
|
+
# retrieves search result records from a data structure like this:
|
165
|
+
# { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }
|
166
|
+
#
|
167
|
+
# TODO: in case of STI AR will filter out hits from other
|
168
|
+
# classes for us, but this
|
169
|
+
# will lead to less results retrieved --> scoping of ferret query
|
170
|
+
# to self.class is still needed.
|
171
|
+
# from the ferret ML (thanks Curtis Hatter)
|
172
|
+
# > I created a method in my base STI class so I can scope my query. For scoping
|
173
|
+
# > I used something like the following line:
|
174
|
+
# >
|
175
|
+
# > query << " role:#{self.class.eql?(Contents) ? '*' : self.class}"
|
176
|
+
# >
|
177
|
+
# > Though you could make it more generic by simply asking
|
178
|
+
# > "self.descends_from_active_record?" which is how rails decides if it should
|
179
|
+
# > scope your "find" query for STI models. You can check out "base.rb" in
|
180
|
+
# > activerecord to see that.
|
181
|
+
# but maybe better do the scoping in find_id_by_contents...
|
182
|
+
def retrieve_records(id_arrays, find_options = {})
|
183
|
+
result = []
|
184
|
+
# get objects for each model
|
185
|
+
id_arrays.each do |model, id_array|
|
186
|
+
next if id_array.empty?
|
187
|
+
begin
|
188
|
+
model = model.constantize
|
189
|
+
# merge conditions
|
190
|
+
conditions = combine_conditions([ "#{model.table_name}.#{primary_key} in (?)", id_array.keys ],
|
191
|
+
find_options[:conditions])
|
192
|
+
# fetch
|
193
|
+
tmp_result = model.find(:all, find_options.merge(:conditions => conditions))
|
194
|
+
# set scores and rank
|
195
|
+
tmp_result.each do |record|
|
196
|
+
record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
|
197
|
+
end
|
198
|
+
# merge with result array
|
199
|
+
result.concat tmp_result
|
200
|
+
rescue TypeError
|
201
|
+
raise "#{model} must use :store_class_name option if you want to use multi_search against it.\n#{$!}"
|
488
202
|
end
|
203
|
+
end
|
204
|
+
return result
|
205
|
+
end
|
489
206
|
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
207
|
+
def deprecated_options_support(options)
|
208
|
+
if options[:num_docs]
|
209
|
+
logger.warn ":num_docs is deprecated, use :limit instead!"
|
210
|
+
options[:limit] ||= options[:num_docs]
|
211
|
+
end
|
212
|
+
if options[:first_doc]
|
213
|
+
logger.warn ":first_doc is deprecated, use :offset instead!"
|
214
|
+
options[:offset] ||= options[:first_doc]
|
215
|
+
end
|
216
|
+
end
|
499
217
|
|
218
|
+
# combine our conditions with those given by user, if any
|
219
|
+
def combine_conditions(conditions, *additional_conditions)
|
220
|
+
returning conditions do
|
221
|
+
if additional_conditions.any?
|
222
|
+
cust_opts = additional_conditions.dup.flatten
|
223
|
+
conditions.first << " and " << cust_opts.shift
|
224
|
+
conditions.concat(cust_opts)
|
225
|
+
end
|
500
226
|
end
|
501
|
-
|
502
227
|
end
|
228
|
+
|
229
|
+
# creates a new Index::Index instance. Before that, a check is done
|
230
|
+
# to see if the index exists in the file system. If not, index rebuild
|
231
|
+
# from all model data retrieved by find(:all) is triggered.
|
232
|
+
def create_index_instance
|
233
|
+
if aaf_configuration[:remote]
|
234
|
+
RemoteIndex
|
235
|
+
elsif aaf_configuration[:single_index]
|
236
|
+
SharedIndex
|
237
|
+
else
|
238
|
+
LocalIndex
|
239
|
+
end.new(aaf_configuration)
|
240
|
+
end
|
241
|
+
|
503
242
|
end
|
243
|
+
|
504
244
|
end
|
505
245
|
|