acts_as_ferret 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +29 -6
- data/config/ferret_server.yml +12 -0
- data/install.rb +19 -0
- data/lib/act_methods.rb +194 -0
- data/lib/acts_as_ferret.rb +74 -52
- data/lib/class_methods.rb +222 -482
- data/lib/ferret_result.rb +36 -0
- data/lib/ferret_server.rb +89 -0
- data/lib/index.rb +31 -0
- data/lib/instance_methods.rb +112 -143
- data/lib/local_index.rb +257 -0
- data/lib/more_like_this.rb +47 -41
- data/lib/multi_index.rb +8 -11
- data/lib/remote_index.rb +50 -0
- data/lib/shared_index.rb +14 -0
- data/lib/shared_index_class_methods.rb +90 -0
- data/rakefile +88 -147
- data/script/ferret_server +18 -0
- data/script/ferret_start +67 -0
- data/script/ferret_stop +22 -0
- metadata +23 -11
- data/.init.rb.swp +0 -0
- data/.rakefile.swp +0 -0
- data/lib/.acts_as_ferret.rb.swp +0 -0
- data/lib/.class_methods.rb.swo +0 -0
- data/lib/.class_methods.rb.swp +0 -0
data/lib/class_methods.rb
CHANGED
@@ -1,505 +1,245 @@
|
|
1
|
-
module
|
2
|
-
module Acts #:nodoc:
|
3
|
-
module ARFerret #:nodoc:
|
4
|
-
|
5
|
-
# declare the class level helper methods
|
6
|
-
# which will load the relevant instance methods defined below when invoked
|
7
|
-
module ClassMethods
|
1
|
+
module ActsAsFerret
|
8
2
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
# index that is shared by all classes having :single_index set to true.
|
72
|
-
# :store_class_name is set to true implicitly, as well as index_dir, so
|
73
|
-
# don't bother setting these when using this option. the shared index
|
74
|
-
# will be located in index/<RAILS_ENV>/shared .
|
75
|
-
#
|
76
|
-
# store_class_name:: to make search across multiple models useful, set
|
77
|
-
# this to true. the model class name will be stored in a keyword field
|
78
|
-
# named class_name
|
79
|
-
#
|
80
|
-
# ferret_options may be:
|
81
|
-
# or_default:: - whether query terms are required by
|
82
|
-
# default (the default, false), or not (true)
|
83
|
-
#
|
84
|
-
# analyzer:: the analyzer to use for query parsing (default: nil,
|
85
|
-
# which means the ferret StandardAnalyzer gets used)
|
86
|
-
#
|
87
|
-
def acts_as_ferret(options={}, ferret_options={})
|
88
|
-
configuration = {
|
89
|
-
:index_dir => "#{FerretMixin::Acts::ARFerret::index_dir}/#{self.name.underscore}",
|
90
|
-
:store_class_name => false,
|
91
|
-
:single_index => false,
|
92
|
-
}
|
93
|
-
ferret_configuration = {
|
94
|
-
:or_default => false,
|
95
|
-
:handle_parse_errors => true,
|
96
|
-
:default_field => '*'
|
97
|
-
#:max_clauses => 512,
|
98
|
-
#:analyzer => Ferret::Analysis::StandardAnalyzer.new,
|
99
|
-
# :wild_card_downcase => true
|
100
|
-
}
|
101
|
-
configuration.update(options) if options.is_a?(Hash)
|
102
|
-
|
103
|
-
# apply appropriate settings for shared index
|
104
|
-
if configuration[:single_index]
|
105
|
-
configuration[:index_dir] = "#{FerretMixin::Acts::ARFerret::index_dir}/shared"
|
106
|
-
configuration[:store_class_name] = true
|
107
|
-
end
|
108
|
-
ferret_configuration.update(ferret_options) if ferret_options.is_a?(Hash)
|
109
|
-
# these properties are somewhat vital to the plugin and shouldn't
|
110
|
-
# be overwritten by the user:
|
111
|
-
ferret_configuration.update(
|
112
|
-
|
113
|
-
:key => (configuration[:single_index] ? [:id, :class_name] : :id),
|
114
|
-
:path => configuration[:index_dir],
|
115
|
-
:auto_flush => true,
|
116
|
-
:create_if_missing => true
|
117
|
-
)
|
118
|
-
|
119
|
-
class_eval <<-EOV
|
120
|
-
include FerretMixin::Acts::ARFerret::InstanceMethods
|
121
|
-
|
122
|
-
|
123
|
-
after_create :ferret_create
|
124
|
-
after_update :ferret_update
|
125
|
-
after_destroy :ferret_destroy
|
126
|
-
|
127
|
-
cattr_accessor :fields_for_ferret
|
128
|
-
cattr_accessor :configuration
|
129
|
-
cattr_accessor :ferret_configuration
|
130
|
-
|
131
|
-
@@fields_for_ferret = Hash.new
|
132
|
-
@@configuration = configuration
|
133
|
-
@@ferret_configuration = ferret_configuration
|
134
|
-
|
135
|
-
if configuration[:fields]
|
136
|
-
add_fields(configuration[:fields])
|
137
|
-
else
|
138
|
-
add_fields(self.new.attributes.keys.map { |k| k.to_sym })
|
139
|
-
add_fields(configuration[:additional_fields])
|
140
|
-
end
|
141
|
-
|
142
|
-
EOV
|
143
|
-
FerretMixin::Acts::ARFerret::ensure_directory configuration[:index_dir]
|
3
|
+
module ClassMethods
|
4
|
+
|
5
|
+
# rebuild the index from all data stored for this model.
|
6
|
+
# This is called automatically when no index exists yet.
|
7
|
+
#
|
8
|
+
# When calling this method manually, you can give any additional
|
9
|
+
# model classes that should also go into this index as parameters.
|
10
|
+
# Useful when using the :single_index option.
|
11
|
+
# Note that attributes named the same in different models will share
|
12
|
+
# the same field options in the shared index.
|
13
|
+
def rebuild_index(*models)
|
14
|
+
models << self unless models.include?(self)
|
15
|
+
aaf_index.rebuild_index(models.map(&:to_s))
|
16
|
+
end
|
17
|
+
|
18
|
+
# Retrieve the index instance for this model class. This can either be a
|
19
|
+
# LocalIndex, or a RemoteIndex instance.
|
20
|
+
#
|
21
|
+
# Index instances are stored in a hash, using the index directory
|
22
|
+
# as the key. So model classes sharing a single index will share their
|
23
|
+
# Index object, too.
|
24
|
+
def aaf_index
|
25
|
+
ActsAsFerret::ferret_indexes[aaf_configuration[:index_dir]] ||= create_index_instance
|
26
|
+
end
|
27
|
+
|
28
|
+
# Finds instances by contents. Terms are ANDed by default, can be circumvented
|
29
|
+
# by using OR between terms.
|
30
|
+
# options:
|
31
|
+
# offset:: first hit to retrieve (useful for paging)
|
32
|
+
# limit:: number of hits to retrieve, or :all to retrieve
|
33
|
+
# all results
|
34
|
+
# lazy:: Array of field names whose contents should be read directly
|
35
|
+
# from the index. Those fields have to be marked
|
36
|
+
# :store => :yes in their field options. Give true to get all
|
37
|
+
# stored fields (if you have a shared index, you have to
|
38
|
+
# explicitly state the fields you want to fetch, true won't
|
39
|
+
# work)
|
40
|
+
# models:: only for single_index scenarios: an Array of other Model classes to
|
41
|
+
# include in this search. Use :all to query all models.
|
42
|
+
#
|
43
|
+
# find_options is a hash passed on to active_record's find when
|
44
|
+
# retrieving the data from db, useful to e.g. prefetch relationships.
|
45
|
+
#
|
46
|
+
# this method returns a SearchResults instance, which really is an Array that has
|
47
|
+
# been decorated with a total_hits accessor that delivers the total
|
48
|
+
# number of hits (including those not fetched because of a low num_docs
|
49
|
+
# value).
|
50
|
+
# Please keep in mind that the number of total hits might be wrong if you specify
|
51
|
+
# both ferret options and active record find_options that somehow limit the result
|
52
|
+
# set (e.g. :num_docs and some :conditions).
|
53
|
+
def find_by_contents(q, options = {}, find_options = {})
|
54
|
+
total_hits, result = find_records_lazy_or_not q, options, find_options
|
55
|
+
logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
|
56
|
+
return SearchResults.new(result, total_hits)
|
57
|
+
end
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
# return the total number of hits for the given query
|
62
|
+
def total_hits(q, options={})
|
63
|
+
aaf_index.total_hits(q, options)
|
64
|
+
end
|
144
65
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
66
|
+
# Finds instance model name, ids and scores by contents.
|
67
|
+
# Useful e.g. if you want to search across models or do not want to fetch
|
68
|
+
# all result records (yet).
|
69
|
+
#
|
70
|
+
# Options are the same as for find_by_contents
|
71
|
+
#
|
72
|
+
# A block can be given too, it will be executed with every result:
|
73
|
+
# find_id_by_contents(q, options) do |model, id, score|
|
74
|
+
# id_array << id
|
75
|
+
# scores_by_id[id] = score
|
76
|
+
# end
|
77
|
+
# NOTE: in case a block is given, only the total_hits value will be returned
|
78
|
+
# instead of the [total_hits, results] array!
|
79
|
+
#
|
80
|
+
def find_id_by_contents(q, options = {}, &block)
|
81
|
+
deprecated_options_support(options)
|
82
|
+
aaf_index.find_id_by_contents(q, options, &block)
|
83
|
+
end
|
84
|
+
|
85
|
+
# requires the store_class_name option of acts_as_ferret to be true
|
86
|
+
# for all models queried this way.
|
87
|
+
def multi_search(query, additional_models = [], options = {}, find_options = {})
|
88
|
+
result = []
|
89
|
+
|
90
|
+
if options[:lazy]
|
91
|
+
logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
|
92
|
+
total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
|
93
|
+
result << FerretResult.new(model, id, score, data)
|
157
94
|
end
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
# own index, otherwise the index will get populated only
|
165
|
-
# with instances from the first model loaded
|
166
|
-
#
|
167
|
-
# When calling this method manually, you can give any additional
|
168
|
-
# model classes that should also go into this index as parameters.
|
169
|
-
# Useful when using the :single_index option.
|
170
|
-
# Note that attributes named the same in different models will share
|
171
|
-
# the same field options in the shared index.
|
172
|
-
def rebuild_index(*models)
|
173
|
-
models << self
|
174
|
-
# default attributes for fields
|
175
|
-
fi = Ferret::Index::FieldInfos.new(:store => :no,
|
176
|
-
:index => :yes,
|
177
|
-
:term_vector => :no,
|
178
|
-
:boost => 1.0)
|
179
|
-
# primary key
|
180
|
-
fi.add_field(:id, :store => :yes, :index => :untokenized)
|
181
|
-
# class_name
|
182
|
-
if configuration[:store_class_name]
|
183
|
-
fi.add_field(:class_name, :store => :yes, :index => :untokenized)
|
184
|
-
end
|
185
|
-
# collect field options from all models
|
186
|
-
fields = {}
|
187
|
-
models.each do |model|
|
188
|
-
fields.update(model.fields_for_ferret)
|
189
|
-
end
|
190
|
-
logger.debug("class #{self.name}: fields for index: #{fields.keys.join(',')}")
|
191
|
-
fields.each_pair do |field, options|
|
192
|
-
fi.add_field(field, { :store => :no,
|
193
|
-
:index => :yes }.update(options))
|
194
|
-
end
|
195
|
-
fi.create_index(ferret_configuration[:path])
|
196
|
-
|
197
|
-
index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => false))
|
198
|
-
#index = Ferret::Index::Index.new(ferret_configuration.dup.update(:auto_flush => true))
|
199
|
-
batch_size = 1000
|
200
|
-
models.each do |model|
|
201
|
-
# index in batches of 1000 to limit memory consumption (fixes #24)
|
202
|
-
model.transaction do
|
203
|
-
0.step(model.count, batch_size) do |i|
|
204
|
-
model.find(:all, :limit => batch_size, :offset => i).each do |rec|
|
205
|
-
index << rec.to_doc
|
206
|
-
end
|
207
|
-
end
|
208
|
-
end
|
209
|
-
end
|
210
|
-
logger.debug("Created Ferret index in: #{class_index_dir}")
|
211
|
-
index.flush
|
212
|
-
index.optimize
|
213
|
-
index.close
|
214
|
-
# close combined index readers, just in case
|
215
|
-
# this seems to fix a strange test failure that seems to relate to a
|
216
|
-
# multi_index looking at an old version of the content_base index.
|
217
|
-
@@multi_indexes.each_pair do |key, index|
|
218
|
-
# puts "#{key} -- #{self.name}"
|
219
|
-
# TODO only close those where necessary (watch inheritance, where
|
220
|
-
# self.name is base class of a class where key is made from)
|
221
|
-
index.close #if key =~ /#{self.name}/
|
222
|
-
end
|
223
|
-
@@multi_indexes = Hash.new
|
224
|
-
end
|
225
|
-
|
226
|
-
# Retrieve the Ferret::Index::Index instance for this model class.
|
227
|
-
#
|
228
|
-
# Index instances are stored in a hash, using the index directory
|
229
|
-
# as the key. So model classes sharing a single index will share their
|
230
|
-
# Index object, too.
|
231
|
-
def ferret_index
|
232
|
-
ferret_indexes[class_index_dir] ||= create_index_instance
|
233
|
-
end
|
234
|
-
|
235
|
-
# creates a new Index::Index instance. Before that, a check is done
|
236
|
-
# to see if the index exists in the file system. If not, index rebuild
|
237
|
-
# from all model data retrieved by find(:all) is triggered.
|
238
|
-
def create_index_instance
|
239
|
-
rebuild_index unless File.file? "#{class_index_dir}/segments"
|
240
|
-
Ferret::Index::Index.new(ferret_configuration)
|
95
|
+
else
|
96
|
+
id_arrays = {}
|
97
|
+
rank = 0
|
98
|
+
total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
|
99
|
+
id_arrays[model] ||= {}
|
100
|
+
id_arrays[model][id] = [ rank += 1, score ]
|
241
101
|
end
|
242
|
-
|
243
|
-
|
244
|
-
# by using OR between terms.
|
245
|
-
# options:
|
246
|
-
# offset:: first hit to retrieve (useful for paging)
|
247
|
-
# limit:: number of hits to retrieve, or :all to retrieve
|
248
|
-
# all results
|
249
|
-
# models:: only for single_index scenarios: a list of other Model classes to
|
250
|
-
# include in this search.
|
251
|
-
#
|
252
|
-
# find_options is a hash passed on to active_record's find when
|
253
|
-
# retrieving the data from db, useful to e.g. prefetch relationships.
|
254
|
-
#
|
255
|
-
# this method returns a SearchResults instance, which really is an Array that has
|
256
|
-
# been decorated with a total_hits accessor that delivers the total
|
257
|
-
# number of hits (including those not fetched because of a low num_docs
|
258
|
-
# value).
|
259
|
-
# Please keep in mind that the number of total hits might be wrong if you specify
|
260
|
-
# both ferret options and active record find_options that somehow limit the result
|
261
|
-
# set (e.g. :num_docs and some :conditions).
|
262
|
-
def find_by_contents(q, options = {}, find_options = {})
|
263
|
-
# handle shared index
|
264
|
-
return single_index_find_by_contents(q, options, find_options) if configuration[:single_index]
|
265
|
-
results = {}
|
266
|
-
total_hits = find_id_by_contents(q, options) do |model, id, score|
|
267
|
-
# stores ids, index of each id for later ordering of
|
268
|
-
# results, and score
|
269
|
-
results[id] = [ results.size + 1, score ]
|
270
|
-
end
|
271
|
-
result = []
|
272
|
-
begin
|
273
|
-
# TODO: in case of STI AR will filter out hits from other
|
274
|
-
# classes for us, but this
|
275
|
-
# will lead to less results retrieved --> scoping of ferret query
|
276
|
-
# to self.class is still needed.
|
277
|
-
# from the ferret ML (thanks Curtis Hatter)
|
278
|
-
# > I created a method in my base STI class so I can scope my query. For scoping
|
279
|
-
# > I used something like the following line:
|
280
|
-
# >
|
281
|
-
# > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
|
282
|
-
# >
|
283
|
-
# > Though you could make it more generic by simply asking
|
284
|
-
# > "self.descends_from_active_record?" which is how rails decides if it should
|
285
|
-
# > scope your "find" query for STI models. You can check out "base.rb" in
|
286
|
-
# > activerecord to see that.
|
287
|
-
# but maybe better do the scoping in find_id_by_contents...
|
288
|
-
if results.any?
|
289
|
-
conditions = combine_conditions([ "#{table_name}.#{primary_key} in (?)", results.keys ],
|
290
|
-
find_options[:conditions])
|
291
|
-
result = self.find(:all,
|
292
|
-
find_options.merge(:conditions => conditions))
|
293
|
-
# correct result size if the user specified conditions
|
294
|
-
total_hits = result.length if find_options[:conditions]
|
295
|
-
end
|
296
|
-
rescue ActiveRecord::RecordNotFound
|
297
|
-
logger.warn "REBUILD YOUR INDEX! One of the id's in the index didn't have an associated record"
|
298
|
-
end
|
102
|
+
result = retrieve_records(id_arrays, find_options)
|
103
|
+
end
|
299
104
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
105
|
+
SearchResults.new(result, total_hits)
|
106
|
+
end
|
107
|
+
|
108
|
+
# returns an array of hashes, each containing :class_name,
|
109
|
+
# :id and :score for a hit.
|
110
|
+
#
|
111
|
+
# if a block is given, class_name, id and score of each hit will
|
112
|
+
# be yielded, and the total number of hits is returned.
|
113
|
+
def id_multi_search(query, additional_models = [], options = {}, &proc)
|
114
|
+
deprecated_options_support(options)
|
115
|
+
additional_models = [ additional_models ] unless additional_models.is_a? Array
|
116
|
+
additional_models << self
|
117
|
+
aaf_index.id_multi_search(query, additional_models.map(&:to_s), options, &proc)
|
118
|
+
end
|
119
|
+
|
311
120
|
|
312
|
-
|
313
|
-
def single_index_field_names(models)
|
314
|
-
@single_index_field_names ||= (
|
315
|
-
searcher = Ferret::Search::Searcher.new(class_index_dir)
|
316
|
-
if searcher.reader.respond_to?(:get_field_names)
|
317
|
-
(searcher.reader.send(:get_field_names) - ['id', 'class_name']).to_a
|
318
|
-
else
|
319
|
-
puts <<-END
|
320
|
-
unable to retrieve field names for class #{self.name}, please
|
321
|
-
consider naming all indexed fields in your call to acts_as_ferret!
|
322
|
-
END
|
323
|
-
models.map { |m| m.content_columns.map { |col| col.name } }.flatten
|
324
|
-
end
|
325
|
-
)
|
121
|
+
protected
|
326
122
|
|
327
|
-
|
328
|
-
|
123
|
+
def find_records_lazy_or_not(q, options = {}, find_options = {})
|
124
|
+
if options[:lazy]
|
125
|
+
logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
|
126
|
+
lazy_find_by_contents q, options
|
127
|
+
else
|
128
|
+
ar_find_by_contents q, options, find_options
|
129
|
+
end
|
130
|
+
end
|
329
131
|
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
#
|
334
|
-
#
|
335
|
-
|
336
|
-
|
132
|
+
def ar_find_by_contents(q, options = {}, find_options = {})
|
133
|
+
result_ids = {}
|
134
|
+
total_hits = find_id_by_contents(q, options) do |model, id, score, data|
|
135
|
+
# stores ids, index of each id for later ordering of
|
136
|
+
# results, and score
|
137
|
+
result_ids[id] = [ result_ids.size + 1, score ]
|
138
|
+
end
|
337
139
|
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
options[:models] << self unless options[:models].include?(self)
|
342
|
-
# keep original query
|
343
|
-
original_query = q
|
344
|
-
|
345
|
-
# work around ferret bug in #process_query (doesn't ensure the
|
346
|
-
# reader is open)
|
347
|
-
ferret_index.synchronize do
|
348
|
-
ferret_index.send(:ensure_reader_open)
|
349
|
-
original_query = ferret_index.process_query(q)
|
350
|
-
end if q.is_a? String
|
140
|
+
result = retrieve_records( { self.name => result_ids }, find_options )
|
141
|
+
# correct result size if the user specified conditions
|
142
|
+
total_hits = result.length if find_options[:conditions]
|
351
143
|
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
options[:models].each do |model|
|
356
|
-
model_query.add_query(Ferret::Search::TermQuery.new(:class_name, model.name), :should)
|
357
|
-
end
|
358
|
-
q.add_query(model_query, :must)
|
359
|
-
end
|
360
|
-
#puts q.to_s
|
361
|
-
total_hits = find_id_by_contents(q, options) do |model, id, score|
|
362
|
-
o = Object.const_get(model).find(id, find_options.dup)
|
363
|
-
o.ferret_score = score
|
364
|
-
result << o
|
365
|
-
end
|
366
|
-
return SearchResults.new(result, total_hits)
|
367
|
-
end
|
368
|
-
protected :single_index_find_by_contents
|
144
|
+
# order results as they were found by ferret, unless an AR :order
|
145
|
+
# option was given
|
146
|
+
result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
|
369
147
|
|
370
|
-
|
371
|
-
|
372
|
-
ferret_index.search(q, options).total_hits
|
373
|
-
end
|
148
|
+
[ total_hits, result ]
|
149
|
+
end
|
374
150
|
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
# result << (Model1.find_id_by_contents query)
|
383
|
-
# result << (Model2.find_id_by_contents query)
|
384
|
-
# result << (Model3.find_id_by_contents query)
|
385
|
-
# result.flatten!
|
386
|
-
# result.sort! {|element| element[:score]}
|
387
|
-
# # Figure out for yourself how to retrieve and present the data from modelname and id
|
388
|
-
# end
|
389
|
-
#
|
390
|
-
# Note that the scores retrieved this way aren't normalized across
|
391
|
-
# indexes, so that the order of results after sorting by score will
|
392
|
-
# differ from the order you would get when running the same query
|
393
|
-
# on a single index containing all the data from Model1, Model2
|
394
|
-
# and Model
|
395
|
-
#
|
396
|
-
# options are:
|
397
|
-
#
|
398
|
-
# first_doc:: first hit to retrieve (useful for paging)
|
399
|
-
# num_docs:: number of hits to retrieve, or :all to retrieve all
|
400
|
-
# results.
|
401
|
-
#
|
402
|
-
# a block can be given too, it will be executed with every result:
|
403
|
-
# find_id_by_contents(q, options) do |model, id, score|
|
404
|
-
# id_array << id
|
405
|
-
# scores_by_id[id] = score
|
406
|
-
# end
|
407
|
-
# NOTE: in case a block is given, the total_hits value will be returned
|
408
|
-
# instead of the result list!
|
409
|
-
#
|
410
|
-
def find_id_by_contents(q, options = {})
|
411
|
-
deprecated_options_support(options)
|
151
|
+
def lazy_find_by_contents(q, options = {})
|
152
|
+
result = []
|
153
|
+
total_hits = find_id_by_contents(q, options) do |model, id, score, data|
|
154
|
+
result << FerretResult.new(model, id, score, data)
|
155
|
+
end
|
156
|
+
[ total_hits, result ]
|
157
|
+
end
|
412
158
|
|
413
|
-
result = []
|
414
|
-
index = self.ferret_index
|
415
|
-
# puts "query: #{index.process_query q}"
|
416
|
-
total_hits = index.search_each(q, options) do |hit, score|
|
417
|
-
# only collect result data if we intend to return it
|
418
|
-
doc = index[hit]
|
419
|
-
model = configuration[:store_class_name] ? doc[:class_name] : self.name
|
420
|
-
if block_given?
|
421
|
-
yield model, doc[:id], score
|
422
|
-
else
|
423
|
-
result << { :model => model, :id => doc[:id], :score => score }
|
424
|
-
end
|
425
|
-
end
|
426
|
-
logger.debug "id_score_model array: #{result.inspect}"
|
427
|
-
return block_given? ? total_hits : result
|
428
|
-
end
|
429
|
-
|
430
|
-
# requires the store_class_name option of acts_as_ferret to be true
|
431
|
-
# for all models queried this way.
|
432
|
-
#
|
433
|
-
# TODO: not optimal as each instance is fetched in a db call for its
|
434
|
-
# own.
|
435
|
-
def multi_search(query, additional_models = [], options = {})
|
436
|
-
result = []
|
437
|
-
total_hits = id_multi_search(query, additional_models, options) do |model, id, score|
|
438
|
-
r = Object.const_get(model).find(id)
|
439
|
-
r.ferret_score = score
|
440
|
-
result << r
|
441
|
-
end
|
442
|
-
SearchResults.new(result, total_hits)
|
443
|
-
end
|
444
|
-
|
445
|
-
# returns an array of hashes, each containing :class_name,
|
446
|
-
# :id and :score for a hit.
|
447
|
-
#
|
448
|
-
# if a block is given, class_name, id and score of each hit will
|
449
|
-
# be yielded, and the total number of hits is returned.
|
450
|
-
#
|
451
|
-
def id_multi_search(query, additional_models = [], options = {})
|
452
|
-
deprecated_options_support(options)
|
453
|
-
additional_models = [ additional_models ] unless additional_models.is_a? Array
|
454
|
-
additional_models << self
|
455
|
-
searcher = multi_index(additional_models)
|
456
|
-
result = []
|
457
|
-
total_hits = searcher.search_each(query, options) do |hit, score|
|
458
|
-
doc = searcher[hit]
|
459
|
-
if block_given?
|
460
|
-
yield doc[:class_name], doc[:id], score
|
461
|
-
else
|
462
|
-
result << { :model => doc[:class_name], :id => doc[:id], :score => score }
|
463
|
-
end
|
464
|
-
end
|
465
|
-
return block_given? ? total_hits : result
|
466
|
-
end
|
467
|
-
|
468
|
-
# returns a MultiIndex instance operating on a MultiReader
|
469
|
-
def multi_index(model_classes)
|
470
|
-
model_classes.sort! { |a, b| a.name <=> b.name }
|
471
|
-
key = model_classes.inject("") { |s, clazz| s << clazz.name }
|
472
|
-
multi_config = ferret_configuration.dup
|
473
|
-
multi_config.delete :default_field # we don't want the default field list of *this* class for multi_searching
|
474
|
-
@@multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)
|
475
|
-
end
|
476
159
|
|
477
|
-
|
160
|
+
def model_find(model, id, find_options = {})
|
161
|
+
model.constantize.find(id, find_options)
|
162
|
+
end
|
478
163
|
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
164
|
+
# retrieves search result records from a data structure like this:
|
165
|
+
# { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] } }
|
166
|
+
#
|
167
|
+
# TODO: in case of STI AR will filter out hits from other
|
168
|
+
# classes for us, but this
|
169
|
+
# will lead to less results retrieved --> scoping of ferret query
|
170
|
+
# to self.class is still needed.
|
171
|
+
# from the ferret ML (thanks Curtis Hatter)
|
172
|
+
# > I created a method in my base STI class so I can scope my query. For scoping
|
173
|
+
# > I used something like the following line:
|
174
|
+
# >
|
175
|
+
# > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
|
176
|
+
# >
|
177
|
+
# > Though you could make it more generic by simply asking
|
178
|
+
# > "self.descends_from_active_record?" which is how rails decides if it should
|
179
|
+
# > scope your "find" query for STI models. You can check out "base.rb" in
|
180
|
+
# > activerecord to see that.
|
181
|
+
# but maybe better do the scoping in find_id_by_contents...
|
182
|
+
def retrieve_records(id_arrays, find_options = {})
|
183
|
+
result = []
|
184
|
+
# get objects for each model
|
185
|
+
id_arrays.each do |model, id_array|
|
186
|
+
next if id_array.empty?
|
187
|
+
begin
|
188
|
+
model = model.constantize
|
189
|
+
# merge conditions
|
190
|
+
conditions = combine_conditions([ "#{model.table_name}.#{primary_key} in (?)", id_array.keys ],
|
191
|
+
find_options[:conditions])
|
192
|
+
# fetch
|
193
|
+
tmp_result = model.find(:all, find_options.merge(:conditions => conditions))
|
194
|
+
# set scores and rank
|
195
|
+
tmp_result.each do |record|
|
196
|
+
record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
|
197
|
+
end
|
198
|
+
# merge with result array
|
199
|
+
result.concat tmp_result
|
200
|
+
rescue TypeError
|
201
|
+
raise "#{model} must use :store_class_name option if you want to use multi_search against it.\n#{$!}"
|
488
202
|
end
|
203
|
+
end
|
204
|
+
return result
|
205
|
+
end
|
489
206
|
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
495
|
-
|
496
|
-
|
497
|
-
|
498
|
-
|
207
|
+
def deprecated_options_support(options)
|
208
|
+
if options[:num_docs]
|
209
|
+
logger.warn ":num_docs is deprecated, use :limit instead!"
|
210
|
+
options[:limit] ||= options[:num_docs]
|
211
|
+
end
|
212
|
+
if options[:first_doc]
|
213
|
+
logger.warn ":first_doc is deprecated, use :offset instead!"
|
214
|
+
options[:offset] ||= options[:first_doc]
|
215
|
+
end
|
216
|
+
end
|
499
217
|
|
218
|
+
# combine our conditions with those given by user, if any
|
219
|
+
def combine_conditions(conditions, *additional_conditions)
|
220
|
+
returning conditions do
|
221
|
+
if additional_conditions.any?
|
222
|
+
cust_opts = additional_conditions.dup.flatten
|
223
|
+
conditions.first << " and " << cust_opts.shift
|
224
|
+
conditions.concat(cust_opts)
|
225
|
+
end
|
500
226
|
end
|
501
|
-
|
502
227
|
end
|
228
|
+
|
229
|
+
# creates a new Index::Index instance. Before that, a check is done
|
230
|
+
# to see if the index exists in the file system. If not, index rebuild
|
231
|
+
# from all model data retrieved by find(:all) is triggered.
|
232
|
+
def create_index_instance
|
233
|
+
if aaf_configuration[:remote]
|
234
|
+
RemoteIndex
|
235
|
+
elsif aaf_configuration[:single_index]
|
236
|
+
SharedIndex
|
237
|
+
else
|
238
|
+
LocalIndex
|
239
|
+
end.new(aaf_configuration)
|
240
|
+
end
|
241
|
+
|
503
242
|
end
|
243
|
+
|
504
244
|
end
|
505
245
|
|