sdsykes_acts_as_ferret 0.4.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README +51 -0
- data/bin/aaf_install +25 -0
- data/config/ferret_server.yml +23 -0
- data/doc/README.win32 +23 -0
- data/doc/monit-example +22 -0
- data/init.rb +22 -0
- data/install.rb +18 -0
- data/lib/act_methods.rb +254 -0
- data/lib/acts_as_ferret.rb +151 -0
- data/lib/bulk_indexer.rb +35 -0
- data/lib/class_methods.rb +459 -0
- data/lib/ferret_cap_tasks.rb +21 -0
- data/lib/ferret_extensions.rb +115 -0
- data/lib/ferret_result.rb +36 -0
- data/lib/ferret_server.rb +203 -0
- data/lib/index.rb +31 -0
- data/lib/instance_methods.rb +156 -0
- data/lib/local_index.rb +211 -0
- data/lib/more_like_this.rb +217 -0
- data/lib/multi_index.rb +83 -0
- data/lib/remote_index.rb +50 -0
- data/lib/search_results.rb +53 -0
- data/lib/server_manager.rb +46 -0
- data/lib/shared_index.rb +14 -0
- data/lib/shared_index_class_methods.rb +90 -0
- data/lib/unix_daemon.rb +63 -0
- data/script/ferret_daemon +94 -0
- data/script/ferret_server +10 -0
- data/script/ferret_service +178 -0
- data/sdsykes_acts_as_ferret.gemspec +21 -0
- metadata +99 -0
@@ -0,0 +1,151 @@
|
|
1
|
+
# Copyright (c) 2006 Kasper Weibel Nielsen-Refs, Thomas Lockney, Jens Krämer
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
# of this software and associated documentation files (the "Software"), to deal
|
5
|
+
# in the Software without restriction, including without limitation the rights
|
6
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
# copies of the Software, and to permit persons to whom the Software is
|
8
|
+
# furnished to do so, subject to the following conditions:
|
9
|
+
#
|
10
|
+
# The above copyright notice and this permission notice shall be included in all
|
11
|
+
# copies or substantial portions of the Software.
|
12
|
+
#
|
13
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
|
+
# SOFTWARE.
|
20
|
+
|
21
|
+
require 'active_support'
|
22
|
+
require 'active_record'
|
23
|
+
require 'set'
|
24
|
+
require 'enumerator'
|
25
|
+
require 'ferret'
|
26
|
+
|
27
|
+
require 'bulk_indexer'
|
28
|
+
require 'ferret_extensions'
|
29
|
+
require 'act_methods'
|
30
|
+
require 'search_results'
|
31
|
+
require 'class_methods'
|
32
|
+
require 'shared_index_class_methods'
|
33
|
+
require 'ferret_result'
|
34
|
+
require 'instance_methods'
|
35
|
+
|
36
|
+
require 'multi_index'
|
37
|
+
require 'more_like_this'
|
38
|
+
|
39
|
+
require 'index'
|
40
|
+
require 'local_index'
|
41
|
+
require 'shared_index'
|
42
|
+
require 'remote_index'
|
43
|
+
|
44
|
+
require 'ferret_server'
|
45
|
+
|
46
|
+
|
47
|
+
# The Rails ActiveRecord Ferret Mixin.
|
48
|
+
#
|
49
|
+
# This mixin adds full text search capabilities to any Rails model.
|
50
|
+
#
|
51
|
+
# The current version emerged from the original acts_as_ferret plugin done by
|
52
|
+
# Kasper Weibel and a modified version done by Thomas Lockney, which both can be
|
53
|
+
# found on the Ferret Wiki: http://ferret.davebalmain.com/trac/wiki/FerretOnRails.
|
54
|
+
#
|
55
|
+
# basic usage:
|
56
|
+
# include the following in your model class (specifying the fields you want to get indexed):
|
57
|
+
# acts_as_ferret :fields => [ :title, :description ]
|
58
|
+
#
|
59
|
+
# now you can use ModelClass.find_by_contents(query) to find instances of your model
|
60
|
+
# whose indexed fields match a given query. All query terms are required by default, but
|
61
|
+
# explicit OR queries are possible. This differs from the ferret default, but imho is the more
|
62
|
+
# often needed/expected behaviour (more query terms result in fewer results).
|
63
|
+
#
|
64
|
+
# Released under the MIT license.
|
65
|
+
#
|
66
|
+
# Authors:
|
67
|
+
# Kasper Weibel Nielsen-Refs (original author)
|
68
|
+
# Jens Kraemer <jk@jkraemer.net> (active maintainer)
|
69
|
+
#
|
70
|
+
module ActsAsFerret
|
71
|
+
|
72
|
+
# global Hash containing all multi indexes created by all classes using the plugin
|
73
|
+
# key is the concatenation of alphabetically sorted names of the classes the
|
74
|
+
# searcher searches.
|
75
|
+
# Module-wide registry of multi indexes, empty until indexes are registered.
@@multi_indexes = {}

# Returns the module-wide multi index registry (a Hash).
def self.multi_indexes
  @@multi_indexes
end
|
77
|
+
|
78
|
+
# global Hash containing the ferret indexes of all classes using the plugin
|
79
|
+
# key is the index directory.
|
80
|
+
# Module-wide registry of ferret indexes, empty until indexes are registered.
@@ferret_indexes = {}

# Returns the module-wide ferret index registry (a Hash).
def self.ferret_indexes
  @@ferret_indexes
end
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
# Creates +dir+ (including missing parents) unless it already exists as a
# directory or as a symlink.
#
# dir:: path of the directory to create
#
# Returns nil when nothing had to be done, otherwise the result of
# FileUtils.mkdir_p.
def self.ensure_directory(dir)
  # a symlink is left alone even when its target is missing
  return if File.directory?(dir) || File.symlink?(dir)
  FileUtils.mkdir_p(dir)
end
|
88
|
+
|
89
|
+
# make sure the default index base dir exists. by default, all indexes are created
|
90
|
+
# under RAILS_ROOT/index/RAILS_ENV
|
91
|
+
# Sets the default index directory to RAILS_ROOT/index/RAILS_ENV.
# Only the path is computed and stored here; nothing is created on disk.
def self.init_index_basedir
  @@index_dir = "#{RAILS_ROOT}/index/#{RAILS_ENV}"
end
|
95
|
+
|
96
|
+
mattr_accessor :index_dir
|
97
|
+
init_index_basedir
|
98
|
+
|
99
|
+
# Hook run when this module is included into +base+: performs the regular
# inclusion and additionally extends +base+ with ClassMethods.
def self.append_features(base)
  super(base)
  base.extend ClassMethods
end
|
103
|
+
|
104
|
+
# builds a FieldInfos instance for creation of an index containing fields
|
105
|
+
# for the given model classes.
|
106
|
+
# Builds the Ferret::Index::FieldInfos describing an index that holds the
# given model classes.
#
# models:: collection of model classes; each must respond to
#          +aaf_configuration+ with a Hash containing :ferret_fields and,
#          optionally, :store_class_name
#
# Returns the populated FieldInfos instance.
def self.field_infos(models)
  # index-wide defaults for every field
  infos = Ferret::Index::FieldInfos.new(:store       => :no,
                                        :index       => :yes,
                                        :term_vector => :no,
                                        :boost       => 1.0)
  # primary key
  infos.add_field(:id, :store => :yes, :index => :untokenized)

  merged_fields = {}
  class_name_added = false
  models.each do |model|
    # same-named fields of later models override those of earlier ones
    merged_fields.update(model.aaf_configuration[:ferret_fields])
    next if class_name_added || !model.aaf_configuration[:store_class_name]
    # class_name is added once, as soon as any model asks for it
    infos.add_field(:class_name, :store => :yes, :index => :untokenized)
    class_name_added = true
  end

  merged_fields.each_pair do |name, configured|
    # work on a copy so the model's configuration stays untouched
    field_options = configured.dup
    # a Symbol-valued boost is dropped before the options reach Ferret
    field_options.delete(:boost) if Symbol === field_options[:boost]
    infos.add_field(name, { :store => :no, :index => :yes }.update(field_options))
  end
  infos
end
|
132
|
+
|
133
|
+
# Closes every registered multi index and empties the registry.
#
# Done "just in case": it seems to fix a strange test failure that looks
# related to a multi_index reading an old version of the content_base index.
#
# TODO only close those where necessary (watch inheritance, where
# self.name is base class of a class where key is made from)
def self.close_multi_indexes
  multi_indexes.each_value { |index| index.close }
  multi_indexes.clear
end
|
145
|
+
|
146
|
+
end
|
147
|
+
|
148
|
+
# include acts_as_ferret method into ActiveRecord::Base
|
149
|
+
ActiveRecord::Base.extend ActsAsFerret::ActMethods
|
150
|
+
|
151
|
+
|
data/lib/bulk_indexer.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module ActsAsFerret
  # Adds batches of records to an index and logs the progress of the
  # overall bulk/re-index job.
  class BulkIndexer
    # args:
    # batch_size:: number of records per batch, used for the time estimate
    #              (default 1000)
    # logger::     receives one +info+ line per batch; optional
    # model::      model class being indexed; its +name+ appears in the log
    #              line and its +count+ is the total when :reindex is set
    # index::      target index, documents are appended with <<
    # reindex::    when true the total record count is taken from the model
    # total::      total number of records when not re-indexing; optional
    def initialize(args = {})
      @batch_size = args[:batch_size] || 1000
      @logger = args[:logger]
      @model = args[:model]
      @work_done = 0
      @index = args[:index]
      if args[:reindex]
        @reindex = true
        @model_count = @model.count.to_f
      else
        # nil.to_f is 0.0, so a missing :total simply means "unknown"
        @model_count = args[:total].to_f
      end
    end

    # Adds all +records+ that have ferret enabled to the index and logs
    # percentage done plus an estimate of the remaining time.
    #
    # records:: batch of records responding to +to_doc+ and +ferret_enabled?+
    # offset::  position of this batch within the whole job
    def index_records(records, offset)
      batch_time = measure_time {
        records.each { |rec| @index << rec.to_doc if rec.ferret_enabled?(true) }
      }.to_f
      # the percentage is left unchanged when the total is unknown
      @work_done = offset.to_f / @model_count * 100.0 if @model_count > 0
      remaining_time = ( batch_time / @batch_size ) * ( @model_count - offset + @batch_size )
      # offsets may exceed the total (e.g. when record ids are used as
      # offsets); never report a negative estimate
      remaining_time = 0.0 if remaining_time < 0
      return unless @logger
      @logger.info "#{@reindex ? 're' : 'bulk '}index model #{@model.name} : #{'%.2f' % @work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
    end

    # Runs the given block and returns the elapsed wall clock time in seconds.
    def measure_time
      started_at = Time.now
      yield
      Time.now - started_at
    end

  end

end
|
@@ -0,0 +1,459 @@
|
|
1
|
+
module ActsAsFerret
|
2
|
+
|
3
|
+
module ClassMethods
|
4
|
+
|
5
|
+
# Disables ferret index updates for this model. When a block is given,
|
6
|
+
# Ferret will be re-enabled again after executing the block.
|
7
|
+
# Disables ferret index updates for this model. When a block is given,
# Ferret is re-enabled after the block has run, even if the block raises.
#
# Returns nil without a block, otherwise the result of enable_ferret.
def disable_ferret
  aaf_configuration[:enabled] = false
  return unless block_given?
  reenabled = nil
  begin
    yield
  ensure
    # an exception in the block must not leave indexing switched off
    reenabled = enable_ferret
  end
  reenabled
end
|
14
|
+
|
15
|
+
# Switches ferret index updates for this model on.
# Returns true.
def enable_ferret
  config = aaf_configuration
  config[:enabled] = true
end
|
18
|
+
|
19
|
+
# Returns the current value of the :enabled flag of this model's
# acts_as_ferret configuration.
def ferret_enabled?
  config = aaf_configuration
  config[:enabled]
end
|
22
|
+
|
23
|
+
# rebuild the index from all data stored for this model.
|
24
|
+
# This is called automatically when no index exists yet.
|
25
|
+
#
|
26
|
+
# When calling this method manually, you can give any additional
|
27
|
+
# model classes that should also go into this index as parameters.
|
28
|
+
# Useful when using the :single_index option.
|
29
|
+
# Note that attributes named the same in different models will share
|
30
|
+
# the same field options in the shared index.
|
31
|
+
# Rebuilds the index from all data stored for this model and any additional
# model classes given. Afterwards, unless the index is remote, this class is
# switched to the latest index version found below :index_base_dir.
#
# models:: additional model classes that go into the same index; this class
#          is added automatically
def rebuild_index(*models)
  models << self unless models.include?(self)
  aaf_index.rebuild_index models.map(&:to_s)
  # the explicit receiver is required: a bare "index_dir = ..." would only
  # assign a local variable and never invoke the index_dir= writer
  self.index_dir = find_last_index_version(aaf_configuration[:index_base_dir]) unless aaf_configuration[:remote]
end
|
36
|
+
|
37
|
+
# re-index a number of records specified by the given ids. Use for large
|
38
|
+
# indexing jobs i.e. after modifying a lot of records with Ferret disabled.
|
39
|
+
# Please note that the state of Ferret (enabled or disabled at class or
|
40
|
+
# record level) is not checked by this method, so if you need to do so
|
41
|
+
# (e.g. because of a custom ferret_enabled? implementation), you have to do
|
42
|
+
# so yourself.
|
43
|
+
# Hands the given ids to the index for bulk indexing.
#
# ids:: either a list of ids or a single Enumerable of ids, optionally
#       followed by an options Hash that is passed on to the index
def bulk_index(*ids)
  # a trailing Hash is the options argument, not an id
  options = ids.last.is_a?(Hash) ? ids.pop : {}
  # bulk_index([1, 2, 3]) and bulk_index(1, 2, 3) are equivalent
  ids = ids.first if ids.size == 1 && Enumerable === ids.first
  aaf_index.bulk_index(ids, options)
end
|
48
|
+
|
49
|
+
# true if our db and table appear to be suitable for the mysql fast batch
|
50
|
+
# hack (see
|
51
|
+
# http://weblog.jamisbuck.org/2007/4/6/faking-cursors-in-activerecord)
|
52
|
+
# Returns true (after logging a hint) when the connection class name contains
# "Mysql", the primary key is 'id' and :mysql_fast_batches is configured;
# returns nil otherwise.
def use_fast_batches?
  return unless connection.class.name =~ /Mysql/
  return unless primary_key == 'id'
  return unless aaf_configuration[:mysql_fast_batches]
  logger.info "using mysql specific batched find :all. Turn off with :mysql_fast_batches => false if you encounter problems (i.e. because of non-integer UUIDs in the id column)"
  true
end
|
58
|
+
|
59
|
+
# runs across all records yielding those to be indexed when the index is rebuilt
|
60
|
+
# Runs across all records inside a transaction, yielding them in batches
# when the index is rebuilt.
#
# batch_size:: maximum number of records per batch (default 1000)
#
# Yields the batch and an offset: the id of the batch's last record in the
# fast-batch case, otherwise the row offset the batch starts at.
def records_for_rebuild(batch_size = 1000)
  transaction do
    if use_fast_batches?
      offset = 0
      loop do
        # Paging by "id > last seen id" is only correct when rows come back
        # in id order, so the ordering is requested explicitly.
        rows = find(:all, :conditions => [ "#{table_name}.id > ?", offset ],
                          :order      => "#{table_name}.id ASC",
                          :limit      => batch_size)
        break unless rows.any?
        offset = rows.last.id
        yield rows, offset
      end
    else
      # LIMIT/OFFSET without a defined ordering may return overlapping or
      # incomplete batches, so always order by primary key
      order = "#{primary_key} ASC"
      0.step(count, batch_size) do |offset|
        yield find( :all, :limit => batch_size, :offset => offset, :order => order ), offset
      end
    end
  end
end
|
77
|
+
|
78
|
+
# yields the records with the given ids, in batches of batch_size
|
79
|
+
# Yields the records with the given ids, in batches of batch_size, inside a
# transaction.
#
# ids::        Enumerable of primary key values
# batch_size:: maximum number of ids looked up per query (default 1000)
#
# Yields the records found for the slice and the offset of that slice
# within +ids+.
def records_for_bulk_index(ids, batch_size = 1000)
  transaction do
    offset = 0
    ids.each_slice(batch_size) do |id_slice|
      logger.debug "########## slice: #{id_slice.join(',')}"
      # one query per slice, looked up via the model's real primary key
      records = find( :all, :conditions => ["#{table_name}.#{primary_key} in (?)", id_slice] )
      logger.debug "########## slice records: #{records.inspect}"
      yield records, offset
      offset += batch_size
    end
  end
end
|
92
|
+
|
93
|
+
# Switches this class to a new index located in dir.
|
94
|
+
# Used by the DRb server when switching to a new index version.
|
95
|
+
# Points this class at the index located in +dir+: stores the directory in
# the configuration (:index_dir and the :path entry of the :ferret options)
# and then reopens the index.
def index_dir=(dir)
  logger.debug "changing index dir to #{dir}"
  config = aaf_configuration
  config[:ferret][:path] = dir
  config[:index_dir] = dir
  # aaf_index is looked up only after the configuration has been changed
  aaf_index.reopen!
  logger.debug "index dir is now #{dir}"
end
|
101
|
+
|
102
|
+
# Retrieve the index instance for this model class. This can either be a
|
103
|
+
# LocalIndex, or a RemoteIndex instance.
|
104
|
+
#
|
105
|
+
# Index instances are stored in a hash, using the index directory
|
106
|
+
# as the key. So model classes sharing a single index will share their
|
107
|
+
# Index object, too.
|
108
|
+
# Returns the index instance registered for this model's index directory,
# creating and registering it on first use.
def aaf_index
  registry = ActsAsFerret.ferret_indexes
  directory = aaf_configuration[:index_dir]
  registry[directory] ||= create_index_instance
end
|
111
|
+
|
112
|
+
# Finds instances by searching the Ferret index. Terms are ANDed by default, use
|
113
|
+
# OR between terms for ORed queries. Or specify +:or_default => true+ in the
|
114
|
+
# +:ferret+ options hash of acts_as_ferret.
|
115
|
+
#
|
116
|
+
# You may either use the +offset+ and +limit+ options to implement your own
|
117
|
+
# pagination logic, or use the +page+ and +per_page+ options to use the
|
118
|
+
# built in pagination support which is compatible with will_paginate's view
|
119
|
+
# helpers. If +page+ and +per_page+ are given, +offset+ and +limit+ will be
|
120
|
+
# ignored.
|
121
|
+
#
|
122
|
+
# == options:
|
123
|
+
# page:: page of search results to retrieve
|
124
|
+
# per_page:: number of search results that are displayed per page
|
125
|
+
# offset:: first hit to retrieve (useful for paging)
|
126
|
+
# limit:: number of hits to retrieve, or :all to retrieve
|
127
|
+
# all results
|
128
|
+
# lazy:: Array of field names whose contents should be read directly
|
129
|
+
# from the index. Those fields have to be marked
|
130
|
+
# +:store => :yes+ in their field options. Give true to get all
|
131
|
+
# stored fields. Note that if you have a shared index, you have
|
132
|
+
# to explicitly state the fields you want to fetch, true won't
|
133
|
+
# work here)
|
134
|
+
# models:: only for single_index scenarios: an Array of other Model classes to
|
135
|
+
# include in this search. Use :all to query all models.
|
136
|
+
# multi:: Specify additional model classes to search through. Each of
|
137
|
+
# these, as well as this class, has to have the
|
138
|
+
# :store_class_name option set to true. This option replaces the
|
139
|
+
# multi_search method.
|
140
|
+
#
|
141
|
+
# +find_options+ is a hash passed on to active_record's find when
|
142
|
+
# retrieving the data from db, useful to i.e. prefetch relationships with
|
143
|
+
# :include or to specify additional filter criteria with :conditions.
|
144
|
+
#
|
145
|
+
# This method returns a +SearchResults+ instance, which really is an Array that has
|
146
|
+
# been decorated with a total_hits attribute holding the total number of hits.
|
147
|
+
# Additionally, SearchResults is compatible with the pagination helper
|
148
|
+
# methods of the will_paginate plugin.
|
149
|
+
#
|
150
|
+
# Please keep in mind that the number of results delivered might be less than
|
151
|
+
# +limit+ if you specify any active record conditions that further limit
|
152
|
+
# the result. Use +limit+ and +offset+ as AR find_options instead.
|
153
|
+
# +page+ and +per_page+ are supposed to work regardless of any
|
154
|
+
# +conditions+ present in +find_options+.
|
155
|
+
# Searches the Ferret index and returns a SearchResults instance.
#
# q::            the query
# options::      search options (:page, :per_page, :offset, :limit, :lazy,
#                :multi, ...)
# find_options:: options for the database lookup of the hits
#
# Neither hash is modified: limit/offset are shuffled between private copies
# depending on whether Ferret or the database has to do the paging.
def find_with_ferret(q, options = {}, find_options = {})
  # work on copies so the caller can reuse its hashes for the next call
  options = options.dup
  find_options = find_options.dup

  if options[:per_page]
    options[:page] = options[:page] ? options[:page].to_i : 1
    limit = options[:per_page]
    offset = (options[:page] - 1) * limit
    if find_options[:conditions] && !options[:multi]
      # conditions filter the hits further, so the db has to page
      find_options[:limit] = limit
      find_options[:offset] = offset
      options[:limit] = :all
      options.delete :offset
    else
      # do pagination with ferret (or after everything is done in the case
      # of multi_search)
      options[:limit] = limit
      options[:offset] = offset
    end
  elsif find_options[:conditions]
    if options[:multi]
      # multisearch ignores find_options limit and offset
      options[:limit] ||= find_options.delete(:limit)
      options[:offset] ||= find_options.delete(:offset)
    else
      # let the db do the limiting and offsetting for single-table searches
      find_options[:limit] ||= options.delete(:limit)
      find_options[:offset] ||= options.delete(:offset)
      options[:limit] = :all
    end
  end

  total_hits, result = if options[:multi].blank?
    find_records_lazy_or_not q, options, find_options
  else
    _multi_search q, options.delete(:multi), options, find_options
  end
  logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
  SearchResults.new(result, total_hits, options[:page], options[:per_page])
end
alias find_by_contents find_with_ferret
|
193
|
+
|
194
|
+
|
195
|
+
|
196
|
+
# Returns the total number of hits for the given query
|
197
|
+
# To count the results of a query across multiple models, specify an array of
|
198
|
+
# class names with the :multi option.
|
199
|
+
#
|
200
|
+
# Note that since we don't query the database here, this method won't deliver
|
201
|
+
# the expected results when used on an AR association.
|
202
|
+
# Returns the total number of hits the index reports for the query.
#
# q::       the query
# options:: passed on to the index; :multi may name additional model classes
#           (the deprecated :models option is mapped to :multi)
def total_hits(q, options={})
  if options[:models]
    # backwards compatibility
    logger.warn "the :models option of total_hits is deprecated, please use :multi instead"
    options[:multi] = options[:models]
  end
  multi = options[:multi]
  # the index receives class names, with this class always included
  options[:multi] = add_self_to_model_list_if_necessary(multi).map { |model| model.to_s } if multi
  aaf_index.total_hits(q, options)
end
|
213
|
+
|
214
|
+
# Finds instance model name, ids and scores by contents.
|
215
|
+
# Useful e.g. if you want to search across models or do not want to fetch
|
216
|
+
# all result records (yet).
|
217
|
+
#
|
218
|
+
# Options are the same as for find_by_contents
|
219
|
+
#
|
220
|
+
# A block can be given too, it will be executed with every result:
|
221
|
+
# find_id_by_contents(q, options) do |model, id, score|
|
222
|
+
# id_array << id
|
223
|
+
# scores_by_id[id] = score
|
224
|
+
# end
|
225
|
+
# NOTE: in case a block is given, only the total_hits value will be returned
|
226
|
+
# instead of the [total_hits, results] array!
|
227
|
+
#
|
228
|
+
# Maps deprecated option names to their replacements, then forwards the
# query, the options and the optional block to the index.
def find_id_by_contents(q, options = {}, &block)
  deprecated_options_support(options)
  index = aaf_index
  index.find_id_by_contents(q, options, &block)
end
|
232
|
+
|
233
|
+
|
234
|
+
# returns an array of hashes, each containing :class_name,
|
235
|
+
# :id and :score for a hit.
|
236
|
+
#
|
237
|
+
# if a block is given, class_name, id and score of each hit will
|
238
|
+
# be yielded, and the total number of hits is returned.
|
239
|
+
# Searches across this class and +additional_models+ by forwarding the query,
# the class names, the options and the optional block to the index.
def id_multi_search(query, additional_models = [], options = {}, &proc)
  deprecated_options_support(options)
  model_names = add_self_to_model_list_if_necessary(additional_models).map { |model| model.to_s }
  aaf_index.id_multi_search(query, model_names, options, &proc)
end
|
244
|
+
|
245
|
+
|
246
|
+
protected
|
247
|
+
|
248
|
+
# Runs a search across several models.
#
# With :lazy the hits are wrapped in FerretResult objects straight from the
# index. Otherwise all hits are fetched, the records are loaded via
# retrieve_records, and limit/offset are applied to the loaded records.
#
# Returns [total_hits, result]; +result+ is always an Array.
def _multi_search(query, additional_models = [], options = {}, find_options = {})
  result = []

  if options[:lazy]
    logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
    total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
      result << FerretResult.new(model, id, score, data)
    end
  else
    id_arrays = {}
    rank = 0

    # paging happens after the records have been loaded, so ask the index
    # for everything
    limit = options.delete(:limit)
    offset = options.delete(:offset) || 0
    options[:limit] = :all
    total_hits = id_multi_search(query, additional_models, options) do |model, id, score, data|
      id_arrays[model] ||= {}
      id_arrays[model][id] = [ rank += 1, score ]
    end
    result = retrieve_records(id_arrays, find_options)
    total_hits = result.size if find_options[:conditions]
    if limit && limit != :all
      # slicing yields nil when offset lies beyond the last record; callers
      # expect an (empty) Array
      result = result[offset, limit] || []
    end
  end
  [total_hits, result]
end
|
276
|
+
|
277
|
+
# Returns a new Array holding the given model(s) plus this class, if it is
# not already included.
#
# models:: a single model or an Array of models; the Array is not modified
def add_self_to_model_list_if_necessary(models)
  # copy, so self is not appended to an Array owned by the caller
  models = models.is_a?(Array) ? models.dup : [ models ]
  models << self unless models.include?(self)
  models
end
|
282
|
+
|
283
|
+
# Dispatches to the lazy search when options[:lazy] is set, otherwise to the
# ActiveRecord-backed search. find_options are not used by the lazy search;
# a warning is logged when any were given.
def find_records_lazy_or_not(q, options = {}, find_options = {})
  return ar_find_by_contents(q, options, find_options) unless options[:lazy]

  logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
  lazy_find_by_contents(q, options)
end
|
291
|
+
|
292
|
+
# Searches the index, loads the matching records and works out the total
# number of hits.
#
# Returns [total_hits, records]. total_hits is the index's own count unless
# conditions are given or the call comes from an ActiveRecord association;
# then it is either the size of the loaded result or, when the search was
# limited in any way, a count obtained via count_records.
def ar_find_by_contents(q, options = {}, find_options = {})
  ranked_ids = {}
  total_hits = find_id_by_contents(q, options) do |model, id, score, data|
    # remember position and score of each hit for later ordering
    ranked_ids[id] = [ ranked_ids.size + 1, score ]
  end

  result = retrieve_records( { self.name => ranked_ids }, find_options )

  # the count from ferret is only trustworthy without conditions and outside
  # of ActiveRecord associations
  needs_recount = find_options[:conditions] ||
                  caller.find { |call| call =~ %r{active_record/associations} }
  return [ total_hits, result ] unless needs_recount

  limited = options[:limit] != :all || options[:page] || options[:offset] ||
            find_options[:limit] || find_options[:offset]
  # what we got from the database is the full result set, so take its size
  return [ result.length, result ] unless limited

  # the ferret result has been limited: re-run the search without limits and
  # let the database count the final result set
  all_ids = {}
  find_id_by_contents(q, options.update(:limit => :all, :offset => 0)) do |model, id, score, data|
    all_ids[id] = [ all_ids.size + 1, score ]
  end
  [ count_records( { self.name => all_ids }, find_options ), result ]
end
|
325
|
+
|
326
|
+
# Searches the index and wraps every hit in a FerretResult instead of
# loading records.
#
# Returns [total_hits, results].
def lazy_find_by_contents(q, options = {})
  hits = []
  total_hits = find_id_by_contents(q, options) do |model, id, score, data|
    hits.push FerretResult.new(model, id, score, data)
  end
  [ total_hits, hits ]
end
|
333
|
+
|
334
|
+
|
335
|
+
# Resolves the class named +model+ and calls its +find+ with +id+ and
# +find_options+.
def model_find(model, id, find_options = {})
  klass = model.constantize
  klass.find(id, find_options)
end
|
338
|
+
|
339
|
+
# retrieves search result records from a data structure like this:
|
340
|
+
# { 'Model1' => { '1' => [ rank, score ], '2' => [ rank, score ] }
|
341
|
+
#
|
342
|
+
# TODO: in case of STI AR will filter out hits from other
|
343
|
+
# classes for us, but this
|
344
|
+
# will lead to less results retrieved --> scoping of ferret query
|
345
|
+
# to self.class is still needed.
|
346
|
+
# from the ferret ML (thanks Curtis Hatter)
|
347
|
+
# > I created a method in my base STI class so I can scope my query. For scoping
|
348
|
+
# > I used something like the following line:
|
349
|
+
# >
|
350
|
+
# > query << " role:#{self.class.eql?(Contents) '*' : self.class}"
|
351
|
+
# >
|
352
|
+
# > Though you could make it more generic by simply asking
|
353
|
+
# > "self.descends_from_active_record?" which is how rails decides if it should
|
354
|
+
# > scope your "find" query for STI models. You can check out "base.rb" in
|
355
|
+
# > activerecord to see that.
|
356
|
+
# but maybe better do the scoping in find_id_by_contents...
|
357
|
+
# Loads the records for the given hits and decorates them with their ferret
# rank and score.
#
# id_arrays::    { 'ModelName' => { 'id' => [ rank, score ], ... }, ... }
# find_options:: passed on to each model's find; :conditions are combined
#                with the id filter, :include is reduced to the associations
#                the respective model actually has
#
# Returns the records of all models, ordered by rank unless an :order
# option was given.
def retrieve_records(id_arrays, find_options = {})
  result = []
  # get objects for each model
  id_arrays.each do |model, id_array|
    next if id_array.empty?
    begin
      model = model.constantize
    rescue
      raise "Please use ':store_class_name => true' if you want to use multi_search.\n#{$!}"
    end

    # merge conditions
    conditions = combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)",
                                      id_array.keys ],
                                    find_options[:conditions])

    # check for include association that might only exist on some models in case of multi_search
    filtered_include_options = []
    if include_options = find_options[:include]
      # A lone nested Hash counts as a single include entry; iterating it
      # directly would yield [key, value] pairs instead.
      if include_options.is_a?(Hash) || !include_options.respond_to?(:each)
        include_options = [ include_options ]
      end
      include_options.each do |include_option|
        association = include_option.is_a?(Hash) ? include_option.keys[0] : include_option
        filtered_include_options << include_option if model.reflections.has_key?(association.to_sym)
      end
    end
    filtered_include_options = nil if filtered_include_options.empty?

    # fetch
    tmp_result = model.find(:all, find_options.merge(:conditions => conditions,
                                                     :include    => filtered_include_options))

    # set scores and rank
    tmp_result.each do |record|
      record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
    end
    # merge with result array
    result.concat tmp_result
  end

  # order results as they were found by ferret, unless an AR :order
  # option was given
  result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
  return result
end
|
400
|
+
|
401
|
+
# Counts, via the database, how many of the given hits match find_options.
#
# id_arrays::    { 'ModelName' => { 'id' => [ rank, score ], ... }, ... }
# find_options:: as given to the search; :limit and :offset are ignored
#
# Returns the summed count over all models.
def count_records(id_arrays, find_options = {})
  # limit and offset describe the page being fetched, not the size of the
  # whole result set
  count_options = find_options.dup
  count_options.delete :limit
  count_options.delete :offset
  count = 0
  id_arrays.each do |model, id_array|
    next if id_array.empty?
    begin
      model = model.constantize
      # merge conditions
      conditions = combine_conditions([ "#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ],
                                      find_options[:conditions])
      count += model.count(count_options.merge(:conditions => conditions))
    rescue TypeError
      raise "#{model} must use :store_class_name option if you want to use multi_search against it.\n#{$!}"
    end
  end
  count
end
|
422
|
+
|
423
|
+
# Maps the deprecated :num_docs / :first_doc options to :limit / :offset
# (without overriding values already present) and logs a warning for each
# deprecated option found. Modifies +options+ in place.
def deprecated_options_support(options)
  num_docs = options[:num_docs]
  if num_docs
    logger.warn ":num_docs is deprecated, use :limit instead!"
    options[:limit] ||= num_docs
  end
  first_doc = options[:first_doc]
  if first_doc
    logger.warn ":first_doc is deprecated, use :offset instead!"
    options[:offset] ||= first_doc
  end
end
|
433
|
+
|
434
|
+
# creates a new Index instance.
|
435
|
+
# Creates a new index instance for this model: a RemoteIndex when :remote is
# configured, else a SharedIndex when :single_index is configured, else a
# LocalIndex. The configuration is handed to the constructor.
def create_index_instance
  config = aaf_configuration
  index_class = case
                when config[:remote]       then RemoteIndex
                when config[:single_index] then SharedIndex
                else LocalIndex
                end
  index_class.new(config)
end
|
444
|
+
|
445
|
+
# combine our conditions with those given by user, if any
|
446
|
+
# Combines our conditions with those given by the user, if any.
#
# conditions::            Array of SQL fragment followed by its bind values;
#                         extended in place
# additional_conditions:: nil, a SQL String, or an Array of SQL fragment
#                         followed by bind values; left untouched
#
# Returns +conditions+.
def combine_conditions(conditions, additional_conditions = [])
  return conditions unless additional_conditions
  # Strings have no any? on Ruby 1.9+, so test them for emptiness instead
  given = if additional_conditions.respond_to?(:any?)
            additional_conditions.any?
          else
            !additional_conditions.to_s.empty?
          end
  return conditions unless given

  cust_opts = additional_conditions.respond_to?(:shift) ? additional_conditions.dup : [ additional_conditions ]
  # String#+ raises a TypeError for anything that is not a String, so
  # unsupported condition formats still fail loudly
  conditions[0] = conditions.first + " and " + cust_opts.shift
  conditions.concat(cust_opts)
end
|
455
|
+
|
456
|
+
end
|
457
|
+
|
458
|
+
end
|
459
|
+
|