acts_as_ferret 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/act_methods.rb +81 -33
- data/lib/acts_as_ferret.rb +42 -28
- data/lib/class_methods.rb +123 -52
- data/lib/ferret_cap_tasks.rb +21 -0
- data/lib/ferret_extensions.rb +81 -0
- data/lib/ferret_server.rb +111 -69
- data/lib/index.rb +1 -1
- data/lib/local_index.rb +26 -74
- data/lib/more_like_this.rb +15 -7
- data/lib/multi_index.rb +2 -3
- data/rakefile +3 -4
- data/script/ferret_start +9 -4
- data/script/ferret_stop +5 -1
- metadata +19 -17
data/lib/act_methods.rb
CHANGED
@@ -37,33 +37,54 @@ module ActsAsFerret #:nodoc:
|
|
37
37
|
# this to true. the model class name will be stored in a keyword field
|
38
38
|
# named class_name
|
39
39
|
#
|
40
|
-
#
|
41
|
-
#
|
42
|
-
#
|
40
|
+
# reindex_batch_size:: reindexing is done in batches of this size, default is 1000
|
41
|
+
#
|
42
|
+
# ferret:: Hash of Options that directly influence the way the Ferret engine works. You
|
43
|
+
# can use most of the options the Ferret::I class accepts here, too. Among the
|
44
|
+
# more useful are:
|
45
|
+
#
|
46
|
+
# or_default:: whether query terms are required by
|
47
|
+
# default (the default, false), or not (true)
|
43
48
|
#
|
44
|
-
#
|
45
|
-
#
|
49
|
+
# analyzer:: the analyzer to use for query parsing (default: nil,
|
50
|
+
# which means the ferret StandardAnalyzer gets used)
|
46
51
|
#
|
47
|
-
#
|
48
|
-
#
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
#
|
55
|
-
#
|
56
|
-
#
|
52
|
+
# default_field:: use to set one or more fields that are searched for query terms
|
53
|
+
# that don't have an explicit field list. This list should *not*
|
54
|
+
# contain any untokenized fields. If it does, you're asking
|
55
|
+
# for trouble (i.e. not getting results for queries having
|
56
|
+
# stop words in them). Aaf by default initializes the default field
|
57
|
+
# list to contain all tokenized fields. If you use :single_index => true,
|
58
|
+
# you really should set this option specifying your default field
|
59
|
+
# list (which should be equal in all your classes sharing the index).
|
60
|
+
# Otherwise you might get incorrect search results and you won't get
|
61
|
+
# any lazy loading of stored field data.
|
57
62
|
#
|
63
|
+
# For backward compatibility reasons you can also specify the Ferret options in the
|
64
|
+
# last Hash argument.
|
58
65
|
def acts_as_ferret(options={}, ferret_options={})
|
59
66
|
|
60
67
|
# force local mode if running *inside* the Ferret server - somewhere the
|
61
68
|
# real indexing has to be done after all :-)
|
62
|
-
|
69
|
+
# Usually the automatic detection of server mode works fine, however if you
|
70
|
+
# require your model classes in environment.rb they will get loaded before the
|
71
|
+
# DRb server is started, so this code is executed too early and detection won't
|
72
|
+
# work. In this case you'll get endless loops resulting in "stack level too deep"
|
73
|
+
# errors.
|
74
|
+
# To get around this, start the server with the environment variable
|
75
|
+
# FERRET_USE_LOCAL_INDEX set to '1'.
|
76
|
+
logger.debug "Asked for a remote server ? #{options[:remote].inspect}, ENV[\"FERRET_USE_LOCAL_INDEX\"] is #{ENV["FERRET_USE_LOCAL_INDEX"].inspect}, looks like we are#{ActsAsFerret::Remote::Server.running || ENV['FERRET_USE_LOCAL_INDEX'] ? '' : ' not'} the server"
|
77
|
+
options.delete(:remote) if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
|
63
78
|
|
64
79
|
if options[:remote] && options[:remote] !~ /^druby/
|
65
80
|
# read server location from config/ferret_server.yml
|
66
|
-
options[:remote] = ActsAsFerret::Remote::Config.load("#{RAILS_ROOT}/config/ferret_server.yml")[:uri]
|
81
|
+
options[:remote] = ActsAsFerret::Remote::Config.load("#{RAILS_ROOT}/config/ferret_server.yml")[:uri] rescue nil
|
82
|
+
end
|
83
|
+
|
84
|
+
if options[:remote]
|
85
|
+
logger.debug "Will use remote index server which should be available at #{options[:remote]}"
|
86
|
+
else
|
87
|
+
logger.debug "Will use local index."
|
67
88
|
end
|
68
89
|
|
69
90
|
|
@@ -87,31 +108,38 @@ module ActsAsFerret #:nodoc:
|
|
87
108
|
:name => self.table_name,
|
88
109
|
:class_name => self.name,
|
89
110
|
:single_index => false,
|
90
|
-
:
|
91
|
-
|
92
|
-
|
93
|
-
:default_field => nil # will be set later on
|
94
|
-
#:max_clauses => 512,
|
95
|
-
#:analyzer => Ferret::Analysis::StandardAnalyzer.new,
|
96
|
-
# :wild_card_downcase => true
|
97
|
-
}
|
111
|
+
:reindex_batch_size => 1000,
|
112
|
+
:ferret => {}, # Ferret config Hash
|
113
|
+
:ferret_fields => {} # list of indexed fields that will be filled later
|
98
114
|
}
|
99
115
|
|
100
116
|
# merge aaf options with args
|
101
117
|
aaf_configuration.update(options) if options.is_a?(Hash)
|
102
|
-
|
103
|
-
# list of indexed fields will be filled later
|
104
|
-
aaf_configuration[:ferret_fields] = Hash.new
|
105
|
-
|
106
118
|
# apply appropriate settings for shared index
|
107
119
|
if aaf_configuration[:single_index]
|
108
120
|
aaf_configuration[:index_dir] = "#{ActsAsFerret::index_dir}/shared"
|
109
121
|
aaf_configuration[:store_class_name] = true
|
110
122
|
end
|
111
123
|
|
112
|
-
#
|
124
|
+
# set ferret default options
|
125
|
+
aaf_configuration[:ferret].reverse_merge!( :or_default => false,
|
126
|
+
:handle_parse_errors => true,
|
127
|
+
:default_field => nil # will be set later on
|
128
|
+
#:max_clauses => 512,
|
129
|
+
#:analyzer => Ferret::Analysis::StandardAnalyzer.new,
|
130
|
+
# :wild_card_downcase => true
|
131
|
+
)
|
132
|
+
|
133
|
+
# merge ferret options with those from second parameter hash
|
113
134
|
aaf_configuration[:ferret].update(ferret_options) if ferret_options.is_a?(Hash)
|
114
135
|
|
136
|
+
unless options[:remote]
|
137
|
+
ActsAsFerret::ensure_directory aaf_configuration[:index_dir]
|
138
|
+
aaf_configuration[:index_base_dir] = aaf_configuration[:index_dir]
|
139
|
+
aaf_configuration[:index_dir] = find_last_index_version(aaf_configuration[:index_dir])
|
140
|
+
logger.debug "using index in #{aaf_configuration[:index_dir]}"
|
141
|
+
end
|
142
|
+
|
115
143
|
# these properties are somewhat vital to the plugin and shouldn't
|
116
144
|
# be overwritten by the user:
|
117
145
|
aaf_configuration[:ferret].update(
|
@@ -128,8 +156,6 @@ module ActsAsFerret #:nodoc:
|
|
128
156
|
add_fields(aaf_configuration[:additional_fields])
|
129
157
|
end
|
130
158
|
|
131
|
-
ActsAsFerret::ensure_directory aaf_configuration[:index_dir] unless options[:remote]
|
132
|
-
|
133
159
|
# now that all fields have been added, we can initialize the default
|
134
160
|
# field list to be used by the query parser.
|
135
161
|
# It will include all content fields *not* marked as :untokenized.
|
@@ -151,11 +177,32 @@ module ActsAsFerret #:nodoc:
|
|
151
177
|
end
|
152
178
|
end
|
153
179
|
logger.info "default field list: #{aaf_configuration[:ferret][:default_field].inspect}"
|
180
|
+
|
181
|
+
if options[:remote]
|
182
|
+
aaf_index.ensure_index_exists
|
183
|
+
end
|
154
184
|
end
|
155
185
|
|
156
186
|
|
157
187
|
protected
|
158
188
|
|
189
|
+
# find the most recent version of an index
|
190
|
+
def find_last_index_version(basedir)
|
191
|
+
# check for versioned index
|
192
|
+
versions = Dir.entries(basedir).select do |f|
|
193
|
+
dir = File.join(basedir, f)
|
194
|
+
File.directory?(dir) && File.file?(File.join(dir, 'segments')) && f =~ /^\d+(_\d+)?$/
|
195
|
+
end
|
196
|
+
if versions.any?
|
197
|
+
# select latest version
|
198
|
+
versions.sort!
|
199
|
+
File.join basedir, versions.last
|
200
|
+
else
|
201
|
+
basedir
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
|
159
206
|
# helper that defines a method that adds the given field to a ferret
|
160
207
|
# document instance
|
161
208
|
def define_to_field_method(field, options = {})
|
@@ -164,6 +211,7 @@ module ActsAsFerret #:nodoc:
|
|
164
211
|
:index => :yes,
|
165
212
|
:term_vector => :with_positions_offsets,
|
166
213
|
:boost => 1.0 )
|
214
|
+
options[:term_vector] = :no if options[:index] == :no
|
167
215
|
aaf_configuration[:ferret_fields][field] = options
|
168
216
|
define_method("#{field}_to_ferret".to_sym) do
|
169
217
|
begin
|
@@ -178,7 +226,7 @@ module ActsAsFerret #:nodoc:
|
|
178
226
|
end
|
179
227
|
|
180
228
|
def add_fields(field_config)
|
181
|
-
if field_config.
|
229
|
+
if field_config.is_a? Hash
|
182
230
|
field_config.each_pair do |key,val|
|
183
231
|
define_to_field_method(key,val)
|
184
232
|
end
|
data/lib/acts_as_ferret.rb
CHANGED
@@ -23,6 +23,7 @@ require 'active_record'
|
|
23
23
|
require 'set'
|
24
24
|
require 'ferret'
|
25
25
|
|
26
|
+
require 'ferret_extensions'
|
26
27
|
require 'act_methods'
|
27
28
|
require 'class_methods'
|
28
29
|
require 'shared_index_class_methods'
|
@@ -75,6 +76,7 @@ module ActsAsFerret
|
|
75
76
|
# key is the index directory.
|
76
77
|
@@ferret_indexes = Hash.new
|
77
78
|
def self.ferret_indexes; @@ferret_indexes end
|
79
|
+
|
78
80
|
|
79
81
|
# decorator that adds a total_hits accessor to search result arrays
|
80
82
|
class SearchResults
|
@@ -92,7 +94,7 @@ module ActsAsFerret
|
|
92
94
|
end
|
93
95
|
|
94
96
|
def self.ensure_directory(dir)
|
95
|
-
FileUtils.mkdir_p dir unless File.directory? dir
|
97
|
+
FileUtils.mkdir_p dir unless (File.directory?(dir) || File.symlink?(dir))
|
96
98
|
end
|
97
99
|
|
98
100
|
# make sure the default index base dir exists. by default, all indexes are created
|
@@ -110,37 +112,49 @@ module ActsAsFerret
|
|
110
112
|
base.extend(ClassMethods)
|
111
113
|
end
|
112
114
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
115
|
+
# builds a FieldInfos instance for creation of an index containing fields
|
116
|
+
# for the given model classes.
|
117
|
+
def self.field_infos(models)
|
118
|
+
# default attributes for fields
|
119
|
+
fi = Ferret::Index::FieldInfos.new(:store => :no,
|
120
|
+
:index => :yes,
|
121
|
+
:term_vector => :no,
|
122
|
+
:boost => 1.0)
|
123
|
+
# primary key
|
124
|
+
fi.add_field(:id, :store => :yes, :index => :untokenized)
|
125
|
+
fields = {}
|
126
|
+
have_class_name = false
|
127
|
+
models.each do |model|
|
128
|
+
fields.update(model.aaf_configuration[:ferret_fields])
|
129
|
+
# class_name
|
130
|
+
if !have_class_name && model.aaf_configuration[:store_class_name]
|
131
|
+
fi.add_field(:class_name, :store => :yes, :index => :untokenized)
|
132
|
+
have_class_name = true
|
133
|
+
end
|
134
|
+
end
|
135
|
+
fields.each_pair do |field, options|
|
136
|
+
fi.add_field(field, { :store => :no,
|
137
|
+
:index => :yes }.update(options))
|
130
138
|
end
|
131
|
-
|
139
|
+
return fi
|
132
140
|
end
|
133
|
-
end
|
134
141
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
142
|
+
def self.close_multi_indexes
|
143
|
+
# close combined index readers, just in case
|
144
|
+
# this seems to fix a strange test failure that seems to relate to a
|
145
|
+
# multi_index looking at an old version of the content_base index.
|
146
|
+
multi_indexes.each_pair do |key, index|
|
147
|
+
# puts "#{key} -- #{self.name}"
|
148
|
+
# TODO only close those where necessary (watch inheritance, where
|
149
|
+
# self.name is base class of a class where key is made from)
|
150
|
+
index.close #if key =~ /#{self.name}/
|
151
|
+
end
|
152
|
+
multi_indexes.clear
|
139
153
|
end
|
140
154
|
|
141
|
-
def self._load(string)
|
142
|
-
raise "invalid value: #{string}" unless string =~ /^(\w+):<(\w+)>(\!)?$/
|
143
|
-
new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
|
144
|
-
end
|
145
155
|
end
|
146
156
|
|
157
|
+
# include acts_as_ferret method into ActiveRecord::Base
|
158
|
+
ActiveRecord::Base.extend ActsAsFerret::ActMethods
|
159
|
+
|
160
|
+
|
data/lib/class_methods.rb
CHANGED
@@ -12,8 +12,38 @@ module ActsAsFerret
|
|
12
12
|
# the same field options in the shared index.
|
13
13
|
def rebuild_index(*models)
|
14
14
|
models << self unless models.include?(self)
|
15
|
-
aaf_index.rebuild_index
|
16
|
-
|
15
|
+
aaf_index.rebuild_index models.map(&:to_s)
|
16
|
+
index_dir = find_last_index_version(aaf_configuration[:index_base_dir]) unless aaf_configuration[:remote]
|
17
|
+
end
|
18
|
+
|
19
|
+
# runs across all records yielding those to be indexed when the index is rebuilt
|
20
|
+
def records_for_rebuild(batch_size = 1000)
|
21
|
+
transaction do
|
22
|
+
if connection.class.name =~ /Mysql/ && primary_key == 'id'
|
23
|
+
logger.info "using mysql specific batched find :all"
|
24
|
+
offset = 0
|
25
|
+
while (rows = find :all, :conditions => ["id > ?", offset ], :limit => batch_size).any?
|
26
|
+
offset = rows.last.id
|
27
|
+
yield rows, offset
|
28
|
+
end
|
29
|
+
else
|
30
|
+
# sql server adapter won't batch correctly without defined ordering
|
31
|
+
order = "#{primary_key} ASC" if connection.class.name =~ /SQLServer/
|
32
|
+
0.step(self.count, batch_size) do |offset|
|
33
|
+
yield find( :all, :limit => batch_size, :offset => offset, :order => order ), offset
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# Switches this class to a new index located in dir.
|
40
|
+
# Used by the DRb server when switching to a new index version.
|
41
|
+
def index_dir=(dir)
|
42
|
+
logger.debug "changing index dir to #{dir}"
|
43
|
+
aaf_configuration[:index_dir] = aaf_configuration[:ferret][:path] = dir
|
44
|
+
aaf_index.reopen!
|
45
|
+
logger.debug "index dir is now #{dir}"
|
46
|
+
end
|
17
47
|
|
18
48
|
# Retrieve the index instance for this model class. This can either be a
|
19
49
|
# LocalIndex, or a RemoteIndex instance.
|
@@ -25,41 +55,49 @@ module ActsAsFerret
|
|
25
55
|
ActsAsFerret::ferret_indexes[aaf_configuration[:index_dir]] ||= create_index_instance
|
26
56
|
end
|
27
57
|
|
28
|
-
# Finds instances by
|
29
|
-
#
|
30
|
-
# options
|
58
|
+
# Finds instances by searching the Ferret index. Terms are ANDed by default, use
|
59
|
+
# OR between terms for ORed queries. Or specify +:or_default => true+ in the
|
60
|
+
# +:ferret+ options hash of acts_as_ferret.
|
61
|
+
#
|
62
|
+
# == options:
|
31
63
|
# offset:: first hit to retrieve (useful for paging)
|
32
64
|
# limit:: number of hits to retrieve, or :all to retrieve
|
33
65
|
# all results
|
34
66
|
# lazy:: Array of field names whose contents should be read directly
|
35
67
|
# from the index. Those fields have to be marked
|
36
|
-
#
|
37
|
-
# stored fields
|
38
|
-
# explicitly state the fields you want to fetch, true won't
|
39
|
-
# work)
|
68
|
+
# +:store => :yes+ in their field options. Give true to get all
|
69
|
+
# stored fields. Note that if you have a shared index, you have
|
70
|
+
# to explicitly state the fields you want to fetch, true won't
|
71
|
+
# work here.
|
40
72
|
# models:: only for single_index scenarios: an Array of other Model classes to
|
41
73
|
# include in this search. Use :all to query all models.
|
42
74
|
#
|
43
|
-
# find_options is a hash passed on to active_record's find when
|
44
|
-
# retrieving the data from db, useful to i.e. prefetch relationships
|
75
|
+
# +find_options+ is a hash passed on to active_record's find when
|
76
|
+
# retrieving the data from db, useful e.g. to prefetch relationships with
|
77
|
+
# :include or to specify additional filter criteria with :conditions.
|
78
|
+
#
|
79
|
+
# This method returns a +SearchResults+ instance, which really is an Array that has
|
80
|
+
# been decorated with a total_hits attribute holding the total number of hits.
|
45
81
|
#
|
46
|
-
# this method returns a SearchResults instance, which really is an Array that has
|
47
|
-
# been decorated with a total_hits accessor that delivers the total
|
48
|
-
# number of hits (including those not fetched because of a low num_docs
|
49
|
-
# value).
|
50
82
|
# Please keep in mind that the number of total hits might be wrong if you specify
|
51
83
|
# both ferret options and active record find_options that somehow limit the result
|
52
|
-
# set (e.g.
|
53
|
-
def
|
84
|
+
# set (e.g. +:num_docs+ and some +:conditions+).
|
85
|
+
def find_with_ferret(q, options = {}, find_options = {})
|
54
86
|
total_hits, result = find_records_lazy_or_not q, options, find_options
|
55
87
|
logger.debug "Query: #{q}\ntotal hits: #{total_hits}, results delivered: #{result.size}"
|
56
88
|
return SearchResults.new(result, total_hits)
|
57
89
|
end
|
90
|
+
alias find_by_contents find_with_ferret
|
58
91
|
|
59
92
|
|
60
93
|
|
61
|
-
#
|
94
|
+
# Returns the total number of hits for the given query
|
95
|
+
# To count the results of a multi_search query, specify an array of
|
96
|
+
# class names with the :models option.
|
62
97
|
def total_hits(q, options={})
|
98
|
+
if models = options[:models]
|
99
|
+
options[:models] = add_self_to_model_list_if_necessary(models).map(&:to_s)
|
100
|
+
end
|
63
101
|
aaf_index.total_hits(q, options)
|
64
102
|
end
|
65
103
|
|
@@ -81,7 +119,7 @@ module ActsAsFerret
|
|
81
119
|
deprecated_options_support(options)
|
82
120
|
aaf_index.find_id_by_contents(q, options, &block)
|
83
121
|
end
|
84
|
-
|
122
|
+
|
85
123
|
# requires the store_class_name option of acts_as_ferret to be true
|
86
124
|
# for all models queried this way.
|
87
125
|
def multi_search(query, additional_models = [], options = {}, find_options = {})
|
@@ -112,14 +150,18 @@ module ActsAsFerret
|
|
112
150
|
# be yielded, and the total number of hits is returned.
|
113
151
|
def id_multi_search(query, additional_models = [], options = {}, &proc)
|
114
152
|
deprecated_options_support(options)
|
115
|
-
additional_models =
|
116
|
-
additional_models << self
|
153
|
+
additional_models = add_self_to_model_list_if_necessary(additional_models)
|
117
154
|
aaf_index.id_multi_search(query, additional_models.map(&:to_s), options, &proc)
|
118
155
|
end
|
119
156
|
|
120
157
|
|
121
158
|
protected
|
122
159
|
|
160
|
+
def add_self_to_model_list_if_necessary(models)
|
161
|
+
models = [ models ] unless models.is_a? Array
|
162
|
+
models << self unless models.include?(self)
|
163
|
+
end
|
164
|
+
|
123
165
|
def find_records_lazy_or_not(q, options = {}, find_options = {})
|
124
166
|
if options[:lazy]
|
125
167
|
logger.warn "find_options #{find_options} are ignored because :lazy => true" unless find_options.empty?
|
@@ -138,12 +180,20 @@ module ActsAsFerret
|
|
138
180
|
end
|
139
181
|
|
140
182
|
result = retrieve_records( { self.name => result_ids }, find_options )
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
183
|
+
|
184
|
+
if find_options[:conditions]
|
185
|
+
if options[:limit] != :all
|
186
|
+
# correct result size if the user specified conditions
|
187
|
+
# with conditions and options[:limit] != :all --> repeat the ferret query with :all and do a select count
|
188
|
+
result_ids = {}
|
189
|
+
find_id_by_contents(q, options.update(:limit => :all)) do |model, id, score, data|
|
190
|
+
result_ids[id] = [ result_ids.size + 1, score ]
|
191
|
+
end
|
192
|
+
total_hits = count_records( { self.name => result_ids }, find_options )
|
193
|
+
else
|
194
|
+
total_hits = result.length
|
195
|
+
end
|
196
|
+
end
|
147
197
|
|
148
198
|
[ total_hits, result ]
|
149
199
|
end
|
@@ -186,22 +236,56 @@ module ActsAsFerret
|
|
186
236
|
next if id_array.empty?
|
187
237
|
begin
|
188
238
|
model = model.constantize
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
239
|
+
rescue
|
240
|
+
raise "Please use ':store_class_name => true' if you want to use multi_search.\n#{$!}"
|
241
|
+
end
|
242
|
+
|
243
|
+
# check for include association that might only exist on some models in case of multi_search
|
244
|
+
filtered_include_options = []
|
245
|
+
if include_options = find_options[:include]
|
246
|
+
include_options.each do |include_option|
|
247
|
+
filtered_include_options << include_option if model.reflections.has_key?(include_option.is_a?(Hash) ? include_option.keys[0].to_sym : include_option.to_sym)
|
248
|
+
end
|
249
|
+
end
|
250
|
+
filtered_include_options=nil if filtered_include_options.empty?
|
251
|
+
|
252
|
+
# fetch
|
253
|
+
tmp_result = nil
|
254
|
+
model.send(:with_scope, :find => find_options) do
|
255
|
+
tmp_result = model.find( :all, :conditions => [
|
256
|
+
"#{model.table_name}.#{model.primary_key} in (?)", id_array.keys ],
|
257
|
+
:include => filtered_include_options )
|
258
|
+
end
|
259
|
+
|
260
|
+
# set scores and rank
|
261
|
+
tmp_result.each do |record|
|
262
|
+
record.ferret_rank, record.ferret_score = id_array[record.id.to_s]
|
263
|
+
end
|
264
|
+
# merge with result array
|
265
|
+
result.concat tmp_result
|
266
|
+
end
|
267
|
+
|
268
|
+
# order results as they were found by ferret, unless an AR :order
|
269
|
+
# option was given
|
270
|
+
result.sort! { |a, b| a.ferret_rank <=> b.ferret_rank } unless find_options[:order]
|
271
|
+
return result
|
272
|
+
end
|
273
|
+
|
274
|
+
def count_records(id_arrays, find_options = {})
|
275
|
+
count = 0
|
276
|
+
id_arrays.each do |model, id_array|
|
277
|
+
next if id_array.empty?
|
278
|
+
begin
|
279
|
+
model = model.constantize
|
280
|
+
model.send(:with_scope, :find => find_options) do
|
281
|
+
count += model.count(:conditions => [ "#{model.table_name}.#{model.primary_key} in (?)",
|
282
|
+
id_array.keys ])
|
197
283
|
end
|
198
|
-
# merge with result array
|
199
|
-
result.concat tmp_result
|
200
284
|
rescue TypeError
|
201
285
|
raise "#{model} must use :store_class_name option if you want to use multi_search against it.\n#{$!}"
|
202
286
|
end
|
203
287
|
end
|
204
|
-
|
288
|
+
count
|
205
289
|
end
|
206
290
|
|
207
291
|
def deprecated_options_support(options)
|
@@ -215,20 +299,7 @@ module ActsAsFerret
|
|
215
299
|
end
|
216
300
|
end
|
217
301
|
|
218
|
-
#
|
219
|
-
def combine_conditions(conditions, *additional_conditions)
|
220
|
-
returning conditions do
|
221
|
-
if additional_conditions.any?
|
222
|
-
cust_opts = additional_conditions.dup.flatten
|
223
|
-
conditions.first << " and " << cust_opts.shift
|
224
|
-
conditions.concat(cust_opts)
|
225
|
-
end
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
|
-
# creates a new Index::Index instance. Before that, a check is done
|
230
|
-
# to see if the index exists in the file system. If not, index rebuild
|
231
|
-
# from all model data retrieved by find(:all) is triggered.
|
302
|
+
# creates a new Index instance.
|
232
303
|
def create_index_instance
|
233
304
|
if aaf_configuration[:remote]
|
234
305
|
RemoteIndex
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Ferret DRb server Capistrano tasks
|
2
|
+
# Usage:
|
3
|
+
# Add require 'vendor/plugins/acts_as_ferret/lib/ferret_cap_tasks' to your
|
4
|
+
# config/deploy.rb
|
5
|
+
# call ferret.restart where you restart your Mongrels.
|
6
|
+
# ferret.stop and ferret.start are available, too.
|
7
|
+
module FerretCapTasks
|
8
|
+
def start
|
9
|
+
run "cd #{current_path}; RAILS_ENV=production script/ferret_start"
|
10
|
+
end
|
11
|
+
|
12
|
+
def stop
|
13
|
+
run "cd #{current_path}; RAILS_ENV=production script/ferret_stop"
|
14
|
+
end
|
15
|
+
|
16
|
+
def restart
|
17
|
+
stop
|
18
|
+
start
|
19
|
+
end
|
20
|
+
end
|
21
|
+
Capistrano.plugin :ferret, FerretCapTasks
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module Ferret
|
2
|
+
|
3
|
+
|
4
|
+
class Index::Index
|
5
|
+
attr_accessor :batch_size
|
6
|
+
attr_accessor :logger
|
7
|
+
|
8
|
+
def index_models(models)
|
9
|
+
models.each { |model| index_model model }
|
10
|
+
flush
|
11
|
+
optimize
|
12
|
+
close
|
13
|
+
ActsAsFerret::close_multi_indexes
|
14
|
+
end
|
15
|
+
|
16
|
+
def index_model(model)
|
17
|
+
@batch_size ||= 0
|
18
|
+
work_done = 0
|
19
|
+
batch_time = 0
|
20
|
+
logger.info "reindexing model #{model.name}"
|
21
|
+
|
22
|
+
model_count = model.count.to_f
|
23
|
+
model.records_for_rebuild(@batch_size) do |records, offset|
|
24
|
+
#records = [ records ] unless records.is_a?(Array)
|
25
|
+
batch_time = measure_time {
|
26
|
+
records.each { |rec| self << rec.to_doc if rec.ferret_enabled?(true) }
|
27
|
+
}.to_f
|
28
|
+
work_done = offset.to_f / model_count * 100.0 if model_count > 0
|
29
|
+
remaining_time = ( batch_time / @batch_size ) * ( model_count - offset + @batch_size )
|
30
|
+
logger.info "reindex model #{model.name} : #{'%.2f' % work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def measure_time
|
35
|
+
t1 = Time.now
|
36
|
+
yield
|
37
|
+
Time.now - t1
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
# add marshalling support to SortFields
|
44
|
+
class Search::SortField
|
45
|
+
def _dump(depth)
|
46
|
+
to_s
|
47
|
+
end
|
48
|
+
|
49
|
+
def self._load(string)
|
50
|
+
case string
|
51
|
+
when /<DOC(_ID)?>!/ : Ferret::Search::SortField::DOC_ID_REV
|
52
|
+
when /<DOC(_ID)?>/ : Ferret::Search::SortField::DOC_ID
|
53
|
+
when '<SCORE>!' : Ferret::Search::SortField::SCORE_REV
|
54
|
+
when '<SCORE>' : Ferret::Search::SortField::SCORE
|
55
|
+
when /^(\w+):<(\w+)>(!)?$/ : new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
|
56
|
+
else raise "invalid value: #{string}"
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# add marshalling support to Sort
|
62
|
+
class Search::Sort
|
63
|
+
def _dump(depth)
|
64
|
+
to_s
|
65
|
+
end
|
66
|
+
|
67
|
+
def self._load(string)
|
68
|
+
# we exclude the last <DOC> sorting as it is appended by new anyway
|
69
|
+
if string =~ /^Sort\[(.*?)(<DOC>(!)?)?\]$/
|
70
|
+
sort_fields = $1.split(',').map do |value|
|
71
|
+
value.strip!
|
72
|
+
Ferret::Search::SortField._load value unless value.blank?
|
73
|
+
end
|
74
|
+
new sort_fields.compact
|
75
|
+
else
|
76
|
+
raise "invalid value: #{string}"
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|
data/lib/ferret_server.rb
CHANGED
@@ -6,84 +6,126 @@ require 'erb'
|
|
6
6
|
|
7
7
|
module ActsAsFerret
|
8
8
|
|
9
|
-
module Remote
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
9
|
+
module Remote
|
10
|
+
|
11
|
+
module Config
|
12
|
+
class << self
|
13
|
+
DEFAULTS = {
|
14
|
+
'host' => 'localhost',
|
15
|
+
'port' => '9009'
|
16
|
+
}
|
17
|
+
# read connection settings from config file
|
18
|
+
def load(file = "#{RAILS_ROOT}/config/ferret_server.yml")
|
19
|
+
config = DEFAULTS.merge(YAML.load(ERB.new(IO.read(file)).result))
|
20
|
+
if config = config[RAILS_ENV]
|
21
|
+
config[:uri] = "druby://#{config['host']}:#{config['port']}"
|
22
|
+
return config
|
23
|
+
end
|
24
|
+
{}
|
23
25
|
end
|
24
26
|
end
|
25
27
|
end
|
26
|
-
end
|
27
|
-
|
28
|
-
# This class acts as a drb server listening for indexing and
|
29
|
-
# search requests from models declared to 'acts_as_ferret :remote => true'
|
30
|
-
#
|
31
|
-
# Usage:
|
32
|
-
# - copy doc/ferret_server.yml to RAILS_ROOT/config and modify to suit
|
33
|
-
# your needs.
|
34
|
-
# - run script/ferret_server (in the plugin directory) via script/runner:
|
35
|
-
# RAILS_ENV=production script/runner vendor/plugins/acts_as_ferret/script/ferret_server
|
36
|
-
#
|
37
|
-
# TODO: automate installation of files to script/ and config/
|
38
|
-
class Server
|
39
|
-
|
40
|
-
cattr_accessor :running
|
41
|
-
|
42
|
-
def self.start(uri = nil)
|
43
|
-
ActiveRecord::Base.allow_concurrency = true
|
44
|
-
uri ||= ActsAsFerret::Remote::Config.load[:uri]
|
45
|
-
DRb.start_service(uri, ActsAsFerret::Remote::Server.new)
|
46
|
-
self.running = true
|
47
|
-
end
|
48
28
|
|
49
|
-
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
# handles all incoming method calls, and sends them on to the LocalIndex
|
54
|
-
# instance of the correct model class.
|
29
|
+
# This class acts as a drb server listening for indexing and
|
30
|
+
# search requests from models declared to 'acts_as_ferret :remote => true'
|
55
31
|
#
|
56
|
-
#
|
57
|
-
#
|
58
|
-
#
|
59
|
-
#
|
60
|
-
#
|
32
|
+
# Usage:
|
33
|
+
# - modify RAILS_ROOT/config/ferret_server.yml to suit your needs.
|
34
|
+
# - environments for which no section in the config file exists will use
|
35
|
+
# the index locally (good for unit tests/development mode)
|
36
|
+
# - run script/ferret_start to start the server:
|
37
|
+
# RAILS_ENV=production script/ferret_start
|
61
38
|
#
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
39
|
+
class Server
|
40
|
+
|
41
|
+
cattr_accessor :running
|
42
|
+
|
43
|
+
def self.start(uri = nil)
|
44
|
+
ActiveRecord::Base.allow_concurrency = true
|
45
|
+
ActiveRecord::Base.logger = Logger.new("#{RAILS_ROOT}/log/ferret_server.log")
|
46
|
+
uri ||= ActsAsFerret::Remote::Config.load[:uri]
|
47
|
+
DRb.start_service(uri, ActsAsFerret::Remote::Server.new)
|
48
|
+
self.running = true
|
72
49
|
end
|
73
|
-
rescue
|
74
|
-
@logger.error "ferret server error #{$!}\n#{$!.backtrace.join '\n'}"
|
75
|
-
raise
|
76
|
-
end
|
77
50
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
end
|
51
|
+
def initialize
|
52
|
+
@logger = ActiveRecord::Base.logger
|
53
|
+
end
|
82
54
|
|
83
|
-
|
84
|
-
|
85
|
-
|
55
|
+
# handles all incoming method calls, and sends them on to the LocalIndex
|
56
|
+
# instance of the correct model class.
|
57
|
+
#
|
58
|
+
# Calls are not queued at the moment, so this will block until the call has returned.
|
59
|
+
#
|
60
|
+
def method_missing(name, *args)
|
61
|
+
@logger.debug "\#method_missing(#{name.inspect}, #{args.inspect})"
|
62
|
+
with_class args.shift do |clazz|
|
63
|
+
begin
|
64
|
+
clazz.aaf_index.send name, *args
|
65
|
+
rescue NoMethodError
|
66
|
+
@logger.debug "no luck, trying to call class method instead"
|
67
|
+
clazz.send name, *args
|
68
|
+
end
|
69
|
+
end
|
70
|
+
rescue
|
71
|
+
@logger.error "ferret server error #{$!}\n#{$!.backtrace.join '\n'}"
|
72
|
+
raise
|
73
|
+
end
|
86
74
|
|
75
|
+
# make sure we have a versioned index in place, building one if necessary
|
76
|
+
def ensure_index_exists(class_name)
|
77
|
+
@logger.debug "DRb server: ensure_index_exists for class #{class_name}"
|
78
|
+
with_class class_name do |clazz|
|
79
|
+
dir = clazz.aaf_configuration[:index_dir]
|
80
|
+
unless File.directory?(dir) && File.file?(File.join(dir, 'segments')) && dir =~ %r{/\d+(_\d+)?$}
|
81
|
+
rebuild_index(clazz)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
# hides LocalIndex#rebuild_index to implement index versioning
|
87
|
+
def rebuild_index(clazz, *models)
|
88
|
+
with_class clazz do |clazz|
|
89
|
+
models = models.flatten.uniq.map(&:constantize)
|
90
|
+
models << clazz unless models.include?(clazz)
|
91
|
+
index = new_index_for(clazz, models)
|
92
|
+
@logger.debug "DRb server: rebuild index for class(es) #{models.inspect} in #{index.options[:path]}"
|
93
|
+
index.index_models models
|
94
|
+
new_version = File.join clazz.aaf_configuration[:index_base_dir], Time.now.utc.strftime('%Y%m%d%H%M%S')
|
95
|
+
# create a unique directory name (needed for unit tests where
|
96
|
+
# multiple rebuilds per second may occur)
|
97
|
+
if File.exists?(new_version)
|
98
|
+
i = 0
|
99
|
+
i+=1 while File.exists?("#{new_version}_#{i}")
|
100
|
+
new_version << "_#{i}"
|
101
|
+
end
|
102
|
+
|
103
|
+
File.rename index.options[:path], new_version
|
104
|
+
clazz.index_dir = new_version
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
protected
|
110
|
+
|
111
|
+
def with_class(clazz, *args)
|
112
|
+
clazz = clazz.constantize if String === clazz
|
113
|
+
yield clazz, *args
|
114
|
+
end
|
115
|
+
|
116
|
+
def new_index_for(clazz, models)
|
117
|
+
aaf_configuration = clazz.aaf_configuration
|
118
|
+
ferret_cfg = aaf_configuration[:ferret].dup
|
119
|
+
ferret_cfg.update :auto_flush => false,
|
120
|
+
:create => true,
|
121
|
+
:field_infos => ActsAsFerret::field_infos(models),
|
122
|
+
:path => File.join(aaf_configuration[:index_base_dir], 'rebuild')
|
123
|
+
returning Ferret::Index::Index.new ferret_cfg do |i|
|
124
|
+
i.batch_size = aaf_configuration[:reindex_batch_size]
|
125
|
+
i.logger = @logger
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
87
130
|
end
|
88
131
|
end
|
89
|
-
end
|
data/lib/index.rb
CHANGED
data/lib/local_index.rb
CHANGED
@@ -9,16 +9,30 @@ module ActsAsFerret
|
|
9
9
|
ensure_index_exists
|
10
10
|
end
|
11
11
|
|
12
|
+
def reopen!
|
13
|
+
if @ferret_index
|
14
|
+
@ferret_index.close
|
15
|
+
@ferret_index = nil
|
16
|
+
end
|
17
|
+
logger.debug "reopening index at #{aaf_configuration[:ferret][:path]}"
|
18
|
+
ferret_index
|
19
|
+
end
|
20
|
+
|
12
21
|
# The 'real' Ferret Index instance
|
13
22
|
def ferret_index
|
14
23
|
ensure_index_exists
|
15
|
-
@ferret_index ||= Ferret::Index::Index.new(aaf_configuration[:ferret])
|
24
|
+
returning @ferret_index ||= Ferret::Index::Index.new(aaf_configuration[:ferret]) do
|
25
|
+
@ferret_index.batch_size = aaf_configuration[:reindex_batch_size]
|
26
|
+
@ferret_index.logger = logger
|
27
|
+
end
|
16
28
|
end
|
17
29
|
|
18
30
|
# Checks for the presence of a segments file in the index directory
|
19
31
|
# Rebuilds the index if none exists.
|
20
32
|
def ensure_index_exists
|
33
|
+
logger.debug "LocalIndex: ensure_index_exists at #{aaf_configuration[:index_dir]}"
|
21
34
|
unless File.file? "#{aaf_configuration[:index_dir]}/segments"
|
35
|
+
ActsAsFerret::ensure_directory(aaf_configuration[:index_dir])
|
22
36
|
close
|
23
37
|
rebuild_index
|
24
38
|
end
|
@@ -37,20 +51,15 @@ module ActsAsFerret
|
|
37
51
|
# to. Arguments can be given in shared index scenarios to name multiple
|
38
52
|
# model classes to include in the index
|
39
53
|
def rebuild_index(*models)
|
40
|
-
logger.debug "rebuild index: #{models.inspect}"
|
41
54
|
models << aaf_configuration[:class_name] unless models.include?(aaf_configuration[:class_name])
|
42
55
|
models = models.flatten.uniq.map(&:constantize)
|
56
|
+
logger.debug "rebuild index: #{models.inspect}"
|
43
57
|
index = Ferret::Index::Index.new(aaf_configuration[:ferret].dup.update(:auto_flush => false,
|
44
|
-
:field_infos => field_infos(models),
|
58
|
+
:field_infos => ActsAsFerret::field_infos(models),
|
45
59
|
:create => true))
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
logger.debug("Created Ferret index in: #{aaf_configuration[:index_dir]}")
|
50
|
-
index.flush
|
51
|
-
index.optimize
|
52
|
-
index.close
|
53
|
-
close_multi_indexes
|
60
|
+
index.batch_size = aaf_configuration[:reindex_batch_size]
|
61
|
+
index.logger = logger
|
62
|
+
index.index_models models
|
54
63
|
end
|
55
64
|
|
56
65
|
# Parses the given query string into a Ferret Query object.
|
@@ -64,8 +73,11 @@ module ActsAsFerret
|
|
64
73
|
end
|
65
74
|
|
66
75
|
# Total number of hits for the given query.
|
76
|
+
# To count the results of a multi_search query, specify an array of
|
77
|
+
# class names with the :models option.
|
67
78
|
def total_hits(query, options = {})
|
68
|
-
|
79
|
+
index = (models = options.delete(:models)) ? multi_index(models) : ferret_index
|
80
|
+
index.search(query, options).total_hits
|
69
81
|
end
|
70
82
|
|
71
83
|
def determine_lazy_fields(options = {})
|
@@ -108,7 +120,6 @@ module ActsAsFerret
|
|
108
120
|
# If a block is given, model, id and score are yielded and the number of
|
109
121
|
# total hits is returned. Otherwise [total_hits, result_array] is returned.
|
110
122
|
def id_multi_search(query, models, options = {})
|
111
|
-
models.map!(&:constantize)
|
112
123
|
index = multi_index(models)
|
113
124
|
result = []
|
114
125
|
lazy_fields = determine_lazy_fields options
|
@@ -117,6 +128,7 @@ module ActsAsFerret
|
|
117
128
|
# fetch stored fields if lazy loading
|
118
129
|
data = {}
|
119
130
|
lazy_fields.each { |field| data[field] = doc[field] } if lazy_fields
|
131
|
+
raise "':store_class_name => true' required for multi_search to work" if doc[:class_name].blank?
|
120
132
|
if block_given?
|
121
133
|
yield doc[:class_name], doc[:id], score, doc, data
|
122
134
|
else
|
@@ -184,6 +196,7 @@ module ActsAsFerret
|
|
184
196
|
|
185
197
|
# returns a MultiIndex instance operating on a MultiReader
|
186
198
|
def multi_index(model_classes)
|
199
|
+
model_classes.map!(&:constantize) if String === model_classes.first
|
187
200
|
model_classes.sort! { |a, b| a.name <=> b.name }
|
188
201
|
key = model_classes.inject("") { |s, clazz| s + clazz.name }
|
189
202
|
multi_config = aaf_configuration[:ferret].dup
|
@@ -191,67 +204,6 @@ module ActsAsFerret
|
|
191
204
|
ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(model_classes, multi_config)
|
192
205
|
end
|
193
206
|
|
194
|
-
def close_multi_indexes
|
195
|
-
# close combined index readers, just in case
|
196
|
-
# this seems to fix a strange test failure that seems to relate to a
|
197
|
-
# multi_index looking at an old version of the content_base index.
|
198
|
-
ActsAsFerret::multi_indexes.each_pair do |key, index|
|
199
|
-
# puts "#{key} -- #{self.name}"
|
200
|
-
# TODO only close those where necessary (watch inheritance, where
|
201
|
-
# self.name is base class of a class where key is made from)
|
202
|
-
index.close #if key =~ /#{self.name}/
|
203
|
-
end
|
204
|
-
ActsAsFerret::multi_indexes.clear
|
205
|
-
end
|
206
|
-
|
207
|
-
def reindex_model(index, model = aaf_configuration[:class_name].constantize)
|
208
|
-
# index in batches of 1000 to limit memory consumption (fixes #24)
|
209
|
-
# TODO make configurable through options
|
210
|
-
batch_size = 1000
|
211
|
-
model_count = model.count.to_f
|
212
|
-
work_done = 0
|
213
|
-
batch_time = 0
|
214
|
-
logger.info "reindexing model #{model.name}"
|
215
|
-
order = "#{model.primary_key} ASC" # this works around a bug in sqlserver-adapter (where paging only works with an order applied)
|
216
|
-
model.transaction do
|
217
|
-
0.step(model.count, batch_size) do |i|
|
218
|
-
b1 = Time.now.to_f
|
219
|
-
model.find(:all, :limit => batch_size, :offset => i, :order => order).each do |rec|
|
220
|
-
index << rec.to_doc if rec.ferret_enabled?(true)
|
221
|
-
end
|
222
|
-
batch_time = Time.now.to_f - b1
|
223
|
-
work_done = i.to_f / model_count * 100.0 if model_count > 0
|
224
|
-
remaining_time = ( batch_time / batch_size ) * ( model_count - i + batch_size )
|
225
|
-
logger.info "reindex model #{model.name} : #{'%.2f' % work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
|
226
|
-
end
|
227
|
-
end
|
228
|
-
end
|
229
|
-
|
230
|
-
# builds a FieldInfos instance for creation of an index containing fields
|
231
|
-
# for the given model classes.
|
232
|
-
def field_infos(models)
|
233
|
-
# default attributes for fields
|
234
|
-
fi = Ferret::Index::FieldInfos.new(:store => :no,
|
235
|
-
:index => :yes,
|
236
|
-
:term_vector => :no,
|
237
|
-
:boost => 1.0)
|
238
|
-
# primary key
|
239
|
-
fi.add_field(:id, :store => :yes, :index => :untokenized)
|
240
|
-
# class_name
|
241
|
-
if aaf_configuration[:store_class_name]
|
242
|
-
fi.add_field(:class_name, :store => :yes, :index => :untokenized)
|
243
|
-
end
|
244
|
-
fields = {}
|
245
|
-
models.each do |model|
|
246
|
-
fields.update(model.aaf_configuration[:ferret_fields])
|
247
|
-
end
|
248
|
-
fields.each_pair do |field, options|
|
249
|
-
fi.add_field(field, { :store => :no,
|
250
|
-
:index => :yes }.update(options))
|
251
|
-
end
|
252
|
-
return fi
|
253
|
-
end
|
254
|
-
|
255
207
|
end
|
256
208
|
|
257
209
|
end
|
data/lib/more_like_this.rb
CHANGED
@@ -29,8 +29,9 @@ module ActsAsFerret #:nodoc:
|
|
29
29
|
# equals Ferret's internal similarity implementation)
|
30
30
|
# :analyzer => 'Ferret::Analysis::StandardAnalyzer' # class name of the analyzer to use
|
31
31
|
# :append_to_query => nil # proc taking a query object as argument, which will be called after generating the query. can be used to further manipulate the query used to find related documents, i.e. to constrain the search to a given class in single table inheritance scenarios
|
32
|
-
#
|
33
|
-
|
32
|
+
# ferret_options : Ferret options handed over to find_by_contents (i.e. for limits and sorting)
|
33
|
+
# ar_options : options handed over to find_by_contents for AR scoping
|
34
|
+
def more_like_this(options = {}, ferret_options = {}, ar_options = {})
|
34
35
|
options = {
|
35
36
|
:field_names => nil, # Default field names
|
36
37
|
:min_term_freq => 2, # Ignore terms with less than this frequency in the source doc.
|
@@ -52,13 +53,16 @@ module ActsAsFerret #:nodoc:
|
|
52
53
|
options[:base_class] = clazz.name
|
53
54
|
query = clazz.aaf_index.build_more_like_this_query(self.id, self.class.name, options)
|
54
55
|
options[:append_to_query].call(query) if options[:append_to_query]
|
55
|
-
clazz.find_by_contents(query,
|
56
|
+
clazz.find_by_contents(query, ferret_options, ar_options)
|
56
57
|
end
|
57
58
|
|
58
59
|
end
|
59
60
|
|
60
61
|
module IndexMethods
|
61
62
|
|
63
|
+
# TODO to allow morelikethis for unsaved records, we have to give the
|
64
|
+
# unsaved record's data to this method. check how this will work out
|
65
|
+
# via drb...
|
62
66
|
def build_more_like_this_query(id, class_name, options)
|
63
67
|
[:similarity, :analyzer].each { |sym| options[sym] = options[sym].constantize.new }
|
64
68
|
ferret_index.synchronize do # avoid that concurrent writes close our reader
|
@@ -103,7 +107,8 @@ module ActsAsFerret #:nodoc:
|
|
103
107
|
# creates a term/term_frequency map for terms from the fields
|
104
108
|
# given in options[:field_names]
|
105
109
|
def retrieve_terms(id, class_name, reader, options)
|
106
|
-
|
110
|
+
raise "more_like_this atm only works on saved records" if id.nil?
|
111
|
+
document_number = document_number(id, class_name) rescue nil
|
107
112
|
field_names = options[:field_names]
|
108
113
|
max_num_tokens = options[:max_num_tokens]
|
109
114
|
term_freq_map = Hash.new(0)
|
@@ -111,7 +116,7 @@ module ActsAsFerret #:nodoc:
|
|
111
116
|
record = nil
|
112
117
|
field_names.each do |field|
|
113
118
|
#puts "field: #{field}"
|
114
|
-
term_freq_vector = reader.term_vector(document_number, field)
|
119
|
+
term_freq_vector = reader.term_vector(document_number, field) if document_number
|
115
120
|
#if false
|
116
121
|
if term_freq_vector
|
117
122
|
# use stored term vector
|
@@ -123,8 +128,11 @@ module ActsAsFerret #:nodoc:
|
|
123
128
|
# puts 'no stored term vector'
|
124
129
|
# no term vector stored, but we have stored the contents in the index
|
125
130
|
# -> extract terms from there
|
126
|
-
|
127
|
-
|
131
|
+
content = nil
|
132
|
+
if document_number
|
133
|
+
doc = reader[document_number]
|
134
|
+
content = doc[field]
|
135
|
+
end
|
128
136
|
unless content
|
129
137
|
# no term vector, no stored content, so try content from this instance
|
130
138
|
record ||= options[:base_class].constantize.find(id)
|
data/lib/multi_index.rb
CHANGED
@@ -8,7 +8,7 @@ module ActsAsFerret #:nodoc:
|
|
8
8
|
# ensure all models indexes exist
|
9
9
|
@model_classes.each { |m| m.aaf_index.ensure_index_exists }
|
10
10
|
default_fields = @model_classes.inject([]) do |fields, c|
|
11
|
-
fields + c.aaf_configuration[:ferret][:default_field]
|
11
|
+
fields + [ c.aaf_configuration[:ferret][:default_field] ].flatten
|
12
12
|
end
|
13
13
|
@options = {
|
14
14
|
:default_field => default_fields
|
@@ -69,9 +69,8 @@ module ActsAsFerret #:nodoc:
|
|
69
69
|
begin
|
70
70
|
reader = Ferret::Index::IndexReader.new(clazz.aaf_configuration[:index_dir])
|
71
71
|
rescue Exception
|
72
|
-
|
72
|
+
raise "error opening #{clazz.aaf_configuration[:index_dir]}: #{$!}"
|
73
73
|
end
|
74
|
-
reader
|
75
74
|
}
|
76
75
|
close
|
77
76
|
@reader = Ferret::Index::IndexReader.new(@sub_readers)
|
data/rakefile
CHANGED
@@ -55,7 +55,8 @@ if PKG_VERSION
|
|
55
55
|
s.platform = Gem::Platform::RUBY
|
56
56
|
s.summary = "acts_as_ferret - Ferret based full text search for any ActiveRecord model"
|
57
57
|
s.files = Dir.glob('**/*', File::FNM_DOTMATCH).reject do |f|
|
58
|
-
[ /\.$/, /sqlite$/, /\.log$/, /^pkg/, /\.svn/,
|
58
|
+
[ /\.$/, /sqlite$/, /\.log$/, /^pkg/, /\.svn/, /\.\w+\.sw.$/,
|
59
|
+
/^html/, /\~$/, /\/\._/, /\/#/ ].any? {|regex| f =~ regex }
|
59
60
|
end
|
60
61
|
#s.files = FileList["{lib,test}/**/*"].to_a + %w(README MIT-LICENSE CHANGELOG)
|
61
62
|
# s.files.delete ...
|
@@ -113,9 +114,7 @@ if PKG_VERSION
|
|
113
114
|
|
114
115
|
# Upload release to rubyforge
|
115
116
|
desc "Upload release to rubyforge"
|
116
|
-
|
117
|
-
task :prel => [ :package ] do
|
118
|
-
#task :prel do
|
117
|
+
task :prel => [ :tag, :prerelease, :package ] do
|
119
118
|
`rubyforge login`
|
120
119
|
release_command = "rubyforge add_release #{RUBYFORGE_PROJECT} #{PKG_NAME} '#{PKG_VERSION}' pkg/#{PKG_NAME}-#{PKG_VERSION}.gem"
|
121
120
|
puts release_command
|
data/script/ferret_start
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
2
|
-
|
1
|
+
#!/usr/bin/env ruby
|
3
2
|
# Ferret DRb server launcher script
|
4
3
|
#
|
5
4
|
# Place doc/ferret_server.yml into RAILS_ROOT/config and fit to taste. Start
|
@@ -18,12 +17,17 @@
|
|
18
17
|
# The server writes a log file in log/ferret_server.log, it's
|
19
18
|
# STDOUT gets redirected to log/ferret_server.out
|
20
19
|
|
20
|
+
ENV['FERRET_USE_LOCAL_INDEX'] = 'true'
|
21
|
+
require File.dirname(__FILE__) + '/../config/boot'
|
22
|
+
require RAILS_ROOT + '/config/environment'
|
23
|
+
|
21
24
|
|
22
25
|
config = ActsAsFerret::Remote::Config.load
|
23
26
|
@pid_file = config['pid_file']
|
24
27
|
|
25
28
|
def write_pid_file
|
26
|
-
|
29
|
+
raise "No PID file defined" if @pid_file.blank?
|
30
|
+
open(@pid_file,"w") {|f| f.write(Process.pid) }
|
27
31
|
end
|
28
32
|
|
29
33
|
def safefork
|
@@ -59,7 +63,8 @@ begin
|
|
59
63
|
STDOUT.reopen "#{RAILS_ROOT}/log/ferret_server.out", "a" # point them somewhere sensible
|
60
64
|
STDERR.reopen STDOUT # STDOUT/STDERR should go to a logfile
|
61
65
|
rescue
|
62
|
-
puts "Error starting ferret DRb server: #{$!}"
|
66
|
+
$stderr.puts "Error starting ferret DRb server: #{$!}"
|
67
|
+
$stderr.puts $!.backtrace
|
63
68
|
exit(1)
|
64
69
|
end
|
65
70
|
DRb.thread.join
|
data/script/ferret_stop
CHANGED
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: acts_as_ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.4.
|
7
|
-
date: 2007-
|
6
|
+
version: 0.4.1
|
7
|
+
date: 2007-07-17 00:00:00 +02:00
|
8
8
|
summary: acts_as_ferret - Ferret based full text search for any ActiveRecord model
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -29,32 +29,34 @@ post_install_message:
|
|
29
29
|
authors:
|
30
30
|
- Jens Kraemer
|
31
31
|
files:
|
32
|
+
- script
|
33
|
+
- config
|
34
|
+
- doc
|
32
35
|
- lib
|
33
36
|
- LICENSE
|
34
37
|
- rakefile
|
35
38
|
- init.rb
|
36
|
-
- README
|
37
|
-
- script
|
38
|
-
- doc
|
39
|
-
- config
|
40
39
|
- install.rb
|
41
|
-
-
|
42
|
-
-
|
43
|
-
-
|
44
|
-
-
|
45
|
-
-
|
40
|
+
- README
|
41
|
+
- script/ferret_start
|
42
|
+
- script/ferret_stop
|
43
|
+
- script/ferret_server
|
44
|
+
- config/ferret_server.yml
|
46
45
|
- lib/ferret_server.rb
|
46
|
+
- lib/more_like_this.rb
|
47
47
|
- lib/shared_index.rb
|
48
|
+
- lib/ferret_cap_tasks.rb
|
48
49
|
- lib/local_index.rb
|
50
|
+
- lib/multi_index.rb
|
49
51
|
- lib/remote_index.rb
|
52
|
+
- lib/acts_as_ferret.rb
|
53
|
+
- lib/ferret_result.rb
|
54
|
+
- lib/shared_index_class_methods.rb
|
55
|
+
- lib/ferret_extensions.rb
|
50
56
|
- lib/index.rb
|
57
|
+
- lib/instance_methods.rb
|
58
|
+
- lib/class_methods.rb
|
51
59
|
- lib/act_methods.rb
|
52
|
-
- lib/shared_index_class_methods.rb
|
53
|
-
- lib/ferret_result.rb
|
54
|
-
- script/ferret_server
|
55
|
-
- script/ferret_start
|
56
|
-
- script/ferret_stop
|
57
|
-
- config/ferret_server.yml
|
58
60
|
test_files: []
|
59
61
|
|
60
62
|
rdoc_options: []
|