acts_as_ferret 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +29 -6
- data/config/ferret_server.yml +12 -0
- data/install.rb +19 -0
- data/lib/act_methods.rb +194 -0
- data/lib/acts_as_ferret.rb +74 -52
- data/lib/class_methods.rb +222 -482
- data/lib/ferret_result.rb +36 -0
- data/lib/ferret_server.rb +89 -0
- data/lib/index.rb +31 -0
- data/lib/instance_methods.rb +112 -143
- data/lib/local_index.rb +257 -0
- data/lib/more_like_this.rb +47 -41
- data/lib/multi_index.rb +8 -11
- data/lib/remote_index.rb +50 -0
- data/lib/shared_index.rb +14 -0
- data/lib/shared_index_class_methods.rb +90 -0
- data/rakefile +88 -147
- data/script/ferret_server +18 -0
- data/script/ferret_start +67 -0
- data/script/ferret_stop +22 -0
- metadata +23 -11
- data/.init.rb.swp +0 -0
- data/.rakefile.swp +0 -0
- data/lib/.acts_as_ferret.rb.swp +0 -0
- data/lib/.class_methods.rb.swo +0 -0
- data/lib/.class_methods.rb.swp +0 -0
data/README
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
acts_as_ferret
|
1
|
+
= acts_as_ferret
|
2
2
|
|
3
3
|
This ActiveRecord mixin adds full text search capabilities to any Rails model.
|
4
4
|
|
@@ -6,10 +6,28 @@ It is heavily based on the original acts_as_ferret plugin done by
|
|
6
6
|
Kasper Weibel and a modified version done by Thomas Lockney, which
|
7
7
|
both can be found on http://ferret.davebalmain.com/trac/wiki/FerretOnRails
|
8
8
|
|
9
|
-
|
9
|
+
== Installation
|
10
|
+
|
11
|
+
=== Installation inside your Rails project via script/plugin
|
12
|
+
|
13
|
+
script/plugin install svn://projects.jkraemer.net/acts_as_ferret/trunk/plugin/acts_as_ferret
|
14
|
+
|
15
|
+
|
16
|
+
=== System-wide installation with Rubygems
|
17
|
+
|
18
|
+
<tt>sudo gem install acts_as_ferret</tt>
|
19
|
+
|
20
|
+
To use acts_as_ferret in your project, add the following line to your
|
21
|
+
project's config/environment.rb:
|
22
|
+
|
23
|
+
<tt>require 'acts_as_ferret'</tt>
|
24
|
+
|
25
|
+
|
26
|
+
== Usage
|
27
|
+
|
10
28
|
include the following in your model class (specifying the fields you want to get indexed):
|
11
29
|
|
12
|
-
acts_as_ferret :fields => [
|
30
|
+
<tt>acts_as_ferret :fields => [ :title, :description ]</tt>
|
13
31
|
|
14
32
|
now you can use ModelClass.find_by_contents(query) to find instances of your model
|
15
33
|
whose indexed fields match a given query. All query terms are required by default,
|
@@ -17,10 +35,15 @@ but explicit OR queries are possible. This differs from the ferret default, but
|
|
17
35
|
imho is the more often needed/expected behaviour (more query terms result in
|
18
36
|
fewer results).
|
19
37
|
|
38
|
+
Please see ActsAsFerret::ActMethods#acts_as_ferret for more information.
|
39
|
+
|
40
|
+
== License
|
41
|
+
|
20
42
|
Released under the MIT license.
|
21
43
|
|
22
|
-
Authors
|
23
|
-
|
24
|
-
|
44
|
+
== Authors
|
45
|
+
|
46
|
+
* Kasper Weibel Nielsen-Refs (original author)
|
47
|
+
* Jens Kraemer <jk@jkraemer.net> (current maintainer)
|
25
48
|
|
26
49
|
|
data/install.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# acts_as_ferret install script
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
def install(file)
|
5
|
+
puts "Installing: #{file}"
|
6
|
+
target = File.join(File.dirname(__FILE__), '..', '..', '..', file)
|
7
|
+
if File.exists?(target)
|
8
|
+
puts "target #{target} already exists, skipping"
|
9
|
+
else
|
10
|
+
FileUtils.cp File.join(File.dirname(__FILE__), file), target
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
install File.join( 'script', 'ferret_start' )
|
15
|
+
install File.join( 'script', 'ferret_stop' )
|
16
|
+
install File.join( 'config', 'ferret_server.yml' )
|
17
|
+
|
18
|
+
puts IO.read(File.join(File.dirname(__FILE__), 'README'))
|
19
|
+
|
data/lib/act_methods.rb
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
module ActsAsFerret #:nodoc:
|
2
|
+
|
3
|
+
# This module defines the acts_as_ferret method and is included into
|
4
|
+
# ActiveRecord::Base
|
5
|
+
module ActMethods
|
6
|
+
|
7
|
+
|
8
|
+
def reloadable?; false end
|
9
|
+
|
10
|
+
# declares a class as ferret-searchable.
|
11
|
+
#
|
12
|
+
# ====options:
|
13
|
+
# fields:: names all fields to include in the index. If not given,
|
14
|
+
# all attributes of the class will be indexed. You may also give
|
15
|
+
# symbols pointing to instance methods of your model here, i.e.
|
16
|
+
# to retrieve and index data from a related model.
|
17
|
+
#
|
18
|
+
# additional_fields:: names fields to include in the index, in addition
|
19
|
+
# to those derived from the db schema. Use if you want
|
20
|
+
# to add custom fields derived from methods to the db
|
21
|
+
# fields (which will be picked by aaf). This option will
|
22
|
+
# be ignored when the fields option is given, in that
|
23
|
+
# case additional fields get specified there.
|
24
|
+
#
|
25
|
+
# index_dir:: declares the directory where to put the index for this class.
|
26
|
+
# The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
|
27
|
+
# The index directory will be created if it doesn't exist.
|
28
|
+
#
|
29
|
+
# single_index:: set this to true to let this class use a Ferret
|
30
|
+
# index that is shared by all classes having :single_index set to true.
|
31
|
+
# :store_class_name is set to true implicitly, as well as index_dir, so
|
32
|
+
# don't bother setting these when using this option. the shared index
|
33
|
+
# will be located in index/<RAILS_ENV>/shared .
|
34
|
+
#
|
35
|
+
# store_class_name:: to make search across multiple models (with either
|
36
|
+
# single_index or the multi_search method) useful, set
|
37
|
+
# this to true. the model class name will be stored in a keyword field
|
38
|
+
# named class_name
|
39
|
+
#
|
40
|
+
# ====ferret_options:
|
41
|
+
# or_default:: whether query terms are required by
|
42
|
+
# default (the default, false), or not (true)
|
43
|
+
#
|
44
|
+
# analyzer:: the analyzer to use for query parsing (default: nil,
|
45
|
+
# which means the ferret StandardAnalyzer gets used)
|
46
|
+
#
|
47
|
+
# default_field:: use to set one or more fields that are searched for query terms
|
48
|
+
# that don't have an explicit field list. This list should *not*
|
49
|
+
# contain any untokenized fields. If it does, you're asking
|
50
|
+
# for trouble (i.e. not getting results for queries having
|
51
|
+
# stop words in them). Aaf by default initializes the default field
|
52
|
+
# list to contain all tokenized fields. If you use :single_index => true,
|
53
|
+
# you really should set this option specifying your default field
|
54
|
+
# list (which should be equal in all your classes sharing the index).
|
55
|
+
# Otherwise you might get incorrect search results and you won't get
|
56
|
+
# any lazy loading of stored field data.
|
57
|
+
#
|
58
|
+
def acts_as_ferret(options={}, ferret_options={})
|
59
|
+
|
60
|
+
# force local mode if running *inside* the Ferret server - somewhere the
|
61
|
+
# real indexing has to be done after all :-)
|
62
|
+
options.delete(:remote) if ActsAsFerret::Remote::Server.running
|
63
|
+
|
64
|
+
if options[:remote] && options[:remote] !~ /^druby/
|
65
|
+
# read server location from config/ferret_server.yml
|
66
|
+
options[:remote] = ActsAsFerret::Remote::Config.load("#{RAILS_ROOT}/config/ferret_server.yml")[:uri]
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
extend ClassMethods
|
71
|
+
extend SharedIndexClassMethods if options[:single_index]
|
72
|
+
|
73
|
+
include InstanceMethods
|
74
|
+
include MoreLikeThis::InstanceMethods
|
75
|
+
|
76
|
+
# AR hooks
|
77
|
+
after_create :ferret_create
|
78
|
+
after_update :ferret_update
|
79
|
+
after_destroy :ferret_destroy
|
80
|
+
|
81
|
+
cattr_accessor :aaf_configuration
|
82
|
+
|
83
|
+
# default config
|
84
|
+
self.aaf_configuration = {
|
85
|
+
:index_dir => "#{ActsAsFerret::index_dir}/#{self.name.underscore}",
|
86
|
+
:store_class_name => false,
|
87
|
+
:name => self.table_name,
|
88
|
+
:class_name => self.name,
|
89
|
+
:single_index => false,
|
90
|
+
:ferret => {
|
91
|
+
:or_default => false,
|
92
|
+
:handle_parse_errors => true,
|
93
|
+
:default_field => nil # will be set later on
|
94
|
+
#:max_clauses => 512,
|
95
|
+
#:analyzer => Ferret::Analysis::StandardAnalyzer.new,
|
96
|
+
# :wild_card_downcase => true
|
97
|
+
}
|
98
|
+
}
|
99
|
+
|
100
|
+
# merge aaf options with args
|
101
|
+
aaf_configuration.update(options) if options.is_a?(Hash)
|
102
|
+
|
103
|
+
# list of indexed fields will be filled later
|
104
|
+
aaf_configuration[:ferret_fields] = Hash.new
|
105
|
+
|
106
|
+
# apply appropriate settings for shared index
|
107
|
+
if aaf_configuration[:single_index]
|
108
|
+
aaf_configuration[:index_dir] = "#{ActsAsFerret::index_dir}/shared"
|
109
|
+
aaf_configuration[:store_class_name] = true
|
110
|
+
end
|
111
|
+
|
112
|
+
# merge default ferret options with args
|
113
|
+
aaf_configuration[:ferret].update(ferret_options) if ferret_options.is_a?(Hash)
|
114
|
+
|
115
|
+
# these properties are somewhat vital to the plugin and shouldn't
|
116
|
+
# be overwritten by the user:
|
117
|
+
aaf_configuration[:ferret].update(
|
118
|
+
:key => (aaf_configuration[:single_index] ? [:id, :class_name] : :id),
|
119
|
+
:path => aaf_configuration[:index_dir],
|
120
|
+
:auto_flush => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
|
121
|
+
:create_if_missing => true
|
122
|
+
)
|
123
|
+
|
124
|
+
if aaf_configuration[:fields]
|
125
|
+
add_fields(aaf_configuration[:fields])
|
126
|
+
else
|
127
|
+
add_fields(self.new.attributes.keys.map { |k| k.to_sym })
|
128
|
+
add_fields(aaf_configuration[:additional_fields])
|
129
|
+
end
|
130
|
+
|
131
|
+
ActsAsFerret::ensure_directory aaf_configuration[:index_dir] unless options[:remote]
|
132
|
+
|
133
|
+
# now that all fields have been added, we can initialize the default
|
134
|
+
# field list to be used by the query parser.
|
135
|
+
# It will include all content fields *not* marked as :untokenized.
|
136
|
+
# This fixes the otherwise failing CommentTest#test_stopwords. Basically
|
137
|
+
# this means that by default only tokenized fields (which is the default)
|
138
|
+
# will be searched. If you want to search inside the contents of an
|
139
|
+
# untokenized field, you'll have to explicitly specify it in your query.
|
140
|
+
#
|
141
|
+
# Unfortunately this is not very useful with a shared index (see
|
142
|
+
# http://projects.jkraemer.net/acts_as_ferret/ticket/85)
|
143
|
+
# You should consider specifying the default field list to search for as
|
144
|
+
# part of the ferret_options hash in your call to acts_as_ferret.
|
145
|
+
aaf_configuration[:ferret][:default_field] ||= if aaf_configuration[:single_index]
|
146
|
+
logger.warn "You really should set the acts_as_ferret :default_field option when using a shared index!"
|
147
|
+
'*'
|
148
|
+
else
|
149
|
+
aaf_configuration[:ferret_fields].keys.select do |f|
|
150
|
+
aaf_configuration[:ferret_fields][f][:index] != :untokenized
|
151
|
+
end
|
152
|
+
end
|
153
|
+
logger.info "default field list: #{aaf_configuration[:ferret][:default_field].inspect}"
|
154
|
+
end
|
155
|
+
|
156
|
+
|
157
|
+
protected
|
158
|
+
|
159
|
+
# helper that defines a method that adds the given field to a ferret
|
160
|
+
# document instance
|
161
|
+
def define_to_field_method(field, options = {})
|
162
|
+
options.reverse_merge!( :store => :no,
|
163
|
+
:highlight => :yes,
|
164
|
+
:index => :yes,
|
165
|
+
:term_vector => :with_positions_offsets,
|
166
|
+
:boost => 1.0 )
|
167
|
+
aaf_configuration[:ferret_fields][field] = options
|
168
|
+
define_method("#{field}_to_ferret".to_sym) do
|
169
|
+
begin
|
170
|
+
val = content_for_field_name(field)
|
171
|
+
rescue
|
172
|
+
logger.warn("Error retrieving value for field #{field}: #{$!}")
|
173
|
+
val = ''
|
174
|
+
end
|
175
|
+
logger.debug("Adding field #{field} with value '#{val}' to index")
|
176
|
+
val
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
def add_fields(field_config)
|
181
|
+
if field_config.respond_to?(:each_pair)
|
182
|
+
field_config.each_pair do |key,val|
|
183
|
+
define_to_field_method(key,val)
|
184
|
+
end
|
185
|
+
elsif field_config.respond_to?(:each)
|
186
|
+
field_config.each do |field|
|
187
|
+
define_to_field_method(field)
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
end
|
193
|
+
|
194
|
+
end
|
data/lib/acts_as_ferret.rb
CHANGED
@@ -18,18 +18,27 @@
|
|
18
18
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
19
|
# SOFTWARE.
|
20
20
|
|
21
|
+
require 'active_support'
|
21
22
|
require 'active_record'
|
22
23
|
require 'set'
|
23
24
|
require 'ferret'
|
24
25
|
|
25
|
-
require '
|
26
|
-
require 'more_like_this'
|
26
|
+
require 'act_methods'
|
27
27
|
require 'class_methods'
|
28
|
+
require 'shared_index_class_methods'
|
29
|
+
require 'ferret_result'
|
28
30
|
require 'instance_methods'
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
-
|
32
|
+
require 'multi_index'
|
33
|
+
require 'more_like_this'
|
34
|
+
|
35
|
+
require 'index'
|
36
|
+
require 'local_index'
|
37
|
+
require 'shared_index'
|
38
|
+
require 'remote_index'
|
39
|
+
|
40
|
+
require 'ferret_server'
|
41
|
+
|
33
42
|
|
34
43
|
# The Rails ActiveRecord Ferret Mixin.
|
35
44
|
#
|
@@ -41,7 +50,7 @@ require 'instance_methods'
|
|
41
50
|
#
|
42
51
|
# basic usage:
|
43
52
|
# include the following in your model class (specifying the fields you want to get indexed):
|
44
|
-
# acts_as_ferret :fields => [
|
53
|
+
# acts_as_ferret :fields => [ :title, :description ]
|
45
54
|
#
|
46
55
|
# now you can use ModelClass.find_by_contents(query) to find instances of your model
|
47
56
|
# whose indexed fields match a given query. All query terms are required by default, but
|
@@ -54,59 +63,61 @@ require 'instance_methods'
|
|
54
63
|
# Kasper Weibel Nielsen-Refs (original author)
|
55
64
|
# Jens Kraemer <jk@jkraemer.net> (active maintainer)
|
56
65
|
#
|
57
|
-
module
|
58
|
-
module Acts #:nodoc:
|
59
|
-
module ARFerret #:nodoc:
|
66
|
+
module ActsAsFerret
|
60
67
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
@total_hits = total_hits
|
67
|
-
end
|
68
|
-
def method_missing(symbol, *args, &block)
|
69
|
-
@results.send(symbol, *args, &block)
|
70
|
-
end
|
71
|
-
def respond_to?(name)
|
72
|
-
self.methods.include?(name) || @results.respond_to?(name)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.ensure_directory(dir)
|
77
|
-
FileUtils.mkdir_p dir unless File.directory? dir
|
78
|
-
end
|
79
|
-
|
80
|
-
# make sure the default index base dir exists. by default, all indexes are created
|
81
|
-
# under RAILS_ROOT/index/RAILS_ENV
|
82
|
-
def self.init_index_basedir
|
83
|
-
index_base = "#{RAILS_ROOT}/index"
|
84
|
-
ensure_directory index_base
|
85
|
-
@@index_dir = "#{index_base}/#{RAILS_ENV}"
|
86
|
-
ensure_directory @@index_dir
|
87
|
-
end
|
88
|
-
|
89
|
-
mattr_accessor :index_dir
|
90
|
-
init_index_basedir
|
91
|
-
|
92
|
-
def self.append_features(base)
|
93
|
-
super
|
94
|
-
base.extend(ClassMethods)
|
95
|
-
end
|
68
|
+
# global Hash containing all multi indexes created by all classes using the plugin
|
69
|
+
# key is the concatenation of alphabetically sorted names of the classes the
|
70
|
+
# searcher searches.
|
71
|
+
@@multi_indexes = Hash.new
|
72
|
+
def self.multi_indexes; @@multi_indexes end
|
96
73
|
|
97
|
-
|
98
|
-
|
74
|
+
# global Hash containing the ferret indexes of all classes using the plugin
|
75
|
+
# key is the index directory.
|
76
|
+
@@ferret_indexes = Hash.new
|
77
|
+
def self.ferret_indexes; @@ferret_indexes end
|
78
|
+
|
79
|
+
# decorator that adds a total_hits accessor to search result arrays
|
80
|
+
class SearchResults
|
81
|
+
attr_reader :total_hits
|
82
|
+
def initialize(results, total_hits)
|
83
|
+
@results = results
|
84
|
+
@total_hits = total_hits
|
85
|
+
end
|
86
|
+
def method_missing(symbol, *args, &block)
|
87
|
+
@results.send(symbol, *args, &block)
|
88
|
+
end
|
89
|
+
def respond_to?(name)
|
90
|
+
self.methods.include?(name) || @results.respond_to?(name)
|
99
91
|
end
|
100
92
|
end
|
93
|
+
|
94
|
+
def self.ensure_directory(dir)
|
95
|
+
FileUtils.mkdir_p dir unless File.directory? dir
|
96
|
+
end
|
97
|
+
|
98
|
+
# make sure the default index base dir exists. by default, all indexes are created
|
99
|
+
# under RAILS_ROOT/index/RAILS_ENV
|
100
|
+
def self.init_index_basedir
|
101
|
+
index_base = "#{RAILS_ROOT}/index"
|
102
|
+
@@index_dir = "#{index_base}/#{RAILS_ENV}"
|
103
|
+
end
|
104
|
+
|
105
|
+
mattr_accessor :index_dir
|
106
|
+
init_index_basedir
|
107
|
+
|
108
|
+
def self.append_features(base)
|
109
|
+
super
|
110
|
+
base.extend(ClassMethods)
|
111
|
+
end
|
112
|
+
|
101
113
|
end
|
102
114
|
|
103
|
-
#
|
104
|
-
|
105
|
-
ActiveRecord::Base.class_eval do
|
106
|
-
include FerretMixin::Acts::ARFerret
|
107
|
-
end
|
115
|
+
# include acts_as_ferret method into ActiveRecord::Base
|
116
|
+
ActiveRecord::Base.extend ActsAsFerret::ActMethods
|
108
117
|
|
109
118
|
|
119
|
+
# small Ferret monkey patch
|
120
|
+
# TODO check if this is still necessary
|
110
121
|
class Ferret::Index::MultiReader
|
111
122
|
def latest?
|
112
123
|
# TODO: Exception handling added to resolve ticket #6.
|
@@ -121,4 +132,15 @@ class Ferret::Index::MultiReader
|
|
121
132
|
end
|
122
133
|
end
|
123
134
|
|
124
|
-
#
|
135
|
+
# add marshalling support to SortFields
|
136
|
+
class Ferret::Search::SortField
|
137
|
+
def _dump(depth)
|
138
|
+
to_s
|
139
|
+
end
|
140
|
+
|
141
|
+
def self._load(string)
|
142
|
+
raise "invalid value: #{string}" unless string =~ /^(\w+):<(\w+)>(\!)?$/
|
143
|
+
new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|