acts_as_ferret 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +29 -6
- data/config/ferret_server.yml +12 -0
- data/install.rb +19 -0
- data/lib/act_methods.rb +194 -0
- data/lib/acts_as_ferret.rb +74 -52
- data/lib/class_methods.rb +222 -482
- data/lib/ferret_result.rb +36 -0
- data/lib/ferret_server.rb +89 -0
- data/lib/index.rb +31 -0
- data/lib/instance_methods.rb +112 -143
- data/lib/local_index.rb +257 -0
- data/lib/more_like_this.rb +47 -41
- data/lib/multi_index.rb +8 -11
- data/lib/remote_index.rb +50 -0
- data/lib/shared_index.rb +14 -0
- data/lib/shared_index_class_methods.rb +90 -0
- data/rakefile +88 -147
- data/script/ferret_server +18 -0
- data/script/ferret_start +67 -0
- data/script/ferret_stop +22 -0
- metadata +23 -11
- data/.init.rb.swp +0 -0
- data/.rakefile.swp +0 -0
- data/lib/.acts_as_ferret.rb.swp +0 -0
- data/lib/.class_methods.rb.swo +0 -0
- data/lib/.class_methods.rb.swp +0 -0
data/README
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
acts_as_ferret
|
1
|
+
= acts_as_ferret
|
2
2
|
|
3
3
|
This ActiveRecord mixin adds full text search capabilities to any Rails model.
|
4
4
|
|
@@ -6,10 +6,28 @@ It is heavily based on the original acts_as_ferret plugin done by
|
|
6
6
|
Kasper Weibel and a modified version done by Thomas Lockney, which
|
7
7
|
both can be found on http://ferret.davebalmain.com/trac/wiki/FerretOnRails
|
8
8
|
|
9
|
-
|
9
|
+
== Installation
|
10
|
+
|
11
|
+
=== Installation inside your Rails project via script/plugin
|
12
|
+
|
13
|
+
script/plugin install svn://projects.jkraemer.net/acts_as_ferret/trunk/plugin/acts_as_ferret
|
14
|
+
|
15
|
+
|
16
|
+
=== System-wide installation with Rubygems
|
17
|
+
|
18
|
+
<tt>sudo gem install acts_as_ferret</tt>
|
19
|
+
|
20
|
+
To use acts_as_ferret in your project, add the following line to your
|
21
|
+
project's config/environment.rb:
|
22
|
+
|
23
|
+
<tt>require 'acts_as_ferret'</tt>
|
24
|
+
|
25
|
+
|
26
|
+
== Usage
|
27
|
+
|
10
28
|
include the following in your model class (specifying the fields you want to get indexed):
|
11
29
|
|
12
|
-
acts_as_ferret :fields => [
|
30
|
+
<tt>acts_as_ferret :fields => [ :title, :description ]</tt>
|
13
31
|
|
14
32
|
now you can use ModelClass.find_by_contents(query) to find instances of your model
|
15
33
|
whose indexed fields match a given query. All query terms are required by default,
|
@@ -17,10 +35,15 @@ but explicit OR queries are possible. This differs from the ferret default, but
|
|
17
35
|
imho is the more often needed/expected behaviour (more query terms result in
|
18
36
|
fewer results).
|
19
37
|
|
38
|
+
Please see ActsAsFerret::ActMethods#acts_as_ferret for more information.
|
39
|
+
|
40
|
+
== License
|
41
|
+
|
20
42
|
Released under the MIT license.
|
21
43
|
|
22
|
-
Authors
|
23
|
-
|
24
|
-
|
44
|
+
== Authors
|
45
|
+
|
46
|
+
* Kasper Weibel Nielsen-Refs (original author)
|
47
|
+
* Jens Kraemer <jk@jkraemer.net> (current maintainer)
|
25
48
|
|
26
49
|
|
data/install.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# acts_as_ferret install script
|
2
|
+
require 'fileutils'
|
3
|
+
|
4
|
+
# Copies +file+ from this plugin's directory to the same relative path three
# directory levels above this script (presumably the Rails application root
# when the plugin lives in vendor/plugins/ - confirm against your layout).
#
# file:: path of the file to install, relative to the plugin directory.
#
# An already existing target is never overwritten; a notice is printed and
# the copy is skipped instead.
def install(file)
  puts "Installing: #{file}"
  target = File.join(File.dirname(__FILE__), '..', '..', '..', file)
  # File.exists? is a deprecated alias that was removed in Ruby 3.2;
  # File.exist? behaves identically and is available on every Ruby version.
  if File.exist?(target)
    puts "target #{target} already exists, skipping"
  else
    FileUtils.cp File.join(File.dirname(__FILE__), file), target
  end
end
|
13
|
+
|
14
|
+
install File.join( 'script', 'ferret_start' )
|
15
|
+
install File.join( 'script', 'ferret_stop' )
|
16
|
+
install File.join( 'config', 'ferret_server.yml' )
|
17
|
+
|
18
|
+
puts IO.read(File.join(File.dirname(__FILE__), 'README'))
|
19
|
+
|
data/lib/act_methods.rb
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
module ActsAsFerret #:nodoc:
|
2
|
+
|
3
|
+
# This module defines the acts_as_ferret method and is included into
|
4
|
+
# ActiveRecord::Base
|
5
|
+
module ActMethods
|
6
|
+
|
7
|
+
|
8
|
+
def reloadable?; false end
|
9
|
+
|
10
|
+
# declares a class as ferret-searchable.
|
11
|
+
#
|
12
|
+
# ====options:
|
13
|
+
# fields:: names all fields to include in the index. If not given,
|
14
|
+
# all attributes of the class will be indexed. You may also give
|
15
|
+
# symbols pointing to instance methods of your model here, i.e.
|
16
|
+
# to retrieve and index data from a related model.
|
17
|
+
#
|
18
|
+
# additional_fields:: names fields to include in the index, in addition
|
19
|
+
# to those derived from the db schema. Use if you want
|
20
|
+
# to add custom fields derived from methods to the db
|
21
|
+
# fields (which will be picked by aaf). This option will
|
22
|
+
# be ignored when the fields option is given, in that
|
23
|
+
# case additional fields get specified there.
|
24
|
+
#
|
25
|
+
# index_dir:: declares the directory where to put the index for this class.
|
26
|
+
# The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
|
27
|
+
# The index directory will be created if it doesn't exist.
|
28
|
+
#
|
29
|
+
# single_index:: set this to true to let this class use a Ferret
|
30
|
+
# index that is shared by all classes having :single_index set to true.
|
31
|
+
# :store_class_name is set to true implicitly, as well as index_dir, so
|
32
|
+
# don't bother setting these when using this option. the shared index
|
33
|
+
# will be located in index/<RAILS_ENV>/shared .
|
34
|
+
#
|
35
|
+
# store_class_name:: to make search across multiple models (with either
|
36
|
+
# single_index or the multi_search method) useful, set
|
37
|
+
# this to true. the model class name will be stored in a keyword field
|
38
|
+
# named class_name
|
39
|
+
#
|
40
|
+
# ====ferret_options:
|
41
|
+
# or_default:: whether query terms are required by
|
42
|
+
# default (the default, false), or not (true)
|
43
|
+
#
|
44
|
+
# analyzer:: the analyzer to use for query parsing (default: nil,
|
45
|
+
# which means the ferret StandardAnalyzer gets used)
|
46
|
+
#
|
47
|
+
# default_field:: use to set one or more fields that are searched for query terms
|
48
|
+
# that don't have an explicit field list. This list should *not*
|
49
|
+
# contain any untokenized fields. If it does, you're asking
|
50
|
+
# for trouble (i.e. not getting results for queries having
|
51
|
+
# stop words in them). Aaf by default initializes the default field
|
52
|
+
# list to contain all tokenized fields. If you use :single_index => true,
|
53
|
+
# you really should set this option specifying your default field
|
54
|
+
# list (which should be equal in all your classes sharing the index).
|
55
|
+
# Otherwise you might get incorrect search results and you won't get
|
56
|
+
# any lazy loading of stored field data.
|
57
|
+
#
|
58
|
+
def acts_as_ferret(options={}, ferret_options={})
|
59
|
+
|
60
|
+
# force local mode if running *inside* the Ferret server - somewhere the
|
61
|
+
# real indexing has to be done after all :-)
|
62
|
+
options.delete(:remote) if ActsAsFerret::Remote::Server.running
|
63
|
+
|
64
|
+
if options[:remote] && options[:remote] !~ /^druby/
|
65
|
+
# read server location from config/ferret_server.yml
|
66
|
+
options[:remote] = ActsAsFerret::Remote::Config.load("#{RAILS_ROOT}/config/ferret_server.yml")[:uri]
|
67
|
+
end
|
68
|
+
|
69
|
+
|
70
|
+
extend ClassMethods
|
71
|
+
extend SharedIndexClassMethods if options[:single_index]
|
72
|
+
|
73
|
+
include InstanceMethods
|
74
|
+
include MoreLikeThis::InstanceMethods
|
75
|
+
|
76
|
+
# AR hooks
|
77
|
+
after_create :ferret_create
|
78
|
+
after_update :ferret_update
|
79
|
+
after_destroy :ferret_destroy
|
80
|
+
|
81
|
+
cattr_accessor :aaf_configuration
|
82
|
+
|
83
|
+
# default config
|
84
|
+
self.aaf_configuration = {
|
85
|
+
:index_dir => "#{ActsAsFerret::index_dir}/#{self.name.underscore}",
|
86
|
+
:store_class_name => false,
|
87
|
+
:name => self.table_name,
|
88
|
+
:class_name => self.name,
|
89
|
+
:single_index => false,
|
90
|
+
:ferret => {
|
91
|
+
:or_default => false,
|
92
|
+
:handle_parse_errors => true,
|
93
|
+
:default_field => nil # will be set later on
|
94
|
+
#:max_clauses => 512,
|
95
|
+
#:analyzer => Ferret::Analysis::StandardAnalyzer.new,
|
96
|
+
# :wild_card_downcase => true
|
97
|
+
}
|
98
|
+
}
|
99
|
+
|
100
|
+
# merge aaf options with args
|
101
|
+
aaf_configuration.update(options) if options.is_a?(Hash)
|
102
|
+
|
103
|
+
# list of indexed fields will be filled later
|
104
|
+
aaf_configuration[:ferret_fields] = Hash.new
|
105
|
+
|
106
|
+
# apply appropriate settings for shared index
|
107
|
+
if aaf_configuration[:single_index]
|
108
|
+
aaf_configuration[:index_dir] = "#{ActsAsFerret::index_dir}/shared"
|
109
|
+
aaf_configuration[:store_class_name] = true
|
110
|
+
end
|
111
|
+
|
112
|
+
# merge default ferret options with args
|
113
|
+
aaf_configuration[:ferret].update(ferret_options) if ferret_options.is_a?(Hash)
|
114
|
+
|
115
|
+
# these properties are somewhat vital to the plugin and shouldn't
|
116
|
+
# be overwritten by the user:
|
117
|
+
aaf_configuration[:ferret].update(
|
118
|
+
:key => (aaf_configuration[:single_index] ? [:id, :class_name] : :id),
|
119
|
+
:path => aaf_configuration[:index_dir],
|
120
|
+
:auto_flush => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
|
121
|
+
:create_if_missing => true
|
122
|
+
)
|
123
|
+
|
124
|
+
if aaf_configuration[:fields]
|
125
|
+
add_fields(aaf_configuration[:fields])
|
126
|
+
else
|
127
|
+
add_fields(self.new.attributes.keys.map { |k| k.to_sym })
|
128
|
+
add_fields(aaf_configuration[:additional_fields])
|
129
|
+
end
|
130
|
+
|
131
|
+
ActsAsFerret::ensure_directory aaf_configuration[:index_dir] unless options[:remote]
|
132
|
+
|
133
|
+
# now that all fields have been added, we can initialize the default
|
134
|
+
# field list to be used by the query parser.
|
135
|
+
# It will include all content fields *not* marked as :untokenized.
|
136
|
+
# This fixes the otherwise failing CommentTest#test_stopwords. Basically
|
137
|
+
# this means that by default only tokenized fields (which is the default)
|
138
|
+
# will be searched. If you want to search inside the contents of an
|
139
|
+
# untokenized field, you'll have to explicitly specify it in your query.
|
140
|
+
#
|
141
|
+
# Unfortunately this is not very useful with a shared index (see
|
142
|
+
# http://projects.jkraemer.net/acts_as_ferret/ticket/85)
|
143
|
+
# You should consider specifying the default field list to search for as
|
144
|
+
# part of the ferret_options hash in your call to acts_as_ferret.
|
145
|
+
aaf_configuration[:ferret][:default_field] ||= if aaf_configuration[:single_index]
|
146
|
+
logger.warn "You really should set the acts_as_ferret :default_field option when using a shared index!"
|
147
|
+
'*'
|
148
|
+
else
|
149
|
+
aaf_configuration[:ferret_fields].keys.select do |f|
|
150
|
+
aaf_configuration[:ferret_fields][f][:index] != :untokenized
|
151
|
+
end
|
152
|
+
end
|
153
|
+
logger.info "default field list: #{aaf_configuration[:ferret][:default_field].inspect}"
|
154
|
+
end
|
155
|
+
|
156
|
+
|
157
|
+
protected
|
158
|
+
|
159
|
+
# helper that defines a method that adds the given field to a ferret
|
160
|
+
# document instance
|
161
|
+
def define_to_field_method(field, options = {})
|
162
|
+
options.reverse_merge!( :store => :no,
|
163
|
+
:highlight => :yes,
|
164
|
+
:index => :yes,
|
165
|
+
:term_vector => :with_positions_offsets,
|
166
|
+
:boost => 1.0 )
|
167
|
+
aaf_configuration[:ferret_fields][field] = options
|
168
|
+
define_method("#{field}_to_ferret".to_sym) do
|
169
|
+
begin
|
170
|
+
val = content_for_field_name(field)
|
171
|
+
rescue
|
172
|
+
logger.warn("Error retrieving value for field #{field}: #{$!}")
|
173
|
+
val = ''
|
174
|
+
end
|
175
|
+
logger.debug("Adding field #{field} with value '#{val}' to index")
|
176
|
+
val
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
def add_fields(field_config)
|
181
|
+
if field_config.respond_to?(:each_pair)
|
182
|
+
field_config.each_pair do |key,val|
|
183
|
+
define_to_field_method(key,val)
|
184
|
+
end
|
185
|
+
elsif field_config.respond_to?(:each)
|
186
|
+
field_config.each do |field|
|
187
|
+
define_to_field_method(field)
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
|
192
|
+
end
|
193
|
+
|
194
|
+
end
|
data/lib/acts_as_ferret.rb
CHANGED
@@ -18,18 +18,27 @@
|
|
18
18
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
19
19
|
# SOFTWARE.
|
20
20
|
|
21
|
+
require 'active_support'
|
21
22
|
require 'active_record'
|
22
23
|
require 'set'
|
23
24
|
require 'ferret'
|
24
25
|
|
25
|
-
require '
|
26
|
-
require 'more_like_this'
|
26
|
+
require 'act_methods'
|
27
27
|
require 'class_methods'
|
28
|
+
require 'shared_index_class_methods'
|
29
|
+
require 'ferret_result'
|
28
30
|
require 'instance_methods'
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
-
|
32
|
+
require 'multi_index'
|
33
|
+
require 'more_like_this'
|
34
|
+
|
35
|
+
require 'index'
|
36
|
+
require 'local_index'
|
37
|
+
require 'shared_index'
|
38
|
+
require 'remote_index'
|
39
|
+
|
40
|
+
require 'ferret_server'
|
41
|
+
|
33
42
|
|
34
43
|
# The Rails ActiveRecord Ferret Mixin.
|
35
44
|
#
|
@@ -41,7 +50,7 @@ require 'instance_methods'
|
|
41
50
|
#
|
42
51
|
# basic usage:
|
43
52
|
# include the following in your model class (specifying the fields you want to get indexed):
|
44
|
-
# acts_as_ferret :fields => [
|
53
|
+
# acts_as_ferret :fields => [ :title, :description ]
|
45
54
|
#
|
46
55
|
# now you can use ModelClass.find_by_contents(query) to find instances of your model
|
47
56
|
# whose indexed fields match a given query. All query terms are required by default, but
|
@@ -54,59 +63,61 @@ require 'instance_methods'
|
|
54
63
|
# Kasper Weibel Nielsen-Refs (original author)
|
55
64
|
# Jens Kraemer <jk@jkraemer.net> (active maintainer)
|
56
65
|
#
|
57
|
-
module
|
58
|
-
module Acts #:nodoc:
|
59
|
-
module ARFerret #:nodoc:
|
66
|
+
module ActsAsFerret
|
60
67
|
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
@total_hits = total_hits
|
67
|
-
end
|
68
|
-
def method_missing(symbol, *args, &block)
|
69
|
-
@results.send(symbol, *args, &block)
|
70
|
-
end
|
71
|
-
def respond_to?(name)
|
72
|
-
self.methods.include?(name) || @results.respond_to?(name)
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
def self.ensure_directory(dir)
|
77
|
-
FileUtils.mkdir_p dir unless File.directory? dir
|
78
|
-
end
|
79
|
-
|
80
|
-
# make sure the default index base dir exists. by default, all indexes are created
|
81
|
-
# under RAILS_ROOT/index/RAILS_ENV
|
82
|
-
def self.init_index_basedir
|
83
|
-
index_base = "#{RAILS_ROOT}/index"
|
84
|
-
ensure_directory index_base
|
85
|
-
@@index_dir = "#{index_base}/#{RAILS_ENV}"
|
86
|
-
ensure_directory @@index_dir
|
87
|
-
end
|
88
|
-
|
89
|
-
mattr_accessor :index_dir
|
90
|
-
init_index_basedir
|
91
|
-
|
92
|
-
def self.append_features(base)
|
93
|
-
super
|
94
|
-
base.extend(ClassMethods)
|
95
|
-
end
|
68
|
+
# global Hash containing all multi indexes created by all classes using the plugin
|
69
|
+
# key is the concatenation of alphabetically sorted names of the classes the
|
70
|
+
# searcher searches.
|
71
|
+
@@multi_indexes = Hash.new
|
72
|
+
def self.multi_indexes; @@multi_indexes end
|
96
73
|
|
97
|
-
|
98
|
-
|
74
|
+
# global Hash containing the ferret indexes of all classes using the plugin
|
75
|
+
# key is the index directory.
|
76
|
+
@@ferret_indexes = Hash.new
|
77
|
+
def self.ferret_indexes; @@ferret_indexes end
|
78
|
+
|
79
|
+
# decorator that adds a total_hits accessor to search result arrays
|
80
|
+
# Decorator that adds a total_hits accessor to search result arrays.
#
# Every message the decorator does not define itself is forwarded to the
# wrapped result collection, so an instance can be used like the collection
# it wraps.
class SearchResults
  # The total_hits value handed to the constructor.
  attr_reader :total_hits

  # results::    the result collection to wrap
  # total_hits:: value exposed through #total_hits
  def initialize(results, total_hits)
    @results = results
    @total_hits = total_hits
  end

  # Forwards any unknown message (including its block) to the wrapped results.
  def method_missing(symbol, *args, &block)
    @results.send(symbol, *args, &block)
  end

  # Returns true if the decorator itself or the wrapped results respond to
  # +name+.
  #
  # Delegating to +super+ (instead of scanning +methods+) makes the answer
  # independent of whether +name+ is a String or a Symbol, and accepting the
  # optional +include_private+ flag keeps the standard two-argument
  # respond_to? call signature working.
  def respond_to?(name, include_private = false)
    super || @results.respond_to?(name, include_private)
  end
end
|
93
|
+
|
94
|
+
def self.ensure_directory(dir)
|
95
|
+
FileUtils.mkdir_p dir unless File.directory? dir
|
96
|
+
end
|
97
|
+
|
98
|
+
# make sure the default index base dir exists. by default, all indexes are created
|
99
|
+
# under RAILS_ROOT/index/RAILS_ENV
|
100
|
+
def self.init_index_basedir
|
101
|
+
index_base = "#{RAILS_ROOT}/index"
|
102
|
+
@@index_dir = "#{index_base}/#{RAILS_ENV}"
|
103
|
+
end
|
104
|
+
|
105
|
+
mattr_accessor :index_dir
|
106
|
+
init_index_basedir
|
107
|
+
|
108
|
+
def self.append_features(base)
|
109
|
+
super
|
110
|
+
base.extend(ClassMethods)
|
111
|
+
end
|
112
|
+
|
101
113
|
end
|
102
114
|
|
103
|
-
#
|
104
|
-
|
105
|
-
ActiveRecord::Base.class_eval do
|
106
|
-
include FerretMixin::Acts::ARFerret
|
107
|
-
end
|
115
|
+
# include acts_as_ferret method into ActiveRecord::Base
|
116
|
+
ActiveRecord::Base.extend ActsAsFerret::ActMethods
|
108
117
|
|
109
118
|
|
119
|
+
# small Ferret monkey patch
|
120
|
+
# TODO check if this is still necessary
|
110
121
|
class Ferret::Index::MultiReader
|
111
122
|
def latest?
|
112
123
|
# TODO: Exception handling added to resolve ticket #6.
|
@@ -121,4 +132,15 @@ class Ferret::Index::MultiReader
|
|
121
132
|
end
|
122
133
|
end
|
123
134
|
|
124
|
-
#
|
135
|
+
# add marshalling support to SortFields
|
136
|
+
class Ferret::Search::SortField
|
137
|
+
def _dump(depth)
|
138
|
+
to_s
|
139
|
+
end
|
140
|
+
|
141
|
+
def self._load(string)
|
142
|
+
raise "invalid value: #{string}" unless string =~ /^(\w+):<(\w+)>(\!)?$/
|
143
|
+
new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|