sdsykes_acts_as_ferret 0.4.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2006 Kasper Weibel, Jens Kraemer
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,51 @@
1
+ = acts_as_ferret
2
+
3
+ Do not use this unless you particularly need AAF 0.4.3.
4
+ This is a fixed version of AAF 0.4.3 to work without warnings on Rails 2.3.10.
5
+
6
+ This ActiveRecord mixin adds full text search capabilities to any Rails model.
7
+
8
+ It is heavily based on the original acts_as_ferret plugin done by
9
+ Kasper Weibel and a modified version done by Thomas Lockney, which
10
+ both can be found on http://ferret.davebalmain.com/trac/wiki/FerretOnRails
11
+
12
+ == Installation
13
+
14
+ === System-wide installation with Rubygems
15
+
16
+ <tt>sudo gem install sdsykes_acts_as_ferret</tt>
17
+
18
+ To use acts_as_ferret in your project, add the following line to your
19
+ project's config/environment.rb:
20
+
21
+ <tt>gem 'sdsykes_acts_as_ferret'</tt>
22
+ <tt>require 'acts_as_ferret'</tt>
23
+
24
+ Call the aaf_install script inside your project directory to install the sample
25
+ config file and the drb server start/stop script.
26
+
27
+
28
+ == Usage
29
+
30
+ Include the following in your model class (specifying the fields you want to get indexed):
31
+
32
+ <tt>acts_as_ferret :fields => [ :title, :description ]</tt>
33
+
34
+ Now you can use ModelClass.find_by_contents(query) to find instances of your model
35
+ whose indexed fields match a given query. All query terms are required by default,
36
+ but explicit OR queries are possible. This differs from the ferret default, but
37
+ imho is the more often needed/expected behaviour (more query terms result in
38
+ fewer results).
39
+
40
+ Please see ActsAsFerret::ActMethods#acts_as_ferret for more information.
41
+
42
+ == License
43
+
44
+ Released under the MIT license.
45
+
46
+ == Authors
47
+
48
+ * Kasper Weibel Nielsen-Refs (original author)
49
+ * Jens Kraemer <jk@jkraemer.net> (current maintainer)
50
+
51
+
@@ -0,0 +1,25 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # acts_as_ferret gem install script
4
+ # Use inside the root of your Rails project
5
+ require 'fileutils'
6
+
7
+ @basedir = File.join(File.dirname(__FILE__), '..')
8
+
9
# Copies +file+ from the +dir+ sub directory below @basedir into the
# same-named sub directory below the current working directory.
#
# dir::        sub directory name, used both below @basedir (source) and
#              below the current directory (target), e.g. 'script'
# file::       name of the file to copy
# executable:: when true, the copied file gets mode 0755
#
# An already existing target file is left untouched.
def install(dir, file, executable=false)
  puts "Installing: #{file}"
  target = File.join('.', dir, file)
  # File.exist? instead of the deprecated File.exists? alias, which has been
  # removed in Ruby 3.2 and would raise NoMethodError there.
  if File.exist?(target)
    puts "#{target} already exists, skipping"
  else
    FileUtils.cp File.join(@basedir, dir, file), target
    FileUtils.chmod 0755, target if executable
  end
end
19
+
20
+
21
+ install 'script', 'ferret_server', true
22
+ install 'config', 'ferret_server.yml'
23
+
24
+ puts IO.read(File.join(@basedir, 'README'))
25
+
@@ -0,0 +1,23 @@
1
+ # configuration for the acts_as_ferret DRb server
2
+ # host: where to reach the DRb server (used by application processes to contact the server)
3
+ # port: which port the server should listen on
4
+ # pid_file: location of the server's pid file (relative to RAILS_ROOT)
5
+ # log_file: log file (default: RAILS_ROOT/log/ferret_server.log)
6
+ # log_level: log level for the server's logger
7
+ production:
8
+ host: localhost
9
+ port: 9010
10
+ pid_file: log/ferret.pid
11
+ log_file: log/ferret_server.log
12
+ log_level: warn
13
+
14
+ # aaf won't try to use the DRb server in environments that are not
15
+ # configured here.
16
+ #development:
17
+ # host: localhost
18
+ # port: 9010
19
+ # pid_file: log/ferret.pid
20
+ #test:
21
+ # host: localhost
22
+ # port: 9009
23
+ # pid_file: log/ferret.pid
@@ -0,0 +1,23 @@
1
+ Credits
2
+ =======
3
+
4
+ The Win32 service support scripts have been written by
5
+ Herryanto Siatono <herryanto@pluitsolutions.com>.
6
+
7
+ See his accompanying blog posting at
8
+ http://www.pluitsolutions.com/2007/07/30/acts-as-ferret-drbserver-win32-service/
9
+
10
+
11
+ Usage
12
+ =====
13
+
14
+ There are two scripts:
15
+
16
+ script/ferret_service is used to install/remove/start/stop the win32 service.
17
+
18
+ script/ferret_daemon is to be called by the Win32 service to start/stop the
19
+ DRbServer.
20
+
21
+ Run 'ruby script/ferret_service -h' for more info.
22
+
23
+
@@ -0,0 +1,22 @@
1
+ # monit configuration snippet to watch the Ferret DRb server shipped with
2
+ # acts_as_ferret
3
+ check process ferret with pidfile /path/to/ferret.pid
4
+
5
+ # username is the user the drb server should be running as (It's good practice
6
+ # to run such services as a non-privileged user)
7
+ start program = "/bin/su -c 'cd /path/to/your/app/current/ && script/ferret_server -e production start' username"
8
+ stop program = "/bin/su -c 'cd /path/to/your/app/current/ && script/ferret_server -e production stop' username"
9
+
10
+ # cpu usage boundaries
11
+ if cpu > 60% for 2 cycles then alert
12
+ if cpu > 90% for 5 cycles then restart
13
+
14
+ # memory usage varies with index size and usage scenarios, so check how
15
+ # much memory your DRb server uses up usually and add some spare to that
16
+ # before enabling this rule:
17
+ # if totalmem > 50.0 MB for 5 cycles then restart
18
+
19
+ # adjust port numbers according to your setup:
20
+ if failed port 9010 then alert
21
+ if failed port 9010 for 2 cycles then restart
22
+ group ferret
data/init.rb ADDED
@@ -0,0 +1,22 @@
1
+ # Copyright (c) 2006 Kasper Weibel Nielsen-Refs, Thomas Lockney, Jens Krämer
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in all
11
+ # copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19
+ # SOFTWARE.
20
+
21
+ require 'acts_as_ferret'
22
+
@@ -0,0 +1,18 @@
1
+ # acts_as_ferret install script
2
+ require 'fileutils'
3
+
4
# Copies +file+ (a path relative to the directory of this script) to the
# same relative path three directory levels above this script.
# NOTE(review): three levels up is presumably the Rails application root when
# the plugin lives in vendor/plugins/<name> - confirm.
#
# file:: relative path of the file to install, e.g. 'script/ferret_server'
#
# An already existing target file is left untouched.
def install(file)
  puts "Installing: #{file}"
  target = File.join(File.dirname(__FILE__), '..', '..', '..', file)
  # File.exist? instead of the deprecated File.exists? alias, which has been
  # removed in Ruby 3.2 and would raise NoMethodError there.
  if File.exist?(target)
    puts "target #{target} already exists, skipping"
  else
    FileUtils.cp File.join(File.dirname(__FILE__), file), target
  end
end
13
+
14
+ install File.join( 'script', 'ferret_server' )
15
+ install File.join( 'config', 'ferret_server.yml' )
16
+
17
+ puts IO.read(File.join(File.dirname(__FILE__), 'README'))
18
+
@@ -0,0 +1,254 @@
1
module ActsAsFerret #:nodoc:

  # This module defines the acts_as_ferret method and is included into
  # ActiveRecord::Base
  module ActMethods

    # Always answers false.
    # NOTE(review): presumably a hook queried by the Rails dependency loader
    # to keep classes using this module from being reloaded - confirm.
    def reloadable?; false end

    # declares a class as ferret-searchable.
    #
    # ====options:
    # fields:: names all fields to include in the index. If not given,
    #          all attributes of the class will be indexed. You may also give
    #          symbols pointing to instance methods of your model here, i.e.
    #          to retrieve and index data from a related model.
    #
    # additional_fields:: names fields to include in the index, in addition
    #                     to those derived from the db schema. use if you want
    #                     to add custom fields derived from methods to the db
    #                     fields (which will be picked by aaf). This option will
    #                     be ignored when the fields option is given, in that
    #                     case additional fields get specified there.
    #
    # index_dir:: declares the directory where to put the index for this class.
    #             The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
    #             The index directory will be created if it doesn't exist.
    #
    # single_index:: set this to true to let this class use a Ferret
    #                index that is shared by all classes having :single_index set to true.
    #                :store_class_name is set to true implicitly, as well as index_dir, so
    #                don't bother setting these when using this option. the shared index
    #                will be located in index/<RAILS_ENV>/shared .
    #
    # store_class_name:: to make search across multiple models (with either
    #                    single_index or the multi_search method) useful, set
    #                    this to true. the model class name will be stored in a keyword field
    #                    named class_name
    #
    # reindex_batch_size:: reindexing is done in batches of this size, default is 1000
    # mysql_fast_batches:: set this to false to disable the faster mysql batching
    #                      algorithm if this model uses a non-integer primary key named
    #                      'id' on MySQL.
    #
    # ferret:: Hash of Options that directly influence the way the Ferret engine works. You
    #          can use most of the options the Ferret::I class accepts here, too. Among the
    #          more useful are:
    #
    #     or_default:: whether query terms are required by
    #                  default (the default, false), or not (true)
    #
    #     analyzer:: the analyzer to use for query parsing (default: nil,
    #                which means the ferret StandardAnalyzer gets used)
    #
    #     default_field:: use to set one or more fields that are searched for query terms
    #                     that don't have an explicit field list. This list should *not*
    #                     contain any untokenized fields. If it does, you're asking
    #                     for trouble (i.e. not getting results for queries having
    #                     stop words in them). Aaf by default initializes the default field
    #                     list to contain all tokenized fields. If you use :single_index => true,
    #                     you really should set this option specifying your default field
    #                     list (which should be equal in all your classes sharing the index).
    #                     Otherwise you might get incorrect search results and you won't get
    #                     any lazy loading of stored field data.
    #
    # For downwards compatibility reasons you can also specify the Ferret options in the
    # last Hash argument.
    #
    # Neither of the two hashes handed in is modified, so the same settings
    # hash may safely be shared between several model classes.
    def acts_as_ferret(options={}, ferret_options={})
      # work on a copy - :remote gets set / removed below and the caller's
      # hash must not change as a side effect
      options = options.dup if options.is_a?(Hash)

      # default to DRb mode
      options[:remote] = true if options[:remote].nil?

      # force local mode if running *inside* the Ferret server - somewhere the
      # real indexing has to be done after all :-)
      # Usually the automatic detection of server mode works fine, however if you
      # require your model classes in environment.rb they will get loaded before the
      # DRb server is started, so this code is executed too early and detection won't
      # work. In this case you'll get endless loops resulting in "stack level too deep"
      # errors.
      # To get around this, start the DRb server with the environment variable
      # FERRET_USE_LOCAL_INDEX set to '1'.
      inside_server = ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
      logger.debug "Asked for a remote server ? #{options[:remote].inspect}, ENV[\"FERRET_USE_LOCAL_INDEX\"] is #{ENV["FERRET_USE_LOCAL_INDEX"].inspect}, looks like we are#{inside_server ? '' : ' not'} the server"
      options.delete(:remote) if inside_server

      # :remote usually is +true+ at this point. Matching a non-String against
      # a Regexp relies on Object#=~ which is gone in Ruby 3.2, hence the to_s.
      if options[:remote] && options[:remote].to_s !~ /\Adruby/
        # read server location from config/ferret_server.yml
        begin
          options[:remote] = ActsAsFerret::Remote::Config.new.uri
        rescue StandardError => e
          # best effort: without a usable server configuration the local
          # index is used, but at least leave a trace of the reason
          logger.debug "Could not determine remote index server location (#{e.message}), falling back to local index"
          options[:remote] = nil
        end
      end

      if options[:remote]
        logger.debug "Will use remote index server which should be available at #{options[:remote]}"
      else
        logger.debug "Will use local index."
      end


      extend ClassMethods
      extend SharedIndexClassMethods if options[:single_index]

      include InstanceMethods
      include MoreLikeThis::InstanceMethods

      # AR hooks
      after_create  :ferret_create
      after_update  :ferret_update
      after_destroy :ferret_destroy

      cattr_accessor :aaf_configuration

      # default config
      self.aaf_configuration = {
        :index_dir => "#{ActsAsFerret::index_dir}/#{self.name.underscore}",
        :store_class_name => false,
        :name => self.table_name,
        :class_name => self.name,
        :single_index => false,
        :reindex_batch_size => 1000,
        :ferret => {},                      # Ferret config Hash
        :ferret_fields => {},               # list of indexed fields that will be filled later
        :enabled => true,                   # used for class-wide disabling of Ferret
        :mysql_fast_batches => true         # turn off to disable the faster, id based batching mechanism for MySQL
      }

      # merge aaf options with args
      aaf_configuration.update(options) if options.is_a?(Hash)
      # apply appropriate settings for shared index
      if aaf_configuration[:single_index]
        aaf_configuration[:index_dir] = "#{ActsAsFerret::index_dir}/shared"
        aaf_configuration[:store_class_name] = true
      end

      # set ferret default options. Building a new Hash (instead of merging
      # the defaults into the Hash given by the caller) keeps :path, :key and
      # :default_field from leaking into other models sharing that Hash.
      aaf_configuration[:ferret] = {
        :or_default => false,
        :handle_parse_errors => true,
        :default_field => nil # will be set later on
        #:max_clauses => 512,
        #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
        # :wild_card_downcase => true
      }.merge(aaf_configuration[:ferret] || {})

      # merge ferret options with those from second parameter hash
      aaf_configuration[:ferret].update(ferret_options) if ferret_options.is_a?(Hash)

      unless options[:remote]
        ActsAsFerret::ensure_directory aaf_configuration[:index_dir]
        aaf_configuration[:index_base_dir] = aaf_configuration[:index_dir]
        aaf_configuration[:index_dir] = find_last_index_version(aaf_configuration[:index_dir])
        logger.debug "using index in #{aaf_configuration[:index_dir]}"
      end

      # these properties are somewhat vital to the plugin and shouldn't
      # be overwritten by the user:
      aaf_configuration[:ferret].update(
        :key => (aaf_configuration[:single_index] ? [:id, :class_name] : :id),
        :path => aaf_configuration[:index_dir],
        :auto_flush => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
        :create_if_missing => true
      )

      if aaf_configuration[:fields]
        add_fields(aaf_configuration[:fields])
      else
        add_fields(self.new.attributes.keys.map { |k| k.to_sym })
        add_fields(aaf_configuration[:additional_fields])
      end

      # now that all fields have been added, we can initialize the default
      # field list to be used by the query parser.
      # It will include all content fields *not* marked as :untokenized.
      # This fixes the otherwise failing CommentTest#test_stopwords. Basically
      # this means that by default only tokenized fields (which is the default)
      # will be searched. If you want to search inside the contents of an
      # untokenized field, you'll have to explicitly specify it in your query.
      #
      # Unfortunately this is not very useful with a shared index (see
      # http://projects.jkraemer.net/acts_as_ferret/ticket/85)
      # You should consider specifying the default field list to search for as
      # part of the ferret_options hash in your call to acts_as_ferret.
      aaf_configuration[:ferret][:default_field] ||= if aaf_configuration[:single_index]
        logger.warn "You really should set the acts_as_ferret :default_field option when using a shared index!"
        '*'
      else
        aaf_configuration[:ferret_fields].keys.select do |f|
          aaf_configuration[:ferret_fields][f][:index] != :untokenized
        end
      end
      logger.info "default field list: #{aaf_configuration[:ferret][:default_field].inspect}"

      if options[:remote]
        aaf_index.ensure_index_exists
      end
    end


    protected

    # find the most recent version of an index
    #
    # A versioned index is a sub directory of +basedir+ whose name consists of
    # digits only, optionally followed by an underscore and more digits, and
    # which contains a file named 'segments'. Returns the path of the sub
    # directory carrying the highest version, or +basedir+ itself if there is
    # no versioned index.
    def find_last_index_version(basedir)
      # check for versioned index
      versions = Dir.entries(basedir).select do |f|
        dir = File.join(basedir, f)
        f =~ /\A\d+(_\d+)?\z/ && File.directory?(dir) && File.file?(File.join(dir, 'segments'))
      end
      return basedir if versions.empty?

      # select latest version. Versions are compared by their numeric parts,
      # a plain String sort would rank '10' before '9'.
      latest = versions.max_by { |version| version.split('_').map { |part| part.to_i } }
      File.join basedir, latest
    end


    # helper that defines a method that adds the given field to a ferret
    # document instance
    #
    # field::   name of the field; the generated method is named
    #           <field>_to_ferret
    # options:: Ferret field options (:store, :highlight, :index,
    #           :term_vector, :boost). A Symbol given as :boost is not stored
    #           as a field option but handed to content_for_field_name as
    #           dynamic boost.
    #
    # The options Hash handed in is not modified; the effective field options
    # get stored in aaf_configuration[:ferret_fields][field].
    def define_to_field_method(field, options = {})
      # copy first - deleting :boost from the caller's Hash would strip the
      # dynamic boost from every other field / model reusing that Hash
      options = (options || {}).dup
      dynamic_boost = options.delete(:boost) if options[:boost].is_a?(Symbol)
      options = { :store       => :no,
                  :highlight   => :yes,
                  :index       => :yes,
                  :term_vector => :with_positions_offsets,
                  :boost       => 1.0 }.merge(options)
      options[:term_vector] = :no if options[:index] == :no
      aaf_configuration[:ferret_fields][field] = options

      define_method("#{field}_to_ferret".to_sym) do
        begin
          val = content_for_field_name(field, dynamic_boost)
        rescue
          # a failing field must not abort indexing of the whole record
          logger.warn("Error retrieving value for field #{field}: #{$!}")
          val = ''
        end
        logger.debug("Adding field #{field} with value '#{val}' to index")
        val
      end
    end

    # Registers every field of +field_config+ via define_to_field_method.
    #
    # field_config:: either a Hash mapping field names to their option hashes,
    #                or anything responding to +each+ that yields field names
    #                (registered with default options). Any other value, e.g.
    #                nil, is silently ignored.
    def add_fields(field_config)
      if field_config.is_a? Hash
        field_config.each_pair do |key,val|
          define_to_field_method(key,val)
        end
      elsif field_config.respond_to?(:each)
        field_config.each do |field|
          define_to_field_method(field)
        end
      end
    end

  end

end