sdsykes_acts_as_ferret 0.4.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,21 @@
1
+ # Ferret DRb server Capistrano tasks
2
+ # Usage:
3
+ # Add require 'vendor/plugins/acts_as_ferret/lib/ferret_cap_tasks' to your
4
+ # config/deploy.rb
5
+ # call ferret.restart where you restart your Mongrels.
6
+ # ferret.stop and ferret.start are available, too.
7
# Capistrano plugin methods controlling the Ferret DRb server. Each task runs
# one of the application's script/ferret_* scripts from the current release
# directory on the deployment target.
module FerretCapTasks
  # Starts the Ferret DRb server via script/ferret_start.
  #
  # rails_env:: value exported as RAILS_ENV for the script
  #             (default: 'production')
  def start(rails_env = 'production')
    run ferret_script_command('ferret_start', rails_env)
  end

  # Stops the Ferret DRb server via script/ferret_stop.
  #
  # rails_env:: value exported as RAILS_ENV for the script
  #             (default: 'production')
  def stop(rails_env = 'production')
    run ferret_script_command('ferret_stop', rails_env)
  end

  # Stops, then starts the server again.
  #
  # rails_env:: value exported as RAILS_ENV for both scripts
  #             (default: 'production')
  def restart(rails_env = 'production')
    stop(rails_env)
    start(rails_env)
  end

  private

  # Builds the shell command line for one of the script/ferret_* scripts.
  # The two steps are chained with && so the script is never launched from
  # the wrong directory when the cd fails.
  def ferret_script_command(script, rails_env)
    "cd #{current_path} && RAILS_ENV=#{rails_env} script/#{script}"
  end
end
21
# Register the tasks above with Capistrano under the name :ferret.
Capistrano.plugin(:ferret, FerretCapTasks)
@@ -0,0 +1,115 @@
1
module Ferret

  module Analysis

    # = PerFieldAnalyzer
    #
    # This PerFieldAnalyzer is a workaround for a memory leak in
    # ferret 0.11.4. It does basically the same as the original
    # Ferret::Analysis::PerFieldAnalyzer, but without the leak :)
    #
    # http://ferret.davebalmain.com/api/classes/Ferret/Analysis/PerFieldAnalyzer.html
    #
    # Thanks to Ben from omdb.org for tracking this down and creating this
    # workaround.
    # You can read more about the issue there:
    # http://blog.omdb-beta.org/2007/7/29/tracking-down-a-memory-leak-in-ferret-0-11-4
    class PerFieldAnalyzer < ::Ferret::Analysis::Analyzer
      # default_analyzer:: analyzer used for every field that has no analyzer
      #                    registered through #add_field
      def initialize(default_analyzer = StandardAnalyzer.new)
        @analyzers = {}
        @default_analyzer = default_analyzer
      end

      # Registers +analyzer+ as the analyzer for +field+.
      def add_field(field, analyzer)
        @analyzers[field] = analyzer
      end
      alias []= add_field

      # Delegates to the analyzer registered for +field+, or to the default
      # analyzer when none has been registered.
      def token_stream(field, string)
        analyzer = @analyzers.has_key?(field) ? @analyzers[field] : @default_analyzer
        analyzer.token_stream(field, string)
      end
    end
  end

  class Index::Index
    attr_accessor :batch_size, :logger

    # Indexes all records of the given model classes, then flushes,
    # optimizes and closes this index and closes the multi indexes.
    def index_models(models)
      models.each { |model| index_model model }
      flush
      optimize
      close
      ActsAsFerret::close_multi_indexes
    end

    # Indexes all records of a single model class in batches of
    # +batch_size+, as yielded by model.records_for_rebuild.
    def index_model(model)
      bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                   :model => model, :index => self, :reindex => true)
      logger.info "reindexing model #{model.name}"

      model.records_for_rebuild(@batch_size) do |records, offset|
        bulk_indexer.index_records(records, offset)
      end
    end

    # Indexes the records of +model+ identified by +ids+ in batches.
    #
    # options:: :optimize - optimize the index afterwards (default: true)
    #
    # Auto flushing is switched off while the batches are written and is
    # restored afterwards, even when indexing raises. The caller's options
    # hash is not modified. Returns the auto flush setting that was in
    # effect before the call.
    def bulk_index(model, ids, options = {})
      options = options.merge(:optimize => true) unless options.has_key?(:optimize)
      orig_flush = @auto_flush
      @auto_flush = false
      begin
        bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
                                                     :model => model, :index => self, :total => ids.size)
        model.records_for_bulk_index(ids, @batch_size) do |records, offset|
          logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
          bulk_indexer.index_records(records, offset)
        end
        logger.info 'finishing bulk index...'
        flush
        if options[:optimize]
          logger.info 'optimizing...'
          optimize
        end
      ensure
        # never leave the index with auto flushing silently turned off
        @auto_flush = orig_flush
      end
      orig_flush
    end

  end

  # add marshalling support to SortFields
  class Search::SortField
    # Marshal hook: a sort field is serialized as its string representation.
    def _dump(depth)
      to_s
    end

    # Marshal hook: rebuilds a sort field from the string written by #_dump.
    # Raises a RuntimeError for strings it does not recognize.
    def self._load(string)
      # the reversed (trailing '!') variants must be tested before the plain ones
      case string
      when /<DOC(_ID)?>!/ then Ferret::Search::SortField::DOC_ID_REV
      when /<DOC(_ID)?>/ then Ferret::Search::SortField::DOC_ID
      when '<SCORE>!' then Ferret::Search::SortField::SCORE_REV
      when '<SCORE>' then Ferret::Search::SortField::SCORE
      when /^(\w+):<(\w+)>(!)?$/ then new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
      else raise "invalid value: #{string}"
      end
    end
  end

  # add marshalling support to Sort
  class Search::Sort
    # Marshal hook: a sort is serialized as its string representation.
    def _dump(depth)
      to_s
    end

    # Marshal hook: rebuilds a sort from the string written by #_dump.
    # Raises a RuntimeError for strings it does not recognize.
    def self._load(string)
      # we exclude the last <DOC> sorting as it is appended by new anyway
      unless string =~ /^Sort\[(.*?)(<DOC>(!)?)?\]$/
        raise "invalid value: #{string}"
      end
      field_strings = $1.split(',').map { |value| value.strip }
      field_strings = field_strings.reject { |value| value.empty? }
      new field_strings.map { |value| Ferret::Search::SortField._load(value) }
    end
  end

end
@@ -0,0 +1,36 @@
1
module ActsAsFerret

  # mixed into the FerretResult and AR classes calling acts_as_ferret
  module ResultAttributes
    # holds the score this record had when it was found via
    # acts_as_ferret
    attr_accessor :ferret_score

    # rank of this record (assigned elsewhere; presumably its position in
    # the result list - TODO confirm)
    attr_accessor :ferret_rank
  end

  # Lightweight search hit. Values present in the +data+ hash are answered
  # directly; any other method call loads the record through the model's
  # +find+ and is delegated to it.
  class FerretResult
    include ResultAttributes
    attr_accessor :id

    # model:: object responding to +constantize+ that yields the model class
    # id::    id of the record
    # score:: score of this hit, available as #ferret_score
    # data::  hash of values keyed by method name (default: empty)
    def initialize(model, id, score, data = {})
      @model = model.constantize
      @id = id
      @ferret_score = score
      @data = data
    end

    # Answers from @data while the record has not been loaded and @data
    # holds a non-nil value for +method+. Otherwise the record is loaded
    # (once) and the call, including any block, is forwarded to it.
    def method_missing(method, *args, &block)
      if @ar_record || @data[method].nil?
        ferret_load_record unless @ar_record
        @ar_record.send method, *args, &block
      else
        @data[method]
      end
    end

    # Loads the record this result stands for and caches it.
    def ferret_load_record
      @ar_record = @model.find(id)
    end
  end
end
@@ -0,0 +1,203 @@
1
+ require 'drb'
2
+ require 'thread'
3
+ require 'yaml'
4
+ require 'erb'
5
+
6
+ ################################################################################
7
module ActsAsFerret
  module Remote

    ################################################################################
    # Settings of the ferret DRb server. They are read from a YAML file
    # (passed through ERB first); the section named after RAILS_ENV is merged
    # over DEFAULTS.
    class Config

      ################################################################################
      DEFAULTS = {
        'host'      => 'localhost',
        'port'      => '9009',
        'cf'        => "#{RAILS_ROOT}/config/ferret_server.yml",
        'pid_file'  => "#{RAILS_ROOT}/log/ferret_server.pid",
        'log_file'  => "#{RAILS_ROOT}/log/ferret_server.log",
        'log_level' => 'debug',
      }

      ################################################################################
      # load the configuration file and apply default settings
      #
      # The 'uri' setting is only derived when the file has a section for
      # the current RAILS_ENV. Raises a RuntimeError if the file does not
      # contain a hash.
      def initialize(file = DEFAULTS['cf'])
        @everything = YAML.load(ERB.new(IO.read(file)).result)
        raise "malformed ferret server config" unless @everything.is_a?(Hash)
        env_settings = @everything[RAILS_ENV]
        @config = DEFAULTS.merge(env_settings || {})
        @config['uri'] = "druby://#{host}:#{port}" if env_settings
      end

      ################################################################################
      # treat the keys of the config data as methods
      def method_missing(name, *args)
        key = name.to_s
        @config.has_key?(key) ? @config[key] : super
      end

    end

    #################################################################################
    # This class acts as a drb server listening for indexing and
    # search requests from models declared to 'acts_as_ferret :remote => true'
    #
    # Usage:
    # - modify RAILS_ROOT/config/ferret_server.yml to suit your needs.
    # - environments for which no section in the config file exists will use
    #   the index locally (good for unit tests/development mode)
    # - run script/ferret_server to start the server:
    #   script/ferret_server -e production start
    # - to stop the server run
    #   script/ferret_server -e production stop
    #
    class Server

      #################################################################################
      # FIXME include detection of OS and include the correct file
      require 'unix_daemon'
      include(ActsAsFerret::Remote::UnixDaemon)

      ################################################################################
      cattr_accessor :running

      ################################################################################
      # Loads the server configuration and points the ActiveRecord logger at
      # the configured log file, using the configured log level.
      def initialize
        @cfg = ActsAsFerret::Remote::Config.new
        ActiveRecord::Base.allow_concurrency = true
        ActiveRecord::Base.logger = @logger = Logger.new(@cfg.log_file)
        ActiveRecord::Base.logger.level = configured_log_level
      end

      ################################################################################
      # start the server
      #
      # Raises a RuntimeError when the config file has no section for the
      # current RAILS_ENV. Anything raised while starting is logged and
      # raised again.
      def start
        raise "ferret_server not configured for #{RAILS_ENV}" unless configured_uri
        $stdout.puts("starting ferret server...")

        platform_daemon do
          self.class.running = true
          DRb.start_service(@cfg.uri, self)
          DRb.thread.join
        end
      rescue Exception => e
        # catching Exception is fine here: the error is only logged and
        # then re-raised unchanged
        @logger.error(e.to_s)
        raise
      end

      #################################################################################
      # handles all incoming method calls, and sends them on to the LocalIndex
      # instance of the correct model class.
      #
      # Calls are not queued atm, so this will block until the call returned.
      #
      # The first argument names the model class, the remaining ones are
      # passed on to the called method.
      #
      # NOTE(review): +send+ also reaches private methods of the index and of
      # the model class, and the method name comes from the DRb client -
      # make sure only trusted clients can connect to this server.
      def method_missing(name, *args)
        @logger.debug "\#method_missing(#{name.inspect}, #{args.inspect})"
        with_class args.shift do |clazz|
          reconnect_when_needed(clazz) do
            # using respond_to? here so we not have to catch NoMethodError
            # which would silently catch those from deep inside the indexing
            # code, too...
            if clazz.aaf_index.respond_to?(name)
              clazz.aaf_index.send name, *args
            elsif clazz.respond_to?(name)
              @logger.debug "no luck, trying to call class method instead"
              clazz.send name, *args
            else
              raise NoMethodError.new("method #{name} not supported by DRb server")
            end
          end
        end
      rescue => e
        @logger.error "ferret server error #{e}\n#{e.backtrace.join "\n"}"
        raise
      end

      # make sure we have a versioned index in place, building one if necessary
      def ensure_index_exists(class_name)
        @logger.debug "DRb server: ensure_index_exists for class #{class_name}"
        with_class class_name do |clazz|
          dir = clazz.aaf_configuration[:index_dir]
          # usable means: the directory exists, contains a 'segments' file
          # and its name ends in a version (digits, optionally _digits)
          usable = File.directory?(dir) &&
                   File.file?(File.join(dir, 'segments')) &&
                   dir =~ %r{/\d+(_\d+)?$}
          rebuild_index(clazz) unless usable
        end
      end

      # disconnects the db connection for the class specified by class_name
      # used only in unit tests to check the automatic reconnection feature
      def db_disconnect!(class_name)
        with_class class_name do |clazz|
          clazz.connection.disconnect!
        end
      end

      # hides LocalIndex#rebuild_index to implement index versioning
      #
      # The index is built in the 'rebuild' directory below the index base
      # directory, then renamed to a directory named after the current UTC
      # timestamp, which becomes the class' new index_dir.
      def rebuild_index(clazz, *models)
        with_class clazz do |index_class|
          model_classes = models.flatten.uniq.map(&:constantize)
          model_classes << index_class unless model_classes.include?(index_class)
          index = new_index_for(index_class, model_classes)
          reconnect_when_needed(index_class) do
            @logger.debug "DRb server: rebuild index for class(es) #{model_classes.inspect} in #{index.options[:path]}"
            index.index_models model_classes
          end
          new_version = File.join index_class.aaf_configuration[:index_base_dir], Time.now.utc.strftime('%Y%m%d%H%M%S')
          # create a unique directory name (needed for unit tests where
          # multiple rebuilds per second may occur)
          if File.exist?(new_version)
            i = 0
            i += 1 while File.exist?("#{new_version}_#{i}")
            new_version << "_#{i}"
          end

          File.rename index.options[:path], new_version
          index_class.index_dir = new_version
        end
      end


      protected

        # Yields the class given either as a class or by name, followed by
        # any extra arguments.
        def with_class(clazz, *args)
          clazz = clazz.constantize if String === clazz
          yield clazz, *args
        end

        # Runs the block; when it fails with a 'MySQL server has gone away'
        # StatementInvalid, reconnects the class' connection and runs the
        # block a second time. Any other failure, or a second one, is raised.
        def reconnect_when_needed(clazz)
          retried = false
          begin
            yield
          rescue ActiveRecord::StatementInvalid => e
            unless e.message =~ /MySQL server has gone away/
              @logger.error "StatementInvalid caught, but unsure what to do with it: #{e}"
              raise e
            end
            raise e if retried
            @logger.info "StatementInvalid caught, trying to reconnect..."
            clazz.connection.reconnect!
            retried = true
            retry
          end
        end

        # Creates a fresh index in the 'rebuild' directory below the class'
        # index base directory, set up with the field infos of +models+.
        def new_index_for(clazz, models)
          aaf_configuration = clazz.aaf_configuration
          ferret_cfg = aaf_configuration[:ferret].dup
          ferret_cfg.update :auto_flush  => false,
                            :create      => true,
                            :field_infos => ActsAsFerret::field_infos(models),
                            :path        => File.join(aaf_configuration[:index_base_dir], 'rebuild')
          Ferret::Index::Index.new(ferret_cfg).tap do |i|
            i.batch_size = aaf_configuration[:reindex_batch_size]
            i.logger = @logger
          end
        end

        # URI the server should listen on, or nil when the configuration
        # has no 'uri' setting.
        def configured_uri
          @cfg.uri
        rescue NoMethodError
          nil
        end

        # Logger level constant named by the 'log_level' setting; falls back
        # to Logger::DEBUG when the setting does not name a Logger constant.
        def configured_log_level
          Logger.const_get(@cfg.log_level.to_s.upcase)
        rescue NameError
          Logger::DEBUG
        end

    end
  end
end
@@ -0,0 +1,31 @@
1
module ActsAsFerret

  # base class for local and remote indexes
  class AbstractIndex

    attr_reader :aaf_configuration
    attr_accessor :logger

    # Stores the configuration and opens a logger writing to
    # RAILS_ROOT/log/ferret_index.log.
    def initialize(aaf_configuration)
      @aaf_configuration = aaf_configuration
      @logger = Logger.new("#{RAILS_ROOT}/log/ferret_index.log")
    end

    class << self
      # Defines an instance method +name+ that calls the method of the same
      # name on @server, passing model_class_name followed by the caller's
      # arguments. Extra arguments given to proxy_method itself are accepted
      # but not used.
      def proxy_method(name, *args)
        # the block parameter deliberately has its own name so it cannot
        # clash with this method's +args+
        define_method name do |*call_args|
          @server.send name, model_class_name, *call_args
        end
      end

      # Defines, for every given name, an instance method that calls
      # index_<name> on @server, passing model_class_name followed by the
      # caller's arguments.
      def index_proxy_method(*names)
        names.each do |name|
          define_method name do |*call_args|
            @server.send :"index_#{name}", model_class_name, *call_args
          end
        end
      end

    end
  end

end
@@ -0,0 +1,156 @@
1
module ActsAsFerret #:nodoc:

  module InstanceMethods
    include ResultAttributes

    # Returns an array of strings with the matches highlighted. The +query+ can
    # either be a String or a Ferret::Search::Query object.
    #
    # === Options
    #
    # field::            field to take the content from. This field has
    #                    to have its content stored in the index
    #                    (:store => :yes in your call to aaf). If not
    #                    given, all stored fields are searched, and the
    #                    highlighted content found in all of them is returned.
    #                    set :highlight => :no in the field options to
    #                    avoid highlighting of contents from a :stored field.
    # excerpt_length::   Default: 150. Length of excerpt to show. Highlighted
    #                    terms will be in the centre of the excerpt.
    # num_excerpts::     Default: 2. Number of excerpts to return.
    # pre_tag::          Default: "<em>". Tag to place to the left of the
    #                    match.
    # post_tag::         Default: "</em>". This tag should close the
    #                    +:pre_tag+.
    # ellipsis::         Default: "...". This is the string that is appended
    #                    at the beginning and end of excerpts (unless the
    #                    excerpt hits the start or end of the field). You'll
    #                    probably want to change this to a Unicode ellipsis
    #                    character.
    def highlight(query, options = {})
      self.class.aaf_index.highlight(id, self.class.name, query, options)
    end

    # re-enable ferret indexing for this instance after a call to #disable_ferret
    def enable_ferret
      @ferret_disabled = nil
    end
    alias ferret_enable enable_ferret  # compatibility

    # returns true if ferret indexing is enabled for this record.
    #
    # The optional is_bulk_index parameter will be true if the method is called
    # by rebuild_index or bulk_index, and false otherwise.
    #
    # If is_bulk_index is true, the class level ferret_enabled state will be
    # ignored by this method (per-instance ferret_enabled checks however will
    # take place, so if you override this method to forbid indexing of certain
    # records you're still safe).
    def ferret_enabled?(is_bulk_index = false)
      @ferret_disabled.nil? && (is_bulk_index || self.class.ferret_enabled?)
    end

    # Disable Ferret for this record for a specified amount of time. ::once will
    # disable Ferret for the next call to #save (this is the default), ::always
    # will do so for all subsequent calls.
    #
    # Note that this will turn off only the create and update hooks, but not the
    # destroy hook. I think that's reasonable, if you think the opposite, please
    # tell me.
    #
    # To manually trigger reindexing of a record after you're finished modifying
    # it, you can call #ferret_update directly instead of #save (remember to
    # enable ferret again before).
    #
    # When given a block, this will be executed without any ferret indexing of
    # this object taking place. The optional argument in this case can be used
    # to indicate if the object should be indexed after executing the block
    # (::index_when_finished). Automatic Ferret indexing of this object will be
    # turned on after the block has been executed, also when the block raises
    # (no index update takes place in that case). If passed ::index_when_true,
    # the index will only be updated if the block evaluated not to false or nil.
    #
    # Raises ArgumentError for any other option when no block is given.
    def disable_ferret(option = :once)
      if block_given?
        @ferret_disabled = :always
        begin
          result = yield
        ensure
          # a failing block must not leave this record disabled for good
          ferret_enable
        end
        ferret_update if option == :index_when_finished || (option == :index_when_true && result)
        result
      elsif [:once, :always].include?(option)
        @ferret_disabled = option
      else
        raise ArgumentError.new("Invalid Argument #{option}")
      end
    end

    # add to index
    #
    # When indexing is disabled, nothing is indexed; a ::once disabling is
    # used up by this call.
    def ferret_create
      if ferret_enabled?
        logger.debug "ferret_create/update: #{self.class.name} : #{self.id}"
        self.class.aaf_index << self
      else
        ferret_enable if @ferret_disabled == :once
      end
      true # signal success to AR
    end
    alias :ferret_update :ferret_create


    # remove from index
    #
    # Failures are only logged (best effort), so destroying the record
    # itself is never blocked by the index.
    def ferret_destroy
      logger.debug "ferret_destroy: #{self.class.name} : #{self.id}"
      begin
        self.class.aaf_index.remove self.id, self.class.name
      rescue => e
        logger.warn("Could not find indexed value for this object: #{e}\n#{e.backtrace}")
      end
      true # signal success to AR
    end

    # turn this instance into a ferret document (which basically is a hash of
    # fieldname => value pairs)
    def to_doc
      logger.debug "creating doc for class: #{self.class.name}, id: #{self.id}"
      Ferret::Document.new.tap do |doc|
        # store the id of each item
        doc[:id] = self.id

        # store the class name if configured to do so
        doc[:class_name] = self.class.name if aaf_configuration[:store_class_name]

        # iterate through the fields and add them to the document
        aaf_configuration[:ferret_fields].each_pair do |field, config|
          doc[field] = self.send("#{field}_to_ferret") unless config[:ignore]
        end
        if aaf_configuration[:boost]
          if self.respond_to?(aaf_configuration[:boost])
            boost = self.send aaf_configuration[:boost]
            # NOTE(review): to_i drops the fractional part of the boost -
            # confirm that this is intended
            doc.boost = boost.to_i if boost
          else
            logger.error "boost option should point to an instance method: #{aaf_configuration[:boost]}"
          end
        end
      end
    end

    # Asks the index for the document number of this record.
    def document_number
      self.class.aaf_index.document_number(id, self.class.name)
    end

    # Asks the index for the query that finds this record.
    def query_for_record
      self.class.aaf_index.query_for_record(id, self.class.name)
    end

    # Returns the content to index for +field+: the first non-nil/non-false
    # value among the attribute read via self[field], the instance variable
    # of that name and the result of calling the method of that name.
    #
    # dynamic_boost:: optional name of a method; when it returns a value,
    #                 the content is wrapped in a Ferret::Field carrying
    #                 that value (converted with to_i) as its boost.
    def content_for_field_name(field, dynamic_boost = nil)
      field_data = self[field] || self.instance_variable_get("@#{field.to_s}".to_sym) || self.send(field.to_sym)
      if (dynamic_boost && boost_value = self.send(dynamic_boost))
        field_data = Ferret::Field.new(field_data)
        field_data.boost = boost_value.to_i
      end
      field_data
    end


  end

end