acts_as_ferret 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +29 -6
- data/config/ferret_server.yml +12 -0
- data/install.rb +19 -0
- data/lib/act_methods.rb +194 -0
- data/lib/acts_as_ferret.rb +74 -52
- data/lib/class_methods.rb +222 -482
- data/lib/ferret_result.rb +36 -0
- data/lib/ferret_server.rb +89 -0
- data/lib/index.rb +31 -0
- data/lib/instance_methods.rb +112 -143
- data/lib/local_index.rb +257 -0
- data/lib/more_like_this.rb +47 -41
- data/lib/multi_index.rb +8 -11
- data/lib/remote_index.rb +50 -0
- data/lib/shared_index.rb +14 -0
- data/lib/shared_index_class_methods.rb +90 -0
- data/rakefile +88 -147
- data/script/ferret_server +18 -0
- data/script/ferret_start +67 -0
- data/script/ferret_stop +22 -0
- metadata +23 -11
- data/.init.rb.swp +0 -0
- data/.rakefile.swp +0 -0
- data/lib/.acts_as_ferret.rb.swp +0 -0
- data/lib/.class_methods.rb.swo +0 -0
- data/lib/.class_methods.rb.swp +0 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
module ActsAsFerret

  # Mixed into FerretResult and into the AR classes calling acts_as_ferret.
  module ResultAttributes
    # Holds the score this record had when it was found via acts_as_ferret.
    attr_accessor :ferret_score

    # Rank of this record; nothing in this file assigns it.
    attr_accessor :ferret_rank
  end

  # Stand-in for a model record built from a single index hit.
  #
  # Values present in +data+ are answered directly. Any other message loads
  # the real record with <tt>model.find(id)</tt> and is delegated to it. Once
  # the record has been loaded, every message is delegated to it, including
  # those for which +data+ holds a value.
  class FerretResult
    include ResultAttributes
    attr_accessor :id

    # model:: name of the model class (a String responding to #constantize)
    # id::    id of the record this hit refers to
    # score:: score of the hit, exposed as #ferret_score
    # data::  hash of field name => value pairs answered without loading the
    #         record
    def initialize(model, id, score, data = {})
      @model = model.constantize
      @id = id
      @ferret_score = score
      @data = data
    end

    # Answers +method+ from the stored data when possible, otherwise loads the
    # record and forwards the call, including any block given by the caller.
    def method_missing(method, *args, &block)
      if @ar_record || @data[method].nil?
        ferret_load_record unless @ar_record
        @ar_record.send method, *args, &block
      else
        @data[method]
      end
    end

    # Keeps #respond_to? in line with #method_missing without loading the
    # record: stored data and an already loaded record are consulted first,
    # then the instance methods defined on the model class.
    def respond_to_missing?(method, include_private = false)
      return true unless @data[method].nil?
      return @ar_record.respond_to?(method, include_private) if @ar_record
      @model.method_defined?(method) || super
    end

    # Loads the record this result stands for and remembers it.
    def ferret_load_record
      @ar_record = @model.find(id)
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'drb'
|
2
|
+
require 'thread'
|
3
|
+
require 'yaml'
|
4
|
+
require 'erb'
|
5
|
+
|
6
|
+
|
7
|
+
module ActsAsFerret
|
8
|
+
|
9
|
+
module Remote
|
10
|
+
|
11
|
+
module Config
  class << self
    # Connection settings used for any key the environment's section of the
    # config file does not set.
    DEFAULTS = {
      'host' => 'localhost',
      'port' => '9009'
    }

    # Reads connection settings from the config file.
    #
    # The file is run through ERB, parsed as YAML and looked up by RAILS_ENV.
    # DEFAULTS are merged into the section found for the current environment
    # (not into the top-level hash), so a section may omit 'host' or 'port'.
    #
    # Returns the settings hash with an additional :uri entry holding the
    # druby:// URI, or nil if the file has no section for RAILS_ENV.
    def load(file = "#{RAILS_ROOT}/config/ferret_server.yml")
      # an empty file parses to a non-hash value; treat it as "no sections"
      environments = YAML.load(ERB.new(IO.read(file)).result) || {}
      env_settings = environments[RAILS_ENV]
      return nil unless env_settings

      config = DEFAULTS.merge(env_settings)
      config[:uri] = "druby://#{config['host']}:#{config['port']}"
      config
    end
  end
end
|
27
|
+
|
28
|
+
# This class acts as a drb server listening for indexing and
# search requests from models declared to 'acts_as_ferret :remote => true'
#
# Usage:
# - copy doc/ferret_server.yml to RAILS_ROOT/config and modify to suit
#   your needs.
# - run script/ferret_server (in the plugin directory) via script/runner:
#   RAILS_ENV=production script/runner vendor/plugins/acts_as_ferret/script/ferret_server
#
# TODO: automate installation of files to script/ and config/
class Server

  cattr_accessor :running

  # Starts the DRb service with a new Server instance as its front object
  # and sets the +running+ flag.
  #
  # uri:: druby:// URI to listen on; when nil it is taken from
  #       ActsAsFerret::Remote::Config.load
  def self.start(uri = nil)
    ActiveRecord::Base.allow_concurrency = true
    uri ||= ActsAsFerret::Remote::Config.load[:uri]
    DRb.start_service(uri, ActsAsFerret::Remote::Server.new)
    self.running = true
  end

  # Sets up the log file the server writes to.
  def initialize
    @logger = Logger.new("#{RAILS_ROOT}/log/ferret_server.log")
  end

  # handles all incoming method calls, and sends them on to the LocalIndex
  # instance of the correct model class.
  #
  # The first argument of every call must be the model class name; the
  # remaining arguments are passed on unchanged. If calling the index raises
  # NoMethodError, the call is retried as a class method of the model.
  # Any other error is logged with its backtrace and re-raised.
  #
  # Calls are not queued atm, so this will block until the call returned.
  # Might throw the occasional LockError, too, which most probably means that you're
  # a) rebuilding your index or
  # b) have *really* high load. I wasn't able to reproduce this case until
  #    now, if you do, please contact me.
  #
  # TODO: rebuild indexes in separate directory so no lock errors in these
  # cases.
  def method_missing(name, *args)
    clazz = args.shift.constantize
    begin
      @logger.debug "call index method: #{name} with #{args.inspect}"
      clazz.aaf_index.send name, *args
    rescue NoMethodError
      # NOTE(review): this also catches a NoMethodError raised inside the
      # index method itself, not only a missing index method.
      @logger.debug "no luck, trying to call class method instead"
      clazz.send name, *args
    end
  rescue => e
    # one backtrace frame per line
    @logger.error "ferret server error #{e}\n#{e.backtrace.join("\n")}"
    raise
  end

  # Returns the Ferret index behind the index of the given model class.
  def ferret_index(class_name)
    # TODO check if in use!
    class_name.constantize.aaf_index.ferret_index
  end

  # the main loop taking stuff from the queue and running it...
  #def run
  #end

end
|
88
|
+
end
|
89
|
+
end
|
data/lib/index.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
module ActsAsFerret
|
2
|
+
|
3
|
+
# Base class for local and remote indexes.
#
# The proxy methods generated by the class-level helpers below rely on the
# instance providing @server and #model_class_name; neither is defined in
# this class, so presumably subclasses supply them.
class AbstractIndex

  attr_reader :aaf_configuration
  attr_reader :logger

  # aaf_configuration:: the acts_as_ferret configuration this index works with
  def initialize(aaf_configuration)
    @aaf_configuration = aaf_configuration
    @logger = Logger.new("#{RAILS_ROOT}/log/ferret_index.log")
  end

  class << self
    # Defines an instance method +name+ that forwards to @server under the
    # same name, passing the model class name followed by the caller's
    # arguments. Further arguments to proxy_method are accepted but ignored.
    def proxy_method(name, *_ignored)
      define_method name do |*args|
        @server.send name, model_class_name, *args
      end
    end

    # For each given name defines an instance method that forwards to
    # @server as index_<name>, passing the model class name followed by the
    # caller's arguments.
    def index_proxy_method(*names)
      names.each do |name|
        define_method name do |*args|
          @server.send :"index_#{name}", model_class_name, *args
        end
      end
    end

  end
end
|
30
|
+
|
31
|
+
end
|
data/lib/instance_methods.rb
CHANGED
@@ -1,157 +1,126 @@
|
|
1
|
-
module
|
2
|
-
module Acts #:nodoc:
|
3
|
-
module ARFerret #:nodoc:
|
1
|
+
module ActsAsFerret #:nodoc:
|
4
2
|
|
5
|
-
|
6
|
-
|
3
|
+
module InstanceMethods
|
4
|
+
include ResultAttributes
|
7
5
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
options[:field] = field
|
45
|
-
highlights << i.highlight(query, doc_num, options)
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
49
|
-
return highlights.compact.flatten[0..options[:num_excerpts]-1]
|
50
|
-
end
|
51
|
-
|
52
|
-
# re-eneable ferret indexing after a call to #disable_ferret
|
53
|
-
def ferret_enable; @ferret_disabled = nil end
|
54
|
-
|
55
|
-
# returns true if ferret indexing is enabled
|
56
|
-
def ferret_enabled?; @ferret_disabled.nil? end
|
6
|
+
# Returns an array of strings with the matches highlighted. The +query+ can
|
7
|
+
# either be a String or a Ferret::Search::Query object.
|
8
|
+
#
|
9
|
+
# === Options
|
10
|
+
#
|
11
|
+
# field:: field to take the content from. This field has
|
12
|
+
# to have its content stored in the index
|
13
|
+
# (:store => :yes in your call to aaf). If not
|
14
|
+
# given, all stored fields are searched, and the
|
15
|
+
# highlighted content found in all of them is returned.
|
16
|
+
# set :highlight => :no in the field options to
|
17
|
+
# avoid highlighting of contents from a :stored field.
|
18
|
+
# excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
|
19
|
+
# terms will be in the centre of the excerpt.
|
20
|
+
# num_excerpts:: Default: 2. Number of excerpts to return.
|
21
|
+
# pre_tag:: Default: "<em>". Tag to place to the left of the
|
22
|
+
# match.
|
23
|
+
# post_tag:: Default: "</em>". This tag should close the
|
24
|
+
# +:pre_tag+.
|
25
|
+
# ellipsis:: Default: "...". This is the string that is appended
|
26
|
+
# at the beginning and end of excerpts (unless the
|
27
|
+
# excerpt hits the start or end of the field). You'll
|
28
|
+
# probably want to change this to a Unicode ellipsis
|
29
|
+
# character.
|
30
|
+
def highlight(query, options = {})
|
31
|
+
self.class.aaf_index.highlight(id, self.class.name, query, options)
|
32
|
+
end
|
33
|
+
|
34
|
+
# re-enable ferret indexing after a call to #disable_ferret
|
35
|
+
def ferret_enable; @ferret_disabled = nil end
|
36
|
+
|
37
|
+
# returns true if ferret indexing is enabled
|
38
|
+
# the optional parameter will be true if the method is called by rebuild_index,
|
39
|
+
# and false otherwise. I.e. useful to enable a model only for indexing during
|
40
|
+
# scheduled reindex runs.
|
41
|
+
def ferret_enabled?(is_rebuild = false); @ferret_disabled.nil? end
|
57
42
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
43
|
+
# Disable Ferret for a specified amount of time. ::once will disable
# Ferret for the next call to #save (this is the default), ::always will
# do so for all subsequent calls.
# To manually trigger reindexing of a record, you can call #ferret_update
# directly.
#
# When given a block, this will be executed without any ferret indexing of
# this object taking place. The optional argument in this case can be used
# to indicate if the object should be indexed after executing the block
# (::index_when_finished). Automatic Ferret indexing of this object will be
# turned on after the block has been executed, even if the block raises.
# If passed ::index_when_true, the index will only be updated if the block
# evaluated not to false or nil.
#
# Returns the block's value when a block is given, otherwise the option
# that was set. Raises ArgumentError for any other option when no block is
# given.
def disable_ferret(option = :once)
  if block_given?
    @ferret_disabled = :always
    begin
      result = yield
    ensure
      # re-enable indexing even when the block raised, so a failure does not
      # leave this object permanently excluded from the index
      ferret_enable
    end
    ferret_update if option == :index_when_finished || (option == :index_when_true && result)
    result
  elsif [:once, :always].include?(option)
    @ferret_disabled = option
  else
    raise ArgumentError.new("Invalid Argument #{option}")
  end
end
|
83
68
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
69
|
+
# add to index
|
70
|
+
def ferret_create
|
71
|
+
if ferret_enabled?
|
72
|
+
logger.debug "ferret_create/update: #{self.class.name} : #{self.id}"
|
73
|
+
self.class.aaf_index << self
|
74
|
+
else
|
75
|
+
ferret_enable if @ferret_disabled == :once
|
76
|
+
end
|
77
|
+
true # signal success to AR
|
78
|
+
end
|
79
|
+
alias :ferret_update :ferret_create
|
80
|
+
|
96
81
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
82
|
+
# remove from index
#
# Asks the class's index to remove the entry for this record's id and class
# name. Any StandardError raised while doing so is logged as a warning
# (message followed by the backtrace, one frame per line) and otherwise
# ignored, so this method always returns true.
def ferret_destroy
  logger.debug "ferret_destroy: #{self.class.name} : #{self.id}"
  begin
    self.class.aaf_index.remove self.id, self.class.name
  rescue => e
    logger.warn("Could not find indexed value for this object: #{e}\n#{e.backtrace.join("\n")}")
  end
  true # signal success to AR
end
|
92
|
+
|
93
|
+
# turn this instance into a ferret document (which basically is a hash of
|
94
|
+
# fieldname => value pairs)
|
95
|
+
def to_doc
|
96
|
+
logger.debug "creating doc for class: #{self.class.name}, id: #{self.id}"
|
97
|
+
returning doc = Ferret::Document.new do
|
98
|
+
# store the id of each item
|
99
|
+
doc[:id] = self.id
|
115
100
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
fields_for_ferret.each_pair do |field, config|
|
123
|
-
doc[field] = self.send("#{field}_to_ferret") unless config[:ignore]
|
124
|
-
end
|
125
|
-
return doc
|
101
|
+
# store the class name if configured to do so
|
102
|
+
doc[:class_name] = self.class.name if aaf_configuration[:store_class_name]
|
103
|
+
|
104
|
+
# iterate through the fields and add them to the document
|
105
|
+
aaf_configuration[:ferret_fields].each_pair do |field, config|
|
106
|
+
doc[field] = self.send("#{field}_to_ferret") unless config[:ignore]
|
126
107
|
end
|
108
|
+
end
|
109
|
+
end
|
127
110
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
return hits.hits.first.doc if hits.total_hits == 1
|
132
|
-
raise "cannot determine document number from primary key: #{self}"
|
133
|
-
end
|
111
|
+
def document_number
|
112
|
+
self.class.aaf_index.document_number(id, self.class.name)
|
113
|
+
end
|
134
114
|
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
protected
|
115
|
+
def query_for_record
|
116
|
+
self.class.aaf_index.query_for_record(id, self.class.name)
|
117
|
+
end
|
140
118
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
if self.class.configuration[:single_index]
|
145
|
-
bq = Ferret::Search::BooleanQuery.new
|
146
|
-
bq.add_query(query, :must)
|
147
|
-
bq.add_query(Ferret::Search::TermQuery.new(:class_name, self.class.name), :must)
|
148
|
-
return bq
|
149
|
-
end
|
150
|
-
return query
|
151
|
-
end
|
119
|
+
def content_for_field_name(field)
|
120
|
+
self[field] || self.instance_variable_get("@#{field.to_s}".to_sym) || self.send(field.to_sym)
|
121
|
+
end
|
152
122
|
|
153
|
-
end
|
154
123
|
|
155
|
-
end
|
156
124
|
end
|
125
|
+
|
157
126
|
end
|
data/lib/local_index.rb
ADDED
@@ -0,0 +1,257 @@
|
|
1
|
+
module ActsAsFerret
|
2
|
+
|
3
|
+
class LocalIndex < AbstractIndex
|
4
|
+
include MoreLikeThis::IndexMethods
|
5
|
+
|
6
|
+
|
7
|
+
def initialize(aaf_configuration)
|
8
|
+
super
|
9
|
+
ensure_index_exists
|
10
|
+
end
|
11
|
+
|
12
|
+
# The 'real' Ferret Index instance
|
13
|
+
def ferret_index
|
14
|
+
ensure_index_exists
|
15
|
+
@ferret_index ||= Ferret::Index::Index.new(aaf_configuration[:ferret])
|
16
|
+
end
|
17
|
+
|
18
|
+
# Checks for the presence of a segments file in the index directory
|
19
|
+
# Rebuilds the index if none exists.
|
20
|
+
def ensure_index_exists
|
21
|
+
unless File.file? "#{aaf_configuration[:index_dir]}/segments"
|
22
|
+
close
|
23
|
+
rebuild_index
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
# Closes the underlying index instance
|
28
|
+
def close
|
29
|
+
@ferret_index.close if @ferret_index
|
30
|
+
rescue StandardError
|
31
|
+
# is raised when index already closed
|
32
|
+
ensure
|
33
|
+
@ferret_index = nil
|
34
|
+
end
|
35
|
+
|
36
|
+
# rebuilds the index from all records of the model class this index belongs
# to. Arguments can be given in shared index scenarios to name multiple
# model classes to include in the index
#
# models:: model class names (nested arrays are flattened); the configured
#          class name is always included.
#
# A fresh index is created with :create => true, filled via reindex_model
# for every model, then flushed, optimized and closed. The index is closed
# even if reindexing raises, so the handle is not left open; the error is
# re-raised. On success all cached multi indexes are closed afterwards.
def rebuild_index(*models)
  logger.debug "rebuild index: #{models.inspect}"
  models << aaf_configuration[:class_name] unless models.include?(aaf_configuration[:class_name])
  models = models.flatten.uniq.map(&:constantize)
  index = Ferret::Index::Index.new(aaf_configuration[:ferret].dup.update(:auto_flush  => false,
                                                                         :field_infos => field_infos(models),
                                                                         :create      => true))
  begin
    models.each do |model|
      reindex_model(index, model)
    end
    logger.debug("Created Ferret index in: #{aaf_configuration[:index_dir]}")
    index.flush
    index.optimize
  ensure
    index.close
  end
  close_multi_indexes
end
|
55
|
+
|
56
|
+
# Parses the given query string into a Ferret Query object.
|
57
|
+
def process_query(query)
|
58
|
+
# work around ferret bug in #process_query (doesn't ensure the
|
59
|
+
# reader is open)
|
60
|
+
ferret_index.synchronize do
|
61
|
+
ferret_index.send(:ensure_reader_open)
|
62
|
+
original_query = ferret_index.process_query(query)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# Total number of hits for the given query.
|
67
|
+
def total_hits(query, options = {})
|
68
|
+
ferret_index.search(query, options).total_hits
|
69
|
+
end
|
70
|
+
|
71
|
+
def determine_lazy_fields(options = {})
|
72
|
+
stored_fields = options[:lazy]
|
73
|
+
if stored_fields && !(Array === stored_fields)
|
74
|
+
stored_fields = aaf_configuration[:ferret_fields].select { |field, config| config[:store] == :yes }.map(&:first)
|
75
|
+
end
|
76
|
+
logger.debug "stored_fields: #{stored_fields}"
|
77
|
+
return stored_fields
|
78
|
+
end
|
79
|
+
|
80
|
+
# Queries the Ferret index to retrieve model class, id, score and the
|
81
|
+
# values of any fields stored in the index for each hit.
|
82
|
+
# If a block is given, these are yielded and the number of total hits is
|
83
|
+
# returned. Otherwise [total_hits, result_array] is returned.
|
84
|
+
def find_id_by_contents(query, options = {})
|
85
|
+
result = []
|
86
|
+
index = ferret_index
|
87
|
+
logger.debug "query: #{ferret_index.process_query query}" # TODO only enable this for debugging purposes
|
88
|
+
lazy_fields = determine_lazy_fields options
|
89
|
+
|
90
|
+
total_hits = index.search_each(query, options) do |hit, score|
|
91
|
+
doc = index[hit]
|
92
|
+
model = aaf_configuration[:store_class_name] ? doc[:class_name] : aaf_configuration[:class_name]
|
93
|
+
# fetch stored fields if lazy loading
|
94
|
+
data = {}
|
95
|
+
lazy_fields.each { |field| data[field] = doc[field] } if lazy_fields
|
96
|
+
if block_given?
|
97
|
+
yield model, doc[:id], score, data
|
98
|
+
else
|
99
|
+
result << { :model => model, :id => doc[:id], :score => score, :data => data }
|
100
|
+
end
|
101
|
+
end
|
102
|
+
#logger.debug "id_score_model array: #{result.inspect}"
|
103
|
+
return block_given? ? total_hits : [total_hits, result]
|
104
|
+
end
|
105
|
+
|
106
|
+
# Queries multiple Ferret indexes to retrieve model class, id and score for
# each hit. Use the models parameter to give the list of models to search.
# If a block is given, each hit is yielded and the number of total hits is
# returned. Otherwise [total_hits, result_array] is returned, where each
# result is a hash with the keys :model, :id, :score and :data.
#
# models:: array of model class names; it is not modified, so the same
#          array can be passed again.
def id_multi_search(query, models, options = {})
  index = multi_index(models.map(&:constantize))
  result = []
  lazy_fields = determine_lazy_fields options
  total_hits = index.search_each(query, options) do |hit, score|
    doc = index[hit]
    # fetch stored fields if lazy loading
    data = {}
    lazy_fields.each { |field| data[field] = doc[field] } if lazy_fields
    if block_given?
      # NOTE(review): five values are yielded here (the document comes before
      # data), while find_id_by_contents yields four (model, id, score, data).
      # Confirm which arity the callers' blocks expect.
      yield doc[:class_name], doc[:id], score, doc, data
    else
      result << { :model => doc[:class_name], :id => doc[:id], :score => score, :data => data }
    end
  end
  return block_given? ? total_hits : [ total_hits, result ]
end
|
128
|
+
|
129
|
+
######################################
|
130
|
+
# methods working on a single record
|
131
|
+
# called from instance_methods, here to simplify interfacing with the
|
132
|
+
# remote ferret server
|
133
|
+
# TODO having to pass id and class_name around like this isn't nice
|
134
|
+
######################################
|
135
|
+
|
136
|
+
# add record to index
|
137
|
+
# record may be the full AR object, a Ferret document instance or a Hash
|
138
|
+
def add(record)
|
139
|
+
record = record.to_doc unless Hash === record || Ferret::Document === record
|
140
|
+
ferret_index << record
|
141
|
+
end
|
142
|
+
alias << add
|
143
|
+
|
144
|
+
# delete record from index
|
145
|
+
def remove(id, class_name)
|
146
|
+
ferret_index.query_delete query_for_record(id, class_name)
|
147
|
+
end
|
148
|
+
|
149
|
+
# highlight search terms for the record with the given id.
#
# Defaults (:num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>')
# are applied to a copy of +options+; the hash passed in by the caller is
# left untouched. With options[:field] set, only that field is highlighted.
# Otherwise the query is parsed once and every configured field is
# highlighted, except those with :store => :no or :highlight => :no.
#
# Returns at most options[:num_excerpts] excerpts.
def highlight(id, class_name, query, options = {})
  options = options.dup
  { :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>' }.each_pair do |key, default|
    options[key] = default unless options.key?(key)
  end
  highlights = []
  ferret_index.synchronize do
    doc_num = document_number(id, class_name)
    if options[:field]
      highlights << ferret_index.highlight(query, doc_num, options)
    else
      query = process_query(query) # process only once
      aaf_configuration[:ferret_fields].each_pair do |field, config|
        next if config[:store] == :no || config[:highlight] == :no
        highlights << ferret_index.highlight(query, doc_num, options.merge(:field => field))
      end
    end
  end
  return highlights.compact.flatten[0..options[:num_excerpts]-1]
end
|
168
|
+
|
169
|
+
# retrieves the ferret document number of the record with the given id.
|
170
|
+
def document_number(id, class_name)
|
171
|
+
hits = ferret_index.search(query_for_record(id, class_name))
|
172
|
+
return hits.hits.first.doc if hits.total_hits == 1
|
173
|
+
raise "cannot determine document number from primary key: #{id}"
|
174
|
+
end
|
175
|
+
|
176
|
+
# build a ferret query matching only the record with the given id
|
177
|
+
# the class name only needs to be given in case of a shared index configuration
|
178
|
+
def query_for_record(id, class_name = nil)
|
179
|
+
Ferret::Search::TermQuery.new(:id, id.to_s)
|
180
|
+
end
|
181
|
+
|
182
|
+
|
183
|
+
protected
|
184
|
+
|
185
|
+
# returns a MultiIndex instance operating on a MultiReader
#
# Instances are cached in ActsAsFerret::multi_indexes under a key built from
# the class names in alphabetical order, so the same set of classes shares
# one MultiIndex regardless of the order given. The array passed in is not
# reordered.
def multi_index(model_classes)
  sorted_classes = model_classes.sort_by(&:name)
  key = sorted_classes.map(&:name).join
  multi_config = aaf_configuration[:ferret].dup
  multi_config.delete :default_field  # we don't want the default field list of *this* class for multi_searching
  ActsAsFerret::multi_indexes[key] ||= MultiIndex.new(sorted_classes, multi_config)
end
|
193
|
+
|
194
|
+
def close_multi_indexes
|
195
|
+
# close combined index readers, just in case
|
196
|
+
# this seems to fix a strange test failure that seems to relate to a
|
197
|
+
# multi_index looking at an old version of the content_base index.
|
198
|
+
ActsAsFerret::multi_indexes.each_pair do |key, index|
|
199
|
+
# puts "#{key} -- #{self.name}"
|
200
|
+
# TODO only close those where necessary (watch inheritance, where
|
201
|
+
# self.name is base class of a class where key is made from)
|
202
|
+
index.close #if key =~ /#{self.name}/
|
203
|
+
end
|
204
|
+
ActsAsFerret::multi_indexes.clear
|
205
|
+
end
|
206
|
+
|
207
|
+
# Adds a document for every record of +model+ to +index+, skipping records
# whose ferret_enabled?(true) is false.
#
# index:: object documents are appended to with <<
# model:: model class to read records from; defaults to the configured
#         class of this index
#
# After each batch a progress line is logged with the share of records
# covered so far and an estimate of the remaining time, extrapolated from
# the duration of the batch just finished.
def reindex_model(index, model = aaf_configuration[:class_name].constantize)
  # index in batches of 1000 to limit memory consumption (fixes #24)
  # TODO make configurable through options
  batch_size = 1000
  logger.info "reindexing model #{model.name}"
  order = "#{model.primary_key} ASC" # this works around a bug in sqlserver-adapter (where paging only works with an order applied)
  model.transaction do
    # count once, so the loop bound and the progress figures agree
    model_count = model.count
    0.step(model_count, batch_size) do |offset|
      batch_started = Time.now.to_f
      model.find(:all, :limit => batch_size, :offset => offset, :order => order).each do |rec|
        index << rec.to_doc if rec.ferret_enabled?(true)
      end
      batch_time = Time.now.to_f - batch_started
      # records covered once this batch is done, capped at the total
      processed = [offset + batch_size, model_count].min
      work_done = model_count > 0 ? processed.to_f / model_count * 100.0 : 0
      remaining_time = ( batch_time / batch_size ) * ( model_count - processed )
      logger.info "reindex model #{model.name} : #{'%.2f' % work_done}% complete : #{'%.2f' % remaining_time} secs to finish"
    end
  end
end
|
229
|
+
|
230
|
+
# builds a FieldInfos instance for creation of an index containing fields
|
231
|
+
# for the given model classes.
|
232
|
+
def field_infos(models)
|
233
|
+
# default attributes for fields
|
234
|
+
fi = Ferret::Index::FieldInfos.new(:store => :no,
|
235
|
+
:index => :yes,
|
236
|
+
:term_vector => :no,
|
237
|
+
:boost => 1.0)
|
238
|
+
# primary key
|
239
|
+
fi.add_field(:id, :store => :yes, :index => :untokenized)
|
240
|
+
# class_name
|
241
|
+
if aaf_configuration[:store_class_name]
|
242
|
+
fi.add_field(:class_name, :store => :yes, :index => :untokenized)
|
243
|
+
end
|
244
|
+
fields = {}
|
245
|
+
models.each do |model|
|
246
|
+
fields.update(model.aaf_configuration[:ferret_fields])
|
247
|
+
end
|
248
|
+
fields.each_pair do |field, options|
|
249
|
+
fi.add_field(field, { :store => :no,
|
250
|
+
:index => :yes }.update(options))
|
251
|
+
end
|
252
|
+
return fi
|
253
|
+
end
|
254
|
+
|
255
|
+
end
|
256
|
+
|
257
|
+
end
|