ultrasphinx 1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/LICENSE +184 -0
- data/Manifest +21 -0
- data/README +94 -0
- data/Rakefile +21 -0
- data/examples/app.multi +2 -0
- data/examples/default.base +34 -0
- data/init.rb +2 -0
- data/lib/ultrasphinx.rb +20 -0
- data/lib/ultrasphinx/autoload.rb +13 -0
- data/lib/ultrasphinx/core_extensions.rb +51 -0
- data/lib/ultrasphinx/fields.rb +78 -0
- data/lib/ultrasphinx/is_indexed.rb +89 -0
- data/lib/ultrasphinx/search.rb +441 -0
- data/lib/ultrasphinx/spell.rb +41 -0
- data/lib/ultrasphinx/ultrasphinx.rb +276 -0
- data/tasks/ultrasphinx.rake +125 -0
- data/vendor/sphinx/README +40 -0
- data/vendor/sphinx/Rakefile +21 -0
- data/vendor/sphinx/init.rb +1 -0
- data/vendor/sphinx/lib/client.rb +647 -0
- metadata +66 -0
require 'singleton'

module Ultrasphinx

  # Global registry mapping Sphinx field names to their SQL column types.
  # A singleton Hash; every indexed model contributes its columns here so
  # that the unified Sphinx query schema can be generated.
  class Fields < Hash
    include Singleton

    def initialize
      # every generated source query carries these two synthetic columns
      store("class_id", "numeric")
      store("class", "text")
    end

    # Merge a field into the unified schema by name. Fields shared across
    # models must agree on type; raise ConfigurationError otherwise.
    def check_type_match(field, new_type)
      key = field.to_s
      mapped = COLUMN_TYPES[new_type.to_sym]
      existing = self[key]
      if existing.nil?
        self[key] = mapped
      elsif existing != mapped
        raise ConfigurationError, "Column type mismatch for #{key.inspect}; was already #{self[key].inspect}, but is now #{mapped.inspect}."
      end
    end

    # Wrap +source_string+ in the SQL cast appropriate for +field+'s
    # registered type, aliased AS the field name.
    def cast(source_string, field)
      expression =
        if self[field] == "date"
          "UNIX_TIMESTAMP(#{source_string})"
        elsif source_string =~ /GROUP_CONCAT/
          # group_concats come back as BLOB; force them to CHAR
          "CAST(#{source_string} AS CHAR)"
        else
          source_string
        end
      expression + " AS #{field}"
    end

    # SQL literal standing in for a NULL value of +field+'s registered
    # type, aliased AS the field name.
    def null(field)
      literal =
        case self[field]
        when 'text'    then "''"
        when 'numeric' then "0"
        when 'date'    then "UNIX_TIMESTAMP('1970-01-01 00:00:00')"
        end
      # an unregistered type leaves literal nil and raises here, as before
      literal + " AS #{field}"
    end

    # Walk the full configuration hash and register every declared field,
    # include, and concat in the unified schema. Returns self.
    def configure(configuration)

      configuration.each do |model, options|
        klass = model.constantize

        begin
          # :fields come straight from the model's own columns
          Array(options[:fields]).each do |entry|
            entry = {:field => entry, :as => entry} unless entry.is_a? Hash
            column = klass.columns_hash[entry[:field]]
            if column
              check_type_match(entry[:as], column.type)
            else
              ActiveRecord::Base.logger.warn "ultrasphinx: WARNING: field #{entry[:field]} is not present in #{model}"
            end
          end
          # :includes take their type from the association target's column
          Array(options[:includes]).each do |join|
            check_type_match(join[:as] || join[:field], join[:model].constantize.columns_hash[join[:field]].type)
          end
          # regular concats are CHAR (I think), group_concats are BLOB and
          # need to be cast to CHAR, e.g. :text
          Array(options[:concats]).each do |concat|
            check_type_match(concat[:as], :text)
          end
        rescue ActiveRecord::StatementInvalid
          ActiveRecord::Base.logger.warn "ultrasphinx: WARNING: model #{model} does not exist in the database yet"
        end
      end

      self
    end

  end
end
require 'ultrasphinx'

module ActiveRecord
  class Base

=begin rdoc

The is_indexed macro configures a model for indexing. Its parameters are used to generate SQL queries for Sphinx.

== Indexing single fields

Use the <tt>:fields</tt> key.

Accepts an array of field names.
  :fields => ["created_at", "title", "body"]

== Indexing fields from belongs_to associations

Use the <tt>:includes</tt> key.

Accepts an array of hashes.

Each should contain a <tt>:model</tt> key (the class name of the included model), a <tt>:field</tt> key (the name of the field to include), and an optional <tt>:as</tt> key (what to name the field in the parent). You can use the optional key <tt>:association_sql</tt> if you need to pass a custom JOIN string, in which case the default JOIN will not be generated.

== Scoping the searchable records

Use the <tt>:conditions</tt> key.

SQL conditions, to scope which records are selected for indexing. Accepts a string.
  :conditions => "created_at < NOW() AND deleted IS NOT NULL"
The <tt>:conditions</tt> key is especially useful if you delete records by marking them deleted rather than removing them from the database.

== Concatenating multiple fields

Use the <tt>:concats</tt> key (MySQL only).

Accepts an array of option hashes, which can be of two types:

1. To concatenate many fields within one record, use a regular (or horizontal) concatenation. Regular concatenations contain a <tt>:fields</tt> key (again, an array of field names), and a mandatory <tt>:as</tt> key (the name of the result of the concatenation). For example, to concatenate the <tt>title</tt> and <tt>body</tt> into one field called <tt>text</tt>:
  :concats => [{:fields => ["title", "body"], :as => "text"}]

2. To group and concatenate a field from a set of associated records, use a group (or vertical) concatenation. Group concatenations join into another table, and can be used to index a number of associated models as one field in a parent model. Group concatenations contain a <tt>:model</tt> key (the class name of the included model), a <tt>:field</tt> key (the field on the included model to concatenate), and an optional <tt>:as</tt> key (also the name of the result of the concatenation). For example, to concatenate all <tt>Post#body</tt> contents into the parent's <tt>responses</tt> field:
  :concats => {:model => "Post", :field => "body", :as => "responses"}

Optional group concatenation keys are <tt>:association_name</tt> (if your <tt>has_many</tt> association can't be derived from the model name), <tt>:association_sql</tt>, if you need to pass a custom JOIN string (for example, a double JOIN for a <tt>has_many :through</tt>), and <tt>:conditions</tt> (if you need custom WHERE conditions for this particular association).

== Example

Here's an example configuration using most of the options, taken from production code:

  class Story < ActiveRecord::Base
    is_indexed :fields => [
        "title",
        "published_at"
      ],
      :includes => [
        {:model => "Category", :field => "name", :as => "category"}
      ],
      :concats => [
        {:fields => ["title", "long_description", "short_description"], :as => "editorial"},
        {:model => "Page", :field => "body", :as => "body", :association_name => "pages"},
        {:model => "Comment", :field => "body", :as => "comments",
          :conditions => "comments.item_type = '#{base_class}'"}
      ],
      :conditions => self.live_condition_string
  end

=end

    # Register this model with Ultrasphinx and validate the option hash.
    # The configuration itself is consumed later by the rake configure task.
    def self.is_indexed opts = {}

      opts.assert_valid_keys [:fields, :concats, :conditions, :includes, :nulls]

      # sanity-check each concat declaration
      Array(opts[:concats]).each do |concat|
        concat.assert_valid_keys [:model, :conditions, :field, :as, :fields, :association_name, :association_sql]
        raise Ultrasphinx::ConfigurationError, "You can't mix regular concat and group concats" if concat[:fields] and (concat[:field] or concat[:model] or concat[:association_name])
        raise Ultrasphinx::ConfigurationError, "Group concats must not have multiple fields" if concat[:field].is_a? Array
        raise Ultrasphinx::ConfigurationError, "Regular concats should have multiple fields" if concat[:fields] and !concat[:fields].is_a?(Array)
      end

      # sanity-check each include declaration
      # (this previously iterated opts[:joins], a key that assert_valid_keys
      # above rejects outright, so the loop was dead code and :includes
      # entries were never validated)
      Array(opts[:includes]).each do |join|
        join.assert_valid_keys [:model, :field, :as, :association_sql]
      end

      Ultrasphinx::MODEL_CONFIGURATION[self.name] = opts
    end
  end
end
@@ -0,0 +1,441 @@
|
|
1
|
+
|
2
|
+
module Ultrasphinx
|
3
|
+
|
4
|
+
=begin rdoc
|
5
|
+
Command-interface Search object.
|
6
|
+
|
7
|
+
== Making a search
|
8
|
+
|
9
|
+
To perform a search, instantiate an Ultrasphinx::Search object. Parameters are the query string, and an optional hash of query options.
|
10
|
+
@search = Ultrasphinx::Search.new(
|
11
|
+
@query,
|
12
|
+
:sort_mode => 'descending',
|
13
|
+
:sort_by => 'created_at'
|
14
|
+
)
|
15
|
+
|
16
|
+
Now, to run the query, call its <tt>run()</tt> method. Your results will be available as ActiveRecord instances via <tt>results()</tt>. Example:
|
17
|
+
@search.run
|
18
|
+
@search.results
|
19
|
+
|
20
|
+
== Query options
|
21
|
+
<tt>:per_page</tt>:: An integer.. How many results per page.
|
22
|
+
<tt>:page</tt>:: An integer. Which page of the paginated results to return.
|
23
|
+
<tt>:models</tt>:: An array or string. The class name of the model you want to search, an array of models names to search, or nil for all available models.
|
24
|
+
<tt>:sort_mode</tt>:: 'relevance' or 'ascending' or 'descending'. How to order the result set. Note that 'time' and 'extended' modes are available, but not tested.
|
25
|
+
<tt>:sort_by</tt>:: A field name. What field to order by for 'ascending' or 'descending' mode. Has no effect for 'relevance'.
|
26
|
+
<tt>:weights</tt>:: A hash. Text-field names and associated query weighting. The default weight for every field is 1.0. Example: <tt>:weights => {"title" => 2.0}</tt>
|
27
|
+
<tt>:raw_filters</tt>:: A hash. Field names and associated values. You can use a single value, an array of values, or a range.
|
28
|
+
|
29
|
+
Note that you can set up your own query defaults in <tt>environment.rb</tt>:
|
30
|
+
|
31
|
+
Ultrasphinx::Search.query_defaults = {
|
32
|
+
:per_page => 10,
|
33
|
+
:sort_mode => :relevance,
|
34
|
+
:weights => {"title" => 2.0}
|
35
|
+
}
|
36
|
+
|
37
|
+
== Cache_fu integration
|
38
|
+
|
39
|
+
The <tt>get_cache</tt> method will be used to instantiate records for models that respond to it. Otherwise, <tt>find</tt> is used.
|
40
|
+
|
41
|
+
== Excerpt mode
|
42
|
+
|
43
|
+
You can have Sphinx excerpt and highlight the matched sections in the associated fields. Instead of calling <tt>run</tt>, call <tt>excerpt</tt>.
|
44
|
+
|
45
|
+
@search.excerpt
|
46
|
+
|
47
|
+
The returned models will be frozen and have their field contents temporarily changed to the excerpted and highlighted results.
|
48
|
+
|
49
|
+
You need to set the <tt>content_methods</tt> key on Ultrasphinx::Search.excerpting_options to whatever methods you need the excerpter to try to excerpt. This way Ruby-only methods are supported (for example, a metadata method which combines various model fields, or an aliased field so that the original record contents are still available).
|
50
|
+
|
51
|
+
There are some other keys you can set, such as excerpt size, HTML tags to highlight with, and number of words on either side of each excerpt chunk. Example (in <tt>environment.rb</tt>):
|
52
|
+
|
53
|
+
Ultrasphinx::Search.excerpting_options = {
|
54
|
+
'before_match' => "<strong>",
|
55
|
+
'after_match' => "</strong>",
|
56
|
+
'chunk_separator' => "...",
|
57
|
+
'limit' => 256,
|
58
|
+
'around' => 3,
|
59
|
+
'content_methods' => [[:title], [:body, :description, :content], [:metadata]]
|
60
|
+
}
|
61
|
+
|
62
|
+
Note that your database is never changed by anything Ultrasphinx does.
|
63
|
+
|
64
|
+
=end
|
65
|
+
|
66
|
+
class Search
|
67
|
+
unloadable if RAILS_ENV == "development"
|
68
|
+
|
69
|
+
cattr_accessor :query_defaults
|
70
|
+
self.query_defaults ||= {:page => 1,
|
71
|
+
:models => nil,
|
72
|
+
:per_page => 20,
|
73
|
+
:sort_by => 'created_at',
|
74
|
+
:sort_mode => :relevance,
|
75
|
+
:weights => nil,
|
76
|
+
:raw_filters => nil}
|
77
|
+
|
78
|
+
cattr_accessor :excerpting_options
|
79
|
+
self.excerpting_options ||= {
|
80
|
+
'before_match' => "<strong>", 'after_match' => "</strong>",
|
81
|
+
'chunk_separator' => "...",
|
82
|
+
'limit' => 256,
|
83
|
+
'around' => 3,
|
84
|
+
# results should respond to one in each group of these, in precedence order, in order for the excerpting to fire
|
85
|
+
'content_methods' => [[:title, :name], [:body, :description, :content], [:metadata]]
|
86
|
+
}
|
87
|
+
|
88
|
+
cattr_accessor :client_options
|
89
|
+
self.client_options ||= {
|
90
|
+
:with_subtotals => true,
|
91
|
+
:max_retries => 4,
|
92
|
+
:retry_sleep_time => 3
|
93
|
+
}
|
94
|
+
|
95
|
+
# mode to integer mappings
|
96
|
+
SPHINX_CLIENT_PARAMS = {
|
97
|
+
:sort_mode => {
|
98
|
+
:relevance => Sphinx::Client::SPH_SORT_RELEVANCE,
|
99
|
+
:descending => Sphinx::Client::SPH_SORT_ATTR_DESC,
|
100
|
+
:ascending => Sphinx::Client::SPH_SORT_ATTR_ASC,
|
101
|
+
:time => Sphinx::Client::SPH_SORT_TIME_SEGMENTS,
|
102
|
+
:extended => Sphinx::Client::SPH_SORT_EXTENDED,
|
103
|
+
:desc => Sphinx::Client::SPH_SORT_ATTR_DESC, # legacy compatibility
|
104
|
+
:asc => Sphinx::Client::SPH_SORT_ATTR_ASC
|
105
|
+
}
|
106
|
+
}
|
107
|
+
|
108
|
+
def self.get_models_to_class_ids #:nodoc:
|
109
|
+
# reading the conf file makes sure that we are in sync with the actual sphinx index,
|
110
|
+
# not whatever you happened to change your models to most recently
|
111
|
+
unless File.exist? CONF_PATH
|
112
|
+
Ultrasphinx.say "configuration file not found for #{ENV['RAILS_ENV'].inspect} environment"
|
113
|
+
Ultrasphinx.say "please run 'rake ultrasphinx:configure'"
|
114
|
+
else
|
115
|
+
begin
|
116
|
+
lines = open(CONF_PATH).readlines
|
117
|
+
sources = lines.select {|s| s =~ /^source \w/ }.map {|s| s[/source ([\w\d_-]*)/, 1].classify }
|
118
|
+
ids = lines.select {|s| s =~ /^sql_query / }.map {|s| s[/(\d*) AS class_id/, 1].to_i }
|
119
|
+
|
120
|
+
raise unless sources.size == ids.size
|
121
|
+
Hash[*sources.zip(ids).flatten]
|
122
|
+
|
123
|
+
rescue
|
124
|
+
Ultrasphinx.say "#{CONF_PATH} file is corrupted"
|
125
|
+
Ultrasphinx.say "please run 'rake ultrasphinx:configure'"
|
126
|
+
end
|
127
|
+
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
MODELS_TO_IDS = get_models_to_class_ids || {}
|
132
|
+
|
133
|
+
MAX_MATCHES = DAEMON_SETTINGS["max_matches"].to_i
|
134
|
+
|
135
|
+
# Returns the options hash you used.
|
136
|
+
def options; @options; end
|
137
|
+
|
138
|
+
# Returns the query string used.
|
139
|
+
def query; @query; end
|
140
|
+
|
141
|
+
# Returns an array of result objects.
|
142
|
+
def results; @results; end
|
143
|
+
|
144
|
+
# Returns the raw response from the Sphinx client.
|
145
|
+
def response; @response; end
|
146
|
+
|
147
|
+
# Returns a hash of total result counts, scoped to each available model.
|
148
|
+
def subtotals; @subtotals; end
|
149
|
+
|
150
|
+
# Returns the total result count.
|
151
|
+
def total
|
152
|
+
[response['total_found'] || 0, MAX_MATCHES].min
|
153
|
+
end
|
154
|
+
|
155
|
+
# Returns the number of results on this particular page, and may range from 0 up to per_page().
|
156
|
+
def found
|
157
|
+
results.size
|
158
|
+
end
|
159
|
+
|
160
|
+
# Returns the response time of the query, in milliseconds.
|
161
|
+
def time
|
162
|
+
response['time']
|
163
|
+
end
|
164
|
+
|
165
|
+
# Returns whether the query has been run.
|
166
|
+
def run?
|
167
|
+
!response.blank?
|
168
|
+
end
|
169
|
+
|
170
|
+
# Returns the current page number of the result set. (Page indexes begin at 1.)
|
171
|
+
def page
|
172
|
+
options[:page]
|
173
|
+
end
|
174
|
+
|
175
|
+
# Returns the number of records per page.
|
176
|
+
def per_page
|
177
|
+
options[:per_page]
|
178
|
+
end
|
179
|
+
|
180
|
+
# Returns the last available page number in the result set.
|
181
|
+
def last_page
|
182
|
+
(total / per_page) + (total % per_page == 0 ? 0 : 1)
|
183
|
+
end
|
184
|
+
|
185
|
+
# Builds a new command-interface Search object.
|
186
|
+
def initialize query, opts = {}
|
187
|
+
@query = query || ""
|
188
|
+
@parsed_query = parse_google_to_sphinx(@query)
|
189
|
+
|
190
|
+
@options = self.class.query_defaults.merge(opts._coerce_basic_types)
|
191
|
+
@options[:raw_filters] ||= {}
|
192
|
+
@options[:models] = Array(@options[:models])
|
193
|
+
|
194
|
+
@results, @subtotals, @response = [], {}, {}
|
195
|
+
|
196
|
+
raise Sphinx::SphinxArgumentError, "Invalid options: #{@extra * ', '}" if (@extra = (@options.keys - (SPHINX_CLIENT_PARAMS.merge(self.class.query_defaults).keys))).size > 0
|
197
|
+
end
|
198
|
+
|
199
|
+
# Run the search, filling results with an array of ActiveRecord objects.
|
200
|
+
def run(reify = true)
|
201
|
+
@request = build_request_with_options(@options)
|
202
|
+
tries = 0
|
203
|
+
|
204
|
+
logger.info "** ultrasphinx: searching for #{query.inspect} (parsed as #{@parsed_query.inspect}), options #{@options.inspect}"
|
205
|
+
|
206
|
+
begin
|
207
|
+
@response = @request.Query(@parsed_query)
|
208
|
+
logger.info "** ultrasphinx: search returned, error #{@request.GetLastError.inspect}, warning #{@request.GetLastWarning.inspect}, returned #{total}/#{response['total_found']} in #{time} seconds."
|
209
|
+
|
210
|
+
@subtotals = get_subtotals(@request, @parsed_query) if self.class.client_options[:with_subtotals]
|
211
|
+
@results = response['matches']
|
212
|
+
|
213
|
+
# if you don't reify, you'll have to do the modulus reversal yourself to get record ids
|
214
|
+
@results = reify_results(@results) if reify
|
215
|
+
|
216
|
+
rescue Sphinx::SphinxResponseError, Sphinx::SphinxTemporaryError, Errno::EPIPE => e
|
217
|
+
if (tries += 1) <= self.class.client_options[:max_retries]
|
218
|
+
logger.warn "** ultrasphinx: restarting query (#{tries} attempts already) (#{e})"
|
219
|
+
sleep(self.class.client_options[:retry_sleep_time]) if tries == self.class.client_options[:max_retries]
|
220
|
+
retry
|
221
|
+
else
|
222
|
+
logger.warn "** ultrasphinx: query failed"
|
223
|
+
raise e
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
self
|
228
|
+
end
|
229
|
+
|
230
|
+
|
231
|
+
# Overwrite the configured content accessors with excerpted and highlighted versions of themselves.
|
232
|
+
# Runs run if it hasn't already been done.
|
233
|
+
def excerpt
|
234
|
+
|
235
|
+
run unless run?
|
236
|
+
return if results.empty?
|
237
|
+
|
238
|
+
# see what fields each result might respond to for our excerpting
|
239
|
+
results_with_content_methods = results.map do |result|
|
240
|
+
[result] << self.class.excerpting_options['content_methods'].map do |methods|
|
241
|
+
methods.detect { |x| result.respond_to? x }
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
# fetch the actual field contents
|
246
|
+
texts = results_with_content_methods.map do |result, methods|
|
247
|
+
methods.map do |method|
|
248
|
+
method and strip_bogus_characters(result.send(method)) or ""
|
249
|
+
end
|
250
|
+
end.flatten
|
251
|
+
|
252
|
+
# ship to sphinx to highlight and excerpt
|
253
|
+
responses = @request.BuildExcerpts(
|
254
|
+
texts,
|
255
|
+
UNIFIED_INDEX_NAME,
|
256
|
+
strip_query_commands(@parsed_query),
|
257
|
+
self.class.excerpting_options.except('content_methods')
|
258
|
+
).in_groups_of(self.class.excerpting_options['content_methods'].size)
|
259
|
+
|
260
|
+
results_with_content_methods.each_with_index do |result_and_methods, i|
|
261
|
+
# override the individual model accessors with the excerpted data
|
262
|
+
result, methods = result_and_methods
|
263
|
+
methods.each_with_index do |method, j|
|
264
|
+
result._metaclass.send(:define_method, method) { responses[i][j] } if method
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
@results = results_with_content_methods.map(&:first).map(&:freeze)
|
269
|
+
|
270
|
+
self
|
271
|
+
end
|
272
|
+
|
273
|
+
|
274
|
+
private
|
275
|
+
|
276
|
+
def build_request_with_options opts
|
277
|
+
|
278
|
+
request = Sphinx::Client.new
|
279
|
+
|
280
|
+
request.SetServer(PLUGIN_SETTINGS['server_host'], PLUGIN_SETTINGS['server_port'])
|
281
|
+
request.SetMatchMode Sphinx::Client::SPH_MATCH_EXTENDED # force extended query mode
|
282
|
+
|
283
|
+
offset, limit = opts[:per_page] * (opts[:page] - 1), opts[:per_page]
|
284
|
+
|
285
|
+
request.SetLimits offset, limit, [offset + limit, MAX_MATCHES].min
|
286
|
+
request.SetSortMode SPHINX_CLIENT_PARAMS[:sort_mode][opts[:sort_mode]], opts[:sort_by].to_s
|
287
|
+
|
288
|
+
if weights = opts[:weights]
|
289
|
+
# order the weights hash according to the field order for sphinx, and set the missing fields to 1.0
|
290
|
+
# XXX we shouldn't really have to hit Fields.instance from within Ultrasphinx::Search
|
291
|
+
request.SetWeights(Fields.instance.select{|n,t| t == 'text'}.map(&:first).sort.inject([]) do |array, field|
|
292
|
+
array << (weights[field] || 1.0)
|
293
|
+
end)
|
294
|
+
end
|
295
|
+
|
296
|
+
unless opts[:models].compact.empty?
|
297
|
+
request.SetFilter 'class_id', opts[:models].map{|m| MODELS_TO_IDS[m.to_s]}
|
298
|
+
end
|
299
|
+
|
300
|
+
# extract ranged raw filters
|
301
|
+
# XXX some of this mangling might not be necessary
|
302
|
+
opts[:raw_filters].each do |field, value|
|
303
|
+
begin
|
304
|
+
unless value.is_a? Range
|
305
|
+
request.SetFilter field, Array(value)
|
306
|
+
else
|
307
|
+
min, max = [value.first, value.last].map do |x|
|
308
|
+
x._to_numeric if x.is_a? String
|
309
|
+
end
|
310
|
+
unless min.class != max.class
|
311
|
+
min, max = max, min if min > max
|
312
|
+
request.SetFilterRange field, min, max
|
313
|
+
end
|
314
|
+
end
|
315
|
+
rescue NoMethodError => e
|
316
|
+
raise Sphinx::SphinxArgumentError, "filter: #{field.inspect}:#{value.inspect} is invalid"
|
317
|
+
end
|
318
|
+
end
|
319
|
+
|
320
|
+
# request.SetIdRange # never useful
|
321
|
+
# request.SetGroup # never useful
|
322
|
+
|
323
|
+
request
|
324
|
+
end
|
325
|
+
|
326
|
+
def get_subtotals(request, query)
|
327
|
+
# XXX andrew says there's a better way to do this
|
328
|
+
subtotals, filtered_request = {}, request.dup
|
329
|
+
|
330
|
+
MODELS_TO_IDS.each do |name, class_id|
|
331
|
+
filtered_request.instance_eval { @filters.delete_if {|f| f['attr'] == 'class_id'} }
|
332
|
+
filtered_request.SetFilter 'class_id', [class_id]
|
333
|
+
subtotals[name] = request.Query(query)['total_found']
|
334
|
+
end
|
335
|
+
|
336
|
+
subtotals
|
337
|
+
end
|
338
|
+
|
339
|
+
def strip_bogus_characters(s)
|
340
|
+
# used to remove some garbage before highlighting
|
341
|
+
s.gsub(/<.*?>|\.\.\.|\342\200\246|\n|\r/, " ").gsub(/http.*?( |$)/, ' ') if s
|
342
|
+
end
|
343
|
+
|
344
|
+
def strip_query_commands(s)
|
345
|
+
# XXX dumb hack for query commands, since sphinx doesn't intelligently parse the query in excerpt mode
|
346
|
+
s.gsub(/AND|OR|NOT|\@\w+/, "")
|
347
|
+
end
|
348
|
+
|
349
|
+
def parse_google_to_sphinx query
|
350
|
+
# alters google-style querystring into sphinx-style
|
351
|
+
return if query.blank?
|
352
|
+
|
353
|
+
# remove AND's, always
|
354
|
+
query = " #{query} ".gsub(" AND ", " ")
|
355
|
+
|
356
|
+
# split query on spaces that are not inside sets of quotes or parens
|
357
|
+
query = query.scan(/[^"() ]*["(][^")]*[")]|[^"() ]+/)
|
358
|
+
|
359
|
+
query.each_with_index do |token, index|
|
360
|
+
|
361
|
+
# recurse for parens, if necessary
|
362
|
+
if token =~ /^(.*?)\((.*)\)(.*?$)/
|
363
|
+
token = query[index] = "#{$1}(#{parse_google_to_sphinx $2})#{$3}"
|
364
|
+
end
|
365
|
+
|
366
|
+
# translate to sphinx-language
|
367
|
+
case token
|
368
|
+
when "OR"
|
369
|
+
query[index] = "|"
|
370
|
+
when "NOT"
|
371
|
+
query[index] = "-#{query[index+1]}"
|
372
|
+
query[index+1] = ""
|
373
|
+
when "AND"
|
374
|
+
query[index] = ""
|
375
|
+
when /:/
|
376
|
+
query[query.size] = "@" + query[index].sub(":", " ")
|
377
|
+
query[index] = ""
|
378
|
+
end
|
379
|
+
|
380
|
+
end
|
381
|
+
query.join(" ").squeeze(" ")
|
382
|
+
end
|
383
|
+
|
384
|
+
def reify_results(sphinx_ids)
|
385
|
+
|
386
|
+
# order by position and then toss the rest of the data
|
387
|
+
# make sure you are using the bundled Sphinx client, which has a patch
|
388
|
+
sphinx_ids = sphinx_ids.sort_by do |key, value|
|
389
|
+
value['index'] or raise ConfigurationError, "Your Sphinx client is not properly patched."
|
390
|
+
end.map(&:first)
|
391
|
+
|
392
|
+
# inverse-modulus map the sphinx ids to the table-specific ids
|
393
|
+
ids = Hash.new([])
|
394
|
+
sphinx_ids.each do |id|
|
395
|
+
ids[MODELS_TO_IDS.invert[id % MODELS_TO_IDS.size]] += [id / MODELS_TO_IDS.size] # yay math
|
396
|
+
end
|
397
|
+
raise Sphinx::SphinxResponseError, "impossible document id in query result" unless ids.values.flatten.size == sphinx_ids.size
|
398
|
+
|
399
|
+
# fetch them for real
|
400
|
+
results = []
|
401
|
+
ids.each do |model, id_set|
|
402
|
+
klass = model.constantize
|
403
|
+
finder = klass.respond_to?(:get_cache) ? :get_cache : :find
|
404
|
+
logger.debug "** ultrasphinx: using #{klass.name}\##{finder} as finder method"
|
405
|
+
|
406
|
+
begin
|
407
|
+
results += case instances = id_set.map {|id| klass.send(finder, id)} # XXX temporary until we update cache_fu
|
408
|
+
when Hash
|
409
|
+
instances.values
|
410
|
+
when Array
|
411
|
+
instances
|
412
|
+
else
|
413
|
+
Array(instances)
|
414
|
+
end
|
415
|
+
rescue ActiveRecord:: ActiveRecordError => e
|
416
|
+
raise Sphinx::SphinxResponseError, e.inspect
|
417
|
+
end
|
418
|
+
end
|
419
|
+
|
420
|
+
# put them back in order
|
421
|
+
results.sort_by do |r|
|
422
|
+
raise Sphinx::SphinxResponseError, "Bogus ActiveRecord id for #{r.class}:#{r.id}" unless r.id
|
423
|
+
index = (sphinx_ids.index(sphinx_id = r.id * MODELS_TO_IDS.size + MODELS_TO_IDS[r.class.base_class.name]))
|
424
|
+
raise Sphinx::SphinxResponseError, "Bogus reverse id for #{r.class}:#{r.id} (Sphinx:#{sphinx_id})" unless index
|
425
|
+
index / sphinx_ids.size.to_f
|
426
|
+
end
|
427
|
+
|
428
|
+
# add an accessor for absolute search rank for each record
|
429
|
+
results.each_with_index do |r, index|
|
430
|
+
i = per_page * page + index
|
431
|
+
r._metaclass.send(:define_method, "result_index") { i }
|
432
|
+
end
|
433
|
+
|
434
|
+
end
|
435
|
+
|
436
|
+
def logger
|
437
|
+
RAILS_DEFAULT_LOGGER
|
438
|
+
end
|
439
|
+
|
440
|
+
end
|
441
|
+
end
|