dpickett-thinking-sphinx 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENCE +20 -0
- data/README +107 -0
- data/lib/thinking_sphinx/active_record/delta.rb +74 -0
- data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
- data/lib/thinking_sphinx/active_record/search.rb +57 -0
- data/lib/thinking_sphinx/active_record.rb +245 -0
- data/lib/thinking_sphinx/adapters/abstract_adapter.rb +34 -0
- data/lib/thinking_sphinx/adapters/mysql_adapter.rb +53 -0
- data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +129 -0
- data/lib/thinking_sphinx/association.rb +144 -0
- data/lib/thinking_sphinx/attribute.rb +254 -0
- data/lib/thinking_sphinx/class_facet.rb +20 -0
- data/lib/thinking_sphinx/collection.rb +142 -0
- data/lib/thinking_sphinx/configuration.rb +236 -0
- data/lib/thinking_sphinx/core/string.rb +22 -0
- data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
- data/lib/thinking_sphinx/deltas/default_delta.rb +65 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
- data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
- data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
- data/lib/thinking_sphinx/deltas.rb +22 -0
- data/lib/thinking_sphinx/facet.rb +58 -0
- data/lib/thinking_sphinx/facet_collection.rb +45 -0
- data/lib/thinking_sphinx/field.rb +172 -0
- data/lib/thinking_sphinx/index/builder.rb +233 -0
- data/lib/thinking_sphinx/index/faux_column.rb +110 -0
- data/lib/thinking_sphinx/index.rb +432 -0
- data/lib/thinking_sphinx/rails_additions.rb +133 -0
- data/lib/thinking_sphinx/search.rb +654 -0
- data/lib/thinking_sphinx/tasks.rb +128 -0
- data/lib/thinking_sphinx.rb +145 -0
- data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
- data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
- data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
- data/spec/unit/thinking_sphinx/active_record_spec.rb +256 -0
- data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
- data/spec/unit/thinking_sphinx/attribute_spec.rb +212 -0
- data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
- data/spec/unit/thinking_sphinx/configuration_spec.rb +136 -0
- data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
- data/spec/unit/thinking_sphinx/field_spec.rb +145 -0
- data/spec/unit/thinking_sphinx/index/builder_spec.rb +5 -0
- data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
- data/spec/unit/thinking_sphinx/index_spec.rb +54 -0
- data/spec/unit/thinking_sphinx/search_spec.rb +59 -0
- data/spec/unit/thinking_sphinx_spec.rb +129 -0
- data/tasks/distribution.rb +48 -0
- data/tasks/rails.rake +1 -0
- data/tasks/testing.rb +86 -0
- data/vendor/after_commit/LICENSE +20 -0
- data/vendor/after_commit/README +16 -0
- data/vendor/after_commit/Rakefile +22 -0
- data/vendor/after_commit/init.rb +5 -0
- data/vendor/after_commit/lib/after_commit/active_record.rb +91 -0
- data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
- data/vendor/after_commit/lib/after_commit.rb +42 -0
- data/vendor/after_commit/test/after_commit_test.rb +53 -0
- data/vendor/delayed_job/lib/delayed/job.rb +251 -0
- data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
- data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
- data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
- data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
- data/vendor/riddle/lib/riddle/client/message.rb +65 -0
- data/vendor/riddle/lib/riddle/client/response.rb +84 -0
- data/vendor/riddle/lib/riddle/client.rb +619 -0
- data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
- data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
- data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
- data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
- data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
- data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
- data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
- data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
- data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
- data/vendor/riddle/lib/riddle/configuration.rb +33 -0
- data/vendor/riddle/lib/riddle/controller.rb +44 -0
- data/vendor/riddle/lib/riddle.rb +30 -0
- metadata +158 -0
@@ -0,0 +1,654 @@
|
|
1
|
+
module ThinkingSphinx
|
2
|
+
# Once you've got those indexes in and built, this is the stuff that
|
3
|
+
# matters - how to search! This class provides a generic search
|
4
|
+
# interface - which you can use to search all your indexed models at once.
|
5
|
+
# Most times, you will just want a specific model's results - the search and
|
6
|
+
# search_for_ids methods will do the job in exactly the same manner when
|
7
|
+
# called from a model.
|
8
|
+
#
|
9
|
+
class Search
|
10
|
+
class << self
|
11
|
+
# Searches for results that match the parameters provided. Will only
|
12
|
+
# return the ids for the matching objects. See #search for syntax
|
13
|
+
# examples.
|
14
|
+
#
|
15
|
+
# Note that this only searches the Sphinx index, with no ActiveRecord
|
16
|
+
# queries. Thus, if your index is not in sync with the database, this
|
17
|
+
# method may return ids that no longer exist there.
|
18
|
+
#
|
19
|
+
def search_for_ids(*args)
  # Query with a copy of the argument list; the options hash is only pulled
  # off +args+ afterwards, for pagination.
  results, client = search_results(*args.clone)

  options     = args.extract_options!
  page_number = (options[:page] || 1).to_i

  ThinkingSphinx::Collection.ids_from_results(results, page_number, client.limit, options)
end
|
27
|
+
|
28
|
+
# Searches through the Sphinx indexes for relevant matches. There's
|
29
|
+
# various ways to search, sort, group and filter - which are covered
|
30
|
+
# below.
|
31
|
+
#
|
32
|
+
# Also, if you have WillPaginate installed, the search method can be used
|
33
|
+
# just like paginate. The same parameters - :page and :per_page - work as
|
34
|
+
# expected, and the returned result set can be used by the will_paginate
|
35
|
+
# helper.
|
36
|
+
#
|
37
|
+
# == Basic Searching
|
38
|
+
#
|
39
|
+
# The simplest way of searching is straight text.
|
40
|
+
#
|
41
|
+
# ThinkingSphinx::Search.search "pat"
|
42
|
+
# ThinkingSphinx::Search.search "google"
|
43
|
+
# User.search "pat", :page => (params[:page] || 1)
|
44
|
+
# Article.search "relevant news issue of the day"
|
45
|
+
#
|
46
|
+
# If you specify :include, like in an #find call, this will be respected
|
47
|
+
# when loading the relevant models from the search results.
|
48
|
+
#
|
49
|
+
# User.search "pat", :include => :posts
|
50
|
+
#
|
51
|
+
# == Match Modes
|
52
|
+
#
|
53
|
+
# Sphinx supports 5 different matching modes. By default Thinking Sphinx
|
54
|
+
# uses :all, which unsurprisingly requires all the supplied search terms
|
55
|
+
# to match a result.
|
56
|
+
#
|
57
|
+
# Alternative modes include:
|
58
|
+
#
|
59
|
+
# User.search "pat allan", :match_mode => :any
|
60
|
+
# User.search "pat allan", :match_mode => :phrase
|
61
|
+
# User.search "pat | allan", :match_mode => :boolean
|
62
|
+
# User.search "@name pat | @username pat", :match_mode => :extended
|
63
|
+
#
|
64
|
+
# Any will find results with any of the search terms. Phrase treats the search
|
65
|
+
# terms as a single phrase instead of individual words. Boolean and extended allow
|
66
|
+
# for more complex query syntax, refer to the sphinx documentation for further
|
67
|
+
# details.
|
68
|
+
#
|
69
|
+
# == Weighting
|
70
|
+
#
|
71
|
+
# Sphinx has support for weighting, where matches in one field can be considered
|
72
|
+
# more important than in another. Weights are integers, with 1 as the default.
|
73
|
+
# They can be set per-search like this:
|
74
|
+
#
|
75
|
+
# User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 }
|
76
|
+
#
|
77
|
+
# If you're searching multiple models, you can set per-index weights:
|
78
|
+
#
|
79
|
+
# ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 }
|
80
|
+
#
|
81
|
+
# See http://sphinxsearch.com/doc.html#weighting for further details.
|
82
|
+
#
|
83
|
+
# == Searching by Fields
|
84
|
+
#
|
85
|
+
# If you want to step it up a level, you can limit your search terms to
|
86
|
+
# specific fields:
|
87
|
+
#
|
88
|
+
# User.search :conditions => {:name => "pat"}
|
89
|
+
#
|
90
|
+
# This uses Sphinx's extended match mode, unless you specify a different
|
91
|
+
# match mode explicitly (but then this way of searching won't work). Also
|
92
|
+
# note that you don't need to put in a search string.
|
93
|
+
#
|
94
|
+
# == Searching by Attributes
|
95
|
+
#
|
96
|
+
# Also known as filters, you can limit your searches to documents that
|
97
|
+
# have specific values for their attributes. There are two ways to do
|
98
|
+
# this. The first is one that works in all scenarios - using the :with
|
99
|
+
# option.
|
100
|
+
#
|
101
|
+
# ThinkingSphinx::Search.search :with => {:parent_id => 10}
|
102
|
+
#
|
103
|
+
# The second is only viable if you're searching with a specific model
|
104
|
+
# (not multi-model searching). With a single model, Thinking Sphinx
|
105
|
+
# can figure out what attributes and fields are available, so you can
|
106
|
+
# put it all in the :conditions hash, and it will sort it out.
|
107
|
+
#
|
108
|
+
# Node.search :conditions => {:parent_id => 10}
|
109
|
+
#
|
110
|
+
# Filters can be single values, arrays of values, or ranges.
|
111
|
+
#
|
112
|
+
# Article.search "East Timor", :conditions => {:rating => 3..5}
|
113
|
+
#
|
114
|
+
# == Excluding by Attributes
|
115
|
+
#
|
116
|
+
# Sphinx also supports negative filtering - where the filters are of
|
117
|
+
# attribute values to exclude. This is done with the :without option:
|
118
|
+
#
|
119
|
+
# User.search :without => {:role_id => 1}
|
120
|
+
#
|
121
|
+
# == Excluding by Primary Key
|
122
|
+
#
|
123
|
+
# There is a shortcut to exclude records by their ActiveRecord primary key:
|
124
|
+
#
|
125
|
+
# User.search :without_ids => 1
|
126
|
+
#
|
127
|
+
# Pass an array or a single value.
|
128
|
+
#
|
129
|
+
# The primary key must be an integer as a negative filter is used. Note
|
130
|
+
# that for multi-model search, an id may occur in more than one model.
|
131
|
+
#
|
132
|
+
# == Infix (Star) Searching
|
133
|
+
#
|
134
|
+
# By default, Sphinx uses English stemming, e.g. matching "shoes" if you
|
135
|
+
# search for "shoe". It won't find "Melbourne" if you search for
|
136
|
+
# "elbourn", though.
|
137
|
+
#
|
138
|
+
# Enable infix searching by something like this in config/sphinx.yml:
|
139
|
+
#
|
140
|
+
# development:
|
141
|
+
# enable_star: 1
|
142
|
+
# min_infix_length: 2
|
143
|
+
#
|
144
|
+
# Note that this will make indexing take longer.
|
145
|
+
#
|
146
|
+
# With those settings (and after reindexing), wildcard asterisks can be used
|
147
|
+
# in queries:
|
148
|
+
#
|
149
|
+
# Location.search "*elbourn*"
|
150
|
+
#
|
151
|
+
# To automatically add asterisks around every token (but not operators),
|
152
|
+
# pass the :star option:
|
153
|
+
#
|
154
|
+
# Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean
|
155
|
+
#
|
156
|
+
# This would become "*elbourn* -*ustrali*". The :star option only adds the
|
157
|
+
# asterisks. You need to make the config/sphinx.yml changes yourself.
|
158
|
+
#
|
159
|
+
# By default, the tokens are assumed to match the regular expression /\w+/u.
|
160
|
+
# If you've modified the charset_table, pass another regular expression, e.g.
|
161
|
+
#
|
162
|
+
# User.search("oo@bar.c", :star => /[\w@.]+/u)
|
163
|
+
#
|
164
|
+
# to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*".
|
165
|
+
#
|
166
|
+
# == Sorting
|
167
|
+
#
|
168
|
+
# Sphinx can only sort by attributes, so generally you will need to avoid
|
169
|
+
# using field names in your :order option. However, if you're searching
|
170
|
+
# on a single model, and have specified some fields as sortable, you can
|
171
|
+
# use those field names and Thinking Sphinx will interpret accordingly.
|
172
|
+
# Remember: this will only happen for single-model searches, and only
|
173
|
+
# through the :order option.
|
174
|
+
#
|
175
|
+
# Location.search "Melbourne", :order => :state
|
176
|
+
# User.search :conditions => {:role_id => 2}, :order => "name ASC"
|
177
|
+
#
|
178
|
+
# Keep in mind that if you use a string, you *must* specify the direction
|
179
|
+
# (ASC or DESC) else Sphinx won't return any results. If you use a symbol
|
180
|
+
# then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
|
181
|
+
# use the :sort_mode option:
|
182
|
+
#
|
183
|
+
# Location.search "Melbourne", :order => :state, :sort_mode => :desc
|
184
|
+
#
|
185
|
+
# Of course, there are other sort modes - check out the Sphinx
|
186
|
+
# documentation[http://sphinxsearch.com/doc.html] for that level of
|
187
|
+
# detail though.
|
188
|
+
#
|
189
|
+
# == Grouping
|
190
|
+
#
|
191
|
+
# For this you can use the group_by, group_clause and group_function
|
192
|
+
# options - which are all directly linked to Sphinx's expectations. No
|
193
|
+
# magic from Thinking Sphinx. It can get a little tricky, so make sure
|
194
|
+
# you read all the relevant
|
195
|
+
# documentation[http://sphinxsearch.com/doc.html#clustering] first.
|
196
|
+
#
|
197
|
+
# Yes this section will be expanded, but this is a start.
|
198
|
+
#
|
199
|
+
# == Geo/Location Searching
|
200
|
+
#
|
201
|
+
# Sphinx - and therefore Thinking Sphinx - has the facility to search
|
202
|
+
# around a geographical point, using a given latitude and longitude. To
|
203
|
+
# take advantage of this, you will need to have both of those values in
|
204
|
+
# attributes. To search with that point, you can then use one of the
|
205
|
+
# following syntax examples:
|
206
|
+
#
|
207
|
+
# Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc"
|
208
|
+
# Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc",
|
209
|
+
# :latitude_attr => "latit", :longitude_attr => "longit"
|
210
|
+
#
|
211
|
+
# The first example applies when your latitude and longitude attributes
|
212
|
+
# are named any of lat, latitude, lng, lon, long or longitude. If that's not
|
213
|
+
# the case, you will need to explicitly state them in your search, _or_
|
214
|
+
# you can do so in your model:
|
215
|
+
#
|
216
|
+
# define_index do
|
217
|
+
# has :latit # Float column, stored in radians
|
218
|
+
# has :longit # Float column, stored in radians
|
219
|
+
#
|
220
|
+
# set_property :latitude_attr => "latit"
|
221
|
+
# set_property :longitude_attr => "longit"
|
222
|
+
# end
|
223
|
+
#
|
224
|
+
# Now, geo-location searching really only has an effect if you have a
|
225
|
+
# filter, sort or grouping clause related to it - otherwise it's just a
|
226
|
+
# normal search, and _will not_ return a distance value otherwise. To
|
227
|
+
# make use of the positioning difference, use the special attribute
|
228
|
+
# "@geodist" in any of your filters or sorting or grouping clauses.
|
229
|
+
#
|
230
|
+
# And don't forget - both the latitude and longitude you use in your
|
231
|
+
# search, and the values in your indexes, need to be stored as a float in radians,
|
232
|
+
# _not_ degrees. Keep in mind that if you do this conversion in SQL
|
233
|
+
# you will need to explicitly declare a column type of :float.
|
234
|
+
#
|
235
|
+
# define_index do
|
236
|
+
# has 'RADIANS(lat)', :as => :lat, :type => :float
|
237
|
+
# # ...
|
238
|
+
# end
|
239
|
+
#
|
240
|
+
# Once you've got your results set, you can access the distances as
|
241
|
+
# follows:
|
242
|
+
#
|
243
|
+
# @results.each_with_geodist do |result, distance|
|
244
|
+
# # ...
|
245
|
+
# end
|
246
|
+
#
|
247
|
+
# The distance value is returned as a float, representing the distance in
|
248
|
+
# metres.
|
249
|
+
#
|
250
|
+
# == Handling a Stale Index
|
251
|
+
#
|
252
|
+
# Especially if you don't use delta indexing, you risk having records in the
|
253
|
+
# Sphinx index that are no longer in the database. By default, those will simply
|
254
|
+
# come back as nils:
|
255
|
+
#
|
256
|
+
# >> pat_user.delete
|
257
|
+
# >> User.search("pat")
|
258
|
+
# Sphinx Result: [1,2]
|
259
|
+
# => [nil, <#User id: 2>]
|
260
|
+
#
|
261
|
+
# (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.)
|
262
|
+
#
|
263
|
+
# You can simply Array#compact these results or handle the nils in some other way, but
|
264
|
+
# Sphinx will still report two results, and the missing records may upset your layout.
|
265
|
+
#
|
266
|
+
# If you pass :retry_stale => true to a single-model search, missing records will
|
267
|
+
# cause Thinking Sphinx to retry the query but excluding those records. Since search
|
268
|
+
# is paginated, the new search could potentially include missing records as well, so by
|
269
|
+
# default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five
|
270
|
+
# times, and so on. If there are still missing ids on the last retry, they are
|
271
|
+
# shown as nils.
|
272
|
+
#
|
273
|
+
def search(*args)
  # Split a copy of the arguments into the search terms and the trailing
  # options hash, leaving the caller's array as it was.
  terms   = args.clone
  options = terms.extract_options!

  # retry_search_on_stale_index may run this block more than once, adding
  # stale ids to options[:without_ids] between attempts.
  retry_search_on_stale_index(terms, options) do
    results, client = search_results(*(terms + [options]))

    if results[:error]
      ::ActiveRecord::Base.logger.error(
        "Sphinx Error: #{results[:error]}"
      )
    end

    page_number = (options[:page] || 1).to_i

    ThinkingSphinx::Collection.create_from_results(results, page_number, client.limit, options)
  end
end
|
290
|
+
|
291
|
+
# Runs the given search block, retrying it when it raises StaleIdsException.
#
# query   - the search terms; not used by this method itself.
# options - the search options hash. :retry_stale selects the retry budget
#           (true => 3, nil/false => 0, anything else => its to_i). The hash
#           is updated in place: :raise_on_stale is set before every attempt
#           and :without_ids accumulates the ids reported as stale.
#
# Returns whatever the block returns on the attempt that does not raise.
# Re-raises StaleIdsException if it is raised when no retries are left, so
# the retry loop is always bounded.
def retry_search_on_stale_index(query, options, &block)
  stale_ids = []
  # `when x then y` parses on every Ruby version; the older `when x: y`
  # colon form is a syntax error from Ruby 1.9 onwards.
  stale_retries_left = case options[:retry_stale]
                       when true       then 3 # default to three retries
                       when nil, false then 0 # no retries
                       else options[:retry_stale].to_i
                       end

  begin
    # Passing this in an option so Collection.create_from_results can see it.
    # It should only raise on stale records if there are any retries left.
    options[:raise_on_stale] = stale_retries_left > 0
    block.call
  rescue StaleIdsException => e
    # Budget spent: propagate instead of retrying without limit.
    raise unless stale_retries_left > 0

    stale_retries_left -= 1

    stale_ids |= e.ids # For logging
    options[:without_ids] = Array(options[:without_ids]) | e.ids # Actual exclusion

    tries = stale_retries_left
    ::ActiveRecord::Base.logger.debug("Sphinx Stale Ids (%s %s left): %s" % [
      tries, (tries == 1 ? 'try' : 'tries'), stale_ids.join(', ')
    ])

    retry
  end
end
|
320
|
+
|
321
|
+
# Runs the search and returns the :total_found value from the result hash,
# or 0 when that value is missing.
def count(*args)
  results = search_results(*args.clone).first
  results[:total_found] || 0
end
|
325
|
+
|
326
|
+
# Checks if a document with the given id exists within a specific index.
|
327
|
+
# Expected parameters:
|
328
|
+
#
|
329
|
+
# - ID of the document
|
330
|
+
# - Index to check within
|
331
|
+
# - Options hash (defaults to {})
|
332
|
+
#
|
333
|
+
# Example:
|
334
|
+
#
|
335
|
+
# ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User)
|
336
|
+
#
|
337
|
+
def search_for_id(*args)
  options            = args.extract_options!
  document_id, index = args
  client             = client_from_options options

  query, filters = search_conditions(options[:class], options[:conditions] || {})
  client.filters += filters
  client.match_mode = :extended unless query.empty?
  # Restrict the query to exactly the one document id being checked.
  client.id_range = document_id..document_id

  begin
    matches = client.query(query, index)[:matches]
    return matches.length > 0
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end
end
|
354
|
+
|
355
|
+
# Builds a ThinkingSphinx::FacetCollection for the given search arguments by
# running one grouped search per facet of each class involved.
#
# When no :class option is given, the classes are taken from a search grouped
# by "class_crc", and the per-class counts are stored under the :class key.
def facets(*args)
  # The collection is built from the full argument list, before the options
  # hash is removed from +args+.
  facet_results = ThinkingSphinx::FacetCollection.new args
  options = args.extract_options!.clone
  options.merge! :group_function => :attr

  # NOTE(review): when :class is nil this also discards any :classes that were
  # passed in - confirm that is intended.
  klasses = options[:class].nil? ? [] : (options[:classes] || [options[:class]])

  # no classes specified so get classes from resultset
  if klasses.empty?
    options[:group_by] = "class_crc"
    by_class = search(*(args + [options]))

    facet_results[:class] = {}
    by_class.each_with_groupby_and_count do |result, group, count|
      facet_results[:class][result.class.name] = count
      klasses << result.class
    end
  end

  klasses.each do |klass|
    klass.sphinx_facets.each do |facet|
      # The :class facet is skipped unless :include_class_facets is set.
      next if facet.name == :class && !options[:include_class_facets]

      grouped = search(*(args + [options.merge(:group_by => facet.attribute_name)]))
      facet_results.add_from_results facet, grouped
    end
  end

  facet_results
end
|
388
|
+
|
389
|
+
private
|
390
|
+
|
391
|
+
|
392
|
+
# This method handles the common search functionality, and returns both
|
393
|
+
# the result hash and the client. Not super elegant, but it'll do for
|
394
|
+
# the moment.
|
395
|
+
#
|
396
|
+
def search_results(*args)
  options = args.extract_options!
  query   = args.join(' ')
  client  = client_from_options options

  query = star_query(query, options[:star]) if options[:star]

  # Conditions contribute extra "@field value" query text and/or filters.
  extra_query, filters = search_conditions(options[:class], options[:conditions] || {})
  client.filters += filters
  client.match_mode = :extended unless extra_query.empty?
  # Strip because "" and " " are not equivalent queries.
  query = "#{query} #{extra_query}".strip

  set_sort_options! client, options

  # Pagination: :per_page overrides the client limit, :page defaults to 1.
  client.limit = options[:per_page].to_i if options[:per_page]
  page_number = (options[:page] || 1).to_i
  client.offset = (page_number - 1) * client.limit

  begin
    ::ActiveRecord::Base.logger.debug "Sphinx: #{query}"
    results = client.query query
    matched_ids = results[:matches].collect { |m| m[:attributes]["sphinx_internal_id"] }
    ::ActiveRecord::Base.logger.debug "Sphinx Result: #{matched_ids.inspect}"
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  [results, client]
end
|
427
|
+
|
428
|
+
# Set all the appropriate settings for the client, using the provided
|
429
|
+
# options hash.
|
430
|
+
#
|
431
|
+
def client_from_options(options = {})
  config = ThinkingSphinx::Configuration.instance
  client = Riddle::Client.new config.address, config.port
  klass  = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}

  # Note: +options+ is updated in place below (:max_matches, :index_weights
  # and :classes).

  # The Riddle default is per-query max_matches=1000. If the per-server max
  # in sphinx.yml is smaller, the default has to be overridden or searches
  # fail with errors like
  # "per-query max_matches=1000 out of bounds (per-server max_matches=200)"
  server_max = config.configuration.searchd.max_matches
  options[:max_matches] ||= server_max if server_max

  # Turn :index_weights => { "foo" => 2, User => 1 }
  # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
  weights = options[:index_weights]
  if weights
    expanded = {}
    weights.each do |index, weight|
      if index.is_a?(Class)
        name = ThinkingSphinx::Index.name(index)
        expanded["#{name}_core"]  = weight
        expanded["#{name}_delta"] = weight
      else
        expanded[index] = weight
      end
    end
    options[:index_weights] = expanded
  end

  # For each setting: search option first, then the index option, then the
  # value the client already holds.
  settings = [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ]
  settings.each do |key|
    value = options[key] || index_options[key] || client.send(key)
    client.send("#{key}=".to_sym, value)
  end

  options[:classes] = [klass] if klass

  if client.anchor.empty?
    client.anchor = anchor_conditions(klass, options) || {}
  end

  client.filters << Riddle::Client::Filter.new("sphinx_deleted", [0])

  # class filters
  if options[:classes]
    crcs = options[:classes].collect { |k| k.to_crc32s }.flatten
    client.filters << Riddle::Client::Filter.new("class_crc", crcs)
  end

  # normal attribute filters
  if options[:with]
    client.filters += options[:with].collect do |attr, val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val)
    end
  end

  # exclusive attribute filters
  if options[:without]
    client.filters += options[:without].collect do |attr, val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val), true
    end
  end

  # exclusive attribute filter on primary key
  if options[:without_ids]
    client.filters += Array(options[:without_ids]).collect do |id|
      Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
    end
  end

  client
end
|
502
|
+
|
503
|
+
# Wraps each token of +query+ in asterisks and returns the new string.
#
# query        - the search string.
# custom_token - a Regexp describing one token; anything else (including the
#                default nil) falls back to /\w+/u.
#
# Left untouched: tokens directly preceded by an @, ~ or / operator, quoted
# phrases, and tokens that already have an asterisk next to them.
def star_query(query, custom_token = nil)
  token = custom_token.is_a?(Regexp) ? custom_token : /\w+/u

  query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
    # Capture the match data first: the pre.match call below replaces it.
    match  = Regexp.last_match
    pre    = match.pre_match
    proper = match[0]
    post   = match.post_match

    is_operator = pre.match(%r{(\W|^)[@~/]\Z}) # E.g. "@foo", "/2", "~3", but not as part of a token
    is_quote    = proper.start_with?('"') && proper.end_with?('"') # E.g. "foo bar", with quotes
    has_star    = pre.end_with?("*") || post.start_with?("*")

    if is_operator || is_quote || has_star
      proper
    else
      "*#{proper}*"
    end
  end
end
|
518
|
+
|
519
|
+
# Converts a filter value into the form handed to Riddle::Client::Filter.
#
# Ranges are returned as they are unless they start with a Time, in which
# case both ends are converted with #timestamp. Arrays have their Time
# members converted. Anything else is wrapped with Array().
def filter_value(value)
  if value.is_a?(Range)
    return value unless value.first.is_a?(Time)

    timestamp(value.first)..timestamp(value.last)
  elsif value.is_a?(Array)
    value.map { |item| item.is_a?(Time) ? timestamp(item) : item }
  else
    Array(value)
  end
end
|
529
|
+
|
530
|
+
# Returns the integer timestamp for a Time object.
|
531
|
+
#
|
532
|
+
# If using Rails 2.1+, need to handle timezones to translate them back to
|
533
|
+
# UTC, as that's what datetimes will be stored as by MySQL.
|
534
|
+
#
|
535
|
+
# in_time_zone is a method that was added for the timezone support in
|
536
|
+
# Rails 2.1, which is why it's used for testing. I'm sure there's better
|
537
|
+
# ways, but this does the job.
|
538
|
+
#
|
539
|
+
def timestamp(value)
  # getutc returns a UTC copy. Time#utc converts the receiver itself, which
  # would silently change the Time object the caller passed in.
  value.respond_to?(:in_time_zone) ? value.getutc.to_i : value.to_i
end
|
542
|
+
|
543
|
+
# Translate field and attribute conditions to the relevant search string
|
544
|
+
# and filters.
|
545
|
+
#
|
546
|
+
def search_conditions(klass, conditions={})
  # Names of every attribute across the class's indexes; empty without a class.
  attributes = []
  if klass
    attributes = klass.sphinx_indexes.collect { |index|
      index.attributes.collect { |attrib| attrib.unique_name }
    }.flatten
  end

  terms   = []
  filters = []

  conditions.each do |key, val|
    # Conditions on known attributes become filters; everything else becomes
    # an "@field value" term in the query string.
    if attributes.include?(key.to_sym)
      filters << Riddle::Client::Filter.new(key.to_s, filter_value(val))
    else
      terms << "@#{key} #{val}"
    end
  end

  [terms.join(' '), filters]
end
|
566
|
+
|
567
|
+
# Return the appropriate latitude and longitude values, depending on
|
568
|
+
# whether the relevant attributes have been defined, and also whether
|
569
|
+
# there's actually any values.
|
570
|
+
#
|
571
|
+
def anchor_conditions(klass, options)
  attributes = []
  lat_attr   = nil
  lon_attr   = nil

  if klass
    attributes = klass.sphinx_indexes.collect { |index|
      index.attributes.collect { |attrib| attrib.unique_name }
    }.flatten
    # First latitude/longitude attribute name set on any of the indexes.
    lat_attr = klass.sphinx_indexes.collect { |index|
      index.options[:latitude_attr]
    }.compact.first
    lon_attr = klass.sphinx_indexes.collect { |index|
      index.options[:longitude_attr]
    }.compact.first
  end

  # A search option wins over the index setting; failing both, fall back to
  # the first conventional attribute name the indexes define.
  lat_attr = options[:latitude_attr] || lat_attr
  [:lat, :latitude].each do |name|
    lat_attr ||= name if attributes.include?(name)
  end

  lon_attr = options[:longitude_attr] || lon_attr
  [:lng, :lon, :long, :longitude].each do |name|
    lon_attr ||= name if attributes.include?(name)
  end

  # :geo => [lat, lon] takes precedence over separate :lat / :lon options.
  lat, lon = options[:lat], options[:lon]
  if options[:geo]
    lat, lon = options[:geo].first, options[:geo].last
  end

  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr.to_s,
    :latitude            => lat,
    :longitude_attribute => lon_attr.to_s,
    :longitude           => lon
  }
end
|
609
|
+
|
610
|
+
# Set the sort options using the :order key as well as the appropriate
|
611
|
+
# Riddle settings.
|
612
|
+
#
|
613
|
+
def set_sort_options!(client, options)
  klass         = options[:class]
  fields        = []
  index_options = {}

  if klass
    fields = klass.sphinx_indexes.collect { |index|
      index.fields.collect { |field| field.unique_name }
    }.flatten
    index_options = klass.sphinx_index_options
  end

  order = options[:order] || index_options[:order]

  if order.is_a?(Symbol)
    # A symbol sorts ascending unless a sort mode other than :relevance was set.
    client.sort_mode = :attr_asc if [:relevance, nil].include?(client.sort_mode)
    # Fields are sorted through their "<name>_sort" attribute.
    client.sort_by = fields.include?(order) ? "#{order}_sort" : order.to_s
  elsif order.is_a?(String)
    client.sort_mode = :extended
    client.sort_by   = sorted_fields_to_attributes(order, fields)
  end

  # Translate the :asc / :desc shorthands into the attr_* mode names.
  case client.sort_mode
  when :asc  then client.sort_mode = :attr_asc
  when :desc then client.sort_mode = :attr_desc
  end
end
|
639
|
+
|
640
|
+
# Search through a collection of fields and translate any appearances
|
641
|
+
# of them in a string to their attribute equivalent for sorting.
|
642
|
+
#
|
643
|
+
def sorted_fields_to_attributes(string, fields)
  # Build the result on a copy: the caller's order string is left as it was,
  # and frozen strings are accepted.
  fields.inject(string.dup) do |clause, field|
    name = field.to_s
    # Match the field name only as a whole word in the clause. The name is
    # escaped so it is always treated as literal text in the pattern.
    clause.gsub(/(^|\s)#{Regexp.escape(name)}(,?\s|$)/) do |match|
      match.gsub name, "#{name}_sort"
    end
  end
end
|
652
|
+
end
|
653
|
+
end
|
654
|
+
end
|