sphinx 0.9.9.2117
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/README.rdoc +243 -0
- data/Rakefile +45 -0
- data/VERSION.yml +5 -0
- data/init.rb +1 -0
- data/lib/sphinx/buffered_io.rb +26 -0
- data/lib/sphinx/client.rb +2426 -0
- data/lib/sphinx/constants.rb +179 -0
- data/lib/sphinx/indifferent_access.rb +152 -0
- data/lib/sphinx/request.rb +121 -0
- data/lib/sphinx/response.rb +71 -0
- data/lib/sphinx/server.rb +170 -0
- data/lib/sphinx/timeout.rb +31 -0
- data/lib/sphinx.rb +51 -0
- data/spec/client_response_spec.rb +170 -0
- data/spec/client_spec.rb +669 -0
- data/spec/client_validations_spec.rb +859 -0
- data/spec/fixtures/default_search.php +8 -0
- data/spec/fixtures/default_search_index.php +8 -0
- data/spec/fixtures/excerpt_custom.php +11 -0
- data/spec/fixtures/excerpt_default.php +8 -0
- data/spec/fixtures/excerpt_flags.php +12 -0
- data/spec/fixtures/field_weights.php +9 -0
- data/spec/fixtures/filter.php +9 -0
- data/spec/fixtures/filter_exclude.php +9 -0
- data/spec/fixtures/filter_float_range.php +9 -0
- data/spec/fixtures/filter_float_range_exclude.php +9 -0
- data/spec/fixtures/filter_range.php +9 -0
- data/spec/fixtures/filter_range_exclude.php +9 -0
- data/spec/fixtures/filter_range_int64.php +10 -0
- data/spec/fixtures/filter_ranges.php +10 -0
- data/spec/fixtures/filters.php +10 -0
- data/spec/fixtures/filters_different.php +13 -0
- data/spec/fixtures/geo_anchor.php +9 -0
- data/spec/fixtures/group_by_attr.php +9 -0
- data/spec/fixtures/group_by_attrpair.php +9 -0
- data/spec/fixtures/group_by_day.php +9 -0
- data/spec/fixtures/group_by_day_sort.php +9 -0
- data/spec/fixtures/group_by_month.php +9 -0
- data/spec/fixtures/group_by_week.php +9 -0
- data/spec/fixtures/group_by_year.php +9 -0
- data/spec/fixtures/group_distinct.php +10 -0
- data/spec/fixtures/id_range.php +9 -0
- data/spec/fixtures/id_range64.php +9 -0
- data/spec/fixtures/index_weights.php +9 -0
- data/spec/fixtures/keywords.php +8 -0
- data/spec/fixtures/limits.php +9 -0
- data/spec/fixtures/limits_cutoff.php +9 -0
- data/spec/fixtures/limits_max.php +9 -0
- data/spec/fixtures/limits_max_cutoff.php +9 -0
- data/spec/fixtures/match_all.php +9 -0
- data/spec/fixtures/match_any.php +9 -0
- data/spec/fixtures/match_boolean.php +9 -0
- data/spec/fixtures/match_extended.php +9 -0
- data/spec/fixtures/match_extended2.php +9 -0
- data/spec/fixtures/match_fullscan.php +9 -0
- data/spec/fixtures/match_phrase.php +9 -0
- data/spec/fixtures/max_query_time.php +9 -0
- data/spec/fixtures/miltiple_queries.php +12 -0
- data/spec/fixtures/ranking_bm25.php +9 -0
- data/spec/fixtures/ranking_fieldmask.php +9 -0
- data/spec/fixtures/ranking_matchany.php +9 -0
- data/spec/fixtures/ranking_none.php +9 -0
- data/spec/fixtures/ranking_proximity.php +9 -0
- data/spec/fixtures/ranking_proximity_bm25.php +9 -0
- data/spec/fixtures/ranking_wordcount.php +9 -0
- data/spec/fixtures/retries.php +9 -0
- data/spec/fixtures/retries_delay.php +9 -0
- data/spec/fixtures/select.php +9 -0
- data/spec/fixtures/set_override.php +11 -0
- data/spec/fixtures/sort_attr_asc.php +9 -0
- data/spec/fixtures/sort_attr_desc.php +9 -0
- data/spec/fixtures/sort_expr.php +9 -0
- data/spec/fixtures/sort_extended.php +9 -0
- data/spec/fixtures/sort_relevance.php +9 -0
- data/spec/fixtures/sort_time_segments.php +9 -0
- data/spec/fixtures/sphinxapi.php +1633 -0
- data/spec/fixtures/update_attributes.php +8 -0
- data/spec/fixtures/update_attributes_mva.php +8 -0
- data/spec/fixtures/weights.php +9 -0
- data/spec/spec_helper.rb +24 -0
- data/spec/sphinx/sphinx-id64.conf +67 -0
- data/spec/sphinx/sphinx.conf +67 -0
- data/spec/sphinx/sphinx_test.sql +88 -0
- data/sphinx.gemspec +127 -0
- metadata +142 -0
@@ -0,0 +1,2426 @@
|
|
1
|
+
module Sphinx
|
2
|
+
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
|
3
|
+
# daemon and perform requests.
|
4
|
+
#
|
5
|
+
# @example
|
6
|
+
# sphinx = Sphinx::Client.new
|
7
|
+
# result = sphinx.query('test')
|
8
|
+
# ids = result['matches'].map { |match| match['id'] }
|
9
|
+
# posts = Post.all :conditions => { :id => ids },
|
10
|
+
# :order => "FIELD(id,#{ids.join(',')})"
|
11
|
+
#
|
12
|
+
# docs = posts.map(&:body)
|
13
|
+
# excerpts = sphinx.build_excerpts(docs, 'index', 'test')
|
14
|
+
#
|
15
|
+
class Client
|
16
|
+
include Sphinx::Constants

#=================================================================
# Internal client state exposed to the rest of the API
#=================================================================

# List of searchd servers to connect to.
# @private
attr_reader :servers

# Connection timeout in seconds.
# @private
attr_reader :timeout

# Number of connection retries.
# @private
attr_reader :retries

# Request timeout in seconds.
# @private
attr_reader :reqtimeout

# Number of request retries.
# @private
attr_reader :reqretries

# Log debug/info/warn to the given Logger, defaults to nil.
# @private
attr_reader :logger
|
40
|
+
|
41
|
+
# Constructs a new <tt>Sphinx::Client</tt> instance with every query,
# connection, and reply option reset to its default value.
#
# @param [Logger] logger a logger object to put logs to. No logging
#   will be performed when not set.
#
def initialize(logger = nil)
  # Per-query settings.
  @offset        = 0                       # how many records to skip from result-set start
  @limit         = 20                      # how many records to return starting at offset
  @mode          = SPH_MATCH_ALL           # query matching mode
  @weights       = []                      # per-field weights (every field weighs 1 by default)
  @sort          = SPH_SORT_RELEVANCE      # match sorting mode
  @sortby        = ''                      # attribute to sort by (default is "")
  @min_id        = 0                       # min document ID to match (0 means no limit)
  @max_id        = 0                       # max document ID to match (0 means no limit)
  @filters       = []                      # search filters
  @groupby       = ''                      # group-by attribute name
  @groupfunc     = SPH_GROUPBY_DAY         # function to pre-process group-by attribute value with
  @groupsort     = '@group desc'           # group-by sorting clause
  @groupdistinct = ''                      # group-by count-distinct attribute
  @maxmatches    = 1000                    # max matches to retrieve
  @cutoff        = 0                       # cutoff to stop searching at (0 means none)
  @retrycount    = 0                       # distributed retries count
  @retrydelay    = 0                       # distributed retries delay
  @anchor        = []                      # geographical anchor point
  @indexweights  = []                      # per-index weights
  @ranker        = SPH_RANK_PROXIMITY_BM25 # ranking mode
  @maxquerytime  = 0                       # max query time in milliseconds (0 means unlimited)
  @fieldweights  = {}                      # per-field-name weights
  @overrides     = []                      # per-query attribute value overrides
  @select        = '*'                     # select-list (attributes or expressions, with optional aliases)

  # Per-reply fields (single-query case).
  @error     = ''    # last error message
  @warning   = ''    # last warning message
  @connerror = false # true when last error was a connection failure, false for remote errors

  @reqs       = [] # requests storage (multi-query case)
  @mbenc      = '' # stored mbstring encoding
  @timeout    = 0  # connect timeout
  @retries    = 1  # number of connect retries in case of emergency
  @reqtimeout = 0  # request timeout
  @reqretries = 1  # number of request retries in case of emergency

  # Per-client-object settings: searchd servers list and logger.
  @servers = [Sphinx::Server.new(self, 'localhost', 9312, false)].freeze
  @logger  = logger

  logger.info { "[sphinx] version: #{VERSION}, #{@servers.inspect}" } if logger
end
|
93
|
+
|
94
|
+
# Returns a string representation of the sphinx client object.
|
95
|
+
#
|
96
|
+
def inspect
|
97
|
+
params = {
|
98
|
+
:error => @error,
|
99
|
+
:warning => @warning,
|
100
|
+
:connect_error => @connerror,
|
101
|
+
:servers => @servers,
|
102
|
+
:connect_timeout => { :timeout => @timeout, :retries => @retries },
|
103
|
+
:request_timeout => { :timeout => @reqtimeout, :retries => @reqretries },
|
104
|
+
:retries => { :count => @retrycount, :delay => @retrydelay },
|
105
|
+
:limits => { :offset => @offset, :limit => @limit, :max => @maxmatches, :cutoff => @cutoff },
|
106
|
+
:max_query_time => @maxquerytime,
|
107
|
+
:overrides => @overrides,
|
108
|
+
:select => @select,
|
109
|
+
:match_mode => @mode,
|
110
|
+
:ranking_mode => @ranker,
|
111
|
+
:sort_mode => { :mode => @sort, :sortby => @sortby },
|
112
|
+
:weights => @weights,
|
113
|
+
:field_weights => @fieldweights,
|
114
|
+
:index_weights => @indexweights,
|
115
|
+
:id_range => { :min => @min_id, :max => @max_id },
|
116
|
+
:filters => @filters,
|
117
|
+
:geo_anchor => @anchor,
|
118
|
+
:group_by => { :attribute => @groupby, :func => @groupfunc, :sort => @groupsort },
|
119
|
+
:group_distinct => @groupdistinct
|
120
|
+
}
|
121
|
+
|
122
|
+
"<Sphinx::Client: %d servers, params: %s>" %
|
123
|
+
[@servers.length, params.inspect]
|
124
|
+
end
|
125
|
+
|
126
|
+
#=================================================================
|
127
|
+
# General API functions
|
128
|
+
#=================================================================
|
129
|
+
|
130
|
+
# Returns last error message, as a string, in human readable format. If there
|
131
|
+
# were no errors during the previous API call, empty string is returned.
|
132
|
+
#
|
133
|
+
# You should call it when any other function (such as {#query}) fails (typically,
|
134
|
+
# the failing function returns false). The returned string will contain the
|
135
|
+
# error description.
|
136
|
+
#
|
137
|
+
# The error message is not reset by this call; so you can safely call it
|
138
|
+
# several times if needed.
|
139
|
+
#
|
140
|
+
# @return [String] last error message.
|
141
|
+
#
|
142
|
+
# @example
|
143
|
+
# puts sphinx.last_error
|
144
|
+
#
|
145
|
+
# @see #last_warning
|
146
|
+
# @see #connect_error?
|
147
|
+
#
|
148
|
+
def last_error
|
149
|
+
@error
|
150
|
+
end
|
151
|
+
alias :GetLastError :last_error
|
152
|
+
|
153
|
+
# Returns last warning message, as a string, in human readable format. If there
|
154
|
+
# were no warnings during the previous API call, empty string is returned.
|
155
|
+
#
|
156
|
+
# You should call it to verify whether your request (such as {#query}) was
|
157
|
+
# completed but with warnings. For instance, search query against a distributed
|
158
|
+
# index might complete succesfully even if several remote agents timed out.
|
159
|
+
# In that case, a warning message would be produced.
|
160
|
+
#
|
161
|
+
# The warning message is not reset by this call; so you can safely call it
|
162
|
+
# several times if needed.
|
163
|
+
#
|
164
|
+
# @return [String] last warning message.
|
165
|
+
#
|
166
|
+
# @example
|
167
|
+
# puts sphinx.last_warning
|
168
|
+
#
|
169
|
+
# @see #last_error
|
170
|
+
# @see #connect_error?
|
171
|
+
#
|
172
|
+
def last_warning
|
173
|
+
@warning
|
174
|
+
end
|
175
|
+
alias :GetLastWarning :last_warning
|
176
|
+
|
177
|
+
# Checks whether the last error was a network error on API side, or a
|
178
|
+
# remote error reported by searchd. Returns true if the last connection
|
179
|
+
# attempt to searchd failed on API side, false otherwise (if the error
|
180
|
+
# was remote, or there were no connection attempts at all).
|
181
|
+
#
|
182
|
+
# @return [Boolean] the value indicating whether last error was a
|
183
|
+
# nework error on API side.
|
184
|
+
#
|
185
|
+
# @example
|
186
|
+
# puts "Connection failed!" if sphinx.connect_error?
|
187
|
+
#
|
188
|
+
# @see #last_error
|
189
|
+
# @see #last_warning
|
190
|
+
#
|
191
|
+
def connect_error?
|
192
|
+
@connerror || false
|
193
|
+
end
|
194
|
+
alias :IsConnectError :connect_error?
|
195
|
+
|
196
|
+
# Sets searchd host name and TCP port; all subsequent requests use the
# new settings. Defaults are 'localhost' and 9312.
#
# An absolute path to Sphinx's UNIX socket may be passed as +host+
# (either a plain path or a <tt>unix://</tt> URL); in that case +port+
# is ignored (pass +0+ or +nil+).
#
# @param [String] host the searchd host name or UNIX socket absolute path.
# @param [Integer] port the searchd port (ignored when a UNIX socket
#   path is given).
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_server('localhost', 9312)
#   sphinx.set_server('/opt/sphinx/var/run/sphinx.sock')
#
# @raise [ArgumentError] Occurred when parameters are invalid.
# @see #set_servers
# @see #set_connect_timeout
# @see #set_request_timeout
#
def set_server(host, port = 9312)
  raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)

  # An absolute path or a unix:// prefix selects a UNIX socket connection.
  path = if host[0] == ?/
    host
  elsif host[0, 7] == 'unix://'
    host[7..-1]
  else
    raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
    nil
  end

  # When a socket path was detected, host/port are meaningless.
  host = port = nil unless path.nil?

  @servers = [Sphinx::Server.new(self, host, port, path)].freeze
  logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
  self
end
alias :SetServer :set_server
|
237
|
+
|
238
|
+
# Sets the list of searchd servers. Each subsequent request uses the next
# server in the list (round-robin); when a server fails, the request may
# be retried on another one (see {#set_connect_timeout} and
# {#set_request_timeout}).
#
# Accepts an +Array+ of +Hash+es, each carrying <tt>:host</tt> and
# <tt>:port</tt> (TCP connection) or <tt>:path</tt> (absolute UNIX
# socket path).
#
# @param [Array<Hash>] servers an +Array+ of +Hash+ objects with servers parameters.
# @option servers [String] :host the searchd host name or UNIX socket absolute path.
# @option servers [String] :path the searchd UNIX socket absolute path.
# @option servers [Integer] :port (9312) the searchd port (ignored when a
#   UNIX socket path is specified).
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_servers([
#     { :host => 'browse01.local' }, # default port is 9312
#     { :host => 'browse02.local', :port => 9312 },
#     { :path => '/opt/sphinx/var/run/sphinx.sock' }
#   ])
#
# @raise [ArgumentError] Occurred when parameters are invalid.
# @see #set_server
# @see #set_connect_timeout
# @see #set_request_timeout
#
def set_servers(servers)
  raise ArgumentError, '"servers" argument must be Array' unless servers.kind_of?(Array)
  raise ArgumentError, '"servers" argument must be not empty' if servers.empty?

  @servers = servers.map do |spec|
    raise ArgumentError, '"servers" argument must be Array of Hashes' unless spec.kind_of?(Hash)

    spec = spec.with_indifferent_access

    host = spec[:path] || spec[:host]
    port = spec[:port] || 9312
    path = nil
    raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)

    # An absolute path or a unix:// prefix selects a UNIX socket connection.
    if host[0] == ?/
      path = host
    elsif host[0, 7] == 'unix://'
      path = host[7..-1]
    else
      raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
    end

    # When a socket path was detected, host/port are meaningless.
    host = port = nil unless path.nil?

    Sphinx::Server.new(self, host, port, path)
  end.freeze
  logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
  self
end
alias :SetServers :set_servers
|
297
|
+
|
298
|
+
# Sets the time allowed to spend connecting to the server before giving
# up, along with the number of connection retries to perform.
#
# With multiple servers configured through {#set_servers} and +retries+
# greater than 1, the library tries the next server in the list; with a
# single server it reconnects to it up to +retries+ times.
#
# This timeout applies to connection establishing only, never to regular
# API requests (see {#set_request_timeout} for those).
#
# @param [Integer] timeout a connection timeout in seconds.
# @param [Integer] retries number of connect retries.
# @return [Sphinx::Client] self.
#
# @example Set connection timeout to 1 second and number of retries to 5
#   sphinx.set_connect_timeout(1, 5)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
# @see #set_server
# @see #set_servers
# @see #set_request_timeout
#
def set_connect_timeout(timeout, retries = 1)
  raise ArgumentError, '"timeout" argument must be Integer' unless timeout.kind_of?(Integer)
  raise ArgumentError, '"retries" argument must be Integer' unless retries.kind_of?(Integer)
  raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0

  @timeout, @retries = timeout, retries
  self
end
alias :SetConnectTimeout :set_connect_timeout
|
335
|
+
|
336
|
+
# Sets the time allowed to spend performing a request to the server
# before giving up, along with the number of request retries to perform.
#
# With +retries+ greater than 1, the library retries the request on the
# same server (with a full reconnect); if the reconnect itself fails,
# behavior is governed by the {#set_connect_timeout} settings.
#
# This timeout applies to request performing only, never to connection
# establishing (see {#set_connect_timeout} for that).
#
# @param [Integer] timeout a request timeout in seconds.
# @param [Integer] retries number of request retries.
# @return [Sphinx::Client] self.
#
# @example Set request timeout to 1 second and number of retries to 5
#   sphinx.set_request_timeout(1, 5)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
# @see #set_server
# @see #set_servers
# @see #set_connect_timeout
#
def set_request_timeout(timeout, retries = 1)
  raise ArgumentError, '"timeout" argument must be Integer' unless timeout.kind_of?(Integer)
  raise ArgumentError, '"retries" argument must be Integer' unless retries.kind_of?(Integer)
  raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0

  @reqtimeout, @reqretries = timeout, retries
  self
end
alias :SetRequestTimeout :set_request_timeout
|
373
|
+
|
374
|
+
# Sets distributed retry count and delay.
#
# On temporary failures searchd attempts up to +count+ retries per agent,
# waiting +delay+ milliseconds between them. Retries are disabled by
# default. Note this does not make the API itself retry — it only tells
# searchd to do so; temporary failures include connection failures and
# maxed out (too busy) remote agents.
#
# @param [Integer] count a number of retries to perform.
# @param [Integer] delay a delay between the retries, in milliseconds.
# @return [Sphinx::Client] self.
#
# @example Perform 5 retries with 200 ms between them
#   sphinx.set_retries(5, 200)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
# @see #set_connect_timeout
# @see #set_request_timeout
#
def set_retries(count, delay = 0)
  raise ArgumentError, '"count" argument must be Integer' unless count.kind_of?(Integer)
  raise ArgumentError, '"delay" argument must be Integer' unless delay.kind_of?(Integer)

  @retrycount, @retrydelay = count, delay
  self
end
alias :SetRetries :set_retries
|
403
|
+
|
404
|
+
#=================================================================
|
405
|
+
# General query settings
|
406
|
+
#=================================================================
|
407
|
+
|
408
|
+
# Sets offset into the server-side result set (+offset+) and the amount
# of matches to return starting from that offset (+limit+); optionally
# caps the server-side result set size (+max+) and the amount of matches
# to stop searching at (+cutoff+). All values must be non-negative
# integers.
#
# The first two parameters behave like a MySQL LIMIT clause: searchd
# returns at most +limit+ matches starting from match number +offset+
# (defaults are 0 and 20).
#
# +max+ bounds how many best matches searchd keeps in RAM while
# searching; all matching documents are still processed, ranked,
# filtered and sorted regardless. Note there is also a per-server
# +max_matches+ in the config file, and the per-query value may not
# exceed it. The default is 1000, which is normally plenty — further
# sorting or filtering is far more efficient on the Sphinx side than in
# the application.
#
# +cutoff+ is an advanced performance control: it forcibly stops the
# query once that many matches have been found and processed.
#
# @param [Integer] offset an offset into server-side result set.
# @param [Integer] limit an amount of matches to return.
# @param [Integer] max a maximum server-side result set size.
# @param [Integer] cutoff a threshold amount of matches to stop searching at.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_limits(100, 50, 1000, 5000)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
def set_limits(offset, limit, max = 0, cutoff = 0)
  # Type checks first, with the argument name embedded in each message.
  { 'offset' => offset, 'limit' => limit, 'max' => max, 'cutoff' => cutoff }.each do |name, value|
    raise ArgumentError, "\"#{name}\" argument must be Integer" unless value.kind_of?(Integer)
  end

  raise ArgumentError, '"offset" argument should be greater or equal to zero' unless offset >= 0
  raise ArgumentError, '"limit" argument should be greater to zero' unless limit > 0
  raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
  raise ArgumentError, '"cutoff" argument should be greater or equal to zero' unless cutoff >= 0

  @offset = offset
  @limit  = limit
  # Zero means "keep the current value" for both optional caps.
  @maxmatches = max if max > 0
  @cutoff     = cutoff if cutoff > 0
  self
end
alias :SetLimits :set_limits
|
469
|
+
|
470
|
+
# Sets maximum search query time in milliseconds. The parameter must be
# a non-negative integer; the default of +0+ means "do not limit".
#
# Similar to the +cutoff+ setting from {#set_limits}, but bounds elapsed
# query time instead of processed matches count. When a search spans
# several local indexes, the limit applies to each index separately.
#
# @param [Integer] max maximum search query time in milliseconds.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_max_query_time(200)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
def set_max_query_time(max)
  unless max.kind_of?(Integer)
    raise ArgumentError, '"max" argument must be Integer'
  end
  unless max >= 0
    raise ArgumentError, '"max" argument should be greater or equal to zero'
  end

  @maxquerytime = max
  self
end
alias :SetMaxQueryTime :set_max_query_time
|
495
|
+
|
496
|
+
# Sets temporary (per-query) per-document attribute value overrides.
# Only scalar attributes are supported. +values+ must be a +Hash+ that
# maps document IDs to overridden attribute values.
#
# Overrides let you "temporarily" update attribute values for some
# documents within a single query, leaving all other queries unaffected
# — useful for personalized data (e.g. boosting posts recommended by the
# current user's friends) that must not leak into everyone's searches.
#
# The attribute type may be given as a String ("integer", "float", etc),
# a Symbol (:integer, :float, etc), or an Integer constant
# (SPH_ATTR_INTEGER, SPH_ATTR_FLOAT, etc).
#
# NOTE: this method previously matched the type argument against
# +Fixnum+, which was removed in Ruby 3.2 (deprecated since 2.4) and
# raised NameError there; it now matches against +Integer+, which is
# behavior-compatible on all Ruby versions.
#
# @param [String, Symbol] attribute an attribute name to override values of.
# @param [Integer, String, Symbol] attrtype attribute type.
# @param [Hash] values a +Hash+ that maps document IDs to overridden attribute values.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_override(:friends_weight, :integer, {123 => 1, 456 => 1, 789 => 1})
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setoverride Section 6.2.3, "SetOverride"
#
def set_override(attribute, attrtype, values)
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)

  case attrtype
    when String, Symbol
      begin
        # Resolve symbolic type names against the SPH_ATTR_* constants.
        attrtype = self.class.const_get("SPH_ATTR_#{attrtype.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid"
      end
    when Integer
      # Numeric types must fall inside the known SPH_ATTR_* range.
      raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid" unless (SPH_ATTR_INTEGER..SPH_ATTR_BIGINT).include?(attrtype)
    else
      raise ArgumentError, '"attrtype" argument must be Fixnum, String, or Symbol'
  end

  raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)

  values.each do |id, value|
    raise ArgumentError, '"values" argument must be Hash map of Integer to Integer or Time' unless id.kind_of?(Integer)
    case attrtype
      when SPH_ATTR_TIMESTAMP
        raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Integer) or value.kind_of?(Time)
      when SPH_ATTR_FLOAT
        raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Numeric)
      else
        # SPH_ATTR_INTEGER, SPH_ATTR_ORDINAL, SPH_ATTR_BOOL, SPH_ATTR_BIGINT
        raise ArgumentError, '"values" argument must be Hash map of Integer to Integer' unless value.kind_of?(Integer)
    end
  end

  @overrides << { 'attr' => attribute.to_s, 'type' => attrtype, 'values' => values }
  self
end
alias :SetOverride :set_override
|
563
|
+
|
564
|
+
# Sets the select clause, listing specific attributes to fetch, and
# expressions to compute and fetch. Clause syntax mimics SQL.
#
# {#set_select} works like the fragment of an SQL query between +SELECT+
# and +FROM+: it picks which attributes (columns) to return and which
# expressions over those columns to compute and fetch. A difference from
# SQL is that every computed expression must be aliased to a valid
# identifier (letters and digits) with the +AS+ keyword, so the result
# can always be referenced under a "normal" name in sorting, filtering,
# and grouping clauses.
#
# Everything else is basically identical to SQL: star ('*') is supported,
# functions are supported, and an arbitrary number of expressions may be
# listed. Starting with version 0.9.9-rc2, aggregate functions
# (<tt>AVG()</tt>, <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are
# supported when using <tt>GROUP BY</tt>.
#
# Expression sorting (Section 4.5, "SPH_SORT_EXPR mode") and geodistance
# functions ({#set_geo_anchor}) are internally implemented via this same
# mechanism, under the magic names '<tt>@expr</tt>' and '<tt>@geodist</tt>'.
#
# @param [String] select a select clause, listing specific attributes to fetch.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_select('*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight')
#   sphinx.set_select('exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd, IF(age>40,1,0) AS over40')
#   sphinx.set_select('*, AVG(price) AS avgprice')
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#sort-expr Section 4.5, "SPH_SORT_EXPR mode"
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setselect Section 6.2.4, "SetSelect"
#
def set_select(select)
  unless select.kind_of?(String)
    raise ArgumentError, '"select" argument must be String'
  end

  @select = select
  self
end
alias :SetSelect :set_select
|
611
|
+
|
612
|
+
#=================================================================
|
613
|
+
# Full-text search query settings
|
614
|
+
#=================================================================
|
615
|
+
|
616
|
+
# Sets full-text query matching mode.
#
# Parameter must be an +Integer+ constant specifying one of the known modes
# (+SPH_MATCH_ALL+, +SPH_MATCH_ANY+, etc), +String+ with identifier (<tt>"all"</tt>,
# <tt>"any"</tt>, etc), or a +Symbol+ (<tt>:all</tt>, <tt>:any</tt>, etc).
#
# @param [Integer, String, Symbol] mode full-text query matching mode.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_match_mode(Sphinx::SPH_MATCH_ALL)
#   sphinx.set_match_mode(:all)
#   sphinx.set_match_mode('all')
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
#
def set_match_mode(mode)
  case mode
    when String, Symbol
      # Map :all / "all" onto the SPH_MATCH_ALL constant, and so on.
      begin
        mode = self.class.const_get("SPH_MATCH_#{mode.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
      end
    when Integer
      # NOTE: matches on Integer — the legacy Fixnum class was removed in Ruby 2.4+.
      raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_MATCH_ALL..SPH_MATCH_EXTENDED2).include?(mode)
    else
      raise ArgumentError, '"mode" argument must be Integer, String, or Symbol'
  end

  @mode = mode
  self
end
alias :SetMatchMode :set_match_mode
|
653
|
+
|
654
|
+
# Sets ranking mode. Only available in +SPH_MATCH_EXTENDED2+
# matching mode at the time of this writing. Parameter must be a
# constant specifying one of the known modes.
#
# You can specify ranking mode as String ("proximity_bm25", "bm25", etc),
# Symbol (:proximity_bm25, :bm25, etc), or
# Integer constant (SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, etc).
#
# @param [Integer, String, Symbol] ranker ranking mode.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_ranking_mode(Sphinx::SPH_RANK_BM25)
#   sphinx.set_ranking_mode(:bm25)
#   sphinx.set_ranking_mode('bm25')
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setrankingmode Section 6.3.2, "SetRankingMode"
#
def set_ranking_mode(ranker)
  case ranker
    when String, Symbol
      # Map :bm25 / "bm25" onto the SPH_RANK_BM25 constant, and so on.
      begin
        ranker = self.class.const_get("SPH_RANK_#{ranker.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid"
      end
    when Integer
      # NOTE: matches on Integer — the legacy Fixnum class was removed in Ruby 2.4+.
      raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid" unless (SPH_RANK_PROXIMITY_BM25..SPH_RANK_FIELDMASK).include?(ranker)
    else
      raise ArgumentError, '"ranker" argument must be Integer, String, or Symbol'
  end

  @ranker = ranker
  self
end
alias :SetRankingMode :set_ranking_mode
|
694
|
+
|
695
|
+
# Set matches sorting mode.
#
# You can specify sorting mode as String ("relevance", "attr_desc", etc),
# Symbol (:relevance, :attr_desc, etc), or
# Integer constant (SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, etc).
#
# @param [Integer, String, Symbol] mode matches sorting mode.
# @param [String] sortby sorting clause, with the syntax depending on
#   specific mode. Should be specified unless sorting mode is
#   +SPH_SORT_RELEVANCE+.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_sort_mode(Sphinx::SPH_SORT_ATTR_ASC, 'attr')
#   sphinx.set_sort_mode(:attr_asc, 'attr')
#   sphinx.set_sort_mode('attr_asc', 'attr')
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#sorting-modes Section 4.5, "Sorting modes"
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setsortmode Section 6.3.3, "SetSortMode"
#
def set_sort_mode(mode, sortby = '')
  case mode
    when String, Symbol
      # Map :attr_asc / "attr_asc" onto the SPH_SORT_ATTR_ASC constant, and so on.
      begin
        mode = self.class.const_get("SPH_SORT_#{mode.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
      end
    when Integer
      # NOTE: matches on Integer — the legacy Fixnum class was removed in Ruby 2.4+.
      raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_SORT_RELEVANCE..SPH_SORT_EXPR).include?(mode)
    else
      raise ArgumentError, '"mode" argument must be Integer, String, or Symbol'
  end

  raise ArgumentError, '"sortby" argument must be String' unless sortby.kind_of?(String)
  # Every mode except SPH_SORT_RELEVANCE needs an explicit sorting clause.
  raise ArgumentError, '"sortby" should not be empty unless mode is SPH_SORT_RELEVANCE' unless mode == SPH_SORT_RELEVANCE or !sortby.empty?

  @sort = mode
  @sortby = sortby
  self
end
alias :SetSortMode :set_sort_mode
|
739
|
+
|
740
|
+
# Binds per-field weights in the order of appearance in the index.
#
# @param [Array<Integer>] weights an +Array+ of integer per-field weights.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_weights([1, 3, 5])
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @deprecated Use {#set_field_weights} instead.
# @see #set_field_weights
#
def set_weights(weights)
  raise ArgumentError, '"weights" argument must be Array' unless weights.kind_of?(Array)
  unless weights.all? { |w| w.kind_of?(Integer) }
    raise ArgumentError, '"weights" argument must be Array of integers'
  end

  @weights = weights
  self
end
alias :SetWeights :set_weights
|
763
|
+
|
764
|
+
# Binds per-field weights by name. Parameter must be a +Hash+
# mapping string field names to integer weights.
#
# Match ranking can be affected by per-field weights. For instance,
# see Section 4.4, "Weighting" for an explanation how phrase
# proximity ranking is affected. This call lets you specify what
# non-default weights to assign to different full-text fields.
#
# The weights must be positive 32-bit integers, and the final weight
# is a 32-bit integer too. Default weight value is 1. Unknown
# field names are silently ignored.
#
# There is no enforced limit on the maximum weight value, but very
# large values (e.g. 10,000,000) multiplied by the internal BM25
# scaling factor can overflow 32 bits and wrap around, producing
# unexpected results.
#
# @param [Hash] weights a +Hash+ mapping string field names to
#   integer weights.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_field_weights(:title => 20, :text => 10)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#weighting Section 4.4, "Weighting"
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setfieldweights Section 6.3.5, "SetFieldWeights"
#
def set_field_weights(weights)
  raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
  valid = weights.all? do |field, weight|
    (field.kind_of?(String) || field.kind_of?(Symbol)) && weight.kind_of?(Integer)
  end
  raise ArgumentError, '"weights" argument must be Hash map of strings to integers' unless valid

  @fieldweights = weights
  self
end
alias :SetFieldWeights :set_field_weights
|
810
|
+
|
811
|
+
# Sets per-index weights, and enables weighted summing of match
# weights across different indexes. Parameter must be a hash
# (associative array) mapping string index names to integer
# weights. Default is empty array that means to disable weighting
# summing.
#
# When a match with the same document ID is found in several
# different local indexes, by default Sphinx simply chooses the
# match from the index specified last in the query. This is to
# support searching through partially overlapping index partitions.
#
# However in some cases the indexes are not just partitions,
# and you might want to sum the weights across the indexes
# instead of picking one. {#set_index_weights} lets you do that.
# With summing enabled, final match weight in result set will be
# computed as a sum of match weight coming from the given index
# multiplied by respective per-index weight specified in this
# call. Ie. if the document 123 is found in index A with the
# weight of 2, and also in index B with the weight of 3, and
# you called {#set_index_weights} with <tt>{"A"=>100, "B"=>10}</tt>,
# the final weight return to the client will be 2*100+3*10 = 230.
#
# @param [Hash] weights a +Hash+ mapping string index names to
#   integer weights.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_index_weights(:fresh => 20, :archived => 10)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setindexweights Section 6.3.6, "SetIndexWeights"
#
def set_index_weights(weights)
  raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
  # Every key must be a String or Symbol index name, every value an Integer weight.
  weights.each do |index, weight|
    unless (index.kind_of?(String) or index.kind_of?(Symbol)) and weight.kind_of?(Integer)
      raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
    end
  end

  @indexweights = weights
  self
end
alias :SetIndexWeights :set_index_weights
|
856
|
+
|
857
|
+
#=================================================================
|
858
|
+
# Result set filtering settings
|
859
|
+
#=================================================================
|
860
|
+
|
861
|
+
# Sets an accepted range of document IDs. Parameters must be integers.
# Defaults are 0 and 0; that combination means to not limit by range.
#
# After this call, only those records that have document ID between
# +min+ and +max+ (including IDs exactly equal to +min+ or +max+)
# will be matched.
#
# @param [Integer] min min document ID.
# @param [Integer] max max document ID.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_id_range(10, 1000)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setidrange Section 6.4.1, "SetIDRange"
#
def set_id_range(min, max)
  raise ArgumentError, '"min" argument must be Integer' unless min.kind_of?(Integer)
  raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
  raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max

  @min_id = min
  @max_id = max
  self
end
alias :SetIDRange :set_id_range
|
889
|
+
|
890
|
+
# Adds new integer values set filter.
#
# On this call, additional new filter is added to the existing
# list of filters. +attribute+ must be a string with attribute
# name. +values+ must be a plain array containing integer
# values. +exclude+ must be a boolean value; it controls
# whether to accept the matching documents (default mode, when
# +exclude+ is +false+) or reject them.
#
# Only those documents where +attribute+ column value stored in
# the index matches any of the values from +values+ array will
# be matched (or rejected, if +exclude+ is +true+).
#
# @param [String, Symbol] attribute an attribute name to filter by.
# @param [Array<Integer>, Integer] values an +Array+ of integers or
#   single Integer with given attribute values.
# @param [Boolean] exclude indicating whether documents with given attribute
#   matching specified values should be excluded from search results.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_filter(:group_id, [10, 15, 20])
#   sphinx.set_filter(:group_id, [10, 15, 20], true)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilter Section 6.4.2, "SetFilter"
# @see #set_filter_range
# @see #set_filter_float_range
#
def set_filter(attribute, values, exclude = false)
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
  # Allow a bare Integer as a convenience; wrap it into a one-element array.
  values = [values] if values.kind_of?(Integer)
  raise ArgumentError, '"values" argument must be Array' unless values.kind_of?(Array)
  raise ArgumentError, '"values" argument must be Array of Integers' unless values.all? { |v| v.kind_of?(Integer) }
  raise ArgumentError, '"exclude" argument must be Boolean' unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)

  # An empty values list would be a no-op filter; skip adding it.
  if values.any?
    @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute.to_s, 'exclude' => exclude, 'values' => values }
  end
  self
end
alias :SetFilter :set_filter
|
933
|
+
|
934
|
+
# Adds new integer range filter.
#
# On this call, additional new filter is added to the existing
# list of filters. +attribute+ must be a string with attribute
# name. +min+ and +max+ must be integers that define the acceptable
# attribute values range (including the boundaries). +exclude+
# must be a boolean value; it controls whether to accept the
# matching documents (default mode, when +exclude+ is false) or
# reject them.
#
# Only those documents where +attribute+ column value stored
# in the index is between +min+ and +max+ (including values
# that are exactly equal to +min+ or +max+) will be matched
# (or rejected, if +exclude+ is true).
#
# @param [String, Symbol] attribute an attribute name to filter by.
# @param [Integer] min min value of the given attribute.
# @param [Integer] max max value of the given attribute.
# @param [Boolean] exclude indicating whether documents with given attribute
#   matching specified boundaries should be excluded from search results.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_filter_range(:group_id, 10, 20)
#   sphinx.set_filter_range(:group_id, 10, 20, true)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterrange Section 6.4.3, "SetFilterRange"
# @see #set_filter
# @see #set_filter_float_range
#
def set_filter_range(attribute, min, max, exclude = false)
  unless attribute.kind_of?(String) || attribute.kind_of?(Symbol)
    raise ArgumentError, '"attribute" argument must be String or Symbol'
  end
  raise ArgumentError, '"min" argument must be Integer' unless min.kind_of?(Integer)
  raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
  raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
  unless exclude.kind_of?(TrueClass) || exclude.kind_of?(FalseClass)
    raise ArgumentError, '"exclude" argument must be Boolean'
  end

  filter = {
    'type'    => SPH_FILTER_RANGE,
    'attr'    => attribute.to_s,
    'exclude' => exclude,
    'min'     => min,
    'max'     => max
  }
  @filters << filter
  self
end
alias :SetFilterRange :set_filter_range
|
977
|
+
|
978
|
+
# Adds new float range filter.
#
# On this call, additional new filter is added to the existing
# list of filters. +attribute+ must be a string with attribute name.
# +min+ and +max+ must be floats that define the acceptable
# attribute values range (including the boundaries). +exclude+ must
# be a boolean value; it controls whether to accept the matching
# documents (default mode, when +exclude+ is false) or reject them.
#
# Only those documents where +attribute+ column value stored in
# the index is between +min+ and +max+ (including values that are
# exactly equal to +min+ or +max+) will be matched (or rejected,
# if +exclude+ is true).
#
# @param [String, Symbol] attribute an attribute name to filter by.
# @param [Numeric] min min value of the given attribute.
# @param [Numeric] max max value of the given attribute.
# @param [Boolean] exclude indicating whether documents with given attribute
#   matching specified boundaries should be excluded from search results.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_filter_float_range(:group_id, 10.5, 20)
#   sphinx.set_filter_float_range(:group_id, 10.5, 20, true)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterfloatrange Section 6.4.4, "SetFilterFloatRange"
# @see #set_filter
# @see #set_filter_range
#
def set_filter_float_range(attribute, min, max, exclude = false)
  unless attribute.kind_of?(String) || attribute.kind_of?(Symbol)
    raise ArgumentError, '"attribute" argument must be String or Symbol'
  end
  raise ArgumentError, '"min" argument must be Numeric' unless min.kind_of?(Numeric)
  raise ArgumentError, '"max" argument must be Numeric' unless max.kind_of?(Numeric)
  raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
  unless exclude.kind_of?(TrueClass) || exclude.kind_of?(FalseClass)
    raise ArgumentError, '"exclude" argument must be Boolean'
  end

  # Boundaries are normalized to Float before being queued.
  filter = {
    'type'    => SPH_FILTER_FLOATRANGE,
    'attr'    => attribute.to_s,
    'exclude' => exclude,
    'min'     => min.to_f,
    'max'     => max.to_f
  }
  @filters << filter
  self
end
alias :SetFilterFloatRange :set_filter_float_range
|
1020
|
+
|
1021
|
+
# Sets anchor point for geosphere distance (geodistance)
# calculations, and enables them.
#
# +attrlat+ and +attrlong+ must be strings that contain the names
# of latitude and longitude attributes, respectively. +lat+ and
# +long+ are floats that specify anchor point latitude and
# longitude, in radians.
#
# Once an anchor point is set, you can use magic <tt>"@geodist"</tt>
# attribute name in your filters and/or sorting expressions.
# Sphinx will compute geosphere distance between the given anchor
# point and a point specified by latitude and longitude attributes
# from each full-text match, and attach this value to the resulting
# match. The latitude and longitude values both in {#set_geo_anchor}
# and the index attribute data are expected to be in radians.
# The result will be returned in meters, so geodistance value of
# 1000.0 means 1 km. 1 mile is approximately 1609.344 meters.
#
# @param [String, Symbol] attrlat a name of latitude attribute.
# @param [String, Symbol] attrlong a name of longitude attribute.
# @param [Numeric] lat an anchor point latitude, in radians.
# @param [Numeric] long an anchor point longitude, in radians.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_geo_anchor(:latitude, :longitude, 192.5, 143.5)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
#
def set_geo_anchor(attrlat, attrlong, lat, long)
  raise ArgumentError, '"attrlat" argument must be String or Symbol' unless attrlat.kind_of?(String) or attrlat.kind_of?(Symbol)
  raise ArgumentError, '"attrlong" argument must be String or Symbol' unless attrlong.kind_of?(String) or attrlong.kind_of?(Symbol)
  raise ArgumentError, '"lat" argument must be Numeric' unless lat.kind_of?(Numeric)
  raise ArgumentError, '"long" argument must be Numeric' unless long.kind_of?(Numeric)

  # Coordinates are normalized to Float; attribute names to String.
  @anchor = { 'attrlat' => attrlat.to_s, 'attrlong' => attrlong.to_s, 'lat' => lat.to_f, 'long' => long.to_f }
  self
end
alias :SetGeoAnchor :set_geo_anchor
|
1062
|
+
|
1063
|
+
#=================================================================
|
1064
|
+
# GROUP BY settings
|
1065
|
+
#=================================================================
|
1066
|
+
|
1067
|
+
# Sets grouping attribute, function, and groups sorting mode; and
# enables grouping (as described in Section 4.6, "Grouping (clustering) search results").
#
# +attribute+ is a string that contains group-by attribute name.
# +func+ is a constant that chooses a function applied to the
# attribute value in order to compute group-by key. +groupsort+
# is a clause that controls how the groups will be sorted. Its
# syntax is similar to that described in Section 4.5,
# "SPH_SORT_EXTENDED mode".
#
# Grouping feature is very similar in nature to <tt>GROUP BY</tt> clause
# from SQL. Results produced by this function call are going to
# be the same as produced by the following pseudo code:
#
#   SELECT ... GROUP BY func(attribute) ORDER BY groupsort
#
# Note that it's +groupsort+ that affects the order of matches in
# the final result set. Sorting mode (see {#set_sort_mode}) affects
# the ordering of matches within group, ie. what match will be
# selected as the best one from the group. So you can for instance
# order the groups by matches count and select the most relevant
# match within each group at the same time.
#
# Starting with version 0.9.9-rc2, aggregate functions (<tt>AVG()</tt>,
# <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are supported
# through {#set_select} API call when using <tt>GROUP BY</tt>.
#
# You can specify group function and attribute as String
# ("attr", "day", etc), Symbol (:attr, :day, etc), or
# Integer constant (SPH_GROUPBY_ATTR, SPH_GROUPBY_DAY, etc).
#
# @param [String, Symbol] attribute an attribute name to group by.
# @param [Integer, String, Symbol] func a grouping function.
# @param [String] groupsort a groups sorting mode.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_group_by(:tag_id, :attr)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#clustering Section 4.6, "Grouping (clustering) search results"
# @see http://www.sphinxsearch.com/docs/current.html#sort-extended Section 4.5, "SPH_SORT_EXTENDED mode"
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupby Section 6.5.1, "SetGroupBy"
# @see #set_sort_mode
# @see #set_select
# @see #set_group_distinct
#
def set_group_by(attribute, func, groupsort = '@group desc')
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
  raise ArgumentError, '"groupsort" argument must be String' unless groupsort.kind_of?(String)

  case func
    when String, Symbol
      # Map :attr / "attr" onto the SPH_GROUPBY_ATTR constant, and so on.
      begin
        func = self.class.const_get("SPH_GROUPBY_#{func.to_s.upcase}")
      rescue NameError
        raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid"
      end
    when Integer
      # NOTE: matches on Integer — the legacy Fixnum class was removed in Ruby 2.4+.
      raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid" unless (SPH_GROUPBY_DAY..SPH_GROUPBY_ATTRPAIR).include?(func)
    else
      raise ArgumentError, '"func" argument must be Integer, String, or Symbol'
  end

  @groupby = attribute.to_s
  @groupfunc = func
  @groupsort = groupsort
  self
end
alias :SetGroupBy :set_group_by
|
1138
|
+
|
1139
|
+
# Sets attribute name for per-group distinct values count
# calculations. Only available for grouping queries.
#
# +attribute+ is a string that contains the attribute name. For
# each group, all values of this attribute will be stored (as
# RAM limits permit), then the amount of distinct values will
# be calculated and returned to the client. This feature is
# similar to <tt>COUNT(DISTINCT)</tt> clause in standard SQL;
# so these Sphinx calls:
#
#   sphinx.set_group_by(:category, :attr, '@count desc')
#   sphinx.set_group_distinct(:vendor)
#
# can be expressed using the following SQL clauses:
#
#   SELECT id, weight, all-attributes,
#     COUNT(DISTINCT vendor) AS @distinct,
#     COUNT(*) AS @count
#   FROM products
#   GROUP BY category
#   ORDER BY @count DESC
#
# In the sample pseudo code shown just above, {#set_group_distinct}
# call corresponds to <tt>COUNT(DISTINCT vendor)</tt> clause only.
# <tt>GROUP BY</tt>, <tt>ORDER BY</tt>, and <tt>COUNT(*)</tt>
# clauses are all an equivalent of {#set_group_by} settings. Both
# queries will return one matching row for each category. In
# addition to indexed attributes, matches will also contain
# total per-category matches count, and the count of distinct
# vendor IDs within each category.
#
# @param [String, Symbol] attribute an attribute name.
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.set_group_distinct(:category_id)
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupdistinct Section 6.5.2, "SetGroupDistinct"
# @see #set_group_by
#
def set_group_distinct(attribute)
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)

  @groupdistinct = attribute.to_s
  self
end
alias :SetGroupDistinct :set_group_distinct
|
1188
|
+
|
1189
|
+
#=================================================================
|
1190
|
+
# Querying
|
1191
|
+
#=================================================================
|
1192
|
+
|
1193
|
+
# Discards every filter configured so far, including the geo-anchor
# point.
#
# Normally this is only needed with multi-queries: when each query in
# the batch should use its own filter set, call {#reset_filters}
# between {#add_query} invocations and then register the new filters
# with the respective setter calls.
#
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.reset_filters
#
# @see #set_filter
# @see #set_filter_range
# @see #set_filter_float_range
# @see #set_geo_anchor
#
def reset_filters
  # Fresh empty collections; the anchor is cleared together with the
  # filters because both are emitted in the same request section.
  @filters, @anchor = [], []
  self
end
alias :ResetFilters :reset_filters
|
1215
|
+
|
1216
|
+
# Clears all current group-by settings and disables group-by mode.
#
# Normally this is only needed with multi-queries: {#set_group_by} and
# {#set_group_distinct} can change individual group-by settings, but
# they cannot switch grouping off. {#reset_group_by} restores all
# grouping-related defaults, so subsequent {#add_query} calls run
# plain (non-grouping) searches.
#
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.reset_group_by
#
# @see #set_group_by
# @see #set_group_distinct
#
def reset_group_by
  # Restore the pre-set_group_by defaults; an empty @groupby string is
  # what actually disables grouping for subsequent queries.
  @groupby       = ''
  @groupdistinct = ''
  @groupfunc     = SPH_GROUPBY_DAY
  @groupsort     = '@group desc'
  self
end
alias :ResetGroupBy :reset_group_by
|
1241
|
+
|
1242
|
+
# Clears all attribute value overrides (for multi-queries).
#
# Normally this is only needed with multi-queries: when each query in
# the batch should use its own set of attribute overrides, call
# {#reset_overrides} between {#add_query} invocations and register the
# new overrides with {#set_override}.
#
# @return [Sphinx::Client] self.
#
# @example
#   sphinx.reset_overrides
#
# @see #set_override
#
def reset_overrides
  @overrides = []
  self
end
alias :ResetOverrides :reset_overrides
|
1261
|
+
|
1262
|
+
# Connects to searchd server, runs given search query with
# current settings, obtains and returns the result set.
#
# +query+ is a query string. +index+ is an index name (or names)
# string. Returns false and sets {#last_error} message on general
# error. Returns search result set on success. Additionally,
# the contents of +comment+ are sent to the query log, marked in
# square brackets, just before the search terms, which can be very
# useful for debugging. Currently, the comment is limited to 128
# characters.
#
# Default value for +index+ is <tt>"*"</tt> that means to query
# all local indexes. Characters allowed in index names include
# Latin letters (a-z), numbers (0-9), minus sign (-), and
# underscore (_); everything else is considered a separator.
# Therefore, all of the following sample calls are valid and
# will search the same two indexes:
#
#   sphinx.query('test query', 'main delta')
#   sphinx.query('test query', 'main;delta')
#   sphinx.query('test query', 'main, delta');
#
# Index specification order matters. If document with identical
# IDs are found in two or more indexes, weight and attribute
# values from the very last matching index will be used for
# sorting and returning to client (unless explicitly overridden
# with {#set_index_weights}). Therefore, in the example above,
# matches from "delta" index will always win over matches
# from "main".
#
# On success, {#query} returns a result set that contains some
# of the found matches (as requested by {#set_limits}) and
# additional general per-query statistics. The result set
# is an +Hash+ with the following keys and values:
#
# <tt>"matches"</tt>::
#   Array with small +Hash+es containing document weight and
#   attribute values.
# <tt>"total"</tt>::
#   Total amount of matches retrieved on server (ie. to the server
#   side result set) by this query. You can retrieve up to this
#   amount of matches from server for this query text with current
#   query settings.
# <tt>"total_found"</tt>::
#   Total amount of matching documents in index (that were found
#   and processed on server).
# <tt>"words"</tt>::
#   Hash which maps query keywords (case-folded, stemmed, and
#   otherwise processed) to a small Hash with per-keyword statistics
#   ("docs", "hits").
# <tt>"error"</tt>::
#   Query error message reported by searchd (string, human readable).
#   Empty if there were no errors.
# <tt>"warning"</tt>::
#   Query warning message reported by searchd (string, human readable).
#   Empty if there were no warnings.
#
# Please note: you can use both strings and symbols as <tt>Hash</tt> keys.
#
# It should be noted that {#query} carries out the same actions as
# {#add_query} and {#run_queries} without the intermediate steps; it
# is analogous to a single {#add_query} call, followed by a
# corresponding {#run_queries}, then returning the first array
# element of matches (from the first, and only, query.)
#
# @param [String] query a query string.
# @param [String] index an index name (or names).
# @param [String] comment a comment to be sent to the query log.
# @return [Hash, false] result set described above or +false+ on error.
# @yield [Client] yields just before query performing. Useful to set
#   filters or sortings. When block does not accept any parameters, it
#   will be eval'ed inside {Client} instance itself. In this case you
#   can omit +set_+ prefix for configuration methods.
# @yieldparam [Client] sphinx self.
#
# @example Regular query with previously set filters
#   sphinx.query('some search text', '*', 'search page')
# @example Query with block
#   sphinx.query('test') do |sphinx|
#     sphinx.set_match_mode :all
#     sphinx.set_id_range 10, 100
#   end
# @example Query with instant filters configuring
#   sphinx.query('test') do
#     match_mode :all
#     id_range 10, 100
#   end
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-query Section 6.6.1, "Query"
# @see #add_query
# @see #run_queries
#
def query(query, index = '*', comment = '', &block)
  # Drop any previously batched queries: #query is a single-shot call.
  @reqs = []

  if block_given?
    if block.arity > 0
      # Block declares a parameter: pass the client in explicitly.
      yield self
    else
      # Parameterless block: evaluate inside the client so the caller
      # can use the short (prefix-less) configuration methods.
      # @inside_eval is flagged so those shorthand dispatches can be
      # recognized; ensure it is cleared even if the block raises.
      begin
        @inside_eval = true
        instance_eval(&block)
      ensure
        @inside_eval = false
      end
    end
  end

  logger.debug { "[sphinx] query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if logger

  # Delegate to the multi-query machinery with a single query
  # (log = false since we logged the call above already).
  self.add_query(query, index, comment, false)
  results = self.run_queries

  # probably network error; error message should be already filled
  return false unless results.instance_of?(Array)

  # Promote the single result's error/warning to the client-level
  # accessors, mirroring the behavior of a plain single query.
  @error = results[0]['error']
  @warning = results[0]['warning']

  return false if results[0]['status'] == SEARCHD_ERROR
  return results[0]
end
alias :Query :query
|
1385
|
+
|
1386
|
+
# Adds additional query with current settings to multi-query batch.
# +query+ is a query string. +index+ is an index name (or names)
# string. Additionally if provided, the contents of +comment+ are
# sent to the query log, marked in square brackets, just before
# the search terms, which can be very useful for debugging.
# Currently, this is limited to 128 characters. Returns index
# to results array returned from {#run_queries}.
#
# Batch queries (or multi-queries) enable searchd to perform
# internal optimizations if possible. They also reduce network
# connection overheads and search process creation overheads in all
# cases. They do not result in any additional overheads compared
# to simple queries. Thus, if you run several different queries
# from your web page, you should always consider using multi-queries.
#
# For instance, running the same full-text query but with different
# sorting or group-by settings will enable searchd to perform
# expensive full-text search and ranking operation only once, but
# compute multiple group-by results from its output.
#
# This can be a big saver when you need to display not just plain
# search results but also some per-category counts, such as the
# amount of products grouped by vendor. Without multi-query, you
# would have to run several queries which perform essentially the
# same search and retrieve the same matches, but create result
# sets differently. With multi-query, you simply pass all these
# queries in a single batch and Sphinx optimizes the redundant
# full-text search internally.
#
# {#add_query} internally saves full current settings state along
# with the query, and you can safely change them afterwards for
# subsequent {#add_query} calls. Already added queries will not
# be affected; there's actually no way to change them at all.
# Here's an example:
#
#   sphinx.set_sort_mode(:relevance)
#   sphinx.add_query("hello world", "documents")
#
#   sphinx.set_sort_mode(:attr_desc, :price)
#   sphinx.add_query("ipod", "products")
#
#   sphinx.add_query("harry potter", "books")
#
#   results = sphinx.run_queries
#
# With the code above, 1st query will search for "hello world"
# in "documents" index and sort results by relevance, 2nd query
# will search for "ipod" in "products" index and sort results
# by price, and 3rd query will search for "harry potter" in
# "books" index while still sorting by price. Note that 2nd
# {#set_sort_mode} call does not affect the first query (because
# it's already added) but affects both other subsequent queries.
#
# Additionally, any filters set up before an {#add_query} will
# fall through to subsequent queries. So, if {#set_filter} is
# called before the first query, the same filter will be in
# place for the second (and subsequent) queries batched through
# {#add_query} unless you call {#reset_filters} first. Alternatively,
# you can add additional filters as well.
#
# This would also be true for grouping options and sorting options;
# no current sorting, filtering, and grouping settings are affected
# by this call; so subsequent queries will reuse current query settings.
#
# {#add_query} returns an index into an array of results that will
# be returned from {#run_queries} call. It is simply a sequentially
# increasing 0-based integer, ie. first call will return 0, second
# will return 1, and so on. Just a small helper so you won't have
# to track the indexes manually if you need them.
#
# @param [String] query a query string.
# @param [String] index an index name (or names).
# @param [String] comment a comment to be sent to the query log.
# @param [Boolean] log indicating whether this call should be logged.
# @return [Integer] an index into an array of results that will
#   be returned from {#run_queries} call.
#
# @example
#   sphinx.add_query('some search text', '*', 'search page')
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-addquery Section 6.6.2, "AddQuery"
# @see #query
# @see #run_queries
#
def add_query(query, index = '*', comment = '', log = true)
  logger.debug { "[sphinx] add_query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if log and logger
  # build request
  #
  # NOTE: field order below mirrors the searchd SEARCH command wire
  # format; do not reorder any of the put_* calls.

  # mode and limits
  request = Request.new
  request.put_int @offset, @limit, @mode, @ranker, @sort
  request.put_string @sortby
  # query itself
  request.put_string query
  # weights
  request.put_int_array @weights
  # indexes
  request.put_string index
  # id64 range marker (always 1: this client always sends 64-bit ranges)
  request.put_int 1
  # id64 range
  request.put_int64 @min_id.to_i, @max_id.to_i

  # filters
  request.put_int @filters.length
  @filters.each do |filter|
    request.put_string filter['attr']
    request.put_int filter['type']

    # Payload layout depends on the filter type.
    case filter['type']
      when SPH_FILTER_VALUES
        request.put_int64_array filter['values']
      when SPH_FILTER_RANGE
        request.put_int64 filter['min'], filter['max']
      when SPH_FILTER_FLOATRANGE
        request.put_float filter['min'], filter['max']
      else
        raise SphinxInternalError, 'Internal error: unhandled filter type'
    end
    request.put_int filter['exclude'] ? 1 : 0
  end

  # group-by clause, max-matches count, group-sort clause, cutoff count
  request.put_int @groupfunc
  request.put_string @groupby
  request.put_int @maxmatches
  request.put_string @groupsort
  request.put_int @cutoff, @retrycount, @retrydelay
  request.put_string @groupdistinct

  # anchor point (0/1 presence flag followed by the anchor fields)
  if @anchor.empty?
    request.put_int 0
  else
    request.put_int 1
    request.put_string @anchor['attrlat'], @anchor['attrlong']
    request.put_float @anchor['lat'], @anchor['long']
  end

  # per-index weights
  request.put_int @indexweights.length
  @indexweights.each do |idx, weight|
    request.put_string idx.to_s
    request.put_int weight
  end

  # max query time
  request.put_int @maxquerytime

  # per-field weights
  request.put_int @fieldweights.length
  @fieldweights.each do |field, weight|
    request.put_string field.to_s
    request.put_int weight
  end

  # comment
  request.put_string comment

  # attribute overrides
  request.put_int @overrides.length
  for entry in @overrides do
    request.put_string entry['attr']
    request.put_int entry['type'], entry['values'].size
    entry['values'].each do |id, val|
      # docid first, then the value encoded per the attribute type
      request.put_int64 id
      case entry['type']
        when SPH_ATTR_FLOAT
          request.put_float val.to_f
        when SPH_ATTR_BIGINT
          request.put_int64 val.to_i
        else
          request.put_int val.to_i
      end
    end
  end

  # select-list
  request.put_string @select

  # store request to requests array
  @reqs << request.to_s;
  return @reqs.length - 1
end
alias :AddQuery :add_query
|
1571
|
+
|
1572
|
+
# Connect to searchd, runs a batch of all queries added using
# {#add_query}, obtains and returns the result sets. Returns
# +false+ and sets {#last_error} message on general error
# (such as network I/O failure). Returns a plain array of
# result sets on success.
#
# Each result set in the returned array is exactly the same as
# the result set returned from {#query}.
#
# Note that the batch query request itself almost always succeeds —
# unless there's a network error, blocking index rotation in
# progress, or another general failure which prevents the whole
# request from being processed.
#
# However individual queries within the batch might very well
# fail. In this case their respective result sets will contain
# non-empty "error" message, but no matches or query statistics.
# In the extreme case all queries within the batch could fail.
# There still will be no general error reported, because API
# was able to successfully connect to searchd, submit the batch,
# and receive the results — but every result set will have a
# specific error message.
#
# @return [Array<Hash>] an +Array+ of +Hash+es which are exactly
#   the same as the result set returned from {#query}.
#
# @example
#   sphinx.add_query('some search text', '*', 'search page')
#   results = sphinx.run_queries
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-runqueries Section 6.6.3, "RunQueries"
# @see #add_query
#
def run_queries
  logger.debug { "[sphinx] run_queries(#{@reqs.length} queries)" } if logger
  if @reqs.empty?
    @error = 'No queries defined, issue add_query() first'
    return false
  end

  # Concatenate all pending per-query payloads into one request and
  # clear the batch before performing the network round-trip.
  reqs, nreqs = @reqs.join(''), @reqs.length
  @reqs = []
  response = perform_request(:search, reqs, nreqs)

  # parse response: searchd returns one result set per batched query,
  # in submission order; field order below mirrors the wire format.
  (1..nreqs).map do
    result = HashWithIndifferentAccess.new('error' => '', 'warning' => '')

    # extract status
    status = result['status'] = response.get_int
    if status != SEARCHD_OK
      message = response.get_string
      if status == SEARCHD_WARNING
        # a warning still carries a full result set; keep parsing
        result['warning'] = message
      else
        # an error result set carries nothing else; skip to next query
        result['error'] = message
        next result
      end
    end

    # read schema
    nfields = response.get_int
    result['fields'] = (1..nfields).map { response.get_string }

    # attrs maps name => type; the parallel array preserves the order
    # in which per-match attribute values appear on the wire
    attrs_names_in_order = []
    nattrs = response.get_int
    attrs = (1..nattrs).inject({}) do |hash, idx|
      name, type = response.get_string, response.get_int
      hash[name] = type
      attrs_names_in_order << name
      hash
    end
    result['attrs'] = attrs

    # read match count (id64 flags whether docids are 64-bit)
    count, id64 = response.get_ints(2)

    # read matches
    result['matches'] = (1..count).map do
      doc, weight = if id64 == 0
        response.get_ints(2)
      else
        [response.get_int64, response.get_int]
      end

      # This is a single result put in the result['matches'] array
      match = { 'id' => doc, 'weight' => weight }
      match['attrs'] = attrs_names_in_order.inject({}) do |hash, name|
        hash[name] = case attrs[name]
          when SPH_ATTR_BIGINT
            # handle 64-bit ints
            response.get_int64
          when SPH_ATTR_FLOAT
            # handle floats
            response.get_float
          else
            # handle everything else as unsigned ints; a multi-value
            # attribute is encoded as a count followed by the values
            val = response.get_int
            if (attrs[name] & SPH_ATTR_MULTI) != 0
              (1..val).map { response.get_int }
            else
              val
            end
        end
        hash
      end
      match
    end
    result['total'], result['total_found'], msecs = response.get_ints(3)
    # query time arrives in milliseconds; expose it as a seconds string
    result['time'] = '%.3f' % (msecs / 1000.0)

    # per-keyword statistics
    nwords = response.get_int
    result['words'] = (1..nwords).inject({}) do |hash, idx|
      word = response.get_string
      docs, hits = response.get_ints(2)
      hash[word] = { 'docs' => docs, 'hits' => hits }
      hash
    end

    result
  end
end
alias :RunQueries :run_queries
|
1695
|
+
|
1696
|
+
#=================================================================
|
1697
|
+
# Additional functionality
|
1698
|
+
#=================================================================
|
1699
|
+
|
1700
|
+
# Excerpts (snippets) builder function. Connects to searchd, asks
# it to generate excerpts (snippets) from given documents, and
# returns the results.
#
# +docs+ is a plain array of strings that carry the documents'
# contents. +index+ is an index name string. Different settings
# (such as charset, morphology, wordforms) from given index will
# be used. +words+ is a string that contains the keywords to
# highlight. They will be processed with respect to index settings.
# For instance, if English stemming is enabled in the index,
# "shoes" will be highlighted even if keyword is "shoe". Starting
# with version 0.9.9-rc1, keywords can contain wildcards, that
# work similarly to star-syntax available in queries.
#
# @param [Array<String>] docs an array of strings which represent
#   the documents' contents.
# @param [String] index an index which settings will be used for
#   stemming, lexing and case folding.
# @param [String] words a string which contains the words to highlight.
# @param [Hash] opts a +Hash+ which contains additional optional
#   highlighting parameters.
# @option opts [String] 'before_match' ("<b>") a string to insert before a
#   keyword match.
# @option opts [String] 'after_match' ("</b>") a string to insert after a
#   keyword match.
# @option opts [String] 'chunk_separator' (" ... ") a string to insert
#   between snippet chunks (passages).
# @option opts [Integer] 'limit' (256) maximum snippet size, in symbols
#   (codepoints).
# @option opts [Integer] 'around' (5) how many words to pick around
#   each matching keywords block.
# @option opts [Boolean] 'exact_phrase' (false) whether to highlight exact
#   query phrase matches only instead of individual keywords.
# @option opts [Boolean] 'single_passage' (false) whether to extract single
#   best passage only.
# @option opts [Boolean] 'use_boundaries' (false) whether to extract
#   passages by phrase boundaries setup in tokenizer.
# @option opts [Boolean] 'weight_order' (false) whether to sort the
#   extracted passages in order of relevance (decreasing weight),
#   or in order of appearance in the document (increasing position).
# @return [Array<String>, false] a plain array of strings with
#   excerpts (snippets) on success; otherwise, +false+.
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @example
#   sphinx.build_excerpts(['hello world', 'hello me'], 'idx', 'hello')
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-buildexcerpts Section 6.7.1, "BuildExcerpts"
#
def build_excerpts(docs, index, words, opts = {})
  raise ArgumentError, '"docs" argument must be Array' unless docs.kind_of?(Array)
  raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
  raise ArgumentError, '"words" argument must be String' unless words.kind_of?(String)
  raise ArgumentError, '"opts" argument must be Hash' unless opts.kind_of?(Hash)

  docs.each do |doc|
    raise ArgumentError, '"docs" argument must be Array of Strings' unless doc.kind_of?(String)
  end

  # fixup options: merge caller options over the documented defaults
  # (indifferent access lets callers use string or symbol keys)
  opts = HashWithIndifferentAccess.new(
    'before_match' => '<b>',
    'after_match' => '</b>',
    'chunk_separator' => ' ... ',
    'limit' => 256,
    'around' => 5,
    'exact_phrase' => false,
    'single_passage' => false,
    'use_boundaries' => false,
    'weight_order' => false
  ).update(opts)

  # build request

  # v.1.0 req: boolean options are packed into a bit mask
  # (bit 0 is always set: "remove spaces" flag)
  flags = 1
  flags |= 2 if opts['exact_phrase']
  flags |= 4 if opts['single_passage']
  flags |= 8 if opts['use_boundaries']
  flags |= 16 if opts['weight_order']

  request = Request.new
  request.put_int 0, flags # mode=0, flags=1 (remove spaces)
  # req index
  request.put_string index.to_s
  # req words
  request.put_string words

  # options
  request.put_string opts['before_match']
  request.put_string opts['after_match']
  request.put_string opts['chunk_separator']
  request.put_int opts['limit'].to_i, opts['around'].to_i

  # documents
  request.put_int docs.size
  request.put_string(*docs)

  response = perform_request(:excerpt, request)

  # parse response: one excerpt string per input document, in order
  docs.map { response.get_string }
end
alias :BuildExcerpts :build_excerpts
|
1805
|
+
|
1806
|
+
# Extracts keywords from query using tokenizer settings for given
# index, optionally with per-keyword occurrence statistics.
# Returns an array of hashes with per-keyword information.
#
# +query+ is a query to extract keywords from. +index+ is a name of
# the index to get tokenizing settings and keyword occurrence
# statistics from. +hits+ is a boolean flag that indicates whether
# keyword occurrence statistics are required.
#
# The result set consists of +Hash+es with the following keys and values:
#
# <tt>'tokenized'</tt>::
#   Tokenized keyword.
# <tt>'normalized'</tt>::
#   Normalized keyword.
# <tt>'docs'</tt>::
#   A number of documents where keyword is found (if +hits+ param is +true+).
# <tt>'hits'</tt>::
#   A number of keywords occurrences among all documents (if +hits+ param is +true+).
#
# @param [String] query a query string.
# @param [String] index an index to get tokenizing settings and
#   keyword occurrence statistics from.
# @param [Boolean] hits indicates whether keyword occurrence
#   statistics are required.
# @return [Array<Hash>] an +Array+ of +Hash+es in format specified
#   above.
#
# @raise [ArgumentError] Occurred when parameters are invalid.
#
# @example
#   keywords = sphinx.build_keywords("this.is.my query", "test1", false)
#
# @see http://www.sphinxsearch.com/docs/current.html#api-func-buildkeywords Section 6.7.3, "BuildKeywords"
#
def build_keywords(query, index, hits)
  raise ArgumentError, '"query" argument must be String' unless query.kind_of?(String)
  raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
  raise ArgumentError, '"hits" argument must be Boolean' unless hits.kind_of?(TrueClass) or hits.kind_of?(FalseClass)

  # build request
  request = Request.new
  # v.1.0 req
  request.put_string query # req query
  request.put_string index # req index
  request.put_int hits ? 1 : 0

  response = perform_request(:keywords, request)

  # parse response: keyword count, then one entry per keyword;
  # the docs/hits pair is only present when it was requested above
  nwords = response.get_int
  (0...nwords).map do
    tokenized = response.get_string
    normalized = response.get_string

    entry = HashWithIndifferentAccess.new('tokenized' => tokenized, 'normalized' => normalized)
    entry['docs'], entry['hits'] = response.get_ints(2) if hits

    entry
  end
end
alias :BuildKeywords :build_keywords
|
1868
|
+
|
1869
|
+
# Instantly updates given attribute values in given documents.
|
1870
|
+
# Returns number of actually updated documents (0 or more) on
|
1871
|
+
# success, or -1 on failure.
|
1872
|
+
#
|
1873
|
+
# +index+ is a name of the index (or indexes) to be updated.
|
1874
|
+
# +attrs+ is a plain array with string attribute names, listing
|
1875
|
+
# attributes that are updated. +values+ is a Hash where key is
|
1876
|
+
# document ID, and value is a plain array of new attribute values.
|
1877
|
+
#
|
1878
|
+
# +index+ can be either a single index name or a list, like in
|
1879
|
+
# {#query}. Unlike {#query}, wildcard is not allowed and all the
|
1880
|
+
# indexes to update must be specified explicitly. The list of
|
1881
|
+
# indexes can include distributed index names. Updates on
|
1882
|
+
# distributed indexes will be pushed to all agents.
|
1883
|
+
#
|
1884
|
+
# The updates only work with docinfo=extern storage strategy.
|
1885
|
+
# They are very fast because they're working fully in RAM, but
|
1886
|
+
# they can also be made persistent: updates are saved on disk
|
1887
|
+
# on clean searchd shutdown initiated by SIGTERM signal. With
|
1888
|
+
# additional restrictions, updates are also possible on MVA
|
1889
|
+
# attributes; refer to mva_updates_pool directive for details.
|
1890
|
+
#
|
1891
|
+
# The first sample statement will update document 1 in index
|
1892
|
+
# "test1", setting "group_id" to 456. The second one will update
|
1893
|
+
# documents 1001, 1002 and 1003 in index "products". For document
|
1894
|
+
# 1001, the new price will be set to 123 and the new amount in
|
1895
|
+
# stock to 5; for document 1002, the new price will be 37 and the
|
1896
|
+
# new amount will be 11; etc. The third one updates document 1
|
1897
|
+
# in index "test2", setting MVA attribute "group_id" to [456, 789].
|
1898
|
+
#
|
1899
|
+
# @example
|
1900
|
+
# sphinx.update_attributes("test1", ["group_id"], { 1 => [456] });
|
1901
|
+
# sphinx.update_attributes("products", ["price", "amount_in_stock"],
|
1902
|
+
# { 1001 => [123, 5], 1002 => [37, 11], 1003 => [25, 129] });
|
1903
|
+
# sphinx.update_attributes('test2', ['group_id'], { 1 => [[456, 789]] }, true)
|
1904
|
+
#
|
1905
|
+
# @param [String] index a name of the index to be updated.
|
1906
|
+
# @param [Array<String>] attrs an array of attribute name strings.
|
1907
|
+
# @param [Hash] values is a hash where key is document id, and
|
1908
|
+
# value is an array of new attribute values.
|
1909
|
+
# @param [Boolean] mva indicating whether to update MVA.
|
1910
|
+
# @return [Integer] number of actually updated documents (0 or more) on success,
|
1911
|
+
# -1 on failure.
|
1912
|
+
#
|
1913
|
+
# @raise [ArgumentError] Occurred when parameters are invalid.
|
1914
|
+
#
|
1915
|
+
# @see http://www.sphinxsearch.com/docs/current.html#api-func-updateatttributes Section 6.7.2, "UpdateAttributes"
|
1916
|
+
#
|
1917
|
+
def update_attributes(index, attrs, values, mva = false)
  # verify everything
  raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
  raise ArgumentError, '"mva" argument must be Boolean' unless mva.kind_of?(TrueClass) or mva.kind_of?(FalseClass)

  raise ArgumentError, '"attrs" argument must be Array' unless attrs.kind_of?(Array)
  attrs.each do |attr|
    raise ArgumentError, '"attrs" argument must be Array of Strings' unless attr.kind_of?(String) or attr.kind_of?(Symbol)
  end

  raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
  values.each do |id, entry|
    raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless id.kind_of?(Integer)
    raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless entry.kind_of?(Array)
    raise ArgumentError, "\"values\" argument Hash values Array must have #{attrs.length} elements" unless entry.length == attrs.length
    entry.each do |v|
      if mva
        # In MVA mode every attribute value is itself an array of integers.
        raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays' unless v.kind_of?(Array)
        v.each do |vv|
          raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays of Integers' unless vv.kind_of?(Integer)
        end
      else
        raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Integers' unless v.kind_of?(Integer)
      end
    end
  end

  # build request
  request = Request.new
  request.put_string index

  # Attribute names, each followed by a flag: 1 when it is an MVA attribute.
  # (was a `for` loop, which leaks the loop variable into the method scope)
  request.put_int attrs.length
  attrs.each do |attr|
    request.put_string attr
    request.put_int mva ? 1 : 0
  end

  # Document ids are 64-bit; values follow in the same order as +attrs+.
  request.put_int values.length
  values.each do |id, entry|
    request.put_int64 id
    if mva
      entry.each { |v| request.put_int_array v }
    else
      request.put_int(*entry)
    end
  end

  response = perform_request(:update, request)

  # parse response: number of actually updated documents
  response.get_int
end
alias :UpdateAttributes :update_attributes
|
1970
|
+
|
1971
|
+
# Escapes characters that are treated as special operators by the
|
1972
|
+
# query language parser.
|
1973
|
+
#
|
1974
|
+
# This function might seem redundant because it's trivial to
|
1975
|
+
# implement in any calling application. However, as the set of
|
1976
|
+
# special characters might change over time, it makes sense to
|
1977
|
+
# have an API call that is guaranteed to escape all such
|
1978
|
+
# characters at all times.
|
1979
|
+
#
|
1980
|
+
# @param [String] string is a string to escape.
|
1981
|
+
# @return [String] an escaped string.
|
1982
|
+
#
|
1983
|
+
# @example:
|
1984
|
+
# escaped = sphinx.escape_string "escaping-sample@query/string"
|
1985
|
+
#
|
1986
|
+
def escape_string(string)
  # Prefix every query-language special character with a backslash. The
  # character class covers: \ ( ) | - ! @ ~ " & / ^ $ =
  string.to_s.gsub(/([\\()|\-!@~"&\/\^\$=])/) { |special| "\\#{special}" }
end
alias :EscapeString :escape_string
|
1990
|
+
|
1991
|
+
# Queries searchd status, and returns an array of status variable name
|
1992
|
+
# and value pairs.
|
1993
|
+
#
|
1994
|
+
# @return [Array<Array>, Array<Hash>] a table containing searchd status information.
|
1995
|
+
# If there are more than one server configured ({#set_servers}), an
|
1996
|
+
# +Array+ of +Hash+es will be returned, one for each server. Hash will
|
1997
|
+
# contain <tt>:server</tt> element with string name of server (<tt>host:port</tt>)
|
1998
|
+
# and <tt>:status</tt> table just like one for a single server. In case of
|
1999
|
+
# any error, it will be stored in the <tt>:error</tt> key.
|
2000
|
+
#
|
2001
|
+
# @example Single server
|
2002
|
+
# status = sphinx.status
|
2003
|
+
# puts status.map { |key, value| "#{key.rjust(20)}: #{value}" }
|
2004
|
+
#
|
2005
|
+
# @example Multiple servers
|
2006
|
+
# sphinx.set_servers([
|
2007
|
+
# { :host => 'localhost' },
|
2008
|
+
# { :host => 'browse02.local' }
|
2009
|
+
# ])
|
2010
|
+
# sphinx.status.each do |report|
|
2011
|
+
# puts "=== #{report[:server]}"
|
2012
|
+
# if report[:error]
|
2013
|
+
# puts "Error: #{report[:error]}"
|
2014
|
+
# else
|
2015
|
+
# puts report[:status].map { |key, value| "#{key.rjust(20)}: #{value}" }
|
2016
|
+
# end
|
2017
|
+
# end
|
2018
|
+
#
|
2019
|
+
def status
  payload = Request.new
  payload.put_int(1)

  # Query each configured server. Every reply carries the table dimensions
  # followed by rows * cols strings.
  reports = @servers.map do |server|
    begin
      reply = perform_request(:status, payload, nil, server)
      num_rows, num_cols = reply.get_ints(2)
      table = Array.new(num_rows) { Array.new(num_cols) { reply.get_string } }
      HashWithIndifferentAccess.new(:server => server.to_s, :status => table)
    rescue SphinxError
      # With exactly one server configured, propagate the error to the caller.
      raise if @servers.size == 1
      HashWithIndifferentAccess.new(:server => server.to_s, :error => self.last_error)
    end
  end

  # Single server: return its status table directly; otherwise one report per server.
  @servers.size > 1 ? reports : reports.first[:status]
end
alias :Status :status
|
2042
|
+
|
2043
|
+
#=================================================================
|
2044
|
+
# Persistent connections
|
2045
|
+
#=================================================================
|
2046
|
+
|
2047
|
+
# Opens persistent connection to the server.
|
2048
|
+
#
|
2049
|
+
# This method could be used only when a single searchd server
|
2050
|
+
# configured.
|
2051
|
+
#
|
2052
|
+
# @return [Boolean] +true+ when persistent connection has been
|
2053
|
+
# established; otherwise, +false+.
|
2054
|
+
#
|
2055
|
+
# @example
|
2056
|
+
# begin
|
2057
|
+
# sphinx.open
|
2058
|
+
# # perform several requests
|
2059
|
+
# ensure
|
2060
|
+
# sphinx.close
|
2061
|
+
# end
|
2062
|
+
#
|
2063
|
+
# @see #close
|
2064
|
+
#
|
2065
|
+
def open
  # Persistent sockets only make sense with a single configured server.
  if @servers.size > 1
    @error = 'too many servers. persistent socket allowed only for a single server.'
    return false
  end

  server = @servers.first
  if server.persistent?
    @error = 'already connected'
    return false
  end

  payload = Request.new
  payload.put_int(1)

  # Hand the raw socket over to the server object so it can be reused
  # across subsequent requests.
  perform_request(:persist, payload, nil) do |srv, socket|
    srv.make_persistent!(socket)
  end

  true
end
alias :Open :open
|
2086
|
+
|
2087
|
+
# Closes previously opened persistent connection.
|
2088
|
+
#
|
2089
|
+
# This method could be used only when a single searchd server
|
2090
|
+
# configured.
|
2091
|
+
#
|
2092
|
+
# @return [Boolean] +true+ when persistent connection has been
|
2093
|
+
# closed; otherwise, +false+.
|
2094
|
+
#
|
2095
|
+
# @example
|
2096
|
+
# begin
|
2097
|
+
# sphinx.open
|
2098
|
+
# # perform several requests
|
2099
|
+
# ensure
|
2100
|
+
# sphinx.close
|
2101
|
+
# end
|
2102
|
+
#
|
2103
|
+
# @see #open
|
2104
|
+
#
|
2105
|
+
def close
  # Persistent sockets only make sense with a single configured server.
  if @servers.size > 1
    @error = 'too many servers. persistent socket allowed only for a single server.'
    return false
  end

  server = @servers.first
  unless server.persistent?
    @error = 'not connected'
    return false
  end

  server.close_persistent!
end
alias :Close :close
|
2119
|
+
|
2120
|
+
protected
|
2121
|
+
|
2122
|
+
# Connect, send query, get response.
|
2123
|
+
#
|
2124
|
+
# Use this method to communicate with Sphinx server. It ensures connection
|
2125
|
+
# will be instantiated properly, all headers will be generated properly, etc.
|
2126
|
+
#
|
2127
|
+
# @param [Symbol, String] command searchd command to perform (<tt>:search</tt>, <tt>:excerpt</tt>,
|
2128
|
+
# <tt>:update</tt>, <tt>:keywords</tt>, <tt>:persist</tt>, <tt>:status</tt>,
|
2129
|
+
# <tt>:query</tt>, <tt>:flushattrs</tt>. See <tt>SEARCHD_COMMAND_*</tt> for details).
|
2130
|
+
# @param [Sphinx::Request] request contains request body.
|
2131
|
+
# @param [Integer] additional additional integer data to be placed between header and body.
|
2132
|
+
# @param [Sphinx::Server] server where perform request on. This is special
|
2133
|
+
# parameter for internal usage. If specified, request will be performed
|
2134
|
+
# on specified server, and it will try to establish connection to this
|
2135
|
+
# server only once.
|
2136
|
+
#
|
2137
|
+
# @yield if block given, response will not be parsed, plain socket
|
2138
|
+
# will be yielded instead. This is special mode used for
|
2139
|
+
# persistent connections, do not use for other tasks.
|
2140
|
+
# @yieldparam [Sphinx::Server] server a server where request was performed on.
|
2141
|
+
# @yieldparam [Sphinx::BufferedIO] socket a socket used to perform the request.
|
2142
|
+
# @return [Sphinx::Response] contains response body.
|
2143
|
+
#
|
2144
|
+
# @see #parse_response
|
2145
|
+
#
|
2146
|
+
def perform_request(command, request, additional = nil, server = nil)
  # When an explicit server is passed (e.g. from #status), do not fail over
  # to other servers: try this one exactly once.
  if server
    attempts = 1
  else
    # Otherwise derive a seed from the request checksum so that identical
    # requests are routed to the same server (simple deterministic
    # distribution across @servers); nil attempts means "use the retry
    # count configured via #set_connect_timeout".
    server = case request
      when String
        Zlib.crc32(request)
      when Request
        request.crc32
      else
        raise ArgumentError, "request argument must be String or Sphinx::Request"
    end
    attempts = nil
  end

  with_server(server, attempts) do |server|
    logger.info { "[sphinx] #{command} on server #{server}" } if logger

    # Resolve the wire command id and version from constants by naming
    # convention (e.g. :search -> SEARCHD_COMMAND_SEARCH / VER_COMMAND_SEARCH).
    cmd = command.to_s.upcase
    command_id = Sphinx::Client.const_get("SEARCHD_COMMAND_#{cmd}")
    command_ver = Sphinx::Client.const_get("VER_COMMAND_#{cmd}")

    with_socket(server) do |socket|
      # Header layout: command id (16 bit), command version (16 bit),
      # body length (32 bit), all in network byte order. The optional
      # +additional+ integer adds 4 bytes between header and body.
      len = request.to_s.length + (additional.nil? ? 0 : 4)
      header = [command_id, command_ver, len].pack('nnN')
      header << [additional].pack('N') unless additional.nil?

      socket.write(header + request.to_s)

      if block_given?
        # Persistent-connection mode: yield the raw socket to the caller
        # instead of parsing the reply (used by #open).
        yield server, socket
      else
        parse_response(socket, command_ver)
      end
    end
  end
end
|
2183
|
+
|
2184
|
+
# This is internal method which gets and parses response packet from
|
2185
|
+
# searchd server.
|
2186
|
+
#
|
2187
|
+
# There are several exceptions which could be thrown in this method:
|
2188
|
+
#
|
2189
|
+
# @param [Sphinx::BufferedIO] socket an input stream object.
|
2190
|
+
# @param [Integer] client_version a command version which client supports.
|
2191
|
+
# @return [Sphinx::Response] could be used for context-based
|
2192
|
+
# parsing of reply from the server.
|
2193
|
+
#
|
2194
|
+
# @raise [SystemCallError, SocketError] should be handled by caller (see {#with_socket}).
|
2195
|
+
# @raise [SphinxResponseError] incomplete reply from searchd.
|
2196
|
+
# @raise [SphinxInternalError] searchd internal error.
|
2197
|
+
# @raise [SphinxTemporaryError] searchd temporary error.
|
2198
|
+
# @raise [SphinxUnknownError] searchd unknown error.
|
2199
|
+
#
|
2200
|
+
# @see #with_socket
|
2201
|
+
# @private
|
2202
|
+
#
|
2203
|
+
def parse_response(socket, client_version)
  response = ''
  status = ver = len = 0

  # Read the 8-byte reply header: status (16 bit), version (16 bit) and
  # body length (32 bit, network byte order), then the body itself.
  # Socket-level exceptions are handled by {#with_socket}.
  header = socket.read(8)
  if header.length == 8
    status, ver, len = header.unpack('n2N')
    response = socket.read(len) if len > 0
  end

  # check response: body must be non-empty and exactly +len+ bytes long
  read = response.length
  if response.empty? or read != len.to_i
    error = len > 0 \
      ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
      : 'received zero-sized searchd response'
    raise SphinxResponseError, error
  end

  # check status
  if (status == SEARCHD_WARNING)
    # Body starts with a length-prefixed warning message: stash the warning
    # and hand back the remainder of the body. Fixed: previously the raw
    # String was returned here, but every caller invokes Response#get_* on
    # the result (see #update_attributes, #status), so the warning path
    # must wrap the payload in a Response just like the success path.
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return Response.new(response[4 + wlen, response.length - 4 - wlen])
  end

  if status == SEARCHD_ERROR
    error = 'searchd error: ' + response[4, response.length - 4]
    raise SphinxInternalError, error
  end

  if status == SEARCHD_RETRY
    error = 'temporary searchd error: ' + response[4, response.length - 4]
    raise SphinxTemporaryError, error
  end

  unless status == SEARCHD_OK
    error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, error
  end

  # check version: warn when the server speaks an older command version
  # than this client supports; some options might be silently ignored.
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
      "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  Response.new(response)
end
|
2253
|
+
|
2254
|
+
# This is internal method which selects next server (round-robin)
|
2255
|
+
# and yields it to the block passed.
|
2256
|
+
#
|
2257
|
+
# In case of connection error, it will try next server several times
|
2258
|
+
# (see {#set_connect_timeout} method details). If all servers are down,
|
2259
|
+
# it will set error attribute (could be retrieved with {#last_error}
|
2260
|
+
# method) with the last exception message, and {#connect_error?}
|
2261
|
+
# method will return true. Also, {SphinxConnectError} exception
|
2262
|
+
# will be raised.
|
2263
|
+
#
|
2264
|
+
# @overload with_server(server_index)
|
2265
|
+
# Get the server based on some seed value (usually CRC32 of
|
2266
|
+
#   request. In this case initial server will be chosen using
|
2267
|
+
#   this seed value, in case of connection failure next server
|
2268
|
+
# in servers list will be used).
|
2269
|
+
# @param [Integer] server_index server index, must be any
|
2270
|
+
# integer value (not necessarily less than number of servers.)
|
2271
|
+
# @param [Integer] attempts how many retries to perform. Use
|
2272
|
+
# +nil+ to perform retries configured with {#set_connect_timeout}.
|
2273
|
+
# @overload with_server(server)
|
2274
|
+
# Get the server specified as a parameter. If specified, request
|
2275
|
+
# will be performed on specified server, and it will try to
|
2276
|
+
# establish connection to this server only once.
|
2277
|
+
# @param [Server] server server to perform request on.
|
2278
|
+
# @param [Integer] attempts how many retries to perform. Use
|
2279
|
+
# +nil+ to perform retries configured with {#set_connect_timeout}.
|
2280
|
+
#
|
2281
|
+
# @yield a block which performs request on a given server.
|
2282
|
+
# @yieldparam [Sphinx::Server] server contains information
|
2283
|
+
# about the server to perform request on.
|
2284
|
+
# @raise [SphinxConnectError] on any connection error.
|
2285
|
+
#
|
2286
|
+
def with_server(server = nil, attempts = nil)
  # Resolve the starting server and its index in @servers. An Integer
  # argument is a seed (e.g. request CRC32), not a direct index, so it is
  # reduced modulo the number of servers.
  case server
  when Server
    idx = @servers.index(server) || 0
    s = server
  when Integer
    idx = server % @servers.size
    s = @servers[idx]
  when NilClass
    idx = 0
    s = @servers[idx]
  else
    raise ArgumentError, 'server argument must be Integer or Sphinx::Server'
  end
  # nil attempts falls back to the retry count from #set_connect_timeout.
  attempts ||= @retries
  begin
    yield s
  rescue SphinxConnectError => e
    logger.warn { "[sphinx] server failed: #{e.class.name}: #{e.message}" } if logger
    # Connection error! Do we need to try it again?
    attempts -= 1
    if attempts > 0
      logger.info { "[sphinx] connection to server #{s.inspect} DIED! Retrying operation..." } if logger
      # Get the next server (round-robin over @servers)
      idx = (idx + 1) % @servers.size
      s = @servers[idx]
      retry
    end

    # All retries exhausted: record the failure so #last_error and
    # #connect_error? reflect it, then re-raise the original exception.
    @error = e.message
    @connerror = true
    raise
  end
end
|
2321
|
+
|
2322
|
+
# This is internal method which retrieves socket for a given server,
|
2323
|
+
# initiates Sphinx session, and yields this socket to a block passed.
|
2324
|
+
#
|
2325
|
+
# In case of any problems with session initiation, {SphinxConnectError}
|
2326
|
+
# will be raised, because this is part of connection establishing. See
|
2327
|
+
# {#with_server} method details to get more information about how this
|
2328
|
+
# exception is handled.
|
2329
|
+
#
|
2330
|
+
# Socket retrieving routine is wrapped in a block with it's own
|
2331
|
+
# timeout value (see {#set_connect_timeout}). This is done in
|
2332
|
+
# {Server#get_socket} method, so check it for details.
|
2333
|
+
#
|
2334
|
+
# Request execution is wrapped with block with another timeout
|
2335
|
+
# (see {#set_request_timeout}). This ensures no Sphinx request will
|
2336
|
+
# take unreasonable time.
|
2337
|
+
#
|
2338
|
+
# In case of any Sphinx error (incomplete reply, internal or temporary
|
2339
|
+
# error), connection to the server will be re-established, and request
|
2340
|
+
# will be retried (see {#set_request_timeout}). Of course, if connection
|
2341
|
+
# could not be established, next server will be selected (see explanation
|
2342
|
+
# above).
|
2343
|
+
#
|
2344
|
+
# @param [Sphinx::Server] server contains information
|
2345
|
+
# about the server to perform request on.
|
2346
|
+
# @yield a block which will actually perform the request.
|
2347
|
+
# @yieldparam [Sphinx::BufferedIO] socket a socket used to
|
2348
|
+
# perform the request.
|
2349
|
+
#
|
2350
|
+
# @raise [SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError]
|
2351
|
+
# on any response error.
|
2352
|
+
# @raise [SphinxConnectError] on any connection error.
|
2353
|
+
#
|
2354
|
+
def with_socket(server)
  # Request-level retry budget (configured via #set_request_timeout).
  attempts = @reqretries
  socket = nil

  begin
    s = server.get_socket do |sock|
      # Remember socket to close it in case of emergency
      socket = sock

      # send my version
      # this is a subtle part. we must do it before (!) reading back from searchd.
      # because otherwise under some conditions (reported on FreeBSD for instance)
      # TCP stack could throttle write-write-read pattern because of Nagle.
      sock.write([1].pack('N'))
      v = sock.read(4).unpack('N*').first

      # Ouch, invalid protocol!
      if v < 1
        raise SphinxConnectError, "expected searchd protocol version 1+, got version '#{v}'"
      end
    end

    # Run the actual request under the request timeout (#set_request_timeout).
    Sphinx::safe_execute(@reqtimeout) do
      yield s
    end
  rescue SocketError, SystemCallError, IOError, ::Errno::EPIPE => e
    logger.warn { "[sphinx] socket failure: #{e.message}" } if logger
    # Ouch, communication problem, will be treated as a connection problem.
    # SphinxConnectError triggers server failover in #with_server.
    raise SphinxConnectError, "failed to read searchd response (msg=#{e.message})"
  rescue SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError, ::Timeout::Error, EOFError => e
    # EOFError should not occur in ideal world, because we compare response length
    # with a value passed by Sphinx. But we want to ensure that client will not
    # fail with unexpected error when Sphinx implementation has bugs, aren't we?
    # Timeout and EOF are normalized to SphinxResponseError, keeping the
    # original backtrace.
    if e.kind_of?(EOFError) or e.kind_of?(::Timeout::Error)
      new_e = SphinxResponseError.new("failed to read searchd response (msg=#{e.message})")
      new_e.set_backtrace(e.backtrace)
      e = new_e
    end
    logger.warn { "[sphinx] generic failure: #{e.class.name}: #{e.message}" } if logger

    # Close previously opened socket (in case of it has been really opened)
    server.free_socket(socket)

    # Request error! Do we need to try it again?
    attempts -= 1
    retry if attempts > 0

    # Re-raise original exception
    @error = e.message
    raise e
  ensure
    # Close previously opened socket on any other error.
    # NOTE(review): on the error path free_socket runs both here and in the
    # rescue above -- presumably Server#free_socket is idempotent; verify.
    server.free_socket(socket)
  end
end
|
2409
|
+
|
2410
|
+
# Enables ability to skip +set_+ prefix for methods inside {#query} block.
|
2411
|
+
#
|
2412
|
+
# @example
|
2413
|
+
# sphinx.query('test') do
|
2414
|
+
# match_mode :all
|
2415
|
+
# id_range 10, 100
|
2416
|
+
# end
|
2417
|
+
#
|
2418
|
+
def method_missing(method_id, *arguments, &block)
|
2419
|
+
if @inside_eval and self.respond_to?("set_#{method_id}")
|
2420
|
+
self.send("set_#{method_id}", *arguments)
|
2421
|
+
else
|
2422
|
+
super
|
2423
|
+
end
|
2424
|
+
end
|
2425
|
+
end
|
2426
|
+
end
|