sphinx 0.9.10 → 0.9.10.2043
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -2
- data/Rakefile +1 -1
- data/VERSION.yml +1 -0
- data/lib/sphinx.rb +9 -3
- data/lib/sphinx/buffered_io.rb +22 -0
- data/lib/sphinx/client.rb +661 -259
- data/lib/sphinx/server.rb +167 -0
- data/lib/sphinx/timeout.rb +28 -0
- data/spec/client_response_spec.rb +29 -28
- data/spec/client_spec.rb +650 -471
- data/spec/client_validations_spec.rb +850 -0
- data/spec/spec_helper.rb +24 -0
- data/sphinx.gemspec +11 -7
- metadata +9 -5
- data/install.rb +0 -5
- data/sphinx.yml.tpl +0 -3
- data/tasks/sphinx.rake +0 -75
data/.gitignore
CHANGED
@@ -1,2 +1,2 @@
-
-pkg
+rdoc
+pkg
data/Rakefile
CHANGED
@@ -31,6 +31,6 @@ Rake::RDocTask.new(:rdoc) do |rdoc|
   rdoc.rdoc_dir = 'rdoc'
   rdoc.title = 'Sphinx Client API'
   rdoc.options << '--line-numbers' << '--inline-source'
-  rdoc.rdoc_files.include('README')
+  rdoc.rdoc_files.include('README.rdoc')
   rdoc.rdoc_files.include('lib/**/*.rb')
 end
data/VERSION.yml
CHANGED
data/lib/sphinx.rb
CHANGED
@@ -1,6 +1,12 @@
+require 'socket'
+require 'net/protocol'
+
+module Sphinx
+end
+
 require File.dirname(__FILE__) + '/sphinx/request'
 require File.dirname(__FILE__) + '/sphinx/response'
+require File.dirname(__FILE__) + '/sphinx/timeout'
+require File.dirname(__FILE__) + '/sphinx/buffered_io'
+require File.dirname(__FILE__) + '/sphinx/server'
 require File.dirname(__FILE__) + '/sphinx/client'
-
-module Sphinx
-end
data/lib/sphinx/buffered_io.rb
ADDED
@@ -0,0 +1,22 @@
+class Sphinx::BufferedIO < Net::BufferedIO # :nodoc:
+  BUFSIZE = 1024 * 16
+
+  if RUBY_VERSION < '1.9.1'
+    def rbuf_fill
+      begin
+        @rbuf << @io.read_nonblock(BUFSIZE)
+      rescue Errno::EWOULDBLOCK
+        retry unless @read_timeout
+        if IO.select([@io], nil, nil, @read_timeout)
+          retry
+        else
+          raise Timeout::Error, 'IO timeout'
+        end
+      end
+    end
+  end
+
+  def setsockopt(*args)
+    @io.setsockopt(*args)
+  end
+end
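For illustration only, a minimal sketch of wrapping a plain TCP socket with this class; the host, port, and socket option below are assumptions for the example, not taken from this diff:

    require 'socket'

    raw = TCPSocket.new('localhost', 3312)                      # assumed searchd host/port
    io  = Sphinx::BufferedIO.new(raw)
    io.read_timeout = 5                                         # consulted by rbuf_fill on Ruby < 1.9.1
    io.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, 1)  # delegated to the underlying socket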
data/lib/sphinx/client.rb
CHANGED
@@ -24,13 +24,10 @@
 # docs = posts.map(&:body)
 # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
 
-require 'socket'
-
 module Sphinx
   # :stopdoc:
 
   class SphinxError < StandardError; end
-  class SphinxArgumentError < SphinxError; end
   class SphinxConnectError < SphinxError; end
   class SphinxResponseError < SphinxError; end
   class SphinxInternalError < SphinxError; end
@@ -40,7 +37,6 @@ module Sphinx
   # :startdoc:
 
   class Client
-
     # :stopdoc:
 
     # Known searchd commands
@@ -91,6 +87,12 @@ module Sphinx
     SEARCHD_RETRY = 2
     # general success, warning message and command-specific reply follow
     SEARCHD_WARNING = 3
+
+    attr_reader :servers
+    attr_reader :timeout
+    attr_reader :retries
+    attr_reader :reqtimeout
+    attr_reader :reqretries
 
     # :startdoc:
 
@@ -191,12 +193,6 @@ module Sphinx
 
     # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
     def initialize
-      # per-client-object settings
-      @host = 'localhost' # searchd host (default is "localhost")
-      @port = 3312 # searchd port (default is 3312)
-      @path = false
-      @socket = false
-
       # per-query settings
       @offset = 0 # how many records to seek from result-set start (default is 0)
       @limit = 20 # how many records to return from result-set starting at offset (default is 20)
@@ -231,56 +227,204 @@ module Sphinx
       @reqs = [] # requests storage (for multi-query case)
       @mbenc = '' # stored mbstring encoding
       @timeout = 0 # connect timeout
+      @retries = 1 # number of connect retries in case of emergency
+      @reqtimeout = 0 # request timeout
+      @reqretries = 1 # number of request retries in case of emergency
+
+      # per-client-object settings
+      # searchd servers list
+      @servers = [Sphinx::Server.new(self, 'localhost', 3312, false)].freeze
+      @lastserver = -1
     end
 
-    #
+    # Returns last error message, as a string, in human readable format. If there
+    # were no errors during the previous API call, empty string is returned.
+    #
+    # You should call it when any other function (such as +Query+) fails (typically,
+    # the failing function returns false). The returned string will contain the
+    # error description.
+    #
+    # The error message is not reset by this call; so you can safely call it
+    # several times if needed.
+    #
     def GetLastError
       @error
     end
 
-    #
+    # Returns last warning message, as a string, in human readable format. If there
+    # were no warnings during the previous API call, empty string is returned.
+    #
+    # You should call it to verify whether your request (such as +Query+) was
+    # completed but with warnings. For instance, search query against a distributed
+    # index might complete succesfully even if several remote agents timed out.
+    # In that case, a warning message would be produced.
+    #
+    # The warning message is not reset by this call; so you can safely call it
+    # several times if needed.
+    #
     def GetLastWarning
       @warning
     end
 
-    #
-    # searchd
+    # Checks whether the last error was a network error on API side, or a
+    # remote error reported by searchd. Returns true if the last connection
+    # attempt to searchd failed on API side, false otherwise (if the error
+    # was remote, or there were no connection attempts at all).
+    #
     def IsConnectError
-      @connerror
+      @connerror || false
     end
 
-    #
+    # Sets searchd host name and TCP port. All subsequent requests will
+    # use the new host and port settings. Default +host+ and +port+ are
+    # 'localhost' and 3312, respectively.
+    #
+    # Also, you can specify an absolute path to Sphinx's UNIX socket as +host+,
+    # in this case pass port as +0+ or +nil+.
+    #
     def SetServer(host, port)
-
+      raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)
 
+      path = nil
+      # Check if UNIX socket should be used
       if host[0] == ?/
-
-        return
+        path = host
       elsif host[0, 7] == 'unix://'
-
+        path = host[7..-1]
+      else
+        raise ArgumentError, '"port" argument must be Integer' unless port.respond_to?(:integer?) and port.integer?
       end
+
+      host = port = nil unless path.nil?
+
+      @servers = [Sphinx::Server.new(self, host, port, path)].freeze
+    end
+
+    # Sets the list of searchd servers. Each subsequent request will use next
+    # server in list (round-robin). In case of one server failure, request could
+    # be retried on another server (see +SetConnectTimeout+ and +SetRequestTimeout+).
+    #
+    # Method accepts an +Array+ of +Hash+es, each of them should have :host
+    # and :port (to connect to searchd through network) or :path (an absolute path
+    # to UNIX socket) specified.
+    #
+    def SetServers(servers)
+      raise ArgumentError, '"servers" argument must be Array' unless servers.kind_of?(Array)
+      raise ArgumentError, '"servers" argument must be not empty' if servers.empty?
 
-
+      @servers = servers.map do |server|
+        raise ArgumentError, '"servers" argument must be Array of Hashes' unless server.kind_of?(Hash)
+
+        host = server[:path] || server['path'] || server[:host] || server['host']
+        port = server[:port] || server['port']
+        path = nil
+        raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)
+
+        # Check if UNIX socket should be used
+        if host[0] == ?/
+          path = host
+        elsif host[0, 7] == 'unix://'
+          path = host[7..-1]
+        else
+          raise ArgumentError, '"port" argument must be Integer' unless port.respond_to?(:integer?) and port.integer?
+        end
 
-
-
+        host = port = nil unless path.nil?
+
+        Sphinx::Server.new(self, host, port, path)
+      end.freeze
     end
 
-
-
+    # Sets the time allowed to spend connecting to the server before giving up
+    # and number of retries to perform.
+    #
+    # In the event of a failure to connect, an appropriate error code should
+    # be returned back to the application in order for application-level error
+    # handling to advise the user.
+    #
+    # When multiple servers configured through +SetServers+ method, and +retries+
+    # number is greater than 1, library will try to connect to another server.
+    # In case of single server configured, it will try to reconnect +retries+
+    # times.
+    #
+    # Please note, this timeout will only be used for connection establishing, not
+    # for regular API requests.
+    #
+    def SetConnectTimeout(timeout, retries = 1)
+      raise ArgumentError, '"timeout" argument must be Integer' unless timeout.respond_to?(:integer?) and timeout.integer?
+      raise ArgumentError, '"retries" argument must be Integer' unless retries.respond_to?(:integer?) and retries.integer?
+      raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0
 
       @timeout = timeout
+      @retries = retries
+    end
+
+    # Sets the time allowed to spend performing request to the server before giving up
+    # and number of retries to perform.
+    #
+    # In the event of a failure to do request, an appropriate error code should
+    # be returned back to the application in order for application-level error
+    # handling to advise the user.
+    #
+    # When multiple servers configured through +SetServers+ method, and +retries+
+    # number is greater than 1, library will try to do another try with this server
+    # (with full reconnect). If connection would fail, behavior depends on
+    # +SetConnectTimeout+ settings.
+    #
+    # Please note, this timeout will only be used for request performing, not
+    # for connection establishing.
+    #
+    def SetRequestTimeout(timeout, retries = 1)
+      raise ArgumentError, '"timeout" argument must be Integer' unless timeout.respond_to?(:integer?) and timeout.integer?
+      raise ArgumentError, '"retries" argument must be Integer' unless retries.respond_to?(:integer?) and retries.integer?
+      raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0
+
+      @reqtimeout = timeout
+      @reqretries = retries
     end
 
-    #
-    #
+    # Sets offset into server-side result set (+offset+) and amount of matches to
+    # return to client starting from that offset (+limit+). Can additionally control
+    # maximum server-side result set size for current query (+max_matches+) and the
+    # threshold amount of matches to stop searching at (+cutoff+). All parameters
+    # must be non-negative integers.
+    #
+    # First two parameters to +SetLimits+ are identical in behavior to MySQL LIMIT
+    # clause. They instruct searchd to return at most +limit+ matches starting from
+    # match number +offset+. The default offset and limit settings are +0+ and +20+,
+    # that is, to return first +20+ matches.
+    #
+    # +max_matches+ setting controls how much matches searchd will keep in RAM
+    # while searching. All matching documents will be normally processed, ranked,
+    # filtered, and sorted even if max_matches is set to +1+. But only best +N+
+    # documents are stored in memory at any given moment for performance and RAM
+    # usage reasons, and this setting controls that N. Note that there are two
+    # places where max_matches limit is enforced. Per-query limit is controlled
+    # by this API call, but there also is per-server limit controlled by +max_matches+
+    # setting in the config file. To prevent RAM usage abuse, server will not
+    # allow to set per-query limit higher than the per-server limit.
+    #
+    # You can't retrieve more than +max_matches+ matches to the client application.
+    # The default limit is set to +1000+. Normally, you must not have to go over
+    # this limit. One thousand records is enough to present to the end user.
+    # And if you're thinking about pulling the results to application for further
+    # sorting or filtering, that would be much more efficient if performed on
+    # Sphinx side.
+    #
+    # +cutoff+ setting is intended for advanced performance control. It tells
+    # searchd to forcibly stop search query once $cutoff matches had been found
+    # and processed.
+    #
     def SetLimits(offset, limit, max = 0, cutoff = 0)
-
-
-
-
-
-
+      raise ArgumentError, '"offset" argument must be Integer' unless offset.respond_to?(:integer?) and offset.integer?
+      raise ArgumentError, '"limit" argument must be Integer' unless limit.respond_to?(:integer?) and limit.integer?
+      raise ArgumentError, '"max" argument must be Integer' unless max.respond_to?(:integer?) and max.integer?
+      raise ArgumentError, '"cutoff" argument must be Integer' unless cutoff.respond_to?(:integer?) and cutoff.integer?
+
+      raise ArgumentError, '"offset" argument should be greater or equal to zero' unless offset >= 0
+      raise ArgumentError, '"limit" argument should be greater to zero' unless limit > 0
+      raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
+      raise ArgumentError, '"cutoff" argument should be greater or equal to zero' unless cutoff >= 0
 
       @offset = offset
       @limit = limit
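For illustration only, a usage sketch based on the method signatures and comments added above (host names, paths, and values are hypothetical):

    sphinx = Sphinx::Client.new

    # one server over TCP or a UNIX socket...
    sphinx.SetServer('unix:///var/run/searchd.sock', nil)

    # ...or several servers used round-robin
    sphinx.SetServers([
      { :host => 'searchd1.example.com', :port => 3312 },
      { :path => '/var/run/sphinx/searchd.sock' }
    ])

    sphinx.SetConnectTimeout(1, 3)  # 1 second to connect, up to 3 attempts
    sphinx.SetRequestTimeout(5, 2)  # 5 seconds per request, up to 2 attempts
    sphinx.SetLimits(0, 20, 1000)   # offset 0, 20 matches per page, max_matches 1000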
@@ -288,51 +432,92 @@ module Sphinx
       @cutoff = cutoff if cutoff > 0
     end
 
-    #
-    # integer
+    # Sets maximum search query time, in milliseconds. Parameter must be a
+    # non-negative integer. Default valus is +0+ which means "do not limit".
+    #
+    # Similar to +cutoff+ setting from +SetLimits+, but limits elapsed query
+    # time instead of processed matches count. Local search queries will be
+    # stopped once that much time has elapsed. Note that if you're performing
+    # a search which queries several local indexes, this limit applies to each
+    # index separately.
+    #
     def SetMaxQueryTime(max)
-
-
+      raise ArgumentError, '"max" argument must be Integer' unless max.respond_to?(:integer?) and max.integer?
+      raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
+
       @maxquerytime = max
     end
 
-    #
+    # Sets full-text query matching mode.
+    #
+    # Parameter must be a +Fixnum+ constant specifying one of the known modes
+    # (+SPH_MATCH_ALL+, +SPH_MATCH_ANY+, etc), +String+ with identifier (<tt>"all"</tt>,
+    # <tt>"any"</tt>, etc), or a +Symbol+ (<tt>:all</tt>, <tt>:any</tt>, etc).
+    #
+    # Corresponding sections in Sphinx reference manual:
+    # * {Section 4.1, "Matching modes"}[http://www.sphinxsearch.com/docs/current.html#matching-modes] for details.
+    # * {Section 6.3.1, "SetMatchMode"}[http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode] for details.
+    #
     def SetMatchMode(mode)
-
-
-
-
-
-
-
+      case mode
+      when String, Symbol
+        begin
+          mode = self.class.const_get("SPH_MATCH_#{mode.to_s.upcase}")
+        rescue NameError
+          raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
+        end
+      when Fixnum
+        raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_MATCH_ALL..SPH_MATCH_EXTENDED2).include?(mode)
+      else
+        raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
+      end
 
       @mode = mode
     end
 
     # Set ranking mode.
+    #
+    # You can specify ranking mode as String ("proximity_bm25", "bm25", etc),
+    # Symbol (:proximity_bm25, :bm25, etc), or
+    # Fixnum constant (SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, etc).
     def SetRankingMode(ranker)
-
-
-
-
-
-
-
-
+      case ranker
+      when String, Symbol
+        begin
+          ranker = self.class.const_get("SPH_RANK_#{ranker.to_s.upcase}")
+        rescue NameError
+          raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid"
+        end
+      when Fixnum
+        raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid" unless (SPH_RANK_PROXIMITY_BM25..SPH_RANK_SPH04).include?(ranker)
+      else
+        raise ArgumentError, '"ranker" argument must be Fixnum, String, or Symbol'
+      end
 
       @ranker = ranker
     end
 
     # Set matches sorting mode.
+    #
+    # You can specify sorting mode as String ("relevance", "attr_desc", etc),
+    # Symbol (:relevance, :attr_desc, etc), or
+    # Fixnum constant (SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, etc).
     def SetSortMode(mode, sortby = '')
-
-
-
-
-
-
-
-
+      case mode
+      when String, Symbol
+        begin
+          mode = self.class.const_get("SPH_SORT_#{mode.to_s.upcase}")
+        rescue NameError
+          raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
+        end
+      when Fixnum
+        raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_SORT_RELEVANCE..SPH_SORT_EXPR).include?(mode)
+      else
+        raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
+      end
+
+      raise ArgumentError, '"sortby" argument must be String' unless sortby.kind_of?(String)
+      raise ArgumentError, '"sortby" should not be empty unless mode is SPH_SORT_RELEVANCE' unless mode == SPH_SORT_RELEVANCE or !sortby.empty?
 
       @sort = mode
       @sortby = sortby
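For illustration only, the three equivalent argument forms accepted by the mode setters above (the attribute name is hypothetical):

    sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_EXTENDED2)  # Fixnum constant
    sphinx.SetMatchMode('extended2')                          # String identifier
    sphinx.SetMatchMode(:extended2)                           # Symbol identifier

    sphinx.SetRankingMode(:bm25)
    sphinx.SetSortMode(:attr_desc, 'created_at')              # non-relevance modes require a sort-by clause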
@@ -342,9 +527,9 @@ module Sphinx
     #
     # DEPRECATED; use SetFieldWeights() instead.
     def SetWeights(weights)
-
+      raise ArgumentError, '"weights" argument must be Array' unless weights.kind_of?(Array)
       weights.each do |weight|
-
+        raise ArgumentError, '"weights" argument must be Array of integers' unless weight.respond_to?(:integer?) and weight.integer?
       end
 
       @weights = weights
@@ -352,15 +537,16 @@ module Sphinx
 
     # Bind per-field weights by name.
     #
-    # Takes string (field name) to integer
+    # Takes string (field name) to integer (field weight) hash as an argument.
     # * Takes precedence over SetWeights().
     # * Unknown names will be silently ignored.
     # * Unbound fields will be silently given a weight of 1.
     def SetFieldWeights(weights)
-
+      raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
       weights.each do |name, weight|
-
-
+        unless (name.kind_of?(String) or name.kind_of?(Symbol)) and (weight.respond_to?(:integer?) and weight.integer?)
+          raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
+        end
       end
 
       @fieldweights = weights
|
|
368
554
|
|
369
555
|
# Bind per-index weights by name.
|
370
556
|
def SetIndexWeights(weights)
|
371
|
-
|
557
|
+
raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
|
372
558
|
weights.each do |index, weight|
|
373
|
-
|
374
|
-
|
559
|
+
unless (index.kind_of?(String) or index.kind_of?(Symbol)) and (weight.respond_to?(:integer?) and weight.integer?)
|
560
|
+
raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
|
561
|
+
end
|
375
562
|
end
|
376
563
|
|
377
564
|
@indexweights = weights
|
@@ -381,9 +568,9 @@ module Sphinx
     #
     # Only match records if document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
     def SetIDRange(min, max)
-
-
-
+      raise ArgumentError, '"min" argument must be Integer' unless min.respond_to?(:integer?) and min.integer?
+      raise ArgumentError, '"max" argument must be Integer' unless max.respond_to?(:integer?) and max.integer?
+      raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
 
       @min_id = min
       @max_id = max
@@ -394,17 +581,16 @@ module Sphinx
     # Only match those records where <tt>attribute</tt> column values
     # are in specified set.
     def SetFilter(attribute, values, exclude = false)
-
-
-
+      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
+      raise ArgumentError, '"values" argument must be Array' unless values.kind_of?(Array)
+      raise ArgumentError, '"values" argument must not be empty' if values.empty?
+      raise ArgumentError, '"exclude" argument must be Boolean' unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)
 
-
-      values.
-        assert { value.instance_of? Fixnum }
-      end
-
-      @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
+      values.each do |value|
+        raise ArgumentError, '"values" argument must be Array of Integer' unless value.respond_to?(:integer?) and value.integer?
       end
+
+      @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute.to_s, 'exclude' => exclude, 'values' => values }
     end
 
     # Set range filter.
@@ -412,12 +598,13 @@ module Sphinx
     # Only match those records where <tt>attribute</tt> column value
     # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
     def SetFilterRange(attribute, min, max, exclude = false)
-
-
-
-
+      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
+      raise ArgumentError, '"min" argument must be Integer' unless min.respond_to?(:integer?) and min.integer?
+      raise ArgumentError, '"max" argument must be Integer' unless max.respond_to?(:integer?) and max.integer?
+      raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
+      raise ArgumentError, '"exclude" argument must be Boolean' unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)
 
-      @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
+      @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min, 'max' => max }
     end
 
     # Set float range filter.
@@ -425,12 +612,13 @@ module Sphinx
     # Only match those records where <tt>attribute</tt> column value
     # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
     def SetFilterFloatRange(attribute, min, max, exclude = false)
-
-
-
-
+      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
+      raise ArgumentError, '"min" argument must be Float or Integer' unless min.kind_of?(Float) or (min.respond_to?(:integer?) and min.integer?)
+      raise ArgumentError, '"max" argument must be Float or Integer' unless max.kind_of?(Float) or (max.respond_to?(:integer?) and max.integer?)
+      raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
+      raise ArgumentError, '"exclude" argument must be Boolean' unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)
 
-      @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
+      @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min.to_f, 'max' => max.to_f }
     end
 
     # Setup anchor point for geosphere distance calculations.
@@ -444,12 +632,12 @@ module Sphinx
     # * <tt>lat</tt> -- is anchor point latitude, in radians
     # * <tt>long</tt> -- is anchor point longitude, in radians
     def SetGeoAnchor(attrlat, attrlong, lat, long)
-
-
-
-
+      raise ArgumentError, '"attrlat" argument must be String or Symbol' unless attrlat.kind_of?(String) or attrlat.kind_of?(Symbol)
+      raise ArgumentError, '"attrlong" argument must be String or Symbol' unless attrlong.kind_of?(String) or attrlong.kind_of?(Symbol)
+      raise ArgumentError, '"lat" argument must be Float or Integer' unless lat.kind_of?(Float) or (lat.respond_to?(:integer?) and lat.integer?)
+      raise ArgumentError, '"long" argument must be Float or Integer' unless long.kind_of?(Float) or (long.respond_to?(:integer?) and long.integer?)
 
-      @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
+      @anchor = { 'attrlat' => attrlat.to_s, 'attrlong' => attrlong.to_s, 'lat' => lat.to_f, 'long' => long.to_f }
     end
 
     # Set grouping attribute and function.
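For illustration only, a filter setup sketch exercising the validations added above (attribute names and values are hypothetical):

    sphinx.SetFilter(:group_id, [1, 2, 3])                    # keep documents whose group_id is 1, 2 or 3
    sphinx.SetFilter(:deleted, [1], true)                     # exclude documents flagged as deleted
    sphinx.SetFilterRange(:year, 2000, 2009)                  # 2000 <= year <= 2009
    sphinx.SetFilterFloatRange(:price, 0, 99.9)               # Float or Integer bounds are accepted
    sphinx.SetGeoAnchor(:lat_rad, :long_rad, 0.659, -2.136)   # anchor point in radians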
@@ -489,60 +677,167 @@ module Sphinx
     # matches published, with day number and per-day match count attached,
     # and sorted by day number in descending order (ie. recent days first).
     def SetGroupBy(attribute, func, groupsort = '@group desc')
-
-
-
-
-
-
-
-
+      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
+      raise ArgumentError, '"groupsort" argument must be String' unless groupsort.kind_of?(String)
+
+      case func
+      when String, Symbol
+        begin
+          func = self.class.const_get("SPH_GROUPBY_#{func.to_s.upcase}")
+        rescue NameError
+          raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid"
+        end
+      when Fixnum
+        raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid" unless (SPH_GROUPBY_DAY..SPH_GROUPBY_ATTRPAIR).include?(func)
+      else
+        raise ArgumentError, '"func" argument must be Fixnum, String, or Symbol'
+      end
 
-      @groupby = attribute
+      @groupby = attribute.to_s
       @groupfunc = func
       @groupsort = groupsort
     end
 
     # Set count-distinct attribute for group-by queries.
     def SetGroupDistinct(attribute)
-
-
+      raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
+
+      @groupdistinct = attribute.to_s
     end
 
-    #
+    # Sets distributed retry count and delay.
+    #
+    # On temporary failures searchd will attempt up to +count+ retries per
+    # agent. +delay+ is the delay between the retries, in milliseconds. Retries
+    # are disabled by default. Note that this call will not make the API itself
+    # retry on temporary failure; it only tells searchd to do so. Currently,
+    # the list of temporary failures includes all kinds of +connect+
+    # failures and maxed out (too busy) remote agents.
+    #
     def SetRetries(count, delay = 0)
-
-
+      raise ArgumentError, '"count" argument must be Integer' unless count.respond_to?(:integer?) and count.integer?
+      raise ArgumentError, '"delay" argument must be Integer' unless delay.respond_to?(:integer?) and delay.integer?
 
       @retrycount = count
       @retrydelay = delay
     end
 
-    #
+    # Sets temporary (per-query) per-document attribute value overrides. Only
+    # supports scalar attributes. +values+ must be a +Hash+ that maps document
+    # IDs to overridden attribute values.
+    #
+    # Override feature lets you "temporary" update attribute values for some
+    # documents within a single query, leaving all other queries unaffected.
+    # This might be useful for personalized data. For example, assume you're
+    # implementing a personalized search function that wants to boost the posts
+    # that the user's friends recommend. Such data is not just dynamic, but
+    # also personal; so you can't simply put it in the index because you don't
+    # want everyone's searches affected. Overrides, on the other hand, are local
+    # to a single query and invisible to everyone else. So you can, say, setup
+    # a "friends_weight" value for every document, defaulting to 0, then
+    # temporary override it with 1 for documents 123, 456 and 789 (recommended
+    # by exactly the friends of current user), and use that value when ranking.
     #
-    # There can be only one override per attribute.
-    # +values+ must be a hash that maps document IDs to attribute values.
     def SetOverride(attrname, attrtype, values)
-
-
-
+      raise ArgumentError, '"attrname" argument must be String or Symbol' unless attrname.kind_of?(String) or attrname.kind_of?(Symbol)
+
+      case attrtype
+      when String, Symbol
+        begin
+          attrtype = self.class.const_get("SPH_ATTR_#{attrtype.to_s.upcase}")
+        rescue NameError
+          raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid"
+        end
+      when Fixnum
+        raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid" unless (SPH_ATTR_INTEGER..SPH_ATTR_BIGINT).include?(attrtype)
+      else
+        raise ArgumentError, '"attrtype" argument must be Fixnum, String, or Symbol'
+      end
 
-
+      raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
+
+      values.each do |id, value|
+        raise ArgumentError, '"values" argument must be Hash map of Integer to Integer or Time' unless id.respond_to?(:integer?) and id.integer?
+        case attrtype
+        when SPH_ATTR_TIMESTAMP
+          raise ArgumentError, '"values" argument must be Hash map of Integer to Integer or Time' unless (value.respond_to?(:integer?) and value.integer?) or value.kind_of?(Time)
+        when SPH_ATTR_FLOAT
+          raise ArgumentError, '"values" argument must be Hash map of Integer to Float or Integer' unless value.kind_of?(Float) or (value.respond_to?(:integer?) and value.integer?)
+        else
+          # SPH_ATTR_INTEGER, SPH_ATTR_ORDINAL, SPH_ATTR_BOOL, SPH_ATTR_BIGINT
+          raise ArgumentError, '"values" argument must be Hash map of Integer to Integer' unless value.respond_to?(:integer?) and value.integer?
+        end
+      end
+
+      @overrides << { 'attr' => attrname.to_s, 'type' => attrtype, 'values' => values }
     end
 
-    #
+    # Sets the select clause, listing specific attributes to fetch, and
+    # expressions to compute and fetch. Clause syntax mimics SQL.
+    #
+    # +SetSelect+ is very similar to the part of a typical SQL query between
+    # +SELECT+ and +FROM+. It lets you choose what attributes (columns) to
+    # fetch, and also what expressions over the columns to compute and fetch.
+    # A certain difference from SQL is that expressions must always be aliased
+    # to a correct identifier (consisting of letters and digits) using +AS+
+    # keyword. SQL also lets you do that but does not require to. Sphinx enforces
+    # aliases so that the computation results can always be returned under a
+    # "normal" name in the result set, used in other clauses, etc.
+    #
+    # Everything else is basically identical to SQL. Star ('*') is supported.
+    # Functions are supported. Arbitrary amount of expressions is supported.
+    # Computed expressions can be used for sorting, filtering, and grouping,
+    # just as the regular attributes.
+    #
+    # Starting with version 0.9.9-rc2, aggregate functions (<tt>AVG()</tt>,
+    # <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are supported when using
+    # <tt>GROUP BY</tt>.
+    #
+    # Expression sorting (Section 4.5, “SPH_SORT_EXPR mode”) and geodistance
+    # functions (+SetGeoAnchor+) are now internally implemented
+    # using this computed expressions mechanism, using magic names '<tt>@expr</tt>'
+    # and '<tt>@geodist</tt>' respectively.
+    #
+    # Usage example:
+    #
+    # sphinx.SetSelect('*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight')
+    # sphinx.SetSelect('exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd, IF(age>40,1,0) AS over40')
+    # sphinx.SetSelect('*, AVG(price) AS avgprice')
+    #
     def SetSelect(select)
-
+      raise ArgumentError, '"select" argument must be String' unless select.kind_of?(String)
+
       @select = select
     end
 
-    #
+    # Clears all currently set filters.
+    #
+    # This call is only normally required when using multi-queries. You might want
+    # to set different filters for different queries in the batch. To do that,
+    # you should call +ResetFilters+ and add new filters using the respective calls.
+    #
+    # Usage example:
+    #
+    # sphinx.ResetFilters
+    #
     def ResetFilters
       @filters = []
       @anchor = []
     end
 
-    #
+    # Clears all currently group-by settings, and disables group-by.
+    #
+    # This call is only normally required when using multi-queries. You can
+    # change individual group-by settings using +SetGroupBy+ and +SetGroupDistinct+
+    # calls, but you can not disable group-by using those calls. +ResetGroupBy+
+    # fully resets previous group-by settings and disables group-by mode in the
+    # current state, so that subsequent +AddQuery+ calls can perform non-grouping
+    # searches.
+    #
+    # Usage example:
+    #
+    # sphinx.ResetGroupBy
+    #
     def ResetGroupBy
       @groupby = ''
       @groupfunc = SPH_GROUPBY_DAY
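For illustration only, a sketch combining the per-query overrides, select clause, and grouping documented above (attribute names are hypothetical):

    # boost documents 123 and 456 for this query only
    sphinx.SetOverride(:friends_weight, :integer, { 123 => 1, 456 => 1 })
    sphinx.SetSelect('*, @weight + friends_weight * 10 AS myweight')

    # group matches by publication day, newest day first
    sphinx.SetGroupBy(:published_at, :day)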
@@ -582,7 +877,6 @@ module Sphinx
     # * <tt>'time'</tt> -- search time
     # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
     def Query(query, index = '*', comment = '')
-      assert { @reqs.empty? }
       @reqs = []
 
       self.AddQuery(query, index, comment)
@@ -667,7 +961,7 @@ module Sphinx
       # per-index weights
       request.put_int @indexweights.length
       @indexweights.each do |idx, weight|
-        request.put_string idx
+        request.put_string idx.to_s
         request.put_int weight
       end
 
@@ -677,7 +971,7 @@ module Sphinx
       # per-field weights
       request.put_int @fieldweights.length
       @fieldweights.each do |field, weight|
-        request.put_string field
+        request.put_string field.to_s
         request.put_int weight
       end
 
@@ -690,17 +984,14 @@ module Sphinx
         request.put_string entry['attr']
         request.put_int entry['type'], entry['values'].size
         entry['values'].each do |id, val|
-          assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
-          assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
-
           request.put_int64 id
           case entry['type']
           when SPH_ATTR_FLOAT
-            request.put_float val
+            request.put_float val.to_f
           when SPH_ATTR_BIGINT
-            request.put_int64 val
+            request.put_int64 val.to_i
           else
-            request.put_int val
+            request.put_int val.to_i
           end
         end
       end
@@ -723,6 +1014,7 @@ module Sphinx
     #
     # * <tt>'error'</tt> -- search error for this query
     # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
+    #
     def RunQueries
       if @reqs.empty?
         @error = 'No queries defined, issue AddQuery() first'
@@ -732,7 +1024,7 @@ module Sphinx
       req = @reqs.join('')
       nreqs = @reqs.length
       @reqs = []
-      response =
+      response = perform_request(:search, req, nreqs)
 
       # parse response
       begin
@@ -868,23 +1160,28 @@ module Sphinx
     #
     # Returns false on failure.
     # Returns an array of string excerpts on success.
+    #
     def BuildExcerpts(docs, index, words, opts = {})
-
-
-
-
+      raise ArgumentError, '"docs" argument must be Array' unless docs.kind_of?(Array)
+      raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
+      raise ArgumentError, '"words" argument must be String' unless words.kind_of?(String)
+      raise ArgumentError, '"opts" argument must be Hash' unless opts.kind_of?(Hash)
+
+      docs.each do |doc|
+        raise ArgumentError, '"docs" argument must be Array of Strings' unless doc.kind_of?(String)
+      end
 
       # fixup options
-      opts['before_match']
-      opts['after_match']
-      opts['chunk_separator'] ||= ' ... ';
-      opts['limit']
-      opts['around']
-      opts['exact_phrase']
-      opts['single_passage']
-      opts['use_boundaries']
-      opts['weight_order']
-      opts['query_mode']
+      opts['before_match'] ||= opts[:before_match] || '<b>';
+      opts['after_match'] ||= opts[:after_match] || '</b>';
+      opts['chunk_separator'] ||= opts[:chunk_separator] || ' ... ';
+      opts['limit'] ||= opts[:limit] || 256;
+      opts['around'] ||= opts[:around] || 5;
+      opts['exact_phrase'] ||= opts[:exact_phrase] || false
+      opts['single_passage'] ||= opts[:single_passage] || false
+      opts['use_boundaries'] ||= opts[:use_boundaries] || false
+      opts['weight_order'] ||= opts[:weight_order] || false
+      opts['query_mode'] ||= opts[:query_mode] || false
 
       # build request
 
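For illustration only, a call sketch showing that both string and symbol option keys are now accepted by the fixup above (the index name and text are hypothetical):

    docs     = ['this is my test text to be highlighted']
    excerpts = sphinx.BuildExcerpts(docs, 'index', 'test text',
                                    :before_match => '<em>', :after_match => '</em>', :around => 3)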
@@ -899,7 +1196,7 @@ module Sphinx
       request = Request.new
       request.put_int 0, flags # mode=0, flags=1 (remove spaces)
       # req index
-      request.put_string index
+      request.put_string index.to_s
       # req words
       request.put_string words
 
@@ -911,13 +1208,9 @@ module Sphinx
 
       # documents
       request.put_int docs.size
-      docs
-        assert { doc.instance_of? String }
-
-        request.put_string doc
-      end
+      request.put_string(*docs)
 
-      response =
+      response = perform_request(:excerpt, request)
 
       # parse response
       begin
@@ -936,9 +1229,9 @@ module Sphinx
     #
     # Returns an array of words on success.
     def BuildKeywords(query, index, hits)
-
-
-
+      raise ArgumentError, '"query" argument must be String' unless query.kind_of?(String)
+      raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
+      raise ArgumentError, '"hits" argument must be Boolean' unless hits.kind_of?(TrueClass) or hits.kind_of?(FalseClass)
 
       # build request
       request = Request.new
@@ -947,7 +1240,7 @@ module Sphinx
       request.put_string index # req index
       request.put_int hits ? 1 : 0
 
-      response =
+      response = perform_request(:keywords, request)
 
       # parse response
       begin
@@ -983,27 +1276,31 @@ module Sphinx
     #
     # Usage example:
     # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
+    # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [[456, 789]] }, true)
+    #
     def UpdateAttributes(index, attrs, values, mva = false)
       # verify everything
-
-
+      raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
+      raise ArgumentError, '"mva" argument must be Boolean' unless mva.kind_of?(TrueClass) or mva.kind_of?(FalseClass)
 
-
+      raise ArgumentError, '"attrs" argument must be Array' unless attrs.kind_of?(Array)
       attrs.each do |attr|
-
+        raise ArgumentError, '"attrs" argument must be Array of Strings' unless attr.kind_of?(String) or attr.kind_of?(Symbol)
       end
 
-
+      raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
       values.each do |id, entry|
-
-
-
+        raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless id.respond_to?(:integer?) and id.integer?
+        raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless entry.kind_of?(Array)
+        raise ArgumentError, "\"values\" argument Hash values Array must have #{attrs.length} elements" unless entry.length == attrs.length
         entry.each do |v|
           if mva
-
-            v.each
+            raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays' unless v.kind_of?(Array)
+            v.each do |vv|
+              raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays of Integers' unless vv.respond_to?(:integer?) and vv.integer?
+            end
           else
-
+            raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Integers' unless v.respond_to?(:integer?) and v.integer?
          end
        end
      end
@@ -1028,7 +1325,7 @@ module Sphinx
         end
       end
 
-      response =
+      response = perform_request(:update, request)
 
       # parse response
       begin
@@ -1041,35 +1338,57 @@ module Sphinx
 
     # persistent connections
 
+    # Opens persistent connection to the server.
+    #
     def Open
-
-      @error = '
+      if @servers.size > 1
+        @error = 'too many servers. persistent socket allowed only for a single server.'
         return false
       end
 
+      if @servers.first.persistent?
+        @error = 'already connected'
+        return false;
+      end
+
       request = Request.new
       request.put_int(1)
-
+
+      perform_request(:persist, request, nil) do |server, socket|
+        server.make_persistent!(socket)
+      end
 
       true
     end
 
+    # Closes previously opened persistent connection.
+    #
     def Close
-      if @
+      if @servers.size > 1
+        @error = 'too many servers. persistent socket allowed only for a single server.'
+        return false
+      end
+
+      unless @servers.first.persistent?
         @error = 'not connected'
         return false;
       end
 
-      @
-      @socket = false
-
-      true
+      @servers.first.close_persistent!
     end
 
+    # Queries searchd status, and returns an array of status variable name
+    # and value pairs.
+    #
+    # Usage example:
+    #
+    # status = sphinx.Status
+    # puts status.map { |key, value| "#{key.rjust(20)}: #{value}" }
+    #
     def Status
       request = Request.new
       request.put_int(1)
-      response =
+      response = perform_request(:status, request)
 
       # parse response
       begin
@@ -1092,7 +1411,7 @@ module Sphinx
 
     def FlushAttrs
       request = Request.new
-      response =
+      response = perform_request(:flushattrs, request)
 
       # parse response
       begin
@@ -1103,74 +1422,76 @@ module Sphinx
     end
 
     protected
-
-    # Connect
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+    # Connect, send query, get response.
+    #
+    # Use this method to communicate with Sphinx server. It ensures connection
+    # will be instantiated properly, all headers will be generated properly, etc.
+    #
+    # Parameters:
+    # * +command+ -- searchd command to perform (<tt>:search</tt>, <tt>:excerpt</tt>,
+    # <tt>:update</tt>, <tt>:keywords</tt>, <tt>:persist</tt>, <tt>:status</tt>,
+    # <tt>:query</tt>, <tt>:flushattrs</tt>. See <tt>SEARCHD_COMMAND_*</tt> for details).
+    # * +request+ -- an instance of <tt>Sphinx::Request</tt> class. Contains request body.
+    # * +additional+ -- additional integer data to be placed between header and body.
+    # * +block+ -- if given, response will not be parsed, plain socket will be
+    # passed instead. this is special mode used for persistent connections,
+    # do not use for other tasks.
+    #
+    def perform_request(command, request, additional = nil, &block)
+      with_server do |server|
+        cmd = command.to_s.upcase
+        command_id = Sphinx::Client.const_get("SEARCHD_COMMAND_#{cmd}")
+        command_ver = Sphinx::Client.const_get("VER_COMMAND_#{cmd}")
 
-
-
-
-
-
-
-
-
-
-
-
+        with_socket(server) do |socket|
+          len = request.to_s.length + (additional.nil? ? 0 : 4)
+          header = [command_id, command_ver, len].pack('nnN')
+          header << [additional].pack('N') unless additional.nil?
+
+          socket.write(header + request.to_s)
+
+          if block_given?
+            yield server, socket
+          else
+            parse_response(socket, command_ver)
+          end
+        end
       end
-
-      sock
     end
-
-    #
-
+
+    # This is internal method which gets and parses response packet from
+    # searchd server.
+    #
+    # There are several exceptions which could be thrown in this method:
+    #
+    # * various network errors -- should be handled by caller (see +with_socket+).
+    # * +SphinxResponseError+ -- incomplete reply from searchd.
+    # * +SphinxInternalError+ -- searchd error.
+    # * +SphinxTemporaryError+ -- temporary searchd error.
+    # * +SphinxUnknownError+ -- unknows searchd error.
+    #
+    # Method returns an instance of <tt>Sphinx::Response</tt> class, which
+    # could be used for context-based parsing of reply from the server.
+    #
+    def parse_response(socket, client_version)
       response = ''
-      len = 0
+      status = ver = len = 0
 
-
+      # Read server reply from server. All exceptions are handled by +with_socket+.
+      header = socket.read(8)
       if header.length == 8
         status, ver, len = header.unpack('n2N')
-
-        while left > 0 do
-          begin
-            chunk = sock.recv(left)
-            if chunk
-              response << chunk
-              left -= chunk.length
-            end
-          rescue EOFError
-            break
-          end
-        end
+        response = socket.read(len) if len > 0
       end
-      sock.close if @socket === false
 
       # check response
       read = response.length
       if response.empty? or read != len.to_i
-
+        error = len > 0 \
          ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
          : 'received zero-sized searchd response'
-        raise SphinxResponseError,
+        raise SphinxResponseError, error
      end
 
      # check status
@@ -1181,18 +1502,18 @@ module Sphinx
       end
 
       if status == SEARCHD_ERROR
-
-        raise SphinxInternalError,
+        error = 'searchd error: ' + response[4, response.length - 4]
+        raise SphinxInternalError, error
       end
 
       if status == SEARCHD_RETRY
-
-        raise SphinxTemporaryError,
+        error = 'temporary searchd error: ' + response[4, response.length - 4]
+        raise SphinxTemporaryError, error
       end
 
       unless status == SEARCHD_OK
-
-        raise SphinxUnknownError,
+        error = "unknown status code: '#{status}'"
+        raise SphinxUnknownError, error
       end
 
       # check version
@@ -1201,30 +1522,111 @@ module Sphinx
           "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
       end
 
-
+      Response.new(response)
     end
 
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # This is internal method which selects next server (round-robin)
+    # and yields it to the block passed.
+    #
+    # In case of connection error, it will try next server several times
+    # (see +SetConnectionTimeout+ method details). If all servers are down,
+    # it will set +error+ attribute value with the last exception message,
+    # and <tt>connection_timeout?</tt> method will return true. Also,
+    # +SphinxConnectErorr+ exception will be raised.
+    #
+    def with_server
+      attempts = @retries
+      begin
+        # Get the next server
+        @lastserver = (@lastserver + 1) % @servers.size
+        server = @servers[@lastserver]
+        yield server
+      rescue SphinxConnectError => e
+        # Connection error! Do we need to try it again?
+        attempts -= 1
+        retry if attempts > 0
+
+        # Re-raise original exception
+        @error = e.message
+        @connerror = true
+        raise
+      end
     end
 
-    #
-
-
+    # This is internal method which retrieves socket for a given server,
+    # initiates Sphinx session, and yields this socket to a block passed.
+    #
+    # In case of any problems with session initiation, +SphinxConnectError+
+    # will be raised, because this is part of connection establishing. See
+    # +with_server+ method details to get more infromation about how this
+    # exception is handled.
+    #
+    # Socket retrieving routine is wrapped in a block with it's own
+    # timeout value (see +SetConnectTimeout+). This is done in
+    # <tt>Server#get_socket</tt> method, so check it for details.
+    #
+    # Request execution is wrapped with block with another timeout
+    # (see +SetRequestTimeout+). This ensures no Sphinx request will
+    # take unreasonable time.
+    #
+    # In case of any Sphinx error (incomplete reply, internal or temporary
+    # error), connection to the server will be re-established, and request
+    # will be retried (see +SetRequestTimeout+). Of course, if connection
+    # could not be established, next server will be selected (see explanation
+    # above).
+    #
+    def with_socket(server)
+      attempts = @reqretries
+      socket = nil
+
+      begin
+        s = server.get_socket do |sock|
+          # Remember socket to close it in case of emergency
+          socket = sock
+
+          # send my version
+          # this is a subtle part. we must do it before (!) reading back from searchd.
+          # because otherwise under some conditions (reported on FreeBSD for instance)
+          # TCP stack could throttle write-write-read pattern because of Nagle.
+          sock.write([1].pack('N'))
+          v = sock.read(4).unpack('N*').first
+
+          # Ouch, invalid protocol!
+          if v < 1
+            raise SphinxConnectError, "expected searchd protocol version 1+, got version '#{v}'"
+          end
+        end
+
+        Sphinx::safe_execute(@reqtimeout) do
+          yield s
+        end
+      rescue SocketError, SystemCallError, IOError, ::Errno::EPIPE => e
+        # Ouch, communication problem, will be treated as a connection problem.
+        raise SphinxConnectError, "failed to read searchd response (msg=#{e.message})"
+      rescue SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError, ::Timeout::Error, EOFError => e
+        # EOFError should not occur in ideal world, because we compare response length
+        # with a value passed by Sphinx. But we want to ensure that client will not
+        # fail with unexpected error when Sphinx implementation has bugs, aren't we?
+        if e.kind_of?(EOFError) or e.kind_of?(::Timeout::Error)
+          new_e = SphinxResponseError.new("failed to read searchd response (msg=#{e.message})")
+          new_e.set_backtrace(e.backtrace)
+          e = new_e
+        end
+
+        # Close previously opened socket (in case of it has been really opened)
+        server.free_socket(socket)
+
+        # Request error! Do we need to try it again?
+        attempts -= 1
+        retry if attempts > 0
+
+        # Re-raise original exception
+        @error = e.message
+        raise e
+      ensure
+        # Close previously opened socket on any other error
+        server.free_socket(socket)
+      end
     end
-    # :startdoc:
   end
 end
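For illustration only, a sketch of how the failover added above could look from application code; it assumes a configured client (as in the earlier sketches) and that connection failures propagate to the caller as Sphinx::SphinxConnectError, which is what with_server re-raises after exhausting its attempts:

    begin
      result = sphinx.Query('hello world')
    rescue Sphinx::SphinxConnectError
      puts sphinx.IsConnectError  # => true; the failure happened on the API side
      puts sphinx.GetLastError    # message recorded by with_server before re-raising
    end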