sphinx 0.9.9.2117

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. data/.gitignore +4 -0
  2. data/README.rdoc +243 -0
  3. data/Rakefile +45 -0
  4. data/VERSION.yml +5 -0
  5. data/init.rb +1 -0
  6. data/lib/sphinx/buffered_io.rb +26 -0
  7. data/lib/sphinx/client.rb +2426 -0
  8. data/lib/sphinx/constants.rb +179 -0
  9. data/lib/sphinx/indifferent_access.rb +152 -0
  10. data/lib/sphinx/request.rb +121 -0
  11. data/lib/sphinx/response.rb +71 -0
  12. data/lib/sphinx/server.rb +170 -0
  13. data/lib/sphinx/timeout.rb +31 -0
  14. data/lib/sphinx.rb +51 -0
  15. data/spec/client_response_spec.rb +170 -0
  16. data/spec/client_spec.rb +669 -0
  17. data/spec/client_validations_spec.rb +859 -0
  18. data/spec/fixtures/default_search.php +8 -0
  19. data/spec/fixtures/default_search_index.php +8 -0
  20. data/spec/fixtures/excerpt_custom.php +11 -0
  21. data/spec/fixtures/excerpt_default.php +8 -0
  22. data/spec/fixtures/excerpt_flags.php +12 -0
  23. data/spec/fixtures/field_weights.php +9 -0
  24. data/spec/fixtures/filter.php +9 -0
  25. data/spec/fixtures/filter_exclude.php +9 -0
  26. data/spec/fixtures/filter_float_range.php +9 -0
  27. data/spec/fixtures/filter_float_range_exclude.php +9 -0
  28. data/spec/fixtures/filter_range.php +9 -0
  29. data/spec/fixtures/filter_range_exclude.php +9 -0
  30. data/spec/fixtures/filter_range_int64.php +10 -0
  31. data/spec/fixtures/filter_ranges.php +10 -0
  32. data/spec/fixtures/filters.php +10 -0
  33. data/spec/fixtures/filters_different.php +13 -0
  34. data/spec/fixtures/geo_anchor.php +9 -0
  35. data/spec/fixtures/group_by_attr.php +9 -0
  36. data/spec/fixtures/group_by_attrpair.php +9 -0
  37. data/spec/fixtures/group_by_day.php +9 -0
  38. data/spec/fixtures/group_by_day_sort.php +9 -0
  39. data/spec/fixtures/group_by_month.php +9 -0
  40. data/spec/fixtures/group_by_week.php +9 -0
  41. data/spec/fixtures/group_by_year.php +9 -0
  42. data/spec/fixtures/group_distinct.php +10 -0
  43. data/spec/fixtures/id_range.php +9 -0
  44. data/spec/fixtures/id_range64.php +9 -0
  45. data/spec/fixtures/index_weights.php +9 -0
  46. data/spec/fixtures/keywords.php +8 -0
  47. data/spec/fixtures/limits.php +9 -0
  48. data/spec/fixtures/limits_cutoff.php +9 -0
  49. data/spec/fixtures/limits_max.php +9 -0
  50. data/spec/fixtures/limits_max_cutoff.php +9 -0
  51. data/spec/fixtures/match_all.php +9 -0
  52. data/spec/fixtures/match_any.php +9 -0
  53. data/spec/fixtures/match_boolean.php +9 -0
  54. data/spec/fixtures/match_extended.php +9 -0
  55. data/spec/fixtures/match_extended2.php +9 -0
  56. data/spec/fixtures/match_fullscan.php +9 -0
  57. data/spec/fixtures/match_phrase.php +9 -0
  58. data/spec/fixtures/max_query_time.php +9 -0
  59. data/spec/fixtures/miltiple_queries.php +12 -0
  60. data/spec/fixtures/ranking_bm25.php +9 -0
  61. data/spec/fixtures/ranking_fieldmask.php +9 -0
  62. data/spec/fixtures/ranking_matchany.php +9 -0
  63. data/spec/fixtures/ranking_none.php +9 -0
  64. data/spec/fixtures/ranking_proximity.php +9 -0
  65. data/spec/fixtures/ranking_proximity_bm25.php +9 -0
  66. data/spec/fixtures/ranking_wordcount.php +9 -0
  67. data/spec/fixtures/retries.php +9 -0
  68. data/spec/fixtures/retries_delay.php +9 -0
  69. data/spec/fixtures/select.php +9 -0
  70. data/spec/fixtures/set_override.php +11 -0
  71. data/spec/fixtures/sort_attr_asc.php +9 -0
  72. data/spec/fixtures/sort_attr_desc.php +9 -0
  73. data/spec/fixtures/sort_expr.php +9 -0
  74. data/spec/fixtures/sort_extended.php +9 -0
  75. data/spec/fixtures/sort_relevance.php +9 -0
  76. data/spec/fixtures/sort_time_segments.php +9 -0
  77. data/spec/fixtures/sphinxapi.php +1633 -0
  78. data/spec/fixtures/update_attributes.php +8 -0
  79. data/spec/fixtures/update_attributes_mva.php +8 -0
  80. data/spec/fixtures/weights.php +9 -0
  81. data/spec/spec_helper.rb +24 -0
  82. data/spec/sphinx/sphinx-id64.conf +67 -0
  83. data/spec/sphinx/sphinx.conf +67 -0
  84. data/spec/sphinx/sphinx_test.sql +88 -0
  85. data/sphinx.gemspec +127 -0
  86. metadata +142 -0
@@ -0,0 +1,2426 @@
1
+ module Sphinx
2
+ # The Sphinx Client API is used to communicate with <tt>searchd</tt>
3
+ # daemon and perform requests.
4
+ #
5
+ # @example
6
+ # sphinx = Sphinx::Client.new
7
+ # result = sphinx.query('test')
8
+ # ids = result['matches'].map { |match| match['id'] }
9
+ # posts = Post.all :conditions => { :id => ids },
10
+ # :order => "FIELD(id,#{ids.join(',')})"
11
+ #
12
+ # docs = posts.map(&:body)
13
+ # excerpts = sphinx.build_excerpts(docs, 'index', 'test')
14
+ #
15
+ class Client
16
+ include Sphinx::Constants
17
+
18
+ #=================================================================
19
+ # Some internal attributes to use inside client API
20
+ #=================================================================
21
+
22
+ # List of searchd servers to connect to.
23
+ # @private
24
+ attr_reader :servers
25
+ # Connection timeout in seconds.
26
+ # @private
27
+ attr_reader :timeout
28
+ # Number of connection retries.
29
+ # @private
30
+ attr_reader :retries
31
+ # Request timeout in seconds.
32
+ # @private
33
+ attr_reader :reqtimeout
34
+ # Number of request retries.
35
+ # @private
36
+ attr_reader :reqretries
37
+ # Log debug/info/warn to the given Logger, defaults to nil.
38
+ # @private
39
+ attr_reader :logger
40
+
41
+ # Constructs the <tt>Sphinx::Client</tt> object and sets options
42
+ # to their default values.
43
+ #
44
+ # @param [Logger] logger a logger object to put logs to. No logging
45
+ # will be performed when not set.
46
+ #
47
+ def initialize(logger = nil)
48
+ # per-query settings
49
+ @offset = 0 # how many records to seek from result-set start (default is 0)
50
+ @limit = 20 # how many records to return from result-set starting at offset (default is 20)
51
+ @mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
52
+ @weights = [] # per-field weights (default is 1 for all fields)
53
+ @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
54
+ @sortby = '' # attribute to sort by (default is "")
55
+ @min_id = 0 # min ID to match (default is 0, which means no limit)
56
+ @max_id = 0 # max ID to match (default is 0, which means no limit)
57
+ @filters = [] # search filters
58
+ @groupby = '' # group-by attribute name
59
+ @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
60
+ @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
61
+ @groupdistinct = '' # group-by count-distinct attribute
62
+ @maxmatches = 1000 # max matches to retrieve
63
+ @cutoff = 0 # cutoff to stop searching at (default is 0)
64
+ @retrycount = 0 # distributed retries count
65
+ @retrydelay = 0 # distributed retries delay
66
+ @anchor = [] # geographical anchor point
67
+ @indexweights = [] # per-index weights
68
+ @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
69
+ @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
70
+ @fieldweights = {} # per-field-name weights
71
+ @overrides = [] # per-query attribute values overrides
72
+ @select = '*' # select-list (attributes or expressions, with optional aliases)
73
+
74
+ # per-reply fields (for single-query case)
75
+ @error = '' # last error message
76
+ @warning = '' # last warning message
77
+ @connerror = false # connection error vs remote error flag
78
+
79
+ @reqs = [] # requests storage (for multi-query case)
80
+ @mbenc = '' # stored mbstring encoding
81
+ @timeout = 0 # connect timeout
82
+ @retries = 1 # number of connect retries in case of emergency
83
+ @reqtimeout = 0 # request timeout
84
+ @reqretries = 1 # number of request retries in case of emergency
85
+
86
+ # per-client-object settings
87
+ # searchd servers list
88
+ @servers = [Sphinx::Server.new(self, 'localhost', 9312, false)].freeze
89
+ @logger = logger
90
+
91
+ logger.info { "[sphinx] version: #{VERSION}, #{@servers.inspect}" } if logger
92
+ end
93
+
94
+ # Returns a string representation of the sphinx client object.
95
+ #
96
+ def inspect
97
+ params = {
98
+ :error => @error,
99
+ :warning => @warning,
100
+ :connect_error => @connerror,
101
+ :servers => @servers,
102
+ :connect_timeout => { :timeout => @timeout, :retries => @retries },
103
+ :request_timeout => { :timeout => @reqtimeout, :retries => @reqretries },
104
+ :retries => { :count => @retrycount, :delay => @retrydelay },
105
+ :limits => { :offset => @offset, :limit => @limit, :max => @maxmatches, :cutoff => @cutoff },
106
+ :max_query_time => @maxquerytime,
107
+ :overrides => @overrides,
108
+ :select => @select,
109
+ :match_mode => @mode,
110
+ :ranking_mode => @ranker,
111
+ :sort_mode => { :mode => @sort, :sortby => @sortby },
112
+ :weights => @weights,
113
+ :field_weights => @fieldweights,
114
+ :index_weights => @indexweights,
115
+ :id_range => { :min => @min_id, :max => @max_id },
116
+ :filters => @filters,
117
+ :geo_anchor => @anchor,
118
+ :group_by => { :attribute => @groupby, :func => @groupfunc, :sort => @groupsort },
119
+ :group_distinct => @groupdistinct
120
+ }
121
+
122
+ "<Sphinx::Client: %d servers, params: %s>" %
123
+ [@servers.length, params.inspect]
124
+ end
125
+
126
+ #=================================================================
127
+ # General API functions
128
+ #=================================================================
129
+
130
+ # Returns last error message, as a string, in human readable format. If there
131
+ # were no errors during the previous API call, empty string is returned.
132
+ #
133
+ # You should call it when any other function (such as {#query}) fails (typically,
134
+ # the failing function returns false). The returned string will contain the
135
+ # error description.
136
+ #
137
+ # The error message is not reset by this call; so you can safely call it
138
+ # several times if needed.
139
+ #
140
+ # @return [String] last error message.
141
+ #
142
+ # @example
143
+ # puts sphinx.last_error
144
+ #
145
+ # @see #last_warning
146
+ # @see #connect_error?
147
+ #
148
+ def last_error
149
+ @error
150
+ end
151
+ alias :GetLastError :last_error
152
+
153
+ # Returns last warning message, as a string, in human readable format. If there
154
+ # were no warnings during the previous API call, empty string is returned.
155
+ #
156
+ # You should call it to verify whether your request (such as {#query}) was
157
+ # completed but with warnings. For instance, search query against a distributed
158
+ # index might complete successfully even if several remote agents timed out.
159
+ # In that case, a warning message would be produced.
160
+ #
161
+ # The warning message is not reset by this call; so you can safely call it
162
+ # several times if needed.
163
+ #
164
+ # @return [String] last warning message.
165
+ #
166
+ # @example
167
+ # puts sphinx.last_warning
168
+ #
169
+ # @see #last_error
170
+ # @see #connect_error?
171
+ #
172
+ def last_warning
173
+ @warning
174
+ end
175
+ alias :GetLastWarning :last_warning
176
+
177
+ # Checks whether the last error was a network error on API side, or a
178
+ # remote error reported by searchd. Returns true if the last connection
179
+ # attempt to searchd failed on API side, false otherwise (if the error
180
+ # was remote, or there were no connection attempts at all).
181
+ #
182
+ # @return [Boolean] the value indicating whether last error was a
183
+ # network error on API side.
184
+ #
185
+ # @example
186
+ # puts "Connection failed!" if sphinx.connect_error?
187
+ #
188
+ # @see #last_error
189
+ # @see #last_warning
190
+ #
191
+ def connect_error?
192
+ @connerror || false
193
+ end
194
+ alias :IsConnectError :connect_error?
195
+
196
+ # Sets searchd host name and TCP port. All subsequent requests will
197
+ # use the new host and port settings. Default +host+ and +port+ are
198
+ # 'localhost' and 9312, respectively.
199
+ #
200
+ # Also, you can specify an absolute path to Sphinx's UNIX socket as +host+,
201
+ # in this case pass port as +0+ or +nil+.
202
+ #
203
+ # @param [String] host the searchd host name or UNIX socket absolute path.
204
+ # @param [Integer] port the searchd port name (could be any if UNIX
205
+ # socket path specified).
206
+ # @return [Sphinx::Client] self.
207
+ #
208
+ # @example
209
+ # sphinx.set_server('localhost', 9312)
210
+ # sphinx.set_server('/opt/sphinx/var/run/sphinx.sock')
211
+ #
212
+ # @raise [ArgumentError] Occurred when parameters are invalid.
213
+ # @see #set_servers
214
+ # @see #set_connect_timeout
215
+ # @see #set_request_timeout
216
+ #
217
+ def set_server(host, port = 9312)
218
+ raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)
219
+
220
+ path = nil
221
+ # Check if UNIX socket should be used
222
+ if host[0] == ?/
223
+ path = host
224
+ elsif host[0, 7] == 'unix://'
225
+ path = host[7..-1]
226
+ else
227
+ raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
228
+ end
229
+
230
+ host = port = nil unless path.nil?
231
+
232
+ @servers = [Sphinx::Server.new(self, host, port, path)].freeze
233
+ logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
234
+ self
235
+ end
236
+ alias :SetServer :set_server
237
+
238
+ # Sets the list of searchd servers. Each subsequent request will use next
239
+ # server in list (round-robin). In case of one server failure, request could
240
+ # be retried on another server (see {#set_connect_timeout} and
241
+ # {#set_request_timeout}).
242
+ #
243
+ # Method accepts an +Array+ of +Hash+es, each of them should have <tt>:host</tt>
244
+ # and <tt>:port</tt> (to connect to searchd through network) or <tt>:path</tt>
245
+ # (an absolute path to UNIX socket) specified.
246
+ #
247
+ # @param [Array<Hash>] servers an +Array+ of +Hash+ objects with servers parameters.
248
+ # @option servers [String] :host the searchd host name or UNIX socket absolute path.
249
+ # @option servers [String] :path the searchd UNIX socket absolute path.
250
+ # @option servers [Integer] :port (9312) the searchd port name (skipped when UNIX
251
+ # socket path specified)
252
+ # @return [Sphinx::Client] self.
253
+ #
254
+ # @example
255
+ # sphinx.set_servers([
256
+ # { :host => 'browse01.local' }, # default port is 9312
257
+ # { :host => 'browse02.local', :port => 9312 },
258
+ # { :path => '/opt/sphinx/var/run/sphinx.sock' }
259
+ # ])
260
+ #
261
+ # @raise [ArgumentError] Occurred when parameters are invalid.
262
+ # @see #set_server
263
+ # @see #set_connect_timeout
264
+ # @see #set_request_timeout
265
+ #
266
+ def set_servers(servers)
267
+ raise ArgumentError, '"servers" argument must be Array' unless servers.kind_of?(Array)
268
+ raise ArgumentError, '"servers" argument must be not empty' if servers.empty?
269
+
270
+ @servers = servers.map do |server|
271
+ raise ArgumentError, '"servers" argument must be Array of Hashes' unless server.kind_of?(Hash)
272
+
273
+ server = server.with_indifferent_access
274
+
275
+ host = server[:path] || server[:host]
276
+ port = server[:port] || 9312
277
+ path = nil
278
+ raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)
279
+
280
+ # Check if UNIX socket should be used
281
+ if host[0] == ?/
282
+ path = host
283
+ elsif host[0, 7] == 'unix://'
284
+ path = host[7..-1]
285
+ else
286
+ raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
287
+ end
288
+
289
+ host = port = nil unless path.nil?
290
+
291
+ Sphinx::Server.new(self, host, port, path)
292
+ end.freeze
293
+ logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
294
+ self
295
+ end
296
+ alias :SetServers :set_servers
297
+
298
+ # Sets the time allowed to spend connecting to the server before giving up
299
+ # and number of retries to perform.
300
+ #
301
+ # In the event of a failure to connect, an appropriate error code should
302
+ # be returned back to the application in order for application-level error
303
+ # handling to advise the user.
304
+ #
305
+ # When multiple servers configured through {#set_servers} method, and +retries+
306
+ # number is greater than 1, library will try to connect to another server.
307
+ # In case of single server configured, it will try to reconnect +retries+
308
+ # times.
309
+ #
310
+ # Please note, this timeout will only be used for connection establishing, not
311
+ # for regular API requests.
312
+ #
313
+ # @param [Integer] timeout a connection timeout in seconds.
314
+ # @param [Integer] retries number of connect retries.
315
+ # @return [Sphinx::Client] self.
316
+ #
317
+ # @example Set connection timeout to 1 second and number of retries to 5
318
+ # sphinx.set_connect_timeout(1, 5)
319
+ #
320
+ # @raise [ArgumentError] Occurred when parameters are invalid.
321
+ # @see #set_server
322
+ # @see #set_servers
323
+ # @see #set_request_timeout
324
+ #
325
+ def set_connect_timeout(timeout, retries = 1)
326
+ raise ArgumentError, '"timeout" argument must be Integer' unless timeout.kind_of?(Integer)
327
+ raise ArgumentError, '"retries" argument must be Integer' unless retries.kind_of?(Integer)
328
+ raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0
329
+
330
+ @timeout = timeout
331
+ @retries = retries
332
+ self
333
+ end
334
+ alias :SetConnectTimeout :set_connect_timeout
335
+
336
+ # Sets the time allowed to spend performing request to the server before giving up
337
+ # and number of retries to perform.
338
+ #
339
+ # In the event of a failure to do request, an appropriate error code should
340
+ # be returned back to the application in order for application-level error
341
+ # handling to advise the user.
342
+ #
343
+ # When multiple servers configured through {#set_servers} method, and +retries+
344
+ # number is greater than 1, library will try to do another try with this server
345
+ # (with full reconnect). If connection would fail, behavior depends on
346
+ # {#set_connect_timeout} settings.
347
+ #
348
+ # Please note, this timeout will only be used for request performing, not
349
+ # for connection establishing.
350
+ #
351
+ # @param [Integer] timeout a request timeout in seconds.
352
+ # @param [Integer] retries number of request retries.
353
+ # @return [Sphinx::Client] self.
354
+ #
355
+ # @example Set request timeout to 1 second and number of retries to 5
356
+ # sphinx.set_request_timeout(1, 5)
357
+ #
358
+ # @raise [ArgumentError] Occurred when parameters are invalid.
359
+ # @see #set_server
360
+ # @see #set_servers
361
+ # @see #set_connect_timeout
362
+ #
363
+ def set_request_timeout(timeout, retries = 1)
364
+ raise ArgumentError, '"timeout" argument must be Integer' unless timeout.kind_of?(Integer)
365
+ raise ArgumentError, '"retries" argument must be Integer' unless retries.kind_of?(Integer)
366
+ raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0
367
+
368
+ @reqtimeout = timeout
369
+ @reqretries = retries
370
+ self
371
+ end
372
+ alias :SetRequestTimeout :set_request_timeout
373
+
374
+ # Sets distributed retry count and delay.
375
+ #
376
+ # On temporary failures searchd will attempt up to +count+ retries
377
+ # per agent. +delay+ is the delay between the retries, in milliseconds.
378
+ # Retries are disabled by default. Note that this call will not make
379
+ # the API itself retry on temporary failure; it only tells searchd
380
+ # to do so. Currently, the list of temporary failures includes all
381
+ # kinds of connection failures and maxed out (too busy) remote agents.
382
+ #
383
+ # @param [Integer] count a number of retries to perform.
384
+ # @param [Integer] delay a delay between the retries.
385
+ # @return [Sphinx::Client] self.
386
+ #
387
+ # @example Perform 5 retries with 200 ms between them
388
+ # sphinx.set_retries(5, 200)
389
+ #
390
+ # @raise [ArgumentError] Occurred when parameters are invalid.
391
+ # @see #set_connect_timeout
392
+ # @see #set_request_timeout
393
+ #
394
+ def set_retries(count, delay = 0)
395
+ raise ArgumentError, '"count" argument must be Integer' unless count.kind_of?(Integer)
396
+ raise ArgumentError, '"delay" argument must be Integer' unless delay.kind_of?(Integer)
397
+
398
+ @retrycount = count
399
+ @retrydelay = delay
400
+ self
401
+ end
402
+ alias :SetRetries :set_retries
403
+
404
+ #=================================================================
405
+ # General query settings
406
+ #=================================================================
407
+
408
+ # Sets offset into server-side result set (+offset+) and amount of matches to
409
+ # return to client starting from that offset (+limit+). Can additionally control
410
+ # maximum server-side result set size for current query (+max_matches+) and the
411
+ # threshold amount of matches to stop searching at (+cutoff+). All parameters
412
+ # must be non-negative integers.
413
+ #
414
+ # First two parameters to {#set_limits} are identical in behavior to MySQL LIMIT
415
+ # clause. They instruct searchd to return at most +limit+ matches starting from
416
+ # match number +offset+. The default offset and limit settings are +0+ and +20+,
417
+ # that is, to return first +20+ matches.
418
+ #
419
+ # +max_matches+ setting controls how many matches searchd will keep in RAM
420
+ # while searching. All matching documents will be normally processed, ranked,
421
+ # filtered, and sorted even if max_matches is set to +1+. But only best +N+
422
+ # documents are stored in memory at any given moment for performance and RAM
423
+ # usage reasons, and this setting controls that N. Note that there are two
424
+ # places where max_matches limit is enforced. Per-query limit is controlled
425
+ # by this API call, but there also is per-server limit controlled by +max_matches+
426
+ # setting in the config file. To prevent RAM usage abuse, server will not
427
+ # allow to set per-query limit higher than the per-server limit.
428
+ #
429
+ # You can't retrieve more than +max_matches+ matches to the client application.
430
+ # The default limit is set to +1000+. Normally, you must not have to go over
431
+ # this limit. One thousand records is enough to present to the end user.
432
+ # And if you're thinking about pulling the results to application for further
433
+ # sorting or filtering, that would be much more efficient if performed on
434
+ # Sphinx side.
435
+ #
436
+ # +cutoff+ setting is intended for advanced performance control. It tells
437
+ # searchd to forcibly stop search query once +cutoff+ matches had been found
438
+ # and processed.
439
+ #
440
+ # @param [Integer] offset an offset into server-side result set.
441
+ # @param [Integer] limit an amount of matches to return.
442
+ # @param [Integer] max a maximum server-side result set size.
443
+ # @param [Integer] cutoff a threshold amount of matches to stop searching at.
444
+ # @return [Sphinx::Client] self.
445
+ #
446
+ # @example
447
+ # sphinx.set_limits(100, 50, 1000, 5000)
448
+ #
449
+ # @raise [ArgumentError] Occurred when parameters are invalid.
450
+ #
451
+ def set_limits(offset, limit, max = 0, cutoff = 0)
452
+ raise ArgumentError, '"offset" argument must be Integer' unless offset.kind_of?(Integer)
453
+ raise ArgumentError, '"limit" argument must be Integer' unless limit.kind_of?(Integer)
454
+ raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
455
+ raise ArgumentError, '"cutoff" argument must be Integer' unless cutoff.kind_of?(Integer)
456
+
457
+ raise ArgumentError, '"offset" argument should be greater or equal to zero' unless offset >= 0
458
+ raise ArgumentError, '"limit" argument should be greater to zero' unless limit > 0
459
+ raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
460
+ raise ArgumentError, '"cutoff" argument should be greater or equal to zero' unless cutoff >= 0
461
+
462
+ @offset = offset
463
+ @limit = limit
464
+ @maxmatches = max if max > 0
465
+ @cutoff = cutoff if cutoff > 0
466
+ self
467
+ end
468
+ alias :SetLimits :set_limits
469
+
470
+ # Sets maximum search query time, in milliseconds. Parameter must be a
471
+ # non-negative integer. Default value is +0+ which means "do not limit".
472
+ #
473
+ # Similar to +cutoff+ setting from {#set_limits}, but limits elapsed query
474
+ # time instead of processed matches count. Local search queries will be
475
+ # stopped once that much time has elapsed. Note that if you're performing
476
+ # a search which queries several local indexes, this limit applies to each
477
+ # index separately.
478
+ #
479
+ # @param [Integer] max maximum search query time in milliseconds.
480
+ # @return [Sphinx::Client] self.
481
+ #
482
+ # @example
483
+ # sphinx.set_max_query_time(200)
484
+ #
485
+ # @raise [ArgumentError] Occurred when parameters are invalid.
486
+ #
487
+ def set_max_query_time(max)
488
+ raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
489
+ raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
490
+
491
+ @maxquerytime = max
492
+ self
493
+ end
494
+ alias :SetMaxQueryTime :set_max_query_time
495
+
496
+ # Sets temporary (per-query) per-document attribute value overrides. Only
497
+ # supports scalar attributes. +values+ must be a +Hash+ that maps document
498
+ # IDs to overridden attribute values.
499
+ #
500
+ # Override feature lets you "temporary" update attribute values for some
501
+ # documents within a single query, leaving all other queries unaffected.
502
+ # This might be useful for personalized data. For example, assume you're
503
+ # implementing a personalized search function that wants to boost the posts
504
+ # that the user's friends recommend. Such data is not just dynamic, but
505
+ # also personal; so you can't simply put it in the index because you don't
506
+ # want everyone's searches affected. Overrides, on the other hand, are local
507
+ # to a single query and invisible to everyone else. So you can, say, setup
508
+ # a "friends_weight" value for every document, defaulting to 0, then
509
+ # temporary override it with 1 for documents 123, 456 and 789 (recommended
510
+ # by exactly the friends of current user), and use that value when ranking.
511
+ #
512
+ # You can specify attribute type as String ("integer", "float", etc),
513
+ # Symbol (:integer, :float, etc), or
514
+ # Fixnum constant (SPH_ATTR_INTEGER, SPH_ATTR_FLOAT, etc).
515
+ #
516
+ # @param [String, Symbol] attribute an attribute name to override values of.
517
+ # @param [Integer, String, Symbol] attrtype attribute type.
518
+ # @param [Hash] values a +Hash+ that maps document IDs to overridden attribute values.
519
+ # @return [Sphinx::Client] self.
520
+ #
521
+ # @example
522
+ # sphinx.set_override(:friends_weight, :integer, {123 => 1, 456 => 1, 789 => 1})
523
+ #
524
+ # @raise [ArgumentError] Occurred when parameters are invalid.
525
+ #
526
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setoverride Section 6.2.3, "SetOverride"
527
+ #
528
+ def set_override(attribute, attrtype, values)
529
+ raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
530
+
531
+ case attrtype
532
+ when String, Symbol
533
+ begin
534
+ attrtype = self.class.const_get("SPH_ATTR_#{attrtype.to_s.upcase}")
535
+ rescue NameError
536
+ raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid"
537
+ end
538
+ when Fixnum
539
+ raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid" unless (SPH_ATTR_INTEGER..SPH_ATTR_BIGINT).include?(attrtype)
540
+ else
541
+ raise ArgumentError, '"attrtype" argument must be Fixnum, String, or Symbol'
542
+ end
543
+
544
+ raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
545
+
546
+ values.each do |id, value|
547
+ raise ArgumentError, '"values" argument must be Hash map of Integer to Integer or Time' unless id.kind_of?(Integer)
548
+ case attrtype
549
+ when SPH_ATTR_TIMESTAMP
550
+ raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Integer) or value.kind_of?(Time)
551
+ when SPH_ATTR_FLOAT
552
+ raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Numeric)
553
+ else
554
+ # SPH_ATTR_INTEGER, SPH_ATTR_ORDINAL, SPH_ATTR_BOOL, SPH_ATTR_BIGINT
555
+ raise ArgumentError, '"values" argument must be Hash map of Integer to Integer' unless value.kind_of?(Integer)
556
+ end
557
+ end
558
+
559
+ @overrides << { 'attr' => attribute.to_s, 'type' => attrtype, 'values' => values }
560
+ self
561
+ end
562
+ alias :SetOverride :set_override
563
+
564
+ # Sets the select clause, listing specific attributes to fetch, and
565
+ # expressions to compute and fetch. Clause syntax mimics SQL.
566
+ #
567
+ # {#set_select} is very similar to the part of a typical SQL query between
568
+ # +SELECT+ and +FROM+. It lets you choose what attributes (columns) to
569
+ # fetch, and also what expressions over the columns to compute and fetch.
570
+ # A certain difference from SQL is that expressions must always be aliased
571
+ # to a correct identifier (consisting of letters and digits) using +AS+
572
+ # keyword. SQL also lets you do that but does not require to. Sphinx enforces
573
+ # aliases so that the computation results can always be returned under a
574
+ # "normal" name in the result set, used in other clauses, etc.
575
+ #
576
+ # Everything else is basically identical to SQL. Star ('*') is supported.
577
+ # Functions are supported. Arbitrary amount of expressions is supported.
578
+ # Computed expressions can be used for sorting, filtering, and grouping,
579
+ # just as the regular attributes.
580
+ #
581
+ # Starting with version 0.9.9-rc2, aggregate functions (<tt>AVG()</tt>,
582
+ # <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are supported when using
583
+ # <tt>GROUP BY</tt>.
584
+ #
585
+ # Expression sorting (Section 4.5, “SPH_SORT_EXPR mode”) and geodistance
586
+ # functions ({#set_geo_anchor}) are now internally implemented
587
+ # using this computed expressions mechanism, using magic names '<tt>@expr</tt>'
588
+ # and '<tt>@geodist</tt>' respectively.
589
+ #
590
+ # @param [String] select a select clause, listing specific attributes to fetch.
591
+ # @return [Sphinx::Client] self.
592
+ #
593
+ # @example
594
+ # sphinx.set_select('*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight')
595
+ # sphinx.set_select('exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd, IF(age>40,1,0) AS over40')
596
+ # sphinx.set_select('*, AVG(price) AS avgprice')
597
+ #
598
+ # @raise [ArgumentError] Occurred when parameters are invalid.
599
+ #
600
+ # @see http://www.sphinxsearch.com/docs/current.html#sort-expr Section 4.5, "SPH_SORT_EXPR mode"
601
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
602
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setselect Section 6.2.4, "SetSelect"
603
+ #
604
+ def set_select(select)
605
+ raise ArgumentError, '"select" argument must be String' unless select.kind_of?(String)
606
+
607
+ @select = select
608
+ self
609
+ end
610
+ alias :SetSelect :set_select
611
+
612
+ #=================================================================
613
+ # Full-text search query settings
614
+ #=================================================================
615
+
616
+ # Sets full-text query matching mode.
617
+ #
618
+ # Parameter must be a +Fixnum+ constant specifying one of the known modes
619
+ # (+SPH_MATCH_ALL+, +SPH_MATCH_ANY+, etc), +String+ with identifier (<tt>"all"</tt>,
620
+ # <tt>"any"</tt>, etc), or a +Symbol+ (<tt>:all</tt>, <tt>:any</tt>, etc).
621
+ #
622
+ # @param [Integer, String, Symbol] mode full-text query matching mode.
623
+ # @return [Sphinx::Client] self.
624
+ #
625
+ # @example
626
+ # sphinx.set_match_mode(Sphinx::SPH_MATCH_ALL)
627
+ # sphinx.set_match_mode(:all)
628
+ # sphinx.set_match_mode('all')
629
+ #
630
+ # @raise [ArgumentError] Occurred when parameters are invalid.
631
+ #
632
+ # @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
633
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
634
+ #
635
# Validates and stores the full-text query matching mode.
#
# Accepts a String or Symbol identifier (resolved against the
# SPH_MATCH_* constants) or an Integer constant within the known range.
#
# NOTE: the original implementation matched on +Fixnum+, which was
# removed in Ruby 3.2; +Integer+ is used instead and accepts the same
# constant values.
def set_match_mode(mode)
  case mode
  when String, Symbol
    begin
      mode = self.class.const_get("SPH_MATCH_#{mode.to_s.upcase}")
    rescue NameError
      raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
    end
  when Integer
    raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_MATCH_ALL..SPH_MATCH_EXTENDED2).include?(mode)
  else
    raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
  end

  @mode = mode
  self
end
alias :SetMatchMode :set_match_mode
653
+
654
+ # Sets ranking mode. Only available in +SPH_MATCH_EXTENDED2+
655
+ # matching mode at the time of this writing. Parameter must be a
656
+ # constant specifying one of the known modes.
657
+ #
658
+ # You can specify ranking mode as String ("proximity_bm25", "bm25", etc),
659
+ # Symbol (:proximity_bm25, :bm25, etc), or
660
+ # Fixnum constant (SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, etc).
661
+ #
662
+ # @param [Integer, String, Symbol] ranker ranking mode.
663
+ # @return [Sphinx::Client] self.
664
+ #
665
+ # @example
666
+ # sphinx.set_ranking_mode(Sphinx::SPH_RANK_BM25)
667
+ # sphinx.set_ranking_mode(:bm25)
668
+ # sphinx.set_ranking_mode('bm25')
669
+ #
670
+ # @raise [ArgumentError] Occurred when parameters are invalid.
671
+ #
672
+ # @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
673
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
674
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setrankingmode Section 6.3.2, "SetRankingMode"
675
+ #
676
# Validates and stores the ranking mode.
#
# Accepts a String or Symbol identifier (resolved against the
# SPH_RANK_* constants) or an Integer constant within the known range.
#
# NOTE: the original implementation matched on +Fixnum+, which was
# removed in Ruby 3.2; +Integer+ is used instead and accepts the same
# constant values.
def set_ranking_mode(ranker)
  case ranker
  when String, Symbol
    begin
      ranker = self.class.const_get("SPH_RANK_#{ranker.to_s.upcase}")
    rescue NameError
      raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid"
    end
  when Integer
    raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid" unless (SPH_RANK_PROXIMITY_BM25..SPH_RANK_FIELDMASK).include?(ranker)
  else
    raise ArgumentError, '"ranker" argument must be Fixnum, String, or Symbol'
  end

  @ranker = ranker
  self
end
alias :SetRankingMode :set_ranking_mode
694
+
695
+ # Set matches sorting mode.
696
+ #
697
+ # You can specify sorting mode as String ("relevance", "attr_desc", etc),
698
+ # Symbol (:relevance, :attr_desc, etc), or
699
+ # Fixnum constant (SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, etc).
700
+ #
701
+ # @param [Integer, String, Symbol] mode matches sorting mode.
702
+ # @param [String] sortby sorting clause, with the syntax depending on
703
+ # specific mode. Should be specified unless sorting mode is
704
+ # +SPH_SORT_RELEVANCE+.
705
+ # @return [Sphinx::Client] self.
706
+ #
707
+ # @example
708
+ # sphinx.set_sort_mode(Sphinx::SPH_SORT_ATTR_ASC, 'attr')
709
+ # sphinx.set_sort_mode(:attr_asc, 'attr')
710
+ # sphinx.set_sort_mode('attr_asc', 'attr')
711
+ #
712
+ # @raise [ArgumentError] Occurred when parameters are invalid.
713
+ #
714
+ # @see http://www.sphinxsearch.com/docs/current.html#sorting-modes Section 4.5, "Sorting modes"
715
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setsortmode Section 6.3.3, "SetSortMode"
716
+ #
717
# Validates and stores the matches sorting mode and its sort clause.
#
# Accepts a String or Symbol identifier (resolved against the
# SPH_SORT_* constants) or an Integer constant within the known range.
#
# NOTE: the original implementation matched on +Fixnum+, which was
# removed in Ruby 3.2; +Integer+ is used instead and accepts the same
# constant values.
def set_sort_mode(mode, sortby = '')
  case mode
  when String, Symbol
    begin
      mode = self.class.const_get("SPH_SORT_#{mode.to_s.upcase}")
    rescue NameError
      raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
    end
  when Integer
    raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_SORT_RELEVANCE..SPH_SORT_EXPR).include?(mode)
  else
    raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
  end

  raise ArgumentError, '"sortby" argument must be String' unless sortby.kind_of?(String)
  # Every mode except SPH_SORT_RELEVANCE requires an explicit sort clause.
  raise ArgumentError, '"sortby" should not be empty unless mode is SPH_SORT_RELEVANCE' if mode != SPH_SORT_RELEVANCE && sortby.empty?

  @sort = mode
  @sortby = sortby
  self
end
alias :SetSortMode :set_sort_mode
739
+
740
+ # Binds per-field weights in the order of appearance in the index.
741
+ #
742
+ # @param [Array<Integer>] weights an +Array+ of integer per-field weights.
743
+ # @return [Sphinx::Client] self.
744
+ #
745
+ # @example
746
+ # sphinx.set_weights([1, 3, 5])
747
+ #
748
+ # @raise [ArgumentError] Occurred when parameters are invalid.
749
+ #
750
+ # @deprecated Use {#set_field_weights} instead.
751
+ # @see #set_field_weights
752
+ #
753
# Stores legacy positional per-field weights (deprecated in favour of
# #set_field_weights).
def set_weights(weights)
  raise ArgumentError, '"weights" argument must be Array' unless weights.is_a?(Array)
  unless weights.all? { |w| w.is_a?(Integer) }
    raise ArgumentError, '"weights" argument must be Array of integers'
  end

  @weights = weights
  self
end
alias :SetWeights :set_weights
763
+
764
+ # Binds per-field weights by name. Parameter must be a +Hash+
765
+ # mapping string field names to integer weights.
766
+ #
767
+ # Match ranking can be affected by per-field weights. For instance,
768
+ # see Section 4.4, "Weighting" for an explanation how phrase
769
+ # proximity ranking is affected. This call lets you specify what
770
+ # non-default weights to assign to different full-text fields.
771
+ #
772
+ # The weights must be positive 32-bit integers. The final weight
773
+ # will be a 32-bit integer too. Default weight value is 1. Unknown
774
+ # field names will be silently ignored.
775
+ #
776
+ # There is no enforced limit on the maximum weight value at the
777
+ # moment. However, beware that if you set it too high you can
778
+ # start hitting 32-bit wraparound issues. For instance, if
779
+ # you set a weight of 10,000,000 and search in extended mode,
780
+ # then maximum possible weight will be equal to 10 million (your
781
+ # weight) by 1 thousand (internal BM25 scaling factor, see
782
+ # Section 4.4, “Weighting”) by 1 or more (phrase proximity rank).
783
+ # The result is at least 10 billion that does not fit in 32 bits
784
+ # and will be wrapped around, producing unexpected results.
785
+ #
786
+ # @param [Hash] weights a +Hash+ mapping string field names to
787
+ # integer weights.
788
+ # @return [Sphinx::Client] self.
789
+ #
790
+ # @example
791
+ # sphinx.set_field_weights(:title => 20, :text => 10)
792
+ #
793
+ # @raise [ArgumentError] Occurred when parameters are invalid.
794
+ #
795
+ # @see http://www.sphinxsearch.com/docs/current.html#weighting Section 4.4, "Weighting"
796
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfieldweights Section 6.3.5, "SetFieldWeights"
797
+ #
798
# Stores named per-field weights (String/Symbol field names mapped to
# Integer weights).
def set_field_weights(weights)
  raise ArgumentError, '"weights" argument must be Hash' unless weights.is_a?(Hash)
  valid = weights.all? do |field, value|
    (field.is_a?(String) || field.is_a?(Symbol)) && value.is_a?(Integer)
  end
  raise ArgumentError, '"weights" argument must be Hash map of strings to integers' unless valid

  @fieldweights = weights
  self
end
alias :SetFieldWeights :set_field_weights
810
+
811
+ # Sets per-index weights, and enables weighted summing of match
812
+ # weights across different indexes. Parameter must be a hash
813
+ # (associative array) mapping string index names to integer
814
+ # weights. Default is empty array that means to disable weighting
815
+ # summing.
816
+ #
817
+ # When a match with the same document ID is found in several
818
+ # different local indexes, by default Sphinx simply chooses the
819
+ # match from the index specified last in the query. This is to
820
+ # support searching through partially overlapping index partitions.
821
+ #
822
+ # However in some cases the indexes are not just partitions,
823
+ # and you might want to sum the weights across the indexes
824
+ # instead of picking one. {#set_index_weights} lets you do that.
825
+ # With summing enabled, final match weight in result set will be
826
+ # computed as a sum of match weight coming from the given index
827
+ # multiplied by respective per-index weight specified in this
828
+ # call. Ie. if the document 123 is found in index A with the
829
+ # weight of 2, and also in index B with the weight of 3, and
830
+ # you called {#set_index_weights} with <tt>{"A"=>100, "B"=>10}</tt>,
831
+ # the final weight returned to the client will be 2*100+3*10 = 230.
832
+ #
833
+ # @param [Hash] weights a +Hash+ mapping string index names to
834
+ # integer weights.
835
+ # @return [Sphinx::Client] self.
836
+ #
837
+ # @example
838
+ # sphinx.set_index_weights(:fresh => 20, :archived => 10)
839
+ #
840
+ # @raise [ArgumentError] Occurred when parameters are invalid.
841
+ #
842
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setindexweights Section 6.3.6, "SetIndexWeights"
843
+ #
844
# Stores per-index weights (String/Symbol index names mapped to
# Integer weights), enabling weighted summing across indexes.
def set_index_weights(weights)
  raise ArgumentError, '"weights" argument must be Hash' unless weights.is_a?(Hash)
  valid = weights.all? do |index, value|
    (index.is_a?(String) || index.is_a?(Symbol)) && value.is_a?(Integer)
  end
  raise ArgumentError, '"weights" argument must be Hash map of strings to integers' unless valid

  @indexweights = weights
  self
end
alias :SetIndexWeights :set_index_weights
856
+
857
+ #=================================================================
858
+ # Result set filtering settings
859
+ #=================================================================
860
+
861
+ # Sets an accepted range of document IDs. Parameters must be integers.
862
+ # Defaults are 0 and 0; that combination means to not limit by range.
863
+ #
864
+ # After this call, only those records that have document ID between
865
+ # +min+ and +max+ (including IDs exactly equal to +min+ or +max+)
866
+ # will be matched.
867
+ #
868
+ # @param [Integer] min min document ID.
869
+ # @param [Integer] max max document ID.
870
+ # @return [Sphinx::Client] self.
871
+ #
872
+ # @example
873
+ # sphinx.set_id_range(10, 1000)
874
+ #
875
+ # @raise [ArgumentError] Occurred when parameters are invalid.
876
+ #
877
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setidrange Section 6.4.1, "SetIDRange"
878
+ #
879
# Stores an inclusive accepted document ID range (0, 0 disables it).
def set_id_range(min, max)
  { 'min' => min, 'max' => max }.each do |name, value|
    raise ArgumentError, "\"#{name}\" argument must be Integer" unless value.is_a?(Integer)
  end
  raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max

  @min_id = min
  @max_id = max
  self
end
alias :SetIDRange :set_id_range
889
+
890
+ # Adds new integer values set filter.
891
+ #
892
+ # On this call, additional new filter is added to the existing
893
+ # list of filters. +attribute+ must be a string with attribute
894
+ # name. +values+ must be a plain array containing integer
895
+ # values. +exclude+ must be a boolean value; it controls
896
+ # whether to accept the matching documents (default mode, when
897
+ # +exclude+ is +false+) or reject them.
898
+ #
899
+ # Only those documents where +attribute+ column value stored in
900
+ # the index matches any of the values from +values+ array will
901
+ # be matched (or rejected, if +exclude+ is +true+).
902
+ #
903
+ # @param [String, Symbol] attribute an attribute name to filter by.
904
+ # @param [Array<Integer>, Integer] values an +Array+ of integers or
905
+ # single Integer with given attribute values.
906
+ # @param [Boolean] exclude indicating whether documents with given attribute
907
+ # matching specified values should be excluded from search results.
908
+ # @return [Sphinx::Client] self.
909
+ #
910
+ # @example
911
+ # sphinx.set_filter(:group_id, [10, 15, 20])
912
+ # sphinx.set_filter(:group_id, [10, 15, 20], true)
913
+ #
914
+ # @raise [ArgumentError] Occurred when parameters are invalid.
915
+ #
916
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilter Section 6.4.2, "SetFilter"
917
+ # @see #set_filter_range
918
+ # @see #set_filter_float_range
919
+ #
920
# Appends an integer values-set filter; a lone Integer is wrapped into
# an Array. An empty values list is silently ignored.
def set_filter(attribute, values, exclude = false)
  unless attribute.is_a?(String) || attribute.is_a?(Symbol)
    raise ArgumentError, '"attribute" argument must be String or Symbol'
  end
  values = [values] if values.is_a?(Integer)
  raise ArgumentError, '"values" argument must be Array' unless values.is_a?(Array)
  unless values.all? { |value| value.is_a?(Integer) }
    raise ArgumentError, '"values" argument must be Array of Integers'
  end
  unless exclude.is_a?(TrueClass) || exclude.is_a?(FalseClass)
    raise ArgumentError, '"exclude" argument must be Boolean'
  end

  unless values.empty?
    @filters << {
      'type'    => SPH_FILTER_VALUES,
      'attr'    => attribute.to_s,
      'exclude' => exclude,
      'values'  => values
    }
  end
  self
end
alias :SetFilter :set_filter
933
+
934
+ # Adds new integer range filter.
935
+ #
936
+ # On this call, additional new filter is added to the existing
937
+ # list of filters. +attribute+ must be a string with attribute
938
+ # name. +min+ and +max+ must be integers that define the acceptable
939
+ # attribute values range (including the boundaries). +exclude+
940
+ # must be a boolean value; it controls whether to accept the
941
+ # matching documents (default mode, when +exclude+ is false) or
942
+ # reject them.
943
+ #
944
+ # Only those documents where +attribute+ column value stored
945
+ # in the index is between +min+ and +max+ (including values
946
+ # that are exactly equal to +min+ or +max+) will be matched
947
+ # (or rejected, if +exclude+ is true).
948
+ #
949
+ # @param [String, Symbol] attribute an attribute name to filter by.
950
+ # @param [Integer] min min value of the given attribute.
951
+ # @param [Integer] max max value of the given attribute.
952
+ # @param [Boolean] exclude indicating whether documents with given attribute
953
+ # matching specified boundaries should be excluded from search results.
954
+ # @return [Sphinx::Client] self.
955
+ #
956
+ # @example
957
+ # sphinx.set_filter_range(:group_id, 10, 20)
958
+ # sphinx.set_filter_range(:group_id, 10, 20, true)
959
+ #
960
+ # @raise [ArgumentError] Occurred when parameters are invalid.
961
+ #
962
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterrange Section 6.4.3, "SetFilterRange"
963
+ # @see #set_filter
964
+ # @see #set_filter_float_range
965
+ #
966
# Appends an inclusive integer range filter to the filter list.
def set_filter_range(attribute, min, max, exclude = false)
  unless attribute.is_a?(String) || attribute.is_a?(Symbol)
    raise ArgumentError, '"attribute" argument must be String or Symbol'
  end
  { 'min' => min, 'max' => max }.each do |name, value|
    raise ArgumentError, "\"#{name}\" argument must be Integer" unless value.is_a?(Integer)
  end
  raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
  unless exclude.is_a?(TrueClass) || exclude.is_a?(FalseClass)
    raise ArgumentError, '"exclude" argument must be Boolean'
  end

  @filters << {
    'type'    => SPH_FILTER_RANGE,
    'attr'    => attribute.to_s,
    'exclude' => exclude,
    'min'     => min,
    'max'     => max
  }
  self
end
alias :SetFilterRange :set_filter_range
977
+
978
+ # Adds new float range filter.
979
+ #
980
+ # On this call, additional new filter is added to the existing
981
+ # list of filters. +attribute+ must be a string with attribute name.
982
+ # +min+ and +max+ must be floats that define the acceptable
983
+ # attribute values range (including the boundaries). +exclude+ must
984
+ # be a boolean value; it controls whether to accept the matching
985
+ # documents (default mode, when +exclude+ is false) or reject them.
986
+ #
987
+ # Only those documents where +attribute+ column value stored in
988
+ # the index is between +min+ and +max+ (including values that are
989
+ # exactly equal to +min+ or +max+) will be matched (or rejected,
990
+ # if +exclude+ is true).
991
+ #
992
+ # @param [String, Symbol] attribute an attribute name to filter by.
993
+ # @param [Numeric] min min value of the given attribute.
994
+ # @param [Numeric] max max value of the given attribute.
995
+ # @param [Boolean] exclude indicating whether documents with given attribute
996
+ # matching specified boundaries should be excluded from search results.
997
+ # @return [Sphinx::Client] self.
998
+ #
999
+ # @example
1000
+ # sphinx.set_filter_float_range(:group_id, 10.5, 20)
1001
+ # sphinx.set_filter_float_range(:group_id, 10.5, 20, true)
1002
+ #
1003
+ # @raise [ArgumentError] Occurred when parameters are invalid.
1004
+ #
1005
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterfloatrange Section 6.4.4, "SetFilterFloatRange"
1006
+ # @see #set_filter
1007
+ # @see #set_filter_range
1008
+ #
1009
# Appends an inclusive float range filter; bounds are coerced to Float
# before being stored.
def set_filter_float_range(attribute, min, max, exclude = false)
  unless attribute.is_a?(String) || attribute.is_a?(Symbol)
    raise ArgumentError, '"attribute" argument must be String or Symbol'
  end
  { 'min' => min, 'max' => max }.each do |name, value|
    raise ArgumentError, "\"#{name}\" argument must be Numeric" unless value.is_a?(Numeric)
  end
  raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
  unless exclude.is_a?(TrueClass) || exclude.is_a?(FalseClass)
    raise ArgumentError, '"exclude" argument must be Boolean'
  end

  @filters << {
    'type'    => SPH_FILTER_FLOATRANGE,
    'attr'    => attribute.to_s,
    'exclude' => exclude,
    'min'     => min.to_f,
    'max'     => max.to_f
  }
  self
end
alias :SetFilterFloatRange :set_filter_float_range
1020
+
1021
+ # Sets anchor point for geosphere distance (geodistance)
1022
+ # calculations, and enable them.
1023
+ #
1024
+ # +attrlat+ and +attrlong+ must be strings that contain the names
1025
+ # of latitude and longitude attributes, respectively. +lat+ and
1026
+ # +long+ are floats that specify anchor point latitude and
1027
+ # longitude, in radians.
1028
+ #
1029
+ # Once an anchor point is set, you can use magic <tt>"@geodist"</tt>
1030
+ # attribute name in your filters and/or sorting expressions.
1031
+ # Sphinx will compute geosphere distance between the given anchor
1032
+ # point and a point specified by latitude and longitude attributes
1033
+ # from each full-text match, and attach this value to the resulting
1034
+ # match. The latitude and longitude values both in {#set_geo_anchor}
1035
+ # and the index attribute data are expected to be in radians.
1036
+ # The result will be returned in meters, so geodistance value of
1037
+ # 1000.0 means 1 km. 1 mile is approximately 1609.344 meters.
1038
+ #
1039
+ # @param [String, Symbol] attrlat a name of latitude attribute.
1040
+ # @param [String, Symbol] attrlong a name of longitude attribute.
1041
+ # @param [Numeric] lat an anchor point latitude, in radians.
1042
+ # @param [Numeric] long an anchor point longitude, in radians.
1043
+ # @return [Sphinx::Client] self.
1044
+ #
1045
+ # @example
1046
+ # sphinx.set_geo_anchor(:latitude, :longitude, 192.5, 143.5)
1047
+ #
1048
+ # @raise [ArgumentError] Occurred when parameters are invalid.
1049
+ #
1050
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
1051
+ #
1052
# Stores the geodistance anchor point; attribute names are coerced to
# String and coordinates (radians) to Float.
def set_geo_anchor(attrlat, attrlong, lat, long)
  { 'attrlat' => attrlat, 'attrlong' => attrlong }.each do |name, value|
    unless value.is_a?(String) || value.is_a?(Symbol)
      raise ArgumentError, "\"#{name}\" argument must be String or Symbol"
    end
  end
  { 'lat' => lat, 'long' => long }.each do |name, value|
    raise ArgumentError, "\"#{name}\" argument must be Numeric" unless value.is_a?(Numeric)
  end

  @anchor = {
    'attrlat'  => attrlat.to_s,
    'attrlong' => attrlong.to_s,
    'lat'      => lat.to_f,
    'long'     => long.to_f
  }
  self
end
alias :SetGeoAnchor :set_geo_anchor
1062
+
1063
+ #=================================================================
1064
+ # GROUP BY settings
1065
+ #=================================================================
1066
+
1067
+ # Sets grouping attribute, function, and groups sorting mode; and
1068
+ # enables grouping (as described in Section 4.6, "Grouping (clustering) search results").
1069
+ #
1070
+ # +attribute+ is a string that contains group-by attribute name.
1071
+ # +func+ is a constant that chooses a function applied to the
1072
+ # attribute value in order to compute group-by key. +groupsort+
1073
+ # is a clause that controls how the groups will be sorted. Its
1074
+ # syntax is similar to that described in Section 4.5,
1075
+ # "SPH_SORT_EXTENDED mode".
1076
+ #
1077
+ # Grouping feature is very similar in nature to <tt>GROUP BY</tt> clause
1078
+ # from SQL. Results produces by this function call are going to
1079
+ # be the same as produced by the following pseudo code:
1080
+ #
1081
+ # SELECT ... GROUP BY func(attribute) ORDER BY groupsort
1082
+ #
1083
+ # Note that it's +groupsort+ that affects the order of matches in
1084
+ # the final result set. Sorting mode (see {#set_sort_mode}) affect
1085
+ # the ordering of matches within group, ie. what match will be
1086
+ # selected as the best one from the group. So you can for instance
1087
+ # order the groups by matches count and select the most relevant
1088
+ # match within each group at the same time.
1089
+ #
1090
+ # Starting with version 0.9.9-rc2, aggregate functions (<tt>AVG()</tt>,
1091
+ # <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are supported
1092
+ # through {#set_select} API call when using <tt>GROUP BY</tt>.
1093
+ #
1094
+ # You can specify group function and attribute as String
1095
+ # ("attr", "day", etc), Symbol (:attr, :day, etc), or
1096
+ # Fixnum constant (SPH_GROUPBY_ATTR, SPH_GROUPBY_DAY, etc).
1097
+ #
1098
+ # @param [String, Symbol] attribute an attribute name to group by.
1099
+ # @param [Integer, String, Symbol] func a grouping function.
1100
+ # @param [String] groupsort a groups sorting mode.
1101
+ # @return [Sphinx::Client] self.
1102
+ #
1103
+ # @example
1104
+ # sphinx.set_group_by(:tag_id, :attr)
1105
+ #
1106
+ # @raise [ArgumentError] Occurred when parameters are invalid.
1107
+ #
1108
+ # @see http://www.sphinxsearch.com/docs/current.html#clustering Section 4.6, "Grouping (clustering) search results"
1109
+ # @see http://www.sphinxsearch.com/docs/current.html#sort-extended Section 4.5, "SPH_SORT_EXTENDED mode"
1110
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupby Section 6.5.1, "SetGroupBy"
1111
+ # @see #set_sort_mode
1112
+ # @see #set_select
1113
+ # @see #set_group_distinct
1114
+ #
1115
# Validates and stores the group-by attribute, grouping function, and
# groups sorting clause, enabling grouping.
#
# +func+ accepts a String or Symbol identifier (resolved against the
# SPH_GROUPBY_* constants) or an Integer constant within the known range.
#
# NOTE: the original implementation matched on +Fixnum+, which was
# removed in Ruby 3.2; +Integer+ is used instead and accepts the same
# constant values.
def set_group_by(attribute, func, groupsort = '@group desc')
  raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
  raise ArgumentError, '"groupsort" argument must be String' unless groupsort.kind_of?(String)

  case func
  when String, Symbol
    begin
      func = self.class.const_get("SPH_GROUPBY_#{func.to_s.upcase}")
    rescue NameError
      raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid"
    end
  when Integer
    raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid" unless (SPH_GROUPBY_DAY..SPH_GROUPBY_ATTRPAIR).include?(func)
  else
    raise ArgumentError, '"func" argument must be Fixnum, String, or Symbol'
  end

  @groupby = attribute.to_s
  @groupfunc = func
  @groupsort = groupsort
  self
end
alias :SetGroupBy :set_group_by
1138
+
1139
+ # Sets attribute name for per-group distinct values count
1140
+ # calculations. Only available for grouping queries.
1141
+ #
1142
+ # +attribute+ is a string that contains the attribute name. For
1143
+ # each group, all values of this attribute will be stored (as
1144
+ # RAM limits permit), then the amount of distinct values will
1145
+ # be calculated and returned to the client. This feature is
1146
+ # similar to <tt>COUNT(DISTINCT)</tt> clause in standard SQL;
1147
+ # so these Sphinx calls:
1148
+ #
1149
+ # sphinx.set_group_by(:category, :attr, '@count desc')
1150
+ # sphinx.set_group_distinct(:vendor)
1151
+ #
1152
+ # can be expressed using the following SQL clauses:
1153
+ #
1154
+ # SELECT id, weight, all-attributes,
1155
+ # COUNT(DISTINCT vendor) AS @distinct,
1156
+ # COUNT(*) AS @count
1157
+ # FROM products
1158
+ # GROUP BY category
1159
+ # ORDER BY @count DESC
1160
+ #
1161
+ # In the sample pseudo code shown just above, {#set_group_distinct}
1162
+ # call corresponds to <tt>COUNT(DISTINCT vendor)</tt> clause only.
1163
+ # <tt>GROUP BY</tt>, <tt>ORDER BY</tt>, and <tt>COUNT(*)</tt>
1164
+ # clauses are all an equivalent of {#set_group_by} settings. Both
1165
+ # queries will return one matching row for each category. In
1166
+ # addition to indexed attributes, matches will also contain
1167
+ # total per-category matches count, and the count of distinct
1168
+ # vendor IDs within each category.
1169
+ #
1170
+ # @param [String, Symbol] attribute an attribute name.
1171
+ # @return [Sphinx::Client] self.
1172
+ #
1173
+ # @example
1174
+ # sphinx.set_group_distinct(:category_id)
1175
+ #
1176
+ # @raise [ArgumentError] Occurred when parameters are invalid.
1177
+ #
1178
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupdistinct Section 6.5.2, "SetGroupDistinct"
1179
+ # @see #set_group_by
1180
+ #
1181
# Stores the attribute name used for per-group COUNT(DISTINCT ...)
# calculations.
def set_group_distinct(attribute)
  unless attribute.is_a?(String) || attribute.is_a?(Symbol)
    raise ArgumentError, '"attribute" argument must be String or Symbol'
  end

  @groupdistinct = attribute.to_s
  self
end
alias :SetGroupDistinct :set_group_distinct
1188
+
1189
+ #=================================================================
1190
+ # Querying
1191
+ #=================================================================
1192
+
1193
+ # Clears all currently set filters.
1194
+ #
1195
+ # This call is only normally required when using multi-queries. You might want
1196
+ # to set different filters for different queries in the batch. To do that,
1197
+ # you should call {#reset_filters} and add new filters using the respective calls.
1198
+ #
1199
+ # @return [Sphinx::Client] self.
1200
+ #
1201
+ # @example
1202
+ # sphinx.reset_filters
1203
+ #
1204
+ # @see #set_filter
1205
+ # @see #set_filter_range
1206
+ # @see #set_filter_float_range
1207
+ # @see #set_geo_anchor
1208
+ #
1209
# Clears every filter added via the set_filter* calls together with the
# geo anchor (used between queries in a multi-query batch).
# NOTE(review): @anchor is reset to an empty Array here even though
# #set_geo_anchor stores a Hash — this mirrors the original behaviour;
# confirm before changing.
def reset_filters
  @anchor = []
  @filters = []
  self
end
alias :ResetFilters :reset_filters
1215
+
1216
+ # Clears all currently group-by settings, and disables group-by.
1217
+ #
1218
+ # This call is only normally required when using multi-queries. You can
1219
+ # change individual group-by settings using {#set_group_by} and {#set_group_distinct}
1220
+ # calls, but you can not disable group-by using those calls. {#reset_group_by}
1221
+ # fully resets previous group-by settings and disables group-by mode in the
1222
+ # current state, so that subsequent {#add_query} calls can perform non-grouping
1223
+ # searches.
1224
+ #
1225
+ # @return [Sphinx::Client] self.
1226
+ #
1227
+ # @example
1228
+ # sphinx.reset_group_by
1229
+ #
1230
+ # @see #set_group_by
1231
+ # @see #set_group_distinct
1232
+ #
1233
# Restores all group-by settings to their defaults, disabling grouping
# for subsequent queries in a multi-query batch.
def reset_group_by
  @groupby       = ''
  @groupsort     = '@group desc'
  @groupfunc     = SPH_GROUPBY_DAY
  @groupdistinct = ''
  self
end
alias :ResetGroupBy :reset_group_by
1241
+
1242
+ # Clear all attribute value overrides (for multi-queries).
1243
+ #
1244
+ # This call is only normally required when using multi-queries. You might want
1245
+ # to set field overrides for different queries in the batch. To do that,
1246
+ # you should call {#reset_overrides} and add new overrides using the
1247
+ # respective calls.
1248
+ #
1249
+ # @return [Sphinx::Client] self.
1250
+ #
1251
+ # @example
1252
+ # sphinx.reset_overrides
1253
+ #
1254
+ # @see #set_override
1255
+ #
1256
# Discards all attribute value overrides registered for multi-queries.
def reset_overrides
  @overrides = []
  self
end
alias :ResetOverrides :reset_overrides
1261
+
1262
+ # Connects to searchd server, runs given search query with
1263
+ # current settings, obtains and returns the result set.
1264
+ #
1265
+ # +query+ is a query string. +index+ is an index name (or names)
1266
+ # string. Returns false and sets {#last_error} message on general
1267
+ # error. Returns search result set on success. Additionally,
1268
+ # the contents of +comment+ are sent to the query log, marked in
1269
+ # square brackets, just before the search terms, which can be very
1270
+ # useful for debugging. Currently, the comment is limited to 128
1271
+ # characters.
1272
+ #
1273
+ # Default value for +index+ is <tt>"*"</tt> that means to query
1274
+ # all local indexes. Characters allowed in index names include
1275
+ # Latin letters (a-z), numbers (0-9), minus sign (-), and
1276
+ # underscore (_); everything else is considered a separator.
1277
+ # Therefore, all of the following samples calls are valid and
1278
+ # will search the same two indexes:
1279
+ #
1280
+ # sphinx.query('test query', 'main delta')
1281
+ # sphinx.query('test query', 'main;delta')
1282
+ # sphinx.query('test query', 'main, delta');
1283
+ #
1284
+ # Index specification order matters. If document with identical
1285
+ # IDs are found in two or more indexes, weight and attribute
1286
+ # values from the very last matching index will be used for
1287
+ # sorting and returning to client (unless explicitly overridden
1288
+ # with {#set_index_weights}). Therefore, in the example above,
1289
+ # matches from "delta" index will always win over matches
1290
+ # from "main".
1291
+ #
1292
+ # On success, {#query} returns a result set that contains some
1293
+ # of the found matches (as requested by {#set_limits}) and
1294
+ # additional general per-query statistics. The result set
1295
+ # is an +Hash+ with the following keys and values:
1296
+ #
1297
+ # <tt>"matches"</tt>::
1298
+ # Array with small +Hash+es containing document weight and
1299
+ # attribute values.
1300
+ # <tt>"total"</tt>::
1301
+ # Total amount of matches retrieved on server (ie. to the server
1302
+ # side result set) by this query. You can retrieve up to this
1303
+ # amount of matches from server for this query text with current
1304
+ # query settings.
1305
+ # <tt>"total_found"</tt>::
1306
+ # Total amount of matching documents in index (that were found
1307
+ # and processed on server).
1308
+ # <tt>"words"</tt>::
1309
+ # Hash which maps query keywords (case-folded, stemmed, and
1310
+ # otherwise processed) to a small Hash with per-keyword statistics
1311
+ # ("docs", "hits").
1312
+ # <tt>"error"</tt>::
1313
+ # Query error message reported by searchd (string, human readable).
1314
+ # Empty if there were no errors.
1315
+ # <tt>"warning"</tt>::
1316
+ # Query warning message reported by searchd (string, human readable).
1317
+ # Empty if there were no warnings.
1318
+ #
1319
+ # Please note: you can use both strings and symbols as <tt>Hash</tt> keys.
1320
+ #
1321
+ # It should be noted that {#query} carries out the same actions as
1322
+ # {#add_query} and {#run_queries} without the intermediate steps; it
1323
+ # is analogous to a single {#add_query} call, followed by a
1324
+ # corresponding {#run_queries}, then returning the first array
1325
+ # element of matches (from the first, and only, query.)
1326
+ #
1327
+ # @param [String] query a query string.
1328
+ # @param [String] index an index name (or names).
1329
+ # @param [String] comment a comment to be sent to the query log.
1330
+ # @return [Hash, false] result set described above or +false+ on error.
1331
+ # @yield [Client] yields just before query performing. Useful to set
1332
+ # filters or sortings. When block does not accept any parameters, it
1333
+ # will be eval'ed inside {Client} instance itself. In this case you
1334
+ # can omit +set_+ prefix for configuration methods.
1335
+ # @yieldparam [Client] sphinx self.
1336
+ #
1337
+ # @example Regular query with previously set filters
1338
+ # sphinx.query('some search text', '*', 'search page')
1339
+ # @example Query with block
1340
+ # sphinx.query('test') do |sphinx|
1341
+ # sphinx.set_match_mode :all
1342
+ # sphinx.set_id_range 10, 100
1343
+ # end
1344
+ # @example Query with instant filters configuring
1345
+ # sphinx.query('test') do
1346
+ # match_mode :all
1347
+ # id_range 10, 100
1348
+ # end
1349
+ #
1350
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-query Section 6.6.1, "Query"
1351
+ # @see #add_query
1352
+ # @see #run_queries
1353
+ #
1354
def query(query, index = '*', comment = '', &block)
  # Start from a fresh batch so this call maps to exactly one request.
  @reqs = []

  if block_given?
    if block.arity > 0
      # Block expects the client as an argument.
      yield self
    else
      # Parameterless block: evaluate inside the client so configuration
      # methods can be called without an explicit receiver.
      begin
        @inside_eval = true
        instance_eval(&block)
      ensure
        @inside_eval = false
      end
    end
  end

  logger.debug { "[sphinx] query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if logger

  add_query(query, index, comment, false)
  batch = run_queries

  # A non-Array result means a network-level failure; run_queries has
  # already recorded the error message.
  return false unless batch.instance_of?(Array)

  first = batch[0]
  @error = first['error']
  @warning = first['warning']

  first['status'] == SEARCHD_ERROR ? false : first
end
alias :Query :query
1385
+
1386
+ # Adds additional query with current settings to multi-query batch.
1387
+ # +query+ is a query string. +index+ is an index name (or names)
1388
+ # string. Additionally if provided, the contents of +comment+ are
1389
+ # sent to the query log, marked in square brackets, just before
1390
+ # the search terms, which can be very useful for debugging.
1391
+ # Currently, this is limited to 128 characters. Returns index
1392
+ # to results array returned from {#run_queries}.
1393
+ #
1394
+ # Batch queries (or multi-queries) enable searchd to perform
1395
+ # internal optimizations if possible. They also reduce network
1396
+ # connection overheads and search process creation overheads in all
1397
+ # cases. They do not result in any additional overheads compared
1398
+ # to simple queries. Thus, if you run several different queries
1399
+ # from your web page, you should always consider using multi-queries.
1400
+ #
1401
+ # For instance, running the same full-text query but with different
1402
+ # sorting or group-by settings will enable searchd to perform
1403
+ # expensive full-text search and ranking operation only once, but
1404
+ # compute multiple group-by results from its output.
1405
+ #
1406
+ # This can be a big saver when you need to display not just plain
1407
+ # search results but also some per-category counts, such as the
1408
+ # amount of products grouped by vendor. Without multi-query, you
1409
+ # would have to run several queries which perform essentially the
1410
+ # same search and retrieve the same matches, but create result
1411
+ # sets differently. With multi-query, you simply pass all these
1412
+ # queries in a single batch and Sphinx optimizes the redundant
1413
+ # full-text search internally.
1414
+ #
1415
+ # {#add_query} internally saves full current settings state along
1416
+ # with the query, and you can safely change them afterwards for
1417
+ # subsequent {#add_query} calls. Already added queries will not
1418
+ # be affected; there's actually no way to change them at all.
1419
+ # Here's an example:
1420
+ #
1421
+ # sphinx.set_sort_mode(:relevance)
1422
+ # sphinx.add_query("hello world", "documents")
1423
+ #
1424
+ # sphinx.set_sort_mode(:attr_desc, :price)
1425
+ # sphinx.add_query("ipod", "products")
1426
+ #
1427
+ # sphinx.add_query("harry potter", "books")
1428
+ #
1429
+ # results = sphinx.run_queries
1430
+ #
1431
+ # With the code above, 1st query will search for "hello world"
1432
+ # in "documents" index and sort results by relevance, 2nd query
1433
+ # will search for "ipod" in "products" index and sort results
1434
+ # by price, and 3rd query will search for "harry potter" in
1435
+ # "books" index while still sorting by price. Note that 2nd
1436
+ # {#set_sort_mode} call does not affect the first query (because
1437
+ # it's already added) but affects both other subsequent queries.
1438
+ #
1439
+ # Additionally, any filters set up before an {#add_query} will
1440
+ # fall through to subsequent queries. So, if {#set_filter} is
1441
+ # called before the first query, the same filter will be in
1442
+ # place for the second (and subsequent) queries batched through
1443
+ # {#add_query} unless you call {#reset_filters} first. Alternatively,
1444
+ # you can add additional filters as well.
1445
+ #
1446
+ # This would also be true for grouping options and sorting options;
1447
+ # no current sorting, filtering, and grouping settings are affected
1448
+ # by this call; so subsequent queries will reuse current query settings.
1449
+ #
1450
+ # {#add_query} returns an index into an array of results that will
1451
+ # be returned from {#run_queries} call. It is simply a sequentially
1452
+ # increasing 0-based integer, ie. first call will return 0, second
1453
+ # will return 1, and so on. Just a small helper so you won't have
1454
+ # to track the indexes manually if you need them.
1455
+ #
1456
+ # @param [String] query a query string.
1457
+ # @param [String] index an index name (or names).
1458
+ # @param [String] comment a comment to be sent to the query log.
1459
+ # @param [Boolean] log indicating whether this call should be logged.
1460
+ # @return [Integer] an index into an array of results that will
1461
+ # be returned from {#run_queries} call.
1462
+ #
1463
+ # @example
1464
+ # sphinx.add_query('some search text', '*', 'search page')
1465
+ #
1466
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-addquery Section 6.6.2, "AddQuery"
1467
+ # @see #query
1468
+ # @see #run_queries
1469
+ #
1470
# Serializes the current client settings together with +query+ into the
# searchd SEARCH wire format and appends the packed request to the
# pending multi-query batch (@reqs). Returns the 0-based slot this
# query's result set will occupy in the array returned by {#run_queries}.
#
# NOTE: the field order below mirrors the searchd binary protocol and
# must not be rearranged.
def add_query(query, index = '*', comment = '', log = true)
  logger.debug { "[sphinx] add_query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if log and logger
  # build request

  # mode and limits
  request = Request.new
  request.put_int @offset, @limit, @mode, @ranker, @sort
  request.put_string @sortby
  # query itself
  request.put_string query
  # weights
  request.put_int_array @weights
  # indexes
  request.put_string index
  # id64 range marker (this client always sends 64-bit document ID ranges)
  request.put_int 1
  # id64 range
  request.put_int64 @min_id.to_i, @max_id.to_i

  # filters: each entry is attribute name, filter type, a type-specific
  # payload, and an exclude flag
  request.put_int @filters.length
  @filters.each do |filter|
    request.put_string filter['attr']
    request.put_int filter['type']

    case filter['type']
    when SPH_FILTER_VALUES
      request.put_int64_array filter['values']
    when SPH_FILTER_RANGE
      request.put_int64 filter['min'], filter['max']
    when SPH_FILTER_FLOATRANGE
      request.put_float filter['min'], filter['max']
    else
      # set_filter* methods are expected to only store known types
      raise SphinxInternalError, 'Internal error: unhandled filter type'
    end
    request.put_int filter['exclude'] ? 1 : 0
  end

  # group-by clause, max-matches count, group-sort clause, cutoff count
  request.put_int @groupfunc
  request.put_string @groupby
  request.put_int @maxmatches
  request.put_string @groupsort
  request.put_int @cutoff, @retrycount, @retrydelay
  request.put_string @groupdistinct

  # anchor point for geodistance sorting/filtering (leading 0/1 flag)
  if @anchor.empty?
    request.put_int 0
  else
    request.put_int 1
    request.put_string @anchor['attrlat'], @anchor['attrlong']
    request.put_float @anchor['lat'], @anchor['long']
  end

  # per-index weights
  request.put_int @indexweights.length
  @indexweights.each do |idx, weight|
    request.put_string idx.to_s
    request.put_int weight
  end

  # max query time
  request.put_int @maxquerytime

  # per-field weights
  request.put_int @fieldweights.length
  @fieldweights.each do |field, weight|
    request.put_string field.to_s
    request.put_int weight
  end

  # comment
  request.put_string comment

  # attribute overrides: per-attribute (document id => value) maps;
  # the value encoding depends on the attribute type
  request.put_int @overrides.length
  for entry in @overrides do
    request.put_string entry['attr']
    request.put_int entry['type'], entry['values'].size
    entry['values'].each do |id, val|
      request.put_int64 id
      case entry['type']
      when SPH_ATTR_FLOAT
        request.put_float val.to_f
      when SPH_ATTR_BIGINT
        request.put_int64 val.to_i
      else
        request.put_int val.to_i
      end
    end
  end

  # select-list
  request.put_string @select

  # store request to requests array; the returned index is the position
  # of this query's result set in run_queries' output
  @reqs << request.to_s;
  return @reqs.length - 1
end
alias :AddQuery :add_query
1571
+
1572
+ # Connect to searchd, runs a batch of all queries added using
1573
+ # {#add_query}, obtains and returns the result sets. Returns
1574
+ # +false+ and sets {#last_error} message on general error
1575
+ # (such as network I/O failure). Returns a plain array of
1576
+ # result sets on success.
1577
+ #
1578
+ # Each result set in the returned array is exactly the same as
1579
+ # the result set returned from {#query}.
1580
+ #
1581
+ # Note that the batch query request itself almost always succeeds —
1582
+ # unless there's a network error, blocking index rotation in
1583
+ # progress, or another general failure which prevents the whole
1584
+ # request from being processed.
1585
+ #
1586
+ # However individual queries within the batch might very well
1587
+ # fail. In this case their respective result sets will contain
1588
+ # non-empty "error" message, but no matches or query statistics.
1589
+ # In the extreme case all queries within the batch could fail.
1590
+ # There still will be no general error reported, because API
1591
+ # was able to successfully connect to searchd, submit the batch,
1592
+ # and receive the results — but every result set will have a
1593
+ # specific error message.
1594
+ #
1595
+ # @return [Array<Hash>] an +Array+ of +Hash+es which are exactly
1596
+ # the same as the result set returned from {#query}.
1597
+ #
1598
+ # @example
1599
+ # sphinx.add_query('some search text', '*', 'search page')
1600
+ # results = sphinx.run_queries
1601
+ #
1602
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-runqueries Section 6.6.3, "RunQueries"
1603
+ # @see #add_query
1604
+ #
1605
# Sends all queued requests (@reqs) to searchd as one batch, clears the
# queue, and parses one result set Hash per query from the response
# stream, in submission order. Returns +false+ (and sets @error) when
# nothing has been queued via add_query.
def run_queries
  logger.debug { "[sphinx] run_queries(#{@reqs.length} queries)" } if logger
  if @reqs.empty?
    @error = 'No queries defined, issue add_query() first'
    return false
  end

  reqs, nreqs = @reqs.join(''), @reqs.length
  @reqs = []
  response = perform_request(:search, reqs, nreqs)

  # parse response: one result set per submitted query, in order
  (1..nreqs).map do
    result = HashWithIndifferentAccess.new('error' => '', 'warning' => '')

    # extract status; a warning still carries a full result set, while
    # any other non-OK status ends this result set after its message
    status = result['status'] = response.get_int
    if status != SEARCHD_OK
      message = response.get_string
      if status == SEARCHD_WARNING
        result['warning'] = message
      else
        result['error'] = message
        next result
      end
    end

    # read schema: field names, then attribute (name, type) pairs
    nfields = response.get_int
    result['fields'] = (1..nfields).map { response.get_string }

    # attribute order in the stream determines per-match value order,
    # so remember it separately from the name => type map
    attrs_names_in_order = []
    nattrs = response.get_int
    attrs = (1..nattrs).inject({}) do |hash, idx|
      name, type = response.get_string, response.get_int
      hash[name] = type
      attrs_names_in_order << name
      hash
    end
    result['attrs'] = attrs

    # read match count and the 64-bit document ID flag
    count, id64 = response.get_ints(2)

    # read matches
    result['matches'] = (1..count).map do
      doc, weight = if id64 == 0
        response.get_ints(2)
      else
        [response.get_int64, response.get_int]
      end

      # This is a single result put in the result['matches'] array
      match = { 'id' => doc, 'weight' => weight }
      match['attrs'] = attrs_names_in_order.inject({}) do |hash, name|
        hash[name] = case attrs[name]
          when SPH_ATTR_BIGINT
            # handle 64-bit ints
            response.get_int64
          when SPH_ATTR_FLOAT
            # handle floats
            response.get_float
          else
            # handle everything else as unsigned ints; multi-value
            # attributes arrive as a length-prefixed integer list
            val = response.get_int
            if (attrs[name] & SPH_ATTR_MULTI) != 0
              (1..val).map { response.get_int }
            else
              val
            end
        end
        hash
      end
      match
    end
    # per-query statistics; time arrives in milliseconds and is
    # formatted as seconds with millisecond precision
    result['total'], result['total_found'], msecs = response.get_ints(3)
    result['time'] = '%.3f' % (msecs / 1000.0)

    # per-keyword statistics: word => { 'docs', 'hits' }
    nwords = response.get_int
    result['words'] = (1..nwords).inject({}) do |hash, idx|
      word = response.get_string
      docs, hits = response.get_ints(2)
      hash[word] = { 'docs' => docs, 'hits' => hits }
      hash
    end

    result
  end
end
alias :RunQueries :run_queries
1695
+
1696
+ #=================================================================
1697
+ # Additional functionality
1698
+ #=================================================================
1699
+
1700
+ # Excerpts (snippets) builder function. Connects to searchd, asks
1701
+ # it to generate excerpts (snippets) from given documents, and
1702
+ # returns the results.
1703
+ #
1704
+ # +docs+ is a plain array of strings that carry the documents'
1705
+ # contents. +index+ is an index name string. Different settings
1706
+ # (such as charset, morphology, wordforms) from given index will
1707
+ # be used. +words+ is a string that contains the keywords to
1708
+ # highlight. They will be processed with respect to index settings.
1709
+ # For instance, if English stemming is enabled in the index,
1710
+ # "shoes" will be highlighted even if keyword is "shoe". Starting
1711
+ # with version 0.9.9-rc1, keywords can contain wildcards, that
1712
+ # work similarly to star-syntax available in queries.
1713
+ #
1714
+ # @param [Array<String>] docs an array of strings which represent
1715
+ # the documents' contents.
1716
+ # @param [String] index an index which settings will be used for
1717
+ # stemming, lexing and case folding.
1718
+ # @param [String] words a string which contains the words to highlight.
1719
+ # @param [Hash] opts a +Hash+ which contains additional optional
1720
+ # highlighting parameters.
1721
+ # @option opts [String] 'before_match' ("<b>") a string to insert before a
1722
+ # keyword match.
1723
+ # @option opts [String] 'after_match' ("</b>") a string to insert after a
1724
+ # keyword match.
1725
+ # @option opts [String] 'chunk_separator' (" ... ") a string to insert
1726
+ # between snippet chunks (passages).
1727
+ # @option opts [Integer] 'limit' (256) maximum snippet size, in symbols
1728
+ # (codepoints).
1729
+ # @option opts [Integer] 'around' (5) how many words to pick around
1730
+ # each matching keywords block.
1731
+ # @option opts [Boolean] 'exact_phrase' (false) whether to highlight exact
1732
+ # query phrase matches only instead of individual keywords.
1733
+ # @option opts [Boolean] 'single_passage' (false) whether to extract single
1734
+ # best passage only.
1735
+ # @option opts [Boolean] 'use_boundaries' (false) whether to extract
1736
+ # passages by phrase boundaries setup in tokenizer.
1737
+ # @option opts [Boolean] 'weight_order' (false) whether to sort the
1738
+ # extracted passages in order of relevance (decreasing weight),
1739
+ # or in order of appearance in the document (increasing position).
1740
+ # @return [Array<String>, false] a plain array of strings with
1741
+ # excerpts (snippets) on success; otherwise, +false+.
1742
+ #
1743
+ # @raise [ArgumentError] Occurred when parameters are invalid.
1744
+ #
1745
+ # @example
1746
+ # sphinx.build_excerpts(['hello world', 'hello me'], 'idx', 'hello')
1747
+ #
1748
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-buildexcerpts Section 6.7.1, "BuildExcerpts"
1749
+ #
1750
# Asks searchd to build highlighted excerpts (snippets) for every
# document in +docs+, using the tokenization settings of +index+ and
# the keywords in +words+. Returns one excerpt string per input
# document, in the same order.
def build_excerpts(docs, index, words, opts = {})
  # validate arguments early with descriptive errors
  raise ArgumentError, '"docs" argument must be Array' unless docs.kind_of?(Array)
  raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
  raise ArgumentError, '"words" argument must be String' unless words.kind_of?(String)
  raise ArgumentError, '"opts" argument must be Hash' unless opts.kind_of?(Hash)

  docs.each do |doc|
    raise ArgumentError, '"docs" argument must be Array of Strings' unless doc.kind_of?(String)
  end

  # fixup options: layer user-supplied values over the defaults
  opts = HashWithIndifferentAccess.new(
    'before_match' => '<b>',
    'after_match' => '</b>',
    'chunk_separator' => ' ... ',
    'limit' => 256,
    'around' => 5,
    'exact_phrase' => false,
    'single_passage' => false,
    'use_boundaries' => false,
    'weight_order' => false
  ).update(opts)

  # build request

  # v.1.0 req: pack the boolean options into a flags bitmask
  # (bit 0 is always set — "remove spaces")
  flags = 1
  flags |= 2 if opts['exact_phrase']
  flags |= 4 if opts['single_passage']
  flags |= 8 if opts['use_boundaries']
  flags |= 16 if opts['weight_order']

  request = Request.new
  request.put_int 0, flags # mode=0, then the computed option flags
  # req index
  request.put_string index.to_s
  # req words
  request.put_string words

  # options
  request.put_string opts['before_match']
  request.put_string opts['after_match']
  request.put_string opts['chunk_separator']
  request.put_int opts['limit'].to_i, opts['around'].to_i

  # documents
  request.put_int docs.size
  request.put_string(*docs)

  response = perform_request(:excerpt, request)

  # parse response: one excerpt string per input document
  docs.map { response.get_string }
end
alias :BuildExcerpts :build_excerpts
1805
+
1806
+ # Extracts keywords from query using tokenizer settings for given
1807
+ # index, optionally with per-keyword occurrence statistics.
1808
+ # Returns an array of hashes with per-keyword information.
1809
+ #
1810
+ # +query+ is a query to extract keywords from. +index+ is a name of
1811
+ # the index to get tokenizing settings and keyword occurrence
1812
+ # statistics from. +hits+ is a boolean flag that indicates whether
1813
+ # keyword occurrence statistics are required.
1814
+ #
1815
+ # The result set consists of +Hash+es with the following keys and values:
1816
+ #
1817
+ # <tt>'tokenized'</tt>::
1818
+ # Tokenized keyword.
1819
+ # <tt>'normalized'</tt>::
1820
+ # Normalized keyword.
1821
+ # <tt>'docs'</tt>::
1822
+ # A number of documents where keyword is found (if +hits+ param is +true+).
1823
+ # <tt>'hits'</tt>::
1824
+ # A number of keywords occurrences among all documents (if +hits+ param is +true+).
1825
+ #
1826
+ # @param [String] query a query string.
1827
+ # @param [String] index an index to get tokenizing settings and
1828
+ # keyword occurrence statistics from.
1829
+ # @param [Boolean] hits indicates whether keyword occurrence
1830
+ # statistics are required.
1831
+ # @return [Array<Hash>] an +Array+ of +Hash+es in format specified
1832
+ # above.
1833
+ #
1834
+ # @raise [ArgumentError] Occurred when parameters are invalid.
1835
+ #
1836
+ # @example
1837
+ # keywords = sphinx.build_keywords("this.is.my query", "test1", false)
1838
+ #
1839
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-buildkeywords Section 6.7.3, "BuildKeywords"
1840
+ #
1841
def build_keywords(query, index, hits)
  # Validate arguments up front so protocol errors cannot occur later.
  raise ArgumentError, '"query" argument must be String' unless query.is_a?(String)
  raise ArgumentError, '"index" argument must be String' unless index.is_a?(String) || index.is_a?(Symbol)
  raise ArgumentError, '"hits" argument must be Boolean' unless hits.is_a?(TrueClass) || hits.is_a?(FalseClass)

  # Serialize the v.1.0 keywords request: query, index, statistics flag.
  request = Request.new
  request.put_string query
  request.put_string index
  request.put_int(hits ? 1 : 0)

  response = perform_request(:keywords, request)

  # Each keyword entry carries its raw and normalized forms, followed
  # by (docs, hits) counters when statistics were requested.
  word_count = response.get_int
  Array.new(word_count) do
    info = HashWithIndifferentAccess.new(
      'tokenized'  => response.get_string,
      'normalized' => response.get_string
    )
    info['docs'], info['hits'] = response.get_ints(2) if hits
    info
  end
end
alias :BuildKeywords :build_keywords
1868
+
1869
+ # Instantly updates given attribute values in given documents.
1870
+ # Returns number of actually updated documents (0 or more) on
1871
+ # success, or -1 on failure.
1872
+ #
1873
+ # +index+ is a name of the index (or indexes) to be updated.
1874
+ # +attrs+ is a plain array with string attribute names, listing
1875
+ # attributes that are updated. +values+ is a Hash where key is
1876
+ # document ID, and value is a plain array of new attribute values.
1877
+ #
1878
+ # +index+ can be either a single index name or a list, like in
1879
+ # {#query}. Unlike {#query}, wildcard is not allowed and all the
1880
+ # indexes to update must be specified explicitly. The list of
1881
+ # indexes can include distributed index names. Updates on
1882
+ # distributed indexes will be pushed to all agents.
1883
+ #
1884
+ # The updates only work with docinfo=extern storage strategy.
1885
+ # They are very fast because they're working fully in RAM, but
1886
+ # they can also be made persistent: updates are saved on disk
1887
+ # on clean searchd shutdown initiated by SIGTERM signal. With
1888
+ # additional restrictions, updates are also possible on MVA
1889
+ # attributes; refer to mva_updates_pool directive for details.
1890
+ #
1891
+ # The first sample statement will update document 1 in index
1892
+ # "test1", setting "group_id" to 456. The second one will update
1893
+ # documents 1001, 1002 and 1003 in index "products". For document
1894
+ # 1001, the new price will be set to 123 and the new amount in
1895
+ # stock to 5; for document 1002, the new price will be 37 and the
1896
+ # new amount will be 11; etc. The third one updates document 1
1897
+ # in index "test2", setting MVA attribute "group_id" to [456, 789].
1898
+ #
1899
+ # @example
1900
+ # sphinx.update_attributes("test1", ["group_id"], { 1 => [456] });
1901
+ # sphinx.update_attributes("products", ["price", "amount_in_stock"],
1902
+ # { 1001 => [123, 5], 1002 => [37, 11], 1003 => [25, 129] });
1903
+ # sphinx.update_attributes('test2', ['group_id'], { 1 => [[456, 789]] }, true)
1904
+ #
1905
+ # @param [String] index a name of the index to be updated.
1906
+ # @param [Array<String>] attrs an array of attribute name strings.
1907
+ # @param [Hash] values is a hash where key is document id, and
1908
+ # value is an array of new attribute values.
1909
+ # @param [Boolean] mva indicating whether to update MVA.
1910
+ # @return [Integer] number of actually updated documents (0 or more) on success,
1911
+ # -1 on failure.
1912
+ #
1913
+ # @raise [ArgumentError] Occurred when parameters are invalid.
1914
+ #
1915
+ # @see http://www.sphinxsearch.com/docs/current.html#api-func-updateatttributes Section 6.7.2, "UpdateAttributes"
1916
+ #
1917
# Instantly updates the given attribute +values+ for documents in
# +index+. +attrs+ lists the attribute names being updated; +values+
# maps document IDs to arrays of new values (one per attribute, in the
# same order). Pass <tt>mva = true</tt> when updating multi-value
# attributes (each value is then an Array of Integers).
#
# Returns the number of actually updated documents (0 or more) on
# success, or -1 on failure.
#
# @raise [ArgumentError] when any argument fails validation.
def update_attributes(index, attrs, values, mva = false)
  # verify everything
  raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
  raise ArgumentError, '"mva" argument must be Boolean' unless mva.kind_of?(TrueClass) or mva.kind_of?(FalseClass)

  raise ArgumentError, '"attrs" argument must be Array' unless attrs.kind_of?(Array)
  attrs.each do |attr|
    raise ArgumentError, '"attrs" argument must be Array of Strings' unless attr.kind_of?(String) or attr.kind_of?(Symbol)
  end

  raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
  values.each do |id, entry|
    raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless id.kind_of?(Integer)
    raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless entry.kind_of?(Array)
    raise ArgumentError, "\"values\" argument Hash values Array must have #{attrs.length} elements" unless entry.length == attrs.length
    entry.each do |v|
      if mva
        raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays' unless v.kind_of?(Array)
        v.each do |vv|
          raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays of Integers' unless vv.kind_of?(Integer)
        end
      else
        raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Integers' unless v.kind_of?(Integer)
      end
    end
  end

  # build request (field order follows the searchd UPDATE wire protocol)
  request = Request.new
  # Validation accepts Symbols, so convert explicitly before
  # serialization, matching the other put_string call sites in this class.
  request.put_string index.to_s

  request.put_int attrs.length
  attrs.each do |attr|
    request.put_string attr.to_s
    request.put_int mva ? 1 : 0
  end

  request.put_int values.length
  values.each do |id, entry|
    request.put_int64 id
    if mva
      # each MVA value is serialized as a length-prefixed integer array
      entry.each { |v| request.put_int_array v }
    else
      request.put_int(*entry)
    end
  end

  response = perform_request(:update, request)

  # parse response: searchd replies with the updated-documents count
  response.get_int
end
alias :UpdateAttributes :update_attributes
1970
+
1971
+ # Escapes characters that are treated as special operators by the
1972
+ # query language parser.
1973
+ #
1974
+ # This function might seem redundant because it's trivial to
1975
+ # implement in any calling application. However, as the set of
1976
+ # special characters might change over time, it makes sense to
1977
+ # have an API call that is guaranteed to escape all such
1978
+ # characters at all times.
1979
+ #
1980
+ # @param [String] string is a string to escape.
1981
+ # @return [String] an escaped string.
1982
+ #
1983
+ # @example:
1984
+ # escaped = sphinx.escape_string "escaping-sample@query/string"
1985
+ #
1986
# Prefixes every Sphinx query-syntax metacharacter in +string+ with a
# backslash and returns the escaped copy. Non-string input is coerced
# with +to_s+ first.
def escape_string(string)
  string.to_s.gsub(/([\\()|\-!@~"&\/\^\$=])/) { |char| "\\#{char}" }
end
alias :EscapeString :escape_string
1990
+
1991
+ # Queries searchd status, and returns an array of status variable name
1992
+ # and value pairs.
1993
+ #
1994
+ # @return [Array<Array>, Array<Hash>] a table containing searchd status information.
1995
+ # If there are more than one server configured ({#set_servers}), an
1996
+ # +Array+ of +Hash+es will be returned, one for each server. Hash will
1997
+ # contain <tt>:server</tt> element with string name of server (<tt>host:port</tt>)
1998
+ # and <tt>:status</tt> table just like one for a single server. In case of
1999
+ # any error, it will be stored in the <tt>:error</tt> key.
2000
+ #
2001
+ # @example Single server
2002
+ # status = sphinx.status
2003
+ # puts status.map { |key, value| "#{key.rjust(20)}: #{value}" }
2004
+ #
2005
+ # @example Multiple servers
2006
+ # sphinx.set_servers([
2007
+ # { :host => 'localhost' },
2008
+ # { :host => 'browse02.local' }
2009
+ # ])
2010
+ # sphinx.status.each do |report|
2011
+ # puts "=== #{report[:server]}"
2012
+ # if report[:error]
2013
+ # puts "Error: #{report[:error]}"
2014
+ # else
2015
+ # puts report[:status].map { |key, value| "#{key.rjust(20)}: #{value}" }
2016
+ # end
2017
+ # end
2018
+ #
2019
def status
  # searchd expects a single int body for the STATUS command.
  request = Request.new
  request.put_int(1)

  reports = @servers.map do |server|
    begin
      response = perform_request(:status, request, nil, server)
      # The reply is a rows x cols table of strings.
      rows, cols = response.get_ints(2)
      table = Array.new(rows) { Array.new(cols) { response.get_string } }
      HashWithIndifferentAccess.new(:server => server.to_s, :status => table)
    rescue SphinxError
      # Re-raise error when a single server configured
      raise if @servers.size == 1
      HashWithIndifferentAccess.new(:server => server.to_s, :error => self.last_error)
    end
  end

  # Single-server setups get the bare status table; multi-server
  # setups get one report Hash per server.
  if @servers.size > 1
    reports
  else
    reports.first[:status]
  end
end
alias :Status :status
2042
+
2043
+ #=================================================================
2044
+ # Persistent connections
2045
+ #=================================================================
2046
+
2047
+ # Opens persistent connection to the server.
2048
+ #
2049
+ # This method could be used only when a single searchd server
2050
+ # configured.
2051
+ #
2052
+ # @return [Boolean] +true+ when persistent connection has been
2053
+ # established; otherwise, +false+.
2054
+ #
2055
+ # @example
2056
+ # begin
2057
+ # sphinx.open
2058
+ # # perform several requests
2059
+ # ensure
2060
+ # sphinx.close
2061
+ # end
2062
+ #
2063
+ # @see #close
2064
+ #
2065
def open
  # Persistent connections are only supported with one configured server.
  if @servers.size > 1
    @error = 'too many servers. persistent socket allowed only for a single server.'
    return false
  end

  server = @servers.first
  if server.persistent?
    @error = 'already connected'
    return false
  end

  handshake = Request.new
  handshake.put_int(1)

  # Keep the socket instead of parsing a response: the server object
  # adopts it as its persistent connection.
  perform_request(:persist, handshake, nil) do |srv, socket|
    srv.make_persistent!(socket)
  end

  true
end
alias :Open :open
2086
+
2087
+ # Closes previously opened persistent connection.
2088
+ #
2089
+ # This method could be used only when a single searchd server
2090
+ # configured.
2091
+ #
2092
+ # @return [Boolean] +true+ when persistent connection has been
2093
+ # closed; otherwise, +false+.
2094
+ #
2095
+ # @example
2096
+ # begin
2097
+ # sphinx.open
2098
+ # # perform several requests
2099
+ # ensure
2100
+ # sphinx.close
2101
+ # end
2102
+ #
2103
+ # @see #open
2104
+ #
2105
def close
  # Persistent connections are only supported with one configured server.
  if @servers.size > 1
    @error = 'too many servers. persistent socket allowed only for a single server.'
    return false
  end

  server = @servers.first
  unless server.persistent?
    @error = 'not connected'
    return false
  end

  server.close_persistent!
end
alias :Close :close
2119
+
2120
+ protected
2121
+
2122
+ # Connect, send query, get response.
2123
+ #
2124
+ # Use this method to communicate with Sphinx server. It ensures connection
2125
+ # will be instantiated properly, all headers will be generated properly, etc.
2126
+ #
2127
+ # @param [Symbol, String] command searchd command to perform (<tt>:search</tt>, <tt>:excerpt</tt>,
2128
+ # <tt>:update</tt>, <tt>:keywords</tt>, <tt>:persist</tt>, <tt>:status</tt>,
2129
+ # <tt>:query</tt>, <tt>:flushattrs</tt>. See <tt>SEARCHD_COMMAND_*</tt> for details).
2130
+ # @param [Sphinx::Request] request contains request body.
2131
+ # @param [Integer] additional additional integer data to be placed between header and body.
2132
+ # @param [Sphinx::Server] server where perform request on. This is special
2133
+ # parameter for internal usage. If specified, request will be performed
2134
+ # on specified server, and it will try to establish connection to this
2135
+ # server only once.
2136
+ #
2137
+ # @yield if block given, response will not be parsed, plain socket
2138
+ # will be yielded instead. This is special mode used for
2139
+ # persistent connections, do not use for other tasks.
2140
+ # @yieldparam [Sphinx::Server] server a server where request was performed on.
2141
+ # @yieldparam [Sphinx::BufferedIO] socket a socket used to perform the request.
2142
+ # @return [Sphinx::Response] contains response body.
2143
+ #
2144
+ # @see #parse_response
2145
+ #
2146
+ def perform_request(command, request, additional = nil, server = nil)
2147
+ if server
2148
+ attempts = 1
2149
+ else
2150
+ server = case request
2151
+ when String
2152
+ Zlib.crc32(request)
2153
+ when Request
2154
+ request.crc32
2155
+ else
2156
+ raise ArgumentError, "request argument must be String or Sphinx::Request"
2157
+ end
2158
+ attempts = nil
2159
+ end
2160
+
2161
+ with_server(server, attempts) do |server|
2162
+ logger.info { "[sphinx] #{command} on server #{server}" } if logger
2163
+
2164
+ cmd = command.to_s.upcase
2165
+ command_id = Sphinx::Client.const_get("SEARCHD_COMMAND_#{cmd}")
2166
+ command_ver = Sphinx::Client.const_get("VER_COMMAND_#{cmd}")
2167
+
2168
+ with_socket(server) do |socket|
2169
+ len = request.to_s.length + (additional.nil? ? 0 : 4)
2170
+ header = [command_id, command_ver, len].pack('nnN')
2171
+ header << [additional].pack('N') unless additional.nil?
2172
+
2173
+ socket.write(header + request.to_s)
2174
+
2175
+ if block_given?
2176
+ yield server, socket
2177
+ else
2178
+ parse_response(socket, command_ver)
2179
+ end
2180
+ end
2181
+ end
2182
+ end
2183
+
2184
+ # This is internal method which gets and parses response packet from
2185
+ # searchd server.
2186
+ #
2187
+ # There are several exceptions which could be thrown in this method:
2188
+ #
2189
+ # @param [Sphinx::BufferedIO] socket an input stream object.
2190
+ # @param [Integer] client_version a command version which client supports.
2191
+ # @return [Sphinx::Response] could be used for context-based
2192
+ # parsing of reply from the server.
2193
+ #
2194
+ # @raise [SystemCallError, SocketError] should be handled by caller (see {#with_socket}).
2195
+ # @raise [SphinxResponseError] incomplete reply from searchd.
2196
+ # @raise [SphinxInternalError] searchd internal error.
2197
+ # @raise [SphinxTemporaryError] searchd temporary error.
2198
+ # @raise [SphinxUnknownError] searchd unknown error.
2199
+ #
2200
+ # @see #with_socket
2201
+ # @private
2202
+ #
2203
+ def parse_response(socket, client_version)
2204
+ response = ''
2205
+ status = ver = len = 0
2206
+
2207
+ # Read server reply from server. All exceptions are handled by {#with_socket}.
2208
+ header = socket.read(8)
2209
+ if header.length == 8
2210
+ status, ver, len = header.unpack('n2N')
2211
+ response = socket.read(len) if len > 0
2212
+ end
2213
+
2214
+ # check response
2215
+ read = response.length
2216
+ if response.empty? or read != len.to_i
2217
+ error = len > 0 \
2218
+ ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
2219
+ : 'received zero-sized searchd response'
2220
+ raise SphinxResponseError, error
2221
+ end
2222
+
2223
+ # check status
2224
+ if (status == SEARCHD_WARNING)
2225
+ wlen = response[0, 4].unpack('N*').first
2226
+ @warning = response[4, wlen]
2227
+ return response[4 + wlen, response.length - 4 - wlen]
2228
+ end
2229
+
2230
+ if status == SEARCHD_ERROR
2231
+ error = 'searchd error: ' + response[4, response.length - 4]
2232
+ raise SphinxInternalError, error
2233
+ end
2234
+
2235
+ if status == SEARCHD_RETRY
2236
+ error = 'temporary searchd error: ' + response[4, response.length - 4]
2237
+ raise SphinxTemporaryError, error
2238
+ end
2239
+
2240
+ unless status == SEARCHD_OK
2241
+ error = "unknown status code: '#{status}'"
2242
+ raise SphinxUnknownError, error
2243
+ end
2244
+
2245
+ # check version
2246
+ if ver < client_version
2247
+ @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
2248
+ "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
2249
+ end
2250
+
2251
+ Response.new(response)
2252
+ end
2253
+
2254
+ # This is internal method which selects next server (round-robin)
2255
+ # and yields it to the block passed.
2256
+ #
2257
+ # In case of connection error, it will try next server several times
2258
+ # (see {#set_connect_timeout} method details). If all servers are down,
2259
+ # it will set error attribute (could be retrieved with {#last_error}
2260
+ # method) with the last exception message, and {#connect_error?}
2261
+ # method will return true. Also, {SphinxConnectError} exception
2262
+ # will be raised.
2263
+ #
2264
+ # @overload with_server(server_index)
2265
+ # Get the server based on some seed value (usually CRC32 of
2266
+ # request. In this case initial server will be choosed using
2267
+ # this seed value, in case of connetion failure next server
2268
+ # in servers list will be used).
2269
+ # @param [Integer] server_index server index, must be any
2270
+ # integer value (not necessarily less than number of servers.)
2271
+ # @param [Integer] attempts how many retries to perform. Use
2272
+ # +nil+ to perform retries configured with {#set_connect_timeout}.
2273
+ # @overload with_server(server)
2274
+ # Get the server specified as a parameter. If specified, request
2275
+ # will be performed on specified server, and it will try to
2276
+ # establish connection to this server only once.
2277
+ # @param [Server] server server to perform request on.
2278
+ # @param [Integer] attempts how many retries to perform. Use
2279
+ # +nil+ to perform retries configured with {#set_connect_timeout}.
2280
+ #
2281
+ # @yield a block which performs request on a given server.
2282
+ # @yieldparam [Sphinx::Server] server contains information
2283
+ # about the server to perform request on.
2284
+ # @raise [SphinxConnectError] on any connection error.
2285
+ #
2286
+ def with_server(server = nil, attempts = nil)
2287
+ case server
2288
+ when Server
2289
+ idx = @servers.index(server) || 0
2290
+ s = server
2291
+ when Integer
2292
+ idx = server % @servers.size
2293
+ s = @servers[idx]
2294
+ when NilClass
2295
+ idx = 0
2296
+ s = @servers[idx]
2297
+ else
2298
+ raise ArgumentError, 'server argument must be Integer or Sphinx::Server'
2299
+ end
2300
+ attempts ||= @retries
2301
+ begin
2302
+ yield s
2303
+ rescue SphinxConnectError => e
2304
+ logger.warn { "[sphinx] server failed: #{e.class.name}: #{e.message}" } if logger
2305
+ # Connection error! Do we need to try it again?
2306
+ attempts -= 1
2307
+ if attempts > 0
2308
+ logger.info { "[sphinx] connection to server #{s.inspect} DIED! Retrying operation..." } if logger
2309
+ # Get the next server
2310
+ idx = (idx + 1) % @servers.size
2311
+ s = @servers[idx]
2312
+ retry
2313
+ end
2314
+
2315
+ # Re-raise original exception
2316
+ @error = e.message
2317
+ @connerror = true
2318
+ raise
2319
+ end
2320
+ end
2321
+
2322
    # This is internal method which retrieves socket for a given server,
    # initiates Sphinx session, and yields this socket to a block passed.
    #
    # In case of any problems with session initiation, {SphinxConnectError}
    # will be raised, because this is part of connection establishing. See
    # {#with_server} method details to get more information about how this
    # exception is handled.
    #
    # Socket retrieving routine is wrapped in a block with it's own
    # timeout value (see {#set_connect_timeout}). This is done in
    # {Server#get_socket} method, so check it for details.
    #
    # Request execution is wrapped with block with another timeout
    # (see {#set_request_timeout}). This ensures no Sphinx request will
    # take unreasonable time.
    #
    # In case of any Sphinx error (incomplete reply, internal or temporary
    # error), connection to the server will be re-established, and request
    # will be retried (see {#set_request_timeout}). Of course, if connection
    # could not be established, next server will be selected (see explanation
    # above).
    #
    # NOTE(review): the +ensure+ clause below runs +free_socket+ on every
    # exit path, including success and after the +rescue+ branch has already
    # freed the socket — presumably {Server#free_socket} is nil-safe,
    # idempotent, and leaves persistent sockets open; confirm in
    # {Sphinx::Server}.
    #
    # @param [Sphinx::Server] server contains information
    #   about the server to perform request on.
    # @yield a block which will actually perform the request.
    # @yieldparam [Sphinx::BufferedIO] socket a socket used to
    #   perform the request.
    #
    # @raise [SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError]
    #   on any response error.
    # @raise [SphinxConnectError] on any connection error.
    #
    def with_socket(server)
      # Request-level retry budget (see {#set_request_timeout}); connection
      # retries are handled separately by {#with_server}.
      attempts = @reqretries
      socket = nil

      begin
        s = server.get_socket do |sock|
          # Remember socket to close it in case of emergency
          socket = sock

          # send my version
          # this is a subtle part. we must do it before (!) reading back from searchd.
          # because otherwise under some conditions (reported on FreeBSD for instance)
          # TCP stack could throttle write-write-read pattern because of Nagle.
          sock.write([1].pack('N'))
          v = sock.read(4).unpack('N*').first

          # Ouch, invalid protocol!
          if v < 1
            raise SphinxConnectError, "expected searchd protocol version 1+, got version '#{v}'"
          end
        end

        # Run the actual request under the request timeout.
        Sphinx::safe_execute(@reqtimeout) do
          yield s
        end
      rescue SocketError, SystemCallError, IOError, ::Errno::EPIPE => e
        logger.warn { "[sphinx] socket failure: #{e.message}" } if logger
        # Ouch, communication problem, will be treated as a connection problem.
        # {#with_server} catches SphinxConnectError and fails over.
        raise SphinxConnectError, "failed to read searchd response (msg=#{e.message})"
      rescue SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError, ::Timeout::Error, EOFError => e
        # EOFError should not occur in ideal world, because we compare response length
        # with a value passed by Sphinx. But we want to ensure that client will not
        # fail with unexpected error when Sphinx implementation has bugs, aren't we?
        # Timeouts are normalized the same way so callers see a Sphinx error class.
        if e.kind_of?(EOFError) or e.kind_of?(::Timeout::Error)
          new_e = SphinxResponseError.new("failed to read searchd response (msg=#{e.message})")
          new_e.set_backtrace(e.backtrace)
          e = new_e
        end
        logger.warn { "[sphinx] generic failure: #{e.class.name}: #{e.message}" } if logger

        # Close previously opened socket (in case of it has been really opened)
        server.free_socket(socket)

        # Request error! Do we need to try it again?
        # (retry re-runs the whole begin block, re-establishing the connection)
        attempts -= 1
        retry if attempts > 0

        # Re-raise original (possibly normalized) exception
        @error = e.message
        raise e
      ensure
        # Close previously opened socket on any other error
        server.free_socket(socket)
      end
    end
2409
+
2410
+ # Enables ability to skip +set_+ prefix for methods inside {#query} block.
2411
+ #
2412
+ # @example
2413
+ # sphinx.query('test') do
2414
+ # match_mode :all
2415
+ # id_range 10, 100
2416
+ # end
2417
+ #
2418
+ def method_missing(method_id, *arguments, &block)
2419
+ if @inside_eval and self.respond_to?("set_#{method_id}")
2420
+ self.send("set_#{method_id}", *arguments)
2421
+ else
2422
+ super
2423
+ end
2424
+ end
2425
+ end
2426
+ end