sphinx 0.9.9.2117 → 0.9.10

Sign up to get free protection for your applications and to get access to all the features.
data/lib/sphinx/client.rb CHANGED
@@ -1,50 +1,202 @@
1
+ # = client.rb - Sphinx Client API
2
+ #
3
+ # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
+ # Copyright:: Copyright (c) 2006 — 2009 Dmytro Shteflyuk
5
+ # License:: Distributes under the same terms as Ruby
6
+ # Version:: 0.9.10-r2043
7
+ # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
+ #
9
+ # This library is distributed under the terms of the Ruby license.
10
+ # You can freely distribute/modify this library.
11
+
12
+ # ==Sphinx Client API
13
+ #
14
+ # The Sphinx Client API is used to communicate with <tt>searchd</tt>
15
+ # daemon and get search results from Sphinx.
16
+ #
17
+ # ===Usage
18
+ #
19
+ # sphinx = Sphinx::Client.new
20
+ # result = sphinx.Query('test')
21
+ # ids = result['matches'].map { |match| match['id'] }.join(',')
22
+ # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
+ #
24
+ # docs = posts.map(&:body)
25
+ # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
+
27
+ require 'socket'
28
+
1
29
  module Sphinx
2
- # The Sphinx Client API is used to communicate with <tt>searchd</tt>
3
- # daemon and perform requests.
4
- #
5
- # @example
6
- # sphinx = Sphinx::Client.new
7
- # result = sphinx.query('test')
8
- # ids = result['matches'].map { |match| match['id'] }
9
- # posts = Post.all :conditions => { :id => ids },
10
- # :order => "FIELD(id,#{ids.join(',')})"
11
- #
12
- # docs = posts.map(&:body)
13
- # excerpts = sphinx.build_excerpts(docs, 'index', 'test')
14
- #
15
- class Client
16
- include Sphinx::Constants
17
-
18
- #=================================================================
19
- # Some internal attributes to use inside client API
20
- #=================================================================
30
+ # :stopdoc:
21
31
 
22
- # List of searchd servers to connect to.
23
- # @private
24
- attr_reader :servers
25
- # Connection timeout in seconds.
26
- # @private
27
- attr_reader :timeout
28
- # Number of connection retries.
29
- # @private
30
- attr_reader :retries
31
- # Request timeout in seconds.
32
- # @private
33
- attr_reader :reqtimeout
34
- # Number of request retries.
35
- # @private
36
- attr_reader :reqretries
37
- # Log debug/info/warn to the given Logger, defaults to nil.
38
- # @private
39
- attr_reader :logger
32
+ class SphinxError < StandardError; end
33
+ class SphinxArgumentError < SphinxError; end
34
+ class SphinxConnectError < SphinxError; end
35
+ class SphinxResponseError < SphinxError; end
36
+ class SphinxInternalError < SphinxError; end
37
+ class SphinxTemporaryError < SphinxError; end
38
+ class SphinxUnknownError < SphinxError; end
40
39
 
41
- # Constructs the <tt>Sphinx::Client</tt> object and sets options
42
- # to their default values.
43
- #
44
- # @param [Logger] logger a logger object to put logs to. No logging
45
- # will be performed when not set.
46
- #
47
- def initialize(logger = nil)
40
+ # :startdoc:
41
+
42
+ class Client
43
+
44
+ # :stopdoc:
45
+
46
+ # Known searchd commands
47
+
48
+ # search command
49
+ SEARCHD_COMMAND_SEARCH = 0
50
+ # excerpt command
51
+ SEARCHD_COMMAND_EXCERPT = 1
52
+ # update command
53
+ SEARCHD_COMMAND_UPDATE = 2
54
+ # keywords command
55
+ SEARCHD_COMMAND_KEYWORDS = 3
56
+ # persist command
57
+ SEARCHD_COMMAND_PERSIST = 4
58
+ # status command
59
+ SEARCHD_COMMAND_STATUS = 5
60
+ # query command
61
+ SEARCHD_COMMAND_QUERY = 6
62
+ # flushattrs command
63
+ SEARCHD_COMMAND_FLUSHATTRS = 7
64
+
65
+ # Current client-side command implementation versions
66
+
67
+ # search command version
68
+ VER_COMMAND_SEARCH = 0x117
69
+ # excerpt command version
70
+ VER_COMMAND_EXCERPT = 0x100
71
+ # update command version
72
+ VER_COMMAND_UPDATE = 0x102
73
+ # keywords command version
74
+ VER_COMMAND_KEYWORDS = 0x100
75
+ # persist command version
76
+ VER_COMMAND_PERSIST = 0x000
77
+ # status command version
78
+ VER_COMMAND_STATUS = 0x100
79
+ # query command version
80
+ VER_COMMAND_QUERY = 0x100
81
+ # flushattrs command version
82
+ VER_COMMAND_FLUSHATTRS = 0x100
83
+
84
+ # Known searchd status codes
85
+
86
+ # general success, command-specific reply follows
87
+ SEARCHD_OK = 0
88
+ # general failure, command-specific reply may follow
89
+ SEARCHD_ERROR = 1
90
+ # temporary failure, client should retry later
91
+ SEARCHD_RETRY = 2
92
+ # general success, warning message and command-specific reply follow
93
+ SEARCHD_WARNING = 3
94
+
95
+ # :startdoc:
96
+
97
+ # Known match modes
98
+
99
+ # match all query words
100
+ SPH_MATCH_ALL = 0
101
+ # match any query word
102
+ SPH_MATCH_ANY = 1
103
+ # match this exact phrase
104
+ SPH_MATCH_PHRASE = 2
105
+ # match this boolean query
106
+ SPH_MATCH_BOOLEAN = 3
107
+ # match this extended query
108
+ SPH_MATCH_EXTENDED = 4
109
+ # match all document IDs w/o fulltext query, apply filters
110
+ SPH_MATCH_FULLSCAN = 5
111
+ # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
112
+ SPH_MATCH_EXTENDED2 = 6
113
+
114
+ # Known ranking modes (ext2 only)
115
+
116
+ # default mode, phrase proximity major factor and BM25 minor one
117
+ SPH_RANK_PROXIMITY_BM25 = 0
118
+ # statistical mode, BM25 ranking only (faster but worse quality)
119
+ SPH_RANK_BM25 = 1
120
+ # no ranking, all matches get a weight of 1
121
+ SPH_RANK_NONE = 2
122
+ # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
123
+ SPH_RANK_WORDCOUNT = 3
124
+ # phrase proximity
125
+ SPH_RANK_PROXIMITY = 4
126
+ # emulate old match-any weighting
127
+ SPH_RANK_MATCHANY = 5
128
+ # sets bits where there were matches
129
+ SPH_RANK_FIELDMASK = 6
130
+ # codename SPH04, phrase proximity + bm25 + head/exact boost
131
+ SPH_RANK_SPH04 = 7
132
+
133
+ # Known sort modes
134
+
135
+ # sort by document relevance desc, then by date
136
+ SPH_SORT_RELEVANCE = 0
137
+ # sort by document date desc, then by relevance desc
138
+ SPH_SORT_ATTR_DESC = 1
139
+ # sort by document date asc, then by relevance desc
140
+ SPH_SORT_ATTR_ASC = 2
141
+ # sort by time segments (hour/day/week/etc) desc, then by relevance desc
142
+ SPH_SORT_TIME_SEGMENTS = 3
143
+ # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
144
+ SPH_SORT_EXTENDED = 4
145
+ # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
146
+ SPH_SORT_EXPR = 5
147
+
148
+ # Known filter types
149
+
150
+ # filter by integer values set
151
+ SPH_FILTER_VALUES = 0
152
+ # filter by integer range
153
+ SPH_FILTER_RANGE = 1
154
+ # filter by float range
155
+ SPH_FILTER_FLOATRANGE = 2
156
+
157
+ # Known attribute types
158
+
159
+ # this attr is just an integer
160
+ SPH_ATTR_INTEGER = 1
161
+ # this attr is a timestamp
162
+ SPH_ATTR_TIMESTAMP = 2
163
+ # this attr is an ordinal string number (integer at search time,
164
+ # specially handled at indexing time)
165
+ SPH_ATTR_ORDINAL = 3
166
+ # this attr is a boolean bit field
167
+ SPH_ATTR_BOOL = 4
168
+ # this attr is a float
169
+ SPH_ATTR_FLOAT = 5
170
+ # signed 64-bit integer
171
+ SPH_ATTR_BIGINT = 6
172
+ # string (binary; in-memory)
173
+ SPH_ATTR_STRING = 7
174
+ # this attr has multiple values (0 or more)
175
+ SPH_ATTR_MULTI = 0x40000000
176
+
177
+ # Known grouping functions
178
+
179
+ # group by day
180
+ SPH_GROUPBY_DAY = 0
181
+ # group by week
182
+ SPH_GROUPBY_WEEK = 1
183
+ # group by month
184
+ SPH_GROUPBY_MONTH = 2
185
+ # group by year
186
+ SPH_GROUPBY_YEAR = 3
187
+ # group by attribute value
188
+ SPH_GROUPBY_ATTR = 4
189
+ # group by sequential attrs pair
190
+ SPH_GROUPBY_ATTRPAIR = 5
191
+
192
+ # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
193
+ def initialize
194
+ # per-client-object settings
195
+ @host = 'localhost' # searchd host (default is "localhost")
196
+ @port = 3312 # searchd port (default is 3312)
197
+ @path = false
198
+ @socket = false
199
+
48
200
  # per-query settings
49
201
  @offset = 0 # how many records to seek from result-set start (default is 0)
50
202
  @limit = 20 # how many records to return from result-set starting at offset (default is 20)
@@ -66,1411 +218,401 @@ module Sphinx
66
218
  @anchor = [] # geographical anchor point
67
219
  @indexweights = [] # per-index weights
68
220
  @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
69
- @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
221
+ @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
70
222
  @fieldweights = {} # per-field-name weights
71
223
  @overrides = [] # per-query attribute values overrides
72
224
  @select = '*' # select-list (attributes or expressions, with optional aliases)
73
-
225
+
74
226
  # per-reply fields (for single-query case)
75
227
  @error = '' # last error message
76
228
  @warning = '' # last warning message
77
229
  @connerror = false # connection error vs remote error flag
78
-
230
+
79
231
  @reqs = [] # requests storage (for multi-query case)
80
232
  @mbenc = '' # stored mbstring encoding
81
233
  @timeout = 0 # connect timeout
82
- @retries = 1 # number of connect retries in case of emergency
83
- @reqtimeout = 0 # request timeout
84
- @reqretries = 1 # number of request retries in case of emergency
85
-
86
- # per-client-object settings
87
- # searchd servers list
88
- @servers = [Sphinx::Server.new(self, 'localhost', 9312, false)].freeze
89
- @logger = logger
90
-
91
- logger.info { "[sphinx] version: #{VERSION}, #{@servers.inspect}" } if logger
92
- end
93
-
94
- # Returns a string representation of the sphinx client object.
95
- #
96
- def inspect
97
- params = {
98
- :error => @error,
99
- :warning => @warning,
100
- :connect_error => @connerror,
101
- :servers => @servers,
102
- :connect_timeout => { :timeout => @timeout, :retries => @retries },
103
- :request_timeout => { :timeout => @reqtimeout, :retries => @reqretries },
104
- :retries => { :count => @retrycount, :delay => @retrydelay },
105
- :limits => { :offset => @offset, :limit => @limit, :max => @maxmatches, :cutoff => @cutoff },
106
- :max_query_time => @maxquerytime,
107
- :overrides => @overrides,
108
- :select => @select,
109
- :match_mode => @mode,
110
- :ranking_mode => @ranker,
111
- :sort_mode => { :mode => @sort, :sortby => @sortby },
112
- :weights => @weights,
113
- :field_weights => @fieldweights,
114
- :index_weights => @indexweights,
115
- :id_range => { :min => @min_id, :max => @max_id },
116
- :filters => @filters,
117
- :geo_anchor => @anchor,
118
- :group_by => { :attribute => @groupby, :func => @groupfunc, :sort => @groupsort },
119
- :group_distinct => @groupdistinct
120
- }
121
-
122
- "<Sphinx::Client: %d servers, params: %s>" %
123
- [@servers.length, params.inspect]
124
234
  end
125
-
126
- #=================================================================
127
- # General API functions
128
- #=================================================================
129
-
130
- # Returns last error message, as a string, in human readable format. If there
131
- # were no errors during the previous API call, empty string is returned.
132
- #
133
- # You should call it when any other function (such as {#query}) fails (typically,
134
- # the failing function returns false). The returned string will contain the
135
- # error description.
136
- #
137
- # The error message is not reset by this call; so you can safely call it
138
- # several times if needed.
139
- #
140
- # @return [String] last error message.
141
- #
142
- # @example
143
- # puts sphinx.last_error
144
- #
145
- # @see #last_warning
146
- # @see #connect_error?
147
- #
148
- def last_error
235
+
236
+ # Get last error message.
237
+ def GetLastError
149
238
  @error
150
239
  end
151
- alias :GetLastError :last_error
152
-
153
- # Returns last warning message, as a string, in human readable format. If there
154
- # were no warnings during the previous API call, empty string is returned.
155
- #
156
- # You should call it to verify whether your request (such as {#query}) was
157
- # completed but with warnings. For instance, search query against a distributed
158
- # index might complete successfully even if several remote agents timed out.
159
- # In that case, a warning message would be produced.
160
- #
161
- # The warning message is not reset by this call; so you can safely call it
162
- # several times if needed.
163
- #
164
- # @return [String] last warning message.
165
- #
166
- # @example
167
- # puts sphinx.last_warning
168
- #
169
- # @see #last_error
170
- # @see #connect_error?
171
- #
172
- def last_warning
240
+
241
+ # Get last warning message.
242
+ def GetLastWarning
173
243
  @warning
174
244
  end
175
- alias :GetLastWarning :last_warning
176
-
177
- # Checks whether the last error was a network error on API side, or a
178
- # remote error reported by searchd. Returns true if the last connection
179
- # attempt to searchd failed on API side, false otherwise (if the error
180
- # was remote, or there were no connection attempts at all).
181
- #
182
- # @return [Boolean] the value indicating whether last error was a
183
- # network error on API side.
184
- #
185
- # @example
186
- # puts "Connection failed!" if sphinx.connect_error?
187
- #
188
- # @see #last_error
189
- # @see #last_warning
190
- #
191
- def connect_error?
192
- @connerror || false
245
+
246
+ # Get last error flag (to tell network connection errors from
247
+ # searchd errors or broken responses)
248
+ def IsConnectError
249
+ @connerror
193
250
  end
194
- alias :IsConnectError :connect_error?
195
-
196
- # Sets searchd host name and TCP port. All subsequent requests will
197
- # use the new host and port settings. Default +host+ and +port+ are
198
- # 'localhost' and 9312, respectively.
199
- #
200
- # Also, you can specify an absolute path to Sphinx's UNIX socket as +host+,
201
- # in this case pass port as +0+ or +nil+.
202
- #
203
- # @param [String] host the searchd host name or UNIX socket absolute path.
204
- # @param [Integer] port the searchd port name (could be any if UNIX
205
- # socket path specified).
206
- # @return [Sphinx::Client] self.
207
- #
208
- # @example
209
- # sphinx.set_server('localhost', 9312)
210
- # sphinx.set_server('/opt/sphinx/var/run/sphinx.sock')
211
- #
212
- # @raise [ArgumentError] Occurred when parameters are invalid.
213
- # @see #set_servers
214
- # @see #set_connect_timeout
215
- # @see #set_request_timeout
216
- #
217
- def set_server(host, port = 9312)
218
- raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)
219
-
220
- path = nil
221
- # Check if UNIX socket should be used
251
+
252
+ # Set searchd host name (string) and port (integer).
253
+ def SetServer(host, port)
254
+ assert { host.instance_of? String }
255
+
222
256
  if host[0] == ?/
223
- path = host
257
+ @path = host
258
+ return
224
259
  elsif host[0, 7] == 'unix://'
225
- path = host[7..-1]
226
- else
227
- raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
260
+ @path = host[7..-1]
228
261
  end
262
+
263
+ assert { port.instance_of? Fixnum }
229
264
 
230
- host = port = nil unless path.nil?
231
-
232
- @servers = [Sphinx::Server.new(self, host, port, path)].freeze
233
- logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
234
- self
235
- end
236
- alias :SetServer :set_server
237
-
238
- # Sets the list of searchd servers. Each subsequent request will use next
239
- # server in list (round-robin). In case of one server failure, request could
240
- # be retried on another server (see {#set_connect_timeout} and
241
- # {#set_request_timeout}).
242
- #
243
- # Method accepts an +Array+ of +Hash+es, each of them should have <tt>:host</tt>
244
- # and <tt>:port</tt> (to connect to searchd through network) or <tt>:path</tt>
245
- # (an absolute path to UNIX socket) specified.
246
- #
247
- # @param [Array<Hash>] servers an +Array+ of +Hash+ objects with servers parameters.
248
- # @option servers [String] :host the searchd host name or UNIX socket absolute path.
249
- # @option servers [String] :path the searchd UNIX socket absolute path.
250
- # @option servers [Integer] :port (9312) the searchd port name (skipped when UNIX
251
- # socket path specified)
252
- # @return [Sphinx::Client] self.
253
- #
254
- # @example
255
- # sphinx.set_servers([
256
- # { :host => 'browse01.local' }, # default port is 9312
257
- # { :host => 'browse02.local', :port => 9312 },
258
- # { :path => '/opt/sphinx/var/run/sphinx.sock' }
259
- # ])
260
- #
261
- # @raise [ArgumentError] Occurred when parameters are invalid.
262
- # @see #set_server
263
- # @see #set_connect_timeout
264
- # @see #set_request_timeout
265
- #
266
- def set_servers(servers)
267
- raise ArgumentError, '"servers" argument must be Array' unless servers.kind_of?(Array)
268
- raise ArgumentError, '"servers" argument must be not empty' if servers.empty?
269
-
270
- @servers = servers.map do |server|
271
- raise ArgumentError, '"servers" argument must be Array of Hashes' unless server.kind_of?(Hash)
272
-
273
- server = server.with_indifferent_access
274
-
275
- host = server[:path] || server[:host]
276
- port = server[:port] || 9312
277
- path = nil
278
- raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)
279
-
280
- # Check if UNIX socket should be used
281
- if host[0] == ?/
282
- path = host
283
- elsif host[0, 7] == 'unix://'
284
- path = host[7..-1]
285
- else
286
- raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
287
- end
288
-
289
- host = port = nil unless path.nil?
290
-
291
- Sphinx::Server.new(self, host, port, path)
292
- end.freeze
293
- logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
294
- self
265
+ @host = host
266
+ @port = port
295
267
  end
296
- alias :SetServers :set_servers
297
-
298
- # Sets the time allowed to spend connecting to the server before giving up
299
- # and number of retries to perform.
300
- #
301
- # In the event of a failure to connect, an appropriate error code should
302
- # be returned back to the application in order for application-level error
303
- # handling to advise the user.
304
- #
305
- # When multiple servers configured through {#set_servers} method, and +retries+
306
- # number is greater than 1, library will try to connect to another server.
307
- # In case of single server configured, it will try to reconnect +retries+
308
- # times.
309
- #
310
- # Please note, this timeout will only be used for connection establishing, not
311
- # for regular API requests.
312
- #
313
- # @param [Integer] timeout a connection timeout in seconds.
314
- # @param [Integer] retries number of connect retries.
315
- # @return [Sphinx::Client] self.
316
- #
317
- # @example Set connection timeout to 1 second and number of retries to 5
318
- # sphinx.set_connect_timeout(1, 5)
319
- #
320
- # @raise [ArgumentError] Occurred when parameters are invalid.
321
- # @see #set_server
322
- # @see #set_servers
323
- # @see #set_request_timeout
324
- #
325
- def set_connect_timeout(timeout, retries = 1)
326
- raise ArgumentError, '"timeout" argument must be Integer' unless timeout.kind_of?(Integer)
327
- raise ArgumentError, '"retries" argument must be Integer' unless retries.kind_of?(Integer)
328
- raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0
329
-
268
+
269
+ def SetConnectTimeout(timeout)
270
+ assert { timeout.instance_of? Fixnum }
271
+
330
272
  @timeout = timeout
331
- @retries = retries
332
- self
333
- end
334
- alias :SetConnectTimeout :set_connect_timeout
335
-
336
- # Sets the time allowed to spend performing request to the server before giving up
337
- # and number of retries to perform.
338
- #
339
- # In the event of a failure to do request, an appropriate error code should
340
- # be returned back to the application in order for application-level error
341
- # handling to advise the user.
342
- #
343
- # When multiple servers configured through {#set_servers} method, and +retries+
344
- # number is greater than 1, library will try to do another try with this server
345
- # (with full reconnect). If connection would fail, behavior depends on
346
- # {#set_connect_timeout} settings.
347
- #
348
- # Please note, this timeout will only be used for request performing, not
349
- # for connection establishing.
350
- #
351
- # @param [Integer] timeout a request timeout in seconds.
352
- # @param [Integer] retries number of request retries.
353
- # @return [Sphinx::Client] self.
354
- #
355
- # @example Set request timeout to 1 second and number of retries to 5
356
- # sphinx.set_request_timeout(1, 5)
357
- #
358
- # @raise [ArgumentError] Occurred when parameters are invalid.
359
- # @see #set_server
360
- # @see #set_servers
361
- # @see #set_connect_timeout
362
- #
363
- def set_request_timeout(timeout, retries = 1)
364
- raise ArgumentError, '"timeout" argument must be Integer' unless timeout.kind_of?(Integer)
365
- raise ArgumentError, '"retries" argument must be Integer' unless retries.kind_of?(Integer)
366
- raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0
367
-
368
- @reqtimeout = timeout
369
- @reqretries = retries
370
- self
371
- end
372
- alias :SetRequestTimeout :set_request_timeout
373
-
374
- # Sets distributed retry count and delay.
375
- #
376
- # On temporary failures searchd will attempt up to +count+ retries
377
- # per agent. +delay+ is the delay between the retries, in milliseconds.
378
- # Retries are disabled by default. Note that this call will not make
379
- # the API itself retry on temporary failure; it only tells searchd
380
- # to do so. Currently, the list of temporary failures includes all
381
- # kinds of connection failures and maxed out (too busy) remote agents.
382
- #
383
- # @param [Integer] count a number of retries to perform.
384
- # @param [Integer] delay a delay between the retries.
385
- # @return [Sphinx::Client] self.
386
- #
387
- # @example Perform 5 retries with 200 ms between them
388
- # sphinx.set_retries(5, 200)
389
- #
390
- # @raise [ArgumentError] Occurred when parameters are invalid.
391
- # @see #set_connect_timeout
392
- # @see #set_request_timeout
393
- #
394
- def set_retries(count, delay = 0)
395
- raise ArgumentError, '"count" argument must be Integer' unless count.kind_of?(Integer)
396
- raise ArgumentError, '"delay" argument must be Integer' unless delay.kind_of?(Integer)
397
-
398
- @retrycount = count
399
- @retrydelay = delay
400
- self
401
273
  end
402
- alias :SetRetries :set_retries
403
-
404
- #=================================================================
405
- # General query settings
406
- #=================================================================
407
-
408
- # Sets offset into server-side result set (+offset+) and amount of matches to
409
- # return to client starting from that offset (+limit+). Can additionally control
410
- # maximum server-side result set size for current query (+max_matches+) and the
411
- # threshold amount of matches to stop searching at (+cutoff+). All parameters
412
- # must be non-negative integers.
413
- #
414
- # First two parameters to {#set_limits} are identical in behavior to MySQL LIMIT
415
- # clause. They instruct searchd to return at most +limit+ matches starting from
416
- # match number +offset+. The default offset and limit settings are +0+ and +20+,
417
- # that is, to return first +20+ matches.
418
- #
419
- # +max_matches+ setting controls how many matches searchd will keep in RAM
420
- # while searching. All matching documents will be normally processed, ranked,
421
- # filtered, and sorted even if max_matches is set to +1+. But only best +N+
422
- # documents are stored in memory at any given moment for performance and RAM
423
- # usage reasons, and this setting controls that N. Note that there are two
424
- # places where max_matches limit is enforced. Per-query limit is controlled
425
- # by this API call, but there also is per-server limit controlled by +max_matches+
426
- # setting in the config file. To prevent RAM usage abuse, server will not
427
- # allow to set per-query limit higher than the per-server limit.
428
- #
429
- # You can't retrieve more than +max_matches+ matches to the client application.
430
- # The default limit is set to +1000+. Normally, you must not have to go over
431
- # this limit. One thousand records is enough to present to the end user.
432
- # And if you're thinking about pulling the results to application for further
433
- # sorting or filtering, that would be much more efficient if performed on
434
- # Sphinx side.
435
- #
436
- # +cutoff+ setting is intended for advanced performance control. It tells
437
- # searchd to forcibly stop search query once $cutoff matches had been found
438
- # and processed.
439
- #
440
- # @param [Integer] offset an offset into server-side result set.
441
- # @param [Integer] limit an amount of matches to return.
442
- # @param [Integer] max a maximum server-side result set size.
443
- # @param [Integer] cutoff a threshold amount of matches to stop searching at.
444
- # @return [Sphinx::Client] self.
445
- #
446
- # @example
447
- # sphinx.set_limits(100, 50, 1000, 5000)
448
- #
449
- # @raise [ArgumentError] Occurred when parameters are invalid.
450
- #
451
- def set_limits(offset, limit, max = 0, cutoff = 0)
452
- raise ArgumentError, '"offset" argument must be Integer' unless offset.kind_of?(Integer)
453
- raise ArgumentError, '"limit" argument must be Integer' unless limit.kind_of?(Integer)
454
- raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
455
- raise ArgumentError, '"cutoff" argument must be Integer' unless cutoff.kind_of?(Integer)
456
-
457
- raise ArgumentError, '"offset" argument should be greater or equal to zero' unless offset >= 0
458
- raise ArgumentError, '"limit" argument should be greater to zero' unless limit > 0
459
- raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
460
- raise ArgumentError, '"cutoff" argument should be greater or equal to zero' unless cutoff >= 0
274
+
275
+ # Set offset and count into result set,
276
+ # and optionally set max-matches and cutoff limits.
277
+ def SetLimits(offset, limit, max = 0, cutoff = 0)
278
+ assert { offset.instance_of? Fixnum }
279
+ assert { limit.instance_of? Fixnum }
280
+ assert { max.instance_of? Fixnum }
281
+ assert { offset >= 0 }
282
+ assert { limit > 0 }
283
+ assert { max >= 0 }
461
284
 
462
285
  @offset = offset
463
286
  @limit = limit
464
287
  @maxmatches = max if max > 0
465
288
  @cutoff = cutoff if cutoff > 0
466
- self
467
289
  end
468
- alias :SetLimits :set_limits
469
-
470
- # Sets maximum search query time, in milliseconds. Parameter must be a
471
- # non-negative integer. Default value is +0+ which means "do not limit".
472
- #
473
- # Similar to +cutoff+ setting from {#set_limits}, but limits elapsed query
474
- # time instead of processed matches count. Local search queries will be
475
- # stopped once that much time has elapsed. Note that if you're performing
476
- # a search which queries several local indexes, this limit applies to each
477
- # index separately.
478
- #
479
- # @param [Integer] max maximum search query time in milliseconds.
480
- # @return [Sphinx::Client] self.
481
- #
482
- # @example
483
- # sphinx.set_max_query_time(200)
484
- #
485
- # @raise [ArgumentError] Occurred when parameters are invalid.
486
- #
487
- def set_max_query_time(max)
488
- raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
489
- raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
490
-
290
+
291
+ # Set maximum query time, in milliseconds, per-index,
292
+ # integer, 0 means "do not limit"
293
+ def SetMaxQueryTime(max)
294
+ assert { max.instance_of? Fixnum }
295
+ assert { max >= 0 }
491
296
  @maxquerytime = max
492
- self
493
- end
494
- alias :SetMaxQueryTime :set_max_query_time
495
-
496
- # Sets temporary (per-query) per-document attribute value overrides. Only
497
- # supports scalar attributes. +values+ must be a +Hash+ that maps document
498
- # IDs to overridden attribute values.
499
- #
500
- # Override feature lets you "temporary" update attribute values for some
501
- # documents within a single query, leaving all other queries unaffected.
502
- # This might be useful for personalized data. For example, assume you're
503
- # implementing a personalized search function that wants to boost the posts
504
- # that the user's friends recommend. Such data is not just dynamic, but
505
- # also personal; so you can't simply put it in the index because you don't
506
- # want everyone's searches affected. Overrides, on the other hand, are local
507
- # to a single query and invisible to everyone else. So you can, say, setup
508
- # a "friends_weight" value for every document, defaulting to 0, then
509
- # temporary override it with 1 for documents 123, 456 and 789 (recommended
510
- # by exactly the friends of current user), and use that value when ranking.
511
- #
512
- # You can specify attribute type as String ("integer", "float", etc),
513
- # Symbol (:integer, :float, etc), or
514
- # Fixnum constant (SPH_ATTR_INTEGER, SPH_ATTR_FLOAT, etc).
515
- #
516
- # @param [String, Symbol] attribute an attribute name to override values of.
517
- # @param [Integer, String, Symbol] attrtype attribute type.
518
- # @param [Hash] values a +Hash+ that maps document IDs to overridden attribute values.
519
- # @return [Sphinx::Client] self.
520
- #
521
- # @example
522
- # sphinx.set_override(:friends_weight, :integer, {123 => 1, 456 => 1, 789 => 1})
523
- #
524
- # @raise [ArgumentError] Occurred when parameters are invalid.
525
- #
526
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setoverride Section 6.2.3, "SetOverride"
527
- #
528
- def set_override(attribute, attrtype, values)
529
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
530
-
531
- case attrtype
532
- when String, Symbol
533
- begin
534
- attrtype = self.class.const_get("SPH_ATTR_#{attrtype.to_s.upcase}")
535
- rescue NameError
536
- raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid"
537
- end
538
- when Fixnum
539
- raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid" unless (SPH_ATTR_INTEGER..SPH_ATTR_BIGINT).include?(attrtype)
540
- else
541
- raise ArgumentError, '"attrtype" argument must be Fixnum, String, or Symbol'
542
- end
543
-
544
- raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
545
-
546
- values.each do |id, value|
547
- raise ArgumentError, '"values" argument must be Hash map of Integer to Integer or Time' unless id.kind_of?(Integer)
548
- case attrtype
549
- when SPH_ATTR_TIMESTAMP
550
- raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Integer) or value.kind_of?(Time)
551
- when SPH_ATTR_FLOAT
552
- raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Numeric)
553
- else
554
- # SPH_ATTR_INTEGER, SPH_ATTR_ORDINAL, SPH_ATTR_BOOL, SPH_ATTR_BIGINT
555
- raise ArgumentError, '"values" argument must be Hash map of Integer to Integer' unless value.kind_of?(Integer)
556
- end
557
- end
558
-
559
- @overrides << { 'attr' => attribute.to_s, 'type' => attrtype, 'values' => values }
560
- self
561
- end
562
- alias :SetOverride :set_override
563
-
564
- # Sets the select clause, listing specific attributes to fetch, and
565
- # expressions to compute and fetch. Clause syntax mimics SQL.
566
- #
567
- # {#set_select} is very similar to the part of a typical SQL query between
568
- # +SELECT+ and +FROM+. It lets you choose what attributes (columns) to
569
- # fetch, and also what expressions over the columns to compute and fetch.
570
- # A certain difference from SQL is that expressions must always be aliased
571
- # to a correct identifier (consisting of letters and digits) using +AS+
572
- # keyword. SQL also lets you do that but does not require to. Sphinx enforces
573
- # aliases so that the computation results can always be returned under a
574
- # "normal" name in the result set, used in other clauses, etc.
575
- #
576
- # Everything else is basically identical to SQL. Star ('*') is supported.
577
- # Functions are supported. Arbitrary amount of expressions is supported.
578
- # Computed expressions can be used for sorting, filtering, and grouping,
579
- # just as the regular attributes.
580
- #
581
- # Starting with version 0.9.9-rc2, aggregate functions (<tt>AVG()</tt>,
582
- # <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are supported when using
583
- # <tt>GROUP BY</tt>.
584
- #
585
- # Expression sorting (Section 4.5, “SPH_SORT_EXPR mode”) and geodistance
586
- # functions ({#set_geo_anchor}) are now internally implemented
587
- # using this computed expressions mechanism, using magic names '<tt>@expr</tt>'
588
- # and '<tt>@geodist</tt>' respectively.
589
- #
590
- # @param [String] select a select clause, listing specific attributes to fetch.
591
- # @return [Sphinx::Client] self.
592
- #
593
- # @example
594
- # sphinx.set_select('*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight')
595
- # sphinx.set_select('exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd, IF(age>40,1,0) AS over40')
596
- # sphinx.set_select('*, AVG(price) AS avgprice')
597
- #
598
- # @raise [ArgumentError] Occurred when parameters are invalid.
599
- #
600
- # @see http://www.sphinxsearch.com/docs/current.html#sort-expr Section 4.5, "SPH_SORT_EXPR mode"
601
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
602
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setselect Section 6.2.4, "SetSelect"
603
- #
604
- def set_select(select)
605
- raise ArgumentError, '"select" argument must be String' unless select.kind_of?(String)
606
-
607
- @select = select
608
- self
609
297
  end
610
- alias :SetSelect :set_select
611
-
612
- #=================================================================
613
- # Full-text search query settings
614
- #=================================================================
615
-
616
- # Sets full-text query matching mode.
617
- #
618
- # Parameter must be a +Fixnum+ constant specifying one of the known modes
619
- # (+SPH_MATCH_ALL+, +SPH_MATCH_ANY+, etc), +String+ with identifier (<tt>"all"</tt>,
620
- # <tt>"any"</tt>, etc), or a +Symbol+ (<tt>:all</tt>, <tt>:any</tt>, etc).
621
- #
622
- # @param [Integer, String, Symbol] mode full-text query matching mode.
623
- # @return [Sphinx::Client] self.
624
- #
625
- # @example
626
- # sphinx.set_match_mode(Sphinx::SPH_MATCH_ALL)
627
- # sphinx.set_match_mode(:all)
628
- # sphinx.set_match_mode('all')
629
- #
630
- # @raise [ArgumentError] Occurred when parameters are invalid.
631
- #
632
- # @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
633
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
634
- #
635
- def set_match_mode(mode)
636
- case mode
637
- when String, Symbol
638
- begin
639
- mode = self.class.const_get("SPH_MATCH_#{mode.to_s.upcase}")
640
- rescue NameError
641
- raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
642
- end
643
- when Fixnum
644
- raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_MATCH_ALL..SPH_MATCH_EXTENDED2).include?(mode)
645
- else
646
- raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
647
- end
298
+
299
+ # Set matching mode.
300
+ def SetMatchMode(mode)
301
+ assert { mode == SPH_MATCH_ALL \
302
+ || mode == SPH_MATCH_ANY \
303
+ || mode == SPH_MATCH_PHRASE \
304
+ || mode == SPH_MATCH_BOOLEAN \
305
+ || mode == SPH_MATCH_EXTENDED \
306
+ || mode == SPH_MATCH_FULLSCAN \
307
+ || mode == SPH_MATCH_EXTENDED2 }
648
308
 
649
309
  @mode = mode
650
- self
651
310
  end
652
- alias :SetMatchMode :set_match_mode
653
-
654
- # Sets ranking mode. Only available in +SPH_MATCH_EXTENDED2+
655
- # matching mode at the time of this writing. Parameter must be a
656
- # constant specifying one of the known modes.
657
- #
658
- # You can specify ranking mode as String ("proximity_bm25", "bm25", etc),
659
- # Symbol (:proximity_bm25, :bm25, etc), or
660
- # Fixnum constant (SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, etc).
661
- #
662
- # @param [Integer, String, Symbol] ranker ranking mode.
663
- # @return [Sphinx::Client] self.
664
- #
665
- # @example
666
- # sphinx.set_ranking_mode(Sphinx::SPH_RANK_BM25)
667
- # sphinx.set_ranking_mode(:bm25)
668
- # sphinx.set_ranking_mode('bm25')
669
- #
670
- # @raise [ArgumentError] Occurred when parameters are invalid.
671
- #
672
- # @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
673
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
674
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setrankingmode Section 6.3.2, "SetRankingMode"
675
- #
676
- def set_ranking_mode(ranker)
677
- case ranker
678
- when String, Symbol
679
- begin
680
- ranker = self.class.const_get("SPH_RANK_#{ranker.to_s.upcase}")
681
- rescue NameError
682
- raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid"
683
- end
684
- when Fixnum
685
- raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid" unless (SPH_RANK_PROXIMITY_BM25..SPH_RANK_FIELDMASK).include?(ranker)
686
- else
687
- raise ArgumentError, '"ranker" argument must be Fixnum, String, or Symbol'
688
- end
311
+
312
+ # Set ranking mode.
313
+ def SetRankingMode(ranker)
314
+ assert { ranker == SPH_RANK_PROXIMITY_BM25 \
315
+ || ranker == SPH_RANK_BM25 \
316
+ || ranker == SPH_RANK_NONE \
317
+ || ranker == SPH_RANK_WORDCOUNT \
318
+ || ranker == SPH_RANK_PROXIMITY \
319
+ || ranker == SPH_RANK_MATCHANY \
320
+ || ranker == SPH_RANK_FIELDMASK \
321
+ || ranker == SPH_RANK_SPH04 }
689
322
 
690
323
  @ranker = ranker
691
- self
692
324
  end
693
- alias :SetRankingMode :set_ranking_mode
694
-
325
+
695
326
  # Set matches sorting mode.
696
- #
697
- # You can specify sorting mode as String ("relevance", "attr_desc", etc),
698
- # Symbol (:relevance, :attr_desc, etc), or
699
- # Fixnum constant (SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, etc).
700
- #
701
- # @param [Integer, String, Symbol] mode matches sorting mode.
702
- # @param [String] sortby sorting clause, with the syntax depending on
703
- # specific mode. Should be specified unless sorting mode is
704
- # +SPH_SORT_RELEVANCE+.
705
- # @return [Sphinx::Client] self.
706
- #
707
- # @example
708
- # sphinx.set_sort_mode(Sphinx::SPH_SORT_ATTR_ASC, 'attr')
709
- # sphinx.set_sort_mode(:attr_asc, 'attr')
710
- # sphinx.set_sort_mode('attr_asc', 'attr')
711
- #
712
- # @raise [ArgumentError] Occurred when parameters are invalid.
713
- #
714
- # @see http://www.sphinxsearch.com/docs/current.html#sorting-modes Section 4.5, "Sorting modes"
715
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setsortmode Section 6.3.3, "SetSortMode"
716
- #
717
- def set_sort_mode(mode, sortby = '')
718
- case mode
719
- when String, Symbol
720
- begin
721
- mode = self.class.const_get("SPH_SORT_#{mode.to_s.upcase}")
722
- rescue NameError
723
- raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
724
- end
725
- when Fixnum
726
- raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_SORT_RELEVANCE..SPH_SORT_EXPR).include?(mode)
727
- else
728
- raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
729
- end
730
-
731
- raise ArgumentError, '"sortby" argument must be String' unless sortby.kind_of?(String)
732
- raise ArgumentError, '"sortby" should not be empty unless mode is SPH_SORT_RELEVANCE' unless mode == SPH_SORT_RELEVANCE or !sortby.empty?
327
+ def SetSortMode(mode, sortby = '')
328
+ assert { mode == SPH_SORT_RELEVANCE \
329
+ || mode == SPH_SORT_ATTR_DESC \
330
+ || mode == SPH_SORT_ATTR_ASC \
331
+ || mode == SPH_SORT_TIME_SEGMENTS \
332
+ || mode == SPH_SORT_EXTENDED \
333
+ || mode == SPH_SORT_EXPR }
334
+ assert { sortby.instance_of? String }
335
+ assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
733
336
 
734
337
  @sort = mode
735
338
  @sortby = sortby
736
- self
737
339
  end
738
- alias :SetSortMode :set_sort_mode
739
-
740
- # Binds per-field weights in the order of appearance in the index.
741
- #
742
- # @param [Array<Integer>] weights an +Array+ of integer per-field weights.
743
- # @return [Sphinx::Client] self.
744
- #
745
- # @example
746
- # sphinx.set_weights([1, 3, 5])
747
- #
748
- # @raise [ArgumentError] Occurred when parameters are invalid.
749
- #
750
- # @deprecated Use {#set_field_weights} instead.
751
- # @see #set_field_weights
340
+
341
+ # Bind per-field weights by order.
752
342
  #
753
- def set_weights(weights)
754
- raise ArgumentError, '"weights" argument must be Array' unless weights.kind_of?(Array)
343
+ # DEPRECATED; use SetFieldWeights() instead.
344
+ def SetWeights(weights)
345
+ assert { weights.instance_of? Array }
755
346
  weights.each do |weight|
756
- raise ArgumentError, '"weights" argument must be Array of integers' unless weight.kind_of?(Integer)
347
+ assert { weight.instance_of? Fixnum }
757
348
  end
758
349
 
759
350
  @weights = weights
760
- self
761
351
  end
762
- alias :SetWeights :set_weights
763
352
 
764
- # Binds per-field weights by name. Parameter must be a +Hash+
765
- # mapping string field names to integer weights.
766
- #
767
- # Match ranking can be affected by per-field weights. For instance,
768
- # see Section 4.4, "Weighting" for an explanation how phrase
769
- # proximity ranking is affected. This call lets you specify what
770
- # non-default weights to assign to different full-text fields.
771
- #
772
- # The weights must be positive 32-bit integers. The final weight
773
- # will be a 32-bit integer too. Default weight value is 1. Unknown
774
- # field names will be silently ignored.
775
- #
776
- # There is no enforced limit on the maximum weight value at the
777
- # moment. However, beware that if you set it too high you can
778
- # start hitting 32-bit wraparound issues. For instance, if
779
- # you set a weight of 10,000,000 and search in extended mode,
780
- # then maximum possible weight will be equal to 10 million (your
781
- # weight) by 1 thousand (internal BM25 scaling factor, see
782
- # Section 4.4, “Weighting”) by 1 or more (phrase proximity rank).
783
- # The result is at least 10 billion that does not fit in 32 bits
784
- # and will be wrapped around, producing unexpected results.
785
- #
786
- # @param [Hash] weights a +Hash+ mapping string field names to
787
- # integer weights.
788
- # @return [Sphinx::Client] self.
353
+ # Bind per-field weights by name.
789
354
  #
790
- # @example
791
- # sphinx.set_field_weights(:title => 20, :text => 10)
792
- #
793
- # @raise [ArgumentError] Occurred when parameters are invalid.
794
- #
795
- # @see http://www.sphinxsearch.com/docs/current.html#weighting Section 4.4, "Weighting"
796
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfieldweights Section 6.3.5, "SetFieldWeights"
797
- #
798
- def set_field_weights(weights)
799
- raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
355
+ # Takes a hash mapping string (field name) to integer (field weight) as an argument.
356
+ # * Takes precedence over SetWeights().
357
+ # * Unknown names will be silently ignored.
358
+ # * Unbound fields will be silently given a weight of 1.
359
+ def SetFieldWeights(weights)
360
+ assert { weights.instance_of? Hash }
800
361
  weights.each do |name, weight|
801
- unless (name.kind_of?(String) or name.kind_of?(Symbol)) and weight.kind_of?(Integer)
802
- raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
803
- end
362
+ assert { name.instance_of? String }
363
+ assert { weight.instance_of? Fixnum }
804
364
  end
805
365
 
806
366
  @fieldweights = weights
807
- self
808
367
  end
809
- alias :SetFieldWeights :set_field_weights
810
-
811
- # Sets per-index weights, and enables weighted summing of match
812
- # weights across different indexes. Parameter must be a hash
813
- # (associative array) mapping string index names to integer
814
- # weights. Default is empty array that means to disable weighting
815
- # summing.
816
- #
817
- # When a match with the same document ID is found in several
818
- # different local indexes, by default Sphinx simply chooses the
819
- # match from the index specified last in the query. This is to
820
- # support searching through partially overlapping index partitions.
821
- #
822
- # However in some cases the indexes are not just partitions,
823
- # and you might want to sum the weights across the indexes
824
- # instead of picking one. {#set_index_weights} lets you do that.
825
- # With summing enabled, final match weight in result set will be
826
- # computed as a sum of match weight coming from the given index
827
- # multiplied by respective per-index weight specified in this
828
- # call. Ie. if the document 123 is found in index A with the
829
- # weight of 2, and also in index B with the weight of 3, and
830
- # you called {#set_index_weights} with <tt>{"A"=>100, "B"=>10}</tt>,
831
- # the final weight returned to the client will be 2*100+3*10 = 230.
832
- #
833
- # @param [Hash] weights a +Hash+ mapping string index names to
834
- # integer weights.
835
- # @return [Sphinx::Client] self.
836
- #
837
- # @example
838
- # sphinx.set_index_weights(:fresh => 20, :archived => 10)
839
- #
840
- # @raise [ArgumentError] Occurred when parameters are invalid.
841
- #
842
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setindexweights Section 6.3.6, "SetIndexWeights"
843
- #
844
- def set_index_weights(weights)
845
- raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
368
+
369
+ # Bind per-index weights by name.
370
+ def SetIndexWeights(weights)
371
+ assert { weights.instance_of? Hash }
846
372
  weights.each do |index, weight|
847
- unless (index.kind_of?(String) or index.kind_of?(Symbol)) and weight.kind_of?(Integer)
848
- raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
849
- end
373
+ assert { index.instance_of? String }
374
+ assert { weight.instance_of? Fixnum }
850
375
  end
851
-
376
+
852
377
  @indexweights = weights
853
- self
854
378
  end
855
- alias :SetIndexWeights :set_index_weights
856
-
857
- #=================================================================
858
- # Result set filtering settings
859
- #=================================================================
860
-
861
- # Sets an accepted range of document IDs. Parameters must be integers.
862
- # Defaults are 0 and 0; that combination means to not limit by range.
863
- #
864
- # After this call, only those records that have document ID between
865
- # +min+ and +max+ (including IDs exactly equal to +min+ or +max+)
866
- # will be matched.
867
- #
868
- # @param [Integer] min min document ID.
869
- # @param [Integer] max max document ID.
870
- # @return [Sphinx::Client] self.
871
- #
872
- # @example
873
- # sphinx.set_id_range(10, 1000)
874
- #
875
- # @raise [ArgumentError] Occurred when parameters are invalid.
876
- #
877
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setidrange Section 6.4.1, "SetIDRange"
878
- #
879
- def set_id_range(min, max)
880
- raise ArgumentError, '"min" argument must be Integer' unless min.kind_of?(Integer)
881
- raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
882
- raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
379
+
380
+ # Set IDs range to match.
381
+ #
382
+ # Only match records if document ID is between <tt>min</tt> and <tt>max</tt> (inclusive).
383
+ def SetIDRange(min, max)
384
+ assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
385
+ assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
386
+ assert { min <= max }
883
387
 
884
388
  @min_id = min
885
389
  @max_id = max
886
- self
887
390
  end
888
- alias :SetIDRange :set_id_range
889
-
890
- # Adds new integer values set filter.
891
- #
892
- # On this call, additional new filter is added to the existing
893
- # list of filters. +attribute+ must be a string with attribute
894
- # name. +values+ must be a plain array containing integer
895
- # values. +exclude+ must be a boolean value; it controls
896
- # whether to accept the matching documents (default mode, when
897
- # +exclude+ is +false+) or reject them.
898
- #
899
- # Only those documents where +attribute+ column value stored in
900
- # the index matches any of the values from +values+ array will
901
- # be matched (or rejected, if +exclude+ is +true+).
902
- #
903
- # @param [String, Symbol] attribute an attribute name to filter by.
904
- # @param [Array<Integer>, Integer] values an +Array+ of integers or
905
- # single Integer with given attribute values.
906
- # @param [Boolean] exclude indicating whether documents with given attribute
907
- # matching specified values should be excluded from search results.
908
- # @return [Sphinx::Client] self.
909
- #
910
- # @example
911
- # sphinx.set_filter(:group_id, [10, 15, 20])
912
- # sphinx.set_filter(:group_id, [10, 15, 20], true)
913
- #
914
- # @raise [ArgumentError] Occurred when parameters are invalid.
915
- #
916
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilter Section 6.4.2, "SetFilter"
917
- # @see #set_filter_range
918
- # @see #set_filter_float_range
919
- #
920
- def set_filter(attribute, values, exclude = false)
921
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
922
- values = [values] if values.kind_of?(Integer)
923
- raise ArgumentError, '"values" argument must be Array' unless values.kind_of?(Array)
924
- raise ArgumentError, '"values" argument must be Array of Integers' unless values.all? { |v| v.kind_of?(Integer) }
925
- raise ArgumentError, '"exclude" argument must be Boolean' unless [TrueClass, FalseClass].include?(exclude.class)
926
-
927
- if values.any?
928
- @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute.to_s, 'exclude' => exclude, 'values' => values }
391
+
392
+ # Set values filter.
393
+ #
394
+ # Only match those records where <tt>attribute</tt> column values
395
+ # are in specified set.
396
+ def SetFilter(attribute, values, exclude = false)
397
+ assert { attribute.instance_of? String }
398
+ assert { values.instance_of? Array }
399
+ assert { !values.empty? }
400
+
401
+ if values.instance_of?(Array) && values.size > 0
402
+ values.each do |value|
403
+ assert { value.instance_of? Fixnum }
404
+ end
405
+
406
+ @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
929
407
  end
930
- self
931
408
  end
932
- alias :SetFilter :set_filter
933
-
934
- # Adds new integer range filter.
935
- #
936
- # On this call, additional new filter is added to the existing
937
- # list of filters. +attribute+ must be a string with attribute
938
- # name. +min+ and +max+ must be integers that define the acceptable
939
- # attribute values range (including the boundaries). +exclude+
940
- # must be a boolean value; it controls whether to accept the
941
- # matching documents (default mode, when +exclude+ is false) or
942
- # reject them.
943
- #
944
- # Only those documents where +attribute+ column value stored
945
- # in the index is between +min+ and +max+ (including values
946
- # that are exactly equal to +min+ or +max+) will be matched
947
- # (or rejected, if +exclude+ is true).
948
- #
949
- # @param [String, Symbol] attribute an attribute name to filter by.
950
- # @param [Integer] min min value of the given attribute.
951
- # @param [Integer] max max value of the given attribute.
952
- # @param [Boolean] exclude indicating whether documents with given attribute
953
- # matching specified boundaries should be excluded from search results.
954
- # @return [Sphinx::Client] self.
955
- #
956
- # @example
957
- # sphinx.set_filter_range(:group_id, 10, 20)
958
- # sphinx.set_filter_range(:group_id, 10, 20, true)
959
- #
960
- # @raise [ArgumentError] Occurred when parameters are invalid.
961
- #
962
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterrange Section 6.4.3, "SetFilterRange"
963
- # @see #set_filter
964
- # @see #set_filter_float_range
965
- #
966
- def set_filter_range(attribute, min, max, exclude = false)
967
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
968
- raise ArgumentError, '"min" argument must be Integer' unless min.kind_of?(Integer)
969
- raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
970
- raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
971
- raise ArgumentError, '"exclude" argument must be Boolean' unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)
972
-
973
- @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min, 'max' => max }
974
- self
409
+
410
+ # Set range filter.
411
+ #
412
+ # Only match those records where <tt>attribute</tt> column value
413
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
414
+ def SetFilterRange(attribute, min, max, exclude = false)
415
+ assert { attribute.instance_of? String }
416
+ assert { min.instance_of? Fixnum or min.instance_of? Bignum }
417
+ assert { max.instance_of? Fixnum or max.instance_of? Bignum }
418
+ assert { min <= max }
419
+
420
+ @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
975
421
  end
976
- alias :SetFilterRange :set_filter_range
977
-
978
- # Adds new float range filter.
979
- #
980
- # On this call, additional new filter is added to the existing
981
- # list of filters. +attribute+ must be a string with attribute name.
982
- # +min+ and +max+ must be floats that define the acceptable
983
- # attribute values range (including the boundaries). +exclude+ must
984
- # be a boolean value; it controls whether to accept the matching
985
- # documents (default mode, when +exclude+ is false) or reject them.
986
- #
987
- # Only those documents where +attribute+ column value stored in
988
- # the index is between +min+ and +max+ (including values that are
989
- # exactly equal to +min+ or +max+) will be matched (or rejected,
990
- # if +exclude+ is true).
991
- #
992
- # @param [String, Symbol] attribute an attribute name to filter by.
993
- # @param [Numeric] min min value of the given attribute.
994
- # @param [Numeric] max max value of the given attribute.
995
- # @param [Boolean] exclude indicating whether documents with given attribute
996
- # matching specified boundaries should be excluded from search results.
997
- # @return [Sphinx::Client] self.
998
- #
999
- # @example
1000
- # sphinx.set_filter_float_range(:group_id, 10.5, 20)
1001
- # sphinx.set_filter_float_range(:group_id, 10.5, 20, true)
1002
- #
1003
- # @raise [ArgumentError] Occurred when parameters are invalid.
1004
- #
1005
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterfloatrange Section 6.4.4, "SetFilterFloatRange"
1006
- # @see #set_filter
1007
- # @see #set_filter_range
1008
- #
1009
- def set_filter_float_range(attribute, min, max, exclude = false)
1010
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
1011
- raise ArgumentError, '"min" argument must be Numeric' unless min.kind_of?(Numeric)
1012
- raise ArgumentError, '"max" argument must be Numeric' unless max.kind_of?(Numeric)
1013
- raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
1014
- raise ArgumentError, '"exclude" argument must be Boolean' unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)
1015
-
1016
- @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min.to_f, 'max' => max.to_f }
1017
- self
422
+
423
+ # Set float range filter.
424
+ #
425
+ # Only match those records where <tt>attribute</tt> column value
426
+ # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
427
+ def SetFilterFloatRange(attribute, min, max, exclude = false)
428
+ assert { attribute.instance_of? String }
429
+ assert { min.instance_of? Float }
430
+ assert { max.instance_of? Float }
431
+ assert { min <= max }
432
+
433
+ @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
1018
434
  end
1019
- alias :SetFilterFloatRange :set_filter_float_range
1020
-
1021
- # Sets anchor point for geosphere distance (geodistance)
1022
- # calculations, and enable them.
1023
- #
1024
- # +attrlat+ and +attrlong+ must be strings that contain the names
1025
- # of latitude and longitude attributes, respectively. +lat+ and
1026
- # +long+ are floats that specify anchor point latitude and
1027
- # longitude, in radians.
1028
- #
1029
- # Once an anchor point is set, you can use magic <tt>"@geodist"</tt>
1030
- # attribute name in your filters and/or sorting expressions.
1031
- # Sphinx will compute geosphere distance between the given anchor
1032
- # point and a point specified by latitude and longitude attributes
1033
- # from each full-text match, and attach this value to the resulting
1034
- # match. The latitude and longitude values both in {#set_geo_anchor}
1035
- # and the index attribute data are expected to be in radians.
1036
- # The result will be returned in meters, so geodistance value of
1037
- # 1000.0 means 1 km. 1 mile is approximately 1609.344 meters.
1038
- #
1039
- # @param [String, Symbol] attrlat a name of latitude attribute.
1040
- # @param [String, Symbol] attrlong a name of longitude attribute.
1041
- # @param [Numeric] lat an anchor point latitude, in radians.
1042
- # @param [Numeric] long an anchor point longitude, in radians.
1043
- # @return [Sphinx::Client] self.
1044
- #
1045
- # @example
1046
- # sphinx.set_geo_anchor(:latitude, :longitude, 192.5, 143.5)
1047
- #
1048
- # @raise [ArgumentError] Occurred when parameters are invalid.
1049
- #
1050
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
1051
- #
1052
- def set_geo_anchor(attrlat, attrlong, lat, long)
1053
- raise ArgumentError, '"attrlat" argument must be String or Symbol' unless attrlat.kind_of?(String) or attrlat.kind_of?(Symbol)
1054
- raise ArgumentError, '"attrlong" argument must be String or Symbol' unless attrlong.kind_of?(String) or attrlong.kind_of?(Symbol)
1055
- raise ArgumentError, '"lat" argument must be Numeric' unless lat.kind_of?(Numeric)
1056
- raise ArgumentError, '"long" argument must be Numeric' unless long.kind_of?(Numeric)
1057
-
1058
- @anchor = { 'attrlat' => attrlat.to_s, 'attrlong' => attrlong.to_s, 'lat' => lat.to_f, 'long' => long.to_f }
1059
- self
435
+
436
+ # Setup anchor point for geosphere distance calculations.
437
+ #
438
+ # Required to use <tt>@geodist</tt> in filters and sorting;
439
+ # distance will be computed to this point. Latitude and longitude
440
+ # must be in radians.
441
+ #
442
+ # * <tt>attrlat</tt> -- is the name of latitude attribute
443
+ # * <tt>attrlong</tt> -- is the name of longitude attribute
444
+ # * <tt>lat</tt> -- is anchor point latitude, in radians
445
+ # * <tt>long</tt> -- is anchor point longitude, in radians
446
+ def SetGeoAnchor(attrlat, attrlong, lat, long)
447
+ assert { attrlat.instance_of? String }
448
+ assert { attrlong.instance_of? String }
449
+ assert { lat.instance_of? Float }
450
+ assert { long.instance_of? Float }
451
+
452
+ @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
1060
453
  end
1061
- alias :SetGeoAnchor :set_geo_anchor
1062
-
1063
- #=================================================================
1064
- # GROUP BY settings
1065
- #=================================================================
1066
-
1067
- # Sets grouping attribute, function, and groups sorting mode; and
1068
- # enables grouping (as described in Section 4.6, "Grouping (clustering) search results").
1069
- #
1070
- # +attribute+ is a string that contains group-by attribute name.
1071
- # +func+ is a constant that chooses a function applied to the
1072
- # attribute value in order to compute group-by key. +groupsort+
1073
- # is a clause that controls how the groups will be sorted. Its
1074
- # syntax is similar to that described in Section 4.5,
1075
- # "SPH_SORT_EXTENDED mode".
1076
- #
1077
- # Grouping feature is very similar in nature to <tt>GROUP BY</tt> clause
1078
- # from SQL. Results produces by this function call are going to
1079
- # be the same as produced by the following pseudo code:
1080
- #
1081
- # SELECT ... GROUP BY func(attribute) ORDER BY groupsort
1082
- #
1083
- # Note that it's +groupsort+ that affects the order of matches in
1084
- # the final result set. Sorting mode (see {#set_sort_mode}) affect
1085
- # the ordering of matches within group, ie. what match will be
1086
- # selected as the best one from the group. So you can for instance
1087
- # order the groups by matches count and select the most relevant
1088
- # match within each group at the same time.
1089
- #
1090
- # Starting with version 0.9.9-rc2, aggregate functions (<tt>AVG()</tt>,
1091
- # <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are supported
1092
- # through {#set_select} API call when using <tt>GROUP BY</tt>.
1093
- #
1094
- # You can specify group function and attribute as String
1095
- # ("attr", "day", etc), Symbol (:attr, :day, etc), or
1096
- # Fixnum constant (SPH_GROUPBY_ATTR, SPH_GROUPBY_DAY, etc).
1097
- #
1098
- # @param [String, Symbol] attribute an attribute name to group by.
1099
- # @param [Integer, String, Symbol] func a grouping function.
1100
- # @param [String] groupsort a groups sorting mode.
1101
- # @return [Sphinx::Client] self.
1102
- #
1103
- # @example
1104
- # sphinx.set_group_by(:tag_id, :attr)
1105
- #
1106
- # @raise [ArgumentError] Occurred when parameters are invalid.
1107
- #
1108
- # @see http://www.sphinxsearch.com/docs/current.html#clustering Section 4.6, "Grouping (clustering) search results"
1109
- # @see http://www.sphinxsearch.com/docs/current.html#sort-extended Section 4.5, "SPH_SORT_EXTENDED mode"
1110
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupby Section 6.5.1, "SetGroupBy"
1111
- # @see #set_sort_mode
1112
- # @see #set_select
1113
- # @see #set_group_distinct
1114
- #
1115
- def set_group_by(attribute, func, groupsort = '@group desc')
1116
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
1117
- raise ArgumentError, '"groupsort" argument must be String' unless groupsort.kind_of?(String)
1118
-
1119
- case func
1120
- when String, Symbol
1121
- begin
1122
- func = self.class.const_get("SPH_GROUPBY_#{func.to_s.upcase}")
1123
- rescue NameError
1124
- raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid"
1125
- end
1126
- when Fixnum
1127
- raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid" unless (SPH_GROUPBY_DAY..SPH_GROUPBY_ATTRPAIR).include?(func)
1128
- else
1129
- raise ArgumentError, '"func" argument must be Fixnum, String, or Symbol'
1130
- end
1131
-
1132
- @groupby = attribute.to_s
454
+
455
+ # Set grouping attribute and function.
456
+ #
457
+ # In grouping mode, all matches are assigned to different groups
458
+ # based on grouping function value.
459
+ #
460
+ # Each group keeps track of the total match count, and the best match
461
+ # (in this group) according to current sorting function.
462
+ #
463
+ # The final result set contains one best match per group, with
464
+ # grouping function value and matches count attached.
465
+ #
466
+ # Groups in result set could be sorted by any sorting clause,
467
+ # including both document attributes and the following special
468
+ # internal Sphinx attributes:
469
+ #
470
+ # * @id - match document ID;
471
+ # * @weight, @rank, @relevance - match weight;
472
+ # * @group - groupby function value;
473
+ # * @count - amount of matches in group.
474
+ #
475
+ # the default mode is to sort by groupby value in descending order,
476
+ # ie. by '@group desc'.
477
+ #
478
+ # 'total_found' would contain total amount of matching groups over
479
+ # the whole index.
480
+ #
481
+ # WARNING: grouping is done in fixed memory and thus its results
482
+ # are only approximate; so there might be more groups reported
483
+ # in total_found than actually present. @count might also
484
+ # be underestimated.
485
+ #
486
+ # For example, if sorting by relevance and grouping by "published"
487
+ # attribute with SPH_GROUPBY_DAY function, then the result set will
488
+ # contain one most relevant match per each day when there were any
489
+ # matches published, with day number and per-day match count attached,
490
+ # and sorted by day number in descending order (ie. recent days first).
491
+ def SetGroupBy(attribute, func, groupsort = '@group desc')
492
+ assert { attribute.instance_of? String }
493
+ assert { groupsort.instance_of? String }
494
+ assert { func == SPH_GROUPBY_DAY \
495
+ || func == SPH_GROUPBY_WEEK \
496
+ || func == SPH_GROUPBY_MONTH \
497
+ || func == SPH_GROUPBY_YEAR \
498
+ || func == SPH_GROUPBY_ATTR \
499
+ || func == SPH_GROUPBY_ATTRPAIR }
500
+
501
+ @groupby = attribute
1133
502
  @groupfunc = func
1134
503
  @groupsort = groupsort
1135
- self
1136
504
  end
1137
- alias :SetGroupBy :set_group_by
1138
-
1139
- # Sets attribute name for per-group distinct values count
1140
- # calculations. Only available for grouping queries.
1141
- #
1142
- # +attribute+ is a string that contains the attribute name. For
1143
- # each group, all values of this attribute will be stored (as
1144
- # RAM limits permit), then the amount of distinct values will
1145
- # be calculated and returned to the client. This feature is
1146
- # similar to <tt>COUNT(DISTINCT)</tt> clause in standard SQL;
1147
- # so these Sphinx calls:
1148
- #
1149
- # sphinx.set_group_by(:category, :attr, '@count desc')
1150
- # sphinx.set_group_distinct(:vendor)
1151
- #
1152
- # can be expressed using the following SQL clauses:
1153
- #
1154
- # SELECT id, weight, all-attributes,
1155
- # COUNT(DISTINCT vendor) AS @distinct,
1156
- # COUNT(*) AS @count
1157
- # FROM products
1158
- # GROUP BY category
1159
- # ORDER BY @count DESC
1160
- #
1161
- # In the sample pseudo code shown just above, {#set_group_distinct}
1162
- # call corresponds to <tt>COUNT(DISTINCT vendor)</tt> clause only.
1163
- # <tt>GROUP BY</tt>, <tt>ORDER BY</tt>, and <tt>COUNT(*)</tt>
1164
- # clauses are all an equivalent of {#set_group_by} settings. Both
1165
- # queries will return one matching row for each category. In
1166
- # addition to indexed attributes, matches will also contain
1167
- # total per-category matches count, and the count of distinct
1168
- # vendor IDs within each category.
1169
- #
1170
- # @param [String, Symbol] attribute an attribute name.
1171
- # @return [Sphinx::Client] self.
1172
- #
1173
- # @example
1174
- # sphinx.set_group_distinct(:category_id)
1175
- #
1176
- # @raise [ArgumentError] Occurred when parameters are invalid.
1177
- #
1178
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupdistinct Section 6.5.2, "SetGroupDistinct"
1179
- # @see #set_group_by
505
+
506
+ # Set count-distinct attribute for group-by queries.
507
+ def SetGroupDistinct(attribute)
508
+ assert { attribute.instance_of? String }
509
+ @groupdistinct = attribute
510
+ end
511
+
512
+ # Set distributed retries count and delay.
513
+ def SetRetries(count, delay = 0)
514
+ assert { count.instance_of? Fixnum }
515
+ assert { delay.instance_of? Fixnum }
516
+
517
+ @retrycount = count
518
+ @retrydelay = delay
519
+ end
520
+
521
+ # Set attribute values override
1180
522
  #
1181
- def set_group_distinct(attribute)
1182
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
523
+ # There can be only one override per attribute.
524
+ # +values+ must be a hash that maps document IDs to attribute values.
525
+ def SetOverride(attrname, attrtype, values)
526
+ assert { attrname.instance_of? String }
527
+ assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
528
+ assert { values.instance_of? Hash }
1183
529
 
1184
- @groupdistinct = attribute.to_s
1185
- self
530
+ @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
1186
531
  end
1187
- alias :SetGroupDistinct :set_group_distinct
1188
532
 
1189
- #=================================================================
1190
- # Querying
1191
- #=================================================================
1192
-
1193
- # Clears all currently set filters.
1194
- #
1195
- # This call is only normally required when using multi-queries. You might want
1196
- # to set different filters for different queries in the batch. To do that,
1197
- # you should call {#reset_filters} and add new filters using the respective calls.
1198
- #
1199
- # @return [Sphinx::Client] self.
1200
- #
1201
- # @example
1202
- # sphinx.reset_filters
1203
- #
1204
- # @see #set_filter
1205
- # @see #set_filter_range
1206
- # @see #set_filter_float_range
1207
- # @see #set_geo_anchor
1208
- #
1209
- def reset_filters
1210
- @filters = []
1211
- @anchor = []
1212
- self
533
+ # Set select-list (attributes or expressions), SQL-like syntax.
534
+ def SetSelect(select)
535
+ assert { select.instance_of? String }
536
+ @select = select
1213
537
  end
1214
- alias :ResetFilters :reset_filters
1215
-
1216
- # Clears all currently group-by settings, and disables group-by.
1217
- #
1218
- # This call is only normally required when using multi-queries. You can
1219
- # change individual group-by settings using {#set_group_by} and {#set_group_distinct}
1220
- # calls, but you can not disable group-by using those calls. {#reset_group_by}
1221
- # fully resets previous group-by settings and disables group-by mode in the
1222
- # current state, so that subsequent {#add_query} calls can perform non-grouping
1223
- # searches.
1224
- #
1225
- # @return [Sphinx::Client] self.
1226
- #
1227
- # @example
1228
- # sphinx.reset_group_by
1229
- #
1230
- # @see #set_group_by
1231
- # @see #set_group_distinct
1232
- #
1233
- def reset_group_by
538
+
539
+ # Clear all filters (for multi-queries).
540
+ def ResetFilters
541
+ @filters = []
542
+ @anchor = []
543
+ end
544
+
545
+ # Clear groupby settings (for multi-queries).
546
+ def ResetGroupBy
1234
547
  @groupby = ''
1235
548
  @groupfunc = SPH_GROUPBY_DAY
1236
549
  @groupsort = '@group desc'
1237
550
  @groupdistinct = ''
1238
- self
1239
551
  end
1240
- alias :ResetGroupBy :reset_group_by
1241
-
552
+
1242
553
  # Clear all attribute value overrides (for multi-queries).
1243
- #
1244
- # This call is only normally required when using multi-queries. You might want
1245
- # to set field overrides for different queries in the batch. To do that,
1246
- # you should call {#reset_overrides} and add new overrides using the
1247
- # respective calls.
1248
- #
1249
- # @return [Sphinx::Client] self.
1250
- #
1251
- # @example
1252
- # sphinx.reset_overrides
1253
- #
1254
- # @see #set_override
1255
- #
1256
- def reset_overrides
554
+ def ResetOverrides
1257
555
  @overrides = []
1258
- self
1259
556
  end
1260
- alias :ResetOverrides :reset_overrides
1261
-
1262
- # Connects to searchd server, runs given search query with
1263
- # current settings, obtains and returns the result set.
1264
- #
1265
- # +query+ is a query string. +index+ is an index name (or names)
1266
- # string. Returns false and sets {#last_error} message on general
1267
- # error. Returns search result set on success. Additionally,
1268
- # the contents of +comment+ are sent to the query log, marked in
1269
- # square brackets, just before the search terms, which can be very
1270
- # useful for debugging. Currently, the comment is limited to 128
1271
- # characters.
1272
- #
1273
- # Default value for +index+ is <tt>"*"</tt> that means to query
1274
- # all local indexes. Characters allowed in index names include
1275
- # Latin letters (a-z), numbers (0-9), minus sign (-), and
1276
- # underscore (_); everything else is considered a separator.
1277
- # Therefore, all of the following samples calls are valid and
1278
- # will search the same two indexes:
1279
- #
1280
- # sphinx.query('test query', 'main delta')
1281
- # sphinx.query('test query', 'main;delta')
1282
- # sphinx.query('test query', 'main, delta');
1283
- #
1284
- # Index specification order matters. If document with identical
1285
- # IDs are found in two or more indexes, weight and attribute
1286
- # values from the very last matching index will be used for
1287
- # sorting and returning to client (unless explicitly overridden
1288
- # with {#set_index_weights}). Therefore, in the example above,
1289
- # matches from "delta" index will always win over matches
1290
- # from "main".
1291
- #
1292
- # On success, {#query} returns a result set that contains some
1293
- # of the found matches (as requested by {#set_limits}) and
1294
- # additional general per-query statistics. The result set
1295
- # is an +Hash+ with the following keys and values:
1296
- #
1297
- # <tt>"matches"</tt>::
1298
- # Array with small +Hash+es containing document weight and
1299
- # attribute values.
1300
- # <tt>"total"</tt>::
1301
- # Total amount of matches retrieved on server (ie. to the server
1302
- # side result set) by this query. You can retrieve up to this
1303
- # amount of matches from server for this query text with current
1304
- # query settings.
1305
- # <tt>"total_found"</tt>::
1306
- # Total amount of matching documents in index (that were found
1307
- # and processed on server).
1308
- # <tt>"words"</tt>::
1309
- # Hash which maps query keywords (case-folded, stemmed, and
1310
- # otherwise processed) to a small Hash with per-keyword statistics
1311
- # ("docs", "hits").
1312
- # <tt>"error"</tt>::
1313
- # Query error message reported by searchd (string, human readable).
1314
- # Empty if there were no errors.
1315
- # <tt>"warning"</tt>::
1316
- # Query warning message reported by searchd (string, human readable).
1317
- # Empty if there were no warnings.
1318
- #
1319
- # Please note: you can use both strings and symbols as <tt>Hash</tt> keys.
1320
- #
1321
- # It should be noted that {#query} carries out the same actions as
1322
- # {#add_query} and {#run_queries} without the intermediate steps; it
1323
- # is analogous to a single {#add_query} call, followed by a
1324
- # corresponding {#run_queries}, then returning the first array
1325
- # element of matches (from the first, and only, query.)
1326
- #
1327
- # @param [String] query a query string.
1328
- # @param [String] index an index name (or names).
1329
- # @param [String] comment a comment to be sent to the query log.
1330
- # @return [Hash, false] result set described above or +false+ on error.
1331
- # @yield [Client] yields just before query performing. Useful to set
1332
- # filters or sortings. When block does not accept any parameters, it
1333
- # will be eval'ed inside {Client} instance itself. In this case you
1334
- # can omit +set_+ prefix for configuration methods.
1335
- # @yieldparam [Client] sphinx self.
1336
- #
1337
- # @example Regular query with previously set filters
1338
- # sphinx.query('some search text', '*', 'search page')
1339
- # @example Query with block
1340
- # sphinx.query('test') do |sphinx|
1341
- # sphinx.set_match_mode :all
1342
- # sphinx.set_id_range 10, 100
1343
- # end
1344
- # @example Query with instant filters configuring
1345
- # sphinx.query('test') do
1346
- # match_mode :all
1347
- # id_range 10, 100
1348
- # end
1349
- #
1350
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-query Section 6.6.1, "Query"
1351
- # @see #add_query
1352
- # @see #run_queries
1353
- #
1354
- def query(query, index = '*', comment = '', &block)
557
+
558
+ # Connect to searchd server and run given search query.
559
+ #
560
+ # <tt>query</tt> is query string
561
+ #
562
+ # <tt>index</tt> is index name (or names) to query. default value is "*" which means
563
+ # to query all indexes. Accepted characters for index names are letters, numbers,
564
+ # dash, and underscore; everything else is considered a separator. Therefore,
565
+ # all the following calls are valid and will search two indexes:
566
+ #
567
+ # sphinx.Query('test query', 'main delta')
568
+ # sphinx.Query('test query', 'main;delta')
569
+ # sphinx.Query('test query', 'main, delta')
570
+ #
571
+ # Index order matters. If identical IDs are found in two or more indexes,
572
+ # weight and attribute values from the very last matching index will be used
573
+ # for sorting and returning to client. Therefore, in the example above,
574
+ # matches from "delta" index will always "win" over matches from "main".
575
+ #
576
+ # Returns false on failure.
577
+ # Returns hash which has the following keys on success:
578
+ #
579
+ # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
580
+ # * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
581
+ # * <tt>'total_found'</tt> -- total amount of matching documents in index
582
+ # * <tt>'time'</tt> -- search time
583
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
584
+ def Query(query, index = '*', comment = '')
585
+ assert { @reqs.empty? }
1355
586
  @reqs = []
1356
-
1357
- if block_given?
1358
- if block.arity > 0
1359
- yield self
1360
- else
1361
- begin
1362
- @inside_eval = true
1363
- instance_eval(&block)
1364
- ensure
1365
- @inside_eval = false
1366
- end
1367
- end
1368
- end
1369
-
1370
- logger.debug { "[sphinx] query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if logger
1371
-
1372
- self.add_query(query, index, comment, false)
1373
- results = self.run_queries
1374
-
587
+
588
+ self.AddQuery(query, index, comment)
589
+ results = self.RunQueries
590
+
1375
591
  # probably network error; error message should be already filled
1376
592
  return false unless results.instance_of?(Array)
1377
-
593
+
1378
594
  @error = results[0]['error']
1379
595
  @warning = results[0]['warning']
1380
-
596
+
1381
597
  return false if results[0]['status'] == SEARCHD_ERROR
1382
598
  return results[0]
1383
599
  end
1384
- alias :Query :query
1385
-
1386
- # Adds additional query with current settings to multi-query batch.
1387
- # +query+ is a query string. +index+ is an index name (or names)
1388
- # string. Additionally if provided, the contents of +comment+ are
1389
- # sent to the query log, marked in square brackets, just before
1390
- # the search terms, which can be very useful for debugging.
1391
- # Currently, this is limited to 128 characters. Returns index
1392
- # to results array returned from {#run_queries}.
1393
- #
1394
- # Batch queries (or multi-queries) enable searchd to perform
1395
- # internal optimizations if possible. They also reduce network
1396
- # connection overheads and search process creation overheads in all
1397
- # cases. They do not result in any additional overheads compared
1398
- # to simple queries. Thus, if you run several different queries
1399
- # from your web page, you should always consider using multi-queries.
1400
- #
1401
- # For instance, running the same full-text query but with different
1402
- # sorting or group-by settings will enable searchd to perform
1403
- # expensive full-text search and ranking operation only once, but
1404
- # compute multiple group-by results from its output.
1405
- #
1406
- # This can be a big saver when you need to display not just plain
1407
- # search results but also some per-category counts, such as the
1408
- # amount of products grouped by vendor. Without multi-query, you
1409
- # would have to run several queries which perform essentially the
1410
- # same search and retrieve the same matches, but create result
1411
- # sets differently. With multi-query, you simply pass all these
1412
- # queries in a single batch and Sphinx optimizes the redundant
1413
- # full-text search internally.
1414
- #
1415
- # {#add_query} internally saves full current settings state along
1416
- # with the query, and you can safely change them afterwards for
1417
- # subsequent {#add_query} calls. Already added queries will not
1418
- # be affected; there's actually no way to change them at all.
1419
- # Here's an example:
1420
- #
1421
- # sphinx.set_sort_mode(:relevance)
1422
- # sphinx.add_query("hello world", "documents")
1423
- #
1424
- # sphinx.set_sort_mode(:attr_desc, :price)
1425
- # sphinx.add_query("ipod", "products")
1426
- #
1427
- # sphinx.add_query("harry potter", "books")
1428
- #
1429
- # results = sphinx.run_queries
1430
- #
1431
- # With the code above, 1st query will search for "hello world"
1432
- # in "documents" index and sort results by relevance, 2nd query
1433
- # will search for "ipod" in "products" index and sort results
1434
- # by price, and 3rd query will search for "harry potter" in
1435
- # "books" index while still sorting by price. Note that 2nd
1436
- # {#set_sort_mode} call does not affect the first query (because
1437
- # it's already added) but affects both other subsequent queries.
1438
- #
1439
- # Additionally, any filters set up before an {#add_query} will
1440
- # fall through to subsequent queries. So, if {#set_filter} is
1441
- # called before the first query, the same filter will be in
1442
- # place for the second (and subsequent) queries batched through
1443
- # {#add_query} unless you call {#reset_filters} first. Alternatively,
1444
- # you can add additional filters as well.
1445
- #
1446
- # This would also be true for grouping options and sorting options;
1447
- # no current sorting, filtering, and grouping settings are affected
1448
- # by this call; so subsequent queries will reuse current query settings.
1449
- #
1450
- # {#add_query} returns an index into an array of results that will
1451
- # be returned from {#run_queries} call. It is simply a sequentially
1452
- # increasing 0-based integer, ie. first call will return 0, second
1453
- # will return 1, and so on. Just a small helper so you won't have
1454
- # to track the indexes manually if you need them.
1455
- #
1456
- # @param [String] query a query string.
1457
- # @param [String] index an index name (or names).
1458
- # @param [String] comment a comment to be sent to the query log.
1459
- # @param [Boolean] log indicating whether this call should be logged.
1460
- # @return [Integer] an index into an array of results that will
1461
- # be returned from {#run_queries} call.
600
+
601
+ # Add query to batch.
1462
602
  #
1463
- # @example
1464
- # sphinx.add_query('some search text', '*', 'search page')
603
+ # Batch queries enable searchd to perform internal optimizations,
604
+ # if possible; and reduce network connection overheads in all cases.
1465
605
  #
1466
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-addquery Section 6.6.2, "AddQuery"
1467
- # @see #query
1468
- # @see #run_queries
606
+ # For instance, running exactly the same query with different
607
+ # groupby settings will enable searchd to perform expensive
608
+ # full-text search and ranking operation only once, but compute
609
+ # multiple groupby results from its output.
1469
610
  #
1470
- def add_query(query, index = '*', comment = '', log = true)
1471
- logger.debug { "[sphinx] add_query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if log and logger
611
+ # Parameters are exactly the same as in <tt>Query</tt> call.
612
+ # Returns index to results array returned by <tt>RunQueries</tt> call.
613
+ def AddQuery(query, index = '*', comment = '')
1472
614
  # build request
1473
-
615
+
1474
616
  # mode and limits
1475
617
  request = Request.new
1476
618
  request.put_int @offset, @limit, @mode, @ranker, @sort
@@ -1484,8 +626,8 @@ module Sphinx
1484
626
  # id64 range marker
1485
627
  request.put_int 1
1486
628
  # id64 range
1487
- request.put_int64 @min_id.to_i, @max_id.to_i
1488
-
629
+ request.put_int64 @min_id.to_i, @max_id.to_i
630
+
1489
631
  # filters
1490
632
  request.put_int @filters.length
1491
633
  @filters.each do |filter|
@@ -1504,7 +646,7 @@ module Sphinx
1504
646
  end
1505
647
  request.put_int filter['exclude'] ? 1 : 0
1506
648
  end
1507
-
649
+
1508
650
  # group-by clause, max-matches count, group-sort clause, cutoff count
1509
651
  request.put_int @groupfunc
1510
652
  request.put_string @groupby
@@ -1512,7 +654,7 @@ module Sphinx
1512
654
  request.put_string @groupsort
1513
655
  request.put_int @cutoff, @retrycount, @retrydelay
1514
656
  request.put_string @groupdistinct
1515
-
657
+
1516
658
  # anchor point
1517
659
  if @anchor.empty?
1518
660
  request.put_int 0
@@ -1521,328 +663,283 @@ module Sphinx
1521
663
  request.put_string @anchor['attrlat'], @anchor['attrlong']
1522
664
  request.put_float @anchor['lat'], @anchor['long']
1523
665
  end
1524
-
666
+
1525
667
  # per-index weights
1526
668
  request.put_int @indexweights.length
1527
669
  @indexweights.each do |idx, weight|
1528
- request.put_string idx.to_s
670
+ request.put_string idx
1529
671
  request.put_int weight
1530
672
  end
1531
-
673
+
1532
674
  # max query time
1533
675
  request.put_int @maxquerytime
1534
-
676
+
1535
677
  # per-field weights
1536
678
  request.put_int @fieldweights.length
1537
679
  @fieldweights.each do |field, weight|
1538
- request.put_string field.to_s
680
+ request.put_string field
1539
681
  request.put_int weight
1540
682
  end
1541
-
683
+
1542
684
  # comment
1543
685
  request.put_string comment
1544
-
686
+
1545
687
  # attribute overrides
1546
688
  request.put_int @overrides.length
1547
689
  for entry in @overrides do
1548
690
  request.put_string entry['attr']
1549
691
  request.put_int entry['type'], entry['values'].size
1550
692
  entry['values'].each do |id, val|
693
+ assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
694
+ assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
695
+
1551
696
  request.put_int64 id
1552
697
  case entry['type']
1553
698
  when SPH_ATTR_FLOAT
1554
- request.put_float val.to_f
699
+ request.put_float val
1555
700
  when SPH_ATTR_BIGINT
1556
- request.put_int64 val.to_i
701
+ request.put_int64 val
1557
702
  else
1558
- request.put_int val.to_i
703
+ request.put_int val
1559
704
  end
1560
705
  end
1561
706
  end
1562
-
707
+
1563
708
  # select-list
1564
709
  request.put_string @select
1565
-
710
+
1566
711
  # store request to requests array
1567
712
  @reqs << request.to_s;
1568
713
  return @reqs.length - 1
1569
714
  end
1570
- alias :AddQuery :add_query
1571
-
1572
- # Connect to searchd, runs a batch of all queries added using
1573
- # {#add_query}, obtains and returns the result sets. Returns
1574
- # +false+ and sets {#last_error} message on general error
1575
- # (such as network I/O failure). Returns a plain array of
1576
- # result sets on success.
1577
- #
1578
- # Each result set in the returned array is exactly the same as
1579
- # the result set returned from {#query}.
1580
- #
1581
- # Note that the batch query request itself almost always succeeds —
1582
- # unless there's a network error, blocking index rotation in
1583
- # progress, or another general failure which prevents the whole
1584
- # request from being processed.
1585
- #
1586
- # However individual queries within the batch might very well
1587
- # fail. In this case their respective result sets will contain
1588
- # non-empty "error" message, but no matches or query statistics.
1589
- # In the extreme case all queries within the batch could fail.
1590
- # There still will be no general error reported, because API
1591
- # was able to successfully connect to searchd, submit the batch,
1592
- # and receive the results — but every result set will have a
1593
- # specific error message.
1594
- #
1595
- # @return [Array<Hash>] an +Array+ of +Hash+es which are exactly
1596
- # the same as the result set returned from {#query}.
715
+
716
+ # Run queries batch.
1597
717
  #
1598
- # @example
1599
- # sphinx.add_query('some search text', '*', 'search page')
1600
- # results = sphinx.run_queries
718
+ # Returns an array of result sets on success.
719
+ # Returns false on network IO failure.
1601
720
  #
1602
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-runqueries Section 6.6.3, "RunQueries"
1603
- # @see #add_query
721
+ # Each result set in returned array is a hash which contains
722
+ # the same keys as the hash returned by <tt>Query</tt>, plus:
1604
723
  #
1605
- def run_queries
1606
- logger.debug { "[sphinx] run_queries(#{@reqs.length} queries)" } if logger
724
+ # * <tt>'error'</tt> -- search error for this query
725
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
726
+ def RunQueries
1607
727
  if @reqs.empty?
1608
- @error = 'No queries defined, issue add_query() first'
728
+ @error = 'No queries defined, issue AddQuery() first'
1609
729
  return false
1610
730
  end
1611
731
 
1612
- reqs, nreqs = @reqs.join(''), @reqs.length
732
+ req = @reqs.join('')
733
+ nreqs = @reqs.length
1613
734
  @reqs = []
1614
- response = perform_request(:search, reqs, nreqs)
1615
-
735
+ response = PerformRequest(:search, req, nreqs)
736
+
1616
737
  # parse response
1617
- (1..nreqs).map do
1618
- result = HashWithIndifferentAccess.new('error' => '', 'warning' => '')
1619
-
1620
- # extract status
1621
- status = result['status'] = response.get_int
1622
- if status != SEARCHD_OK
1623
- message = response.get_string
1624
- if status == SEARCHD_WARNING
1625
- result['warning'] = message
1626
- else
1627
- result['error'] = message
1628
- next result
738
+ begin
739
+ results = []
740
+ ires = 0
741
+ while ires < nreqs
742
+ ires += 1
743
+ result = {}
744
+
745
+ result['error'] = ''
746
+ result['warning'] = ''
747
+
748
+ # extract status
749
+ status = result['status'] = response.get_int
750
+ if status != SEARCHD_OK
751
+ message = response.get_string
752
+ if status == SEARCHD_WARNING
753
+ result['warning'] = message
754
+ else
755
+ result['error'] = message
756
+ results << result
757
+ next
758
+ end
1629
759
  end
1630
- end
1631
-
1632
- # read schema
1633
- nfields = response.get_int
1634
- result['fields'] = (1..nfields).map { response.get_string }
1635
-
1636
- attrs_names_in_order = []
1637
- nattrs = response.get_int
1638
- attrs = (1..nattrs).inject({}) do |hash, idx|
1639
- name, type = response.get_string, response.get_int
1640
- hash[name] = type
1641
- attrs_names_in_order << name
1642
- hash
1643
- end
1644
- result['attrs'] = attrs
1645
-
1646
- # read match count
1647
- count, id64 = response.get_ints(2)
1648
-
1649
- # read matches
1650
- result['matches'] = (1..count).map do
1651
- doc, weight = if id64 == 0
1652
- response.get_ints(2)
1653
- else
1654
- [response.get_int64, response.get_int]
760
+
761
+ # read schema
762
+ fields = []
763
+ attrs = {}
764
+ attrs_names_in_order = []
765
+
766
+ nfields = response.get_int
767
+ while nfields > 0
768
+ nfields -= 1
769
+ fields << response.get_string
1655
770
  end
1656
-
1657
- # This is a single result put in the result['matches'] array
1658
- match = { 'id' => doc, 'weight' => weight }
1659
- match['attrs'] = attrs_names_in_order.inject({}) do |hash, name|
1660
- hash[name] = case attrs[name]
1661
- when SPH_ATTR_BIGINT
1662
- # handle 64-bit ints
1663
- response.get_int64
1664
- when SPH_ATTR_FLOAT
1665
- # handle floats
1666
- response.get_float
1667
- else
1668
- # handle everything else as unsigned ints
1669
- val = response.get_int
1670
- if (attrs[name] & SPH_ATTR_MULTI) != 0
1671
- (1..val).map { response.get_int }
771
+ result['fields'] = fields
772
+
773
+ nattrs = response.get_int
774
+ while nattrs > 0
775
+ nattrs -= 1
776
+ attr = response.get_string
777
+ type = response.get_int
778
+ attrs[attr] = type
779
+ attrs_names_in_order << attr
780
+ end
781
+ result['attrs'] = attrs
782
+
783
+ # read match count
784
+ count = response.get_int
785
+ id64 = response.get_int
786
+
787
+ # read matches
788
+ result['matches'] = []
789
+ while count > 0
790
+ count -= 1
791
+
792
+ if id64 != 0
793
+ doc = response.get_int64
794
+ weight = response.get_int
795
+ else
796
+ doc, weight = response.get_ints(2)
797
+ end
798
+
799
+ r = {} # This is a single result put in the result['matches'] array
800
+ r['id'] = doc
801
+ r['weight'] = weight
802
+ attrs_names_in_order.each do |a|
803
+ r['attrs'] ||= {}
804
+
805
+ case attrs[a]
806
+ when SPH_ATTR_BIGINT
807
+ # handle 64-bit ints
808
+ r['attrs'][a] = response.get_int64
809
+ when SPH_ATTR_FLOAT
810
+ # handle floats
811
+ r['attrs'][a] = response.get_float
812
+ when SPH_ATTR_STRING
813
+ r['attrs'][a] = response.get_string
1672
814
  else
1673
- val
1674
- end
815
+ # handle everything else as unsigned ints
816
+ val = response.get_int
817
+ if (attrs[a] & SPH_ATTR_MULTI) != 0
818
+ r['attrs'][a] = []
819
+ 1.upto(val) do
820
+ r['attrs'][a] << response.get_int
821
+ end
822
+ else
823
+ r['attrs'][a] = val
824
+ end
825
+ end
1675
826
  end
1676
- hash
827
+ result['matches'] << r
1677
828
  end
1678
- match
1679
- end
1680
- result['total'], result['total_found'], msecs = response.get_ints(3)
1681
- result['time'] = '%.3f' % (msecs / 1000.0)
1682
-
1683
- nwords = response.get_int
1684
- result['words'] = (1..nwords).inject({}) do |hash, idx|
1685
- word = response.get_string
1686
- docs, hits = response.get_ints(2)
1687
- hash[word] = { 'docs' => docs, 'hits' => hits }
1688
- hash
829
+ result['total'], result['total_found'], msecs, words = response.get_ints(4)
830
+ result['time'] = '%.3f' % (msecs / 1000.0)
831
+
832
+ result['words'] = {}
833
+ while words > 0
834
+ words -= 1
835
+ word = response.get_string
836
+ docs, hits = response.get_ints(2)
837
+ result['words'][word] = { 'docs' => docs, 'hits' => hits }
838
+ end
839
+
840
+ results << result
1689
841
  end
1690
-
1691
- result
842
+ #rescue EOFError
843
+ # @error = 'incomplete reply'
844
+ # raise SphinxResponseError, @error
1692
845
  end
846
+
847
+ return results
1693
848
  end
1694
- alias :RunQueries :run_queries
1695
-
1696
- #=================================================================
1697
- # Additional functionality
1698
- #=================================================================
1699
-
1700
- # Excerpts (snippets) builder function. Connects to searchd, asks
1701
- # it to generate excerpts (snippets) from given documents, and
1702
- # returns the results.
1703
- #
1704
- # +docs+ is a plain array of strings that carry the documents'
1705
- # contents. +index+ is an index name string. Different settings
1706
- # (such as charset, morphology, wordforms) from given index will
1707
- # be used. +words+ is a string that contains the keywords to
1708
- # highlight. They will be processed with respect to index settings.
1709
- # For instance, if English stemming is enabled in the index,
1710
- # "shoes" will be highlighted even if keyword is "shoe". Starting
1711
- # with version 0.9.9-rc1, keywords can contain wildcards, that
1712
- # work similarly to star-syntax available in queries.
1713
- #
1714
- # @param [Array<String>] docs an array of strings which represent
1715
- # the documents' contents.
1716
- # @param [String] index an index which settings will be used for
1717
- # stemming, lexing and case folding.
1718
- # @param [String] words a string which contains the words to highlight.
1719
- # @param [Hash] opts a +Hash+ which contains additional optional
1720
- # highlighting parameters.
1721
- # @option opts [String] 'before_match' ("<b>") a string to insert before a
1722
- # keyword match.
1723
- # @option opts [String] 'after_match' ("</b>") a string to insert after a
1724
- # keyword match.
1725
- # @option opts [String] 'chunk_separator' (" ... ") a string to insert
1726
- # between snippet chunks (passages).
1727
- # @option opts [Integer] 'limit' (256) maximum snippet size, in symbols
1728
- # (codepoints).
1729
- # @option opts [Integer] 'around' (5) how many words to pick around
1730
- # each matching keywords block.
1731
- # @option opts [Boolean] 'exact_phrase' (false) whether to highlight exact
1732
- # query phrase matches only instead of individual keywords.
1733
- # @option opts [Boolean] 'single_passage' (false) whether to extract single
1734
- # best passage only.
1735
- # @option opts [Boolean] 'use_boundaries' (false) whether to extract
1736
- # passages by phrase boundaries setup in tokenizer.
1737
- # @option opts [Boolean] 'weight_order' (false) whether to sort the
1738
- # extracted passages in order of relevance (decreasing weight),
1739
- # or in order of appearance in the document (increasing position).
1740
- # @return [Array<String>, false] a plain array of strings with
1741
- # excerpts (snippets) on success; otherwise, +false+.
1742
- #
1743
- # @raise [ArgumentError] Occurred when parameters are invalid.
1744
- #
1745
- # @example
1746
- # sphinx.build_excerpts(['hello world', 'hello me'], 'idx', 'hello')
1747
- #
1748
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-buildexcerpts Section 6.7.1, "BuildExcerpts"
1749
- #
1750
- def build_excerpts(docs, index, words, opts = {})
1751
- raise ArgumentError, '"docs" argument must be Array' unless docs.kind_of?(Array)
1752
- raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
1753
- raise ArgumentError, '"words" argument must be String' unless words.kind_of?(String)
1754
- raise ArgumentError, '"opts" argument must be Hash' unless opts.kind_of?(Hash)
1755
-
1756
- docs.each do |doc|
1757
- raise ArgumentError, '"docs" argument must be Array of Strings' unless doc.kind_of?(String)
1758
- end
849
+
850
+ # Connect to searchd server and generate excerpts from given documents.
851
+ #
852
+ # * <tt>docs</tt> -- an array of strings which represent the documents' contents
853
+ # * <tt>index</tt> -- a string specifying the index whose settings will be used
854
+ # for stemming, lexing and case folding
855
+ # * <tt>words</tt> -- a string which contains the words to highlight
856
+ # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
857
+ #
858
+ # You can use following parameters:
859
+ # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
860
+ # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
861
+ # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
862
+ # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
863
+ # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
864
+ # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
865
+ # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
866
+ # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
867
+ # * <tt>'weight_order'</tt> -- whether to order best passages in document order (default) or in weight order
868
+ #
869
+ # Returns false on failure.
870
+ # Returns an array of string excerpts on success.
871
+ def BuildExcerpts(docs, index, words, opts = {})
872
+ assert { docs.instance_of? Array }
873
+ assert { index.instance_of? String }
874
+ assert { words.instance_of? String }
875
+ assert { opts.instance_of? Hash }
1759
876
 
1760
877
  # fixup options
1761
- opts = HashWithIndifferentAccess.new(
1762
- 'before_match' => '<b>',
1763
- 'after_match' => '</b>',
1764
- 'chunk_separator' => ' ... ',
1765
- 'limit' => 256,
1766
- 'around' => 5,
1767
- 'exact_phrase' => false,
1768
- 'single_passage' => false,
1769
- 'use_boundaries' => false,
1770
- 'weight_order' => false
1771
- ).update(opts)
1772
-
878
+ opts['before_match'] ||= '<b>';
879
+ opts['after_match'] ||= '</b>';
880
+ opts['chunk_separator'] ||= ' ... ';
881
+ opts['limit'] ||= 256;
882
+ opts['around'] ||= 5;
883
+ opts['exact_phrase'] ||= false
884
+ opts['single_passage'] ||= false
885
+ opts['use_boundaries'] ||= false
886
+ opts['weight_order'] ||= false
887
+ opts['query_mode'] ||= false
888
+
1773
889
  # build request
1774
-
890
+
1775
891
  # v.1.0 req
1776
892
  flags = 1
1777
893
  flags |= 2 if opts['exact_phrase']
1778
894
  flags |= 4 if opts['single_passage']
1779
895
  flags |= 8 if opts['use_boundaries']
1780
896
  flags |= 16 if opts['weight_order']
1781
-
897
+ flags |= 32 if opts['query_mode']
898
+
1782
899
  request = Request.new
1783
900
  request.put_int 0, flags # mode=0, flags=1 (remove spaces)
1784
901
  # req index
1785
- request.put_string index.to_s
902
+ request.put_string index
1786
903
  # req words
1787
904
  request.put_string words
1788
-
905
+
1789
906
  # options
1790
907
  request.put_string opts['before_match']
1791
908
  request.put_string opts['after_match']
1792
909
  request.put_string opts['chunk_separator']
1793
910
  request.put_int opts['limit'].to_i, opts['around'].to_i
1794
-
911
+
1795
912
  # documents
1796
913
  request.put_int docs.size
1797
- request.put_string(*docs)
1798
-
1799
- response = perform_request(:excerpt, request)
914
+ docs.each do |doc|
915
+ assert { doc.instance_of? String }
1800
916
 
917
+ request.put_string doc
918
+ end
919
+
920
+ response = PerformRequest(:excerpt, request)
921
+
1801
922
  # parse response
1802
- docs.map { response.get_string }
923
+ begin
924
+ res = []
925
+ docs.each do |doc|
926
+ res << response.get_string
927
+ end
928
+ rescue EOFError
929
+ @error = 'incomplete reply'
930
+ raise SphinxResponseError, @error
931
+ end
932
+ return res
1803
933
  end
1804
- alias :BuildExcerpts :build_excerpts
1805
-
1806
- # Extracts keywords from query using tokenizer settings for given
1807
- # index, optionally with per-keyword occurrence statistics.
1808
- # Returns an array of hashes with per-keyword information.
1809
- #
1810
- # +query+ is a query to extract keywords from. +index+ is a name of
1811
- # the index to get tokenizing settings and keyword occurrence
1812
- # statistics from. +hits+ is a boolean flag that indicates whether
1813
- # keyword occurrence statistics are required.
1814
- #
1815
- # The result set consists of +Hash+es with the following keys and values:
1816
- #
1817
- # <tt>'tokenized'</tt>::
1818
- # Tokenized keyword.
1819
- # <tt>'normalized'</tt>::
1820
- # Normalized keyword.
1821
- # <tt>'docs'</tt>::
1822
- # A number of documents where keyword is found (if +hits+ param is +true+).
1823
- # <tt>'hits'</tt>::
1824
- # A number of keywords occurrences among all documents (if +hits+ param is +true+).
1825
- #
1826
- # @param [String] query a query string.
1827
- # @param [String] index an index to get tokenizing settings and
1828
- # keyword occurrence statistics from.
1829
- # @param [Boolean] hits indicates whether keyword occurrence
1830
- # statistics are required.
1831
- # @return [Array<Hash>] an +Array+ of +Hash+es in format specified
1832
- # above.
1833
- #
1834
- # @raise [ArgumentError] Occurred when parameters are invalid.
1835
- #
1836
- # @example
1837
- # keywords = sphinx.build_keywords("this.is.my query", "test1", false)
1838
- #
1839
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-buildkeywords Section 6.7.3, "BuildKeywords"
1840
- #
1841
- def build_keywords(query, index, hits)
1842
- raise ArgumentError, '"query" argument must be String' unless query.kind_of?(String)
1843
- raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
1844
- raise ArgumentError, '"hits" argument must be Boolean' unless hits.kind_of?(TrueClass) or hits.kind_of?(FalseClass)
1845
-
934
+
935
+ # Connect to searchd server, and generate keyword list for a given query.
936
+ #
937
+ # Returns an array of words on success.
938
+ def BuildKeywords(query, index, hits)
939
+ assert { query.instance_of? String }
940
+ assert { index.instance_of? String }
941
+ assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
942
+
1846
943
  # build request
1847
944
  request = Request.new
1848
945
  # v.1.0 req
@@ -1850,107 +947,77 @@ module Sphinx
1850
947
  request.put_string index # req index
1851
948
  request.put_int hits ? 1 : 0
1852
949
 
1853
- response = perform_request(:keywords, request)
1854
-
950
+ response = PerformRequest(:keywords, request)
951
+
1855
952
  # parse response
1856
- nwords = response.get_int
1857
- (0...nwords).map do
1858
- tokenized = response.get_string
1859
- normalized = response.get_string
1860
-
1861
- entry = HashWithIndifferentAccess.new('tokenized' => tokenized, 'normalized' => normalized)
1862
- entry['docs'], entry['hits'] = response.get_ints(2) if hits
1863
-
1864
- entry
953
+ begin
954
+ res = []
955
+ nwords = response.get_int
956
+ 0.upto(nwords - 1) do |i|
957
+ tokenized = response.get_string
958
+ normalized = response.get_string
959
+
960
+ entry = { 'tokenized' => tokenized, 'normalized' => normalized }
961
+ entry['docs'], entry['hits'] = response.get_ints(2) if hits
962
+
963
+ res << entry
964
+ end
965
+ rescue EOFError
966
+ @error = 'incomplete reply'
967
+ raise SphinxResponseError, @error
1865
968
  end
969
+
970
+ return res
1866
971
  end
1867
- alias :BuildKeywords :build_keywords
1868
972
 
1869
- # Instantly updates given attribute values in given documents.
1870
- # Returns number of actually updated documents (0 or more) on
1871
- # success, or -1 on failure.
1872
- #
1873
- # +index+ is a name of the index (or indexes) to be updated.
1874
- # +attrs+ is a plain array with string attribute names, listing
1875
- # attributes that are updated. +values+ is a Hash where key is
1876
- # document ID, and value is a plain array of new attribute values.
1877
- #
1878
- # +index+ can be either a single index name or a list, like in
1879
- # {#query}. Unlike {#query}, wildcard is not allowed and all the
1880
- # indexes to update must be specified explicitly. The list of
1881
- # indexes can include distributed index names. Updates on
1882
- # distributed indexes will be pushed to all agents.
1883
- #
1884
- # The updates only work with docinfo=extern storage strategy.
1885
- # They are very fast because they're working fully in RAM, but
1886
- # they can also be made persistent: updates are saved on disk
1887
- # on clean searchd shutdown initiated by SIGTERM signal. With
1888
- # additional restrictions, updates are also possible on MVA
1889
- # attributes; refer to mva_updates_pool directive for details.
973
+ # Batch update given attributes in given rows in given indexes.
1890
974
  #
1891
- # The first sample statement will update document 1 in index
1892
- # "test1", setting "group_id" to 456. The second one will update
1893
- # documents 1001, 1002 and 1003 in index "products". For document
1894
- # 1001, the new price will be set to 123 and the new amount in
1895
- # stock to 5; for document 1002, the new price will be 37 and the
1896
- # new amount will be 11; etc. The third one updates document 1
1897
- # in index "test2", setting MVA attribute "group_id" to [456, 789].
975
+ # * +index+ is a name of the index to be updated
976
+ # * +attrs+ is an array of attribute name strings.
977
+ # * +values+ is a hash where key is document id, and value is an array of
978
+ # new attribute values
979
+ # * +mva+ indicates whether to update MVA attributes
1898
980
  #
1899
- # @example
1900
- # sphinx.update_attributes("test1", ["group_id"], { 1 => [456] });
1901
- # sphinx.update_attributes("products", ["price", "amount_in_stock"],
1902
- # { 1001 => [123, 5], 1002 => [37, 11], 1003 => [25, 129] });
1903
- # sphinx.update_attributes('test2', ['group_id'], { 1 => [[456, 789]] }, true)
981
+ # Returns number of actually updated documents (0 or more) on success.
982
+ # Returns -1 on failure.
1904
983
  #
1905
- # @param [String] index a name of the index to be updated.
1906
- # @param [Array<String>] attrs an array of attribute name strings.
1907
- # @param [Hash] values is a hash where key is document id, and
1908
- # value is an array of new attribute values.
1909
- # @param [Boolean] mva indicating whether to update MVA.
1910
- # @return [Integer] number of actually updated documents (0 or more) on success,
1911
- # -1 on failure.
1912
- #
1913
- # @raise [ArgumentError] Occurred when parameters are invalid.
1914
- #
1915
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-updateatttributes Section 6.7.2, "UpdateAttributes"
1916
- #
1917
- def update_attributes(index, attrs, values, mva = false)
984
+ # Usage example:
985
+ # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
986
+ def UpdateAttributes(index, attrs, values, mva = false)
1918
987
  # verify everything
1919
- raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
1920
- raise ArgumentError, '"mva" argument must be Boolean' unless mva.kind_of?(TrueClass) or mva.kind_of?(FalseClass)
1921
-
1922
- raise ArgumentError, '"attrs" argument must be Array' unless attrs.kind_of?(Array)
988
+ assert { index.instance_of? String }
989
+ assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
990
+
991
+ assert { attrs.instance_of? Array }
1923
992
  attrs.each do |attr|
1924
- raise ArgumentError, '"attrs" argument must be Array of Strings' unless attr.kind_of?(String) or attr.kind_of?(Symbol)
993
+ assert { attr.instance_of? String }
1925
994
  end
1926
-
1927
- raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
995
+
996
+ assert { values.instance_of? Hash }
1928
997
  values.each do |id, entry|
1929
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless id.kind_of?(Integer)
1930
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless entry.kind_of?(Array)
1931
- raise ArgumentError, "\"values\" argument Hash values Array must have #{attrs.length} elements" unless entry.length == attrs.length
998
+ assert { id.instance_of? Fixnum }
999
+ assert { entry.instance_of? Array }
1000
+ assert { entry.length == attrs.length }
1932
1001
  entry.each do |v|
1933
1002
  if mva
1934
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays' unless v.kind_of?(Array)
1935
- v.each do |vv|
1936
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays of Integers' unless vv.kind_of?(Integer)
1937
- end
1003
+ assert { v.instance_of? Array }
1004
+ v.each { |vv| assert { vv.instance_of? Fixnum } }
1938
1005
  else
1939
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Integers' unless v.kind_of?(Integer)
1006
+ assert { v.instance_of? Fixnum }
1940
1007
  end
1941
1008
  end
1942
1009
  end
1943
-
1010
+
1944
1011
  # build request
1945
1012
  request = Request.new
1946
1013
  request.put_string index
1947
-
1014
+
1948
1015
  request.put_int attrs.length
1949
1016
  for attr in attrs
1950
1017
  request.put_string attr
1951
1018
  request.put_int mva ? 1 : 0
1952
1019
  end
1953
-
1020
+
1954
1021
  request.put_int values.length
1955
1022
  values.each do |id, entry|
1956
1023
  request.put_int64 id
@@ -1960,266 +1027,152 @@ module Sphinx
1960
1027
  request.put_int(*entry)
1961
1028
  end
1962
1029
  end
1963
-
1964
- response = perform_request(:update, request)
1965
-
1966
- # parse response
1967
- response.get_int
1968
- end
1969
- alias :UpdateAttributes :update_attributes
1970
-
1971
- # Escapes characters that are treated as special operators by the
1972
- # query language parser.
1973
- #
1974
- # This function might seem redundant because it's trivial to
1975
- # implement in any calling application. However, as the set of
1976
- # special characters might change over time, it makes sense to
1977
- # have an API call that is guaranteed to escape all such
1978
- # characters at all times.
1979
- #
1980
- # @param [String] string is a string to escape.
1981
- # @return [String] an escaped string.
1982
- #
1983
- # @example
1984
- # escaped = sphinx.escape_string "escaping-sample@query/string"
1985
- #
1986
- def escape_string(string)
1987
- string.to_s.gsub(/([\\()|\-!@~"&\/\^\$=])/, '\\\\\\1')
1988
- end
1989
- alias :EscapeString :escape_string
1990
-
1991
- # Queries searchd status, and returns an array of status variable name
1992
- # and value pairs.
1993
- #
1994
- # @return [Array<Array>, Array<Hash>] a table containing searchd status information.
1995
- # If there are more than one server configured ({#set_servers}), an
1996
- # +Array+ of +Hash+es will be returned, one for each server. Hash will
1997
- # contain <tt>:server</tt> element with string name of server (<tt>host:port</tt>)
1998
- # and <tt>:status</tt> table just like one for a single server. In case of
1999
- # any error, it will be stored in the <tt>:error</tt> key.
2000
- #
2001
- # @example Single server
2002
- # status = sphinx.status
2003
- # puts status.map { |key, value| "#{key.rjust(20)}: #{value}" }
2004
- #
2005
- # @example Multiple servers
2006
- # sphinx.set_servers([
2007
- # { :host => 'localhost' },
2008
- # { :host => 'browse02.local' }
2009
- # ])
2010
- # sphinx.status.each do |report|
2011
- # puts "=== #{report[:server]}"
2012
- # if report[:error]
2013
- # puts "Error: #{report[:error]}"
2014
- # else
2015
- # puts report[:status].map { |key, value| "#{key.rjust(20)}: #{value}" }
2016
- # end
2017
- # end
2018
- #
2019
- def status
2020
- request = Request.new
2021
- request.put_int(1)
2022
-
1030
+
1031
+ response = PerformRequest(:update, request)
1032
+
2023
1033
  # parse response
2024
- results = @servers.map do |server|
2025
- begin
2026
- response = perform_request(:status, request, nil, server)
2027
- rows, cols = response.get_ints(2)
2028
- status = (0...rows).map do
2029
- (0...cols).map { response.get_string }
2030
- end
2031
- HashWithIndifferentAccess.new(:server => server.to_s, :status => status)
2032
- rescue SphinxError
2033
- # Re-raise error when a single server configured
2034
- raise if @servers.size == 1
2035
- HashWithIndifferentAccess.new(:server => server.to_s, :error => self.last_error)
2036
- end
1034
+ begin
1035
+ return response.get_int
1036
+ rescue EOFError
1037
+ @error = 'incomplete reply'
1038
+ raise SphinxResponseError, @error
2037
1039
  end
2038
-
2039
- @servers.size > 1 ? results : results.first[:status]
2040
1040
  end
2041
- alias :Status :status
2042
-
2043
- #=================================================================
2044
- # Persistent connections
2045
- #=================================================================
2046
-
2047
- # Opens persistent connection to the server.
2048
- #
2049
- # This method could be used only when a single searchd server
2050
- # configured.
2051
- #
2052
- # @return [Boolean] +true+ when persistent connection has been
2053
- # established; otherwise, +false+.
2054
- #
2055
- # @example
2056
- # begin
2057
- # sphinx.open
2058
- # # perform several requests
2059
- # ensure
2060
- # sphinx.close
2061
- # end
2062
- #
2063
- # @see #close
2064
- #
2065
- def open
2066
- if @servers.size > 1
2067
- @error = 'too many servers. persistent socket allowed only for a single server.'
2068
- return false
2069
- end
2070
-
2071
- if @servers.first.persistent?
1041
+
1042
+ # persistent connections
1043
+
1044
+ def Open
1045
+ unless @socket === false
2072
1046
  @error = 'already connected'
2073
- return false;
1047
+ return false
2074
1048
  end
2075
-
1049
+
2076
1050
  request = Request.new
2077
1051
  request.put_int(1)
2078
-
2079
- perform_request(:persist, request, nil) do |server, socket|
2080
- server.make_persistent!(socket)
2081
- end
1052
+ @socket = PerformRequest(:persist, request, nil, true)
2082
1053
 
2083
1054
  true
2084
1055
  end
2085
- alias :Open :open
2086
-
2087
- # Closes previously opened persistent connection.
2088
- #
2089
- # This method could be used only when a single searchd server
2090
- # configured.
2091
- #
2092
- # @return [Boolean] +true+ when persistent connection has been
2093
- # closed; otherwise, +false+.
2094
- #
2095
- # @example
2096
- # begin
2097
- # sphinx.open
2098
- # # perform several requests
2099
- # ensure
2100
- # sphinx.close
2101
- # end
2102
- #
2103
- # @see #open
2104
- #
2105
- def close
2106
- if @servers.size > 1
2107
- @error = 'too many servers. persistent socket allowed only for a single server.'
2108
- return false
2109
- end
2110
-
2111
- unless @servers.first.persistent?
1056
+
1057
+ def Close
1058
+ if @socket === false
2112
1059
  @error = 'not connected'
2113
1060
  return false;
2114
1061
  end
1062
+
1063
+ @socket.close
1064
+ @socket = false
1065
+
1066
+ true
1067
+ end
1068
+
1069
+ def Status
1070
+ request = Request.new
1071
+ request.put_int(1)
1072
+ response = PerformRequest(:status, request)
2115
1073
 
2116
- @servers.first.close_persistent!
1074
+ # parse response
1075
+ begin
1076
+ rows, cols = response.get_ints(2)
1077
+
1078
+ res = []
1079
+ 0.upto(rows - 1) do |i|
1080
+ res[i] = []
1081
+ 0.upto(cols - 1) do |j|
1082
+ res[i] << response.get_string
1083
+ end
1084
+ end
1085
+ rescue EOFError
1086
+ @error = 'incomplete reply'
1087
+ raise SphinxResponseError, @error
1088
+ end
1089
+
1090
+ res
2117
1091
  end
2118
- alias :Close :close
1092
+
1093
+ def FlushAttrs
1094
+ request = Request.new
1095
+ response = PerformRequest(:flushattrs, request)
2119
1096
 
1097
+ # parse response
1098
+ begin
1099
+ response.get_int
1100
+ rescue EOFError
1101
+ -1
1102
+ end
1103
+ end
1104
+
2120
1105
  protected
2121
-
2122
- # Connect, send query, get response.
2123
- #
2124
- # Use this method to communicate with Sphinx server. It ensures connection
2125
- # will be instantiated properly, all headers will be generated properly, etc.
2126
- #
2127
- # @param [Symbol, String] command searchd command to perform (<tt>:search</tt>, <tt>:excerpt</tt>,
2128
- # <tt>:update</tt>, <tt>:keywords</tt>, <tt>:persist</tt>, <tt>:status</tt>,
2129
- # <tt>:query</tt>, <tt>:flushattrs</tt>. See <tt>SEARCHD_COMMAND_*</tt> for details).
2130
- # @param [Sphinx::Request] request contains request body.
2131
- # @param [Integer] additional additional integer data to be placed between header and body.
2132
- # @param [Sphinx::Server] server where perform request on. This is special
2133
- # parameter for internal usage. If specified, request will be performed
2134
- # on specified server, and it will try to establish connection to this
2135
- # server only once.
2136
- #
2137
- # @yield if block given, response will not be parsed, plain socket
2138
- # will be yielded instead. This is special mode used for
2139
- # persistent connections, do not use for other tasks.
2140
- # @yieldparam [Sphinx::Server] server a server where request was performed on.
2141
- # @yieldparam [Sphinx::BufferedIO] socket a socket used to perform the request.
2142
- # @return [Sphinx::Response] contains response body.
2143
- #
2144
- # @see #parse_response
2145
- #
2146
- def perform_request(command, request, additional = nil, server = nil)
2147
- if server
2148
- attempts = 1
2149
- else
2150
- server = case request
2151
- when String
2152
- Zlib.crc32(request)
2153
- when Request
2154
- request.crc32
2155
- else
2156
- raise ArgumentError, "request argument must be String or Sphinx::Request"
1106
+
1107
+ # Connect to searchd server.
1108
+ def Connect
1109
+ return @socket unless @socket === false
1110
+
1111
+ begin
1112
+ if @path
1113
+ sock = UNIXSocket.new(@path)
1114
+ else
1115
+ sock = TCPSocket.new(@host, @port)
1116
+ end
1117
+ rescue => e
1118
+ location = @path || "#{@host}:#{@port}"
1119
+ @error = "connection to #{location} failed ("
1120
+ if e.kind_of?(SystemCallError)
1121
+ @error << "errno=#{e.class::Errno}, "
2157
1122
  end
2158
- attempts = nil
1123
+ @error << "msg=#{e.message})"
1124
+ @connerror = true
1125
+ raise SphinxConnectError, @error
2159
1126
  end
2160
1127
 
2161
- with_server(server, attempts) do |server|
2162
- logger.info { "[sphinx] #{command} on server #{server}" } if logger
2163
-
2164
- cmd = command.to_s.upcase
2165
- command_id = Sphinx::Client.const_get("SEARCHD_COMMAND_#{cmd}")
2166
- command_ver = Sphinx::Client.const_get("VER_COMMAND_#{cmd}")
2167
-
2168
- with_socket(server) do |socket|
2169
- len = request.to_s.length + (additional.nil? ? 0 : 4)
2170
- header = [command_id, command_ver, len].pack('nnN')
2171
- header << [additional].pack('N') unless additional.nil?
2172
-
2173
- socket.write(header + request.to_s)
2174
-
2175
- if block_given?
2176
- yield server, socket
2177
- else
2178
- parse_response(socket, command_ver)
2179
- end
2180
- end
1128
+ # send my version
1129
+ # this is a subtle part. we must do it before (!) reading back from searchd.
1130
+ # because otherwise under some conditions (reported on FreeBSD for instance)
1131
+ # TCP stack could throttle write-write-read pattern because of Nagle.
1132
+ sock.send([1].pack('N'), 0)
1133
+
1134
+ v = sock.recv(4).unpack('N*').first
1135
+ if v < 1
1136
+ sock.close
1137
+ @error = "expected searchd protocol version 1+, got version '#{v}'"
1138
+ raise SphinxConnectError, @error
2181
1139
  end
1140
+
1141
+ sock
2182
1142
  end
2183
-
2184
- # This is internal method which gets and parses response packet from
2185
- # searchd server.
2186
- #
2187
- # There are several exceptions which could be thrown in this method:
2188
- #
2189
- # @param [Sphinx::BufferedIO] socket an input stream object.
2190
- # @param [Integer] client_version a command version which client supports.
2191
- # @return [Sphinx::Response] could be used for context-based
2192
- # parsing of reply from the server.
2193
- #
2194
- # @raise [SystemCallError, SocketError] should be handled by caller (see {#with_socket}).
2195
- # @raise [SphinxResponseError] incomplete reply from searchd.
2196
- # @raise [SphinxInternalError] searchd internal error.
2197
- # @raise [SphinxTemporaryError] searchd temporary error.
2198
- # @raise [SphinxUnknownError] searchd unknown error.
2199
- #
2200
- # @see #with_socket
2201
- # @private
2202
- #
2203
- def parse_response(socket, client_version)
1143
+
1144
+ # Get and check response packet from searchd server.
1145
+ def GetResponse(sock, client_version)
2204
1146
  response = ''
2205
- status = ver = len = 0
2206
-
2207
- # Read server reply from server. All exceptions are handled by {#with_socket}.
2208
- header = socket.read(8)
1147
+ len = 0
1148
+
1149
+ header = sock.recv(8)
2209
1150
  if header.length == 8
2210
1151
  status, ver, len = header.unpack('n2N')
2211
- response = socket.read(len) if len > 0
1152
+ left = len.to_i
1153
+ while left > 0 do
1154
+ begin
1155
+ chunk = sock.recv(left)
1156
+ if chunk
1157
+ response << chunk
1158
+ left -= chunk.length
1159
+ end
1160
+ rescue EOFError
1161
+ break
1162
+ end
1163
+ end
2212
1164
  end
2213
-
1165
+ sock.close if @socket === false
1166
+
2214
1167
  # check response
2215
1168
  read = response.length
2216
1169
  if response.empty? or read != len.to_i
2217
- error = len > 0 \
1170
+ @error = len \
2218
1171
  ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
2219
1172
  : 'received zero-sized searchd response'
2220
- raise SphinxResponseError, error
1173
+ raise SphinxResponseError, @error
2221
1174
  end
2222
-
1175
+
2223
1176
  # check status
2224
1177
  if (status == SEARCHD_WARNING)
2225
1178
  wlen = response[0, 4].unpack('N*').first
@@ -2228,199 +1181,50 @@ module Sphinx
2228
1181
  end
2229
1182
 
2230
1183
  if status == SEARCHD_ERROR
2231
- error = 'searchd error: ' + response[4, response.length - 4]
2232
- raise SphinxInternalError, error
1184
+ @error = 'searchd error: ' + response[4, response.length - 4]
1185
+ raise SphinxInternalError, @error
2233
1186
  end
2234
-
1187
+
2235
1188
  if status == SEARCHD_RETRY
2236
- error = 'temporary searchd error: ' + response[4, response.length - 4]
2237
- raise SphinxTemporaryError, error
1189
+ @error = 'temporary searchd error: ' + response[4, response.length - 4]
1190
+ raise SphinxTemporaryError, @error
2238
1191
  end
2239
-
1192
+
2240
1193
  unless status == SEARCHD_OK
2241
- error = "unknown status code: '#{status}'"
2242
- raise SphinxUnknownError, error
1194
+ @error = "unknown status code: '#{status}'"
1195
+ raise SphinxUnknownError, @error
2243
1196
  end
2244
-
1197
+
2245
1198
  # check version
2246
1199
  if ver < client_version
2247
1200
  @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
2248
1201
  "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
2249
1202
  end
2250
-
2251
- Response.new(response)
2252
- end
2253
-
2254
- # This is internal method which selects next server (round-robin)
2255
- # and yields it to the block passed.
2256
- #
2257
- # In case of connection error, it will try next server several times
2258
- # (see {#set_connect_timeout} method details). If all servers are down,
2259
- # it will set error attribute (could be retrieved with {#last_error}
2260
- # method) with the last exception message, and {#connect_error?}
2261
- # method will return true. Also, {SphinxConnectError} exception
2262
- # will be raised.
2263
- #
2264
- # @overload with_server(server_index)
2265
- # Get the server based on some seed value (usually CRC32 of
2266
- # request). In this case the initial server will be chosen using
2267
- # this seed value; in case of connection failure the next server
2268
- # in the servers list will be used.
2269
- # @param [Integer] server_index server index, must be any
2270
- # integer value (not necessarily less than number of servers.)
2271
- # @param [Integer] attempts how many retries to perform. Use
2272
- # +nil+ to perform retries configured with {#set_connect_timeout}.
2273
- # @overload with_server(server)
2274
- # Get the server specified as a parameter. If specified, request
2275
- # will be performed on specified server, and it will try to
2276
- # establish connection to this server only once.
2277
- # @param [Server] server server to perform request on.
2278
- # @param [Integer] attempts how many retries to perform. Use
2279
- # +nil+ to perform retries configured with {#set_connect_timeout}.
2280
- #
2281
- # @yield a block which performs request on a given server.
2282
- # @yieldparam [Sphinx::Server] server contains information
2283
- # about the server to perform request on.
2284
- # @raise [SphinxConnectError] on any connection error.
2285
- #
2286
- def with_server(server = nil, attempts = nil)
2287
- case server
2288
- when Server
2289
- idx = @servers.index(server) || 0
2290
- s = server
2291
- when Integer
2292
- idx = server % @servers.size
2293
- s = @servers[idx]
2294
- when NilClass
2295
- idx = 0
2296
- s = @servers[idx]
2297
- else
2298
- raise ArgumentError, 'server argument must be Integer or Sphinx::Server'
2299
- end
2300
- attempts ||= @retries
2301
- begin
2302
- yield s
2303
- rescue SphinxConnectError => e
2304
- logger.warn { "[sphinx] server failed: #{e.class.name}: #{e.message}" } if logger
2305
- # Connection error! Do we need to try it again?
2306
- attempts -= 1
2307
- if attempts > 0
2308
- logger.info { "[sphinx] connection to server #{s.inspect} DIED! Retrying operation..." } if logger
2309
- # Get the next server
2310
- idx = (idx + 1) % @servers.size
2311
- s = @servers[idx]
2312
- retry
2313
- end
2314
-
2315
- # Re-raise original exception
2316
- @error = e.message
2317
- @connerror = true
2318
- raise
2319
- end
1203
+
1204
+ return response
2320
1205
  end
2321
-
2322
- # This is internal method which retrieves socket for a given server,
2323
- # initiates Sphinx session, and yields this socket to a block passed.
2324
- #
2325
- # In case of any problems with session initiation, {SphinxConnectError}
2326
- # will be raised, because this is part of connection establishing. See
2327
- # {#with_server} method details to get more information about how this
2328
- # exception is handled.
2329
- #
2330
- # Socket retrieving routine is wrapped in a block with its own
2331
- # timeout value (see {#set_connect_timeout}). This is done in
2332
- # {Server#get_socket} method, so check it for details.
2333
- #
2334
- # Request execution is wrapped with block with another timeout
2335
- # (see {#set_request_timeout}). This ensures no Sphinx request will
2336
- # take unreasonable time.
2337
- #
2338
- # In case of any Sphinx error (incomplete reply, internal or temporary
2339
- # error), connection to the server will be re-established, and request
2340
- # will be retried (see {#set_request_timeout}). Of course, if connection
2341
- # could not be established, next server will be selected (see explanation
2342
- # above).
2343
- #
2344
- # @param [Sphinx::Server] server contains information
2345
- # about the server to perform request on.
2346
- # @yield a block which will actually perform the request.
2347
- # @yieldparam [Sphinx::BufferedIO] socket a socket used to
2348
- # perform the request.
2349
- #
2350
- # @raise [SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError]
2351
- # on any response error.
2352
- # @raise [SphinxConnectError] on any connection error.
2353
- #
2354
- def with_socket(server)
2355
- attempts = @reqretries
2356
- socket = nil
2357
-
2358
- begin
2359
- s = server.get_socket do |sock|
2360
- # Remember socket to close it in case of emergency
2361
- socket = sock
2362
-
2363
- # send my version
2364
- # this is a subtle part. we must do it before (!) reading back from searchd.
2365
- # because otherwise under some conditions (reported on FreeBSD for instance)
2366
- # TCP stack could throttle write-write-read pattern because of Nagle.
2367
- sock.write([1].pack('N'))
2368
- v = sock.read(4).unpack('N*').first
2369
-
2370
- # Ouch, invalid protocol!
2371
- if v < 1
2372
- raise SphinxConnectError, "expected searchd protocol version 1+, got version '#{v}'"
2373
- end
2374
- end
2375
-
2376
- Sphinx::safe_execute(@reqtimeout) do
2377
- yield s
2378
- end
2379
- rescue SocketError, SystemCallError, IOError, ::Errno::EPIPE => e
2380
- logger.warn { "[sphinx] socket failure: #{e.message}" } if logger
2381
- # Ouch, communication problem, will be treated as a connection problem.
2382
- raise SphinxConnectError, "failed to read searchd response (msg=#{e.message})"
2383
- rescue SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError, ::Timeout::Error, EOFError => e
2384
- # EOFError should not occur in ideal world, because we compare response length
2385
- # with a value passed by Sphinx. But we want to ensure that client will not
2386
- # fail with unexpected error when Sphinx implementation has bugs, don't we?
2387
- if e.kind_of?(EOFError) or e.kind_of?(::Timeout::Error)
2388
- new_e = SphinxResponseError.new("failed to read searchd response (msg=#{e.message})")
2389
- new_e.set_backtrace(e.backtrace)
2390
- e = new_e
2391
- end
2392
- logger.warn { "[sphinx] generic failure: #{e.class.name}: #{e.message}" } if logger
2393
-
2394
- # Close previously opened socket (in case of it has been really opened)
2395
- server.free_socket(socket)
2396
-
2397
- # Request error! Do we need to try it again?
2398
- attempts -= 1
2399
- retry if attempts > 0
2400
-
2401
- # Re-raise original exception
2402
- @error = e.message
2403
- raise e
2404
- ensure
2405
- # Close previously opened socket on any other error
2406
- server.free_socket(socket)
2407
- end
1206
+
1207
+ # Connect, send query, get response.
1208
+ def PerformRequest(command, request, additional = nil, skip_response = false)
1209
+ cmd = command.to_s.upcase
1210
+ command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1211
+ command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1212
+
1213
+ sock = self.Connect
1214
+ len = request.to_s.length + (additional != nil ? 4 : 0)
1215
+ header = [command_id, command_ver, len].pack('nnN')
1216
+ header << [additional].pack('N') if additional != nil
1217
+ sock.send(header + request.to_s, 0)
1218
+
1219
+ return sock if skip_response
1220
+ response = self.GetResponse(sock, command_ver)
1221
+ return Response.new(response)
2408
1222
  end
2409
-
2410
- # Enables ability to skip +set_+ prefix for methods inside {#query} block.
2411
- #
2412
- # @example
2413
- # sphinx.query('test') do
2414
- # match_mode :all
2415
- # id_range 10, 100
2416
- # end
2417
- #
2418
- def method_missing(method_id, *arguments, &block)
2419
- if @inside_eval and self.respond_to?("set_#{method_id}")
2420
- self.send("set_#{method_id}", *arguments)
2421
- else
2422
- super
2423
- end
1223
+
1224
+ # :stopdoc:
1225
+ def assert
1226
+ raise 'Assertion failed!' unless yield if $DEBUG
2424
1227
  end
1228
+ # :startdoc:
2425
1229
  end
2426
1230
  end