sphinx 0.9.9.2117 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sphinx/client.rb CHANGED
@@ -1,50 +1,202 @@
1
+ # = client.rb - Sphinx Client API
2
+ #
3
+ # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
+ # Copyright:: Copyright (c) 2006 — 2009 Dmytro Shteflyuk
5
+ # License:: Distributed under the same terms as Ruby
6
+ # Version:: 0.9.10-r2043
7
+ # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
+ #
9
+ # This library is distributed under the terms of the Ruby license.
10
+ # You can freely distribute/modify this library.
11
+
12
+ # ==Sphinx Client API
13
+ #
14
+ # The Sphinx Client API is used to communicate with <tt>searchd</tt>
15
+ # daemon and get search results from Sphinx.
16
+ #
17
+ # ===Usage
18
+ #
19
+ # sphinx = Sphinx::Client.new
20
+ # result = sphinx.Query('test')
21
+ # ids = result['matches'].map { |match| match['id'] }.join(',')
22
+ # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
+ #
24
+ # docs = posts.map(&:body)
25
+ # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
+
27
+ require 'socket'
28
+
1
29
  module Sphinx
2
- # The Sphinx Client API is used to communicate with <tt>searchd</tt>
3
- # daemon and perform requests.
4
- #
5
- # @example
6
- # sphinx = Sphinx::Client.new
7
- # result = sphinx.query('test')
8
- # ids = result['matches'].map { |match| match['id'] }
9
- # posts = Post.all :conditions => { :id => ids },
10
- # :order => "FIELD(id,#{ids.join(',')})"
11
- #
12
- # docs = posts.map(&:body)
13
- # excerpts = sphinx.build_excerpts(docs, 'index', 'test')
14
- #
15
- class Client
16
- include Sphinx::Constants
17
-
18
- #=================================================================
19
- # Some internal attributes to use inside client API
20
- #=================================================================
30
+ # :stopdoc:
21
31
 
22
- # List of searchd servers to connect to.
23
- # @private
24
- attr_reader :servers
25
- # Connection timeout in seconds.
26
- # @private
27
- attr_reader :timeout
28
- # Number of connection retries.
29
- # @private
30
- attr_reader :retries
31
- # Request timeout in seconds.
32
- # @private
33
- attr_reader :reqtimeout
34
- # Number of request retries.
35
- # @private
36
- attr_reader :reqretries
37
- # Log debug/info/warn to the given Logger, defaults to nil.
38
- # @private
39
- attr_reader :logger
32
+ class SphinxError < StandardError; end
33
+ class SphinxArgumentError < SphinxError; end
34
+ class SphinxConnectError < SphinxError; end
35
+ class SphinxResponseError < SphinxError; end
36
+ class SphinxInternalError < SphinxError; end
37
+ class SphinxTemporaryError < SphinxError; end
38
+ class SphinxUnknownError < SphinxError; end
40
39
 
41
- # Constructs the <tt>Sphinx::Client</tt> object and sets options
42
- # to their default values.
43
- #
44
- # @param [Logger] logger a logger object to put logs to. No logging
45
- # will be performed when not set.
46
- #
47
- def initialize(logger = nil)
40
+ # :startdoc:
41
+
42
+ class Client
43
+
44
+ # :stopdoc:
45
+
46
+ # Known searchd commands
47
+
48
+ # search command
49
+ SEARCHD_COMMAND_SEARCH = 0
50
+ # excerpt command
51
+ SEARCHD_COMMAND_EXCERPT = 1
52
+ # update command
53
+ SEARCHD_COMMAND_UPDATE = 2
54
+ # keywords command
55
+ SEARCHD_COMMAND_KEYWORDS = 3
56
+ # persist command
57
+ SEARCHD_COMMAND_PERSIST = 4
58
+ # status command
59
+ SEARCHD_COMMAND_STATUS = 5
60
+ # query command
61
+ SEARCHD_COMMAND_QUERY = 6
62
+ # flushattrs command
63
+ SEARCHD_COMMAND_FLUSHATTRS = 7
64
+
65
+ # Current client-side command implementation versions
66
+
67
+ # search command version
68
+ VER_COMMAND_SEARCH = 0x117
69
+ # excerpt command version
70
+ VER_COMMAND_EXCERPT = 0x100
71
+ # update command version
72
+ VER_COMMAND_UPDATE = 0x102
73
+ # keywords command version
74
+ VER_COMMAND_KEYWORDS = 0x100
75
+ # persist command version
76
+ VER_COMMAND_PERSIST = 0x000
77
+ # status command version
78
+ VER_COMMAND_STATUS = 0x100
79
+ # query command version
80
+ VER_COMMAND_QUERY = 0x100
81
+ # flushattrs command version
82
+ VER_COMMAND_FLUSHATTRS = 0x100
83
+
84
+ # Known searchd status codes
85
+
86
+ # general success, command-specific reply follows
87
+ SEARCHD_OK = 0
88
+ # general failure, command-specific reply may follow
89
+ SEARCHD_ERROR = 1
90
+ # temporary failure, client should retry later
91
+ SEARCHD_RETRY = 2
92
+ # general success, warning message and command-specific reply follow
93
+ SEARCHD_WARNING = 3
94
+
95
+ # :startdoc:
96
+
97
+ # Known match modes
98
+
99
+ # match all query words
100
+ SPH_MATCH_ALL = 0
101
+ # match any query word
102
+ SPH_MATCH_ANY = 1
103
+ # match this exact phrase
104
+ SPH_MATCH_PHRASE = 2
105
+ # match this boolean query
106
+ SPH_MATCH_BOOLEAN = 3
107
+ # match this extended query
108
+ SPH_MATCH_EXTENDED = 4
109
+ # match all document IDs w/o fulltext query, apply filters
110
+ SPH_MATCH_FULLSCAN = 5
111
+ # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
112
+ SPH_MATCH_EXTENDED2 = 6
113
+
114
+ # Known ranking modes (ext2 only)
115
+
116
+ # default mode, phrase proximity major factor and BM25 minor one
117
+ SPH_RANK_PROXIMITY_BM25 = 0
118
+ # statistical mode, BM25 ranking only (faster but worse quality)
119
+ SPH_RANK_BM25 = 1
120
+ # no ranking, all matches get a weight of 1
121
+ SPH_RANK_NONE = 2
122
+ # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
123
+ SPH_RANK_WORDCOUNT = 3
124
+ # phrase proximity
125
+ SPH_RANK_PROXIMITY = 4
126
+ # emulate old match-any weighting
127
+ SPH_RANK_MATCHANY = 5
128
+ # sets bits where there were matches
129
+ SPH_RANK_FIELDMASK = 6
130
+ # codename SPH04, phrase proximity + bm25 + head/exact boost
131
+ SPH_RANK_SPH04 = 7
132
+
133
+ # Known sort modes
134
+
135
+ # sort by document relevance desc, then by date
136
+ SPH_SORT_RELEVANCE = 0
137
+ # sort by document date desc, then by relevance desc
138
+ SPH_SORT_ATTR_DESC = 1
139
+ # sort by document date asc, then by relevance desc
140
+ SPH_SORT_ATTR_ASC = 2
141
+ # sort by time segments (hour/day/week/etc) desc, then by relevance desc
142
+ SPH_SORT_TIME_SEGMENTS = 3
143
+ # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
144
+ SPH_SORT_EXTENDED = 4
145
+ # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
146
+ SPH_SORT_EXPR = 5
147
+
148
+ # Known filter types
149
+
150
+ # filter by integer values set
151
+ SPH_FILTER_VALUES = 0
152
+ # filter by integer range
153
+ SPH_FILTER_RANGE = 1
154
+ # filter by float range
155
+ SPH_FILTER_FLOATRANGE = 2
156
+
157
+ # Known attribute types
158
+
159
+ # this attr is just an integer
160
+ SPH_ATTR_INTEGER = 1
161
+ # this attr is a timestamp
162
+ SPH_ATTR_TIMESTAMP = 2
163
+ # this attr is an ordinal string number (integer at search time,
164
+ # specially handled at indexing time)
165
+ SPH_ATTR_ORDINAL = 3
166
+ # this attr is a boolean bit field
167
+ SPH_ATTR_BOOL = 4
168
+ # this attr is a float
169
+ SPH_ATTR_FLOAT = 5
170
+ # signed 64-bit integer
171
+ SPH_ATTR_BIGINT = 6
172
+ # string (binary; in-memory)
173
+ SPH_ATTR_STRING = 7
174
+ # this attr has multiple values (0 or more)
175
+ SPH_ATTR_MULTI = 0x40000000
176
+
177
+ # Known grouping functions
178
+
179
+ # group by day
180
+ SPH_GROUPBY_DAY = 0
181
+ # group by week
182
+ SPH_GROUPBY_WEEK = 1
183
+ # group by month
184
+ SPH_GROUPBY_MONTH = 2
185
+ # group by year
186
+ SPH_GROUPBY_YEAR = 3
187
+ # group by attribute value
188
+ SPH_GROUPBY_ATTR = 4
189
+ # group by sequential attrs pair
190
+ SPH_GROUPBY_ATTRPAIR = 5
191
+
192
+ # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
193
+ def initialize
194
+ # per-client-object settings
195
+ @host = 'localhost' # searchd host (default is "localhost")
196
+ @port = 3312 # searchd port (default is 3312)
197
+ @path = false
198
+ @socket = false
199
+
48
200
  # per-query settings
49
201
  @offset = 0 # how many records to seek from result-set start (default is 0)
50
202
  @limit = 20 # how many records to return from result-set starting at offset (default is 20)
@@ -66,1411 +218,401 @@ module Sphinx
66
218
  @anchor = [] # geographical anchor point
67
219
  @indexweights = [] # per-index weights
68
220
  @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
69
- @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
221
+ @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
70
222
  @fieldweights = {} # per-field-name weights
71
223
  @overrides = [] # per-query attribute values overrides
72
224
  @select = '*' # select-list (attributes or expressions, with optional aliases)
73
-
225
+
74
226
  # per-reply fields (for single-query case)
75
227
  @error = '' # last error message
76
228
  @warning = '' # last warning message
77
229
  @connerror = false # connection error vs remote error flag
78
-
230
+
79
231
  @reqs = [] # requests storage (for multi-query case)
80
232
  @mbenc = '' # stored mbstring encoding
81
233
  @timeout = 0 # connect timeout
82
- @retries = 1 # number of connect retries in case of emergency
83
- @reqtimeout = 0 # request timeout
84
- @reqretries = 1 # number of request retries in case of emergency
85
-
86
- # per-client-object settings
87
- # searchd servers list
88
- @servers = [Sphinx::Server.new(self, 'localhost', 9312, false)].freeze
89
- @logger = logger
90
-
91
- logger.info { "[sphinx] version: #{VERSION}, #{@servers.inspect}" } if logger
92
- end
93
-
94
- # Returns a string representation of the sphinx client object.
95
- #
96
- def inspect
97
- params = {
98
- :error => @error,
99
- :warning => @warning,
100
- :connect_error => @connerror,
101
- :servers => @servers,
102
- :connect_timeout => { :timeout => @timeout, :retries => @retries },
103
- :request_timeout => { :timeout => @reqtimeout, :retries => @reqretries },
104
- :retries => { :count => @retrycount, :delay => @retrydelay },
105
- :limits => { :offset => @offset, :limit => @limit, :max => @maxmatches, :cutoff => @cutoff },
106
- :max_query_time => @maxquerytime,
107
- :overrides => @overrides,
108
- :select => @select,
109
- :match_mode => @mode,
110
- :ranking_mode => @ranker,
111
- :sort_mode => { :mode => @sort, :sortby => @sortby },
112
- :weights => @weights,
113
- :field_weights => @fieldweights,
114
- :index_weights => @indexweights,
115
- :id_range => { :min => @min_id, :max => @max_id },
116
- :filters => @filters,
117
- :geo_anchor => @anchor,
118
- :group_by => { :attribute => @groupby, :func => @groupfunc, :sort => @groupsort },
119
- :group_distinct => @groupdistinct
120
- }
121
-
122
- "<Sphinx::Client: %d servers, params: %s>" %
123
- [@servers.length, params.inspect]
124
234
  end
125
-
126
- #=================================================================
127
- # General API functions
128
- #=================================================================
129
-
130
- # Returns last error message, as a string, in human readable format. If there
131
- # were no errors during the previous API call, empty string is returned.
132
- #
133
- # You should call it when any other function (such as {#query}) fails (typically,
134
- # the failing function returns false). The returned string will contain the
135
- # error description.
136
- #
137
- # The error message is not reset by this call; so you can safely call it
138
- # several times if needed.
139
- #
140
- # @return [String] last error message.
141
- #
142
- # @example
143
- # puts sphinx.last_error
144
- #
145
- # @see #last_warning
146
- # @see #connect_error?
147
- #
148
- def last_error
235
+
236
+ # Get last error message.
237
+ def GetLastError
149
238
  @error
150
239
  end
151
- alias :GetLastError :last_error
152
-
153
- # Returns last warning message, as a string, in human readable format. If there
154
- # were no warnings during the previous API call, empty string is returned.
155
- #
156
- # You should call it to verify whether your request (such as {#query}) was
157
- # completed but with warnings. For instance, search query against a distributed
158
- # index might complete successfully even if several remote agents timed out.
159
- # In that case, a warning message would be produced.
160
- #
161
- # The warning message is not reset by this call; so you can safely call it
162
- # several times if needed.
163
- #
164
- # @return [String] last warning message.
165
- #
166
- # @example
167
- # puts sphinx.last_warning
168
- #
169
- # @see #last_error
170
- # @see #connect_error?
171
- #
172
- def last_warning
240
+
241
+ # Get last warning message.
242
+ def GetLastWarning
173
243
  @warning
174
244
  end
175
- alias :GetLastWarning :last_warning
176
-
177
- # Checks whether the last error was a network error on API side, or a
178
- # remote error reported by searchd. Returns true if the last connection
179
- # attempt to searchd failed on API side, false otherwise (if the error
180
- # was remote, or there were no connection attempts at all).
181
- #
182
- # @return [Boolean] the value indicating whether last error was a
183
- # network error on API side.
184
- #
185
- # @example
186
- # puts "Connection failed!" if sphinx.connect_error?
187
- #
188
- # @see #last_error
189
- # @see #last_warning
190
- #
191
- def connect_error?
192
- @connerror || false
245
+
246
+ # Get last error flag (to tell network connection errors from
247
+ # searchd errors or broken responses)
248
+ def IsConnectError
249
+ @connerror
193
250
  end
194
- alias :IsConnectError :connect_error?
195
-
196
- # Sets searchd host name and TCP port. All subsequent requests will
197
- # use the new host and port settings. Default +host+ and +port+ are
198
- # 'localhost' and 9312, respectively.
199
- #
200
- # Also, you can specify an absolute path to Sphinx's UNIX socket as +host+,
201
- # in this case pass port as +0+ or +nil+.
202
- #
203
- # @param [String] host the searchd host name or UNIX socket absolute path.
204
- # @param [Integer] port the searchd port name (could be any if UNIX
205
- # socket path specified).
206
- # @return [Sphinx::Client] self.
207
- #
208
- # @example
209
- # sphinx.set_server('localhost', 9312)
210
- # sphinx.set_server('/opt/sphinx/var/run/sphinx.sock')
211
- #
212
- # @raise [ArgumentError] Occurred when parameters are invalid.
213
- # @see #set_servers
214
- # @see #set_connect_timeout
215
- # @see #set_request_timeout
216
- #
217
- def set_server(host, port = 9312)
218
- raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)
219
-
220
- path = nil
221
- # Check if UNIX socket should be used
251
+
252
+ # Set searchd host name (string) and port (integer).
253
+ def SetServer(host, port)
254
+ assert { host.instance_of? String }
255
+
222
256
  if host[0] == ?/
223
- path = host
257
+ @path = host
258
+ return
224
259
  elsif host[0, 7] == 'unix://'
225
- path = host[7..-1]
226
- else
227
- raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
260
+ @path = host[7..-1]
228
261
  end
262
+
263
+ assert { port.instance_of? Fixnum }
229
264
 
230
- host = port = nil unless path.nil?
231
-
232
- @servers = [Sphinx::Server.new(self, host, port, path)].freeze
233
- logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
234
- self
235
- end
236
- alias :SetServer :set_server
237
-
238
- # Sets the list of searchd servers. Each subsequent request will use next
239
- # server in list (round-robin). In case of one server failure, request could
240
- # be retried on another server (see {#set_connect_timeout} and
241
- # {#set_request_timeout}).
242
- #
243
- # Method accepts an +Array+ of +Hash+es, each of them should have <tt>:host</tt>
244
- # and <tt>:port</tt> (to connect to searchd through network) or <tt>:path</tt>
245
- # (an absolute path to UNIX socket) specified.
246
- #
247
- # @param [Array<Hash>] servers an +Array+ of +Hash+ objects with servers parameters.
248
- # @option servers [String] :host the searchd host name or UNIX socket absolute path.
249
- # @option servers [String] :path the searchd UNIX socket absolute path.
250
- # @option servers [Integer] :port (9312) the searchd port name (skipped when UNIX
251
- # socket path specified)
252
- # @return [Sphinx::Client] self.
253
- #
254
- # @example
255
- # sphinx.set_servers([
256
- # { :host => 'browse01.local' }, # default port is 9312
257
- # { :host => 'browse02.local', :port => 9312 },
258
- # { :path => '/opt/sphinx/var/run/sphinx.sock' }
259
- # ])
260
- #
261
- # @raise [ArgumentError] Occurred when parameters are invalid.
262
- # @see #set_server
263
- # @see #set_connect_timeout
264
- # @see #set_request_timeout
265
- #
266
- def set_servers(servers)
267
- raise ArgumentError, '"servers" argument must be Array' unless servers.kind_of?(Array)
268
- raise ArgumentError, '"servers" argument must be not empty' if servers.empty?
269
-
270
- @servers = servers.map do |server|
271
- raise ArgumentError, '"servers" argument must be Array of Hashes' unless server.kind_of?(Hash)
272
-
273
- server = server.with_indifferent_access
274
-
275
- host = server[:path] || server[:host]
276
- port = server[:port] || 9312
277
- path = nil
278
- raise ArgumentError, '"host" argument must be String' unless host.kind_of?(String)
279
-
280
- # Check if UNIX socket should be used
281
- if host[0] == ?/
282
- path = host
283
- elsif host[0, 7] == 'unix://'
284
- path = host[7..-1]
285
- else
286
- raise ArgumentError, '"port" argument must be Integer' unless port.kind_of?(Integer)
287
- end
288
-
289
- host = port = nil unless path.nil?
290
-
291
- Sphinx::Server.new(self, host, port, path)
292
- end.freeze
293
- logger.info { "[sphinx] servers now: #{@servers.inspect}" } if logger
294
- self
265
+ @host = host
266
+ @port = port
295
267
  end
296
- alias :SetServers :set_servers
297
-
298
- # Sets the time allowed to spend connecting to the server before giving up
299
- # and number of retries to perform.
300
- #
301
- # In the event of a failure to connect, an appropriate error code should
302
- # be returned back to the application in order for application-level error
303
- # handling to advise the user.
304
- #
305
- # When multiple servers configured through {#set_servers} method, and +retries+
306
- # number is greater than 1, library will try to connect to another server.
307
- # In case of single server configured, it will try to reconnect +retries+
308
- # times.
309
- #
310
- # Please note, this timeout will only be used for connection establishing, not
311
- # for regular API requests.
312
- #
313
- # @param [Integer] timeout a connection timeout in seconds.
314
- # @param [Integer] retries number of connect retries.
315
- # @return [Sphinx::Client] self.
316
- #
317
- # @example Set connection timeout to 1 second and number of retries to 5
318
- # sphinx.set_connect_timeout(1, 5)
319
- #
320
- # @raise [ArgumentError] Occurred when parameters are invalid.
321
- # @see #set_server
322
- # @see #set_servers
323
- # @see #set_request_timeout
324
- #
325
- def set_connect_timeout(timeout, retries = 1)
326
- raise ArgumentError, '"timeout" argument must be Integer' unless timeout.kind_of?(Integer)
327
- raise ArgumentError, '"retries" argument must be Integer' unless retries.kind_of?(Integer)
328
- raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0
329
-
268
+
269
+ def SetConnectTimeout(timeout)
270
+ assert { timeout.instance_of? Fixnum }
271
+
330
272
  @timeout = timeout
331
- @retries = retries
332
- self
333
- end
334
- alias :SetConnectTimeout :set_connect_timeout
335
-
336
- # Sets the time allowed to spend performing request to the server before giving up
337
- # and number of retries to perform.
338
- #
339
- # In the event of a failure to do request, an appropriate error code should
340
- # be returned back to the application in order for application-level error
341
- # handling to advise the user.
342
- #
343
- # When multiple servers configured through {#set_servers} method, and +retries+
344
- # number is greater than 1, library will try to do another try with this server
345
- # (with full reconnect). If connection would fail, behavior depends on
346
- # {#set_connect_timeout} settings.
347
- #
348
- # Please note, this timeout will only be used for request performing, not
349
- # for connection establishing.
350
- #
351
- # @param [Integer] timeout a request timeout in seconds.
352
- # @param [Integer] retries number of request retries.
353
- # @return [Sphinx::Client] self.
354
- #
355
- # @example Set request timeout to 1 second and number of retries to 5
356
- # sphinx.set_request_timeout(1, 5)
357
- #
358
- # @raise [ArgumentError] Occurred when parameters are invalid.
359
- # @see #set_server
360
- # @see #set_servers
361
- # @see #set_connect_timeout
362
- #
363
- def set_request_timeout(timeout, retries = 1)
364
- raise ArgumentError, '"timeout" argument must be Integer' unless timeout.kind_of?(Integer)
365
- raise ArgumentError, '"retries" argument must be Integer' unless retries.kind_of?(Integer)
366
- raise ArgumentError, '"retries" argument must be greater than 0' unless retries > 0
367
-
368
- @reqtimeout = timeout
369
- @reqretries = retries
370
- self
371
- end
372
- alias :SetRequestTimeout :set_request_timeout
373
-
374
- # Sets distributed retry count and delay.
375
- #
376
- # On temporary failures searchd will attempt up to +count+ retries
377
- # per agent. +delay+ is the delay between the retries, in milliseconds.
378
- # Retries are disabled by default. Note that this call will not make
379
- # the API itself retry on temporary failure; it only tells searchd
380
- # to do so. Currently, the list of temporary failures includes all
381
- # kinds of connection failures and maxed out (too busy) remote agents.
382
- #
383
- # @param [Integer] count a number of retries to perform.
384
- # @param [Integer] delay a delay between the retries.
385
- # @return [Sphinx::Client] self.
386
- #
387
- # @example Perform 5 retries with 200 ms between them
388
- # sphinx.set_retries(5, 200)
389
- #
390
- # @raise [ArgumentError] Occurred when parameters are invalid.
391
- # @see #set_connect_timeout
392
- # @see #set_request_timeout
393
- #
394
- def set_retries(count, delay = 0)
395
- raise ArgumentError, '"count" argument must be Integer' unless count.kind_of?(Integer)
396
- raise ArgumentError, '"delay" argument must be Integer' unless delay.kind_of?(Integer)
397
-
398
- @retrycount = count
399
- @retrydelay = delay
400
- self
401
273
  end
402
- alias :SetRetries :set_retries
403
-
404
- #=================================================================
405
- # General query settings
406
- #=================================================================
407
-
408
- # Sets offset into server-side result set (+offset+) and amount of matches to
409
- # return to client starting from that offset (+limit+). Can additionally control
410
- # maximum server-side result set size for current query (+max_matches+) and the
411
- # threshold amount of matches to stop searching at (+cutoff+). All parameters
412
- # must be non-negative integers.
413
- #
414
- # First two parameters to {#set_limits} are identical in behavior to MySQL LIMIT
415
- # clause. They instruct searchd to return at most +limit+ matches starting from
416
- # match number +offset+. The default offset and limit settings are +0+ and +20+,
417
- # that is, to return first +20+ matches.
418
- #
419
- # +max_matches+ setting controls how much matches searchd will keep in RAM
420
- # while searching. All matching documents will be normally processed, ranked,
421
- # filtered, and sorted even if max_matches is set to +1+. But only best +N+
422
- # documents are stored in memory at any given moment for performance and RAM
423
- # usage reasons, and this setting controls that N. Note that there are two
424
- # places where max_matches limit is enforced. Per-query limit is controlled
425
- # by this API call, but there also is per-server limit controlled by +max_matches+
426
- # setting in the config file. To prevent RAM usage abuse, server will not
427
- # allow to set per-query limit higher than the per-server limit.
428
- #
429
- # You can't retrieve more than +max_matches+ matches to the client application.
430
- # The default limit is set to +1000+. Normally, you must not have to go over
431
- # this limit. One thousand records is enough to present to the end user.
432
- # And if you're thinking about pulling the results to application for further
433
- # sorting or filtering, that would be much more efficient if performed on
434
- # Sphinx side.
435
- #
436
- # +cutoff+ setting is intended for advanced performance control. It tells
437
- # searchd to forcibly stop search query once $cutoff matches had been found
438
- # and processed.
439
- #
440
- # @param [Integer] offset an offset into server-side result set.
441
- # @param [Integer] limit an amount of matches to return.
442
- # @param [Integer] max a maximum server-side result set size.
443
- # @param [Integer] cutoff a threshold amount of matches to stop searching at.
444
- # @return [Sphinx::Client] self.
445
- #
446
- # @example
447
- # sphinx.set_limits(100, 50, 1000, 5000)
448
- #
449
- # @raise [ArgumentError] Occurred when parameters are invalid.
450
- #
451
- def set_limits(offset, limit, max = 0, cutoff = 0)
452
- raise ArgumentError, '"offset" argument must be Integer' unless offset.kind_of?(Integer)
453
- raise ArgumentError, '"limit" argument must be Integer' unless limit.kind_of?(Integer)
454
- raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
455
- raise ArgumentError, '"cutoff" argument must be Integer' unless cutoff.kind_of?(Integer)
456
-
457
- raise ArgumentError, '"offset" argument should be greater or equal to zero' unless offset >= 0
458
- raise ArgumentError, '"limit" argument should be greater to zero' unless limit > 0
459
- raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
460
- raise ArgumentError, '"cutoff" argument should be greater or equal to zero' unless cutoff >= 0
274
+
275
+ # Set offset and count into result set,
276
+ # and optionally set max-matches and cutoff limits.
277
+ def SetLimits(offset, limit, max = 0, cutoff = 0)
278
+ assert { offset.instance_of? Fixnum }
279
+ assert { limit.instance_of? Fixnum }
280
+ assert { max.instance_of? Fixnum }
281
+ assert { offset >= 0 }
282
+ assert { limit > 0 }
283
+ assert { max >= 0 }
461
284
 
462
285
  @offset = offset
463
286
  @limit = limit
464
287
  @maxmatches = max if max > 0
465
288
  @cutoff = cutoff if cutoff > 0
466
- self
467
289
  end
468
- alias :SetLimits :set_limits
469
-
470
- # Sets maximum search query time, in milliseconds. Parameter must be a
471
- # non-negative integer. Default value is +0+ which means "do not limit".
472
- #
473
- # Similar to +cutoff+ setting from {#set_limits}, but limits elapsed query
474
- # time instead of processed matches count. Local search queries will be
475
- # stopped once that much time has elapsed. Note that if you're performing
476
- # a search which queries several local indexes, this limit applies to each
477
- # index separately.
478
- #
479
- # @param [Integer] max maximum search query time in milliseconds.
480
- # @return [Sphinx::Client] self.
481
- #
482
- # @example
483
- # sphinx.set_max_query_time(200)
484
- #
485
- # @raise [ArgumentError] Occurred when parameters are invalid.
486
- #
487
- def set_max_query_time(max)
488
- raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
489
- raise ArgumentError, '"max" argument should be greater or equal to zero' unless max >= 0
490
-
290
+
291
+ # Set maximum query time, in milliseconds, per-index,
292
+ # integer, 0 means "do not limit"
293
+ def SetMaxQueryTime(max)
294
+ assert { max.instance_of? Fixnum }
295
+ assert { max >= 0 }
491
296
  @maxquerytime = max
492
- self
493
- end
494
- alias :SetMaxQueryTime :set_max_query_time
495
-
496
- # Sets temporary (per-query) per-document attribute value overrides. Only
497
- # supports scalar attributes. +values+ must be a +Hash+ that maps document
498
- # IDs to overridden attribute values.
499
- #
500
- # Override feature lets you "temporary" update attribute values for some
501
- # documents within a single query, leaving all other queries unaffected.
502
- # This might be useful for personalized data. For example, assume you're
503
- # implementing a personalized search function that wants to boost the posts
504
- # that the user's friends recommend. Such data is not just dynamic, but
505
- # also personal; so you can't simply put it in the index because you don't
506
- # want everyone's searches affected. Overrides, on the other hand, are local
507
- # to a single query and invisible to everyone else. So you can, say, setup
508
- # a "friends_weight" value for every document, defaulting to 0, then
509
- # temporary override it with 1 for documents 123, 456 and 789 (recommended
510
- # by exactly the friends of current user), and use that value when ranking.
511
- #
512
- # You can specify attribute type as String ("integer", "float", etc),
513
- # Symbol (:integer, :float, etc), or
514
- # Fixnum constant (SPH_ATTR_INTEGER, SPH_ATTR_FLOAT, etc).
515
- #
516
- # @param [String, Symbol] attribute an attribute name to override values of.
517
- # @param [Integer, String, Symbol] attrtype attribute type.
518
- # @param [Hash] values a +Hash+ that maps document IDs to overridden attribute values.
519
- # @return [Sphinx::Client] self.
520
- #
521
- # @example
522
- # sphinx.set_override(:friends_weight, :integer, {123 => 1, 456 => 1, 789 => 1})
523
- #
524
- # @raise [ArgumentError] Occurred when parameters are invalid.
525
- #
526
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setoverride Section 6.2.3, "SetOverride"
527
- #
528
- def set_override(attribute, attrtype, values)
529
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
530
-
531
- case attrtype
532
- when String, Symbol
533
- begin
534
- attrtype = self.class.const_get("SPH_ATTR_#{attrtype.to_s.upcase}")
535
- rescue NameError
536
- raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid"
537
- end
538
- when Fixnum
539
- raise ArgumentError, "\"attrtype\" argument value \"#{attrtype}\" is invalid" unless (SPH_ATTR_INTEGER..SPH_ATTR_BIGINT).include?(attrtype)
540
- else
541
- raise ArgumentError, '"attrtype" argument must be Fixnum, String, or Symbol'
542
- end
543
-
544
- raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
545
-
546
- values.each do |id, value|
547
- raise ArgumentError, '"values" argument must be Hash map of Integer to Integer or Time' unless id.kind_of?(Integer)
548
- case attrtype
549
- when SPH_ATTR_TIMESTAMP
550
- raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Integer) or value.kind_of?(Time)
551
- when SPH_ATTR_FLOAT
552
- raise ArgumentError, '"values" argument must be Hash map of Integer to Numeric' unless value.kind_of?(Numeric)
553
- else
554
- # SPH_ATTR_INTEGER, SPH_ATTR_ORDINAL, SPH_ATTR_BOOL, SPH_ATTR_BIGINT
555
- raise ArgumentError, '"values" argument must be Hash map of Integer to Integer' unless value.kind_of?(Integer)
556
- end
557
- end
558
-
559
- @overrides << { 'attr' => attribute.to_s, 'type' => attrtype, 'values' => values }
560
- self
561
- end
562
- alias :SetOverride :set_override
563
-
564
- # Sets the select clause, listing specific attributes to fetch, and
565
- # expressions to compute and fetch. Clause syntax mimics SQL.
566
- #
567
- # {#set_select} is very similar to the part of a typical SQL query between
568
- # +SELECT+ and +FROM+. It lets you choose what attributes (columns) to
569
- # fetch, and also what expressions over the columns to compute and fetch.
570
- # A certain difference from SQL is that expressions must always be aliased
571
- # to a correct identifier (consisting of letters and digits) using +AS+
572
- # keyword. SQL also lets you do that but does not require to. Sphinx enforces
573
- # aliases so that the computation results can always be returned under a
574
- # "normal" name in the result set, used in other clauses, etc.
575
- #
576
- # Everything else is basically identical to SQL. Star ('*') is supported.
577
- # Functions are supported. Arbitrary amount of expressions is supported.
578
- # Computed expressions can be used for sorting, filtering, and grouping,
579
- # just as the regular attributes.
580
- #
581
- # Starting with version 0.9.9-rc2, aggregate functions (<tt>AVG()</tt>,
582
- # <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are supported when using
583
- # <tt>GROUP BY</tt>.
584
- #
585
- # Expression sorting (Section 4.5, “SPH_SORT_EXPR mode”) and geodistance
586
- # functions ({#set_geo_anchor}) are now internally implemented
587
- # using this computed expressions mechanism, using magic names '<tt>@expr</tt>'
588
- # and '<tt>@geodist</tt>' respectively.
589
- #
590
- # @param [String] select a select clause, listing specific attributes to fetch.
591
- # @return [Sphinx::Client] self.
592
- #
593
- # @example
594
- # sphinx.set_select('*, @weight+(user_karma+ln(pageviews))*0.1 AS myweight')
595
- # sphinx.set_select('exp_years, salary_gbp*{$gbp_usd_rate} AS salary_usd, IF(age>40,1,0) AS over40')
596
- # sphinx.set_select('*, AVG(price) AS avgprice')
597
- #
598
- # @raise [ArgumentError] Occurred when parameters are invalid.
599
- #
600
- # @see http://www.sphinxsearch.com/docs/current.html#sort-expr Section 4.5, "SPH_SORT_EXPR mode"
601
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
602
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setselect Section 6.2.4, "SetSelect"
603
- #
604
- def set_select(select)
605
- raise ArgumentError, '"select" argument must be String' unless select.kind_of?(String)
606
-
607
- @select = select
608
- self
609
297
  end
610
- alias :SetSelect :set_select
611
-
612
- #=================================================================
613
- # Full-text search query settings
614
- #=================================================================
615
-
616
- # Sets full-text query matching mode.
617
- #
618
- # Parameter must be a +Fixnum+ constant specifying one of the known modes
619
- # (+SPH_MATCH_ALL+, +SPH_MATCH_ANY+, etc), +String+ with identifier (<tt>"all"</tt>,
620
- # <tt>"any"</tt>, etc), or a +Symbol+ (<tt>:all</tt>, <tt>:any</tt>, etc).
621
- #
622
- # @param [Integer, String, Symbol] mode full-text query matching mode.
623
- # @return [Sphinx::Client] self.
624
- #
625
- # @example
626
- # sphinx.set_match_mode(Sphinx::SPH_MATCH_ALL)
627
- # sphinx.set_match_mode(:all)
628
- # sphinx.set_match_mode('all')
629
- #
630
- # @raise [ArgumentError] Occurred when parameters are invalid.
631
- #
632
- # @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
633
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
634
- #
635
- def set_match_mode(mode)
636
- case mode
637
- when String, Symbol
638
- begin
639
- mode = self.class.const_get("SPH_MATCH_#{mode.to_s.upcase}")
640
- rescue NameError
641
- raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
642
- end
643
- when Fixnum
644
- raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_MATCH_ALL..SPH_MATCH_EXTENDED2).include?(mode)
645
- else
646
- raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
647
- end
298
+
299
+ # Set matching mode.
300
+ def SetMatchMode(mode)
301
+ assert { mode == SPH_MATCH_ALL \
302
+ || mode == SPH_MATCH_ANY \
303
+ || mode == SPH_MATCH_PHRASE \
304
+ || mode == SPH_MATCH_BOOLEAN \
305
+ || mode == SPH_MATCH_EXTENDED \
306
+ || mode == SPH_MATCH_FULLSCAN \
307
+ || mode == SPH_MATCH_EXTENDED2 }
648
308
 
649
309
  @mode = mode
650
- self
651
310
  end
652
- alias :SetMatchMode :set_match_mode
653
-
654
- # Sets ranking mode. Only available in +SPH_MATCH_EXTENDED2+
655
- # matching mode at the time of this writing. Parameter must be a
656
- # constant specifying one of the known modes.
657
- #
658
- # You can specify ranking mode as String ("proximity_bm25", "bm25", etc),
659
- # Symbol (:proximity_bm25, :bm25, etc), or
660
- # Fixnum constant (SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, etc).
661
- #
662
- # @param [Integer, String, Symbol] ranker ranking mode.
663
- # @return [Sphinx::Client] self.
664
- #
665
- # @example
666
- # sphinx.set_ranking_mode(Sphinx::SPH_RANK_BM25)
667
- # sphinx.set_ranking_mode(:bm25)
668
- # sphinx.set_ranking_mode('bm25')
669
- #
670
- # @raise [ArgumentError] Occurred when parameters are invalid.
671
- #
672
- # @see http://www.sphinxsearch.com/docs/current.html#matching-modes Section 4.1, "Matching modes"
673
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setmatchmode Section 6.3.1, "SetMatchMode"
674
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setrankingmode Section 6.3.2, "SetRankingMode"
675
- #
676
- def set_ranking_mode(ranker)
677
- case ranker
678
- when String, Symbol
679
- begin
680
- ranker = self.class.const_get("SPH_RANK_#{ranker.to_s.upcase}")
681
- rescue NameError
682
- raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid"
683
- end
684
- when Fixnum
685
- raise ArgumentError, "\"ranker\" argument value \"#{ranker}\" is invalid" unless (SPH_RANK_PROXIMITY_BM25..SPH_RANK_FIELDMASK).include?(ranker)
686
- else
687
- raise ArgumentError, '"ranker" argument must be Fixnum, String, or Symbol'
688
- end
311
+
312
+ # Set ranking mode.
313
+ def SetRankingMode(ranker)
314
+ assert { ranker == SPH_RANK_PROXIMITY_BM25 \
315
+ || ranker == SPH_RANK_BM25 \
316
+ || ranker == SPH_RANK_NONE \
317
+ || ranker == SPH_RANK_WORDCOUNT \
318
+ || ranker == SPH_RANK_PROXIMITY \
319
+ || ranker == SPH_RANK_MATCHANY \
320
+ || ranker == SPH_RANK_FIELDMASK \
321
+ || ranker == SPH_RANK_SPH04 }
689
322
 
690
323
  @ranker = ranker
691
- self
692
324
  end
693
- alias :SetRankingMode :set_ranking_mode
694
-
325
+
695
326
  # Set matches sorting mode.
696
- #
697
- # You can specify sorting mode as String ("relevance", "attr_desc", etc),
698
- # Symbol (:relevance, :attr_desc, etc), or
699
- # Fixnum constant (SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, etc).
700
- #
701
- # @param [Integer, String, Symbol] mode matches sorting mode.
702
- # @param [String] sortby sorting clause, with the syntax depending on
703
- # specific mode. Should be specified unless sorting mode is
704
- # +SPH_SORT_RELEVANCE+.
705
- # @return [Sphinx::Client] self.
706
- #
707
- # @example
708
- # sphinx.set_sort_mode(Sphinx::SPH_SORT_ATTR_ASC, 'attr')
709
- # sphinx.set_sort_mode(:attr_asc, 'attr')
710
- # sphinx.set_sort_mode('attr_asc', 'attr')
711
- #
712
- # @raise [ArgumentError] Occurred when parameters are invalid.
713
- #
714
- # @see http://www.sphinxsearch.com/docs/current.html#sorting-modes Section 4.5, "Sorting modes"
715
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setsortmode Section 6.3.3, "SetSortMode"
716
- #
717
- def set_sort_mode(mode, sortby = '')
718
- case mode
719
- when String, Symbol
720
- begin
721
- mode = self.class.const_get("SPH_SORT_#{mode.to_s.upcase}")
722
- rescue NameError
723
- raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid"
724
- end
725
- when Fixnum
726
- raise ArgumentError, "\"mode\" argument value \"#{mode}\" is invalid" unless (SPH_SORT_RELEVANCE..SPH_SORT_EXPR).include?(mode)
727
- else
728
- raise ArgumentError, '"mode" argument must be Fixnum, String, or Symbol'
729
- end
730
-
731
- raise ArgumentError, '"sortby" argument must be String' unless sortby.kind_of?(String)
732
- raise ArgumentError, '"sortby" should not be empty unless mode is SPH_SORT_RELEVANCE' unless mode == SPH_SORT_RELEVANCE or !sortby.empty?
327
+ def SetSortMode(mode, sortby = '')
328
+ assert { mode == SPH_SORT_RELEVANCE \
329
+ || mode == SPH_SORT_ATTR_DESC \
330
+ || mode == SPH_SORT_ATTR_ASC \
331
+ || mode == SPH_SORT_TIME_SEGMENTS \
332
+ || mode == SPH_SORT_EXTENDED \
333
+ || mode == SPH_SORT_EXPR }
334
+ assert { sortby.instance_of? String }
335
+ assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
733
336
 
734
337
  @sort = mode
735
338
  @sortby = sortby
736
- self
737
339
  end
738
- alias :SetSortMode :set_sort_mode
739
-
740
- # Binds per-field weights in the order of appearance in the index.
741
- #
742
- # @param [Array<Integer>] weights an +Array+ of integer per-field weights.
743
- # @return [Sphinx::Client] self.
744
- #
745
- # @example
746
- # sphinx.set_weights([1, 3, 5])
747
- #
748
- # @raise [ArgumentError] Occurred when parameters are invalid.
749
- #
750
- # @deprecated Use {#set_field_weights} instead.
751
- # @see #set_field_weights
340
+
341
+ # Bind per-field weights by order.
752
342
  #
753
- def set_weights(weights)
754
- raise ArgumentError, '"weights" argument must be Array' unless weights.kind_of?(Array)
343
+ # DEPRECATED; use SetFieldWeights() instead.
344
+ def SetWeights(weights)
345
+ assert { weights.instance_of? Array }
755
346
  weights.each do |weight|
756
- raise ArgumentError, '"weights" argument must be Array of integers' unless weight.kind_of?(Integer)
347
+ assert { weight.instance_of? Fixnum }
757
348
  end
758
349
 
759
350
  @weights = weights
760
- self
761
351
  end
762
- alias :SetWeights :set_weights
763
352
 
764
- # Binds per-field weights by name. Parameter must be a +Hash+
765
- # mapping string field names to integer weights.
766
- #
767
- # Match ranking can be affected by per-field weights. For instance,
768
- # see Section 4.4, "Weighting" for an explanation how phrase
769
- # proximity ranking is affected. This call lets you specify what
770
- # non-default weights to assign to different full-text fields.
771
- #
772
- # The weights must be positive 32-bit integers. The final weight
773
- # will be a 32-bit integer too. Default weight value is 1. Unknown
774
- # field names will be silently ignored.
775
- #
776
- # There is no enforced limit on the maximum weight value at the
777
- # moment. However, beware that if you set it too high you can
778
- # start hitting 32-bit wraparound issues. For instance, if
779
- # you set a weight of 10,000,000 and search in extended mode,
780
- # then maximum possible weight will be equal to 10 million (your
781
- # weight) by 1 thousand (internal BM25 scaling factor, see
782
- # Section 4.4, “Weighting”) by 1 or more (phrase proximity rank).
783
- # The result is at least 10 billion that does not fit in 32 bits
784
- # and will be wrapped around, producing unexpected results.
785
- #
786
- # @param [Hash] weights a +Hash+ mapping string field names to
787
- # integer weights.
788
- # @return [Sphinx::Client] self.
353
+ # Bind per-field weights by name.
789
354
  #
790
- # @example
791
- # sphinx.set_field_weights(:title => 20, :text => 10)
792
- #
793
- # @raise [ArgumentError] Occurred when parameters are invalid.
794
- #
795
- # @see http://www.sphinxsearch.com/docs/current.html#weighting Section 4.4, "Weighting"
796
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfieldweights Section 6.3.5, "SetFieldWeights"
797
- #
798
- def set_field_weights(weights)
799
- raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
355
+ # Takes a hash mapping string field names to integer field weights as an argument.
356
+ # * Takes precedence over SetWeights().
357
+ # * Unknown names will be silently ignored.
358
+ # * Unbound fields will be silently given a weight of 1.
359
+ def SetFieldWeights(weights)
360
+ assert { weights.instance_of? Hash }
800
361
  weights.each do |name, weight|
801
- unless (name.kind_of?(String) or name.kind_of?(Symbol)) and weight.kind_of?(Integer)
802
- raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
803
- end
362
+ assert { name.instance_of? String }
363
+ assert { weight.instance_of? Fixnum }
804
364
  end
805
365
 
806
366
  @fieldweights = weights
807
- self
808
367
  end
809
- alias :SetFieldWeights :set_field_weights
810
-
811
- # Sets per-index weights, and enables weighted summing of match
812
- # weights across different indexes. Parameter must be a hash
813
- # (associative array) mapping string index names to integer
814
- # weights. Default is empty array that means to disable weighting
815
- # summing.
816
- #
817
- # When a match with the same document ID is found in several
818
- # different local indexes, by default Sphinx simply chooses the
819
- # match from the index specified last in the query. This is to
820
- # support searching through partially overlapping index partitions.
821
- #
822
- # However in some cases the indexes are not just partitions,
823
- # and you might want to sum the weights across the indexes
824
- # instead of picking one. {#set_index_weights} lets you do that.
825
- # With summing enabled, final match weight in result set will be
826
- # computed as a sum of match weight coming from the given index
827
- # multiplied by respective per-index weight specified in this
828
- # call. Ie. if the document 123 is found in index A with the
829
- # weight of 2, and also in index B with the weight of 3, and
830
- # you called {#set_index_weights} with <tt>{"A"=>100, "B"=>10}</tt>,
831
- # the final weight returned to the client will be 2*100+3*10 = 230.
832
- #
833
- # @param [Hash] weights a +Hash+ mapping string index names to
834
- # integer weights.
835
- # @return [Sphinx::Client] self.
836
- #
837
- # @example
838
- # sphinx.set_index_weights(:fresh => 20, :archived => 10)
839
- #
840
- # @raise [ArgumentError] Occurred when parameters are invalid.
841
- #
842
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setindexweights Section 6.3.6, "SetIndexWeights"
843
- #
844
- def set_index_weights(weights)
845
- raise ArgumentError, '"weights" argument must be Hash' unless weights.kind_of?(Hash)
368
+
369
+ # Bind per-index weights by name.
370
+ def SetIndexWeights(weights)
371
+ assert { weights.instance_of? Hash }
846
372
  weights.each do |index, weight|
847
- unless (index.kind_of?(String) or index.kind_of?(Symbol)) and weight.kind_of?(Integer)
848
- raise ArgumentError, '"weights" argument must be Hash map of strings to integers'
849
- end
373
+ assert { index.instance_of? String }
374
+ assert { weight.instance_of? Fixnum }
850
375
  end
851
-
376
+
852
377
  @indexweights = weights
853
- self
854
378
  end
855
- alias :SetIndexWeights :set_index_weights
856
-
857
- #=================================================================
858
- # Result set filtering settings
859
- #=================================================================
860
-
861
- # Sets an accepted range of document IDs. Parameters must be integers.
862
- # Defaults are 0 and 0; that combination means to not limit by range.
863
- #
864
- # After this call, only those records that have document ID between
865
- # +min+ and +max+ (including IDs exactly equal to +min+ or +max+)
866
- # will be matched.
867
- #
868
- # @param [Integer] min min document ID.
869
- # @param [Integer] max max document ID.
870
- # @return [Sphinx::Client] self.
871
- #
872
- # @example
873
- # sphinx.set_id_range(10, 1000)
874
- #
875
- # @raise [ArgumentError] Occurred when parameters are invalid.
876
- #
877
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setidrange Section 6.4.1, "SetIDRange"
878
- #
879
- def set_id_range(min, max)
880
- raise ArgumentError, '"min" argument must be Integer' unless min.kind_of?(Integer)
881
- raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
882
- raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
379
+
380
+ # Set IDs range to match.
381
+ #
382
+ # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
383
+ def SetIDRange(min, max)
384
+ assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
385
+ assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
386
+ assert { min <= max }
883
387
 
884
388
  @min_id = min
885
389
  @max_id = max
886
- self
887
390
  end
888
- alias :SetIDRange :set_id_range
889
-
890
- # Adds new integer values set filter.
891
- #
892
- # On this call, additional new filter is added to the existing
893
- # list of filters. +attribute+ must be a string with attribute
894
- # name. +values+ must be a plain array containing integer
895
- # values. +exclude+ must be a boolean value; it controls
896
- # whether to accept the matching documents (default mode, when
897
- # +exclude+ is +false+) or reject them.
898
- #
899
- # Only those documents where +attribute+ column value stored in
900
- # the index matches any of the values from +values+ array will
901
- # be matched (or rejected, if +exclude+ is +true+).
902
- #
903
- # @param [String, Symbol] attribute an attribute name to filter by.
904
- # @param [Array<Integer>, Integer] values an +Array+ of integers or
905
- # single Integer with given attribute values.
906
- # @param [Boolean] exclude indicating whether documents with given attribute
907
- # matching specified values should be excluded from search results.
908
- # @return [Sphinx::Client] self.
909
- #
910
- # @example
911
- # sphinx.set_filter(:group_id, [10, 15, 20])
912
- # sphinx.set_filter(:group_id, [10, 15, 20], true)
913
- #
914
- # @raise [ArgumentError] Occurred when parameters are invalid.
915
- #
916
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilter Section 6.4.2, "SetFilter"
917
- # @see #set_filter_range
918
- # @see #set_filter_float_range
919
- #
920
- def set_filter(attribute, values, exclude = false)
921
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
922
- values = [values] if values.kind_of?(Integer)
923
- raise ArgumentError, '"values" argument must be Array' unless values.kind_of?(Array)
924
- raise ArgumentError, '"values" argument must be Array of Integers' unless values.all? { |v| v.kind_of?(Integer) }
925
- raise ArgumentError, '"exclude" argument must be Boolean' unless [TrueClass, FalseClass].include?(exclude.class)
926
-
927
- if values.any?
928
- @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute.to_s, 'exclude' => exclude, 'values' => values }
391
+
392
+ # Set values filter.
393
+ #
394
+ # Only match those records where <tt>attribute</tt> column values
395
+ # are in specified set.
396
+ def SetFilter(attribute, values, exclude = false)
397
+ assert { attribute.instance_of? String }
398
+ assert { values.instance_of? Array }
399
+ assert { !values.empty? }
400
+
401
+ if values.instance_of?(Array) && values.size > 0
402
+ values.each do |value|
403
+ assert { value.instance_of? Fixnum }
404
+ end
405
+
406
+ @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
929
407
  end
930
- self
931
408
  end
932
- alias :SetFilter :set_filter
933
-
934
- # Adds new integer range filter.
935
- #
936
- # On this call, additional new filter is added to the existing
937
- # list of filters. +attribute+ must be a string with attribute
938
- # name. +min+ and +max+ must be integers that define the acceptable
939
- # attribute values range (including the boundaries). +exclude+
940
- # must be a boolean value; it controls whether to accept the
941
- # matching documents (default mode, when +exclude+ is false) or
942
- # reject them.
943
- #
944
- # Only those documents where +attribute+ column value stored
945
- # in the index is between +min+ and +max+ (including values
946
- # that are exactly equal to +min+ or +max+) will be matched
947
- # (or rejected, if +exclude+ is true).
948
- #
949
- # @param [String, Symbol] attribute an attribute name to filter by.
950
- # @param [Integer] min min value of the given attribute.
951
- # @param [Integer] max max value of the given attribute.
952
- # @param [Boolean] exclude indicating whether documents with given attribute
953
- # matching specified boundaries should be excluded from search results.
954
- # @return [Sphinx::Client] self.
955
- #
956
- # @example
957
- # sphinx.set_filter_range(:group_id, 10, 20)
958
- # sphinx.set_filter_range(:group_id, 10, 20, true)
959
- #
960
- # @raise [ArgumentError] Occurred when parameters are invalid.
961
- #
962
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterrange Section 6.4.3, "SetFilterRange"
963
- # @see #set_filter
964
- # @see #set_filter_float_range
965
- #
966
- def set_filter_range(attribute, min, max, exclude = false)
967
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
968
- raise ArgumentError, '"min" argument must be Integer' unless min.kind_of?(Integer)
969
- raise ArgumentError, '"max" argument must be Integer' unless max.kind_of?(Integer)
970
- raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
971
- raise ArgumentError, '"exclude" argument must be Boolean' unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)
972
-
973
- @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min, 'max' => max }
974
- self
409
+
410
+ # Set range filter.
411
+ #
412
+ # Only match those records where <tt>attribute</tt> column value
413
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
414
+ def SetFilterRange(attribute, min, max, exclude = false)
415
+ assert { attribute.instance_of? String }
416
+ assert { min.instance_of? Fixnum or min.instance_of? Bignum }
417
+ assert { max.instance_of? Fixnum or max.instance_of? Bignum }
418
+ assert { min <= max }
419
+
420
+ @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
975
421
  end
976
- alias :SetFilterRange :set_filter_range
977
-
978
- # Adds new float range filter.
979
- #
980
- # On this call, additional new filter is added to the existing
981
- # list of filters. +attribute+ must be a string with attribute name.
982
- # +min+ and +max+ must be floats that define the acceptable
983
- # attribute values range (including the boundaries). +exclude+ must
984
- # be a boolean value; it controls whether to accept the matching
985
- # documents (default mode, when +exclude+ is false) or reject them.
986
- #
987
- # Only those documents where +attribute+ column value stored in
988
- # the index is between +min+ and +max+ (including values that are
989
- # exactly equal to +min+ or +max+) will be matched (or rejected,
990
- # if +exclude+ is true).
991
- #
992
- # @param [String, Symbol] attribute an attribute name to filter by.
993
- # @param [Numeric] min min value of the given attribute.
994
- # @param [Numeric] max max value of the given attribute.
995
- # @param [Boolean] exclude indicating whether documents with given attribute
996
- # matching specified boundaries should be excluded from search results.
997
- # @return [Sphinx::Client] self.
998
- #
999
- # @example
1000
- # sphinx.set_filter_float_range(:group_id, 10.5, 20)
1001
- # sphinx.set_filter_float_range(:group_id, 10.5, 20, true)
1002
- #
1003
- # @raise [ArgumentError] Occurred when parameters are invalid.
1004
- #
1005
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setfilterfloatrange Section 6.4.4, "SetFilterFloatRange"
1006
- # @see #set_filter
1007
- # @see #set_filter_range
1008
- #
1009
- def set_filter_float_range(attribute, min, max, exclude = false)
1010
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
1011
- raise ArgumentError, '"min" argument must be Numeric' unless min.kind_of?(Numeric)
1012
- raise ArgumentError, '"max" argument must be Numeric' unless max.kind_of?(Numeric)
1013
- raise ArgumentError, '"max" argument greater or equal to "min"' unless min <= max
1014
- raise ArgumentError, '"exclude" argument must be Boolean' unless exclude.kind_of?(TrueClass) or exclude.kind_of?(FalseClass)
1015
-
1016
- @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute.to_s, 'exclude' => exclude, 'min' => min.to_f, 'max' => max.to_f }
1017
- self
422
+
423
+ # Set float range filter.
424
+ #
425
+ # Only match those records where <tt>attribute</tt> column value
426
+ # is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
427
+ def SetFilterFloatRange(attribute, min, max, exclude = false)
428
+ assert { attribute.instance_of? String }
429
+ assert { min.instance_of? Float }
430
+ assert { max.instance_of? Float }
431
+ assert { min <= max }
432
+
433
+ @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
1018
434
  end
1019
- alias :SetFilterFloatRange :set_filter_float_range
1020
-
1021
- # Sets anchor point for geosphere distance (geodistance)
1022
- # calculations, and enable them.
1023
- #
1024
- # +attrlat+ and +attrlong+ must be strings that contain the names
1025
- # of latitude and longitude attributes, respectively. +lat+ and
1026
- # +long+ are floats that specify anchor point latitude and
1027
- # longitude, in radians.
1028
- #
1029
- # Once an anchor point is set, you can use magic <tt>"@geodist"</tt>
1030
- # attribute name in your filters and/or sorting expressions.
1031
- # Sphinx will compute geosphere distance between the given anchor
1032
- # point and a point specified by latitude and longitude attributes
1033
- # from each full-text match, and attach this value to the resulting
1034
- # match. The latitude and longitude values both in {#set_geo_anchor}
1035
- # and the index attribute data are expected to be in radians.
1036
- # The result will be returned in meters, so geodistance value of
1037
- # 1000.0 means 1 km. 1 mile is approximately 1609.344 meters.
1038
- #
1039
- # @param [String, Symbol] attrlat a name of latitude attribute.
1040
- # @param [String, Symbol] attrlong a name of longitude attribute.
1041
- # @param [Numeric] lat an anchor point latitude, in radians.
1042
- # @param [Numeric] long an anchor point longitude, in radians.
1043
- # @return [Sphinx::Client] self.
1044
- #
1045
- # @example
1046
- # sphinx.set_geo_anchor(:latitude, :longitude, 192.5, 143.5)
1047
- #
1048
- # @raise [ArgumentError] Occurred when parameters are invalid.
1049
- #
1050
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgeoanchor Section 6.4.5, "SetGeoAnchor"
1051
- #
1052
- def set_geo_anchor(attrlat, attrlong, lat, long)
1053
- raise ArgumentError, '"attrlat" argument must be String or Symbol' unless attrlat.kind_of?(String) or attrlat.kind_of?(Symbol)
1054
- raise ArgumentError, '"attrlong" argument must be String or Symbol' unless attrlong.kind_of?(String) or attrlong.kind_of?(Symbol)
1055
- raise ArgumentError, '"lat" argument must be Numeric' unless lat.kind_of?(Numeric)
1056
- raise ArgumentError, '"long" argument must be Numeric' unless long.kind_of?(Numeric)
1057
-
1058
- @anchor = { 'attrlat' => attrlat.to_s, 'attrlong' => attrlong.to_s, 'lat' => lat.to_f, 'long' => long.to_f }
1059
- self
435
+
436
+ # Setup anchor point for geosphere distance calculations.
437
+ #
438
+ # Required to use <tt>@geodist</tt> in filters and sorting;
439
+ # distance will be computed to this point. Latitude and longitude
440
+ # must be in radians.
441
+ #
442
+ # * <tt>attrlat</tt> -- is the name of latitude attribute
443
+ # * <tt>attrlong</tt> -- is the name of longitude attribute
444
+ # * <tt>lat</tt> -- is anchor point latitude, in radians
445
+ # * <tt>long</tt> -- is anchor point longitude, in radians
446
+ def SetGeoAnchor(attrlat, attrlong, lat, long)
447
+ assert { attrlat.instance_of? String }
448
+ assert { attrlong.instance_of? String }
449
+ assert { lat.instance_of? Float }
450
+ assert { long.instance_of? Float }
451
+
452
+ @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
1060
453
  end
1061
- alias :SetGeoAnchor :set_geo_anchor
1062
-
1063
- #=================================================================
1064
- # GROUP BY settings
1065
- #=================================================================
1066
-
1067
- # Sets grouping attribute, function, and groups sorting mode; and
1068
- # enables grouping (as described in Section 4.6, "Grouping (clustering) search results").
1069
- #
1070
- # +attribute+ is a string that contains group-by attribute name.
1071
- # +func+ is a constant that chooses a function applied to the
1072
- # attribute value in order to compute group-by key. +groupsort+
1073
- # is a clause that controls how the groups will be sorted. Its
1074
- # syntax is similar to that described in Section 4.5,
1075
- # "SPH_SORT_EXTENDED mode".
1076
- #
1077
- # Grouping feature is very similar in nature to <tt>GROUP BY</tt> clause
1078
- # from SQL. Results produced by this function call are going to
1079
- # be the same as produced by the following pseudo code:
1080
- #
1081
- # SELECT ... GROUP BY func(attribute) ORDER BY groupsort
1082
- #
1083
- # Note that it's +groupsort+ that affects the order of matches in
1084
- # the final result set. Sorting mode (see {#set_sort_mode}) affects
1085
- # the ordering of matches within group, ie. what match will be
1086
- # selected as the best one from the group. So you can for instance
1087
- # order the groups by matches count and select the most relevant
1088
- # match within each group at the same time.
1089
- #
1090
- # Starting with version 0.9.9-rc2, aggregate functions (<tt>AVG()</tt>,
1091
- # <tt>MIN()</tt>, <tt>MAX()</tt>, <tt>SUM()</tt>) are supported
1092
- # through {#set_select} API call when using <tt>GROUP BY</tt>.
1093
- #
1094
- # You can specify group function and attribute as String
1095
- # ("attr", "day", etc), Symbol (:attr, :day, etc), or
1096
- # Fixnum constant (SPH_GROUPBY_ATTR, SPH_GROUPBY_DAY, etc).
1097
- #
1098
- # @param [String, Symbol] attribute an attribute name to group by.
1099
- # @param [Integer, String, Symbol] func a grouping function.
1100
- # @param [String] groupsort a groups sorting mode.
1101
- # @return [Sphinx::Client] self.
1102
- #
1103
- # @example
1104
- # sphinx.set_group_by(:tag_id, :attr)
1105
- #
1106
- # @raise [ArgumentError] Occurred when parameters are invalid.
1107
- #
1108
- # @see http://www.sphinxsearch.com/docs/current.html#clustering Section 4.6, "Grouping (clustering) search results"
1109
- # @see http://www.sphinxsearch.com/docs/current.html#sort-extended Section 4.5, "SPH_SORT_EXTENDED mode"
1110
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupby Section 6.5.1, "SetGroupBy"
1111
- # @see #set_sort_mode
1112
- # @see #set_select
1113
- # @see #set_group_distinct
1114
- #
1115
- def set_group_by(attribute, func, groupsort = '@group desc')
1116
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
1117
- raise ArgumentError, '"groupsort" argument must be String' unless groupsort.kind_of?(String)
1118
-
1119
- case func
1120
- when String, Symbol
1121
- begin
1122
- func = self.class.const_get("SPH_GROUPBY_#{func.to_s.upcase}")
1123
- rescue NameError
1124
- raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid"
1125
- end
1126
- when Fixnum
1127
- raise ArgumentError, "\"func\" argument value \"#{func}\" is invalid" unless (SPH_GROUPBY_DAY..SPH_GROUPBY_ATTRPAIR).include?(func)
1128
- else
1129
- raise ArgumentError, '"func" argument must be Fixnum, String, or Symbol'
1130
- end
1131
-
1132
- @groupby = attribute.to_s
454
+
455
+ # Set grouping attribute and function.
456
+ #
457
+ # In grouping mode, all matches are assigned to different groups
458
+ # based on grouping function value.
459
+ #
460
+ # Each group keeps track of the total match count, and the best match
461
+ # (in this group) according to current sorting function.
462
+ #
463
+ # The final result set contains one best match per group, with
464
+ # grouping function value and matches count attached.
465
+ #
466
+ # Groups in result set could be sorted by any sorting clause,
467
+ # including both document attributes and the following special
468
+ # internal Sphinx attributes:
469
+ #
470
+ # * @id - match document ID;
471
+ # * @weight, @rank, @relevance - match weight;
472
+ # * @group - groupby function value;
473
+ # * @count - amount of matches in group.
474
+ #
475
+ # The default mode is to sort by groupby value in descending order,
476
+ # ie. by '@group desc'.
477
+ #
478
+ # 'total_found' would contain total amount of matching groups over
479
+ # the whole index.
480
+ #
481
+ # WARNING: grouping is done in fixed memory and thus its results
482
+ # are only approximate; so there might be more groups reported
483
+ # in total_found than actually present. @count might also
484
+ # be underestimated.
485
+ #
486
+ # For example, if sorting by relevance and grouping by "published"
487
+ # attribute with SPH_GROUPBY_DAY function, then the result set will
488
+ # contain one most relevant match per each day when there were any
489
+ # matches published, with day number and per-day match count attached,
490
+ # and sorted by day number in descending order (ie. recent days first).
491
+ def SetGroupBy(attribute, func, groupsort = '@group desc')
492
+ assert { attribute.instance_of? String }
493
+ assert { groupsort.instance_of? String }
494
+ assert { func == SPH_GROUPBY_DAY \
495
+ || func == SPH_GROUPBY_WEEK \
496
+ || func == SPH_GROUPBY_MONTH \
497
+ || func == SPH_GROUPBY_YEAR \
498
+ || func == SPH_GROUPBY_ATTR \
499
+ || func == SPH_GROUPBY_ATTRPAIR }
500
+
501
+ @groupby = attribute
1133
502
  @groupfunc = func
1134
503
  @groupsort = groupsort
1135
- self
1136
504
  end
1137
- alias :SetGroupBy :set_group_by
1138
-
1139
- # Sets attribute name for per-group distinct values count
1140
- # calculations. Only available for grouping queries.
1141
- #
1142
- # +attribute+ is a string that contains the attribute name. For
1143
- # each group, all values of this attribute will be stored (as
1144
- # RAM limits permit), then the amount of distinct values will
1145
- # be calculated and returned to the client. This feature is
1146
- # similar to <tt>COUNT(DISTINCT)</tt> clause in standard SQL;
1147
- # so these Sphinx calls:
1148
- #
1149
- # sphinx.set_group_by(:category, :attr, '@count desc')
1150
- # sphinx.set_group_distinct(:vendor)
1151
- #
1152
- # can be expressed using the following SQL clauses:
1153
- #
1154
- # SELECT id, weight, all-attributes,
1155
- # COUNT(DISTINCT vendor) AS @distinct,
1156
- # COUNT(*) AS @count
1157
- # FROM products
1158
- # GROUP BY category
1159
- # ORDER BY @count DESC
1160
- #
1161
- # In the sample pseudo code shown just above, {#set_group_distinct}
1162
- # call corresponds to <tt>COUNT(DISTINCT vendor)</tt> clause only.
1163
- # <tt>GROUP BY</tt>, <tt>ORDER BY</tt>, and <tt>COUNT(*)</tt>
1164
- # clauses are all an equivalent of {#set_group_by} settings. Both
1165
- # queries will return one matching row for each category. In
1166
- # addition to indexed attributes, matches will also contain
1167
- # total per-category matches count, and the count of distinct
1168
- # vendor IDs within each category.
1169
- #
1170
- # @param [String, Symbol] attribute an attribute name.
1171
- # @return [Sphinx::Client] self.
1172
- #
1173
- # @example
1174
- # sphinx.set_group_distinct(:category_id)
1175
- #
1176
- # @raise [ArgumentError] Occurred when parameters are invalid.
1177
- #
1178
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-setgroupdistinct Section 6.5.2, "SetGroupDistinct"
1179
- # @see #set_group_by
505
+
506
+ # Set count-distinct attribute for group-by queries.
507
+ def SetGroupDistinct(attribute)
508
+ assert { attribute.instance_of? String }
509
+ @groupdistinct = attribute
510
+ end
511
+
512
+ # Set distributed retries count and delay.
513
+ def SetRetries(count, delay = 0)
514
+ assert { count.instance_of? Fixnum }
515
+ assert { delay.instance_of? Fixnum }
516
+
517
+ @retrycount = count
518
+ @retrydelay = delay
519
+ end
520
+
521
+ # Set attribute values override
1180
522
  #
1181
- def set_group_distinct(attribute)
1182
- raise ArgumentError, '"attribute" argument must be String or Symbol' unless attribute.kind_of?(String) or attribute.kind_of?(Symbol)
523
+ # There can be only one override per attribute.
524
+ # +values+ must be a hash that maps document IDs to attribute values.
525
+ def SetOverride(attrname, attrtype, values)
526
+ assert { attrname.instance_of? String }
527
+ assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
528
+ assert { values.instance_of? Hash }
1183
529
 
1184
- @groupdistinct = attribute.to_s
1185
- self
530
+ @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
1186
531
  end
1187
- alias :SetGroupDistinct :set_group_distinct
1188
532
 
1189
- #=================================================================
1190
- # Querying
1191
- #=================================================================
1192
-
1193
- # Clears all currently set filters.
1194
- #
1195
- # This call is only normally required when using multi-queries. You might want
1196
- # to set different filters for different queries in the batch. To do that,
1197
- # you should call {#reset_filters} and add new filters using the respective calls.
1198
- #
1199
- # @return [Sphinx::Client] self.
1200
- #
1201
- # @example
1202
- # sphinx.reset_filters
1203
- #
1204
- # @see #set_filter
1205
- # @see #set_filter_range
1206
- # @see #set_filter_float_range
1207
- # @see #set_geo_anchor
1208
- #
1209
- def reset_filters
1210
- @filters = []
1211
- @anchor = []
1212
- self
533
+ # Set select-list (attributes or expressions), SQL-like syntax.
534
+ def SetSelect(select)
535
+ assert { select.instance_of? String }
536
+ @select = select
1213
537
  end
1214
- alias :ResetFilters :reset_filters
1215
-
1216
- # Clears all currently group-by settings, and disables group-by.
1217
- #
1218
- # This call is only normally required when using multi-queries. You can
1219
- # change individual group-by settings using {#set_group_by} and {#set_group_distinct}
1220
- # calls, but you can not disable group-by using those calls. {#reset_group_by}
1221
- # fully resets previous group-by settings and disables group-by mode in the
1222
- # current state, so that subsequent {#add_query} calls can perform non-grouping
1223
- # searches.
1224
- #
1225
- # @return [Sphinx::Client] self.
1226
- #
1227
- # @example
1228
- # sphinx.reset_group_by
1229
- #
1230
- # @see #set_group_by
1231
- # @see #set_group_distinct
1232
- #
1233
- def reset_group_by
538
+
539
+ # Clear all filters (for multi-queries).
540
+ def ResetFilters
541
+ @filters = []
542
+ @anchor = []
543
+ end
544
+
545
+ # Clear groupby settings (for multi-queries).
546
+ def ResetGroupBy
1234
547
  @groupby = ''
1235
548
  @groupfunc = SPH_GROUPBY_DAY
1236
549
  @groupsort = '@group desc'
1237
550
  @groupdistinct = ''
1238
- self
1239
551
  end
1240
- alias :ResetGroupBy :reset_group_by
1241
-
552
+
1242
553
  # Clear all attribute value overrides (for multi-queries).
1243
- #
1244
- # This call is only normally required when using multi-queries. You might want
1245
- # to set field overrides for different queries in the batch. To do that,
1246
- # you should call {#reset_overrides} and add new overrides using the
1247
- # respective calls.
1248
- #
1249
- # @return [Sphinx::Client] self.
1250
- #
1251
- # @example
1252
- # sphinx.reset_overrides
1253
- #
1254
- # @see #set_override
1255
- #
1256
- def reset_overrides
554
+ def ResetOverrides
1257
555
  @overrides = []
1258
- self
1259
556
  end
1260
- alias :ResetOverrides :reset_overrides
1261
-
1262
- # Connects to searchd server, runs given search query with
1263
- # current settings, obtains and returns the result set.
1264
- #
1265
- # +query+ is a query string. +index+ is an index name (or names)
1266
- # string. Returns false and sets {#last_error} message on general
1267
- # error. Returns search result set on success. Additionally,
1268
- # the contents of +comment+ are sent to the query log, marked in
1269
- # square brackets, just before the search terms, which can be very
1270
- # useful for debugging. Currently, the comment is limited to 128
1271
- # characters.
1272
- #
1273
- # Default value for +index+ is <tt>"*"</tt> that means to query
1274
- # all local indexes. Characters allowed in index names include
1275
- # Latin letters (a-z), numbers (0-9), minus sign (-), and
1276
- # underscore (_); everything else is considered a separator.
1277
- # Therefore, all of the following samples calls are valid and
1278
- # will search the same two indexes:
1279
- #
1280
- # sphinx.query('test query', 'main delta')
1281
- # sphinx.query('test query', 'main;delta')
1282
- # sphinx.query('test query', 'main, delta');
1283
- #
1284
- # Index specification order matters. If document with identical
1285
- # IDs are found in two or more indexes, weight and attribute
1286
- # values from the very last matching index will be used for
1287
- # sorting and returning to client (unless explicitly overridden
1288
- # with {#set_index_weights}). Therefore, in the example above,
1289
- # matches from "delta" index will always win over matches
1290
- # from "main".
1291
- #
1292
- # On success, {#query} returns a result set that contains some
1293
- # of the found matches (as requested by {#set_limits}) and
1294
- # additional general per-query statistics. The result set
1295
- # is a +Hash+ with the following keys and values:
1296
- #
1297
- # <tt>"matches"</tt>::
1298
- # Array with small +Hash+es containing document weight and
1299
- # attribute values.
1300
- # <tt>"total"</tt>::
1301
- # Total amount of matches retrieved on server (ie. to the server
1302
- # side result set) by this query. You can retrieve up to this
1303
- # amount of matches from server for this query text with current
1304
- # query settings.
1305
- # <tt>"total_found"</tt>::
1306
- # Total amount of matching documents in index (that were found
1307
- # and processed on server).
1308
- # <tt>"words"</tt>::
1309
- # Hash which maps query keywords (case-folded, stemmed, and
1310
- # otherwise processed) to a small Hash with per-keyword statistics
1311
- # ("docs", "hits").
1312
- # <tt>"error"</tt>::
1313
- # Query error message reported by searchd (string, human readable).
1314
- # Empty if there were no errors.
1315
- # <tt>"warning"</tt>::
1316
- # Query warning message reported by searchd (string, human readable).
1317
- # Empty if there were no warnings.
1318
- #
1319
- # Please note: you can use both strings and symbols as <tt>Hash</tt> keys.
1320
- #
1321
- # It should be noted that {#query} carries out the same actions as
1322
- # {#add_query} and {#run_queries} without the intermediate steps; it
1323
- # is analogous to a single {#add_query} call, followed by a
1324
- # corresponding {#run_queries}, then returning the first array
1325
- # element of matches (from the first, and only, query.)
1326
- #
1327
- # @param [String] query a query string.
1328
- # @param [String] index an index name (or names).
1329
- # @param [String] comment a comment to be sent to the query log.
1330
- # @return [Hash, false] result set described above or +false+ on error.
1331
- # @yield [Client] yields just before query performing. Useful to set
1332
- # filters or sortings. When block does not accept any parameters, it
1333
- # will be eval'ed inside {Client} instance itself. In this case you
1334
- # can omit +set_+ prefix for configuration methods.
1335
- # @yieldparam [Client] sphinx self.
1336
- #
1337
- # @example Regular query with previously set filters
1338
- # sphinx.query('some search text', '*', 'search page')
1339
- # @example Query with block
1340
- # sphinx.query('test') do |sphinx|
1341
- # sphinx.set_match_mode :all
1342
- # sphinx.set_id_range 10, 100
1343
- # end
1344
- # @example Query with instant filters configuring
1345
- # sphinx.query('test') do
1346
- # match_mode :all
1347
- # id_range 10, 100
1348
- # end
1349
- #
1350
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-query Section 6.6.1, "Query"
1351
- # @see #add_query
1352
- # @see #run_queries
1353
- #
1354
- def query(query, index = '*', comment = '', &block)
557
+
558
+ # Connect to searchd server and run given search query.
559
+ #
560
+ # <tt>query</tt> is query string
561
+
562
+ # <tt>index</tt> is index name (or names) to query. default value is "*" which means
563
+ # to query all indexes. Accepted characters for index names are letters, numbers,
564
+ # dash, and underscore; everything else is considered a separator. Therefore,
565
+ # all the following calls are valid and will search two indexes:
566
+ #
567
+ # sphinx.Query('test query', 'main delta')
568
+ # sphinx.Query('test query', 'main;delta')
569
+ # sphinx.Query('test query', 'main, delta')
570
+ #
571
+ # Index order matters. If identical IDs are found in two or more indexes,
572
+ # weight and attribute values from the very last matching index will be used
573
+ # for sorting and returning to client. Therefore, in the example above,
574
+ # matches from "delta" index will always "win" over matches from "main".
575
+ #
576
+ # Returns false on failure.
577
+ # Returns hash which has the following keys on success:
578
+ #
579
+ # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
580
+ # * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
581
+ # * <tt>'total_found'</tt> -- total amount of matching documents in index
582
+ # * <tt>'time'</tt> -- search time
583
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
584
+ def Query(query, index = '*', comment = '')
585
+ assert { @reqs.empty? }
1355
586
  @reqs = []
1356
-
1357
- if block_given?
1358
- if block.arity > 0
1359
- yield self
1360
- else
1361
- begin
1362
- @inside_eval = true
1363
- instance_eval(&block)
1364
- ensure
1365
- @inside_eval = false
1366
- end
1367
- end
1368
- end
1369
-
1370
- logger.debug { "[sphinx] query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if logger
1371
-
1372
- self.add_query(query, index, comment, false)
1373
- results = self.run_queries
1374
-
587
+
588
+ self.AddQuery(query, index, comment)
589
+ results = self.RunQueries
590
+
1375
591
  # probably network error; error message should be already filled
1376
592
  return false unless results.instance_of?(Array)
1377
-
593
+
1378
594
  @error = results[0]['error']
1379
595
  @warning = results[0]['warning']
1380
-
596
+
1381
597
  return false if results[0]['status'] == SEARCHD_ERROR
1382
598
  return results[0]
1383
599
  end
1384
- alias :Query :query
1385
-
1386
- # Adds additional query with current settings to multi-query batch.
1387
- # +query+ is a query string. +index+ is an index name (or names)
1388
- # string. Additionally if provided, the contents of +comment+ are
1389
- # sent to the query log, marked in square brackets, just before
1390
- # the search terms, which can be very useful for debugging.
1391
- # Currently, this is limited to 128 characters. Returns index
1392
- # to results array returned from {#run_queries}.
1393
- #
1394
- # Batch queries (or multi-queries) enable searchd to perform
1395
- # internal optimizations if possible. They also reduce network
1396
- # connection overheads and search process creation overheads in all
1397
- # cases. They do not result in any additional overheads compared
1398
- # to simple queries. Thus, if you run several different queries
1399
- # from your web page, you should always consider using multi-queries.
1400
- #
1401
- # For instance, running the same full-text query but with different
1402
- # sorting or group-by settings will enable searchd to perform
1403
- # expensive full-text search and ranking operation only once, but
1404
- # compute multiple group-by results from its output.
1405
- #
1406
- # This can be a big saver when you need to display not just plain
1407
- # search results but also some per-category counts, such as the
1408
- # amount of products grouped by vendor. Without multi-query, you
1409
- # would have to run several queries which perform essentially the
1410
- # same search and retrieve the same matches, but create result
1411
- # sets differently. With multi-query, you simply pass all these
1412
- # queries in a single batch and Sphinx optimizes the redundant
1413
- # full-text search internally.
1414
- #
1415
- # {#add_query} internally saves full current settings state along
1416
- # with the query, and you can safely change them afterwards for
1417
- # subsequent {#add_query} calls. Already added queries will not
1418
- # be affected; there's actually no way to change them at all.
1419
- # Here's an example:
1420
- #
1421
- # sphinx.set_sort_mode(:relevance)
1422
- # sphinx.add_query("hello world", "documents")
1423
- #
1424
- # sphinx.set_sort_mode(:attr_desc, :price)
1425
- # sphinx.add_query("ipod", "products")
1426
- #
1427
- # sphinx.add_query("harry potter", "books")
1428
- #
1429
- # results = sphinx.run_queries
1430
- #
1431
- # With the code above, 1st query will search for "hello world"
1432
- # in "documents" index and sort results by relevance, 2nd query
1433
- # will search for "ipod" in "products" index and sort results
1434
- # by price, and 3rd query will search for "harry potter" in
1435
- # "books" index while still sorting by price. Note that 2nd
1436
- # {#set_sort_mode} call does not affect the first query (because
1437
- # it's already added) but affects both other subsequent queries.
1438
- #
1439
- # Additionally, any filters set up before an {#add_query} will
1440
- # fall through to subsequent queries. So, if {#set_filter} is
1441
- # called before the first query, the same filter will be in
1442
- # place for the second (and subsequent) queries batched through
1443
- # {#add_query} unless you call {#reset_filters} first. Alternatively,
1444
- # you can add additional filters as well.
1445
- #
1446
- # This would also be true for grouping options and sorting options;
1447
- # no current sorting, filtering, and grouping settings are affected
1448
- # by this call; so subsequent queries will reuse current query settings.
1449
- #
1450
- # {#add_query} returns an index into an array of results that will
1451
- # be returned from {#run_queries} call. It is simply a sequentially
1452
- # increasing 0-based integer, ie. first call will return 0, second
1453
- # will return 1, and so on. Just a small helper so you won't have
1454
- # to track the indexes manually if you need them.
1455
- #
1456
- # @param [String] query a query string.
1457
- # @param [String] index an index name (or names).
1458
- # @param [String] comment a comment to be sent to the query log.
1459
- # @param [Boolean] log indicating whether this call should be logged.
1460
- # @return [Integer] an index into an array of results that will
1461
- # be returned from {#run_queries} call.
600
+
601
+ # Add query to batch.
1462
602
  #
1463
- # @example
1464
- # sphinx.add_query('some search text', '*', 'search page')
603
+ # Batch queries enable searchd to perform internal optimizations,
604
+ # if possible; and reduce network connection overheads in all cases.
1465
605
  #
1466
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-addquery Section 6.6.2, "AddQuery"
1467
- # @see #query
1468
- # @see #run_queries
606
+ # For instance, running exactly the same query with different
607
+ # groupby settings will enable searchd to perform expensive
608
+ # full-text search and ranking operation only once, but compute
609
+ # multiple groupby results from its output.
1469
610
  #
1470
- def add_query(query, index = '*', comment = '', log = true)
1471
- logger.debug { "[sphinx] add_query('#{query}', '#{index}', '#{comment}'), #{self.inspect}" } if log and logger
611
+ # Parameters are exactly the same as in <tt>Query</tt> call.
612
+ # Returns index to results array returned by <tt>RunQueries</tt> call.
613
+ def AddQuery(query, index = '*', comment = '')
1472
614
  # build request
1473
-
615
+
1474
616
  # mode and limits
1475
617
  request = Request.new
1476
618
  request.put_int @offset, @limit, @mode, @ranker, @sort
@@ -1484,8 +626,8 @@ module Sphinx
1484
626
  # id64 range marker
1485
627
  request.put_int 1
1486
628
  # id64 range
1487
- request.put_int64 @min_id.to_i, @max_id.to_i
1488
-
629
+ request.put_int64 @min_id.to_i, @max_id.to_i
630
+
1489
631
  # filters
1490
632
  request.put_int @filters.length
1491
633
  @filters.each do |filter|
@@ -1504,7 +646,7 @@ module Sphinx
1504
646
  end
1505
647
  request.put_int filter['exclude'] ? 1 : 0
1506
648
  end
1507
-
649
+
1508
650
  # group-by clause, max-matches count, group-sort clause, cutoff count
1509
651
  request.put_int @groupfunc
1510
652
  request.put_string @groupby
@@ -1512,7 +654,7 @@ module Sphinx
1512
654
  request.put_string @groupsort
1513
655
  request.put_int @cutoff, @retrycount, @retrydelay
1514
656
  request.put_string @groupdistinct
1515
-
657
+
1516
658
  # anchor point
1517
659
  if @anchor.empty?
1518
660
  request.put_int 0
@@ -1521,328 +663,283 @@ module Sphinx
1521
663
  request.put_string @anchor['attrlat'], @anchor['attrlong']
1522
664
  request.put_float @anchor['lat'], @anchor['long']
1523
665
  end
1524
-
666
+
1525
667
  # per-index weights
1526
668
  request.put_int @indexweights.length
1527
669
  @indexweights.each do |idx, weight|
1528
- request.put_string idx.to_s
670
+ request.put_string idx
1529
671
  request.put_int weight
1530
672
  end
1531
-
673
+
1532
674
  # max query time
1533
675
  request.put_int @maxquerytime
1534
-
676
+
1535
677
  # per-field weights
1536
678
  request.put_int @fieldweights.length
1537
679
  @fieldweights.each do |field, weight|
1538
- request.put_string field.to_s
680
+ request.put_string field
1539
681
  request.put_int weight
1540
682
  end
1541
-
683
+
1542
684
  # comment
1543
685
  request.put_string comment
1544
-
686
+
1545
687
  # attribute overrides
1546
688
  request.put_int @overrides.length
1547
689
  for entry in @overrides do
1548
690
  request.put_string entry['attr']
1549
691
  request.put_int entry['type'], entry['values'].size
1550
692
  entry['values'].each do |id, val|
693
+ assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
694
+ assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
695
+
1551
696
  request.put_int64 id
1552
697
  case entry['type']
1553
698
  when SPH_ATTR_FLOAT
1554
- request.put_float val.to_f
699
+ request.put_float val
1555
700
  when SPH_ATTR_BIGINT
1556
- request.put_int64 val.to_i
701
+ request.put_int64 val
1557
702
  else
1558
- request.put_int val.to_i
703
+ request.put_int val
1559
704
  end
1560
705
  end
1561
706
  end
1562
-
707
+
1563
708
  # select-list
1564
709
  request.put_string @select
1565
-
710
+
1566
711
  # store request to requests array
1567
712
  @reqs << request.to_s;
1568
713
  return @reqs.length - 1
1569
714
  end
1570
- alias :AddQuery :add_query
1571
-
1572
- # Connect to searchd, runs a batch of all queries added using
1573
- # {#add_query}, obtains and returns the result sets. Returns
1574
- # +false+ and sets {#last_error} message on general error
1575
- # (such as network I/O failure). Returns a plain array of
1576
- # result sets on success.
1577
- #
1578
- # Each result set in the returned array is exactly the same as
1579
- # the result set returned from {#query}.
1580
- #
1581
- # Note that the batch query request itself almost always succeeds —
1582
- # unless there's a network error, blocking index rotation in
1583
- # progress, or another general failure which prevents the whole
1584
- # request from being processed.
1585
- #
1586
- # However individual queries within the batch might very well
1587
- # fail. In this case their respective result sets will contain
1588
- # non-empty "error" message, but no matches or query statistics.
1589
- # In the extreme case all queries within the batch could fail.
1590
- # There still will be no general error reported, because API
1591
- # was able to successfully connect to searchd, submit the batch,
1592
- # and receive the results — but every result set will have a
1593
- # specific error message.
1594
- #
1595
- # @return [Array<Hash>] an +Array+ of +Hash+es which are exactly
1596
- # the same as the result set returned from {#query}.
715
+
716
+ # Run queries batch.
1597
717
  #
1598
- # @example
1599
- # sphinx.add_query('some search text', '*', 'search page')
1600
- # results = sphinx.run_queries
718
+ # Returns an array of result sets on success.
719
+ # Returns false on network IO failure.
1601
720
  #
1602
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-runqueries Section 6.6.3, "RunQueries"
1603
- # @see #add_query
721
+ # Each result set in returned array is a hash which contains
722
+ # the same keys as the hash returned by <tt>Query</tt>, plus:
1604
723
  #
1605
- def run_queries
1606
- logger.debug { "[sphinx] run_queries(#{@reqs.length} queries)" } if logger
724
+ # * <tt>'error'</tt> -- search error for this query
725
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
726
+ def RunQueries
1607
727
  if @reqs.empty?
1608
- @error = 'No queries defined, issue add_query() first'
728
+ @error = 'No queries defined, issue AddQuery() first'
1609
729
  return false
1610
730
  end
1611
731
 
1612
- reqs, nreqs = @reqs.join(''), @reqs.length
732
+ req = @reqs.join('')
733
+ nreqs = @reqs.length
1613
734
  @reqs = []
1614
- response = perform_request(:search, reqs, nreqs)
1615
-
735
+ response = PerformRequest(:search, req, nreqs)
736
+
1616
737
  # parse response
1617
- (1..nreqs).map do
1618
- result = HashWithIndifferentAccess.new('error' => '', 'warning' => '')
1619
-
1620
- # extract status
1621
- status = result['status'] = response.get_int
1622
- if status != SEARCHD_OK
1623
- message = response.get_string
1624
- if status == SEARCHD_WARNING
1625
- result['warning'] = message
1626
- else
1627
- result['error'] = message
1628
- next result
738
+ begin
739
+ results = []
740
+ ires = 0
741
+ while ires < nreqs
742
+ ires += 1
743
+ result = {}
744
+
745
+ result['error'] = ''
746
+ result['warning'] = ''
747
+
748
+ # extract status
749
+ status = result['status'] = response.get_int
750
+ if status != SEARCHD_OK
751
+ message = response.get_string
752
+ if status == SEARCHD_WARNING
753
+ result['warning'] = message
754
+ else
755
+ result['error'] = message
756
+ results << result
757
+ next
758
+ end
1629
759
  end
1630
- end
1631
-
1632
- # read schema
1633
- nfields = response.get_int
1634
- result['fields'] = (1..nfields).map { response.get_string }
1635
-
1636
- attrs_names_in_order = []
1637
- nattrs = response.get_int
1638
- attrs = (1..nattrs).inject({}) do |hash, idx|
1639
- name, type = response.get_string, response.get_int
1640
- hash[name] = type
1641
- attrs_names_in_order << name
1642
- hash
1643
- end
1644
- result['attrs'] = attrs
1645
-
1646
- # read match count
1647
- count, id64 = response.get_ints(2)
1648
-
1649
- # read matches
1650
- result['matches'] = (1..count).map do
1651
- doc, weight = if id64 == 0
1652
- response.get_ints(2)
1653
- else
1654
- [response.get_int64, response.get_int]
760
+
761
+ # read schema
762
+ fields = []
763
+ attrs = {}
764
+ attrs_names_in_order = []
765
+
766
+ nfields = response.get_int
767
+ while nfields > 0
768
+ nfields -= 1
769
+ fields << response.get_string
1655
770
  end
1656
-
1657
- # This is a single result put in the result['matches'] array
1658
- match = { 'id' => doc, 'weight' => weight }
1659
- match['attrs'] = attrs_names_in_order.inject({}) do |hash, name|
1660
- hash[name] = case attrs[name]
1661
- when SPH_ATTR_BIGINT
1662
- # handle 64-bit ints
1663
- response.get_int64
1664
- when SPH_ATTR_FLOAT
1665
- # handle floats
1666
- response.get_float
1667
- else
1668
- # handle everything else as unsigned ints
1669
- val = response.get_int
1670
- if (attrs[name] & SPH_ATTR_MULTI) != 0
1671
- (1..val).map { response.get_int }
771
+ result['fields'] = fields
772
+
773
+ nattrs = response.get_int
774
+ while nattrs > 0
775
+ nattrs -= 1
776
+ attr = response.get_string
777
+ type = response.get_int
778
+ attrs[attr] = type
779
+ attrs_names_in_order << attr
780
+ end
781
+ result['attrs'] = attrs
782
+
783
+ # read match count
784
+ count = response.get_int
785
+ id64 = response.get_int
786
+
787
+ # read matches
788
+ result['matches'] = []
789
+ while count > 0
790
+ count -= 1
791
+
792
+ if id64 != 0
793
+ doc = response.get_int64
794
+ weight = response.get_int
795
+ else
796
+ doc, weight = response.get_ints(2)
797
+ end
798
+
799
+ r = {} # This is a single result put in the result['matches'] array
800
+ r['id'] = doc
801
+ r['weight'] = weight
802
+ attrs_names_in_order.each do |a|
803
+ r['attrs'] ||= {}
804
+
805
+ case attrs[a]
806
+ when SPH_ATTR_BIGINT
807
+ # handle 64-bit ints
808
+ r['attrs'][a] = response.get_int64
809
+ when SPH_ATTR_FLOAT
810
+ # handle floats
811
+ r['attrs'][a] = response.get_float
812
+ when SPH_ATTR_STRING
813
+ r['attrs'][a] = response.get_string
1672
814
  else
1673
- val
1674
- end
815
+ # handle everything else as unsigned ints
816
+ val = response.get_int
817
+ if (attrs[a] & SPH_ATTR_MULTI) != 0
818
+ r['attrs'][a] = []
819
+ 1.upto(val) do
820
+ r['attrs'][a] << response.get_int
821
+ end
822
+ else
823
+ r['attrs'][a] = val
824
+ end
825
+ end
1675
826
  end
1676
- hash
827
+ result['matches'] << r
1677
828
  end
1678
- match
1679
- end
1680
- result['total'], result['total_found'], msecs = response.get_ints(3)
1681
- result['time'] = '%.3f' % (msecs / 1000.0)
1682
-
1683
- nwords = response.get_int
1684
- result['words'] = (1..nwords).inject({}) do |hash, idx|
1685
- word = response.get_string
1686
- docs, hits = response.get_ints(2)
1687
- hash[word] = { 'docs' => docs, 'hits' => hits }
1688
- hash
829
+ result['total'], result['total_found'], msecs, words = response.get_ints(4)
830
+ result['time'] = '%.3f' % (msecs / 1000.0)
831
+
832
+ result['words'] = {}
833
+ while words > 0
834
+ words -= 1
835
+ word = response.get_string
836
+ docs, hits = response.get_ints(2)
837
+ result['words'][word] = { 'docs' => docs, 'hits' => hits }
838
+ end
839
+
840
+ results << result
1689
841
  end
1690
-
1691
- result
842
+ #rescue EOFError
843
+ # @error = 'incomplete reply'
844
+ # raise SphinxResponseError, @error
1692
845
  end
846
+
847
+ return results
1693
848
  end
1694
- alias :RunQueries :run_queries
1695
-
1696
- #=================================================================
1697
- # Additional functionality
1698
- #=================================================================
1699
-
1700
- # Excerpts (snippets) builder function. Connects to searchd, asks
1701
- # it to generate excerpts (snippets) from given documents, and
1702
- # returns the results.
1703
- #
1704
- # +docs+ is a plain array of strings that carry the documents'
1705
- # contents. +index+ is an index name string. Different settings
1706
- # (such as charset, morphology, wordforms) from given index will
1707
- # be used. +words+ is a string that contains the keywords to
1708
- # highlight. They will be processed with respect to index settings.
1709
- # For instance, if English stemming is enabled in the index,
1710
- # "shoes" will be highlighted even if keyword is "shoe". Starting
1711
- # with version 0.9.9-rc1, keywords can contain wildcards, that
1712
- # work similarly to star-syntax available in queries.
1713
- #
1714
- # @param [Array<String>] docs an array of strings which represent
1715
- # the documents' contents.
1716
- # @param [String] index an index which settings will be used for
1717
- # stemming, lexing and case folding.
1718
- # @param [String] words a string which contains the words to highlight.
1719
- # @param [Hash] opts a +Hash+ which contains additional optional
1720
- # highlighting parameters.
1721
- # @option opts [String] 'before_match' ("<b>") a string to insert before a
1722
- # keyword match.
1723
- # @option opts [String] 'after_match' ("</b>") a string to insert after a
1724
- # keyword match.
1725
- # @option opts [String] 'chunk_separator' (" ... ") a string to insert
1726
- # between snippet chunks (passages).
1727
- # @option opts [Integer] 'limit' (256) maximum snippet size, in symbols
1728
- # (codepoints).
1729
- # @option opts [Integer] 'around' (5) how many words to pick around
1730
- # each matching keywords block.
1731
- # @option opts [Boolean] 'exact_phrase' (false) whether to highlight exact
1732
- # query phrase matches only instead of individual keywords.
1733
- # @option opts [Boolean] 'single_passage' (false) whether to extract single
1734
- # best passage only.
1735
- # @option opts [Boolean] 'use_boundaries' (false) whether to extract
1736
- # passages by phrase boundaries setup in tokenizer.
1737
- # @option opts [Boolean] 'weight_order' (false) whether to sort the
1738
- # extracted passages in order of relevance (decreasing weight),
1739
- # or in order of appearance in the document (increasing position).
1740
- # @return [Array<String>, false] a plain array of strings with
1741
- # excerpts (snippets) on success; otherwise, +false+.
1742
- #
1743
- # @raise [ArgumentError] Occurred when parameters are invalid.
1744
- #
1745
- # @example
1746
- # sphinx.build_excerpts(['hello world', 'hello me'], 'idx', 'hello')
1747
- #
1748
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-buildexcerpts Section 6.7.1, "BuildExcerpts"
1749
- #
1750
- def build_excerpts(docs, index, words, opts = {})
1751
- raise ArgumentError, '"docs" argument must be Array' unless docs.kind_of?(Array)
1752
- raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
1753
- raise ArgumentError, '"words" argument must be String' unless words.kind_of?(String)
1754
- raise ArgumentError, '"opts" argument must be Hash' unless opts.kind_of?(Hash)
1755
-
1756
- docs.each do |doc|
1757
- raise ArgumentError, '"docs" argument must be Array of Strings' unless doc.kind_of?(String)
1758
- end
849
+
850
+ # Connect to searchd server and generate excerpts from given documents.
851
+ #
852
+ # * <tt>docs</tt> -- an array of strings which represent the documents' contents
853
+ # * <tt>index</tt> -- a string specifying the index whose settings will be used
854
+ # for stemming, lexing and case folding
855
+ # * <tt>words</tt> -- a string which contains the words to highlight
856
+ # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
857
+ #
858
+ # You can use following parameters:
859
+ # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
860
+ # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
861
+ # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
862
+ # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
863
+ # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
864
+ # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
865
+ # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
866
+ # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
867
+ # * <tt>'weight_order'</tt> -- whether to sort best passages by weight instead of document order, default is <tt>false</tt>
868
+ #
869
+ # Returns false on failure.
870
+ # Returns an array of string excerpts on success.
871
+ def BuildExcerpts(docs, index, words, opts = {})
872
+ assert { docs.instance_of? Array }
873
+ assert { index.instance_of? String }
874
+ assert { words.instance_of? String }
875
+ assert { opts.instance_of? Hash }
1759
876
 
1760
877
  # fixup options
1761
- opts = HashWithIndifferentAccess.new(
1762
- 'before_match' => '<b>',
1763
- 'after_match' => '</b>',
1764
- 'chunk_separator' => ' ... ',
1765
- 'limit' => 256,
1766
- 'around' => 5,
1767
- 'exact_phrase' => false,
1768
- 'single_passage' => false,
1769
- 'use_boundaries' => false,
1770
- 'weight_order' => false
1771
- ).update(opts)
1772
-
878
+ opts['before_match'] ||= '<b>';
879
+ opts['after_match'] ||= '</b>';
880
+ opts['chunk_separator'] ||= ' ... ';
881
+ opts['limit'] ||= 256;
882
+ opts['around'] ||= 5;
883
+ opts['exact_phrase'] ||= false
884
+ opts['single_passage'] ||= false
885
+ opts['use_boundaries'] ||= false
886
+ opts['weight_order'] ||= false
887
+ opts['query_mode'] ||= false
888
+
1773
889
  # build request
1774
-
890
+
1775
891
  # v.1.0 req
1776
892
  flags = 1
1777
893
  flags |= 2 if opts['exact_phrase']
1778
894
  flags |= 4 if opts['single_passage']
1779
895
  flags |= 8 if opts['use_boundaries']
1780
896
  flags |= 16 if opts['weight_order']
1781
-
897
+ flags |= 32 if opts['query_mode']
898
+
1782
899
  request = Request.new
1783
900
  request.put_int 0, flags # mode=0, flags=1 (remove spaces)
1784
901
  # req index
1785
- request.put_string index.to_s
902
+ request.put_string index
1786
903
  # req words
1787
904
  request.put_string words
1788
-
905
+
1789
906
  # options
1790
907
  request.put_string opts['before_match']
1791
908
  request.put_string opts['after_match']
1792
909
  request.put_string opts['chunk_separator']
1793
910
  request.put_int opts['limit'].to_i, opts['around'].to_i
1794
-
911
+
1795
912
  # documents
1796
913
  request.put_int docs.size
1797
- request.put_string(*docs)
1798
-
1799
- response = perform_request(:excerpt, request)
914
+ docs.each do |doc|
915
+ assert { doc.instance_of? String }
1800
916
 
917
+ request.put_string doc
918
+ end
919
+
920
+ response = PerformRequest(:excerpt, request)
921
+
1801
922
  # parse response
1802
- docs.map { response.get_string }
923
+ begin
924
+ res = []
925
+ docs.each do |doc|
926
+ res << response.get_string
927
+ end
928
+ rescue EOFError
929
+ @error = 'incomplete reply'
930
+ raise SphinxResponseError, @error
931
+ end
932
+ return res
1803
933
  end
1804
- alias :BuildExcerpts :build_excerpts
1805
-
1806
- # Extracts keywords from query using tokenizer settings for given
1807
- # index, optionally with per-keyword occurrence statistics.
1808
- # Returns an array of hashes with per-keyword information.
1809
- #
1810
- # +query+ is a query to extract keywords from. +index+ is a name of
1811
- # the index to get tokenizing settings and keyword occurrence
1812
- # statistics from. +hits+ is a boolean flag that indicates whether
1813
- # keyword occurrence statistics are required.
1814
- #
1815
- # The result set consists of +Hash+es with the following keys and values:
1816
- #
1817
- # <tt>'tokenized'</tt>::
1818
- # Tokenized keyword.
1819
- # <tt>'normalized'</tt>::
1820
- # Normalized keyword.
1821
- # <tt>'docs'</tt>::
1822
- # A number of documents where keyword is found (if +hits+ param is +true+).
1823
- # <tt>'hits'</tt>::
1824
- # A number of keywords occurrences among all documents (if +hits+ param is +true+).
1825
- #
1826
- # @param [String] query a query string.
1827
- # @param [String] index an index to get tokenizing settings and
1828
- # keyword occurrence statistics from.
1829
- # @param [Boolean] hits indicates whether keyword occurrence
1830
- # statistics are required.
1831
- # @return [Array<Hash>] an +Array+ of +Hash+es in format specified
1832
- # above.
1833
- #
1834
- # @raise [ArgumentError] Occurred when parameters are invalid.
1835
- #
1836
- # @example
1837
- # keywords = sphinx.build_keywords("this.is.my query", "test1", false)
1838
- #
1839
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-buildkeywords Section 6.7.3, "BuildKeywords"
1840
- #
1841
- def build_keywords(query, index, hits)
1842
- raise ArgumentError, '"query" argument must be String' unless query.kind_of?(String)
1843
- raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
1844
- raise ArgumentError, '"hits" argument must be Boolean' unless hits.kind_of?(TrueClass) or hits.kind_of?(FalseClass)
1845
-
934
+
935
+ # Connect to searchd server, and generate keyword list for a given query.
936
+ #
937
+ # Returns an array of hashes, one per keyword ('tokenized', 'normalized', plus 'docs' and 'hits' when +hits+ is true), on success.
938
+ def BuildKeywords(query, index, hits)
939
+ assert { query.instance_of? String }
940
+ assert { index.instance_of? String }
941
+ assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
942
+
1846
943
  # build request
1847
944
  request = Request.new
1848
945
  # v.1.0 req
@@ -1850,107 +947,77 @@ module Sphinx
1850
947
  request.put_string index # req index
1851
948
  request.put_int hits ? 1 : 0
1852
949
 
1853
- response = perform_request(:keywords, request)
1854
-
950
+ response = PerformRequest(:keywords, request)
951
+
1855
952
  # parse response
1856
- nwords = response.get_int
1857
- (0...nwords).map do
1858
- tokenized = response.get_string
1859
- normalized = response.get_string
1860
-
1861
- entry = HashWithIndifferentAccess.new('tokenized' => tokenized, 'normalized' => normalized)
1862
- entry['docs'], entry['hits'] = response.get_ints(2) if hits
1863
-
1864
- entry
953
+ begin
954
+ res = []
955
+ nwords = response.get_int
956
+ 0.upto(nwords - 1) do |i|
957
+ tokenized = response.get_string
958
+ normalized = response.get_string
959
+
960
+ entry = { 'tokenized' => tokenized, 'normalized' => normalized }
961
+ entry['docs'], entry['hits'] = response.get_ints(2) if hits
962
+
963
+ res << entry
964
+ end
965
+ rescue EOFError
966
+ @error = 'incomplete reply'
967
+ raise SphinxResponseError, @error
1865
968
  end
969
+
970
+ return res
1866
971
  end
1867
- alias :BuildKeywords :build_keywords
1868
972
 
1869
- # Instantly updates given attribute values in given documents.
1870
- # Returns number of actually updated documents (0 or more) on
1871
- # success, or -1 on failure.
1872
- #
1873
- # +index+ is a name of the index (or indexes) to be updated.
1874
- # +attrs+ is a plain array with string attribute names, listing
1875
- # attributes that are updated. +values+ is a Hash where key is
1876
- # document ID, and value is a plain array of new attribute values.
1877
- #
1878
- # +index+ can be either a single index name or a list, like in
1879
- # {#query}. Unlike {#query}, wildcard is not allowed and all the
1880
- # indexes to update must be specified explicitly. The list of
1881
- # indexes can include distributed index names. Updates on
1882
- # distributed indexes will be pushed to all agents.
1883
- #
1884
- # The updates only work with docinfo=extern storage strategy.
1885
- # They are very fast because they're working fully in RAM, but
1886
- # they can also be made persistent: updates are saved on disk
1887
- # on clean searchd shutdown initiated by SIGTERM signal. With
1888
- # additional restrictions, updates are also possible on MVA
1889
- # attributes; refer to mva_updates_pool directive for details.
973
+ # Batch update given attributes in given rows in given indexes.
1890
974
  #
1891
- # The first sample statement will update document 1 in index
1892
- # "test1", setting "group_id" to 456. The second one will update
1893
- # documents 1001, 1002 and 1003 in index "products". For document
1894
- # 1001, the new price will be set to 123 and the new amount in
1895
- # stock to 5; for document 1002, the new price will be 37 and the
1896
- # new amount will be 11; etc. The third one updates document 1
1897
- # in index "test2", setting MVA attribute "group_id" to [456, 789].
975
+ # * +index+ is a name of the index to be updated
976
+ # * +attrs+ is an array of attribute name strings.
977
+ # * +values+ is a hash where key is document id, and value is an array of
978
+ # new attribute values
979
+ # * +mva+ identifies whether to update MVA attributes
1898
980
  #
1899
- # @example
1900
- # sphinx.update_attributes("test1", ["group_id"], { 1 => [456] });
1901
- # sphinx.update_attributes("products", ["price", "amount_in_stock"],
1902
- # { 1001 => [123, 5], 1002 => [37, 11], 1003 => [25, 129] });
1903
- # sphinx.update_attributes('test2', ['group_id'], { 1 => [[456, 789]] }, true)
981
+ # Returns number of actually updated documents (0 or more) on success.
982
+ # Returns -1 on failure.
1904
983
  #
1905
- # @param [String] index a name of the index to be updated.
1906
- # @param [Array<String>] attrs an array of attribute name strings.
1907
- # @param [Hash] values is a hash where key is document id, and
1908
- # value is an array of new attribute values.
1909
- # @param [Boolean] mva indicating whether to update MVA.
1910
- # @return [Integer] number of actually updated documents (0 or more) on success,
1911
- # -1 on failure.
1912
- #
1913
- # @raise [ArgumentError] Occurred when parameters are invalid.
1914
- #
1915
- # @see http://www.sphinxsearch.com/docs/current.html#api-func-updateatttributes Section 6.7.2, "UpdateAttributes"
1916
- #
1917
- def update_attributes(index, attrs, values, mva = false)
984
+ # Usage example:
985
+ # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
986
+ def UpdateAttributes(index, attrs, values, mva = false)
1918
987
  # verify everything
1919
- raise ArgumentError, '"index" argument must be String' unless index.kind_of?(String) or index.kind_of?(Symbol)
1920
- raise ArgumentError, '"mva" argument must be Boolean' unless mva.kind_of?(TrueClass) or mva.kind_of?(FalseClass)
1921
-
1922
- raise ArgumentError, '"attrs" argument must be Array' unless attrs.kind_of?(Array)
988
+ assert { index.instance_of? String }
989
+ assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
990
+
991
+ assert { attrs.instance_of? Array }
1923
992
  attrs.each do |attr|
1924
- raise ArgumentError, '"attrs" argument must be Array of Strings' unless attr.kind_of?(String) or attr.kind_of?(Symbol)
993
+ assert { attr.instance_of? String }
1925
994
  end
1926
-
1927
- raise ArgumentError, '"values" argument must be Hash' unless values.kind_of?(Hash)
995
+
996
+ assert { values.instance_of? Hash }
1928
997
  values.each do |id, entry|
1929
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless id.kind_of?(Integer)
1930
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array' unless entry.kind_of?(Array)
1931
- raise ArgumentError, "\"values\" argument Hash values Array must have #{attrs.length} elements" unless entry.length == attrs.length
998
+ assert { id.instance_of? Fixnum }
999
+ assert { entry.instance_of? Array }
1000
+ assert { entry.length == attrs.length }
1932
1001
  entry.each do |v|
1933
1002
  if mva
1934
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays' unless v.kind_of?(Array)
1935
- v.each do |vv|
1936
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Arrays of Integers' unless vv.kind_of?(Integer)
1937
- end
1003
+ assert { v.instance_of? Array }
1004
+ v.each { |vv| assert { vv.instance_of? Fixnum } }
1938
1005
  else
1939
- raise ArgumentError, '"values" argument must be Hash map of Integer to Array of Integers' unless v.kind_of?(Integer)
1006
+ assert { v.instance_of? Fixnum }
1940
1007
  end
1941
1008
  end
1942
1009
  end
1943
-
1010
+
1944
1011
  # build request
1945
1012
  request = Request.new
1946
1013
  request.put_string index
1947
-
1014
+
1948
1015
  request.put_int attrs.length
1949
1016
  for attr in attrs
1950
1017
  request.put_string attr
1951
1018
  request.put_int mva ? 1 : 0
1952
1019
  end
1953
-
1020
+
1954
1021
  request.put_int values.length
1955
1022
  values.each do |id, entry|
1956
1023
  request.put_int64 id
@@ -1960,266 +1027,152 @@ module Sphinx
1960
1027
  request.put_int(*entry)
1961
1028
  end
1962
1029
  end
1963
-
1964
- response = perform_request(:update, request)
1965
-
1966
- # parse response
1967
- response.get_int
1968
- end
1969
- alias :UpdateAttributes :update_attributes
1970
-
1971
- # Escapes characters that are treated as special operators by the
1972
- # query language parser.
1973
- #
1974
- # This function might seem redundant because it's trivial to
1975
- # implement in any calling application. However, as the set of
1976
- # special characters might change over time, it makes sense to
1977
- # have an API call that is guaranteed to escape all such
1978
- # characters at all times.
1979
- #
1980
- # @param [String] string is a string to escape.
1981
- # @return [String] an escaped string.
1982
- #
1983
- # @example:
1984
- # escaped = sphinx.escape_string "escaping-sample@query/string"
1985
- #
1986
- def escape_string(string)
1987
- string.to_s.gsub(/([\\()|\-!@~"&\/\^\$=])/, '\\\\\\1')
1988
- end
1989
- alias :EscapeString :escape_string
1990
-
1991
- # Queries searchd status, and returns an array of status variable name
1992
- # and value pairs.
1993
- #
1994
- # @return [Array<Array>, Array<Hash>] a table containing searchd status information.
1995
- # If there are more than one server configured ({#set_servers}), an
1996
- # +Array+ of +Hash+es will be returned, one for each server. Hash will
1997
- # contain <tt>:server</tt> element with string name of server (<tt>host:port</tt>)
1998
- # and <tt>:status</tt> table just like one for a single server. In case of
1999
- # any error, it will be stored in the <tt>:error</tt> key.
2000
- #
2001
- # @example Single server
2002
- # status = sphinx.status
2003
- # puts status.map { |key, value| "#{key.rjust(20)}: #{value}" }
2004
- #
2005
- # @example Multiple servers
2006
- # sphinx.set_servers([
2007
- # { :host => 'localhost' },
2008
- # { :host => 'browse02.local' }
2009
- # ])
2010
- # sphinx.status.each do |report|
2011
- # puts "=== #{report[:server]}"
2012
- # if report[:error]
2013
- # puts "Error: #{report[:error]}"
2014
- # else
2015
- # puts report[:status].map { |key, value| "#{key.rjust(20)}: #{value}" }
2016
- # end
2017
- # end
2018
- #
2019
- def status
2020
- request = Request.new
2021
- request.put_int(1)
2022
-
1030
+
1031
+ response = PerformRequest(:update, request)
1032
+
2023
1033
  # parse response
2024
- results = @servers.map do |server|
2025
- begin
2026
- response = perform_request(:status, request, nil, server)
2027
- rows, cols = response.get_ints(2)
2028
- status = (0...rows).map do
2029
- (0...cols).map { response.get_string }
2030
- end
2031
- HashWithIndifferentAccess.new(:server => server.to_s, :status => status)
2032
- rescue SphinxError
2033
- # Re-raise error when a single server configured
2034
- raise if @servers.size == 1
2035
- HashWithIndifferentAccess.new(:server => server.to_s, :error => self.last_error)
2036
- end
1034
+ begin
1035
+ return response.get_int
1036
+ rescue EOFError
1037
+ @error = 'incomplete reply'
1038
+ raise SphinxResponseError, @error
2037
1039
  end
2038
-
2039
- @servers.size > 1 ? results : results.first[:status]
2040
1040
  end
2041
- alias :Status :status
2042
-
2043
- #=================================================================
2044
- # Persistent connections
2045
- #=================================================================
2046
-
2047
- # Opens persistent connection to the server.
2048
- #
2049
- # This method could be used only when a single searchd server
2050
- # configured.
2051
- #
2052
- # @return [Boolean] +true+ when persistent connection has been
2053
- # established; otherwise, +false+.
2054
- #
2055
- # @example
2056
- # begin
2057
- # sphinx.open
2058
- # # perform several requests
2059
- # ensure
2060
- # sphinx.close
2061
- # end
2062
- #
2063
- # @see #close
2064
- #
2065
- def open
2066
- if @servers.size > 1
2067
- @error = 'too many servers. persistent socket allowed only for a single server.'
2068
- return false
2069
- end
2070
-
2071
- if @servers.first.persistent?
1041
+
1042
+ # persistent connections
1043
+
1044
+ def Open
1045
+ unless @socket === false
2072
1046
  @error = 'already connected'
2073
- return false;
1047
+ return false
2074
1048
  end
2075
-
1049
+
2076
1050
  request = Request.new
2077
1051
  request.put_int(1)
2078
-
2079
- perform_request(:persist, request, nil) do |server, socket|
2080
- server.make_persistent!(socket)
2081
- end
1052
+ @socket = PerformRequest(:persist, request, nil, true)
2082
1053
 
2083
1054
  true
2084
1055
  end
2085
- alias :Open :open
2086
-
2087
- # Closes previously opened persistent connection.
2088
- #
2089
- # This method could be used only when a single searchd server
2090
- # configured.
2091
- #
2092
- # @return [Boolean] +true+ when persistent connection has been
2093
- # closed; otherwise, +false+.
2094
- #
2095
- # @example
2096
- # begin
2097
- # sphinx.open
2098
- # # perform several requests
2099
- # ensure
2100
- # sphinx.close
2101
- # end
2102
- #
2103
- # @see #open
2104
- #
2105
- def close
2106
- if @servers.size > 1
2107
- @error = 'too many servers. persistent socket allowed only for a single server.'
2108
- return false
2109
- end
2110
-
2111
- unless @servers.first.persistent?
1056
+
1057
+ def Close
1058
+ if @socket === false
2112
1059
  @error = 'not connected'
2113
1060
  return false;
2114
1061
  end
1062
+
1063
+ @socket.close
1064
+ @socket = false
1065
+
1066
+ true
1067
+ end
1068
+
1069
+ def Status
1070
+ request = Request.new
1071
+ request.put_int(1)
1072
+ response = PerformRequest(:status, request)
2115
1073
 
2116
- @servers.first.close_persistent!
1074
+ # parse response
1075
+ begin
1076
+ rows, cols = response.get_ints(2)
1077
+
1078
+ res = []
1079
+ 0.upto(rows - 1) do |i|
1080
+ res[i] = []
1081
+ 0.upto(cols - 1) do |j|
1082
+ res[i] << response.get_string
1083
+ end
1084
+ end
1085
+ rescue EOFError
1086
+ @error = 'incomplete reply'
1087
+ raise SphinxResponseError, @error
1088
+ end
1089
+
1090
+ res
2117
1091
  end
2118
- alias :Close :close
1092
+
1093
+ def FlushAttrs
1094
+ request = Request.new
1095
+ response = PerformRequest(:flushattrs, request)
2119
1096
 
1097
+ # parse response
1098
+ begin
1099
+ response.get_int
1100
+ rescue EOFError
1101
+ -1
1102
+ end
1103
+ end
1104
+
2120
1105
  protected
2121
-
2122
- # Connect, send query, get response.
2123
- #
2124
- # Use this method to communicate with Sphinx server. It ensures connection
2125
- # will be instantiated properly, all headers will be generated properly, etc.
2126
- #
2127
- # @param [Symbol, String] command searchd command to perform (<tt>:search</tt>, <tt>:excerpt</tt>,
2128
- # <tt>:update</tt>, <tt>:keywords</tt>, <tt>:persist</tt>, <tt>:status</tt>,
2129
- # <tt>:query</tt>, <tt>:flushattrs</tt>. See <tt>SEARCHD_COMMAND_*</tt> for details).
2130
- # @param [Sphinx::Request] request contains request body.
2131
- # @param [Integer] additional additional integer data to be placed between header and body.
2132
- # @param [Sphinx::Server] server where perform request on. This is special
2133
- # parameter for internal usage. If specified, request will be performed
2134
- # on specified server, and it will try to establish connection to this
2135
- # server only once.
2136
- #
2137
- # @yield if block given, response will not be parsed, plain socket
2138
- # will be yielded instead. This is special mode used for
2139
- # persistent connections, do not use for other tasks.
2140
- # @yieldparam [Sphinx::Server] server a server where request was performed on.
2141
- # @yieldparam [Sphinx::BufferedIO] socket a socket used to perform the request.
2142
- # @return [Sphinx::Response] contains response body.
2143
- #
2144
- # @see #parse_response
2145
- #
2146
- def perform_request(command, request, additional = nil, server = nil)
2147
- if server
2148
- attempts = 1
2149
- else
2150
- server = case request
2151
- when String
2152
- Zlib.crc32(request)
2153
- when Request
2154
- request.crc32
2155
- else
2156
- raise ArgumentError, "request argument must be String or Sphinx::Request"
1106
+
1107
+ # Connect to searchd server.
1108
+ def Connect
1109
+ return @socket unless @socket === false
1110
+
1111
+ begin
1112
+ if @path
1113
+ sock = UNIXSocket.new(@path)
1114
+ else
1115
+ sock = TCPSocket.new(@host, @port)
1116
+ end
1117
+ rescue => e
1118
+ location = @path || "#{@host}:#{@port}"
1119
+ @error = "connection to #{location} failed ("
1120
+ if e.kind_of?(SystemCallError)
1121
+ @error << "errno=#{e.class::Errno}, "
2157
1122
  end
2158
- attempts = nil
1123
+ @error << "msg=#{e.message})"
1124
+ @connerror = true
1125
+ raise SphinxConnectError, @error
2159
1126
  end
2160
1127
 
2161
- with_server(server, attempts) do |server|
2162
- logger.info { "[sphinx] #{command} on server #{server}" } if logger
2163
-
2164
- cmd = command.to_s.upcase
2165
- command_id = Sphinx::Client.const_get("SEARCHD_COMMAND_#{cmd}")
2166
- command_ver = Sphinx::Client.const_get("VER_COMMAND_#{cmd}")
2167
-
2168
- with_socket(server) do |socket|
2169
- len = request.to_s.length + (additional.nil? ? 0 : 4)
2170
- header = [command_id, command_ver, len].pack('nnN')
2171
- header << [additional].pack('N') unless additional.nil?
2172
-
2173
- socket.write(header + request.to_s)
2174
-
2175
- if block_given?
2176
- yield server, socket
2177
- else
2178
- parse_response(socket, command_ver)
2179
- end
2180
- end
1128
+ # send my version
1129
+ # this is a subtle part. we must do it before (!) reading back from searchd.
1130
+ # because otherwise under some conditions (reported on FreeBSD for instance)
1131
+ # TCP stack could throttle write-write-read pattern because of Nagle.
1132
+ sock.send([1].pack('N'), 0)
1133
+
1134
+ v = sock.recv(4).unpack('N*').first
1135
+ if v < 1
1136
+ sock.close
1137
+ @error = "expected searchd protocol version 1+, got version '#{v}'"
1138
+ raise SphinxConnectError, @error
2181
1139
  end
1140
+
1141
+ sock
2182
1142
  end
2183
-
2184
- # This is internal method which gets and parses response packet from
2185
- # searchd server.
2186
- #
2187
- # There are several exceptions which could be thrown in this method:
2188
- #
2189
- # @param [Sphinx::BufferedIO] socket an input stream object.
2190
- # @param [Integer] client_version a command version which client supports.
2191
- # @return [Sphinx::Response] could be used for context-based
2192
- # parsing of reply from the server.
2193
- #
2194
- # @raise [SystemCallError, SocketError] should be handled by caller (see {#with_socket}).
2195
- # @raise [SphinxResponseError] incomplete reply from searchd.
2196
- # @raise [SphinxInternalError] searchd internal error.
2197
- # @raise [SphinxTemporaryError] searchd temporary error.
2198
- # @raise [SphinxUnknownError] searchd unknown error.
2199
- #
2200
- # @see #with_socket
2201
- # @private
2202
- #
2203
- def parse_response(socket, client_version)
1143
+
1144
+ # Get and check response packet from searchd server.
1145
+ def GetResponse(sock, client_version)
2204
1146
  response = ''
2205
- status = ver = len = 0
2206
-
2207
- # Read server reply from server. All exceptions are handled by {#with_socket}.
2208
- header = socket.read(8)
1147
+ len = 0
1148
+
1149
+ header = sock.recv(8)
2209
1150
  if header.length == 8
2210
1151
  status, ver, len = header.unpack('n2N')
2211
- response = socket.read(len) if len > 0
1152
+ left = len.to_i
1153
+ while left > 0 do
1154
+ begin
1155
+ chunk = sock.recv(left)
1156
+ if chunk
1157
+ response << chunk
1158
+ left -= chunk.length
1159
+ end
1160
+ rescue EOFError
1161
+ break
1162
+ end
1163
+ end
2212
1164
  end
2213
-
1165
+ sock.close if @socket === false
1166
+
2214
1167
  # check response
2215
1168
  read = response.length
2216
1169
  if response.empty? or read != len.to_i
2217
- error = len > 0 \
1170
+ @error = len \
2218
1171
  ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
2219
1172
  : 'received zero-sized searchd response'
2220
- raise SphinxResponseError, error
1173
+ raise SphinxResponseError, @error
2221
1174
  end
2222
-
1175
+
2223
1176
  # check status
2224
1177
  if (status == SEARCHD_WARNING)
2225
1178
  wlen = response[0, 4].unpack('N*').first
@@ -2228,199 +1181,50 @@ module Sphinx
2228
1181
  end
2229
1182
 
2230
1183
  if status == SEARCHD_ERROR
2231
- error = 'searchd error: ' + response[4, response.length - 4]
2232
- raise SphinxInternalError, error
1184
+ @error = 'searchd error: ' + response[4, response.length - 4]
1185
+ raise SphinxInternalError, @error
2233
1186
  end
2234
-
1187
+
2235
1188
  if status == SEARCHD_RETRY
2236
- error = 'temporary searchd error: ' + response[4, response.length - 4]
2237
- raise SphinxTemporaryError, error
1189
+ @error = 'temporary searchd error: ' + response[4, response.length - 4]
1190
+ raise SphinxTemporaryError, @error
2238
1191
  end
2239
-
1192
+
2240
1193
  unless status == SEARCHD_OK
2241
- error = "unknown status code: '#{status}'"
2242
- raise SphinxUnknownError, error
1194
+ @error = "unknown status code: '#{status}'"
1195
+ raise SphinxUnknownError, @error
2243
1196
  end
2244
-
1197
+
2245
1198
  # check version
2246
1199
  if ver < client_version
2247
1200
  @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
2248
1201
  "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
2249
1202
  end
2250
-
2251
- Response.new(response)
2252
- end
2253
-
2254
- # This is internal method which selects next server (round-robin)
2255
- # and yields it to the block passed.
2256
- #
2257
- # In case of connection error, it will try next server several times
2258
- # (see {#set_connect_timeout} method details). If all servers are down,
2259
- # it will set error attribute (could be retrieved with {#last_error}
2260
- # method) with the last exception message, and {#connect_error?}
2261
- # method will return true. Also, {SphinxConnectError} exception
2262
- # will be raised.
2263
- #
2264
- # @overload with_server(server_index)
2265
- # Get the server based on some seed value (usually CRC32 of
2266
- # request). In this case initial server will be chosen using
2267
- # this seed value; in case of connection failure, the next server
2268
- # in servers list will be used.
2269
- # @param [Integer] server_index server index, must be any
2270
- # integer value (not necessarily less than number of servers.)
2271
- # @param [Integer] attempts how many retries to perform. Use
2272
- # +nil+ to perform retries configured with {#set_connect_timeout}.
2273
- # @overload with_server(server)
2274
- # Get the server specified as a parameter. If specified, request
2275
- # will be performed on specified server, and it will try to
2276
- # establish connection to this server only once.
2277
- # @param [Server] server server to perform request on.
2278
- # @param [Integer] attempts how many retries to perform. Use
2279
- # +nil+ to perform retries configured with {#set_connect_timeout}.
2280
- #
2281
- # @yield a block which performs request on a given server.
2282
- # @yieldparam [Sphinx::Server] server contains information
2283
- # about the server to perform request on.
2284
- # @raise [SphinxConnectError] on any connection error.
2285
- #
2286
- def with_server(server = nil, attempts = nil)
2287
- case server
2288
- when Server
2289
- idx = @servers.index(server) || 0
2290
- s = server
2291
- when Integer
2292
- idx = server % @servers.size
2293
- s = @servers[idx]
2294
- when NilClass
2295
- idx = 0
2296
- s = @servers[idx]
2297
- else
2298
- raise ArgumentError, 'server argument must be Integer or Sphinx::Server'
2299
- end
2300
- attempts ||= @retries
2301
- begin
2302
- yield s
2303
- rescue SphinxConnectError => e
2304
- logger.warn { "[sphinx] server failed: #{e.class.name}: #{e.message}" } if logger
2305
- # Connection error! Do we need to try it again?
2306
- attempts -= 1
2307
- if attempts > 0
2308
- logger.info { "[sphinx] connection to server #{s.inspect} DIED! Retrying operation..." } if logger
2309
- # Get the next server
2310
- idx = (idx + 1) % @servers.size
2311
- s = @servers[idx]
2312
- retry
2313
- end
2314
-
2315
- # Re-raise original exception
2316
- @error = e.message
2317
- @connerror = true
2318
- raise
2319
- end
1203
+
1204
+ return response
2320
1205
  end
2321
-
2322
- # This is internal method which retrieves socket for a given server,
2323
- # initiates Sphinx session, and yields this socket to a block passed.
2324
- #
2325
- # In case of any problems with session initiation, {SphinxConnectError}
2326
- # will be raised, because this is part of connection establishing. See
2327
- # {#with_server} method details to get more information about how this
2328
- # exception is handled.
2329
- #
2330
- # Socket retrieving routine is wrapped in a block with its own
2331
- # timeout value (see {#set_connect_timeout}). This is done in
2332
- # {Server#get_socket} method, so check it for details.
2333
- #
2334
- # Request execution is wrapped with block with another timeout
2335
- # (see {#set_request_timeout}). This ensures no Sphinx request will
2336
- # take unreasonable time.
2337
- #
2338
- # In case of any Sphinx error (incomplete reply, internal or temporary
2339
- # error), connection to the server will be re-established, and request
2340
- # will be retried (see {#set_request_timeout}). Of course, if connection
2341
- # could not be established, next server will be selected (see explanation
2342
- # above).
2343
- #
2344
- # @param [Sphinx::Server] server contains information
2345
- # about the server to perform request on.
2346
- # @yield a block which will actually perform the request.
2347
- # @yieldparam [Sphinx::BufferedIO] socket a socket used to
2348
- # perform the request.
2349
- #
2350
- # @raise [SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError]
2351
- # on any response error.
2352
- # @raise [SphinxConnectError] on any connection error.
2353
- #
2354
- def with_socket(server)
2355
- attempts = @reqretries
2356
- socket = nil
2357
-
2358
- begin
2359
- s = server.get_socket do |sock|
2360
- # Remember socket to close it in case of emergency
2361
- socket = sock
2362
-
2363
- # send my version
2364
- # this is a subtle part. we must do it before (!) reading back from searchd.
2365
- # because otherwise under some conditions (reported on FreeBSD for instance)
2366
- # TCP stack could throttle write-write-read pattern because of Nagle.
2367
- sock.write([1].pack('N'))
2368
- v = sock.read(4).unpack('N*').first
2369
-
2370
- # Ouch, invalid protocol!
2371
- if v < 1
2372
- raise SphinxConnectError, "expected searchd protocol version 1+, got version '#{v}'"
2373
- end
2374
- end
2375
-
2376
- Sphinx::safe_execute(@reqtimeout) do
2377
- yield s
2378
- end
2379
- rescue SocketError, SystemCallError, IOError, ::Errno::EPIPE => e
2380
- logger.warn { "[sphinx] socket failure: #{e.message}" } if logger
2381
- # Ouch, communication problem, will be treated as a connection problem.
2382
- raise SphinxConnectError, "failed to read searchd response (msg=#{e.message})"
2383
- rescue SphinxResponseError, SphinxInternalError, SphinxTemporaryError, SphinxUnknownError, ::Timeout::Error, EOFError => e
2384
- # EOFError should not occur in ideal world, because we compare response length
2385
- # with a value passed by Sphinx. But we want to ensure that client will not
2386
- # fail with unexpected error when Sphinx implementation has bugs, don't we?
2387
- if e.kind_of?(EOFError) or e.kind_of?(::Timeout::Error)
2388
- new_e = SphinxResponseError.new("failed to read searchd response (msg=#{e.message})")
2389
- new_e.set_backtrace(e.backtrace)
2390
- e = new_e
2391
- end
2392
- logger.warn { "[sphinx] generic failure: #{e.class.name}: #{e.message}" } if logger
2393
-
2394
- # Close previously opened socket (in case it has actually been opened)
2395
- server.free_socket(socket)
2396
-
2397
- # Request error! Do we need to try it again?
2398
- attempts -= 1
2399
- retry if attempts > 0
2400
-
2401
- # Re-raise original exception
2402
- @error = e.message
2403
- raise e
2404
- ensure
2405
- # Close previously opened socket on any other error
2406
- server.free_socket(socket)
2407
- end
1206
+
1207
+ # Connect, send query, get response.
1208
+ def PerformRequest(command, request, additional = nil, skip_response = false)
1209
+ cmd = command.to_s.upcase
1210
+ command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1211
+ command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1212
+
1213
+ sock = self.Connect
1214
+ len = request.to_s.length + (additional != nil ? 4 : 0)
1215
+ header = [command_id, command_ver, len].pack('nnN')
1216
+ header << [additional].pack('N') if additional != nil
1217
+ sock.send(header + request.to_s, 0)
1218
+
1219
+ return sock if skip_response
1220
+ response = self.GetResponse(sock, command_ver)
1221
+ return Response.new(response)
2408
1222
  end
2409
-
2410
- # Enables ability to skip +set_+ prefix for methods inside {#query} block.
2411
- #
2412
- # @example
2413
- # sphinx.query('test') do
2414
- # match_mode :all
2415
- # id_range 10, 100
2416
- # end
2417
- #
2418
- def method_missing(method_id, *arguments, &block)
2419
- if @inside_eval and self.respond_to?("set_#{method_id}")
2420
- self.send("set_#{method_id}", *arguments)
2421
- else
2422
- super
2423
- end
1223
+
1224
+ # :stopdoc:
1225
+ def assert
1226
+ raise 'Assertion failed!' unless yield if $DEBUG
2424
1227
  end
1228
+ # :startdoc:
2425
1229
  end
2426
1230
  end