zinx 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/sphinx/sphinx.rb +5 -5
- data/lib/sphinx/sphinx/client.rb +1125 -1125
- data/lib/sphinx/sphinx/request.rb +50 -50
- data/lib/sphinx/sphinx/response.rb +68 -68
- data/lib/zinx.rb +271 -271
- data/test/test.rb +11 -11
- metadata +4 -4
data/lib/sphinx/sphinx.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/sphinx/request'
|
2
|
-
require File.dirname(__FILE__) + '/sphinx/response'
|
3
|
-
require File.dirname(__FILE__) + '/sphinx/client'
|
4
|
-
|
5
|
-
module Sphinx
|
1
|
+
require File.dirname(__FILE__) + '/sphinx/request'
|
2
|
+
require File.dirname(__FILE__) + '/sphinx/response'
|
3
|
+
require File.dirname(__FILE__) + '/sphinx/client'
|
4
|
+
|
5
|
+
module Sphinx
|
6
6
|
end
|
data/lib/sphinx/sphinx/client.rb
CHANGED
@@ -1,1125 +1,1125 @@
|
|
1
|
-
# = client.rb - Sphinx Client API
|
2
|
-
#
|
3
|
-
# Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
|
4
|
-
# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
|
5
|
-
# License:: Distributed under the same terms as Ruby
|
6
|
-
# Version:: 0.9.9-r1299
|
7
|
-
# Website:: http://kpumuk.info/projects/ror-plugins/sphinx
|
8
|
-
#
|
9
|
-
# This library is distributed under the terms of the Ruby license.
|
10
|
-
# You can freely distribute/modify this library.
|
11
|
-
|
12
|
-
# ==Sphinx Client API
|
13
|
-
#
|
14
|
-
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
|
15
|
-
# daemon and get search results from Sphinx.
|
16
|
-
#
|
17
|
-
# ===Usage
|
18
|
-
#
|
19
|
-
# sphinx = Sphinx::Client.new
|
20
|
-
# result = sphinx.Query('test')
|
21
|
-
# ids = result['matches'].map { |match| match['id'] }.join(',')
|
22
|
-
# posts = Post.find :all, :conditions => "id IN (#{ids})"
|
23
|
-
#
|
24
|
-
# docs = posts.map(&:body)
|
25
|
-
# excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
|
26
|
-
|
27
|
-
require 'socket'
|
28
|
-
|
29
|
-
module Sphinx
|
30
|
-
# :stopdoc:
|
31
|
-
|
32
|
-
class SphinxError < StandardError; end
|
33
|
-
class SphinxArgumentError < SphinxError; end
|
34
|
-
class SphinxConnectError < SphinxError; end
|
35
|
-
class SphinxResponseError < SphinxError; end
|
36
|
-
class SphinxInternalError < SphinxError; end
|
37
|
-
class SphinxTemporaryError < SphinxError; end
|
38
|
-
class SphinxUnknownError < SphinxError; end
|
39
|
-
|
40
|
-
# :startdoc:
|
41
|
-
|
42
|
-
class Client
|
43
|
-
|
44
|
-
# :stopdoc:
|
45
|
-
|
46
|
-
# Known searchd commands
|
47
|
-
|
48
|
-
# search command
|
49
|
-
SEARCHD_COMMAND_SEARCH = 0
|
50
|
-
# excerpt command
|
51
|
-
SEARCHD_COMMAND_EXCERPT = 1
|
52
|
-
# update command
|
53
|
-
SEARCHD_COMMAND_UPDATE = 2
|
54
|
-
# keywords command
|
55
|
-
SEARCHD_COMMAND_KEYWORDS = 3
|
56
|
-
|
57
|
-
# Current client-side command implementation versions
|
58
|
-
|
59
|
-
# search command version
|
60
|
-
VER_COMMAND_SEARCH = 0x119
|
61
|
-
# excerpt command version
|
62
|
-
VER_COMMAND_EXCERPT = 0x102
|
63
|
-
# update command version
|
64
|
-
VER_COMMAND_UPDATE = 0x102
|
65
|
-
# keywords command version
|
66
|
-
VER_COMMAND_KEYWORDS = 0x100
|
67
|
-
|
68
|
-
# Known searchd status codes
|
69
|
-
|
70
|
-
# general success, command-specific reply follows
|
71
|
-
SEARCHD_OK = 0
|
72
|
-
# general failure, command-specific reply may follow
|
73
|
-
SEARCHD_ERROR = 1
|
74
|
-
# temporary failure, client should retry later
|
75
|
-
SEARCHD_RETRY = 2
|
76
|
-
# general success, warning message and command-specific reply follow
|
77
|
-
SEARCHD_WARNING = 3
|
78
|
-
|
79
|
-
# :startdoc:
|
80
|
-
|
81
|
-
# Known match modes
|
82
|
-
|
83
|
-
# match all query words
|
84
|
-
SPH_MATCH_ALL = 0
|
85
|
-
# match any query word
|
86
|
-
SPH_MATCH_ANY = 1
|
87
|
-
# match this exact phrase
|
88
|
-
SPH_MATCH_PHRASE = 2
|
89
|
-
# match this boolean query
|
90
|
-
SPH_MATCH_BOOLEAN = 3
|
91
|
-
# match this extended query
|
92
|
-
SPH_MATCH_EXTENDED = 4
|
93
|
-
# match all document IDs w/o fulltext query, apply filters
|
94
|
-
SPH_MATCH_FULLSCAN = 5
|
95
|
-
# extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
|
96
|
-
SPH_MATCH_EXTENDED2 = 6
|
97
|
-
|
98
|
-
# Known ranking modes (ext2 only)
|
99
|
-
|
100
|
-
# default mode, phrase proximity major factor and BM25 minor one
|
101
|
-
SPH_RANK_PROXIMITY_BM25 = 0
|
102
|
-
# statistical mode, BM25 ranking only (faster but worse quality)
|
103
|
-
SPH_RANK_BM25 = 1
|
104
|
-
# no ranking, all matches get a weight of 1
|
105
|
-
SPH_RANK_NONE = 2
|
106
|
-
# simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
|
107
|
-
SPH_RANK_WORDCOUNT = 3
|
108
|
-
# phrase proximity
|
109
|
-
SPH_RANK_PROXIMITY = 4
|
110
|
-
|
111
|
-
# Known sort modes
|
112
|
-
|
113
|
-
# sort by document relevance desc, then by date
|
114
|
-
SPH_SORT_RELEVANCE = 0
|
115
|
-
# sort by document date desc, then by relevance desc
|
116
|
-
SPH_SORT_ATTR_DESC = 1
|
117
|
-
# sort by document date asc, then by relevance desc
|
118
|
-
SPH_SORT_ATTR_ASC = 2
|
119
|
-
# sort by time segments (hour/day/week/etc) desc, then by relevance desc
|
120
|
-
SPH_SORT_TIME_SEGMENTS = 3
|
121
|
-
# sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
|
122
|
-
SPH_SORT_EXTENDED = 4
|
123
|
-
# sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
|
124
|
-
SPH_SORT_EXPR = 5
|
125
|
-
|
126
|
-
# Known filter types
|
127
|
-
|
128
|
-
# filter by integer values set
|
129
|
-
SPH_FILTER_VALUES = 0
|
130
|
-
# filter by integer range
|
131
|
-
SPH_FILTER_RANGE = 1
|
132
|
-
# filter by float range
|
133
|
-
SPH_FILTER_FLOATRANGE = 2
|
134
|
-
|
135
|
-
# Known attribute types
|
136
|
-
|
137
|
-
# this attr is just an integer
|
138
|
-
SPH_ATTR_INTEGER = 1
|
139
|
-
# this attr is a timestamp
|
140
|
-
SPH_ATTR_TIMESTAMP = 2
|
141
|
-
# this attr is an ordinal string number (integer at search time,
|
142
|
-
# specially handled at indexing time)
|
143
|
-
SPH_ATTR_ORDINAL = 3
|
144
|
-
# this attr is a boolean bit field
|
145
|
-
SPH_ATTR_BOOL = 4
|
146
|
-
# this attr is a float
|
147
|
-
SPH_ATTR_FLOAT = 5
|
148
|
-
# signed 64-bit integer
|
149
|
-
SPH_ATTR_BIGINT = 6
|
150
|
-
# string
|
151
|
-
SPH_ATTR_STRING = 7
|
152
|
-
# this attr has multiple values (0 or more)
|
153
|
-
SPH_ATTR_MULTI = 0x40000001
|
154
|
-
SPH_ATTR_MULTI64 = 0x40000002
|
155
|
-
|
156
|
-
# Known grouping functions
|
157
|
-
|
158
|
-
# group by day
|
159
|
-
SPH_GROUPBY_DAY = 0
|
160
|
-
# group by week
|
161
|
-
SPH_GROUPBY_WEEK = 1
|
162
|
-
# group by month
|
163
|
-
SPH_GROUPBY_MONTH = 2
|
164
|
-
# group by year
|
165
|
-
SPH_GROUPBY_YEAR = 3
|
166
|
-
# group by attribute value
|
167
|
-
SPH_GROUPBY_ATTR = 4
|
168
|
-
# group by sequential attrs pair
|
169
|
-
SPH_GROUPBY_ATTRPAIR = 5
|
170
|
-
|
171
|
-
# Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
|
172
|
-
def initialize
|
173
|
-
# per-client-object settings
|
174
|
-
@host = 'localhost' # searchd host (default is "localhost")
|
175
|
-
@port = 9312 # searchd port (default is 9312)
|
176
|
-
|
177
|
-
# per-query settings
|
178
|
-
@offset = 0 # how many records to seek from result-set start (default is 0)
|
179
|
-
@limit = 20 # how many records to return from result-set starting at offset (default is 20)
|
180
|
-
@mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
|
181
|
-
@weights = [] # per-field weights (default is 1 for all fields)
|
182
|
-
@sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
|
183
|
-
@sortby = '' # attribute to sort by (defualt is "")
|
184
|
-
@min_id = 0 # min ID to match (default is 0, which means no limit)
|
185
|
-
@max_id = 0 # max ID to match (default is 0, which means no limit)
|
186
|
-
@filters = [] # search filters
|
187
|
-
@groupby = '' # group-by attribute name
|
188
|
-
@groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
|
189
|
-
@groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
|
190
|
-
@groupdistinct = '' # group-by count-distinct attribute
|
191
|
-
@maxmatches = 1000 # max matches to retrieve
|
192
|
-
@cutoff = 0 # cutoff to stop searching at (default is 0)
|
193
|
-
@retrycount = 0 # distributed retries count
|
194
|
-
@retrydelay = 0 # distributed retries delay
|
195
|
-
@anchor = [] # geographical anchor point
|
196
|
-
@indexweights = [] # per-index weights
|
197
|
-
@ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
|
198
|
-
@maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
|
199
|
-
@fieldweights = {} # per-field-name weights
|
200
|
-
@overrides = [] # per-query attribute values overrides
|
201
|
-
@select = '*' # select-list (attributes or expressions, with optional aliases)
|
202
|
-
|
203
|
-
# per-reply fields (for single-query case)
|
204
|
-
@error = '' # last error message
|
205
|
-
@warning = '' # last warning message
|
206
|
-
|
207
|
-
@reqs = [] # requests storage (for multi-query case)
|
208
|
-
@mbenc = '' # stored mbstring encoding
|
209
|
-
end
|
210
|
-
|
211
|
-
# Get last error message.
|
212
|
-
def GetLastError
|
213
|
-
@error
|
214
|
-
end
|
215
|
-
|
216
|
-
# Get last warning message.
|
217
|
-
def GetLastWarning
|
218
|
-
@warning
|
219
|
-
end
|
220
|
-
|
221
|
-
# Set searchd host name (string) and port (integer).
|
222
|
-
def SetServer(host, port)
|
223
|
-
assert { host.instance_of? String }
|
224
|
-
assert { port.instance_of? Fixnum }
|
225
|
-
|
226
|
-
@host = host
|
227
|
-
@port = port
|
228
|
-
end
|
229
|
-
|
230
|
-
# Set offset and count into result set,
|
231
|
-
# and optionally set max-matches and cutoff limits.
|
232
|
-
def SetLimits(offset, limit, max = 0, cutoff = 0)
|
233
|
-
assert { offset.instance_of? Fixnum }
|
234
|
-
assert { limit.instance_of? Fixnum }
|
235
|
-
assert { max.instance_of? Fixnum }
|
236
|
-
assert { offset >= 0 }
|
237
|
-
assert { limit > 0 }
|
238
|
-
assert { max >= 0 }
|
239
|
-
|
240
|
-
@offset = offset
|
241
|
-
@limit = limit
|
242
|
-
@maxmatches = max if max > 0
|
243
|
-
@cutoff = cutoff if cutoff > 0
|
244
|
-
end
|
245
|
-
|
246
|
-
# Set maximum query time, in milliseconds, per-index,
|
247
|
-
# integer, 0 means "do not limit"
|
248
|
-
def SetMaxQueryTime(max)
|
249
|
-
assert { max.instance_of? Fixnum }
|
250
|
-
assert { max >= 0 }
|
251
|
-
@maxquerytime = max
|
252
|
-
end
|
253
|
-
|
254
|
-
# Set matching mode.
|
255
|
-
def SetMatchMode(mode)
|
256
|
-
assert { mode == SPH_MATCH_ALL \
|
257
|
-
|| mode == SPH_MATCH_ANY \
|
258
|
-
|| mode == SPH_MATCH_PHRASE \
|
259
|
-
|| mode == SPH_MATCH_BOOLEAN \
|
260
|
-
|| mode == SPH_MATCH_EXTENDED \
|
261
|
-
|| mode == SPH_MATCH_FULLSCAN \
|
262
|
-
|| mode == SPH_MATCH_EXTENDED2 }
|
263
|
-
|
264
|
-
@mode = mode
|
265
|
-
end
|
266
|
-
|
267
|
-
# Set ranking mode.
|
268
|
-
def SetRankingMode(ranker)
|
269
|
-
assert { ranker == SPH_RANK_PROXIMITY_BM25 \
|
270
|
-
|| ranker == SPH_RANK_BM25 \
|
271
|
-
|| ranker == SPH_RANK_NONE \
|
272
|
-
|| ranker == SPH_RANK_WORDCOUNT \
|
273
|
-
|| ranker == SPH_RANK_PROXIMITY }
|
274
|
-
|
275
|
-
@ranker = ranker
|
276
|
-
end
|
277
|
-
|
278
|
-
# Set matches sorting mode.
|
279
|
-
def SetSortMode(mode, sortby = '')
|
280
|
-
assert { mode == SPH_SORT_RELEVANCE \
|
281
|
-
|| mode == SPH_SORT_ATTR_DESC \
|
282
|
-
|| mode == SPH_SORT_ATTR_ASC \
|
283
|
-
|| mode == SPH_SORT_TIME_SEGMENTS \
|
284
|
-
|| mode == SPH_SORT_EXTENDED \
|
285
|
-
|| mode == SPH_SORT_EXPR }
|
286
|
-
assert { sortby.instance_of? String }
|
287
|
-
assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
|
288
|
-
|
289
|
-
@sort = mode
|
290
|
-
@sortby = sortby
|
291
|
-
end
|
292
|
-
|
293
|
-
# Bind per-field weights by order.
|
294
|
-
#
|
295
|
-
# DEPRECATED; use SetFieldWeights() instead.
|
296
|
-
def SetWeights(weights)
|
297
|
-
assert { weights.instance_of? Array }
|
298
|
-
weights.each do |weight|
|
299
|
-
assert { weight.instance_of? Fixnum }
|
300
|
-
end
|
301
|
-
|
302
|
-
@weights = weights
|
303
|
-
end
|
304
|
-
|
305
|
-
# Bind per-field weights by name.
|
306
|
-
#
|
307
|
-
# Takes a hash mapping string (field name) to integer (field weight) as an argument.
|
308
|
-
# * Takes precedence over SetWeights().
|
309
|
-
# * Unknown names will be silently ignored.
|
310
|
-
# * Unbound fields will be silently given a weight of 1.
|
311
|
-
def SetFieldWeights(weights)
|
312
|
-
assert { weights.instance_of? Hash }
|
313
|
-
weights.each do |name, weight|
|
314
|
-
assert { name.instance_of? String }
|
315
|
-
assert { weight.instance_of? Fixnum }
|
316
|
-
end
|
317
|
-
|
318
|
-
@fieldweights = weights
|
319
|
-
end
|
320
|
-
|
321
|
-
# Bind per-index weights by name.
|
322
|
-
def SetIndexWeights(weights)
|
323
|
-
assert { weights.instance_of? Hash }
|
324
|
-
weights.each do |index, weight|
|
325
|
-
assert { index.instance_of? String }
|
326
|
-
assert { weight.instance_of? Fixnum }
|
327
|
-
end
|
328
|
-
|
329
|
-
@indexweights = weights
|
330
|
-
end
|
331
|
-
|
332
|
-
# Set IDs range to match.
|
333
|
-
#
|
334
|
-
# Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
|
335
|
-
def SetIDRange(min, max)
|
336
|
-
assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
|
337
|
-
assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
|
338
|
-
assert { min <= max }
|
339
|
-
|
340
|
-
@min_id = min
|
341
|
-
@max_id = max
|
342
|
-
end
|
343
|
-
|
344
|
-
# Set values filter.
|
345
|
-
#
|
346
|
-
# Only match those records where <tt>attribute</tt> column values
|
347
|
-
# are in specified set.
|
348
|
-
def SetFilter(attribute, values, exclude = false)
|
349
|
-
assert { attribute.instance_of? String }
|
350
|
-
assert { values.instance_of? Array }
|
351
|
-
assert { !values.empty? }
|
352
|
-
|
353
|
-
if values.instance_of?(Array) && values.size > 0
|
354
|
-
values.each do |value|
|
355
|
-
assert { value.instance_of? Fixnum }
|
356
|
-
end
|
357
|
-
|
358
|
-
@filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
|
359
|
-
end
|
360
|
-
end
|
361
|
-
|
362
|
-
# Set range filter.
|
363
|
-
#
|
364
|
-
# Only match those records where <tt>attribute</tt> column value
|
365
|
-
# is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
366
|
-
def SetFilterRange(attribute, min, max, exclude = false)
|
367
|
-
assert { attribute.instance_of? String }
|
368
|
-
assert { min.instance_of? Fixnum or min.instance_of? Bignum }
|
369
|
-
assert { max.instance_of? Fixnum or max.instance_of? Bignum }
|
370
|
-
assert { min <= max }
|
371
|
-
|
372
|
-
@filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
|
373
|
-
end
|
374
|
-
|
375
|
-
# Set float range filter.
|
376
|
-
#
|
377
|
-
# Only match those records where <tt>attribute</tt> column value
|
378
|
-
# is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
379
|
-
def SetFilterFloatRange(attribute, min, max, exclude = false)
|
380
|
-
assert { attribute.instance_of? String }
|
381
|
-
assert { min.instance_of? Float }
|
382
|
-
assert { max.instance_of? Float }
|
383
|
-
assert { min <= max }
|
384
|
-
|
385
|
-
@filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
|
386
|
-
end
|
387
|
-
|
388
|
-
# Setup anchor point for geosphere distance calculations.
|
389
|
-
#
|
390
|
-
# Required to use <tt>@geodist</tt> in filters and sorting
|
391
|
-
# distance will be computed to this point. Latitude and longitude
|
392
|
-
# must be in radians.
|
393
|
-
#
|
394
|
-
# * <tt>attrlat</tt> -- is the name of latitude attribute
|
395
|
-
# * <tt>attrlong</tt> -- is the name of longitude attribute
|
396
|
-
# * <tt>lat</tt> -- is anchor point latitude, in radians
|
397
|
-
# * <tt>long</tt> -- is anchor point longitude, in radians
|
398
|
-
def SetGeoAnchor(attrlat, attrlong, lat, long)
|
399
|
-
assert { attrlat.instance_of? String }
|
400
|
-
assert { attrlong.instance_of? String }
|
401
|
-
assert { lat.instance_of? Float }
|
402
|
-
assert { long.instance_of? Float }
|
403
|
-
|
404
|
-
@anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
|
405
|
-
end
|
406
|
-
|
407
|
-
# Set grouping attribute and function.
|
408
|
-
#
|
409
|
-
# In grouping mode, all matches are assigned to different groups
|
410
|
-
# based on grouping function value.
|
411
|
-
#
|
412
|
-
# Each group keeps track of the total match count, and the best match
|
413
|
-
# (in this group) according to current sorting function.
|
414
|
-
#
|
415
|
-
# The final result set contains one best match per group, with
|
416
|
-
# grouping function value and matches count attached.
|
417
|
-
#
|
418
|
-
# Groups in result set could be sorted by any sorting clause,
|
419
|
-
# including both document attributes and the following special
|
420
|
-
# internal Sphinx attributes:
|
421
|
-
#
|
422
|
-
# * @id - match document ID;
|
423
|
-
# * @weight, @rank, @relevance - match weight;
|
424
|
-
# * @group - groupby function value;
|
425
|
-
# * @count - amount of matches in group.
|
426
|
-
#
|
427
|
-
# the default mode is to sort by groupby value in descending order,
|
428
|
-
# ie. by '@group desc'.
|
429
|
-
#
|
430
|
-
# 'total_found' would contain total amount of matching groups over
|
431
|
-
# the whole index.
|
432
|
-
#
|
433
|
-
# WARNING: grouping is done in fixed memory and thus its results
|
434
|
-
# are only approximate; so there might be more groups reported
|
435
|
-
# in total_found than actually present. @count might also
|
436
|
-
# be underestimated.
|
437
|
-
#
|
438
|
-
# For example, if sorting by relevance and grouping by "published"
|
439
|
-
# attribute with SPH_GROUPBY_DAY function, then the result set will
|
440
|
-
# contain one most relevant match per each day when there were any
|
441
|
-
# matches published, with day number and per-day match count attached,
|
442
|
-
# and sorted by day number in descending order (ie. recent days first).
|
443
|
-
def SetGroupBy(attribute, func, groupsort = '@group desc')
|
444
|
-
assert { attribute.instance_of? String }
|
445
|
-
assert { groupsort.instance_of? String }
|
446
|
-
assert { func == SPH_GROUPBY_DAY \
|
447
|
-
|| func == SPH_GROUPBY_WEEK \
|
448
|
-
|| func == SPH_GROUPBY_MONTH \
|
449
|
-
|| func == SPH_GROUPBY_YEAR \
|
450
|
-
|| func == SPH_GROUPBY_ATTR \
|
451
|
-
|| func == SPH_GROUPBY_ATTRPAIR }
|
452
|
-
|
453
|
-
@groupby = attribute
|
454
|
-
@groupfunc = func
|
455
|
-
@groupsort = groupsort
|
456
|
-
end
|
457
|
-
|
458
|
-
# Set count-distinct attribute for group-by queries.
|
459
|
-
def SetGroupDistinct(attribute)
|
460
|
-
assert { attribute.instance_of? String }
|
461
|
-
@groupdistinct = attribute
|
462
|
-
end
|
463
|
-
|
464
|
-
# Set distributed retries count and delay.
|
465
|
-
def SetRetries(count, delay = 0)
|
466
|
-
assert { count.instance_of? Fixnum }
|
467
|
-
assert { delay.instance_of? Fixnum }
|
468
|
-
|
469
|
-
@retrycount = count
|
470
|
-
@retrydelay = delay
|
471
|
-
end
|
472
|
-
|
473
|
-
# Set attribute values override
|
474
|
-
#
|
475
|
-
# There can be only one override per attribute.
|
476
|
-
# +values+ must be a hash that maps document IDs to attribute values.
|
477
|
-
def SetOverride(attrname, attrtype, values)
|
478
|
-
assert { attrname.instance_of? String }
|
479
|
-
assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
|
480
|
-
assert { values.instance_of? Hash }
|
481
|
-
|
482
|
-
@overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
|
483
|
-
end
|
484
|
-
|
485
|
-
# Set select-list (attributes or expressions), SQL-like syntax.
|
486
|
-
def SetSelect(select)
|
487
|
-
assert { select.instance_of? String }
|
488
|
-
@select = select
|
489
|
-
end
|
490
|
-
|
491
|
-
# Clear all filters (for multi-queries).
|
492
|
-
def ResetFilters
|
493
|
-
@filters = []
|
494
|
-
@anchor = []
|
495
|
-
end
|
496
|
-
|
497
|
-
# Clear groupby settings (for multi-queries).
|
498
|
-
def ResetGroupBy
|
499
|
-
@groupby = ''
|
500
|
-
@groupfunc = SPH_GROUPBY_DAY
|
501
|
-
@groupsort = '@group desc'
|
502
|
-
@groupdistinct = ''
|
503
|
-
end
|
504
|
-
|
505
|
-
# Clear all attribute value overrides (for multi-queries).
|
506
|
-
def ResetOverrides
|
507
|
-
@overrides = []
|
508
|
-
end
|
509
|
-
|
510
|
-
# Connect to searchd server and run given search query.
|
511
|
-
#
|
512
|
-
# <tt>query</tt> is query string
|
513
|
-
|
514
|
-
# <tt>index</tt> is index name (or names) to query. default value is "*" which means
|
515
|
-
# to query all indexes. Accepted characters for index names are letters, numbers,
|
516
|
-
# dash, and underscore; everything else is considered a separator. Therefore,
|
517
|
-
# all the following calls are valid and will search two indexes:
|
518
|
-
#
|
519
|
-
# sphinx.Query('test query', 'main delta')
|
520
|
-
# sphinx.Query('test query', 'main;delta')
|
521
|
-
# sphinx.Query('test query', 'main, delta')
|
522
|
-
#
|
523
|
-
# Index order matters. If identical IDs are found in two or more indexes,
|
524
|
-
# weight and attribute values from the very last matching index will be used
|
525
|
-
# for sorting and returning to client. Therefore, in the example above,
|
526
|
-
# matches from "delta" index will always "win" over matches from "main".
|
527
|
-
#
|
528
|
-
# Returns false on failure.
|
529
|
-
# Returns hash which has the following keys on success:
|
530
|
-
#
|
531
|
-
# * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
|
532
|
-
# * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
|
533
|
-
# * <tt>'total_found'</tt> -- total amount of matching documents in index
|
534
|
-
# * <tt>'time'</tt> -- search time
|
535
|
-
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
|
536
|
-
def Query(query, index = '*', comment = '')
|
537
|
-
assert { @reqs.empty? }
|
538
|
-
@reqs = []
|
539
|
-
|
540
|
-
self.AddQuery(query, index, comment)
|
541
|
-
results = self.RunQueries
|
542
|
-
|
543
|
-
# probably network error; error message should be already filled
|
544
|
-
return false unless results.instance_of?(Array)
|
545
|
-
|
546
|
-
@error = results[0]['error']
|
547
|
-
@warning = results[0]['warning']
|
548
|
-
|
549
|
-
return false if results[0]['status'] == SEARCHD_ERROR
|
550
|
-
return results[0]
|
551
|
-
end
|
552
|
-
|
553
|
-
# Add query to batch.
|
554
|
-
#
|
555
|
-
# Batch queries enable searchd to perform internal optimizations,
|
556
|
-
# if possible; and reduce network connection overheads in all cases.
|
557
|
-
#
|
558
|
-
# For instance, running exactly the same query with different
|
559
|
-
# groupby settings will enable searchd to perform expensive
|
560
|
-
# full-text search and ranking operation only once, but compute
|
561
|
-
# multiple groupby results from its output.
|
562
|
-
#
|
563
|
-
# Parameters are exactly the same as in <tt>Query</tt> call.
|
564
|
-
# Returns index to results array returned by <tt>RunQueries</tt> call.
|
565
|
-
def AddQuery(query, index = '*', comment = '')
|
566
|
-
# build request
|
567
|
-
|
568
|
-
# mode and limits
|
569
|
-
request = Request.new
|
570
|
-
request.put_int @offset, @limit, @mode, @ranker, @sort
|
571
|
-
request.put_string @sortby
|
572
|
-
# query itself
|
573
|
-
request.put_string query
|
574
|
-
# weights
|
575
|
-
request.put_int_array @weights
|
576
|
-
# indexes
|
577
|
-
request.put_string index
|
578
|
-
# id64 range marker
|
579
|
-
request.put_int 1
|
580
|
-
# id64 range
|
581
|
-
request.put_int64 @min_id.to_i, @max_id.to_i
|
582
|
-
|
583
|
-
# filters
|
584
|
-
request.put_int @filters.length
|
585
|
-
@filters.each do |filter|
|
586
|
-
request.put_string filter['attr']
|
587
|
-
request.put_int filter['type']
|
588
|
-
|
589
|
-
case filter['type']
|
590
|
-
when SPH_FILTER_VALUES
|
591
|
-
request.put_int64_array filter['values']
|
592
|
-
when SPH_FILTER_RANGE
|
593
|
-
request.put_int64 filter['min'], filter['max']
|
594
|
-
when SPH_FILTER_FLOATRANGE
|
595
|
-
request.put_float filter['min'], filter['max']
|
596
|
-
else
|
597
|
-
raise SphinxInternalError, 'Internal error: unhandled filter type'
|
598
|
-
end
|
599
|
-
request.put_int filter['exclude'] ? 1 : 0
|
600
|
-
end
|
601
|
-
|
602
|
-
# group-by clause, max-matches count, group-sort clause, cutoff count
|
603
|
-
request.put_int @groupfunc
|
604
|
-
request.put_string @groupby
|
605
|
-
request.put_int @maxmatches
|
606
|
-
request.put_string @groupsort
|
607
|
-
request.put_int @cutoff, @retrycount, @retrydelay
|
608
|
-
request.put_string @groupdistinct
|
609
|
-
|
610
|
-
# anchor point
|
611
|
-
if @anchor.empty?
|
612
|
-
request.put_int 0
|
613
|
-
else
|
614
|
-
request.put_int 1
|
615
|
-
request.put_string @anchor['attrlat'], @anchor['attrlong']
|
616
|
-
request.put_float @anchor['lat'], @anchor['long']
|
617
|
-
end
|
618
|
-
|
619
|
-
# per-index weights
|
620
|
-
request.put_int @indexweights.length
|
621
|
-
@indexweights.each do |idx, weight|
|
622
|
-
request.put_string idx
|
623
|
-
request.put_int weight
|
624
|
-
end
|
625
|
-
|
626
|
-
# max query time
|
627
|
-
request.put_int @maxquerytime
|
628
|
-
|
629
|
-
# per-field weights
|
630
|
-
request.put_int @fieldweights.length
|
631
|
-
@fieldweights.each do |field, weight|
|
632
|
-
request.put_string field
|
633
|
-
request.put_int weight
|
634
|
-
end
|
635
|
-
|
636
|
-
# comment
|
637
|
-
request.put_string comment
|
638
|
-
|
639
|
-
# attribute overrides
|
640
|
-
request.put_int @overrides.length
|
641
|
-
for entry in @overrides do
|
642
|
-
request.put_string entry['attr']
|
643
|
-
request.put_int entry['type'], entry['values'].size
|
644
|
-
entry['values'].each do |id, val|
|
645
|
-
assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
|
646
|
-
assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
|
647
|
-
|
648
|
-
request.put_int64 id
|
649
|
-
case entry['type']
|
650
|
-
when SPH_ATTR_FLOAT
|
651
|
-
request.put_float val
|
652
|
-
when SPH_ATTR_BIGINT
|
653
|
-
request.put_int64 val
|
654
|
-
else
|
655
|
-
request.put_int val
|
656
|
-
end
|
657
|
-
end
|
658
|
-
end
|
659
|
-
|
660
|
-
# select-list
|
661
|
-
request.put_string @select
|
662
|
-
|
663
|
-
# store request to requests array
|
664
|
-
@reqs << request.to_s;
|
665
|
-
return @reqs.length - 1
|
666
|
-
end
|
667
|
-
|
668
|
-
# Run queries batch.
|
669
|
-
#
|
670
|
-
# Returns an array of result sets on success.
|
671
|
-
# Returns false on network IO failure.
|
672
|
-
#
|
673
|
-
# Each result set in returned array is a hash which contains
|
674
|
-
# the same keys as the hash returned by <tt>Query</tt>, plus:
|
675
|
-
#
|
676
|
-
# * <tt>'error'</tt> -- search error for this query
|
677
|
-
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
|
678
|
-
def RunQueries
|
679
|
-
if @reqs.empty?
|
680
|
-
@error = 'No queries defined, issue AddQuery() first'
|
681
|
-
return false
|
682
|
-
end
|
683
|
-
|
684
|
-
req = @reqs.join('')
|
685
|
-
nreqs = @reqs.length
|
686
|
-
@reqs = []
|
687
|
-
response = PerformRequest(:search, req, nreqs)
|
688
|
-
|
689
|
-
# parse response
|
690
|
-
begin
|
691
|
-
results = []
|
692
|
-
ires = 0
|
693
|
-
while ires < nreqs
|
694
|
-
ires += 1
|
695
|
-
result = {}
|
696
|
-
|
697
|
-
result['error'] = ''
|
698
|
-
result['warning'] = ''
|
699
|
-
|
700
|
-
# extract status
|
701
|
-
status = result['status'] = response.get_int
|
702
|
-
if status != SEARCHD_OK
|
703
|
-
message = response.get_string
|
704
|
-
if status == SEARCHD_WARNING
|
705
|
-
result['warning'] = message
|
706
|
-
else
|
707
|
-
result['error'] = message
|
708
|
-
results << result
|
709
|
-
next
|
710
|
-
end
|
711
|
-
end
|
712
|
-
|
713
|
-
# read schema
|
714
|
-
fields = []
|
715
|
-
attrs = {}
|
716
|
-
attrs_names_in_order = []
|
717
|
-
|
718
|
-
nfields = response.get_int
|
719
|
-
while nfields > 0
|
720
|
-
nfields -= 1
|
721
|
-
fields << response.get_string
|
722
|
-
end
|
723
|
-
result['fields'] = fields
|
724
|
-
|
725
|
-
nattrs = response.get_int
|
726
|
-
while nattrs > 0
|
727
|
-
nattrs -= 1
|
728
|
-
attr = response.get_string
|
729
|
-
type = response.get_int
|
730
|
-
attrs[attr] = type
|
731
|
-
attrs_names_in_order << attr
|
732
|
-
end
|
733
|
-
result['attrs'] = attrs
|
734
|
-
|
735
|
-
# read match count
|
736
|
-
count = response.get_int
|
737
|
-
id64 = response.get_int
|
738
|
-
|
739
|
-
# read matches
|
740
|
-
result['matches'] = []
|
741
|
-
while count > 0
|
742
|
-
count -= 1
|
743
|
-
|
744
|
-
if id64 != 0
|
745
|
-
doc = response.get_int64
|
746
|
-
weight = response.get_int
|
747
|
-
else
|
748
|
-
doc, weight = response.get_ints(2)
|
749
|
-
end
|
750
|
-
|
751
|
-
r = {} # This is a single result put in the result['matches'] array
|
752
|
-
r['id'] = doc
|
753
|
-
r['weight'] = weight
|
754
|
-
attrs_names_in_order.each do |a|
|
755
|
-
r['attrs'] ||= {}
|
756
|
-
|
757
|
-
case attrs[a]
|
758
|
-
when SPH_ATTR_BIGINT
|
759
|
-
# handle 64-bit ints
|
760
|
-
r['attrs'][a] = response.get_int64
|
761
|
-
when SPH_ATTR_FLOAT
|
762
|
-
# handle floats
|
763
|
-
r['attrs'][a] = response.get_float
|
764
|
-
when SPH_ATTR_STRING
|
765
|
-
# handle string
|
766
|
-
r['attrs'][a] = response.get_string
|
767
|
-
else
|
768
|
-
# handle everything else as unsigned ints
|
769
|
-
val = response.get_int
|
770
|
-
if attrs[a]==SPH_ATTR_MULTI
|
771
|
-
r['attrs'][a] = []
|
772
|
-
1.upto(val) do
|
773
|
-
r['attrs'][a] << response.get_int
|
774
|
-
end
|
775
|
-
elsif attrs[a]==SPH_ATTR_MULTI64
|
776
|
-
r['attrs'][a] = []
|
777
|
-
val = val/2
|
778
|
-
1.upto(val) do
|
779
|
-
r['attrs'][a] << response.get_int64
|
780
|
-
end
|
781
|
-
else
|
782
|
-
r['attrs'][a] = val
|
783
|
-
end
|
784
|
-
end
|
785
|
-
end
|
786
|
-
result['matches'] << r
|
787
|
-
end
|
788
|
-
result['total'], result['total_found'], msecs, words = response.get_ints(4)
|
789
|
-
result['time'] = '%.3f' % (msecs / 1000.0)
|
790
|
-
|
791
|
-
result['words'] = {}
|
792
|
-
while words > 0
|
793
|
-
words -= 1
|
794
|
-
word = response.get_string
|
795
|
-
docs, hits = response.get_ints(2)
|
796
|
-
result['words'][word] = { 'docs' => docs, 'hits' => hits }
|
797
|
-
end
|
798
|
-
|
799
|
-
results << result
|
800
|
-
end
|
801
|
-
#rescue EOFError
|
802
|
-
# @error = 'incomplete reply'
|
803
|
-
# raise SphinxResponseError, @error
|
804
|
-
end
|
805
|
-
|
806
|
-
return results
|
807
|
-
end
|
808
|
-
|
809
|
-
# Connect to searchd server and generate excerpts from given documents.
|
810
|
-
#
|
811
|
-
# * <tt>docs</tt> -- an array of strings which represent the documents' contents
|
812
|
-
# * <tt>index</tt> -- a string specifying the index whose settings will be used
|
813
|
-
# for stemming, lexing and case folding
|
814
|
-
# * <tt>words</tt> -- a string which contains the words to highlight
|
815
|
-
# * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
|
816
|
-
#
|
817
|
-
# You can use following parameters:
|
818
|
-
# * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
|
819
|
-
# * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
|
820
|
-
# * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
|
821
|
-
# * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
|
822
|
-
# * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
|
823
|
-
# * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
|
824
|
-
# * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
|
825
|
-
# * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
|
826
|
-
# * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
|
827
|
-
#
|
828
|
-
# Returns false on failure.
|
829
|
-
# Returns an array of string excerpts on success.
|
830
|
-
def BuildExcerpts(docs, index, words, opts = {})
  assert { docs.instance_of? Array }
  assert { index.instance_of? String }
  assert { words.instance_of? String }
  assert { opts.instance_of? Hash }

  # Fill in defaults on a private copy, so the caller's hash is never
  # modified (and a frozen hash can be passed in). As before, a missing,
  # nil or false value is replaced by the default.
  options = opts.dup
  {
    'before_match'     => '<b>',
    'after_match'      => '</b>',
    'chunk_separator'  => ' ... ',
    'html_strip_mode'  => 'index',
    'limit'            => 256,
    'limit_passages'   => 0,
    'limit_words'      => 0,
    'around'           => 5,
    'start_passage_id' => 1
  }.each { |key, default| options[key] ||= default }

  # Bit 0 is always set ("remove spaces"); each remaining bit mirrors one
  # boolean option and is set when that option is truthy.
  flags = 1
  {
    'exact_phrase'    => 2,
    'single_passage'  => 4,
    'use_boundaries'  => 8,
    'weight_order'    => 16,
    'query_mode'      => 32,
    'force_all_words' => 64,
    'load_files'      => 128,
    'allow_empty'     => 256
  }.each { |key, bit| flags |= bit if options[key] }

  # build request (v.1.0 part): mode=0, flags, index, words
  request = Request.new
  request.put_int 0, flags
  request.put_string index
  request.put_string words

  # options
  request.put_string options['before_match']
  request.put_string options['after_match']
  request.put_string options['chunk_separator']
  request.put_int options['limit'].to_i, options['around'].to_i

  # options v1.2
  request.put_int options['limit_passages'].to_i
  request.put_int options['limit_words'].to_i
  request.put_int options['start_passage_id'].to_i
  request.put_string options['html_strip_mode']

  # documents
  request.put_int docs.size
  docs.each do |doc|
    assert { doc.instance_of? String }
    request.put_string doc
  end

  response = PerformRequest(:excerpt, request)

  # parse response: exactly one excerpt string per submitted document
  begin
    docs.map { response.get_string }
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
|
907
|
-
|
908
|
-
# Connect to searchd server, and generate keyword list for a given query.
|
909
|
-
#
|
910
|
-
# Returns an array of words on success.
|
911
|
-
def BuildKeywords(query, index, hits)
  assert { query.instance_of? String }
  assert { index.instance_of? String }
  assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }

  # v.1.0 request: query text, index name, then a 0/1 "want statistics" flag
  request = Request.new
  request.put_string query
  request.put_string index
  request.put_int(hits ? 1 : 0)

  response = PerformRequest(:keywords, request)

  # The reply is a word count followed by one record per word; the
  # docs/hits pair is only read when statistics were requested.
  keywords = []
  begin
    response.get_int.times do
      tokenized = response.get_string
      normalized = response.get_string

      record = { 'tokenized' => tokenized, 'normalized' => normalized }
      record['docs'], record['hits'] = response.get_ints(2) if hits

      keywords << record
    end
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end

  keywords
end
|
945
|
-
|
946
|
-
# Batch update given attributes in given rows in given indexes.
|
947
|
-
#
|
948
|
-
# * +index+ is a name of the index to be updated
|
949
|
-
# * +attrs+ is an array of attribute name strings.
|
950
|
-
# * +values+ is a hash where key is document id, and value is an array of
|
951
|
-
# new attribute values
|
952
|
-
# * +mva+ identifies whether update MVA
|
953
|
-
#
|
954
|
-
# Returns number of actually updated documents (0 or more) on success.
|
955
|
-
# Returns -1 on failure.
|
956
|
-
#
|
957
|
-
# Usage example:
|
958
|
-
# sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
|
959
|
-
def UpdateAttributes(index, attrs, values, mva = false)
  # verify everything
  # (integers are checked with is_a?(Integer): Fixnum was merged into
  # Integer in Ruby 2.4 and the constant no longer exists in Ruby 3.2+)
  assert { index.instance_of? String }
  assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }

  assert { attrs.instance_of? Array }
  attrs.each { |attr| assert { attr.instance_of? String } }

  assert { values.instance_of? Hash }
  values.each do |id, entry|
    assert { id.is_a? Integer }
    assert { entry.instance_of? Array }
    assert { entry.length == attrs.length }
    entry.each do |v|
      if mva
        # in MVA mode every attribute value is itself a list of integers
        assert { v.instance_of? Array }
        v.each { |vv| assert { vv.is_a? Integer } }
      else
        assert { v.is_a? Integer }
      end
    end
  end

  # build request: index name, attribute list (each with an MVA marker),
  # then one record per document
  request = Request.new
  request.put_string index

  request.put_int attrs.length
  attrs.each do |attr|
    request.put_string attr
    request.put_int mva ? 1 : 0
  end

  request.put_int values.length
  values.each do |id, entry|
    request.put_int64 id
    if mva
      entry.each { |v| request.put_int_array v }
    else
      request.put_int(*entry)
    end
  end

  response = PerformRequest(:update, request)

  # parse response: a single integer, the number of updated documents
  begin
    response.get_int
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
|
1014
|
-
|
1015
|
-
protected
|
1016
|
-
|
1017
|
-
# Connect to searchd server.
|
1018
|
-
def Connect
  # A host that starts with "/" is treated as a UNIX socket path;
  # anything else is a TCP host name combined with @port.
  begin
    sock = if @host[0, 1] == '/'
             UNIXSocket.new(@host)
           else
             TCPSocket.new(@host, @port)
           end
  rescue => err
    @error = "connection to #{@host}:#{@port} failed (error=#{err})"
    raise SphinxConnectError, @error
  end

  # The server greets with its protocol version as a 32-bit big-endian
  # integer. If the peer closes early or sends fewer than 4 bytes, unpack
  # yields nil; treat that like an unsupported version instead of crashing
  # on `nil < 1` with the socket still open.
  v = sock.recv(4).to_s.unpack('N*').first
  if v.nil? || v < 1
    sock.close
    @error = "expected searchd protocol version 1+, got version '#{v}'"
    raise SphinxConnectError, @error
  end

  # answer with the client protocol version and hand the socket to the caller
  sock.send([1].pack('N'), 0)
  sock
end
|
1040
|
-
|
1041
|
-
# Get and check response packet from searchd server.
|
1042
|
-
def GetResponse(sock, client_version)
  response = String.new
  status = ver = nil
  len = 0

  # Reply layout: 8-byte header (status:16, version:16, body length:32,
  # all big-endian) followed by `len` bytes of body.
  begin
    header = sock.recv(8).to_s
    if header.length == 8
      status, ver, len = header.unpack('n2N')
      left = len.to_i
      while left > 0
        begin
          chunk = sock.recv(left)
        rescue EOFError
          break
        end
        # recv signals a closed connection with an empty string (nil on
        # newer Rubies). Stop reading then, otherwise `left` never shrinks
        # and the loop spins forever; the length check below reports the
        # truncated reply.
        break if chunk.nil? || chunk.empty?

        response << chunk
        left -= chunk.bytesize
      end
    end
  ensure
    # always release the socket, even when recv raised
    sock.close
  end

  # check response
  read = response.length
  if response.empty? || read != len.to_i
    @error = if response.empty?
               'received zero-sized searchd response'
             else
               "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
             end
    raise SphinxResponseError, @error
  end

  # check status
  if status == SEARCHD_WARNING
    # body = 4-byte warning length, warning text, then the actual reply
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return response[4 + wlen, response.length - 4 - wlen]
  end

  if status == SEARCHD_ERROR
    # the first 4 bytes (message length) are skipped
    @error = 'searchd error: ' + response[4, response.length - 4]
    raise SphinxInternalError, @error
  end

  if status == SEARCHD_RETRY
    @error = 'temporary searchd error: ' + response[4, response.length - 4]
    raise SphinxTemporaryError, @error
  end

  unless status == SEARCHD_OK
    @error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, @error
  end

  # check version: versions are encoded as (major << 8) | minor
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
               "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  response
end
|
1103
|
-
|
1104
|
-
# Connect, send query, get response.
|
1105
|
-
def PerformRequest(command, request, additional = nil)
  # :excerpt -> SEARCHD_COMMAND_EXCERPT / VER_COMMAND_EXCERPT, and so on
  cmd = command.to_s.upcase
  command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
  command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)

  # Serialize the request once, before connecting. The length field of the
  # header counts bytes on the wire, so use bytesize: String#length counts
  # characters and under-reports payloads containing multibyte text.
  body = request.to_s
  len = body.bytesize + (additional.nil? ? 0 : 8)

  sock = self.Connect
  header = [command_id, command_ver, len].pack('nnN')
  # optional 8-byte prefix: a zero word followed by the caller-supplied value
  header << [0, additional].pack('NN') unless additional.nil?
  sock.send(header + body, 0)

  # GetResponse reads the reply and closes the socket
  response = self.GetResponse(sock, command_ver)
  Response.new(response)
end
|
1118
|
-
|
1119
|
-
# :stopdoc:
|
1120
|
-
def assert
  # Assertions are debug-only: outside $DEBUG the block is never evaluated.
  return unless $DEBUG

  raise 'Assertion failed!' unless yield
end
|
1123
|
-
# :startdoc:
|
1124
|
-
end
|
1125
|
-
end
|
1
|
+
# = client.rb - Sphinx Client API
|
2
|
+
#
|
3
|
+
# Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
|
4
|
+
# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
|
5
|
+
# License:: Distributes under the same terms as Ruby
|
6
|
+
# Version:: 0.9.9-r1299
|
7
|
+
# Website:: http://kpumuk.info/projects/ror-plugins/sphinx
|
8
|
+
#
|
9
|
+
# This library is distributed under the terms of the Ruby license.
|
10
|
+
# You can freely distribute/modify this library.
|
11
|
+
|
12
|
+
# ==Sphinx Client API
|
13
|
+
#
|
14
|
+
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
|
15
|
+
# daemon and get search results from Sphinx.
|
16
|
+
#
|
17
|
+
# ===Usage
|
18
|
+
#
|
19
|
+
# sphinx = Sphinx::Client.new
|
20
|
+
# result = sphinx.Query('test')
|
21
|
+
# ids = result['matches'].map { |match| match['id'] }.join(',')
|
22
|
+
# posts = Post.find :all, :conditions => "id IN (#{ids})"
|
23
|
+
#
|
24
|
+
# docs = posts.map(&:body)
|
25
|
+
# excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
|
26
|
+
|
27
|
+
require 'socket'
|
28
|
+
|
29
|
+
module Sphinx
|
30
|
+
# :stopdoc:
|
31
|
+
|
32
|
+
# Base class of every error raised by this library.
class SphinxError < StandardError; end
|
33
|
+
# NOTE(review): presumably meant for invalid arguments; no use is visible in this part of the file.
class SphinxArgumentError < SphinxError; end
|
34
|
+
# Raised by Connect when the socket cannot be opened or the protocol version is unsupported.
class SphinxConnectError < SphinxError; end
|
35
|
+
# Raised when a searchd reply is empty, truncated or incomplete.
class SphinxResponseError < SphinxError; end
|
36
|
+
# Raised when searchd answers with the SEARCHD_ERROR status.
class SphinxInternalError < SphinxError; end
|
37
|
+
# Raised when searchd answers with the SEARCHD_RETRY status.
class SphinxTemporaryError < SphinxError; end
|
38
|
+
# Raised when searchd answers with a status code this client does not know.
class SphinxUnknownError < SphinxError; end
|
39
|
+
|
40
|
+
# :startdoc:
|
41
|
+
|
42
|
+
class Client
|
43
|
+
|
44
|
+
# :stopdoc:
|
45
|
+
|
46
|
+
# Known searchd commands
|
47
|
+
|
48
|
+
# search command
|
49
|
+
SEARCHD_COMMAND_SEARCH = 0
|
50
|
+
# excerpt command
|
51
|
+
SEARCHD_COMMAND_EXCERPT = 1
|
52
|
+
# update command
|
53
|
+
SEARCHD_COMMAND_UPDATE = 2
|
54
|
+
# keywords command
|
55
|
+
SEARCHD_COMMAND_KEYWORDS = 3
|
56
|
+
|
57
|
+
# Current client-side command implementation versions
|
58
|
+
|
59
|
+
# search command version
|
60
|
+
VER_COMMAND_SEARCH = 0x119
|
61
|
+
# excerpt command version
|
62
|
+
VER_COMMAND_EXCERPT = 0x102
|
63
|
+
# update command version
|
64
|
+
VER_COMMAND_UPDATE = 0x102
|
65
|
+
# keywords command version
|
66
|
+
VER_COMMAND_KEYWORDS = 0x100
|
67
|
+
|
68
|
+
# Known searchd status codes
|
69
|
+
|
70
|
+
# general success, command-specific reply follows
|
71
|
+
SEARCHD_OK = 0
|
72
|
+
# general failure, command-specific reply may follow
|
73
|
+
SEARCHD_ERROR = 1
|
74
|
+
# temporary failure, client should retry later
|
75
|
+
SEARCHD_RETRY = 2
|
76
|
+
# general success, warning message and command-specific reply follow
|
77
|
+
SEARCHD_WARNING = 3
|
78
|
+
|
79
|
+
# :startdoc:
|
80
|
+
|
81
|
+
# Known match modes
|
82
|
+
|
83
|
+
# match all query words
|
84
|
+
SPH_MATCH_ALL = 0
|
85
|
+
# match any query word
|
86
|
+
SPH_MATCH_ANY = 1
|
87
|
+
# match this exact phrase
|
88
|
+
SPH_MATCH_PHRASE = 2
|
89
|
+
# match this boolean query
|
90
|
+
SPH_MATCH_BOOLEAN = 3
|
91
|
+
# match this extended query
|
92
|
+
SPH_MATCH_EXTENDED = 4
|
93
|
+
# match all document IDs w/o fulltext query, apply filters
|
94
|
+
SPH_MATCH_FULLSCAN = 5
|
95
|
+
# extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
|
96
|
+
SPH_MATCH_EXTENDED2 = 6
|
97
|
+
|
98
|
+
# Known ranking modes (ext2 only)
|
99
|
+
|
100
|
+
# default mode, phrase proximity major factor and BM25 minor one
|
101
|
+
SPH_RANK_PROXIMITY_BM25 = 0
|
102
|
+
# statistical mode, BM25 ranking only (faster but worse quality)
|
103
|
+
SPH_RANK_BM25 = 1
|
104
|
+
# no ranking, all matches get a weight of 1
|
105
|
+
SPH_RANK_NONE = 2
|
106
|
+
# simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
|
107
|
+
SPH_RANK_WORDCOUNT = 3
|
108
|
+
# phrase proximity
|
109
|
+
SPH_RANK_PROXIMITY = 4
|
110
|
+
|
111
|
+
# Known sort modes
|
112
|
+
|
113
|
+
# sort by document relevance desc, then by date
|
114
|
+
SPH_SORT_RELEVANCE = 0
|
115
|
+
# sort by document date desc, then by relevance desc
|
116
|
+
SPH_SORT_ATTR_DESC = 1
|
117
|
+
# sort by document date asc, then by relevance desc
|
118
|
+
SPH_SORT_ATTR_ASC = 2
|
119
|
+
# sort by time segments (hour/day/week/etc) desc, then by relevance desc
|
120
|
+
SPH_SORT_TIME_SEGMENTS = 3
|
121
|
+
# sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
|
122
|
+
SPH_SORT_EXTENDED = 4
|
123
|
+
# sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
|
124
|
+
SPH_SORT_EXPR = 5
|
125
|
+
|
126
|
+
# Known filter types
|
127
|
+
|
128
|
+
# filter by integer values set
|
129
|
+
SPH_FILTER_VALUES = 0
|
130
|
+
# filter by integer range
|
131
|
+
SPH_FILTER_RANGE = 1
|
132
|
+
# filter by float range
|
133
|
+
SPH_FILTER_FLOATRANGE = 2
|
134
|
+
|
135
|
+
# Known attribute types
|
136
|
+
|
137
|
+
# this attr is just an integer
|
138
|
+
SPH_ATTR_INTEGER = 1
|
139
|
+
# this attr is a timestamp
|
140
|
+
SPH_ATTR_TIMESTAMP = 2
|
141
|
+
# this attr is an ordinal string number (integer at search time,
|
142
|
+
# specially handled at indexing time)
|
143
|
+
SPH_ATTR_ORDINAL = 3
|
144
|
+
# this attr is a boolean bit field
|
145
|
+
SPH_ATTR_BOOL = 4
|
146
|
+
# this attr is a float
|
147
|
+
SPH_ATTR_FLOAT = 5
|
148
|
+
# signed 64-bit integer
|
149
|
+
SPH_ATTR_BIGINT = 6
|
150
|
+
# string
|
151
|
+
SPH_ATTR_STRING = 7
|
152
|
+
# this attr has multiple values (0 or more)
|
153
|
+
SPH_ATTR_MULTI = 0x40000001
|
154
|
+
SPH_ATTR_MULTI64 = 0x40000002
|
155
|
+
|
156
|
+
# Known grouping functions
|
157
|
+
|
158
|
+
# group by day
|
159
|
+
SPH_GROUPBY_DAY = 0
|
160
|
+
# group by week
|
161
|
+
SPH_GROUPBY_WEEK = 1
|
162
|
+
# group by month
|
163
|
+
SPH_GROUPBY_MONTH = 2
|
164
|
+
# group by year
|
165
|
+
SPH_GROUPBY_YEAR = 3
|
166
|
+
# group by attribute value
|
167
|
+
SPH_GROUPBY_ATTR = 4
|
168
|
+
# group by sequential attrs pair
|
169
|
+
SPH_GROUPBY_ATTRPAIR = 5
|
170
|
+
|
171
|
+
# Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
|
172
|
+
def initialize
  # --- per-client-object settings ---
  # searchd host and port
  @host = 'localhost'
  @port = 9312

  # --- per-query settings ---
  # window into the result set: records to skip, then records to return
  @offset = 0
  @limit = 20
  # query matching mode
  @mode = SPH_MATCH_ALL
  # per-field weights (empty means a weight of 1 for all fields)
  @weights = []
  # match sorting mode and the attribute to sort by
  @sort = SPH_SORT_RELEVANCE
  @sortby = ''
  # document ID range to match (0 means no limit)
  @min_id = 0
  @max_id = 0
  # search filters
  @filters = []
  # group-by attribute name, pre-processing function, group sorting clause
  # and count-distinct attribute
  @groupby = ''
  @groupfunc = SPH_GROUPBY_DAY
  @groupsort = '@group desc'
  @groupdistinct = ''
  # max matches to retrieve, and cutoff to stop searching at
  @maxmatches = 1000
  @cutoff = 0
  # distributed retries count and delay
  @retrycount = 0
  @retrydelay = 0
  # geographical anchor point
  @anchor = []
  # per-index weights
  @indexweights = []
  # ranking mode
  @ranker = SPH_RANK_PROXIMITY_BM25
  # max query time in milliseconds (0 means do not limit)
  @maxquerytime = 0
  # per-field-name weights
  @fieldweights = {}
  # per-query attribute values overrides
  @overrides = []
  # select-list (attributes or expressions, with optional aliases)
  @select = '*'

  # --- per-reply fields (for single-query case) ---
  # last error and warning messages
  @error = ''
  @warning = ''

  # requests storage (for multi-query case)
  @reqs = []
  # stored mbstring encoding
  @mbenc = ''
end
|
210
|
+
|
211
|
+
# Get last error message.
|
212
|
+
def GetLastError
  # plain reader for the message stored by the most recent failure
  @error
end
|
215
|
+
|
216
|
+
# Get last warning message.
|
217
|
+
def GetLastWarning
  # plain reader for the most recently stored warning text
  @warning
end
|
220
|
+
|
221
|
+
# Set searchd host name (string) and port (integer).
|
222
|
+
def SetServer(host, port)
  assert { host.instance_of? String }
  # Fixnum was merged into Integer in Ruby 2.4 and removed in 3.2
  assert { port.is_a? Integer }

  @host = host
  @port = port
end
|
229
|
+
|
230
|
+
# Set offset and count into result set,
|
231
|
+
# and optionally set max-matches and cutoff limits.
|
232
|
+
def SetLimits(offset, limit, max = 0, cutoff = 0)
  # Fixnum was merged into Integer in Ruby 2.4 and removed in 3.2
  assert { offset.is_a? Integer }
  assert { limit.is_a? Integer }
  assert { max.is_a? Integer }
  # cutoff is compared with 0 below, so validate it like the other arguments
  assert { cutoff.is_a? Integer }
  assert { offset >= 0 }
  assert { limit > 0 }
  assert { max >= 0 }
  assert { cutoff >= 0 }

  @offset = offset
  @limit = limit
  # max and cutoff are optional: zero keeps the current setting
  @maxmatches = max if max > 0
  @cutoff = cutoff if cutoff > 0
end
|
245
|
+
|
246
|
+
# Set maximum query time, in milliseconds, per-index,
|
247
|
+
# integer, 0 means "do not limit"
|
248
|
+
def SetMaxQueryTime(max)
  # Fixnum was merged into Integer in Ruby 2.4 and removed in 3.2
  assert { max.is_a? Integer }
  assert { max >= 0 }
  @maxquerytime = max
end
|
253
|
+
|
254
|
+
# Set matching mode.
|
255
|
+
def SetMatchMode(mode)
  # only the known SPH_MATCH_* values are accepted
  assert do
    [
      SPH_MATCH_ALL,
      SPH_MATCH_ANY,
      SPH_MATCH_PHRASE,
      SPH_MATCH_BOOLEAN,
      SPH_MATCH_EXTENDED,
      SPH_MATCH_FULLSCAN,
      SPH_MATCH_EXTENDED2
    ].include?(mode)
  end

  @mode = mode
end
|
266
|
+
|
267
|
+
# Set ranking mode.
|
268
|
+
def SetRankingMode(ranker)
  # only the known SPH_RANK_* values are accepted
  assert do
    [
      SPH_RANK_PROXIMITY_BM25,
      SPH_RANK_BM25,
      SPH_RANK_NONE,
      SPH_RANK_WORDCOUNT,
      SPH_RANK_PROXIMITY
    ].include?(ranker)
  end

  @ranker = ranker
end
|
277
|
+
|
278
|
+
# Set matches sorting mode.
|
279
|
+
def SetSortMode(mode, sortby = '')
  # only the known SPH_SORT_* values are accepted
  assert do
    [
      SPH_SORT_RELEVANCE,
      SPH_SORT_ATTR_DESC,
      SPH_SORT_ATTR_ASC,
      SPH_SORT_TIME_SEGMENTS,
      SPH_SORT_EXTENDED,
      SPH_SORT_EXPR
    ].include?(mode)
  end
  assert { sortby.instance_of? String }
  # every mode except plain relevance needs something to sort by
  assert { !(mode != SPH_SORT_RELEVANCE && sortby.empty?) }

  @sort = mode
  @sortby = sortby
end
|
292
|
+
|
293
|
+
# Bind per-field weights by order.
|
294
|
+
#
|
295
|
+
# DEPRECATED; use SetFieldWeights() instead.
|
296
|
+
def SetWeights(weights)
  assert { weights.instance_of? Array }
  # Fixnum was merged into Integer in Ruby 2.4 and removed in 3.2
  weights.each { |weight| assert { weight.is_a? Integer } }

  @weights = weights
end
|
304
|
+
|
305
|
+
# Bind per-field weights by name.
|
306
|
+
#
|
307
|
+
# Takes a hash that maps field name (string) to field weight (integer) as an argument.
|
308
|
+
# * Takes precedence over SetWeights().
|
309
|
+
# * Unknown names will be silently ignored.
|
310
|
+
# * Unbound fields will be silently given a weight of 1.
|
311
|
+
def SetFieldWeights(weights)
  assert { weights.instance_of? Hash }
  weights.each do |name, weight|
    assert { name.instance_of? String }
    # Fixnum was merged into Integer in Ruby 2.4 and removed in 3.2
    assert { weight.is_a? Integer }
  end

  @fieldweights = weights
end
|
320
|
+
|
321
|
+
# Bind per-index weights by name.
|
322
|
+
def SetIndexWeights(weights)
  assert { weights.instance_of? Hash }
  weights.each do |index, weight|
    assert { index.instance_of? String }
    # Fixnum was merged into Integer in Ruby 2.4 and removed in 3.2
    assert { weight.is_a? Integer }
  end

  @indexweights = weights
end
|
331
|
+
|
332
|
+
# Set IDs range to match.
|
333
|
+
#
|
334
|
+
# Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
|
335
|
+
def SetIDRange(min, max)
  # Fixnum and Bignum were merged into Integer in Ruby 2.4 and removed in
  # 3.2; Integer covers both the small and the 64-bit ID case.
  assert { min.is_a? Integer }
  assert { max.is_a? Integer }
  assert { min <= max }

  @min_id = min
  @max_id = max
end
|
343
|
+
|
344
|
+
# Set values filter.
|
345
|
+
#
|
346
|
+
# Only match those records where <tt>attribute</tt> column values
|
347
|
+
# are in specified set.
|
348
|
+
def SetFilter(attribute, values, exclude = false)
  assert { attribute.instance_of? String }
  assert { values.instance_of? Array }
  assert { !values.empty? }

  # With assertions disabled (no $DEBUG) an unusable value list is
  # ignored silently and no filter is added.
  return unless values.instance_of?(Array) && !values.empty?

  # Fixnum was merged into Integer in Ruby 2.4 and removed in 3.2
  values.each { |value| assert { value.is_a? Integer } }

  @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
end
|
361
|
+
|
362
|
+
# Set range filter.
|
363
|
+
#
|
364
|
+
# Only match those records where <tt>attribute</tt> column value
|
365
|
+
# is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
366
|
+
def SetFilterRange(attribute, min, max, exclude = false)
  assert { attribute.instance_of? String }
  # Fixnum and Bignum were merged into Integer in Ruby 2.4 and removed in 3.2
  assert { min.is_a? Integer }
  assert { max.is_a? Integer }
  assert { min <= max }

  @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
end
|
374
|
+
|
375
|
+
# Set float range filter.
|
376
|
+
#
|
377
|
+
# Only match those records where <tt>attribute</tt> column value
|
378
|
+
# is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
379
|
+
def SetFilterFloatRange(attribute, min, max, exclude = false)
  assert { attribute.instance_of? String }
  # both bounds must be Float instances
  [min, max].each { |bound| assert { bound.instance_of? Float } }
  assert { min <= max }

  filter = {
    'type'    => SPH_FILTER_FLOATRANGE,
    'attr'    => attribute,
    'exclude' => exclude,
    'min'     => min,
    'max'     => max
  }
  @filters << filter
end
|
387
|
+
|
388
|
+
# Setup anchor point for geosphere distance calculations.
|
389
|
+
#
|
390
|
+
# Required to use <tt>@geodist</tt> in filters and sorting
|
391
|
+
# distance will be computed to this point. Latitude and longitude
|
392
|
+
# must be in radians.
|
393
|
+
#
|
394
|
+
# * <tt>attrlat</tt> -- is the name of latitude attribute
|
395
|
+
# * <tt>attrlong</tt> -- is the name of longitude attribute
|
396
|
+
# * <tt>lat</tt> -- is anchor point latitude, in radians
|
397
|
+
# * <tt>long</tt> -- is anchor point longitude, in radians
|
398
|
+
def SetGeoAnchor(attrlat, attrlong, lat, long)
  # attribute names are strings, coordinates are floats
  [attrlat, attrlong].each { |name| assert { name.instance_of? String } }
  [lat, long].each { |coordinate| assert { coordinate.instance_of? Float } }

  @anchor = {
    'attrlat'  => attrlat,
    'attrlong' => attrlong,
    'lat'      => lat,
    'long'     => long
  }
end
|
406
|
+
|
407
|
+
# Set grouping attribute and function.
|
408
|
+
#
|
409
|
+
# In grouping mode, all matches are assigned to different groups
|
410
|
+
# based on grouping function value.
|
411
|
+
#
|
412
|
+
# Each group keeps track of the total match count, and the best match
|
413
|
+
# (in this group) according to current sorting function.
|
414
|
+
#
|
415
|
+
# The final result set contains one best match per group, with
|
416
|
+
# grouping function value and matches count attached.
|
417
|
+
#
|
418
|
+
# Groups in result set could be sorted by any sorting clause,
|
419
|
+
# including both document attributes and the following special
|
420
|
+
# internal Sphinx attributes:
|
421
|
+
#
|
422
|
+
# * @id - match document ID;
|
423
|
+
# * @weight, @rank, @relevance - match weight;
|
424
|
+
# * @group - groupby function value;
|
425
|
+
# * @count - amount of matches in group.
|
426
|
+
#
|
427
|
+
# the default mode is to sort by groupby value in descending order,
|
428
|
+
# ie. by '@group desc'.
|
429
|
+
#
|
430
|
+
# 'total_found' would contain total amount of matching groups over
|
431
|
+
# the whole index.
|
432
|
+
#
|
433
|
+
# WARNING: grouping is done in fixed memory and thus its results
|
434
|
+
# are only approximate; so there might be more groups reported
|
435
|
+
# in total_found than actually present. @count might also
|
436
|
+
# be underestimated.
|
437
|
+
#
|
438
|
+
# For example, if sorting by relevance and grouping by "published"
|
439
|
+
# attribute with SPH_GROUPBY_DAY function, then the result set will
|
440
|
+
# contain one most relevant match per each day when there were any
|
441
|
+
# matches published, with day number and per-day match count attached,
|
442
|
+
# and sorted by day number in descending order (ie. recent days first).
|
443
|
+
def SetGroupBy(attribute, func, groupsort = '@group desc')
  assert { attribute.instance_of? String }
  assert { groupsort.instance_of? String }
  # only the known SPH_GROUPBY_* values are accepted
  assert do
    [
      SPH_GROUPBY_DAY,
      SPH_GROUPBY_WEEK,
      SPH_GROUPBY_MONTH,
      SPH_GROUPBY_YEAR,
      SPH_GROUPBY_ATTR,
      SPH_GROUPBY_ATTRPAIR
    ].include?(func)
  end

  @groupby = attribute
  @groupfunc = func
  @groupsort = groupsort
end
|
457
|
+
|
458
|
+
# Set count-distinct attribute for group-by queries.
|
459
|
+
def SetGroupDistinct(attribute)
  # the count-distinct attribute is referenced by name
  assert { attribute.instance_of?(String) }

  @groupdistinct = attribute
end
|
463
|
+
|
464
|
+
# Set distributed retries count and delay.
|
465
|
+
def SetRetries(count, delay = 0)
  # Fixnum was merged into Integer in Ruby 2.4 and removed in 3.2
  assert { count.is_a? Integer }
  assert { delay.is_a? Integer }

  @retrycount = count
  @retrydelay = delay
end
|
472
|
+
|
473
|
+
# Set attribute values override
|
474
|
+
#
|
475
|
+
# There can be only one override per attribute.
|
476
|
+
# +values+ must be a hash that maps document IDs to attribute values.
|
477
|
+
def SetOverride(attrname, attrtype, values)
  assert { attrname.instance_of? String }
  assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
  assert { values.instance_of? Hash }

  # There can be only one override per attribute: a repeated call for the
  # same attribute replaces the earlier entry instead of queuing a duplicate.
  @overrides.reject! { |override| override['attr'] == attrname }
  @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
end
|
484
|
+
|
485
|
+
# Set select-list (attributes or expressions), SQL-like syntax.
|
486
|
+
def SetSelect(select)
  # the select-list is passed around as plain text
  assert { select.instance_of?(String) }

  @select = select
end
|
490
|
+
|
491
|
+
# Clear all filters (for multi-queries).
|
492
|
+
def ResetFilters
  # start over with fresh, empty containers for the filter list and anchor
  @filters = Array.new
  @anchor = Array.new
end
|
496
|
+
|
497
|
+
# Clear groupby settings (for multi-queries).
|
498
|
+
def ResetGroupBy
  # back to the same group-by values that initialize assigns
  @groupfunc = SPH_GROUPBY_DAY
  @groupsort = '@group desc'
  @groupby = ''
  @groupdistinct = ''
end
|
504
|
+
|
505
|
+
# Clear all attribute value overrides (for multi-queries).
|
506
|
+
def ResetOverrides
  # drop every stored override by switching to a fresh, empty list
  @overrides = Array.new
end
|
509
|
+
|
510
|
+
# Connect to searchd server and run given search query.
|
511
|
+
#
|
512
|
+
# <tt>query</tt> is query string
|
513
|
+
|
514
|
+
# <tt>index</tt> is index name (or names) to query. default value is "*" which means
|
515
|
+
# to query all indexes. Accepted characters for index names are letters, numbers,
|
516
|
+
# dash, and underscore; everything else is considered a separator. Therefore,
|
517
|
+
# all the following calls are valid and will search two indexes:
|
518
|
+
#
|
519
|
+
# sphinx.Query('test query', 'main delta')
|
520
|
+
# sphinx.Query('test query', 'main;delta')
|
521
|
+
# sphinx.Query('test query', 'main, delta')
|
522
|
+
#
|
523
|
+
# Index order matters. If identical IDs are found in two or more indexes,
|
524
|
+
# weight and attribute values from the very last matching index will be used
|
525
|
+
# for sorting and returning to client. Therefore, in the example above,
|
526
|
+
# matches from "delta" index will always "win" over matches from "main".
|
527
|
+
#
|
528
|
+
# Returns false on failure.
|
529
|
+
# Returns hash which has the following keys on success:
|
530
|
+
#
|
531
|
+
# * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
|
532
|
+
# * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
|
533
|
+
# * <tt>'total_found'</tt> -- total amount of matching documents in index
|
534
|
+
# * <tt>'time'</tt> -- search time
|
535
|
+
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
|
536
|
+
def Query(query, index = '*', comment = '')
  # a single query must not be mixed into a pending batch
  assert { @reqs.empty? }
  @reqs = []

  self.AddQuery(query, index, comment)
  results = self.RunQueries

  # probably network error; error message should be already filled
  return false unless results.instance_of?(Array)

  # a batch of one: publish its error/warning, then hand back the result
  # itself, or false when searchd reported an error for it
  first = results[0]
  @error = first['error']
  @warning = first['warning']

  first['status'] == SEARCHD_ERROR ? false : first
end
|
552
|
+
|
553
|
+
# Add query to batch.
|
554
|
+
#
|
555
|
+
# Batch queries enable searchd to perform internal optimizations,
|
556
|
+
# if possible; and reduce network connection overheads in all cases.
|
557
|
+
#
|
558
|
+
# For instance, running exactly the same query with different
|
559
|
+
# groupby settings will enable searchd to perform expensive
|
560
|
+
# full-text search and ranking operation only once, but compute
|
561
|
+
# multiple groupby results from its output.
|
562
|
+
#
|
563
|
+
# Parameters are exactly the same as in <tt>Query</tt> call.
|
564
|
+
# Returns index to results array returned by <tt>RunQueries</tt> call.
|
565
|
+
def AddQuery(query, index = '*', comment = '')
  # Serializes the client's current search settings together with +query+,
  # +index+ and +comment+ into a Request, appends the encoded request to
  # @reqs and returns its position in that array.
  #
  # Raises SphinxInternalError if a filter has an unhandled type.
  request = Request.new

  # mode and limits
  request.put_int @offset, @limit, @mode, @ranker, @sort
  request.put_string @sortby
  # query itself
  request.put_string query
  # weights
  request.put_int_array @weights
  # indexes
  request.put_string index
  # id64 range marker
  request.put_int 1
  # id64 range
  request.put_int64 @min_id.to_i, @max_id.to_i

  # filters
  request.put_int @filters.length
  @filters.each do |filter|
    request.put_string filter['attr']
    request.put_int filter['type']

    case filter['type']
    when SPH_FILTER_VALUES
      request.put_int64_array filter['values']
    when SPH_FILTER_RANGE
      request.put_int64 filter['min'], filter['max']
    when SPH_FILTER_FLOATRANGE
      request.put_float filter['min'], filter['max']
    else
      raise SphinxInternalError, 'Internal error: unhandled filter type'
    end
    request.put_int(filter['exclude'] ? 1 : 0)
  end

  # group-by clause, max-matches count, group-sort clause, cutoff count
  request.put_int @groupfunc
  request.put_string @groupby
  request.put_int @maxmatches
  request.put_string @groupsort
  request.put_int @cutoff, @retrycount, @retrydelay
  request.put_string @groupdistinct

  # anchor point
  if @anchor.empty?
    request.put_int 0
  else
    request.put_int 1
    request.put_string @anchor['attrlat'], @anchor['attrlong']
    request.put_float @anchor['lat'], @anchor['long']
  end

  # per-index weights
  request.put_int @indexweights.length
  @indexweights.each do |idx, weight|
    request.put_string idx
    request.put_int weight
  end

  # max query time
  request.put_int @maxquerytime

  # per-field weights
  request.put_int @fieldweights.length
  @fieldweights.each do |field, weight|
    request.put_string field
    request.put_int weight
  end

  # comment
  request.put_string comment

  # attribute overrides
  request.put_int @overrides.length
  @overrides.each do |entry|
    request.put_string entry['attr']
    request.put_int entry['type'], entry['values'].size
    entry['values'].each do |id, val|
      # Integer covers what used to be Fixnum and Bignum; those constants
      # no longer exist on current Rubies, so naming them here would raise
      # NameError as soon as assertions are enabled.
      assert { id.is_a?(Integer) }
      assert { val.is_a?(Integer) || val.is_a?(Float) }

      request.put_int64 id
      case entry['type']
      when SPH_ATTR_FLOAT
        request.put_float val
      when SPH_ATTR_BIGINT
        request.put_int64 val
      else
        request.put_int val
      end
    end
  end

  # select-list
  request.put_string @select

  # store request to requests array
  @reqs << request.to_s
  @reqs.length - 1
end
|
667
|
+
|
668
|
+
# Run queries batch.
|
669
|
+
#
|
670
|
+
# Returns an array of result sets on success.
|
671
|
+
# Returns false on network IO failure.
|
672
|
+
#
|
673
|
+
# Each result set in returned array is a hash which contains
|
674
|
+
# the same keys as the hash returned by <tt>Query</tt>, plus:
|
675
|
+
#
|
676
|
+
# * <tt>'error'</tt> -- search error for this query
|
677
|
+
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
|
678
|
+
def RunQueries
  # Sends every request queued in @reqs as one :search command and decodes
  # one result hash per queued request, in order.
  #
  # Returns false (and sets @error) when nothing is queued; otherwise the
  # array of result hashes. The queue is emptied before the request is sent.
  if @reqs.empty?
    @error = 'No queries defined, issue AddQuery() first'
    return false
  end

  payload = @reqs.join('')
  nreqs = @reqs.length
  @reqs = []
  response = PerformRequest(:search, payload, nreqs)

  # parse response
  # NOTE: the EOFError -> SphinxResponseError translation used by the other
  # commands is commented out for this one in the original, so a truncated
  # reply surfaces as whatever the response reader raises.
  results = []
  nreqs.times do
    result = { 'error' => '', 'warning' => '' }

    # extract status
    status = result['status'] = response.get_int
    unless status == SEARCHD_OK
      message = response.get_string
      if status == SEARCHD_WARNING
        result['warning'] = message
      else
        # a failed query carries nothing but its message
        result['error'] = message
        results << result
        next
      end
    end

    # read schema: field names first ...
    fields = []
    response.get_int.times { fields << response.get_string }
    result['fields'] = fields

    # ... then attribute name/type pairs; the order is kept separately
    # because match values arrive in schema order.
    attrs = {}
    attr_order = []
    response.get_int.times do
      name = response.get_string
      attrs[name] = response.get_int
      attr_order << name
    end
    result['attrs'] = attrs

    # read match count
    count = response.get_int
    id64 = response.get_int

    # read matches
    result['matches'] = []
    count.times do
      if id64 != 0
        doc = response.get_int64
        weight = response.get_int
      else
        doc, weight = response.get_ints(2)
      end

      # one entry of result['matches']
      match = { 'id' => doc, 'weight' => weight }
      attr_order.each do |a|
        match['attrs'] ||= {}

        value =
          case attrs[a]
          when SPH_ATTR_BIGINT
            # handle 64-bit ints
            response.get_int64
          when SPH_ATTR_FLOAT
            # handle floats
            response.get_float
          when SPH_ATTR_STRING
            # handle string
            response.get_string
          else
            # handle everything else as unsigned ints
            raw = response.get_int
            if attrs[a] == SPH_ATTR_MULTI
              # raw is the number of 32-bit values that follow
              (1..raw).map { response.get_int }
            elsif attrs[a] == SPH_ATTR_MULTI64
              # half as many 64-bit values are read as raw indicates
              (1..raw / 2).map { response.get_int64 }
            else
              raw
            end
          end
        match['attrs'][a] = value
      end
      result['matches'] << match
    end

    result['total'], result['total_found'], msecs, words = response.get_ints(4)
    result['time'] = '%.3f' % (msecs / 1000.0)

    result['words'] = {}
    words.times do
      word = response.get_string
      docs, hits = response.get_ints(2)
      result['words'][word] = { 'docs' => docs, 'hits' => hits }
    end

    results << result
  end

  results
end
|
808
|
+
|
809
|
+
# Connect to searchd server and generate excerpts from given documents.
|
810
|
+
#
|
811
|
+
# * <tt>docs</tt> -- an array of strings which represent the documents' contents
|
812
|
+
# * <tt>index</tt> -- a string specifying the index whose settings will be used
|
813
|
+
# for stemming, lexing and case folding
|
814
|
+
# * <tt>words</tt> -- a string which contains the words to highlight
|
815
|
+
# * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
|
816
|
+
#
|
817
|
+
# You can use following parameters:
|
818
|
+
# * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
|
819
|
+
# * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
|
820
|
+
# * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
|
821
|
+
# * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
|
822
|
+
# * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
|
823
|
+
# * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
|
824
|
+
# * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
|
825
|
+
# * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
|
826
|
+
# * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
|
827
|
+
#
|
828
|
+
# Returns false on failure.
|
829
|
+
# Returns an array of string excerpts on success.
|
830
|
+
def BuildExcerpts(docs, index, words, opts = {})
  # Builds an :excerpt request from +docs+, +index+, +words+ and the
  # highlighting options, sends it, and returns one excerpt string per
  # document.
  #
  # Raises SphinxResponseError when the reply ends early (EOFError while
  # reading the excerpts).
  assert { docs.instance_of? Array }
  assert { index.instance_of? String }
  assert { words.instance_of? String }
  assert { opts.instance_of? Hash }

  # fixup options -- on a copy, so the caller's hash is not modified
  opts = opts.dup
  opts['before_match'] ||= '<b>'
  opts['after_match'] ||= '</b>'
  opts['chunk_separator'] ||= ' ... '
  opts['html_strip_mode'] ||= 'index'
  opts['limit'] ||= 256
  opts['limit_passages'] ||= 0
  opts['limit_words'] ||= 0
  opts['around'] ||= 5
  opts['start_passage_id'] ||= 1
  opts['exact_phrase'] ||= false
  opts['single_passage'] ||= false
  opts['use_boundaries'] ||= false
  opts['weight_order'] ||= false
  opts['load_files'] ||= false
  opts['allow_empty'] ||= false

  # build request

  # v.1.0 req
  # bit 0 is always set; every further bit mirrors one boolean option
  flags = 1
  flags |= 2 if opts['exact_phrase']
  flags |= 4 if opts['single_passage']
  flags |= 8 if opts['use_boundaries']
  flags |= 16 if opts['weight_order']
  flags |= 32 if opts['query_mode']
  flags |= 64 if opts['force_all_words']
  flags |= 128 if opts['load_files']
  flags |= 256 if opts['allow_empty']

  request = Request.new
  request.put_int 0, flags # mode=0, flags=1 (remove spaces)
  # req index
  request.put_string index
  # req words
  request.put_string words

  # options
  request.put_string opts['before_match']
  request.put_string opts['after_match']
  request.put_string opts['chunk_separator']
  request.put_int opts['limit'].to_i, opts['around'].to_i

  # options v1.2
  request.put_int opts['limit_passages'].to_i
  request.put_int opts['limit_words'].to_i
  request.put_int opts['start_passage_id'].to_i
  request.put_string opts['html_strip_mode']

  # documents
  request.put_int docs.size
  docs.each do |doc|
    assert { doc.instance_of? String }

    request.put_string doc
  end

  response = PerformRequest(:excerpt, request)

  # parse response: one string per submitted document
  begin
    docs.map { response.get_string }
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
|
907
|
+
|
908
|
+
# Connect to searchd server, and generate keyword list for a given query.
|
909
|
+
#
|
910
|
+
# Returns an array of words on success.
|
911
|
+
def BuildKeywords(query, index, hits)
  # Sends a :keywords request for +query+ against +index+ and returns an
  # array of hashes with 'tokenized' and 'normalized' keys; when +hits+ is
  # true each hash also gets 'docs' and 'hits'.
  #
  # Raises SphinxResponseError when the reply ends early.
  assert { query.instance_of? String }
  assert { index.instance_of? String }
  assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }

  # build request
  request = Request.new
  # v.1.0 req
  request.put_string query # req query
  request.put_string index # req index
  request.put_int(hits ? 1 : 0)

  response = PerformRequest(:keywords, request)

  # parse response
  keywords = []
  begin
    response.get_int.times do
      entry = {
        'tokenized' => response.get_string,
        'normalized' => response.get_string
      }
      # statistics are only present in the reply when they were requested
      entry['docs'], entry['hits'] = response.get_ints(2) if hits

      keywords << entry
    end
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end

  keywords
end
|
945
|
+
|
946
|
+
# Batch update given attributes in given rows in given indexes.
|
947
|
+
#
|
948
|
+
# * +index+ is a name of the index to be updated
|
949
|
+
# * +attrs+ is an array of attribute name strings.
|
950
|
+
# * +values+ is a hash where key is document id, and value is an array of
|
951
|
+
# new attribute values
|
952
|
+
# * +mva+ identifies whether MVA (array-valued) attributes are being updated
|
953
|
+
#
|
954
|
+
# Returns number of actually updated documents (0 or more) on success.
|
955
|
+
# Returns -1 on failure.
|
956
|
+
#
|
957
|
+
# Usage example:
|
958
|
+
# sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
|
959
|
+
def UpdateAttributes(index, attrs, values, mva = false)
  # Builds and sends an :update request that sets +attrs+ on the documents
  # listed in +values+ (document id => array of new values, one per
  # attribute; with +mva+ each value is itself an array of integers).
  #
  # Returns the integer read from the reply.
  # Raises SphinxResponseError when the reply ends early.

  # verify everything
  assert { index.instance_of? String }
  assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }

  assert { attrs.instance_of? Array }
  attrs.each do |attr|
    assert { attr.instance_of? String }
  end

  assert { values.instance_of? Hash }
  values.each do |id, entry|
    # Integer rather than Fixnum: ids are written with put_int64 below, so
    # large ids are legitimate, and Fixnum no longer exists on current Rubies.
    assert { id.is_a?(Integer) }
    assert { entry.instance_of? Array }
    assert { entry.length == attrs.length }
    entry.each do |v|
      if mva
        assert { v.instance_of? Array }
        v.each { |vv| assert { vv.is_a?(Integer) } }
      else
        assert { v.is_a?(Integer) }
      end
    end
  end

  # build request
  request = Request.new
  request.put_string index

  request.put_int attrs.length
  attrs.each do |attr|
    request.put_string attr
    request.put_int(mva ? 1 : 0)
  end

  request.put_int values.length
  values.each do |id, entry|
    request.put_int64 id
    if mva
      entry.each { |v| request.put_int_array v }
    else
      request.put_int(*entry)
    end
  end

  response = PerformRequest(:update, request)

  # parse response
  begin
    response.get_int
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
|
1014
|
+
|
1015
|
+
protected
|
1016
|
+
|
1017
|
+
# Connect to searchd server.
|
1018
|
+
def Connect
  # Opens a socket to searchd and performs the version handshake.
  #
  # A @host starting with '/' is treated as a UNIX socket path; anything
  # else is connected to over TCP using @host and @port.
  #
  # Returns the connected socket.
  # Raises SphinxConnectError when the connection cannot be opened or the
  # server does not announce a protocol version of at least 1.
  begin
    sock = if @host[0, 1] == '/'
             UNIXSocket.new(@host)
           else
             TCPSocket.new(@host, @port)
           end
  rescue => err
    @error = "connection to #{@host}:#{@port} failed (error=#{err})"
    raise SphinxConnectError, @error
  end

  # The server speaks first with a 32-bit big-endian version number. If it
  # closes the connection instead, recv yields an empty string (nil on
  # newer Rubies) and there is no version to unpack; treat that as a failed
  # handshake instead of crashing on nil and leaking the socket.
  v = sock.recv(4).to_s.unpack('N*').first
  if v.nil? || v < 1
    sock.close
    @error = "expected searchd protocol version 1+, got version '#{v}'"
    raise SphinxConnectError, @error
  end

  # answer with the 32-bit value 1
  sock.send([1].pack('N'), 0)
  sock
end
|
1040
|
+
|
1041
|
+
# Get and check response packet from searchd server.
|
1042
|
+
def GetResponse(sock, client_version)
  # Reads one reply packet from +sock+, closes the socket and validates the
  # packet.
  #
  # The packet starts with an 8-byte header unpacked as two 16-bit values
  # (status, version) and one 32-bit value (body length), followed by the
  # body.
  #
  # Returns the body. For SEARCHD_WARNING the leading length-prefixed
  # warning text is stored in @warning and stripped from the returned body.
  # Raises SphinxResponseError on an empty or short reply,
  # SphinxInternalError for SEARCHD_ERROR, SphinxTemporaryError for
  # SEARCHD_RETRY and SphinxUnknownError for any other non-OK status.
  response = ''.dup
  len = 0
  status = ver = nil

  begin
    header = sock.recv(8).to_s
    if header.length == 8
      status, ver, len = header.unpack('n2N')
      left = len.to_i
      while left > 0
        begin
          chunk = sock.recv(left)
        rescue EOFError
          break
        end
        # An empty (or nil) read means the peer closed the connection.
        # Stop here: `left` would never shrink and the loop would spin
        # forever. The short read is reported by the length check below.
        break if chunk.nil? || chunk.empty?

        response << chunk
        left -= chunk.length
      end
    end
  ensure
    # close even when recv raised, so the descriptor is not leaked
    sock.close
  end

  # check response
  read = response.length
  if response.empty? || read != len.to_i
    @error = if response.empty?
               'received zero-sized searchd response'
             else
               "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
             end
    raise SphinxResponseError, @error
  end

  # check status
  if status == SEARCHD_WARNING
    # body = 32-bit warning length, warning text, then the real payload
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return response[4 + wlen, response.length - 4 - wlen]
  end

  if status == SEARCHD_ERROR
    @error = 'searchd error: ' + response[4, response.length - 4]
    raise SphinxInternalError, @error
  end

  if status == SEARCHD_RETRY
    @error = 'temporary searchd error: ' + response[4, response.length - 4]
    raise SphinxTemporaryError, @error
  end

  unless status == SEARCHD_OK
    @error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, @error
  end

  # check version
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
               "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  response
end
|
1103
|
+
|
1104
|
+
# Connect, send query, get response.
|
1105
|
+
def PerformRequest(command, request, additional = nil)
  # Connects, sends +request+ as the given +command+ and returns the reply
  # wrapped in a Response.
  #
  # +command+ selects the SEARCHD_COMMAND_* and VER_COMMAND_* constants of
  # Sphinx::Client by name. When +additional+ is given, two extra 32-bit
  # values (0 and +additional+) are inserted between header and body and
  # counted in the announced length.
  cmd = command.to_s.upcase
  command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
  command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)

  sock = self.Connect
  body = request.to_s
  # The header announces the body size in bytes; String#length counts
  # characters and would under-report a body that contains multibyte text.
  len = body.bytesize + (additional.nil? ? 0 : 8)
  header = [command_id, command_ver, len].pack('nnN')
  header << [0, additional].pack('NN') unless additional.nil?
  sock.send(header + body, 0)
  response = self.GetResponse(sock, command_ver)
  Response.new(response)
end
|
1118
|
+
|
1119
|
+
# :stopdoc:
|
1120
|
+
def assert
  # Debug-only sanity check: the block is evaluated only when Ruby runs
  # with $DEBUG set, and a falsy result raises 'Assertion failed!'.
  return unless $DEBUG

  raise 'Assertion failed!' unless yield
end
|
1123
|
+
# :startdoc:
|
1124
|
+
end
|
1125
|
+
end
|