zinx 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/sphinx/sphinx.rb +5 -5
- data/lib/sphinx/sphinx/client.rb +1125 -1125
- data/lib/sphinx/sphinx/request.rb +50 -50
- data/lib/sphinx/sphinx/response.rb +68 -68
- data/lib/zinx.rb +271 -271
- data/test/test.rb +11 -11
- metadata +4 -4
data/lib/sphinx/sphinx.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require File.dirname(__FILE__) + '/sphinx/request'
|
2
|
-
require File.dirname(__FILE__) + '/sphinx/response'
|
3
|
-
require File.dirname(__FILE__) + '/sphinx/client'
|
4
|
-
|
5
|
-
module Sphinx
|
1
|
+
require File.dirname(__FILE__) + '/sphinx/request'
|
2
|
+
require File.dirname(__FILE__) + '/sphinx/response'
|
3
|
+
require File.dirname(__FILE__) + '/sphinx/client'
|
4
|
+
|
5
|
+
module Sphinx
|
6
6
|
end
|
data/lib/sphinx/sphinx/client.rb
CHANGED
@@ -1,1125 +1,1125 @@
|
|
1
|
-
# = client.rb - Sphinx Client API
|
2
|
-
#
|
3
|
-
# Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
|
4
|
-
# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
|
5
|
-
# License:: Distributes under the same terms as Ruby
|
6
|
-
# Version:: 0.9.9-r1299
|
7
|
-
# Website:: http://kpumuk.info/projects/ror-plugins/sphinx
|
8
|
-
#
|
9
|
-
# This library is distributed under the terms of the Ruby license.
|
10
|
-
# You can freely distribute/modify this library.
|
11
|
-
|
12
|
-
# ==Sphinx Client API
|
13
|
-
#
|
14
|
-
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
|
15
|
-
# daemon and get search results from Sphinx.
|
16
|
-
#
|
17
|
-
# ===Usage
|
18
|
-
#
|
19
|
-
# sphinx = Sphinx::Client.new
|
20
|
-
# result = sphinx.Query('test')
|
21
|
-
# ids = result['matches'].map { |match| match['id'] }.join(',')
|
22
|
-
# posts = Post.find :all, :conditions => "id IN (#{ids})"
|
23
|
-
#
|
24
|
-
# docs = posts.map(&:body)
|
25
|
-
# excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
|
26
|
-
|
27
|
-
require 'socket'
|
28
|
-
|
29
|
-
module Sphinx
|
30
|
-
# :stopdoc:
|
31
|
-
|
32
|
-
class SphinxError < StandardError; end
|
33
|
-
class SphinxArgumentError < SphinxError; end
|
34
|
-
class SphinxConnectError < SphinxError; end
|
35
|
-
class SphinxResponseError < SphinxError; end
|
36
|
-
class SphinxInternalError < SphinxError; end
|
37
|
-
class SphinxTemporaryError < SphinxError; end
|
38
|
-
class SphinxUnknownError < SphinxError; end
|
39
|
-
|
40
|
-
# :startdoc:
|
41
|
-
|
42
|
-
class Client
|
43
|
-
|
44
|
-
# :stopdoc:
|
45
|
-
|
46
|
-
# Known searchd commands
|
47
|
-
|
48
|
-
# search command
|
49
|
-
SEARCHD_COMMAND_SEARCH = 0
|
50
|
-
# excerpt command
|
51
|
-
SEARCHD_COMMAND_EXCERPT = 1
|
52
|
-
# update command
|
53
|
-
SEARCHD_COMMAND_UPDATE = 2
|
54
|
-
# keywords command
|
55
|
-
SEARCHD_COMMAND_KEYWORDS = 3
|
56
|
-
|
57
|
-
# Current client-side command implementation versions
|
58
|
-
|
59
|
-
# search command version
|
60
|
-
VER_COMMAND_SEARCH = 0x119
|
61
|
-
# excerpt command version
|
62
|
-
VER_COMMAND_EXCERPT = 0x102
|
63
|
-
# update command version
|
64
|
-
VER_COMMAND_UPDATE = 0x102
|
65
|
-
# keywords command version
|
66
|
-
VER_COMMAND_KEYWORDS = 0x100
|
67
|
-
|
68
|
-
# Known searchd status codes
|
69
|
-
|
70
|
-
# general success, command-specific reply follows
|
71
|
-
SEARCHD_OK = 0
|
72
|
-
# general failure, command-specific reply may follow
|
73
|
-
SEARCHD_ERROR = 1
|
74
|
-
# temporaty failure, client should retry later
|
75
|
-
SEARCHD_RETRY = 2
|
76
|
-
# general success, warning message and command-specific reply follow
|
77
|
-
SEARCHD_WARNING = 3
|
78
|
-
|
79
|
-
# :startdoc:
|
80
|
-
|
81
|
-
# Known match modes
|
82
|
-
|
83
|
-
# match all query words
|
84
|
-
SPH_MATCH_ALL = 0
|
85
|
-
# match any query word
|
86
|
-
SPH_MATCH_ANY = 1
|
87
|
-
# match this exact phrase
|
88
|
-
SPH_MATCH_PHRASE = 2
|
89
|
-
# match this boolean query
|
90
|
-
SPH_MATCH_BOOLEAN = 3
|
91
|
-
# match this extended query
|
92
|
-
SPH_MATCH_EXTENDED = 4
|
93
|
-
# match all document IDs w/o fulltext query, apply filters
|
94
|
-
SPH_MATCH_FULLSCAN = 5
|
95
|
-
# extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
|
96
|
-
SPH_MATCH_EXTENDED2 = 6
|
97
|
-
|
98
|
-
# Known ranking modes (ext2 only)
|
99
|
-
|
100
|
-
# default mode, phrase proximity major factor and BM25 minor one
|
101
|
-
SPH_RANK_PROXIMITY_BM25 = 0
|
102
|
-
# statistical mode, BM25 ranking only (faster but worse quality)
|
103
|
-
SPH_RANK_BM25 = 1
|
104
|
-
# no ranking, all matches get a weight of 1
|
105
|
-
SPH_RANK_NONE = 2
|
106
|
-
# simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts
|
107
|
-
SPH_RANK_WORDCOUNT = 3
|
108
|
-
# phrase proximity
|
109
|
-
SPH_RANK_PROXIMITY = 4
|
110
|
-
|
111
|
-
# Known sort modes
|
112
|
-
|
113
|
-
# sort by document relevance desc, then by date
|
114
|
-
SPH_SORT_RELEVANCE = 0
|
115
|
-
# sort by document date desc, then by relevance desc
|
116
|
-
SPH_SORT_ATTR_DESC = 1
|
117
|
-
# sort by document date asc, then by relevance desc
|
118
|
-
SPH_SORT_ATTR_ASC = 2
|
119
|
-
# sort by time segments (hour/day/week/etc) desc, then by relevance desc
|
120
|
-
SPH_SORT_TIME_SEGMENTS = 3
|
121
|
-
# sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
|
122
|
-
SPH_SORT_EXTENDED = 4
|
123
|
-
# sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
|
124
|
-
SPH_SORT_EXPR = 5
|
125
|
-
|
126
|
-
# Known filter types
|
127
|
-
|
128
|
-
# filter by integer values set
|
129
|
-
SPH_FILTER_VALUES = 0
|
130
|
-
# filter by integer range
|
131
|
-
SPH_FILTER_RANGE = 1
|
132
|
-
# filter by float range
|
133
|
-
SPH_FILTER_FLOATRANGE = 2
|
134
|
-
|
135
|
-
# Known attribute types
|
136
|
-
|
137
|
-
# this attr is just an integer
|
138
|
-
SPH_ATTR_INTEGER = 1
|
139
|
-
# this attr is a timestamp
|
140
|
-
SPH_ATTR_TIMESTAMP = 2
|
141
|
-
# this attr is an ordinal string number (integer at search time,
|
142
|
-
# specially handled at indexing time)
|
143
|
-
SPH_ATTR_ORDINAL = 3
|
144
|
-
# this attr is a boolean bit field
|
145
|
-
SPH_ATTR_BOOL = 4
|
146
|
-
# this attr is a float
|
147
|
-
SPH_ATTR_FLOAT = 5
|
148
|
-
# signed 64-bit integer
|
149
|
-
SPH_ATTR_BIGINT = 6
|
150
|
-
# string
|
151
|
-
SPH_ATTR_STRING = 7
|
152
|
-
# this attr has multiple values (0 or more)
|
153
|
-
SPH_ATTR_MULTI = 0x40000001
|
154
|
-
SPH_ATTR_MULTI64 = 0x40000002
|
155
|
-
|
156
|
-
# Known grouping functions
|
157
|
-
|
158
|
-
# group by day
|
159
|
-
SPH_GROUPBY_DAY = 0
|
160
|
-
# group by week
|
161
|
-
SPH_GROUPBY_WEEK = 1
|
162
|
-
# group by month
|
163
|
-
SPH_GROUPBY_MONTH = 2
|
164
|
-
# group by year
|
165
|
-
SPH_GROUPBY_YEAR = 3
|
166
|
-
# group by attribute value
|
167
|
-
SPH_GROUPBY_ATTR = 4
|
168
|
-
# group by sequential attrs pair
|
169
|
-
SPH_GROUPBY_ATTRPAIR = 5
|
170
|
-
|
171
|
-
# Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
|
172
|
-
def initialize
|
173
|
-
# per-client-object settings
|
174
|
-
@host = 'localhost' # searchd host (default is "localhost")
|
175
|
-
@port = 9312 # searchd port (default is 9312)
|
176
|
-
|
177
|
-
# per-query settings
|
178
|
-
@offset = 0 # how many records to seek from result-set start (default is 0)
|
179
|
-
@limit = 20 # how many records to return from result-set starting at offset (default is 20)
|
180
|
-
@mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
|
181
|
-
@weights = [] # per-field weights (default is 1 for all fields)
|
182
|
-
@sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
|
183
|
-
@sortby = '' # attribute to sort by (defualt is "")
|
184
|
-
@min_id = 0 # min ID to match (default is 0, which means no limit)
|
185
|
-
@max_id = 0 # max ID to match (default is 0, which means no limit)
|
186
|
-
@filters = [] # search filters
|
187
|
-
@groupby = '' # group-by attribute name
|
188
|
-
@groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
|
189
|
-
@groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
|
190
|
-
@groupdistinct = '' # group-by count-distinct attribute
|
191
|
-
@maxmatches = 1000 # max matches to retrieve
|
192
|
-
@cutoff = 0 # cutoff to stop searching at (default is 0)
|
193
|
-
@retrycount = 0 # distributed retries count
|
194
|
-
@retrydelay = 0 # distributed retries delay
|
195
|
-
@anchor = [] # geographical anchor point
|
196
|
-
@indexweights = [] # per-index weights
|
197
|
-
@ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
|
198
|
-
@maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
|
199
|
-
@fieldweights = {} # per-field-name weights
|
200
|
-
@overrides = [] # per-query attribute values overrides
|
201
|
-
@select = '*' # select-list (attributes or expressions, with optional aliases)
|
202
|
-
|
203
|
-
# per-reply fields (for single-query case)
|
204
|
-
@error = '' # last error message
|
205
|
-
@warning = '' # last warning message
|
206
|
-
|
207
|
-
@reqs = [] # requests storage (for multi-query case)
|
208
|
-
@mbenc = '' # stored mbstring encoding
|
209
|
-
end
|
210
|
-
|
211
|
-
# Get last error message.
|
212
|
-
def GetLastError
|
213
|
-
@error
|
214
|
-
end
|
215
|
-
|
216
|
-
# Get last warning message.
|
217
|
-
def GetLastWarning
|
218
|
-
@warning
|
219
|
-
end
|
220
|
-
|
221
|
-
# Set searchd host name (string) and port (integer).
|
222
|
-
def SetServer(host, port)
|
223
|
-
assert { host.instance_of? String }
|
224
|
-
assert { port.instance_of? Fixnum }
|
225
|
-
|
226
|
-
@host = host
|
227
|
-
@port = port
|
228
|
-
end
|
229
|
-
|
230
|
-
# Set offset and count into result set,
|
231
|
-
# and optionally set max-matches and cutoff limits.
|
232
|
-
def SetLimits(offset, limit, max = 0, cutoff = 0)
|
233
|
-
assert { offset.instance_of? Fixnum }
|
234
|
-
assert { limit.instance_of? Fixnum }
|
235
|
-
assert { max.instance_of? Fixnum }
|
236
|
-
assert { offset >= 0 }
|
237
|
-
assert { limit > 0 }
|
238
|
-
assert { max >= 0 }
|
239
|
-
|
240
|
-
@offset = offset
|
241
|
-
@limit = limit
|
242
|
-
@maxmatches = max if max > 0
|
243
|
-
@cutoff = cutoff if cutoff > 0
|
244
|
-
end
|
245
|
-
|
246
|
-
# Set maximum query time, in milliseconds, per-index,
|
247
|
-
# integer, 0 means "do not limit"
|
248
|
-
def SetMaxQueryTime(max)
|
249
|
-
assert { max.instance_of? Fixnum }
|
250
|
-
assert { max >= 0 }
|
251
|
-
@maxquerytime = max
|
252
|
-
end
|
253
|
-
|
254
|
-
# Set matching mode.
|
255
|
-
def SetMatchMode(mode)
|
256
|
-
assert { mode == SPH_MATCH_ALL \
|
257
|
-
|| mode == SPH_MATCH_ANY \
|
258
|
-
|| mode == SPH_MATCH_PHRASE \
|
259
|
-
|| mode == SPH_MATCH_BOOLEAN \
|
260
|
-
|| mode == SPH_MATCH_EXTENDED \
|
261
|
-
|| mode == SPH_MATCH_FULLSCAN \
|
262
|
-
|| mode == SPH_MATCH_EXTENDED2 }
|
263
|
-
|
264
|
-
@mode = mode
|
265
|
-
end
|
266
|
-
|
267
|
-
# Set ranking mode.
|
268
|
-
def SetRankingMode(ranker)
|
269
|
-
assert { ranker == SPH_RANK_PROXIMITY_BM25 \
|
270
|
-
|| ranker == SPH_RANK_BM25 \
|
271
|
-
|| ranker == SPH_RANK_NONE \
|
272
|
-
|| ranker == SPH_RANK_WORDCOUNT \
|
273
|
-
|| ranker == SPH_RANK_PROXIMITY }
|
274
|
-
|
275
|
-
@ranker = ranker
|
276
|
-
end
|
277
|
-
|
278
|
-
# Set matches sorting mode.
|
279
|
-
def SetSortMode(mode, sortby = '')
|
280
|
-
assert { mode == SPH_SORT_RELEVANCE \
|
281
|
-
|| mode == SPH_SORT_ATTR_DESC \
|
282
|
-
|| mode == SPH_SORT_ATTR_ASC \
|
283
|
-
|| mode == SPH_SORT_TIME_SEGMENTS \
|
284
|
-
|| mode == SPH_SORT_EXTENDED \
|
285
|
-
|| mode == SPH_SORT_EXPR }
|
286
|
-
assert { sortby.instance_of? String }
|
287
|
-
assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
|
288
|
-
|
289
|
-
@sort = mode
|
290
|
-
@sortby = sortby
|
291
|
-
end
|
292
|
-
|
293
|
-
# Bind per-field weights by order.
|
294
|
-
#
|
295
|
-
# DEPRECATED; use SetFieldWeights() instead.
|
296
|
-
def SetWeights(weights)
|
297
|
-
assert { weights.instance_of? Array }
|
298
|
-
weights.each do |weight|
|
299
|
-
assert { weight.instance_of? Fixnum }
|
300
|
-
end
|
301
|
-
|
302
|
-
@weights = weights
|
303
|
-
end
|
304
|
-
|
305
|
-
# Bind per-field weights by name.
|
306
|
-
#
|
307
|
-
# Takes string (field name) to integer name (field weight) hash as an argument.
|
308
|
-
# * Takes precedence over SetWeights().
|
309
|
-
# * Unknown names will be silently ignored.
|
310
|
-
# * Unbound fields will be silently given a weight of 1.
|
311
|
-
def SetFieldWeights(weights)
|
312
|
-
assert { weights.instance_of? Hash }
|
313
|
-
weights.each do |name, weight|
|
314
|
-
assert { name.instance_of? String }
|
315
|
-
assert { weight.instance_of? Fixnum }
|
316
|
-
end
|
317
|
-
|
318
|
-
@fieldweights = weights
|
319
|
-
end
|
320
|
-
|
321
|
-
# Bind per-index weights by name.
|
322
|
-
def SetIndexWeights(weights)
|
323
|
-
assert { weights.instance_of? Hash }
|
324
|
-
weights.each do |index, weight|
|
325
|
-
assert { index.instance_of? String }
|
326
|
-
assert { weight.instance_of? Fixnum }
|
327
|
-
end
|
328
|
-
|
329
|
-
@indexweights = weights
|
330
|
-
end
|
331
|
-
|
332
|
-
# Set IDs range to match.
|
333
|
-
#
|
334
|
-
# Only match records if document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
|
335
|
-
def SetIDRange(min, max)
|
336
|
-
assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
|
337
|
-
assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
|
338
|
-
assert { min <= max }
|
339
|
-
|
340
|
-
@min_id = min
|
341
|
-
@max_id = max
|
342
|
-
end
|
343
|
-
|
344
|
-
# Set values filter.
|
345
|
-
#
|
346
|
-
# Only match those records where <tt>attribute</tt> column values
|
347
|
-
# are in specified set.
|
348
|
-
def SetFilter(attribute, values, exclude = false)
|
349
|
-
assert { attribute.instance_of? String }
|
350
|
-
assert { values.instance_of? Array }
|
351
|
-
assert { !values.empty? }
|
352
|
-
|
353
|
-
if values.instance_of?(Array) && values.size > 0
|
354
|
-
values.each do |value|
|
355
|
-
assert { value.instance_of? Fixnum }
|
356
|
-
end
|
357
|
-
|
358
|
-
@filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
|
359
|
-
end
|
360
|
-
end
|
361
|
-
|
362
|
-
# Set range filter.
|
363
|
-
#
|
364
|
-
# Only match those records where <tt>attribute</tt> column value
|
365
|
-
# is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
366
|
-
def SetFilterRange(attribute, min, max, exclude = false)
|
367
|
-
assert { attribute.instance_of? String }
|
368
|
-
assert { min.instance_of? Fixnum or min.instance_of? Bignum }
|
369
|
-
assert { max.instance_of? Fixnum or max.instance_of? Bignum }
|
370
|
-
assert { min <= max }
|
371
|
-
|
372
|
-
@filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
|
373
|
-
end
|
374
|
-
|
375
|
-
# Set float range filter.
|
376
|
-
#
|
377
|
-
# Only match those records where <tt>attribute</tt> column value
|
378
|
-
# is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
379
|
-
def SetFilterFloatRange(attribute, min, max, exclude = false)
|
380
|
-
assert { attribute.instance_of? String }
|
381
|
-
assert { min.instance_of? Float }
|
382
|
-
assert { max.instance_of? Float }
|
383
|
-
assert { min <= max }
|
384
|
-
|
385
|
-
@filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
|
386
|
-
end
|
387
|
-
|
388
|
-
# Setup anchor point for geosphere distance calculations.
|
389
|
-
#
|
390
|
-
# Required to use <tt>@geodist</tt> in filters and sorting
|
391
|
-
# distance will be computed to this point. Latitude and longitude
|
392
|
-
# must be in radians.
|
393
|
-
#
|
394
|
-
# * <tt>attrlat</tt> -- is the name of latitude attribute
|
395
|
-
# * <tt>attrlong</tt> -- is the name of longitude attribute
|
396
|
-
# * <tt>lat</tt> -- is anchor point latitude, in radians
|
397
|
-
# * <tt>long</tt> -- is anchor point longitude, in radians
|
398
|
-
def SetGeoAnchor(attrlat, attrlong, lat, long)
|
399
|
-
assert { attrlat.instance_of? String }
|
400
|
-
assert { attrlong.instance_of? String }
|
401
|
-
assert { lat.instance_of? Float }
|
402
|
-
assert { long.instance_of? Float }
|
403
|
-
|
404
|
-
@anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
|
405
|
-
end
|
406
|
-
|
407
|
-
# Set grouping attribute and function.
|
408
|
-
#
|
409
|
-
# In grouping mode, all matches are assigned to different groups
|
410
|
-
# based on grouping function value.
|
411
|
-
#
|
412
|
-
# Each group keeps track of the total match count, and the best match
|
413
|
-
# (in this group) according to current sorting function.
|
414
|
-
#
|
415
|
-
# The final result set contains one best match per group, with
|
416
|
-
# grouping function value and matches count attached.
|
417
|
-
#
|
418
|
-
# Groups in result set could be sorted by any sorting clause,
|
419
|
-
# including both document attributes and the following special
|
420
|
-
# internal Sphinx attributes:
|
421
|
-
#
|
422
|
-
# * @id - match document ID;
|
423
|
-
# * @weight, @rank, @relevance - match weight;
|
424
|
-
# * @group - groupby function value;
|
425
|
-
# * @count - amount of matches in group.
|
426
|
-
#
|
427
|
-
# the default mode is to sort by groupby value in descending order,
|
428
|
-
# ie. by '@group desc'.
|
429
|
-
#
|
430
|
-
# 'total_found' would contain total amount of matching groups over
|
431
|
-
# the whole index.
|
432
|
-
#
|
433
|
-
# WARNING: grouping is done in fixed memory and thus its results
|
434
|
-
# are only approximate; so there might be more groups reported
|
435
|
-
# in total_found than actually present. @count might also
|
436
|
-
# be underestimated.
|
437
|
-
#
|
438
|
-
# For example, if sorting by relevance and grouping by "published"
|
439
|
-
# attribute with SPH_GROUPBY_DAY function, then the result set will
|
440
|
-
# contain one most relevant match per each day when there were any
|
441
|
-
# matches published, with day number and per-day match count attached,
|
442
|
-
# and sorted by day number in descending order (ie. recent days first).
|
443
|
-
def SetGroupBy(attribute, func, groupsort = '@group desc')
|
444
|
-
assert { attribute.instance_of? String }
|
445
|
-
assert { groupsort.instance_of? String }
|
446
|
-
assert { func == SPH_GROUPBY_DAY \
|
447
|
-
|| func == SPH_GROUPBY_WEEK \
|
448
|
-
|| func == SPH_GROUPBY_MONTH \
|
449
|
-
|| func == SPH_GROUPBY_YEAR \
|
450
|
-
|| func == SPH_GROUPBY_ATTR \
|
451
|
-
|| func == SPH_GROUPBY_ATTRPAIR }
|
452
|
-
|
453
|
-
@groupby = attribute
|
454
|
-
@groupfunc = func
|
455
|
-
@groupsort = groupsort
|
456
|
-
end
|
457
|
-
|
458
|
-
# Set count-distinct attribute for group-by queries.
|
459
|
-
def SetGroupDistinct(attribute)
|
460
|
-
assert { attribute.instance_of? String }
|
461
|
-
@groupdistinct = attribute
|
462
|
-
end
|
463
|
-
|
464
|
-
# Set distributed retries count and delay.
|
465
|
-
def SetRetries(count, delay = 0)
|
466
|
-
assert { count.instance_of? Fixnum }
|
467
|
-
assert { delay.instance_of? Fixnum }
|
468
|
-
|
469
|
-
@retrycount = count
|
470
|
-
@retrydelay = delay
|
471
|
-
end
|
472
|
-
|
473
|
-
# Set attribute values override
|
474
|
-
#
|
475
|
-
# There can be only one override per attribute.
|
476
|
-
# +values+ must be a hash that maps document IDs to attribute values.
|
477
|
-
def SetOverride(attrname, attrtype, values)
|
478
|
-
assert { attrname.instance_of? String }
|
479
|
-
assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
|
480
|
-
assert { values.instance_of? Hash }
|
481
|
-
|
482
|
-
@overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
|
483
|
-
end
|
484
|
-
|
485
|
-
# Set select-list (attributes or expressions), SQL-like syntax.
|
486
|
-
def SetSelect(select)
|
487
|
-
assert { select.instance_of? String }
|
488
|
-
@select = select
|
489
|
-
end
|
490
|
-
|
491
|
-
# Clear all filters (for multi-queries).
|
492
|
-
def ResetFilters
|
493
|
-
@filters = []
|
494
|
-
@anchor = []
|
495
|
-
end
|
496
|
-
|
497
|
-
# Clear groupby settings (for multi-queries).
|
498
|
-
def ResetGroupBy
|
499
|
-
@groupby = ''
|
500
|
-
@groupfunc = SPH_GROUPBY_DAY
|
501
|
-
@groupsort = '@group desc'
|
502
|
-
@groupdistinct = ''
|
503
|
-
end
|
504
|
-
|
505
|
-
# Clear all attribute value overrides (for multi-queries).
|
506
|
-
def ResetOverrides
|
507
|
-
@overrides = []
|
508
|
-
end
|
509
|
-
|
510
|
-
# Connect to searchd server and run given search query.
|
511
|
-
#
|
512
|
-
# <tt>query</tt> is query string
|
513
|
-
|
514
|
-
# <tt>index</tt> is index name (or names) to query. default value is "*" which means
|
515
|
-
# to query all indexes. Accepted characters for index names are letters, numbers,
|
516
|
-
# dash, and underscore; everything else is considered a separator. Therefore,
|
517
|
-
# all the following calls are valid and will search two indexes:
|
518
|
-
#
|
519
|
-
# sphinx.Query('test query', 'main delta')
|
520
|
-
# sphinx.Query('test query', 'main;delta')
|
521
|
-
# sphinx.Query('test query', 'main, delta')
|
522
|
-
#
|
523
|
-
# Index order matters. If identical IDs are found in two or more indexes,
|
524
|
-
# weight and attribute values from the very last matching index will be used
|
525
|
-
# for sorting and returning to client. Therefore, in the example above,
|
526
|
-
# matches from "delta" index will always "win" over matches from "main".
|
527
|
-
#
|
528
|
-
# Returns false on failure.
|
529
|
-
# Returns hash which has the following keys on success:
|
530
|
-
#
|
531
|
-
# * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
|
532
|
-
# * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
|
533
|
-
# * <tt>'total_found'</tt> -- total amount of matching documents in index
|
534
|
-
# * <tt>'time'</tt> -- search time
|
535
|
-
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
|
536
|
-
def Query(query, index = '*', comment = '')
|
537
|
-
assert { @reqs.empty? }
|
538
|
-
@reqs = []
|
539
|
-
|
540
|
-
self.AddQuery(query, index, comment)
|
541
|
-
results = self.RunQueries
|
542
|
-
|
543
|
-
# probably network error; error message should be already filled
|
544
|
-
return false unless results.instance_of?(Array)
|
545
|
-
|
546
|
-
@error = results[0]['error']
|
547
|
-
@warning = results[0]['warning']
|
548
|
-
|
549
|
-
return false if results[0]['status'] == SEARCHD_ERROR
|
550
|
-
return results[0]
|
551
|
-
end
|
552
|
-
|
553
|
-
# Add query to batch.
|
554
|
-
#
|
555
|
-
# Batch queries enable searchd to perform internal optimizations,
|
556
|
-
# if possible; and reduce network connection overheads in all cases.
|
557
|
-
#
|
558
|
-
# For instance, running exactly the same query with different
|
559
|
-
# groupby settings will enable searched to perform expensive
|
560
|
-
# full-text search and ranking operation only once, but compute
|
561
|
-
# multiple groupby results from its output.
|
562
|
-
#
|
563
|
-
# Parameters are exactly the same as in <tt>Query</tt> call.
|
564
|
-
# Returns index to results array returned by <tt>RunQueries</tt> call.
|
565
|
-
def AddQuery(query, index = '*', comment = '')
|
566
|
-
# build request
|
567
|
-
|
568
|
-
# mode and limits
|
569
|
-
request = Request.new
|
570
|
-
request.put_int @offset, @limit, @mode, @ranker, @sort
|
571
|
-
request.put_string @sortby
|
572
|
-
# query itself
|
573
|
-
request.put_string query
|
574
|
-
# weights
|
575
|
-
request.put_int_array @weights
|
576
|
-
# indexes
|
577
|
-
request.put_string index
|
578
|
-
# id64 range marker
|
579
|
-
request.put_int 1
|
580
|
-
# id64 range
|
581
|
-
request.put_int64 @min_id.to_i, @max_id.to_i
|
582
|
-
|
583
|
-
# filters
|
584
|
-
request.put_int @filters.length
|
585
|
-
@filters.each do |filter|
|
586
|
-
request.put_string filter['attr']
|
587
|
-
request.put_int filter['type']
|
588
|
-
|
589
|
-
case filter['type']
|
590
|
-
when SPH_FILTER_VALUES
|
591
|
-
request.put_int64_array filter['values']
|
592
|
-
when SPH_FILTER_RANGE
|
593
|
-
request.put_int64 filter['min'], filter['max']
|
594
|
-
when SPH_FILTER_FLOATRANGE
|
595
|
-
request.put_float filter['min'], filter['max']
|
596
|
-
else
|
597
|
-
raise SphinxInternalError, 'Internal error: unhandled filter type'
|
598
|
-
end
|
599
|
-
request.put_int filter['exclude'] ? 1 : 0
|
600
|
-
end
|
601
|
-
|
602
|
-
# group-by clause, max-matches count, group-sort clause, cutoff count
|
603
|
-
request.put_int @groupfunc
|
604
|
-
request.put_string @groupby
|
605
|
-
request.put_int @maxmatches
|
606
|
-
request.put_string @groupsort
|
607
|
-
request.put_int @cutoff, @retrycount, @retrydelay
|
608
|
-
request.put_string @groupdistinct
|
609
|
-
|
610
|
-
# anchor point
|
611
|
-
if @anchor.empty?
|
612
|
-
request.put_int 0
|
613
|
-
else
|
614
|
-
request.put_int 1
|
615
|
-
request.put_string @anchor['attrlat'], @anchor['attrlong']
|
616
|
-
request.put_float @anchor['lat'], @anchor['long']
|
617
|
-
end
|
618
|
-
|
619
|
-
# per-index weights
|
620
|
-
request.put_int @indexweights.length
|
621
|
-
@indexweights.each do |idx, weight|
|
622
|
-
request.put_string idx
|
623
|
-
request.put_int weight
|
624
|
-
end
|
625
|
-
|
626
|
-
# max query time
|
627
|
-
request.put_int @maxquerytime
|
628
|
-
|
629
|
-
# per-field weights
|
630
|
-
request.put_int @fieldweights.length
|
631
|
-
@fieldweights.each do |field, weight|
|
632
|
-
request.put_string field
|
633
|
-
request.put_int weight
|
634
|
-
end
|
635
|
-
|
636
|
-
# comment
|
637
|
-
request.put_string comment
|
638
|
-
|
639
|
-
# attribute overrides
|
640
|
-
request.put_int @overrides.length
|
641
|
-
for entry in @overrides do
|
642
|
-
request.put_string entry['attr']
|
643
|
-
request.put_int entry['type'], entry['values'].size
|
644
|
-
entry['values'].each do |id, val|
|
645
|
-
assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
|
646
|
-
assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
|
647
|
-
|
648
|
-
request.put_int64 id
|
649
|
-
case entry['type']
|
650
|
-
when SPH_ATTR_FLOAT
|
651
|
-
request.put_float val
|
652
|
-
when SPH_ATTR_BIGINT
|
653
|
-
request.put_int64 val
|
654
|
-
else
|
655
|
-
request.put_int val
|
656
|
-
end
|
657
|
-
end
|
658
|
-
end
|
659
|
-
|
660
|
-
# select-list
|
661
|
-
request.put_string @select
|
662
|
-
|
663
|
-
# store request to requests array
|
664
|
-
@reqs << request.to_s;
|
665
|
-
return @reqs.length - 1
|
666
|
-
end
|
667
|
-
|
668
|
-
# Run queries batch.
|
669
|
-
#
|
670
|
-
# Returns an array of result sets on success.
|
671
|
-
# Returns false on network IO failure.
|
672
|
-
#
|
673
|
-
# Each result set in returned array is a hash which containts
|
674
|
-
# the same keys as the hash returned by <tt>Query</tt>, plus:
|
675
|
-
#
|
676
|
-
# * <tt>'error'</tt> -- search error for this query
|
677
|
-
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
|
678
|
-
def RunQueries
|
679
|
-
if @reqs.empty?
|
680
|
-
@error = 'No queries defined, issue AddQuery() first'
|
681
|
-
return false
|
682
|
-
end
|
683
|
-
|
684
|
-
req = @reqs.join('')
|
685
|
-
nreqs = @reqs.length
|
686
|
-
@reqs = []
|
687
|
-
response = PerformRequest(:search, req, nreqs)
|
688
|
-
|
689
|
-
# parse response
|
690
|
-
begin
|
691
|
-
results = []
|
692
|
-
ires = 0
|
693
|
-
while ires < nreqs
|
694
|
-
ires += 1
|
695
|
-
result = {}
|
696
|
-
|
697
|
-
result['error'] = ''
|
698
|
-
result['warning'] = ''
|
699
|
-
|
700
|
-
# extract status
|
701
|
-
status = result['status'] = response.get_int
|
702
|
-
if status != SEARCHD_OK
|
703
|
-
message = response.get_string
|
704
|
-
if status == SEARCHD_WARNING
|
705
|
-
result['warning'] = message
|
706
|
-
else
|
707
|
-
result['error'] = message
|
708
|
-
results << result
|
709
|
-
next
|
710
|
-
end
|
711
|
-
end
|
712
|
-
|
713
|
-
# read schema
|
714
|
-
fields = []
|
715
|
-
attrs = {}
|
716
|
-
attrs_names_in_order = []
|
717
|
-
|
718
|
-
nfields = response.get_int
|
719
|
-
while nfields > 0
|
720
|
-
nfields -= 1
|
721
|
-
fields << response.get_string
|
722
|
-
end
|
723
|
-
result['fields'] = fields
|
724
|
-
|
725
|
-
nattrs = response.get_int
|
726
|
-
while nattrs > 0
|
727
|
-
nattrs -= 1
|
728
|
-
attr = response.get_string
|
729
|
-
type = response.get_int
|
730
|
-
attrs[attr] = type
|
731
|
-
attrs_names_in_order << attr
|
732
|
-
end
|
733
|
-
result['attrs'] = attrs
|
734
|
-
|
735
|
-
# read match count
|
736
|
-
count = response.get_int
|
737
|
-
id64 = response.get_int
|
738
|
-
|
739
|
-
# read matches
|
740
|
-
result['matches'] = []
|
741
|
-
while count > 0
|
742
|
-
count -= 1
|
743
|
-
|
744
|
-
if id64 != 0
|
745
|
-
doc = response.get_int64
|
746
|
-
weight = response.get_int
|
747
|
-
else
|
748
|
-
doc, weight = response.get_ints(2)
|
749
|
-
end
|
750
|
-
|
751
|
-
r = {} # This is a single result put in the result['matches'] array
|
752
|
-
r['id'] = doc
|
753
|
-
r['weight'] = weight
|
754
|
-
attrs_names_in_order.each do |a|
|
755
|
-
r['attrs'] ||= {}
|
756
|
-
|
757
|
-
case attrs[a]
|
758
|
-
when SPH_ATTR_BIGINT
|
759
|
-
# handle 64-bit ints
|
760
|
-
r['attrs'][a] = response.get_int64
|
761
|
-
when SPH_ATTR_FLOAT
|
762
|
-
# handle floats
|
763
|
-
r['attrs'][a] = response.get_float
|
764
|
-
when SPH_ATTR_STRING
|
765
|
-
# handle string
|
766
|
-
r['attrs'][a] = response.get_string
|
767
|
-
else
|
768
|
-
# handle everything else as unsigned ints
|
769
|
-
val = response.get_int
|
770
|
-
if attrs[a]==SPH_ATTR_MULTI
|
771
|
-
r['attrs'][a] = []
|
772
|
-
1.upto(val) do
|
773
|
-
r['attrs'][a] << response.get_int
|
774
|
-
end
|
775
|
-
elsif attrs[a]==SPH_ATTR_MULTI64
|
776
|
-
r['attrs'][a] = []
|
777
|
-
val = val/2
|
778
|
-
1.upto(val) do
|
779
|
-
r['attrs'][a] << response.get_int64
|
780
|
-
end
|
781
|
-
else
|
782
|
-
r['attrs'][a] = val
|
783
|
-
end
|
784
|
-
end
|
785
|
-
end
|
786
|
-
result['matches'] << r
|
787
|
-
end
|
788
|
-
result['total'], result['total_found'], msecs, words = response.get_ints(4)
|
789
|
-
result['time'] = '%.3f' % (msecs / 1000.0)
|
790
|
-
|
791
|
-
result['words'] = {}
|
792
|
-
while words > 0
|
793
|
-
words -= 1
|
794
|
-
word = response.get_string
|
795
|
-
docs, hits = response.get_ints(2)
|
796
|
-
result['words'][word] = { 'docs' => docs, 'hits' => hits }
|
797
|
-
end
|
798
|
-
|
799
|
-
results << result
|
800
|
-
end
|
801
|
-
#rescue EOFError
|
802
|
-
# @error = 'incomplete reply'
|
803
|
-
# raise SphinxResponseError, @error
|
804
|
-
end
|
805
|
-
|
806
|
-
return results
|
807
|
-
end
|
808
|
-
|
809
|
-
# Connect to searchd server and generate exceprts from given documents.
|
810
|
-
#
|
811
|
-
# * <tt>docs</tt> -- an array of strings which represent the documents' contents
|
812
|
-
# * <tt>index</tt> -- a string specifiying the index which settings will be used
|
813
|
-
# for stemming, lexing and case folding
|
814
|
-
# * <tt>words</tt> -- a string which contains the words to highlight
|
815
|
-
# * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
|
816
|
-
#
|
817
|
-
# You can use following parameters:
|
818
|
-
# * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
|
819
|
-
# * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "<b>"
|
820
|
-
# * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
|
821
|
-
# * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
|
822
|
-
# * <tt>'around'</tt> -- how much words to highlight around each match, default is 5
|
823
|
-
# * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
|
824
|
-
# * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
|
825
|
-
# * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
|
826
|
-
# * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
|
827
|
-
#
|
828
|
-
# Returns false on failure.
|
829
|
-
# Returns an array of string excerpts on success.
|
830
|
-
def BuildExcerpts(docs, index, words, opts = {})
|
831
|
-
assert { docs.instance_of? Array }
|
832
|
-
assert { index.instance_of? String }
|
833
|
-
assert { words.instance_of? String }
|
834
|
-
assert { opts.instance_of? Hash }
|
835
|
-
|
836
|
-
# fixup options
|
837
|
-
opts['before_match'] ||= '<b>';
|
838
|
-
opts['after_match'] ||= '</b>';
|
839
|
-
opts['chunk_separator'] ||= ' ... ';
|
840
|
-
opts['html_strip_mode'] ||= 'index';
|
841
|
-
opts['limit'] ||= 256;
|
842
|
-
opts['limit_passages'] ||= 0;
|
843
|
-
opts['limit_words'] ||= 0;
|
844
|
-
opts['around'] ||= 5;
|
845
|
-
opts['start_passage_id'] ||= 1;
|
846
|
-
opts['exact_phrase'] ||= false
|
847
|
-
opts['single_passage'] ||= false
|
848
|
-
opts['use_boundaries'] ||= false
|
849
|
-
opts['weight_order'] ||= false
|
850
|
-
opts['load_files'] ||= false
|
851
|
-
opts['allow_empty'] ||= false
|
852
|
-
|
853
|
-
# build request
|
854
|
-
|
855
|
-
# v.1.0 req
|
856
|
-
flags = 1
|
857
|
-
flags |= 2 if opts['exact_phrase']
|
858
|
-
flags |= 4 if opts['single_passage']
|
859
|
-
flags |= 8 if opts['use_boundaries']
|
860
|
-
flags |= 16 if opts['weight_order']
|
861
|
-
flags |= 32 if opts['query_mode']
|
862
|
-
flags |= 64 if opts['force_all_words']
|
863
|
-
flags |= 128 if opts['load_files']
|
864
|
-
flags |= 256 if opts['allow_empty']
|
865
|
-
|
866
|
-
request = Request.new
|
867
|
-
request.put_int 0, flags # mode=0, flags=1 (remove spaces)
|
868
|
-
# req index
|
869
|
-
request.put_string index
|
870
|
-
# req words
|
871
|
-
request.put_string words
|
872
|
-
|
873
|
-
# options
|
874
|
-
request.put_string opts['before_match']
|
875
|
-
request.put_string opts['after_match']
|
876
|
-
request.put_string opts['chunk_separator']
|
877
|
-
request.put_int opts['limit'].to_i, opts['around'].to_i
|
878
|
-
|
879
|
-
# options v1.2
|
880
|
-
request.put_int opts['limit_passages'].to_i
|
881
|
-
request.put_int opts['limit_words'].to_i
|
882
|
-
request.put_int opts['start_passage_id'].to_i
|
883
|
-
request.put_string opts['html_strip_mode']
|
884
|
-
|
885
|
-
# documents
|
886
|
-
request.put_int docs.size
|
887
|
-
docs.each do |doc|
|
888
|
-
assert { doc.instance_of? String }
|
889
|
-
|
890
|
-
request.put_string doc
|
891
|
-
end
|
892
|
-
|
893
|
-
response = PerformRequest(:excerpt, request)
|
894
|
-
|
895
|
-
# parse response
|
896
|
-
begin
|
897
|
-
res = []
|
898
|
-
docs.each do |doc|
|
899
|
-
res << response.get_string
|
900
|
-
end
|
901
|
-
rescue EOFError
|
902
|
-
@error = 'incomplete reply'
|
903
|
-
raise SphinxResponseError, @error
|
904
|
-
end
|
905
|
-
return res
|
906
|
-
end
|
907
|
-
|
908
|
-
# Connect to searchd server, and generate keyword list for a given query.
|
909
|
-
#
|
910
|
-
# Returns an array of words on success.
|
911
|
-
def BuildKeywords(query, index, hits)
|
912
|
-
assert { query.instance_of? String }
|
913
|
-
assert { index.instance_of? String }
|
914
|
-
assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
|
915
|
-
|
916
|
-
# build request
|
917
|
-
request = Request.new
|
918
|
-
# v.1.0 req
|
919
|
-
request.put_string query # req query
|
920
|
-
request.put_string index # req index
|
921
|
-
request.put_int hits ? 1 : 0
|
922
|
-
|
923
|
-
response = PerformRequest(:keywords, request)
|
924
|
-
|
925
|
-
# parse response
|
926
|
-
begin
|
927
|
-
res = []
|
928
|
-
nwords = response.get_int
|
929
|
-
0.upto(nwords - 1) do |i|
|
930
|
-
tokenized = response.get_string
|
931
|
-
normalized = response.get_string
|
932
|
-
|
933
|
-
entry = { 'tokenized' => tokenized, 'normalized' => normalized }
|
934
|
-
entry['docs'], entry['hits'] = response.get_ints(2) if hits
|
935
|
-
|
936
|
-
res << entry
|
937
|
-
end
|
938
|
-
rescue EOFError
|
939
|
-
@error = 'incomplete reply'
|
940
|
-
raise SphinxResponseError, @error
|
941
|
-
end
|
942
|
-
|
943
|
-
return res
|
944
|
-
end
|
945
|
-
|
946
|
-
# Batch update given attributes in given rows in given indexes.
|
947
|
-
#
|
948
|
-
# * +index+ is a name of the index to be updated
|
949
|
-
# * +attrs+ is an array of attribute name strings.
|
950
|
-
# * +values+ is a hash where key is document id, and value is an array of
|
951
|
-
# * +mva+ identifies whether update MVA
|
952
|
-
# new attribute values
|
953
|
-
#
|
954
|
-
# Returns number of actually updated documents (0 or more) on success.
|
955
|
-
# Returns -1 on failure.
|
956
|
-
#
|
957
|
-
# Usage example:
|
958
|
-
# sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
|
959
|
-
def UpdateAttributes(index, attrs, values, mva = false)
|
960
|
-
# verify everything
|
961
|
-
assert { index.instance_of? String }
|
962
|
-
assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
|
963
|
-
|
964
|
-
assert { attrs.instance_of? Array }
|
965
|
-
attrs.each do |attr|
|
966
|
-
assert { attr.instance_of? String }
|
967
|
-
end
|
968
|
-
|
969
|
-
assert { values.instance_of? Hash }
|
970
|
-
values.each do |id, entry|
|
971
|
-
assert { id.instance_of? Fixnum }
|
972
|
-
assert { entry.instance_of? Array }
|
973
|
-
assert { entry.length == attrs.length }
|
974
|
-
entry.each do |v|
|
975
|
-
if mva
|
976
|
-
assert { v.instance_of? Array }
|
977
|
-
v.each { |vv| assert { vv.instance_of? Fixnum } }
|
978
|
-
else
|
979
|
-
assert { v.instance_of? Fixnum }
|
980
|
-
end
|
981
|
-
end
|
982
|
-
end
|
983
|
-
|
984
|
-
# build request
|
985
|
-
request = Request.new
|
986
|
-
request.put_string index
|
987
|
-
|
988
|
-
request.put_int attrs.length
|
989
|
-
for attr in attrs
|
990
|
-
request.put_string attr
|
991
|
-
request.put_int mva ? 1 : 0
|
992
|
-
end
|
993
|
-
|
994
|
-
request.put_int values.length
|
995
|
-
values.each do |id, entry|
|
996
|
-
request.put_int64 id
|
997
|
-
if mva
|
998
|
-
entry.each { |v| request.put_int_array v }
|
999
|
-
else
|
1000
|
-
request.put_int(*entry)
|
1001
|
-
end
|
1002
|
-
end
|
1003
|
-
|
1004
|
-
response = PerformRequest(:update, request)
|
1005
|
-
|
1006
|
-
# parse response
|
1007
|
-
begin
|
1008
|
-
return response.get_int
|
1009
|
-
rescue EOFError
|
1010
|
-
@error = 'incomplete reply'
|
1011
|
-
raise SphinxResponseError, @error
|
1012
|
-
end
|
1013
|
-
end
|
1014
|
-
|
1015
|
-
protected
|
1016
|
-
|
1017
|
-
# Connect to searchd server.
|
1018
|
-
def Connect
|
1019
|
-
begin
|
1020
|
-
if @host[0,1]=='/'
|
1021
|
-
sock = UNIXSocket.new(@host)
|
1022
|
-
else
|
1023
|
-
sock = TCPSocket.new(@host, @port)
|
1024
|
-
end
|
1025
|
-
rescue => err
|
1026
|
-
@error = "connection to #{@host}:#{@port} failed (error=#{err})"
|
1027
|
-
raise SphinxConnectError, @error
|
1028
|
-
end
|
1029
|
-
|
1030
|
-
v = sock.recv(4).unpack('N*').first
|
1031
|
-
if v < 1
|
1032
|
-
sock.close
|
1033
|
-
@error = "expected searchd protocol version 1+, got version '#{v}'"
|
1034
|
-
raise SphinxConnectError, @error
|
1035
|
-
end
|
1036
|
-
|
1037
|
-
sock.send([1].pack('N'), 0)
|
1038
|
-
sock
|
1039
|
-
end
|
1040
|
-
|
1041
|
-
# Get and check response packet from searchd server.
|
1042
|
-
def GetResponse(sock, client_version)
|
1043
|
-
response = ''
|
1044
|
-
len = 0
|
1045
|
-
|
1046
|
-
header = sock.recv(8)
|
1047
|
-
if header.length == 8
|
1048
|
-
status, ver, len = header.unpack('n2N')
|
1049
|
-
left = len.to_i
|
1050
|
-
while left > 0 do
|
1051
|
-
begin
|
1052
|
-
chunk = sock.recv(left)
|
1053
|
-
if chunk
|
1054
|
-
response << chunk
|
1055
|
-
left -= chunk.length
|
1056
|
-
end
|
1057
|
-
rescue EOFError
|
1058
|
-
break
|
1059
|
-
end
|
1060
|
-
end
|
1061
|
-
end
|
1062
|
-
sock.close
|
1063
|
-
|
1064
|
-
# check response
|
1065
|
-
read = response.length
|
1066
|
-
if response.empty? or read != len.to_i
|
1067
|
-
@error = response.empty? \
|
1068
|
-
? 'received zero-sized searchd response' \
|
1069
|
-
: "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
|
1070
|
-
raise SphinxResponseError, @error
|
1071
|
-
end
|
1072
|
-
|
1073
|
-
# check status
|
1074
|
-
if (status == SEARCHD_WARNING)
|
1075
|
-
wlen = response[0, 4].unpack('N*').first
|
1076
|
-
@warning = response[4, wlen]
|
1077
|
-
return response[4 + wlen, response.length - 4 - wlen]
|
1078
|
-
end
|
1079
|
-
|
1080
|
-
if status == SEARCHD_ERROR
|
1081
|
-
@error = 'searchd error: ' + response[4, response.length - 4]
|
1082
|
-
raise SphinxInternalError, @error
|
1083
|
-
end
|
1084
|
-
|
1085
|
-
if status == SEARCHD_RETRY
|
1086
|
-
@error = 'temporary searchd error: ' + response[4, response.length - 4]
|
1087
|
-
raise SphinxTemporaryError, @error
|
1088
|
-
end
|
1089
|
-
|
1090
|
-
unless status == SEARCHD_OK
|
1091
|
-
@error = "unknown status code: '#{status}'"
|
1092
|
-
raise SphinxUnknownError, @error
|
1093
|
-
end
|
1094
|
-
|
1095
|
-
# check version
|
1096
|
-
if ver < client_version
|
1097
|
-
@warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
|
1098
|
-
"v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
|
1099
|
-
end
|
1100
|
-
|
1101
|
-
return response
|
1102
|
-
end
|
1103
|
-
|
1104
|
-
# Connect, send query, get response.
|
1105
|
-
def PerformRequest(command, request, additional = nil)
|
1106
|
-
cmd = command.to_s.upcase
|
1107
|
-
command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
|
1108
|
-
command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
|
1109
|
-
|
1110
|
-
sock = self.Connect
|
1111
|
-
len = request.to_s.length + (additional != nil ? 8 : 0)
|
1112
|
-
header = [command_id, command_ver, len].pack('nnN')
|
1113
|
-
header << [0, additional].pack('NN') if additional != nil
|
1114
|
-
sock.send(header + request.to_s, 0)
|
1115
|
-
response = self.GetResponse(sock, command_ver)
|
1116
|
-
return Response.new(response)
|
1117
|
-
end
|
1118
|
-
|
1119
|
-
# :stopdoc:
|
1120
|
-
def assert
|
1121
|
-
raise 'Assertion failed!' unless yield if $DEBUG
|
1122
|
-
end
|
1123
|
-
# :startdoc:
|
1124
|
-
end
|
1125
|
-
end
|
1
|
+
# = client.rb - Sphinx Client API
|
2
|
+
#
|
3
|
+
# Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
|
4
|
+
# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
|
5
|
+
# License:: Distributes under the same terms as Ruby
|
6
|
+
# Version:: 0.9.9-r1299
|
7
|
+
# Website:: http://kpumuk.info/projects/ror-plugins/sphinx
|
8
|
+
#
|
9
|
+
# This library is distributed under the terms of the Ruby license.
|
10
|
+
# You can freely distribute/modify this library.
|
11
|
+
|
12
|
+
# ==Sphinx Client API
|
13
|
+
#
|
14
|
+
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
|
15
|
+
# daemon and get search results from Sphinx.
|
16
|
+
#
|
17
|
+
# ===Usage
|
18
|
+
#
|
19
|
+
# sphinx = Sphinx::Client.new
|
20
|
+
# result = sphinx.Query('test')
|
21
|
+
# ids = result['matches'].map { |match| match['id'] }.join(',')
|
22
|
+
# posts = Post.find :all, :conditions => "id IN (#{ids})"
|
23
|
+
#
|
24
|
+
# docs = posts.map(&:body)
|
25
|
+
# excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
|
26
|
+
|
27
|
+
require 'socket'
|
28
|
+
|
29
|
+
module Sphinx
|
30
|
+
# :stopdoc:
|
31
|
+
|
32
|
+
class SphinxError < StandardError; end
|
33
|
+
class SphinxArgumentError < SphinxError; end
|
34
|
+
class SphinxConnectError < SphinxError; end
|
35
|
+
class SphinxResponseError < SphinxError; end
|
36
|
+
class SphinxInternalError < SphinxError; end
|
37
|
+
class SphinxTemporaryError < SphinxError; end
|
38
|
+
class SphinxUnknownError < SphinxError; end
|
39
|
+
|
40
|
+
# :startdoc:
|
41
|
+
|
42
|
+
class Client
|
43
|
+
|
44
|
+
# :stopdoc:
|
45
|
+
|
46
|
+
# Known searchd commands
|
47
|
+
|
48
|
+
# search command
|
49
|
+
SEARCHD_COMMAND_SEARCH = 0
|
50
|
+
# excerpt command
|
51
|
+
SEARCHD_COMMAND_EXCERPT = 1
|
52
|
+
# update command
|
53
|
+
SEARCHD_COMMAND_UPDATE = 2
|
54
|
+
# keywords command
|
55
|
+
SEARCHD_COMMAND_KEYWORDS = 3
|
56
|
+
|
57
|
+
# Current client-side command implementation versions
|
58
|
+
|
59
|
+
# search command version
|
60
|
+
VER_COMMAND_SEARCH = 0x119
|
61
|
+
# excerpt command version
|
62
|
+
VER_COMMAND_EXCERPT = 0x102
|
63
|
+
# update command version
|
64
|
+
VER_COMMAND_UPDATE = 0x102
|
65
|
+
# keywords command version
|
66
|
+
VER_COMMAND_KEYWORDS = 0x100
|
67
|
+
|
68
|
+
# Known searchd status codes
|
69
|
+
|
70
|
+
# general success, command-specific reply follows
|
71
|
+
SEARCHD_OK = 0
|
72
|
+
# general failure, command-specific reply may follow
|
73
|
+
SEARCHD_ERROR = 1
|
74
|
+
# temporaty failure, client should retry later
|
75
|
+
SEARCHD_RETRY = 2
|
76
|
+
# general success, warning message and command-specific reply follow
|
77
|
+
SEARCHD_WARNING = 3
|
78
|
+
|
79
|
+
# :startdoc:
|
80
|
+
|
81
|
+
# Known match modes
|
82
|
+
|
83
|
+
# match all query words
|
84
|
+
SPH_MATCH_ALL = 0
|
85
|
+
# match any query word
|
86
|
+
SPH_MATCH_ANY = 1
|
87
|
+
# match this exact phrase
|
88
|
+
SPH_MATCH_PHRASE = 2
|
89
|
+
# match this boolean query
|
90
|
+
SPH_MATCH_BOOLEAN = 3
|
91
|
+
# match this extended query
|
92
|
+
SPH_MATCH_EXTENDED = 4
|
93
|
+
# match all document IDs w/o fulltext query, apply filters
|
94
|
+
SPH_MATCH_FULLSCAN = 5
|
95
|
+
# extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
|
96
|
+
SPH_MATCH_EXTENDED2 = 6
|
97
|
+
|
98
|
+
# Known ranking modes (ext2 only)
|
99
|
+
|
100
|
+
# default mode, phrase proximity major factor and BM25 minor one
|
101
|
+
SPH_RANK_PROXIMITY_BM25 = 0
|
102
|
+
# statistical mode, BM25 ranking only (faster but worse quality)
|
103
|
+
SPH_RANK_BM25 = 1
|
104
|
+
# no ranking, all matches get a weight of 1
|
105
|
+
SPH_RANK_NONE = 2
|
106
|
+
# simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts
|
107
|
+
SPH_RANK_WORDCOUNT = 3
|
108
|
+
# phrase proximity
|
109
|
+
SPH_RANK_PROXIMITY = 4
|
110
|
+
|
111
|
+
# Known sort modes
|
112
|
+
|
113
|
+
# sort by document relevance desc, then by date
|
114
|
+
SPH_SORT_RELEVANCE = 0
|
115
|
+
# sort by document date desc, then by relevance desc
|
116
|
+
SPH_SORT_ATTR_DESC = 1
|
117
|
+
# sort by document date asc, then by relevance desc
|
118
|
+
SPH_SORT_ATTR_ASC = 2
|
119
|
+
# sort by time segments (hour/day/week/etc) desc, then by relevance desc
|
120
|
+
SPH_SORT_TIME_SEGMENTS = 3
|
121
|
+
# sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
|
122
|
+
SPH_SORT_EXTENDED = 4
|
123
|
+
# sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
|
124
|
+
SPH_SORT_EXPR = 5
|
125
|
+
|
126
|
+
# Known filter types
|
127
|
+
|
128
|
+
# filter by integer values set
|
129
|
+
SPH_FILTER_VALUES = 0
|
130
|
+
# filter by integer range
|
131
|
+
SPH_FILTER_RANGE = 1
|
132
|
+
# filter by float range
|
133
|
+
SPH_FILTER_FLOATRANGE = 2
|
134
|
+
|
135
|
+
# Known attribute types
|
136
|
+
|
137
|
+
# this attr is just an integer
|
138
|
+
SPH_ATTR_INTEGER = 1
|
139
|
+
# this attr is a timestamp
|
140
|
+
SPH_ATTR_TIMESTAMP = 2
|
141
|
+
# this attr is an ordinal string number (integer at search time,
|
142
|
+
# specially handled at indexing time)
|
143
|
+
SPH_ATTR_ORDINAL = 3
|
144
|
+
# this attr is a boolean bit field
|
145
|
+
SPH_ATTR_BOOL = 4
|
146
|
+
# this attr is a float
|
147
|
+
SPH_ATTR_FLOAT = 5
|
148
|
+
# signed 64-bit integer
|
149
|
+
SPH_ATTR_BIGINT = 6
|
150
|
+
# string
|
151
|
+
SPH_ATTR_STRING = 7
|
152
|
+
# this attr has multiple values (0 or more)
|
153
|
+
SPH_ATTR_MULTI = 0x40000001
|
154
|
+
SPH_ATTR_MULTI64 = 0x40000002
|
155
|
+
|
156
|
+
# Known grouping functions
|
157
|
+
|
158
|
+
# group by day
|
159
|
+
SPH_GROUPBY_DAY = 0
|
160
|
+
# group by week
|
161
|
+
SPH_GROUPBY_WEEK = 1
|
162
|
+
# group by month
|
163
|
+
SPH_GROUPBY_MONTH = 2
|
164
|
+
# group by year
|
165
|
+
SPH_GROUPBY_YEAR = 3
|
166
|
+
# group by attribute value
|
167
|
+
SPH_GROUPBY_ATTR = 4
|
168
|
+
# group by sequential attrs pair
|
169
|
+
SPH_GROUPBY_ATTRPAIR = 5
|
170
|
+
|
171
|
+
# Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
|
172
|
+
def initialize
|
173
|
+
# per-client-object settings
|
174
|
+
@host = 'localhost' # searchd host (default is "localhost")
|
175
|
+
@port = 9312 # searchd port (default is 9312)
|
176
|
+
|
177
|
+
# per-query settings
|
178
|
+
@offset = 0 # how many records to seek from result-set start (default is 0)
|
179
|
+
@limit = 20 # how many records to return from result-set starting at offset (default is 20)
|
180
|
+
@mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
|
181
|
+
@weights = [] # per-field weights (default is 1 for all fields)
|
182
|
+
@sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
|
183
|
+
@sortby = '' # attribute to sort by (defualt is "")
|
184
|
+
@min_id = 0 # min ID to match (default is 0, which means no limit)
|
185
|
+
@max_id = 0 # max ID to match (default is 0, which means no limit)
|
186
|
+
@filters = [] # search filters
|
187
|
+
@groupby = '' # group-by attribute name
|
188
|
+
@groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
|
189
|
+
@groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
|
190
|
+
@groupdistinct = '' # group-by count-distinct attribute
|
191
|
+
@maxmatches = 1000 # max matches to retrieve
|
192
|
+
@cutoff = 0 # cutoff to stop searching at (default is 0)
|
193
|
+
@retrycount = 0 # distributed retries count
|
194
|
+
@retrydelay = 0 # distributed retries delay
|
195
|
+
@anchor = [] # geographical anchor point
|
196
|
+
@indexweights = [] # per-index weights
|
197
|
+
@ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
|
198
|
+
@maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
|
199
|
+
@fieldweights = {} # per-field-name weights
|
200
|
+
@overrides = [] # per-query attribute values overrides
|
201
|
+
@select = '*' # select-list (attributes or expressions, with optional aliases)
|
202
|
+
|
203
|
+
# per-reply fields (for single-query case)
|
204
|
+
@error = '' # last error message
|
205
|
+
@warning = '' # last warning message
|
206
|
+
|
207
|
+
@reqs = [] # requests storage (for multi-query case)
|
208
|
+
@mbenc = '' # stored mbstring encoding
|
209
|
+
end
|
210
|
+
|
211
|
+
# Get last error message.
|
212
|
+
def GetLastError
|
213
|
+
@error
|
214
|
+
end
|
215
|
+
|
216
|
+
# Get last warning message.
|
217
|
+
def GetLastWarning
|
218
|
+
@warning
|
219
|
+
end
|
220
|
+
|
221
|
+
# Set searchd host name (string) and port (integer).
|
222
|
+
def SetServer(host, port)
|
223
|
+
assert { host.instance_of? String }
|
224
|
+
assert { port.instance_of? Fixnum }
|
225
|
+
|
226
|
+
@host = host
|
227
|
+
@port = port
|
228
|
+
end
|
229
|
+
|
230
|
+
# Set offset and count into result set,
|
231
|
+
# and optionally set max-matches and cutoff limits.
|
232
|
+
def SetLimits(offset, limit, max = 0, cutoff = 0)
|
233
|
+
assert { offset.instance_of? Fixnum }
|
234
|
+
assert { limit.instance_of? Fixnum }
|
235
|
+
assert { max.instance_of? Fixnum }
|
236
|
+
assert { offset >= 0 }
|
237
|
+
assert { limit > 0 }
|
238
|
+
assert { max >= 0 }
|
239
|
+
|
240
|
+
@offset = offset
|
241
|
+
@limit = limit
|
242
|
+
@maxmatches = max if max > 0
|
243
|
+
@cutoff = cutoff if cutoff > 0
|
244
|
+
end
|
245
|
+
|
246
|
+
# Set maximum query time, in milliseconds, per-index,
|
247
|
+
# integer, 0 means "do not limit"
|
248
|
+
def SetMaxQueryTime(max)
|
249
|
+
assert { max.instance_of? Fixnum }
|
250
|
+
assert { max >= 0 }
|
251
|
+
@maxquerytime = max
|
252
|
+
end
|
253
|
+
|
254
|
+
# Set matching mode.
|
255
|
+
def SetMatchMode(mode)
|
256
|
+
assert { mode == SPH_MATCH_ALL \
|
257
|
+
|| mode == SPH_MATCH_ANY \
|
258
|
+
|| mode == SPH_MATCH_PHRASE \
|
259
|
+
|| mode == SPH_MATCH_BOOLEAN \
|
260
|
+
|| mode == SPH_MATCH_EXTENDED \
|
261
|
+
|| mode == SPH_MATCH_FULLSCAN \
|
262
|
+
|| mode == SPH_MATCH_EXTENDED2 }
|
263
|
+
|
264
|
+
@mode = mode
|
265
|
+
end
|
266
|
+
|
267
|
+
# Set ranking mode.
|
268
|
+
def SetRankingMode(ranker)
|
269
|
+
assert { ranker == SPH_RANK_PROXIMITY_BM25 \
|
270
|
+
|| ranker == SPH_RANK_BM25 \
|
271
|
+
|| ranker == SPH_RANK_NONE \
|
272
|
+
|| ranker == SPH_RANK_WORDCOUNT \
|
273
|
+
|| ranker == SPH_RANK_PROXIMITY }
|
274
|
+
|
275
|
+
@ranker = ranker
|
276
|
+
end
|
277
|
+
|
278
|
+
# Set matches sorting mode.
|
279
|
+
def SetSortMode(mode, sortby = '')
|
280
|
+
assert { mode == SPH_SORT_RELEVANCE \
|
281
|
+
|| mode == SPH_SORT_ATTR_DESC \
|
282
|
+
|| mode == SPH_SORT_ATTR_ASC \
|
283
|
+
|| mode == SPH_SORT_TIME_SEGMENTS \
|
284
|
+
|| mode == SPH_SORT_EXTENDED \
|
285
|
+
|| mode == SPH_SORT_EXPR }
|
286
|
+
assert { sortby.instance_of? String }
|
287
|
+
assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
|
288
|
+
|
289
|
+
@sort = mode
|
290
|
+
@sortby = sortby
|
291
|
+
end
|
292
|
+
|
293
|
+
# Bind per-field weights by order.
|
294
|
+
#
|
295
|
+
# DEPRECATED; use SetFieldWeights() instead.
|
296
|
+
def SetWeights(weights)
|
297
|
+
assert { weights.instance_of? Array }
|
298
|
+
weights.each do |weight|
|
299
|
+
assert { weight.instance_of? Fixnum }
|
300
|
+
end
|
301
|
+
|
302
|
+
@weights = weights
|
303
|
+
end
|
304
|
+
|
305
|
+
# Bind per-field weights by name.
|
306
|
+
#
|
307
|
+
# Takes string (field name) to integer name (field weight) hash as an argument.
|
308
|
+
# * Takes precedence over SetWeights().
|
309
|
+
# * Unknown names will be silently ignored.
|
310
|
+
# * Unbound fields will be silently given a weight of 1.
|
311
|
+
def SetFieldWeights(weights)
|
312
|
+
assert { weights.instance_of? Hash }
|
313
|
+
weights.each do |name, weight|
|
314
|
+
assert { name.instance_of? String }
|
315
|
+
assert { weight.instance_of? Fixnum }
|
316
|
+
end
|
317
|
+
|
318
|
+
@fieldweights = weights
|
319
|
+
end
|
320
|
+
|
321
|
+
# Bind per-index weights by name.
|
322
|
+
def SetIndexWeights(weights)
|
323
|
+
assert { weights.instance_of? Hash }
|
324
|
+
weights.each do |index, weight|
|
325
|
+
assert { index.instance_of? String }
|
326
|
+
assert { weight.instance_of? Fixnum }
|
327
|
+
end
|
328
|
+
|
329
|
+
@indexweights = weights
|
330
|
+
end
|
331
|
+
|
332
|
+
# Set IDs range to match.
|
333
|
+
#
|
334
|
+
# Only match records if document ID is beetwen <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
|
335
|
+
def SetIDRange(min, max)
|
336
|
+
assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
|
337
|
+
assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
|
338
|
+
assert { min <= max }
|
339
|
+
|
340
|
+
@min_id = min
|
341
|
+
@max_id = max
|
342
|
+
end
|
343
|
+
|
344
|
+
# Set values filter.
|
345
|
+
#
|
346
|
+
# Only match those records where <tt>attribute</tt> column values
|
347
|
+
# are in specified set.
|
348
|
+
def SetFilter(attribute, values, exclude = false)
|
349
|
+
assert { attribute.instance_of? String }
|
350
|
+
assert { values.instance_of? Array }
|
351
|
+
assert { !values.empty? }
|
352
|
+
|
353
|
+
if values.instance_of?(Array) && values.size > 0
|
354
|
+
values.each do |value|
|
355
|
+
assert { value.instance_of? Fixnum }
|
356
|
+
end
|
357
|
+
|
358
|
+
@filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
|
359
|
+
end
|
360
|
+
end
|
361
|
+
|
362
|
+
# Set range filter.
|
363
|
+
#
|
364
|
+
# Only match those records where <tt>attribute</tt> column value
|
365
|
+
# is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
366
|
+
def SetFilterRange(attribute, min, max, exclude = false)
|
367
|
+
assert { attribute.instance_of? String }
|
368
|
+
assert { min.instance_of? Fixnum or min.instance_of? Bignum }
|
369
|
+
assert { max.instance_of? Fixnum or max.instance_of? Bignum }
|
370
|
+
assert { min <= max }
|
371
|
+
|
372
|
+
@filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
|
373
|
+
end
|
374
|
+
|
375
|
+
# Set float range filter.
|
376
|
+
#
|
377
|
+
# Only match those records where <tt>attribute</tt> column value
|
378
|
+
# is beetwen <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
379
|
+
def SetFilterFloatRange(attribute, min, max, exclude = false)
|
380
|
+
assert { attribute.instance_of? String }
|
381
|
+
assert { min.instance_of? Float }
|
382
|
+
assert { max.instance_of? Float }
|
383
|
+
assert { min <= max }
|
384
|
+
|
385
|
+
@filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
|
386
|
+
end
|
387
|
+
|
388
|
+
# Setup anchor point for geosphere distance calculations.
|
389
|
+
#
|
390
|
+
# Required to use <tt>@geodist</tt> in filters and sorting
|
391
|
+
# distance will be computed to this point. Latitude and longitude
|
392
|
+
# must be in radians.
|
393
|
+
#
|
394
|
+
# * <tt>attrlat</tt> -- is the name of latitude attribute
|
395
|
+
# * <tt>attrlong</tt> -- is the name of longitude attribute
|
396
|
+
# * <tt>lat</tt> -- is anchor point latitude, in radians
|
397
|
+
# * <tt>long</tt> -- is anchor point longitude, in radians
|
398
|
+
def SetGeoAnchor(attrlat, attrlong, lat, long)
|
399
|
+
assert { attrlat.instance_of? String }
|
400
|
+
assert { attrlong.instance_of? String }
|
401
|
+
assert { lat.instance_of? Float }
|
402
|
+
assert { long.instance_of? Float }
|
403
|
+
|
404
|
+
@anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
|
405
|
+
end
|
406
|
+
|
407
|
+
# Set grouping attribute and function.
|
408
|
+
#
|
409
|
+
# In grouping mode, all matches are assigned to different groups
|
410
|
+
# based on grouping function value.
|
411
|
+
#
|
412
|
+
# Each group keeps track of the total match count, and the best match
|
413
|
+
# (in this group) according to current sorting function.
|
414
|
+
#
|
415
|
+
# The final result set contains one best match per group, with
|
416
|
+
# grouping function value and matches count attached.
|
417
|
+
#
|
418
|
+
# Groups in result set could be sorted by any sorting clause,
|
419
|
+
# including both document attributes and the following special
|
420
|
+
# internal Sphinx attributes:
|
421
|
+
#
|
422
|
+
# * @id - match document ID;
|
423
|
+
# * @weight, @rank, @relevance - match weight;
|
424
|
+
# * @group - groupby function value;
|
425
|
+
# * @count - amount of matches in group.
|
426
|
+
#
|
427
|
+
# the default mode is to sort by groupby value in descending order,
|
428
|
+
# ie. by '@group desc'.
|
429
|
+
#
|
430
|
+
# 'total_found' would contain total amount of matching groups over
|
431
|
+
# the whole index.
|
432
|
+
#
|
433
|
+
# WARNING: grouping is done in fixed memory and thus its results
|
434
|
+
# are only approximate; so there might be more groups reported
|
435
|
+
# in total_found than actually present. @count might also
|
436
|
+
# be underestimated.
|
437
|
+
#
|
438
|
+
# For example, if sorting by relevance and grouping by "published"
|
439
|
+
# attribute with SPH_GROUPBY_DAY function, then the result set will
|
440
|
+
# contain one most relevant match per each day when there were any
|
441
|
+
# matches published, with day number and per-day match count attached,
|
442
|
+
# and sorted by day number in descending order (ie. recent days first).
|
443
|
+
def SetGroupBy(attribute, func, groupsort = '@group desc')
|
444
|
+
assert { attribute.instance_of? String }
|
445
|
+
assert { groupsort.instance_of? String }
|
446
|
+
assert { func == SPH_GROUPBY_DAY \
|
447
|
+
|| func == SPH_GROUPBY_WEEK \
|
448
|
+
|| func == SPH_GROUPBY_MONTH \
|
449
|
+
|| func == SPH_GROUPBY_YEAR \
|
450
|
+
|| func == SPH_GROUPBY_ATTR \
|
451
|
+
|| func == SPH_GROUPBY_ATTRPAIR }
|
452
|
+
|
453
|
+
@groupby = attribute
|
454
|
+
@groupfunc = func
|
455
|
+
@groupsort = groupsort
|
456
|
+
end
|
457
|
+
|
458
|
+
# Set count-distinct attribute for group-by queries.
|
459
|
+
def SetGroupDistinct(attribute)
|
460
|
+
assert { attribute.instance_of? String }
|
461
|
+
@groupdistinct = attribute
|
462
|
+
end
|
463
|
+
|
464
|
+
# Set distributed retries count and delay.
|
465
|
+
def SetRetries(count, delay = 0)
|
466
|
+
assert { count.instance_of? Fixnum }
|
467
|
+
assert { delay.instance_of? Fixnum }
|
468
|
+
|
469
|
+
@retrycount = count
|
470
|
+
@retrydelay = delay
|
471
|
+
end
|
472
|
+
|
473
|
+
# Set attribute values override
|
474
|
+
#
|
475
|
+
# There can be only one override per attribute.
|
476
|
+
# +values+ must be a hash that maps document IDs to attribute values.
|
477
|
+
def SetOverride(attrname, attrtype, values)
|
478
|
+
assert { attrname.instance_of? String }
|
479
|
+
assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
|
480
|
+
assert { values.instance_of? Hash }
|
481
|
+
|
482
|
+
@overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
|
483
|
+
end
|
484
|
+
|
485
|
+
# Set select-list (attributes or expressions), SQL-like syntax.
|
486
|
+
def SetSelect(select)
|
487
|
+
assert { select.instance_of? String }
|
488
|
+
@select = select
|
489
|
+
end
|
490
|
+
|
491
|
+
# Clear all filters (for multi-queries).
|
492
|
+
def ResetFilters
|
493
|
+
@filters = []
|
494
|
+
@anchor = []
|
495
|
+
end
|
496
|
+
|
497
|
+
# Clear groupby settings (for multi-queries).
|
498
|
+
def ResetGroupBy
|
499
|
+
@groupby = ''
|
500
|
+
@groupfunc = SPH_GROUPBY_DAY
|
501
|
+
@groupsort = '@group desc'
|
502
|
+
@groupdistinct = ''
|
503
|
+
end
|
504
|
+
|
505
|
+
# Clear all attribute value overrides (for multi-queries).
|
506
|
+
def ResetOverrides
|
507
|
+
@overrides = []
|
508
|
+
end
|
509
|
+
|
510
|
+
# Connect to searchd server and run given search query.
|
511
|
+
#
|
512
|
+
# <tt>query</tt> is query string
|
513
|
+
|
514
|
+
# <tt>index</tt> is index name (or names) to query. default value is "*" which means
|
515
|
+
# to query all indexes. Accepted characters for index names are letters, numbers,
|
516
|
+
# dash, and underscore; everything else is considered a separator. Therefore,
|
517
|
+
# all the following calls are valid and will search two indexes:
|
518
|
+
#
|
519
|
+
# sphinx.Query('test query', 'main delta')
|
520
|
+
# sphinx.Query('test query', 'main;delta')
|
521
|
+
# sphinx.Query('test query', 'main, delta')
|
522
|
+
#
|
523
|
+
# Index order matters. If identical IDs are found in two or more indexes,
|
524
|
+
# weight and attribute values from the very last matching index will be used
|
525
|
+
# for sorting and returning to client. Therefore, in the example above,
|
526
|
+
# matches from "delta" index will always "win" over matches from "main".
|
527
|
+
#
|
528
|
+
# Returns false on failure.
|
529
|
+
# Returns hash which has the following keys on success:
|
530
|
+
#
|
531
|
+
# * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
|
532
|
+
# * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
|
533
|
+
# * <tt>'total_found'</tt> -- total amount of matching documents in index
|
534
|
+
# * <tt>'time'</tt> -- search time
|
535
|
+
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
|
536
|
+
# Run a single search query against +index+ and return its result set.
#
# Convenience wrapper around AddQuery/RunQueries for the one-query case.
# Returns the first result hash on success, or +false+ on network failure
# or searchd-reported error (in which case @error is populated).
def Query(query, index = '*', comment = '')
  assert { @reqs.empty? }
  @reqs = []

  self.AddQuery(query, index, comment)
  batch = self.RunQueries

  # Anything other than an Array signals a network-level failure;
  # the error message has already been stored in @error.
  return false unless batch.instance_of?(Array)

  first = batch[0]
  @error = first['error']
  @warning = first['warning']

  first['status'] == SEARCHD_ERROR ? false : first
end
|
552
|
+
|
553
|
+
# Add query to batch.
|
554
|
+
#
|
555
|
+
# Batch queries enable searchd to perform internal optimizations,
|
556
|
+
# if possible; and reduce network connection overheads in all cases.
|
557
|
+
#
|
558
|
+
# For instance, running exactly the same query with different
|
559
|
+
# groupby settings will enable searchd to perform expensive
|
560
|
+
# full-text search and ranking operation only once, but compute
|
561
|
+
# multiple groupby results from its output.
|
562
|
+
#
|
563
|
+
# Parameters are exactly the same as in <tt>Query</tt> call.
|
564
|
+
# Returns index to results array returned by <tt>RunQueries</tt> call.
|
565
|
+
def AddQuery(query, index = '*', comment = '')
  # Serialize one query into the searchd wire format and append it to the
  # pending batch (@reqs). The field order below mirrors the searchd SEARCH
  # command layout and must not be reordered.

  # mode and limits
  request = Request.new
  request.put_int @offset, @limit, @mode, @ranker, @sort
  request.put_string @sortby
  # query itself
  request.put_string query
  # weights
  request.put_int_array @weights
  # indexes
  request.put_string index
  # id64 range marker (always 1: the id range below is sent as 64-bit ints)
  request.put_int 1
  # id64 range
  request.put_int64 @min_id.to_i, @max_id.to_i

  # filters: each entry is a hash with 'attr', 'type', 'exclude' and
  # type-specific payload ('values' or 'min'/'max')
  request.put_int @filters.length
  @filters.each do |filter|
    request.put_string filter['attr']
    request.put_int filter['type']

    case filter['type']
      when SPH_FILTER_VALUES
        request.put_int64_array filter['values']
      when SPH_FILTER_RANGE
        request.put_int64 filter['min'], filter['max']
      when SPH_FILTER_FLOATRANGE
        request.put_float filter['min'], filter['max']
      else
        raise SphinxInternalError, 'Internal error: unhandled filter type'
    end
    request.put_int filter['exclude'] ? 1 : 0
  end

  # group-by clause, max-matches count, group-sort clause, cutoff count
  request.put_int @groupfunc
  request.put_string @groupby
  request.put_int @maxmatches
  request.put_string @groupsort
  request.put_int @cutoff, @retrycount, @retrydelay
  request.put_string @groupdistinct

  # anchor point for geodistance sorting: flag int (0/1) then the
  # attribute names and lat/long floats when present
  if @anchor.empty?
    request.put_int 0
  else
    request.put_int 1
    request.put_string @anchor['attrlat'], @anchor['attrlong']
    request.put_float @anchor['lat'], @anchor['long']
  end

  # per-index weights
  request.put_int @indexweights.length
  @indexweights.each do |idx, weight|
    request.put_string idx
    request.put_int weight
  end

  # max query time
  request.put_int @maxquerytime

  # per-field weights
  request.put_int @fieldweights.length
  @fieldweights.each do |field, weight|
    request.put_string field
    request.put_int weight
  end

  # comment
  request.put_string comment

  # attribute overrides: per-document attribute value replacements.
  # NOTE(review): Fixnum/Bignum were removed in Ruby 2.4; these asserts only
  # fire under $DEBUG but would raise NameError on modern Rubies — confirm
  # target Ruby version.
  request.put_int @overrides.length
  for entry in @overrides do
    request.put_string entry['attr']
    request.put_int entry['type'], entry['values'].size
    entry['values'].each do |id, val|
      assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
      assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }

      request.put_int64 id
      # value encoding depends on the attribute type
      case entry['type']
        when SPH_ATTR_FLOAT
          request.put_float val
        when SPH_ATTR_BIGINT
          request.put_int64 val
        else
          request.put_int val
      end
    end
  end

  # select-list
  request.put_string @select

  # store the serialized request and return its index in the batch,
  # which is also its index in the array returned by RunQueries
  @reqs << request.to_s;
  return @reqs.length - 1
end
|
667
|
+
|
668
|
+
# Run queries batch.
|
669
|
+
#
|
670
|
+
# Returns an array of result sets on success.
|
671
|
+
# Returns false on network IO failure.
|
672
|
+
#
|
673
|
+
# Each result set in returned array is a hash which contains
|
674
|
+
# the same keys as the hash returned by <tt>Query</tt>, plus:
|
675
|
+
#
|
676
|
+
# * <tt>'error'</tt> -- search error for this query
|
677
|
+
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
|
678
|
+
def RunQueries
  # Send all queued queries (see AddQuery) to searchd in one round trip and
  # parse one result set per query. Returns false (with @error set) when the
  # batch is empty; otherwise returns an array of result hashes. The batch
  # (@reqs) is always cleared before the request is sent.
  if @reqs.empty?
    @error = 'No queries defined, issue AddQuery() first'
    return false
  end

  req = @reqs.join('')
  nreqs = @reqs.length
  @reqs = []
  response = PerformRequest(:search, req, nreqs)

  # parse response — the Response object is a sequential reader, so the
  # get_* calls below must match the searchd reply layout exactly
  begin
    results = []
    ires = 0
    while ires < nreqs
      ires += 1
      result = {}

      result['error'] = ''
      result['warning'] = ''

      # extract per-query status; a warning still carries a full result
      # set, while an error terminates this query's payload
      status = result['status'] = response.get_int
      if status != SEARCHD_OK
        message = response.get_string
        if status == SEARCHD_WARNING
          result['warning'] = message
        else
          result['error'] = message
          results << result
          next
        end
      end

      # read schema: field names, then attribute name => type map
      # (attrs_names_in_order preserves the wire order used by matches)
      fields = []
      attrs = {}
      attrs_names_in_order = []

      nfields = response.get_int
      while nfields > 0
        nfields -= 1
        fields << response.get_string
      end
      result['fields'] = fields

      nattrs = response.get_int
      while nattrs > 0
        nattrs -= 1
        attr = response.get_string
        type = response.get_int
        attrs[attr] = type
        attrs_names_in_order << attr
      end
      result['attrs'] = attrs

      # read match count and the flag telling whether document ids
      # are transmitted as 64-bit values
      count = response.get_int
      id64 = response.get_int

      # read matches
      result['matches'] = []
      while count > 0
        count -= 1

        if id64 != 0
          doc = response.get_int64
          weight = response.get_int
        else
          doc, weight = response.get_ints(2)
        end

        r = {} # This is a single result put in the result['matches'] array
        r['id'] = doc
        r['weight'] = weight
        attrs_names_in_order.each do |a|
          r['attrs'] ||= {}

          # decode each attribute according to its declared type
          case attrs[a]
            when SPH_ATTR_BIGINT
              # handle 64-bit ints
              r['attrs'][a] = response.get_int64
            when SPH_ATTR_FLOAT
              # handle floats
              r['attrs'][a] = response.get_float
            when SPH_ATTR_STRING
              # handle string
              r['attrs'][a] = response.get_string
            else
              # handle everything else as unsigned ints; for multi-value
              # attributes the first int is the element count
              val = response.get_int
              if attrs[a]==SPH_ATTR_MULTI
                r['attrs'][a] = []
                1.upto(val) do
                  r['attrs'][a] << response.get_int
                end
              elsif attrs[a]==SPH_ATTR_MULTI64
                r['attrs'][a] = []
                # count is in 32-bit words; each element consumes two
                val = val/2
                1.upto(val) do
                  r['attrs'][a] << response.get_int64
                end
              else
                r['attrs'][a] = val
              end
          end
        end
        result['matches'] << r
      end
      # trailer: totals, elapsed milliseconds, and per-word statistics count
      result['total'], result['total_found'], msecs, words = response.get_ints(4)
      result['time'] = '%.3f' % (msecs / 1000.0)

      result['words'] = {}
      while words > 0
        words -= 1
        word = response.get_string
        docs, hits = response.get_ints(2)
        result['words'][word] = { 'docs' => docs, 'hits' => hits }
      end

      results << result
    end
  # NOTE(review): the EOFError rescue is commented out, so a truncated reply
  # propagates whatever error Response raises instead of SphinxResponseError
  # — confirm whether this was intentional.
  #rescue EOFError
  #  @error = 'incomplete reply'
  #  raise SphinxResponseError, @error
  end

  return results
end
|
808
|
+
|
809
|
+
# Connect to searchd server and generate excerpts from given documents.
|
810
|
+
#
|
811
|
+
# * <tt>docs</tt> -- an array of strings which represent the documents' contents
|
812
|
+
# * <tt>index</tt> -- a string specifying the index which settings will be used
|
813
|
+
# for stemming, lexing and case folding
|
814
|
+
# * <tt>words</tt> -- a string which contains the words to highlight
|
815
|
+
# * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
|
816
|
+
#
|
817
|
+
# You can use following parameters:
|
818
|
+
# * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
|
819
|
+
# * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
|
820
|
+
# * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
|
821
|
+
# * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
|
822
|
+
# * <tt>'around'</tt> -- how much words to highlight around each match, default is 5
|
823
|
+
# * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
|
824
|
+
# * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
|
825
|
+
# * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
|
826
|
+
# * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
|
827
|
+
#
|
828
|
+
# Returns false on failure.
|
829
|
+
# Returns an array of string excerpts on success.
|
830
|
+
def BuildExcerpts(docs, index, words, opts = {})
  # Ask searchd to build highlighted excerpts for each document in +docs+.
  # Returns an array of excerpt strings, one per input document, in order.
  # Raises SphinxResponseError on a truncated reply.
  assert { docs.instance_of? Array }
  assert { index.instance_of? String }
  assert { words.instance_of? String }
  assert { opts.instance_of? Hash }

  # fixup options — note this mutates the caller-supplied +opts+ hash
  opts['before_match'] ||= '<b>';
  opts['after_match'] ||= '</b>';
  opts['chunk_separator'] ||= ' ... ';
  opts['html_strip_mode'] ||= 'index';
  opts['limit'] ||= 256;
  opts['limit_passages'] ||= 0;
  opts['limit_words'] ||= 0;
  opts['around'] ||= 5;
  opts['start_passage_id'] ||= 1;
  opts['exact_phrase'] ||= false
  opts['single_passage'] ||= false
  opts['use_boundaries'] ||= false
  opts['weight_order'] ||= false
  opts['load_files'] ||= false
  opts['allow_empty'] ||= false

  # build request

  # v.1.0 req — pack the boolean options into a single flags bitmask;
  # bit 0 is always set ("remove spaces")
  flags = 1
  flags |= 2 if opts['exact_phrase']
  flags |= 4 if opts['single_passage']
  flags |= 8 if opts['use_boundaries']
  flags |= 16 if opts['weight_order']
  flags |= 32 if opts['query_mode']
  flags |= 64 if opts['force_all_words']
  flags |= 128 if opts['load_files']
  flags |= 256 if opts['allow_empty']

  request = Request.new
  request.put_int 0, flags # mode=0, flags=1 (remove spaces)
  # req index
  request.put_string index
  # req words
  request.put_string words

  # options
  request.put_string opts['before_match']
  request.put_string opts['after_match']
  request.put_string opts['chunk_separator']
  request.put_int opts['limit'].to_i, opts['around'].to_i

  # options v1.2
  request.put_int opts['limit_passages'].to_i
  request.put_int opts['limit_words'].to_i
  request.put_int opts['start_passage_id'].to_i
  request.put_string opts['html_strip_mode']

  # documents
  request.put_int docs.size
  docs.each do |doc|
    assert { doc.instance_of? String }

    request.put_string doc
  end

  response = PerformRequest(:excerpt, request)

  # parse response: one excerpt string per input document
  begin
    res = []
    docs.each do |doc|
      res << response.get_string
    end
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
  return res
end
|
907
|
+
|
908
|
+
# Connect to searchd server, and generate keyword list for a given query.
|
909
|
+
#
|
910
|
+
# Returns an array of words on success.
|
911
|
+
# Ask searchd to tokenize +query+ against +index+ and return keyword info.
#
# Each returned entry is a hash with 'tokenized' and 'normalized' forms;
# when +hits+ is true, 'docs' and 'hits' statistics are included as well.
# Raises SphinxResponseError on a truncated reply.
def BuildKeywords(query, index, hits)
  assert { query.instance_of? String }
  assert { index.instance_of? String }
  assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }

  # build request (v.1.0 layout: query, index, hits flag)
  req = Request.new
  req.put_string query
  req.put_string index
  req.put_int(hits ? 1 : 0)

  resp = PerformRequest(:keywords, req)

  # parse response: leading count, then one record per keyword
  keywords = []
  begin
    resp.get_int.times do
      entry = {}
      entry['tokenized'] = resp.get_string
      entry['normalized'] = resp.get_string
      entry['docs'], entry['hits'] = resp.get_ints(2) if hits
      keywords << entry
    end
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end

  keywords
end
|
945
|
+
|
946
|
+
# Batch update given attributes in given rows in given indexes.
|
947
|
+
#
|
948
|
+
# * +index+ is a name of the index to be updated
|
949
|
+
# * +attrs+ is an array of attribute name strings.
|
950
|
+
# * +values+ is a hash where key is document id, and value is an array of
|
951
|
+
# * +mva+ identifies whether update MVA
|
952
|
+
# new attribute values
|
953
|
+
#
|
954
|
+
# Returns number of actually updated documents (0 or more) on success.
|
955
|
+
# Returns -1 on failure.
|
956
|
+
#
|
957
|
+
# Usage example:
|
958
|
+
# sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
|
959
|
+
def UpdateAttributes(index, attrs, values, mva = false)
  # Batch-update the given attributes on the given documents in +index+.
  # Returns the count searchd reports (number of updated documents).
  # Raises SphinxResponseError on a truncated reply.

  # verify everything ($DEBUG-only sanity checks).
  # NOTE(review): Fixnum was removed in Ruby 2.4; these asserts would raise
  # NameError under $DEBUG on modern Rubies — confirm target Ruby version.
  assert { index.instance_of? String }
  assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }

  assert { attrs.instance_of? Array }
  attrs.each do |attr|
    assert { attr.instance_of? String }
  end

  assert { values.instance_of? Hash }
  values.each do |id, entry|
    assert { id.instance_of? Fixnum }
    assert { entry.instance_of? Array }
    assert { entry.length == attrs.length }
    entry.each do |v|
      if mva
        assert { v.instance_of? Array }
        v.each { |vv| assert { vv.instance_of? Fixnum } }
      else
        assert { v.instance_of? Fixnum }
      end
    end
  end

  # build request: index name, attribute list (each tagged with the MVA
  # flag), then per-document id and values
  request = Request.new
  request.put_string index

  request.put_int attrs.length
  for attr in attrs
    request.put_string attr
    request.put_int mva ? 1 : 0
  end

  request.put_int values.length
  values.each do |id, entry|
    request.put_int64 id
    if mva
      entry.each { |v| request.put_int_array v }
    else
      request.put_int(*entry)
    end
  end

  response = PerformRequest(:update, request)

  # parse response: a single int, the number of updated documents
  begin
    return response.get_int
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
|
1014
|
+
|
1015
|
+
protected
|
1016
|
+
|
1017
|
+
# Connect to searchd server.
|
1018
|
+
# Connect to searchd server and perform the protocol-version handshake.
#
# Uses a UNIX domain socket when @host starts with '/', TCP otherwise.
# Returns the connected socket on success.
# Raises SphinxConnectError if the connection or handshake fails.
def Connect
  begin
    if @host[0,1]=='/'
      sock = UNIXSocket.new(@host)
    else
      sock = TCPSocket.new(@host, @port)
    end
  rescue => err
    @error = "connection to #{@host}:#{@port} failed (error=#{err})"
    raise SphinxConnectError, @error
  end

  # Read the server's 4-byte protocol version. recv may return a short or
  # empty string if the peer closed early, making v nil; without the nil
  # guard that previously crashed with NoMethodError instead of raising
  # a proper SphinxConnectError.
  v = sock.recv(4).unpack('N*').first
  if v.nil? || v < 1
    sock.close
    @error = "expected searchd protocol version 1+, got version '#{v}'"
    raise SphinxConnectError, @error
  end

  # Announce our protocol version (1) to complete the handshake.
  sock.send([1].pack('N'), 0)
  sock
end
|
1040
|
+
|
1041
|
+
# Get and check response packet from searchd server.
|
1042
|
+
# Read and validate a complete response packet from searchd.
#
# * +sock+ -- the connected socket (always closed before returning/raising)
# * +client_version+ -- the command version we sent, used for the
#   version-mismatch warning
#
# Returns the response payload (past the warning prefix, if any).
# Raises SphinxResponseError / SphinxInternalError / SphinxTemporaryError /
# SphinxUnknownError depending on the failure.
def GetResponse(sock, client_version)
  response = ''
  len = 0

  # header: 16-bit status, 16-bit version, 32-bit payload length
  header = sock.recv(8)
  if header.length == 8
    status, ver, len = header.unpack('n2N')
    left = len.to_i
    while left > 0 do
      begin
        chunk = sock.recv(left)
        # An empty chunk means the peer closed the connection prematurely.
        # recv returns "" (not nil) at EOF, and "" is truthy in Ruby, so the
        # previous `if chunk` guard never decremented `left` and the loop
        # spun forever. Break out and let the length check below report it.
        break if chunk.nil? || chunk.empty?
        response << chunk
        left -= chunk.length
      rescue EOFError
        break
      end
    end
  end
  sock.close

  # check response: must be non-empty and exactly as long as the header said
  read = response.length
  if response.empty? or read != len.to_i
    @error = response.empty? \
      ? 'received zero-sized searchd response' \
      : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
    raise SphinxResponseError, @error
  end

  # check status; a warning packet carries a length-prefixed warning string
  # before the actual payload
  if (status == SEARCHD_WARNING)
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return response[4 + wlen, response.length - 4 - wlen]
  end

  if status == SEARCHD_ERROR
    @error = 'searchd error: ' + response[4, response.length - 4]
    raise SphinxInternalError, @error
  end

  if status == SEARCHD_RETRY
    @error = 'temporary searchd error: ' + response[4, response.length - 4]
    raise SphinxTemporaryError, @error
  end

  unless status == SEARCHD_OK
    @error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, @error
  end

  # check version: an older server may not understand newer options
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
      "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  return response
end
|
1103
|
+
|
1104
|
+
# Connect, send query, get response.
|
1105
|
+
# Connect, send one command packet, and return the parsed Response.
#
# * +command+ -- symbol naming the command (:search, :excerpt, ...), used to
#   look up the SEARCHD_COMMAND_* id and VER_COMMAND_* version constants
# * +request+ -- serialized request body (anything responding to to_s)
# * +additional+ -- optional extra 32-bit value appended to the header
#   (e.g. the query count for batched searches)
def PerformRequest(command, request, additional = nil)
  suffix = command.to_s.upcase
  cmd_id  = Sphinx::Client.const_get("SEARCHD_COMMAND_#{suffix}")
  cmd_ver = Sphinx::Client.const_get("VER_COMMAND_#{suffix}")

  # build the full packet up front: header, optional extra words, body
  payload = request.to_s
  body_len = payload.length
  body_len += 8 unless additional.nil?

  packet = [cmd_id, cmd_ver, body_len].pack('nnN')
  packet << [0, additional].pack('NN') unless additional.nil?
  packet << payload

  sock = self.Connect
  sock.send(packet, 0)
  raw = self.GetResponse(sock, cmd_ver)
  Response.new(raw)
end
|
1118
|
+
|
1119
|
+
# :stopdoc:
|
1120
|
+
# Evaluate the given block as a sanity check, but only when Ruby runs in
# debug mode ($DEBUG). Raises RuntimeError when the block yields a falsy
# value; a no-op otherwise.
def assert
  return unless $DEBUG
  raise 'Assertion failed!' unless yield
end
|
1123
|
+
# :startdoc:
|
1124
|
+
end
|
1125
|
+
end
|