ultrasphinx 1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/LICENSE +184 -0
- data/Manifest +21 -0
- data/README +94 -0
- data/Rakefile +21 -0
- data/examples/app.multi +2 -0
- data/examples/default.base +34 -0
- data/init.rb +2 -0
- data/lib/ultrasphinx.rb +20 -0
- data/lib/ultrasphinx/autoload.rb +13 -0
- data/lib/ultrasphinx/core_extensions.rb +51 -0
- data/lib/ultrasphinx/fields.rb +78 -0
- data/lib/ultrasphinx/is_indexed.rb +89 -0
- data/lib/ultrasphinx/search.rb +441 -0
- data/lib/ultrasphinx/spell.rb +41 -0
- data/lib/ultrasphinx/ultrasphinx.rb +276 -0
- data/tasks/ultrasphinx.rake +125 -0
- data/vendor/sphinx/README +40 -0
- data/vendor/sphinx/Rakefile +21 -0
- data/vendor/sphinx/init.rb +1 -0
- data/vendor/sphinx/lib/client.rb +647 -0
- metadata +66 -0
@@ -0,0 +1,21 @@
|
|
1
|
+
# Rake tasks for this plugin: `spec` (the default task) runs spec/*_spec.rb,
# `rdoc` generates API documentation into the rdoc/ directory.
require 'rake'
require 'spec/rake/spectask'
require 'rake/rdoctask'

desc 'Default: run unit tests.'
task :default => :spec

# The descriptions and the rdoc title previously named an unrelated plugin
# ("magic_enum"); they now name the Sphinx client.
desc 'Test the sphinx plugin.'
Spec::Rake::SpecTask.new(:spec) do |t|
  t.libs << 'lib'
  t.pattern = 'spec/*_spec.rb'
end

desc 'Generate documentation for the sphinx plugin.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'Sphinx Client API'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/lib/client'
|
@@ -0,0 +1,647 @@
|
|
1
|
+
# = client.rb - Sphinx Client API
|
2
|
+
#
|
3
|
+
# Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
|
4
|
+
# Copyright:: Copyright (c) 2006 - 2007 Dmytro Shteflyuk
|
5
|
+
# License:: Distributed under the same terms as Ruby
|
6
|
+
# Version:: 0.3.0
|
7
|
+
# Website:: http://kpumuk.info/projects/ror-plugins/sphinx
|
8
|
+
#
|
9
|
+
# This library is distributed under the terms of the Ruby license.
|
10
|
+
# You can freely distribute/modify this library.
|
11
|
+
|
12
|
+
# ==Sphinx Client API
|
13
|
+
#
|
14
|
+
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
|
15
|
+
# daemon and get search results from Sphinx.
|
16
|
+
#
|
17
|
+
# ===Usage
|
18
|
+
#
|
19
|
+
# sphinx = Sphinx::Client.new
|
20
|
+
# result = sphinx.Query('test')
|
21
|
+
# ids = result['matches'].map { |id, value| id }.join(',')
|
22
|
+
# posts = Post.find :all, :conditions => "id IN (#{ids})"
|
23
|
+
#
|
24
|
+
# docs = posts.map(&:body)
|
25
|
+
# excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
|
26
|
+
module Sphinx
|
27
|
+
# :stopdoc:
|
28
|
+
|
29
|
+
# Root of the client's exception hierarchy; every error class below derives from it.
class SphinxError < StandardError
end

# Declared for invalid-argument failures (not raised by the code in this file).
class SphinxArgumentError < SphinxError
end

# Raised when the connection or the protocol handshake with searchd fails.
class SphinxConnectError < SphinxError
end

# Raised when a searchd reply is missing, truncated or otherwise incomplete.
class SphinxResponseError < SphinxError
end

# Raised when searchd answers with the SEARCHD_ERROR status.
class SphinxInternalError < SphinxError
end

# Raised when searchd answers with the SEARCHD_RETRY status.
class SphinxTemporaryError < SphinxError
end

# Raised when searchd answers with a status code the client does not know.
class SphinxUnknownError < SphinxError
end
|
36
|
+
|
37
|
+
# :startdoc:
|
38
|
+
|
39
|
+
class Client
|
40
|
+
|
41
|
+
# :stopdoc:
|
42
|
+
|
43
|
+
# Known searchd commands (first field of every request header)

# run a search query
SEARCHD_COMMAND_SEARCH  = 0
# build excerpts
SEARCHD_COMMAND_EXCERPT = 1
# update attributes
SEARCHD_COMMAND_UPDATE  = 2

# Current client-side command implementation versions
# (second field of every request header)

# version of the search command
VER_COMMAND_SEARCH  = 0x107
# version of the excerpt command
VER_COMMAND_EXCERPT = 0x100
# version of the update command
VER_COMMAND_UPDATE  = 0x100

# Known searchd status codes

# general success, command-specific reply follows
SEARCHD_OK      = 0
# general failure, command-specific reply may follow
SEARCHD_ERROR   = 1
# temporary failure, client should retry later
SEARCHD_RETRY   = 2
# general success, warning message and command-specific reply follow
SEARCHD_WARNING = 3

# :startdoc:

# Known match modes

# match all query words
SPH_MATCH_ALL      = 0
# match any query word
SPH_MATCH_ANY      = 1
# match this exact phrase
SPH_MATCH_PHRASE   = 2
# match this boolean query
SPH_MATCH_BOOLEAN  = 3
# match this extended query
SPH_MATCH_EXTENDED = 4

# Known sort modes

# sort by document relevance desc, then by date
SPH_SORT_RELEVANCE     = 0
# sort by document date desc, then by relevance desc
SPH_SORT_ATTR_DESC     = 1
# sort by document date asc, then by relevance desc
SPH_SORT_ATTR_ASC      = 2
# sort by time segments (hour/day/week/etc) desc, then by relevance desc
SPH_SORT_TIME_SEGMENTS = 3
# sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
SPH_SORT_EXTENDED      = 4

# Known attribute types

# the attribute is a plain integer
SPH_ATTR_INTEGER   = 1
# the attribute is a timestamp
SPH_ATTR_TIMESTAMP = 2

# Known grouping functions

# group by day
SPH_GROUPBY_DAY   = 0
# group by week
SPH_GROUPBY_WEEK  = 1
# group by month
SPH_GROUPBY_MONTH = 2
# group by year
SPH_GROUPBY_YEAR  = 3
# group by attribute value
SPH_GROUPBY_ATTR  = 4
|
119
|
+
|
120
|
+
# Creates a <tt>Sphinx::Client</tt> with every connection, paging, matching,
# sorting, filtering and grouping option set to its default value.
def initialize
  # searchd endpoint
  @host = 'localhost'
  @port = 3312

  # paging window: records skipped from the result-set start, then records returned
  @offset = 0
  @limit  = 20

  # query matching mode and per-field weights (empty means 1 for all fields)
  @mode    = SPH_MATCH_ALL
  @weights = []

  # match sorting mode and the attribute to sort by
  @sort   = SPH_SORT_RELEVANCE
  @sortby = ''

  # document ID range to match (the maximum defaults to UINT_MAX)
  @min_id = 0
  @max_id = 0xFFFFFFFF

  # search filters
  @filters = []

  # grouping: attribute name, function applied to its value, and the
  # clause used to sort groups in the result set
  @groupby   = ''
  @groupfunc = SPH_GROUPBY_DAY
  @groupsort = '@group desc'

  # max matches to retrieve
  @maxmatches = 1000

  # last error and last warning message
  @error   = ''
  @warning = ''
end
|
141
|
+
|
142
|
+
# Returns the most recent error message recorded by this client
# (an empty string right after construction).
def GetLastError
  return @error
end
|
146
|
+
|
147
|
+
# Returns the most recent warning message recorded by this client
# (an empty string right after construction).
def GetLastWarning
  return @warning
end
|
151
|
+
|
152
|
+
# Set searchd server.
#
# * <tt>host</tt> -- searchd host name (a String)
# * <tt>port</tt> -- searchd TCP port (an Integer)
#
# The arguments are only checked when <tt>$DEBUG</tt> is set.
def SetServer(host, port)
  # Integer, not Fixnum: Fixnum was merged into Integer in Ruby 2.4 and the
  # constant no longer exists in current Ruby, so referencing it raised
  # NameError whenever the assertions were enabled.
  assert { host.is_a? String }
  assert { port.is_a? Integer }

  @host = host
  @port = port
end
|
160
|
+
|
161
|
+
# Set match offset, count, and max number to retrieve.
#
# * <tt>offset</tt> -- how many records to skip from the result-set start (>= 0)
# * <tt>limit</tt> -- how many records to return starting at offset (> 0)
# * <tt>max</tt> -- max matches to retrieve; 0 (the default) keeps the current value
#
# The arguments are only checked when <tt>$DEBUG</tt> is set.
def SetLimits(offset, limit, max = 0)
  # Integer, not Fixnum: the Fixnum constant is gone from current Ruby and
  # referencing it raised NameError whenever the assertions were enabled.
  assert { offset.is_a? Integer }
  assert { limit.is_a? Integer }
  assert { max.is_a? Integer }
  assert { offset >= 0 }
  assert { limit > 0 }
  assert { max >= 0 }

  @offset = offset
  @limit = limit
  @maxmatches = max if max > 0
end
|
174
|
+
|
175
|
+
# Selects the query matching mode; +mode+ is expected to be one of the
# <tt>SPH_MATCH_*</tt> constants (checked only when <tt>$DEBUG</tt> is set).
def SetMatchMode(mode)
  assert do
    [
      SPH_MATCH_ALL,
      SPH_MATCH_ANY,
      SPH_MATCH_PHRASE,
      SPH_MATCH_BOOLEAN,
      SPH_MATCH_EXTENDED
    ].any? { |known| mode == known }
  end

  @mode = mode
end
|
185
|
+
|
186
|
+
# Selects how matches are sorted.
#
# * <tt>mode</tt> -- one of the <tt>SPH_SORT_*</tt> constants
# * <tt>sortby</tt> -- sorting attribute or clause; must be non-empty for
#   every mode except <tt>SPH_SORT_RELEVANCE</tt>
#
# The arguments are only checked when <tt>$DEBUG</tt> is set.
def SetSortMode(mode, sortby = '')
  assert do
    [
      SPH_SORT_RELEVANCE,
      SPH_SORT_ATTR_DESC,
      SPH_SORT_ATTR_ASC,
      SPH_SORT_TIME_SEGMENTS,
      SPH_SORT_EXTENDED
    ].any? { |known| mode == known }
  end
  assert { sortby.instance_of? String }
  assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }

  @sort = mode
  @sortby = sortby
end
|
199
|
+
|
200
|
+
# Set per-field weights.
#
# * <tt>weights</tt> -- an Array of Integer weights
#
# The argument is only checked when <tt>$DEBUG</tt> is set.
def SetWeights(weights)
  assert { weights.is_a? Array }
  # Integer, not Fixnum: the Fixnum constant is gone from current Ruby and
  # referencing it raised NameError whenever the assertions were enabled.
  assert { weights.all? { |weight| weight.is_a? Integer } }

  @weights = weights
end
|
209
|
+
|
210
|
+
# Set IDs range to match.
#
# Only match those records where document ID is between <tt>min_id</tt> and <tt>max_id</tt>
# (including <tt>min_id</tt> and <tt>max_id</tt>).
#
# The arguments are only checked when <tt>$DEBUG</tt> is set.
def SetIDRange(min, max)
  # Integer, not Fixnum: the Fixnum constant is gone from current Ruby, and
  # on older 32-bit builds it rejected the default upper bound 0xFFFFFFFF
  # (a Bignum there).
  assert { min.is_a? Integer }
  assert { max.is_a? Integer }
  assert { min <= max }

  @min_id = min
  @max_id = max
end
|
222
|
+
|
223
|
+
# Set values filter.
#
# Only match those records where <tt>attribute</tt> column values
# are in specified set.
#
# * <tt>attribute</tt> -- attribute name (a String)
# * <tt>values</tt> -- non-empty Array of Integer values
# * <tt>exclude</tt> -- stored with the filter and sent to searchd as a 0/1 flag
#
# The arguments are only checked when <tt>$DEBUG</tt> is set; outside debug
# mode a non-Array or empty <tt>values</tt> is ignored and no filter is added.
def SetFilter(attribute, values, exclude = false)
  assert { attribute.is_a? String }
  assert { values.is_a? Array }
  assert { !values.empty? }
  # Integer, not Fixnum: the Fixnum constant is gone from current Ruby and
  # referencing it raised NameError whenever the assertions were enabled.
  assert { values.all? { |value| value.is_a? Integer } }

  return unless values.is_a?(Array) && !values.empty?

  @filters << { 'attr' => attribute, 'exclude' => exclude, 'values' => values }
end
|
240
|
+
|
241
|
+
# Set range filter.
#
# Only match those records where <tt>attribute</tt> column value
# is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
#
# * <tt>attribute</tt> -- attribute name (a String)
# * <tt>min</tt>, <tt>max</tt> -- Integer bounds, <tt>min <= max</tt>
# * <tt>exclude</tt> -- stored with the filter and sent to searchd as a 0/1 flag
#
# The arguments are only checked when <tt>$DEBUG</tt> is set.
def SetFilterRange(attribute, min, max, exclude = false)
  assert { attribute.is_a? String }
  # Integer, not Fixnum: the Fixnum constant is gone from current Ruby and
  # referencing it raised NameError whenever the assertions were enabled.
  assert { min.is_a? Integer }
  assert { max.is_a? Integer }
  assert { min <= max }

  @filters << { 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
end
|
253
|
+
|
254
|
+
# Configures grouping: the attribute to group by, the function applied to
# its value, and the clause used to order the resulting groups.
#
# With grouping enabled every match is assigned to a group according to
# the grouping function value. For each group searchd tracks the total
# match count and the best match in that group under the current sorting
# function, and the final result set holds one best match per group with
# the grouping function value and the match count attached.
#
# Groups may be ordered by any sorting clause built from document
# attributes and these special internal Sphinx attributes:
#
# * @id - match document ID;
# * @weight, @rank, @relevance - match weight;
# * @group - groupby function value;
# * @count - amount of matches in group.
#
# By default groups are ordered by groupby value, descending
# (ie. by '@group desc').
#
# 'total_found' then holds the total number of matching groups over the
# whole index.
#
# WARNING: grouping is done in fixed memory, so its results are only
# approximate: total_found may report more groups than are actually
# present, and @count may be underestimated.
#
# Example: sorting by relevance and grouping by a "published" attribute
# with SPH_GROUPBY_DAY yields the single most relevant match for each day
# that had any matches, with the day number and per-day match count
# attached, ordered by day number descending (ie. recent days first).
#
# The arguments are only checked when <tt>$DEBUG</tt> is set.
def SetGroupBy(attribute, func, groupsort = '@group desc')
  assert { attribute.instance_of? String }
  assert { groupsort.instance_of? String }
  assert do
    [
      SPH_GROUPBY_DAY,
      SPH_GROUPBY_WEEK,
      SPH_GROUPBY_MONTH,
      SPH_GROUPBY_YEAR,
      SPH_GROUPBY_ATTR
    ].any? { |known| func == known }
  end

  @groupby = attribute
  @groupfunc = func
  @groupsort = groupsort
end
|
303
|
+
|
304
|
+
# Connect to searchd server and run given search query.
#
# * <tt>query</tt> -- query string
# * <tt>index</tt> -- index name to query, default is "*" which means to query all indexes
#
# Returns a hash which has the following keys on success:
#
# * <tt>'fields'</tt> -- array of field names reported by searchd
# * <tt>'attrs'</tt> -- hash which maps attribute names to their type codes
# * <tt>'matches'</tt> -- hash which maps found document_id to a
#   ('weight', 'index', 'attrs') hash; 'index' is the position of the match
#   in the reply and 'attrs' is present only when the schema has attributes
# * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
# * <tt>'total_found'</tt> -- total amount of matching documents in index
# * <tt>'time'</tt> -- search time in seconds, formatted as a string with 3 decimals
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
#
# Raises <tt>SphinxResponseError</tt> ('incomplete reply') when the reply is
# shorter than its own counters announce.
def Query(query, index = '*')
  # Strings travel as a 4-byte big-endian *byte* count followed by the raw
  # bytes. Packing with 'a*' keeps every piece binary, so text containing
  # non-ASCII characters can be mixed with packed integers without an
  # Encoding::CompatibilityError and with the correct length prefix.
  pack_str = lambda { |text| [text.bytesize, text].pack('Na*') }

  # build request

  # mode and limits
  req = [@offset, @limit, @mode, @sort].pack('NNNN')
  req << pack_str.call(@sortby)
  # query itself
  req << pack_str.call(query)
  # weights
  req << [@weights.length].pack('N')
  req << @weights.pack('N*')
  # indexes
  req << pack_str.call(index)
  # id range
  req << [@min_id.to_i, @max_id.to_i].pack('NN')

  # filters
  req << [@filters.length].pack('N')
  @filters.each do |filter|
    req << pack_str.call(filter['attr'])

    if filter['values'].nil?
      # range filter: zero values, then the bounds
      req << [0, filter['min'], filter['max']].pack('NNN')
    else
      req << [filter['values'].length].pack('N')
      req << filter['values'].pack('N*')
    end
    req << [filter['exclude'] ? 1 : 0].pack('N')
  end

  # group-by, max matches, sort-by-group flag
  req << [@groupfunc].pack('N') << pack_str.call(@groupby)
  req << [@maxmatches].pack('N')
  req << pack_str.call(@groupsort)

  # add header
  req = [SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, req.bytesize].pack('nnN') + req

  # send query, get response
  sock = Connect()
  sock.send(req, 0)
  response = GetResponse(sock, VER_COMMAND_SEARCH)

  # parse response
  pos = 0

  # Reads +count+ 32-bit big-endian integers at the cursor and advances it;
  # a short read means the reply was truncated.
  read_ints = lambda do |count|
    chunk = response[pos, 4 * count]
    if chunk.nil? || chunk.bytesize < 4 * count
      @error = 'incomplete reply'
      raise SphinxResponseError, @error
    end
    pos += 4 * count
    chunk.unpack('N*')
  end

  # Reads one length-prefixed string at the cursor and advances it.
  read_str = lambda do
    size = read_ints.call(1).first
    chunk = response[pos, size]
    if chunk.nil? || chunk.bytesize < size
      @error = 'incomplete reply'
      raise SphinxResponseError, @error
    end
    pos += size
    chunk
  end

  result = {}

  # read schema
  fields = []
  read_ints.call(1).first.times { fields << read_str.call }
  result['fields'] = fields

  attrs = {}
  attrs_names_in_order = []
  read_ints.call(1).first.times do
    attr = read_str.call
    attrs[attr] = read_ints.call(1).first
    attrs_names_in_order << attr
  end
  result['attrs'] = attrs

  # read match count
  count = read_ints.call(1).first

  # read matches
  result['matches'] = {}
  count.times do |position|
    doc, weight = read_ints.call(2)

    match = (result['matches'][doc] ||= {})
    match['weight'] = weight
    match['index'] = position
    attrs_names_in_order.each do |attr|
      (match['attrs'] ||= {})[attr] = read_ints.call(1).first
    end
  end

  result['total'], result['total_found'], msecs, words = read_ints.call(4)
  result['time'] = '%.3f' % (msecs / 1000.0)

  result['words'] = {}
  words.times do
    word = read_str.call
    docs, hits = read_ints.call(2)
    result['words'][word] = { 'docs' => docs, 'hits' => hits }
  end

  result
end
|
423
|
+
|
424
|
+
# Connect to searchd server and generate excerpts from given documents.
#
# * <tt>docs</tt> -- an array of strings which represent the documents' contents
# * <tt>index</tt> -- a string specifying the index whose settings will be used
#   for stemming, lexing and case folding
# * <tt>words</tt> -- a string which contains the words to highlight
# * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
#   The hash passed in is not modified.
#
# You can use following parameters:
# * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
# * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
# * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
# * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
# * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
#
# Returns an array of string excerpts on success.
# Raises <tt>SphinxResponseError</tt> ('incomplete reply') when the reply is truncated.
def BuildExcerpts(docs, index, words, opts = {})
  assert { docs.is_a? Array }
  assert { index.is_a? String }
  assert { words.is_a? String }
  assert { opts.is_a? Hash }

  # fixup options on a copy, so the defaults never leak into the caller's hash
  opts = opts.dup
  opts['before_match'] ||= '<b>'
  opts['after_match'] ||= '</b>'
  opts['chunk_separator'] ||= ' ... '
  opts['limit'] ||= 256
  opts['around'] ||= 5

  # Strings travel as a 4-byte big-endian *byte* count followed by the raw
  # bytes; packing with 'a*' keeps the request binary so documents with
  # non-ASCII text get a correct length prefix and never trigger an
  # Encoding::CompatibilityError.
  pack_str = lambda { |text| [text.bytesize, text].pack('Na*') }

  # build request

  # v.1.0 req
  req = [0, 1].pack('N2') # mode=0, flags=1 (remove spaces)
  # req index
  req << pack_str.call(index)
  # req words
  req << pack_str.call(words)

  # options
  req << pack_str.call(opts['before_match'])
  req << pack_str.call(opts['after_match'])
  req << pack_str.call(opts['chunk_separator'])
  req << [opts['limit'].to_i, opts['around'].to_i].pack('NN')

  # documents
  req << [docs.size].pack('N')
  docs.each do |doc|
    assert { doc.is_a? String }

    req << pack_str.call(doc)
  end

  # add header
  req = [SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, req.bytesize].pack('nnN') + req

  # send query, get response
  sock = Connect()
  sock.send(req, 0)
  response = GetResponse(sock, VER_COMMAND_EXCERPT)

  # parse response: one length-prefixed excerpt per document
  pos = 0
  rlen = response.bytesize
  docs.map do
    len = response[pos, 4].to_s.unpack('N').first
    pos += 4
    # len is nil when fewer than 4 bytes were left for the length prefix
    if len.nil? || pos + len > rlen
      @error = 'incomplete reply'
      raise SphinxResponseError, @error
    end
    excerpt = response[pos, len]
    pos += len
    excerpt
  end
end
|
500
|
+
|
501
|
+
# Attribute updates
#
# Update specified attributes on specified documents.
#
# * <tt>index</tt> is a name of the index to be updated
# * <tt>attrs</tt> is an array of attribute name strings.
# * <tt>values</tt> is a hash where key is document id, and value is an array of
#   new attribute values
#
# Returns the number of actually updated documents (0 or more), as reported
# by searchd, on success. Connection and reply failures are raised as
# <tt>SphinxError</tt> subclasses by the connect and response-reading steps;
# this method does not return -1.
#
# The arguments are only checked when <tt>$DEBUG</tt> is set.
#
# Usage example:
#    sphinx.UpdateAttributes('index', ['group'], { 123 => [456] })
def UpdateAttributes(index, attrs, values)
  # verify everything
  # (Integer, not Fixnum: the Fixnum constant is gone from current Ruby and
  # referencing it raised NameError whenever the assertions were enabled)
  assert { index.is_a? String }

  assert { attrs.is_a? Array }
  assert { attrs.all? { |attr| attr.is_a? String } }

  assert { values.is_a? Hash }
  values.each do |id, entry|
    assert { id.is_a? Integer }
    assert { entry.is_a? Array }
    assert { entry.length == attrs.length }
    assert { entry.all? { |v| v.is_a? Integer } }
  end

  # Strings travel as a 4-byte big-endian *byte* count followed by the raw
  # bytes; packing with 'a*' keeps the request binary.
  pack_str = lambda { |text| [text.bytesize, text].pack('Na*') }

  # build request
  req = pack_str.call(index)

  req << [attrs.length].pack('N')
  attrs.each { |attr| req << pack_str.call(attr) }

  req << [values.length].pack('N')
  values.each do |id, entry|
    # document id followed by its new attribute values
    req << [id, *entry].pack('N*')
  end

  # connect, send query, get response
  sock = Connect()
  req = [SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, req.bytesize].pack('nnN') + req # add header
  sock.send(req, 0)

  response = GetResponse(sock, VER_COMMAND_UPDATE)

  # parse response
  response[0, 4].unpack('N*').first
end
|
559
|
+
|
560
|
+
protected
|
561
|
+
|
562
|
+
# Connect to searchd server.
#
# Opens a TCP connection to <tt>@host</tt>:<tt>@port</tt>, reads the 4-byte
# protocol version sent by the server, and answers with the client's
# protocol version (1).
#
# Returns the connected socket.
# Raises <tt>SphinxConnectError</tt> when the connection cannot be opened or
# when the server does not announce protocol version 1 or later (including
# the case where it closes the connection before sending a version).
def Connect
  # TCPSocket lives in the 'socket' standard library, which this file does not
  # load anywhere else; without it the bare rescue below would misreport the
  # resulting NameError as a connection failure.
  require 'socket'

  begin
    sock = TCPSocket.new(@host, @port)
  rescue
    @error = "connection to #{@host}:#{@port} failed"
    raise SphinxConnectError, @error
  end

  begin
    # recv yields nil or a short string when the peer closes early, in which
    # case unpack produces no version at all.
    v = sock.recv(4).to_s.unpack('N').first
    if v.nil? || v < 1
      @error = "expected searchd protocol version 1+, got version '#{v}'"
      raise SphinxConnectError, @error
    end

    sock.send([1].pack('N'), 0)
  rescue StandardError
    # do not leak the socket when the handshake fails
    sock.close unless sock.closed?
    raise
  end

  sock
end
|
581
|
+
|
582
|
+
# Get and check response packet from searchd server.
#
# Reads the 8-byte reply header (status, command version, body length),
# then the body, and closes +sock+ in every case.
#
# * <tt>sock</tt> -- connected socket; must respond to +recv+ and +close+
# * <tt>client_version</tt> -- the client's version of the command that was sent
#
# Returns the reply body (without the warning text when the status is
# <tt>SEARCHD_WARNING</tt>; the warning is stored in <tt>@warning</tt>).
#
# Raises:
# * <tt>SphinxResponseError</tt> -- header or body missing or shorter than announced
# * <tt>SphinxInternalError</tt> -- status is <tt>SEARCHD_ERROR</tt>
# * <tt>SphinxTemporaryError</tt> -- status is <tt>SEARCHD_RETRY</tt>
# * <tt>SphinxUnknownError</tt> -- any other status except OK and WARNING
def GetResponse(sock, client_version)
  # Reads up to +count+ bytes, looping over partial reads. recv signals a
  # closed connection with an empty string or nil rather than EOFError, so
  # either of those ends the loop instead of spinning forever.
  read_exact = lambda do |count|
    buffer = String.new
    while buffer.bytesize < count
      chunk = begin
        sock.recv(count - buffer.bytesize)
      rescue EOFError
        nil
      end
      break if chunk.nil? || chunk.empty?
      buffer << chunk
    end
    buffer
  end

  status = ver = len = nil
  response = String.new
  begin
    # len stays nil when the header itself is incomplete
    status, ver, len = read_exact.call(8).unpack('n2N')
    response = read_exact.call(len) if len
  ensure
    sock.close
  end

  # check response
  read = response.bytesize
  if response.empty? || read != len
    # 0 is truthy in Ruby, so the length has to be compared explicitly
    @error = if len && len > 0
      "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
    else
      'received zero-sized searchd response'
    end
    raise SphinxResponseError, @error
  end

  # check status
  if status == SEARCHD_WARNING
    # body layout: warning length, warning text, then the actual reply
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return response[4 + wlen, response.length - 4 - wlen]
  end

  if status == SEARCHD_ERROR
    @error = 'searchd error: ' + response[4, response.length - 4].to_s
    raise SphinxInternalError, @error
  end

  if status == SEARCHD_RETRY
    @error = 'temporary searchd error: ' + response[4, response.length - 4].to_s
    raise SphinxTemporaryError, @error
  end

  unless status == SEARCHD_OK
    @error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, @error
  end

  # check version
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
      "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  response
end
|
640
|
+
|
641
|
+
# :stopdoc:
|
642
|
+
# Debug-only sanity check: when $DEBUG is set, runs the given block and
# raises 'Assertion failed!' if it returns a false value. When $DEBUG is
# not set the block is never evaluated.
def assert
  return unless $DEBUG

  raise 'Assertion failed!' unless yield
end
|
645
|
+
# :startdoc:
|
646
|
+
end
|
647
|
+
end
|