zinx 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
- require File.dirname(__FILE__) + '/sphinx/request'
2
- require File.dirname(__FILE__) + '/sphinx/response'
3
- require File.dirname(__FILE__) + '/sphinx/client'
4
-
5
- module Sphinx
1
+ require File.dirname(__FILE__) + '/sphinx/request'
2
+ require File.dirname(__FILE__) + '/sphinx/response'
3
+ require File.dirname(__FILE__) + '/sphinx/client'
4
+
5
+ module Sphinx
6
6
  end
@@ -1,1125 +1,1125 @@
1
- # = client.rb - Sphinx Client API
2
- #
3
- # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
- # Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
5
- # License:: Distributed under the same terms as Ruby
6
- # Version:: 0.9.9-r1299
7
- # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
- #
9
- # This library is distributed under the terms of the Ruby license.
10
- # You can freely distribute/modify this library.
11
-
12
- # ==Sphinx Client API
13
- #
14
- # The Sphinx Client API is used to communicate with <tt>searchd</tt>
15
- # daemon and get search results from Sphinx.
16
- #
17
- # ===Usage
18
- #
19
- # sphinx = Sphinx::Client.new
20
- # result = sphinx.Query('test')
21
- # ids = result['matches'].map { |match| match['id'] }.join(',')
22
- # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
- #
24
- # docs = posts.map(&:body)
25
- # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
-
27
- require 'socket'
28
-
29
- module Sphinx
30
- # :stopdoc:
31
-
32
- class SphinxError < StandardError; end
33
- class SphinxArgumentError < SphinxError; end
34
- class SphinxConnectError < SphinxError; end
35
- class SphinxResponseError < SphinxError; end
36
- class SphinxInternalError < SphinxError; end
37
- class SphinxTemporaryError < SphinxError; end
38
- class SphinxUnknownError < SphinxError; end
39
-
40
- # :startdoc:
41
-
42
- class Client
43
-
44
- # :stopdoc:
45
-
46
- # Known searchd commands
47
-
48
- # search command
49
- SEARCHD_COMMAND_SEARCH = 0
50
- # excerpt command
51
- SEARCHD_COMMAND_EXCERPT = 1
52
- # update command
53
- SEARCHD_COMMAND_UPDATE = 2
54
- # keywords command
55
- SEARCHD_COMMAND_KEYWORDS = 3
56
-
57
- # Current client-side command implementation versions
58
-
59
- # search command version
60
- VER_COMMAND_SEARCH = 0x119
61
- # excerpt command version
62
- VER_COMMAND_EXCERPT = 0x102
63
- # update command version
64
- VER_COMMAND_UPDATE = 0x102
65
- # keywords command version
66
- VER_COMMAND_KEYWORDS = 0x100
67
-
68
- # Known searchd status codes
69
-
70
- # general success, command-specific reply follows
71
- SEARCHD_OK = 0
72
- # general failure, command-specific reply may follow
73
- SEARCHD_ERROR = 1
74
- # temporary failure, client should retry later
75
- SEARCHD_RETRY = 2
76
- # general success, warning message and command-specific reply follow
77
- SEARCHD_WARNING = 3
78
-
79
- # :startdoc:
80
-
81
- # Known match modes
82
-
83
- # match all query words
84
- SPH_MATCH_ALL = 0
85
- # match any query word
86
- SPH_MATCH_ANY = 1
87
- # match this exact phrase
88
- SPH_MATCH_PHRASE = 2
89
- # match this boolean query
90
- SPH_MATCH_BOOLEAN = 3
91
- # match this extended query
92
- SPH_MATCH_EXTENDED = 4
93
- # match all document IDs w/o fulltext query, apply filters
94
- SPH_MATCH_FULLSCAN = 5
95
- # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
96
- SPH_MATCH_EXTENDED2 = 6
97
-
98
- # Known ranking modes (ext2 only)
99
-
100
- # default mode, phrase proximity major factor and BM25 minor one
101
- SPH_RANK_PROXIMITY_BM25 = 0
102
- # statistical mode, BM25 ranking only (faster but worse quality)
103
- SPH_RANK_BM25 = 1
104
- # no ranking, all matches get a weight of 1
105
- SPH_RANK_NONE = 2
106
- # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
107
- SPH_RANK_WORDCOUNT = 3
108
- # phrase proximity
109
- SPH_RANK_PROXIMITY = 4
110
-
111
- # Known sort modes
112
-
113
- # sort by document relevance desc, then by date
114
- SPH_SORT_RELEVANCE = 0
115
- # sort by document date desc, then by relevance desc
116
- SPH_SORT_ATTR_DESC = 1
117
- # sort by document date asc, then by relevance desc
118
- SPH_SORT_ATTR_ASC = 2
119
- # sort by time segments (hour/day/week/etc) desc, then by relevance desc
120
- SPH_SORT_TIME_SEGMENTS = 3
121
- # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
122
- SPH_SORT_EXTENDED = 4
123
- # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
124
- SPH_SORT_EXPR = 5
125
-
126
- # Known filter types
127
-
128
- # filter by integer values set
129
- SPH_FILTER_VALUES = 0
130
- # filter by integer range
131
- SPH_FILTER_RANGE = 1
132
- # filter by float range
133
- SPH_FILTER_FLOATRANGE = 2
134
-
135
- # Known attribute types
136
-
137
- # this attr is just an integer
138
- SPH_ATTR_INTEGER = 1
139
- # this attr is a timestamp
140
- SPH_ATTR_TIMESTAMP = 2
141
- # this attr is an ordinal string number (integer at search time,
142
- # specially handled at indexing time)
143
- SPH_ATTR_ORDINAL = 3
144
- # this attr is a boolean bit field
145
- SPH_ATTR_BOOL = 4
146
- # this attr is a float
147
- SPH_ATTR_FLOAT = 5
148
- # signed 64-bit integer
149
- SPH_ATTR_BIGINT = 6
150
- # string
151
- SPH_ATTR_STRING = 7
152
- # this attr has multiple values (0 or more)
153
- SPH_ATTR_MULTI = 0x40000001
154
- SPH_ATTR_MULTI64 = 0x40000002
155
-
156
- # Known grouping functions
157
-
158
- # group by day
159
- SPH_GROUPBY_DAY = 0
160
- # group by week
161
- SPH_GROUPBY_WEEK = 1
162
- # group by month
163
- SPH_GROUPBY_MONTH = 2
164
- # group by year
165
- SPH_GROUPBY_YEAR = 3
166
- # group by attribute value
167
- SPH_GROUPBY_ATTR = 4
168
- # group by sequential attrs pair
169
- SPH_GROUPBY_ATTRPAIR = 5
170
-
171
- # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
172
- def initialize
173
- # per-client-object settings
174
- @host = 'localhost' # searchd host (default is "localhost")
175
- @port = 9312 # searchd port (default is 9312)
176
-
177
- # per-query settings
178
- @offset = 0 # how many records to seek from result-set start (default is 0)
179
- @limit = 20 # how many records to return from result-set starting at offset (default is 20)
180
- @mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
181
- @weights = [] # per-field weights (default is 1 for all fields)
182
- @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
183
- @sortby = '' # attribute to sort by (default is "")
184
- @min_id = 0 # min ID to match (default is 0, which means no limit)
185
- @max_id = 0 # max ID to match (default is 0, which means no limit)
186
- @filters = [] # search filters
187
- @groupby = '' # group-by attribute name
188
- @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
189
- @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
190
- @groupdistinct = '' # group-by count-distinct attribute
191
- @maxmatches = 1000 # max matches to retrieve
192
- @cutoff = 0 # cutoff to stop searching at (default is 0)
193
- @retrycount = 0 # distributed retries count
194
- @retrydelay = 0 # distributed retries delay
195
- @anchor = [] # geographical anchor point
196
- @indexweights = [] # per-index weights
197
- @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
198
- @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
199
- @fieldweights = {} # per-field-name weights
200
- @overrides = [] # per-query attribute values overrides
201
- @select = '*' # select-list (attributes or expressions, with optional aliases)
202
-
203
- # per-reply fields (for single-query case)
204
- @error = '' # last error message
205
- @warning = '' # last warning message
206
-
207
- @reqs = [] # requests storage (for multi-query case)
208
- @mbenc = '' # stored mbstring encoding
209
- end
210
-
211
- # Get last error message.
212
- def GetLastError
213
- @error
214
- end
215
-
216
- # Get last warning message.
217
- def GetLastWarning
218
- @warning
219
- end
220
-
221
- # Set searchd host name (string) and port (integer).
222
- def SetServer(host, port)
223
- assert { host.instance_of? String }
224
- assert { port.instance_of? Fixnum }
225
-
226
- @host = host
227
- @port = port
228
- end
229
-
230
- # Set offset and count into result set,
231
- # and optionally set max-matches and cutoff limits.
232
- def SetLimits(offset, limit, max = 0, cutoff = 0)
233
- assert { offset.instance_of? Fixnum }
234
- assert { limit.instance_of? Fixnum }
235
- assert { max.instance_of? Fixnum }
236
- assert { offset >= 0 }
237
- assert { limit > 0 }
238
- assert { max >= 0 }
239
-
240
- @offset = offset
241
- @limit = limit
242
- @maxmatches = max if max > 0
243
- @cutoff = cutoff if cutoff > 0
244
- end
245
-
246
- # Set maximum query time, in milliseconds, per-index,
247
- # integer, 0 means "do not limit"
248
- def SetMaxQueryTime(max)
249
- assert { max.instance_of? Fixnum }
250
- assert { max >= 0 }
251
- @maxquerytime = max
252
- end
253
-
254
- # Set matching mode.
255
- def SetMatchMode(mode)
256
- assert { mode == SPH_MATCH_ALL \
257
- || mode == SPH_MATCH_ANY \
258
- || mode == SPH_MATCH_PHRASE \
259
- || mode == SPH_MATCH_BOOLEAN \
260
- || mode == SPH_MATCH_EXTENDED \
261
- || mode == SPH_MATCH_FULLSCAN \
262
- || mode == SPH_MATCH_EXTENDED2 }
263
-
264
- @mode = mode
265
- end
266
-
267
- # Set ranking mode.
268
- def SetRankingMode(ranker)
269
- assert { ranker == SPH_RANK_PROXIMITY_BM25 \
270
- || ranker == SPH_RANK_BM25 \
271
- || ranker == SPH_RANK_NONE \
272
- || ranker == SPH_RANK_WORDCOUNT \
273
- || ranker == SPH_RANK_PROXIMITY }
274
-
275
- @ranker = ranker
276
- end
277
-
278
- # Set matches sorting mode.
279
- def SetSortMode(mode, sortby = '')
280
- assert { mode == SPH_SORT_RELEVANCE \
281
- || mode == SPH_SORT_ATTR_DESC \
282
- || mode == SPH_SORT_ATTR_ASC \
283
- || mode == SPH_SORT_TIME_SEGMENTS \
284
- || mode == SPH_SORT_EXTENDED \
285
- || mode == SPH_SORT_EXPR }
286
- assert { sortby.instance_of? String }
287
- assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
288
-
289
- @sort = mode
290
- @sortby = sortby
291
- end
292
-
293
- # Bind per-field weights by order.
294
- #
295
- # DEPRECATED; use SetFieldWeights() instead.
296
- def SetWeights(weights)
297
- assert { weights.instance_of? Array }
298
- weights.each do |weight|
299
- assert { weight.instance_of? Fixnum }
300
- end
301
-
302
- @weights = weights
303
- end
304
-
305
- # Bind per-field weights by name.
306
- #
307
- # Takes string (field name) to integer name (field weight) hash as an argument.
308
- # * Takes precedence over SetWeights().
309
- # * Unknown names will be silently ignored.
310
- # * Unbound fields will be silently given a weight of 1.
311
- def SetFieldWeights(weights)
312
- assert { weights.instance_of? Hash }
313
- weights.each do |name, weight|
314
- assert { name.instance_of? String }
315
- assert { weight.instance_of? Fixnum }
316
- end
317
-
318
- @fieldweights = weights
319
- end
320
-
321
- # Bind per-index weights by name.
322
- def SetIndexWeights(weights)
323
- assert { weights.instance_of? Hash }
324
- weights.each do |index, weight|
325
- assert { index.instance_of? String }
326
- assert { weight.instance_of? Fixnum }
327
- end
328
-
329
- @indexweights = weights
330
- end
331
-
332
- # Set IDs range to match.
333
- #
334
- # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
335
- def SetIDRange(min, max)
336
- assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
337
- assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
338
- assert { min <= max }
339
-
340
- @min_id = min
341
- @max_id = max
342
- end
343
-
344
- # Set values filter.
345
- #
346
- # Only match those records where <tt>attribute</tt> column values
347
- # are in specified set.
348
- def SetFilter(attribute, values, exclude = false)
349
- assert { attribute.instance_of? String }
350
- assert { values.instance_of? Array }
351
- assert { !values.empty? }
352
-
353
- if values.instance_of?(Array) && values.size > 0
354
- values.each do |value|
355
- assert { value.instance_of? Fixnum }
356
- end
357
-
358
- @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
359
- end
360
- end
361
-
362
- # Set range filter.
363
- #
364
- # Only match those records where <tt>attribute</tt> column value
365
- # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
366
- def SetFilterRange(attribute, min, max, exclude = false)
367
- assert { attribute.instance_of? String }
368
- assert { min.instance_of? Fixnum or min.instance_of? Bignum }
369
- assert { max.instance_of? Fixnum or max.instance_of? Bignum }
370
- assert { min <= max }
371
-
372
- @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
373
- end
374
-
375
- # Set float range filter.
376
- #
377
- # Only match those records where <tt>attribute</tt> column value
378
- # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
379
- def SetFilterFloatRange(attribute, min, max, exclude = false)
380
- assert { attribute.instance_of? String }
381
- assert { min.instance_of? Float }
382
- assert { max.instance_of? Float }
383
- assert { min <= max }
384
-
385
- @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
386
- end
387
-
388
- # Setup anchor point for geosphere distance calculations.
389
- #
390
- # Required to use <tt>@geodist</tt> in filters and sorting;
391
- # distance will be computed to this point. Latitude and longitude
392
- # must be in radians.
393
- #
394
- # * <tt>attrlat</tt> -- is the name of latitude attribute
395
- # * <tt>attrlong</tt> -- is the name of longitude attribute
396
- # * <tt>lat</tt> -- is anchor point latitude, in radians
397
- # * <tt>long</tt> -- is anchor point longitude, in radians
398
- def SetGeoAnchor(attrlat, attrlong, lat, long)
399
- assert { attrlat.instance_of? String }
400
- assert { attrlong.instance_of? String }
401
- assert { lat.instance_of? Float }
402
- assert { long.instance_of? Float }
403
-
404
- @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
405
- end
406
-
407
- # Set grouping attribute and function.
408
- #
409
- # In grouping mode, all matches are assigned to different groups
410
- # based on grouping function value.
411
- #
412
- # Each group keeps track of the total match count, and the best match
413
- # (in this group) according to current sorting function.
414
- #
415
- # The final result set contains one best match per group, with
416
- # grouping function value and matches count attached.
417
- #
418
- # Groups in result set could be sorted by any sorting clause,
419
- # including both document attributes and the following special
420
- # internal Sphinx attributes:
421
- #
422
- # * @id - match document ID;
423
- # * @weight, @rank, @relevance - match weight;
424
- # * @group - groupby function value;
425
- # * @count - amount of matches in group.
426
- #
427
- # the default mode is to sort by groupby value in descending order,
428
- # ie. by '@group desc'.
429
- #
430
- # 'total_found' would contain total amount of matching groups over
431
- # the whole index.
432
- #
433
- # WARNING: grouping is done in fixed memory and thus its results
434
- # are only approximate; so there might be more groups reported
435
- # in total_found than actually present. @count might also
436
- # be underestimated.
437
- #
438
- # For example, if sorting by relevance and grouping by "published"
439
- # attribute with SPH_GROUPBY_DAY function, then the result set will
440
- # contain one most relevant match per each day when there were any
441
- # matches published, with day number and per-day match count attached,
442
- # and sorted by day number in descending order (ie. recent days first).
443
- def SetGroupBy(attribute, func, groupsort = '@group desc')
444
- assert { attribute.instance_of? String }
445
- assert { groupsort.instance_of? String }
446
- assert { func == SPH_GROUPBY_DAY \
447
- || func == SPH_GROUPBY_WEEK \
448
- || func == SPH_GROUPBY_MONTH \
449
- || func == SPH_GROUPBY_YEAR \
450
- || func == SPH_GROUPBY_ATTR \
451
- || func == SPH_GROUPBY_ATTRPAIR }
452
-
453
- @groupby = attribute
454
- @groupfunc = func
455
- @groupsort = groupsort
456
- end
457
-
458
- # Set count-distinct attribute for group-by queries.
459
- def SetGroupDistinct(attribute)
460
- assert { attribute.instance_of? String }
461
- @groupdistinct = attribute
462
- end
463
-
464
- # Set distributed retries count and delay.
465
- def SetRetries(count, delay = 0)
466
- assert { count.instance_of? Fixnum }
467
- assert { delay.instance_of? Fixnum }
468
-
469
- @retrycount = count
470
- @retrydelay = delay
471
- end
472
-
473
- # Set attribute values override
474
- #
475
- # There can be only one override per attribute.
476
- # +values+ must be a hash that maps document IDs to attribute values.
477
- def SetOverride(attrname, attrtype, values)
478
- assert { attrname.instance_of? String }
479
- assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
480
- assert { values.instance_of? Hash }
481
-
482
- @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
483
- end
484
-
485
- # Set select-list (attributes or expressions), SQL-like syntax.
486
- def SetSelect(select)
487
- assert { select.instance_of? String }
488
- @select = select
489
- end
490
-
491
- # Clear all filters (for multi-queries).
492
- def ResetFilters
493
- @filters = []
494
- @anchor = []
495
- end
496
-
497
- # Clear groupby settings (for multi-queries).
498
- def ResetGroupBy
499
- @groupby = ''
500
- @groupfunc = SPH_GROUPBY_DAY
501
- @groupsort = '@group desc'
502
- @groupdistinct = ''
503
- end
504
-
505
- # Clear all attribute value overrides (for multi-queries).
506
- def ResetOverrides
507
- @overrides = []
508
- end
509
-
510
- # Connect to searchd server and run given search query.
511
- #
512
- # <tt>query</tt> is the query string
513
-
514
- # <tt>index</tt> is index name (or names) to query. default value is "*" which means
515
- # to query all indexes. Accepted characters for index names are letters, numbers,
516
- # dash, and underscore; everything else is considered a separator. Therefore,
517
- # all the following calls are valid and will search two indexes:
518
- #
519
- # sphinx.Query('test query', 'main delta')
520
- # sphinx.Query('test query', 'main;delta')
521
- # sphinx.Query('test query', 'main, delta')
522
- #
523
- # Index order matters. If identical IDs are found in two or more indexes,
524
- # weight and attribute values from the very last matching index will be used
525
- # for sorting and returning to client. Therefore, in the example above,
526
- # matches from "delta" index will always "win" over matches from "main".
527
- #
528
- # Returns false on failure.
529
- # Returns hash which has the following keys on success:
530
- #
531
- # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
532
- # * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
533
- # * <tt>'total_found'</tt> -- total amount of matching documents in index
534
- # * <tt>'time'</tt> -- search time
535
- # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
536
- def Query(query, index = '*', comment = '')
537
- assert { @reqs.empty? }
538
- @reqs = []
539
-
540
- self.AddQuery(query, index, comment)
541
- results = self.RunQueries
542
-
543
- # probably network error; error message should be already filled
544
- return false unless results.instance_of?(Array)
545
-
546
- @error = results[0]['error']
547
- @warning = results[0]['warning']
548
-
549
- return false if results[0]['status'] == SEARCHD_ERROR
550
- return results[0]
551
- end
552
-
553
- # Add query to batch.
554
- #
555
- # Batch queries enable searchd to perform internal optimizations,
556
- # if possible; and reduce network connection overheads in all cases.
557
- #
558
- # For instance, running exactly the same query with different
559
- # groupby settings will enable searchd to perform expensive
560
- # full-text search and ranking operation only once, but compute
561
- # multiple groupby results from its output.
562
- #
563
- # Parameters are exactly the same as in <tt>Query</tt> call.
564
- # Returns index to results array returned by <tt>RunQueries</tt> call.
565
- def AddQuery(query, index = '*', comment = '')
566
- # build request
567
-
568
- # mode and limits
569
- request = Request.new
570
- request.put_int @offset, @limit, @mode, @ranker, @sort
571
- request.put_string @sortby
572
- # query itself
573
- request.put_string query
574
- # weights
575
- request.put_int_array @weights
576
- # indexes
577
- request.put_string index
578
- # id64 range marker
579
- request.put_int 1
580
- # id64 range
581
- request.put_int64 @min_id.to_i, @max_id.to_i
582
-
583
- # filters
584
- request.put_int @filters.length
585
- @filters.each do |filter|
586
- request.put_string filter['attr']
587
- request.put_int filter['type']
588
-
589
- case filter['type']
590
- when SPH_FILTER_VALUES
591
- request.put_int64_array filter['values']
592
- when SPH_FILTER_RANGE
593
- request.put_int64 filter['min'], filter['max']
594
- when SPH_FILTER_FLOATRANGE
595
- request.put_float filter['min'], filter['max']
596
- else
597
- raise SphinxInternalError, 'Internal error: unhandled filter type'
598
- end
599
- request.put_int filter['exclude'] ? 1 : 0
600
- end
601
-
602
- # group-by clause, max-matches count, group-sort clause, cutoff count
603
- request.put_int @groupfunc
604
- request.put_string @groupby
605
- request.put_int @maxmatches
606
- request.put_string @groupsort
607
- request.put_int @cutoff, @retrycount, @retrydelay
608
- request.put_string @groupdistinct
609
-
610
- # anchor point
611
- if @anchor.empty?
612
- request.put_int 0
613
- else
614
- request.put_int 1
615
- request.put_string @anchor['attrlat'], @anchor['attrlong']
616
- request.put_float @anchor['lat'], @anchor['long']
617
- end
618
-
619
- # per-index weights
620
- request.put_int @indexweights.length
621
- @indexweights.each do |idx, weight|
622
- request.put_string idx
623
- request.put_int weight
624
- end
625
-
626
- # max query time
627
- request.put_int @maxquerytime
628
-
629
- # per-field weights
630
- request.put_int @fieldweights.length
631
- @fieldweights.each do |field, weight|
632
- request.put_string field
633
- request.put_int weight
634
- end
635
-
636
- # comment
637
- request.put_string comment
638
-
639
- # attribute overrides
640
- request.put_int @overrides.length
641
- for entry in @overrides do
642
- request.put_string entry['attr']
643
- request.put_int entry['type'], entry['values'].size
644
- entry['values'].each do |id, val|
645
- assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
646
- assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
647
-
648
- request.put_int64 id
649
- case entry['type']
650
- when SPH_ATTR_FLOAT
651
- request.put_float val
652
- when SPH_ATTR_BIGINT
653
- request.put_int64 val
654
- else
655
- request.put_int val
656
- end
657
- end
658
- end
659
-
660
- # select-list
661
- request.put_string @select
662
-
663
- # store request to requests array
664
- @reqs << request.to_s;
665
- return @reqs.length - 1
666
- end
667
-
668
- # Run queries batch.
669
- #
670
- # Returns an array of result sets on success.
671
- # Returns false on network IO failure.
672
- #
673
- # Each result set in the returned array is a hash which contains
674
- # the same keys as the hash returned by <tt>Query</tt>, plus:
675
- #
676
- # * <tt>'error'</tt> -- search error for this query
677
- # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
678
- def RunQueries
679
- if @reqs.empty?
680
- @error = 'No queries defined, issue AddQuery() first'
681
- return false
682
- end
683
-
684
- req = @reqs.join('')
685
- nreqs = @reqs.length
686
- @reqs = []
687
- response = PerformRequest(:search, req, nreqs)
688
-
689
- # parse response
690
- begin
691
- results = []
692
- ires = 0
693
- while ires < nreqs
694
- ires += 1
695
- result = {}
696
-
697
- result['error'] = ''
698
- result['warning'] = ''
699
-
700
- # extract status
701
- status = result['status'] = response.get_int
702
- if status != SEARCHD_OK
703
- message = response.get_string
704
- if status == SEARCHD_WARNING
705
- result['warning'] = message
706
- else
707
- result['error'] = message
708
- results << result
709
- next
710
- end
711
- end
712
-
713
- # read schema
714
- fields = []
715
- attrs = {}
716
- attrs_names_in_order = []
717
-
718
- nfields = response.get_int
719
- while nfields > 0
720
- nfields -= 1
721
- fields << response.get_string
722
- end
723
- result['fields'] = fields
724
-
725
- nattrs = response.get_int
726
- while nattrs > 0
727
- nattrs -= 1
728
- attr = response.get_string
729
- type = response.get_int
730
- attrs[attr] = type
731
- attrs_names_in_order << attr
732
- end
733
- result['attrs'] = attrs
734
-
735
- # read match count
736
- count = response.get_int
737
- id64 = response.get_int
738
-
739
- # read matches
740
- result['matches'] = []
741
- while count > 0
742
- count -= 1
743
-
744
- if id64 != 0
745
- doc = response.get_int64
746
- weight = response.get_int
747
- else
748
- doc, weight = response.get_ints(2)
749
- end
750
-
751
- r = {} # This is a single result put in the result['matches'] array
752
- r['id'] = doc
753
- r['weight'] = weight
754
- attrs_names_in_order.each do |a|
755
- r['attrs'] ||= {}
756
-
757
- case attrs[a]
758
- when SPH_ATTR_BIGINT
759
- # handle 64-bit ints
760
- r['attrs'][a] = response.get_int64
761
- when SPH_ATTR_FLOAT
762
- # handle floats
763
- r['attrs'][a] = response.get_float
764
- when SPH_ATTR_STRING
765
- # handle string
766
- r['attrs'][a] = response.get_string
767
- else
768
- # handle everything else as unsigned ints
769
- val = response.get_int
770
- if attrs[a]==SPH_ATTR_MULTI
771
- r['attrs'][a] = []
772
- 1.upto(val) do
773
- r['attrs'][a] << response.get_int
774
- end
775
- elsif attrs[a]==SPH_ATTR_MULTI64
776
- r['attrs'][a] = []
777
- val = val/2
778
- 1.upto(val) do
779
- r['attrs'][a] << response.get_int64
780
- end
781
- else
782
- r['attrs'][a] = val
783
- end
784
- end
785
- end
786
- result['matches'] << r
787
- end
788
- result['total'], result['total_found'], msecs, words = response.get_ints(4)
789
- result['time'] = '%.3f' % (msecs / 1000.0)
790
-
791
- result['words'] = {}
792
- while words > 0
793
- words -= 1
794
- word = response.get_string
795
- docs, hits = response.get_ints(2)
796
- result['words'][word] = { 'docs' => docs, 'hits' => hits }
797
- end
798
-
799
- results << result
800
- end
801
- #rescue EOFError
802
- # @error = 'incomplete reply'
803
- # raise SphinxResponseError, @error
804
- end
805
-
806
- return results
807
- end
808
-
809
- # Connect to searchd server and generate excerpts from given documents.
810
- #
811
- # * <tt>docs</tt> -- an array of strings which represent the documents' contents
812
- # * <tt>index</tt> -- a string specifying the index whose settings will be used
813
- # for stemming, lexing and case folding
814
- # * <tt>words</tt> -- a string which contains the words to highlight
815
- # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
816
- #
817
- # You can use following parameters:
818
- # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
819
- # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
820
- # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
821
- # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
822
- # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
823
- # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
824
- # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
825
- # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
826
- # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
827
- #
828
- # Returns false on failure.
829
- # Returns an array of string excerpts on success.
830
- def BuildExcerpts(docs, index, words, opts = {})
831
- assert { docs.instance_of? Array }
832
- assert { index.instance_of? String }
833
- assert { words.instance_of? String }
834
- assert { opts.instance_of? Hash }
835
-
836
- # fixup options
837
- opts['before_match'] ||= '<b>';
838
- opts['after_match'] ||= '</b>';
839
- opts['chunk_separator'] ||= ' ... ';
840
- opts['html_strip_mode'] ||= 'index';
841
- opts['limit'] ||= 256;
842
- opts['limit_passages'] ||= 0;
843
- opts['limit_words'] ||= 0;
844
- opts['around'] ||= 5;
845
- opts['start_passage_id'] ||= 1;
846
- opts['exact_phrase'] ||= false
847
- opts['single_passage'] ||= false
848
- opts['use_boundaries'] ||= false
849
- opts['weight_order'] ||= false
850
- opts['load_files'] ||= false
851
- opts['allow_empty'] ||= false
852
-
853
- # build request
854
-
855
- # v.1.0 req
856
- flags = 1
857
- flags |= 2 if opts['exact_phrase']
858
- flags |= 4 if opts['single_passage']
859
- flags |= 8 if opts['use_boundaries']
860
- flags |= 16 if opts['weight_order']
861
- flags |= 32 if opts['query_mode']
862
- flags |= 64 if opts['force_all_words']
863
- flags |= 128 if opts['load_files']
864
- flags |= 256 if opts['allow_empty']
865
-
866
- request = Request.new
867
- request.put_int 0, flags # mode=0, flags=1 (remove spaces)
868
- # req index
869
- request.put_string index
870
- # req words
871
- request.put_string words
872
-
873
- # options
874
- request.put_string opts['before_match']
875
- request.put_string opts['after_match']
876
- request.put_string opts['chunk_separator']
877
- request.put_int opts['limit'].to_i, opts['around'].to_i
878
-
879
- # options v1.2
880
- request.put_int opts['limit_passages'].to_i
881
- request.put_int opts['limit_words'].to_i
882
- request.put_int opts['start_passage_id'].to_i
883
- request.put_string opts['html_strip_mode']
884
-
885
- # documents
886
- request.put_int docs.size
887
- docs.each do |doc|
888
- assert { doc.instance_of? String }
889
-
890
- request.put_string doc
891
- end
892
-
893
- response = PerformRequest(:excerpt, request)
894
-
895
- # parse response
896
- begin
897
- res = []
898
- docs.each do |doc|
899
- res << response.get_string
900
- end
901
- rescue EOFError
902
- @error = 'incomplete reply'
903
- raise SphinxResponseError, @error
904
- end
905
- return res
906
- end
907
-
908
- # Connect to searchd server, and generate keyword list for a given query.
909
- #
910
- # Returns an array of words on success.
911
- def BuildKeywords(query, index, hits)
912
- assert { query.instance_of? String }
913
- assert { index.instance_of? String }
914
- assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
915
-
916
- # build request
917
- request = Request.new
918
- # v.1.0 req
919
- request.put_string query # req query
920
- request.put_string index # req index
921
- request.put_int hits ? 1 : 0
922
-
923
- response = PerformRequest(:keywords, request)
924
-
925
- # parse response
926
- begin
927
- res = []
928
- nwords = response.get_int
929
- 0.upto(nwords - 1) do |i|
930
- tokenized = response.get_string
931
- normalized = response.get_string
932
-
933
- entry = { 'tokenized' => tokenized, 'normalized' => normalized }
934
- entry['docs'], entry['hits'] = response.get_ints(2) if hits
935
-
936
- res << entry
937
- end
938
- rescue EOFError
939
- @error = 'incomplete reply'
940
- raise SphinxResponseError, @error
941
- end
942
-
943
- return res
944
- end
945
-
946
- # Batch update given attributes in given rows in given indexes.
947
- #
948
- # * +index+ is a name of the index to be updated
949
- # * +attrs+ is an array of attribute name strings.
950
- # * +values+ is a hash where key is document id, and value is an array of
951
- # * +mva+ identifies whether update MVA
952
- # new attribute values
953
- #
954
- # Returns number of actually updated documents (0 or more) on success.
955
- # Returns -1 on failure.
956
- #
957
- # Usage example:
958
- # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
959
- def UpdateAttributes(index, attrs, values, mva = false)
960
- # verify everything
961
- assert { index.instance_of? String }
962
- assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
963
-
964
- assert { attrs.instance_of? Array }
965
- attrs.each do |attr|
966
- assert { attr.instance_of? String }
967
- end
968
-
969
- assert { values.instance_of? Hash }
970
- values.each do |id, entry|
971
- assert { id.instance_of? Fixnum }
972
- assert { entry.instance_of? Array }
973
- assert { entry.length == attrs.length }
974
- entry.each do |v|
975
- if mva
976
- assert { v.instance_of? Array }
977
- v.each { |vv| assert { vv.instance_of? Fixnum } }
978
- else
979
- assert { v.instance_of? Fixnum }
980
- end
981
- end
982
- end
983
-
984
- # build request
985
- request = Request.new
986
- request.put_string index
987
-
988
- request.put_int attrs.length
989
- for attr in attrs
990
- request.put_string attr
991
- request.put_int mva ? 1 : 0
992
- end
993
-
994
- request.put_int values.length
995
- values.each do |id, entry|
996
- request.put_int64 id
997
- if mva
998
- entry.each { |v| request.put_int_array v }
999
- else
1000
- request.put_int(*entry)
1001
- end
1002
- end
1003
-
1004
- response = PerformRequest(:update, request)
1005
-
1006
- # parse response
1007
- begin
1008
- return response.get_int
1009
- rescue EOFError
1010
- @error = 'incomplete reply'
1011
- raise SphinxResponseError, @error
1012
- end
1013
- end
1014
-
1015
- protected
1016
-
1017
- # Connect to searchd server.
1018
- def Connect
1019
- begin
1020
- if @host[0,1]=='/'
1021
- sock = UNIXSocket.new(@host)
1022
- else
1023
- sock = TCPSocket.new(@host, @port)
1024
- end
1025
- rescue => err
1026
- @error = "connection to #{@host}:#{@port} failed (error=#{err})"
1027
- raise SphinxConnectError, @error
1028
- end
1029
-
1030
- v = sock.recv(4).unpack('N*').first
1031
- if v < 1
1032
- sock.close
1033
- @error = "expected searchd protocol version 1+, got version '#{v}'"
1034
- raise SphinxConnectError, @error
1035
- end
1036
-
1037
- sock.send([1].pack('N'), 0)
1038
- sock
1039
- end
1040
-
1041
- # Get and check response packet from searchd server.
1042
- def GetResponse(sock, client_version)
1043
- response = ''
1044
- len = 0
1045
-
1046
- header = sock.recv(8)
1047
- if header.length == 8
1048
- status, ver, len = header.unpack('n2N')
1049
- left = len.to_i
1050
- while left > 0 do
1051
- begin
1052
- chunk = sock.recv(left)
1053
- if chunk
1054
- response << chunk
1055
- left -= chunk.length
1056
- end
1057
- rescue EOFError
1058
- break
1059
- end
1060
- end
1061
- end
1062
- sock.close
1063
-
1064
- # check response
1065
- read = response.length
1066
- if response.empty? or read != len.to_i
1067
- @error = response.empty? \
1068
- ? 'received zero-sized searchd response' \
1069
- : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
1070
- raise SphinxResponseError, @error
1071
- end
1072
-
1073
- # check status
1074
- if (status == SEARCHD_WARNING)
1075
- wlen = response[0, 4].unpack('N*').first
1076
- @warning = response[4, wlen]
1077
- return response[4 + wlen, response.length - 4 - wlen]
1078
- end
1079
-
1080
- if status == SEARCHD_ERROR
1081
- @error = 'searchd error: ' + response[4, response.length - 4]
1082
- raise SphinxInternalError, @error
1083
- end
1084
-
1085
- if status == SEARCHD_RETRY
1086
- @error = 'temporary searchd error: ' + response[4, response.length - 4]
1087
- raise SphinxTemporaryError, @error
1088
- end
1089
-
1090
- unless status == SEARCHD_OK
1091
- @error = "unknown status code: '#{status}'"
1092
- raise SphinxUnknownError, @error
1093
- end
1094
-
1095
- # check version
1096
- if ver < client_version
1097
- @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
1098
- "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
1099
- end
1100
-
1101
- return response
1102
- end
1103
-
1104
- # Connect, send query, get response.
1105
- def PerformRequest(command, request, additional = nil)
1106
- cmd = command.to_s.upcase
1107
- command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1108
- command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1109
-
1110
- sock = self.Connect
1111
- len = request.to_s.length + (additional != nil ? 8 : 0)
1112
- header = [command_id, command_ver, len].pack('nnN')
1113
- header << [0, additional].pack('NN') if additional != nil
1114
- sock.send(header + request.to_s, 0)
1115
- response = self.GetResponse(sock, command_ver)
1116
- return Response.new(response)
1117
- end
1118
-
1119
- # :stopdoc:
1120
- def assert
1121
- raise 'Assertion failed!' unless yield if $DEBUG
1122
- end
1123
- # :startdoc:
1124
- end
1125
- end
1
+ # = client.rb - Sphinx Client API
2
+ #
3
+ # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
+ # Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
5
+ # License:: Distributed under the same terms as Ruby
6
+ # Version:: 0.9.9-r1299
7
+ # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
+ #
9
+ # This library is distributed under the terms of the Ruby license.
10
+ # You can freely distribute/modify this library.
11
+
12
+ # ==Sphinx Client API
13
+ #
14
+ # The Sphinx Client API is used to communicate with <tt>searchd</tt>
15
+ # daemon and get search results from Sphinx.
16
+ #
17
+ # ===Usage
18
+ #
19
+ # sphinx = Sphinx::Client.new
20
+ # result = sphinx.Query('test')
21
+ # ids = result['matches'].map { |match| match['id'] }.join(',')
22
+ # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
+ #
24
+ # docs = posts.map(&:body)
25
+ # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
+
27
+ require 'socket'
28
+
29
+ module Sphinx
30
+ # :stopdoc:
31
+
32
+ class SphinxError < StandardError; end
33
+ class SphinxArgumentError < SphinxError; end
34
+ class SphinxConnectError < SphinxError; end
35
+ class SphinxResponseError < SphinxError; end
36
+ class SphinxInternalError < SphinxError; end
37
+ class SphinxTemporaryError < SphinxError; end
38
+ class SphinxUnknownError < SphinxError; end
39
+
40
+ # :startdoc:
41
+
42
+ class Client
43
+
44
+ # :stopdoc:
45
+
46
+ # Known searchd commands
47
+
48
+ # search command
49
+ SEARCHD_COMMAND_SEARCH = 0
50
+ # excerpt command
51
+ SEARCHD_COMMAND_EXCERPT = 1
52
+ # update command
53
+ SEARCHD_COMMAND_UPDATE = 2
54
+ # keywords command
55
+ SEARCHD_COMMAND_KEYWORDS = 3
56
+
57
+ # Current client-side command implementation versions
58
+
59
+ # search command version
60
+ VER_COMMAND_SEARCH = 0x119
61
+ # excerpt command version
62
+ VER_COMMAND_EXCERPT = 0x102
63
+ # update command version
64
+ VER_COMMAND_UPDATE = 0x102
65
+ # keywords command version
66
+ VER_COMMAND_KEYWORDS = 0x100
67
+
68
+ # Known searchd status codes
69
+
70
+ # general success, command-specific reply follows
71
+ SEARCHD_OK = 0
72
+ # general failure, command-specific reply may follow
73
+ SEARCHD_ERROR = 1
74
+ # temporary failure, client should retry later
75
+ SEARCHD_RETRY = 2
76
+ # general success, warning message and command-specific reply follow
77
+ SEARCHD_WARNING = 3
78
+
79
+ # :startdoc:
80
+
81
+ # Known match modes
82
+
83
+ # match all query words
84
+ SPH_MATCH_ALL = 0
85
+ # match any query word
86
+ SPH_MATCH_ANY = 1
87
+ # match this exact phrase
88
+ SPH_MATCH_PHRASE = 2
89
+ # match this boolean query
90
+ SPH_MATCH_BOOLEAN = 3
91
+ # match this extended query
92
+ SPH_MATCH_EXTENDED = 4
93
+ # match all document IDs w/o fulltext query, apply filters
94
+ SPH_MATCH_FULLSCAN = 5
95
+ # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
96
+ SPH_MATCH_EXTENDED2 = 6
97
+
98
+ # Known ranking modes (ext2 only)
99
+
100
+ # default mode, phrase proximity major factor and BM25 minor one
101
+ SPH_RANK_PROXIMITY_BM25 = 0
102
+ # statistical mode, BM25 ranking only (faster but worse quality)
103
+ SPH_RANK_BM25 = 1
104
+ # no ranking, all matches get a weight of 1
105
+ SPH_RANK_NONE = 2
106
+ # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
107
+ SPH_RANK_WORDCOUNT = 3
108
+ # phrase proximity
109
+ SPH_RANK_PROXIMITY = 4
110
+
111
+ # Known sort modes
112
+
113
+ # sort by document relevance desc, then by date
114
+ SPH_SORT_RELEVANCE = 0
115
+ # sort by document date desc, then by relevance desc
116
+ SPH_SORT_ATTR_DESC = 1
117
+ # sort by document date asc, then by relevance desc
118
+ SPH_SORT_ATTR_ASC = 2
119
+ # sort by time segments (hour/day/week/etc) desc, then by relevance desc
120
+ SPH_SORT_TIME_SEGMENTS = 3
121
+ # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
122
+ SPH_SORT_EXTENDED = 4
123
+ # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
124
+ SPH_SORT_EXPR = 5
125
+
126
+ # Known filter types
127
+
128
+ # filter by integer values set
129
+ SPH_FILTER_VALUES = 0
130
+ # filter by integer range
131
+ SPH_FILTER_RANGE = 1
132
+ # filter by float range
133
+ SPH_FILTER_FLOATRANGE = 2
134
+
135
+ # Known attribute types
136
+
137
+ # this attr is just an integer
138
+ SPH_ATTR_INTEGER = 1
139
+ # this attr is a timestamp
140
+ SPH_ATTR_TIMESTAMP = 2
141
+ # this attr is an ordinal string number (integer at search time,
142
+ # specially handled at indexing time)
143
+ SPH_ATTR_ORDINAL = 3
144
+ # this attr is a boolean bit field
145
+ SPH_ATTR_BOOL = 4
146
+ # this attr is a float
147
+ SPH_ATTR_FLOAT = 5
148
+ # signed 64-bit integer
149
+ SPH_ATTR_BIGINT = 6
150
+ # string
151
+ SPH_ATTR_STRING = 7
152
+ # this attr has multiple values (0 or more)
153
+ SPH_ATTR_MULTI = 0x40000001
154
+ SPH_ATTR_MULTI64 = 0x40000002
155
+
156
+ # Known grouping functions
157
+
158
+ # group by day
159
+ SPH_GROUPBY_DAY = 0
160
+ # group by week
161
+ SPH_GROUPBY_WEEK = 1
162
+ # group by month
163
+ SPH_GROUPBY_MONTH = 2
164
+ # group by year
165
+ SPH_GROUPBY_YEAR = 3
166
+ # group by attribute value
167
+ SPH_GROUPBY_ATTR = 4
168
+ # group by sequential attrs pair
169
+ SPH_GROUPBY_ATTRPAIR = 5
170
+
171
+ # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
172
+ def initialize
173
+ # per-client-object settings
174
+ @host = 'localhost' # searchd host (default is "localhost")
175
+ @port = 9312 # searchd port (default is 9312)
176
+
177
+ # per-query settings
178
+ @offset = 0 # how many records to seek from result-set start (default is 0)
179
+ @limit = 20 # how many records to return from result-set starting at offset (default is 20)
180
+ @mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
181
+ @weights = [] # per-field weights (default is 1 for all fields)
182
+ @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
183
+ @sortby = '' # attribute to sort by (default is "")
184
+ @min_id = 0 # min ID to match (default is 0, which means no limit)
185
+ @max_id = 0 # max ID to match (default is 0, which means no limit)
186
+ @filters = [] # search filters
187
+ @groupby = '' # group-by attribute name
188
+ @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
189
+ @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
190
+ @groupdistinct = '' # group-by count-distinct attribute
191
+ @maxmatches = 1000 # max matches to retrieve
192
+ @cutoff = 0 # cutoff to stop searching at (default is 0)
193
+ @retrycount = 0 # distributed retries count
194
+ @retrydelay = 0 # distributed retries delay
195
+ @anchor = [] # geographical anchor point
196
+ @indexweights = [] # per-index weights
197
+ @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
198
+ @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
199
+ @fieldweights = {} # per-field-name weights
200
+ @overrides = [] # per-query attribute values overrides
201
+ @select = '*' # select-list (attributes or expressions, with optional aliases)
202
+
203
+ # per-reply fields (for single-query case)
204
+ @error = '' # last error message
205
+ @warning = '' # last warning message
206
+
207
+ @reqs = [] # requests storage (for multi-query case)
208
+ @mbenc = '' # stored mbstring encoding
209
+ end
210
+
211
+ # Get last error message.
212
+ def GetLastError
213
+ @error
214
+ end
215
+
216
+ # Get last warning message.
217
+ def GetLastWarning
218
+ @warning
219
+ end
220
+
221
+ # Set searchd host name (string) and port (integer).
222
+ def SetServer(host, port)
223
+ assert { host.instance_of? String }
224
+ assert { port.instance_of? Fixnum }
225
+
226
+ @host = host
227
+ @port = port
228
+ end
229
+
230
+ # Set offset and count into result set,
231
+ # and optionally set max-matches and cutoff limits.
232
+ def SetLimits(offset, limit, max = 0, cutoff = 0)
233
+ assert { offset.instance_of? Fixnum }
234
+ assert { limit.instance_of? Fixnum }
235
+ assert { max.instance_of? Fixnum }
236
+ assert { offset >= 0 }
237
+ assert { limit > 0 }
238
+ assert { max >= 0 }
239
+
240
+ @offset = offset
241
+ @limit = limit
242
+ @maxmatches = max if max > 0
243
+ @cutoff = cutoff if cutoff > 0
244
+ end
245
+
246
+ # Set maximum query time, in milliseconds, per-index,
247
+ # integer, 0 means "do not limit"
248
+ def SetMaxQueryTime(max)
249
+ assert { max.instance_of? Fixnum }
250
+ assert { max >= 0 }
251
+ @maxquerytime = max
252
+ end
253
+
254
+ # Set matching mode.
255
+ def SetMatchMode(mode)
256
+ assert { mode == SPH_MATCH_ALL \
257
+ || mode == SPH_MATCH_ANY \
258
+ || mode == SPH_MATCH_PHRASE \
259
+ || mode == SPH_MATCH_BOOLEAN \
260
+ || mode == SPH_MATCH_EXTENDED \
261
+ || mode == SPH_MATCH_FULLSCAN \
262
+ || mode == SPH_MATCH_EXTENDED2 }
263
+
264
+ @mode = mode
265
+ end
266
+
267
+ # Set ranking mode.
268
+ def SetRankingMode(ranker)
269
+ assert { ranker == SPH_RANK_PROXIMITY_BM25 \
270
+ || ranker == SPH_RANK_BM25 \
271
+ || ranker == SPH_RANK_NONE \
272
+ || ranker == SPH_RANK_WORDCOUNT \
273
+ || ranker == SPH_RANK_PROXIMITY }
274
+
275
+ @ranker = ranker
276
+ end
277
+
278
+ # Set matches sorting mode.
279
+ def SetSortMode(mode, sortby = '')
280
+ assert { mode == SPH_SORT_RELEVANCE \
281
+ || mode == SPH_SORT_ATTR_DESC \
282
+ || mode == SPH_SORT_ATTR_ASC \
283
+ || mode == SPH_SORT_TIME_SEGMENTS \
284
+ || mode == SPH_SORT_EXTENDED \
285
+ || mode == SPH_SORT_EXPR }
286
+ assert { sortby.instance_of? String }
287
+ assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
288
+
289
+ @sort = mode
290
+ @sortby = sortby
291
+ end
292
+
293
+ # Bind per-field weights by order.
294
+ #
295
+ # DEPRECATED; use SetFieldWeights() instead.
296
+ def SetWeights(weights)
297
+ assert { weights.instance_of? Array }
298
+ weights.each do |weight|
299
+ assert { weight.instance_of? Fixnum }
300
+ end
301
+
302
+ @weights = weights
303
+ end
304
+
305
+ # Bind per-field weights by name.
306
+ #
307
+ # Takes string (field name) to integer name (field weight) hash as an argument.
308
+ # * Takes precedence over SetWeights().
309
+ # * Unknown names will be silently ignored.
310
+ # * Unbound fields will be silently given a weight of 1.
311
+ def SetFieldWeights(weights)
312
+ assert { weights.instance_of? Hash }
313
+ weights.each do |name, weight|
314
+ assert { name.instance_of? String }
315
+ assert { weight.instance_of? Fixnum }
316
+ end
317
+
318
+ @fieldweights = weights
319
+ end
320
+
321
+ # Bind per-index weights by name.
322
+ def SetIndexWeights(weights)
323
+ assert { weights.instance_of? Hash }
324
+ weights.each do |index, weight|
325
+ assert { index.instance_of? String }
326
+ assert { weight.instance_of? Fixnum }
327
+ end
328
+
329
+ @indexweights = weights
330
+ end
331
+
332
+ # Set IDs range to match.
333
+ #
334
+ # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
335
+ def SetIDRange(min, max)
336
+ assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
337
+ assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
338
+ assert { min <= max }
339
+
340
+ @min_id = min
341
+ @max_id = max
342
+ end
343
+
344
+ # Set values filter.
345
+ #
346
+ # Only match those records where <tt>attribute</tt> column values
347
+ # are in specified set.
348
+ def SetFilter(attribute, values, exclude = false)
349
+ assert { attribute.instance_of? String }
350
+ assert { values.instance_of? Array }
351
+ assert { !values.empty? }
352
+
353
+ if values.instance_of?(Array) && values.size > 0
354
+ values.each do |value|
355
+ assert { value.instance_of? Fixnum }
356
+ end
357
+
358
+ @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
359
+ end
360
+ end
361
+
362
+ # Set range filter.
363
+ #
364
+ # Only match those records where <tt>attribute</tt> column value
365
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
366
+ def SetFilterRange(attribute, min, max, exclude = false)
367
+ assert { attribute.instance_of? String }
368
+ assert { min.instance_of? Fixnum or min.instance_of? Bignum }
369
+ assert { max.instance_of? Fixnum or max.instance_of? Bignum }
370
+ assert { min <= max }
371
+
372
+ @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
373
+ end
374
+
375
+ # Set float range filter.
376
+ #
377
+ # Only match those records where <tt>attribute</tt> column value
378
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
379
+ def SetFilterFloatRange(attribute, min, max, exclude = false)
380
+ assert { attribute.instance_of? String }
381
+ assert { min.instance_of? Float }
382
+ assert { max.instance_of? Float }
383
+ assert { min <= max }
384
+
385
+ @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
386
+ end
387
+
388
+ # Setup anchor point for geosphere distance calculations.
389
+ #
390
+ # Required to use <tt>@geodist</tt> in filters and sorting;
391
+ # distance will be computed to this point. Latitude and longitude
392
+ # must be in radians.
393
+ #
394
+ # * <tt>attrlat</tt> -- is the name of latitude attribute
395
+ # * <tt>attrlong</tt> -- is the name of longitude attribute
396
+ # * <tt>lat</tt> -- is anchor point latitude, in radians
397
+ # * <tt>long</tt> -- is anchor point longitude, in radians
398
+ def SetGeoAnchor(attrlat, attrlong, lat, long)
399
+ assert { attrlat.instance_of? String }
400
+ assert { attrlong.instance_of? String }
401
+ assert { lat.instance_of? Float }
402
+ assert { long.instance_of? Float }
403
+
404
+ @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
405
+ end
406
+
407
+ # Set grouping attribute and function.
408
+ #
409
+ # In grouping mode, all matches are assigned to different groups
410
+ # based on grouping function value.
411
+ #
412
+ # Each group keeps track of the total match count, and the best match
413
+ # (in this group) according to current sorting function.
414
+ #
415
+ # The final result set contains one best match per group, with
416
+ # grouping function value and matches count attached.
417
+ #
418
+ # Groups in result set could be sorted by any sorting clause,
419
+ # including both document attributes and the following special
420
+ # internal Sphinx attributes:
421
+ #
422
+ # * @id - match document ID;
423
+ # * @weight, @rank, @relevance - match weight;
424
+ # * @group - groupby function value;
425
+ # * @count - amount of matches in group.
426
+ #
427
+ # the default mode is to sort by groupby value in descending order,
428
+ # ie. by '@group desc'.
429
+ #
430
+ # 'total_found' would contain total amount of matching groups over
431
+ # the whole index.
432
+ #
433
+ # WARNING: grouping is done in fixed memory and thus its results
434
+ # are only approximate; so there might be more groups reported
435
+ # in total_found than actually present. @count might also
436
+ # be underestimated.
437
+ #
438
+ # For example, if sorting by relevance and grouping by "published"
439
+ # attribute with SPH_GROUPBY_DAY function, then the result set will
440
+ # contain one most relevant match per each day when there were any
441
+ # matches published, with day number and per-day match count attached,
442
+ # and sorted by day number in descending order (ie. recent days first).
443
+ def SetGroupBy(attribute, func, groupsort = '@group desc')
444
+ assert { attribute.instance_of? String }
445
+ assert { groupsort.instance_of? String }
446
+ assert { func == SPH_GROUPBY_DAY \
447
+ || func == SPH_GROUPBY_WEEK \
448
+ || func == SPH_GROUPBY_MONTH \
449
+ || func == SPH_GROUPBY_YEAR \
450
+ || func == SPH_GROUPBY_ATTR \
451
+ || func == SPH_GROUPBY_ATTRPAIR }
452
+
453
+ @groupby = attribute
454
+ @groupfunc = func
455
+ @groupsort = groupsort
456
+ end
457
+
458
+ # Set count-distinct attribute for group-by queries.
459
+ def SetGroupDistinct(attribute)
460
+ assert { attribute.instance_of? String }
461
+ @groupdistinct = attribute
462
+ end
463
+
464
+ # Set distributed retries count and delay.
465
+ def SetRetries(count, delay = 0)
466
+ assert { count.instance_of? Fixnum }
467
+ assert { delay.instance_of? Fixnum }
468
+
469
+ @retrycount = count
470
+ @retrydelay = delay
471
+ end
472
+
473
+ # Set attribute values override
474
+ #
475
+ # There can be only one override per attribute.
476
+ # +values+ must be a hash that maps document IDs to attribute values.
477
+ def SetOverride(attrname, attrtype, values)
478
+ assert { attrname.instance_of? String }
479
+ assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
480
+ assert { values.instance_of? Hash }
481
+
482
+ @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
483
+ end
484
+
485
+ # Set select-list (attributes or expressions), SQL-like syntax.
486
+ def SetSelect(select)
487
+ assert { select.instance_of? String }
488
+ @select = select
489
+ end
490
+
491
+ # Clear all filters (for multi-queries).
492
+ def ResetFilters
493
+ @filters = []
494
+ @anchor = []
495
+ end
496
+
497
+ # Clear groupby settings (for multi-queries).
498
+ def ResetGroupBy
499
+ @groupby = ''
500
+ @groupfunc = SPH_GROUPBY_DAY
501
+ @groupsort = '@group desc'
502
+ @groupdistinct = ''
503
+ end
504
+
505
+ # Clear all attribute value overrides (for multi-queries).
506
+ def ResetOverrides
507
+ @overrides = []
508
+ end
509
+
510
+ # Connect to searchd server and run given search query.
511
+ #
512
+ # <tt>query</tt> is the query string
513
+
514
+ # <tt>index</tt> is index name (or names) to query. default value is "*" which means
515
+ # to query all indexes. Accepted characters for index names are letters, numbers,
516
+ # dash, and underscore; everything else is considered a separator. Therefore,
517
+ # all the following calls are valid and will search two indexes:
518
+ #
519
+ # sphinx.Query('test query', 'main delta')
520
+ # sphinx.Query('test query', 'main;delta')
521
+ # sphinx.Query('test query', 'main, delta')
522
+ #
523
+ # Index order matters. If identical IDs are found in two or more indexes,
524
+ # weight and attribute values from the very last matching index will be used
525
+ # for sorting and returning to client. Therefore, in the example above,
526
+ # matches from "delta" index will always "win" over matches from "main".
527
+ #
528
+ # Returns false on failure.
529
+ # Returns hash which has the following keys on success:
530
+ #
531
+ # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
532
+ # * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
533
+ # * <tt>'total_found'</tt> -- total amount of matching documents in index
534
+ # * <tt>'time'</tt> -- search time
535
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
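+ #
+ # A short sketch of reading the result (the 'articles' index name is an
+ # illustrative assumption):
+ #
+ #   sphinx = Sphinx::Client.new
+ #   result = sphinx.Query('test query', 'articles')
+ #   if result
+ #     puts "#{result['total_found']} documents found in #{result['time']}s"
+ #     result['matches'].each do |match|
+ #       puts "id=#{match['id']} weight=#{match['weight']}"
+ #     end
+ #   end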
536
+ def Query(query, index = '*', comment = '')
537
+ assert { @reqs.empty? }
538
+ @reqs = []
539
+
540
+ self.AddQuery(query, index, comment)
541
+ results = self.RunQueries
542
+
543
+ # probably network error; error message should be already filled
544
+ return false unless results.instance_of?(Array)
545
+
546
+ @error = results[0]['error']
547
+ @warning = results[0]['warning']
548
+
549
+ return false if results[0]['status'] == SEARCHD_ERROR
550
+ return results[0]
551
+ end
552
+
553
+ # Add query to batch.
554
+ #
555
+ # Batch queries enable searchd to perform internal optimizations,
556
+ # if possible; and reduce network connection overheads in all cases.
557
+ #
558
+ # For instance, running exactly the same query with different
559
+ # groupby settings will enable searchd to perform the expensive
560
+ # full-text search and ranking operation only once, but compute
561
+ # multiple groupby results from its output.
562
+ #
563
+ # Parameters are exactly the same as in <tt>Query</tt> call.
564
+ # Returns an index into the results array returned by the <tt>RunQueries</tt> call.
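+ #
+ # A batch sketch (index and attribute names are illustrative assumptions):
+ #
+ #   sphinx = Sphinx::Client.new
+ #   sphinx.AddQuery('test query', 'articles')
+ #   sphinx.SetGroupBy('author_id', Sphinx::Client::SPH_GROUPBY_ATTR)
+ #   sphinx.AddQuery('test query', 'articles')
+ #   results = sphinx.RunQueries
+ #   results.each { |r| puts r['error'].empty? ? r['total_found'] : r['error'] } if results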
565
+ def AddQuery(query, index = '*', comment = '')
566
+ # build request
567
+
568
+ # mode and limits
569
+ request = Request.new
570
+ request.put_int @offset, @limit, @mode, @ranker, @sort
571
+ request.put_string @sortby
572
+ # query itself
573
+ request.put_string query
574
+ # weights
575
+ request.put_int_array @weights
576
+ # indexes
577
+ request.put_string index
578
+ # id64 range marker
579
+ request.put_int 1
580
+ # id64 range
581
+ request.put_int64 @min_id.to_i, @max_id.to_i
582
+
583
+ # filters
584
+ request.put_int @filters.length
585
+ @filters.each do |filter|
586
+ request.put_string filter['attr']
587
+ request.put_int filter['type']
588
+
589
+ case filter['type']
590
+ when SPH_FILTER_VALUES
591
+ request.put_int64_array filter['values']
592
+ when SPH_FILTER_RANGE
593
+ request.put_int64 filter['min'], filter['max']
594
+ when SPH_FILTER_FLOATRANGE
595
+ request.put_float filter['min'], filter['max']
596
+ else
597
+ raise SphinxInternalError, 'Internal error: unhandled filter type'
598
+ end
599
+ request.put_int filter['exclude'] ? 1 : 0
600
+ end
601
+
602
+ # group-by clause, max-matches count, group-sort clause, cutoff count
603
+ request.put_int @groupfunc
604
+ request.put_string @groupby
605
+ request.put_int @maxmatches
606
+ request.put_string @groupsort
607
+ request.put_int @cutoff, @retrycount, @retrydelay
608
+ request.put_string @groupdistinct
609
+
610
+ # anchor point
611
+ if @anchor.empty?
612
+ request.put_int 0
613
+ else
614
+ request.put_int 1
615
+ request.put_string @anchor['attrlat'], @anchor['attrlong']
616
+ request.put_float @anchor['lat'], @anchor['long']
617
+ end
618
+
619
+ # per-index weights
620
+ request.put_int @indexweights.length
621
+ @indexweights.each do |idx, weight|
622
+ request.put_string idx
623
+ request.put_int weight
624
+ end
625
+
626
+ # max query time
627
+ request.put_int @maxquerytime
628
+
629
+ # per-field weights
630
+ request.put_int @fieldweights.length
631
+ @fieldweights.each do |field, weight|
632
+ request.put_string field
633
+ request.put_int weight
634
+ end
635
+
636
+ # comment
637
+ request.put_string comment
638
+
639
+ # attribute overrides
640
+ request.put_int @overrides.length
641
+ for entry in @overrides do
642
+ request.put_string entry['attr']
643
+ request.put_int entry['type'], entry['values'].size
644
+ entry['values'].each do |id, val|
645
+ assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
646
+ assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
647
+
648
+ request.put_int64 id
649
+ case entry['type']
650
+ when SPH_ATTR_FLOAT
651
+ request.put_float val
652
+ when SPH_ATTR_BIGINT
653
+ request.put_int64 val
654
+ else
655
+ request.put_int val
656
+ end
657
+ end
658
+ end
659
+
660
+ # select-list
661
+ request.put_string @select
662
+
663
+ # store request to requests array
664
+ @reqs << request.to_s;
665
+ return @reqs.length - 1
666
+ end
667
+
668
+ # Run queries batch.
669
+ #
670
+ # Returns an array of result sets on success.
671
+ # Returns false on network IO failure.
672
+ #
673
+ # Each result set in the returned array is a hash which contains
674
+ # the same keys as the hash returned by <tt>Query</tt>, plus:
675
+ #
676
+ # * <tt>'error'</tt> -- search error for this query
677
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
678
+ def RunQueries
679
+ if @reqs.empty?
680
+ @error = 'No queries defined, issue AddQuery() first'
681
+ return false
682
+ end
683
+
684
+ req = @reqs.join('')
685
+ nreqs = @reqs.length
686
+ @reqs = []
687
+ response = PerformRequest(:search, req, nreqs)
688
+
689
+ # parse response
690
+ begin
691
+ results = []
692
+ ires = 0
693
+ while ires < nreqs
694
+ ires += 1
695
+ result = {}
696
+
697
+ result['error'] = ''
698
+ result['warning'] = ''
699
+
700
+ # extract status
701
+ status = result['status'] = response.get_int
702
+ if status != SEARCHD_OK
703
+ message = response.get_string
704
+ if status == SEARCHD_WARNING
705
+ result['warning'] = message
706
+ else
707
+ result['error'] = message
708
+ results << result
709
+ next
710
+ end
711
+ end
712
+
713
+ # read schema
714
+ fields = []
715
+ attrs = {}
716
+ attrs_names_in_order = []
717
+
718
+ nfields = response.get_int
719
+ while nfields > 0
720
+ nfields -= 1
721
+ fields << response.get_string
722
+ end
723
+ result['fields'] = fields
724
+
725
+ nattrs = response.get_int
726
+ while nattrs > 0
727
+ nattrs -= 1
728
+ attr = response.get_string
729
+ type = response.get_int
730
+ attrs[attr] = type
731
+ attrs_names_in_order << attr
732
+ end
733
+ result['attrs'] = attrs
734
+
735
+ # read match count
736
+ count = response.get_int
737
+ id64 = response.get_int
738
+
739
+ # read matches
740
+ result['matches'] = []
741
+ while count > 0
742
+ count -= 1
743
+
744
+ if id64 != 0
745
+ doc = response.get_int64
746
+ weight = response.get_int
747
+ else
748
+ doc, weight = response.get_ints(2)
749
+ end
750
+
751
+ r = {} # This is a single result put in the result['matches'] array
752
+ r['id'] = doc
753
+ r['weight'] = weight
754
+ attrs_names_in_order.each do |a|
755
+ r['attrs'] ||= {}
756
+
757
+ case attrs[a]
758
+ when SPH_ATTR_BIGINT
759
+ # handle 64-bit ints
760
+ r['attrs'][a] = response.get_int64
761
+ when SPH_ATTR_FLOAT
762
+ # handle floats
763
+ r['attrs'][a] = response.get_float
764
+ when SPH_ATTR_STRING
765
+ # handle string
766
+ r['attrs'][a] = response.get_string
767
+ else
768
+ # handle everything else as unsigned ints
769
+ val = response.get_int
770
+ if attrs[a]==SPH_ATTR_MULTI
771
+ r['attrs'][a] = []
772
+ 1.upto(val) do
773
+ r['attrs'][a] << response.get_int
774
+ end
775
+ elsif attrs[a]==SPH_ATTR_MULTI64
776
+ r['attrs'][a] = []
777
+ val = val/2
778
+ 1.upto(val) do
779
+ r['attrs'][a] << response.get_int64
780
+ end
781
+ else
782
+ r['attrs'][a] = val
783
+ end
784
+ end
785
+ end
786
+ result['matches'] << r
787
+ end
788
+ result['total'], result['total_found'], msecs, words = response.get_ints(4)
789
+ result['time'] = '%.3f' % (msecs / 1000.0)
790
+
791
+ result['words'] = {}
792
+ while words > 0
793
+ words -= 1
794
+ word = response.get_string
795
+ docs, hits = response.get_ints(2)
796
+ result['words'][word] = { 'docs' => docs, 'hits' => hits }
797
+ end
798
+
799
+ results << result
800
+ end
801
+ #rescue EOFError
802
+ # @error = 'incomplete reply'
803
+ # raise SphinxResponseError, @error
804
+ end
805
+
806
+ return results
807
+ end
808
+
809
+ # Connect to searchd server and generate excerpts from the given documents.
810
+ #
811
+ # * <tt>docs</tt> -- an array of strings which represent the documents' contents
812
+ # * <tt>index</tt> -- a string specifying the index whose settings will be used
813
+ # for stemming, lexing and case folding
814
+ # * <tt>words</tt> -- a string which contains the words to highlight
815
+ # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
816
+ #
817
+ # You can use the following parameters:
818
+ # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
819
+ # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
820
+ # * <tt>'chunk_separator'</tt> -- a string to insert between excerpt chunks, default is " ... "
821
+ # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
822
+ # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
823
+ # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
824
+ # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
825
+ # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries set up in the tokenizer
826
+ # * <tt>'weight_order'</tt> -- whether to sort extracted passages by weight instead of document order (the default)
827
+ #
828
+ # Returns false on failure.
829
+ # Returns an array of string excerpts on success.
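+ #
+ # A minimal sketch (the 'articles' index name is an illustrative assumption):
+ #
+ #   sphinx = Sphinx::Client.new
+ #   docs = ['this is my test text to be highlighted']
+ #   excerpts = sphinx.BuildExcerpts(docs, 'articles', 'test text',
+ #     'before_match' => '<em>', 'after_match' => '</em>', 'limit' => 200)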
830
+ def BuildExcerpts(docs, index, words, opts = {})
831
+ assert { docs.instance_of? Array }
832
+ assert { index.instance_of? String }
833
+ assert { words.instance_of? String }
834
+ assert { opts.instance_of? Hash }
835
+
836
+ # fixup options
837
+ opts['before_match'] ||= '<b>';
838
+ opts['after_match'] ||= '</b>';
839
+ opts['chunk_separator'] ||= ' ... ';
840
+ opts['html_strip_mode'] ||= 'index';
841
+ opts['limit'] ||= 256;
842
+ opts['limit_passages'] ||= 0;
843
+ opts['limit_words'] ||= 0;
844
+ opts['around'] ||= 5;
845
+ opts['start_passage_id'] ||= 1;
846
+ opts['exact_phrase'] ||= false
847
+ opts['single_passage'] ||= false
848
+ opts['use_boundaries'] ||= false
849
+ opts['weight_order'] ||= false
850
+ opts['load_files'] ||= false
851
+ opts['allow_empty'] ||= false
852
+
853
+ # build request
854
+
855
+ # v.1.0 req
856
+ flags = 1
857
+ flags |= 2 if opts['exact_phrase']
858
+ flags |= 4 if opts['single_passage']
859
+ flags |= 8 if opts['use_boundaries']
860
+ flags |= 16 if opts['weight_order']
861
+ flags |= 32 if opts['query_mode']
862
+ flags |= 64 if opts['force_all_words']
863
+ flags |= 128 if opts['load_files']
864
+ flags |= 256 if opts['allow_empty']
865
+
866
+ request = Request.new
867
+ request.put_int 0, flags # mode=0, flags bitmask built above
868
+ # req index
869
+ request.put_string index
870
+ # req words
871
+ request.put_string words
872
+
873
+ # options
874
+ request.put_string opts['before_match']
875
+ request.put_string opts['after_match']
876
+ request.put_string opts['chunk_separator']
877
+ request.put_int opts['limit'].to_i, opts['around'].to_i
878
+
879
+ # options v1.2
880
+ request.put_int opts['limit_passages'].to_i
881
+ request.put_int opts['limit_words'].to_i
882
+ request.put_int opts['start_passage_id'].to_i
883
+ request.put_string opts['html_strip_mode']
884
+
885
+ # documents
886
+ request.put_int docs.size
887
+ docs.each do |doc|
888
+ assert { doc.instance_of? String }
889
+
890
+ request.put_string doc
891
+ end
892
+
893
+ response = PerformRequest(:excerpt, request)
894
+
895
+ # parse response
896
+ begin
897
+ res = []
898
+ docs.each do |doc|
899
+ res << response.get_string
900
+ end
901
+ rescue EOFError
902
+ @error = 'incomplete reply'
903
+ raise SphinxResponseError, @error
904
+ end
905
+ return res
906
+ end
907
+
908
+ # Connect to searchd server, and generate keyword list for a given query.
909
+ #
910
+ # Returns an array of keyword hashes on success; each hash contains 'tokenized' and 'normalized' forms, plus 'docs' and 'hits' counts when +hits+ is true.
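+ #
+ # For example (the 'articles' index name is an illustrative assumption):
+ #
+ #   sphinx = Sphinx::Client.new
+ #   keywords = sphinx.BuildKeywords('running shoes', 'articles', true)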
911
+ def BuildKeywords(query, index, hits)
912
+ assert { query.instance_of? String }
913
+ assert { index.instance_of? String }
914
+ assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
915
+
916
+ # build request
917
+ request = Request.new
918
+ # v.1.0 req
919
+ request.put_string query # req query
920
+ request.put_string index # req index
921
+ request.put_int hits ? 1 : 0
922
+
923
+ response = PerformRequest(:keywords, request)
924
+
925
+ # parse response
926
+ begin
927
+ res = []
928
+ nwords = response.get_int
929
+ 0.upto(nwords - 1) do |i|
930
+ tokenized = response.get_string
931
+ normalized = response.get_string
932
+
933
+ entry = { 'tokenized' => tokenized, 'normalized' => normalized }
934
+ entry['docs'], entry['hits'] = response.get_ints(2) if hits
935
+
936
+ res << entry
937
+ end
938
+ rescue EOFError
939
+ @error = 'incomplete reply'
940
+ raise SphinxResponseError, @error
941
+ end
942
+
943
+ return res
944
+ end
945
+
946
+ # Batch update given attributes in given rows in given indexes.
947
+ #
948
+ # * +index+ is a name of the index to be updated
949
+ # * +attrs+ is an array of attribute name strings.
950
+ # * +values+ is a hash where key is document id, and value is an array of
951
+ #   new attribute values
952
+ # * +mva+ identifies whether to update MVA (multi-value) attributes
953
+ #
954
+ # Returns number of actually updated documents (0 or more) on success.
955
+ # Returns -1 on failure.
956
+ #
957
+ # Usage example:
958
+ # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
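+ #
+ # MVA update sketch (the 'tag_ids' attribute name is an illustrative assumption):
+ #   sphinx.UpdateAttributes('test1', ['tag_ids'], { 1 => [[4, 5, 6]] }, true)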
959
+ def UpdateAttributes(index, attrs, values, mva = false)
960
+ # verify everything
961
+ assert { index.instance_of? String }
962
+ assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
963
+
964
+ assert { attrs.instance_of? Array }
965
+ attrs.each do |attr|
966
+ assert { attr.instance_of? String }
967
+ end
968
+
969
+ assert { values.instance_of? Hash }
970
+ values.each do |id, entry|
971
+ assert { id.instance_of? Fixnum }
972
+ assert { entry.instance_of? Array }
973
+ assert { entry.length == attrs.length }
974
+ entry.each do |v|
975
+ if mva
976
+ assert { v.instance_of? Array }
977
+ v.each { |vv| assert { vv.instance_of? Fixnum } }
978
+ else
979
+ assert { v.instance_of? Fixnum }
980
+ end
981
+ end
982
+ end
983
+
984
+ # build request
985
+ request = Request.new
986
+ request.put_string index
987
+
988
+ request.put_int attrs.length
989
+ for attr in attrs
990
+ request.put_string attr
991
+ request.put_int mva ? 1 : 0
992
+ end
993
+
994
+ request.put_int values.length
995
+ values.each do |id, entry|
996
+ request.put_int64 id
997
+ if mva
998
+ entry.each { |v| request.put_int_array v }
999
+ else
1000
+ request.put_int(*entry)
1001
+ end
1002
+ end
1003
+
1004
+ response = PerformRequest(:update, request)
1005
+
1006
+ # parse response
1007
+ begin
1008
+ return response.get_int
1009
+ rescue EOFError
1010
+ @error = 'incomplete reply'
1011
+ raise SphinxResponseError, @error
1012
+ end
1013
+ end
1014
+
1015
+ protected
1016
+
1017
+ # Connect to searchd server.
1018
+ def Connect
1019
+ begin
1020
+ if @host[0,1]=='/'
1021
+ sock = UNIXSocket.new(@host)
1022
+ else
1023
+ sock = TCPSocket.new(@host, @port)
1024
+ end
1025
+ rescue => err
1026
+ @error = "connection to #{@host}:#{@port} failed (error=#{err})"
1027
+ raise SphinxConnectError, @error
1028
+ end
1029
+
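+ # handshake: read the server's protocol version (a big-endian uint32)
+ # and advertise client protocol version 1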
1030
+ v = sock.recv(4).unpack('N*').first
1031
+ if v < 1
1032
+ sock.close
1033
+ @error = "expected searchd protocol version 1+, got version '#{v}'"
1034
+ raise SphinxConnectError, @error
1035
+ end
1036
+
1037
+ sock.send([1].pack('N'), 0)
1038
+ sock
1039
+ end
1040
+
1041
+ # Get and check response packet from searchd server.
1042
+ def GetResponse(sock, client_version)
1043
+ response = ''
1044
+ len = 0
1045
+
1046
+ header = sock.recv(8)
1047
+ if header.length == 8
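+ # reply header: 16-bit status, 16-bit command version, 32-bit body length (big-endian)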
1048
+ status, ver, len = header.unpack('n2N')
1049
+ left = len.to_i
1050
+ while left > 0 do
1051
+ begin
1052
+ chunk = sock.recv(left)
1053
+ if chunk
1054
+ response << chunk
1055
+ left -= chunk.length
1056
+ end
1057
+ rescue EOFError
1058
+ break
1059
+ end
1060
+ end
1061
+ end
1062
+ sock.close
1063
+
1064
+ # check response
1065
+ read = response.length
1066
+ if response.empty? or read != len.to_i
1067
+ @error = response.empty? \
1068
+ ? 'received zero-sized searchd response' \
1069
+ : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
1070
+ raise SphinxResponseError, @error
1071
+ end
1072
+
1073
+ # check status
1074
+ if (status == SEARCHD_WARNING)
1075
+ wlen = response[0, 4].unpack('N*').first
1076
+ @warning = response[4, wlen]
1077
+ return response[4 + wlen, response.length - 4 - wlen]
1078
+ end
1079
+
1080
+ if status == SEARCHD_ERROR
1081
+ @error = 'searchd error: ' + response[4, response.length - 4]
1082
+ raise SphinxInternalError, @error
1083
+ end
1084
+
1085
+ if status == SEARCHD_RETRY
1086
+ @error = 'temporary searchd error: ' + response[4, response.length - 4]
1087
+ raise SphinxTemporaryError, @error
1088
+ end
1089
+
1090
+ unless status == SEARCHD_OK
1091
+ @error = "unknown status code: '#{status}'"
1092
+ raise SphinxUnknownError, @error
1093
+ end
1094
+
1095
+ # check version
1096
+ if ver < client_version
1097
+ @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
1098
+ "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
1099
+ end
1100
+
1101
+ return response
1102
+ end
1103
+
1104
+ # Connect, send query, get response.
1105
+ def PerformRequest(command, request, additional = nil)
1106
+ cmd = command.to_s.upcase
1107
+ command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1108
+ command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1109
+
1110
+ sock = self.Connect
1111
+ len = request.to_s.length + (additional != nil ? 8 : 0)
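+ # request header: 16-bit command id, 16-bit command version, 32-bit body length;
+ # when +additional+ is given (the query count for search), a zero dword and that count are appended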
1112
+ header = [command_id, command_ver, len].pack('nnN')
1113
+ header << [0, additional].pack('NN') if additional != nil
1114
+ sock.send(header + request.to_s, 0)
1115
+ response = self.GetResponse(sock, command_ver)
1116
+ return Response.new(response)
1117
+ end
1118
+
1119
+ # :stopdoc:
1120
+ def assert
1121
+ raise 'Assertion failed!' unless yield if $DEBUG
1122
+ end
1123
+ # :startdoc:
1124
+ end
1125
+ end