zinx 0.0.5 → 0.0.6

@@ -1,6 +1,6 @@
- require File.dirname(__FILE__) + '/sphinx/request'
- require File.dirname(__FILE__) + '/sphinx/response'
- require File.dirname(__FILE__) + '/sphinx/client'
-
- module Sphinx
+ require File.dirname(__FILE__) + '/sphinx/request'
+ require File.dirname(__FILE__) + '/sphinx/response'
+ require File.dirname(__FILE__) + '/sphinx/client'
+
+ module Sphinx
  end
@@ -1,1125 +1,1125 @@
1
- # = client.rb - Sphinx Client API
2
- #
3
- # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
- # Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
5
- # License:: Distributes under the same terms as Ruby
6
- # Version:: 0.9.9-r1299
7
- # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
- #
9
- # This library is distributed under the terms of the Ruby license.
10
- # You can freely distribute/modify this library.
11
-
12
- # ==Sphinx Client API
13
- #
14
- # The Sphinx Client API is used to communicate with <tt>searchd</tt>
15
- # daemon and get search results from Sphinx.
16
- #
17
- # ===Usage
18
- #
19
- # sphinx = Sphinx::Client.new
20
- # result = sphinx.Query('test')
21
- # ids = result['matches'].map { |match| match['id'] }.join(',')
22
- # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
- #
24
- # docs = posts.map(&:body)
25
- # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
-
27
- require 'socket'
28
-
29
- module Sphinx
30
- # :stopdoc:
31
-
32
- class SphinxError < StandardError; end
33
- class SphinxArgumentError < SphinxError; end
34
- class SphinxConnectError < SphinxError; end
35
- class SphinxResponseError < SphinxError; end
36
- class SphinxInternalError < SphinxError; end
37
- class SphinxTemporaryError < SphinxError; end
38
- class SphinxUnknownError < SphinxError; end
39
-
40
- # :startdoc:
41
-
42
- class Client
43
-
44
- # :stopdoc:
45
-
46
- # Known searchd commands
47
-
48
- # search command
49
- SEARCHD_COMMAND_SEARCH = 0
50
- # excerpt command
51
- SEARCHD_COMMAND_EXCERPT = 1
52
- # update command
53
- SEARCHD_COMMAND_UPDATE = 2
54
- # keywords command
55
- SEARCHD_COMMAND_KEYWORDS = 3
56
-
57
- # Current client-side command implementation versions
58
-
59
- # search command version
60
- VER_COMMAND_SEARCH = 0x119
61
- # excerpt command version
62
- VER_COMMAND_EXCERPT = 0x102
63
- # update command version
64
- VER_COMMAND_UPDATE = 0x102
65
- # keywords command version
66
- VER_COMMAND_KEYWORDS = 0x100
67
-
68
- # Known searchd status codes
69
-
70
- # general success, command-specific reply follows
71
- SEARCHD_OK = 0
72
- # general failure, command-specific reply may follow
73
- SEARCHD_ERROR = 1
74
- # temporary failure, client should retry later
75
- SEARCHD_RETRY = 2
76
- # general success, warning message and command-specific reply follow
77
- SEARCHD_WARNING = 3
78
-
79
- # :startdoc:
80
-
81
- # Known match modes
82
-
83
- # match all query words
84
- SPH_MATCH_ALL = 0
85
- # match any query word
86
- SPH_MATCH_ANY = 1
87
- # match this exact phrase
88
- SPH_MATCH_PHRASE = 2
89
- # match this boolean query
90
- SPH_MATCH_BOOLEAN = 3
91
- # match this extended query
92
- SPH_MATCH_EXTENDED = 4
93
- # match all document IDs w/o fulltext query, apply filters
94
- SPH_MATCH_FULLSCAN = 5
95
- # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
96
- SPH_MATCH_EXTENDED2 = 6
97
-
98
- # Known ranking modes (ext2 only)
99
-
100
- # default mode, phrase proximity major factor and BM25 minor one
101
- SPH_RANK_PROXIMITY_BM25 = 0
102
- # statistical mode, BM25 ranking only (faster but worse quality)
103
- SPH_RANK_BM25 = 1
104
- # no ranking, all matches get a weight of 1
105
- SPH_RANK_NONE = 2
106
- # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
107
- SPH_RANK_WORDCOUNT = 3
108
- # phrase proximity
109
- SPH_RANK_PROXIMITY = 4
110
-
111
- # Known sort modes
112
-
113
- # sort by document relevance desc, then by date
114
- SPH_SORT_RELEVANCE = 0
115
- # sort by document date desc, then by relevance desc
116
- SPH_SORT_ATTR_DESC = 1
117
- # sort by document date asc, then by relevance desc
118
- SPH_SORT_ATTR_ASC = 2
119
- # sort by time segments (hour/day/week/etc) desc, then by relevance desc
120
- SPH_SORT_TIME_SEGMENTS = 3
121
- # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
122
- SPH_SORT_EXTENDED = 4
123
- # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
124
- SPH_SORT_EXPR = 5
125
-
126
- # Known filter types
127
-
128
- # filter by integer values set
129
- SPH_FILTER_VALUES = 0
130
- # filter by integer range
131
- SPH_FILTER_RANGE = 1
132
- # filter by float range
133
- SPH_FILTER_FLOATRANGE = 2
134
-
135
- # Known attribute types
136
-
137
- # this attr is just an integer
138
- SPH_ATTR_INTEGER = 1
139
- # this attr is a timestamp
140
- SPH_ATTR_TIMESTAMP = 2
141
- # this attr is an ordinal string number (integer at search time,
142
- # specially handled at indexing time)
143
- SPH_ATTR_ORDINAL = 3
144
- # this attr is a boolean bit field
145
- SPH_ATTR_BOOL = 4
146
- # this attr is a float
147
- SPH_ATTR_FLOAT = 5
148
- # signed 64-bit integer
149
- SPH_ATTR_BIGINT = 6
150
- # string
151
- SPH_ATTR_STRING = 7
152
- # this attr has multiple values (0 or more)
153
- SPH_ATTR_MULTI = 0x40000001
154
- SPH_ATTR_MULTI64 = 0x40000002
155
-
156
- # Known grouping functions
157
-
158
- # group by day
159
- SPH_GROUPBY_DAY = 0
160
- # group by week
161
- SPH_GROUPBY_WEEK = 1
162
- # group by month
163
- SPH_GROUPBY_MONTH = 2
164
- # group by year
165
- SPH_GROUPBY_YEAR = 3
166
- # group by attribute value
167
- SPH_GROUPBY_ATTR = 4
168
- # group by sequential attrs pair
169
- SPH_GROUPBY_ATTRPAIR = 5
170
-
171
- # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
172
- def initialize
173
- # per-client-object settings
174
- @host = 'localhost' # searchd host (default is "localhost")
175
- @port = 9312 # searchd port (default is 9312)
176
-
177
- # per-query settings
178
- @offset = 0 # how many records to seek from result-set start (default is 0)
179
- @limit = 20 # how many records to return from result-set starting at offset (default is 20)
180
- @mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
181
- @weights = [] # per-field weights (default is 1 for all fields)
182
- @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
183
- @sortby = '' # attribute to sort by (default is "")
184
- @min_id = 0 # min ID to match (default is 0, which means no limit)
185
- @max_id = 0 # max ID to match (default is 0, which means no limit)
186
- @filters = [] # search filters
187
- @groupby = '' # group-by attribute name
188
- @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
189
- @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
190
- @groupdistinct = '' # group-by count-distinct attribute
191
- @maxmatches = 1000 # max matches to retrieve
192
- @cutoff = 0 # cutoff to stop searching at (default is 0)
193
- @retrycount = 0 # distributed retries count
194
- @retrydelay = 0 # distributed retries delay
195
- @anchor = [] # geographical anchor point
196
- @indexweights = [] # per-index weights
197
- @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
198
- @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
199
- @fieldweights = {} # per-field-name weights
200
- @overrides = [] # per-query attribute values overrides
201
- @select = '*' # select-list (attributes or expressions, with optional aliases)
202
-
203
- # per-reply fields (for single-query case)
204
- @error = '' # last error message
205
- @warning = '' # last warning message
206
-
207
- @reqs = [] # requests storage (for multi-query case)
208
- @mbenc = '' # stored mbstring encoding
209
- end
210
-
211
- # Get last error message.
212
- def GetLastError
213
- @error
214
- end
215
-
216
- # Get last warning message.
217
- def GetLastWarning
218
- @warning
219
- end
220
-
221
- # Set searchd host name (string) and port (integer).
222
- def SetServer(host, port)
223
- assert { host.instance_of? String }
224
- assert { port.instance_of? Fixnum }
225
-
226
- @host = host
227
- @port = port
228
- end
229
-
230
- # Set offset and count into result set,
231
- # and optionally set max-matches and cutoff limits.
232
- def SetLimits(offset, limit, max = 0, cutoff = 0)
233
- assert { offset.instance_of? Fixnum }
234
- assert { limit.instance_of? Fixnum }
235
- assert { max.instance_of? Fixnum }
236
- assert { offset >= 0 }
237
- assert { limit > 0 }
238
- assert { max >= 0 }
239
-
240
- @offset = offset
241
- @limit = limit
242
- @maxmatches = max if max > 0
243
- @cutoff = cutoff if cutoff > 0
244
- end
245
-
246
- # Set maximum query time, in milliseconds, per-index,
247
- # integer, 0 means "do not limit"
248
- def SetMaxQueryTime(max)
249
- assert { max.instance_of? Fixnum }
250
- assert { max >= 0 }
251
- @maxquerytime = max
252
- end
253
-
254
- # Set matching mode.
255
- def SetMatchMode(mode)
256
- assert { mode == SPH_MATCH_ALL \
257
- || mode == SPH_MATCH_ANY \
258
- || mode == SPH_MATCH_PHRASE \
259
- || mode == SPH_MATCH_BOOLEAN \
260
- || mode == SPH_MATCH_EXTENDED \
261
- || mode == SPH_MATCH_FULLSCAN \
262
- || mode == SPH_MATCH_EXTENDED2 }
263
-
264
- @mode = mode
265
- end
266
-
267
- # Set ranking mode.
268
- def SetRankingMode(ranker)
269
- assert { ranker == SPH_RANK_PROXIMITY_BM25 \
270
- || ranker == SPH_RANK_BM25 \
271
- || ranker == SPH_RANK_NONE \
272
- || ranker == SPH_RANK_WORDCOUNT \
273
- || ranker == SPH_RANK_PROXIMITY }
274
-
275
- @ranker = ranker
276
- end
277
-
278
- # Set matches sorting mode.
279
- def SetSortMode(mode, sortby = '')
280
- assert { mode == SPH_SORT_RELEVANCE \
281
- || mode == SPH_SORT_ATTR_DESC \
282
- || mode == SPH_SORT_ATTR_ASC \
283
- || mode == SPH_SORT_TIME_SEGMENTS \
284
- || mode == SPH_SORT_EXTENDED \
285
- || mode == SPH_SORT_EXPR }
286
- assert { sortby.instance_of? String }
287
- assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
288
-
289
- @sort = mode
290
- @sortby = sortby
291
- end
292
-
293
- # Bind per-field weights by order.
294
- #
295
- # DEPRECATED; use SetFieldWeights() instead.
296
- def SetWeights(weights)
297
- assert { weights.instance_of? Array }
298
- weights.each do |weight|
299
- assert { weight.instance_of? Fixnum }
300
- end
301
-
302
- @weights = weights
303
- end
304
-
305
- # Bind per-field weights by name.
306
- #
307
- # Takes a string (field name) to integer (field weight) hash as an argument.
308
- # * Takes precedence over SetWeights().
309
- # * Unknown names will be silently ignored.
310
- # * Unbound fields will be silently given a weight of 1.
311
- def SetFieldWeights(weights)
312
- assert { weights.instance_of? Hash }
313
- weights.each do |name, weight|
314
- assert { name.instance_of? String }
315
- assert { weight.instance_of? Fixnum }
316
- end
317
-
318
- @fieldweights = weights
319
- end
320
-
321
- # Bind per-index weights by name.
322
- def SetIndexWeights(weights)
323
- assert { weights.instance_of? Hash }
324
- weights.each do |index, weight|
325
- assert { index.instance_of? String }
326
- assert { weight.instance_of? Fixnum }
327
- end
328
-
329
- @indexweights = weights
330
- end
331
-
332
- # Set IDs range to match.
333
- #
334
- # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
335
- def SetIDRange(min, max)
336
- assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
337
- assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
338
- assert { min <= max }
339
-
340
- @min_id = min
341
- @max_id = max
342
- end
343
-
344
- # Set values filter.
345
- #
346
- # Only match those records where <tt>attribute</tt> column values
347
- # are in specified set.
348
- def SetFilter(attribute, values, exclude = false)
349
- assert { attribute.instance_of? String }
350
- assert { values.instance_of? Array }
351
- assert { !values.empty? }
352
-
353
- if values.instance_of?(Array) && values.size > 0
354
- values.each do |value|
355
- assert { value.instance_of? Fixnum }
356
- end
357
-
358
- @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
359
- end
360
- end
361
-
362
- # Set range filter.
363
- #
364
- # Only match those records where <tt>attribute</tt> column value
365
- # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
366
- def SetFilterRange(attribute, min, max, exclude = false)
367
- assert { attribute.instance_of? String }
368
- assert { min.instance_of? Fixnum or min.instance_of? Bignum }
369
- assert { max.instance_of? Fixnum or max.instance_of? Bignum }
370
- assert { min <= max }
371
-
372
- @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
373
- end
374
-
375
- # Set float range filter.
376
- #
377
- # Only match those records where <tt>attribute</tt> column value
378
- # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
379
- def SetFilterFloatRange(attribute, min, max, exclude = false)
380
- assert { attribute.instance_of? String }
381
- assert { min.instance_of? Float }
382
- assert { max.instance_of? Float }
383
- assert { min <= max }
384
-
385
- @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
386
- end
387
-
388
- # Setup anchor point for geosphere distance calculations.
389
- #
390
- # Required to use <tt>@geodist</tt> in filters and sorting;
391
- # distance will be computed to this point. Latitude and longitude
392
- # must be in radians.
393
- #
394
- # * <tt>attrlat</tt> -- is the name of latitude attribute
395
- # * <tt>attrlong</tt> -- is the name of longitude attribute
396
- # * <tt>lat</tt> -- is anchor point latitude, in radians
397
- # * <tt>long</tt> -- is anchor point longitude, in radians
398
- def SetGeoAnchor(attrlat, attrlong, lat, long)
399
- assert { attrlat.instance_of? String }
400
- assert { attrlong.instance_of? String }
401
- assert { lat.instance_of? Float }
402
- assert { long.instance_of? Float }
403
-
404
- @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
405
- end
406
-
407
- # Set grouping attribute and function.
408
- #
409
- # In grouping mode, all matches are assigned to different groups
410
- # based on grouping function value.
411
- #
412
- # Each group keeps track of the total match count, and the best match
413
- # (in this group) according to current sorting function.
414
- #
415
- # The final result set contains one best match per group, with
416
- # grouping function value and matches count attached.
417
- #
418
- # Groups in result set could be sorted by any sorting clause,
419
- # including both document attributes and the following special
420
- # internal Sphinx attributes:
421
- #
422
- # * @id - match document ID;
423
- # * @weight, @rank, @relevance - match weight;
424
- # * @group - groupby function value;
425
- # * @count - amount of matches in group.
426
- #
427
- # the default mode is to sort by groupby value in descending order,
428
- # ie. by '@group desc'.
429
- #
430
- # 'total_found' would contain total amount of matching groups over
431
- # the whole index.
432
- #
433
- # WARNING: grouping is done in fixed memory and thus its results
434
- # are only approximate; so there might be more groups reported
435
- # in total_found than actually present. @count might also
436
- # be underestimated.
437
- #
438
- # For example, if sorting by relevance and grouping by "published"
439
- # attribute with SPH_GROUPBY_DAY function, then the result set will
440
- # contain one most relevant match per each day when there were any
441
- # matches published, with day number and per-day match count attached,
442
- # and sorted by day number in descending order (ie. recent days first).
443
- def SetGroupBy(attribute, func, groupsort = '@group desc')
444
- assert { attribute.instance_of? String }
445
- assert { groupsort.instance_of? String }
446
- assert { func == SPH_GROUPBY_DAY \
447
- || func == SPH_GROUPBY_WEEK \
448
- || func == SPH_GROUPBY_MONTH \
449
- || func == SPH_GROUPBY_YEAR \
450
- || func == SPH_GROUPBY_ATTR \
451
- || func == SPH_GROUPBY_ATTRPAIR }
452
-
453
- @groupby = attribute
454
- @groupfunc = func
455
- @groupsort = groupsort
456
- end
457
-
458
- # Set count-distinct attribute for group-by queries.
459
- def SetGroupDistinct(attribute)
460
- assert { attribute.instance_of? String }
461
- @groupdistinct = attribute
462
- end
463
-
464
- # Set distributed retries count and delay.
465
- def SetRetries(count, delay = 0)
466
- assert { count.instance_of? Fixnum }
467
- assert { delay.instance_of? Fixnum }
468
-
469
- @retrycount = count
470
- @retrydelay = delay
471
- end
472
-
473
- # Set attribute values override
474
- #
475
- # There can be only one override per attribute.
476
- # +values+ must be a hash that maps document IDs to attribute values.
477
- def SetOverride(attrname, attrtype, values)
478
- assert { attrname.instance_of? String }
479
- assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
480
- assert { values.instance_of? Hash }
481
-
482
- @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
483
- end
484
-
485
- # Set select-list (attributes or expressions), SQL-like syntax.
486
- def SetSelect(select)
487
- assert { select.instance_of? String }
488
- @select = select
489
- end
490
-
491
- # Clear all filters (for multi-queries).
492
- def ResetFilters
493
- @filters = []
494
- @anchor = []
495
- end
496
-
497
- # Clear groupby settings (for multi-queries).
498
- def ResetGroupBy
499
- @groupby = ''
500
- @groupfunc = SPH_GROUPBY_DAY
501
- @groupsort = '@group desc'
502
- @groupdistinct = ''
503
- end
504
-
505
- # Clear all attribute value overrides (for multi-queries).
506
- def ResetOverrides
507
- @overrides = []
508
- end
509
-
510
- # Connect to searchd server and run given search query.
511
- #
512
- # <tt>query</tt> is query string
513
-
514
- # <tt>index</tt> is index name (or names) to query. default value is "*" which means
515
- # to query all indexes. Accepted characters for index names are letters, numbers,
516
- # dash, and underscore; everything else is considered a separator. Therefore,
517
- # all the following calls are valid and will search two indexes:
518
- #
519
- # sphinx.Query('test query', 'main delta')
520
- # sphinx.Query('test query', 'main;delta')
521
- # sphinx.Query('test query', 'main, delta')
522
- #
523
- # Index order matters. If identical IDs are found in two or more indexes,
524
- # weight and attribute values from the very last matching index will be used
525
- # for sorting and returning to client. Therefore, in the example above,
526
- # matches from "delta" index will always "win" over matches from "main".
527
- #
528
- # Returns false on failure.
529
- # Returns hash which has the following keys on success:
530
- #
531
- # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
532
- # * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
533
- # * <tt>'total_found'</tt> -- total amount of matching documents in index
534
- # * <tt>'time'</tt> -- search time
535
- # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
536
- def Query(query, index = '*', comment = '')
537
- assert { @reqs.empty? }
538
- @reqs = []
539
-
540
- self.AddQuery(query, index, comment)
541
- results = self.RunQueries
542
-
543
- # probably network error; error message should be already filled
544
- return false unless results.instance_of?(Array)
545
-
546
- @error = results[0]['error']
547
- @warning = results[0]['warning']
548
-
549
- return false if results[0]['status'] == SEARCHD_ERROR
550
- return results[0]
551
- end
552
-
553
- # Add query to batch.
554
- #
555
- # Batch queries enable searchd to perform internal optimizations,
556
- # if possible; and reduce network connection overheads in all cases.
557
- #
558
- # For instance, running exactly the same query with different
559
- # groupby settings will enable searchd to perform expensive
560
- # full-text search and ranking operation only once, but compute
561
- # multiple groupby results from its output.
562
- #
563
- # Parameters are exactly the same as in <tt>Query</tt> call.
564
- # Returns index to results array returned by <tt>RunQueries</tt> call.
565
- def AddQuery(query, index = '*', comment = '')
566
- # build request
567
-
568
- # mode and limits
569
- request = Request.new
570
- request.put_int @offset, @limit, @mode, @ranker, @sort
571
- request.put_string @sortby
572
- # query itself
573
- request.put_string query
574
- # weights
575
- request.put_int_array @weights
576
- # indexes
577
- request.put_string index
578
- # id64 range marker
579
- request.put_int 1
580
- # id64 range
581
- request.put_int64 @min_id.to_i, @max_id.to_i
582
-
583
- # filters
584
- request.put_int @filters.length
585
- @filters.each do |filter|
586
- request.put_string filter['attr']
587
- request.put_int filter['type']
588
-
589
- case filter['type']
590
- when SPH_FILTER_VALUES
591
- request.put_int64_array filter['values']
592
- when SPH_FILTER_RANGE
593
- request.put_int64 filter['min'], filter['max']
594
- when SPH_FILTER_FLOATRANGE
595
- request.put_float filter['min'], filter['max']
596
- else
597
- raise SphinxInternalError, 'Internal error: unhandled filter type'
598
- end
599
- request.put_int filter['exclude'] ? 1 : 0
600
- end
601
-
602
- # group-by clause, max-matches count, group-sort clause, cutoff count
603
- request.put_int @groupfunc
604
- request.put_string @groupby
605
- request.put_int @maxmatches
606
- request.put_string @groupsort
607
- request.put_int @cutoff, @retrycount, @retrydelay
608
- request.put_string @groupdistinct
609
-
610
- # anchor point
611
- if @anchor.empty?
612
- request.put_int 0
613
- else
614
- request.put_int 1
615
- request.put_string @anchor['attrlat'], @anchor['attrlong']
616
- request.put_float @anchor['lat'], @anchor['long']
617
- end
618
-
619
- # per-index weights
620
- request.put_int @indexweights.length
621
- @indexweights.each do |idx, weight|
622
- request.put_string idx
623
- request.put_int weight
624
- end
625
-
626
- # max query time
627
- request.put_int @maxquerytime
628
-
629
- # per-field weights
630
- request.put_int @fieldweights.length
631
- @fieldweights.each do |field, weight|
632
- request.put_string field
633
- request.put_int weight
634
- end
635
-
636
- # comment
637
- request.put_string comment
638
-
639
- # attribute overrides
640
- request.put_int @overrides.length
641
- for entry in @overrides do
642
- request.put_string entry['attr']
643
- request.put_int entry['type'], entry['values'].size
644
- entry['values'].each do |id, val|
645
- assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
646
- assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
647
-
648
- request.put_int64 id
649
- case entry['type']
650
- when SPH_ATTR_FLOAT
651
- request.put_float val
652
- when SPH_ATTR_BIGINT
653
- request.put_int64 val
654
- else
655
- request.put_int val
656
- end
657
- end
658
- end
659
-
660
- # select-list
661
- request.put_string @select
662
-
663
- # store request to requests array
664
- @reqs << request.to_s;
665
- return @reqs.length - 1
666
- end
667
-
668
- # Run queries batch.
669
- #
670
- # Returns an array of result sets on success.
671
- # Returns false on network IO failure.
672
- #
673
- # Each result set in the returned array is a hash which contains
674
- # the same keys as the hash returned by <tt>Query</tt>, plus:
675
- #
676
- # * <tt>'error'</tt> -- search error for this query
677
- # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
678
- def RunQueries
679
- if @reqs.empty?
680
- @error = 'No queries defined, issue AddQuery() first'
681
- return false
682
- end
683
-
684
- req = @reqs.join('')
685
- nreqs = @reqs.length
686
- @reqs = []
687
- response = PerformRequest(:search, req, nreqs)
688
-
689
- # parse response
690
- begin
691
- results = []
692
- ires = 0
693
- while ires < nreqs
694
- ires += 1
695
- result = {}
696
-
697
- result['error'] = ''
698
- result['warning'] = ''
699
-
700
- # extract status
701
- status = result['status'] = response.get_int
702
- if status != SEARCHD_OK
703
- message = response.get_string
704
- if status == SEARCHD_WARNING
705
- result['warning'] = message
706
- else
707
- result['error'] = message
708
- results << result
709
- next
710
- end
711
- end
712
-
713
- # read schema
714
- fields = []
715
- attrs = {}
716
- attrs_names_in_order = []
717
-
718
- nfields = response.get_int
719
- while nfields > 0
720
- nfields -= 1
721
- fields << response.get_string
722
- end
723
- result['fields'] = fields
724
-
725
- nattrs = response.get_int
726
- while nattrs > 0
727
- nattrs -= 1
728
- attr = response.get_string
729
- type = response.get_int
730
- attrs[attr] = type
731
- attrs_names_in_order << attr
732
- end
733
- result['attrs'] = attrs
734
-
735
- # read match count
736
- count = response.get_int
737
- id64 = response.get_int
738
-
739
- # read matches
740
- result['matches'] = []
741
- while count > 0
742
- count -= 1
743
-
744
- if id64 != 0
745
- doc = response.get_int64
746
- weight = response.get_int
747
- else
748
- doc, weight = response.get_ints(2)
749
- end
750
-
751
- r = {} # This is a single result put in the result['matches'] array
752
- r['id'] = doc
753
- r['weight'] = weight
754
- attrs_names_in_order.each do |a|
755
- r['attrs'] ||= {}
756
-
757
- case attrs[a]
758
- when SPH_ATTR_BIGINT
759
- # handle 64-bit ints
760
- r['attrs'][a] = response.get_int64
761
- when SPH_ATTR_FLOAT
762
- # handle floats
763
- r['attrs'][a] = response.get_float
764
- when SPH_ATTR_STRING
765
- # handle string
766
- r['attrs'][a] = response.get_string
767
- else
768
- # handle everything else as unsigned ints
769
- val = response.get_int
770
- if attrs[a]==SPH_ATTR_MULTI
771
- r['attrs'][a] = []
772
- 1.upto(val) do
773
- r['attrs'][a] << response.get_int
774
- end
775
- elsif attrs[a]==SPH_ATTR_MULTI64
776
- r['attrs'][a] = []
777
- val = val/2
778
- 1.upto(val) do
779
- r['attrs'][a] << response.get_int64
780
- end
781
- else
782
- r['attrs'][a] = val
783
- end
784
- end
785
- end
786
- result['matches'] << r
787
- end
788
- result['total'], result['total_found'], msecs, words = response.get_ints(4)
789
- result['time'] = '%.3f' % (msecs / 1000.0)
790
-
791
- result['words'] = {}
792
- while words > 0
793
- words -= 1
794
- word = response.get_string
795
- docs, hits = response.get_ints(2)
796
- result['words'][word] = { 'docs' => docs, 'hits' => hits }
797
- end
798
-
799
- results << result
800
- end
801
- #rescue EOFError
802
- # @error = 'incomplete reply'
803
- # raise SphinxResponseError, @error
804
- end
805
-
806
- return results
807
- end
808
-
809
- # Connect to searchd server and generate excerpts from given documents.
810
- #
811
- # * <tt>docs</tt> -- an array of strings which represent the documents' contents
812
- # * <tt>index</tt> -- a string specifying the index whose settings will be used
813
- # for stemming, lexing and case folding
814
- # * <tt>words</tt> -- a string which contains the words to highlight
815
- # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
816
- #
817
- # You can use following parameters:
818
- # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
819
- # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
820
- # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
821
- # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
822
- # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
823
- # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
824
- # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
825
- # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
826
- # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
827
- #
828
- # Returns false on failure.
829
- # Returns an array of string excerpts on success.
830
- def BuildExcerpts(docs, index, words, opts = {})
831
- assert { docs.instance_of? Array }
832
- assert { index.instance_of? String }
833
- assert { words.instance_of? String }
834
- assert { opts.instance_of? Hash }
835
-
836
- # fixup options
837
- opts['before_match'] ||= '<b>';
838
- opts['after_match'] ||= '</b>';
839
- opts['chunk_separator'] ||= ' ... ';
840
- opts['html_strip_mode'] ||= 'index';
841
- opts['limit'] ||= 256;
842
- opts['limit_passages'] ||= 0;
843
- opts['limit_words'] ||= 0;
844
- opts['around'] ||= 5;
845
- opts['start_passage_id'] ||= 1;
846
- opts['exact_phrase'] ||= false
847
- opts['single_passage'] ||= false
848
- opts['use_boundaries'] ||= false
849
- opts['weight_order'] ||= false
850
- opts['load_files'] ||= false
851
- opts['allow_empty'] ||= false
852
-
853
- # build request
854
-
855
- # v.1.0 req
856
- flags = 1
857
- flags |= 2 if opts['exact_phrase']
858
- flags |= 4 if opts['single_passage']
859
- flags |= 8 if opts['use_boundaries']
860
- flags |= 16 if opts['weight_order']
861
- flags |= 32 if opts['query_mode']
862
- flags |= 64 if opts['force_all_words']
863
- flags |= 128 if opts['load_files']
864
- flags |= 256 if opts['allow_empty']
865
-
866
- request = Request.new
867
- request.put_int 0, flags # mode=0, flags=1 (remove spaces)
868
- # req index
869
- request.put_string index
870
- # req words
871
- request.put_string words
872
-
873
- # options
874
- request.put_string opts['before_match']
875
- request.put_string opts['after_match']
876
- request.put_string opts['chunk_separator']
877
- request.put_int opts['limit'].to_i, opts['around'].to_i
878
-
879
- # options v1.2
880
- request.put_int opts['limit_passages'].to_i
881
- request.put_int opts['limit_words'].to_i
882
- request.put_int opts['start_passage_id'].to_i
883
- request.put_string opts['html_strip_mode']
884
-
885
- # documents
886
- request.put_int docs.size
887
- docs.each do |doc|
888
- assert { doc.instance_of? String }
889
-
890
- request.put_string doc
891
- end
892
-
893
- response = PerformRequest(:excerpt, request)
894
-
895
- # parse response
896
- begin
897
- res = []
898
- docs.each do |doc|
899
- res << response.get_string
900
- end
901
- rescue EOFError
902
- @error = 'incomplete reply'
903
- raise SphinxResponseError, @error
904
- end
905
- return res
906
- end
907
-
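To make the BuildExcerpts documentation above concrete, here is a minimal, hedged usage sketch; the require path, the 'posts' index name, the documents, and the option values are assumptions for illustration.

  require 'sphinx'   # assumes this gem's lib/sphinx.rb is on the load path

  sphinx = Sphinx::Client.new
  docs = ['first document body', 'second document body']
  opts = { 'before_match' => '<em>', 'after_match' => '</em>', 'limit' => 200 }
  # 'posts' is a made-up index name; its charset/morphology settings drive the highlighting
  excerpts = sphinx.BuildExcerpts(docs, 'posts', 'document body', opts)
  excerpts.each { |e| puts e } if excerpts
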
908
- # Connect to searchd server, and generate keyword list for a given query.
909
- #
910
- # Returns an array of words on success.
911
- def BuildKeywords(query, index, hits)
912
- assert { query.instance_of? String }
913
- assert { index.instance_of? String }
914
- assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
915
-
916
- # build request
917
- request = Request.new
918
- # v.1.0 req
919
- request.put_string query # req query
920
- request.put_string index # req index
921
- request.put_int hits ? 1 : 0
922
-
923
- response = PerformRequest(:keywords, request)
924
-
925
- # parse response
926
- begin
927
- res = []
928
- nwords = response.get_int
929
- 0.upto(nwords - 1) do |i|
930
- tokenized = response.get_string
931
- normalized = response.get_string
932
-
933
- entry = { 'tokenized' => tokenized, 'normalized' => normalized }
934
- entry['docs'], entry['hits'] = response.get_ints(2) if hits
935
-
936
- res << entry
937
- end
938
- rescue EOFError
939
- @error = 'incomplete reply'
940
- raise SphinxResponseError, @error
941
- end
942
-
943
- return res
944
- end
945
-
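A similar hedged sketch for BuildKeywords as documented above, with per-keyword statistics enabled; 'posts' is again a hypothetical index and the client is assumed to be loaded as in the previous sketch.

  sphinx = Sphinx::Client.new
  keywords = sphinx.BuildKeywords('running shoes', 'posts', true)   # hits = true adds 'docs'/'hits'
  keywords.each do |kw|
    puts "#{kw['tokenized']} -> #{kw['normalized']} (docs=#{kw['docs']}, hits=#{kw['hits']})"
  end
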
946
- # Batch update given attributes in given rows in given indexes.
947
- #
948
- # * +index+ is a name of the index to be updated
949
- # * +attrs+ is an array of attribute name strings.
950
- # * +values+ is a hash where key is document id, and value is an array of
951
- #   new attribute values
952
- # * +mva+ identifies whether to update MVA (multi-valued) attributes
953
- #
954
- # Returns number of actually updated documents (0 or more) on success.
955
- # Returns -1 on failure.
956
- #
957
- # Usage example:
958
- # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
959
- def UpdateAttributes(index, attrs, values, mva = false)
960
- # verify everything
961
- assert { index.instance_of? String }
962
- assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
963
-
964
- assert { attrs.instance_of? Array }
965
- attrs.each do |attr|
966
- assert { attr.instance_of? String }
967
- end
968
-
969
- assert { values.instance_of? Hash }
970
- values.each do |id, entry|
971
- assert { id.instance_of? Fixnum }
972
- assert { entry.instance_of? Array }
973
- assert { entry.length == attrs.length }
974
- entry.each do |v|
975
- if mva
976
- assert { v.instance_of? Array }
977
- v.each { |vv| assert { vv.instance_of? Fixnum } }
978
- else
979
- assert { v.instance_of? Fixnum }
980
- end
981
- end
982
- end
983
-
984
- # build request
985
- request = Request.new
986
- request.put_string index
987
-
988
- request.put_int attrs.length
989
- for attr in attrs
990
- request.put_string attr
991
- request.put_int mva ? 1 : 0
992
- end
993
-
994
- request.put_int values.length
995
- values.each do |id, entry|
996
- request.put_int64 id
997
- if mva
998
- entry.each { |v| request.put_int_array v }
999
- else
1000
- request.put_int(*entry)
1001
- end
1002
- end
1003
-
1004
- response = PerformRequest(:update, request)
1005
-
1006
- # parse response
1007
- begin
1008
- return response.get_int
1009
- rescue EOFError
1010
- @error = 'incomplete reply'
1011
- raise SphinxResponseError, @error
1012
- end
1013
- end
1014
-
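Two hedged UpdateAttributes sketches matching the signature above: a plain integer update and an MVA update (mva = true). The index name 'test1' is taken from the usage example in the comment; 'tag_ids' and the document IDs are made up.

  sphinx = Sphinx::Client.new
  # plain attribute: set group_id = 456 for document 1
  updated = sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
  # MVA attribute: replace tag_ids for documents 1 and 2 (each value is itself an array)
  updated = sphinx.UpdateAttributes('test1', ['tag_ids'], { 1 => [[4, 8, 15]], 2 => [[16, 23]] }, true)
  warn 'update failed' if updated == -1
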
1015
- protected
1016
-
1017
- # Connect to searchd server.
1018
- def Connect
1019
- begin
1020
- if @host[0,1]=='/'
1021
- sock = UNIXSocket.new(@host)
1022
- else
1023
- sock = TCPSocket.new(@host, @port)
1024
- end
1025
- rescue => err
1026
- @error = "connection to #{@host}:#{@port} failed (error=#{err})"
1027
- raise SphinxConnectError, @error
1028
- end
1029
-
1030
- v = sock.recv(4).unpack('N*').first
1031
- if v < 1
1032
- sock.close
1033
- @error = "expected searchd protocol version 1+, got version '#{v}'"
1034
- raise SphinxConnectError, @error
1035
- end
1036
-
1037
- sock.send([1].pack('N'), 0)
1038
- sock
1039
- end
1040
-
1041
- # Get and check response packet from searchd server.
1042
- def GetResponse(sock, client_version)
1043
- response = ''
1044
- len = 0
1045
-
1046
- header = sock.recv(8)
1047
- if header.length == 8
1048
- status, ver, len = header.unpack('n2N')
1049
- left = len.to_i
1050
- while left > 0 do
1051
- begin
1052
- chunk = sock.recv(left)
1053
- if chunk
1054
- response << chunk
1055
- left -= chunk.length
1056
- end
1057
- rescue EOFError
1058
- break
1059
- end
1060
- end
1061
- end
1062
- sock.close
1063
-
1064
- # check response
1065
- read = response.length
1066
- if response.empty? or read != len.to_i
1067
- @error = response.empty? \
1068
- ? 'received zero-sized searchd response' \
1069
- : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
1070
- raise SphinxResponseError, @error
1071
- end
1072
-
1073
- # check status
1074
- if (status == SEARCHD_WARNING)
1075
- wlen = response[0, 4].unpack('N*').first
1076
- @warning = response[4, wlen]
1077
- return response[4 + wlen, response.length - 4 - wlen]
1078
- end
1079
-
1080
- if status == SEARCHD_ERROR
1081
- @error = 'searchd error: ' + response[4, response.length - 4]
1082
- raise SphinxInternalError, @error
1083
- end
1084
-
1085
- if status == SEARCHD_RETRY
1086
- @error = 'temporary searchd error: ' + response[4, response.length - 4]
1087
- raise SphinxTemporaryError, @error
1088
- end
1089
-
1090
- unless status == SEARCHD_OK
1091
- @error = "unknown status code: '#{status}'"
1092
- raise SphinxUnknownError, @error
1093
- end
1094
-
1095
- # check version
1096
- if ver < client_version
1097
- @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
1098
- "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
1099
- end
1100
-
1101
- return response
1102
- end
1103
-
1104
- # Connect, send query, get response.
1105
- def PerformRequest(command, request, additional = nil)
1106
- cmd = command.to_s.upcase
1107
- command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1108
- command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1109
-
1110
- sock = self.Connect
1111
- len = request.to_s.length + (additional != nil ? 8 : 0)
1112
- header = [command_id, command_ver, len].pack('nnN')
1113
- header << [0, additional].pack('NN') if additional != nil
1114
- sock.send(header + request.to_s, 0)
1115
- response = self.GetResponse(sock, command_ver)
1116
- return Response.new(response)
1117
- end
1118
-
1119
- # :stopdoc:
1120
- def assert
1121
- raise 'Assertion failed!' unless yield if $DEBUG
1122
- end
1123
- # :startdoc:
1124
- end
1125
- end
1
+ # = client.rb - Sphinx Client API
2
+ #
3
+ # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
+ # Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
5
+ # License:: Distributes under the same terms as Ruby
6
+ # Version:: 0.9.9-r1299
7
+ # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
+ #
9
+ # This library is distributed under the terms of the Ruby license.
10
+ # You can freely distribute/modify this library.
11
+
12
+ # ==Sphinx Client API
13
+ #
14
+ # The Sphinx Client API is used to communicate with <tt>searchd</tt>
15
+ # daemon and get search results from Sphinx.
16
+ #
17
+ # ===Usage
18
+ #
19
+ # sphinx = Sphinx::Client.new
20
+ # result = sphinx.Query('test')
21
+ # ids = result['matches'].map { |match| match['id'] }.join(',')
22
+ # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
+ #
24
+ # docs = posts.map(&:body)
25
+ # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
+
27
+ require 'socket'
28
+
29
+ module Sphinx
30
+ # :stopdoc:
31
+
32
+ class SphinxError < StandardError; end
33
+ class SphinxArgumentError < SphinxError; end
34
+ class SphinxConnectError < SphinxError; end
35
+ class SphinxResponseError < SphinxError; end
36
+ class SphinxInternalError < SphinxError; end
37
+ class SphinxTemporaryError < SphinxError; end
38
+ class SphinxUnknownError < SphinxError; end
39
+
40
+ # :startdoc:
41
+
42
+ class Client
43
+
44
+ # :stopdoc:
45
+
46
+ # Known searchd commands
47
+
48
+ # search command
49
+ SEARCHD_COMMAND_SEARCH = 0
50
+ # excerpt command
51
+ SEARCHD_COMMAND_EXCERPT = 1
52
+ # update command
53
+ SEARCHD_COMMAND_UPDATE = 2
54
+ # keywords command
55
+ SEARCHD_COMMAND_KEYWORDS = 3
56
+
57
+ # Current client-side command implementation versions
58
+
59
+ # search command version
60
+ VER_COMMAND_SEARCH = 0x119
61
+ # excerpt command version
62
+ VER_COMMAND_EXCERPT = 0x102
63
+ # update command version
64
+ VER_COMMAND_UPDATE = 0x102
65
+ # keywords command version
66
+ VER_COMMAND_KEYWORDS = 0x100
67
+
68
+ # Known searchd status codes
69
+
70
+ # general success, command-specific reply follows
71
+ SEARCHD_OK = 0
72
+ # general failure, command-specific reply may follow
73
+ SEARCHD_ERROR = 1
74
+ # temporary failure, client should retry later
75
+ SEARCHD_RETRY = 2
76
+ # general success, warning message and command-specific reply follow
77
+ SEARCHD_WARNING = 3
78
+
79
+ # :startdoc:
80
+
81
+ # Known match modes
82
+
83
+ # match all query words
84
+ SPH_MATCH_ALL = 0
85
+ # match any query word
86
+ SPH_MATCH_ANY = 1
87
+ # match this exact phrase
88
+ SPH_MATCH_PHRASE = 2
89
+ # match this boolean query
90
+ SPH_MATCH_BOOLEAN = 3
91
+ # match this extended query
92
+ SPH_MATCH_EXTENDED = 4
93
+ # match all document IDs w/o fulltext query, apply filters
94
+ SPH_MATCH_FULLSCAN = 5
95
+ # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
96
+ SPH_MATCH_EXTENDED2 = 6
97
+
98
+ # Known ranking modes (ext2 only)
99
+
100
+ # default mode, phrase proximity major factor and BM25 minor one
101
+ SPH_RANK_PROXIMITY_BM25 = 0
102
+ # statistical mode, BM25 ranking only (faster but worse quality)
103
+ SPH_RANK_BM25 = 1
104
+ # no ranking, all matches get a weight of 1
105
+ SPH_RANK_NONE = 2
106
+ # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
107
+ SPH_RANK_WORDCOUNT = 3
108
+ # phrase proximity
109
+ SPH_RANK_PROXIMITY = 4
110
+
111
+ # Known sort modes
112
+
113
+ # sort by document relevance desc, then by date
114
+ SPH_SORT_RELEVANCE = 0
115
+ # sort by document date desc, then by relevance desc
116
+ SPH_SORT_ATTR_DESC = 1
117
+ # sort by document date asc, then by relevance desc
118
+ SPH_SORT_ATTR_ASC = 2
119
+ # sort by time segments (hour/day/week/etc) desc, then by relevance desc
120
+ SPH_SORT_TIME_SEGMENTS = 3
121
+ # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
122
+ SPH_SORT_EXTENDED = 4
123
+ # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
124
+ SPH_SORT_EXPR = 5
125
+
126
+ # Known filter types
127
+
128
+ # filter by integer values set
129
+ SPH_FILTER_VALUES = 0
130
+ # filter by integer range
131
+ SPH_FILTER_RANGE = 1
132
+ # filter by float range
133
+ SPH_FILTER_FLOATRANGE = 2
134
+
135
+ # Known attribute types
136
+
137
+ # this attr is just an integer
138
+ SPH_ATTR_INTEGER = 1
139
+ # this attr is a timestamp
140
+ SPH_ATTR_TIMESTAMP = 2
141
+ # this attr is an ordinal string number (integer at search time,
142
+ # specially handled at indexing time)
143
+ SPH_ATTR_ORDINAL = 3
144
+ # this attr is a boolean bit field
145
+ SPH_ATTR_BOOL = 4
146
+ # this attr is a float
147
+ SPH_ATTR_FLOAT = 5
148
+ # signed 64-bit integer
149
+ SPH_ATTR_BIGINT = 6
150
+ # string
151
+ SPH_ATTR_STRING = 7
152
+ # this attr has multiple values (0 or more)
153
+ SPH_ATTR_MULTI = 0x40000001
154
+ SPH_ATTR_MULTI64 = 0x40000002
155
+
156
+ # Known grouping functions
157
+
158
+ # group by day
159
+ SPH_GROUPBY_DAY = 0
160
+ # group by week
161
+ SPH_GROUPBY_WEEK = 1
162
+ # group by month
163
+ SPH_GROUPBY_MONTH = 2
164
+ # group by year
165
+ SPH_GROUPBY_YEAR = 3
166
+ # group by attribute value
167
+ SPH_GROUPBY_ATTR = 4
168
+ # group by sequential attrs pair
169
+ SPH_GROUPBY_ATTRPAIR = 5
170
+
171
+ # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
172
+ def initialize
173
+ # per-client-object settings
174
+ @host = 'localhost' # searchd host (default is "localhost")
175
+ @port = 9312 # searchd port (default is 9312)
176
+
177
+ # per-query settings
178
+ @offset = 0 # how many records to seek from result-set start (default is 0)
179
+ @limit = 20 # how many records to return from result-set starting at offset (default is 20)
180
+ @mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
181
+ @weights = [] # per-field weights (default is 1 for all fields)
182
+ @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
183
+ @sortby = '' # attribute to sort by (default is "")
184
+ @min_id = 0 # min ID to match (default is 0, which means no limit)
185
+ @max_id = 0 # max ID to match (default is 0, which means no limit)
186
+ @filters = [] # search filters
187
+ @groupby = '' # group-by attribute name
188
+ @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
189
+ @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
190
+ @groupdistinct = '' # group-by count-distinct attribute
191
+ @maxmatches = 1000 # max matches to retrieve
192
+ @cutoff = 0 # cutoff to stop searching at (default is 0)
193
+ @retrycount = 0 # distributed retries count
194
+ @retrydelay = 0 # distributed retries delay
195
+ @anchor = [] # geographical anchor point
196
+ @indexweights = [] # per-index weights
197
+ @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
198
+ @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
199
+ @fieldweights = {} # per-field-name weights
200
+ @overrides = [] # per-query attribute values overrides
201
+ @select = '*' # select-list (attributes or expressions, with optional aliases)
202
+
203
+ # per-reply fields (for single-query case)
204
+ @error = '' # last error message
205
+ @warning = '' # last warning message
206
+
207
+ @reqs = [] # requests storage (for multi-query case)
208
+ @mbenc = '' # stored mbstring encoding
209
+ end
210
+
211
+ # Get last error message.
212
+ def GetLastError
213
+ @error
214
+ end
215
+
216
+ # Get last warning message.
217
+ def GetLastWarning
218
+ @warning
219
+ end
220
+
221
+ # Set searchd host name (string) and port (integer).
222
+ def SetServer(host, port)
223
+ assert { host.instance_of? String }
224
+ assert { port.instance_of? Fixnum }
225
+
226
+ @host = host
227
+ @port = port
228
+ end
229
+
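As a quick illustration of SetServer above, a hedged sketch pointing the client at a non-default searchd host; the host name and the 'posts' index are assumptions, and the client library is assumed to be loaded.

  sphinx = Sphinx::Client.new
  sphinx.SetServer('search.internal.example', 9312)   # 9312 is the default searchd port
  result = sphinx.Query('test', 'posts')
  warn sphinx.GetLastError if result == false
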
230
+ # Set offset and count into result set,
231
+ # and optionally set max-matches and cutoff limits.
232
+ def SetLimits(offset, limit, max = 0, cutoff = 0)
233
+ assert { offset.instance_of? Fixnum }
234
+ assert { limit.instance_of? Fixnum }
235
+ assert { max.instance_of? Fixnum }
236
+ assert { offset >= 0 }
237
+ assert { limit > 0 }
238
+ assert { max >= 0 }
239
+
240
+ @offset = offset
241
+ @limit = limit
242
+ @maxmatches = max if max > 0
243
+ @cutoff = cutoff if cutoff > 0
244
+ end
245
+
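A small pagination sketch built on the SetLimits signature above (offset, limit, optional max-matches and cutoff); the page size and the 'posts' index name are illustrative.

  sphinx   = Sphinx::Client.new
  page     = 3
  per_page = 20
  sphinx.SetLimits((page - 1) * per_page, per_page)              # returns records 40..59
  # the optional third/fourth arguments cap max-matches and stop searching after a cutoff:
  # sphinx.SetLimits((page - 1) * per_page, per_page, 1000, 10_000)
  result = sphinx.Query('ruby sphinx', 'posts')
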
246
+ # Set maximum query time, in milliseconds, per-index,
247
+ # integer, 0 means "do not limit"
248
+ def SetMaxQueryTime(max)
249
+ assert { max.instance_of? Fixnum }
250
+ assert { max >= 0 }
251
+ @maxquerytime = max
252
+ end
253
+
254
+ # Set matching mode.
255
+ def SetMatchMode(mode)
256
+ assert { mode == SPH_MATCH_ALL \
257
+ || mode == SPH_MATCH_ANY \
258
+ || mode == SPH_MATCH_PHRASE \
259
+ || mode == SPH_MATCH_BOOLEAN \
260
+ || mode == SPH_MATCH_EXTENDED \
261
+ || mode == SPH_MATCH_FULLSCAN \
262
+ || mode == SPH_MATCH_EXTENDED2 }
263
+
264
+ @mode = mode
265
+ end
266
+
267
+ # Set ranking mode.
268
+ def SetRankingMode(ranker)
269
+ assert { ranker == SPH_RANK_PROXIMITY_BM25 \
270
+ || ranker == SPH_RANK_BM25 \
271
+ || ranker == SPH_RANK_NONE \
272
+ || ranker == SPH_RANK_WORDCOUNT \
273
+ || ranker == SPH_RANK_PROXIMITY }
274
+
275
+ @ranker = ranker
276
+ end
277
+
278
+ # Set matches sorting mode.
279
+ def SetSortMode(mode, sortby = '')
280
+ assert { mode == SPH_SORT_RELEVANCE \
281
+ || mode == SPH_SORT_ATTR_DESC \
282
+ || mode == SPH_SORT_ATTR_ASC \
283
+ || mode == SPH_SORT_TIME_SEGMENTS \
284
+ || mode == SPH_SORT_EXTENDED \
285
+ || mode == SPH_SORT_EXPR }
286
+ assert { sortby.instance_of? String }
287
+ assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
288
+
289
+ @sort = mode
290
+ @sortby = sortby
291
+ end
292
+
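A hedged example of the SPH_SORT_EXTENDED mode listed above, using the SQL-like clause syntax from its comment; the 'created_at' attribute and the 'posts' index are assumptions.

  sphinx = Sphinx::Client.new
  sphinx.SetSortMode(Sphinx::Client::SPH_SORT_EXTENDED, '@weight DESC, created_at DESC')
  result = sphinx.Query('ruby', 'posts')
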
293
+ # Bind per-field weights by order.
294
+ #
295
+ # DEPRECATED; use SetFieldWeights() instead.
296
+ def SetWeights(weights)
297
+ assert { weights.instance_of? Array }
298
+ weights.each do |weight|
299
+ assert { weight.instance_of? Fixnum }
300
+ end
301
+
302
+ @weights = weights
303
+ end
304
+
305
+ # Bind per-field weights by name.
306
+ #
307
+ # Takes a string (field name) to integer (field weight) hash as an argument.
308
+ # * Takes precedence over SetWeights().
309
+ # * Unknown names will be silently ignored.
310
+ # * Unbound fields will be silently given a weight of 1.
311
+ def SetFieldWeights(weights)
312
+ assert { weights.instance_of? Hash }
313
+ weights.each do |name, weight|
314
+ assert { name.instance_of? String }
315
+ assert { weight.instance_of? Fixnum }
316
+ end
317
+
318
+ @fieldweights = weights
319
+ end
320
+
321
+ # Bind per-index weights by name.
322
+ def SetIndexWeights(weights)
323
+ assert { weights.instance_of? Hash }
324
+ weights.each do |index, weight|
325
+ assert { index.instance_of? String }
326
+ assert { weight.instance_of? Fixnum }
327
+ end
328
+
329
+ @indexweights = weights
330
+ end
331
+
332
+ # Set IDs range to match.
333
+ #
334
+ # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
335
+ def SetIDRange(min, max)
336
+ assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
337
+ assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
338
+ assert { min <= max }
339
+
340
+ @min_id = min
341
+ @max_id = max
342
+ end
343
+
344
+ # Set values filter.
345
+ #
346
+ # Only match those records where <tt>attribute</tt> column values
347
+ # are in specified set.
348
+ def SetFilter(attribute, values, exclude = false)
349
+ assert { attribute.instance_of? String }
350
+ assert { values.instance_of? Array }
351
+ assert { !values.empty? }
352
+
353
+ if values.instance_of?(Array) && values.size > 0
354
+ values.each do |value|
355
+ assert { value.instance_of? Fixnum }
356
+ end
357
+
358
+ @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
359
+ end
360
+ end
361
+
362
+ # Set range filter.
363
+ #
364
+ # Only match those records where <tt>attribute</tt> column value
365
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
366
+ def SetFilterRange(attribute, min, max, exclude = false)
367
+ assert { attribute.instance_of? String }
368
+ assert { min.instance_of? Fixnum or min.instance_of? Bignum }
369
+ assert { max.instance_of? Fixnum or max.instance_of? Bignum }
370
+ assert { min <= max }
371
+
372
+ @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
373
+ end
374
+
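A short sketch combining the value-set and integer-range filters defined above; 'group_id', 'deleted' and 'created_at' are made-up attribute names and the timestamps are arbitrary.

  sphinx = Sphinx::Client.new
  sphinx.SetFilter('group_id', [1, 5, 7])                              # keep these groups only
  sphinx.SetFilter('deleted', [1], true)                               # exclude = true drops deleted docs
  sphinx.SetFilterRange('created_at', 1_262_304_000, 1_293_839_999)    # keep a timestamp window
  result = sphinx.Query('ruby', 'posts')
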
375
+ # Set float range filter.
376
+ #
377
+ # Only match those records where <tt>attribute</tt> column value
378
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
379
+ def SetFilterFloatRange(attribute, min, max, exclude = false)
380
+ assert { attribute.instance_of? String }
381
+ assert { min.instance_of? Float }
382
+ assert { max.instance_of? Float }
383
+ assert { min <= max }
384
+
385
+ @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
386
+ end
387
+
388
+ # Setup anchor point for geosphere distance calculations.
389
+ #
390
+ # Required to use <tt>@geodist</tt> in filters and sorting;
391
+ # distance will be computed to this point. Latitude and longitude
392
+ # must be in radians.
393
+ #
394
+ # * <tt>attrlat</tt> -- is the name of latitude attribute
395
+ # * <tt>attrlong</tt> -- is the name of longitude attribute
396
+ # * <tt>lat</tt> -- is anchor point latitude, in radians
397
+ # * <tt>long</tt> -- is anchor point longitude, in radians
398
+ def SetGeoAnchor(attrlat, attrlong, lat, long)
399
+ assert { attrlat.instance_of? String }
400
+ assert { attrlong.instance_of? String }
401
+ assert { lat.instance_of? Float }
402
+ assert { long.instance_of? Float }
403
+
404
+ @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
405
+ end
406
+
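A sketch of the geo-anchor workflow implied above: set the anchor in radians, then filter and sort on the derived @geodist value. The 'lat'/'lng' attribute names, the coordinates, and the 'places' index are assumptions.

  sphinx  = Sphinx::Client.new
  deg2rad = Math::PI / 180
  sphinx.SetGeoAnchor('lat', 'lng', 40.7128 * deg2rad, -74.0060 * deg2rad)
  sphinx.SetFilterFloatRange('@geodist', 0.0, 25_000.0)                 # within ~25 km of the anchor
  sphinx.SetSortMode(Sphinx::Client::SPH_SORT_EXTENDED, '@geodist ASC')
  result = sphinx.Query('pizza', 'places')
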
407
+ # Set grouping attribute and function.
408
+ #
409
+ # In grouping mode, all matches are assigned to different groups
410
+ # based on grouping function value.
411
+ #
412
+ # Each group keeps track of the total match count, and the best match
413
+ # (in this group) according to current sorting function.
414
+ #
415
+ # The final result set contains one best match per group, with
416
+ # grouping function value and matches count attached.
417
+ #
418
+ # Groups in result set could be sorted by any sorting clause,
419
+ # including both document attributes and the following special
420
+ # internal Sphinx attributes:
421
+ #
422
+ # * @id - match document ID;
423
+ # * @weight, @rank, @relevance - match weight;
424
+ # * @group - groupby function value;
425
+ # * @count - amount of matches in group.
426
+ #
427
+ # the default mode is to sort by groupby value in descending order,
428
+ # ie. by '@group desc'.
429
+ #
430
+ # 'total_found' would contain total amount of matching groups over
431
+ # the whole index.
432
+ #
433
+ # WARNING: grouping is done in fixed memory and thus its results
434
+ # are only approximate; so there might be more groups reported
435
+ # in total_found than actually present. @count might also
436
+ # be underestimated.
437
+ #
438
+ # For example, if sorting by relevance and grouping by "published"
439
+ # attribute with SPH_GROUPBY_DAY function, then the result set will
440
+ # contain one most relevant match per each day when there were any
441
+ # matches published, with day number and per-day match count attached,
442
+ # and sorted by day number in descending order (ie. recent days first).
443
+ def SetGroupBy(attribute, func, groupsort = '@group desc')
444
+ assert { attribute.instance_of? String }
445
+ assert { groupsort.instance_of? String }
446
+ assert { func == SPH_GROUPBY_DAY \
447
+ || func == SPH_GROUPBY_WEEK \
448
+ || func == SPH_GROUPBY_MONTH \
449
+ || func == SPH_GROUPBY_YEAR \
450
+ || func == SPH_GROUPBY_ATTR \
451
+ || func == SPH_GROUPBY_ATTRPAIR }
452
+
453
+ @groupby = attribute
454
+ @groupfunc = func
455
+ @groupsort = groupsort
456
+ end
457
+
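To make the grouping discussion above concrete, a hedged sketch grouping matches by day of a hypothetical 'published' timestamp attribute, mirroring the SPH_GROUPBY_DAY example in the comment; reading '@groupby' and '@count' from the attrs hash reflects how searchd usually reports group data, not something stated in this file.

  sphinx = Sphinx::Client.new
  sphinx.SetGroupBy('published', Sphinx::Client::SPH_GROUPBY_DAY, '@group desc')
  result = sphinx.Query('ruby', 'posts')
  if result
    result['matches'].each do |match|
      # group-by queries typically expose '@groupby' and '@count' in the attrs hash
      puts "day=#{match['attrs']['@groupby']} count=#{match['attrs']['@count']} id=#{match['id']}"
    end
  end
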
458
+ # Set count-distinct attribute for group-by queries.
459
+ def SetGroupDistinct(attribute)
460
+ assert { attribute.instance_of? String }
461
+ @groupdistinct = attribute
462
+ end
463
+
464
+ # Set distributed retries count and delay.
465
+ def SetRetries(count, delay = 0)
466
+ assert { count.instance_of? Fixnum }
467
+ assert { delay.instance_of? Fixnum }
468
+
469
+ @retrycount = count
470
+ @retrydelay = delay
471
+ end
472
+
473
+ # Set attribute values override
474
+ #
475
+ # There can be only one override per attribute.
476
+ # +values+ must be a hash that maps document IDs to attribute values.
477
+ def SetOverride(attrname, attrtype, values)
478
+ assert { attrname.instance_of? String }
479
+ assert { [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT].include?(attrtype) }
480
+ assert { values.instance_of? Hash }
481
+
482
+ @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
483
+ end
484
+
485
+ # Set select-list (attributes or expressions), SQL-like syntax.
486
+ def SetSelect(select)
487
+ assert { select.instance_of? String }
488
+ @select = select
489
+ end
490
+
491
+ # Clear all filters (for multi-queries).
492
+ def ResetFilters
493
+ @filters = []
494
+ @anchor = []
495
+ end
496
+
497
+ # Clear groupby settings (for multi-queries).
498
+ def ResetGroupBy
499
+ @groupby = ''
500
+ @groupfunc = SPH_GROUPBY_DAY
501
+ @groupsort = '@group desc'
502
+ @groupdistinct = ''
503
+ end
504
+
505
+ # Clear all attribute value overrides (for multi-queries).
506
+ def ResetOverrides
507
+ @overrides = []
508
+ end
509
+
510
+ # Connect to searchd server and run given search query.
511
+ #
512
+ # <tt>query</tt> is query string
513
+
514
+ # <tt>index</tt> is index name (or names) to query. default value is "*" which means
515
+ # to query all indexes. Accepted characters for index names are letters, numbers,
516
+ # dash, and underscore; everything else is considered a separator. Therefore,
517
+ # all the following calls are valid and will search two indexes:
518
+ #
519
+ # sphinx.Query('test query', 'main delta')
520
+ # sphinx.Query('test query', 'main;delta')
521
+ # sphinx.Query('test query', 'main, delta')
522
+ #
523
+ # Index order matters. If identical IDs are found in two or more indexes,
524
+ # weight and attribute values from the very last matching index will be used
525
+ # for sorting and returning to client. Therefore, in the example above,
526
+ # matches from "delta" index will always "win" over matches from "main".
527
+ #
528
+ # Returns false on failure.
529
+ # Returns hash which has the following keys on success:
530
+ #
531
+ # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
532
+ # * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
533
+ # * <tt>'total_found'</tt> -- total amount of matching documents in index
534
+ # * <tt>'time'</tt> -- search time
535
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
536
+ def Query(query, index = '*', comment = '')
537
+ assert { @reqs.empty? }
538
+ @reqs = []
539
+
540
+ self.AddQuery(query, index, comment)
541
+ results = self.RunQueries
542
+
543
+ # probably a network error; the error message should already be filled
544
+ return false unless results.instance_of?(Array)
545
+
546
+ @error = results[0]['error']
547
+ @warning = results[0]['warning']
548
+
549
+ return false if results[0]['status'] == SEARCHD_ERROR
550
+ return results[0]
551
+ end
552
+
553
+ # Add query to batch.
554
+ #
555
+ # Batch queries enable searchd to perform internal optimizations,
556
+ # if possible, and reduce network connection overheads in all cases.
557
+ #
558
+ # For instance, running exactly the same query with different
559
+ # groupby settings will enable searchd to perform expensive
560
+ # full-text search and ranking operation only once, but compute
561
+ # multiple groupby results from its output.
562
+ #
563
+ # Parameters are exactly the same as in the <tt>Query</tt> call.
564
+ # Returns an index into the results array returned by the <tt>RunQueries</tt> call.
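+ #
+ # Batch sketch (not from the original docs; 'group_id' and 'site_id' are
+ # hypothetical attributes):
+ #
+ #   sphinx.SetGroupBy('group_id', Sphinx::Client::SPH_GROUPBY_ATTR)
+ #   i1 = sphinx.AddQuery('test query')
+ #   sphinx.ResetGroupBy
+ #   sphinx.SetGroupBy('site_id', Sphinx::Client::SPH_GROUPBY_ATTR)
+ #   i2 = sphinx.AddQuery('test query')
+ #   results  = sphinx.RunQueries
+ #   by_group = results[i1]
+ #   by_site  = results[i2]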
565
+ def AddQuery(query, index = '*', comment = '')
566
+ # build request
567
+
568
+ # mode and limits
569
+ request = Request.new
570
+ request.put_int @offset, @limit, @mode, @ranker, @sort
571
+ request.put_string @sortby
572
+ # query itself
573
+ request.put_string query
574
+ # weights
575
+ request.put_int_array @weights
576
+ # indexes
577
+ request.put_string index
578
+ # id64 range marker
579
+ request.put_int 1
580
+ # id64 range
581
+ request.put_int64 @min_id.to_i, @max_id.to_i
582
+
583
+ # filters
584
+ request.put_int @filters.length
585
+ @filters.each do |filter|
586
+ request.put_string filter['attr']
587
+ request.put_int filter['type']
588
+
589
+ case filter['type']
590
+ when SPH_FILTER_VALUES
591
+ request.put_int64_array filter['values']
592
+ when SPH_FILTER_RANGE
593
+ request.put_int64 filter['min'], filter['max']
594
+ when SPH_FILTER_FLOATRANGE
595
+ request.put_float filter['min'], filter['max']
596
+ else
597
+ raise SphinxInternalError, 'Internal error: unhandled filter type'
598
+ end
599
+ request.put_int filter['exclude'] ? 1 : 0
600
+ end
601
+
602
+ # group-by clause, max-matches count, group-sort clause, cutoff count
603
+ request.put_int @groupfunc
604
+ request.put_string @groupby
605
+ request.put_int @maxmatches
606
+ request.put_string @groupsort
607
+ request.put_int @cutoff, @retrycount, @retrydelay
608
+ request.put_string @groupdistinct
609
+
610
+ # anchor point
611
+ if @anchor.empty?
612
+ request.put_int 0
613
+ else
614
+ request.put_int 1
615
+ request.put_string @anchor['attrlat'], @anchor['attrlong']
616
+ request.put_float @anchor['lat'], @anchor['long']
617
+ end
618
+
619
+ # per-index weights
620
+ request.put_int @indexweights.length
621
+ @indexweights.each do |idx, weight|
622
+ request.put_string idx
623
+ request.put_int weight
624
+ end
625
+
626
+ # max query time
627
+ request.put_int @maxquerytime
628
+
629
+ # per-field weights
630
+ request.put_int @fieldweights.length
631
+ @fieldweights.each do |field, weight|
632
+ request.put_string field
633
+ request.put_int weight
634
+ end
635
+
636
+ # comment
637
+ request.put_string comment
638
+
639
+ # attribute overrides
640
+ request.put_int @overrides.length
641
+ for entry in @overrides do
642
+ request.put_string entry['attr']
643
+ request.put_int entry['type'], entry['values'].size
644
+ entry['values'].each do |id, val|
645
+ assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
646
+ assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }
647
+
648
+ request.put_int64 id
649
+ case entry['type']
650
+ when SPH_ATTR_FLOAT
651
+ request.put_float val
652
+ when SPH_ATTR_BIGINT
653
+ request.put_int64 val
654
+ else
655
+ request.put_int val
656
+ end
657
+ end
658
+ end
659
+
660
+ # select-list
661
+ request.put_string @select
662
+
663
+ # store request to requests array
664
+ @reqs << request.to_s;
665
+ return @reqs.length - 1
666
+ end
667
+
668
+ # Run queries batch.
669
+ #
670
+ # Returns an array of result sets on success.
671
+ # Returns false on network IO failure.
672
+ #
673
+ # Each result set in the returned array is a hash which contains
674
+ # the same keys as the hash returned by <tt>Query</tt>, plus:
675
+ #
676
+ # * <tt>'error'</tt> -- search error for this query
677
+ # * <tt>'warning'</tt> -- search warning for this query
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to a ('docs', 'hits') hash
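+ #
+ # Sketch of per-result error checking (not from the original docs):
+ #
+ #   results = sphinx.RunQueries
+ #   if results
+ #     results.each_with_index do |res, i|
+ #       warn "query #{i} failed: #{res['error']}" unless res['error'].empty?
+ #     end
+ #   end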
678
+ def RunQueries
679
+ if @reqs.empty?
680
+ @error = 'No queries defined, issue AddQuery() first'
681
+ return false
682
+ end
683
+
684
+ req = @reqs.join('')
685
+ nreqs = @reqs.length
686
+ @reqs = []
687
+ response = PerformRequest(:search, req, nreqs)
688
+
689
+ # parse response
690
+ begin
691
+ results = []
692
+ ires = 0
693
+ while ires < nreqs
694
+ ires += 1
695
+ result = {}
696
+
697
+ result['error'] = ''
698
+ result['warning'] = ''
699
+
700
+ # extract status
701
+ status = result['status'] = response.get_int
702
+ if status != SEARCHD_OK
703
+ message = response.get_string
704
+ if status == SEARCHD_WARNING
705
+ result['warning'] = message
706
+ else
707
+ result['error'] = message
708
+ results << result
709
+ next
710
+ end
711
+ end
712
+
713
+ # read schema
714
+ fields = []
715
+ attrs = {}
716
+ attrs_names_in_order = []
717
+
718
+ nfields = response.get_int
719
+ while nfields > 0
720
+ nfields -= 1
721
+ fields << response.get_string
722
+ end
723
+ result['fields'] = fields
724
+
725
+ nattrs = response.get_int
726
+ while nattrs > 0
727
+ nattrs -= 1
728
+ attr = response.get_string
729
+ type = response.get_int
730
+ attrs[attr] = type
731
+ attrs_names_in_order << attr
732
+ end
733
+ result['attrs'] = attrs
734
+
735
+ # read match count
736
+ count = response.get_int
737
+ id64 = response.get_int
738
+
739
+ # read matches
740
+ result['matches'] = []
741
+ while count > 0
742
+ count -= 1
743
+
744
+ if id64 != 0
745
+ doc = response.get_int64
746
+ weight = response.get_int
747
+ else
748
+ doc, weight = response.get_ints(2)
749
+ end
750
+
751
+ r = {} # This is a single result put in the result['matches'] array
752
+ r['id'] = doc
753
+ r['weight'] = weight
754
+ attrs_names_in_order.each do |a|
755
+ r['attrs'] ||= {}
756
+
757
+ case attrs[a]
758
+ when SPH_ATTR_BIGINT
759
+ # handle 64-bit ints
760
+ r['attrs'][a] = response.get_int64
761
+ when SPH_ATTR_FLOAT
762
+ # handle floats
763
+ r['attrs'][a] = response.get_float
764
+ when SPH_ATTR_STRING
765
+ # handle string
766
+ r['attrs'][a] = response.get_string
767
+ else
768
+ # handle everything else as unsigned ints
769
+ val = response.get_int
770
+ if attrs[a]==SPH_ATTR_MULTI
771
+ r['attrs'][a] = []
772
+ 1.upto(val) do
773
+ r['attrs'][a] << response.get_int
774
+ end
775
+ elsif attrs[a]==SPH_ATTR_MULTI64
776
+ r['attrs'][a] = []
777
+ val = val/2
778
+ 1.upto(val) do
779
+ r['attrs'][a] << response.get_int64
780
+ end
781
+ else
782
+ r['attrs'][a] = val
783
+ end
784
+ end
785
+ end
786
+ result['matches'] << r
787
+ end
788
+ result['total'], result['total_found'], msecs, words = response.get_ints(4)
789
+ result['time'] = '%.3f' % (msecs / 1000.0)
790
+
791
+ result['words'] = {}
792
+ while words > 0
793
+ words -= 1
794
+ word = response.get_string
795
+ docs, hits = response.get_ints(2)
796
+ result['words'][word] = { 'docs' => docs, 'hits' => hits }
797
+ end
798
+
799
+ results << result
800
+ end
801
+ #rescue EOFError
802
+ # @error = 'incomplete reply'
803
+ # raise SphinxResponseError, @error
804
+ end
805
+
806
+ return results
807
+ end
808
+
809
+ # Connect to the searchd server and generate excerpts from the given documents.
810
+ #
811
+ # * <tt>docs</tt> -- an array of strings which represent the documents' contents
812
+ # * <tt>index</tt> -- a string specifying the index whose settings will be used
813
+ # for stemming, lexing and case folding
814
+ # * <tt>words</tt> -- a string which contains the words to highlight
815
+ # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
816
+ #
817
+ # You can use the following parameters:
818
+ # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
819
+ # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
820
+ # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
821
+ # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
822
+ # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
823
+ # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
824
+ # * <tt>'single_passage'</tt> -- whether to extract the single best passage only, default is <tt>false</tt>
825
+ # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries set up in the tokenizer
826
+ # * <tt>'weight_order'</tt> -- whether to sort extracted passages in document order (default) or by weight
827
+ #
828
+ # Returns false on failure.
829
+ # Returns an array of string excerpts on success.
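+ #
+ # Sketch (not from the original docs; the 'posts' index name is hypothetical):
+ #
+ #   docs = ['this is my test text to be highlighted', 'another test document']
+ #   opts = { 'before_match' => '<em>', 'after_match' => '</em>', 'limit' => 120 }
+ #   excerpts = sphinx.BuildExcerpts(docs, 'posts', 'test text', opts)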
830
+ def BuildExcerpts(docs, index, words, opts = {})
831
+ assert { docs.instance_of? Array }
832
+ assert { index.instance_of? String }
833
+ assert { words.instance_of? String }
834
+ assert { opts.instance_of? Hash }
835
+
836
+ # fixup options
837
+ opts['before_match'] ||= '<b>';
838
+ opts['after_match'] ||= '</b>';
839
+ opts['chunk_separator'] ||= ' ... ';
840
+ opts['html_strip_mode'] ||= 'index';
841
+ opts['limit'] ||= 256;
842
+ opts['limit_passages'] ||= 0;
843
+ opts['limit_words'] ||= 0;
844
+ opts['around'] ||= 5;
845
+ opts['start_passage_id'] ||= 1;
846
+ opts['exact_phrase'] ||= false
847
+ opts['single_passage'] ||= false
848
+ opts['use_boundaries'] ||= false
849
+ opts['weight_order'] ||= false
850
+ opts['load_files'] ||= false
851
+ opts['allow_empty'] ||= false
852
+
853
+ # build request
854
+
855
+ # v.1.0 req
856
+ flags = 1
857
+ flags |= 2 if opts['exact_phrase']
858
+ flags |= 4 if opts['single_passage']
859
+ flags |= 8 if opts['use_boundaries']
860
+ flags |= 16 if opts['weight_order']
861
+ flags |= 32 if opts['query_mode']
862
+ flags |= 64 if opts['force_all_words']
863
+ flags |= 128 if opts['load_files']
864
+ flags |= 256 if opts['allow_empty']
865
+
866
+ request = Request.new
867
+ request.put_int 0, flags # mode=0, flags as computed above
868
+ # req index
869
+ request.put_string index
870
+ # req words
871
+ request.put_string words
872
+
873
+ # options
874
+ request.put_string opts['before_match']
875
+ request.put_string opts['after_match']
876
+ request.put_string opts['chunk_separator']
877
+ request.put_int opts['limit'].to_i, opts['around'].to_i
878
+
879
+ # options v1.2
880
+ request.put_int opts['limit_passages'].to_i
881
+ request.put_int opts['limit_words'].to_i
882
+ request.put_int opts['start_passage_id'].to_i
883
+ request.put_string opts['html_strip_mode']
884
+
885
+ # documents
886
+ request.put_int docs.size
887
+ docs.each do |doc|
888
+ assert { doc.instance_of? String }
889
+
890
+ request.put_string doc
891
+ end
892
+
893
+ response = PerformRequest(:excerpt, request)
894
+
895
+ # parse response
896
+ begin
897
+ res = []
898
+ docs.each do |doc|
899
+ res << response.get_string
900
+ end
901
+ rescue EOFError
902
+ @error = 'incomplete reply'
903
+ raise SphinxResponseError, @error
904
+ end
905
+ return res
906
+ end
907
+
908
+ # Connect to the searchd server and generate a keyword list for the given query.
909
+ #
910
+ # Returns an array of hashes with 'tokenized' and 'normalized' keys on success
+ # (plus 'docs' and 'hits' counts when <tt>hits</tt> is true).
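+ #
+ # Sketch (not from the original docs; the 'posts' index name is hypothetical):
+ #
+ #   keywords = sphinx.BuildKeywords('running tests', 'posts', true)
+ #   keywords.each { |kw| puts "#{kw['tokenized']} -> #{kw['normalized']} (#{kw['docs']} docs)" }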
911
+ def BuildKeywords(query, index, hits)
912
+ assert { query.instance_of? String }
913
+ assert { index.instance_of? String }
914
+ assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }
915
+
916
+ # build request
917
+ request = Request.new
918
+ # v.1.0 req
919
+ request.put_string query # req query
920
+ request.put_string index # req index
921
+ request.put_int hits ? 1 : 0
922
+
923
+ response = PerformRequest(:keywords, request)
924
+
925
+ # parse response
926
+ begin
927
+ res = []
928
+ nwords = response.get_int
929
+ 0.upto(nwords - 1) do |i|
930
+ tokenized = response.get_string
931
+ normalized = response.get_string
932
+
933
+ entry = { 'tokenized' => tokenized, 'normalized' => normalized }
934
+ entry['docs'], entry['hits'] = response.get_ints(2) if hits
935
+
936
+ res << entry
937
+ end
938
+ rescue EOFError
939
+ @error = 'incomplete reply'
940
+ raise SphinxResponseError, @error
941
+ end
942
+
943
+ return res
944
+ end
945
+
946
+ # Batch update the given attributes in the given documents of the given index.
947
+ #
948
+ # * +index+ is a name of the index to be updated
949
+ # * +attrs+ is an array of attribute name strings.
950
+ # * +values+ is a hash where the key is a document ID and the value is an array of
951
+ #   new attribute values
952
+ # * +mva+ identifies whether the update is for MVA (multi-valued) attributes
953
+ #
954
+ # Returns number of actually updated documents (0 or more) on success.
955
+ # Returns -1 on failure.
956
+ #
957
+ # Usage example:
958
+ # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
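+ #
+ # MVA sketch (not from the original docs; 'tag_ids' is a hypothetical MVA attribute --
+ # note that each value is itself an array when <tt>mva</tt> is true):
+ #   sphinx.UpdateAttributes('test1', ['tag_ids'], { 1 => [[4, 5, 6]] }, true)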
959
+ def UpdateAttributes(index, attrs, values, mva = false)
960
+ # verify everything
961
+ assert { index.instance_of? String }
962
+ assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
963
+
964
+ assert { attrs.instance_of? Array }
965
+ attrs.each do |attr|
966
+ assert { attr.instance_of? String }
967
+ end
968
+
969
+ assert { values.instance_of? Hash }
970
+ values.each do |id, entry|
971
+ assert { id.instance_of? Fixnum }
972
+ assert { entry.instance_of? Array }
973
+ assert { entry.length == attrs.length }
974
+ entry.each do |v|
975
+ if mva
976
+ assert { v.instance_of? Array }
977
+ v.each { |vv| assert { vv.instance_of? Fixnum } }
978
+ else
979
+ assert { v.instance_of? Fixnum }
980
+ end
981
+ end
982
+ end
983
+
984
+ # build request
985
+ request = Request.new
986
+ request.put_string index
987
+
988
+ request.put_int attrs.length
989
+ for attr in attrs
990
+ request.put_string attr
991
+ request.put_int mva ? 1 : 0
992
+ end
993
+
994
+ request.put_int values.length
995
+ values.each do |id, entry|
996
+ request.put_int64 id
997
+ if mva
998
+ entry.each { |v| request.put_int_array v }
999
+ else
1000
+ request.put_int(*entry)
1001
+ end
1002
+ end
1003
+
1004
+ response = PerformRequest(:update, request)
1005
+
1006
+ # parse response
1007
+ begin
1008
+ return response.get_int
1009
+ rescue EOFError
1010
+ @error = 'incomplete reply'
1011
+ raise SphinxResponseError, @error
1012
+ end
1013
+ end
1014
+
1015
+ protected
1016
+
1017
+ # Connect to searchd server.
1018
+ def Connect
1019
+ begin
1020
+ if @host[0,1]=='/'
1021
+ sock = UNIXSocket.new(@host)
1022
+ else
1023
+ sock = TCPSocket.new(@host, @port)
1024
+ end
1025
+ rescue => err
1026
+ @error = "connection to #{@host}:#{@port} failed (error=#{err})"
1027
+ raise SphinxConnectError, @error
1028
+ end
1029
+
1030
+ v = sock.recv(4).unpack('N*').first
1031
+ if v < 1
1032
+ sock.close
1033
+ @error = "expected searchd protocol version 1+, got version '#{v}'"
1034
+ raise SphinxConnectError, @error
1035
+ end
1036
+
1037
+ sock.send([1].pack('N'), 0)
1038
+ sock
1039
+ end
1040
+
1041
+ # Get and check response packet from searchd server.
1042
+ def GetResponse(sock, client_version)
1043
+ response = ''
1044
+ len = 0
1045
+
1046
+ header = sock.recv(8)
1047
+ if header.length == 8
1048
+ status, ver, len = header.unpack('n2N')
1049
+ left = len.to_i
1050
+ while left > 0 do
1051
+ begin
1052
+ chunk = sock.recv(left)
1053
+ if chunk
1054
+ response << chunk
1055
+ left -= chunk.length
1056
+ end
1057
+ rescue EOFError
1058
+ break
1059
+ end
1060
+ end
1061
+ end
1062
+ sock.close
1063
+
1064
+ # check response
1065
+ read = response.length
1066
+ if response.empty? or read != len.to_i
1067
+ @error = response.empty? \
1068
+ ? 'received zero-sized searchd response' \
1069
+ : "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
1070
+ raise SphinxResponseError, @error
1071
+ end
1072
+
1073
+ # check status
1074
+ if (status == SEARCHD_WARNING)
1075
+ wlen = response[0, 4].unpack('N*').first
1076
+ @warning = response[4, wlen]
1077
+ return response[4 + wlen, response.length - 4 - wlen]
1078
+ end
1079
+
1080
+ if status == SEARCHD_ERROR
1081
+ @error = 'searchd error: ' + response[4, response.length - 4]
1082
+ raise SphinxInternalError, @error
1083
+ end
1084
+
1085
+ if status == SEARCHD_RETRY
1086
+ @error = 'temporary searchd error: ' + response[4, response.length - 4]
1087
+ raise SphinxTemporaryError, @error
1088
+ end
1089
+
1090
+ unless status == SEARCHD_OK
1091
+ @error = "unknown status code: '#{status}'"
1092
+ raise SphinxUnknownError, @error
1093
+ end
1094
+
1095
+ # check version
1096
+ if ver < client_version
1097
+ @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
1098
+ "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
1099
+ end
1100
+
1101
+ return response
1102
+ end
1103
+
1104
+ # Connect, send query, get response.
1105
+ def PerformRequest(command, request, additional = nil)
1106
+ cmd = command.to_s.upcase
1107
+ command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1108
+ command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1109
+
1110
+ sock = self.Connect
1111
+ len = request.to_s.length + (additional != nil ? 8 : 0)
1112
+ header = [command_id, command_ver, len].pack('nnN')
1113
+ header << [0, additional].pack('NN') if additional != nil
1114
+ sock.send(header + request.to_s, 0)
1115
+ response = self.GetResponse(sock, command_ver)
1116
+ return Response.new(response)
1117
+ end
1118
+
1119
+ # :stopdoc:
1120
+ def assert
1121
+ raise 'Assertion failed!' unless yield if $DEBUG
1122
+ end
1123
+ # :startdoc:
1124
+ end
1125
+ end