sander6-enygma 0.0.7 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1093 @@
1
+ # = client.rb - Sphinx Client API
2
+ #
3
+ # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
+ # Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
5
+ # License:: Distributes under the same terms as Ruby
6
+ # Version:: 0.9.9-r1299
7
+ # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
+ #
9
+ # This library is distributed under the terms of the Ruby license.
10
+ # You can freely distribute/modify this library.
11
+
12
+ # ==Sphinx Client API
13
+ #
14
+ # The Sphinx Client API is used to communicate with <tt>searchd</tt>
15
+ # daemon and get search results from Sphinx.
16
+ #
17
+ # ===Usage
18
+ #
19
+ # sphinx = Sphinx::Client.new
20
+ # result = sphinx.Query('test')
21
+ # ids = result['matches'].map { |match| match['id'] }.join(',')
22
+ # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
+ #
24
+ # docs = posts.map(&:body)
25
+ # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
+
27
+ require 'socket'
28
+
29
+ module Sphinx
30
+ # :stopdoc:
31
+
32
+ class SphinxError < StandardError; end
33
+ class SphinxArgumentError < SphinxError; end
34
+ class SphinxConnectError < SphinxError; end
35
+ class SphinxResponseError < SphinxError; end
36
+ class SphinxInternalError < SphinxError; end
37
+ class SphinxTemporaryError < SphinxError; end
38
+ class SphinxUnknownError < SphinxError; end
39
+
40
+ # :startdoc:
41
+
42
+ class Client
43
+
44
+ # :stopdoc:
45
+
46
# Known searchd commands

# search command
SEARCHD_COMMAND_SEARCH = 0
# excerpt command
SEARCHD_COMMAND_EXCERPT = 1
# update command
SEARCHD_COMMAND_UPDATE = 2
# keywords command
SEARCHD_COMMAND_KEYWORDS = 3

# Current client-side command implementation versions

# search command version
VER_COMMAND_SEARCH = 0x116
# excerpt command version
VER_COMMAND_EXCERPT = 0x100
# update command version
VER_COMMAND_UPDATE = 0x102
# keywords command version
VER_COMMAND_KEYWORDS = 0x100

# Known searchd status codes

# general success, command-specific reply follows
SEARCHD_OK = 0
# general failure, command-specific reply may follow
SEARCHD_ERROR = 1
# temporary failure, client should retry later
SEARCHD_RETRY = 2
# general success, warning message and command-specific reply follow
SEARCHD_WARNING = 3

# :startdoc:

# Known match modes

# match all query words
SPH_MATCH_ALL = 0
# match any query word
SPH_MATCH_ANY = 1
# match this exact phrase
SPH_MATCH_PHRASE = 2
# match this boolean query
SPH_MATCH_BOOLEAN = 3
# match this extended query
SPH_MATCH_EXTENDED = 4
# match all document IDs w/o fulltext query, apply filters
SPH_MATCH_FULLSCAN = 5
# extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
SPH_MATCH_EXTENDED2 = 6

# Known ranking modes (ext2 only)

# default mode, phrase proximity major factor and BM25 minor one
SPH_RANK_PROXIMITY_BM25 = 0
# statistical mode, BM25 ranking only (faster but worse quality)
SPH_RANK_BM25 = 1
# no ranking, all matches get a weight of 1
SPH_RANK_NONE = 2
# simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
SPH_RANK_WORDCOUNT = 3
# phrase proximity
SPH_RANK_PROXIMITY = 4

# Known sort modes

# sort by document relevance desc, then by date
SPH_SORT_RELEVANCE = 0
# sort by attribute value desc, then by relevance desc
SPH_SORT_ATTR_DESC = 1
# sort by attribute value asc, then by relevance desc
SPH_SORT_ATTR_ASC = 2
# sort by time segments (hour/day/week/etc) desc, then by relevance desc
SPH_SORT_TIME_SEGMENTS = 3
# sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
SPH_SORT_EXTENDED = 4
# sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
SPH_SORT_EXPR = 5

# Known filter types

# filter by integer values set
SPH_FILTER_VALUES = 0
# filter by integer range
SPH_FILTER_RANGE = 1
# filter by float range
SPH_FILTER_FLOATRANGE = 2

# Known attribute types

# this attr is just an integer
SPH_ATTR_INTEGER = 1
# this attr is a timestamp
SPH_ATTR_TIMESTAMP = 2
# this attr is an ordinal string number (integer at search time,
# specially handled at indexing time)
SPH_ATTR_ORDINAL = 3
# this attr is a boolean bit field
SPH_ATTR_BOOL = 4
# this attr is a float
SPH_ATTR_FLOAT = 5
# signed 64-bit integer
SPH_ATTR_BIGINT = 6
# this attr has multiple values (0 or more)
SPH_ATTR_MULTI = 0x40000000

# Known grouping functions

# group by day
SPH_GROUPBY_DAY = 0
# group by week
SPH_GROUPBY_WEEK = 1
# group by month
SPH_GROUPBY_MONTH = 2
# group by year
SPH_GROUPBY_YEAR = 3
# group by attribute value
SPH_GROUPBY_ATTR = 4
# group by sequential attrs pair
SPH_GROUPBY_ATTRPAIR = 5
167
+
168
# Constructs the <tt>Sphinx::Client</tt> object and sets every option
# to its default value.
def initialize
  # connection settings
  @host = 'localhost'           # searchd host
  @port = 3312                  # searchd TCP port

  # per-query settings
  @offset        = 0                        # how many records to skip from result-set start
  @limit         = 20                       # how many records to return, starting at offset
  @mode          = SPH_MATCH_ALL            # query matching mode
  @weights       = []                       # per-field weights (all fields weigh 1 by default)
  @sort          = SPH_SORT_RELEVANCE       # match sorting mode
  @sortby        = ''                       # attribute to sort by
  @min_id        = 0                        # min document ID to match (0 = no limit)
  @max_id        = 0                        # max document ID to match (0 = no limit)
  @filters       = []                       # search filters
  @groupby       = ''                       # group-by attribute name
  @groupfunc     = SPH_GROUPBY_DAY          # group-by value pre-processing function
  @groupsort     = '@group desc'            # group-by sorting clause
  @groupdistinct = ''                       # group-by count-distinct attribute
  @maxmatches    = 1000                     # max matches to retrieve
  @cutoff        = 0                        # cutoff to stop searching at (0 = none)
  @retrycount    = 0                        # distributed retries count
  @retrydelay    = 0                        # distributed retries delay
  @anchor        = []                       # geographical anchor point
  @indexweights  = []                       # per-index weights
  @ranker        = SPH_RANK_PROXIMITY_BM25  # ranking mode
  @maxquerytime  = 0                        # max query time in ms (0 = unlimited)
  @fieldweights  = {}                       # per-field-name weights
  @overrides     = []                       # per-query attribute value overrides
  @select        = '*'                      # select-list (attributes/expressions, optional aliases)

  # per-reply state (single-query case)
  @error   = ''                 # last error message
  @warning = ''                 # last warning message

  @reqs  = []                   # queued requests (multi-query case)
  @mbenc = ''                   # stored mbstring encoding
end
207
+
208
+ # Get last error message.
209
+ def GetLastError
210
+ @error
211
+ end
212
+
213
+ # Get last warning message.
214
+ def GetLastWarning
215
+ @warning
216
+ end
217
+
218
# Set searchd host name (String) and TCP port (integer).
def SetServer(host, port)
  assert { host.instance_of?(String) }
  assert { port.instance_of?(Fixnum) }

  @host, @port = host, port
end
226
+
227
# Set offset and count into result set,
# and optionally set max-matches and cutoff limits.
#
# Zero <tt>max</tt>/<tt>cutoff</tt> mean "keep the current setting".
def SetLimits(offset, limit, max = 0, cutoff = 0)
  assert { offset.instance_of?(Fixnum) }
  assert { limit.instance_of?(Fixnum) }
  assert { max.instance_of?(Fixnum) }
  # NOTE(review): cutoff was previously the only unvalidated parameter;
  # validate it the same way as its siblings.
  assert { cutoff.instance_of?(Fixnum) }
  assert { offset >= 0 }
  assert { limit > 0 }
  assert { max >= 0 }
  assert { cutoff >= 0 }

  @offset = offset
  @limit = limit
  @maxmatches = max if max > 0
  @cutoff = cutoff if cutoff > 0
end
242
+
243
# Set per-index maximum query time, in milliseconds.
# Zero means "do not limit".
def SetMaxQueryTime(max)
  assert { max.instance_of?(Fixnum) }
  assert { max >= 0 }

  @maxquerytime = max
end
250
+
251
# Set matching mode (one of the SPH_MATCH_* constants).
def SetMatchMode(mode)
  known_modes = [
    SPH_MATCH_ALL, SPH_MATCH_ANY, SPH_MATCH_PHRASE, SPH_MATCH_BOOLEAN,
    SPH_MATCH_EXTENDED, SPH_MATCH_FULLSCAN, SPH_MATCH_EXTENDED2
  ]
  assert { known_modes.include?(mode) }

  @mode = mode
end
263
+
264
# Set ranking mode (one of the SPH_RANK_* constants; ext2 matching only).
def SetRankingMode(ranker)
  known_rankers = [
    SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE,
    SPH_RANK_WORDCOUNT, SPH_RANK_PROXIMITY
  ]
  assert { known_rankers.include?(ranker) }

  @ranker = ranker
end
274
+
275
# Set matches sorting mode (one of the SPH_SORT_* constants).
# All modes except SPH_SORT_RELEVANCE require a non-empty
# <tt>sortby</tt> clause.
def SetSortMode(mode, sortby = '')
  known_modes = [
    SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC,
    SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR
  ]
  assert { known_modes.include?(mode) }
  assert { sortby.instance_of?(String) }
  assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }

  @sort = mode
  @sortby = sortby
end
289
+
290
# Bind per-field weights by order.
#
# DEPRECATED; use SetFieldWeights() instead.
def SetWeights(weights)
  assert { weights.instance_of?(Array) }
  weights.each { |w| assert { w.instance_of?(Fixnum) } }

  @weights = weights
end
301
+
302
# Bind per-field weights by name.
#
# Takes a hash mapping field name (String) to field weight (integer).
# * Takes precedence over SetWeights().
# * Unknown names will be silently ignored.
# * Unbound fields will be silently given a weight of 1.
def SetFieldWeights(weights)
  assert { weights.instance_of?(Hash) }
  weights.each do |field, weight|
    assert { field.instance_of?(String) }
    assert { weight.instance_of?(Fixnum) }
  end

  @fieldweights = weights
end
317
+
318
# Bind per-index weights by name.
#
# Takes a hash mapping index name (String) to index weight (integer).
def SetIndexWeights(weights)
  assert { weights.instance_of?(Hash) }
  weights.each do |name, weight|
    assert { name.instance_of?(String) }
    assert { weight.instance_of?(Fixnum) }
  end

  @indexweights = weights
end
328
+
329
# Set IDs range to match.
#
# Only match records whose document ID is between <tt>min</tt> and
# <tt>max</tt> (inclusive).
def SetIDRange(min, max)
  assert { min.instance_of?(Fixnum) || min.instance_of?(Bignum) }
  assert { max.instance_of?(Fixnum) || max.instance_of?(Bignum) }
  assert { min <= max }

  @min_id = min
  @max_id = max
end
340
+
341
# Set values filter.
#
# Only match those records where the <tt>attribute</tt> column value
# is in the specified set.
def SetFilter(attribute, values, exclude = false)
  assert { attribute.instance_of?(String) }
  assert { values.instance_of?(Array) }
  assert { !values.empty? }

  # guard kept in addition to the asserts: skip silently when the
  # values list is unusable
  return unless values.instance_of?(Array) && !values.empty?

  values.each { |v| assert { v.instance_of?(Fixnum) } }

  @filters << {
    'type'    => SPH_FILTER_VALUES,
    'attr'    => attribute,
    'exclude' => exclude,
    'values'  => values
  }
end
358
+
359
# Set integer range filter.
#
# Only match those records where the <tt>attribute</tt> column value
# is between <tt>min</tt> and <tt>max</tt> (inclusive).
def SetFilterRange(attribute, min, max, exclude = false)
  assert { attribute.instance_of?(String) }
  assert { min.instance_of?(Fixnum) || min.instance_of?(Bignum) }
  assert { max.instance_of?(Fixnum) || max.instance_of?(Bignum) }
  assert { min <= max }

  @filters << {
    'type'    => SPH_FILTER_RANGE,
    'attr'    => attribute,
    'exclude' => exclude,
    'min'     => min,
    'max'     => max
  }
end
371
+
372
# Set float range filter.
#
# Only match those records where the <tt>attribute</tt> column value
# is between <tt>min</tt> and <tt>max</tt> (inclusive).
def SetFilterFloatRange(attribute, min, max, exclude = false)
  assert { attribute.instance_of?(String) }
  assert { min.instance_of?(Float) }
  assert { max.instance_of?(Float) }
  assert { min <= max }

  @filters << {
    'type'    => SPH_FILTER_FLOATRANGE,
    'attr'    => attribute,
    'exclude' => exclude,
    'min'     => min,
    'max'     => max
  }
end
384
+
385
# Set up the anchor point for geosphere distance calculations.
#
# Required to use <tt>@geodist</tt> in filters and sorting;
# distance will be computed to this point.
#
# * <tt>attrlat</tt> -- name of the latitude attribute
# * <tt>attrlong</tt> -- name of the longitude attribute
# * <tt>lat</tt> -- anchor point latitude, in radians
# * <tt>long</tt> -- anchor point longitude, in radians
def SetGeoAnchor(attrlat, attrlong, lat, long)
  assert { attrlat.instance_of?(String) }
  assert { attrlong.instance_of?(String) }
  assert { lat.instance_of?(Float) }
  assert { long.instance_of?(Float) }

  @anchor = {
    'attrlat'  => attrlat,
    'attrlong' => attrlong,
    'lat'      => lat,
    'long'     => long
  }
end
403
+
404
# Set grouping attribute and function.
#
# In grouping mode, all matches are assigned to different groups
# based on grouping function value. Each group keeps track of the
# total match count, and the best match (in this group) according
# to the current sorting function.
#
# The final result set contains one best match per group, with
# grouping function value and matches count attached.
#
# Groups in the result set may be sorted by any sorting clause,
# including both document attributes and the following special
# internal Sphinx attributes:
#
# * @id - match document ID;
# * @weight, @rank, @relevance - match weight;
# * @group - groupby function value;
# * @count - amount of matches in group.
#
# The default is to sort by groupby value in descending order,
# i.e. by '@group desc'.
#
# 'total_found' will contain the total amount of matching groups
# over the whole index.
#
# WARNING: grouping is done in fixed memory and thus its results
# are only approximate; there might be more groups reported in
# total_found than actually present, and @count might be
# underestimated.
#
# For example, when sorting by relevance and grouping by a
# "published" attribute with the SPH_GROUPBY_DAY function, the
# result set will contain one most relevant match per each day when
# there were any matches published, with day number and per-day
# match count attached, sorted by day number in descending order
# (i.e. most recent days first).
def SetGroupBy(attribute, func, groupsort = '@group desc')
  assert { attribute.instance_of?(String) }
  assert { groupsort.instance_of?(String) }
  known_funcs = [
    SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH,
    SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR, SPH_GROUPBY_ATTRPAIR
  ]
  assert { known_funcs.include?(func) }

  @groupby = attribute
  @groupfunc = func
  @groupsort = groupsort
end
454
+
455
# Set count-distinct attribute for group-by queries.
def SetGroupDistinct(attribute)
  assert { attribute.instance_of?(String) }

  @groupdistinct = attribute
end
460
+
461
# Set distributed retries count and delay.
def SetRetries(count, delay = 0)
  assert { count.instance_of?(Fixnum) }
  assert { delay.instance_of?(Fixnum) }

  @retrycount, @retrydelay = count, delay
end
469
+
470
# Set an attribute values override.
#
# There can be only one override per attribute.
# +values+ must be a hash mapping document IDs to attribute values.
def SetOverride(attrname, attrtype, values)
  assert { attrname.instance_of?(String) }
  overridable_types = [
    SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL,
    SPH_ATTR_FLOAT, SPH_ATTR_BIGINT
  ]
  assert { overridable_types.include?(attrtype) }
  assert { values.instance_of?(Hash) }

  @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
end
481
+
482
# Set select-list (attributes or expressions), SQL-like syntax.
def SetSelect(select)
  assert { select.instance_of?(String) }

  @select = select
end
487
+
488
+ # Clear all filters (for multi-queries).
489
+ def ResetFilters
490
+ @filters = []
491
+ @anchor = []
492
+ end
493
+
494
# Restore all group-by settings to their defaults (for multi-queries).
def ResetGroupBy
  @groupby       = ''
  @groupfunc     = SPH_GROUPBY_DAY
  @groupsort     = '@group desc'
  @groupdistinct = ''
end
501
+
502
+ # Clear all attribute value overrides (for multi-queries).
503
+ def ResetOverrides
504
+ @overrides = []
505
+ end
506
+
507
# Connect to searchd server and run the given search query.
#
# <tt>query</tt> is the query string.
#
# <tt>index</tt> is the index name (or names) to query; the default "*"
# means all indexes. Accepted characters for index names are letters,
# numbers, dash, and underscore; everything else is treated as a
# separator, so all of the following search the same two indexes:
#
#   sphinx.Query('test query', 'main delta')
#   sphinx.Query('test query', 'main;delta')
#   sphinx.Query('test query', 'main, delta')
#
# Index order matters: when identical IDs are found in two or more
# indexes, weight and attribute values from the very last matching
# index win, so in the examples above "delta" matches always beat
# "main" ones.
#
# Returns false on failure.
# On success returns a hash with the following keys:
#
# * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
# * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
# * <tt>'total_found'</tt> -- total amount of matching documents in index
# * <tt>'time'</tt> -- search time
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
def Query(query, index = '*', comment = '')
  assert { @reqs.empty? }
  @reqs = []

  AddQuery(query, index, comment)
  results = RunQueries

  # probably a network error; error message is already filled in
  return false unless results.instance_of?(Array)

  first = results[0]
  @error   = first['error']
  @warning = first['warning']

  return false if first['status'] == SEARCHD_ERROR
  first
end
549
+
550
# Add a query to the batch.
#
# Batch queries enable searchd to perform internal optimizations
# where possible, and reduce network connection overhead in all
# cases. For instance, running exactly the same query with different
# group-by settings lets searchd perform the expensive full-text
# search and ranking only once, while computing multiple group-by
# results from its output.
#
# Parameters are exactly the same as in the <tt>Query</tt> call.
# Returns the index of this query in the results array returned by
# the subsequent <tt>RunQueries</tt> call.
def AddQuery(query, index = '*', comment = '')
  # NOTE: the order of put_* calls below defines the searchd wire
  # format and must not be changed.
  req = Request.new

  # mode and limits
  req.put_int @offset, @limit, @mode, @ranker, @sort
  req.put_string @sortby
  # the query itself
  req.put_string query
  # per-field weights (by order)
  req.put_int_array @weights
  # index name(s)
  req.put_string index
  # id64 range marker, then the range itself
  req.put_int 1
  req.put_int64 @min_id.to_i, @max_id.to_i

  # filters
  req.put_int @filters.length
  @filters.each do |filter|
    req.put_string filter['attr']
    req.put_int filter['type']

    case filter['type']
    when SPH_FILTER_VALUES
      req.put_int64_array filter['values']
    when SPH_FILTER_RANGE
      req.put_int64 filter['min'], filter['max']
    when SPH_FILTER_FLOATRANGE
      req.put_float filter['min'], filter['max']
    else
      raise SphinxInternalError, 'Internal error: unhandled filter type'
    end
    req.put_int(filter['exclude'] ? 1 : 0)
  end

  # group-by clause, max-matches count, group-sort clause, cutoff count
  req.put_int @groupfunc
  req.put_string @groupby
  req.put_int @maxmatches
  req.put_string @groupsort
  req.put_int @cutoff, @retrycount, @retrydelay
  req.put_string @groupdistinct

  # geo anchor point (flag + payload)
  if @anchor.empty?
    req.put_int 0
  else
    req.put_int 1
    req.put_string @anchor['attrlat'], @anchor['attrlong']
    req.put_float @anchor['lat'], @anchor['long']
  end

  # per-index weights
  req.put_int @indexweights.length
  @indexweights.each do |idx, weight|
    req.put_string idx
    req.put_int weight
  end

  # max query time
  req.put_int @maxquerytime

  # per-field weights (by name)
  req.put_int @fieldweights.length
  @fieldweights.each do |field, weight|
    req.put_string field
    req.put_int weight
  end

  # comment
  req.put_string comment

  # attribute overrides
  req.put_int @overrides.length
  @overrides.each do |entry|
    req.put_string entry['attr']
    req.put_int entry['type'], entry['values'].size
    entry['values'].each do |id, val|
      assert { id.instance_of?(Fixnum) || id.instance_of?(Bignum) }
      assert { val.instance_of?(Fixnum) || val.instance_of?(Bignum) || val.instance_of?(Float) }

      req.put_int64 id
      case entry['type']
      when SPH_ATTR_FLOAT
        req.put_float val
      when SPH_ATTR_BIGINT
        req.put_int64 val
      else
        req.put_int val
      end
    end
  end

  # select-list
  req.put_string @select

  # queue the serialized request and return its index
  @reqs << req.to_s
  @reqs.length - 1
end
664
+
665
# Run the queued queries batch.
#
# Returns an array of result sets on success.
# Returns false when no queries have been queued.
# Raises SphinxResponseError when the reply is truncated.
#
# Each result set in the returned array is a hash which contains
# the same keys as the hash returned by <tt>Query</tt>, plus:
#
# * <tt>'error'</tt> -- search error for this query
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
def RunQueries
  if @reqs.empty?
    @error = 'No queries defined, issue AddQuery() first'
    return false
  end

  req = @reqs.join('')
  nreqs = @reqs.length
  @reqs = []
  response = PerformRequest(:search, req, nreqs)

  # parse response
  begin
    results = []
    ires = 0
    while ires < nreqs
      ires += 1
      result = {}

      result['error'] = ''
      result['warning'] = ''

      # extract per-query status
      status = result['status'] = response.get_int
      if status != SEARCHD_OK
        message = response.get_string
        if status == SEARCHD_WARNING
          result['warning'] = message
        else
          # query-level error: record it and move on to the next result set
          result['error'] = message
          results << result
          next
        end
      end

      # read schema: field names, then attribute name/type pairs
      fields = []
      attrs = {}
      attrs_names_in_order = []

      nfields = response.get_int
      while nfields > 0
        nfields -= 1
        fields << response.get_string
      end
      result['fields'] = fields

      nattrs = response.get_int
      while nattrs > 0
        nattrs -= 1
        attr = response.get_string
        type = response.get_int
        attrs[attr] = type
        attrs_names_in_order << attr
      end
      result['attrs'] = attrs

      # read match count and the id-size flag (64-bit ids when non-zero)
      count = response.get_int
      id64 = response.get_int

      # read matches
      result['matches'] = []
      while count > 0
        count -= 1

        if id64 != 0
          doc = response.get_int64
          weight = response.get_int
        else
          doc, weight = response.get_ints(2)
        end

        r = {} # a single entry of the result['matches'] array
        r['id'] = doc
        r['weight'] = weight
        attrs_names_in_order.each do |a|
          r['attrs'] ||= {}

          case attrs[a]
          when SPH_ATTR_BIGINT
            # handle 64-bit ints
            r['attrs'][a] = response.get_int64
          when SPH_ATTR_FLOAT
            # handle floats
            r['attrs'][a] = response.get_float
          else
            # handle everything else as unsigned ints
            val = response.get_int
            if (attrs[a] & SPH_ATTR_MULTI) != 0
              # MVA: the first int is the number of values that follow
              r['attrs'][a] = []
              1.upto(val) do
                r['attrs'][a] << response.get_int
              end
            else
              r['attrs'][a] = val
            end
          end
        end
        result['matches'] << r
      end
      result['total'], result['total_found'], msecs, words = response.get_ints(4)
      result['time'] = '%.3f' % (msecs / 1000.0)

      # per-word statistics
      result['words'] = {}
      while words > 0
        words -= 1
        word = response.get_string
        docs, hits = response.get_ints(2)
        result['words'][word] = { 'docs' => docs, 'hits' => hits }
      end

      results << result
    end
  rescue EOFError
    # Restored handler (it was commented out): a truncated reply must
    # surface as SphinxResponseError, consistent with BuildExcerpts,
    # BuildKeywords, and UpdateAttributes.
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end

  return results
end
796
+
797
# Connect to searchd server and generate excerpts (snippets) from
# the given documents.
#
# * <tt>docs</tt> -- an array of strings with the documents' contents
# * <tt>index</tt> -- name of the index whose settings will be used
#   for stemming, lexing and case folding
# * <tt>words</tt> -- a string with the words to highlight
# * <tt>opts</tt> -- a hash with additional optional highlighting parameters:
#   * <tt>'before_match'</tt> -- string inserted before a set of matching words, default is "<b>"
#   * <tt>'after_match'</tt> -- string inserted after a set of matching words, default is "</b>"
#   * <tt>'chunk_separator'</tt> -- string inserted between excerpt chunks, default is " ... "
#   * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
#   * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
#   * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is false
#   * <tt>'single_passage'</tt> -- whether to extract the single best passage only, default is false
#   * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries set up in tokenizer
#   * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
#
# Returns an array of string excerpts on success.
# Raises SphinxResponseError when the reply is truncated.
def BuildExcerpts(docs, index, words, opts = {})
  assert { docs.instance_of?(Array) }
  assert { index.instance_of?(String) }
  assert { words.instance_of?(String) }
  assert { opts.instance_of?(Hash) }

  # fill in option defaults
  {
    'before_match'    => '<b>',
    'after_match'     => '</b>',
    'chunk_separator' => ' ... ',
    'limit'           => 256,
    'around'          => 5,
    'exact_phrase'    => false,
    'single_passage'  => false,
    'use_boundaries'  => false,
    'weight_order'    => false
  }.each { |key, default| opts[key] ||= default }

  # build request (v.1.0 format)

  # bit 0 is always set ("remove spaces"); the rest mirror the boolean opts
  flags = 1
  flags |= 2  if opts['exact_phrase']
  flags |= 4  if opts['single_passage']
  flags |= 8  if opts['use_boundaries']
  flags |= 16 if opts['weight_order']

  req = Request.new
  req.put_int 0, flags # mode=0, computed flags
  req.put_string index
  req.put_string words

  # highlighting options
  req.put_string opts['before_match']
  req.put_string opts['after_match']
  req.put_string opts['chunk_separator']
  req.put_int opts['limit'].to_i, opts['around'].to_i

  # documents
  req.put_int docs.size
  docs.each do |doc|
    assert { doc.instance_of?(String) }
    req.put_string doc
  end

  response = PerformRequest(:excerpt, req)

  # parse response: one excerpt string per input document
  begin
    res = docs.map { response.get_string }
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
  res
end
879
+
880
# Connect to searchd server and generate a keyword list for the
# given query.
#
# When <tt>hits</tt> is true, per-keyword 'docs'/'hits' statistics
# are included in each entry.
#
# Returns an array of keyword hashes on success.
# Raises SphinxResponseError when the reply is truncated.
def BuildKeywords(query, index, hits)
  assert { query.instance_of?(String) }
  assert { index.instance_of?(String) }
  assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }

  # build request (v.1.0 format)
  req = Request.new
  req.put_string query
  req.put_string index
  req.put_int(hits ? 1 : 0)

  response = PerformRequest(:keywords, req)

  # parse response
  begin
    res = []
    nwords = response.get_int
    nwords.times do
      entry = {
        'tokenized'  => response.get_string,
        'normalized' => response.get_string
      }
      entry['docs'], entry['hits'] = response.get_ints(2) if hits
      res << entry
    end
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end

  res
end
917
+
918
# Batch update given attributes in given rows in given indexes.
#
# * +index+ is the name of the index to be updated
# * +attrs+ is an array of attribute name strings
# * +values+ is a hash where the key is a document id and the value
#   is an array of new attribute values (one per name in +attrs+)
# * +mva+ identifies whether an MVA (multi-value attribute) update
#   is being performed; each value is then an array of integers
#
# Returns the number of actually updated documents (0 or more) on success.
# Raises SphinxResponseError when the reply is truncated.
#
# Usage example:
#   sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
def UpdateAttributes(index, attrs, values, mva = false)
  # validate every argument up front
  assert { index.instance_of?(String) }
  assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }

  assert { attrs.instance_of?(Array) }
  attrs.each { |attr| assert { attr.instance_of?(String) } }

  assert { values.instance_of?(Hash) }
  values.each do |id, entry|
    assert { id.instance_of?(Fixnum) }
    assert { entry.instance_of?(Array) }
    assert { entry.length == attrs.length }
    entry.each do |v|
      if mva
        assert { v.instance_of?(Array) }
        v.each { |vv| assert { vv.instance_of?(Fixnum) } }
      else
        assert { v.instance_of?(Fixnum) }
      end
    end
  end

  # build request
  req = Request.new
  req.put_string index

  req.put_int attrs.length
  attrs.each do |attr|
    req.put_string attr
    req.put_int(mva ? 1 : 0)
  end

  req.put_int values.length
  values.each do |id, entry|
    req.put_int64 id
    if mva
      entry.each { |v| req.put_int_array v }
    else
      req.put_int(*entry)
    end
  end

  response = PerformRequest(:update, req)

  # parse response: searchd replies with the updated-rows count
  begin
    return response.get_int
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
986
+
987
+ protected
988
+
989
+ # Connect to searchd server.
990
+ def Connect
991
+ begin
992
+ sock = TCPSocket.new(@host, @port)
993
+ rescue
994
+ @error = "connection to #{@host}:#{@port} failed"
995
+ raise SphinxConnectError, @error
996
+ end
997
+
998
+ v = sock.recv(4).unpack('N*').first
999
+ if v < 1
1000
+ sock.close
1001
+ @error = "expected searchd protocol version 1+, got version '#{v}'"
1002
+ raise SphinxConnectError, @error
1003
+ end
1004
+
1005
+ sock.send([1].pack('N'), 0)
1006
+ sock
1007
+ end
1008
+
1009
+ # Get and check response packet from searchd server.
1010
+ def GetResponse(sock, client_version)
1011
+ response = ''
1012
+ len = 0
1013
+
1014
+ header = sock.recv(8)
1015
+ if header.length == 8
1016
+ status, ver, len = header.unpack('n2N')
1017
+ left = len.to_i
1018
+ while left > 0 do
1019
+ begin
1020
+ chunk = sock.recv(left)
1021
+ if chunk
1022
+ response << chunk
1023
+ left -= chunk.length
1024
+ end
1025
+ rescue EOFError
1026
+ break
1027
+ end
1028
+ end
1029
+ end
1030
+ sock.close
1031
+
1032
+ # check response
1033
+ read = response.length
1034
+ if response.empty? or read != len.to_i
1035
+ @error = len \
1036
+ ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
1037
+ : 'received zero-sized searchd response'
1038
+ raise SphinxResponseError, @error
1039
+ end
1040
+
1041
+ # check status
1042
+ if (status == SEARCHD_WARNING)
1043
+ wlen = response[0, 4].unpack('N*').first
1044
+ @warning = response[4, wlen]
1045
+ return response[4 + wlen, response.length - 4 - wlen]
1046
+ end
1047
+
1048
+ if status == SEARCHD_ERROR
1049
+ @error = 'searchd error: ' + response[4, response.length - 4]
1050
+ raise SphinxInternalError, @error
1051
+ end
1052
+
1053
+ if status == SEARCHD_RETRY
1054
+ @error = 'temporary searchd error: ' + response[4, response.length - 4]
1055
+ raise SphinxTemporaryError, @error
1056
+ end
1057
+
1058
+ unless status == SEARCHD_OK
1059
+ @error = "unknown status code: '#{status}'"
1060
+ raise SphinxUnknownError, @error
1061
+ end
1062
+
1063
+ # check version
1064
+ if ver < client_version
1065
+ @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
1066
+ "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
1067
+ end
1068
+
1069
+ return response
1070
+ end
1071
+
1072
+ # Connect, send query, get response.
1073
+ def PerformRequest(command, request, additional = nil)
1074
+ cmd = command.to_s.upcase
1075
+ command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1076
+ command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1077
+
1078
+ sock = self.Connect
1079
+ len = request.to_s.length + (additional != nil ? 4 : 0)
1080
+ header = [command_id, command_ver, len].pack('nnN')
1081
+ header << [additional].pack('N') if additional != nil
1082
+ sock.send(header + request.to_s, 0)
1083
+ response = self.GetResponse(sock, command_ver)
1084
+ return Response.new(response)
1085
+ end
1086
+
1087
+ # :stopdoc:
1088
+ def assert
1089
+ raise 'Assertion failed!' unless yield if $DEBUG
1090
+ end
1091
+ # :startdoc:
1092
+ end
1093
+ end