rlibsphinxclient 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. data/.gitignore +3 -0
  2. data/CHANGELOG.rdoc +18 -0
  3. data/MIT-LICENSE +20 -0
  4. data/README.rdoc +151 -0
  5. data/Rakefile +39 -0
  6. data/VERSION +1 -0
  7. data/ext/extconf.rb +20 -0
  8. data/ext/rlibsphinxclient.i +314 -0
  9. data/ext/rlibsphinxclient_wrap.c +5931 -0
  10. data/init.rb +1 -0
  11. data/lib/sphinx.rb +22 -0
  12. data/lib/sphinx/client.rb +1070 -0
  13. data/lib/sphinx/fast_client.rb +184 -0
  14. data/lib/sphinx/request.rb +49 -0
  15. data/lib/sphinx/response.rb +69 -0
  16. data/lib/sphinx/safe_executor.rb +11 -0
  17. data/lib/sphinx/timeout.rb +9 -0
  18. data/rlibsphinxclient.gemspec +117 -0
  19. data/spec/client_response_spec.rb +135 -0
  20. data/spec/client_spec.rb +548 -0
  21. data/spec/fixtures/default_search.php +8 -0
  22. data/spec/fixtures/default_search_index.php +8 -0
  23. data/spec/fixtures/excerpt_custom.php +11 -0
  24. data/spec/fixtures/excerpt_default.php +8 -0
  25. data/spec/fixtures/excerpt_flags.php +11 -0
  26. data/spec/fixtures/field_weights.php +9 -0
  27. data/spec/fixtures/filter.php +9 -0
  28. data/spec/fixtures/filter_exclude.php +9 -0
  29. data/spec/fixtures/filter_float_range.php +9 -0
  30. data/spec/fixtures/filter_float_range_exclude.php +9 -0
  31. data/spec/fixtures/filter_range.php +9 -0
  32. data/spec/fixtures/filter_range_exclude.php +9 -0
  33. data/spec/fixtures/filter_ranges.php +10 -0
  34. data/spec/fixtures/filters.php +10 -0
  35. data/spec/fixtures/filters_different.php +13 -0
  36. data/spec/fixtures/geo_anchor.php +9 -0
  37. data/spec/fixtures/group_by_attr.php +9 -0
  38. data/spec/fixtures/group_by_attrpair.php +9 -0
  39. data/spec/fixtures/group_by_day.php +9 -0
  40. data/spec/fixtures/group_by_day_sort.php +9 -0
  41. data/spec/fixtures/group_by_month.php +9 -0
  42. data/spec/fixtures/group_by_week.php +9 -0
  43. data/spec/fixtures/group_by_year.php +9 -0
  44. data/spec/fixtures/group_distinct.php +10 -0
  45. data/spec/fixtures/id_range.php +9 -0
  46. data/spec/fixtures/id_range64.php +9 -0
  47. data/spec/fixtures/index_weights.php +9 -0
  48. data/spec/fixtures/keywords.php +8 -0
  49. data/spec/fixtures/limits.php +9 -0
  50. data/spec/fixtures/limits_cutoff.php +9 -0
  51. data/spec/fixtures/limits_max.php +9 -0
  52. data/spec/fixtures/limits_max_cutoff.php +9 -0
  53. data/spec/fixtures/match_all.php +9 -0
  54. data/spec/fixtures/match_any.php +9 -0
  55. data/spec/fixtures/match_boolean.php +9 -0
  56. data/spec/fixtures/match_extended.php +9 -0
  57. data/spec/fixtures/match_extended2.php +9 -0
  58. data/spec/fixtures/match_fullscan.php +9 -0
  59. data/spec/fixtures/match_phrase.php +9 -0
  60. data/spec/fixtures/max_query_time.php +9 -0
  61. data/spec/fixtures/miltiple_queries.php +12 -0
  62. data/spec/fixtures/ranking_bm25.php +9 -0
  63. data/spec/fixtures/ranking_none.php +9 -0
  64. data/spec/fixtures/ranking_proximity_bm25.php +9 -0
  65. data/spec/fixtures/ranking_wordcount.php +9 -0
  66. data/spec/fixtures/retries.php +9 -0
  67. data/spec/fixtures/retries_delay.php +9 -0
  68. data/spec/fixtures/sort_attr_asc.php +9 -0
  69. data/spec/fixtures/sort_attr_desc.php +9 -0
  70. data/spec/fixtures/sort_expr.php +9 -0
  71. data/spec/fixtures/sort_extended.php +9 -0
  72. data/spec/fixtures/sort_relevance.php +9 -0
  73. data/spec/fixtures/sort_time_segments.php +9 -0
  74. data/spec/fixtures/sphinxapi.php +1181 -0
  75. data/spec/fixtures/update_attributes.php +8 -0
  76. data/spec/fixtures/weights.php +9 -0
  77. data/spec/sphinx/sphinx.conf +67 -0
  78. data/spec/sphinx/sphinx_test.sql +86 -0
  79. metadata +133 -0
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/lib/sphinx'
data/lib/sphinx.rb ADDED
@@ -0,0 +1,22 @@
1
+ =begin rdoc
2
+ The generated SWIG module for accessing libsphinxclient's C API.
3
+
4
+ Includes the full set of libsphinxclient static methods (as defined in <tt>$INCLUDE_PATH/libsphinxclient.h</tt>), and classes for the available structs.
5
+
6
+ A number of SWIG typemaps and C helper methods are also defined in <tt>ext/rlibsphinxclient.i</tt>.
7
+
8
+ =end
9
+ module Rlibsphinxclient
10
+ end
11
+
12
+ require 'rlibsphinxclient'
13
+
14
+ module Sphinx
15
+ end
16
+
17
+ require File.dirname(__FILE__) + '/sphinx/fast_client'
18
+ require File.dirname(__FILE__) + '/sphinx/request'
19
+ require File.dirname(__FILE__) + '/sphinx/response'
20
+ require File.dirname(__FILE__) + '/sphinx/client'
21
+ require File.dirname(__FILE__) + '/sphinx/timeout'
22
+ require File.dirname(__FILE__) + '/sphinx/safe_executor'
@@ -0,0 +1,1070 @@
1
+ # = client.rb - Pure Ruby Sphinx client API
2
+ #
3
+ # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
+ # Copyright:: Copyright (c) 2006 - 2009 Dmytro Shteflyuk
5
+ # License:: Distributed under the MIT license.
6
+ # Version:: 0.2.2
7
+ # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
+ #
9
+ # This library is distributed under the terms of the MIT license.
10
+
11
+ # == Sphinx Client API
12
+ #
13
+ # The Sphinx Client API is used to communicate with <tt>searchd</tt>
14
+ # daemon and get search results from Sphinx.
15
+ #
16
+ # === Usage
17
+ #
18
+ # begin
19
+ # sphinx = Sphinx::Client.new
20
+ # result = sphinx.Query('test')
21
+ # ids = result['matches'].map { |match| match['id'] }.join(',')
22
+ # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
+ #
24
+ # docs = posts.map(&:body)
25
+ # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
+ # ensure
27
+ # sphinx.destroy
28
+ # end
29
+
30
+ require 'socket'
31
+
32
+ module Sphinx
33
+ # :stopdoc:
34
+
35
+ class SphinxError < StandardError; end
36
+ class SphinxArgumentError < SphinxError; end
37
+ class SphinxConnectError < SphinxError; end
38
+ class SphinxResponseError < SphinxError; end
39
+ class SphinxInternalError < SphinxError; end
40
+ class SphinxTemporaryError < SphinxError; end
41
+ class SphinxUnknownError < SphinxError; end
42
+ class SphinxRequestTimeout < SphinxError; end
43
+
44
+ # :startdoc:
45
+
46
+ # A pure Ruby Sphinx client API.
47
+ class Client
48
+
49
+ # :stopdoc:
50
+
51
+ # Known searchd commands
52
+
53
+ # search command
54
+ SEARCHD_COMMAND_SEARCH = 0
55
+ # excerpt command
56
+ SEARCHD_COMMAND_EXCERPT = 1
57
+ # update command
58
+ SEARCHD_COMMAND_UPDATE = 2
59
+ # keywords command
60
+ SEARCHD_COMMAND_KEYWORDS = 3
61
+
62
+ # Current client-side command implementation versions
63
+
64
+ # search command version
65
+ VER_COMMAND_SEARCH = 0x113
66
+ # excerpt command version
67
+ VER_COMMAND_EXCERPT = 0x100
68
+ # update command version
69
+ VER_COMMAND_UPDATE = 0x102
70
+ # keywords command version
71
+ VER_COMMAND_KEYWORDS = 0x100
72
+
73
+ # Known searchd status codes
74
+
75
+ # general success, command-specific reply follows
76
+ SEARCHD_OK = 0
77
+ # general failure, command-specific reply may follow
78
+ SEARCHD_ERROR = 1
79
+ # temporary failure, client should retry later
80
+ SEARCHD_RETRY = 2
81
+ # general success, warning message and command-specific reply follow
82
+ SEARCHD_WARNING = 3
83
+
84
+ # :startdoc:
85
+
86
+ # Known match modes
87
+
88
+ # match all query words
89
+ SPH_MATCH_ALL = 0
90
+ # match any query word
91
+ SPH_MATCH_ANY = 1
92
+ # match this exact phrase
93
+ SPH_MATCH_PHRASE = 2
94
+ # match this boolean query
95
+ SPH_MATCH_BOOLEAN = 3
96
+ # match this extended query
97
+ SPH_MATCH_EXTENDED = 4
98
+ # match all document IDs w/o fulltext query, apply filters
99
+ SPH_MATCH_FULLSCAN = 5
100
+ # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
101
+ SPH_MATCH_EXTENDED2 = 6
102
+
103
+ # Known ranking modes (ext2 only)
104
+
105
+ # default mode, phrase proximity major factor and BM25 minor one
106
+ SPH_RANK_PROXIMITY_BM25 = 0
107
+ # statistical mode, BM25 ranking only (faster but worse quality)
108
+ SPH_RANK_BM25 = 1
109
+ # no ranking, all matches get a weight of 1
110
+ SPH_RANK_NONE = 2
111
+ # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
112
+ SPH_RANK_WORDCOUNT = 3
113
+
114
+ # Known sort modes
115
+
116
+ # sort by document relevance desc, then by date
117
+ SPH_SORT_RELEVANCE = 0
118
+ # sort by document date desc, then by relevance desc
119
+ SPH_SORT_ATTR_DESC = 1
120
+ # sort by document date asc, then by relevance desc
121
+ SPH_SORT_ATTR_ASC = 2
122
+ # sort by time segments (hour/day/week/etc) desc, then by relevance desc
123
+ SPH_SORT_TIME_SEGMENTS = 3
124
+ # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
125
+ SPH_SORT_EXTENDED = 4
126
+ # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
127
+ SPH_SORT_EXPR = 5
128
+
129
+ # Known filter types
130
+
131
+ # filter by integer values set
132
+ SPH_FILTER_VALUES = 0
133
+ # filter by integer range
134
+ SPH_FILTER_RANGE = 1
135
+ # filter by float range
136
+ SPH_FILTER_FLOATRANGE = 2
137
+
138
+ # Known attribute types
139
+
140
+ # this attr is just an integer
141
+ SPH_ATTR_INTEGER = 1
142
+ # this attr is a timestamp
143
+ SPH_ATTR_TIMESTAMP = 2
144
+ # this attr is an ordinal string number (integer at search time,
145
+ # specially handled at indexing time)
146
+ SPH_ATTR_ORDINAL = 3
147
+ # this attr is a boolean bit field
148
+ SPH_ATTR_BOOL = 4
149
+ # this attr is a float
150
+ SPH_ATTR_FLOAT = 5
151
+ # this attr has multiple values (0 or more)
152
+ SPH_ATTR_MULTI = 0x40000000
153
+
154
+ # Known grouping functions
155
+
156
+ # group by day
157
+ SPH_GROUPBY_DAY = 0
158
+ # group by week
159
+ SPH_GROUPBY_WEEK = 1
160
+ # group by month
161
+ SPH_GROUPBY_MONTH = 2
162
+ # group by year
163
+ SPH_GROUPBY_YEAR = 3
164
+ # group by attribute value
165
+ SPH_GROUPBY_ATTR = 4
166
+ # group by sequential attrs pair
167
+ SPH_GROUPBY_ATTRPAIR = 5
168
+
169
+ # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
170
+ def initialize
171
+ # per-client-object settings
172
+ @host = 'localhost' # searchd host (default is "localhost")
173
+ @port = 3312 # searchd port (default is 3312)
174
+
175
+ # per-query settings
176
+ @offset = 0 # how many records to seek from result-set start (default is 0)
177
+ @limit = 20 # how many records to return from result-set starting at offset (default is 20)
178
+ @mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
179
+ @weights = [] # per-field weights (default is 1 for all fields)
180
+ @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
181
+ @sortby = '' # attribute to sort by (default is "")
182
+ @min_id = 0 # min ID to match (default is 0, which means no limit)
183
+ @max_id = 0 # max ID to match (default is 0, which means no limit)
184
+ @filters = [] # search filters
185
+ @groupby = '' # group-by attribute name
186
+ @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
187
+ @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
188
+ @groupdistinct = '' # group-by count-distinct attribute
189
+ @maxmatches = 1000 # max matches to retrieve
190
+ @cutoff = 0 # cutoff to stop searching at (default is 0)
191
+ @retrycount = 0 # distributed retries count
192
+ @retrydelay = 0 # distributed retries delay
193
+ @anchor = [] # geographical anchor point
194
+ @indexweights = [] # per-index weights
195
+ @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
196
+ @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
197
+ @fieldweights = {} # per-field-name weights
198
+
199
+ # per-reply fields (for single-query case)
200
+ @error = '' # last error message
201
+ @warning = '' # last warning message
202
+
203
+ @reqs = [] # requests storage (for multi-query case)
204
+
205
+ # ruby client specific fields
206
+ @timeout = 5 # request timeout (see SetTimeout)
207
+ @attempts = 3 # number of attempts to do request when timeout exceeded
208
+ end
209
+
210
+ # Does nothing.
211
+ def destroy
212
+ end
213
+
214
+ # Get last error message.
215
+ def GetLastError
216
+ @error
217
+ end
218
+
219
+ # Get last warning message.
220
+ def GetLastWarning
221
+ @warning
222
+ end
223
+
224
+ # Set searchd host name (string) and port (integer).
225
+ def SetServer(host, port)
226
+ assert { host.instance_of? String }
227
+ assert { port.instance_of? Fixnum }
228
+
229
+ @host = host
230
+ @port = port
231
+ end
232
+
233
+ # Set offset and count into result set,
234
+ # and optionally set max-matches and cutoff limits.
235
+ def SetLimits(offset, limit, max = 0, cutoff = 0)
236
+ assert { offset.instance_of? Fixnum }
237
+ assert { limit.instance_of? Fixnum }
238
+ assert { max.instance_of? Fixnum }
239
+ assert { offset >= 0 }
240
+ assert { limit > 0 }
241
+ assert { max >= 0 }
242
+
243
+ @offset = offset
244
+ @limit = limit
245
+ @maxmatches = max if max > 0
246
+ @cutoff = cutoff if cutoff > 0
247
+ end
248
+
249
+ # Set maximum query time, in milliseconds, per-index,
250
+ # integer, 0 means "do not limit"
251
+ def SetMaxQueryTime(max)
252
+ assert { max.instance_of? Fixnum }
253
+ assert { max >= 0 }
254
+ @maxquerytime = max
255
+ end
256
+
257
+ # Set matching mode.
258
+ def SetMatchMode(mode)
259
+ assert { mode == SPH_MATCH_ALL \
260
+ || mode == SPH_MATCH_ANY \
261
+ || mode == SPH_MATCH_PHRASE \
262
+ || mode == SPH_MATCH_BOOLEAN \
263
+ || mode == SPH_MATCH_EXTENDED \
264
+ || mode == SPH_MATCH_FULLSCAN \
265
+ || mode == SPH_MATCH_EXTENDED2 }
266
+
267
+ @mode = mode
268
+ end
269
+
270
+ # Set ranking mode.
271
+ def SetRankingMode(ranker)
272
+ assert { ranker == SPH_RANK_PROXIMITY_BM25 \
273
+ || ranker == SPH_RANK_BM25 \
274
+ || ranker == SPH_RANK_NONE \
275
+ || ranker == SPH_RANK_WORDCOUNT }
276
+
277
+ @ranker = ranker
278
+ end
279
+
280
+ # Set matches sorting mode.
281
+ def SetSortMode(mode, sortby = '')
282
+ assert { mode == SPH_SORT_RELEVANCE \
283
+ || mode == SPH_SORT_ATTR_DESC \
284
+ || mode == SPH_SORT_ATTR_ASC \
285
+ || mode == SPH_SORT_TIME_SEGMENTS \
286
+ || mode == SPH_SORT_EXTENDED \
287
+ || mode == SPH_SORT_EXPR }
288
+ assert { sortby.instance_of? String }
289
+ assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
290
+
291
+ @sort = mode
292
+ @sortby = sortby
293
+ end
294
+
295
+ # Bind per-field weights by order.
296
+ #
297
+ # DEPRECATED; use SetFieldWeights() instead.
298
+ def SetWeights(weights)
299
+ assert { weights.instance_of? Array }
300
+ weights.each do |weight|
301
+ assert { weight.instance_of? Fixnum }
302
+ end
303
+
304
+ @weights = weights
305
+ end
306
+
307
+ # Bind per-field weights by name.
308
+ #
309
+ # Takes a hash mapping string (field name) to integer (field weight) as an argument.
310
+ # * Takes precedence over SetWeights().
311
+ # * Unknown names will be silently ignored.
312
+ # * Unbound fields will be silently given a weight of 1.
313
+ def SetFieldWeights(weights)
314
+ assert { weights.instance_of? Hash }
315
+ weights.each do |name, weight|
316
+ assert { name.instance_of? String }
317
+ assert { weight.instance_of? Fixnum }
318
+ end
319
+
320
+ @fieldweights = weights
321
+ end
322
+
323
+ # Bind per-index weights by name.
324
+ def SetIndexWeights(weights)
325
+ assert { weights.instance_of? Hash }
326
+ weights.each do |index, weight|
327
+ assert { index.instance_of? String }
328
+ assert { weight.instance_of? Fixnum }
329
+ end
330
+
331
+ @indexweights = weights
332
+ end
333
+
334
+ # Set IDs range to match.
335
+ #
336
+ # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
337
+ def SetIDRange(min, max)
338
+ assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
339
+ assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
340
+ assert { min <= max }
341
+
342
+ @min_id = min
343
+ @max_id = max
344
+ end
345
+
346
+ # Set values filter.
347
+ #
348
+ # Only match those records where <tt>attribute</tt> column values
349
+ # are in specified set.
350
+ def SetFilter(attribute, values, exclude = false)
351
+ assert { attribute.instance_of? String }
352
+ assert { values.instance_of? Array }
353
+ assert { !values.empty? }
354
+
355
+ if values.instance_of?(Array) && values.size > 0
356
+ values.each do |value|
357
+ assert { value.instance_of? Fixnum }
358
+ end
359
+
360
+ @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
361
+ end
362
+ end
363
+
364
+ # Set range filter.
365
+ #
366
+ # Only match those records where <tt>attribute</tt> column value
367
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
368
+ def SetFilterRange(attribute, min, max, exclude = false)
369
+ assert { attribute.instance_of? String }
370
+ assert { min.instance_of? Fixnum }
371
+ assert { max.instance_of? Fixnum }
372
+ assert { min <= max }
373
+
374
+ @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
375
+ end
376
+
377
+ # Set float range filter.
378
+ #
379
+ # Only match those records where <tt>attribute</tt> column value
380
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
381
+ def SetFilterFloatRange(attribute, min, max, exclude = false)
382
+ assert { attribute.instance_of? String }
383
+ assert { min.instance_of? Float }
384
+ assert { max.instance_of? Float }
385
+ assert { min <= max }
386
+
387
+ @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
388
+ end
389
+
390
+ # Setup anchor point for geosphere distance calculations.
391
+ #
392
+ # Required to use <tt>@geodist</tt> in filters and sorting;
393
+ # distance will be computed to this point. Latitude and longitude
394
+ # must be in radians.
395
+ #
396
+ # * <tt>attrlat</tt> -- is the name of latitude attribute
397
+ # * <tt>attrlong</tt> -- is the name of longitude attribute
398
+ # * <tt>lat</tt> -- is anchor point latitude, in radians
399
+ # * <tt>long</tt> -- is anchor point longitude, in radians
400
+ def SetGeoAnchor(attrlat, attrlong, lat, long)
401
+ assert { attrlat.instance_of? String }
402
+ assert { attrlong.instance_of? String }
403
+ assert { lat.instance_of? Float }
404
+ assert { long.instance_of? Float }
405
+
406
+ @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
407
+ end
408
+
409
+ # Set grouping attribute and function.
410
+ #
411
+ # In grouping mode, all matches are assigned to different groups
412
+ # based on grouping function value.
413
+ #
414
+ # Each group keeps track of the total match count, and the best match
415
+ # (in this group) according to current sorting function.
416
+ #
417
+ # The final result set contains one best match per group, with
418
+ # grouping function value and matches count attached.
419
+ #
420
+ # Groups in result set could be sorted by any sorting clause,
421
+ # including both document attributes and the following special
422
+ # internal Sphinx attributes:
423
+ #
424
+ # * @id - match document ID;
425
+ # * @weight, @rank, @relevance - match weight;
426
+ # * @group - groupby function value;
427
+ # * @count - amount of matches in group.
428
+ #
429
+ # the default mode is to sort by groupby value in descending order,
430
+ # ie. by '@group desc'.
431
+ #
432
+ # 'total_found' would contain total amount of matching groups over
433
+ # the whole index.
434
+ #
435
+ # WARNING: grouping is done in fixed memory and thus its results
436
+ # are only approximate; so there might be more groups reported
437
+ # in total_found than actually present. @count might also
438
+ # be underestimated.
439
+ #
440
+ # For example, if sorting by relevance and grouping by "published"
441
+ # attribute with SPH_GROUPBY_DAY function, then the result set will
442
+ # contain one most relevant match per each day when there were any
443
+ # matches published, with day number and per-day match count attached,
444
+ # and sorted by day number in descending order (ie. recent days first).
445
+ def SetGroupBy(attribute, func, groupsort = '@group desc')
446
+ assert { attribute.instance_of? String }
447
+ assert { groupsort.instance_of? String }
448
+ assert { func == SPH_GROUPBY_DAY \
449
+ || func == SPH_GROUPBY_WEEK \
450
+ || func == SPH_GROUPBY_MONTH \
451
+ || func == SPH_GROUPBY_YEAR \
452
+ || func == SPH_GROUPBY_ATTR \
453
+ || func == SPH_GROUPBY_ATTRPAIR }
454
+
455
+ @groupby = attribute
456
+ @groupfunc = func
457
+ @groupsort = groupsort
458
+ end
459
+
460
+ # Set count-distinct attribute for group-by queries.
461
+ def SetGroupDistinct(attribute)
462
+ assert { attribute.instance_of? String }
463
+ @groupdistinct = attribute
464
+ end
465
+
466
+ # Set distributed retries count and delay.
467
+ def SetRetries(count, delay = 0)
468
+ assert { count.instance_of? Fixnum }
469
+ assert { delay.instance_of? Fixnum }
470
+
471
+ @retrycount = count
472
+ @retrydelay = delay
473
+ end
474
+
475
+ # Set request timeout and number of attempts to execute query in case
476
+ # of timeout exceeded (ruby client specific).
477
+ def SetTimeout(timeout, attempts = 3)
478
+ assert { timeout.instance_of? Fixnum }
479
+ assert { attempts.instance_of? Fixnum }
480
+
481
+ @timeout = timeout
482
+ @attempts = attempts
483
+ end
484
+
485
+ # Clear all filters (for multi-queries).
486
+ def ResetFilters
487
+ @filters = []
488
+ @anchor = []
489
+ end
490
+
491
+ # Clear groupby settings (for multi-queries).
492
+ def ResetGroupBy
493
+ @groupby = ''
494
+ @groupfunc = SPH_GROUPBY_DAY
495
+ @groupsort = '@group desc'
496
+ @groupdistinct = ''
497
+ end
498
+
499
+ # Connect to searchd server and run given search query.
500
+ #
501
+ # <tt>query</tt> is query string
502
+ #
503
+ # <tt>index</tt> is index name (or names) to query. default value is "*" which means
504
+ # to query all indexes. Accepted characters for index names are letters, numbers,
505
+ # dash, and underscore; everything else is considered a separator. Therefore,
506
+ # all the following calls are valid and will search two indexes:
507
+ #
508
+ # sphinx.Query('test query', 'main delta')
509
+ # sphinx.Query('test query', 'main;delta')
510
+ # sphinx.Query('test query', 'main, delta')
511
+ #
512
+ # Index order matters. If identical IDs are found in two or more indexes,
513
+ # weight and attribute values from the very last matching index will be used
514
+ # for sorting and returning to client. Therefore, in the example above,
515
+ # matches from "delta" index will always "win" over matches from "main".
516
+ #
517
+ # Returns false on failure.
518
+ # Returns hash which has the following keys on success:
519
+ #
520
+ # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
521
+ # * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
522
+ # * <tt>'total_found'</tt> -- total amount of matching documents in index
523
+ # * <tt>'time'</tt> -- search time
524
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
525
+ def Query(query, index = '*', comment = '')
526
+ assert { @reqs.empty? }
527
+ @reqs = []
528
+
529
+ self.AddQuery(query, index, comment)
530
+ results = self.RunQueries
531
+
532
+ # probably network error; error message should be already filled
533
+ return false unless results.instance_of?(Array)
534
+
535
+ @error = results[0]['error']
536
+ @warning = results[0]['warning']
537
+
538
+ return false if results[0]['status'] == SEARCHD_ERROR
539
+ return results[0]
540
+ end
541
+
542
+ # Add query to batch.
543
+ #
544
+ # Batch queries enable searchd to perform internal optimizations,
545
+ # if possible; and reduce network connection overheads in all cases.
546
+ #
547
+ # For instance, running exactly the same query with different
548
+ # groupby settings will enable searchd to perform expensive
549
+ # full-text search and ranking operation only once, but compute
550
+ # multiple groupby results from its output.
551
+ #
552
+ # Parameters are exactly the same as in <tt>Query</tt> call.
553
+ # Returns index to results array returned by <tt>RunQueries</tt> call.
554
+ def AddQuery(query, index = '*', comment = '')
555
+ # build request
556
+
557
+ # mode and limits
558
+ request = Request.new
559
+ request.put_int @offset, @limit, @mode, @ranker, @sort
560
+ request.put_string @sortby
561
+ # query itself
562
+ request.put_string query
563
+ # weights
564
+ request.put_int_array @weights
565
+ # indexes
566
+ request.put_string index
567
+ # id64 range marker
568
+ request.put_int 1
569
+ # id64 range
570
+ request.put_int64 @min_id.to_i, @max_id.to_i
571
+
572
+ # filters
573
+ request.put_int @filters.length
574
+ @filters.each do |filter|
575
+ request.put_string filter['attr']
576
+ request.put_int filter['type']
577
+
578
+ case filter['type']
579
+ when SPH_FILTER_VALUES
580
+ request.put_int_array filter['values']
581
+ when SPH_FILTER_RANGE
582
+ request.put_int filter['min'], filter['max']
583
+ when SPH_FILTER_FLOATRANGE
584
+ request.put_float filter['min'], filter['max']
585
+ else
586
+ raise SphinxInternalError, 'Internal error: unhandled filter type'
587
+ end
588
+ request.put_int filter['exclude'] ? 1 : 0
589
+ end
590
+
591
+ # group-by clause, max-matches count, group-sort clause, cutoff count
592
+ request.put_int @groupfunc
593
+ request.put_string @groupby
594
+ request.put_int @maxmatches
595
+ request.put_string @groupsort
596
+ request.put_int @cutoff, @retrycount, @retrydelay
597
+ request.put_string @groupdistinct
598
+
599
+ # anchor point
600
+ if @anchor.empty?
601
+ request.put_int 0
602
+ else
603
+ request.put_int 1
604
+ request.put_string @anchor['attrlat'], @anchor['attrlong']
605
+ request.put_float @anchor['lat'], @anchor['long']
606
+ end
607
+
608
+ # per-index weights
609
+ request.put_int @indexweights.length
610
+ @indexweights.each do |idx, weight|
611
+ request.put_string idx
612
+ request.put_int weight
613
+ end
614
+
615
+ # max query time
616
+ request.put_int @maxquerytime
617
+
618
+ # per-field weights
619
+ request.put_int @fieldweights.length
620
+ @fieldweights.each do |field, weight|
621
+ request.put_string field
622
+ request.put_int weight
623
+ end
624
+
625
+ request.put_string comment
626
+
627
+ # store request to requests array
628
+ @reqs << request.to_s;
629
+ return @reqs.length - 1
630
+ end
631
+
632
+ # Run queries batch.
633
+ #
634
+ # Returns an array of result sets on success.
635
+ # Returns false on network IO failure.
636
+ #
637
+ # Each result set in returned array is a hash which contains
638
+ # the same keys as the hash returned by <tt>Query</tt>, plus:
639
+ #
640
+ # * <tt>'error'</tt> -- search error for this query
641
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
642
+ def RunQueries
643
+ if @reqs.empty?
644
+ @error = 'No queries defined, issue AddQuery() first'
645
+ return false
646
+ end
647
+
648
+ req = @reqs.join('')
649
+ nreqs = @reqs.length
650
+ @reqs = []
651
+ response = PerformRequest(:search, req, nreqs)
652
+
653
+ # parse response
654
+ begin
655
+ results = []
656
+ ires = 0
657
+ while ires < nreqs
658
+ ires += 1
659
+ result = {}
660
+
661
+ result['error'] = ''
662
+ result['warning'] = ''
663
+
664
+ # extract status
665
+ status = result['status'] = response.get_int
666
+ if status != SEARCHD_OK
667
+ message = response.get_string
668
+ if status == SEARCHD_WARNING
669
+ result['warning'] = message
670
+ else
671
+ result['error'] = message
672
+ results << result
673
+ next
674
+ end
675
+ end
676
+
677
+ # read schema
678
+ fields = []
679
+ attrs = {}
680
+ attrs_names_in_order = []
681
+
682
+ nfields = response.get_int
683
+ while nfields > 0
684
+ nfields -= 1
685
+ fields << response.get_string
686
+ end
687
+ result['fields'] = fields
688
+
689
+ nattrs = response.get_int
690
+ while nattrs > 0
691
+ nattrs -= 1
692
+ attr = response.get_string
693
+ type = response.get_int
694
+ attrs[attr] = type
695
+ attrs_names_in_order << attr
696
+ end
697
+ result['attrs'] = attrs
698
+
699
+ # read match count
700
+ count = response.get_int
701
+ id64 = response.get_int
702
+
703
+ # read matches
704
+ result['matches'] = []
705
+ while count > 0
706
+ count -= 1
707
+
708
+ if id64 != 0
709
+ doc = response.get_int64
710
+ weight = response.get_int
711
+ else
712
+ doc, weight = response.get_ints(2)
713
+ end
714
+
715
+ r = {} # This is a single result put in the result['matches'] array
716
+ r['id'] = doc
717
+ r['weight'] = weight
718
+ attrs_names_in_order.each do |a|
719
+ r['attrs'] ||= {}
720
+
721
+ # handle floats
722
+ if attrs[a] == SPH_ATTR_FLOAT
723
+ r['attrs'][a] = response.get_float
724
+ else
725
+ # handle everything else as unsigned ints
726
+ val = response.get_int
727
+ if (attrs[a] & SPH_ATTR_MULTI) != 0
728
+ r['attrs'][a] = []
729
+ 1.upto(val) do
730
+ r['attrs'][a] << response.get_int
731
+ end
732
+ else
733
+ r['attrs'][a] = val
734
+ end
735
+ end
736
+ end
737
+ result['matches'] << r
738
+ end
739
+ result['total'], result['total_found'], msecs, words = response.get_ints(4)
740
+ result['time'] = '%.3f' % (msecs / 1000.0)
741
+
742
+ result['words'] = {}
743
+ while words > 0
744
+ words -= 1
745
+ word = response.get_string
746
+ docs, hits = response.get_ints(2)
747
+ result['words'][word] = { 'docs' => docs, 'hits' => hits }
748
+ end
749
+
750
+ results << result
751
+ end
752
+ #rescue EOFError
753
+ # @error = 'incomplete reply'
754
+ # raise SphinxResponseError, @error
755
+ end
756
+
757
+ return results
758
+ end
759
+
760
+ # Connect to searchd server and generate excerpts from given documents.
761
+ #
762
+ # * <tt>docs</tt> -- an array of strings which represent the documents' contents
763
+ # * <tt>index</tt> -- a string specifying the index whose settings will be used
764
+ # for stemming, lexing and case folding
765
+ # * <tt>words</tt> -- a string which contains the words to highlight
766
+ # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
767
+ #
768
+ # You can use following parameters:
769
+ # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
770
+ # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
771
+ # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
772
+ # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
773
+ # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
774
+ # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
775
+ # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
776
+ # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
777
+ # * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
778
+ #
779
+ # Returns false on failure.
780
+ # Returns an array of string excerpts on success.
781
def BuildExcerpts(docs, index, words, opts = {})
  # Builds an :excerpt request from the documents, index name, words and
  # highlighting options, sends it through PerformRequest and reads back one
  # excerpt string per document.
  #
  # Returns an array with one excerpt string per entry of +docs+.
  # Raises SphinxResponseError (and sets @error) when the reply runs out of
  # data before every document has been answered.
  assert { docs.instance_of? Array }
  assert { index.instance_of? String }
  assert { words.instance_of? String }
  assert { opts.instance_of? Hash }

  # Fill in defaults on a private copy: the caller's hash may be frozen or
  # reused between calls and must not be modified behind their back.
  opts = opts.dup
  opts['before_match'] ||= '<b>'
  opts['after_match'] ||= '</b>'
  opts['chunk_separator'] ||= ' ... '
  opts['limit'] ||= 256
  opts['around'] ||= 5
  opts['exact_phrase'] ||= false
  opts['single_passage'] ||= false
  opts['use_boundaries'] ||= false
  opts['weight_order'] ||= false

  # v.1.0 request flags: bit 0 is always set (noted as "remove spaces" in the
  # original code); the remaining bits mirror the boolean options.
  flags = 1
  flags |= 2 if opts['exact_phrase']
  flags |= 4 if opts['single_passage']
  flags |= 8 if opts['use_boundaries']
  flags |= 16 if opts['weight_order']

  request = Request.new
  request.put_int 0, flags # mode=0, then flags
  request.put_string index
  request.put_string words

  # highlighting options
  request.put_string opts['before_match']
  request.put_string opts['after_match']
  request.put_string opts['chunk_separator']
  request.put_int opts['limit'].to_i, opts['around'].to_i

  # documents: count first, then each body
  request.put_int docs.size
  docs.each do |doc|
    assert { doc.instance_of? String }
    request.put_string doc
  end

  response = PerformRequest(:excerpt, request)

  # The reply carries exactly one string per submitted document.
  begin
    docs.map { response.get_string }
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
842
+
843
+ # Connect to searchd server, and generate keyword list for a given query.
844
+ #
845
+ # Returns an array of words on success.
846
def BuildKeywords(query, index, hits)
  # Sends a :keywords request for +query+ against +index+ and decodes the
  # reply into an array of hashes, one per keyword, with the keys
  # 'tokenized' and 'normalized'. When +hits+ is true each hash also gets
  # 'docs' and 'hits', read as a pair of integers from the reply.
  #
  # Raises SphinxResponseError (and sets @error) when the reply ends early.
  assert { query.instance_of? String }
  assert { index.instance_of? String }
  # Ruby has no core Boolean class; use the same true/false check that
  # UpdateAttributes applies to its flag argument.
  assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }

  # v.1.0 request: query, index, "want statistics" flag
  request = Request.new
  request.put_string query
  request.put_string index
  request.put_int hits ? 1 : 0

  response = PerformRequest(:keywords, request)

  begin
    nwords = response.get_int
    (0...nwords).map do
      tokenized = response.get_string
      normalized = response.get_string

      entry = { 'tokenized' => tokenized, 'normalized' => normalized }
      entry['docs'], entry['hits'] = response.get_ints(2) if hits
      entry
    end
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
880
+
881
+ # Batch update given attributes in given rows in given indexes.
882
+ #
883
+ # * +index+ is a name of the index to be updated
884
+ # * +attrs+ is an array of attribute name strings.
885
+ # * +values+ is a hash where key is document id, and value is an array of
885
+ # new attribute values
886
+ # * +mva+ identifies whether to update MVA (multi-value) attributes
888
+ #
889
+ # Returns number of actually updated documents (0 or more) on success.
890
+ # Returns -1 on failure.
891
+ #
892
+ # Usage example:
893
+ # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
894
def UpdateAttributes(index, attrs, values, mva = false)
  # Sends an :update request that assigns new values to the attributes named
  # in +attrs+ for every document id in +values+, and returns the integer
  # read from the reply.
  #
  # +values+ maps a document id to an array with one entry per attribute:
  # an integer normally, or an array of integers when +mva+ is true.
  #
  # Raises SphinxResponseError (and sets @error) when the reply is truncated.

  # Argument checks. Integer is used instead of Fixnum: Fixnum was removed in
  # Ruby 3.2, and on old Rubies it rejected ids that did not fit a machine
  # word even though ids are written as 64-bit values below.
  assert { index.instance_of? String }
  assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }

  assert { attrs.instance_of? Array }
  attrs.each do |attr|
    assert { attr.instance_of? String }
  end

  assert { values.instance_of? Hash }
  values.each do |id, entry|
    assert { id.is_a? Integer }
    assert { entry.instance_of? Array }
    assert { entry.length == attrs.length }
    entry.each do |v|
      if mva
        assert { v.instance_of? Array }
        v.each { |vv| assert { vv.is_a? Integer } }
      else
        assert { v.is_a? Integer }
      end
    end
  end

  # build request
  request = Request.new
  request.put_string index

  # attribute list: each name is followed by its MVA flag
  request.put_int attrs.length
  attrs.each do |attr|
    request.put_string attr
    request.put_int mva ? 1 : 0
  end

  # rows: 64-bit document id followed by the new values
  request.put_int values.length
  values.each do |id, entry|
    request.put_int64_new id
    if mva
      entry.each { |v| request.put_int_array v }
    else
      request.put_int(*entry)
    end
  end

  response = PerformRequest(:update, request)

  # parse response
  begin
    response.get_int
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
949
+
950
+ protected
951
+
952
+ # Connect to searchd server.
953
def Connect
  # Opens a TCP connection to @host:@port, performs the version handshake
  # and yields the connected socket to the caller's block. The socket is
  # closed on the way out, whether the block finished or raised.
  #
  # Raises SphinxConnectError (and sets @error) when the connection cannot
  # be opened or the server reports a protocol version below 1.
  socket = begin
    TCPSocket.new(@host, @port)
  rescue
    @error = "connection to #{@host}:#{@port} failed"
    raise SphinxConnectError, @error
  end

  # The server speaks first: a 4-byte big-endian protocol version.
  greeting = socket.recv(4)
  server_version = greeting.unpack('N*').first.to_i
  unless server_version >= 1
    @error = "expected searchd protocol version 1+, got version '#{server_version}'"
    raise SphinxConnectError, @error
  end

  # Answer with our own protocol version, then hand the socket over.
  socket.send([1].pack('N'), 0)
  yield socket
ensure
  # Best-effort close: the socket may be nil (connect failed) or may have
  # been closed already by the block.
  begin
    socket.close
  rescue StandardError
    nil
  end
end
972
+
973
+ # Get and check response packet from searchd server.
974
def GetResponse(sock, client_version)
  # Reads one reply packet from +sock+ (8-byte header: status, version,
  # body length; then the body), closes the socket and returns the body.
  #
  # Sets @warning when the server attached a warning or answered with an
  # older command version than +client_version+.
  #
  # Raises SphinxResponseError when the body is missing or shorter than
  # announced, SphinxInternalError / SphinxTemporaryError for the error and
  # retry statuses, and SphinxUnknownError for any other non-OK status.
  # @error is set before every raise.
  response = ''
  len = 0

  # recv may hand back nil on a closed connection; treat that as no header.
  header = sock.recv(8).to_s
  if header.length == 8
    status, ver, len = header.unpack('n2N')
    left = len
    while left > 0
      begin
        chunk = sock.recv(left)
      rescue EOFError
        break
      end
      # An empty (or nil) read means the peer closed the connection. Stop
      # here instead of polling forever; the length check below reports it.
      break if chunk.nil? || chunk.empty?

      response << chunk
      left -= chunk.bytesize
    end
  end
  sock.close

  # check response
  read = response.bytesize
  if response.empty? || read != len
    # 0 is truthy in Ruby, so the length has to be compared explicitly to
    # tell a short read apart from an empty reply.
    @error = if len > 0
               "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})"
             else
               'received zero-sized searchd response'
             end
    raise SphinxResponseError, @error
  end

  # check status
  if status == SEARCHD_WARNING
    # Body layout: 4-byte warning length, warning text, then the payload.
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return response[4 + wlen, response.length - 4 - wlen]
  end

  if status == SEARCHD_ERROR
    @error = 'searchd error: ' + response[4, response.length - 4]
    raise SphinxInternalError, @error
  end

  if status == SEARCHD_RETRY
    @error = 'temporary searchd error: ' + response[4, response.length - 4]
    raise SphinxTemporaryError, @error
  end

  unless status == SEARCHD_OK
    @error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, @error
  end

  # check version
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
      "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  response
end
1035
+
1036
+ # Connect, send query, get response.
1037
def PerformRequest(command, request, additional = nil)
  # Frames +request+ as a packet for +command+ (command id, command version
  # and body length, optionally followed by one extra 32-bit integer),
  # sends it over a fresh connection and returns the reply wrapped in a
  # Response. The exchange runs inside SafeExecutor.execute with @timeout
  # and @attempts.
  #
  # Raises SphinxRequestTimeout (and sets @error) when a ::Timeout::Error
  # escapes the executor.
  cmd = command.to_s.upcase
  command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
  command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)

  payload = request.to_s
  # The length field counts bytes on the wire, so it must come from bytesize:
  # String#length counts characters and under-reports multibyte payloads.
  len = payload.bytesize + (additional.nil? ? 0 : 4)
  header = [command_id, command_ver, len].pack('nnN')
  header << [additional].pack('N') unless additional.nil?

  begin
    SafeExecutor.execute(@timeout, @attempts) do
      Connect() do |sock|
        sock.send(header + payload, 0)
        response = GetResponse(sock, command_ver)
        # Return straight out of the method from inside the blocks.
        return Response.new(response)
      end
    end
  rescue ::Timeout::Error
    @error = "request timeout: timeout=#{@timeout}; attempts=#{@attempts}"
    # Keep the backtrace of the original timeout.
    raise SphinxRequestTimeout, @error, $@
  end
end
1059
+
1060
+ # :stopdoc:
1061
def assert
  # Debug-only assertion helper. With $DEBUG set it evaluates the given
  # block and raises when the block returns a falsy value. Without $DEBUG it
  # replaces itself with an empty method, so the block is never evaluated
  # on this or any later call.
  if $DEBUG
    raise 'Assertion failed!' unless yield
    return
  end

  # Assertions are disabled: swap in the no-op version.
  def assert; end
  nil
end
1068
+ # :startdoc:
1069
+ end
1070
+ end