rlibsphinxclient 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. data/.gitignore +3 -0
  2. data/CHANGELOG.rdoc +18 -0
  3. data/MIT-LICENSE +20 -0
  4. data/README.rdoc +151 -0
  5. data/Rakefile +39 -0
  6. data/VERSION +1 -0
  7. data/ext/extconf.rb +20 -0
  8. data/ext/rlibsphinxclient.i +314 -0
  9. data/ext/rlibsphinxclient_wrap.c +5931 -0
  10. data/init.rb +1 -0
  11. data/lib/sphinx.rb +22 -0
  12. data/lib/sphinx/client.rb +1070 -0
  13. data/lib/sphinx/fast_client.rb +184 -0
  14. data/lib/sphinx/request.rb +49 -0
  15. data/lib/sphinx/response.rb +69 -0
  16. data/lib/sphinx/safe_executor.rb +11 -0
  17. data/lib/sphinx/timeout.rb +9 -0
  18. data/rlibsphinxclient.gemspec +117 -0
  19. data/spec/client_response_spec.rb +135 -0
  20. data/spec/client_spec.rb +548 -0
  21. data/spec/fixtures/default_search.php +8 -0
  22. data/spec/fixtures/default_search_index.php +8 -0
  23. data/spec/fixtures/excerpt_custom.php +11 -0
  24. data/spec/fixtures/excerpt_default.php +8 -0
  25. data/spec/fixtures/excerpt_flags.php +11 -0
  26. data/spec/fixtures/field_weights.php +9 -0
  27. data/spec/fixtures/filter.php +9 -0
  28. data/spec/fixtures/filter_exclude.php +9 -0
  29. data/spec/fixtures/filter_float_range.php +9 -0
  30. data/spec/fixtures/filter_float_range_exclude.php +9 -0
  31. data/spec/fixtures/filter_range.php +9 -0
  32. data/spec/fixtures/filter_range_exclude.php +9 -0
  33. data/spec/fixtures/filter_ranges.php +10 -0
  34. data/spec/fixtures/filters.php +10 -0
  35. data/spec/fixtures/filters_different.php +13 -0
  36. data/spec/fixtures/geo_anchor.php +9 -0
  37. data/spec/fixtures/group_by_attr.php +9 -0
  38. data/spec/fixtures/group_by_attrpair.php +9 -0
  39. data/spec/fixtures/group_by_day.php +9 -0
  40. data/spec/fixtures/group_by_day_sort.php +9 -0
  41. data/spec/fixtures/group_by_month.php +9 -0
  42. data/spec/fixtures/group_by_week.php +9 -0
  43. data/spec/fixtures/group_by_year.php +9 -0
  44. data/spec/fixtures/group_distinct.php +10 -0
  45. data/spec/fixtures/id_range.php +9 -0
  46. data/spec/fixtures/id_range64.php +9 -0
  47. data/spec/fixtures/index_weights.php +9 -0
  48. data/spec/fixtures/keywords.php +8 -0
  49. data/spec/fixtures/limits.php +9 -0
  50. data/spec/fixtures/limits_cutoff.php +9 -0
  51. data/spec/fixtures/limits_max.php +9 -0
  52. data/spec/fixtures/limits_max_cutoff.php +9 -0
  53. data/spec/fixtures/match_all.php +9 -0
  54. data/spec/fixtures/match_any.php +9 -0
  55. data/spec/fixtures/match_boolean.php +9 -0
  56. data/spec/fixtures/match_extended.php +9 -0
  57. data/spec/fixtures/match_extended2.php +9 -0
  58. data/spec/fixtures/match_fullscan.php +9 -0
  59. data/spec/fixtures/match_phrase.php +9 -0
  60. data/spec/fixtures/max_query_time.php +9 -0
  61. data/spec/fixtures/miltiple_queries.php +12 -0
  62. data/spec/fixtures/ranking_bm25.php +9 -0
  63. data/spec/fixtures/ranking_none.php +9 -0
  64. data/spec/fixtures/ranking_proximity_bm25.php +9 -0
  65. data/spec/fixtures/ranking_wordcount.php +9 -0
  66. data/spec/fixtures/retries.php +9 -0
  67. data/spec/fixtures/retries_delay.php +9 -0
  68. data/spec/fixtures/sort_attr_asc.php +9 -0
  69. data/spec/fixtures/sort_attr_desc.php +9 -0
  70. data/spec/fixtures/sort_expr.php +9 -0
  71. data/spec/fixtures/sort_extended.php +9 -0
  72. data/spec/fixtures/sort_relevance.php +9 -0
  73. data/spec/fixtures/sort_time_segments.php +9 -0
  74. data/spec/fixtures/sphinxapi.php +1181 -0
  75. data/spec/fixtures/update_attributes.php +8 -0
  76. data/spec/fixtures/weights.php +9 -0
  77. data/spec/sphinx/sphinx.conf +67 -0
  78. data/spec/sphinx/sphinx_test.sql +86 -0
  79. metadata +133 -0
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require File.dirname(__FILE__) + '/lib/sphinx'
data/lib/sphinx.rb ADDED
@@ -0,0 +1,22 @@
1
+ =begin rdoc
2
+ The generated SWIG module for accessing libsphinxclient's C API.
3
+
4
+ Includes the full set of libsphinxclient static methods (as defined in <tt>$INCLUDE_PATH/libsphinxclient.h</tt>), and classes for the available structs.
5
+
6
+ A number of SWIG typemaps and C helper methods are also defined in <tt>ext/rlibsphinxclient.i</tt>.
7
+
8
+ =end
9
+ module Rlibsphinxclient
10
+ end
11
+
12
+ require 'rlibsphinxclient'
13
+
14
+ module Sphinx
15
+ end
16
+
17
+ require File.dirname(__FILE__) + '/sphinx/fast_client'
18
+ require File.dirname(__FILE__) + '/sphinx/request'
19
+ require File.dirname(__FILE__) + '/sphinx/response'
20
+ require File.dirname(__FILE__) + '/sphinx/client'
21
+ require File.dirname(__FILE__) + '/sphinx/timeout'
22
+ require File.dirname(__FILE__) + '/sphinx/safe_executor'
data/lib/sphinx/client.rb ADDED
@@ -0,0 +1,1070 @@
1
+ # = client.rb - Pure Ruby Sphinx client API
2
+ #
3
+ # Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
4
+ # Copyright:: Copyright (c) 2006 - 2009 Dmytro Shteflyuk
5
+ # License:: Distributed under the MIT license.
6
+ # Version:: 0.2.2
7
+ # Website:: http://kpumuk.info/projects/ror-plugins/sphinx
8
+ #
9
+ # This library is distributed under the terms of the MIT license.
10
+
11
+ # == Sphinx Client API
12
+ #
13
+ # The Sphinx Client API is used to communicate with <tt>searchd</tt>
14
+ # daemon and get search results from Sphinx.
15
+ #
16
+ # === Usage
17
+ #
18
+ # begin
19
+ # sphinx = Sphinx::Client.new
20
+ # result = sphinx.Query('test')
21
+ # ids = result['matches'].map { |match| match['id'] }.join(',')
22
+ # posts = Post.find :all, :conditions => "id IN (#{ids})"
23
+ #
24
+ # docs = posts.map(&:body)
25
+ # excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
26
+ # ensure
27
+ # sphinx.destroy
28
+ # end
29
+
30
+ require 'socket'
31
+
32
+ module Sphinx
33
+ # :stopdoc:
34
+
35
+ class SphinxError < StandardError; end
36
+ class SphinxArgumentError < SphinxError; end
37
+ class SphinxConnectError < SphinxError; end
38
+ class SphinxResponseError < SphinxError; end
39
+ class SphinxInternalError < SphinxError; end
40
+ class SphinxTemporaryError < SphinxError; end
41
+ class SphinxUnknownError < SphinxError; end
42
+ class SphinxRequestTimeout < SphinxError; end
43
+
44
+ # :startdoc:
45
+
46
+ # A pure Ruby Sphinx client API.
47
+ class Client
48
+
49
+ # :stopdoc:
50
+
51
+ # Known searchd commands
52
+
53
+ # search command
54
+ SEARCHD_COMMAND_SEARCH = 0
55
+ # excerpt command
56
+ SEARCHD_COMMAND_EXCERPT = 1
57
+ # update command
58
+ SEARCHD_COMMAND_UPDATE = 2
59
+ # keywords command
60
+ SEARCHD_COMMAND_KEYWORDS = 3
61
+
62
+ # Current client-side command implementation versions
63
+
64
+ # search command version
65
+ VER_COMMAND_SEARCH = 0x113
66
+ # excerpt command version
67
+ VER_COMMAND_EXCERPT = 0x100
68
+ # update command version
69
+ VER_COMMAND_UPDATE = 0x102
70
+ # keywords command version
71
+ VER_COMMAND_KEYWORDS = 0x100
72
+
73
+ # Known searchd status codes
74
+
75
+ # general success, command-specific reply follows
76
+ SEARCHD_OK = 0
77
+ # general failure, command-specific reply may follow
78
+ SEARCHD_ERROR = 1
79
+ # temporary failure, client should retry later
80
+ SEARCHD_RETRY = 2
81
+ # general success, warning message and command-specific reply follow
82
+ SEARCHD_WARNING = 3
83
+
84
+ # :startdoc:
85
+
86
+ # Known match modes
87
+
88
+ # match all query words
89
+ SPH_MATCH_ALL = 0
90
+ # match any query word
91
+ SPH_MATCH_ANY = 1
92
+ # match this exact phrase
93
+ SPH_MATCH_PHRASE = 2
94
+ # match this boolean query
95
+ SPH_MATCH_BOOLEAN = 3
96
+ # match this extended query
97
+ SPH_MATCH_EXTENDED = 4
98
+ # match all document IDs w/o fulltext query, apply filters
99
+ SPH_MATCH_FULLSCAN = 5
100
+ # extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
101
+ SPH_MATCH_EXTENDED2 = 6
102
+
103
+ # Known ranking modes (ext2 only)
104
+
105
+ # default mode, phrase proximity major factor and BM25 minor one
106
+ SPH_RANK_PROXIMITY_BM25 = 0
107
+ # statistical mode, BM25 ranking only (faster but worse quality)
108
+ SPH_RANK_BM25 = 1
109
+ # no ranking, all matches get a weight of 1
110
+ SPH_RANK_NONE = 2
111
+ # simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
112
+ SPH_RANK_WORDCOUNT = 3
113
+
114
+ # Known sort modes
115
+
116
+ # sort by document relevance desc, then by date
117
+ SPH_SORT_RELEVANCE = 0
118
+ # sort by document date desc, then by relevance desc
119
+ SPH_SORT_ATTR_DESC = 1
120
+ # sort by document date asc, then by relevance desc
121
+ SPH_SORT_ATTR_ASC = 2
122
+ # sort by time segments (hour/day/week/etc) desc, then by relevance desc
123
+ SPH_SORT_TIME_SEGMENTS = 3
124
+ # sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
125
+ SPH_SORT_EXTENDED = 4
126
+ # sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
127
+ SPH_SORT_EXPR = 5
128
+
129
+ # Known filter types
130
+
131
+ # filter by integer values set
132
+ SPH_FILTER_VALUES = 0
133
+ # filter by integer range
134
+ SPH_FILTER_RANGE = 1
135
+ # filter by float range
136
+ SPH_FILTER_FLOATRANGE = 2
137
+
138
+ # Known attribute types
139
+
140
+ # this attr is just an integer
141
+ SPH_ATTR_INTEGER = 1
142
+ # this attr is a timestamp
143
+ SPH_ATTR_TIMESTAMP = 2
144
+ # this attr is an ordinal string number (integer at search time,
145
+ # specially handled at indexing time)
146
+ SPH_ATTR_ORDINAL = 3
147
+ # this attr is a boolean bit field
148
+ SPH_ATTR_BOOL = 4
149
+ # this attr is a float
150
+ SPH_ATTR_FLOAT = 5
151
+ # this attr has multiple values (0 or more)
152
+ SPH_ATTR_MULTI = 0x40000000
153
+
154
+ # Known grouping functions
155
+
156
+ # group by day
157
+ SPH_GROUPBY_DAY = 0
158
+ # group by week
159
+ SPH_GROUPBY_WEEK = 1
160
+ # group by month
161
+ SPH_GROUPBY_MONTH = 2
162
+ # group by year
163
+ SPH_GROUPBY_YEAR = 3
164
+ # group by attribute value
165
+ SPH_GROUPBY_ATTR = 4
166
+ # group by sequential attrs pair
167
+ SPH_GROUPBY_ATTRPAIR = 5
168
+
169
+ # Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
170
+ def initialize
171
+ # per-client-object settings
172
+ @host = 'localhost' # searchd host (default is "localhost")
173
+ @port = 3312 # searchd port (default is 3312)
174
+
175
+ # per-query settings
176
+ @offset = 0 # how many records to seek from result-set start (default is 0)
177
+ @limit = 20 # how many records to return from result-set starting at offset (default is 20)
178
+ @mode = SPH_MATCH_ALL # query matching mode (default is SPH_MATCH_ALL)
179
+ @weights = [] # per-field weights (default is 1 for all fields)
180
+ @sort = SPH_SORT_RELEVANCE # match sorting mode (default is SPH_SORT_RELEVANCE)
181
+ @sortby = '' # attribute to sort by (default is "")
182
+ @min_id = 0 # min ID to match (default is 0, which means no limit)
183
+ @max_id = 0 # max ID to match (default is 0, which means no limit)
184
+ @filters = [] # search filters
185
+ @groupby = '' # group-by attribute name
186
+ @groupfunc = SPH_GROUPBY_DAY # function to pre-process group-by attribute value with
187
+ @groupsort = '@group desc' # group-by sorting clause (to sort groups in result set with)
188
+ @groupdistinct = '' # group-by count-distinct attribute
189
+ @maxmatches = 1000 # max matches to retrieve
190
+ @cutoff = 0 # cutoff to stop searching at (default is 0)
191
+ @retrycount = 0 # distributed retries count
192
+ @retrydelay = 0 # distributed retries delay
193
+ @anchor = [] # geographical anchor point
194
+ @indexweights = [] # per-index weights
195
+ @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
196
+ @maxquerytime = 0 # max query time, milliseconds (default is 0, do not limit)
197
+ @fieldweights = {} # per-field-name weights
198
+
199
+ # per-reply fields (for single-query case)
200
+ @error = '' # last error message
201
+ @warning = '' # last warning message
202
+
203
+ @reqs = [] # requests storage (for multi-query case)
204
+
205
+ # ruby client specific fields
206
+ @timeout = 5 # request timeout, in seconds (ruby client specific)
207
+ @attempts = 3 # number of attempts to execute the request when the timeout is exceeded
208
+ end
209
+
210
+ # Does nothing.
211
+ def destroy
212
+ end
213
+
214
+ # Get last error message.
215
+ def GetLastError
216
+ @error
217
+ end
218
+
219
+ # Get last warning message.
220
+ def GetLastWarning
221
+ @warning
222
+ end
223
+
224
+ # Set searchd host name (string) and port (integer).
225
+ def SetServer(host, port)
226
+ assert { host.instance_of? String }
227
+ assert { port.instance_of? Fixnum }
228
+
229
+ @host = host
230
+ @port = port
231
+ end
232
+
233
+ # Set offset and count into result set,
234
+ # and optionally set max-matches and cutoff limits.
235
+ def SetLimits(offset, limit, max = 0, cutoff = 0)
236
+ assert { offset.instance_of? Fixnum }
237
+ assert { limit.instance_of? Fixnum }
238
+ assert { max.instance_of? Fixnum }
239
+ assert { offset >= 0 }
240
+ assert { limit > 0 }
241
+ assert { max >= 0 }
242
+
243
+ @offset = offset
244
+ @limit = limit
245
+ @maxmatches = max if max > 0
246
+ @cutoff = cutoff if cutoff > 0
247
+ end
248
+
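# Illustrative pagination sketch (values are hypothetical): fetch the third page
# of 20 results while keeping at most 1000 candidate matches server-side.
#
#   sphinx = Sphinx::Client.new
#   sphinx.SetLimits(40, 20, 1000)   # offset=40, limit=20, max_matches=1000
#   result = sphinx.Query('test')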
249
+ # Set maximum query time per index, in milliseconds.
250
+ # Integer; 0 means "do not limit".
251
+ def SetMaxQueryTime(max)
252
+ assert { max.instance_of? Fixnum }
253
+ assert { max >= 0 }
254
+ @maxquerytime = max
255
+ end
256
+
257
+ # Set matching mode.
258
+ def SetMatchMode(mode)
259
+ assert { mode == SPH_MATCH_ALL \
260
+ || mode == SPH_MATCH_ANY \
261
+ || mode == SPH_MATCH_PHRASE \
262
+ || mode == SPH_MATCH_BOOLEAN \
263
+ || mode == SPH_MATCH_EXTENDED \
264
+ || mode == SPH_MATCH_FULLSCAN \
265
+ || mode == SPH_MATCH_EXTENDED2 }
266
+
267
+ @mode = mode
268
+ end
269
+
270
+ # Set ranking mode.
271
+ def SetRankingMode(ranker)
272
+ assert { ranker == SPH_RANK_PROXIMITY_BM25 \
273
+ || ranker == SPH_RANK_BM25 \
274
+ || ranker == SPH_RANK_NONE \
275
+ || ranker == SPH_RANK_WORDCOUNT }
276
+
277
+ @ranker = ranker
278
+ end
279
+
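# Illustrative sketch of picking a match and ranking mode (the query text is hypothetical):
#
#   sphinx.SetMatchMode(Sphinx::Client::SPH_MATCH_EXTENDED2)
#   sphinx.SetRankingMode(Sphinx::Client::SPH_RANK_BM25)
#   sphinx.Query('@title "sphinx client" | ruby')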
280
+ # Set matches sorting mode.
281
+ def SetSortMode(mode, sortby = '')
282
+ assert { mode == SPH_SORT_RELEVANCE \
283
+ || mode == SPH_SORT_ATTR_DESC \
284
+ || mode == SPH_SORT_ATTR_ASC \
285
+ || mode == SPH_SORT_TIME_SEGMENTS \
286
+ || mode == SPH_SORT_EXTENDED \
287
+ || mode == SPH_SORT_EXPR }
288
+ assert { sortby.instance_of? String }
289
+ assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }
290
+
291
+ @sort = mode
292
+ @sortby = sortby
293
+ end
294
+
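# Illustrative sorting sketch ('price' and 'created_at' are hypothetical attributes):
#
#   sphinx.SetSortMode(Sphinx::Client::SPH_SORT_EXTENDED, '@weight DESC, price ASC')
#   sphinx.SetSortMode(Sphinx::Client::SPH_SORT_ATTR_DESC, 'created_at')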
295
+ # Bind per-field weights by order.
296
+ #
297
+ # DEPRECATED; use SetFieldWeights() instead.
298
+ def SetWeights(weights)
299
+ assert { weights.instance_of? Array }
300
+ weights.each do |weight|
301
+ assert { weight.instance_of? Fixnum }
302
+ end
303
+
304
+ @weights = weights
305
+ end
306
+
307
+ # Bind per-field weights by name.
308
+ #
309
+ # Takes a hash mapping string field names to integer field weights as an argument.
310
+ # * Takes precedence over SetWeights().
311
+ # * Unknown names will be silently ignored.
312
+ # * Unbound fields will be silently given a weight of 1.
313
+ def SetFieldWeights(weights)
314
+ assert { weights.instance_of? Hash }
315
+ weights.each do |name, weight|
316
+ assert { name.instance_of? String }
317
+ assert { weight.instance_of? Fixnum }
318
+ end
319
+
320
+ @fieldweights = weights
321
+ end
322
+
323
+ # Bind per-index weights by name.
324
+ def SetIndexWeights(weights)
325
+ assert { weights.instance_of? Hash }
326
+ weights.each do |index, weight|
327
+ assert { index.instance_of? String }
328
+ assert { weight.instance_of? Fixnum }
329
+ end
330
+
331
+ @indexweights = weights
332
+ end
333
+
334
+ # Set IDs range to match.
335
+ #
336
+ # Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
337
+ def SetIDRange(min, max)
338
+ assert { min.instance_of?(Fixnum) or min.instance_of?(Bignum) }
339
+ assert { max.instance_of?(Fixnum) or max.instance_of?(Bignum) }
340
+ assert { min <= max }
341
+
342
+ @min_id = min
343
+ @max_id = max
344
+ end
345
+
346
+ # Set values filter.
347
+ #
348
+ # Only match those records where <tt>attribute</tt> column values
349
+ # are in specified set.
350
+ def SetFilter(attribute, values, exclude = false)
351
+ assert { attribute.instance_of? String }
352
+ assert { values.instance_of? Array }
353
+ assert { !values.empty? }
354
+
355
+ if values.instance_of?(Array) && values.size > 0
356
+ values.each do |value|
357
+ assert { value.instance_of? Fixnum }
358
+ end
359
+
360
+ @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
361
+ end
362
+ end
363
+
364
+ # Set range filter.
365
+ #
366
+ # Only match those records where <tt>attribute</tt> column value
367
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
368
+ def SetFilterRange(attribute, min, max, exclude = false)
369
+ assert { attribute.instance_of? String }
370
+ assert { min.instance_of? Fixnum }
371
+ assert { max.instance_of? Fixnum }
372
+ assert { min <= max }
373
+
374
+ @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
375
+ end
376
+
377
+ # Set float range filter.
378
+ #
379
+ # Only match those records where <tt>attribute</tt> column value
380
+ # is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
381
+ def SetFilterFloatRange(attribute, min, max, exclude = false)
382
+ assert { attribute.instance_of? String }
383
+ assert { min.instance_of? Float }
384
+ assert { max.instance_of? Float }
385
+ assert { min <= max }
386
+
387
+ @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
388
+ end
389
+
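# Illustrative filter sketch (attribute names are hypothetical); all three kinds can be combined:
#
#   sphinx.SetFilter('group_id', [1, 2, 3])            # keep documents whose group_id is 1, 2 or 3
#   sphinx.SetFilterRange('year', 2000, 2009)          # inclusive integer range
#   sphinx.SetFilterFloatRange('price', 0.0, 99.99)    # inclusive float range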
390
+ # Setup anchor point for geosphere distance calculations.
391
+ #
392
+ # Required to use <tt>@geodist</tt> in filters and sorting;
393
+ # distance will be computed to this point. Latitude and longitude
394
+ # must be in radians.
395
+ #
396
+ # * <tt>attrlat</tt> -- the name of the latitude attribute
397
+ # * <tt>attrlong</tt> -- the name of the longitude attribute
398
+ # * <tt>lat</tt> -- the anchor point latitude, in radians
399
+ # * <tt>long</tt> -- the anchor point longitude, in radians
400
+ def SetGeoAnchor(attrlat, attrlong, lat, long)
401
+ assert { attrlat.instance_of? String }
402
+ assert { attrlong.instance_of? String }
403
+ assert { lat.instance_of? Float }
404
+ assert { long.instance_of? Float }
405
+
406
+ @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
407
+ end
408
+
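# Illustrative geo-anchor sketch (attribute names are hypothetical; coordinates must be radians):
#
#   lat = 40.7143528 * Math::PI / 180
#   lng = -74.0059731 * Math::PI / 180
#   sphinx.SetGeoAnchor('lat_radians', 'lng_radians', lat, lng)
#   sphinx.SetFilterFloatRange('@geodist', 0.0, 5000.0)   # within 5 km of the anchor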
409
+ # Set grouping attribute and function.
410
+ #
411
+ # In grouping mode, all matches are assigned to different groups
412
+ # based on grouping function value.
413
+ #
414
+ # Each group keeps track of the total match count, and the best match
415
+ # (in this group) according to current sorting function.
416
+ #
417
+ # The final result set contains one best match per group, with
418
+ # grouping function value and matches count attached.
419
+ #
420
+ # Groups in result set could be sorted by any sorting clause,
421
+ # including both document attributes and the following special
422
+ # internal Sphinx attributes:
423
+ #
424
+ # * @id - match document ID;
425
+ # * @weight, @rank, @relevance - match weight;
426
+ # * @group - groupby function value;
427
+ # * @count - amount of matches in group.
428
+ #
429
+ # the default mode is to sort by groupby value in descending order,
430
+ # ie. by '@group desc'.
431
+ #
432
+ # 'total_found' would contain total amount of matching groups over
433
+ # the whole index.
434
+ #
435
+ # WARNING: grouping is done in fixed memory and thus its results
436
+ # are only approximate; so there might be more groups reported
437
+ # in total_found than actually present. @count might also
438
+ # be underestimated.
439
+ #
440
+ # For example, if sorting by relevance and grouping by "published"
441
+ # attribute with SPH_GROUPBY_DAY function, then the result set will
442
+ # contain one most relevant match per each day when there were any
443
+ # matches published, with day number and per-day match count attached,
444
+ # and sorted by day number in descending order (ie. recent days first).
445
+ def SetGroupBy(attribute, func, groupsort = '@group desc')
446
+ assert { attribute.instance_of? String }
447
+ assert { groupsort.instance_of? String }
448
+ assert { func == SPH_GROUPBY_DAY \
449
+ || func == SPH_GROUPBY_WEEK \
450
+ || func == SPH_GROUPBY_MONTH \
451
+ || func == SPH_GROUPBY_YEAR \
452
+ || func == SPH_GROUPBY_ATTR \
453
+ || func == SPH_GROUPBY_ATTRPAIR }
454
+
455
+ @groupby = attribute
456
+ @groupfunc = func
457
+ @groupsort = groupsort
458
+ end
459
+
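# Illustrative grouping sketch for the "one best match per day" example above
# ('published' is a hypothetical timestamp attribute):
#
#   sphinx.SetSortMode(Sphinx::Client::SPH_SORT_RELEVANCE)
#   sphinx.SetGroupBy('published', Sphinx::Client::SPH_GROUPBY_DAY)
#   result = sphinx.Query('test')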
460
+ # Set count-distinct attribute for group-by queries.
461
+ def SetGroupDistinct(attribute)
462
+ assert { attribute.instance_of? String }
463
+ @groupdistinct = attribute
464
+ end
465
+
466
+ # Set distributed retries count and delay.
467
+ def SetRetries(count, delay = 0)
468
+ assert { count.instance_of? Fixnum }
469
+ assert { delay.instance_of? Fixnum }
470
+
471
+ @retrycount = count
472
+ @retrydelay = delay
473
+ end
474
+
475
+ # Set request timeout and the number of attempts to execute a query
476
+ # when the timeout is exceeded (ruby client specific).
477
+ def SetTimeout(timeout, attempts = 3)
478
+ assert { timeout.instance_of? Fixnum }
479
+ assert { attempts.instance_of? Fixnum }
480
+
481
+ @timeout = timeout
482
+ @attempts = attempts
483
+ end
484
+
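# Illustrative connection sketch (the host name is hypothetical; 3312 is the default searchd port):
#
#   sphinx.SetServer('search01.example.com', 3312)
#   sphinx.SetTimeout(1, 2)   # 1-second socket timeout, up to 2 attempts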
485
+ # Clear all filters (for multi-queries).
486
+ def ResetFilters
487
+ @filters = []
488
+ @anchor = []
489
+ end
490
+
491
+ # Clear groupby settings (for multi-queries).
492
+ def ResetGroupBy
493
+ @groupby = ''
494
+ @groupfunc = SPH_GROUPBY_DAY
495
+ @groupsort = '@group desc'
496
+ @groupdistinct = ''
497
+ end
498
+
499
+ # Connect to searchd server and run given search query.
500
+ #
501
+ # <tt>query</tt> is query string
502
+ #
503
+ # <tt>index</tt> is index name (or names) to query. default value is "*" which means
504
+ # to query all indexes. Accepted characters for index names are letters, numbers,
505
+ # dash, and underscore; everything else is considered a separator. Therefore,
506
+ # all the following calls are valid and will search two indexes:
507
+ #
508
+ # sphinx.Query('test query', 'main delta')
509
+ # sphinx.Query('test query', 'main;delta')
510
+ # sphinx.Query('test query', 'main, delta')
511
+ #
512
+ # Index order matters. If identical IDs are found in two or more indexes,
513
+ # weight and attribute values from the very last matching index will be used
514
+ # for sorting and returning to client. Therefore, in the example above,
515
+ # matches from "delta" index will always "win" over matches from "main".
516
+ #
517
+ # Returns false on failure.
518
+ # Returns hash which has the following keys on success:
519
+ #
520
+ # * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
521
+ # * <tt>'total'</tt> -- total amount of matches retrieved (up to SPH_MAX_MATCHES, see sphinx.h)
522
+ # * <tt>'total_found'</tt> -- total amount of matching documents in index
523
+ # * <tt>'time'</tt> -- search time
524
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
525
+ def Query(query, index = '*', comment = '')
526
+ assert { @reqs.empty? }
527
+ @reqs = []
528
+
529
+ self.AddQuery(query, index, comment)
530
+ results = self.RunQueries
531
+
532
+ # probably network error; error message should be already filled
533
+ return false unless results.instance_of?(Array)
534
+
535
+ @error = results[0]['error']
536
+ @warning = results[0]['warning']
537
+
538
+ return false if results[0]['status'] == SEARCHD_ERROR
539
+ return results[0]
540
+ end
541
+
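# Illustrative sketch of reading the result hash described above (the 'products' index is hypothetical):
#
#   result = sphinx.Query('ipod', 'products')
#   if result
#     puts "#{result['total_found']} found, showing #{result['matches'].size} in #{result['time']}s"
#     result['matches'].each { |m| puts "id=#{m['id']} weight=#{m['weight']}" }
#   else
#     puts sphinx.GetLastError
#   end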
542
+ # Add query to batch.
543
+ #
544
+ # Batch queries enable searchd to perform internal optimizations,
545
+ # if possible, and reduce network connection overheads in all cases.
546
+ #
547
+ # For instance, running exactly the same query with different
548
+ # groupby settings will enable searchd to perform expensive
549
+ # full-text search and ranking operation only once, but compute
550
+ # multiple groupby results from its output.
551
+ #
552
+ # Parameters are exactly the same as in <tt>Query</tt> call.
553
+ # Returns an index into the results array returned by the <tt>RunQueries</tt> call.
554
+ def AddQuery(query, index = '*', comment = '')
555
+ # build request
556
+
557
+ # mode and limits
558
+ request = Request.new
559
+ request.put_int @offset, @limit, @mode, @ranker, @sort
560
+ request.put_string @sortby
561
+ # query itself
562
+ request.put_string query
563
+ # weights
564
+ request.put_int_array @weights
565
+ # indexes
566
+ request.put_string index
567
+ # id64 range marker
568
+ request.put_int 1
569
+ # id64 range
570
+ request.put_int64 @min_id.to_i, @max_id.to_i
571
+
572
+ # filters
573
+ request.put_int @filters.length
574
+ @filters.each do |filter|
575
+ request.put_string filter['attr']
576
+ request.put_int filter['type']
577
+
578
+ case filter['type']
579
+ when SPH_FILTER_VALUES
580
+ request.put_int_array filter['values']
581
+ when SPH_FILTER_RANGE
582
+ request.put_int filter['min'], filter['max']
583
+ when SPH_FILTER_FLOATRANGE
584
+ request.put_float filter['min'], filter['max']
585
+ else
586
+ raise SphinxInternalError, 'Internal error: unhandled filter type'
587
+ end
588
+ request.put_int filter['exclude'] ? 1 : 0
589
+ end
590
+
591
+ # group-by clause, max-matches count, group-sort clause, cutoff count
592
+ request.put_int @groupfunc
593
+ request.put_string @groupby
594
+ request.put_int @maxmatches
595
+ request.put_string @groupsort
596
+ request.put_int @cutoff, @retrycount, @retrydelay
597
+ request.put_string @groupdistinct
598
+
599
+ # anchor point
600
+ if @anchor.empty?
601
+ request.put_int 0
602
+ else
603
+ request.put_int 1
604
+ request.put_string @anchor['attrlat'], @anchor['attrlong']
605
+ request.put_float @anchor['lat'], @anchor['long']
606
+ end
607
+
608
+ # per-index weights
609
+ request.put_int @indexweights.length
610
+ @indexweights.each do |idx, weight|
611
+ request.put_string idx
612
+ request.put_int weight
613
+ end
614
+
615
+ # max query time
616
+ request.put_int @maxquerytime
617
+
618
+ # per-field weights
619
+ request.put_int @fieldweights.length
620
+ @fieldweights.each do |field, weight|
621
+ request.put_string field
622
+ request.put_int weight
623
+ end
624
+
625
+ request.put_string comment
626
+
627
+ # store request to requests array
628
+ @reqs << request.to_s;
629
+ return @reqs.length - 1
630
+ end
631
+
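# Illustrative multi-query sketch matching the groupby example above
# (index and attribute names are hypothetical):
#
#   plain   = sphinx.AddQuery('shoes', 'products')
#   sphinx.SetGroupBy('brand_id', Sphinx::Client::SPH_GROUPBY_ATTR)
#   grouped = sphinx.AddQuery('shoes', 'products')
#   results = sphinx.RunQueries
#   puts results[grouped]['total_found'] if results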
632
+ # Run queries batch.
633
+ #
634
+ # Returns an array of result sets on success.
635
+ # Returns false on network IO failure.
636
+ #
637
+ # Each result set in the returned array is a hash which contains
638
+ # the same keys as the hash returned by <tt>Query</tt>, plus:
639
+ #
640
+ # * <tt>'error'</tt> -- search error for this query
641
+ # * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
642
+ def RunQueries
643
+ if @reqs.empty?
644
+ @error = 'No queries defined, issue AddQuery() first'
645
+ return false
646
+ end
647
+
648
+ req = @reqs.join('')
649
+ nreqs = @reqs.length
650
+ @reqs = []
651
+ response = PerformRequest(:search, req, nreqs)
652
+
653
+ # parse response
654
+ begin
655
+ results = []
656
+ ires = 0
657
+ while ires < nreqs
658
+ ires += 1
659
+ result = {}
660
+
661
+ result['error'] = ''
662
+ result['warning'] = ''
663
+
664
+ # extract status
665
+ status = result['status'] = response.get_int
666
+ if status != SEARCHD_OK
667
+ message = response.get_string
668
+ if status == SEARCHD_WARNING
669
+ result['warning'] = message
670
+ else
671
+ result['error'] = message
672
+ results << result
673
+ next
674
+ end
675
+ end
676
+
677
+ # read schema
678
+ fields = []
679
+ attrs = {}
680
+ attrs_names_in_order = []
681
+
682
+ nfields = response.get_int
683
+ while nfields > 0
684
+ nfields -= 1
685
+ fields << response.get_string
686
+ end
687
+ result['fields'] = fields
688
+
689
+ nattrs = response.get_int
690
+ while nattrs > 0
691
+ nattrs -= 1
692
+ attr = response.get_string
693
+ type = response.get_int
694
+ attrs[attr] = type
695
+ attrs_names_in_order << attr
696
+ end
697
+ result['attrs'] = attrs
698
+
699
+ # read match count
700
+ count = response.get_int
701
+ id64 = response.get_int
702
+
703
+ # read matches
704
+ result['matches'] = []
705
+ while count > 0
706
+ count -= 1
707
+
708
+ if id64 != 0
709
+ doc = response.get_int64
710
+ weight = response.get_int
711
+ else
712
+ doc, weight = response.get_ints(2)
713
+ end
714
+
715
+ r = {} # This is a single result put in the result['matches'] array
716
+ r['id'] = doc
717
+ r['weight'] = weight
718
+ attrs_names_in_order.each do |a|
719
+ r['attrs'] ||= {}
720
+
721
+ # handle floats
722
+ if attrs[a] == SPH_ATTR_FLOAT
723
+ r['attrs'][a] = response.get_float
724
+ else
725
+ # handle everything else as unsigned ints
726
+ val = response.get_int
727
+ if (attrs[a] & SPH_ATTR_MULTI) != 0
728
+ r['attrs'][a] = []
729
+ 1.upto(val) do
730
+ r['attrs'][a] << response.get_int
731
+ end
732
+ else
733
+ r['attrs'][a] = val
734
+ end
735
+ end
736
+ end
737
+ result['matches'] << r
738
+ end
739
+ result['total'], result['total_found'], msecs, words = response.get_ints(4)
740
+ result['time'] = '%.3f' % (msecs / 1000.0)
741
+
742
+ result['words'] = {}
743
+ while words > 0
744
+ words -= 1
745
+ word = response.get_string
746
+ docs, hits = response.get_ints(2)
747
+ result['words'][word] = { 'docs' => docs, 'hits' => hits }
748
+ end
749
+
750
+ results << result
751
+ end
752
+ #rescue EOFError
753
+ # @error = 'incomplete reply'
754
+ # raise SphinxResponseError, @error
755
+ end
756
+
757
+ return results
758
+ end
759
+
760
+ # Connect to searchd server and generate excerpts from given documents.
761
+ #
762
+ # * <tt>docs</tt> -- an array of strings which represent the documents' contents
763
+ # * <tt>index</tt> -- a string specifying the index whose settings will be used
764
+ # for stemming, lexing and case folding
765
+ # * <tt>words</tt> -- a string which contains the words to highlight
766
+ # * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
767
+ #
768
+ # You can use the following parameters:
769
+ # * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
770
+ # * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
771
+ # * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
772
+ # * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
773
+ # * <tt>'around'</tt> -- how many words to highlight around each match, default is 5
774
+ # * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
775
+ # * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
776
+ # * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries set up in the tokenizer
777
+ # * <tt>'weight_order'</tt> -- whether to sort the best passages by weight instead of document order (the default)
778
+ #
779
+ # Returns false on failure.
780
+ # Returns an array of string excerpts on success.
781
+ def BuildExcerpts(docs, index, words, opts = {})
782
+ assert { docs.instance_of? Array }
783
+ assert { index.instance_of? String }
784
+ assert { words.instance_of? String }
785
+ assert { opts.instance_of? Hash }
786
+
787
+ # fixup options
788
+ opts['before_match'] ||= '<b>';
789
+ opts['after_match'] ||= '</b>';
790
+ opts['chunk_separator'] ||= ' ... ';
791
+ opts['limit'] ||= 256;
792
+ opts['around'] ||= 5;
793
+ opts['exact_phrase'] ||= false
794
+ opts['single_passage'] ||= false
795
+ opts['use_boundaries'] ||= false
796
+ opts['weight_order'] ||= false
797
+
798
+ # build request
799
+
800
+ # v.1.0 req
801
+ flags = 1
802
+ flags |= 2 if opts['exact_phrase']
803
+ flags |= 4 if opts['single_passage']
804
+ flags |= 8 if opts['use_boundaries']
805
+ flags |= 16 if opts['weight_order']
806
+
807
+ request = Request.new
808
+ request.put_int 0, flags # mode=0, flags=1 (remove spaces)
809
+ # req index
810
+ request.put_string index
811
+ # req words
812
+ request.put_string words
813
+
814
+ # options
815
+ request.put_string opts['before_match']
816
+ request.put_string opts['after_match']
817
+ request.put_string opts['chunk_separator']
818
+ request.put_int opts['limit'].to_i, opts['around'].to_i
819
+
820
+ # documents
821
+ request.put_int docs.size
822
+ docs.each do |doc|
823
+ assert { doc.instance_of? String }
824
+
825
+ request.put_string doc
826
+ end
827
+
828
+ response = PerformRequest(:excerpt, request)
829
+
830
+ # parse response
831
+ begin
832
+ res = []
833
+ docs.each do |doc|
834
+ res << response.get_string
835
+ end
836
+ rescue EOFError
837
+ @error = 'incomplete reply'
838
+ raise SphinxResponseError, @error
839
+ end
840
+ return res
841
+ end
842
+
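# Illustrative excerpt sketch (index name and documents are hypothetical):
#
#   docs = ['this is my test text to be highlighted', 'another test document']
#   opts = { 'before_match' => '<em>', 'after_match' => '</em>', 'around' => 3 }
#   excerpts = sphinx.BuildExcerpts(docs, 'products', 'test text', opts)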
843
+ # Connect to searchd server, and generate keyword list for a given query.
844
+ #
845
+ # Returns an array of words on success.
846
+ def BuildKeywords(query, index, hits)
847
+ assert { query.instance_of? String }
848
+ assert { index.instance_of? String }
849
+ assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) } # Ruby has no Boolean class
850
+
851
+ # build request
852
+ request = Request.new
853
+ # v.1.0 req
854
+ request.put_string query # req query
855
+ request.put_string index # req index
856
+ request.put_int hits ? 1 : 0
857
+
858
+ response = PerformRequest(:keywords, request)
859
+
860
+ # parse response
861
+ begin
862
+ res = []
863
+ nwords = response.get_int
864
+ 0.upto(nwords - 1) do |i|
865
+ tokenized = response.get_string
866
+ normalized = response.get_string
867
+
868
+ entry = { 'tokenized' => tokenized, 'normalized' => normalized }
869
+ entry['docs'], entry['hits'] = response.get_ints(2) if hits
870
+
871
+ res << entry
872
+ end
873
+ rescue EOFError
874
+ @error = 'incomplete reply'
875
+ raise SphinxResponseError, @error
876
+ end
877
+
878
+ return res
879
+ end
880
+
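# Illustrative keyword sketch (the 'products' index is hypothetical):
#
#   sphinx.BuildKeywords('running shoes', 'products', true).each do |w|
#     puts "#{w['tokenized']} => #{w['normalized']} (#{w['docs']} docs, #{w['hits']} hits)"
#   end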
881
+ # Batch update given attributes in given rows in given indexes.
882
+ #
883
+ # * +index+ is a name of the index to be updated
884
+ # * +attrs+ is an array of attribute name strings.
885
+ # * +values+ is a hash where key is document id, and value is an array of
886
+ #   new attribute values
887
+ # * +mva+ identifies whether the update is for MVA (multi-value) attributes
888
+ #
889
+ # Returns number of actually updated documents (0 or more) on success.
890
+ # Returns -1 on failure.
891
+ #
892
+ # Usage example:
893
+ # sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
894
+ def UpdateAttributes(index, attrs, values, mva = false)
895
+ # verify everything
896
+ assert { index.instance_of? String }
897
+ assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }
898
+
899
+ assert { attrs.instance_of? Array }
900
+ attrs.each do |attr|
901
+ assert { attr.instance_of? String }
902
+ end
903
+
904
+ assert { values.instance_of? Hash }
905
+ values.each do |id, entry|
906
+ assert { id.instance_of? Fixnum }
907
+ assert { entry.instance_of? Array }
908
+ assert { entry.length == attrs.length }
909
+ entry.each do |v|
910
+ if mva
911
+ assert { v.instance_of? Array }
912
+ v.each { |vv| assert { vv.instance_of? Fixnum } }
913
+ else
914
+ assert { v.instance_of? Fixnum }
915
+ end
916
+ end
917
+ end
918
+
919
+ # build request
920
+ request = Request.new
921
+ request.put_string index
922
+
923
+ request.put_int attrs.length
924
+ for attr in attrs
925
+ request.put_string attr
926
+ request.put_int mva ? 1 : 0
927
+ end
928
+
929
+ request.put_int values.length
930
+ values.each do |id, entry|
931
+ request.put_int64_new id
932
+ if mva
933
+ entry.each { |v| request.put_int_array v }
934
+ else
935
+ request.put_int(*entry)
936
+ end
937
+ end
938
+
939
+ response = PerformRequest(:update, request)
940
+
941
+ # parse response
942
+ begin
943
+ return response.get_int
944
+ rescue EOFError
945
+ @error = 'incomplete reply'
946
+ raise SphinxResponseError, @error
947
+ end
948
+ end
949
+
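# Illustrative update sketch (index and attribute names are hypothetical):
#
#   sphinx.UpdateAttributes('products', ['price_cents'], { 123 => [1999], 124 => [2999] })
#   sphinx.UpdateAttributes('products', ['tag_ids'], { 123 => [[1, 5, 7]] }, true)   # MVA values are arrays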
950
+ protected
951
+
952
+ # Connect to searchd server.
953
+ def Connect
954
+ begin
955
+ sock = TCPSocket.new(@host, @port)
956
+ rescue
957
+ @error = "connection to #{@host}:#{@port} failed"
958
+ raise SphinxConnectError, @error
959
+ end
960
+
961
+ v = sock.recv(4).unpack('N*').first.to_i
962
+ if v < 1
963
+ @error = "expected searchd protocol version 1+, got version '#{v}'"
964
+ raise SphinxConnectError, @error
965
+ end
966
+
967
+ sock.send([1].pack('N'), 0)
968
+ yield sock
969
+ ensure
970
+ sock.close rescue nil
971
+ end
972
+
973
+ # Get and check response packet from searchd server.
974
+ def GetResponse(sock, client_version)
975
+ response = ''
976
+ len = 0
977
+
978
+ header = sock.recv(8)
979
+ if header.length == 8
980
+ status, ver, len = header.unpack('n2N')
981
+ left = len.to_i
982
+ while left > 0 do
983
+ begin
984
+ chunk = sock.recv(left)
985
+ if chunk
986
+ response << chunk
987
+ left -= chunk.length
988
+ end
989
+ rescue EOFError
990
+ break
991
+ end
992
+ end
993
+ end
994
+ sock.close
995
+
996
+ # check response
997
+ read = response.length
998
+ if response.empty? or read != len.to_i
999
+ @error = len \
1000
+ ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
1001
+ : 'received zero-sized searchd response'
1002
+ raise SphinxResponseError, @error
1003
+ end
1004
+
1005
+ # check status
1006
+ if (status == SEARCHD_WARNING)
1007
+ wlen = response[0, 4].unpack('N*').first
1008
+ @warning = response[4, wlen]
1009
+ return response[4 + wlen, response.length - 4 - wlen]
1010
+ end
1011
+
1012
+ if status == SEARCHD_ERROR
1013
+ @error = 'searchd error: ' + response[4, response.length - 4]
1014
+ raise SphinxInternalError, @error
1015
+ end
1016
+
1017
+ if status == SEARCHD_RETRY
1018
+ @error = 'temporary searchd error: ' + response[4, response.length - 4]
1019
+ raise SphinxTemporaryError, @error
1020
+ end
1021
+
1022
+ unless status == SEARCHD_OK
1023
+ @error = "unknown status code: '#{status}'"
1024
+ raise SphinxUnknownError, @error
1025
+ end
1026
+
1027
+ # check version
1028
+ if ver < client_version
1029
+ @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
1030
+ "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
1031
+ end
1032
+
1033
+ return response
1034
+ end
1035
+
1036
+ # Connect, send query, get response.
1037
+ def PerformRequest(command, request, additional = nil)
1038
+ cmd = command.to_s.upcase
1039
+ command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
1040
+ command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)
1041
+
1042
+ len = request.to_s.length + (additional != nil ? 4 : 0)
1043
+ header = [command_id, command_ver, len].pack('nnN')
1044
+ header << [additional].pack('N') if additional != nil
1045
+
1046
+ begin
1047
+ SafeExecutor.execute(@timeout, @attempts) do
1048
+ self.Connect do |sock|
1049
+ sock.send(header + request.to_s, 0)
1050
+ response = self.GetResponse(sock, command_ver)
1051
+ return Response.new(response)
1052
+ end
1053
+ end
1054
+ rescue ::Timeout::Error
1055
+ @error = "request timeout: timeout=#@timeout; attempts=#@attempts"
1056
+ raise SphinxRequestTimeout, @error, $@
1057
+ end
1058
+ end
1059
+
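# Illustrative sketch of the wire header assembled above, for a SEARCH request
# carrying nreqs serialized queries (req and nreqs as built in RunQueries):
#
#   header = [SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, 4 + req.length].pack('nnN')
#   header << [nreqs].pack('N')   # 2-byte command, 2-byte version, 4-byte length, 4-byte query count
#   sock.send(header + req, 0)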
1060
+ # :stopdoc:
1061
+ # Evaluates the given block and raises unless it returns true. When $DEBUG is
+ # off, redefines itself as a no-op on the first call so later asserts are free.
+ def assert
1062
+ unless $DEBUG
1063
+ def assert; end
1064
+ return
1065
+ end
1066
+ raise 'Assertion failed!' unless yield
1067
+ end
1068
+ # :startdoc:
1069
+ end
1070
+ end