model_set 0.10.6
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +39 -0
- data/VERSION.yml +5 -0
- data/lib/model_set/conditioned.rb +33 -0
- data/lib/model_set/conditions.rb +103 -0
- data/lib/model_set/query.rb +132 -0
- data/lib/model_set/raw_query.rb +41 -0
- data/lib/model_set/raw_sql_query.rb +19 -0
- data/lib/model_set/set_query.rb +34 -0
- data/lib/model_set/solr_query.rb +70 -0
- data/lib/model_set/sphinx_query.rb +206 -0
- data/lib/model_set/sql_base_query.rb +52 -0
- data/lib/model_set/sql_query.rb +109 -0
- data/lib/model_set.rb +743 -0
- data/lib/multi_set.rb +67 -0
- data/test/model_set_test.rb +329 -0
- data/test/multi_set_test.rb +65 -0
- data/test/test_helper.rb +23 -0
- data/vendor/sphinx_client/README.rdoc +41 -0
- data/vendor/sphinx_client/Rakefile +21 -0
- data/vendor/sphinx_client/init.rb +1 -0
- data/vendor/sphinx_client/install.rb +5 -0
- data/vendor/sphinx_client/lib/sphinx/client.rb +1093 -0
- data/vendor/sphinx_client/lib/sphinx/request.rb +50 -0
- data/vendor/sphinx_client/lib/sphinx/response.rb +69 -0
- data/vendor/sphinx_client/lib/sphinx.rb +6 -0
- data/vendor/sphinx_client/spec/client_response_spec.rb +112 -0
- data/vendor/sphinx_client/spec/client_spec.rb +469 -0
- data/vendor/sphinx_client/spec/fixtures/default_search.php +8 -0
- data/vendor/sphinx_client/spec/fixtures/default_search_index.php +8 -0
- data/vendor/sphinx_client/spec/fixtures/excerpt_custom.php +11 -0
- data/vendor/sphinx_client/spec/fixtures/excerpt_default.php +8 -0
- data/vendor/sphinx_client/spec/fixtures/excerpt_flags.php +11 -0
- data/vendor/sphinx_client/spec/fixtures/field_weights.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/filter.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/filter_exclude.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/filter_float_range.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/filter_float_range_exclude.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/filter_range.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/filter_range_exclude.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/filter_range_int64.php +10 -0
- data/vendor/sphinx_client/spec/fixtures/filter_ranges.php +10 -0
- data/vendor/sphinx_client/spec/fixtures/filters.php +10 -0
- data/vendor/sphinx_client/spec/fixtures/filters_different.php +13 -0
- data/vendor/sphinx_client/spec/fixtures/geo_anchor.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/group_by_attr.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/group_by_attrpair.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/group_by_day.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/group_by_day_sort.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/group_by_month.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/group_by_week.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/group_by_year.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/group_distinct.php +10 -0
- data/vendor/sphinx_client/spec/fixtures/id_range.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/id_range64.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/index_weights.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/keywords.php +8 -0
- data/vendor/sphinx_client/spec/fixtures/limits.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/limits_cutoff.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/limits_max.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/limits_max_cutoff.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/match_all.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/match_any.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/match_boolean.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/match_extended.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/match_extended2.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/match_fullscan.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/match_phrase.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/max_query_time.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/miltiple_queries.php +12 -0
- data/vendor/sphinx_client/spec/fixtures/ranking_bm25.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/ranking_none.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/ranking_proximity.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/ranking_proximity_bm25.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/ranking_wordcount.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/retries.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/retries_delay.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/select.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/set_override.php +11 -0
- data/vendor/sphinx_client/spec/fixtures/sort_attr_asc.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/sort_attr_desc.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/sort_expr.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/sort_extended.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/sort_relevance.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/sort_time_segments.php +9 -0
- data/vendor/sphinx_client/spec/fixtures/sphinxapi.php +1269 -0
- data/vendor/sphinx_client/spec/fixtures/update_attributes.php +8 -0
- data/vendor/sphinx_client/spec/fixtures/update_attributes_mva.php +8 -0
- data/vendor/sphinx_client/spec/fixtures/weights.php +9 -0
- data/vendor/sphinx_client/spec/sphinx/sphinx-id64.conf +67 -0
- data/vendor/sphinx_client/spec/sphinx/sphinx.conf +67 -0
- data/vendor/sphinx_client/spec/sphinx/sphinx_test.sql +86 -0
- data/vendor/sphinx_client/sphinx.yml.tpl +3 -0
- data/vendor/sphinx_client/tasks/sphinx.rake +75 -0
- metadata +151 -0
@@ -0,0 +1,1093 @@
|
|
1
|
+
# = client.rb - Sphinx Client API
|
2
|
+
#
|
3
|
+
# Author:: Dmytro Shteflyuk <mailto:kpumuk@kpumuk.info>.
|
4
|
+
# Copyright:: Copyright (c) 2006 - 2008 Dmytro Shteflyuk
|
5
|
+
# License:: Distributes under the same terms as Ruby
|
6
|
+
# Version:: 0.9.9-r1299
|
7
|
+
# Website:: http://kpumuk.info/projects/ror-plugins/sphinx
|
8
|
+
#
|
9
|
+
# This library is distributed under the terms of the Ruby license.
|
10
|
+
# You can freely distribute/modify this library.
|
11
|
+
|
12
|
+
# ==Sphinx Client API
|
13
|
+
#
|
14
|
+
# The Sphinx Client API is used to communicate with <tt>searchd</tt>
|
15
|
+
# daemon and get search results from Sphinx.
|
16
|
+
#
|
17
|
+
# ===Usage
|
18
|
+
#
|
19
|
+
# sphinx = Sphinx::Client.new
|
20
|
+
# result = sphinx.Query('test')
|
21
|
+
# ids = result['matches'].map { |match| match['id'] }.join(',')
|
22
|
+
# posts = Post.find :all, :conditions => "id IN (#{ids})"
|
23
|
+
#
|
24
|
+
# docs = posts.map(&:body)
|
25
|
+
# excerpts = sphinx.BuildExcerpts(docs, 'index', 'test')
|
26
|
+
|
27
|
+
require 'socket'
|
28
|
+
|
29
|
+
module Sphinx
|
30
|
+
# :stopdoc:
|
31
|
+
|
32
|
+
class SphinxError < StandardError; end
|
33
|
+
class SphinxArgumentError < SphinxError; end
|
34
|
+
class SphinxConnectError < SphinxError; end
|
35
|
+
class SphinxResponseError < SphinxError; end
|
36
|
+
class SphinxInternalError < SphinxError; end
|
37
|
+
class SphinxTemporaryError < SphinxError; end
|
38
|
+
class SphinxUnknownError < SphinxError; end
|
39
|
+
|
40
|
+
# :startdoc:
|
41
|
+
|
42
|
+
class Client
|
43
|
+
|
44
|
+
# :stopdoc:
|
45
|
+
|
46
|
+
# Known searchd commands
|
47
|
+
|
48
|
+
# search command
|
49
|
+
SEARCHD_COMMAND_SEARCH = 0
|
50
|
+
# excerpt command
|
51
|
+
SEARCHD_COMMAND_EXCERPT = 1
|
52
|
+
# update command
|
53
|
+
SEARCHD_COMMAND_UPDATE = 2
|
54
|
+
# keywords command
|
55
|
+
SEARCHD_COMMAND_KEYWORDS = 3
|
56
|
+
|
57
|
+
# Current client-side command implementation versions
|
58
|
+
|
59
|
+
# search command version
|
60
|
+
VER_COMMAND_SEARCH = 0x116
|
61
|
+
# excerpt command version
|
62
|
+
VER_COMMAND_EXCERPT = 0x100
|
63
|
+
# update command version
|
64
|
+
VER_COMMAND_UPDATE = 0x102
|
65
|
+
# keywords command version
|
66
|
+
VER_COMMAND_KEYWORDS = 0x100
|
67
|
+
|
68
|
+
# Known searchd status codes
|
69
|
+
|
70
|
+
# general success, command-specific reply follows
|
71
|
+
SEARCHD_OK = 0
|
72
|
+
# general failure, command-specific reply may follow
|
73
|
+
SEARCHD_ERROR = 1
|
74
|
+
# temporary failure, client should retry later
|
75
|
+
SEARCHD_RETRY = 2
|
76
|
+
# general success, warning message and command-specific reply follow
|
77
|
+
SEARCHD_WARNING = 3
|
78
|
+
|
79
|
+
# :startdoc:
|
80
|
+
|
81
|
+
# Known match modes
|
82
|
+
|
83
|
+
# match all query words
|
84
|
+
SPH_MATCH_ALL = 0
|
85
|
+
# match any query word
|
86
|
+
SPH_MATCH_ANY = 1
|
87
|
+
# match this exact phrase
|
88
|
+
SPH_MATCH_PHRASE = 2
|
89
|
+
# match this boolean query
|
90
|
+
SPH_MATCH_BOOLEAN = 3
|
91
|
+
# match this extended query
|
92
|
+
SPH_MATCH_EXTENDED = 4
|
93
|
+
# match all document IDs w/o fulltext query, apply filters
|
94
|
+
SPH_MATCH_FULLSCAN = 5
|
95
|
+
# extended engine V2 (TEMPORARY, WILL BE REMOVED IN 0.9.8-RELEASE)
|
96
|
+
SPH_MATCH_EXTENDED2 = 6
|
97
|
+
|
98
|
+
# Known ranking modes (ext2 only)
|
99
|
+
|
100
|
+
# default mode, phrase proximity major factor and BM25 minor one
|
101
|
+
SPH_RANK_PROXIMITY_BM25 = 0
|
102
|
+
# statistical mode, BM25 ranking only (faster but worse quality)
|
103
|
+
SPH_RANK_BM25 = 1
|
104
|
+
# no ranking, all matches get a weight of 1
|
105
|
+
SPH_RANK_NONE = 2
|
106
|
+
# simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
|
107
|
+
SPH_RANK_WORDCOUNT = 3
|
108
|
+
# phrase proximity
|
109
|
+
SPH_RANK_PROXIMITY = 4
|
110
|
+
|
111
|
+
# Known sort modes
|
112
|
+
|
113
|
+
# sort by document relevance desc, then by date
|
114
|
+
SPH_SORT_RELEVANCE = 0
|
115
|
+
# sort by document date desc, then by relevance desc
|
116
|
+
SPH_SORT_ATTR_DESC = 1
|
117
|
+
# sort by document date asc, then by relevance desc
|
118
|
+
SPH_SORT_ATTR_ASC = 2
|
119
|
+
# sort by time segments (hour/day/week/etc) desc, then by relevance desc
|
120
|
+
SPH_SORT_TIME_SEGMENTS = 3
|
121
|
+
# sort by SQL-like expression (eg. "@relevance DESC, price ASC, @id DESC")
|
122
|
+
SPH_SORT_EXTENDED = 4
|
123
|
+
# sort by arithmetic expression in descending order (eg. "@id + max(@weight,1000)*boost + log(price)")
|
124
|
+
SPH_SORT_EXPR = 5
|
125
|
+
|
126
|
+
# Known filter types
|
127
|
+
|
128
|
+
# filter by integer values set
|
129
|
+
SPH_FILTER_VALUES = 0
|
130
|
+
# filter by integer range
|
131
|
+
SPH_FILTER_RANGE = 1
|
132
|
+
# filter by float range
|
133
|
+
SPH_FILTER_FLOATRANGE = 2
|
134
|
+
|
135
|
+
# Known attribute types
|
136
|
+
|
137
|
+
# this attr is just an integer
|
138
|
+
SPH_ATTR_INTEGER = 1
|
139
|
+
# this attr is a timestamp
|
140
|
+
SPH_ATTR_TIMESTAMP = 2
|
141
|
+
# this attr is an ordinal string number (integer at search time,
|
142
|
+
# specially handled at indexing time)
|
143
|
+
SPH_ATTR_ORDINAL = 3
|
144
|
+
# this attr is a boolean bit field
|
145
|
+
SPH_ATTR_BOOL = 4
|
146
|
+
# this attr is a float
|
147
|
+
SPH_ATTR_FLOAT = 5
|
148
|
+
# signed 64-bit integer
|
149
|
+
SPH_ATTR_BIGINT = 6
|
150
|
+
# this attr has multiple values (0 or more)
|
151
|
+
SPH_ATTR_MULTI = 0x40000000
|
152
|
+
|
153
|
+
# Known grouping functions
|
154
|
+
|
155
|
+
# group by day
|
156
|
+
SPH_GROUPBY_DAY = 0
|
157
|
+
# group by week
|
158
|
+
SPH_GROUPBY_WEEK = 1
|
159
|
+
# group by month
|
160
|
+
SPH_GROUPBY_MONTH = 2
|
161
|
+
# group by year
|
162
|
+
SPH_GROUPBY_YEAR = 3
|
163
|
+
# group by attribute value
|
164
|
+
SPH_GROUPBY_ATTR = 4
|
165
|
+
# group by sequential attrs pair
|
166
|
+
SPH_GROUPBY_ATTRPAIR = 5
|
167
|
+
|
168
|
+
# Constructs the <tt>Sphinx::Client</tt> object and sets options to their default values.
|
169
|
+
def initialize
  # Puts every connection, query, and reply setting into its default state.

  # per-client-object settings
  @host = 'localhost' # searchd host (default is "localhost")
  @port = 3312        # searchd port (default is 3312)

  # per-query settings
  @offset = 0                       # how many records to seek from result-set start (default is 0)
  @limit = 20                       # how many records to return from result-set starting at offset (default is 20)
  @mode = SPH_MATCH_ALL             # query matching mode (default is SPH_MATCH_ALL)
  @weights = []                     # per-field weights (default is 1 for all fields)
  @sort = SPH_SORT_RELEVANCE        # match sorting mode (default is SPH_SORT_RELEVANCE)
  @sortby = ''                      # attribute to sort by (default is "")
  @min_id = 0                       # min ID to match (default is 0, which means no limit)
  @max_id = 0                       # max ID to match (default is 0, which means no limit)
  @filters = []                     # search filters
  @groupby = ''                     # group-by attribute name
  @groupfunc = SPH_GROUPBY_DAY      # function to pre-process group-by attribute value with
  @groupsort = '@group desc'        # group-by sorting clause (to sort groups in result set with)
  @groupdistinct = ''               # group-by count-distinct attribute
  @maxmatches = 1000                # max matches to retrieve
  @cutoff = 0                       # cutoff to stop searching at (default is 0)
  @retrycount = 0                   # distributed retries count
  @retrydelay = 0                   # distributed retries delay
  @anchor = []                      # geographical anchor point
  @indexweights = []                # per-index weights
  @ranker = SPH_RANK_PROXIMITY_BM25 # ranking mode (default is SPH_RANK_PROXIMITY_BM25)
  @maxquerytime = 0                 # max query time, milliseconds (default is 0, do not limit)
  @fieldweights = {}                # per-field-name weights
  @overrides = []                   # per-query attribute values overrides
  @select = '*'                     # select-list (attributes or expressions, with optional aliases)

  # per-reply fields (for single-query case)
  @error = ''   # last error message
  @warning = '' # last warning message

  @reqs = []  # requests storage (for multi-query case)
  @mbenc = '' # stored mbstring encoding
end
|
207
|
+
|
208
|
+
# Get last error message.
|
209
|
+
def GetLastError
  # Returns whatever is currently stored in @error (the last error message).
  @error
end
|
212
|
+
|
213
|
+
# Get last warning message.
|
214
|
+
def GetLastWarning
  # Returns whatever is currently stored in @warning (the last warning message).
  @warning
end
|
217
|
+
|
218
|
+
# Set searchd host name (string) and port (integer).
|
219
|
+
def SetServer(host, port)
  # Stores the searchd host name (String) and port (Integer).
  # Integer is checked instead of Fixnum because the Fixnum constant
  # no longer exists on Ruby 3.2+ and would raise NameError here.
  assert { host.instance_of? String }
  assert { port.is_a? Integer }

  @host = host
  @port = port
end
|
226
|
+
|
227
|
+
# Set offset and count into result set,
|
228
|
+
# and optionally set max-matches and cutoff limits.
|
229
|
+
def SetLimits(offset, limit, max = 0, cutoff = 0)
  # Stores the result-set window and, optionally, max-matches and cutoff.
  # Integer is checked instead of Fixnum because the Fixnum constant
  # no longer exists on Ruby 3.2+.
  assert { offset.is_a? Integer }
  assert { limit.is_a? Integer }
  assert { max.is_a? Integer }
  assert { offset >= 0 }
  assert { limit > 0 }
  assert { max >= 0 }

  @offset = offset
  @limit = limit
  # A value of 0 (the default) keeps the previously configured setting.
  @maxmatches = max if max > 0
  @cutoff = cutoff if cutoff > 0
end
|
242
|
+
|
243
|
+
# Set maximum query time, in milliseconds, per-index,
|
244
|
+
# integer, 0 means "do not limit"
|
245
|
+
def SetMaxQueryTime(max)
  # Stores the per-query time limit; must be a non-negative Integer.
  # Integer replaces Fixnum, which no longer exists on Ruby 3.2+.
  assert { max.is_a? Integer }
  assert { max >= 0 }
  @maxquerytime = max
end
|
250
|
+
|
251
|
+
# Set matching mode.
|
252
|
+
def SetMatchMode(mode)
  # Stores the matching mode; only the SPH_MATCH_* constants are accepted.
  known_modes = [
    SPH_MATCH_ALL,
    SPH_MATCH_ANY,
    SPH_MATCH_PHRASE,
    SPH_MATCH_BOOLEAN,
    SPH_MATCH_EXTENDED,
    SPH_MATCH_FULLSCAN,
    SPH_MATCH_EXTENDED2
  ]
  assert { known_modes.include?(mode) }

  @mode = mode
end
|
263
|
+
|
264
|
+
# Set ranking mode.
|
265
|
+
def SetRankingMode(ranker)
  # Stores the ranking mode; only the SPH_RANK_* constants are accepted.
  known_rankers = [
    SPH_RANK_PROXIMITY_BM25,
    SPH_RANK_BM25,
    SPH_RANK_NONE,
    SPH_RANK_WORDCOUNT,
    SPH_RANK_PROXIMITY
  ]
  assert { known_rankers.include?(ranker) }

  @ranker = ranker
end
|
274
|
+
|
275
|
+
# Set matches sorting mode.
|
276
|
+
def SetSortMode(mode, sortby = '')
  # Stores the sorting mode and its sort-by clause.
  known_modes = [
    SPH_SORT_RELEVANCE,
    SPH_SORT_ATTR_DESC,
    SPH_SORT_ATTR_ASC,
    SPH_SORT_TIME_SEGMENTS,
    SPH_SORT_EXTENDED,
    SPH_SORT_EXPR
  ]
  assert { known_modes.include?(mode) }
  assert { sortby.instance_of? String }
  # Every mode except plain relevance needs something to sort by.
  assert { mode == SPH_SORT_RELEVANCE || !sortby.empty? }

  @sort = mode
  @sortby = sortby
end
|
289
|
+
|
290
|
+
# Bind per-field weights by order.
|
291
|
+
#
|
292
|
+
# DEPRECATED; use SetFieldWeights() instead.
|
293
|
+
def SetWeights(weights)
  # Stores positional per-field weights; expects an Array of Integers.
  # Integer replaces Fixnum, which no longer exists on Ruby 3.2+.
  assert { weights.instance_of? Array }
  weights.each do |weight|
    assert { weight.is_a? Integer }
  end

  @weights = weights
end
|
301
|
+
|
302
|
+
# Bind per-field weights by name.
|
303
|
+
#
|
304
|
+
# Takes a hash that maps field names (strings) to field weights (integers) as an argument.
|
305
|
+
# * Takes precedence over SetWeights().
|
306
|
+
# * Unknown names will be silently ignored.
|
307
|
+
# * Unbound fields will be silently given a weight of 1.
|
308
|
+
def SetFieldWeights(weights)
  # Stores per-field weights; expects a Hash of String name => Integer weight.
  # Integer replaces Fixnum, which no longer exists on Ruby 3.2+.
  assert { weights.instance_of? Hash }
  weights.each do |name, weight|
    assert { name.instance_of? String }
    assert { weight.is_a? Integer }
  end

  @fieldweights = weights
end
|
317
|
+
|
318
|
+
# Bind per-index weights by name.
|
319
|
+
def SetIndexWeights(weights)
  # Stores per-index weights; expects a Hash of String index => Integer weight.
  # Integer replaces Fixnum, which no longer exists on Ruby 3.2+.
  assert { weights.instance_of? Hash }
  weights.each do |index, weight|
    assert { index.instance_of? String }
    assert { weight.is_a? Integer }
  end

  @indexweights = weights
end
|
328
|
+
|
329
|
+
# Set IDs range to match.
|
330
|
+
#
|
331
|
+
# Only match records if document ID is between <tt>min_id</tt> and <tt>max_id</tt> (inclusive).
|
332
|
+
def SetIDRange(min, max)
  # Stores the inclusive document ID range to match.
  # Integer covers both former Fixnum and Bignum values; those two
  # constants no longer exist on Ruby 3.2+.
  assert { min.is_a?(Integer) }
  assert { max.is_a?(Integer) }
  assert { min <= max }

  @min_id = min
  @max_id = max
end
|
340
|
+
|
341
|
+
# Set values filter.
|
342
|
+
#
|
343
|
+
# Only match those records where <tt>attribute</tt> column values
|
344
|
+
# are in specified set.
|
345
|
+
def SetFilter(attribute, values, exclude = false)
  # Appends a values filter for +attribute+ to @filters.
  assert { attribute.instance_of? String }
  assert { values.instance_of? Array }
  assert { !values.empty? }

  # The guard repeats the assertions above; it is kept because assert is
  # defined outside this method and may not always raise.
  if values.instance_of?(Array) && values.size > 0
    values.each do |value|
      # Integer covers both former Fixnum and Bignum values; those two
      # constants no longer exist on Ruby 3.2+.
      assert { value.is_a?(Integer) }
    end

    @filters << { 'type' => SPH_FILTER_VALUES, 'attr' => attribute, 'exclude' => exclude, 'values' => values }
  end
end
|
358
|
+
|
359
|
+
# Set range filter.
|
360
|
+
#
|
361
|
+
# Only match those records where <tt>attribute</tt> column value
|
362
|
+
# is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
363
|
+
def SetFilterRange(attribute, min, max, exclude = false)
  # Appends an integer range filter for +attribute+ to @filters.
  # Integer covers both former Fixnum and Bignum values; those two
  # constants no longer exist on Ruby 3.2+.
  assert { attribute.instance_of? String }
  assert { min.is_a?(Integer) }
  assert { max.is_a?(Integer) }
  assert { min <= max }

  @filters << { 'type' => SPH_FILTER_RANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
end
|
371
|
+
|
372
|
+
# Set float range filter.
|
373
|
+
#
|
374
|
+
# Only match those records where <tt>attribute</tt> column value
|
375
|
+
# is between <tt>min</tt> and <tt>max</tt> (including <tt>min</tt> and <tt>max</tt>).
|
376
|
+
def SetFilterFloatRange(attribute, min, max, exclude = false)
  # Appends a float range filter for +attribute+ to @filters.
  # Both bounds must be Float instances.
  assert { attribute.instance_of? String }
  assert { min.instance_of? Float }
  assert { max.instance_of? Float }
  assert { min <= max }

  @filters << { 'type' => SPH_FILTER_FLOATRANGE, 'attr' => attribute, 'exclude' => exclude, 'min' => min, 'max' => max }
end
|
384
|
+
|
385
|
+
# Setup anchor point for geosphere distance calculations.
|
386
|
+
#
|
387
|
+
# Required to use <tt>@geodist</tt> in filters and sorting
|
388
|
+
# distance will be computed to this point. Latitude and longitude
|
389
|
+
# must be in radians.
|
390
|
+
#
|
391
|
+
# * <tt>attrlat</tt> -- is the name of latitude attribute
|
392
|
+
# * <tt>attrlong</tt> -- is the name of longitude attribute
|
393
|
+
# * <tt>lat</tt> -- is anchor point latitude, in radians
|
394
|
+
# * <tt>long</tt> -- is anchor point longitude, in radians
|
395
|
+
def SetGeoAnchor(attrlat, attrlong, lat, long)
  # Stores the anchor point as a hash of attribute names and coordinates.
  # Attribute names must be Strings and coordinates must be Floats.
  assert { attrlat.instance_of? String }
  assert { attrlong.instance_of? String }
  assert { lat.instance_of? Float }
  assert { long.instance_of? Float }

  @anchor = { 'attrlat' => attrlat, 'attrlong' => attrlong, 'lat' => lat, 'long' => long }
end
|
403
|
+
|
404
|
+
# Set grouping attribute and function.
|
405
|
+
#
|
406
|
+
# In grouping mode, all matches are assigned to different groups
|
407
|
+
# based on grouping function value.
|
408
|
+
#
|
409
|
+
# Each group keeps track of the total match count, and the best match
|
410
|
+
# (in this group) according to current sorting function.
|
411
|
+
#
|
412
|
+
# The final result set contains one best match per group, with
|
413
|
+
# grouping function value and matches count attached.
|
414
|
+
#
|
415
|
+
# Groups in result set could be sorted by any sorting clause,
|
416
|
+
# including both document attributes and the following special
|
417
|
+
# internal Sphinx attributes:
|
418
|
+
#
|
419
|
+
# * @id - match document ID;
|
420
|
+
# * @weight, @rank, @relevance - match weight;
|
421
|
+
# * @group - groupby function value;
|
422
|
+
# * @count - amount of matches in group.
|
423
|
+
#
|
424
|
+
# the default mode is to sort by groupby value in descending order,
|
425
|
+
# ie. by '@group desc'.
|
426
|
+
#
|
427
|
+
# 'total_found' would contain total amount of matching groups over
|
428
|
+
# the whole index.
|
429
|
+
#
|
430
|
+
# WARNING: grouping is done in fixed memory and thus its results
|
431
|
+
# are only approximate; so there might be more groups reported
|
432
|
+
# in total_found than actually present. @count might also
|
433
|
+
# be underestimated.
|
434
|
+
#
|
435
|
+
# For example, if sorting by relevance and grouping by "published"
|
436
|
+
# attribute with SPH_GROUPBY_DAY function, then the result set will
|
437
|
+
# contain one most relevant match per each day when there were any
|
438
|
+
# matches published, with day number and per-day match count attached,
|
439
|
+
# and sorted by day number in descending order (ie. recent days first).
|
440
|
+
def SetGroupBy(attribute, func, groupsort = '@group desc')
  # Stores the group-by attribute, grouping function, and group sort clause.
  # Only the SPH_GROUPBY_* constants are accepted for +func+.
  known_funcs = [
    SPH_GROUPBY_DAY,
    SPH_GROUPBY_WEEK,
    SPH_GROUPBY_MONTH,
    SPH_GROUPBY_YEAR,
    SPH_GROUPBY_ATTR,
    SPH_GROUPBY_ATTRPAIR
  ]
  assert { attribute.instance_of? String }
  assert { groupsort.instance_of? String }
  assert { known_funcs.include?(func) }

  @groupby = attribute
  @groupfunc = func
  @groupsort = groupsort
end
|
454
|
+
|
455
|
+
# Set count-distinct attribute for group-by queries.
|
456
|
+
def SetGroupDistinct(attribute)
  # Stores the count-distinct attribute name; must be a String.
  assert { attribute.instance_of? String }
  @groupdistinct = attribute
end
|
460
|
+
|
461
|
+
# Set distributed retries count and delay.
|
462
|
+
def SetRetries(count, delay = 0)
  # Stores the distributed retry count and delay; both must be Integers.
  # Integer replaces Fixnum, which no longer exists on Ruby 3.2+.
  assert { count.is_a? Integer }
  assert { delay.is_a? Integer }

  @retrycount = count
  @retrydelay = delay
end
|
469
|
+
|
470
|
+
# Set attribute values override
|
471
|
+
#
|
472
|
+
# There can be only one override per attribute.
|
473
|
+
# +values+ must be a hash that maps document IDs to attribute values.
|
474
|
+
def SetOverride(attrname, attrtype, values)
  # Appends an attribute override entry to @overrides.
  # +values+ must be a Hash; +attrtype+ must be one of the types listed below.
  # NOTE(review): entries are only appended, so nothing here prevents a second
  # override for the same attribute - confirm whether that is intended.
  allowed_types = [SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT]
  assert { attrname.instance_of? String }
  assert { allowed_types.include?(attrtype) }
  assert { values.instance_of? Hash }

  @overrides << { 'attr' => attrname, 'type' => attrtype, 'values' => values }
end
|
481
|
+
|
482
|
+
# Set select-list (attributes or expressions), SQL-like syntax.
|
483
|
+
def SetSelect(select)
  # Stores the select-list; must be a String.
  assert { select.instance_of? String }
  @select = select
end
|
487
|
+
|
488
|
+
# Clear all filters (for multi-queries).
|
489
|
+
def ResetFilters
  # Drops all filters and the geo anchor, restoring both to empty arrays.
  @filters = []
  @anchor = []
end
|
493
|
+
|
494
|
+
# Clear groupby settings (for multi-queries).
|
495
|
+
def ResetGroupBy
  # Restores every group-by setting to the value assigned in initialize.
  @groupby = ''
  @groupfunc = SPH_GROUPBY_DAY
  @groupsort = '@group desc'
  @groupdistinct = ''
end
|
501
|
+
|
502
|
+
# Clear all attribute value overrides (for multi-queries).
|
503
|
+
def ResetOverrides
  # Drops all attribute value overrides.
  @overrides = []
end
|
506
|
+
|
507
|
+
# Connect to searchd server and run given search query.
|
508
|
+
#
|
509
|
+
# <tt>query</tt> is query string
|
510
|
+
|
511
|
+
# <tt>index</tt> is index name (or names) to query. default value is "*" which means
|
512
|
+
# to query all indexes. Accepted characters for index names are letters, numbers,
|
513
|
+
# dash, and underscore; everything else is considered a separator. Therefore,
|
514
|
+
# all the following calls are valid and will search two indexes:
|
515
|
+
#
|
516
|
+
# sphinx.Query('test query', 'main delta')
|
517
|
+
# sphinx.Query('test query', 'main;delta')
|
518
|
+
# sphinx.Query('test query', 'main, delta')
|
519
|
+
#
|
520
|
+
# Index order matters. If identical IDs are found in two or more indexes,
|
521
|
+
# weight and attribute values from the very last matching index will be used
|
522
|
+
# for sorting and returning to client. Therefore, in the example above,
|
523
|
+
# matches from "delta" index will always "win" over matches from "main".
|
524
|
+
#
|
525
|
+
# Returns false on failure.
|
526
|
+
# Returns hash which has the following keys on success:
|
527
|
+
#
|
528
|
+
# * <tt>'matches'</tt> -- array of hashes {'weight', 'group', 'id'}, where 'id' is document_id.
|
529
|
+
# * <tt>'total'</tt> -- total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
|
530
|
+
# * <tt>'total_found'</tt> -- total amount of matching documents in index
|
531
|
+
# * <tt>'time'</tt> -- search time
|
532
|
+
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ('docs', 'hits') hash
|
533
|
+
def Query(query, index = '*', comment = '')
  # Runs a single query: queues it with AddQuery, executes the batch with
  # RunQueries, and returns the first result hash, or false on failure.
  # A single-shot query must not be mixed with a pending batch.
  assert { @reqs.empty? }
  @reqs = []

  AddQuery(query, index, comment)
  results = RunQueries()

  # probably network error; error message should be already filled
  return false unless results.instance_of?(Array)

  first = results[0]
  @error = first['error']
  @warning = first['warning']

  return false if first['status'] == SEARCHD_ERROR

  first
end
|
549
|
+
|
550
|
+
# Add query to batch.
|
551
|
+
#
|
552
|
+
# Batch queries enable searchd to perform internal optimizations,
|
553
|
+
# if possible; and reduce network connection overheads in all cases.
|
554
|
+
#
|
555
|
+
# For instance, running exactly the same query with different
|
556
|
+
# groupby settings will enable searchd to perform expensive
|
557
|
+
# full-text search and ranking operation only once, but compute
|
558
|
+
# multiple groupby results from its output.
|
559
|
+
#
|
560
|
+
# Parameters are exactly the same as in <tt>Query</tt> call.
|
561
|
+
# Returns index to results array returned by <tt>RunQueries</tt> call.
|
562
|
+
def AddQuery(query, index = '*', comment = '')
  # Serializes the current per-query settings together with +query+ into a
  # Request, appends its string form to @reqs, and returns the position of
  # the new entry in @reqs.
  request = Request.new

  # mode and limits
  request.put_int @offset, @limit, @mode, @ranker, @sort
  request.put_string @sortby
  # query itself
  request.put_string query
  # weights
  request.put_int_array @weights
  # indexes
  request.put_string index
  # id64 range marker
  request.put_int 1
  # id64 range
  request.put_int64 @min_id.to_i, @max_id.to_i

  # filters
  request.put_int @filters.length
  @filters.each do |filter|
    request.put_string filter['attr']
    request.put_int filter['type']

    case filter['type']
    when SPH_FILTER_VALUES
      request.put_int64_array filter['values']
    when SPH_FILTER_RANGE
      request.put_int64 filter['min'], filter['max']
    when SPH_FILTER_FLOATRANGE
      request.put_float filter['min'], filter['max']
    else
      raise SphinxInternalError, 'Internal error: unhandled filter type'
    end
    request.put_int filter['exclude'] ? 1 : 0
  end

  # group-by clause, max-matches count, group-sort clause, cutoff count
  request.put_int @groupfunc
  request.put_string @groupby
  request.put_int @maxmatches
  request.put_string @groupsort
  request.put_int @cutoff, @retrycount, @retrydelay
  request.put_string @groupdistinct

  # anchor point
  if @anchor.empty?
    request.put_int 0
  else
    request.put_int 1
    request.put_string @anchor['attrlat'], @anchor['attrlong']
    request.put_float @anchor['lat'], @anchor['long']
  end

  # per-index weights
  request.put_int @indexweights.length
  @indexweights.each do |idx, weight|
    request.put_string idx
    request.put_int weight
  end

  # max query time
  request.put_int @maxquerytime

  # per-field weights
  request.put_int @fieldweights.length
  @fieldweights.each do |field, weight|
    request.put_string field
    request.put_int weight
  end

  # comment
  request.put_string comment

  # attribute overrides
  request.put_int @overrides.length
  @overrides.each do |entry|
    request.put_string entry['attr']
    request.put_int entry['type'], entry['values'].size
    entry['values'].each do |id, val|
      # Integer covers both former Fixnum and Bignum values; those two
      # constants no longer exist on Ruby 3.2+.
      assert { id.is_a?(Integer) }
      assert { val.is_a?(Integer) || val.instance_of?(Float) }

      request.put_int64 id
      # The value is written with the width matching the override type.
      case entry['type']
      when SPH_ATTR_FLOAT
        request.put_float val
      when SPH_ATTR_BIGINT
        request.put_int64 val
      else
        request.put_int val
      end
    end
  end

  # select-list
  request.put_string @select

  # store request to requests array
  @reqs << request.to_s
  @reqs.length - 1
end
|
664
|
+
|
665
|
+
# Run queries batch.
|
666
|
+
#
|
667
|
+
# Returns an array of result sets on success.
|
668
|
+
# Returns false on network IO failure.
|
669
|
+
#
|
670
|
+
# Each result set in returned array is a hash which contains
|
671
|
+
# the same keys as the hash returned by <tt>Query</tt>, plus:
|
672
|
+
#
|
673
|
+
# * <tt>'error'</tt> -- search error for this query
|
674
|
+
# * <tt>'words'</tt> -- hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
|
675
|
+
def RunQueries
  # With nothing queued there is nothing to send: record the problem in
  # @error and report failure to the caller.
  if @reqs.empty?
    @error = 'No queries defined, issue AddQuery() first'
    return false
  end

  # Glue the queued requests into one payload, reset the queue, and send
  # the batch as a single :search command (the query count travels as the
  # extra header integer).
  nreqs = @reqs.length
  payload = @reqs.join('')
  @reqs = []
  response = PerformRequest(:search, payload, nreqs)

  # Parse one result set per queued query. Unlike BuildExcerpts and
  # BuildKeywords, no EOFError rescue is active here, so whatever the
  # response reader raises on a short reply propagates to the caller.
  results = []
  nreqs.times do
    result = {}
    result['error'] = ''
    result['warning'] = ''

    # Per-query status. A warning carries a message but is followed by a
    # normal result body; any other non-OK status ends this result set.
    status = response.get_int
    result['status'] = status
    unless status == SEARCHD_OK
      message = response.get_string
      unless status == SEARCHD_WARNING
        result['error'] = message
        results << result
        next
      end
      result['warning'] = message
    end

    # Schema: full-text field names, then attribute name/type pairs.
    # Attribute order is remembered because match values arrive in it.
    field_names = []
    response.get_int.times { field_names << response.get_string }
    result['fields'] = field_names

    attr_types = {}
    attr_order = []
    response.get_int.times do
      name = response.get_string
      attr_types[name] = response.get_int
      attr_order << name
    end
    result['attrs'] = attr_types

    # Match count, followed by the flag selecting 64-bit document ids.
    match_count = response.get_int
    wide_ids = response.get_int != 0

    result['matches'] = []
    match_count.times do
      if wide_ids
        doc = response.get_int64
        weight = response.get_int
      else
        doc, weight = response.get_ints(2)
      end

      match = { 'id' => doc, 'weight' => weight }
      # The 'attrs' key only exists when the schema has attributes.
      match['attrs'] = {} unless attr_order.empty?
      attr_order.each do |name|
        match['attrs'][name] =
          case attr_types[name]
          when SPH_ATTR_BIGINT
            response.get_int64
          when SPH_ATTR_FLOAT
            response.get_float
          else
            # Everything else is read as an int; for multi-value attributes
            # that int is the number of values that follow.
            val = response.get_int
            if (attr_types[name] & SPH_ATTR_MULTI) != 0
              (1..val).map { response.get_int }
            else
              val
            end
          end
      end
      result['matches'] << match
    end

    # Totals, elapsed time (milliseconds on the wire, formatted as seconds)
    # and per-word statistics.
    result['total'], result['total_found'], msecs, word_count = response.get_ints(4)
    result['time'] = '%.3f' % (msecs / 1000.0)

    result['words'] = {}
    word_count.times do
      word = response.get_string
      docs, hits = response.get_ints(2)
      result['words'][word] = { 'docs' => docs, 'hits' => hits }
    end

    results << result
  end

  results
end
|
796
|
+
|
797
|
+
# Connect to searchd server and generate excerpts from given documents.
|
798
|
+
#
|
799
|
+
# * <tt>docs</tt> -- an array of strings which represent the documents' contents
|
800
|
+
# * <tt>index</tt> -- a string specifying the index which settings will be used
|
801
|
+
# for stemming, lexing and case folding
|
802
|
+
# * <tt>words</tt> -- a string which contains the words to highlight
|
803
|
+
# * <tt>opts</tt> is a hash which contains additional optional highlighting parameters.
|
804
|
+
#
|
805
|
+
# You can use following parameters:
|
806
|
+
# * <tt>'before_match'</tt> -- a string to insert before a set of matching words, default is "<b>"
|
807
|
+
# * <tt>'after_match'</tt> -- a string to insert after a set of matching words, default is "</b>"
|
808
|
+
# * <tt>'chunk_separator'</tt> -- a string to insert between excerpts chunks, default is " ... "
|
809
|
+
# * <tt>'limit'</tt> -- max excerpt size in symbols (codepoints), default is 256
|
810
|
+
# * <tt>'around'</tt> -- how much words to highlight around each match, default is 5
|
811
|
+
# * <tt>'exact_phrase'</tt> -- whether to highlight exact phrase matches only, default is <tt>false</tt>
|
812
|
+
# * <tt>'single_passage'</tt> -- whether to extract single best passage only, default is false
|
813
|
+
# * <tt>'use_boundaries'</tt> -- whether to extract passages by phrase boundaries setup in tokenizer
|
814
|
+
# * <tt>'weight_order'</tt> -- whether to order best passages in document (default) or weight order
|
815
|
+
#
|
816
|
+
# Returns false on failure.
|
817
|
+
# Returns an array of string excerpts on success.
|
818
|
+
def BuildExcerpts(docs, index, words, opts = {})
  assert { docs.instance_of? Array }
  assert { index.instance_of? String }
  assert { words.instance_of? String }
  assert { opts.instance_of? Hash }

  # Fix up options on a private copy so the caller's hash is never modified
  # (and a frozen hash is accepted). As before, a missing, nil or false
  # value is replaced by the default.
  options = opts.dup
  {
    'before_match'    => '<b>',
    'after_match'     => '</b>',
    'chunk_separator' => ' ... ',
    'limit'           => 256,
    'around'          => 5,
    'exact_phrase'    => false,
    'single_passage'  => false,
    'use_boundaries'  => false,
    'weight_order'    => false
  }.each { |key, default| options[key] ||= default }

  # Flag bits: 1 is always set (remove spaces); the rest mirror the
  # boolean options.
  flags = 1
  flags |= 2 if options['exact_phrase']
  flags |= 4 if options['single_passage']
  flags |= 8 if options['use_boundaries']
  flags |= 16 if options['weight_order']

  # Build request (v.1.0): mode=0 and flags, index, words, highlighting
  # options, then the documents themselves.
  request = Request.new
  request.put_int 0, flags
  request.put_string index
  request.put_string words

  request.put_string options['before_match']
  request.put_string options['after_match']
  request.put_string options['chunk_separator']
  request.put_int options['limit'].to_i, options['around'].to_i

  request.put_int docs.size
  docs.each do |doc|
    assert { doc.instance_of? String }

    request.put_string doc
  end

  response = PerformRequest(:excerpt, request)

  # Parse response: one excerpt string per submitted document, in order.
  begin
    docs.map { response.get_string }
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
|
879
|
+
|
880
|
+
# Connect to searchd server, and generate keyword list for a given query.
|
881
|
+
#
|
882
|
+
# Returns an array of words on success.
|
883
|
+
def BuildKeywords(query, index, hits)
  assert { query.instance_of? String }
  assert { index.instance_of? String }
  assert { hits.instance_of?(TrueClass) || hits.instance_of?(FalseClass) }

  # Request body (v.1.0): query text, index name, then a 0/1 flag asking
  # for per-keyword statistics.
  request = Request.new
  request.put_string query
  request.put_string index
  request.put_int(hits ? 1 : 0)

  response = PerformRequest(:keywords, request)

  # The reply starts with the keyword count; each keyword is a
  # tokenized/normalized string pair, followed by docs/hits counters only
  # when statistics were requested.
  keywords = []
  begin
    response.get_int.times do
      tokenized = response.get_string
      normalized = response.get_string
      keyword = { 'tokenized' => tokenized, 'normalized' => normalized }
      if hits
        keyword['docs'], keyword['hits'] = response.get_ints(2)
      end
      keywords << keyword
    end
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end

  keywords
end
|
917
|
+
|
918
|
+
# Batch update given attributes in given rows in given indexes.
|
919
|
+
#
|
920
|
+
# * +index+ is a name of the index to be updated
|
921
|
+
# * +attrs+ is an array of attribute name strings.
|
922
|
+
# * +values+ is a hash where key is document id, and value is an array of
|
923
|
+
#   new attribute values
|
924
|
+
# * +mva+ identifies whether update MVA
|
925
|
+
#
|
926
|
+
# Returns number of actually updated documents (0 or more) on success.
|
927
|
+
# Returns -1 on failure.
|
928
|
+
#
|
929
|
+
# Usage example:
|
930
|
+
# sphinx.UpdateAttributes('test1', ['group_id'], { 1 => [456] })
|
931
|
+
def UpdateAttributes(index, attrs, values, mva = false)
  # Verify everything (assertions only run when $DEBUG is set).
  # Integer is used instead of Fixnum: Fixnum no longer exists on current
  # Rubies, and document ids are sent as 64-bit values, so large ids are
  # legitimate.
  assert { index.instance_of? String }
  assert { mva.instance_of?(TrueClass) || mva.instance_of?(FalseClass) }

  assert { attrs.instance_of? Array }
  attrs.each do |attr|
    assert { attr.instance_of? String }
  end

  assert { values.instance_of? Hash }
  values.each do |id, entry|
    assert { id.is_a? Integer }
    assert { entry.instance_of? Array }
    assert { entry.length == attrs.length }
    entry.each do |v|
      if mva
        assert { v.instance_of? Array }
        v.each { |vv| assert { vv.is_a? Integer } }
      else
        assert { v.is_a? Integer }
      end
    end
  end

  # Build request: index name, then each attribute name with its MVA flag.
  request = Request.new
  request.put_string index

  request.put_int attrs.length
  attrs.each do |attr|
    request.put_string attr
    request.put_int(mva ? 1 : 0)
  end

  # One row per document: 64-bit id followed by the new values, in the same
  # order as +attrs+ (each value is an int array when updating MVAs).
  request.put_int values.length
  values.each do |id, entry|
    request.put_int64 id
    if mva
      entry.each { |v| request.put_int_array v }
    else
      request.put_int(*entry)
    end
  end

  response = PerformRequest(:update, request)

  # Parse response: a single int, the number of updated documents.
  begin
    return response.get_int
  rescue EOFError
    @error = 'incomplete reply'
    raise SphinxResponseError, @error
  end
end
|
986
|
+
|
987
|
+
protected
|
988
|
+
|
989
|
+
# Connect to searchd server.
|
990
|
+
def Connect
  # Open a TCP connection to @host:@port and perform the version handshake.
  # Returns the connected socket; raises SphinxConnectError (message also
  # stored in @error) when the connection or the handshake fails.
  begin
    sock = TCPSocket.new(@host, @port)
  rescue
    @error = "connection to #{@host}:#{@port} failed"
    raise SphinxConnectError, @error
  end

  # The server greets with its protocol version as a 4-byte big-endian int.
  # If the peer hangs up first, recv yields an empty string (nil on some
  # Ruby versions) and there is nothing to unpack, so treat a missing
  # version like an unsupported one instead of failing on nil.
  v = sock.recv(4).to_s.unpack('N*').first
  if v.nil? || v < 1
    sock.close
    @error = "expected searchd protocol version 1+, got version '#{v}'"
    raise SphinxConnectError, @error
  end

  # Answer with the client protocol version (1).
  sock.send([1].pack('N'), 0)
  sock
end
|
1008
|
+
|
1009
|
+
# Get and check response packet from searchd server.
|
1010
|
+
def GetResponse(sock, client_version)
  # Read one reply packet from +sock+, close the socket, validate the
  # packet and return its body as a string.
  #
  # The reply starts with an 8-byte header (status and version as 16-bit
  # ints, body length as a 32-bit int) followed by the body. Problems are
  # recorded in @error and raised as SphinxResponseError,
  # SphinxInternalError, SphinxTemporaryError or SphinxUnknownError; a
  # server warning is stored in @warning and stripped from the body.
  response = ''
  len = 0

  begin
    header = sock.recv(8).to_s
    if header.length == 8
      status, ver, len = header.unpack('n2N')
      left = len.to_i
      while left > 0
        begin
          chunk = sock.recv(left)
        rescue EOFError
          break
        end
        # No data means the peer closed the connection; stop reading rather
        # than looping forever on a body that will never arrive.
        break if chunk.nil? || chunk.empty?

        response << chunk
        left -= chunk.length
      end
    end
  ensure
    # Always release the socket, even when reading raised.
    sock.close
  end

  # check response
  read = response.length
  if response.empty? || read != len.to_i
    # 0 is truthy in Ruby, so the length has to be compared explicitly to
    # tell an empty reply from a truncated one.
    @error = len.to_i > 0 \
      ? "failed to read searchd response (status=#{status}, ver=#{ver}, len=#{len}, read=#{read})" \
      : 'received zero-sized searchd response'
    raise SphinxResponseError, @error
  end

  # check status
  if status == SEARCHD_WARNING
    # Body layout: 4-byte warning length, warning text, then the payload.
    wlen = response[0, 4].unpack('N*').first
    @warning = response[4, wlen]
    return response[4 + wlen, response.length - 4 - wlen]
  end

  if status == SEARCHD_ERROR
    @error = 'searchd error: ' + response[4, response.length - 4]
    raise SphinxInternalError, @error
  end

  if status == SEARCHD_RETRY
    @error = 'temporary searchd error: ' + response[4, response.length - 4]
    raise SphinxTemporaryError, @error
  end

  unless status == SEARCHD_OK
    @error = "unknown status code: '#{status}'"
    raise SphinxUnknownError, @error
  end

  # check version: an older server still gets its reply returned, with a
  # warning recorded.
  if ver < client_version
    @warning = "searchd command v.#{ver >> 8}.#{ver & 0xff} older than client's " +
      "v.#{client_version >> 8}.#{client_version & 0xff}, some options might not work"
  end

  return response
end
|
1071
|
+
|
1072
|
+
# Connect, send query, get response.
|
1073
|
+
def PerformRequest(command, request, additional = nil)
  # Send one command to searchd and return the parsed reply.
  #
  # * +command+ is a symbol such as :search; it selects the
  #   SEARCHD_COMMAND_* and VER_COMMAND_* constants of Sphinx::Client.
  # * +request+ is the request body (anything responding to to_s).
  # * +additional+ is an optional integer inserted between the header and
  #   the body; it is counted in the declared length.
  #
  # Returns a Response wrapping the reply body.
  cmd = command.to_s.upcase
  command_id = Sphinx::Client.const_get('SEARCHD_COMMAND_' + cmd)
  command_ver = Sphinx::Client.const_get('VER_COMMAND_' + cmd)

  sock = self.Connect

  # Treat the body as raw bytes: the length field counts bytes, not
  # characters, and the packed header is binary, so a text-encoded body
  # with multibyte characters could otherwise be mis-measured or fail to
  # concatenate.
  payload = request.to_s
  payload = payload.dup.force_encoding('BINARY') if payload.respond_to?(:force_encoding)

  len = payload.bytesize + (additional.nil? ? 0 : 4)
  header = [command_id, command_ver, len].pack('nnN')
  header << [additional].pack('N') unless additional.nil?
  sock.send(header + payload, 0)

  response = self.GetResponse(sock, command_ver)
  return Response.new(response)
end
|
1086
|
+
|
1087
|
+
# :stopdoc:
|
1088
|
+
def assert
  # Assertions are checked only when Ruby runs in debug mode ($DEBUG);
  # otherwise the block is never evaluated.
  return unless $DEBUG

  raise 'Assertion failed!' unless yield
end
|
1091
|
+
# :startdoc:
|
1092
|
+
end
|
1093
|
+
end
|