riddle 0.9.8.1112
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENCE +20 -0
- data/README +60 -0
- data/lib/riddle.rb +20 -0
- data/lib/riddle/client.rb +548 -0
- data/lib/riddle/client/filter.rb +44 -0
- data/lib/riddle/client/message.rb +65 -0
- data/lib/riddle/client/response.rb +74 -0
- data/spec/fixtures/data/anchor.bin +0 -0
- data/spec/fixtures/data/any.bin +0 -0
- data/spec/fixtures/data/boolean.bin +0 -0
- data/spec/fixtures/data/distinct.bin +0 -0
- data/spec/fixtures/data/field_weights.bin +0 -0
- data/spec/fixtures/data/filter.bin +0 -0
- data/spec/fixtures/data/filter_array.bin +0 -0
- data/spec/fixtures/data/filter_array_exclude.bin +0 -0
- data/spec/fixtures/data/filter_floats.bin +0 -0
- data/spec/fixtures/data/filter_floats_exclude.bin +0 -0
- data/spec/fixtures/data/filter_floats_range.bin +0 -0
- data/spec/fixtures/data/filter_range.bin +0 -0
- data/spec/fixtures/data/filter_range_exclude.bin +0 -0
- data/spec/fixtures/data/group.bin +0 -0
- data/spec/fixtures/data/index.bin +0 -0
- data/spec/fixtures/data/index_weights.bin +0 -0
- data/spec/fixtures/data/phrase.bin +0 -0
- data/spec/fixtures/data/rank_mode.bin +0 -0
- data/spec/fixtures/data/simple.bin +0 -0
- data/spec/fixtures/data/sort.bin +0 -0
- data/spec/fixtures/data/update_simple.bin +0 -0
- data/spec/fixtures/data/weights.bin +0 -0
- data/spec/fixtures/data_generator.php +130 -0
- data/spec/fixtures/sphinx/configuration.erb +38 -0
- data/spec/fixtures/sphinx/people.old.spa +0 -0
- data/spec/fixtures/sphinx/people.old.spd +0 -0
- data/spec/fixtures/sphinx/people.old.sph +0 -0
- data/spec/fixtures/sphinx/people.old.spi +0 -0
- data/spec/fixtures/sphinx/people.old.spm +0 -0
- data/spec/fixtures/sphinx/people.old.spp +0 -0
- data/spec/fixtures/sphinx/people.spa +0 -0
- data/spec/fixtures/sphinx/people.spd +0 -0
- data/spec/fixtures/sphinx/people.sph +0 -0
- data/spec/fixtures/sphinx/people.spi +0 -0
- data/spec/fixtures/sphinx/people.spm +0 -0
- data/spec/fixtures/sphinx/people.spp +0 -0
- data/spec/fixtures/sphinx/searchd.log +4732 -0
- data/spec/fixtures/sphinx/searchd.query.log +783 -0
- data/spec/fixtures/sphinx/spec.conf +38 -0
- data/spec/fixtures/sphinxapi.php +1066 -0
- data/spec/fixtures/sql/conf.example.yml +3 -0
- data/spec/fixtures/sql/conf.yml +3 -0
- data/spec/fixtures/sql/data.sql +25000 -0
- data/spec/fixtures/sql/structure.sql +16 -0
- data/spec/functional/excerpt_spec.rb +102 -0
- data/spec/functional/search_spec.rb +69 -0
- data/spec/functional/update_spec.rb +41 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/sphinx_helper.rb +92 -0
- data/spec/unit/client_spec.rb +154 -0
- data/spec/unit/filter_spec.rb +33 -0
- data/spec/unit/message_spec.rb +63 -0
- data/spec/unit/response_spec.rb +64 -0
- metadata +128 -0
data/MIT-LICENCE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2007 Pat Allan
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
This client has been written to interface with Sphinx[http://sphinxsearch.com/]. It is written by
|
2
|
+
{Pat Allan}[http://freelancing-gods.com], and has been influenced by both Dmytro Shteflyuk's Ruby
|
3
|
+
client and the original PHP client - credit where credit's due, after all.
|
4
|
+
|
5
|
+
It does not follow the same syntax as those two, though (not much point writing this otherwise) -
|
6
|
+
opting for a more Ruby-like structure.
|
7
|
+
|
8
|
+
While it doesn't (yet) exist as a gem, you can obtain the sourcecode via subversion. If you
|
9
|
+
are after a specific release, use the tag as follows:
|
10
|
+
|
11
|
+
svn co http://rails-oceania.googlecode.com/svn/patallan/riddle/tags/0.9.8-r1112 riddle
|
12
|
+
|
13
|
+
Or for the most current, just use trunk:
|
14
|
+
|
15
|
+
svn co http://rails-oceania.googlecode.com/svn/patallan/riddle/trunk riddle
|
16
|
+
|
17
|
+
Please note that at the time of writing, only 0.9.8r871 through to 0.9.8r1112 are supported.
|
18
|
+
|
19
|
+
To get started, just instantiate a Client object:
|
20
|
+
|
21
|
+
client = Riddle::Client.new # defaults to localhost and port 3312
|
22
|
+
client = Riddle::Client.new "sphinxserver.domain.tld", 3333 # custom settings
|
23
|
+
|
24
|
+
And then set the parameters to what you want, before running a query:
|
25
|
+
|
26
|
+
client.match_mode = :extended
|
27
|
+
client.query "Pat Allan @state Victoria"
|
28
|
+
|
29
|
+
The results from a query are similar to the other clients - but here are the details. It's a hash with
|
30
|
+
the following keys:
|
31
|
+
|
32
|
+
* :matches
|
33
|
+
* :fields
|
34
|
+
* :attributes
|
35
|
+
* :attribute_names
|
36
|
+
* :words
|
37
|
+
* :total
|
38
|
+
* :total_found
|
39
|
+
* :time
|
40
|
+
* :status
|
41
|
+
* :warning (if appropriate)
|
42
|
+
* :error (if appropriate)
|
43
|
+
|
44
|
+
The key <tt>:matches</tt> returns an array of hashes - the actual search results. Each hash has the
|
45
|
+
document id (<tt>:doc</tt>), the result weighting (<tt>:weight</tt>), and a hash of the attributes for
|
46
|
+
the document (<tt>:attributes</tt>).
|
47
|
+
|
48
|
+
The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return lists of fields and attributes for the
|
49
|
+
documents. The key <tt>:attributes</tt> will return a hash of attribute name and type pairs, and
|
50
|
+
<tt>:words</tt> returns a hash of hashes representing the words from the search, with the number of
|
51
|
+
documents and hits for each, along the lines of:
|
52
|
+
|
53
|
+
results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
|
54
|
+
|
55
|
+
<tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the number of matches available, the
|
56
|
+
total number of matches (which may be greater than the maximum available), and the time in milliseconds
|
57
|
+
that the query took to run.
|
58
|
+
|
59
|
+
<tt>:status</tt> is the error code for the query - and if there was a related warning, it will be under
|
60
|
+
the <tt>:warning</tt> key. Fatal errors will be described under <tt>:error</tt>.
|
data/lib/riddle.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'socket'
|
2
|
+
require 'riddle/client'
|
3
|
+
require 'riddle/client/filter'
|
4
|
+
require 'riddle/client/message'
|
5
|
+
require 'riddle/client/response'
|
6
|
+
|
7
|
+
# Namespace for the Riddle client for Sphinx.
module Riddle #:nodoc:
  # Connection-related error type (going by its name; no raise site is
  # visible alongside this definition).
  class ConnectionError < StandardError #:nodoc:
  end

  # Release numbering: major.minor.tiny plus the revision number.
  module Version #:nodoc:
    Major = 0
    Minor = 9
    Tiny  = 8
    Rev   = 1112

    # Human-readable form, e.g. "0.9.8r1112".
    String     = "#{Major}.#{Minor}.#{Tiny}r#{Rev}"
    # Dotted form used as the gem version, e.g. "0.9.8.1112".
    GemVersion = "#{Major}.#{Minor}.#{Tiny}.#{Rev}"
  end
end
|
@@ -0,0 +1,548 @@
|
|
1
|
+
module Riddle
|
2
|
+
# Raised when searchd announces a protocol version lower than 1.
class VersionError < StandardError
end

# Raised when searchd sends no usable response, or answers with an error,
# retry or unknown status.
class ResponseError < StandardError
end
|
4
|
+
|
5
|
+
# This class was heavily based on the existing Client API by Dmytro Shteflyuk
|
6
|
+
# and Alexey Kovyrin. Their code worked fine, I just wanted something a bit
|
7
|
+
# more Ruby-ish (ie. lowercase and underscored method names). I also have
|
8
|
+
# used a few helper classes, just to neaten things up.
|
9
|
+
#
|
10
|
+
# Feel free to use it wherever. Send bug reports, patches, comments and
|
11
|
+
# suggestions to pat at freelancing-gods dot com.
|
12
|
+
#
|
13
|
+
# Most properties of the client are accessible through attribute accessors,
|
14
|
+
# and where relevant use symbols instead of the long constants common in
|
15
|
+
# other clients.
|
16
|
+
# Some examples:
|
17
|
+
#
|
18
|
+
# client.sort_mode = :extended
|
19
|
+
# client.sort_by = "birthday DESC"
|
20
|
+
# client.match_mode = :extended
|
21
|
+
#
|
22
|
+
# To add a filter, you will need to create a Filter object:
|
23
|
+
#
|
24
|
+
# client.filters << Riddle::Client::Filter.new("birthday",
|
25
|
+
# Time.local(1975, 1, 1).to_i..Time.local(1985, 1, 1).to_i, false)
|
26
|
+
#
|
27
|
+
class Client
|
28
|
+
# Command codes written into the request header
# (SEARCHD_COMMAND_SEARCH, _EXCERPT and _UPDATE).
Commands = {
  :search  => 0,
  :excerpt => 1,
  :update  => 2
}

# Version of each command as implemented by this client
# (VER_COMMAND_SEARCH, _EXCERPT and _UPDATE). Sent with every request and
# compared with the version searchd reports back.
Versions = {
  :search  => 0x112,
  :excerpt => 0x100,
  :update  => 0x101
}

# Status codes found in the response header
# (SEARCHD_OK, SEARCHD_ERROR, SEARCHD_RETRY and SEARCHD_WARNING).
Statuses = {
  :ok      => 0,
  :error   => 1,
  :retry   => 2,
  :warning => 3
}
|
46
|
+
|
47
|
+
# Numeric codes for the match modes (SPH_MATCH_*), keyed by the symbol that
# is assigned to Client#match_mode.
MatchModes = {
  :all       => 0, # SPH_MATCH_ALL
  :any       => 1, # SPH_MATCH_ANY
  :phrase    => 2, # SPH_MATCH_PHRASE
  :boolean   => 3, # SPH_MATCH_BOOLEAN
  :extended  => 4, # SPH_MATCH_EXTENDED
  :fullscan  => 5, # SPH_MATCH_FULLSCAN
  # Misspelt key from earlier releases, kept so existing callers still work.
  :fullsacn  => 5,
  :extended2 => 6  # SPH_MATCH_EXTENDED2
}
|
56
|
+
|
57
|
+
# Ranking modes (SPH_RANK_PROXIMITY_BM25, _BM25, _NONE, _WORDCOUNT).
RankModes = {
  :proximity_bm25 => 0,
  :bm25           => 1,
  :none           => 2,
  :wordcount      => 3
}

# Sort modes (SPH_SORT_RELEVANCE, _ATTR_DESC, _ATTR_ASC, _TIME_SEGMENTS,
# _EXTENDED, _EXPR).
SortModes = {
  :relevance     => 0,
  :attr_desc     => 1,
  :attr_asc      => 2,
  :time_segments => 3,
  :extended      => 4,
  :expr          => 5
}

# Attribute type codes reported by searchd (SPH_ATTR_INTEGER, _TIMESTAMP,
# _ORDINAL, _BOOL, _FLOAT, _MULTI).
AttributeTypes = {
  :integer   => 1,
  :timestamp => 2,
  :ordinal   => 3,
  :bool      => 4,
  :float     => 5,
  :multi     => 0x40000000
}

# Grouping functions (SPH_GROUPBY_DAY, _WEEK, _MONTH, _YEAR, _ATTR,
# _ATTRPAIR).
GroupFunctions = {
  :day      => 0,
  :week     => 1,
  :month    => 2,
  :year     => 3,
  :attr     => 4,
  :attrpair => 5
}

# Filter kinds (SPH_FILTER_VALUES, _RANGE, _FLOATRANGE).
FilterTypes = {
  :values      => 0,
  :range       => 1,
  :float_range => 2
}
|
96
|
+
|
97
|
+
# Where searchd is listening.
attr_accessor :server, :port
# Result window and limits.
attr_accessor :offset, :limit, :max_matches, :cut_off, :max_query_time
# Matching, ranking and ordering.
attr_accessor :match_mode, :rank_mode, :sort_mode, :sort_by
# Weightings.
attr_accessor :weights, :index_weights, :field_weights
# Restrictions on the documents considered.
attr_accessor :id_range, :filters, :anchor
# Grouping.
attr_accessor :group_by, :group_function, :group_clause, :group_distinct
# Retries.
attr_accessor :retry_count, :retry_delay
# Queries queued by append_query and sent by run; read-only from outside.
attr_reader :queue
|
103
|
+
|
104
|
+
# Builds a client for the given searchd host and port. A nil (or omitted)
# server falls back to "localhost" and a nil port to 3312. Every other
# setting starts at the default assigned below and can be changed through
# its attribute accessor.
def initialize(server=nil, port=nil)
  @server, @port = (server || "localhost"), (port || 3312)

  # result window and limits
  @offset, @limit, @max_matches = 0, 20, 1000
  @cut_off, @max_query_time = 0, 0

  # matching, ranking and ordering
  @match_mode, @rank_mode = :all, :proximity_bm25
  @sort_mode, @sort_by = :relevance, ''

  # weightings: for the two hashes, string keys are index names / field
  # names and integer values are weightings
  @weights, @index_weights, @field_weights = [], {}, {}

  # restrictions on the documents considered
  @id_range, @filters, @anchor = (0..0), [], {}

  # grouping
  @group_by, @group_function = '', :day
  @group_clause, @group_distinct = '@group desc', ''

  # retries
  @retry_count, @retry_delay = 0, 0

  # queries waiting to be sent by run
  @queue = []
end
|
138
|
+
|
139
|
+
# Set the geo-anchor point: the names of the attributes that hold the
# latitude and longitude, together with the reference position itself.
# Sphinx expects the latitude and longitude returned from your SQL source,
# and the reference position given here, to be in radians.
#
# Note that for geocoding to work properly, you must also set match_mode to
# :extended. To sort results by distance, set sort_mode to :extended and
# sort_by to (for example) '@geodist asc'.
#
# Example:
#   client.set_anchor('lat', -0.6591741, 'long', 2.530770)
#
# The anchor is stored as a hash and that hash is returned.
def set_anchor(lat_attr, lat, long_attr, long)
  point = {}
  point[:latitude_attribute]  = lat_attr
  point[:latitude]            = lat
  point[:longitude_attribute] = long_attr
  point[:longitude]           = long
  @anchor = point
end
|
158
|
+
|
159
|
+
# Queue a query without sending it; the parameters are the same as for the
# query method. Queued queries are sent together by run.
def append_query(search, index = '*')
  @queue.push(query_message(search, index))
end
|
164
|
+
|
165
|
+
# Run all the queries currently in the queue, in a single request. Returns
# an array with one results hash per queued query, in queue order (see the
# query method for the keys of each hash).
#
# The queue is emptied afterwards in every case, including when the request
# or the parsing of the response raises. Otherwise a failed query would be
# sent again ahead of the next one, and its result returned in place of the
# new one.
def run
  response = Response.new request(:search, @queue)

  @queue.collect { parse_search_result(response) }
ensure
  @queue.clear
end

# Reads the results of one query from the response and returns them as a
# hash. The reads below must stay in exactly this order, as each one
# consumes the next value of the response.
def parse_search_result(response)
  result = {
    :matches         => [],
    :fields          => [],
    :attributes      => {},
    :attribute_names => [],
    :words           => {}
  }

  result[:status] = response.next_int
  case result[:status]
  when Statuses[:warning]
    result[:warning] = response.next
  when Statuses[:error]
    # Nothing else follows for a failed query.
    result[:error] = response.next
    return result
  end

  result[:fields] = response.next_array

  response.next_int.times do
    attribute_name = response.next
    result[:attributes][attribute_name] = response.next_int
    result[:attribute_names] << attribute_name
  end

  match_count = response.next_int
  is_64_bit   = response.next_int > 0
  match_count.times do |position|
    doc    = is_64_bit ? response.next_64bit_int : response.next_int
    weight = response.next_int
    match  = {:doc => doc, :weight => weight, :index => position, :attributes => {}}

    result[:attribute_names].each do |attr|
      type = result[:attributes][attr]
      match[:attributes][attr] =
        if type == AttributeTypes[:float]
          response.next_float
        elsif (type & AttributeTypes[:multi]) != 0
          # The multi-value marker is a flag bit that searchd may combine
          # with the element type, so test the bit rather than equality.
          response.next_int_array
        else
          response.next_int
        end
    end

    result[:matches] << match
  end

  result[:total]       = response.next_int.to_i
  result[:total_found] = response.next_int.to_i
  # searchd reports the time as an integer number of thousandths.
  result[:time]        = ('%.3f' % (response.next_int / 1000.0)).to_f

  response.next_int.times do
    word = response.next
    docs = response.next_int
    hits = response.next_int
    result[:words][word] = {:docs => docs, :hits => hits}
  end

  result
end
private :parse_search_result
|
236
|
+
|
237
|
+
# Query the Sphinx daemon - defaulting to all indexes, but you can specify
|
238
|
+
# a specific one if you wish. The search parameter should be a string
|
239
|
+
# following Sphinx's expectations.
|
240
|
+
#
|
241
|
+
# The object returned from this method is a hash with the following keys:
|
242
|
+
#
|
243
|
+
# * :matches
|
244
|
+
# * :fields
|
245
|
+
# * :attributes
|
246
|
+
# * :attribute_names
|
247
|
+
# * :words
|
248
|
+
# * :total
|
249
|
+
# * :total_found
|
250
|
+
# * :time
|
251
|
+
# * :status
|
252
|
+
# * :warning (if appropriate)
|
253
|
+
# * :error (if appropriate)
|
254
|
+
#
|
255
|
+
# The key <tt>:matches</tt> returns an array of hashes - the actual search
|
256
|
+
# results. Each hash has the document id (<tt>:doc</tt>), the result
|
257
|
+
# weighting (<tt>:weight</tt>), and a hash of the attributes for the
|
258
|
+
# document (<tt>:attributes</tt>).
|
259
|
+
#
|
260
|
+
# The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return lists of
|
261
|
+
# fields and attributes for the documents. The key <tt>:attributes</tt>
|
262
|
+
# will return a hash of attribute name and type pairs, and <tt>:words</tt>
|
263
|
+
# returns a hash of hashes representing the words from the search, with the
|
264
|
+
# number of documents and hits for each, along the lines of:
|
265
|
+
#
|
266
|
+
# results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
|
267
|
+
#
|
268
|
+
# <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
|
269
|
+
# number of matches available, the total number of matches (which may be
|
270
|
+
# greater than the maximum available, depending on the number of matches
|
271
|
+
# and your sphinx configuration), and the time in milliseconds that the
|
272
|
+
# query took to run.
|
273
|
+
#
|
274
|
+
# <tt>:status</tt> is the error code for the query - and if there was a
|
275
|
+
# related warning, it will be under the <tt>:warning</tt> key. Fatal errors
|
276
|
+
# will be described under <tt>:error</tt>.
|
277
|
+
#
|
278
|
+
def query(search, index = '*')
  # Queue this single query, send the queue, and hand back the first results
  # hash that comes back.
  @queue << query_message(search, index)
  results = run
  results.first
end
|
282
|
+
|
283
|
+
# Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
|
284
|
+
# They may also be abbreviated to fit within a word limit.
|
285
|
+
#
|
286
|
+
# As part of the options hash, you will need to
|
287
|
+
# define:
|
288
|
+
# * :docs
|
289
|
+
# * :words
|
290
|
+
# * :index
|
291
|
+
#
|
292
|
+
# Optional settings include:
|
293
|
+
# * :before_match (defaults to <span class="match">)
|
294
|
+
# * :after_match (defaults to </span>)
|
295
|
+
# * :chunk_separator (defaults to ' … ' - which is an HTML ellipsis)
|
296
|
+
# * :limit (defaults to 256)
|
297
|
+
# * :around (defaults to 5)
|
298
|
+
# * :exact_phrase (defaults to false)
|
299
|
+
# * :single_passage (defaults to false)
|
300
|
+
#
|
301
|
+
# The defaults differ from the official PHP client, as I've opted for
|
302
|
+
# semantic HTML markup.
|
303
|
+
#
|
304
|
+
# Example:
|
305
|
+
#
|
306
|
+
# client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
|
307
|
+
# #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
|
308
|
+
#
|
309
|
+
# lorem_lipsum = "Lorem ipsum dolor..."
|
310
|
+
#
|
311
|
+
# client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
|
312
|
+
# #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
|
313
|
+
# elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua … . Excepteur
|
314
|
+
# sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
|
315
|
+
# laborum. <span class=\"match\">Pat</span> Cash"]
|
316
|
+
#
|
317
|
+
# Workflow:
|
318
|
+
#
|
319
|
+
# Excerpt creation is completely isolated from searching the index. The nominated index is only used to
|
320
|
+
# discover encoding and charset information.
|
321
|
+
#
|
322
|
+
# Therefore, the workflow goes:
|
323
|
+
#
|
324
|
+
# 1. Do the sphinx query.
|
325
|
+
# 2. Fetch the documents found by sphinx from their repositories.
|
326
|
+
# 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
|
327
|
+
#
|
328
|
+
def excerpts(options = {})
  # Work on a copy so the defaults are never written into the caller's hash.
  settings = options.dup

  # As before, a default replaces a missing, nil or false value.
  {
    :index           => '*',
    :before_match    => '<span class="match">',
    :after_match     => '</span>',
    :chunk_separator => ' … ', # ellipsis
    :limit           => 256,
    :around          => 5,
    :exact_phrase    => false,
    :single_passage  => false
  }.each do |key, default|
    settings[key] ||= default
  end

  response = Response.new request(:excerpt, excerpts_message(settings))

  # One excerpt comes back per document, in the order they were given.
  settings[:docs].collect { response.next }
end
|
342
|
+
|
343
|
+
# Update attribute values of documents in an index. The first parameter is
# the index, the second an array of the names of the attributes to update,
# and the third a hash whose keys are document ids and whose values are
# arrays of the new attribute values - in the same order as the second
# parameter.
#
# Example:
#
#   client.update('people', ['birthday'], {1 => [Time.local(1982, 8, 20).to_i]})
#
# Returns the integer read from searchd's reply (presumably the number of
# documents updated - confirm against the searchd protocol).
def update(index, attributes, values_by_doc)
  payload = update_message(index, attributes, values_by_doc)
  reply   = Response.new(request(:update, payload))

  reply.next_int
end
|
360
|
+
|
361
|
+
private
|
362
|
+
|
363
|
+
# Connects to the Sphinx daemon, performs the version handshake, and yields
# the socket to the block. Returns the value of the block.
#
# The socket is closed on every path out of this method - after the block,
# and also when the handshake itself fails.
#
# Raises VersionError when searchd announces a version below 1, which
# includes the case where it sends no version at all.
def connect(&block)
  socket = TCPSocket.new @server, @port

  begin
    # searchd speaks first, with its version as a 32-bit big-endian integer.
    # A missing or empty greeting counts as version 0.
    version = socket.recv(4).to_s.unpack('N*').first.to_i
    if version < 1
      raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
    end

    # Answer with our own version.
    socket.send [1].pack('N'), 0

    yield socket
  ensure
    socket.close
  end
end
|
384
|
+
|
385
|
+
# Send a collection of messages, for a command type (:search, :excerpt or
# :update), to the Sphinx daemon, and return the body of its reply.
#
# +messages+ may be a single message string or an array of them; they are
# concatenated and sent as one request. For :search the request also carries
# the number of messages.
#
# Raises ResponseError when searchd sends nothing, closes the connection
# before the announced number of bytes has arrived, or answers with an
# error, retry or unknown status. A warning from searchd is printed and the
# remainder of the reply returned.
def request(command, messages)
  response = ""
  status   = -1
  version  = 0
  length   = 0
  message  = Array(messages).join("")
  # Treat the payload as raw bytes, so that its length is a byte count and
  # it can be appended to the packed header whatever encoding it came in.
  message.force_encoding('ASCII-8BIT') if message.respond_to?(:force_encoding)

  connect do |socket|
    header =
      if command == :search
        # Message length is +4 to account for the following count value for
        # the number of messages.
        [
          Commands[command], Versions[command],
          4 + message.length, Array(messages).length
        ].pack("nnNN")
      else
        [Commands[command], Versions[command], message.length].pack("nnN")
      end
    socket.send(header + message, 0)

    # Reply header: status and version (16 bits each), then the body length.
    reply_header = read_bytes(socket, 8)
    if reply_header.length == 8
      status, version, length = reply_header.unpack('n2N')
      response = read_bytes(socket, length)
    end
  end

  if response.empty? || response.length != length
    raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
  end

  case status
  when Statuses[:ok]
    if version < Versions[command]
      puts format("searchd command v.%d.%d older than client (v.%d.%d)",
        version >> 8, version & 0xff,
        Versions[command] >> 8, Versions[command] & 0xff)
    end
    response
  when Statuses[:warning]
    # The body starts with a length-prefixed warning text; the actual
    # response follows it.
    warning_length = response[0, 4].unpack('N*').first
    puts response[4, warning_length]
    response[4 + warning_length, response.length - 4 - warning_length]
  when Statuses[:error], Statuses[:retry]
    raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
  else
    raise ResponseError, "Unknown searchd error (status: #{status})"
  end
end

# Reads from the socket until +count+ bytes have arrived or the other side
# stops sending (recv returning nil or an empty string). Returns what was
# read, which is shorter than +count+ in the latter case.
def read_bytes(socket, count)
  buffer = String.new
  while buffer.length < count
    part = socket.recv(count - buffer.length)
    break if part.nil? || part.empty?
    buffer << part
  end
  buffer
end
|
440
|
+
|
441
|
+
# Builds the message for one search query from the client's current
# settings and returns it as a string. The values must be appended in
# exactly this order.
def query_message(search, index)
  msg = Message.new

  # offset and limit, then the codes of the match, rank and sort modes
  msg.append_ints @offset, @limit, MatchModes[@match_mode],
    RankModes[@rank_mode], SortModes[@sort_mode]
  msg.append_string @sort_by

  # the query itself
  msg.append_string search

  # weights, preceded by how many there are
  msg.append_int @weights.length
  msg.append_ints(*@weights)

  # index to search
  msg.append_string index

  # id range: a 1, then the two bounds as 64-bit integers
  # (presumably the 1 announces 64-bit ids - confirm against the protocol)
  msg.append_int 1
  msg.append_64bit_ints @id_range.first, @id_range.last

  # filters, preceded by how many there are
  msg.append_int @filters.length
  @filters.each do |filter|
    msg.append filter.query_message
  end

  # grouping, interleaved with max matches, cut off and retry settings
  msg.append_int GroupFunctions[@group_function]
  msg.append_string @group_by
  msg.append_int @max_matches
  msg.append_string @group_clause
  msg.append_ints @cut_off, @retry_count, @retry_delay
  msg.append_string @group_distinct

  # anchor point: 0 when unset, otherwise 1 followed by the attribute names
  # and the position
  has_anchor = !@anchor.empty?
  msg.append_int(has_anchor ? 1 : 0)
  if has_anchor
    msg.append_string @anchor[:latitude_attribute]
    msg.append_string @anchor[:longitude_attribute]
    msg.append_floats @anchor[:latitude], @anchor[:longitude]
  end

  # a count followed by name/weighting pairs
  append_weightings = lambda do |weightings|
    msg.append_int weightings.length
    weightings.each do |name, weighting|
      msg.append_string name
      msg.append_int weighting
    end
  end

  append_weightings.call @index_weights
  msg.append_int @max_query_time
  append_weightings.call @field_weights

  msg.to_s
end
|
505
|
+
|
506
|
+
# Builds the message for an excerpts request from the options hash and
# returns it as a string.
def excerpts_message(options)
  # Bit 1 is always set; each of the options below switches on one more bit
  # when it is truthy.
  flags = 1
  [
    [:exact_phrase,    2],
    [:single_passage,  4],
    [:use_boundaries,  8],
    [:weight_order,   16]
  ].each do |option, bit|
    flags |= bit if options[option]
  end

  msg = Message.new
  msg.append [0, flags].pack('N2') # 0 = mode
  msg.append_string options[:index]
  msg.append_string options[:words]

  # markup and sizing options
  msg.append_string options[:before_match]
  msg.append_string options[:after_match]
  msg.append_string options[:chunk_separator]
  msg.append_ints options[:limit], options[:around]

  # the documents to build excerpts from
  msg.append_array options[:docs]

  msg.to_s
end
|
530
|
+
|
531
|
+
# Builds the message for an attribute update and returns it as a string:
# the index name, the attribute names, the number of documents, and then
# for each document its id followed by its new values.
def update_message(index, attributes, values_by_doc)
  msg = Message.new

  msg.append_string index
  msg.append_array attributes

  msg.append_int values_by_doc.length
  values_by_doc.each do |doc_id, new_values|
    msg.append_64bit_int doc_id   # document ID
    msg.append_ints(*new_values)  # new values (integers), in attribute order
  end

  msg.to_s
end
|
547
|
+
end
|
548
|
+
end
|