riddle 0.9.8.1112
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENCE +20 -0
- data/README +60 -0
- data/lib/riddle.rb +20 -0
- data/lib/riddle/client.rb +548 -0
- data/lib/riddle/client/filter.rb +44 -0
- data/lib/riddle/client/message.rb +65 -0
- data/lib/riddle/client/response.rb +74 -0
- data/spec/fixtures/data/anchor.bin +0 -0
- data/spec/fixtures/data/any.bin +0 -0
- data/spec/fixtures/data/boolean.bin +0 -0
- data/spec/fixtures/data/distinct.bin +0 -0
- data/spec/fixtures/data/field_weights.bin +0 -0
- data/spec/fixtures/data/filter.bin +0 -0
- data/spec/fixtures/data/filter_array.bin +0 -0
- data/spec/fixtures/data/filter_array_exclude.bin +0 -0
- data/spec/fixtures/data/filter_floats.bin +0 -0
- data/spec/fixtures/data/filter_floats_exclude.bin +0 -0
- data/spec/fixtures/data/filter_floats_range.bin +0 -0
- data/spec/fixtures/data/filter_range.bin +0 -0
- data/spec/fixtures/data/filter_range_exclude.bin +0 -0
- data/spec/fixtures/data/group.bin +0 -0
- data/spec/fixtures/data/index.bin +0 -0
- data/spec/fixtures/data/index_weights.bin +0 -0
- data/spec/fixtures/data/phrase.bin +0 -0
- data/spec/fixtures/data/rank_mode.bin +0 -0
- data/spec/fixtures/data/simple.bin +0 -0
- data/spec/fixtures/data/sort.bin +0 -0
- data/spec/fixtures/data/update_simple.bin +0 -0
- data/spec/fixtures/data/weights.bin +0 -0
- data/spec/fixtures/data_generator.php +130 -0
- data/spec/fixtures/sphinx/configuration.erb +38 -0
- data/spec/fixtures/sphinx/people.old.spa +0 -0
- data/spec/fixtures/sphinx/people.old.spd +0 -0
- data/spec/fixtures/sphinx/people.old.sph +0 -0
- data/spec/fixtures/sphinx/people.old.spi +0 -0
- data/spec/fixtures/sphinx/people.old.spm +0 -0
- data/spec/fixtures/sphinx/people.old.spp +0 -0
- data/spec/fixtures/sphinx/people.spa +0 -0
- data/spec/fixtures/sphinx/people.spd +0 -0
- data/spec/fixtures/sphinx/people.sph +0 -0
- data/spec/fixtures/sphinx/people.spi +0 -0
- data/spec/fixtures/sphinx/people.spm +0 -0
- data/spec/fixtures/sphinx/people.spp +0 -0
- data/spec/fixtures/sphinx/searchd.log +4732 -0
- data/spec/fixtures/sphinx/searchd.query.log +783 -0
- data/spec/fixtures/sphinx/spec.conf +38 -0
- data/spec/fixtures/sphinxapi.php +1066 -0
- data/spec/fixtures/sql/conf.example.yml +3 -0
- data/spec/fixtures/sql/conf.yml +3 -0
- data/spec/fixtures/sql/data.sql +25000 -0
- data/spec/fixtures/sql/structure.sql +16 -0
- data/spec/functional/excerpt_spec.rb +102 -0
- data/spec/functional/search_spec.rb +69 -0
- data/spec/functional/update_spec.rb +41 -0
- data/spec/spec_helper.rb +26 -0
- data/spec/sphinx_helper.rb +92 -0
- data/spec/unit/client_spec.rb +154 -0
- data/spec/unit/filter_spec.rb +33 -0
- data/spec/unit/message_spec.rb +63 -0
- data/spec/unit/response_spec.rb +64 -0
- metadata +128 -0
data/MIT-LICENCE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2007 Pat Allan
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
This client has been written to interface with Sphinx[http://sphinxsearch.com/]. It is written by
|
2
|
+
{Pat Allan}[http://freelancing-gods.com], and has been influenced by both Dmytro Shteflyuk's Ruby
|
3
|
+
client and the original PHP client - credit where credit's due, after all.
|
4
|
+
|
5
|
+
It does not follow the same syntax as those two, though (not much point writing this otherwise) -
|
6
|
+
opting for a more Ruby-like structure.
|
7
|
+
|
8
|
+
While it doesn't (yet) exist as a gem, you can obtain the sourcecode via subversion. If you
|
9
|
+
are after a specific release, use the tag as follows:
|
10
|
+
|
11
|
+
svn co http://rails-oceania.googlecode.com/svn/patallan/riddle/tags/0.9.8-r1112 riddle
|
12
|
+
|
13
|
+
Or for the most current, just use trunk:
|
14
|
+
|
15
|
+
svn co http://rails-oceania.googlecode.com/svn/patallan/riddle/trunk riddle
|
16
|
+
|
17
|
+
Please note that at the time of writing, only 0.9.8r871 through to 0.9.8r1112 are supported.
|
18
|
+
|
19
|
+
To get started, just instantiate a Client object:
|
20
|
+
|
21
|
+
client = Riddle::Client.new # defaults to localhost and port 3312
|
22
|
+
client = Riddle::Client.new "sphinxserver.domain.tld", 3333 # custom settings
|
23
|
+
|
24
|
+
And then set the parameters to what you want, before running a query:
|
25
|
+
|
26
|
+
client.match_mode = :extended
|
27
|
+
client.query "Pat Allan @state Victoria"
|
28
|
+
|
29
|
+
The results from a query are similar to the other clients - but here are the details. It's a hash with
|
30
|
+
the following keys:
|
31
|
+
|
32
|
+
* :matches
|
33
|
+
* :fields
|
34
|
+
* :attributes
|
35
|
+
* :attribute_names
|
36
|
+
* :words
|
37
|
+
* :total
|
38
|
+
* :total_found
|
39
|
+
* :time
|
40
|
+
* :status
|
41
|
+
* :warning (if appropriate)
|
42
|
+
* :error (if appropriate)
|
43
|
+
|
44
|
+
The key <tt>:matches</tt> returns an array of hashes - the actual search results. Each hash has the
|
45
|
+
document id (<tt>:doc</tt>), the result weighting (<tt>:weight</tt>), and a hash of the attributes for
|
46
|
+
the document (<tt>:attributes</tt>).
|
47
|
+
|
48
|
+
The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return lists of fields and attributes for the
|
49
|
+
documents. The key <tt>:attributes</tt> will return a hash of attribute name and type pairs, and
|
50
|
+
<tt>:words</tt> returns a hash of hashes representing the words from the search, with the number of
|
51
|
+
documents and hits for each, along the lines of:
|
52
|
+
|
53
|
+
results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
|
54
|
+
|
55
|
+
<tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the number of matches available, the
|
56
|
+
total number of matches (which may be greater than the maximum available), and the time in milliseconds
|
57
|
+
that the query took to run.
|
58
|
+
|
59
|
+
<tt>:status</tt> is the error code for the query - and if there was a related warning, it will be under
|
60
|
+
the <tt>:warning</tt> key. Fatal errors will be described under <tt>:error</tt>.
|
data/lib/riddle.rb
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'socket'
|
2
|
+
require 'riddle/client'
|
3
|
+
require 'riddle/client/filter'
|
4
|
+
require 'riddle/client/message'
|
5
|
+
require 'riddle/client/response'
|
6
|
+
|
7
|
+
module Riddle #:nodoc:
  ConnectionError = Class.new(StandardError) #:nodoc:

  # Library version number, split into its components.
  module Version #:nodoc:
    Major, Minor, Tiny, Rev = 0, 9, 8, 1112

    # Release string: dotted major.minor.tiny followed by "r" and the revision.
    String     = "#{Major}.#{Minor}.#{Tiny}r#{Rev}"
    # Fully dotted form, with the revision as a fourth segment.
    GemVersion = "#{Major}.#{Minor}.#{Tiny}.#{Rev}"
  end
end
|
@@ -0,0 +1,548 @@
|
|
1
|
+
module Riddle
|
2
|
+
VersionError = Class.new(StandardError) # raised by connect when searchd reports a version below 1
|
3
|
+
ResponseError = Class.new(StandardError) # raised by request for missing, incomplete or error responses
|
4
|
+
|
5
|
+
# This class was heavily based on the existing Client API by Dmytro Shteflyuk
|
6
|
+
# and Alexey Kovyrin. Their code worked fine, I just wanted something a bit
|
7
|
+
# more Ruby-ish (ie. lowercase and underscored method names). I also have
|
8
|
+
# used a few helper classes, just to neaten things up.
|
9
|
+
#
|
10
|
+
# Feel free to use it wherever. Send bug reports, patches, comments and
|
11
|
+
# suggestions to pat at freelancing-gods dot com.
|
12
|
+
#
|
13
|
+
# Most properties of the client are accessible through attribute accessors,
|
14
|
+
# and where relevant use symbols instead of the long constants common in
|
15
|
+
# other clients.
|
16
|
+
# Some examples:
|
17
|
+
#
|
18
|
+
# client.sort_mode = :extended
|
19
|
+
# client.sort_by = "birthday DESC"
|
20
|
+
# client.match_mode = :extended
|
21
|
+
#
|
22
|
+
# To add a filter, you will need to create a Filter object:
|
23
|
+
#
|
24
|
+
# client.filters << Riddle::Client::Filter.new("birthday",
|
25
|
+
# Time.local(1975, 1, 1).to_i..Time.local(1985, 1, 1).to_i, false)
|
26
|
+
#
|
27
|
+
class Client
|
28
|
+
# searchd command codes, in the order SEARCHD_COMMAND_SEARCH,
# SEARCHD_COMMAND_EXCERPT, SEARCHD_COMMAND_UPDATE.
Commands = { :search => 0, :excerpt => 1, :update => 2 }

# Protocol version sent with each command: VER_COMMAND_SEARCH,
# VER_COMMAND_EXCERPT, VER_COMMAND_UPDATE.
Versions = { :search => 0x112, :excerpt => 0x100, :update => 0x101 }

# Status codes found in searchd responses: SEARCHD_OK, SEARCHD_ERROR,
# SEARCHD_RETRY, SEARCHD_WARNING.
Statuses = { :ok => 0, :error => 1, :retry => 2, :warning => 3 }
|
46
|
+
|
47
|
+
# Match modes, keyed by the symbol assigned to match_mode; query_message
# looks the numeric value up here before sending it to searchd.
MatchModes = {
  :all       => 0, # SPH_MATCH_ALL
  :any       => 1, # SPH_MATCH_ANY
  :phrase    => 2, # SPH_MATCH_PHRASE
  :boolean   => 3, # SPH_MATCH_BOOLEAN
  :extended  => 4, # SPH_MATCH_EXTENDED
  :fullscan  => 5, # SPH_MATCH_FULLSCAN
  :extended2 => 6, # SPH_MATCH_EXTENDED2
  # Misspelt key from the original table; kept so callers that already use
  # it keep resolving to SPH_MATCH_FULLSCAN.
  :fullsacn  => 5
}
|
56
|
+
|
57
|
+
# Ranking modes: SPH_RANK_PROXIMITY_BM25, SPH_RANK_BM25, SPH_RANK_NONE,
# SPH_RANK_WORDCOUNT.
RankModes = { :proximity_bm25 => 0, :bm25 => 1, :none => 2, :wordcount => 3 }

# Sort modes: SPH_SORT_RELEVANCE, SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC,
# SPH_SORT_TIME_SEGMENTS, SPH_SORT_EXTENDED, SPH_SORT_EXPR.
SortModes = {
  :relevance => 0, :attr_desc => 1, :attr_asc => 2,
  :time_segments => 3, :extended => 4, :expr => 5
}

# Attribute types: SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_ORDINAL,
# SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_MULTI.
AttributeTypes = {
  :integer => 1, :timestamp => 2, :ordinal => 3,
  :bool => 4, :float => 5, :multi => 0x40000000
}

# Grouping functions: SPH_GROUPBY_DAY, SPH_GROUPBY_WEEK, SPH_GROUPBY_MONTH,
# SPH_GROUPBY_YEAR, SPH_GROUPBY_ATTR, SPH_GROUPBY_ATTRPAIR.
GroupFunctions = {
  :day => 0, :week => 1, :month => 2,
  :year => 3, :attr => 4, :attrpair => 5
}

# Filter types: SPH_FILTER_VALUES, SPH_FILTER_RANGE, SPH_FILTER_FLOATRANGE.
FilterTypes = { :values => 0, :range => 1, :float_range => 2 }
|
96
|
+
|
97
|
+
# Where searchd is reached.
attr_accessor :server, :port
# Paging and limits.
attr_accessor :offset, :limit, :max_matches
# Matching, sorting, weighting and filtering.
attr_accessor :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters
# Grouping.
attr_accessor :group_by, :group_function, :group_clause, :group_distinct
# Remaining per-query settings written into each search message.
attr_accessor :cut_off, :retry_count, :retry_delay, :anchor, :index_weights,
  :rank_mode, :max_query_time, :field_weights
# Messages queued by append_query/query until run sends them.
attr_reader :queue
|
103
|
+
|
104
|
+
# Can instantiate with a specific server and port - otherwise it assumes
|
105
|
+
# defaults of localhost and 3312 respectively. All other settings can be
|
106
|
+
# accessed and changed via the attribute accessors.
|
107
|
+
def initialize(server=nil, port=nil)
  # Fall back to the stock host and port when none are supplied.
  @server = server || "localhost"
  @port   = port   || 3312

  # Paging and limits.
  @offset, @limit, @max_matches = 0, 20, 1000
  @cut_off        = 0
  @max_query_time = 0
  @retry_count, @retry_delay = 0, 0

  # Matching, ranking and sorting.
  @match_mode = :all
  @rank_mode  = :proximity_bm25
  @sort_mode  = :relevance
  @sort_by    = ''

  # Weights: positional list, then per-index and per-field hashes whose
  # string keys are names and whose integer values are weightings.
  @weights       = []
  @index_weights = {}
  @field_weights = {}

  # Restrictions on which documents are considered.
  @id_range = 0..0
  @filters  = []
  @anchor   = {}

  # Grouping.
  @group_by       = ''
  @group_function = :day
  @group_clause   = '@group desc'
  @group_distinct = ''

  # Pending search messages.
  @queue = []
end
|
138
|
+
|
139
|
+
# Set the geo-anchor point - with the names of the attributes that contain
|
140
|
+
# the latitude and longitude (in radians), and the reference position.
|
141
|
+
# Note that for geocoding to work properly, you must also set
|
142
|
+
# match_mode to :extended. To sort results by distance, you will
|
143
|
+
# need to set sort_mode to :extended and sort_by to '@geodist asc'. Sphinx
|
144
|
+
# expects latitude and longitude to be returned from your SQL source
|
145
|
+
# in radians.
|
146
|
+
#
|
147
|
+
# Example:
|
148
|
+
# client.set_anchor('lat', -0.6591741, 'long', 2.530770)
|
149
|
+
#
|
150
|
+
def set_anchor(lat_attr, lat, long_attr, long)
  # Build the anchor hash entry by entry, then store (and return) it.
  point = {}
  point[:latitude_attribute]  = lat_attr
  point[:latitude]            = lat
  point[:longitude_attribute] = long_attr
  point[:longitude]           = long
  @anchor = point
end
|
158
|
+
|
159
|
+
# Append a query to the queue. This uses the same parameters as the query
|
160
|
+
# method.
|
161
|
+
def append_query(search, index = '*')
  # Encode the search now; it is only sent when run is called.
  encoded = query_message(search, index)
  @queue.push(encoded)
end
|
164
|
+
|
165
|
+
# Run all the queries currently in the queue. This will return an array of
|
166
|
+
# results hashes.
|
167
|
+
def run
  response = Response.new(request(:search, @queue))

  # One result hash is decoded from the response per queued message.
  results = @queue.map do
    result = {
      :matches => [], :fields => [], :attributes => {},
      :attribute_names => [], :words => {}
    }

    status = response.next_int
    result[:status] = status
    if status == Statuses[:warning]
      result[:warning] = response.next
    elsif status == Statuses[:error]
      # An errored query carries only its message; nothing else is read.
      result[:error] = response.next
      next result
    end

    result[:fields] = response.next_array

    # Attribute schema: name followed by numeric type, repeated.
    attribute_count = response.next_int
    (0...attribute_count).each do
      name = response.next
      result[:attributes][name] = response.next_int
      result[:attribute_names] << name
    end

    match_count = response.next_int
    wide_ids    = response.next_int # non-zero means document ids are 64-bit
    (0...match_count).each do |position|
      doc    = wide_ids > 0 ? response.next_64bit_int : response.next_int
      weight = response.next_int

      values = {}
      result[:matches] << {:doc => doc, :weight => weight, :index => position, :attributes => values}

      # Attribute values follow in schema order, decoded according to type.
      result[:attribute_names].each do |attr|
        values[attr] =
          case result[:attributes][attr]
          when AttributeTypes[:float] then response.next_float
          when AttributeTypes[:multi] then response.next_int_array
          else response.next_int
          end
      end
    end

    result[:total]       = response.next_int.to_i
    result[:total_found] = response.next_int.to_i
    # The raw value is divided by 1000 and rounded to three decimal places.
    result[:time]        = ('%.3f' % (response.next_int / 1000.0)).to_f

    word_count = response.next_int
    (0...word_count).each do
      word = response.next
      docs = response.next_int
      hits = response.next_int
      result[:words][word] = {:docs => docs, :hits => hits}
    end

    result
  end

  @queue.clear
  results
end
|
236
|
+
|
237
|
+
# Query the Sphinx daemon - defaulting to all indexes, but you can specify
|
238
|
+
# a specific one if you wish. The search parameter should be a string
|
239
|
+
# following Sphinx's expectations.
|
240
|
+
#
|
241
|
+
# The object returned from this method is a hash with the following keys:
|
242
|
+
#
|
243
|
+
# * :matches
|
244
|
+
# * :fields
|
245
|
+
# * :attributes
|
246
|
+
# * :attribute_names
|
247
|
+
# * :words
|
248
|
+
# * :total
|
249
|
+
# * :total_found
|
250
|
+
# * :time
|
251
|
+
# * :status
|
252
|
+
# * :warning (if appropriate)
|
253
|
+
# * :error (if appropriate)
|
254
|
+
#
|
255
|
+
# The key <tt>:matches</tt> returns an array of hashes - the actual search
|
256
|
+
# results. Each hash has the document id (<tt>:doc</tt>), the result
|
257
|
+
# weighting (<tt>:weight</tt>), and a hash of the attributes for the
|
258
|
+
# document (<tt>:attributes</tt>).
|
259
|
+
#
|
260
|
+
# The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return lists of
|
261
|
+
# fields and attributes for the documents. The key <tt>:attributes</tt>
|
262
|
+
# will return a hash of attribute name and type pairs, and <tt>:words</tt>
|
263
|
+
# returns a hash of hashes representing the words from the search, with the
|
264
|
+
# number of documents and hits for each, along the lines of:
|
265
|
+
#
|
266
|
+
# results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
|
267
|
+
#
|
268
|
+
# <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
|
269
|
+
# number of matches available, the total number of matches (which may be
|
270
|
+
# greater than the maximum available, depending on the number of matches
|
271
|
+
# and your sphinx configuration), and the time in milliseconds that the
|
272
|
+
# query took to run.
|
273
|
+
#
|
274
|
+
# <tt>:status</tt> is the error code for the query - and if there was a
|
275
|
+
# related warning, it will be under the <tt>:warning</tt> key. Fatal errors
|
276
|
+
# will be described under <tt>:error</tt>.
|
277
|
+
#
|
278
|
+
def query(search, index = '*')
  message = query_message(search, index)
  @queue << message

  begin
    # run sends every queued message and returns the results in queue order;
    # the first of them is handed back.
    run.first
  ensure
    # run only empties the queue when it succeeds. If it raised, drop the
    # message added above so a later query does not re-send this one and
    # return its result instead of its own. Messages queued earlier with
    # append_query are left in place.
    @queue.delete_if { |queued| queued.equal?(message) }
  end
end
|
282
|
+
|
283
|
+
# Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
|
284
|
+
# They may also be abbreviated to fit within a word limit.
|
285
|
+
#
|
286
|
+
# As part of the options hash, you will need to
|
287
|
+
# define:
|
288
|
+
# * :docs
|
289
|
+
# * :words
|
290
|
+
# * :index
|
291
|
+
#
|
292
|
+
# Optional settings include:
|
293
|
+
# * :before_match (defaults to <span class="match">)
|
294
|
+
# * :after_match (defaults to </span>)
|
295
|
+
# * :chunk_separator (defaults to ' … ' - which is an HTML ellipsis)
|
296
|
+
# * :limit (defaults to 256)
|
297
|
+
# * :around (defaults to 5)
|
298
|
+
# * :exact_phrase (defaults to false)
|
299
|
+
# * :single_passage (defaults to false)
|
300
|
+
#
|
301
|
+
# The defaults differ from the official PHP client, as I've opted for
|
302
|
+
# semantic HTML markup.
|
303
|
+
#
|
304
|
+
# Example:
|
305
|
+
#
|
306
|
+
# client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
|
307
|
+
# #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
|
308
|
+
#
|
309
|
+
# lorem_lipsum = "Lorem ipsum dolor..."
|
310
|
+
#
|
311
|
+
# client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
|
312
|
+
# #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
|
313
|
+
# elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua … . Excepteur
|
314
|
+
# sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
|
315
|
+
# laborum. <span class=\"match\">Pat</span> Cash"]
|
316
|
+
#
|
317
|
+
# Workflow:
|
318
|
+
#
|
319
|
+
# Excerpt creation is completely isolated from searching the index. The nominated index is only used to
|
320
|
+
# discover encoding and charset information.
|
321
|
+
#
|
322
|
+
# Therefore, the workflow goes:
|
323
|
+
#
|
324
|
+
# 1. Do the sphinx query.
|
325
|
+
# 2. Fetch the documents found by sphinx from their repositories.
|
326
|
+
# 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
|
327
|
+
#
|
328
|
+
def excerpts(options = {})
  defaults = {
    :index           => '*',
    :before_match    => '<span class="match">',
    :after_match     => '</span>',
    :chunk_separator => ' … ', # ellipsis
    :limit           => 256,
    :around          => 5,
    :exact_phrase    => false,
    :single_passage  => false
  }

  # Merge into a new hash so the caller's options are never modified. As
  # before, a nil or false value supplied by the caller falls back to the
  # default for that key.
  settings = defaults.merge(options) { |_key, default, given| given || default }

  response = Response.new request(:excerpt, excerpts_message(settings))

  # One excerpt is read from the response per document passed in :docs.
  settings[:docs].collect { response.next }
end
|
342
|
+
|
343
|
+
# Update attributes - first parameter is the relevant index, second is an
|
344
|
+
# array of attributes to be updated, and the third is a hash, where the
|
345
|
+
# keys are the document ids, and the values are arrays with the attribute
|
346
|
+
# values - in the same order as the second parameter.
|
347
|
+
#
|
348
|
+
# Example:
|
349
|
+
#
|
350
|
+
# client.update('people', ['birthday'], {1 => [Time.local(1982, 8, 20).to_i]})
|
351
|
+
#
|
352
|
+
def update(index, attributes, values_by_doc)
  # Encode the update, send it, and return the integer searchd replies with.
  payload = update_message(index, attributes, values_by_doc)
  reply   = Response.new(request(:update, payload))
  reply.next_int
end
|
360
|
+
|
361
|
+
private
|
362
|
+
|
363
|
+
# Connects to the Sphinx daemon, and yields a socket to use. The socket is
|
364
|
+
# closed at the end of the block.
|
365
|
+
def connect(&block)
  socket = TCPSocket.new @server, @port

  # Everything after the socket is opened runs under the ensure below, so the
  # socket is closed on every exit path, including a failed handshake.
  begin
    # searchd sends its version first, as a 4-byte big-endian integer. A
    # short or empty read unpacks to nothing and is treated as version 0,
    # which is rejected below instead of failing on a nil comparison.
    version = socket.recv(4).to_s.unpack('N*').first.to_i
    if version < 1
      raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
    end

    # Send version
    socket.send [1].pack('N'), 0

    yield socket
  ensure
    socket.close unless socket.closed?
  end
end
|
384
|
+
|
385
|
+
# Send a collection of messages, for a command type (eg, search, excerpts,
|
386
|
+
# update), to the Sphinx daemon.
|
387
|
+
def request(command, messages)
  response = String.new
  status   = -1
  version  = 0
  length   = 0
  batch    = Array(messages)
  message  = batch.join("")

  # The length prefix must count bytes, and the payload is appended to a
  # binary header. Treating the payload as binary makes String#length a byte
  # count and avoids encoding clashes when it holds multibyte text.
  message.force_encoding('ASCII-8BIT') if message.respond_to?(:force_encoding)

  connect do |socket|
    case command
    when :search
      # Message length is +4 to account for the following count value for
      # the number of messages.
      socket.send [
        Commands[command], Versions[command],
        4 + message.length, batch.length
      ].pack("nnNN") + message, 0
    else
      socket.send [
        Commands[command], Versions[command], message.length
      ].pack("nnN") + message, 0
    end

    # Header: status (16 bit), version (16 bit), body length (32 bit). If it
    # does not arrive in full, the defaults above are kept and the
    # "No response" error below is raised.
    header = read_bytes(socket, 8)
    if header.length == 8
      status, version, length = header.unpack('n2N')
      response = read_bytes(socket, length)
    end
  end

  if response.empty? || response.length != length
    raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
  end

  case status
  when Statuses[:ok]
    if version < Versions[command]
      puts format("searchd command v.%d.%d older than client (v.%d.%d)",
        version >> 8, version & 0xff,
        Versions[command] >> 8, Versions[command] & 0xff)
    end
    response
  when Statuses[:warning]
    # The body starts with a length-prefixed warning string; print it and
    # return whatever follows.
    warning_length = response[0, 4].unpack('N*').first
    puts response[4, warning_length]
    response[4 + warning_length, response.length - 4 - warning_length]
  when Statuses[:error], Statuses[:retry]
    raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
  else
    raise ResponseError, "Unknown searchd error (status: #{status})"
  end
end

# Reads from +socket+ until +count+ bytes have arrived or the peer stops
# sending, and returns what was received (possibly fewer than +count+ bytes).
def read_bytes(socket, count)
  buffer = String.new
  while buffer.length < count
    part = socket.recv(count - buffer.length)
    # An empty read means the connection was closed; stop rather than loop.
    break if part.nil? || part.empty?
    buffer << part
  end
  buffer
end
|
440
|
+
|
441
|
+
# Generation of the message to send to Sphinx for a search.
|
442
|
+
def query_message(search, index)
  message = Message.new

  # Paging, match/rank/sort modes and the sort expression.
  message.append_ints(
    @offset, @limit, MatchModes[@match_mode],
    RankModes[@rank_mode], SortModes[@sort_mode]
  )
  message.append_string(@sort_by)

  # The search text.
  message.append_string(search)

  # Weights, preceded by how many there are.
  message.append_int(@weights.length)
  message.append_ints(*@weights)

  # Index to search.
  message.append_string(index)

  # Document id range: a literal 1, then both ends as 64-bit integers.
  message.append_int(1)
  message.append_64bit_ints(@id_range.first, @id_range.last)

  # Filters, preceded by their count; each filter encodes itself.
  message.append_int(@filters.length)
  @filters.each { |filter| message.append(filter.query_message) }

  # Grouping settings, interleaved with max matches and the retry values.
  message.append_int(GroupFunctions[@group_function])
  message.append_string(@group_by)
  message.append_int(@max_matches)
  message.append_string(@group_clause)
  message.append_ints(@cut_off, @retry_count, @retry_delay)
  message.append_string(@group_distinct)

  # Geo anchor: a 0 flag when unset, otherwise 1 followed by the two
  # attribute names and the two coordinates.
  if @anchor.empty?
    message.append_int(0)
  else
    message.append_int(1)
    message.append_string(@anchor[:latitude_attribute])
    message.append_string(@anchor[:longitude_attribute])
    message.append_floats(@anchor[:latitude], @anchor[:longitude])
  end

  # Per-index weights as name/weight pairs, preceded by their count.
  message.append_int(@index_weights.length)
  @index_weights.each do |index_name, weighting|
    message.append_string(index_name)
    message.append_int(weighting)
  end

  # Max query time.
  message.append_int(@max_query_time)

  # Per-field weights as name/weight pairs, preceded by their count.
  message.append_int(@field_weights.length)
  @field_weights.each do |field_name, weighting|
    message.append_string(field_name)
    message.append_int(weighting)
  end

  message.to_s
end
|
505
|
+
|
506
|
+
# Generation of the message to send to Sphinx for an excerpts request.
|
507
|
+
def excerpts_message(options)
  message = Message.new

  # Bit 1 is always set; each truthy option below adds its own bit.
  flag_bits = [
    [:exact_phrase, 2], [:single_passage, 4],
    [:use_boundaries, 8], [:weight_order, 16]
  ]
  flags = flag_bits.inject(1) do |bits, (option, bit)|
    options[option] ? (bits | bit) : bits
  end

  message.append([0, flags].pack('N2')) # 0 = mode
  message.append_string(options[:index])
  message.append_string(options[:words])

  # Markup strings, then the two numeric limits.
  message.append_string(options[:before_match])
  message.append_string(options[:after_match])
  message.append_string(options[:chunk_separator])
  message.append_ints(options[:limit], options[:around])

  # The documents to build excerpts from.
  message.append_array(options[:docs])

  message.to_s
end
|
530
|
+
|
531
|
+
# Generation of the message to send to Sphinx to update attributes of a
|
532
|
+
# document.
|
533
|
+
def update_message(index, attributes, values_by_doc)
  message = Message.new

  # Index name and the attribute names being updated.
  message.append_string(index)
  message.append_array(attributes)

  # Number of documents, then for each one its 64-bit id followed by the
  # new integer values.
  message.append_int(values_by_doc.length)
  values_by_doc.each do |doc_id, new_values|
    message.append_64bit_int(doc_id)
    message.append_ints(*new_values)
  end

  message.to_s
end
|
547
|
+
end
|
548
|
+
end
|