ryanb-thinking_sphinx 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/LICENCE +20 -0
  2. data/README +60 -0
  3. data/lib/riddle.rb +26 -0
  4. data/lib/riddle/client.rb +639 -0
  5. data/lib/riddle/client/filter.rb +44 -0
  6. data/lib/riddle/client/message.rb +65 -0
  7. data/lib/riddle/client/response.rb +84 -0
  8. data/lib/test.rb +46 -0
  9. data/lib/thinking_sphinx.rb +102 -0
  10. data/lib/thinking_sphinx/active_record.rb +141 -0
  11. data/lib/thinking_sphinx/active_record/delta.rb +97 -0
  12. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  13. data/lib/thinking_sphinx/active_record/search.rb +50 -0
  14. data/lib/thinking_sphinx/association.rb +144 -0
  15. data/lib/thinking_sphinx/attribute.rb +284 -0
  16. data/lib/thinking_sphinx/configuration.rb +283 -0
  17. data/lib/thinking_sphinx/field.rb +200 -0
  18. data/lib/thinking_sphinx/index.rb +340 -0
  19. data/lib/thinking_sphinx/index/builder.rb +195 -0
  20. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  21. data/lib/thinking_sphinx/rails_additions.rb +56 -0
  22. data/lib/thinking_sphinx/search.rb +482 -0
  23. data/lib/thinking_sphinx/tasks.rb +86 -0
  24. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +207 -0
  25. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  26. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  27. data/spec/unit/thinking_sphinx/active_record_spec.rb +236 -0
  28. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  29. data/spec/unit/thinking_sphinx/attribute_spec.rb +360 -0
  30. data/spec/unit/thinking_sphinx/configuration_spec.rb +493 -0
  31. data/spec/unit/thinking_sphinx/field_spec.rb +219 -0
  32. data/spec/unit/thinking_sphinx/index/builder_spec.rb +33 -0
  33. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +68 -0
  34. data/spec/unit/thinking_sphinx/index_spec.rb +277 -0
  35. data/spec/unit/thinking_sphinx/search_spec.rb +190 -0
  36. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  37. data/tasks/thinking_sphinx_tasks.rake +1 -0
  38. metadata +103 -0
data/LICENCE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Pat Allan
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,60 @@
1
+ = Thinking Sphinx
2
+
3
+ == Usage
4
+
5
+ First, if you haven't done so already, check out the main usage[http://ts.freelancing-gods.com/usage.html] page. Once you've done that, the next place to look for information is the specific method docs - ThinkingSphinx::Search and ThinkingSphinx::Index::Builder in particular.
6
+
7
+ Keep in mind that while Thinking Sphinx works for ActiveRecord with Merb, it doesn't yet support DataMapper (although that is planned).
8
+
9
+ == Contributing
10
+
11
+ Fork on GitHub and after you've committed tested patches, send a pull request.
12
+
13
+ To get the spec suite running, you will need to install the not-a-mock gem if you don't already have it:
14
+
15
+ git clone git://github.com/freelancing-god/not-a-mock.git
16
+ cd not-a-mock
17
+ rake gem
18
+ gem install pkg/not_a_mock-1.1.0.gem
19
+
20
+ Then set up your database
21
+
22
+ cp spec/fixtures/database.yml.default spec/fixtures/database.yml
23
+ mysqladmin -u root create thinking_sphinx
24
+
25
+ You should now have a passing test suite on which to build your patch.
26
+
27
+ rake spec
28
+
29
+ == Contributors
30
+
31
+ Since I first released this library, there have been quite a few people who have submitted patches, to my immense gratitude. Others have suggested syntax changes and general improvements. So my thanks to the following people:
32
+
33
+ - Joost Hietbrink
34
+ - Jonathon Conway
35
+ - Gregory Mirzayantz
36
+ - Tung Nguyen
37
+ - Sean Cribbs
38
+ - Benoit Caccinolo
39
+ - John Barton
40
+ - Oliver Beddows
41
+ - Arthur Zapparoli
42
+ - Dusty Doris
43
+ - Marcus Crafter
44
+ - Patrick Lenz
45
+ - Björn Andreasson
46
+ - James Healy
47
+ - Jae-Jun Hwang
48
+ - Xavier Shay
49
+ - Jason Rust
50
+ - Gopal Patel
51
+ - Chris Heald
52
+ - Peter Vandenberk
53
+ - Josh French
54
+ - Andrew Bennett
55
+ - Jordan Fowler
56
+ - Seth Walker
57
+ - Joe Noon
58
+ - Wolfgang Postler
59
+ - Rick Olson
60
+ - Killian Murphy
@@ -0,0 +1,26 @@
1
+ require 'socket'
2
+ require 'timeout'
3
+ require 'riddle/client'
4
+ require 'riddle/client/filter'
5
+ require 'riddle/client/message'
6
+ require 'riddle/client/response'
7
+
8
# Top-level namespace for the Riddle client library.
module Riddle #:nodoc:
  # Error type used by the client when connecting to the daemon times out.
  class ConnectionError < StandardError #:nodoc:
  end

  # Version numbers for the library, plus the dotted strings built from them.
  module Version #:nodoc:
    Major = 0
    Minor = 9
    Tiny  = 8

    # Revision number for RubyForge's sake, taken from what Sphinx
    # outputs to the command line.
    Rev = 1371

    # Release number to mark my own fixes, beyond feature parity with
    # Sphinx itself.
    Release = 0

    # "Major.Minor.Tiny"
    String = [Major, Minor, Tiny].join('.')

    # "Major.Minor.Tiny.Rev.Release"
    GemVersion = [String, Rev, Release].join('.')
  end
end
@@ -0,0 +1,639 @@
1
+ module Riddle
2
# Raised during the connection handshake when searchd reports a protocol
# version lower than 1.
class VersionError < StandardError
end

# Raised when searchd sends back no usable response, or a response with an
# error, retry or unknown status.
class ResponseError < StandardError
end
4
+
5
+ # This class was heavily based on the existing Client API by Dmytro Shteflyuk
6
+ # and Alexey Kovyrin. Their code worked fine, I just wanted something a bit
7
+ # more Ruby-ish (ie. lowercase and underscored method names). I also have
8
+ # used a few helper classes, just to neaten things up.
9
+ #
10
+ # Feel free to use it wherever. Send bug reports, patches, comments and
11
+ # suggestions to pat at freelancing-gods dot com.
12
+ #
13
+ # Most properties of the client are accessible through attribute accessors,
14
+ # and where relevant use symbols instead of the long constants common in
15
+ # other clients.
16
+ # Some examples:
17
+ #
18
+ # client.sort_mode = :extended
19
+ # client.sort_by = "birthday DESC"
20
+ # client.match_mode = :extended
21
+ #
22
+ # To add a filter, you will need to create a Filter object:
23
+ #
24
+ # client.filters << Riddle::Client::Filter.new("birthday",
25
+ # Time.local(1975, 1, 1).to_i..Time.local(1985, 1, 1).to_i, false)
26
+ #
27
+ class Client
28
# Lookup tables translating the symbols used by this client into the integer
# codes of the searchd protocol. The trailing comment on each entry names the
# matching Sphinx constant.

# Command identifiers sent in a request header.
Commands = {
  :search   => 0, # SEARCHD_COMMAND_SEARCH
  :excerpt  => 1, # SEARCHD_COMMAND_EXCERPT
  :update   => 2, # SEARCHD_COMMAND_UPDATE
  :keywords => 3  # SEARCHD_COMMAND_KEYWORDS
}

# Per-command protocol versions (major in the high byte, minor in the low).
Versions = {
  :search   => 0x113, # VER_COMMAND_SEARCH
  :excerpt  => 0x100, # VER_COMMAND_EXCERPT
  :update   => 0x101, # VER_COMMAND_UPDATE
  :keywords => 0x100  # VER_COMMAND_KEYWORDS
}

# Status codes found in a response header or a per-query result.
Statuses = {
  :ok      => 0, # SEARCHD_OK
  :error   => 1, # SEARCHD_ERROR
  :retry   => 2, # SEARCHD_RETRY
  :warning => 3  # SEARCHD_WARNING
}

# Values accepted by match_mode.
MatchModes = {
  :all       => 0, # SPH_MATCH_ALL
  :any       => 1, # SPH_MATCH_ANY
  :phrase    => 2, # SPH_MATCH_PHRASE
  :boolean   => 3, # SPH_MATCH_BOOLEAN
  :extended  => 4, # SPH_MATCH_EXTENDED
  :fullscan  => 5, # SPH_MATCH_FULLSCAN
  :extended2 => 6  # SPH_MATCH_EXTENDED2
}

# Values accepted by rank_mode.
RankModes = {
  :proximity_bm25 => 0, # SPH_RANK_PROXIMITY_BM25
  :bm25           => 1, # SPH_RANK_BM25
  :none           => 2, # SPH_RANK_NONE
  :wordcount      => 3  # SPH_RANK_WORDCOUNT
}

# Values accepted by sort_mode.
SortModes = {
  :relevance     => 0, # SPH_SORT_RELEVANCE
  :attr_desc     => 1, # SPH_SORT_ATTR_DESC
  :attr_asc      => 2, # SPH_SORT_ATTR_ASC
  :time_segments => 3, # SPH_SORT_TIME_SEGMENTS
  :extended      => 4, # SPH_SORT_EXTENDED
  :expr          => 5  # SPH_SORT_EXPR
}

# Attribute type codes; :multi is a flag added on top of a base type.
AttributeTypes = {
  :integer   => 1,         # SPH_ATTR_INTEGER
  :timestamp => 2,         # SPH_ATTR_TIMESTAMP
  :ordinal   => 3,         # SPH_ATTR_ORDINAL
  :bool      => 4,         # SPH_ATTR_BOOL
  :float     => 5,         # SPH_ATTR_FLOAT
  :multi     => 0x40000000 # SPH_ATTR_MULTI
}

# Values accepted by group_function.
GroupFunctions = {
  :day      => 0, # SPH_GROUPBY_DAY
  :week     => 1, # SPH_GROUPBY_WEEK
  :month    => 2, # SPH_GROUPBY_MONTH
  :year     => 3, # SPH_GROUPBY_YEAR
  :attr     => 4, # SPH_GROUPBY_ATTR
  :attrpair => 5  # SPH_GROUPBY_ATTRPAIR
}

# Filter type codes.
FilterTypes = {
  :values      => 0, # SPH_FILTER_VALUES
  :range       => 1, # SPH_FILTER_RANGE
  :float_range => 2  # SPH_FILTER_FLOATRANGE
}
98
+
99
# Where to find the daemon, and how long to wait when connecting to it.
attr_accessor :server, :port, :timeout

# Paging, matching and sorting.
attr_accessor :offset, :limit, :max_matches, :match_mode, :sort_mode,
  :sort_by, :rank_mode

# Weightings, id range and filters.
attr_accessor :weights, :index_weights, :field_weights, :id_range, :filters

# Grouping.
attr_accessor :group_by, :group_function, :group_clause, :group_distinct

# Remaining query settings.
attr_accessor :cut_off, :retry_count, :retry_delay, :anchor, :max_query_time

# Query messages waiting to be sent by #run.
attr_reader :queue
105
+
106
# Can instantiate with a specific server and port - otherwise it assumes
# defaults of localhost and 3312 respectively. All other settings can be
# accessed and changed via the attribute accessors.
#
# The query settings are given their defaults by calling #reset, so the list
# of defaults lives in one place only and the two methods cannot drift apart.
def initialize(server=nil, port=nil)
  @server = server || "localhost"
  @port   = port   || 3312

  # Query messages waiting to be sent; #reset deliberately leaves this alone.
  @queue = []

  reset
end
141
+
142
# Put every query setting back to its default value. The server, port and
# the queue of pending queries are not touched.
def reset
  # paging and limits
  @offset      = 0
  @limit       = 20
  @max_matches = 1000
  @cut_off     = 0

  # matching, ranking and sorting
  @match_mode = :all
  @rank_mode  = :proximity_bm25
  @sort_mode  = :relevance
  @sort_by    = ''

  # weightings - for the two hashes, string keys are index names (or field
  # names respectively) and integer values are weightings
  @weights       = []
  @index_weights = {}
  @field_weights = {}

  # restricting results
  @id_range = 0..0
  @filters  = []
  @anchor   = {}

  # grouping
  @group_by       = ''
  @group_function = :day
  @group_clause   = '@group desc'
  @group_distinct = ''

  # retries and time limits
  @retry_count    = 0
  @retry_delay    = 0
  @max_query_time = 0
  @timeout        = 0
end
170
+
171
# Set the geo-anchor point - with the names of the attributes that contain
# the latitude and longitude (in radians), and the reference position.
# Note that for geocoding to work properly, you must also set
# match_mode to :extended. To sort results by distance, you will need an
# extended sort, with sort_by set to '@geodist asc' for example. Sphinx
# expects latitude and longitude to be returned from your SQL source
# in radians.
#
# Example:
#   client.set_anchor('lat', -0.6591741, 'long', 2.530770)
#
def set_anchor(lat_attr, lat, long_attr, long)
  point = {}
  point[:latitude_attribute]  = lat_attr
  point[:latitude]            = lat
  point[:longitude_attribute] = long_attr
  point[:longitude]           = long

  @anchor = point
end
190
+
191
# Build the message for a query and add it to the end of the queue, without
# sending anything. Takes the same parameters as the query method.
def append_query(search, index = '*', comments = '')
  message = query_message(search, index, comments)
  @queue.push(message)
end
196
+
197
# Run all the queries currently in the queue. This will return an array of
# results hashes, one per queued query and in the order they were queued.
#
# The queue is emptied whether or not the run succeeds. Previously a failed
# request (or a response that could not be parsed) left its messages queued,
# so they were sent again ahead of the next query and that query was handed
# the results of the stale one.
def run
  response = Response.new request(:search, @queue)

  @queue.collect do
    result = {
      :matches         => [],
      :fields          => [],
      :attributes      => {},
      :attribute_names => [],
      :words           => {}
    }

    result[:status] = response.next_int
    case result[:status]
    when Statuses[:warning]
      # A warning is followed by the normal result data.
      result[:warning] = response.next
    when Statuses[:error]
      # Nothing else is read for a failed query.
      result[:error] = response.next
      next result
    end

    result[:fields] = response.next_array

    # Attribute name/type pairs; :attribute_names keeps the order in which
    # the values are then read for every match.
    response.next_int.times do
      attribute_name = response.next
      type           = response.next_int

      result[:attributes][attribute_name] = type
      result[:attribute_names] << attribute_name
    end

    matches   = response.next_int
    is_64_bit = response.next_int
    matches.times do |i|
      doc    = is_64_bit > 0 ? response.next_64bit_int : response.next_int
      weight = response.next_int

      match = {:doc => doc, :weight => weight, :index => i, :attributes => {}}
      result[:attribute_names].each do |attr|
        match[:attributes][attr] = attribute_from_type(
          result[:attributes][attr], response
        )
      end
      result[:matches] << match
    end

    result[:total]       = response.next_int.to_i
    result[:total_found] = response.next_int.to_i
    # The value read is divided by 1000 and rounded to three decimal places.
    result[:time]        = ('%.3f' % (response.next_int / 1000.0)).to_f

    response.next_int.times do
      word = response.next
      docs = response.next_int
      hits = response.next_int
      result[:words][word] = {:docs => docs, :hits => hits}
    end

    result
  end
ensure
  @queue.clear
end
263
+
264
+ # Query the Sphinx daemon - defaulting to all indexes, but you can specify
265
+ # a specific one if you wish. The search parameter should be a string
266
+ # following Sphinx's expectations.
267
+ #
268
+ # The object returned from this method is a hash with the following keys:
269
+ #
270
+ # * :matches
271
+ # * :fields
272
+ # * :attributes
273
+ # * :attribute_names
274
+ # * :words
275
+ # * :total
276
+ # * :total_found
277
+ # * :time
278
+ # * :status
279
+ # * :warning (if appropriate)
280
+ # * :error (if appropriate)
281
+ #
282
+ # The key <tt>:matches</tt> returns an array of hashes - the actual search
283
+ # results. Each hash has the document id (<tt>:doc</tt>), the result
284
+ # weighting (<tt>:weight</tt>), and a hash of the attributes for the
285
+ # document (<tt>:attributes</tt>).
286
+ #
287
+ # The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return lists of
288
+ # fields and attributes for the documents. The key <tt>:attributes</tt>
289
+ # will return a hash of attribute name and type pairs, and <tt>:words</tt>
290
+ # returns a hash of hashes representing the words from the search, with the
291
+ # number of documents and hits for each, along the lines of:
292
+ #
293
+ # results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
294
+ #
295
+ # <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
296
+ # number of matches available, the total number of matches (which may be
297
+ # greater than the maximum available, depending on the number of matches
298
+ # and your sphinx configuration), and the time in seconds that the
299
+ # query took to run.
300
+ #
301
+ # <tt>:status</tt> is the error code for the query - and if there was a
302
+ # related warning, it will be under the <tt>:warning</tt> key. Fatal errors
303
+ # will be described under <tt>:error</tt>.
304
+ #
305
def query(search, index = '*', comments = '')
  # Queue this one search, send the queue, and hand back the first result.
  @queue.push query_message(search, index, comments)
  results = run
  results.first
end
309
+
310
+ # Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
311
+ # They may also be abbreviated to fit within a word limit.
312
+ #
313
+ # As part of the options hash, you will need to
314
+ # define:
315
+ # * :docs
316
+ # * :words
317
+ # * :index
318
+ #
319
+ # Optional settings include:
320
+ # * :before_match (defaults to <span class="match">)
321
+ # * :after_match (defaults to </span>)
322
+ # * :chunk_separator (defaults to ' &#8230; ' - which is an HTML ellipsis)
323
+ # * :limit (defaults to 256)
324
+ # * :around (defaults to 5)
325
+ # * :exact_phrase (defaults to false)
326
+ # * :single_passage (defaults to false)
327
+ #
328
+ # The defaults differ from the official PHP client, as I've opted for
329
+ # semantic HTML markup.
330
+ #
331
+ # Example:
332
+ #
333
+ # client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
334
+ # #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
335
+ #
336
+ # lorem_lipsum = "Lorem ipsum dolor..."
337
+ #
338
+ # client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
339
+ # #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
340
+ # elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua &#8230; . Excepteur
341
+ # sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
342
+ # laborum. <span class=\"match\">Pat</span> Cash"]
343
+ #
344
+ # Workflow:
345
+ #
346
+ # Excerpt creation is completely isolated from searching the index. The nominated index is only used to
347
+ # discover encoding and charset information.
348
+ #
349
+ # Therefore, the workflow goes:
350
+ #
351
+ # 1. Do the sphinx query.
352
+ # 2. Fetch the documents found by sphinx from their repositories.
353
+ # 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
354
+ #
355
def excerpts(options = {})
  defaults = {
    :index           => '*',
    :before_match    => '<span class="match">',
    :after_match     => '</span>',
    :chunk_separator => ' &#8230; ', # ellipsis
    :limit           => 256,
    :around          => 5,
    :exact_phrase    => false,
    :single_passage  => false
  }

  # Fill in the defaults on a copy, so the caller's hash is left exactly as
  # it was passed in (and a frozen hash is accepted). As before, an option
  # given as nil or false falls back to its default.
  options = options.merge(defaults) { |_key, given, default| given || default }

  response = Response.new request(:excerpt, excerpts_message(options))

  # One excerpt is read back for every document passed in.
  options[:docs].collect { response.next }
end
369
+
370
+ # Update attributes - first parameter is the relevant index, second is an
371
+ # array of attributes to be updated, and the third is a hash, where the
372
+ # keys are the document ids, and the values are arrays with the attribute
373
+ # values - in the same order as the second parameter.
374
+ #
375
+ # Example:
376
+ #
377
+ # client.update('people', ['birthday'], {1 => [Time.local(1982, 8, 20).to_i]})
378
+ #
379
def update(index, attributes, values_by_doc)
  payload = update_message(index, attributes, values_by_doc)
  reply   = request(:update, payload)

  # The reply holds a single integer, which is returned as is.
  Response.new(reply).next_int
end
387
+
388
# Ask the daemon for the keywords of a query against the given index. Returns
# an array with one hash per keyword, holding the keys :tokenised and
# :normalised. When return_hits is true, each hash also has the number of
# documents and hits for that keyword under :docs and :hits.
def keywords(query, index, return_hits = false)
  payload  = keywords_message(query, index, return_hits)
  response = Response.new request(:keywords, payload)

  total = response.next_int
  (0...total).map do
    keyword = {}
    keyword[:tokenised]  = response.next
    keyword[:normalised] = response.next

    if return_hits
      keyword[:docs] = response.next_int
      keyword[:hits] = response.next_int
    end

    keyword
  end
end
411
+
412
+ private
413
+
414
# Opens a connection to the Sphinx daemon and yields the socket to the given
# block, closing the socket once the block has finished (or raised). A
# timeout of 0 means the connection attempt is not time-limited; otherwise
# an attempt that takes longer than @timeout seconds raises
# Riddle::ConnectionError.
def connect(&block)
  socket =
    if @timeout == 0
      initialise_connection
    else
      begin
        Timeout.timeout(@timeout) { initialise_connection }
      rescue Timeout::Error
        raise Riddle::ConnectionError,
          "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
      end
    end

  begin
    yield socket
  ensure
    socket.close
  end
end
435
+
436
# Opens a TCP connection to @server/@port and performs the version handshake:
# reads the daemon's 4-byte version, then sends this client's version (1).
# Returns the open socket.
#
# Raises VersionError when the daemon reports a version lower than 1, or
# sends no (or a truncated) version at all - that case used to fail with a
# NoMethodError on nil. If anything goes wrong after the socket has been
# opened, the socket is closed before the error propagates, so it does not
# leak.
def initialise_connection
  handshake_done = false
  socket = TCPSocket.new @server, @port

  # Checking version. A missing or short greeting unpacks to nil, which
  # to_i turns into version 0.
  version = socket.recv(4).to_s.unpack('N').first.to_i
  if version < 1
    raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
  end

  # Send version
  socket.send [1].pack('N'), 0

  handshake_done = true
  socket
ensure
  # socket is still nil when TCPSocket.new itself failed.
  socket.close if socket && !handshake_done
end
451
+
452
# Send a collection of messages, for a command type (eg, search, excerpts,
# update), to the Sphinx daemon, and return the body of the reply.
#
# command is a key of Commands; messages is a single message string or an
# array of them (for :search it must be the array, as its size is sent too).
#
# Raises ResponseError when the daemon sends no (or an incomplete) reply, or
# a reply whose status is error, retry or unknown. For a warning status the
# warning text is printed and the remainder of the reply is returned.
def request(command, messages)
  response = ""
  status   = -1
  version  = 0
  length   = 0
  message  = Array(messages).join("")

  # The lengths in the headers are byte counts, so treat both buffers as raw
  # bytes where strings carry an encoding. Without this, multi-byte text is
  # under-counted and joining it to the packed header can raise.
  if message.respond_to?(:force_encoding)
    message.force_encoding('ASCII-8BIT')
    response.force_encoding('ASCII-8BIT')
  end

  connect do |socket|
    header =
      if command == :search
        # Message length is +4 to account for the following count value for
        # the number of messages (well, that's what I'm assuming).
        [
          Commands[command], Versions[command],
          4 + message.length, messages.length
        ].pack("nnNN")
      else
        [Commands[command], Versions[command], message.length].pack("nnN")
      end
    socket.send header + message, 0

    # Reply header: status (2 bytes), version (2 bytes), body length (4).
    reply_header = socket.recv(8).to_s
    if reply_header.length < 8
      raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
    end
    status, version, length = reply_header.unpack('n2N')

    while response.length < length
      part = socket.recv(length - response.length)
      # Nothing more to read means the daemon hung up early; stop here
      # rather than looping forever, and let the length check below fail.
      break if part.nil? || part.empty?
      response << part
    end
  end

  if response.empty? || response.length != length
    raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
  end

  case status
  when Statuses[:ok]
    if version < Versions[command]
      puts format("searchd command v.%d.%d older than client (v.%d.%d)",
        version >> 8, version & 0xff,
        Versions[command] >> 8, Versions[command] & 0xff)
    end
    response
  when Statuses[:warning]
    # The body starts with a length-prefixed warning; the rest is the reply.
    warning_length = response[0, 4].unpack('N*').first
    puts response[4, warning_length]
    response[4 + warning_length, response.length - 4 - warning_length]
  when Statuses[:error], Statuses[:retry]
    raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
  else
    raise ResponseError, "Unknown searchd error (status: #{status})"
  end
end
507
+
508
# Builds the message to send to Sphinx for one search, from the client's
# current settings plus the given search string, index and comments. The
# order of the appends below is the order of the values in the message.
# Returns the message converted with to_s.
def query_message(search, index, comments = '')
  message = Message.new

  # Mode, Limits, Sort Mode
  message.append_ints(
    @offset, @limit, MatchModes[@match_mode],
    RankModes[@rank_mode], SortModes[@sort_mode]
  )
  message.append_string(@sort_by)

  # Query
  message.append_string(search)

  # Weights
  message.append_int(@weights.length)
  message.append_ints(*@weights)

  # Index
  message.append_string(index)

  # ID Range
  message.append_int(1)
  message.append_64bit_ints(@id_range.first, @id_range.last)

  # Filters
  message.append_int(@filters.length)
  @filters.each do |filter|
    message.append(filter.query_message)
  end

  # Grouping
  message.append_int(GroupFunctions[@group_function])
  message.append_string(@group_by)
  message.append_int(@max_matches)
  message.append_string(@group_clause)
  message.append_ints(@cut_off, @retry_count, @retry_delay)
  message.append_string(@group_distinct)

  # Anchor Point - a 0 when there is none, otherwise a 1 and the details
  if @anchor.empty?
    message.append_int(0)
  else
    message.append_int(1)
    message.append_string(@anchor[:latitude_attribute])
    message.append_string(@anchor[:longitude_attribute])
    message.append_floats(@anchor[:latitude], @anchor[:longitude])
  end

  # Per Index Weights
  message.append_int(@index_weights.length)
  @index_weights.each_pair do |index_name, weighting|
    message.append_string(index_name.to_s)
    message.append_int(weighting)
  end

  # Max Query Time
  message.append_int(@max_query_time)

  # Per Field Weights
  message.append_int(@field_weights.length)
  @field_weights.each_pair do |field_name, weighting|
    message.append_string(field_name.to_s)
    message.append_int(weighting)
  end

  message.append_string(comments)

  message.to_s
end
574
+
575
# Builds the message to send to Sphinx for an excerpts request, from an
# options hash that already has its defaults filled in. Returns the message
# converted with to_s.
def excerpts_message(options)
  message = Message.new

  # The lowest bit is always set; each of these options adds its own bit.
  flag_bits = [
    [2,  :exact_phrase],
    [4,  :single_passage],
    [8,  :use_boundaries],
    [16, :weight_order]
  ]
  flags = flag_bits.inject(1) do |bits, (bit, option)|
    options[option] ? (bits | bit) : bits
  end

  message.append([0, flags].pack('N2')) # 0 = mode
  message.append_string(options[:index])
  message.append_string(options[:words])

  # options
  message.append_string(options[:before_match])
  message.append_string(options[:after_match])
  message.append_string(options[:chunk_separator])
  message.append_ints(options[:limit], options[:around])

  message.append_array(options[:docs])

  message.to_s
end
599
+
600
# Builds the message to send to Sphinx to update attributes of documents:
# the index name, the attribute names, the number of documents, then each
# document id followed by its new values. Returns the message converted with
# to_s.
def update_message(index, attributes, values_by_doc)
  message = Message.new

  message.append_string(index)
  message.append_array(attributes)

  message.append_int(values_by_doc.length)
  values_by_doc.each_pair do |document_id, new_values|
    message.append_64bit_int(document_id)
    message.append_ints(*new_values) # new values (integers)
  end

  message.to_s
end
616
+
617
# Builds the message to send to the daemon for a keywords request: the query,
# the index, and a 1 or 0 for whether hit counts are wanted. Returns the
# message converted with to_s.
def keywords_message(query, index, return_hits)
  hits_flag = return_hits ? 1 : 0

  message = Message.new
  message.append_string(query)
  message.append_string(index)
  message.append_int(hits_flag)
  message.to_s
end
627
+
628
# Reads the next attribute value from the response, choosing the reader from
# the attribute's type code. A code above the :multi flag is read as an array
# of its base type. Floats use the float readers; every other type is read
# as an integer.
def attribute_from_type(type, response)
  multi_flag = AttributeTypes[:multi]
  multi      = type > multi_flag
  base_type  = multi ? type - multi_flag : type

  if AttributeTypes[:float] == base_type
    multi ? response.next_float_array : response.next_float
  else
    multi ? response.next_int_array : response.next_int
  end
end
638
+ end
639
+ end