thinking-sphinx 1.2.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +157 -0
  3. data/VERSION.yml +4 -0
  4. data/lib/thinking_sphinx.rb +211 -0
  5. data/lib/thinking_sphinx/active_record.rb +307 -0
  6. data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
  7. data/lib/thinking_sphinx/active_record/delta.rb +87 -0
  8. data/lib/thinking_sphinx/active_record/has_many_association.rb +28 -0
  9. data/lib/thinking_sphinx/active_record/scopes.rb +39 -0
  10. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
  11. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  12. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +136 -0
  13. data/lib/thinking_sphinx/association.rb +164 -0
  14. data/lib/thinking_sphinx/attribute.rb +342 -0
  15. data/lib/thinking_sphinx/class_facet.rb +15 -0
  16. data/lib/thinking_sphinx/configuration.rb +282 -0
  17. data/lib/thinking_sphinx/core/array.rb +7 -0
  18. data/lib/thinking_sphinx/core/string.rb +15 -0
  19. data/lib/thinking_sphinx/deltas.rb +30 -0
  20. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  21. data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta.rb +30 -0
  23. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  24. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  25. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  26. data/lib/thinking_sphinx/deploy/capistrano.rb +100 -0
  27. data/lib/thinking_sphinx/excerpter.rb +22 -0
  28. data/lib/thinking_sphinx/facet.rb +125 -0
  29. data/lib/thinking_sphinx/facet_search.rb +134 -0
  30. data/lib/thinking_sphinx/field.rb +82 -0
  31. data/lib/thinking_sphinx/index.rb +99 -0
  32. data/lib/thinking_sphinx/index/builder.rb +286 -0
  33. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  34. data/lib/thinking_sphinx/property.rb +162 -0
  35. data/lib/thinking_sphinx/rails_additions.rb +150 -0
  36. data/lib/thinking_sphinx/search.rb +707 -0
  37. data/lib/thinking_sphinx/search_methods.rb +421 -0
  38. data/lib/thinking_sphinx/source.rb +150 -0
  39. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  40. data/lib/thinking_sphinx/source/sql.rb +128 -0
  41. data/lib/thinking_sphinx/tasks.rb +165 -0
  42. data/rails/init.rb +14 -0
  43. data/spec/lib/thinking_sphinx/active_record/delta_spec.rb +130 -0
  44. data/spec/lib/thinking_sphinx/active_record/has_many_association_spec.rb +49 -0
  45. data/spec/lib/thinking_sphinx/active_record/scopes_spec.rb +96 -0
  46. data/spec/lib/thinking_sphinx/active_record_spec.rb +364 -0
  47. data/spec/lib/thinking_sphinx/association_spec.rb +239 -0
  48. data/spec/lib/thinking_sphinx/attribute_spec.rb +500 -0
  49. data/spec/lib/thinking_sphinx/configuration_spec.rb +268 -0
  50. data/spec/lib/thinking_sphinx/core/array_spec.rb +9 -0
  51. data/spec/lib/thinking_sphinx/core/string_spec.rb +9 -0
  52. data/spec/lib/thinking_sphinx/excerpter_spec.rb +49 -0
  53. data/spec/lib/thinking_sphinx/facet_search_spec.rb +176 -0
  54. data/spec/lib/thinking_sphinx/facet_spec.rb +333 -0
  55. data/spec/lib/thinking_sphinx/field_spec.rb +154 -0
  56. data/spec/lib/thinking_sphinx/index/builder_spec.rb +455 -0
  57. data/spec/lib/thinking_sphinx/index/faux_column_spec.rb +30 -0
  58. data/spec/lib/thinking_sphinx/index_spec.rb +45 -0
  59. data/spec/lib/thinking_sphinx/rails_additions_spec.rb +203 -0
  60. data/spec/lib/thinking_sphinx/search_methods_spec.rb +152 -0
  61. data/spec/lib/thinking_sphinx/search_spec.rb +1092 -0
  62. data/spec/lib/thinking_sphinx/source_spec.rb +227 -0
  63. data/spec/lib/thinking_sphinx_spec.rb +162 -0
  64. data/tasks/distribution.rb +50 -0
  65. data/tasks/rails.rake +1 -0
  66. data/tasks/testing.rb +83 -0
  67. data/vendor/after_commit/LICENSE +20 -0
  68. data/vendor/after_commit/README +16 -0
  69. data/vendor/after_commit/Rakefile +22 -0
  70. data/vendor/after_commit/init.rb +8 -0
  71. data/vendor/after_commit/lib/after_commit.rb +45 -0
  72. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  73. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  74. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  75. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  76. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  77. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  78. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  79. data/vendor/riddle/lib/riddle.rb +30 -0
  80. data/vendor/riddle/lib/riddle/client.rb +635 -0
  81. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  82. data/vendor/riddle/lib/riddle/client/message.rb +66 -0
  83. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  84. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  85. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  86. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  87. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  88. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  89. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  90. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  91. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  92. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  93. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  94. data/vendor/riddle/lib/riddle/controller.rb +53 -0
  95. metadata +172 -0
@@ -0,0 +1,7 @@
1
module Delayed
  # Mixin that lets an object defer one of its own method calls to a
  # background job.
  module MessageSending
    # Wraps a call to +method+ (with +args+) on the receiver in a
    # Delayed::PerformableMethod and hands it to Delayed::Job.enqueue.
    # Returns whatever Delayed::Job.enqueue returns.
    def send_later(method, *args)
      deferred_call = Delayed::PerformableMethod.new(self, method.to_sym, args)
      Delayed::Job.enqueue(deferred_call)
    end
  end
end
@@ -0,0 +1,55 @@
1
module Delayed
  # A storable description of a method call: the receiver, the method name
  # and the arguments.
  #
  # Classes and ActiveRecord instances are not stored directly. They are
  # replaced by marker strings ("CLASS:Name" and "AR:Name:id") when the call
  # is captured, and turned back into live objects when it is performed.
  class PerformableMethod < Struct.new(:object, :method, :args)
    # The markers must match the WHOLE string (\A ... \z). Line anchors
    # (^ ... $) would also match a marker sitting on any single line of a
    # multi-line string argument, which would then be swapped for a class or
    # record when the job runs.
    CLASS_STRING_FORMAT = /\ACLASS\:([A-Z][\w\:]+)\z/
    AR_STRING_FORMAT    = /\AAR\:([A-Z][\w\:]+)\:(\d+)\z/

    # object - receiver of the deferred call.
    # method - name of the method to call (anything responding to to_sym).
    # args   - array of arguments for the call.
    #
    # Raises NoMethodError when +object+ does not respond to +method+.
    def initialize(object, method, args)
      unless object.respond_to?(method)
        # Report the receiver that lacks the method, not this (still empty)
        # struct.
        raise NoMethodError, "undefined method `#{method}' for #{object.inspect}"
      end

      self.object = dump(object)
      self.args   = args.map { |a| dump(a) }
      self.method = method.to_sym
    end

    # Human readable label: "Klass.method" for class receivers,
    # "Klass#method" for record receivers, "Unknown#method" otherwise.
    def display_name
      case self.object
      when CLASS_STRING_FORMAT then "#{$1}.#{method}"
      when AR_STRING_FORMAT    then "#{$1}##{method}"
      else "Unknown##{method}"
      end
    end

    # Restores the receiver and arguments and makes the call. Returns the
    # call's result, or true when a referenced record no longer exists.
    def perform
      load(object).send(method, *args.map{|a| load(a)})
    rescue ActiveRecord::RecordNotFound
      # We cannot do anything about objects which were deleted in the meantime
      true
    end

    private

    # Turns a marker string back into the class or record it stands for;
    # any other value is returned untouched.
    def load(arg)
      case arg
      when CLASS_STRING_FORMAT then $1.constantize
      when AR_STRING_FORMAT    then $1.constantize.find($2)
      else arg
      end
    end

    # Replaces classes and ActiveRecord instances with their marker strings;
    # any other value is returned untouched.
    def dump(arg)
      case arg
      when Class              then class_to_string(arg)
      when ActiveRecord::Base then ar_to_string(arg)
      else arg
      end
    end

    # Marker for a record: "AR:<class>:<id>".
    def ar_to_string(obj)
      "AR:#{obj.class}:#{obj.id}"
    end

    # Marker for a class: "CLASS:<name>".
    def class_to_string(obj)
      "CLASS:#{obj.name}"
    end
  end
end
@@ -0,0 +1,54 @@
1
module Delayed
  # Long-running loop that repeatedly asks Delayed::Job to work off jobs,
  # sleeping between rounds that processed nothing.
  class Worker
    # Seconds to sleep after a round in which no job was processed.
    SLEEP = 5

    # Class-wide logger: Merb's logger when Merb::Logger is defined, else
    # RAILS_DEFAULT_LOGGER when that is defined, else nil.
    cattr_accessor :logger
    self.logger =
      if defined?(Merb::Logger)
        Merb.logger
      elsif defined?(RAILS_DEFAULT_LOGGER)
        RAILS_DEFAULT_LOGGER
      end

    # options - :quiet suppresses console output; :min_priority and
    #           :max_priority, when the key is present, are copied onto
    #           Delayed::Job.
    def initialize(options={})
      @quiet = options[:quiet]
      [:min_priority, :max_priority].each do |bound|
        Delayed::Job.send("#{bound}=", options[bound]) if options.has_key?(bound)
      end
    end

    # Runs the work loop until a TERM or INT signal sets the $exit flag.
    # Job locks are cleared on the way out, however the loop ends.
    def start
      say "*** Starting job worker #{Delayed::Job.worker_name}"

      %w(TERM INT).each do |signal|
        trap(signal) { say 'Exiting...'; $exit = true }
      end

      loop do
        result = nil
        realtime = Benchmark.realtime { result = Delayed::Job.work_off }
        count = result.sum

        # Checked before and after the sleep/report step so that a signal
        # received at either point stops the loop promptly.
        break if $exit

        if count.zero?
          sleep(SLEEP)
        else
          say format("#{count} jobs processed at %.4f j/s, %d failed ...", count / realtime, result.last)
        end

        break if $exit
      end

    ensure
      Delayed::Job.clear_locks!
    end

    # Prints +text+ unless the worker is quiet, and logs it at info level
    # when a logger is configured.
    def say(text)
      puts text unless @quiet
      logger.info text if logger
    end

  end
end
@@ -0,0 +1,30 @@
1
+ require 'socket'
2
+ require 'timeout'
3
+
4
+ require 'riddle/client'
5
+ require 'riddle/configuration'
6
+ require 'riddle/controller'
7
+
8
module Riddle #:nodoc:
  # Raised when a connection to the Sphinx daemon cannot be established.
  class ConnectionError < StandardError #:nodoc:
  end

  module Version #:nodoc:
    Major = 0
    Minor = 9
    Tiny  = 8
    # Revision number for RubyForge's sake, taken from what Sphinx
    # outputs to the command line.
    Rev = 1533
    # Release number to mark my own fixes, beyond feature parity with
    # Sphinx itself.
    Release = 10

    # "major.minor.tiny"
    String = "#{Major}.#{Minor}.#{Tiny}"
    # "major.minor.tiny.rev.release"
    GemVersion = [String, Rev, Release].join('.')
  end

  # Returns a copy of +string+ in which each of the characters
  # ( ) | - ! @ ~ " & / is preceded by a backslash.
  def self.escape(string)
    string.gsub(%r{[()|\-!@~"&/]}) { |special| "\\" + special }
  end
end
@@ -0,0 +1,635 @@
1
+ require 'riddle/client/filter'
2
+ require 'riddle/client/message'
3
+ require 'riddle/client/response'
4
+
5
+ module Riddle
6
# Raised when the daemon announces a protocol version this client cannot use.
class VersionError < StandardError
end

# Raised when the daemon's reply is missing, incomplete or reports an error.
class ResponseError < StandardError
end
8
+
9
+ # This class was heavily based on the existing Client API by Dmytro Shteflyuk
10
+ # and Alexy Kovyrin. Their code worked fine, I just wanted something a bit
11
+ # more Ruby-ish (ie. lowercase and underscored method names). I also have
12
+ # used a few helper classes, just to neaten things up.
13
+ #
14
+ # Feel free to use it wherever. Send bug reports, patches, comments and
15
+ # suggestions to pat at freelancing-gods dot com.
16
+ #
17
+ # Most properties of the client are accessible through attribute accessors,
18
+ # and where relevant use symbols instead of the long constants common in
19
+ # other clients.
20
+ # Some examples:
21
+ #
22
+ # client.sort_mode = :extended
23
+ # client.sort_by = "birthday DESC"
24
+ # client.match_mode = :extended
25
+ #
26
+ # To add a filter, you will need to create a Filter object:
27
+ #
28
+ # client.filters << Riddle::Client::Filter.new("birthday",
29
+ # Time.local(1975, 1, 1).to_i..Time.local(1985, 1, 1).to_i, false)
30
+ #
31
+ class Client
32
# searchd command identifiers (SEARCHD_COMMAND_*).
Commands = { :search => 0, :excerpt => 1, :update => 2, :keywords => 3 }

# Protocol version this client speaks for each command (VER_COMMAND_*).
Versions = { :search => 0x113, :excerpt => 0x100, :update => 0x101, :keywords => 0x100 }

# Reply status codes (SEARCHD_OK, SEARCHD_ERROR, SEARCHD_RETRY,
# SEARCHD_WARNING).
Statuses = { :ok => 0, :error => 1, :retry => 2, :warning => 3 }

# Query matching modes (SPH_MATCH_*).
MatchModes = {
  :all => 0, :any => 1, :phrase => 2, :boolean => 3,
  :extended => 4, :fullscan => 5, :extended2 => 6
}

# Ranking modes (SPH_RANK_*).
RankModes = { :proximity_bm25 => 0, :bm25 => 1, :none => 2, :wordcount => 3 }

# Result sorting modes (SPH_SORT_*).
SortModes = {
  :relevance => 0, :attr_desc => 1, :attr_asc => 2,
  :time_segments => 3, :extended => 4, :expr => 5
}

# Attribute types (SPH_ATTR_*). :multi is a flag value added on top of one
# of the other types.
AttributeTypes = {
  :integer => 1, :timestamp => 2, :ordinal => 3, :bool => 4, :float => 5,
  :multi => 0x40000000
}

# Grouping functions (SPH_GROUPBY_*).
GroupFunctions = {
  :day => 0, :week => 1, :month => 2, :year => 3, :attr => 4, :attrpair => 5
}

# Filter kinds (SPH_FILTER_VALUES, SPH_FILTER_RANGE, SPH_FILTER_FLOATRANGE).
FilterTypes = { :values => 0, :range => 1, :float_range => 2 }
102
+
103
+ attr_accessor :server, :port, :offset, :limit, :max_matches,
104
+ :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
105
+ :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
106
+ :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
107
+ :max_query_time, :field_weights, :timeout
108
+ attr_reader :queue
109
+
110
+ # Can instantiate with a specific server and port - otherwise it assumes
111
+ # defaults of localhost and 3312 respectively. All other settings can be
112
+ # accessed and changed via the attribute accessors.
113
# Builds a client for the daemon at +server+:+port+. A nil server falls back
# to "localhost" and a nil port to 3312. Every other setting starts at the
# value assigned by #reset, and the query queue starts empty.
def initialize(server=nil, port=nil)
  @server, @port = server || "localhost", port || 3312
  @queue = []

  reset
end
121
+
122
+ # Reset attributes and settings to defaults.
123
# Puts every search setting back to its default value. The server, port and
# query queue are not touched.
def reset
  # paging and limits
  @offset      = 0
  @limit       = 20
  @max_matches = 1000
  @cut_off = @retry_count = @retry_delay = @max_query_time = @timeout = 0

  # matching, ranking and sorting
  @match_mode = :all
  @rank_mode  = :proximity_bm25
  @sort_mode  = :relevance
  @sort_by    = ''

  # restrictions
  @id_range = 0..0
  @filters  = []
  @anchor   = {}

  # grouping
  @group_by       = ''
  @group_function = :day
  @group_clause   = '@group desc'
  @group_distinct = ''

  # weightings
  @weights = []
  # string keys are index names, integer values are weightings
  @index_weights = {}
  # string keys are field names, integer values are weightings
  @field_weights = {}
end
150
+
151
+ # Set the geo-anchor point - with the names of the attributes that contain
152
+ # the latitude and longitude (in radians), and the reference position.
153
+ # Note that for geocoding to work properly, you must also set
154
+ # match_mode to :extended. To sort results by distance, you will
155
+ # need to set sort_mode to :extended and sort_by to '@geodist asc',
156
+ # for example. Sphinx expects latitude and longitude to be returned
157
+ # from your SQL source in radians.
158
+ #
159
+ # Example:
160
+ # client.set_anchor('lat', -0.6591741, 'long', 2.530770)
161
+ #
162
# Stores the geo-anchor: the names of the latitude and longitude attributes
# together with the reference latitude and longitude. Returns the stored
# hash.
def set_anchor(lat_attr, lat, long_attr, long)
  latitude  = { :latitude_attribute  => lat_attr,  :latitude  => lat }
  longitude = { :longitude_attribute => long_attr, :longitude => long }

  @anchor = latitude.merge(longitude)
end
170
+
171
+ # Append a query to the queue. This uses the same parameters as the query
172
+ # method.
173
# Builds the message for one search and adds it to the end of the queue
# without sending anything. Returns the queue.
def append_query(search, index = '*', comments = '')
  @queue.push(query_message(search, index, comments))
end
176
+
177
+ # Run all the queries currently in the queue. This will return an array of
178
+ # results hashes.
179
# Sends every queued query in a single request and decodes one result hash
# per query, in queue order. The queue is emptied once all results have been
# decoded. A query that failed yields a hash holding only the empty
# defaults, :status and :error.
def run
  response = Response.new request(:search, @queue)

  results = @queue.map do
    result = {
      :matches => [], :fields => [], :attributes => {},
      :attribute_names => [], :words => {}
    }

    status = response.next_int
    result[:status] = status
    if status == Statuses[:warning]
      # a warning is followed by the normal result payload
      result[:warning] = response.next
    elsif status == Statuses[:error]
      # an error replaces the payload entirely
      result[:error] = response.next
      next result
    end

    result[:fields] = response.next_array

    # attribute schema: name/type pairs
    response.next_int.times do
      name = response.next
      kind = response.next_int

      result[:attributes][name] = kind
      result[:attribute_names] << name
    end

    # matches: document id (32 or 64 bit), weight, then one value per
    # attribute in schema order
    match_count = response.next_int
    is_64_bit   = response.next_int
    match_count.times do |position|
      doc    = is_64_bit > 0 ? response.next_64bit_int : response.next_int
      weight = response.next_int
      values = {}

      result[:matches] << {:doc => doc, :weight => weight, :index => position, :attributes => values}
      result[:attribute_names].each do |attr|
        values[attr] = attribute_from_type(result[:attributes][attr], response)
      end
    end

    result[:total]       = response.next_int.to_i
    result[:total_found] = response.next_int.to_i
    # the daemon reports milliseconds; store seconds rounded to 3 places
    result[:time]        = ('%.3f' % (response.next_int / 1000.0)).to_f

    # per-word statistics
    response.next_int.times do
      word = response.next
      docs = response.next_int
      hits = response.next_int
      result[:words][word] = {:docs => docs, :hits => hits}
    end

    result
  end

  @queue.clear
  results
end
243
+
244
+ # Query the Sphinx daemon - defaulting to all indexes, but you can specify
245
+ # a specific one if you wish. The search parameter should be a string
246
+ # following Sphinx's expectations.
247
+ #
248
+ # The object returned from this method is a hash with the following keys:
249
+ #
250
+ # * :matches
251
+ # * :fields
252
+ # * :attributes
253
+ # * :attribute_names
254
+ # * :words
255
+ # * :total
256
+ # * :total_found
257
+ # * :time
258
+ # * :status
259
+ # * :warning (if appropriate)
260
+ # * :error (if appropriate)
261
+ #
262
+ # The key <tt>:matches</tt> returns an array of hashes - the actual search
263
+ # results. Each hash has the document id (<tt>:doc</tt>), the result
264
+ # weighting (<tt>:weight</tt>), and a hash of the attributes for the
265
+ # document (<tt>:attributes</tt>).
266
+ #
267
+ # The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return list of
268
+ # fields and attributes for the documents. The key <tt>:attributes</tt>
269
+ # will return a hash of attribute name and type pairs, and <tt>:words</tt>
270
+ # returns a hash of hashes representing the words from the search, with the
271
+ # number of documents and hits for each, along the lines of:
272
+ #
273
+ # results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
274
+ #
275
+ # <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
276
+ # number of matches available, the total number of matches (which may be
277
+ # greater than the maximum available, depending on the number of matches
278
+ # and your sphinx configuration), and the time in milliseconds that the
279
+ # query took to run.
280
+ #
281
+ # <tt>:status</tt> is the error code for the query - and if there was a
282
+ # related warning, it will be under the <tt>:warning</tt> key. Fatal errors
283
+ # will be described under <tt>:error</tt>.
284
+ #
285
# Runs a single search and returns its result hash.
#
# search   - the query string.
# index    - index name(s) to search; defaults to all indexes ('*').
# comments - free-form comment sent along with the query.
#
# Anything still sitting in the queue is discarded first. If an earlier
# request raised, its messages were never removed from the queue; sending
# them again here would make +first+ return the result of that older query
# instead of this one.
def query(search, index = '*', comments = '')
  @queue.clear
  @queue << query_message(search, index, comments)
  run.first
end
289
+
290
+ # Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
291
+ # They may also be abbreviated to fit within a word limit.
292
+ #
293
+ # As part of the options hash, you will need to
294
+ # define:
295
+ # * :docs
296
+ # * :words
297
+ # * :index
298
+ #
299
+ # Optional settings include:
300
+ # * :before_match (defaults to <span class="match">)
301
+ # * :after_match (defaults to </span>)
302
+ # * :chunk_separator (defaults to ' &#8230; ' - which is an HTML ellipsis)
303
+ # * :limit (defaults to 256)
304
+ # * :around (defaults to 5)
305
+ # * :exact_phrase (defaults to false)
306
+ # * :single_passage (defaults to false)
307
+ #
308
+ # The defaults differ from the official PHP client, as I've opted for
309
+ # semantic HTML markup.
310
+ #
311
+ # Example:
312
+ #
313
+ # client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
314
+ # #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
315
+ #
316
+ # lorem_lipsum = "Lorem ipsum dolor..."
317
+ #
318
+ # client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
319
+ # #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
320
+ # elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua &#8230; . Excepteur
321
+ # sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
322
+ # laborum. <span class=\"match\">Pat</span> Cash"]
323
+ #
324
+ # Workflow:
325
+ #
326
+ # Excerpt creation is completely isolated from searching the index. The nominated index is only used to
327
+ # discover encoding and charset information.
328
+ #
329
+ # Therefore, the workflow goes:
330
+ #
331
+ # 1. Do the sphinx query.
332
+ # 2. Fetch the documents found by sphinx from their repositories.
333
+ # 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
334
+ #
335
# Requests highlighted excerpts for the documents in options[:docs] and
# returns one string per document, in the same order.
#
# options - hash with :docs, :words and :index, plus the optional
#           :before_match, :after_match, :chunk_separator, :limit, :around,
#           :exact_phrase and :single_passage settings. Missing (or
#           nil/false) settings receive the defaults assigned below.
#
# The defaults are applied to a copy, so the caller's hash is never modified
# and may be frozen.
def excerpts(options = {})
  settings = options.dup
  settings[:index]           ||= '*'
  settings[:before_match]    ||= '<span class="match">'
  settings[:after_match]     ||= '</span>'
  settings[:chunk_separator] ||= ' &#8230; ' # ellipsis
  settings[:limit]           ||= 256
  settings[:around]          ||= 5
  settings[:exact_phrase]    ||= false
  settings[:single_passage]  ||= false

  response = Response.new request(:excerpt, excerpts_message(settings))

  # the reply holds exactly one excerpt per submitted document
  settings[:docs].collect { response.next }
end
349
+
350
+ # Update attributes - first parameter is the relevant index, second is an
351
+ # array of attributes to be updated, and the third is a hash, where the
352
+ # keys are the document ids, and the values are arrays with the attribute
353
+ # values - in the same order as the second parameter.
354
+ #
355
+ # Example:
356
+ #
357
+ # client.update('people', ['birthday'], {1 => [Time.local(1982, 8, 20).to_i]})
358
+ #
359
# Sends an attribute update for +index+ and returns the first integer of the
# daemon's reply.
#
# attributes    - array of attribute names being updated.
# values_by_doc - hash of document id => array of new values, in the same
#                 order as +attributes+.
def update(index, attributes, values_by_doc)
  payload = update_message(index, attributes, values_by_doc)

  Response.new(request(:update, payload)).next_int
end
367
+
368
+ # Generates a keyword list for a given query. Each keyword is represented
369
+ # by a hash, with keys :tokenised and :normalised. If return_hits is set to
370
+ # true it will also report on the number of hits and documents for each
371
+ # keyword (see :hits and :docs keys respectively).
372
# Asks the daemon to break +query+ into keywords for +index+. Returns one
# hash per keyword with :tokenised and :normalised, plus :docs and :hits
# when +return_hits+ is true.
def keywords(query, index, return_hits = false)
  payload  = keywords_message(query, index, return_hits)
  response = Response.new(request(:keywords, payload))

  total = response.next_int
  (1..total).map do
    entry = {}
    entry[:tokenised]  = response.next
    entry[:normalised] = response.next

    # statistics are only present in the reply when they were asked for
    if return_hits
      entry[:docs] = response.next_int
      entry[:hits] = response.next_int
    end

    entry
  end
end
391
+
392
+ private
393
+
394
+ # Connects to the Sphinx daemon, and yields a socket to use. The socket is
395
+ # closed at the end of the block.
396
# Opens a connection to the daemon, yields the socket to the block and
# closes the socket when the block finishes, whether it returns or raises.
# Returns the block's value.
#
# A @timeout of 0 means no time limit on opening the connection. Otherwise
# Riddle::ConnectionError is raised if opening takes longer than @timeout
# seconds.
def connect(&block)
  socket =
    if @timeout == 0
      initialise_connection
    else
      begin
        Timeout.timeout(@timeout) { initialise_connection }
      rescue Timeout::Error
        raise Riddle::ConnectionError,
          "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
      end
    end

  begin
    yield socket
  ensure
    socket.close
  end
end
415
+
416
# Opens a socket and performs the version handshake: reads the daemon's
# 4-byte protocol version, then sends this client's version (1). Returns the
# ready-to-use socket.
#
# Raises Riddle::ConnectionError when the daemon closes the connection
# before sending a complete version number, and VersionError when the
# announced version is below 1. The socket is closed before either error is
# raised.
def initialise_connection
  socket = initialise_socket

  # Checking version
  version = socket.recv(4).unpack('N*').first

  # Fewer than four bytes unpack to nothing: the peer hung up (or is not
  # searchd). Comparing nil with 1 would raise NoMethodError and leak the
  # socket.
  if version.nil?
    socket.close
    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} was closed before searchd sent its version"
  end

  if version < 1
    socket.close
    raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
  end

  # Send version
  socket.send [1].pack('N'), 0

  socket
end
431
+
432
# Opens a TCP socket to @server:@port and returns it. A refused connection
# is retried immediately, up to five attempts in total, after which
# Riddle::ConnectionError is raised with the last refusal's message.
def initialise_socket
  attempts = 0

  begin
    TCPSocket.new @server, @port
  rescue Errno::ECONNREFUSED => refusal
    attempts += 1
    retry if attempts < 5

    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} failed. #{refusal.message}"
  end
end
444
+
445
+ # Send a collection of messages, for a command type (eg, search, excerpts,
446
+ # update), to the Sphinx daemon.
447
# Sends one command to the daemon and returns the body of its reply.
#
# command  - key of Commands / Versions (:search, :excerpt, :update,
#            :keywords).
# messages - a message string or an array of them; they are concatenated
#            into a single request body. For :search the number of messages
#            is sent as well, so an array is expected there.
#
# Returns the reply body; for a warning reply the warning text is printed
# and stripped off first.
#
# Raises ResponseError when the reply is empty or shorter than announced,
# when the daemon reports an error or retry status, or when the status is
# unknown.
def request(command, messages)
  response = ""
  status   = -1
  version  = 0
  length   = 0
  message  = Array(messages).join("")
  # lengths below must count bytes, not characters
  if message.respond_to?(:force_encoding)
    message = message.force_encoding('ASCII-8BIT')
  end

  connect do |socket|
    case command
    when :search
      # Message length is +4 to account for the following count value for
      # the number of messages (well, that's what I'm assuming).
      socket.send [
        Commands[command], Versions[command],
        4+message.length, messages.length
      ].pack("nnNN") + message, 0
    else
      socket.send [
        Commands[command], Versions[command], message.length
      ].pack("nnN") + message, 0
    end

    # reply header: status (16 bit), version (16 bit), body length (32 bit)
    header = socket.recv(8)
    status, version, length = header.unpack('n2N')

    while response.length < (length || 0)
      part = socket.recv(length - response.length)
      # An empty read means the peer closed the connection. Stop here and
      # let the length check below report it, instead of looping forever.
      break if part.nil? || part.empty?
      response << part
    end
  end

  if response.empty? || response.length != length
    raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
  end

  case status
  when Statuses[:ok]
    if version < Versions[command]
      puts format("searchd command v.%d.%d older than client (v.%d.%d)",
        version >> 8, version & 0xff,
        Versions[command] >> 8, Versions[command] & 0xff)
    end
    response
  when Statuses[:warning]
    # body starts with a length-prefixed warning, followed by the payload
    length = response[0, 4].unpack('N*').first
    puts response[4, length]
    response[4 + length, response.length - 4 - length]
  when Statuses[:error], Statuses[:retry]
    raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
  else
    raise ResponseError, "Unknown searchd error (status: #{status})"
  end
end
503
+
504
+ # Generation of the message to send to Sphinx for a search.
505
# Serialises one search - the query text, target index, comment and every
# current client setting - into the byte string sent for a search command.
# The order of the appended values is the wire format and must not change.
def query_message(search, index, comments = '')
  packet = Message.new

  # Appends a weighting table: the entry count, then each name/weight pair.
  append_weightings = lambda do |weightings|
    packet.append_int(weightings.length)
    weightings.each do |name, weight|
      packet.append_string(name.to_s)
      packet.append_int(weight)
    end
  end

  # paging, then match / rank / sort modes, then the sort expression
  packet.append_ints(
    @offset, @limit,
    MatchModes[@match_mode], RankModes[@rank_mode], SortModes[@sort_mode]
  )
  packet.append_string(@sort_by)

  # the query text itself
  packet.append_string(search)

  # positional weights
  packet.append_int(@weights.length)
  packet.append_ints(*@weights)

  # target index
  packet.append_string(index)

  # document id range, always sent as 64 bit values
  packet.append_int(1)
  packet.append_64bit_ints(@id_range.first, @id_range.last)

  # filters
  packet.append_int(@filters.length)
  @filters.each { |filter| packet.append(filter.query_message) }

  # grouping and result limits
  packet.append_int(GroupFunctions[@group_function])
  packet.append_string(@group_by)
  packet.append_int(@max_matches)
  packet.append_string(@group_clause)
  packet.append_ints(@cut_off, @retry_count, @retry_delay)
  packet.append_string(@group_distinct)

  # geo anchor: a presence flag, then attribute names and coordinates
  if @anchor.empty?
    packet.append_int(0)
  else
    packet.append_int(1)
    packet.append_string(@anchor[:latitude_attribute])
    packet.append_string(@anchor[:longitude_attribute])
    packet.append_floats(@anchor[:latitude], @anchor[:longitude])
  end

  append_weightings.call(@index_weights)
  packet.append_int(@max_query_time)
  append_weightings.call(@field_weights)

  packet.append_string(comments)

  packet.to_s
end
570
+
571
+ # Generation of the message to send to Sphinx for an excerpts request.
572
# Serialises an excerpts request from +options+ into the byte string sent
# for an excerpt command.
def excerpts_message(options)
  packet = Message.new

  # Bit 1 is always set; each truthy option adds its own bit.
  flags = [
    [:exact_phrase, 2], [:single_passage, 4], [:use_boundaries, 8], [:weight_order, 16]
  ].inject(1) { |bits, (option, bit)| options[option] ? bits | bit : bits }

  packet.append [0, flags].pack('N2') # 0 = mode

  # index and search words, then the markup options
  [:index, :words, :before_match, :after_match, :chunk_separator].each do |key|
    packet.append_string options[key]
  end
  packet.append_ints options[:limit], options[:around]

  packet.append_array options[:docs]

  packet.to_s
end
595
+
596
+ # Generation of the message to send to Sphinx to update attributes of a
597
+ # document.
598
# Serialises an attribute update: the index name, the attribute names, the
# number of documents, then for each document its 64 bit id followed by the
# new integer values.
def update_message(index, attributes, values_by_doc)
  packet = Message.new

  packet.append_string(index)
  packet.append_array(attributes)

  packet.append_int(values_by_doc.length)
  values_by_doc.each do |doc_id, new_values|
    packet.append_64bit_int(doc_id)
    packet.append_ints(*new_values)
  end

  packet.to_s
end
612
+
613
+ # Generates the simple message to send to the daemon for a keywords request.
614
# Serialises a keywords request: the query, the index name and a 1/0 flag
# saying whether hit statistics are wanted.
def keywords_message(query, index, return_hits)
  hits_flag = return_hits ? 1 : 0

  packet = Message.new
  packet.append_string(query)
  packet.append_string(index)
  packet.append_int(hits_flag)
  packet.to_s
end
623
+
624
# Reads the next attribute value from +response+ according to +type+.
# A type above the :multi flag value is a multi-value attribute: the flag
# is subtracted to get the base type and an array is read instead of a
# single value. Floats are read as floats, every other type as an integer.
def attribute_from_type(type, response)
  multi_flag = AttributeTypes[:multi]
  is_multi   = type > multi_flag
  base_type  = is_multi ? type - multi_flag : type

  if AttributeTypes[:float] == base_type
    is_multi ? response.next_float_array : response.next_float
  else
    is_multi ? response.next_int_array : response.next_int
  end
end
634
+ end
635
+ end