thinking-sphinx-099 1.2.12

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +157 -0
  3. data/VERSION.yml +4 -0
  4. data/lib/thinking_sphinx.rb +211 -0
  5. data/lib/thinking_sphinx/active_record.rb +307 -0
  6. data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
  7. data/lib/thinking_sphinx/active_record/delta.rb +87 -0
  8. data/lib/thinking_sphinx/active_record/has_many_association.rb +28 -0
  9. data/lib/thinking_sphinx/active_record/scopes.rb +39 -0
  10. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
  11. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  12. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +136 -0
  13. data/lib/thinking_sphinx/association.rb +164 -0
  14. data/lib/thinking_sphinx/attribute.rb +342 -0
  15. data/lib/thinking_sphinx/class_facet.rb +15 -0
  16. data/lib/thinking_sphinx/configuration.rb +282 -0
  17. data/lib/thinking_sphinx/core/array.rb +7 -0
  18. data/lib/thinking_sphinx/core/string.rb +15 -0
  19. data/lib/thinking_sphinx/deltas.rb +30 -0
  20. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  21. data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta.rb +30 -0
  23. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  24. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  25. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  26. data/lib/thinking_sphinx/deploy/capistrano.rb +100 -0
  27. data/lib/thinking_sphinx/excerpter.rb +22 -0
  28. data/lib/thinking_sphinx/facet.rb +125 -0
  29. data/lib/thinking_sphinx/facet_search.rb +134 -0
  30. data/lib/thinking_sphinx/field.rb +82 -0
  31. data/lib/thinking_sphinx/index.rb +99 -0
  32. data/lib/thinking_sphinx/index/builder.rb +286 -0
  33. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  34. data/lib/thinking_sphinx/property.rb +162 -0
  35. data/lib/thinking_sphinx/rails_additions.rb +150 -0
  36. data/lib/thinking_sphinx/search.rb +707 -0
  37. data/lib/thinking_sphinx/search_methods.rb +421 -0
  38. data/lib/thinking_sphinx/source.rb +150 -0
  39. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  40. data/lib/thinking_sphinx/source/sql.rb +128 -0
  41. data/lib/thinking_sphinx/tasks.rb +165 -0
  42. data/rails/init.rb +14 -0
  43. data/spec/lib/thinking_sphinx/active_record/delta_spec.rb +130 -0
  44. data/spec/lib/thinking_sphinx/active_record/has_many_association_spec.rb +49 -0
  45. data/spec/lib/thinking_sphinx/active_record/scopes_spec.rb +96 -0
  46. data/spec/lib/thinking_sphinx/active_record_spec.rb +364 -0
  47. data/spec/lib/thinking_sphinx/association_spec.rb +239 -0
  48. data/spec/lib/thinking_sphinx/attribute_spec.rb +500 -0
  49. data/spec/lib/thinking_sphinx/configuration_spec.rb +268 -0
  50. data/spec/lib/thinking_sphinx/core/array_spec.rb +9 -0
  51. data/spec/lib/thinking_sphinx/core/string_spec.rb +9 -0
  52. data/spec/lib/thinking_sphinx/excerpter_spec.rb +49 -0
  53. data/spec/lib/thinking_sphinx/facet_search_spec.rb +176 -0
  54. data/spec/lib/thinking_sphinx/facet_spec.rb +333 -0
  55. data/spec/lib/thinking_sphinx/field_spec.rb +154 -0
  56. data/spec/lib/thinking_sphinx/index/builder_spec.rb +455 -0
  57. data/spec/lib/thinking_sphinx/index/faux_column_spec.rb +30 -0
  58. data/spec/lib/thinking_sphinx/index_spec.rb +45 -0
  59. data/spec/lib/thinking_sphinx/rails_additions_spec.rb +203 -0
  60. data/spec/lib/thinking_sphinx/search_methods_spec.rb +152 -0
  61. data/spec/lib/thinking_sphinx/search_spec.rb +1092 -0
  62. data/spec/lib/thinking_sphinx/source_spec.rb +227 -0
  63. data/spec/lib/thinking_sphinx_spec.rb +162 -0
  64. data/tasks/distribution.rb +50 -0
  65. data/tasks/rails.rake +1 -0
  66. data/tasks/testing.rb +83 -0
  67. data/vendor/after_commit/LICENSE +20 -0
  68. data/vendor/after_commit/README +16 -0
  69. data/vendor/after_commit/Rakefile +22 -0
  70. data/vendor/after_commit/init.rb +8 -0
  71. data/vendor/after_commit/lib/after_commit.rb +45 -0
  72. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  73. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  74. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  75. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  76. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  77. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  78. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  79. data/vendor/riddle/lib/riddle.rb +30 -0
  80. data/vendor/riddle/lib/riddle/client.rb +735 -0
  81. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  82. data/vendor/riddle/lib/riddle/client/message.rb +70 -0
  83. data/vendor/riddle/lib/riddle/client/response.rb +94 -0
  84. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  85. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +49 -0
  86. data/vendor/riddle/lib/riddle/configuration/index.rb +146 -0
  87. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  88. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  89. data/vendor/riddle/lib/riddle/configuration/searchd.rb +46 -0
  90. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  91. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  92. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +39 -0
  93. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  94. data/vendor/riddle/lib/riddle/controller.rb +53 -0
  95. metadata +172 -0
@@ -0,0 +1,7 @@
1
module Delayed
  # Mixin that lets an object defer one of its own method calls.
  module MessageSending
    # Wraps a call to +method+ (with +args+) on the receiver in a
    # Delayed::PerformableMethod and hands it to Delayed::Job.enqueue.
    # Returns whatever Delayed::Job.enqueue returns.
    def send_later(method, *args)
      performable = Delayed::PerformableMethod.new(self, method.to_sym, args)
      Delayed::Job.enqueue(performable)
    end
  end
end
@@ -0,0 +1,55 @@
1
module Delayed
  # A storable description of a method call: the target object, the method
  # name and its arguments. Classes and ActiveRecord instances are replaced
  # by short marker strings ("CLASS:Name" / "AR:Name:id") when the call is
  # recorded and are looked up again when the call is performed.
  class PerformableMethod < Struct.new(:object, :method, :args)
    # Anchored with \A and \z (not ^ and $) so that only a string that is
    # *entirely* a marker is treated as one. With line anchors, an ordinary
    # multi-line string argument containing a line such as "CLASS:Kernel"
    # would be swapped for the constant when the job runs.
    CLASS_STRING_FORMAT = /\ACLASS\:([A-Z][\w\:]+)\z/
    AR_STRING_FORMAT    = /\AAR\:([A-Z][\w\:]+)\:(\d+)\z/

    # Records a call of +method+ on +object+ with +args+.
    #
    # Raises NoMethodError when +object+ does not respond to +method+.
    def initialize(object, method, args)
      unless object.respond_to?(method)
        # Report the target object; the struct itself is still empty here.
        raise NoMethodError, "undefined method `#{method}' for #{object.inspect}"
      end

      self.object = dump(object)
      self.args   = args.map { |a| dump(a) }
      self.method = method.to_sym
    end

    # Human-readable name of the recorded call: "Klass.method" for class
    # targets, "Klass#method" for record targets, "Unknown#method" otherwise.
    def display_name
      case self.object
      when CLASS_STRING_FORMAT then "#{$1}.#{method}"
      when AR_STRING_FORMAT    then "#{$1}##{method}"
      else "Unknown##{method}"
      end
    end

    # Restores the target and arguments and sends the recorded method.
    # Returns true instead of raising when a referenced record can no longer
    # be found.
    def perform
      load(object).send(method, *args.map { |a| load(a) })
    rescue ActiveRecord::RecordNotFound
      # We cannot do anything about objects which were deleted in the meantime
      true
    end

    private

    # Turns a marker string back into the class or record it stands for;
    # anything else is returned untouched.
    def load(arg)
      case arg
      when CLASS_STRING_FORMAT then $1.constantize
      when AR_STRING_FORMAT    then $1.constantize.find($2)
      else arg
      end
    end

    # Replaces classes and ActiveRecord instances with their marker strings;
    # anything else is stored as-is.
    def dump(arg)
      case arg
      when Class              then class_to_string(arg)
      when ActiveRecord::Base then ar_to_string(arg)
      else arg
      end
    end

    # Marker for a record: "AR:<class>:<id>".
    def ar_to_string(obj)
      "AR:#{obj.class}:#{obj.id}"
    end

    # Marker for a class: "CLASS:<name>".
    def class_to_string(obj)
      "CLASS:#{obj.name}"
    end
  end
end
@@ -0,0 +1,54 @@
1
module Delayed
  # Loop that repeatedly asks Delayed::Job to work off pending jobs until a
  # TERM or INT signal is received.
  class Worker
    # Seconds to sleep when a pass processed no jobs.
    SLEEP = 5

    # Class-level logger; picked from Merb or the Rails default logger when
    # either is defined, otherwise left nil.
    cattr_accessor :logger
    self.logger = if defined?(Merb::Logger)
      Merb.logger
    elsif defined?(RAILS_DEFAULT_LOGGER)
      RAILS_DEFAULT_LOGGER
    end

    # options:
    # * :quiet        - suppress console output from #say
    # * :min_priority - forwarded to Delayed::Job.min_priority= when given
    # * :max_priority - forwarded to Delayed::Job.max_priority= when given
    def initialize(options={})
      @quiet = options[:quiet]

      [:min_priority, :max_priority].each do |bound|
        Delayed::Job.send("#{bound}=", options[bound]) if options.has_key?(bound)
      end
    end

    # Runs the work loop. Locks are always cleared on the way out.
    def start
      say "*** Starting job worker #{Delayed::Job.worker_name}"

      # Both signals only raise the global stop flag; the loop checks it.
      %w(TERM INT).each do |signal|
        trap(signal) { say 'Exiting...'; $exit = true }
      end

      loop do
        result = nil
        realtime = Benchmark.realtime { result = Delayed::Job.work_off }

        # NOTE(review): work_off presumably returns [succeeded, failed] --
        # the sum is used as the job count and the last entry as failures.
        count = result.sum

        break if $exit

        if count.zero?
          sleep(SLEEP)
        else
          say "#{count} jobs processed at %.4f j/s, %d failed ..." % [count / realtime, result.last]
        end

        break if $exit
      end

    ensure
      Delayed::Job.clear_locks!
    end

    # Prints +text+ unless the worker is quiet, and logs it when a logger is set.
    def say(text)
      puts text unless @quiet
      logger.info text if logger
    end

  end
end
@@ -0,0 +1,30 @@
1
+ require 'socket'
2
+ require 'timeout'
3
+
4
+ require 'riddle/client'
5
+ require 'riddle/configuration'
6
+ require 'riddle/controller'
7
+
8
module Riddle #:nodoc:
  # Raised when a connection to searchd cannot be established.
  class ConnectionError < StandardError #:nodoc:
  end

  module Version #:nodoc:
    Major = 0
    Minor = 9
    Tiny  = 9
    # Revision number for RubyForge's sake, taken from what Sphinx prints on
    # the command line.
    Rev = 1785
    # Release counter for fixes made here beyond feature parity with Sphinx.
    Release = 6

    # "Major.Minor.Tiny". Note that inside this module the name String refers
    # to this constant rather than to ::String.
    String     = [Major, Minor, Tiny].join('.')
    # "Major.Minor.Tiny.Rev.Release".
    GemVersion = [Major, Minor, Tiny, Rev, Release].join('.')
  end

  class << self
    # Returns a copy of +string+ in which each of the characters
    # ( ) | - ! @ ~ " & / \ ^ $ = is preceded by a backslash.
    def escape(string)
      string.gsub(/[\(\)\|\-!@~"&\/\\\^\$=]/) { |special| "\\" + special }
    end
  end
end
@@ -0,0 +1,735 @@
1
+ require 'riddle/client/filter'
2
+ require 'riddle/client/message'
3
+ require 'riddle/client/response'
4
+
5
+ module Riddle
6
+ class VersionError < StandardError; end
7
+ class ResponseError < StandardError; end
8
+
9
+ # This class was heavily based on the existing Client API by Dmytro Shteflyuk
10
+ # and Alexy Kovyrin. Their code worked fine, I just wanted something a bit
11
+ # more Ruby-ish (ie. lowercase and underscored method names). I also have
12
+ # used a few helper classes, just to neaten things up.
13
+ #
14
+ # Feel free to use it wherever. Send bug reports, patches, comments and
15
+ # suggestions to pat at freelancing-gods dot com.
16
+ #
17
+ # Most properties of the client are accessible through attribute accessors,
18
+ # and where relevant use symbols instead of the long constants common in
19
+ # other clients.
20
+ # Some examples:
21
+ #
22
+ # client.sort_mode = :extended
23
+ # client.sort_by = "birthday DESC"
24
+ # client.match_mode = :extended
25
+ #
26
+ # To add a filter, you will need to create a Filter object:
27
+ #
28
+ # client.filters << Riddle::Client::Filter.new("birthday",
29
+ # Time.local(1975, 1, 1).to_i..Time.local(1985, 1, 1).to_i, false)
30
+ #
31
+ class Client
32
# Lookup tables mapping the symbols used by this client to the numeric
# codes written to the wire. The trailing comments name the matching
# Sphinx constants.

# Command identifiers.
Commands = {
  :search   => 0,   # SEARCHD_COMMAND_SEARCH
  :excerpt  => 1,   # SEARCHD_COMMAND_EXCERPT
  :update   => 2,   # SEARCHD_COMMAND_UPDATE
  :keywords => 3,   # SEARCHD_COMMAND_KEYWORDS
  :persist  => 4,   # SEARCHD_COMMAND_PERSIST
  :status   => 5,   # SEARCHD_COMMAND_STATUS
  :query    => 6    # SEARCHD_COMMAND_QUERY
}

# Per-command protocol versions sent with each request (no :persist entry).
Versions = {
  :search   => 0x116,   # VER_COMMAND_SEARCH
  :excerpt  => 0x100,   # VER_COMMAND_EXCERPT
  :update   => 0x102,   # VER_COMMAND_UPDATE
  :keywords => 0x100,   # VER_COMMAND_KEYWORDS
  :status   => 0x100,   # VER_COMMAND_STATUS
  :query    => 0x100    # VER_COMMAND_QUERY
}

# Response status codes.
Statuses = {
  :ok      => 0,   # SEARCHD_OK
  :error   => 1,   # SEARCHD_ERROR
  :retry   => 2,   # SEARCHD_RETRY
  :warning => 3    # SEARCHD_WARNING
}

# Values accepted by match_mode.
MatchModes = {
  :all       => 0,   # SPH_MATCH_ALL
  :any       => 1,   # SPH_MATCH_ANY
  :phrase    => 2,   # SPH_MATCH_PHRASE
  :boolean   => 3,   # SPH_MATCH_BOOLEAN
  :extended  => 4,   # SPH_MATCH_EXTENDED
  :fullscan  => 5,   # SPH_MATCH_FULLSCAN
  :extended2 => 6    # SPH_MATCH_EXTENDED2
}

# Values accepted by rank_mode.
RankModes = {
  :proximity_bm25 => 0,   # SPH_RANK_PROXIMITY_BM25
  :bm25           => 1,   # SPH_RANK_BM25
  :none           => 2,   # SPH_RANK_NONE
  :wordcount      => 3,   # SPH_RANK_WORDCOUNT
  :proximity      => 4,   # SPH_RANK_PROXIMITY
  :match_any      => 5,   # SPH_RANK_MATCHANY
  :fieldmask      => 6    # SPH_RANK_FIELDMASK
}

# Values accepted by sort_mode.
SortModes = {
  :relevance     => 0,   # SPH_SORT_RELEVANCE
  :attr_desc     => 1,   # SPH_SORT_ATTR_DESC
  :attr_asc      => 2,   # SPH_SORT_ATTR_ASC
  :time_segments => 3,   # SPH_SORT_TIME_SEGMENTS
  :extended      => 4,   # SPH_SORT_EXTENDED
  :expr          => 5    # SPH_SORT_EXPR
}

# Attribute type codes; :multi is a high flag value rather than a small id.
AttributeTypes = {
  :integer   => 1,            # SPH_ATTR_INTEGER
  :timestamp => 2,            # SPH_ATTR_TIMESTAMP
  :ordinal   => 3,            # SPH_ATTR_ORDINAL
  :bool      => 4,            # SPH_ATTR_BOOL
  :float     => 5,            # SPH_ATTR_FLOAT
  :bigint    => 6,            # SPH_ATTR_BIGINT
  :multi     => 0x40000000    # SPH_ATTR_MULTI
}

# Values accepted by group_function.
GroupFunctions = {
  :day      => 0,   # SPH_GROUPBY_DAY
  :week     => 1,   # SPH_GROUPBY_WEEK
  :month    => 2,   # SPH_GROUPBY_MONTH
  :year     => 3,   # SPH_GROUPBY_YEAR
  :attr     => 4,   # SPH_GROUPBY_ATTR
  :attrpair => 5    # SPH_GROUPBY_ATTRPAIR
}

# Filter kinds.
FilterTypes = {
  :values      => 0,   # SPH_FILTER_VALUES
  :range       => 1,   # SPH_FILTER_RANGE
  :float_range => 2    # SPH_FILTER_FLOATRANGE
}
111
+
112
+ attr_accessor :server, :port, :offset, :limit, :max_matches,
113
+ :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
114
+ :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
115
+ :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
116
+ :max_query_time, :field_weights, :timeout, :overrides, :select
117
+ attr_reader :queue
118
+
119
# Creates a client for the given server and port. Either may be omitted, in
# which case "localhost" and 3312 are used. Every other setting starts at
# the defaults applied by #reset and can be changed through the accessors.
# The query queue starts empty and no socket is open.
def initialize(server=nil, port=nil)
  @server, @port = server || "localhost", port || 3312
  @socket = nil

  reset

  @queue = []
end
131
+
132
# Puts every search setting back to its default value. The server, port,
# socket and query queue are not touched.
def reset
  # paging and matching
  @offset, @limit, @max_matches = 0, 20, 1000
  @match_mode = :all
  @rank_mode  = :proximity_bm25

  # sorting
  @sort_mode = :relevance
  @sort_by   = ''

  # weighting
  @weights = []
  # string keys are index names, integer values are weightings
  @index_weights = {}
  # string keys are field names, integer values are weightings
  @field_weights = {}

  # filtering
  @id_range = 0..0
  @filters  = []

  # grouping
  @group_by       = ''
  @group_function = :day
  @group_clause   = '@group desc'
  @group_distinct = ''

  # limits and retries
  @cut_off, @retry_count, @retry_delay = 0, 0, 0
  @max_query_time = 0
  @timeout        = 0

  # geo anchor, overrides and select list
  @anchor    = {}
  @overrides = {}
  @select    = "*"
end
162
+
163
# Sets the geo-anchor point: the names of the attributes holding latitude
# and longitude, plus the reference position itself.
#
# Sphinx expects the latitude and longitude returned from your SQL source,
# and the reference position given here, to be in radians. For geo
# searches you must also set match_mode to :extended. To order results by
# distance, sort on the computed distance, e.g. sort_mode = :extended with
# sort_by = '@geodist asc'.
#
# Example:
#   client.set_anchor('lat', -0.6591741, 'long', 2.530770)
#
# Returns the stored anchor hash.
def set_anchor(lat_attr, lat, long_attr, long)
  anchor_point = {
    :latitude_attribute  => lat_attr,
    :latitude            => lat,
    :longitude_attribute => long_attr,
    :longitude           => long
  }
  @anchor = anchor_point
end
182
+
183
# Adds a query to the queue without running it. Takes the same parameters
# as #query; the current client settings are baked into the queued message
# at this point. Returns the queue.
def append_query(search, index = '*', comments = '')
  queued = query_message(search, index, comments)
  @queue << queued
end
188
+
189
# Sends every queued query to searchd in one request and returns an array
# with one results hash per query (see #query for the hash layout).
#
# The queue is emptied whether or not the request succeeds. Without that,
# a query whose request failed would stay queued and be sent again together
# with the next one, and #query would then return the stale query's result.
def run
  response = Response.new request(:search, @queue)

  @queue.map do
    result = {
      :matches         => [],
      :fields          => [],
      :attributes      => {},
      :attribute_names => [],
      :words           => {}
    }

    result[:status] = response.next_int
    case result[:status]
    when Statuses[:warning]
      result[:warning] = response.next
    when Statuses[:error]
      # A failed query carries only its error text; skip to the next one.
      result[:error] = response.next
      next result
    end

    result[:fields] = response.next_array

    # Attribute schema: name/type pairs, kept in wire order.
    response.next_int.times do
      attribute_name = response.next
      type           = response.next_int

      result[:attributes][attribute_name] = type
      result[:attribute_names] << attribute_name
    end

    match_count = response.next_int
    is_64_bit   = response.next_int
    match_count.times do |position|
      doc    = is_64_bit > 0 ? response.next_64bit_int : response.next_int
      weight = response.next_int

      match = {:doc => doc, :weight => weight, :index => position, :attributes => {}}
      # Attribute values follow each match in schema order.
      result[:attribute_names].each do |attr|
        match[:attributes][attr] = attribute_from_type(
          result[:attributes][attr], response
        )
      end
      result[:matches] << match
    end

    result[:total]       = response.next_int.to_i
    result[:total_found] = response.next_int.to_i
    # Reported in milliseconds; stored as seconds rounded to three places.
    result[:time]        = ('%.3f' % (response.next_int / 1000.0)).to_f

    response.next_int.times do
      word = response.next
      docs = response.next_int
      hits = response.next_int
      result[:words][word] = {:docs => docs, :hits => hits}
    end

    result
  end
ensure
  @queue.clear
end
255
+
256
# Runs a single search against the Sphinx daemon. All indexes are searched
# unless +index+ names a specific one. +search+ is a query string in the
# form Sphinx expects for the current match mode.
#
# Returns a hash with these keys:
#
# * :matches         - array of hashes, one per result, each holding the
#                      document id (<tt>:doc</tt>), the result weighting
#                      (<tt>:weight</tt>) and the document's attributes
#                      (<tt>:attributes</tt>)
# * :fields          - list of field names
# * :attribute_names - list of attribute names
# * :attributes      - hash of attribute name => attribute type
# * :words           - hash keyed by search word, each value holding the
#                      number of documents and hits for that word, e.g.
#
#                        results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
#
# * :total           - number of matches available
# * :total_found     - total number of matches, which may exceed :total
#                      depending on your sphinx configuration
# * :time            - how long the query took, in seconds (the daemon's
#                      millisecond figure divided by 1000)
# * :status          - status code for the query
# * :warning         - warning text, when the query produced one
# * :error           - error text, when the query failed
#
# Any queries already waiting in the queue are sent along with this one;
# the result returned is that of the first query in the queue.
def query(search, index = '*', comments = '')
  @queue << query_message(search, index, comments)
  all_results = self.run
  all_results.first
end
301
+
302
# Builds excerpts from the text of documents: the search terms (+words+)
# are wrapped in highlight markup, and the text may be abbreviated to fit
# a length limit.
#
# Required options:
# * :docs  - array of document texts
# * :words - the search terms
# * :index - index name (defaults to '*')
#
# Optional settings:
# * :before_match    (defaults to <span class="match">)
# * :after_match     (defaults to </span>)
# * :chunk_separator (defaults to ' &#8230; ' - which is an HTML ellipsis)
# * :limit           (defaults to 256)
# * :around          (defaults to 5)
# * :exact_phrase    (defaults to false)
# * :single_passage  (defaults to false)
#
# The defaults differ from the official PHP client in favour of semantic
# HTML markup.
#
# Example:
#
#   client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
#   #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
#
# Workflow:
#
# Excerpt creation is completely separate from searching. The nominated
# index is only used to discover encoding and charset information, so:
#
# 1. Do the sphinx query.
# 2. Fetch the documents found by sphinx from their repositories.
# 3. Pass the documents' text to +excerpts+ to mark up the matched terms.
#
# The +options+ hash passed in is left unmodified; defaults are applied to
# a copy. Returns an array with one excerpt per entry in :docs.
def excerpts(options = {})
  settings = options.dup

  settings[:index]           ||= '*'
  settings[:before_match]    ||= '<span class="match">'
  settings[:after_match]     ||= '</span>'
  settings[:chunk_separator] ||= ' &#8230; ' # ellipsis
  settings[:limit]           ||= 256
  settings[:around]          ||= 5
  settings[:exact_phrase]    ||= false
  settings[:single_passage]  ||= false

  response = Response.new request(:excerpt, excerpts_message(settings))

  # One excerpt comes back per document, in the same order.
  settings[:docs].map { response.next }
end
361
+
362
# Updates attribute values in an index.
#
# * +index+         - name of the index to update
# * +attributes+    - array of attribute names
# * +values_by_doc+ - hash of document id => array of new values, given
#                     in the same order as +attributes+
#
# Example:
#
#   client.update('people', ['birthday'], {1 => [Time.local(1982, 8, 20).to_i]})
#
# Returns the integer read from the daemon's response.
def update(index, attributes, values_by_doc)
  payload = update_message(index, attributes, values_by_doc)
  Response.new(request(:update, payload)).next_int
end
379
+
380
# Asks the daemon for the keyword list of +query+ against +index+. Each
# keyword comes back as a hash with :tokenised and :normalised entries.
# When +return_hits+ is true each hash also has :docs and :hits counts.
def keywords(query, index, return_hits = false)
  payload  = keywords_message(query, index, return_hits)
  response = Response.new request(:keywords, payload)

  (0...response.next_int).map do
    # Fields are read in wire order: tokenised form, then normalised form.
    keyword = {:tokenised => response.next, :normalised => response.next}

    if return_hits
      keyword[:docs] = response.next_int
      keyword[:hits] = response.next_int
    end

    keyword
  end
end
403
+
404
# Requests the daemon's status and returns it as a hash of
# symbol key => value, one entry per row of the response.
def status
  response = Response.new request(:status, Message.new)

  rows = response.next_int
  response.next_int # column count; read to advance the response, unused

  report = {}
  (0...rows).each do
    key = response.next.to_sym
    report[key] = response.next
  end
  report
end
416
+
417
# Registers an override for +attribute+: +type+ is a key of AttributeTypes
# and +values+ maps document ids to the value to use. Replaces any existing
# override for the same attribute and returns the stored entry.
def add_override(attribute, type, values)
  override = {:type => type, :values => values}
  @overrides[attribute] = override
end
420
+
421
# Opens a socket that stays open across requests and sends the persist
# command on it. Raises if a socket is already open (see open_socket).
def open
  open_socket

  persist_command = [Commands[:persist], 0, 4, 1].pack("nnNN")
  @socket.send persist_command, 0
end
428
+
429
# Closes the socket opened by #open. Raises "Not Connected" when no socket
# is open (see close_socket). Returns true.
def close
  close_socket
end
432
+
433
+ private
434
+
435
# Connects to the daemon and stores the socket in @socket. Returns true.
#
# Raises "Already Connected" when a socket is already open, and
# Riddle::ConnectionError when a non-zero @timeout elapses before the
# connection is set up. A @timeout of 0 means no time limit.
def open_socket
  raise "Already Connected" unless @socket.nil?

  if @timeout == 0
    @socket = initialise_connection
    return true
  end

  begin
    Timeout.timeout(@timeout) { @socket = initialise_connection }
  rescue Timeout::Error
    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
  end

  true
end
451
+
452
# Closes the current socket and forgets it. Returns true.
#
# Raises "Not Connected" when no socket is open. The reference is dropped
# even if closing raises, so a failed close cannot leave the client
# permanently reporting "Already Connected".
def close_socket
  raise "Not Connected" if @socket.nil?

  begin
    @socket.close
  ensure
    @socket = nil
  end

  true
end
460
+
461
# Yields a socket connected to the Sphinx daemon and returns the block's
# value. When a socket is already open (see #open) it is reused and left
# open; otherwise one is opened just for the block and closed afterwards,
# even if the block raises.
def connect(&block)
  return yield(@socket) unless @socket.nil?

  open_socket
  begin
    yield @socket
  ensure
    close_socket
  end
end
475
+
476
# Opens a socket to the daemon and performs the version handshake: sends
# client version 1 and reads the daemon's version. Returns the socket.
#
# Raises VersionError when the daemon reports a version below 1, and
# Riddle::ConnectionError when the connection yields no complete version
# (for example because the peer closed it). The socket is closed before
# either error is raised.
def initialise_connection
  socket = initialise_socket

  # Send version
  socket.send [1].pack('N'), 0

  # Checking version. recv may return nil or fewer than four bytes when the
  # peer hangs up, in which case there is no version to compare.
  handshake = socket.recv(4)
  version   = handshake.to_s.unpack('N*').first

  if version.nil?
    socket.close
    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} closed before searchd sent its version"
  end

  if version < 1
    socket.close
    raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
  end

  socket
end
491
+
492
# Opens a TCP socket to @server on @port and returns it. A refused
# connection is retried immediately; after five refused attempts a
# Riddle::ConnectionError carrying the last error's message is raised.
def initialise_socket
  attempts = 0

  begin
    TCPSocket.new(@server, @port)
  rescue Errno::ECONNREFUSED => e
    attempts += 1
    retry if attempts < 5
    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} failed. #{e.message}"
  end
end
504
+
505
# Sends the message(s) for one command (eg, search, excerpts, update) to
# the Sphinx daemon and returns the raw response body.
#
# * +command+  - key of Commands / Versions
# * +messages+ - a message string or an array of them; they are joined
#                and sent as bytes
#
# Raises ResponseError when the daemon sends nothing, sends fewer bytes
# than its header announced, or answers with an error, retry or unknown
# status. Warnings from the daemon are printed and stripped from the
# returned body.
def request(command, messages)
  response = ""
  status   = -1
  version  = 0
  length   = 0
  message  = Array(messages).join("")
  if message.respond_to?(:force_encoding)
    message = message.force_encoding('ASCII-8BIT')
  end

  connect do |socket|
    case command
    when :search
      # Message length is +4 to account for the following count value for
      # the number of messages.
      socket.send [
        Commands[command], Versions[command],
        4+message.length, messages.length
      ].pack("nnNN") + message, 0
    when :status
      socket.send [
        Commands[command], Versions[command], 4, 1
      ].pack("nnNN"), 0
    else
      socket.send [
        Commands[command], Versions[command], message.length
      ].pack("nnN") + message, 0
    end

    # Header: status (16 bit), version (16 bit), body length (32 bit).
    # A nil or short header unpacks to nils and is reported below.
    header = socket.recv(8)
    status, version, length = header.to_s.unpack('n2N')

    while response.length < (length || 0)
      part = socket.recv(length - response.length)
      # An empty (or nil) read means the peer closed the connection; stop
      # instead of spinning forever waiting for bytes that will never come.
      break if part.nil? || part.empty?
      response << part
    end
  end

  if response.empty? || response.length != length
    raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
  end

  case status
  when Statuses[:ok]
    if version < Versions[command]
      puts format("searchd command v.%d.%d older than client (v.%d.%d)",
        version >> 8, version & 0xff,
        Versions[command] >> 8, Versions[command] & 0xff)
    end
    response
  when Statuses[:warning]
    # Body starts with a length-prefixed warning; print it and return the rest.
    length = response[0, 4].unpack('N*').first
    puts response[4, length]
    response[4 + length, response.length - 4 - length]
  when Statuses[:error], Statuses[:retry]
    raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
  else
    raise ResponseError, "Unknown searchd error (status: #{status})"
  end
end
567
+
568
# Builds the message sent to Sphinx for one search, from the arguments and
# the client's current settings. The order of the appends below is the
# wire format and must not change. Returns the message as a string.
def query_message(search, index, comments = '')
  message = Message.new

  # Mode, Limits, Sort Mode
  message.append_ints(@offset, @limit, MatchModes[@match_mode],
    RankModes[@rank_mode], SortModes[@sort_mode])
  message.append_string(@sort_by)

  # Query
  message.append_string(search)

  # Weights
  message.append_int(@weights.length)
  message.append_ints(*@weights)

  # Index
  message.append_string(index)

  # ID Range
  # NOTE(review): the leading 1 presumably flags 64-bit ids -- confirm
  # against the searchd protocol.
  message.append_int(1)
  message.append_64bit_ints(@id_range.first, @id_range.last)

  # Filters
  message.append_int(@filters.length)
  @filters.each { |filter| message.append(filter.query_message) }

  # Grouping
  message.append_int(GroupFunctions[@group_function])
  message.append_string(@group_by)
  message.append_int(@max_matches)
  message.append_string(@group_clause)
  message.append_ints(@cut_off, @retry_count, @retry_delay)
  message.append_string(@group_distinct)

  # Anchor Point
  if @anchor.empty?
    message.append_int(0)
  else
    message.append_int(1)
    message.append_string(@anchor[:latitude_attribute])
    message.append_string(@anchor[:longitude_attribute])
    message.append_floats(@anchor[:latitude], @anchor[:longitude])
  end

  # Per Index Weights
  message.append_int(@index_weights.length)
  @index_weights.each do |index_name, weight|
    message.append_string(index_name.to_s)
    message.append_int(weight)
  end

  # Max Query Time
  message.append_int(@max_query_time)

  # Per Field Weights
  message.append_int(@field_weights.length)
  @field_weights.each do |field_name, weight|
    message.append_string(field_name.to_s)
    message.append_int(weight)
  end

  message.append_string(comments)

  # Overrides
  message.append_int(@overrides.length)
  @overrides.each do |attribute, override|
    message.append_string(attribute.to_s)
    message.append_int(AttributeTypes[override[:type]])
    message.append_int(override[:values].length)

    # The value encoding depends only on the override's declared type.
    appender = case override[:type]
    when :float  then :append_float
    when :bigint then :append_64bit_int
    else              :append_int
    end

    override[:values].each do |doc_id, value|
      message.append_64bit_int(doc_id)
      message.send(appender, value)
    end
  end

  message.append_string(@select)

  message.to_s
end
656
+
657
# Builds the message sent to Sphinx for an excerpts request from the
# (already defaulted) +options+ hash. Returns the message as a string.
def excerpts_message(options)
  message = Message.new

  # Bit 1 is always set; each truthy option adds its own bit.
  flags = 1
  [
    [:exact_phrase,   2],
    [:single_passage, 4],
    [:use_boundaries, 8],
    [:weight_order,   16]
  ].each do |option, bit|
    flags |= bit if options[option]
  end

  message.append([0, flags].pack('N2')) # 0 = mode
  message.append_string(options[:index])
  message.append_string(options[:words])

  # options
  message.append_string(options[:before_match])
  message.append_string(options[:after_match])
  message.append_string(options[:chunk_separator])
  message.append_ints(options[:limit], options[:around])

  message.append_array(options[:docs])

  message.to_s
end
681
+
682
# Builds the message sent to Sphinx to update document attributes.
#
# * +index+         - name of the index
# * +attributes+    - array of attribute names
# * +values_by_doc+ - hash of document id => array of values, in the same
#                     order as +attributes+; a value that is itself an
#                     array is sent as a counted list
#
# Returns the message as a string. An empty +values_by_doc+ produces a
# message with zero documents instead of raising.
def update_message(index, attributes, values_by_doc)
  message = Message.new

  message.append_string index
  message.append_int attributes.length

  # Each attribute is flagged as multi-valued or not, judged from the first
  # document's values (none at all when there are no documents).
  sample_values = values_by_doc.values.first || []
  attributes.each_with_index do |attribute, position|
    message.append_string attribute
    message.append_boolean sample_values[position].is_a?(Array)
  end

  message.append_int values_by_doc.length
  values_by_doc.each do |doc_id, values|
    message.append_64bit_int doc_id # document ID
    values.each do |value|
      case value
      when Array
        message.append_int value.length
        message.append_ints(*value)
      else
        message.append_int value
      end
    end
  end

  message.to_s
end
710
+
711
# Builds the message sent to the daemon for a keywords request: the query,
# the index name, and 1 or 0 for whether hit counts are wanted. Returns the
# message as a string.
def keywords_message(query, index, return_hits)
  hits_flag = return_hits ? 1 : 0

  message = Message.new
  message.append_string(query)
  message.append_string(index)
  message.append_int(hits_flag)
  message.to_s
end
721
+
722
# Reads the next attribute value from +response+ according to its numeric
# +type+ code. Codes above AttributeTypes[:multi] are multi-valued: the
# flag is subtracted to find the base type and an array is read instead of
# a single value. Floats and bigints get their own readers; every other
# type is read as an integer.
def attribute_from_type(type, response)
  is_multi = type > AttributeTypes[:multi]
  type -= AttributeTypes[:multi] if is_multi

  if AttributeTypes[:float] == type
    is_multi ? response.next_float_array : response.next_float
  elsif AttributeTypes[:bigint] == type
    # NOTE(review): "next_64bit_int_arry" looks like a misspelling of
    # "next_64bit_int_array" -- confirm which name Response defines.
    is_multi ? response.next_64bit_int_arry : response.next_64bit_int
  else
    is_multi ? response.next_int_array : response.next_int
  end
end
734
+ end
735
+ end