thinking-sphinx-099 1.2.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +157 -0
  3. data/VERSION.yml +4 -0
  4. data/lib/thinking_sphinx.rb +211 -0
  5. data/lib/thinking_sphinx/active_record.rb +307 -0
  6. data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
  7. data/lib/thinking_sphinx/active_record/delta.rb +87 -0
  8. data/lib/thinking_sphinx/active_record/has_many_association.rb +28 -0
  9. data/lib/thinking_sphinx/active_record/scopes.rb +39 -0
  10. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
  11. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  12. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +136 -0
  13. data/lib/thinking_sphinx/association.rb +164 -0
  14. data/lib/thinking_sphinx/attribute.rb +342 -0
  15. data/lib/thinking_sphinx/class_facet.rb +15 -0
  16. data/lib/thinking_sphinx/configuration.rb +282 -0
  17. data/lib/thinking_sphinx/core/array.rb +7 -0
  18. data/lib/thinking_sphinx/core/string.rb +15 -0
  19. data/lib/thinking_sphinx/deltas.rb +30 -0
  20. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  21. data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta.rb +30 -0
  23. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  24. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  25. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  26. data/lib/thinking_sphinx/deploy/capistrano.rb +100 -0
  27. data/lib/thinking_sphinx/excerpter.rb +22 -0
  28. data/lib/thinking_sphinx/facet.rb +125 -0
  29. data/lib/thinking_sphinx/facet_search.rb +134 -0
  30. data/lib/thinking_sphinx/field.rb +82 -0
  31. data/lib/thinking_sphinx/index.rb +99 -0
  32. data/lib/thinking_sphinx/index/builder.rb +286 -0
  33. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  34. data/lib/thinking_sphinx/property.rb +162 -0
  35. data/lib/thinking_sphinx/rails_additions.rb +150 -0
  36. data/lib/thinking_sphinx/search.rb +707 -0
  37. data/lib/thinking_sphinx/search_methods.rb +421 -0
  38. data/lib/thinking_sphinx/source.rb +150 -0
  39. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  40. data/lib/thinking_sphinx/source/sql.rb +128 -0
  41. data/lib/thinking_sphinx/tasks.rb +165 -0
  42. data/rails/init.rb +14 -0
  43. data/spec/lib/thinking_sphinx/active_record/delta_spec.rb +130 -0
  44. data/spec/lib/thinking_sphinx/active_record/has_many_association_spec.rb +49 -0
  45. data/spec/lib/thinking_sphinx/active_record/scopes_spec.rb +96 -0
  46. data/spec/lib/thinking_sphinx/active_record_spec.rb +364 -0
  47. data/spec/lib/thinking_sphinx/association_spec.rb +239 -0
  48. data/spec/lib/thinking_sphinx/attribute_spec.rb +500 -0
  49. data/spec/lib/thinking_sphinx/configuration_spec.rb +268 -0
  50. data/spec/lib/thinking_sphinx/core/array_spec.rb +9 -0
  51. data/spec/lib/thinking_sphinx/core/string_spec.rb +9 -0
  52. data/spec/lib/thinking_sphinx/excerpter_spec.rb +49 -0
  53. data/spec/lib/thinking_sphinx/facet_search_spec.rb +176 -0
  54. data/spec/lib/thinking_sphinx/facet_spec.rb +333 -0
  55. data/spec/lib/thinking_sphinx/field_spec.rb +154 -0
  56. data/spec/lib/thinking_sphinx/index/builder_spec.rb +455 -0
  57. data/spec/lib/thinking_sphinx/index/faux_column_spec.rb +30 -0
  58. data/spec/lib/thinking_sphinx/index_spec.rb +45 -0
  59. data/spec/lib/thinking_sphinx/rails_additions_spec.rb +203 -0
  60. data/spec/lib/thinking_sphinx/search_methods_spec.rb +152 -0
  61. data/spec/lib/thinking_sphinx/search_spec.rb +1092 -0
  62. data/spec/lib/thinking_sphinx/source_spec.rb +227 -0
  63. data/spec/lib/thinking_sphinx_spec.rb +162 -0
  64. data/tasks/distribution.rb +50 -0
  65. data/tasks/rails.rake +1 -0
  66. data/tasks/testing.rb +83 -0
  67. data/vendor/after_commit/LICENSE +20 -0
  68. data/vendor/after_commit/README +16 -0
  69. data/vendor/after_commit/Rakefile +22 -0
  70. data/vendor/after_commit/init.rb +8 -0
  71. data/vendor/after_commit/lib/after_commit.rb +45 -0
  72. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  73. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  74. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  75. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  76. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  77. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  78. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  79. data/vendor/riddle/lib/riddle.rb +30 -0
  80. data/vendor/riddle/lib/riddle/client.rb +735 -0
  81. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  82. data/vendor/riddle/lib/riddle/client/message.rb +70 -0
  83. data/vendor/riddle/lib/riddle/client/response.rb +94 -0
  84. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  85. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +49 -0
  86. data/vendor/riddle/lib/riddle/configuration/index.rb +146 -0
  87. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  88. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  89. data/vendor/riddle/lib/riddle/configuration/searchd.rb +46 -0
  90. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  91. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  92. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +39 -0
  93. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  94. data/vendor/riddle/lib/riddle/controller.rb +53 -0
  95. metadata +172 -0
@@ -0,0 +1,7 @@
1
module Delayed
  # Mixin that lets an object hand one of its own method calls to the
  # delayed job queue instead of running it straight away.
  module MessageSending
    # Wraps a call to +method+ (with +args+) on the receiver in a
    # Delayed::PerformableMethod and passes it to Delayed::Job.enqueue.
    # Returns whatever Delayed::Job.enqueue returns.
    def send_later(method, *args)
      performable = Delayed::PerformableMethod.new(self, method.to_sym, args)
      Delayed::Job.enqueue(performable)
    end
  end
end
@@ -0,0 +1,55 @@
1
module Delayed
  # Description of a deferred method call: the receiver, the method name and
  # the arguments. Classes and ActiveRecord::Base instances are not stored
  # directly; they are replaced by marker strings ("CLASS:Name" and
  # "AR:Name:id") and turned back into objects when the call is performed.
  class PerformableMethod < Struct.new(:object, :method, :args)
    # Marker formats produced by #dump. They are anchored with \A and \z
    # (whole string) rather than ^ and $ (any line), so that an ordinary
    # multi-line string argument that merely contains a line such as
    # "CLASS:Kernel" is not mistaken for a marker and replaced by a class.
    CLASS_STRING_FORMAT = /\ACLASS\:([A-Z][\w\:]+)\z/
    AR_STRING_FORMAT    = /\AAR\:([A-Z][\w\:]+)\:(\d+)\z/

    # object - the receiver of the deferred call.
    # method - the name of the method to call (converted to a Symbol).
    # args   - array of arguments for the call.
    #
    # Raises NoMethodError if +object+ does not respond to +method+.
    def initialize(object, method, args)
      unless object.respond_to?(method)
        # Describe the intended receiver; at this point +self+ is still an
        # empty struct, so inspecting it would say nothing useful.
        raise NoMethodError, "undefined method `#{method}' for #{object.inspect}"
      end

      self.object = dump(object)
      self.args   = args.map { |a| dump(a) }
      self.method = method.to_sym
    end

    # Human readable name of the call: "Class.method" for class receivers,
    # "Class#method" for ActiveRecord receivers, "Unknown#method" otherwise.
    def display_name
      case self.object
      when CLASS_STRING_FORMAT then "#{$1}.#{method}"
      when AR_STRING_FORMAT    then "#{$1}##{method}"
      else "Unknown##{method}"
      end
    end

    # Resolves the stored receiver and arguments and performs the call,
    # returning its result. Returns true when a record lookup raises
    # ActiveRecord::RecordNotFound.
    def perform
      load(object).send(method, *args.map{|a| load(a)})
    rescue ActiveRecord::RecordNotFound
      # We cannot do anything about objects which were deleted in the meantime
      true
    end

    private

    # Turns a marker string back into the class or record it stands for;
    # anything else is returned untouched.
    def load(arg)
      case arg
      when CLASS_STRING_FORMAT then $1.constantize
      when AR_STRING_FORMAT    then $1.constantize.find($2)
      else arg
      end
    end

    # Replaces classes and ActiveRecord::Base instances by their marker
    # strings; anything else is returned untouched.
    def dump(arg)
      case arg
      when Class              then class_to_string(arg)
      when ActiveRecord::Base then ar_to_string(arg)
      else arg
      end
    end

    def ar_to_string(obj)
      "AR:#{obj.class}:#{obj.id}"
    end

    def class_to_string(obj)
      "CLASS:#{obj.name}"
    end
  end
end
@@ -0,0 +1,54 @@
1
module Delayed
  # Runs Delayed::Job.work_off in a loop until a TERM or INT signal is
  # received, reporting progress through #say.
  class Worker
    # Seconds to sleep after a pass that processed no jobs.
    SLEEP = 5

    cattr_accessor :logger
    # Default logger: Merb's when Merb::Logger is defined, otherwise the
    # Rails default logger when that constant is defined, otherwise nil.
    self.logger =
      if defined?(Merb::Logger)
        Merb.logger
      elsif defined?(RAILS_DEFAULT_LOGGER)
        RAILS_DEFAULT_LOGGER
      end

    # Recognised options:
    # * :quiet        - when truthy, #say does not print to standard output
    # * :min_priority - copied to Delayed::Job.min_priority when the key is given
    # * :max_priority - copied to Delayed::Job.max_priority when the key is given
    def initialize(options={})
      @quiet = options[:quiet]

      if options.has_key?(:min_priority)
        Delayed::Job.min_priority = options[:min_priority]
      end
      if options.has_key?(:max_priority)
        Delayed::Job.max_priority = options[:max_priority]
      end
    end

    # Main loop. Each pass calls Delayed::Job.work_off, then either sleeps for
    # SLEEP seconds (nothing processed) or reports the throughput. The global
    # $exit flag, set by the signal handlers, ends the loop.
    # Delayed::Job.clear_locks! is always called on the way out.
    def start
      say "*** Starting job worker #{Delayed::Job.worker_name}"

      %w(TERM INT).each do |signal|
        trap(signal) { say 'Exiting...'; $exit = true }
      end

      loop do
        outcome = nil
        elapsed = Benchmark.realtime { outcome = Delayed::Job.work_off }
        processed = outcome.sum

        break if $exit

        if processed.zero?
          sleep(SLEEP)
        else
          say "#{processed} jobs processed at %.4f j/s, %d failed ..." % [processed / elapsed, outcome.last]
        end

        # checked again so a signal received during the sleep or the report
        # stops the worker before the next pass
        break if $exit
      end

    ensure
      Delayed::Job.clear_locks!
    end

    # Prints +text+ unless the worker is quiet, and also sends it to the
    # logger at info level when a logger is set.
    def say(text)
      puts text unless @quiet
      logger.info text if logger
    end

  end
end
@@ -0,0 +1,30 @@
1
+ require 'socket'
2
+ require 'timeout'
3
+
4
+ require 'riddle/client'
5
+ require 'riddle/configuration'
6
+ require 'riddle/controller'
7
+
8
module Riddle #:nodoc:
  class ConnectionError < StandardError #:nodoc:
  end

  module Version #:nodoc:
    Major   = 0
    Minor   = 9
    Tiny    = 9
    # Revision number for RubyForge's sake, taken from what Sphinx
    # outputs to the command line.
    Rev     = 1785
    # Release number to mark my own fixes, beyond feature parity with
    # Sphinx itself.
    Release = 6

    # "Major.Minor.Tiny"
    String     = "#{Major}.#{Minor}.#{Tiny}"
    # "Major.Minor.Tiny.Rev.Release"
    GemVersion = "#{String}.#{Rev}.#{Release}"
  end

  # Returns a copy of +string+ in which each of the characters
  # ( ) | - ! @ ~ " & / \ ^ $ = is preceded by a backslash.
  def self.escape(string)
    special = /[\(\)\|\-!@~"&\/\\\^\$=]/
    string.gsub(special) { |found| '\\' + found }
  end
end
@@ -0,0 +1,735 @@
1
+ require 'riddle/client/filter'
2
+ require 'riddle/client/message'
3
+ require 'riddle/client/response'
4
+
5
+ module Riddle
6
+ class VersionError < StandardError; end
7
+ class ResponseError < StandardError; end
8
+
9
+ # This class was heavily based on the existing Client API by Dmytro Shteflyuk
10
+ # and Alexy Kovyrin. Their code worked fine, I just wanted something a bit
11
+ # more Ruby-ish (ie. lowercase and underscored method names). I also have
12
+ # used a few helper classes, just to neaten things up.
13
+ #
14
+ # Feel free to use it wherever. Send bug reports, patches, comments and
15
+ # suggestions to pat at freelancing-gods dot com.
16
+ #
17
+ # Most properties of the client are accessible through attribute accessors,
18
+ # and where relevant use symbols instead of the long constants common in
19
+ # other clients.
20
+ # Some examples:
21
+ #
22
+ # client.sort_mode = :extended
23
+ # client.sort_by = "birthday DESC"
24
+ # client.match_mode = :extended
25
+ #
26
+ # To add a filter, you will need to create a Filter object:
27
+ #
28
+ # client.filters << Riddle::Client::Filter.new("birthday",
29
+ # Time.local(1975, 1, 1).to_i..Time.local(1985, 1, 1).to_i, false)
30
+ #
31
+ class Client
32
+ Commands = {
33
+ :search => 0, # SEARCHD_COMMAND_SEARCH
34
+ :excerpt => 1, # SEARCHD_COMMAND_EXCERPT
35
+ :update => 2, # SEARCHD_COMMAND_UPDATE
36
+ :keywords => 3, # SEARCHD_COMMAND_KEYWORDS
37
+ :persist => 4, # SEARCHD_COMMAND_PERSIST
38
+ :status => 5, # SEARCHD_COMMAND_STATUS
39
+ :query => 6 # SEARCHD_COMMAND_QUERY
40
+ }
41
+
42
+ Versions = {
43
+ :search => 0x116, # VER_COMMAND_SEARCH
44
+ :excerpt => 0x100, # VER_COMMAND_EXCERPT
45
+ :update => 0x102, # VER_COMMAND_UPDATE
46
+ :keywords => 0x100, # VER_COMMAND_KEYWORDS
47
+ :status => 0x100, # VER_COMMAND_STATUS
48
+ :query => 0x100 # VER_COMMAND_QUERY
49
+ }
50
+
51
+ Statuses = {
52
+ :ok => 0, # SEARCHD_OK
53
+ :error => 1, # SEARCHD_ERROR
54
+ :retry => 2, # SEARCHD_RETRY
55
+ :warning => 3 # SEARCHD_WARNING
56
+ }
57
+
58
+ MatchModes = {
59
+ :all => 0, # SPH_MATCH_ALL
60
+ :any => 1, # SPH_MATCH_ANY
61
+ :phrase => 2, # SPH_MATCH_PHRASE
62
+ :boolean => 3, # SPH_MATCH_BOOLEAN
63
+ :extended => 4, # SPH_MATCH_EXTENDED
64
+ :fullscan => 5, # SPH_MATCH_FULLSCAN
65
+ :extended2 => 6 # SPH_MATCH_EXTENDED2
66
+ }
67
+
68
+ RankModes = {
69
+ :proximity_bm25 => 0, # SPH_RANK_PROXIMITY_BM25
70
+ :bm25 => 1, # SPH_RANK_BM25
71
+ :none => 2, # SPH_RANK_NONE
72
+ :wordcount => 3, # SPH_RANK_WORDCOUNT
73
+ :proximity => 4, # SPH_RANK_PROXIMITY
74
+ :match_any => 5, # SPH_RANK_MATCHANY
75
+ :fieldmask => 6 # SPH_RANK_FIELDMASK
76
+ }
77
+
78
+ SortModes = {
79
+ :relevance => 0, # SPH_SORT_RELEVANCE
80
+ :attr_desc => 1, # SPH_SORT_ATTR_DESC
81
+ :attr_asc => 2, # SPH_SORT_ATTR_ASC
82
+ :time_segments => 3, # SPH_SORT_TIME_SEGMENTS
83
+ :extended => 4, # SPH_SORT_EXTENDED
84
+ :expr => 5 # SPH_SORT_EXPR
85
+ }
86
+
87
+ AttributeTypes = {
88
+ :integer => 1, # SPH_ATTR_INTEGER
89
+ :timestamp => 2, # SPH_ATTR_TIMESTAMP
90
+ :ordinal => 3, # SPH_ATTR_ORDINAL
91
+ :bool => 4, # SPH_ATTR_BOOL
92
+ :float => 5, # SPH_ATTR_FLOAT
93
+ :bigint => 6, # SPH_ATTR_BIGINT
94
+ :multi => 0x40000000 # SPH_ATTR_MULTI
95
+ }
96
+
97
+ GroupFunctions = {
98
+ :day => 0, # SPH_GROUPBY_DAY
99
+ :week => 1, # SPH_GROUPBY_WEEK
100
+ :month => 2, # SPH_GROUPBY_MONTH
101
+ :year => 3, # SPH_GROUPBY_YEAR
102
+ :attr => 4, # SPH_GROUPBY_ATTR
103
+ :attrpair => 5 # SPH_GROUPBY_ATTRPAIR
104
+ }
105
+
106
+ FilterTypes = {
107
+ :values => 0, # SPH_FILTER_VALUES
108
+ :range => 1, # SPH_FILTER_RANGE
109
+ :float_range => 2 # SPH_FILTER_FLOATRANGE
110
+ }
111
+
112
+ attr_accessor :server, :port, :offset, :limit, :max_matches,
113
+ :match_mode, :sort_mode, :sort_by, :weights, :id_range, :filters,
114
+ :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
115
+ :retry_count, :retry_delay, :anchor, :index_weights, :rank_mode,
116
+ :max_query_time, :field_weights, :timeout, :overrides, :select
117
+ attr_reader :queue
118
+
119
+ # Can instantiate with a specific server and port - otherwise it assumes
120
+ # defaults of localhost and 3312 respectively. All other settings can be
121
+ # accessed and changed via the attribute accessors.
122
# server - searchd host name; "localhost" when nil.
# port   - searchd port; 3312 when nil.
#
# Starts without a socket, applies the defaults from #reset and begins with
# an empty query queue.
def initialize(server=nil, port=nil)
  @server, @port = (server || "localhost"), (port || 3312)
  @socket = nil

  reset

  @queue = []
end
131
+
132
+ # Reset attributes and settings to defaults.
133
# Restores every search setting to its default. The server, port, socket and
# queue are left alone. Returns the default select clause ("*"), the value of
# the last assignment.
def reset
  # paging and result limits
  @offset, @limit, @max_matches = 0, 20, 1000

  # matching, ranking and sorting
  @match_mode, @rank_mode = :all, :proximity_bm25
  @sort_mode, @sort_by = :relevance, ''

  # weightings
  @weights = []
  # string keys are index names, integer values are weightings
  @index_weights = {}
  # string keys are field names, integer values are weightings
  @field_weights = {}

  # restricting the result set
  @id_range = 0..0
  @filters = []
  @overrides = {}

  # grouping
  @group_by, @group_function = '', :day
  @group_clause, @group_distinct = '@group desc', ''

  # query control
  @cut_off, @retry_count, @retry_delay = 0, 0, 0
  @max_query_time = 0
  @timeout = 0

  @anchor = {}
  @select = "*"
end
162
+
163
+ # Set the geo-anchor point - with the names of the attributes that contain
164
+ # the latitude and longitude (in radians), and the reference position.
165
+ # Note that for geocoding to work properly, you must also set
166
+ # match_mode to :extended. To sort results by distance, you will
167
+ # need to set sort_mode to :extended and sort_by to '@geodist asc', for example. Sphinx
168
+ # expects latitude and longitude to be returned from your SQL source
169
+ # in radians.
170
+ #
171
+ # Example:
172
+ # client.set_anchor('lat', -0.6591741, 'long', 2.530770)
173
+ #
174
# Stores the anchor as a hash with the keys :latitude_attribute, :latitude,
# :longitude_attribute and :longitude, replacing any previous anchor, and
# returns that hash.
def set_anchor(lat_attr, lat, long_attr, long)
  @anchor = Hash[
    :latitude_attribute,  lat_attr,
    :latitude,            lat,
    :longitude_attribute, long_attr,
    :longitude,           long
  ]
end
182
+
183
+ # Append a query to the queue. This uses the same parameters as the query
184
+ # method.
185
# Builds the message for this search with #query_message (using the current
# client settings) and adds it to the end of the queue. Nothing is sent until
# #run is called. Returns the queue.
def append_query(search, index = '*', comments = '')
  packet = query_message(search, index, comments)
  @queue.push(packet)
end
188
+
189
+ # Run all the queries currently in the queue. This will return an array of
190
+ # results hashes.
191
# Sends every queued message in a single :search request and reads one result
# hash per queued message from the reply, in queue order. A result whose
# status is the error status carries only :status, :error and the empty
# default collections. The queue is emptied once all results have been read.
def run
  reply = Response.new(request(:search, @queue))

  outcomes = @queue.map do
    entry = {
      :matches         => [],
      :fields          => [],
      :attributes      => {},
      :attribute_names => [],
      :words           => {}
    }

    status = entry[:status] = reply.next_int
    if status == Statuses[:warning]
      entry[:warning] = reply.next
    elsif status == Statuses[:error]
      entry[:error] = reply.next
      next entry
    end

    entry[:fields] = reply.next_array

    # attribute names and their type codes
    attribute_count = reply.next_int
    (0...attribute_count).each do
      name      = reply.next
      type_code = reply.next_int

      entry[:attributes][name] = type_code
      entry[:attribute_names] << name
    end

    # matches: document id, weight, then one value per attribute
    match_count = reply.next_int
    wide_ids    = reply.next_int
    (0...match_count).each do |position|
      doc    = wide_ids > 0 ? reply.next_64bit_int : reply.next_int
      weight = reply.next_int

      match = {:doc => doc, :weight => weight, :index => position, :attributes => {}}
      entry[:matches] << match
      entry[:attribute_names].each do |name|
        match[:attributes][name] = attribute_from_type(entry[:attributes][name], reply)
      end
    end

    entry[:total]       = reply.next_int.to_i
    entry[:total_found] = reply.next_int.to_i
    # the raw value is divided by 1000 and rounded to three decimal places
    entry[:time]        = ('%.3f' % (reply.next_int / 1000.0)).to_f

    # per-word statistics
    word_count = reply.next_int
    (0...word_count).each do
      word = reply.next
      docs = reply.next_int
      hits = reply.next_int
      entry[:words][word] = {:docs => docs, :hits => hits}
    end

    entry
  end

  @queue.clear
  outcomes
end
255
+
256
+ # Query the Sphinx daemon - defaulting to all indexes, but you can specify
257
+ # a specific one if you wish. The search parameter should be a string
258
+ # following Sphinx's expectations.
259
+ #
260
+ # The object returned from this method is a hash with the following keys:
261
+ #
262
+ # * :matches
263
+ # * :fields
264
+ # * :attributes
265
+ # * :attribute_names
266
+ # * :words
267
+ # * :total
268
+ # * :total_found
269
+ # * :time
270
+ # * :status
271
+ # * :warning (if appropriate)
272
+ # * :error (if appropriate)
273
+ #
274
+ # The key <tt>:matches</tt> returns an array of hashes - the actual search
275
+ # results. Each hash has the document id (<tt>:doc</tt>), the result
276
+ # weighting (<tt>:weight</tt>), and a hash of the attributes for the
277
+ # document (<tt>:attributes</tt>).
278
+ #
279
+ # The <tt>:fields</tt> and <tt>:attribute_names</tt> keys return list of
280
+ # fields and attributes for the documents. The key <tt>:attributes</tt>
281
+ # will return a hash of attribute name and type pairs, and <tt>:words</tt>
282
+ # returns a hash of hashes representing the words from the search, with the
283
+ # number of documents and hits for each, along the lines of:
284
+ #
285
+ # results[:words]["Pat"] #=> {:docs => 12, :hits => 15}
286
+ #
287
+ # <tt>:total</tt>, <tt>:total_found</tt> and <tt>:time</tt> return the
288
+ # number of matches available, the total number of matches (which may be
289
+ # greater than the maximum available, depending on the number of matches
290
+ # and your sphinx configuration), and the time in milliseconds that the
291
+ # query took to run.
292
+ #
293
+ # <tt>:status</tt> is the error code for the query - and if there was a
294
+ # related warning, it will be under the <tt>:warning</tt> key. Fatal errors
295
+ # will be described under <tt>:error</tt>.
296
+ #
297
# Queues the message for this search, runs the queue and returns the first
# result hash.
#
# If running the queue raises, the message queued by this call is taken out
# of the queue again before the error propagates. #run only empties the queue
# after a successful round trip, so without this a single failed query would
# stay queued, be sent again by every later call, and its result would be
# returned in place of the new query's.
def query(search, index = '*', comments = '')
  message = query_message(search, index, comments)
  @queue << message

  completed = false
  begin
    result = run.first
    completed = true
    result
  ensure
    # identity comparison: only the message added above is removed
    @queue.delete_if { |queued| queued.equal?(message) } unless completed
  end
end
301
+
302
+ # Build excerpts from search terms (the +words+) and the text of documents. Excerpts are bodies of text that have the +words+ highlighted.
303
+ # They may also be abbreviated to fit within a word limit.
304
+ #
305
+ # As part of the options hash, you will need to
306
+ # define:
307
+ # * :docs
308
+ # * :words
309
+ # * :index
310
+ #
311
+ # Optional settings include:
312
+ # * :before_match (defaults to <span class="match">)
313
+ # * :after_match (defaults to </span>)
314
+ # * :chunk_separator (defaults to ' &#8230; ' - which is an HTML ellipsis)
315
+ # * :limit (defaults to 256)
316
+ # * :around (defaults to 5)
317
+ # * :exact_phrase (defaults to false)
318
+ # * :single_passage (defaults to false)
319
+ #
320
+ # The defaults differ from the official PHP client, as I've opted for
321
+ # semantic HTML markup.
322
+ #
323
+ # Example:
324
+ #
325
+ # client.excerpts(:docs => ["Pat Allan, Pat Cash"], :words => 'Pat', :index => 'pats')
326
+ # #=> ["<span class=\"match\">Pat</span> Allan, <span class=\"match\">Pat</span> Cash"]
327
+ #
328
+ # lorem_lipsum = "Lorem ipsum dolor..."
329
+ #
330
+ # client.excerpts(:docs => ["Pat Allan, #{lorem_lipsum} Pat Cash"], :words => 'Pat', :index => 'pats')
331
+ # #=> ["<span class=\"match\">Pat</span> Allan, Lorem ipsum dolor sit amet, consectetur adipisicing
332
+ # elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua &#8230; . Excepteur
333
+ # sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est
334
+ # laborum. <span class=\"match\">Pat</span> Cash"]
335
+ #
336
+ # Workflow:
337
+ #
338
+ # Excerpt creation is completely isolated from searching the index. The nominated index is only used to
339
+ # discover encoding and charset information.
340
+ #
341
+ # Therefore, the workflow goes:
342
+ #
343
+ # 1. Do the sphinx query.
344
+ # 2. Fetch the documents found by sphinx from their repositories.
345
+ # 3. Pass the documents' text to +excerpts+ for marking up of matched terms.
346
+ #
347
# Returns an array with one excerpt per entry of options[:docs], in the same
# order.
#
# The defaults are applied to a copy of +options+, so the hash passed in by
# the caller is never modified (and may be frozen). A missing, nil or false
# value counts as "not set" for the purpose of applying a default.
def excerpts(options = {})
  options = options.dup

  options[:index]           ||= '*'
  options[:before_match]    ||= '<span class="match">'
  options[:after_match]     ||= '</span>'
  options[:chunk_separator] ||= ' &#8230; ' # ellipsis
  options[:limit]           ||= 256
  options[:around]          ||= 5
  options[:exact_phrase]    ||= false
  options[:single_passage]  ||= false

  response = Response.new request(:excerpt, excerpts_message(options))

  # the reply holds one string per document that was sent
  options[:docs].collect { response.next }
end
361
+
362
+ # Update attributes - first parameter is the relevant index, second is an
363
+ # array of attributes to be updated, and the third is a hash, where the
364
+ # keys are the document ids, and the values are arrays with the attribute
365
+ # values - in the same order as the second parameter.
366
+ #
367
+ # Example:
368
+ #
369
+ # client.update('people', ['birthday'], {1 => [Time.local(1982, 8, 20).to_i]})
370
+ #
371
# Sends an :update request built by #update_message and returns the first
# integer of the reply.
def update(index, attributes, values_by_doc)
  payload = update_message(index, attributes, values_by_doc)
  reply   = Response.new(request(:update, payload))

  reply.next_int
end
379
+
380
+ # Generates a keyword list for a given query. Each keyword is represented
381
+ # by a hash, with keys :tokenised and :normalised. If return_hits is set to
382
+ # true it will also report on the number of hits and documents for each
383
+ # keyword (see :hits and :docs keys respectively).
384
# Sends a :keywords request built by #keywords_message. The reply starts with
# the number of keywords; each keyword then contributes its tokenised form,
# its normalised form and, only when +return_hits+ is set, its document and
# hit counts. Returns an array with one hash per keyword.
def keywords(query, index, return_hits = false)
  reply = Response.new(request(:keywords, keywords_message(query, index, return_hits)))

  total = reply.next_int
  (0...total).map do
    entry = {:tokenised => reply.next, :normalised => reply.next}
    entry.update(:docs => reply.next_int, :hits => reply.next_int) if return_hits
    entry
  end
end
403
+
404
# Sends a :status request. The reply starts with a row count and a column
# count, followed by two strings per row. Returns a hash that maps the first
# string of each row (as a symbol) to the second.
def status
  reply = Response.new(request(:status, Message.new))

  # the column count has to be read to advance through the reply, but it is
  # not used afterwards
  rows, cols = reply.next_int, reply.next_int

  table = {}
  (0...rows).each do
    name = reply.next.to_sym
    table[name] = reply.next
  end
  table
end
416
+
417
# Registers an override for +attribute+, replacing any earlier one for the
# same attribute. +type+ is a key of AttributeTypes and +values+ is iterated
# as document id / value pairs when the query message is built. Returns the
# stored {:type, :values} hash.
def add_override(attribute, type, values)
  override = {:type => type, :values => values}
  @overrides.store(attribute, override)
end
420
+
421
# Opens the socket and sends the :persist command over it. The socket stays
# open afterwards, so later requests reuse it until #close is called.
# Returns the result of the socket's send call.
def open
  open_socket

  handshake = [Commands[:persist], 0, 4, 1].pack("nnNN")
  @socket.send(handshake, 0)
end
428
+
429
# Closes the socket opened by #open; see #close_socket for the details and
# the return value.
def close
  close_socket
end
432
+
433
+ private
434
+
435
# Establishes the connection and stores it in @socket. With a timeout of 0
# the connection attempt is not time-limited; otherwise it is limited to
# @timeout seconds.
#
# Returns true.
# Raises RuntimeError ("Already Connected") when a socket is already open and
# Riddle::ConnectionError when the attempt times out.
def open_socket
  raise "Already Connected" unless @socket.nil?

  if @timeout == 0
    @socket = initialise_connection
    return true
  end

  begin
    Timeout.timeout(@timeout) { @socket = initialise_connection }
  rescue Timeout::Error
    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} timed out after #{@timeout} seconds"
  end

  true
end
451
+
452
# Closes the current socket and forgets it. Returns true.
# Raises RuntimeError ("Not Connected") when there is no open socket.
def close_socket
  if @socket.nil?
    raise "Not Connected"
  end

  @socket.close
  @socket = nil

  true
end
460
+
461
+ # Connects to the Sphinx daemon, and yields a socket to use. The socket is
462
+ # closed at the end of the block.
463
# Yields a socket to the block and returns the block's value. A socket that
# is already open is reused and left open; otherwise one is opened for the
# duration of the block and closed afterwards, even if the block raises.
def connect(&block)
  return yield(@socket) unless @socket.nil?

  open_socket
  begin
    yield @socket
  ensure
    close_socket
  end
end
475
+
476
# Opens a socket with #initialise_socket and performs the version handshake:
# the client sends protocol version 1 and reads the 4-byte version reported
# by searchd. Returns the connected socket.
#
# Raises Riddle::ConnectionError when no version can be read (the connection
# was closed before searchd answered) and VersionError when the reported
# version is lower than 1. The socket is closed before either error is
# raised.
def initialise_connection
  socket = initialise_socket

  # Send version
  socket.send [1].pack('N'), 0

  # Checking version
  version = socket.recv(4).unpack('N*').first

  # Fewer than four bytes unpack to nothing; comparing nil with 1 would
  # otherwise fail with a NoMethodError and leave the socket open.
  if version.nil?
    socket.close
    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} was closed before searchd reported its version"
  end

  if version < 1
    socket.close
    raise VersionError, "Can only connect to searchd version 1.0 or better, not version #{version}"
  end

  socket
end
491
+
492
# Opens a TCP socket to @server on @port and returns it. A refused connection
# is retried immediately; after the fifth refusal Riddle::ConnectionError is
# raised with the refusal's message appended.
def initialise_socket
  attempts = 0

  begin
    TCPSocket.new(@server, @port)
  rescue Errno::ECONNREFUSED => refusal
    attempts += 1
    retry if attempts < 5

    raise Riddle::ConnectionError,
      "Connection to #{@server} on #{@port} failed. #{refusal.message}"
  end
end
504
+
505
+ # Send a collection of messages, for a command type (eg, search, excerpts,
506
+ # update), to the Sphinx daemon.
507
# command  - key of Commands / Versions (:search, :status, ...).
# messages - a single message or an array of messages; they are joined into
#            one request body.
#
# Returns the body of the reply. For a reply with the warning status, the
# warning text is printed and the rest of the body is returned.
#
# Raises ResponseError when the reply is empty or shorter than the length
# announced in its header, when searchd answers with the error or retry
# status, and for any unknown status.
def request(command, messages)
  response = String.new
  status   = -1
  version  = 0
  length   = 0
  message  = Array(messages).join("")
  if message.respond_to?(:force_encoding)
    message = message.force_encoding('ASCII-8BIT')
  end

  connect do |socket|
    case command
    when :search
      # Message length is +4 to account for the following count value for
      # the number of messages (well, that's what I'm assuming).
      socket.send [
        Commands[command], Versions[command],
        4+message.length, messages.length
      ].pack("nnNN") + message, 0
    when :status
      socket.send [
        Commands[command], Versions[command], 4, 1
      ].pack("nnNN"), 0
    else
      socket.send [
        Commands[command], Versions[command], message.length
      ].pack("nnN") + message, 0
    end

    # reply header: status and version as 16-bit values, body length as a
    # 32-bit value
    header = socket.recv(8)
    status, version, length = header.unpack('n2N')

    while response.length < (length || 0)
      part = socket.recv(length - response.length)
      # An empty read means the connection has been closed: nothing more
      # will arrive, so stop instead of looping forever. The short response
      # is reported by the length check below.
      break if part.nil? || part.empty?
      response << part
    end
  end

  if response.empty? || response.length != length
    raise ResponseError, "No response from searchd (status: #{status}, version: #{version})"
  end

  case status
  when Statuses[:ok]
    if version < Versions[command]
      puts format("searchd command v.%d.%d older than client (v.%d.%d)",
        version >> 8, version & 0xff,
        Versions[command] >> 8, Versions[command] & 0xff)
    end
    response
  when Statuses[:warning]
    # the body starts with the length-prefixed warning text
    length = response[0, 4].unpack('N*').first
    puts response[4, length]
    response[4 + length, response.length - 4 - length]
  when Statuses[:error], Statuses[:retry]
    raise ResponseError, "searchd error (status: #{status}): #{response[4, response.length - 4]}"
  else
    raise ResponseError, "Unknown searchd error (status: #{status})"
  end
end
567
+
568
+ # Generation of the message to send to Sphinx for a search.
569
# Serialises the current client settings together with the search string,
# index list and comment into one search message and returns it as a string.
# The fields are written in a fixed order, so the sections below must stay in
# this sequence.
def query_message(search, index, comments = '')
  packet = Message.new

  # writes a weight table: entry count, then name/weight pairs
  append_weights = lambda do |table|
    packet.append_int(table.length)
    table.each do |name, weight|
      packet.append_string(name.to_s)
      packet.append_int(weight)
    end
  end

  # Mode, Limits, Sort Mode
  packet.append_ints(@offset, @limit, MatchModes[@match_mode],
    RankModes[@rank_mode], SortModes[@sort_mode])
  packet.append_string(@sort_by)

  # Query
  packet.append_string(search)

  # Weights
  packet.append_int(@weights.length)
  packet.append_ints(*@weights)

  # Index
  packet.append_string(index)

  # ID Range
  packet.append_int(1)
  packet.append_64bit_ints(@id_range.first, @id_range.last)

  # Filters
  packet.append_int(@filters.length)
  @filters.each { |filter| packet.append(filter.query_message) }

  # Grouping
  packet.append_int(GroupFunctions[@group_function])
  packet.append_string(@group_by)
  packet.append_int(@max_matches)
  packet.append_string(@group_clause)
  packet.append_ints(@cut_off, @retry_count, @retry_delay)
  packet.append_string(@group_distinct)

  # Anchor Point
  if @anchor.empty?
    packet.append_int(0)
  else
    packet.append_int(1)
    packet.append_string(@anchor[:latitude_attribute])
    packet.append_string(@anchor[:longitude_attribute])
    packet.append_floats(@anchor[:latitude], @anchor[:longitude])
  end

  # Per Index Weights
  append_weights.call(@index_weights)

  # Max Query Time
  packet.append_int(@max_query_time)

  # Per Field Weights
  append_weights.call(@field_weights)

  packet.append_string(comments)

  # Overrides: attribute name, type code, then document id / value pairs
  # written with the appender that matches the attribute type
  packet.append_int(@overrides.length)
  @overrides.each do |attribute, override|
    packet.append_string(attribute.to_s)
    packet.append_int(AttributeTypes[override[:type]])
    packet.append_int(override[:values].length)

    appender = case override[:type]
    when :float  then :append_float
    when :bigint then :append_64bit_int
    else              :append_int
    end

    override[:values].each do |doc_id, value|
      packet.append_64bit_int(doc_id)
      packet.send(appender, value)
    end
  end

  packet.append_string(@select)

  packet.to_s
end
656
+
657
+ # Generation of the message to send to Sphinx for an excerpts request.
658
# Serialises an excerpts request: a mode of 0 and the flag bits, the index,
# the words, the markup options, the limits and finally the documents.
# Returns the message as a string.
def excerpts_message(options)
  packet = Message.new

  # bit 1 is always set; every truthy option adds its own bit
  flag_bits = {
    :exact_phrase   => 2,
    :single_passage => 4,
    :use_boundaries => 8,
    :weight_order   => 16
  }
  flags = flag_bits.inject(1) do |bits, (option, bit)|
    options[option] ? (bits | bit) : bits
  end

  packet.append([0, flags].pack('N2')) # 0 = mode
  packet.append_string(options[:index])
  packet.append_string(options[:words])

  # options
  [:before_match, :after_match, :chunk_separator].each do |key|
    packet.append_string(options[key])
  end
  packet.append_ints(options[:limit], options[:around])

  packet.append_array(options[:docs])

  packet.to_s
end
681
+
682
+ # Generation of the message to send to Sphinx to update attributes of a
683
+ # document.
684
# index         - name of the index to update.
# attributes    - array of attribute names.
# values_by_doc - hash of document id => array of values, in the order of
#                 +attributes+. A value that is an array is written as a
#                 count followed by its elements.
#
# Each attribute name is followed by a flag telling whether its values are
# arrays; the flag is derived from the values of the first document. With no
# documents at all every flag is false and the message carries a document
# count of zero, instead of failing on the missing first document.
#
# Returns the message as a string.
def update_message(index, attributes, values_by_doc)
  message = Message.new

  message.append_string index
  message.append_int attributes.length

  # looked up once; the first document decides which attributes are arrays
  sample_values = values_by_doc.values.first || []
  attributes.each_with_index do |attribute, position|
    message.append_string attribute
    message.append_boolean sample_values[position].is_a?(Array)
  end

  message.append_int values_by_doc.length
  values_by_doc.each do |doc_id, values|
    message.append_64bit_int doc_id # document ID
    values.each do |value|
      case value
      when Array
        message.append_int value.length
        message.append_ints(*value)
      else
        message.append_int value
      end
    end
  end

  message.to_s
end
710
+
711
+ # Generates the simple message to send to the daemon for a keywords request.
712
# Serialises a keywords request: the query, the index and a 1/0 flag that
# says whether hit statistics are wanted. Returns the message as a string.
def keywords_message(query, index, return_hits)
  packet = Message.new

  [query, index].each { |text| packet.append_string(text) }

  hits_flag = return_hits ? 1 : 0
  packet.append_int(hits_flag)

  packet.to_s
end
721
+
722
# Reads one attribute value of the given type code from +response+. A code
# greater than AttributeTypes[:multi] marks a multi-value attribute: the
# multi marker is subtracted and the array reader for the remaining base type
# is used. Codes other than float and bigint are read as integers.
def attribute_from_type(type, response)
  is_multi = type > AttributeTypes[:multi]
  type -= AttributeTypes[:multi] if is_multi

  if type == AttributeTypes[:float]
    is_multi ? response.next_float_array : response.next_float
  elsif type == AttributeTypes[:bigint]
    # NOTE(review): "next_64bit_int_arry" looks like a misspelling of
    # "next_64bit_int_array" - confirm which name Response actually defines.
    is_multi ? response.next_64bit_int_arry : response.next_64bit_int
  else
    is_multi ? response.next_int_array : response.next_int
  end
end
734
+ end
735
+ end