freelancing-god-thinking-sphinx 0.9.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/LICENCE +20 -0
  2. data/README +25 -0
  3. data/lib/riddle.rb +22 -0
  4. data/lib/riddle/client.rb +593 -0
  5. data/lib/riddle/client/filter.rb +44 -0
  6. data/lib/riddle/client/message.rb +65 -0
  7. data/lib/riddle/client/response.rb +84 -0
  8. data/lib/test.rb +46 -0
  9. data/lib/thinking_sphinx.rb +79 -0
  10. data/lib/thinking_sphinx/active_record.rb +115 -0
  11. data/lib/thinking_sphinx/active_record/delta.rb +86 -0
  12. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  13. data/lib/thinking_sphinx/active_record/search.rb +36 -0
  14. data/lib/thinking_sphinx/association.rb +140 -0
  15. data/lib/thinking_sphinx/attribute.rb +279 -0
  16. data/lib/thinking_sphinx/configuration.rb +275 -0
  17. data/lib/thinking_sphinx/field.rb +186 -0
  18. data/lib/thinking_sphinx/index.rb +234 -0
  19. data/lib/thinking_sphinx/index/builder.rb +197 -0
  20. data/lib/thinking_sphinx/index/faux_column.rb +97 -0
  21. data/lib/thinking_sphinx/rails_additions.rb +56 -0
  22. data/lib/thinking_sphinx/search.rb +413 -0
  23. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +184 -0
  24. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  25. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +0 -0
  26. data/spec/unit/thinking_sphinx/active_record_spec.rb +85 -0
  27. data/spec/unit/thinking_sphinx/association_spec.rb +0 -0
  28. data/spec/unit/thinking_sphinx/attribute_spec.rb +73 -0
  29. data/spec/unit/thinking_sphinx/configuration_spec.rb +7 -0
  30. data/spec/unit/thinking_sphinx/field_spec.rb +51 -0
  31. data/spec/unit/thinking_sphinx/index/builder_spec.rb +33 -0
  32. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +41 -0
  33. data/spec/unit/thinking_sphinx/index_spec.rb +5 -0
  34. data/spec/unit/thinking_sphinx/search_spec.rb +121 -0
  35. data/spec/unit/thinking_sphinx_spec.rb +82 -0
  36. data/tasks/thinking_sphinx_tasks.rake +1 -0
  37. data/tasks/thinking_sphinx_tasks.rb +86 -0
  38. metadata +90 -0
@@ -0,0 +1,56 @@
1
require 'set'

module ThinkingSphinx
  # Provides Hash#except and Hash#except! for environments where Hash does
  # not already supply them.
  module HashExcept
    # Returns a new hash without the given keys.
    #
    # When the receiver responds to +convert_key+, each key is passed through
    # it before being compared against the hash's own keys.
    def except(*keys)
      keys     = keys.map { |key| convert_key(key) } if respond_to?(:convert_key)
      rejected = Set.new(keys)

      reject { |key,| rejected.include?(key) }
    end

    # Removes the given keys from this hash in place and returns the hash
    # itself.
    def except!(*keys)
      replace(except(*keys))
    end
  end
end
15
+
16
# Mix the backport into Hash only when Hash has no #except of its own.
# method_defined? is used instead of instance_methods.include?("except")
# because instance_methods returns Symbols on Ruby 1.9+, so a String
# comparison never matches there.
unless Hash.method_defined?(:except)
  Hash.send(:include, ThinkingSphinx::HashExcept)
end
19
+
20
module ThinkingSphinx
  # Provides Array#extract_options! for argument lists that may end in an
  # options hash.
  module ArrayExtractOptions
    # Pops and returns the final element when it is a Hash. Otherwise the
    # array is left untouched and a new empty Hash is returned.
    def extract_options!
      return {} unless last.is_a?(::Hash)

      pop
    end
  end
end
27
+
28
# Mix the helper into Array only when Array has no #extract_options! yet.
# method_defined? is used instead of
# instance_methods.include?("extract_options!") because instance_methods
# returns Symbols on Ruby 1.9+, so a String comparison never matches there.
unless Array.method_defined?(:extract_options!)
  Array.send(:include, ThinkingSphinx::ArrayExtractOptions)
end
31
+
32
module ThinkingSphinx
  # Supplies quote_table_name for a connection adapter that only offers
  # quote_column_name.
  module MysqlQuotedTableName
    # Quotes +name+ with quote_column_name, then replaces every "." in the
    # result with "`.`" so each dotted segment ends up quoted separately.
    def quote_table_name(name) #:nodoc:
      quoted = quote_column_name(name)
      quoted.gsub('.', '`.`')
    end
  end
end
39
+
40
# Patch the MySQL adapter only when it has been loaded and does not already
# define quote_table_name.
#
# Constant names are compared as Strings after an explicit to_s, and the
# method check uses method_defined?, because both Module#constants and
# Module#instance_methods return Symbols on Ruby 1.9+ (Strings on 1.8). The
# previous String-only include? checks never matched on 1.9+.
if ActiveRecord::ConnectionAdapters.constants.map { |const| const.to_s }.include?("MysqlAdapter")
  unless ActiveRecord::ConnectionAdapters::MysqlAdapter.method_defined?(:quote_table_name)
    ActiveRecord::ConnectionAdapters::MysqlAdapter.send(
      :include, ThinkingSphinx::MysqlQuotedTableName
    )
  end
end
45
+
46
module ThinkingSphinx
  # Supplies quoted_table_name for objects that expose +connection+ and
  # +table_name+.
  module ActiveRecordQuotedName
    # Returns the receiver's table name, quoted by its connection's
    # quote_table_name.
    def quoted_table_name
      adapter = self.connection
      adapter.quote_table_name(self.table_name)
    end
  end
end
53
+
54
# Give ActiveRecord::Base a quoted_table_name class method, unless it already
# responds to one.
unless ActiveRecord::Base.respond_to?("quoted_table_name")
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordQuotedName)
end
@@ -0,0 +1,413 @@
1
+ module ThinkingSphinx
2
+ # Once you've got those indexes in and built, this is the stuff that
3
+ # matters - how to search! This class provides a generic search
4
+ # interface - which you can use to search all your indexed models at once.
5
+ # Most times, you will just want a specific model's results - the search and
6
+ # search_for_ids methods will do the job in exactly the same manner when
7
+ # called from a model.
8
+ #
9
+ class Search
10
+ class << self
11
+ # Searches for results that match the parameters provided. Will only
12
+ # return the ids for the matching objects. See #search for syntax
13
+ # examples.
14
+ #
15
# Takes the same arguments as #search. The :page option (default 1) is the
# page number handed to the WillPaginate collection.
#
# Returns a WillPaginate::Collection of document ids when that collection can
# be built, otherwise a plain Array of ids.
def search_for_ids(*args)
  results, client = search_results(*args.clone)

  # The page has to be read from the options here; it is not returned by
  # search_results.
  options = args.extract_options!
  page    = options[:page] ? options[:page].to_i : 1
  ids     = results[:matches].collect { |match| match[:doc] }

  begin
    pager = WillPaginate::Collection.new(page, client.limit, results[:total] || 0)
    pager.replace ids
  rescue StandardError
    # Pagination is best-effort: if the collection cannot be built (for
    # example WillPaginate is not loaded), return the bare ids.
    ids
  end
end
26
+
27
+ # Searches through the Sphinx indexes for relevant matches. There's
28
+ # various ways to search, sort, group and filter - which are covered
29
+ # below.
30
+ #
31
+ # Also, if you have WillPaginate installed, the search method can be used
32
+ # just like paginate. The same parameters - :page and :per_page - work as
33
+ # expected, and the returned result set can be used by the will_paginate
34
+ # helper.
35
+ #
36
+ # == Basic Searching
37
+ #
38
+ # The simplest way of searching is straight text.
39
+ #
40
+ # ThinkingSphinx::Search.search "pat"
41
+ # ThinkingSphinx::Search.search "google"
42
+ # User.search "pat", :page => (params[:page] || 1)
43
+ # Article.search "relevant news issue of the day"
44
+ #
45
+ # If you specify :include, like in an #find call, this will be respected
46
+ # when loading the relevant models from the search results.
47
+ #
48
+ # User.search "pat", :include => :posts
49
+ #
50
+ # == Searching by Fields
51
+ #
52
+ # If you want to step it up a level, you can limit your search terms to
53
+ # specific fields:
54
+ #
55
+ # User.search :conditions => {:name => "pat"}
56
+ #
57
+ # This uses Sphinx's extended match mode, unless you specify a different
58
+ # match mode explicitly (but then this way of searching won't work). Also
59
+ # note that you don't need to put in a search string.
60
+ #
61
+ # == Searching by Attributes
62
+ #
63
+ # Also known as filters, you can limit your searches to documents that
64
+ # have specific values for their attributes. There are two ways to do
65
+ # this. The first is one that works in all scenarios - using the :with
66
+ # option.
67
+ #
68
+ # ThinkingSphinx::Search.search :with => {:parent_id => 10}
69
+ #
70
+ # The second is only viable if you're searching with a specific model
71
+ # (not multi-model searching). With a single model, Thinking Sphinx
72
+ # can figure out what attributes and fields are available, so you can
73
+ # put it all in the :conditions hash, and it will sort it out.
74
+ #
75
+ # Node.search :conditions => {:parent_id => 10}
76
+ #
77
+ # Filters can be single values, arrays of values, or ranges.
78
+ #
79
+ # Article.search "East Timor", :conditions => {:rating => 3..5}
80
+ #
81
+ # == Excluding by Attributes
82
+ #
83
+ # Sphinx also supports negative filtering - where the filters are of
84
+ # attribute values to exclude. This is done with the :without option:
85
+ #
86
+ # User.search :without => {:role_id => 1}
87
+ #
88
+ # == Sorting
89
+ #
90
+ # Sphinx can only sort by attributes, so generally you will need to avoid
91
+ # using field names in your :order option. However, if you're searching
92
+ # on a single model, and have specified some fields as sortable, you can
93
+ # use those field names and Thinking Sphinx will interpret accordingly.
94
+ # Remember: this will only happen for single-model searches, and only
95
+ # through the :order option.
96
+ #
97
+ # Location.search "Melbourne", :order => :state
98
+ # User.search :conditions => {:role_id => 2}, :order => "name ASC"
99
+ #
100
+ # Keep in mind that if you use a string, you *must* specify the direction
101
+ # (ASC or DESC) else Sphinx won't return any results. If you use a symbol
102
+ # then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
103
+ # use the :sort_mode option:
104
+ #
105
+ # Location.search "Melbourne", :order => :state, :sort_mode => :desc
106
+ #
107
+ # Of course, there are other sort modes - check out the Sphinx
108
+ # documentation[http://sphinxsearch.com/doc.html] for that level of
109
+ # detail though.
110
+ #
111
+ # == Grouping
112
+ #
113
+ # For this you can use the group_by, group_clause and group_function
114
+ # options - which are all directly linked to Sphinx's expectations. No
115
+ # magic from Thinking Sphinx. It can get a little tricky, so make sure
116
+ # you read all the relevant
117
+ # documentation[http://sphinxsearch.com/doc.html#clustering] first.
118
+ #
119
+ # Yes this section will be expanded, but this is a start.
120
+ #
121
+ # == Geo/Location Searching
122
+ #
123
+ # Sphinx - and therefore Thinking Sphinx - has the facility to search
124
+ # around a geographical point, using a given latitude and longitude. To
125
+ # take advantage of this, you will need to have both of those values in
126
+ # attributes. To search with that point, you can then use one of the
127
+ # following syntax examples:
128
+ #
129
+ # Address.search "Melbourne", :geo => [1.4, -2.217]
130
+ # Address.search "Australia", :geo => [-0.55, 3.108],
131
+ # :latitude_attr => "latit", :longitude_attr => "longit"
132
+ #
133
+ # The first example applies when your latitude and longitude attributes
134
+ # are named any of lat, latitude, lon, long or longitude. If that's not
135
+ # the case, you will need to explicitly state them in your search, _or_
136
+ # you can do so in your model:
137
+ #
138
+ # define_index do
139
+ # # ...
140
+ #
141
+ # set_property :latitude_attr => "latit"
142
+ # set_property :longitude_attr => "longit"
143
+ # end
144
+ #
145
+ # Now, geo-location searching really only has an effect if you have a
146
+ # filter, sort or grouping clause related to it - otherwise it's just a
147
+ # normal search. To make use of the positioning difference, use the
148
+ # special attribute "@geo" in any of your filters or sorting or grouping
149
+ # clauses.
150
+ #
151
+ # And don't forget - both the latitude and longitude you use in your
152
+ # search, and the values in your indexes, need to be stored in radians,
153
+ # _not_ degrees.
154
+ #
155
# Returns a WillPaginate::Collection of loaded records when that collection
# can be built, otherwise a plain Array of records. Any error reported in the
# Sphinx results is written to the ActiveRecord logger.
def search(*args)
  results, client = search_results(*args.clone)

  ::ActiveRecord::Base.logger.error(
    "Sphinx Error: #{results[:error]}"
  ) if results[:error]

  options = args.extract_options!
  klass   = options[:class]
  page    = options[:page] ? options[:page].to_i : 1

  # Load the records once, outside the rescue below, so that a failure while
  # loading surfaces immediately instead of triggering a second full load.
  instances = instances_from_results(results[:matches], options, klass)

  begin
    pager = WillPaginate::Collection.new(page, client.limit, results[:total] || 0)
    pager.replace instances
  rescue StandardError
    # Pagination is best-effort: if the collection cannot be built (for
    # example WillPaginate is not loaded), return the bare records.
    instances
  end
end
174
+
175
+ private
176
+
177
+ # This method handles the common search functionality, and returns both
178
+ # the result hash and the client. Not super elegant, but it'll do for
179
+ # the moment.
180
+ #
181
# Arguments are free-text search terms followed by an optional options hash.
# Options read here: :class, :conditions, :per_page and :page (the client
# itself is built from the full hash by client_from_options).
#
# Returns a two-element array: the result hash from the client's query, and
# the client.
#
# Raises ThinkingSphinx::ConnectionError when the connection is refused.
def search_results(*args)
  options = args.extract_options!
  client  = client_from_options options

  conditions, filters = search_conditions(
    options[:class], options[:conditions] || {}
  )
  client.filters += filters
  client.match_mode = :extended unless conditions.empty?

  # Join the free-text terms and the field conditions with a space. Plain
  # concatenation fused the last term with the first "@field" marker
  # (e.g. "pat@name ...").
  query = [args.join(" "), conditions].reject { |part| part.empty? }.join(" ")

  set_sort_options! client, options

  client.limit = options[:per_page].to_i if options[:per_page]
  page = options[:page] ? options[:page].to_i : 1
  client.offset = (page - 1) * client.limit

  begin
    ::ActiveRecord::Base.logger.debug "Sphinx: #{query}"
    results = client.query query
    ::ActiveRecord::Base.logger.debug "Sphinx Result: #{results[:matches].collect{|m| m[:doc]}.inspect}"
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  return results, client
end
208
+
209
# Turns Sphinx matches into model instances.
#
# results - array of match hashes; each match's :doc value is used as the
#           record id.
# options - hash whose :include and :select values are passed on to find.
# klass   - when given, all records are fetched from this class in a single
#           find; when nil, each match is resolved by instance_from_result.
#
# Returns an array in the same order as +results+. With a klass, a match
# whose record was not returned by find yields nil in its position.
def instances_from_results(results, options = {}, klass = nil)
  if klass.nil?
    results.collect { |result| instance_from_result result, options }
  else
    ids = results.collect { |result| result[:doc] }
    instances = klass.find(
      :all,
      :conditions => {klass.primary_key.to_sym => ids},
      :include    => options[:include],
      :select     => options[:select]
    )

    # Index the loaded records by id once (first record wins) so putting them
    # back into match order is a hash lookup rather than a scan per id.
    by_id = {}
    instances.each { |obj| by_id[obj.id] = obj unless by_id.key?(obj.id) }

    ids.collect { |obj_id| by_id[obj_id] }
  end
end
223
+
224
+ # Either use the provided class to instantiate a result from a model, or
225
+ # get the result's CRC value and determine the class from that.
226
+ #
227
# Looks up the model class from the match's "class_crc" attribute, then finds
# the record whose id is the match's :doc value, passing along the :include
# and :select options.
def instance_from_result(result, options)
  model  = class_from_crc(result[:attributes]["class_crc"])
  doc_id = result[:doc]

  model.find(doc_id, :include => options[:include], :select => options[:select])
end
232
+
233
+ # Convert a CRC value to the corresponding class.
234
+ #
235
# The CRC-to-model-name table is built on first use (after asking a new
# Configuration to load the models) and kept in @models_by_crc for later
# calls. The name stored for +crc+ is then constantized and returned.
def class_from_crc(crc)
  @models_by_crc ||= begin
    Configuration.new.load_models

    lookup = {}
    ThinkingSphinx.indexed_models.each do |model_name|
      lookup[model_name.constantize.to_crc32] = model_name
    end
    lookup
  end

  @models_by_crc[crc].constantize
end
247
+
248
+ # Set all the appropriate settings for the client, using the provided
249
+ # options hash.
250
+ #
251
# Builds a Riddle::Client pointed at the configured address and port.
#
# For each supported setting the value is taken from +options+, then from the
# options of the last index of options[:class] (when a class is given), then
# from the client's existing value. Filters are appended for :classes
# (class_crc), :with (inclusive) and :without (exclusive).
#
# Returns the configured client.
def client_from_options(options)
  config = ThinkingSphinx::Configuration.new
  client = Riddle::Client.new config.address, config.port
  klass  = options[:class]
  index_options = klass ? klass.indexes.last.options : {}

  [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ].each do |key|
    client.send(
      "#{key}=",
      options[key] || index_options[key] || client.send(key)
    )
  end

  # Only derive a geo anchor when none was supplied explicitly.
  client.anchor = anchor_conditions(klass, options) || {} if client.anchor.empty?

  # class filters
  # The block parameter is deliberately not named klass: that would shadow
  # the outer local (and overwrite it on Ruby 1.8).
  client.filters << Riddle::Client::Filter.new(
    "class_crc", options[:classes].collect { |model| model.to_crc32 }
  ) if options[:classes]

  # normal attribute filters
  client.filters += options[:with].collect { |attr,val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val)
  } if options[:with]

  # exclusive attribute filters
  client.filters += options[:without].collect { |attr,val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val), true
  } if options[:without]

  client
end
288
+
289
# Normalises a filter value.
#
# * Range - a range starting at a Time becomes a range of integer
#           timestamps; any other range is returned as is.
# * Array - Time elements are replaced by their integer timestamps.
# * Time  - wrapped as a one-element array holding its integer timestamp.
# * other - coerced with Array().
def filter_value(value)
  case value
  when Range
    value.first.is_a?(Time) ? value.first.to_i..value.last.to_i : value
  when Array
    value.collect { |val| val.is_a?(Time) ? val.to_i : val }
  when Time
    # Array(time) would call Time#to_a and return the broken-down components
    # (sec, min, hour, ...) rather than a single timestamp.
    [value.to_i]
  else
    Array(value)
  end
end
299
+
300
+ # Translate field and attribute conditions to the relevant search string
301
+ # and filters.
302
+ #
303
# klass      - optional model class; its indexes' attribute names decide
#              which condition keys become filters.
# conditions - hash of field/attribute names to values.
#
# Keys naming an attribute become Riddle filters; every other key is appended
# to the search string as "@key value ". When a class is given, a class_crc
# filter for it is added last.
#
# Returns a two-element array: the search string and the array of filters.
def search_conditions(klass, conditions={})
  attributes = klass ? klass.indexes.collect { |index|
    index.attributes.collect { |attrib| attrib.unique_name }
  }.flatten : []

  search_string = ""
  filters = []

  conditions.each do |key,val|
    if attributes.include?(key.to_sym)
      # Normalise through filter_value so attribute conditions are treated
      # the same way as :with / :without filters (Time values included).
      filters << Riddle::Client::Filter.new(key.to_s, filter_value(val))
    else
      search_string << "@#{key} #{val} "
    end
  end

  filters << Riddle::Client::Filter.new(
    "class_crc", [klass.to_crc32]
  ) if klass

  return search_string, filters
end
328
+
329
+ # Return the appropriate latitude and longitude values, depending on
330
+ # whether the relevant attributes have been defined, and also whether
331
+ # there's actually any values.
332
+ #
333
# Attribute names are resolved in this order: the :latitude_attr /
# :longitude_attr search options, then the first index option of the same
# name, then the first indexed attribute called lat / latitude (for latitude)
# or lon / long / longitude (for longitude).
#
# Coordinates come from options[:geo] (first and last elements) when present,
# otherwise from options[:lat] and options[:lon].
#
# Returns the anchor hash, or nil unless both coordinates are available.
def anchor_conditions(klass, options)
  indexes = klass ? klass.indexes : []

  attributes = indexes.collect { |index|
    index.attributes.collect { |attrib| attrib.unique_name }
  }.flatten

  lat_attr = indexes.collect { |index| index.options[:latitude_attr]  }.compact.first
  lon_attr = indexes.collect { |index| index.options[:longitude_attr] }.compact.first

  lat_attr = options[:latitude_attr] if options[:latitude_attr]
  [:lat, :latitude].each do |name|
    lat_attr ||= name if attributes.include?(name)
  end

  lon_attr = options[:longitude_attr] if options[:longitude_attr]
  [:lon, :long, :longitude].each do |name|
    lon_attr ||= name if attributes.include?(name)
  end

  if options[:geo]
    lat, lon = options[:geo].first, options[:geo].last
  else
    lat, lon = options[:lat], options[:lon]
  end

  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr,
    :latitude            => lat,
    :longitude_attribute => lon_attr,
    :longitude           => lon
  }
end
370
+
371
+ # Set the sort options using the :order key as well as the appropriate
372
+ # Riddle settings.
373
+ #
374
# client  - the client whose sort_mode / sort_by are updated in place.
# options - search options; :class and :order are read here.
#
# A Symbol :order sorts by that attribute, or by "<field>_sort" when the
# symbol names one of the class's indexed fields. A String :order switches
# the client to :extended mode with field names translated by
# sorted_fields_to_attributes. Finally the :asc / :desc shorthands are
# expanded to :attr_asc / :attr_desc.
def set_sort_options!(client, options)
  klass  = options[:class]
  fields = klass ? klass.indexes.collect { |index|
    index.fields.collect { |field| field.unique_name }
  }.flatten : []

  case order = options[:order]
  when Symbol
    # Default to ascending order when no explicit sort mode is in effect.
    # A nil check alone is not enough once the client carries a non-nil
    # default mode. NOTE(review): assumes Riddle::Client's default sort mode
    # is :relevance - confirm against Riddle.
    if client.sort_mode.nil? || client.sort_mode == :relevance
      client.sort_mode = :attr_asc
    end
    client.sort_by = fields.include?(order) ? "#{order}_sort" : order.to_s
  when String
    client.sort_mode = :extended
    client.sort_by   = sorted_fields_to_attributes(order, fields)
  end

  client.sort_mode = :attr_asc  if client.sort_mode == :asc
  client.sort_mode = :attr_desc if client.sort_mode == :desc
end
398
+
399
+ # Search through a collection of fields and translate any appearances
400
+ # of them in a string to their attribute equivalent for sorting.
401
+ #
402
# string - the sort clause, e.g. "name ASC, created_at DESC".
# fields - field names; whole-word occurrences of each are suffixed with
#          "_sort".
#
# Returns a new string. The argument is never modified, so frozen strings
# and strings still held by the caller's options hash are safe to pass.
def sorted_fields_to_attributes(string, fields)
  fields.inject(string.dup) do |clause, field|
    name = field.to_s

    # Escape the name so any regex metacharacters in it are matched literally.
    clause.gsub(/(^|\s)#{Regexp.escape(name)}(,?\s|$)/) { |match|
      match.gsub name, "#{name}_sort"
    }
  end
end
411
+ end
412
+ end
413
+ end