initforthe-thinking-sphinx 1.1.21

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +141 -0
  3. data/lib/thinking_sphinx.rb +215 -0
  4. data/lib/thinking_sphinx/active_record.rb +278 -0
  5. data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
  6. data/lib/thinking_sphinx/active_record/delta.rb +87 -0
  7. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  8. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  9. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
  10. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  11. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +135 -0
  12. data/lib/thinking_sphinx/association.rb +164 -0
  13. data/lib/thinking_sphinx/attribute.rb +268 -0
  14. data/lib/thinking_sphinx/class_facet.rb +15 -0
  15. data/lib/thinking_sphinx/collection.rb +148 -0
  16. data/lib/thinking_sphinx/configuration.rb +262 -0
  17. data/lib/thinking_sphinx/core/string.rb +15 -0
  18. data/lib/thinking_sphinx/deltas.rb +30 -0
  19. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  20. data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta.rb +27 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  23. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  24. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  25. data/lib/thinking_sphinx/deploy/capistrano.rb +82 -0
  26. data/lib/thinking_sphinx/facet.rb +108 -0
  27. data/lib/thinking_sphinx/facet_collection.rb +59 -0
  28. data/lib/thinking_sphinx/field.rb +82 -0
  29. data/lib/thinking_sphinx/index.rb +99 -0
  30. data/lib/thinking_sphinx/index/builder.rb +287 -0
  31. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  32. data/lib/thinking_sphinx/property.rb +160 -0
  33. data/lib/thinking_sphinx/rails_additions.rb +136 -0
  34. data/lib/thinking_sphinx/search.rb +727 -0
  35. data/lib/thinking_sphinx/search/facets.rb +104 -0
  36. data/lib/thinking_sphinx/source.rb +150 -0
  37. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  38. data/lib/thinking_sphinx/source/sql.rb +126 -0
  39. data/lib/thinking_sphinx/tasks.rb +162 -0
  40. data/rails/init.rb +14 -0
  41. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  42. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  43. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  44. data/spec/unit/thinking_sphinx/active_record_spec.rb +329 -0
  45. data/spec/unit/thinking_sphinx/association_spec.rb +246 -0
  46. data/spec/unit/thinking_sphinx/attribute_spec.rb +338 -0
  47. data/spec/unit/thinking_sphinx/collection_spec.rb +15 -0
  48. data/spec/unit/thinking_sphinx/configuration_spec.rb +222 -0
  49. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  50. data/spec/unit/thinking_sphinx/facet_collection_spec.rb +64 -0
  51. data/spec/unit/thinking_sphinx/facet_spec.rb +302 -0
  52. data/spec/unit/thinking_sphinx/field_spec.rb +154 -0
  53. data/spec/unit/thinking_sphinx/index/builder_spec.rb +355 -0
  54. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  55. data/spec/unit/thinking_sphinx/index_spec.rb +45 -0
  56. data/spec/unit/thinking_sphinx/rails_additions_spec.rb +191 -0
  57. data/spec/unit/thinking_sphinx/search_spec.rb +228 -0
  58. data/spec/unit/thinking_sphinx/source_spec.rb +217 -0
  59. data/spec/unit/thinking_sphinx_spec.rb +151 -0
  60. data/tasks/distribution.rb +67 -0
  61. data/tasks/rails.rake +1 -0
  62. data/tasks/testing.rb +78 -0
  63. data/vendor/after_commit/LICENSE +20 -0
  64. data/vendor/after_commit/README +16 -0
  65. data/vendor/after_commit/Rakefile +22 -0
  66. data/vendor/after_commit/init.rb +8 -0
  67. data/vendor/after_commit/lib/after_commit.rb +45 -0
  68. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  69. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  70. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  71. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  72. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  73. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  74. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  75. data/vendor/riddle/lib/riddle.rb +30 -0
  76. data/vendor/riddle/lib/riddle/client.rb +619 -0
  77. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  78. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  79. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  80. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  81. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  82. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  83. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  84. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  85. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  86. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  87. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  88. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  89. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  90. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  91. metadata +190 -0
@@ -0,0 +1,136 @@
1
require 'set'

module ThinkingSphinx
  # Fallback implementations of Hash#except and Hash#except!, mixed into Hash
  # only when Hash does not already provide +except+.
  module HashExcept
    # Returns a new hash without the given keys.
    #
    # If the receiver responds to +convert_key+, every key is passed through
    # it before the comparison.
    def except(*keys)
      rejected = Set.new(respond_to?(:convert_key) ? keys.map { |key| convert_key(key) } : keys)
      reject { |key,| rejected.include?(key) }
    end

    # Replaces the contents of the hash with a copy that lacks the given keys.
    def except!(*keys)
      replace(except(*keys))
    end
  end
end

# method_defined? is used rather than instance_methods.include?("except"):
# instance_methods returns Strings on Ruby 1.8 but Symbols on 1.9+, so the
# String comparison silently never matched on newer Rubies.
Hash.send(
  :include, ThinkingSphinx::HashExcept
) unless Hash.method_defined?(:except)
19
+
20
module ThinkingSphinx
  # Fallback implementation of Array#extract_options!, mixed into Array only
  # when Array does not already provide it.
  module ArrayExtractOptions
    # Removes and returns the last element if it is a Hash; otherwise leaves
    # the array untouched and returns an empty Hash.
    def extract_options!
      last.is_a?(::Hash) ? pop : {}
    end
  end
end

# method_defined? works on every Ruby version; comparing instance_methods
# against a String never matches on Ruby 1.9+, where the list holds Symbols.
Array.send(
  :include, ThinkingSphinx::ArrayExtractOptions
) unless Array.method_defined?(:extract_options!)
31
+
32
module ThinkingSphinx
  # Supplies a +quote_table_name+ that simply reuses the including object's
  # +quote_column_name+.
  module AbstractQuotedTableName
    # Quotes +name+ exactly as a column name would be quoted.
    def quote_table_name(name)
      quote_column_name name
    end
  end
end
39
+
40
# Install the fallback only when the adapter base class lacks
# quote_table_name. method_defined? is used because instance_methods returns
# Symbols on Ruby 1.9+, so the previous String comparison never matched there.
ActiveRecord::ConnectionAdapters::AbstractAdapter.send(
  :include, ThinkingSphinx::AbstractQuotedTableName
) unless ActiveRecord::ConnectionAdapters::AbstractAdapter.method_defined?(:quote_table_name)
43
+
44
module ThinkingSphinx
  # Backtick-aware +quote_table_name+ built on the including object's
  # +quote_column_name+.
  module MysqlQuotedTableName
    # Quotes +name+ as a column, then replaces each '.' in the result with
    # '`.`' so that every dot-separated part ends up quoted separately.
    def quote_table_name(name) #:nodoc:
      quoted = quote_column_name(name)
      quoted.gsub('.', '`.`')
    end
  end
end
51
+
52
# Adds the MySQL-flavoured quote_table_name to the MySQL adapter (or the JDBC
# adapter under JRuby) when that adapter does not already have one.
#
# Constant names are compared as Strings and the method check goes through
# method_defined?, because Module#constants and #instance_methods return
# Strings on Ruby 1.8 but Symbols on 1.9+; the old String comparisons
# silently failed on newer Rubies.
if ActiveRecord::ConnectionAdapters.constants.any? { |const| const.to_s == "MysqlAdapter" } ||
   ActiveRecord::Base.respond_to?(:jdbcmysql_connection)
  adapter = ActiveRecord::ConnectionAdapters.const_get(
    defined?(JRUBY_VERSION) ? :JdbcAdapter : :MysqlAdapter
  )
  unless adapter.method_defined?(:quote_table_name)
    adapter.send(:include, ThinkingSphinx::MysqlQuotedTableName)
  end
end
60
+
61
module ThinkingSphinx
  # Class-level helper meant to be used via +extend+; relies on the extended
  # object providing +connection+ and +table_name+.
  module ActiveRecordQuotedName
    # Returns the table name, quoted by the connection's +quote_table_name+.
    def quoted_table_name
      connection.quote_table_name(table_name)
    end
  end
end
68
+
69
# Provide quoted_table_name on ActiveRecord::Base only if it is missing.
unless ActiveRecord::Base.respond_to?("quoted_table_name")
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordQuotedName)
end
72
+
73
module ThinkingSphinx
  # Fallback +store_full_sti_class+ for objects that do not define one.
  module ActiveRecordStoreFullSTIClass
    # Always reports +false+.
    def store_full_sti_class
      false
    end
  end
end
80
+
81
# Provide store_full_sti_class on ActiveRecord::Base only if it is missing.
unless ActiveRecord::Base.respond_to?(:store_full_sti_class)
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordStoreFullSTIClass)
end
84
+
85
module ThinkingSphinx
  # Fallback cattr_reader / cattr_writer / cattr_accessor macros backed by
  # class variables (@@name). Each macro evaluates generated source in the
  # class it is called on.
  module ClassAttributeMethods
    # Defines a class-level and an instance-level reader for each symbol,
    # initialising the class variable to nil when it is not yet defined.
    # A trailing options Hash is ignored.
    def cattr_reader(*syms)
      syms.flatten.each do |sym|
        next if sym.is_a?(Hash)
        # __LINE__ + 1 so errors in the generated code point at the right line.
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}
            @@#{sym}
          end

          def #{sym}
            @@#{sym}
          end
        EOS
      end
    end

    # Defines a class-level writer for each symbol and, unless
    # :instance_writer => false is passed, an instance-level writer as well.
    def cattr_writer(*syms)
      options = syms.extract_options!
      syms.flatten.each do |sym|
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}=(obj)
            @@#{sym} = obj
          end

          #{"
          def #{sym}=(obj)
            @@#{sym} = obj
          end
          " unless options[:instance_writer] == false }
        EOS
      end
    end

    # Defines both the readers and the writers for each symbol.
    def cattr_accessor(*syms)
      cattr_reader(*syms)
      cattr_writer(*syms)
    end
  end
end

# The macros must be *included* into Class so that every class object gains
# them. Class.extend only added them to the Class object itself, leaving
# ordinary classes (e.g. `class Foo; cattr_accessor :bar; end`) without them.
Class.send(
  :include, ThinkingSphinx::ClassAttributeMethods
) unless Class.respond_to?(:cattr_reader)
@@ -0,0 +1,727 @@
1
+ require 'thinking_sphinx/search/facets'
2
+
3
+ module ThinkingSphinx
4
+ # Once you've got those indexes in and built, this is the stuff that
5
+ # matters - how to search! This class provides a generic search
6
+ # interface - which you can use to search all your indexed models at once.
7
+ # Most times, you will just want a specific model's results - to search and
8
+ # search_for_ids methods will do the job in exactly the same manner when
9
+ # called from a model.
10
+ #
11
+ class Search
12
# Facet option defaults: attributes are not all included, the class facet is.
# NOTE(review): presumably consumed by the Facets module mixed in below - confirm.
GlobalFacetOptions = {
  :all_attributes => false,
  :class_facet    => true
}
16
+
17
+ class << self
18
+ include ThinkingSphinx::Search::Facets
19
+
20
+ # Searches for results that match the parameters provided. Will only
21
+ # return the ids for the matching objects. See #search for syntax
22
+ # examples.
23
+ #
24
+ # Note that this only searches the Sphinx index, with no ActiveRecord
25
+ # queries. Thus, if your index is not in sync with the database, this
26
+ # method may return ids that no longer exist there.
27
+ #
28
def search_for_ids(*args)
  # search_results receives a copy of the argument list, so the trailing
  # options hash is still present on +args+ for extraction afterwards.
  results, client = search_results(*args.clone)

  options = args.extract_options!
  page    = (options[:page] || 1).to_i # page defaults to 1 when absent

  ThinkingSphinx::Collection.ids_from_results(results, page, client.limit, options)
end
36
+
37
+ # Searches through the Sphinx indexes for relevant matches. There's
38
+ # various ways to search, sort, group and filter - which are covered
39
+ # below.
40
+ #
41
+ # Also, if you have WillPaginate installed, the search method can be used
42
+ # just like paginate. The same parameters - :page and :per_page - work as
43
+ # expected, and the returned result set can be used by the will_paginate
44
+ # helper.
45
+ #
46
+ # == Basic Searching
47
+ #
48
+ # The simplest way of searching is straight text.
49
+ #
50
+ # ThinkingSphinx::Search.search "pat"
51
+ # ThinkingSphinx::Search.search "google"
52
+ # User.search "pat", :page => (params[:page] || 1)
53
+ # Article.search "relevant news issue of the day"
54
+ #
55
+ # If you specify :include, like in an #find call, this will be respected
56
+ # when loading the relevant models from the search results.
57
+ #
58
+ # User.search "pat", :include => :posts
59
+ #
60
+ # == Match Modes
61
+ #
62
+ # Sphinx supports 5 different matching modes. By default Thinking Sphinx
63
+ # uses :all, which unsurprisingly requires all the supplied search terms
64
+ # to match a result.
65
+ #
66
+ # Alternative modes include:
67
+ #
68
+ # User.search "pat allan", :match_mode => :any
69
+ # User.search "pat allan", :match_mode => :phrase
70
+ # User.search "pat | allan", :match_mode => :boolean
71
+ # User.search "@name pat | @username pat", :match_mode => :extended
72
+ #
73
+ # Any will find results with any of the search terms. Phrase treats the search
74
+ # terms as a single phrase instead of individual words. Boolean and extended allow
75
+ # for more complex query syntax, refer to the sphinx documentation for further
76
+ # details.
77
+ #
78
+ # == Weighting
79
+ #
80
+ # Sphinx has support for weighting, where matches in one field can be considered
81
+ # more important than in another. Weights are integers, with 1 as the default.
82
+ # They can be set per-search like this:
83
+ #
84
+ # User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 }
85
+ #
86
+ # If you're searching multiple models, you can set per-index weights:
87
+ #
88
+ # ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 }
89
+ #
90
+ # See http://sphinxsearch.com/doc.html#weighting for further details.
91
+ #
92
+ # == Searching by Fields
93
+ #
94
+ # If you want to step it up a level, you can limit your search terms to
95
+ # specific fields:
96
+ #
97
+ # User.search :conditions => {:name => "pat"}
98
+ #
99
+ # This uses Sphinx's extended match mode, unless you specify a different
100
+ # match mode explicitly (but then this way of searching won't work). Also
101
+ # note that you don't need to put in a search string.
102
+ #
103
+ # == Searching by Attributes
104
+ #
105
+ # Also known as filters, you can limit your searches to documents that
106
+ # have specific values for their attributes. There are three ways to do
107
+ # this. The first two techniques work in all scenarios - using the :with
108
+ # or :with_all options.
109
+ #
110
+ # ThinkingSphinx::Search.search :with => {:tag_ids => 10}
111
+ # ThinkingSphinx::Search.search :with => {:tag_ids => [10,12]}
112
+ # ThinkingSphinx::Search.search :with_all => {:tag_ids => [10,12]}
113
+ #
114
+ # The first :with search will match records with a tag_id attribute of 10.
115
+ # The second :with will match records with a tag_id attribute of 10 OR 12.
116
+ # If you need to find records that are tagged with ids 10 AND 12, you
117
+ # will need to use the :with_all search parameter. This is particularly
118
+ # useful in conjunction with Multi Value Attributes (MVAs).
119
+ #
120
+ # The third filtering technique is only viable if you're searching with a
121
+ # specific model (not multi-model searching). With a single model,
122
+ # Thinking Sphinx can figure out what attributes and fields are available,
123
+ # so you can put it all in the :conditions hash, and it will sort it out.
124
+ #
125
+ # Node.search :conditions => {:parent_id => 10}
126
+ #
127
+ # Filters can be single values, arrays of values, or ranges.
128
+ #
129
+ # Article.search "East Timor", :conditions => {:rating => 3..5}
130
+ #
131
+ # == Excluding by Attributes
132
+ #
133
+ # Sphinx also supports negative filtering - where the filters are of
134
+ # attribute values to exclude. This is done with the :without option:
135
+ #
136
+ # User.search :without => {:role_id => 1}
137
+ #
138
+ # == Excluding by Primary Key
139
+ #
140
+ # There is a shortcut to exclude records by their ActiveRecord primary key:
141
+ #
142
+ # User.search :without_ids => 1
143
+ #
144
+ # Pass an array or a single value.
145
+ #
146
+ # The primary key must be an integer as a negative filter is used. Note
147
+ # that for multi-model search, an id may occur in more than one model.
148
+ #
149
+ # == Infix (Star) Searching
150
+ #
151
+ # By default, Sphinx uses English stemming, e.g. matching "shoes" if you
152
+ # search for "shoe". It won't find "Melbourne" if you search for
153
+ # "elbourn", though.
154
+ #
155
+ # Enable infix searching by something like this in config/sphinx.yml:
156
+ #
157
+ # development:
158
+ # enable_star: 1
159
+ # min_infix_length: 2
160
+ #
161
+ # Note that this will make indexing take longer.
162
+ #
163
+ # With those settings (and after reindexing), wildcard asterisks can be used
164
+ # in queries:
165
+ #
166
+ # Location.search "*elbourn*"
167
+ #
168
+ # To automatically add asterisks around every token (but not operators),
169
+ # pass the :star option:
170
+ #
171
+ # Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean
172
+ #
173
+ # This would become "*elbourn* -*ustrali*". The :star option only adds the
174
+ # asterisks. You need to make the config/sphinx.yml changes yourself.
175
+ #
176
+ # By default, the tokens are assumed to match the regular expression /\w+/u.
177
+ # If you've modified the charset_table, pass another regular expression, e.g.
178
+ #
179
+ # User.search("oo@bar.c", :star => /[\w@.]+/u)
180
+ #
181
+ # to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*".
182
+ #
183
+ # == Sorting
184
+ #
185
+ # Sphinx can only sort by attributes, so generally you will need to avoid
186
+ # using field names in your :order option. However, if you're searching
187
+ # on a single model, and have specified some fields as sortable, you can
188
+ # use those field names and Thinking Sphinx will interpret accordingly.
189
+ # Remember: this will only happen for single-model searches, and only
190
+ # through the :order option.
191
+ #
192
+ # Location.search "Melbourne", :order => :state
193
+ # User.search :conditions => {:role_id => 2}, :order => "name ASC"
194
+ #
195
+ # Keep in mind that if you use a string, you *must* specify the direction
196
+ # (ASC or DESC) else Sphinx won't return any results. If you use a symbol
197
+ # then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
198
+ # use the :sort_mode option:
199
+ #
200
+ # Location.search "Melbourne", :order => :state, :sort_mode => :desc
201
+ #
202
+ # Of course, there are other sort modes - check out the Sphinx
203
+ # documentation[http://sphinxsearch.com/doc.html] for that level of
204
+ # detail though.
205
+ #
206
+ # If desired, you can sort by a column in your model instead of a sphinx
207
+ # field or attribute. This sort only applies to the current page, so is
208
+ # most useful when performing a search with a single page of results.
209
+ #
210
+ # User.search("pat", :sql_order => "name")
211
+ #
212
+ # == Grouping
213
+ #
214
+ # For this you can use the group_by, group_clause and group_function
215
+ # options - which are all directly linked to Sphinx's expectations. No
216
+ # magic from Thinking Sphinx. It can get a little tricky, so make sure
217
+ # you read all the relevant
218
+ # documentation[http://sphinxsearch.com/doc.html#clustering] first.
219
+ #
220
+ # Grouping is done via three parameters within the options hash
221
+ # * <tt>:group_function</tt> determines the way grouping is done
222
+ # * <tt>:group_by</tt> determines the field which is used for grouping
223
+ # * <tt>:group_clause</tt> determines the sorting order
224
+ #
225
+ # As a convenience, you can also use
226
+ # * <tt>:group</tt>
227
+ # which sets :group_by and defaults to :group_function of :attr
228
+ #
229
+ # === group_function
230
+ #
231
+ # Valid values for :group_function are
232
+ # * <tt>:day</tt>, <tt>:week</tt>, <tt>:month</tt>, <tt>:year</tt> - Grouping is done by the respective timeframes.
233
+ # * <tt>:attr</tt>, <tt>:attrpair</tt> - Grouping is done by the specified attributes(s)
234
+ #
235
+ # === group_by
236
+ #
237
+ # This parameter denotes the field by which grouping is done. Note that the
238
+ # specified field must be a sphinx attribute or index.
239
+ #
240
+ # === group_clause
241
+ #
242
+ # This determines the sorting order of the groups. In a grouping search,
243
+ # the matches within a group will be sorted by the <tt>:sort_mode</tt> and <tt>:order</tt> parameters.
244
+ # The group matches themselves however, will be sorted by <tt>:group_clause</tt>.
245
+ #
246
+ # The syntax for this is the same as an order parameter in extended sort mode.
247
+ # Namely, you can specify an SQL-like sort expression with up to 5 attributes
248
+ # (including internal attributes), eg: "@relevance DESC, price ASC, @id DESC"
249
+ #
250
+ # === Grouping by timestamp
251
+ #
252
+ # Timestamp grouping groups off items by the day, week, month or year of the
253
+ # attribute given. In order to do this you need to define a timestamp attribute,
254
+ # which pretty much looks like the standard definition for any attribute.
255
+ #
256
+ # define_index do
257
+ # #
258
+ # # All your other stuff
259
+ # #
260
+ # has :created_at
261
+ # end
262
+ #
263
+ # When you need to fire off your search, it'll go something to the tune of
264
+ #
265
+ # Fruit.search "apricot", :group_function => :day, :group_by => 'created_at'
266
+ #
267
+ # The <tt>@groupby</tt> special attribute will contain the date for that group.
268
+ # Depending on the <tt>:group_function</tt> parameter, the date format will be
269
+ #
270
+ # * <tt>:day</tt> - YYYYMMDD
271
+ # * <tt>:week</tt> - YYYYNNN (NNN is the first day of the week in question,
272
+ # counting from the start of the year )
273
+ # * <tt>:month</tt> - YYYYMM
274
+ # * <tt>:year</tt> - YYYY
275
+ #
276
+ #
277
+ # === Grouping by attribute
278
+ #
279
+ # The syntax is the same as grouping by timestamp, except for the fact that the
280
+ # <tt>:group_function</tt> parameter is changed
281
+ #
282
+ # Fruit.search "apricot", :group_function => :attr, :group_by => 'size'
283
+ #
284
+ #
285
+ # == Geo/Location Searching
286
+ #
287
+ # Sphinx - and therefore Thinking Sphinx - has the facility to search
288
+ # around a geographical point, using a given latitude and longitude. To
289
+ # take advantage of this, you will need to have both of those values in
290
+ # attributes. To search with that point, you can then use one of the
291
+ # following syntax examples:
292
+ #
293
+ # Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc"
294
+ # Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc"
295
+ # :latitude_attr => "latit", :longitude_attr => "longit"
296
+ #
297
+ # The first example applies when your latitude and longitude attributes
298
+ # are named any of lat, latitude, lon, long or longitude. If that's not
299
+ # the case, you will need to explicitly state them in your search, _or_
300
+ # you can do so in your model:
301
+ #
302
+ # define_index do
303
+ # has :latit # Float column, stored in radians
304
+ # has :longit # Float column, stored in radians
305
+ #
306
+ # set_property :latitude_attr => "latit"
307
+ # set_property :longitude_attr => "longit"
308
+ # end
309
+ #
310
+ # Now, geo-location searching really only has an effect if you have a
311
+ # filter, sort or grouping clause related to it - otherwise it's just a
312
+ # normal search, and _will not_ return a distance value otherwise. To
313
+ # make use of the positioning difference, use the special attribute
314
+ # "@geodist" in any of your filters or sorting or grouping clauses.
315
+ #
316
+ # And don't forget - both the latitude and longitude you use in your
317
+ # search, and the values in your indexes, need to be stored as a float in radians,
318
+ # _not_ degrees. Keep in mind that if you do this conversion in SQL
319
+ # you will need to explicitly declare a column type of :float.
320
+ #
321
+ # define_index do
322
+ # has 'RADIANS(lat)', :as => :lat, :type => :float
323
+ # # ...
324
+ # end
325
+ #
326
+ # Once you've got your results set, you can access the distances as
327
+ # follows:
328
+ #
329
+ # @results.each_with_geodist do |result, distance|
330
+ # # ...
331
+ # end
332
+ #
333
+ # The distance value is returned as a float, representing the distance in
334
+ # metres.
335
+ #
336
+ # == Handling a Stale Index
337
+ #
338
+ # Especially if you don't use delta indexing, you risk having records in the
339
+ # Sphinx index that are no longer in the database. By default, those will simply
340
+ # come back as nils:
341
+ #
342
+ # >> pat_user.delete
343
+ # >> User.search("pat")
344
+ # Sphinx Result: [1,2]
345
+ # => [nil, <#User id: 2>]
346
+ #
347
+ # (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.)
348
+ #
349
+ # You can simply Array#compact these results or handle the nils in some other way, but
350
+ # Sphinx will still report two results, and the missing records may upset your layout.
351
+ #
352
+ # If you pass :retry_stale => true to a single-model search, missing records will
353
+ # cause Thinking Sphinx to retry the query but excluding those records. Since search
354
+ # is paginated, the new search could potentially include missing records as well, so by
355
+ # default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five
356
+ # times, and so on. If there are still missing ids on the last retry, they are
357
+ # shown as nils.
358
+ #
359
# Runs a Sphinx search and wraps the results in a collection.
#
# The positional arguments form the query terms and a trailing Hash holds
# the options. The work is done inside retry_search_on_stale_index. Any
# :error entry in the raw results is logged at :error level before the
# collection is built.
def search(*args)
  query   = args.clone # an array
  options = query.extract_options!

  retry_search_on_stale_index(query, options) do
    results, client = search_results(*(query + [options]))

    log "Sphinx Error: #{results[:error]}", :error if results[:error]

    page = options[:page] ? options[:page].to_i : 1

    ThinkingSphinx::Collection.create_from_results(results, page, client.limit, options)
  end
end
374
+
375
# Calls +block+, retrying it when a StaleIdsException is raised.
#
# options[:retry_stale] sets the retry budget: true means three retries,
# nil/false means none, anything else is converted with to_i. Before each
# attempt options[:raise_on_stale] is set to whether any retries remain.
# After a stale failure the offending ids are merged into
# options[:without_ids] and the attempt is repeated.
def retry_search_on_stale_index(query, options, &block)
  seen_stale_ids = []
  retries_remaining =
    case options[:retry_stale]
    when true       then 3 # default to three retries
    when nil, false then 0 # no retries
    else                 options[:retry_stale].to_i
    end

  begin
    # Passed as an option so the block's collaborators can see it; stale
    # records should only raise while there are retries left.
    options[:raise_on_stale] = retries_remaining > 0
    block.call
  rescue StaleIdsException => stale
    retries_remaining -= 1

    seen_stale_ids |= stale.ids                                      # for logging
    options[:without_ids] = Array(options[:without_ids]) | stale.ids # actual exclusion

    noun = retries_remaining == 1 ? 'try' : 'tries'
    log("Sphinx Stale Ids (%s %s left): %s" % [
      retries_remaining, noun, seen_stale_ids.join(', ')
    ])

    retry
  end
end
406
+
407
# Returns the :total_found figure from the search results for the given
# arguments, or 0 when that entry is missing.
def count(*args)
  results = search_results(*args.clone).first
  results[:total_found] || 0
end
411
+
412
+ # Checks if a document with the given id exists within a specific index.
413
+ # Expected parameters:
414
+ #
415
+ # - ID of the document
416
+ # - Index to check within
417
+ # - Options hash (defaults to {})
418
+ #
419
+ # Example:
420
+ #
421
+ # ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User)
422
+ #
423
def search_for_id(*args)
  options = args.extract_options!
  client  = client_from_options options

  query, filters = search_conditions(options[:class], options[:conditions] || {})
  client.filters += filters
  # Field conditions are expressed as a query string, which needs extended mode.
  client.match_mode = :extended unless query.empty?

  # Restrict the search to exactly the requested document id.
  document_id = args.first
  client.id_range = document_id..document_id

  begin
    matches = client.query(query, args[1])[:matches]
    return matches.length > 0
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end
end
440
+
441
+ private
442
+
443
+ # This method handles the common search functionality, and returns both
444
+ # the result hash and the client. Not super elegant, but it'll do for
445
+ # the moment.
446
+ #
447
# Builds a client from the options, assembles the query string, applies
# sorting and pagination, runs the query and returns [results, client].
#
# Positional arguments are joined with spaces to form the query; a trailing
# Hash supplies the options. A refused connection is re-raised as
# ThinkingSphinx::ConnectionError.
def search_results(*args)
  options = args.extract_options!
  terms   = args.join(' ')
  client  = client_from_options options

  terms = star_query(terms, options[:star]) if options[:star]

  condition_query, condition_filters = search_conditions(
    options[:class], options[:conditions] || {}
  )
  client.filters += condition_filters
  client.match_mode = :extended unless condition_query.empty?
  # Strip because "" and " " are not equivalent queries.
  query = "#{terms} #{condition_query}".strip

  set_sort_options! client, options

  client.limit = options[:per_page].to_i if options[:per_page]
  requested_page = options[:page] ? options[:page].to_i : 1
  page = requested_page > 0 ? requested_page : 1
  client.offset = (page - 1) * client.limit

  begin
    log "Sphinx: #{query}"
    results = client.query(query, '*', options[:comment] || '')
    log "Sphinx Result:"
    internal_ids = results[:matches].collect { |match|
      match[:attributes]["sphinx_internal_id"]
    }
    log internal_ids.inspect
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  return results, client
end
482
+
483
+ # Set all the appropriate settings for the client, using the provided
484
+ # options hash.
485
+ #
486
# Builds a Riddle::Client for the configured address/port and applies the
# settings and filters described by +options+. Note that +options+ is
# modified in place (:max_matches, :index_weights, :group_by,
# :group_function and :classes may be set).
def client_from_options(options = {})
  config        = ThinkingSphinx::Configuration.instance
  client        = Riddle::Client.new(config.address, config.port)
  klass         = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}

  # The Riddle default is per-query max_matches=1000. If the per-server max
  # in sphinx.yml is smaller, the default must be overridden or searches fail
  # with "per-query max_matches=1000 out of bounds (per-server max_matches=200)".
  server_max_matches = config.configuration.searchd.max_matches
  options[:max_matches] ||= server_max_matches if server_max_matches

  # Turn :index_weights => { "foo" => 2, User => 1 }
  # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
  if (index_weights = options[:index_weights])
    expanded = {}
    index_weights.each do |index, weight|
      if index.is_a?(Class)
        name = ThinkingSphinx::Index.name(index)
        expanded["#{name}_core"]  = weight
        expanded["#{name}_delta"] = weight
      else
        expanded[index] = weight
      end
    end
    options[:index_weights] = expanded
  end

  # :group is shorthand for :group_by, with :group_function defaulting to :attr.
  if options[:group]
    options[:group_by]         = options[:group].to_s
    options[:group_function] ||= :attr
  end

  # For each setting: explicit option first, then the index-level option,
  # then whatever the client already has.
  [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ].each do |key|
    value = options[key] || index_options[key] || client.send(key)
    client.send("#{key}=", value)
  end

  options[:classes] = [klass] if klass

  if client.anchor.empty?
    client.anchor = anchor_conditions(klass, options) || {}
  end

  # Always filter on sphinx_deleted being 0.
  client.filters << Riddle::Client::Filter.new("sphinx_deleted", [0])

  # class filters
  if options[:classes]
    class_crcs = options[:classes].collect { |k| k.to_crc32s }.flatten
    client.filters << Riddle::Client::Filter.new("class_crc", class_crcs)
  end

  # normal attribute filters
  if options[:with]
    client.filters += options[:with].collect { |attr, val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val)
    }
  end

  # exclusive attribute filters
  if options[:without]
    client.filters += options[:without].collect { |attr, val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val), true
    }
  end

  # every-match attribute filters: one filter per value
  if options[:with_all]
    client.filters += options[:with_all].collect { |attr, vals|
      Array(vals).collect { |val|
        Riddle::Client::Filter.new attr.to_s, filter_value(val)
      }
    }.flatten
  end

  # exclusive attribute filter on primary key
  if options[:without_ids]
    client.filters += Array(options[:without_ids]).collect { |id|
      Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
    }
  end

  client
end
570
+
571
# Wraps each token of +query+ in asterisks.
#
# Tokens match +custom_token+ when it is a Regexp, otherwise /\w+/u. A token
# is left unchanged when it directly follows an operator character
# (@, ~ or /), when the match is a double-quoted phrase, or when it already
# has an asterisk immediately before or after it.
def star_query(query, custom_token = nil)
  token = custom_token.is_a?(Regexp) ? custom_token : /\w+/u

  query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
    # Capture the match data first; the checks below run further matches.
    match  = Regexp.last_match
    before = match.pre_match
    word   = match[0]
    after  = match.post_match

    # E.g. "@foo", "/2", "~3", but not as part of a token
    next word if before.match(%r{(\W|^)[@~/]\Z})
    # E.g. "foo bar", with quotes
    next word if word.starts_with?('"') && word.ends_with?('"')
    next word if before.ends_with?("*") || after.starts_with?("*")

    "*#{word}*"
  end
end
586
+
587
# Normalises a filter value before it is handed to a Riddle filter.
#
# * Range - returned as-is, unless it starts with a Time, in which case both
#           ends are converted to integer timestamps.
# * Array - returned with any Time elements converted to timestamps.
# * Time  - returned as a single-element Array holding its timestamp.
# * other - wrapped with Array().
def filter_value(value)
  case value
  when Range
    value.first.is_a?(Time) ? timestamp(value.first)..timestamp(value.last) : value
  when Array
    value.collect { |val| val.is_a?(Time) ? timestamp(val) : val }
  when Time
    # Array(time) would go through Time#to_a and produce the broken-down
    # [sec, min, hour, ...] components instead of a timestamp.
    [timestamp(value)]
  else
    Array(value)
  end
end
597
+
598
+ # Returns the integer timestamp for a Time object.
599
+ #
600
+ # If using Rails 2.1+, need to handle timezones to translate them back to
601
+ # UTC, as that's what datetimes will be stored as by MySQL.
602
+ #
603
+ # in_time_zone is a method that was added for the timezone support in
604
+ # Rails 2.1, which is why it's used for testing. I'm sure there's better
605
+ # ways, but this does the job.
606
+ #
607
def timestamp(value)
  # Without timezone support there is nothing to translate.
  return value.to_i unless value.respond_to?(:in_time_zone)

  # Time#utc converts its receiver in place, which would silently change the
  # object the caller passed in. getutc returns a converted copy instead;
  # utc is only used for time-like objects that do not offer getutc.
  utc_value = value.respond_to?(:getutc) ? value.getutc : value.utc
  utc_value.to_i
end
610
+
611
+ # Translate field and attribute conditions to the relevant search string
612
+ # and filters.
613
+ #
614
def search_conditions(klass, conditions={})
  # Unique names of every attribute across the class's indexes; empty when
  # no class is given, so every condition is then treated as a field.
  attribute_names = []
  if klass
    attribute_names = klass.sphinx_indexes.map { |index|
      index.attributes.map { |attribute| attribute.unique_name }
    }.flatten
  end

  terms   = []
  filters = []

  conditions.each do |name, value|
    unless attribute_names.include?(name.to_sym)
      # Not an attribute: becomes a field-scoped term in the query string.
      terms << "@#{name} #{value}"
      next
    end

    filters << Riddle::Client::Filter.new(name.to_s, filter_value(value))
  end

  # Two-element result: the joined query string, then the filter list.
  return terms.join(' '), filters
end
634
+
635
+ # Return the appropriate latitude and longitude values, depending on
636
+ # whether the relevant attributes have been defined, and also whether
637
+ # there's actually any values.
638
+ #
639
def anchor_conditions(klass, options)
  indexes = klass ? klass.sphinx_indexes : []

  attribute_names = indexes.map { |index|
    index.attributes.map { |attribute| attribute.unique_name }
  }.flatten

  # Attribute names: start from the first index-level setting, let an
  # explicit search option override it, then fall back to the conventional
  # attribute names in order of preference.
  lat_attr = indexes.map { |index| index.options[:latitude_attr] }.compact.first
  lon_attr = indexes.map { |index| index.options[:longitude_attr] }.compact.first

  lat_attr = options[:latitude_attr]  || lat_attr
  lon_attr = options[:longitude_attr] || lon_attr

  [:lat, :latitude].each do |name|
    lat_attr ||= name if attribute_names.include?(name)
  end
  [:lng, :lon, :long, :longitude].each do |name|
    lon_attr ||= name if attribute_names.include?(name)
  end

  # Coordinates: a :geo pair takes precedence over separate :lat/:lon values.
  geo = options[:geo]
  lat, lon = geo ? [geo.first, geo.last] : [options[:lat], options[:lon]]

  # No anchor unless both coordinates are present.
  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr.to_s,
    :latitude            => lat,
    :longitude_attribute => lon_attr.to_s,
    :longitude           => lon
  }
end
677
+
678
+ # Set the sort options using the :order key as well as the appropriate
679
+ # Riddle settings.
680
+ #
681
def set_sort_options!(client, options)
  klass         = options[:class]
  field_names   = []
  index_options = {}

  if klass
    field_names = klass.sphinx_indexes.map { |index|
      index.fields.map { |field| field.unique_name }
    }.flatten
    index_options = klass.sphinx_index_options
  end

  # An explicit :order wins over the one configured on the index.
  order = options[:order] || index_options[:order]

  if order.is_a?(Symbol)
    # A single column: default to ascending unless a mode was already chosen.
    current_mode = client.sort_mode
    client.sort_mode = :attr_asc if current_mode.nil? || current_mode == :relevance

    # Fields are sorted through their "_sort" attribute counterpart.
    client.sort_by = field_names.include?(order) ? "#{order}_sort" : order.to_s
  elsif order.is_a?(String)
    # A full sort clause: use extended mode unless the caller picked one.
    client.sort_mode = :extended unless options[:sort_mode]
    client.sort_by   = sorted_fields_to_attributes(order, field_names)
  end

  # Translate the :asc / :desc shorthands into their attribute sort modes.
  client.sort_mode = :attr_asc if client.sort_mode == :asc
  client.sort_mode = :attr_desc if client.sort_mode == :desc
end
707
+
708
+ # Search through a collection of fields and translate any appearances
709
+ # of them in a string to their attribute equivalent for sorting.
710
+ #
711
def sorted_fields_to_attributes(string, fields)
  # Build a new string rather than editing the argument: an in-place gsub!
  # would alter the caller's :order option and raise for frozen strings.
  fields.inject(string.dup) do |clause, field|
    # Escape the field name so it is always matched literally. It only counts
    # as a match when it stands alone: at the start or after whitespace, and
    # followed by whitespace, a comma plus whitespace, or the end.
    pattern = /(^|\s)#{Regexp.escape(field.to_s)}(,?\s|$)/

    clause.gsub(pattern) do
      "#{Regexp.last_match(1)}#{field}_sort#{Regexp.last_match(2)}"
    end
  end
end
720
+
721
# Sends +message+ to the ActiveRecord logger at the given level (+method+,
# :debug by default). Does nothing and returns nil when no logger is set.
def log(message, method = :debug)
  logger = ::ActiveRecord::Base.logger
  logger.send(method, message) unless logger.nil?
end
725
+ end
726
+ end
727
+ end