initforthe-thinking-sphinx 1.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +141 -0
  3. data/lib/thinking_sphinx.rb +215 -0
  4. data/lib/thinking_sphinx/active_record.rb +278 -0
  5. data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
  6. data/lib/thinking_sphinx/active_record/delta.rb +87 -0
  7. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  8. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  9. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
  10. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  11. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +135 -0
  12. data/lib/thinking_sphinx/association.rb +164 -0
  13. data/lib/thinking_sphinx/attribute.rb +268 -0
  14. data/lib/thinking_sphinx/class_facet.rb +15 -0
  15. data/lib/thinking_sphinx/collection.rb +148 -0
  16. data/lib/thinking_sphinx/configuration.rb +262 -0
  17. data/lib/thinking_sphinx/core/string.rb +15 -0
  18. data/lib/thinking_sphinx/deltas.rb +30 -0
  19. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  20. data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta.rb +27 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  23. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  24. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  25. data/lib/thinking_sphinx/deploy/capistrano.rb +82 -0
  26. data/lib/thinking_sphinx/facet.rb +108 -0
  27. data/lib/thinking_sphinx/facet_collection.rb +59 -0
  28. data/lib/thinking_sphinx/field.rb +82 -0
  29. data/lib/thinking_sphinx/index.rb +99 -0
  30. data/lib/thinking_sphinx/index/builder.rb +287 -0
  31. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  32. data/lib/thinking_sphinx/property.rb +160 -0
  33. data/lib/thinking_sphinx/rails_additions.rb +136 -0
  34. data/lib/thinking_sphinx/search.rb +727 -0
  35. data/lib/thinking_sphinx/search/facets.rb +104 -0
  36. data/lib/thinking_sphinx/source.rb +150 -0
  37. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  38. data/lib/thinking_sphinx/source/sql.rb +126 -0
  39. data/lib/thinking_sphinx/tasks.rb +162 -0
  40. data/rails/init.rb +14 -0
  41. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  42. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  43. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  44. data/spec/unit/thinking_sphinx/active_record_spec.rb +329 -0
  45. data/spec/unit/thinking_sphinx/association_spec.rb +246 -0
  46. data/spec/unit/thinking_sphinx/attribute_spec.rb +338 -0
  47. data/spec/unit/thinking_sphinx/collection_spec.rb +15 -0
  48. data/spec/unit/thinking_sphinx/configuration_spec.rb +222 -0
  49. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  50. data/spec/unit/thinking_sphinx/facet_collection_spec.rb +64 -0
  51. data/spec/unit/thinking_sphinx/facet_spec.rb +302 -0
  52. data/spec/unit/thinking_sphinx/field_spec.rb +154 -0
  53. data/spec/unit/thinking_sphinx/index/builder_spec.rb +355 -0
  54. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  55. data/spec/unit/thinking_sphinx/index_spec.rb +45 -0
  56. data/spec/unit/thinking_sphinx/rails_additions_spec.rb +191 -0
  57. data/spec/unit/thinking_sphinx/search_spec.rb +228 -0
  58. data/spec/unit/thinking_sphinx/source_spec.rb +217 -0
  59. data/spec/unit/thinking_sphinx_spec.rb +151 -0
  60. data/tasks/distribution.rb +67 -0
  61. data/tasks/rails.rake +1 -0
  62. data/tasks/testing.rb +78 -0
  63. data/vendor/after_commit/LICENSE +20 -0
  64. data/vendor/after_commit/README +16 -0
  65. data/vendor/after_commit/Rakefile +22 -0
  66. data/vendor/after_commit/init.rb +8 -0
  67. data/vendor/after_commit/lib/after_commit.rb +45 -0
  68. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  69. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  70. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  71. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  72. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  73. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  74. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  75. data/vendor/riddle/lib/riddle.rb +30 -0
  76. data/vendor/riddle/lib/riddle/client.rb +619 -0
  77. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  78. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  79. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  80. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  81. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  82. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  83. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  84. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  85. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  86. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  87. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  88. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  89. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  90. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  91. metadata +190 -0
@@ -0,0 +1,136 @@
1
require 'set'

module ThinkingSphinx
  # Fallback implementation of Hash#except / Hash#except!, mixed into Hash
  # only when Hash does not already provide #except.
  module HashExcept
    # Returns a new hash without the given keys.
    #
    # If the receiver defines +convert_key+ (public or not), every key is
    # passed through it before comparison, so the lookup uses the same key
    # form the receiver stores.
    def except(*keys)
      # The second argument makes respond_to? see private/protected methods.
      keys = keys.map { |key| convert_key(key) } if respond_to?(:convert_key, true)
      rejected = Set.new(keys)
      reject { |key, _value| rejected.include?(key) }
    end

    # Removes the given keys from this hash in place and returns the hash.
    def except!(*keys)
      replace(except(*keys))
    end
  end
end

# Module#instance_methods returns Strings on Ruby 1.8 but Symbols on 1.9+,
# so an include?("except") test never matches there; method_defined? behaves
# the same on every version.
Hash.send(:include, ThinkingSphinx::HashExcept) unless Hash.method_defined?(:except)
19
+
20
module ThinkingSphinx
  # Fallback implementation of Array#extract_options!, mixed into Array only
  # when Array does not already define it.
  module ArrayExtractOptions
    # Removes and returns the last element when it is a Hash; otherwise
    # leaves the array untouched and returns a new, empty Hash.
    def extract_options!
      last.is_a?(::Hash) ? pop : {}
    end
  end
end

# method_defined? is used instead of instance_methods.include?("...") because
# instance_methods holds Symbols on Ruby 1.9+, so the String test never matched.
unless Array.method_defined?(:extract_options!)
  Array.send(:include, ThinkingSphinx::ArrayExtractOptions)
end
31
+
32
+ module ThinkingSphinx
33
+ module AbstractQuotedTableName
34
+ def quote_table_name(name)
35
+ quote_column_name(name)
36
+ end
37
+ end
38
+ end
39
+
40
+ ActiveRecord::ConnectionAdapters::AbstractAdapter.send(
41
+ :include, ThinkingSphinx::AbstractQuotedTableName
42
+ ) unless ActiveRecord::ConnectionAdapters::AbstractAdapter.instance_methods.include?("quote_table_name")
43
+
44
module ThinkingSphinx
  # Fallback +quote_table_name+ for the MySQL adapter.
  module MysqlQuotedTableName
    # Quotes the name as a column, then turns every "." into "`.`".
    def quote_table_name(name) #:nodoc:
      quote_column_name(name).gsub('.', '`.`')
    end
  end
end

# Module#constants and #instance_methods return Strings on Ruby 1.8 but
# Symbols on 1.9+; comparing on to_s / using method_defined? keeps both
# checks working on either.
mysql_adapter_loaded = ActiveRecord::ConnectionAdapters.constants.any? { |const|
  const.to_s == "MysqlAdapter"
}

if mysql_adapter_loaded || ActiveRecord::Base.respond_to?(:jdbcmysql_connection)
  adapter = ActiveRecord::ConnectionAdapters.const_get(
    defined?(JRUBY_VERSION) ? :JdbcAdapter : :MysqlAdapter
  )
  unless adapter.method_defined?(:quote_table_name)
    adapter.send(:include, ThinkingSphinx::MysqlQuotedTableName)
  end
end
60
+
61
module ThinkingSphinx
  # Class-level fallback for +quoted_table_name+.
  module ActiveRecordQuotedName
    # The model's table name, quoted by the model's connection.
    def quoted_table_name
      self.connection.quote_table_name(self.table_name)
    end
  end
end

# Only extend ActiveRecord::Base when it has no quoted_table_name of its own.
unless ActiveRecord::Base.respond_to?("quoted_table_name")
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordQuotedName)
end
72
+
73
module ThinkingSphinx
  # Class-level fallback for +store_full_sti_class+.
  module ActiveRecordStoreFullSTIClass
    # Always false in this fallback.
    def store_full_sti_class
      false
    end
  end
end

# Only extend ActiveRecord::Base when it does not already answer the message.
unless ActiveRecord::Base.respond_to?(:store_full_sti_class)
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordStoreFullSTIClass)
end
84
+
85
module ThinkingSphinx
  # Fallback cattr_reader / cattr_writer / cattr_accessor macros, installed
  # only when classes do not already provide +cattr_reader+. Each attribute
  # is backed by a class variable (@@name) on the class calling the macro.
  module ClassAttributeMethods
    # Defines class-level and instance-level readers for each given name.
    # Hash arguments (options meant for cattr_writer) are ignored.
    def cattr_reader(*syms)
      syms.flatten.each do |sym|
        next if sym.is_a?(Hash)
        # __LINE__ + 1 so backtraces point at the first line of the heredoc.
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}
            @@#{sym}
          end

          def #{sym}
            @@#{sym}
          end
        EOS
      end
    end

    # Defines a class-level writer for each given name, plus an
    # instance-level writer unless a trailing options hash carries
    # :instance_writer => false.
    def cattr_writer(*syms)
      # Inlined equivalent of Array#extract_options!, so this module does not
      # depend on that extension having been loaded first.
      options = syms.last.is_a?(::Hash) ? syms.pop : {}
      syms.flatten.each do |sym|
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}=(obj)
            @@#{sym} = obj
          end

          #{"
          def #{sym}=(obj)
            @@#{sym} = obj
          end
          " unless options[:instance_writer] == false }
        EOS
      end
    end

    # Defines both the readers and the writers for each given name.
    def cattr_accessor(*syms)
      cattr_reader(*syms)
      cattr_writer(*syms)
    end
  end
end

# The macros must be instance methods of Class to be callable from any class
# body. Class.extend would only add them to the Class object itself, so
# ordinary classes could not call them.
unless Class.method_defined?(:cattr_reader)
  Class.send(:include, ThinkingSphinx::ClassAttributeMethods)
end
@@ -0,0 +1,727 @@
1
+ require 'thinking_sphinx/search/facets'
2
+
3
+ module ThinkingSphinx
4
+ # Once you've got those indexes in and built, this is the stuff that
5
+ # matters - how to search! This class provides a generic search
6
+ # interface - which you can use to search all your indexed models at once.
7
+ # Most times, you will just want a specific model's results - the search and
8
+ # search_for_ids methods will do the job in exactly the same manner when
9
+ # called from a model.
10
+ #
11
+ class Search
12
+ GlobalFacetOptions = {
13
+ :all_attributes => false,
14
+ :class_facet => true
15
+ }
16
+
17
+ class << self
18
+ include ThinkingSphinx::Search::Facets
19
+
20
+ # Searches for results that match the parameters provided. Will only
21
+ # return the ids for the matching objects. See #search for syntax
22
+ # examples.
23
+ #
24
+ # Note that this only searches the Sphinx index, with no ActiveRecord
25
+ # queries. Thus, if your index is not in sync with the database, this
26
+ # method may return ids that no longer exist there.
27
+ #
28
def search_for_ids(*args)
  results, client = search_results(*args.clone)

  options = args.extract_options!
  page    = options[:page] ? options[:page].to_i : 1
  # search_results clamps the page to 1 when computing the query offset;
  # apply the same clamp here so the page handed to the collection matches
  # the page that was actually queried (e.g. for :page => 0 or "").
  page    = 1 if page <= 0

  ThinkingSphinx::Collection.ids_from_results(results, page, client.limit, options)
end
36
+
37
+ # Searches through the Sphinx indexes for relevant matches. There's
38
+ # various ways to search, sort, group and filter - which are covered
39
+ # below.
40
+ #
41
+ # Also, if you have WillPaginate installed, the search method can be used
42
+ # just like paginate. The same parameters - :page and :per_page - work as
43
+ # expected, and the returned result set can be used by the will_paginate
44
+ # helper.
45
+ #
46
+ # == Basic Searching
47
+ #
48
+ # The simplest way of searching is straight text.
49
+ #
50
+ # ThinkingSphinx::Search.search "pat"
51
+ # ThinkingSphinx::Search.search "google"
52
+ # User.search "pat", :page => (params[:page] || 1)
53
+ # Article.search "relevant news issue of the day"
54
+ #
55
+ # If you specify :include, like in an #find call, this will be respected
56
+ # when loading the relevant models from the search results.
57
+ #
58
+ # User.search "pat", :include => :posts
59
+ #
60
+ # == Match Modes
61
+ #
62
+ # Sphinx supports 5 different matching modes. By default Thinking Sphinx
63
+ # uses :all, which unsurprisingly requires all the supplied search terms
64
+ # to match a result.
65
+ #
66
+ # Alternative modes include:
67
+ #
68
+ # User.search "pat allan", :match_mode => :any
69
+ # User.search "pat allan", :match_mode => :phrase
70
+ # User.search "pat | allan", :match_mode => :boolean
71
+ # User.search "@name pat | @username pat", :match_mode => :extended
72
+ #
73
+ # Any will find results with any of the search terms. Phrase treats the search
74
+ # terms as a single phrase instead of individual words. Boolean and extended allow
75
+ # for more complex query syntax, refer to the sphinx documentation for further
76
+ # details.
77
+ #
78
+ # == Weighting
79
+ #
80
+ # Sphinx has support for weighting, where matches in one field can be considered
81
+ # more important than in another. Weights are integers, with 1 as the default.
82
+ # They can be set per-search like this:
83
+ #
84
+ # User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 }
85
+ #
86
+ # If you're searching multiple models, you can set per-index weights:
87
+ #
88
+ # ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 }
89
+ #
90
+ # See http://sphinxsearch.com/doc.html#weighting for further details.
91
+ #
92
+ # == Searching by Fields
93
+ #
94
+ # If you want to step it up a level, you can limit your search terms to
95
+ # specific fields:
96
+ #
97
+ # User.search :conditions => {:name => "pat"}
98
+ #
99
+ # This uses Sphinx's extended match mode, unless you specify a different
100
+ # match mode explicitly (but then this way of searching won't work). Also
101
+ # note that you don't need to put in a search string.
102
+ #
103
+ # == Searching by Attributes
104
+ #
105
+ # Also known as filters, you can limit your searches to documents that
106
+ # have specific values for their attributes. There are three ways to do
107
+ # this. The first two techniques work in all scenarios - using the :with
108
+ # or :with_all options.
109
+ #
110
+ # ThinkingSphinx::Search.search :with => {:tag_ids => 10}
111
+ # ThinkingSphinx::Search.search :with => {:tag_ids => [10,12]}
112
+ # ThinkingSphinx::Search.search :with_all => {:tag_ids => [10,12]}
113
+ #
114
+ # The first :with search will match records with a tag_id attribute of 10.
115
+ # The second :with will match records with a tag_id attribute of 10 OR 12.
116
+ # If you need to find records that are tagged with ids 10 AND 12, you
117
+ # will need to use the :with_all search parameter. This is particularly
118
+ # useful in conjunction with Multi Value Attributes (MVAs).
119
+ #
120
+ # The third filtering technique is only viable if you're searching with a
121
+ # specific model (not multi-model searching). With a single model,
122
+ # Thinking Sphinx can figure out what attributes and fields are available,
123
+ # so you can put it all in the :conditions hash, and it will sort it out.
124
+ #
125
+ # Node.search :conditions => {:parent_id => 10}
126
+ #
127
+ # Filters can be single values, arrays of values, or ranges.
128
+ #
129
+ # Article.search "East Timor", :conditions => {:rating => 3..5}
130
+ #
131
+ # == Excluding by Attributes
132
+ #
133
+ # Sphinx also supports negative filtering - where the filters are of
134
+ # attribute values to exclude. This is done with the :without option:
135
+ #
136
+ # User.search :without => {:role_id => 1}
137
+ #
138
+ # == Excluding by Primary Key
139
+ #
140
+ # There is a shortcut to exclude records by their ActiveRecord primary key:
141
+ #
142
+ # User.search :without_ids => 1
143
+ #
144
+ # Pass an array or a single value.
145
+ #
146
+ # The primary key must be an integer as a negative filter is used. Note
147
+ # that for multi-model search, an id may occur in more than one model.
148
+ #
149
+ # == Infix (Star) Searching
150
+ #
151
+ # By default, Sphinx uses English stemming, e.g. matching "shoes" if you
152
+ # search for "shoe". It won't find "Melbourne" if you search for
153
+ # "elbourn", though.
154
+ #
155
+ # Enable infix searching by something like this in config/sphinx.yml:
156
+ #
157
+ # development:
158
+ # enable_star: 1
159
+ # min_infix_length: 2
160
+ #
161
+ # Note that this will make indexing take longer.
162
+ #
163
+ # With those settings (and after reindexing), wildcard asterisks can be used
164
+ # in queries:
165
+ #
166
+ # Location.search "*elbourn*"
167
+ #
168
+ # To automatically add asterisks around every token (but not operators),
169
+ # pass the :star option:
170
+ #
171
+ # Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean
172
+ #
173
+ # This would become "*elbourn* -*ustrali*". The :star option only adds the
174
+ # asterisks. You need to make the config/sphinx.yml changes yourself.
175
+ #
176
+ # By default, the tokens are assumed to match the regular expression /\w+/u.
177
+ # If you've modified the charset_table, pass another regular expression, e.g.
178
+ #
179
+ # User.search("oo@bar.c", :star => /[\w@.]+/u)
180
+ #
181
+ # to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*".
182
+ #
183
+ # == Sorting
184
+ #
185
+ # Sphinx can only sort by attributes, so generally you will need to avoid
186
+ # using field names in your :order option. However, if you're searching
187
+ # on a single model, and have specified some fields as sortable, you can
188
+ # use those field names and Thinking Sphinx will interpret accordingly.
189
+ # Remember: this will only happen for single-model searches, and only
190
+ # through the :order option.
191
+ #
192
+ # Location.search "Melbourne", :order => :state
193
+ # User.search :conditions => {:role_id => 2}, :order => "name ASC"
194
+ #
195
+ # Keep in mind that if you use a string, you *must* specify the direction
196
+ # (ASC or DESC) else Sphinx won't return any results. If you use a symbol
197
+ # then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
198
+ # use the :sort_mode option:
199
+ #
200
+ # Location.search "Melbourne", :order => :state, :sort_mode => :desc
201
+ #
202
+ # Of course, there are other sort modes - check out the Sphinx
203
+ # documentation[http://sphinxsearch.com/doc.html] for that level of
204
+ # detail though.
205
+ #
206
+ # If desired, you can sort by a column in your model instead of a sphinx
207
+ # field or attribute. This sort only applies to the current page, so is
208
+ # most useful when performing a search with a single page of results.
209
+ #
210
+ # User.search("pat", :sql_order => "name")
211
+ #
212
+ # == Grouping
213
+ #
214
+ # For this you can use the group_by, group_clause and group_function
215
+ # options - which are all directly linked to Sphinx's expectations. No
216
+ # magic from Thinking Sphinx. It can get a little tricky, so make sure
217
+ # you read all the relevant
218
+ # documentation[http://sphinxsearch.com/doc.html#clustering] first.
219
+ #
220
+ # Grouping is done via three parameters within the options hash
221
+ # * <tt>:group_function</tt> determines the way grouping is done
222
+ # * <tt>:group_by</tt> determines the field which is used for grouping
223
+ # * <tt>:group_clause</tt> determines the sorting order
224
+ #
225
+ # As a convenience, you can also use
226
+ # * <tt>:group</tt>
227
+ # which sets :group_by and defaults to :group_function of :attr
228
+ #
229
+ # === group_function
230
+ #
231
+ # Valid values for :group_function are
232
+ # * <tt>:day</tt>, <tt>:week</tt>, <tt>:month</tt>, <tt>:year</tt> - Grouping is done by the respective timeframes.
233
+ # * <tt>:attr</tt>, <tt>:attrpair</tt> - Grouping is done by the specified attributes(s)
234
+ #
235
+ # === group_by
236
+ #
237
+ # This parameter denotes the field by which grouping is done. Note that the
238
+ # specified field must be a sphinx attribute or index.
239
+ #
240
+ # === group_clause
241
+ #
242
+ # This determines the sorting order of the groups. In a grouping search,
243
+ # the matches within a group will be sorted by the <tt>:sort_mode</tt> and <tt>:order</tt> parameters.
244
+ # The group matches themselves however, will be sorted by <tt>:group_clause</tt>.
245
+ #
246
+ # The syntax for this is the same as an order parameter in extended sort mode.
247
+ # Namely, you can specify an SQL-like sort expression with up to 5 attributes
248
+ # (including internal attributes), eg: "@relevance DESC, price ASC, @id DESC"
249
+ #
250
+ # === Grouping by timestamp
251
+ #
252
+ # Timestamp grouping groups off items by the day, week, month or year of the
253
+ # attribute given. In order to do this you need to define a timestamp attribute,
254
+ # which pretty much looks like the standard definition for any attribute.
255
+ #
256
+ # define_index do
257
+ # #
258
+ # # All your other stuff
259
+ # #
260
+ # has :created_at
261
+ # end
262
+ #
263
+ # When you need to fire off your search, it'll go something to the tune of
264
+ #
265
+ # Fruit.search "apricot", :group_function => :day, :group_by => 'created_at'
266
+ #
267
+ # The <tt>@groupby</tt> special attribute will contain the date for that group.
268
+ # Depending on the <tt>:group_function</tt> parameter, the date format will be
269
+ #
270
+ # * <tt>:day</tt> - YYYYMMDD
271
+ # * <tt>:week</tt> - YYYYNNN (NNN is the first day of the week in question,
272
+ # counting from the start of the year )
273
+ # * <tt>:month</tt> - YYYYMM
274
+ # * <tt>:year</tt> - YYYY
275
+ #
276
+ #
277
+ # === Grouping by attribute
278
+ #
279
+ # The syntax is the same as grouping by timestamp, except for the fact that the
280
+ # <tt>:group_function</tt> parameter is changed
281
+ #
282
+ # Fruit.search "apricot", :group_function => :attr, :group_by => 'size'
283
+ #
284
+ #
285
+ # == Geo/Location Searching
286
+ #
287
+ # Sphinx - and therefore Thinking Sphinx - has the facility to search
288
+ # around a geographical point, using a given latitude and longitude. To
289
+ # take advantage of this, you will need to have both of those values in
290
+ # attributes. To search with that point, you can then use one of the
291
+ # following syntax examples:
292
+ #
293
+ # Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc"
294
+ # Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc"
295
+ # :latitude_attr => "latit", :longitude_attr => "longit"
296
+ #
297
+ # The first example applies when your latitude and longitude attributes
298
+ # are named any of lat, latitude, lon, long or longitude. If that's not
299
+ # the case, you will need to explicitly state them in your search, _or_
300
+ # you can do so in your model:
301
+ #
302
+ # define_index do
303
+ # has :latit # Float column, stored in radians
304
+ # has :longit # Float column, stored in radians
305
+ #
306
+ # set_property :latitude_attr => "latit"
307
+ # set_property :longitude_attr => "longit"
308
+ # end
309
+ #
310
+ # Now, geo-location searching really only has an effect if you have a
311
+ # filter, sort or grouping clause related to it - otherwise it's just a
312
+ # normal search, and _will not_ return a distance value otherwise. To
313
+ # make use of the positioning difference, use the special attribute
314
+ # "@geodist" in any of your filters or sorting or grouping clauses.
315
+ #
316
+ # And don't forget - both the latitude and longitude you use in your
317
+ # search, and the values in your indexes, need to be stored as a float in radians,
318
+ # _not_ degrees. Keep in mind that if you do this conversion in SQL
319
+ # you will need to explicitly declare a column type of :float.
320
+ #
321
+ # define_index do
322
+ # has 'RADIANS(lat)', :as => :lat, :type => :float
323
+ # # ...
324
+ # end
325
+ #
326
+ # Once you've got your results set, you can access the distances as
327
+ # follows:
328
+ #
329
+ # @results.each_with_geodist do |result, distance|
330
+ # # ...
331
+ # end
332
+ #
333
+ # The distance value is returned as a float, representing the distance in
334
+ # metres.
335
+ #
336
+ # == Handling a Stale Index
337
+ #
338
+ # Especially if you don't use delta indexing, you risk having records in the
339
+ # Sphinx index that are no longer in the database. By default, those will simply
340
+ # come back as nils:
341
+ #
342
+ # >> pat_user.delete
343
+ # >> User.search("pat")
344
+ # Sphinx Result: [1,2]
345
+ # => [nil, <#User id: 2>]
346
+ #
347
+ # (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.)
348
+ #
349
+ # You can simply Array#compact these results or handle the nils in some other way, but
350
+ # Sphinx will still report two results, and the missing records may upset your layout.
351
+ #
352
+ # If you pass :retry_stale => true to a single-model search, missing records will
353
+ # cause Thinking Sphinx to retry the query but excluding those records. Since search
354
+ # is paginated, the new search could potentially include missing records as well, so by
355
+ # default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five
356
+ # times, and so on. If there are still missing ids on the last retry, they are
357
+ # shown as nils.
358
+ #
359
def search(*args)
  query   = args.clone # an array
  options = query.extract_options!

  retry_search_on_stale_index(query, options) do
    results, client = search_results(*(query + [options]))

    log "Sphinx Error: #{results[:error]}", :error if results[:error]

    page = options[:page] ? options[:page].to_i : 1
    # Keep the page given to the collection in step with search_results,
    # which treats any page <= 0 as page 1 when computing the offset.
    page = 1 if page <= 0

    ThinkingSphinx::Collection.create_from_results(results, page, client.limit, options)
  end
end
374
+
375
# Runs the given block, retrying it when it raises StaleIdsException.
#
# options[:retry_stale] selects the number of retries: true means three,
# nil/false means none, anything else is converted with to_i. Before each
# attempt options[:raise_on_stale] is set to whether any retries remain, and
# after each failure the stale ids are merged into options[:without_ids] so
# the next attempt excludes them. Returns the block's result.
#
# The +query+ argument is accepted but not used here.
def retry_search_on_stale_index(query, options, &block)
  stale_ids = []
  stale_retries_left = case options[:retry_stale]
    when true
      3 # default to three retries
    when nil, false
      0 # no retries
    else options[:retry_stale].to_i
  end

  begin
    # Passing this in an option so Collection.create_from_results can see it.
    # It should only raise on stale records if there are any retries left.
    options[:raise_on_stale] = stale_retries_left > 0
    block.call
  # If ThinkingSphinx::Collection.create_from_results found records in Sphinx but not
  # in the DB and the :raise_on_stale option is set, this exception is raised. We retry
  # a limited number of times, excluding the stale ids from the search.
  rescue StaleIdsException => e
    # An exception arriving when no retries were left means the block ignored
    # :raise_on_stale; re-raise rather than retrying without bound.
    raise if stale_retries_left <= 0

    stale_retries_left -= 1

    stale_ids |= e.ids # For logging
    options[:without_ids] = Array(options[:without_ids]) | e.ids # Actual exclusion

    tries = stale_retries_left
    log "Sphinx Stale Ids (%s %s left): %s" % [
      tries, (tries==1 ? 'try' : 'tries'), stale_ids.join(', ')
    ]

    retry
  end
end
406
+
407
# Runs the search and returns Sphinx's :total_found figure, or 0 when the
# result hash has none.
def count(*args)
  results = search_results(*args.clone).first
  results[:total_found] || 0
end
411
+
412
+ # Checks if a document with the given id exists within a specific index.
413
+ # Expected parameters:
414
+ #
415
+ # - ID of the document
416
+ # - Index to check within
417
+ # - Options hash (defaults to {})
418
+ #
419
+ # Example:
420
+ #
421
+ # ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User)
422
+ #
423
def search_for_id(*args)
  options = args.extract_options!
  client  = client_from_options options

  conditions = options[:conditions] || {}
  query, filters = search_conditions(options[:class], conditions)

  client.filters   += filters
  client.match_mode = :extended unless query.empty?
  # Restrict the query to exactly the requested document id.
  client.id_range   = args.first..args.first

  begin
    # args[1] is the index to look in.
    return client.query(query, args[1])[:matches].length > 0
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end
end
440
+
441
+ private
442
+
443
+ # This method handles the common search functionality, and returns both
444
+ # the result hash and the client. Not super elegant, but it'll do for
445
+ # the moment.
446
+ #
447
def search_results(*args)
  options = args.extract_options!
  query   = args.join(' ')
  client  = client_from_options options

  query = star_query(query, options[:star]) if options[:star]

  conditions = options[:conditions] || {}
  extra_query, filters = search_conditions(options[:class], conditions)

  client.filters   += filters
  client.match_mode = :extended unless extra_query.empty?
  # Strip because "" and " " are not equivalent queries.
  query = [query, extra_query].join(' ').strip

  set_sort_options! client, options

  client.limit = options[:per_page].to_i if options[:per_page]

  # Pages are 1-based; anything at or below zero is treated as page 1.
  page = options[:page] ? options[:page].to_i : 1
  page = 1 if page <= 0
  client.offset = (page - 1) * client.limit

  begin
    log "Sphinx: #{query}"
    results = client.query(query, '*', options[:comment] || '')
    log "Sphinx Result:"
    internal_ids = results[:matches].collect { |match|
      match[:attributes]["sphinx_internal_id"]
    }
    log internal_ids.inspect
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  return results, client
end
482
+
483
+ # Set all the appropriate settings for the client, using the provided
484
+ # options hash.
485
+ #
486
# Builds a Riddle::Client for the configured address and port, applies the
# given options to it, and returns it.
#
# Note that +options+ is modified in place: :max_matches, :index_weights,
# :group_by, :group_function and :classes may all be set or rewritten below.
def client_from_options(options = {})
  config = ThinkingSphinx::Configuration.instance
  client = Riddle::Client.new config.address, config.port
  klass = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}

  # The Riddle default is per-query max_matches=1000. If we set the
  # per-server max to a smaller value in sphinx.yml, we need to override
  # the Riddle default or else we get search errors like
  # "per-query max_matches=1000 out of bounds (per-server max_matches=200)"
  if per_server_max_matches = config.configuration.searchd.max_matches
    options[:max_matches] ||= per_server_max_matches
  end

  # Turn :index_weights => { "foo" => 2, User => 1 }
  # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
  if iw = options[:index_weights]
    options[:index_weights] = iw.inject({}) do |hash, (index,weight)|
      if index.is_a?(Class)
        name = ThinkingSphinx::Index.name(index)
        hash["#{name}_core"] = weight
        hash["#{name}_delta"] = weight
      else
        hash[index] = weight
      end
      hash
    end
  end

  # Group by defaults using :group
  if options[:group]
    options[:group_by] = options[:group].to_s
    options[:group_function] ||= :attr
  end

  # For each client setting, precedence is: explicit search option, then the
  # model's index options, then whatever the client already holds.
  [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ].each do |key|
    client.send(
      key.to_s.concat("=").to_sym,
      options[key] || index_options[key] || client.send(key)
    )
  end

  # A single-model search is expressed as a one-element :classes list.
  options[:classes] = [klass] if klass

  # Only derive an anchor when none was supplied through the options above.
  client.anchor = anchor_conditions(klass, options) || {} if client.anchor.empty?

  # Always restrict matches to documents whose sphinx_deleted attribute is 0.
  client.filters << Riddle::Client::Filter.new(
    "sphinx_deleted", [0]
  )

  # class filters
  client.filters << Riddle::Client::Filter.new(
    "class_crc", options[:classes].collect { |k| k.to_crc32s }.flatten
  ) if options[:classes]

  # normal attribute filters
  client.filters += options[:with].collect { |attr,val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val)
  } if options[:with]

  # exclusive attribute filters
  client.filters += options[:without].collect { |attr,val|
    Riddle::Client::Filter.new attr.to_s, filter_value(val), true
  } if options[:without]

  # every-match attribute filters (one filter per value, so all must match)
  client.filters += options[:with_all].collect { |attr,vals|
    Array(vals).collect { |val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val)
    }
  }.flatten if options[:with_all]

  # exclusive attribute filter on primary key
  client.filters += Array(options[:without_ids]).collect { |id|
    Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
  } if options[:without_ids]

  client
end
570
+
571
# Wraps every bare token of a search query in wildcard stars, so that
# "foo bar" becomes "*foo* *bar*".
#
# query        - the raw query String.
# custom_token - optional Regexp describing what counts as a token. Anything
#                that is not a Regexp falls back to /\w+/u.
#
# The following matches are left exactly as they are:
# * tokens directly preceded by an operator character (@, ~ or /) that is not
#   itself part of a token, e.g. "@foo", "/2", "~3";
# * quoted phrases, e.g. "foo bar" with the quotes;
# * tokens that already have a star directly before or after them.
#
# Returns a new String; the query argument is not modified.
#
def star_query(query, custom_token = nil)
  token = custom_token.is_a?(Regexp) ? custom_token : /\w+/u

  query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
    # Capture the match data first: the checks below run further regular
    # expressions, which would overwrite the implicit "last match".
    match  = Regexp.last_match
    pre    = match.pre_match
    proper = match[0]
    post   = match.post_match

    # E.g. "@foo", "/2", "~3", but not as part of a token
    is_operator = pre =~ %r{(\W|^)[@~/]\Z}
    # E.g. "foo bar", with quotes
    is_quote    = proper.start_with?('"') && proper.end_with?('"')
    has_star    = pre.end_with?("*") || post.start_with?("*")

    if is_operator || is_quote || has_star
      proper
    else
      "*#{proper}*"
    end
  end
end
586
+
587
# Normalises a filter value before it is handed to a Riddle filter.
#
# * Range - a range whose first element is a Time becomes a range of integer
#           timestamps; any other range is returned unchanged.
# * Array - each Time element is replaced by its integer timestamp, other
#           elements are kept as they are.
# * Time  - becomes a one-element array holding its integer timestamp.
# * other - wrapped with Kernel#Array.
#
def filter_value(value)
  case value
  when Range
    value.first.is_a?(Time) ? timestamp(value.first)..timestamp(value.last) : value
  when Array
    value.collect { |val| val.is_a?(Time) ? timestamp(val) : val }
  when Time
    # Must not fall through to Array(value): Kernel#Array calls Time#to_a,
    # which returns [sec, min, hour, day, ...] instead of a timestamp.
    [timestamp(value)]
  else
    Array(value)
  end
end

# Returns the integer timestamp for a Time object.
#
# The result of to_i is the number of seconds since the Unix epoch, which
# does not depend on the time zone the value is expressed in, so no explicit
# conversion to UTC is needed. Calling Time#utc is deliberately avoided
# because it converts a plain Time in place and would silently change the
# caller's object.
#
def timestamp(value)
  value.to_i
end
610
+
611
# Splits a conditions hash into a Sphinx query string and a list of filters.
#
# A condition whose key (as a symbol) is the unique name of one of the
# attributes of klass's Sphinx indexes becomes a Riddle filter on that
# attribute. Every other condition is treated as a field and contributes an
# "@key value" fragment to the query string. Without a klass there are no
# known attributes, so everything ends up in the query string.
#
# Returns two values: the fragments joined by single spaces, and the filters.
#
def search_conditions(klass, conditions={})
  attribute_names = []
  if klass
    attribute_names = klass.sphinx_indexes.map { |index|
      index.attributes.map { |attribute| attribute.unique_name }
    }.flatten
  end

  terms   = []
  filters = []

  conditions.each do |name, value|
    unless attribute_names.include?(name.to_sym)
      terms << "@#{name} #{value}"
      next
    end

    filters << Riddle::Client::Filter.new(name.to_s, filter_value(value))
  end

  [terms.join(' '), filters]
end
634
+
635
# Builds the geographic anchor settings for a search, or returns nil when
# no complete latitude/longitude pair has been supplied.
#
# The attribute names are resolved in this order of preference:
# 1. options[:latitude_attr] / options[:longitude_attr];
# 2. the first :latitude_attr / :longitude_attr found in the options of
#    klass's Sphinx indexes;
# 3. the first attribute of klass's indexes named :lat or :latitude, and
#    :lng, :lon, :long or :longitude respectively.
#
# The coordinates come from options[:geo] (first and last element) when it
# is present, otherwise from options[:lat] and options[:lon].
#
def anchor_conditions(klass, options)
  indexes = klass ? klass.sphinx_indexes : []

  attribute_names = indexes.map { |index|
    index.attributes.map { |attribute| attribute.unique_name }
  }.flatten

  lat_attr = indexes.map { |index| index.options[:latitude_attr] }.compact.first
  lat_attr = options[:latitude_attr] || lat_attr
  default_lat = [:lat, :latitude].find { |name| attribute_names.include?(name) }
  lat_attr ||= default_lat if default_lat

  lon_attr = indexes.map { |index| index.options[:longitude_attr] }.compact.first
  lon_attr = options[:longitude_attr] || lon_attr
  default_lon = [:lng, :lon, :long, :longitude].find { |name|
    attribute_names.include?(name)
  }
  lon_attr ||= default_lon if default_lon

  geo = options[:geo]
  lat, lon = geo ? [geo.first, geo.last] : [options[:lat], options[:lon]]

  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr.to_s,
    :latitude            => lat,
    :longitude_attribute => lon_attr.to_s,
    :longitude           => lon
  }
end
677
+
678
# Applies the requested ordering to the Riddle client.
#
# The order is taken from options[:order], falling back to the :order
# entry of the index options of options[:class].
#
# * Symbol - sorts by that attribute. A sort mode of :relevance or nil is
#            switched to :attr_asc. When the symbol names a field of the
#            class's indexes, the "<name>_sort" attribute is used instead.
# * String - handed to sorted_fields_to_attributes and used as the sort
#            clause; the sort mode becomes :extended unless
#            options[:sort_mode] was given.
# * other  - the sort settings are left alone.
#
# Finally the shorthand sort modes :asc and :desc are expanded to
# :attr_asc and :attr_desc.
#
def set_sort_options!(client, options)
  klass       = options[:class]
  field_names = []
  defaults    = {}

  if klass
    field_names = klass.sphinx_indexes.map { |index|
      index.fields.map { |field| field.unique_name }
    }.flatten
    defaults = klass.sphinx_index_options
  end

  order = options[:order] || defaults[:order]

  if order.is_a?(Symbol)
    if client.sort_mode == :relevance || client.sort_mode.nil?
      client.sort_mode = :attr_asc
    end
    suffix = field_names.include?(order) ? "_sort" : ""
    client.sort_by = "#{order}#{suffix}"
  elsif order.is_a?(String)
    client.sort_mode = :extended unless options[:sort_mode]
    client.sort_by   = sorted_fields_to_attributes(order, field_names)
  end

  client.sort_mode = :attr_asc  if client.sort_mode == :asc
  client.sort_mode = :attr_desc if client.sort_mode == :desc
end
707
+
708
# Translates field names appearing in a sort clause into their sortable
# attribute equivalents, i.e. "<field>_sort".
#
# string - the sort clause, e.g. "name ASC, created_at DESC".
# fields - the field names to look for.
#
# A field name is only translated when it stands on its own: it has to be
# preceded by the start of a line or whitespace, and followed by an optional
# comma plus whitespace, or by the end of a line.
#
# Returns a new String. The string argument itself is left untouched, so a
# frozen clause or one shared between searches can be passed in safely.
#
def sorted_fields_to_attributes(string, fields)
  fields.inject(string.dup) do |clause, field|
    name = field.to_s
    # Escape the name so it is always matched literally.
    clause.gsub(/(^|\s)#{Regexp.escape(name)}(,?\s|$)/) do
      "#{Regexp.last_match(1)}#{name}_sort#{Regexp.last_match(2)}"
    end
  end
end
720
+
721
# Passes a message to the ActiveRecord logger.
#
# message - the object to log.
# method  - the name of the logger method to call (defaults to :debug).
#
# Does nothing, and returns nil, when ActiveRecord has no logger set.
#
def log(message, method = :debug)
  logger = ::ActiveRecord::Base.logger
  logger.send(method, message) unless logger.nil?
end
725
+ end
726
+ end
727
+ end