pixeltrix-thinking-sphinx 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. data/LICENCE +20 -0
  2. data/README +107 -0
  3. data/lib/thinking_sphinx.rb +144 -0
  4. data/lib/thinking_sphinx/active_record.rb +245 -0
  5. data/lib/thinking_sphinx/active_record/delta.rb +74 -0
  6. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  7. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  8. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +34 -0
  9. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +53 -0
  10. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +129 -0
  11. data/lib/thinking_sphinx/association.rb +144 -0
  12. data/lib/thinking_sphinx/attribute.rb +258 -0
  13. data/lib/thinking_sphinx/collection.rb +142 -0
  14. data/lib/thinking_sphinx/configuration.rb +236 -0
  15. data/lib/thinking_sphinx/core/string.rb +22 -0
  16. data/lib/thinking_sphinx/deltas.rb +22 -0
  17. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  18. data/lib/thinking_sphinx/deltas/default_delta.rb +65 -0
  19. data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
  20. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  23. data/lib/thinking_sphinx/facet.rb +58 -0
  24. data/lib/thinking_sphinx/facet_collection.rb +44 -0
  25. data/lib/thinking_sphinx/field.rb +172 -0
  26. data/lib/thinking_sphinx/index.rb +414 -0
  27. data/lib/thinking_sphinx/index/builder.rb +233 -0
  28. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  29. data/lib/thinking_sphinx/rails_additions.rb +133 -0
  30. data/lib/thinking_sphinx/search.rb +638 -0
  31. data/lib/thinking_sphinx/tasks.rb +128 -0
  32. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  33. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  34. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  35. data/spec/unit/thinking_sphinx/active_record_spec.rb +256 -0
  36. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  37. data/spec/unit/thinking_sphinx/attribute_spec.rb +212 -0
  38. data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
  39. data/spec/unit/thinking_sphinx/configuration_spec.rb +136 -0
  40. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  41. data/spec/unit/thinking_sphinx/field_spec.rb +145 -0
  42. data/spec/unit/thinking_sphinx/index/builder_spec.rb +5 -0
  43. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  44. data/spec/unit/thinking_sphinx/index_spec.rb +54 -0
  45. data/spec/unit/thinking_sphinx/search_spec.rb +59 -0
  46. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  47. data/tasks/distribution.rb +48 -0
  48. data/tasks/rails.rake +1 -0
  49. data/tasks/testing.rb +86 -0
  50. data/vendor/after_commit/LICENSE +20 -0
  51. data/vendor/after_commit/README +16 -0
  52. data/vendor/after_commit/Rakefile +22 -0
  53. data/vendor/after_commit/init.rb +5 -0
  54. data/vendor/after_commit/lib/after_commit.rb +42 -0
  55. data/vendor/after_commit/lib/after_commit/active_record.rb +91 -0
  56. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  57. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  58. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  59. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  60. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  61. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  62. data/vendor/riddle/lib/riddle.rb +30 -0
  63. data/vendor/riddle/lib/riddle/client.rb +619 -0
  64. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  65. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  66. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  67. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  68. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  69. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  70. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  71. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  72. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  73. data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
  74. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  75. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  76. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  77. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  78. metadata +157 -0
@@ -0,0 +1,110 @@
1
module ThinkingSphinx
  class Index
    # Instances of this class represent database columns and the stack of
    # associations that lead from the base model to them.
    #
    # The name and stack are accessible through methods starting with __ to
    # avoid conflicting with the method_missing calls that build the stack.
    #
    class FauxColumn
      # Create a new column with a pre-defined stack. The last element given
      # is popped off to become the name; whatever precedes it is the stack.
      #
      def initialize(*stack)
        @name  = stack.pop
        @stack = stack
      end

      # Converts a Symbol or String into a FauxColumn, maps an Array
      # element by element, passes an existing FauxColumn straight through,
      # and returns nil for anything else.
      #
      def self.coerce(columns)
        case columns
        when Symbol, String
          FauxColumn.new(columns)
        when Array
          columns.collect { |col| FauxColumn.coerce(col) }
        when FauxColumn
          columns
        else
          nil
        end
      end

      # Can't use normal method name, as that could be an association or
      # column name.
      #
      def __name
        @name
      end

      # Can't use normal method name, as that could be an association or
      # column name.
      #
      def __stack
        @stack
      end

      # Returns true if the stack is empty *and* if the name is a string -
      # which is an indication of raw SQL, as opposed to a value from a
      # table's column.
      #
      def is_string?
        @name.is_a?(String) && @stack.empty?
      end

      # This handles any 'invalid' method calls and sets them as the name,
      # pushing the previous name onto the stack. The object returns itself.
      #
      # If there's a single argument, it becomes the name, and the method
      # symbol goes into the stack as well. Multiple arguments means new
      # columns get returned, one per argument, each with the receiver's
      # (updated) stack plus the method symbol, and the argument as its name.
      # Note that the receiver itself is still updated in that case.
      #
      # Easier to explain with examples:
      #
      #   col = FauxColumn.new :a, :b, :c
      #   col.__name  #=> :c
      #   col.__stack #=> [:a, :b]
      #
      #   col.whatever #=> col
      #   col.__name   #=> :whatever
      #   col.__stack  #=> [:a, :b, :c]
      #
      #   col.something(:id) #=> col
      #   col.__name         #=> :id
      #   col.__stack        #=> [:a, :b, :c, :whatever, :something]
      #
      #   cols = col.short(:x, :y, :z)
      #   cols[0].__name  #=> :x
      #   cols[0].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
      #   cols[1].__name  #=> :y
      #   cols[1].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
      #   cols[2].__name  #=> :z
      #   cols[2].__stack #=> [:a, :b, :c, :whatever, :something, :id, :short]
      #
      # Also, this allows method chaining to build up a relevant stack:
      #
      #   col = FauxColumn.new :a, :b
      #   col.__name  #=> :b
      #   col.__stack #=> [:a]
      #
      #   col.one.two.three #=> col
      #   col.__name        #=> :three
      #   col.__stack       #=> [:a, :b, :one, :two]
      #
      def method_missing(method, *args)
        @stack << @name
        @name   = method

        if args.empty?
          self
        elsif args.length == 1
          method_missing(args.first)
        else
          # The constructor takes its stack as a splat, so the combined array
          # has to be expanded here - passing it as one argument would make
          # the whole array the name and leave the stack empty.
          args.collect { |arg|
            FauxColumn.new(*(@stack + [@name, arg]))
          }
        end
      end
    end
  end
end
@@ -0,0 +1,133 @@
1
# Set is used by HashExcept#except; require it here rather than relying on
# another library having loaded it already.
require 'set'

module ThinkingSphinx
  # Backport of Hash#except / Hash#except! for environments that lack them.
  module HashExcept
    # Returns a new hash without the given keys. If the receiver responds to
    # convert_key, each key is passed through it before the comparison.
    def except(*keys)
      rejected = Set.new(respond_to?(:convert_key) ? keys.map { |key| convert_key(key) } : keys)
      reject { |key,| rejected.include?(key) }
    end

    # Removes the given keys from the hash in place and returns the hash.
    def except!(*keys)
      replace(except(*keys))
    end
  end
end

# method_defined? works whether instance_methods reports Strings (Ruby 1.8)
# or Symbols (1.9+); comparing the list against a String never matches on
# newer Rubies, which made the original guard a no-op there.
Hash.send(
  :include, ThinkingSphinx::HashExcept
) unless Hash.method_defined?(:except)
19
+
20
module ThinkingSphinx
  # Backport of Array#extract_options! for environments that lack it.
  module ArrayExtractOptions
    # Removes and returns the last element if it is a Hash; otherwise leaves
    # the array untouched and returns an empty Hash.
    def extract_options!
      last.is_a?(::Hash) ? pop : {}
    end
  end
end

# method_defined? gives the same answer whether instance_methods returns
# Strings (Ruby 1.8) or Symbols (1.9+); the String comparison used before
# never matched on newer Rubies.
Array.send(
  :include, ThinkingSphinx::ArrayExtractOptions
) unless Array.method_defined?(:extract_options!)
31
+
32
module ThinkingSphinx
  # Fallback quote_table_name for connection adapters that do not provide
  # one: table names are quoted the same way as column names.
  module AbstractQuotedTableName
    def quote_table_name(name)
      quote_column_name(name)
    end
  end
end

# method_defined? is used instead of instance_methods.include?("...") because
# the latter compares against a String and never matches on Ruby 1.9+, where
# method names are reported as Symbols.
ActiveRecord::ConnectionAdapters::AbstractAdapter.send(
  :include, ThinkingSphinx::AbstractQuotedTableName
) unless ActiveRecord::ConnectionAdapters::AbstractAdapter.method_defined?(:quote_table_name)
43
+
44
module ThinkingSphinx
  # Fallback quote_table_name for the MySQL adapter: quotes the name as a
  # column and then re-quotes around any dot so that "db.table" ends up with
  # each part quoted separately.
  module MysqlQuotedTableName
    def quote_table_name(name) #:nodoc:
      quote_column_name(name).gsub('.', '`.`')
    end
  end
end

# const_defined? / method_defined? behave the same on Ruby 1.8 and 1.9+.
# The previous String comparisons against constants / instance_methods never
# matched on 1.9+ (both lists hold Symbols there), so the MySQL adapter was
# never patched.
if ActiveRecord::ConnectionAdapters.const_defined?(:MysqlAdapter)
  ActiveRecord::ConnectionAdapters::MysqlAdapter.send(
    :include, ThinkingSphinx::MysqlQuotedTableName
  ) unless ActiveRecord::ConnectionAdapters::MysqlAdapter.method_defined?(:quote_table_name)
end
57
+
58
module ThinkingSphinx
  # Class-level helper returning the model's table name, quoted by the
  # model's own connection.
  module ActiveRecordQuotedName
    def quoted_table_name
      connection.quote_table_name(table_name)
    end
  end
end

# Only add the helper when ActiveRecord::Base doesn't already have one.
unless ActiveRecord::Base.respond_to?("quoted_table_name")
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordQuotedName)
end
69
+
70
module ThinkingSphinx
  # Class-level fallback for store_full_sti_class that always answers false.
  module ActiveRecordStoreFullSTIClass
    def store_full_sti_class
      false
    end
  end
end

# Only add the fallback when ActiveRecord::Base doesn't already respond to it.
unless ActiveRecord::Base.respond_to?(:store_full_sti_class)
  ActiveRecord::Base.extend(ThinkingSphinx::ActiveRecordStoreFullSTIClass)
end
81
+
82
module ThinkingSphinx
  # Backport of the cattr_reader / cattr_writer / cattr_accessor class
  # macros, which expose class variables (@@name) through class-level and
  # instance-level methods.
  module ClassAttributeMethods
    # Defines a class-level and an instance-level reader for each given
    # name, initialising the backing class variable to nil if it isn't
    # defined yet. Hash arguments (options) are ignored.
    def cattr_reader(*syms)
      syms.flatten.each do |sym|
        next if sym.is_a?(Hash)
        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}
            @@#{sym}
          end

          def #{sym}
            @@#{sym}
          end
        EOS
      end
    end

    # Defines a class-level writer for each given name, plus an
    # instance-level writer unless :instance_writer => false is passed in a
    # trailing options hash.
    def cattr_writer(*syms)
      # Pull a trailing options hash off inline so this module doesn't
      # depend on Array#extract_options! being available.
      options = syms.last.is_a?(::Hash) ? syms.pop : {}
      syms.flatten.each do |sym|
        instance_writer = if options[:instance_writer] == false
          ""
        else
          <<-WRITER
          def #{sym}=(obj)
            @@#{sym} = obj
          end
          WRITER
        end

        class_eval(<<-EOS, __FILE__, __LINE__ + 1)
          unless defined? @@#{sym}
            @@#{sym} = nil
          end

          def self.#{sym}=(obj)
            @@#{sym} = obj
          end

          #{instance_writer}
        EOS
      end
    end

    # Defines both readers and writers for each given name.
    def cattr_accessor(*syms)
      cattr_reader(*syms)
      cattr_writer(*syms)
    end
  end
end

# The macros must be *instance* methods of Class to be callable inside any
# class body; Class.extend would only add them to the Class object itself.
Class.send(
  :include, ThinkingSphinx::ClassAttributeMethods
) unless Class.method_defined?(:cattr_reader)
@@ -0,0 +1,638 @@
1
+ module ThinkingSphinx
2
+ # Once you've got those indexes in and built, this is the stuff that
3
+ # matters - how to search! This class provides a generic search
4
+ # interface - which you can use to search all your indexed models at once.
5
+ # Most times, you will just want a specific model's results - the search and
6
+ # search_for_ids methods will do the job in exactly the same manner when
7
+ # called from a model.
8
+ #
9
+ class Search
10
+ class << self
11
# Searches for results that match the parameters provided, returning only
# the ids of the matching objects. See #search for syntax examples.
#
# Only the Sphinx index is consulted - no ActiveRecord queries are made -
# so if the index is out of sync with the database, ids that no longer
# exist there may be returned.
#
def search_for_ids(*args)
  # search_results gets its own copy; the options are read from the
  # original argument list afterwards.
  results, client = search_results(*args.clone)

  options     = args.extract_options!
  page_number = (options[:page] || 1).to_i

  ThinkingSphinx::Collection.ids_from_results(results, page_number, client.limit, options)
end
27
+
28
+ # Searches through the Sphinx indexes for relevant matches. There's
29
+ # various ways to search, sort, group and filter - which are covered
30
+ # below.
31
+ #
32
+ # Also, if you have WillPaginate installed, the search method can be used
33
+ # just like paginate. The same parameters - :page and :per_page - work as
34
+ # expected, and the returned result set can be used by the will_paginate
35
+ # helper.
36
+ #
37
+ # == Basic Searching
38
+ #
39
+ # The simplest way of searching is straight text.
40
+ #
41
+ # ThinkingSphinx::Search.search "pat"
42
+ # ThinkingSphinx::Search.search "google"
43
+ # User.search "pat", :page => (params[:page] || 1)
44
+ # Article.search "relevant news issue of the day"
45
+ #
46
+ # If you specify :include, like in an #find call, this will be respected
47
+ # when loading the relevant models from the search results.
48
+ #
49
+ # User.search "pat", :include => :posts
50
+ #
51
+ # == Match Modes
52
+ #
53
+ # Sphinx supports 5 different matching modes. By default Thinking Sphinx
54
+ # uses :all, which unsurprisingly requires all the supplied search terms
55
+ # to match a result.
56
+ #
57
+ # Alternative modes include:
58
+ #
59
+ # User.search "pat allan", :match_mode => :any
60
+ # User.search "pat allan", :match_mode => :phrase
61
+ # User.search "pat | allan", :match_mode => :boolean
62
+ # User.search "@name pat | @username pat", :match_mode => :extended
63
+ #
64
+ # Any will find results with any of the search terms. Phrase treats the search
65
+ # terms as a single phrase instead of individual words. Boolean and extended allow
66
+ # for more complex query syntax, refer to the sphinx documentation for further
67
+ # details.
68
+ #
69
+ # == Weighting
70
+ #
71
+ # Sphinx has support for weighting, where matches in one field can be considered
72
+ # more important than in another. Weights are integers, with 1 as the default.
73
+ # They can be set per-search like this:
74
+ #
75
+ # User.search "pat allan", :field_weights => { :alias => 4, :aka => 2 }
76
+ #
77
+ # If you're searching multiple models, you can set per-index weights:
78
+ #
79
+ # ThinkingSphinx::Search.search "pat", :index_weights => { User => 10 }
80
+ #
81
+ # See http://sphinxsearch.com/doc.html#weighting for further details.
82
+ #
83
+ # == Searching by Fields
84
+ #
85
+ # If you want to step it up a level, you can limit your search terms to
86
+ # specific fields:
87
+ #
88
+ # User.search :conditions => {:name => "pat"}
89
+ #
90
+ # This uses Sphinx's extended match mode, unless you specify a different
91
+ # match mode explicitly (but then this way of searching won't work). Also
92
+ # note that you don't need to put in a search string.
93
+ #
94
+ # == Searching by Attributes
95
+ #
96
+ # Also known as filters, you can limit your searches to documents that
97
+ # have specific values for their attributes. There are two ways to do
98
+ # this. The first is one that works in all scenarios - using the :with
99
+ # option.
100
+ #
101
+ # ThinkingSphinx::Search.search :with => {:parent_id => 10}
102
+ #
103
+ # The second is only viable if you're searching with a specific model
104
+ # (not multi-model searching). With a single model, Thinking Sphinx
105
+ # can figure out what attributes and fields are available, so you can
106
+ # put it all in the :conditions hash, and it will sort it out.
107
+ #
108
+ # Node.search :conditions => {:parent_id => 10}
109
+ #
110
+ # Filters can be single values, arrays of values, or ranges.
111
+ #
112
+ # Article.search "East Timor", :conditions => {:rating => 3..5}
113
+ #
114
+ # == Excluding by Attributes
115
+ #
116
+ # Sphinx also supports negative filtering - where the filters are of
117
+ # attribute values to exclude. This is done with the :without option:
118
+ #
119
+ # User.search :without => {:role_id => 1}
120
+ #
121
+ # == Excluding by Primary Key
122
+ #
123
+ # There is a shortcut to exclude records by their ActiveRecord primary key:
124
+ #
125
+ # User.search :without_ids => 1
126
+ #
127
+ # Pass an array or a single value.
128
+ #
129
+ # The primary key must be an integer as a negative filter is used. Note
130
+ # that for multi-model search, an id may occur in more than one model.
131
+ #
132
+ # == Infix (Star) Searching
133
+ #
134
+ # By default, Sphinx uses English stemming, e.g. matching "shoes" if you
135
+ # search for "shoe". It won't find "Melbourne" if you search for
136
+ # "elbourn", though.
137
+ #
138
+ # Enable infix searching by something like this in config/sphinx.yml:
139
+ #
140
+ # development:
141
+ # enable_star: 1
142
+ # min_infix_length: 2
143
+ #
144
+ # Note that this will make indexing take longer.
145
+ #
146
+ # With those settings (and after reindexing), wildcard asterisks can be used
147
+ # in queries:
148
+ #
149
+ # Location.search "*elbourn*"
150
+ #
151
+ # To automatically add asterisks around every token (but not operators),
152
+ # pass the :star option:
153
+ #
154
+ # Location.search "elbourn -ustrali", :star => true, :match_mode => :boolean
155
+ #
156
+ # This would become "*elbourn* -*ustrali*". The :star option only adds the
157
+ # asterisks. You need to make the config/sphinx.yml changes yourself.
158
+ #
159
+ # By default, the tokens are assumed to match the regular expression /\w+/u.
160
+ # If you've modified the charset_table, pass another regular expression, e.g.
161
+ #
162
+ # User.search("oo@bar.c", :star => /[\w@.]+/u)
163
+ #
164
+ # to search for "*oo@bar.c*" and not "*oo*@*bar*.*c*".
165
+ #
166
+ # == Sorting
167
+ #
168
+ # Sphinx can only sort by attributes, so generally you will need to avoid
169
+ # using field names in your :order option. However, if you're searching
170
+ # on a single model, and have specified some fields as sortable, you can
171
+ # use those field names and Thinking Sphinx will interpret accordingly.
172
+ # Remember: this will only happen for single-model searches, and only
173
+ # through the :order option.
174
+ #
175
+ # Location.search "Melbourne", :order => :state
176
+ # User.search :conditions => {:role_id => 2}, :order => "name ASC"
177
+ #
178
+ # Keep in mind that if you use a string, you *must* specify the direction
179
+ # (ASC or DESC) else Sphinx won't return any results. If you use a symbol
180
+ # then Thinking Sphinx assumes ASC, but if you wish to state otherwise,
181
+ # use the :sort_mode option:
182
+ #
183
+ # Location.search "Melbourne", :order => :state, :sort_mode => :desc
184
+ #
185
+ # Of course, there are other sort modes - check out the Sphinx
186
+ # documentation[http://sphinxsearch.com/doc.html] for that level of
187
+ # detail though.
188
+ #
189
+ # == Grouping
190
+ #
191
+ # For this you can use the group_by, group_clause and group_function
192
+ # options - which are all directly linked to Sphinx's expectations. No
193
+ # magic from Thinking Sphinx. It can get a little tricky, so make sure
194
+ # you read all the relevant
195
+ # documentation[http://sphinxsearch.com/doc.html#clustering] first.
196
+ #
197
+ # Yes this section will be expanded, but this is a start.
198
+ #
199
+ # == Geo/Location Searching
200
+ #
201
+ # Sphinx - and therefore Thinking Sphinx - has the facility to search
202
+ # around a geographical point, using a given latitude and longitude. To
203
+ # take advantage of this, you will need to have both of those values in
204
+ # attributes. To search with that point, you can then use one of the
205
+ # following syntax examples:
206
+ #
207
+ # Address.search "Melbourne", :geo => [1.4, -2.217], :order => "@geodist asc"
208
+ # Address.search "Australia", :geo => [-0.55, 3.108], :order => "@geodist asc",
209
+ # :latitude_attr => "latit", :longitude_attr => "longit"
210
+ #
211
+ # The first example applies when your latitude and longitude attributes
212
+ # are named any of lat, latitude, lng, lon, long or longitude. If that's not
213
+ # the case, you will need to explicitly state them in your search, _or_
214
+ # you can do so in your model:
215
+ #
216
+ # define_index do
217
+ # has :latit # Float column, stored in radians
218
+ # has :longit # Float column, stored in radians
219
+ #
220
+ # set_property :latitude_attr => "latit"
221
+ # set_property :longitude_attr => "longit"
222
+ # end
223
+ #
224
+ # Now, geo-location searching really only has an effect if you have a
225
+ # filter, sort or grouping clause related to it - otherwise it's just a
226
+ # normal search, and _will not_ return a distance value otherwise. To
227
+ # make use of the positioning difference, use the special attribute
228
+ # "@geodist" in any of your filters or sorting or grouping clauses.
229
+ #
230
+ # And don't forget - both the latitude and longitude you use in your
231
+ # search, and the values in your indexes, need to be stored as a float in radians,
232
+ # _not_ degrees. Keep in mind that if you do this conversion in SQL
233
+ # you will need to explicitly declare a column type of :float.
234
+ #
235
+ # define_index do
236
+ # has 'RADIANS(lat)', :as => :lat, :type => :float
237
+ # # ...
238
+ # end
239
+ #
240
+ # Once you've got your results set, you can access the distances as
241
+ # follows:
242
+ #
243
+ # @results.each_with_geodist do |result, distance|
244
+ # # ...
245
+ # end
246
+ #
247
+ # The distance value is returned as a float, representing the distance in
248
+ # metres.
249
+ #
250
+ # == Handling a Stale Index
251
+ #
252
+ # Especially if you don't use delta indexing, you risk having records in the
253
+ # Sphinx index that are no longer in the database. By default, those will simply
254
+ # come back as nils:
255
+ #
256
+ # >> pat_user.delete
257
+ # >> User.search("pat")
258
+ # Sphinx Result: [1,2]
259
+ # => [nil, <#User id: 2>]
260
+ #
261
+ # (If you search across multiple models, you'll get ActiveRecord::RecordNotFound.)
262
+ #
263
+ # You can simply Array#compact these results or handle the nils in some other way, but
264
+ # Sphinx will still report two results, and the missing records may upset your layout.
265
+ #
266
+ # If you pass :retry_stale => true to a single-model search, missing records will
267
+ # cause Thinking Sphinx to retry the query but excluding those records. Since search
268
+ # is paginated, the new search could potentially include missing records as well, so by
269
+ # default Thinking Sphinx will retry three times. Pass :retry_stale => 5 to retry five
270
+ # times, and so on. If there are still missing ids on the last retry, they are
271
+ # shown as nils.
272
+ #
273
def search(*args)
  terms   = args.clone # work on a copy so the caller's array is untouched
  options = terms.extract_options!

  # The block may run more than once: see retry_search_on_stale_index.
  retry_search_on_stale_index(terms, options) do
    results, client = search_results(*(terms + [options]))

    if results[:error]
      ::ActiveRecord::Base.logger.error("Sphinx Error: #{results[:error]}")
    end

    page_number = (options[:page] || 1).to_i

    ThinkingSphinx::Collection.create_from_results(results, page_number, client.limit, options)
  end
end
290
+
291
# Runs the given block, re-running it a limited number of times whenever it
# raises StaleIdsException. The number of retries comes from
# options[:retry_stale]: true means three, nil/false means none, anything
# else is converted with to_i.
#
# Before every attempt options[:raise_on_stale] is set to whether any
# retries remain; after a stale failure the offending ids are merged into
# options[:without_ids] so the next attempt excludes them. Returns the
# block's result.
#
def retry_search_on_stale_index(query, options, &block)
  stale_ids = []
  # "when x then y" instead of the Ruby 1.8-only "when x: y" form, which is
  # a syntax error from Ruby 1.9 onwards.
  stale_retries_left = case options[:retry_stale]
    when true       then 3 # default to three retries
    when nil, false then 0 # no retries
    else options[:retry_stale].to_i
  end
  begin
    # Passing this in an option so Collection.create_from_results can see it.
    # It should only raise on stale records if there are any retries left.
    options[:raise_on_stale] = stale_retries_left > 0
    block.call
  # If ThinkingSphinx::Collection.create_from_results found records in Sphinx but not
  # in the DB and the :raise_on_stale option is set, this exception is raised. We retry
  # a limited number of times, excluding the stale ids from the search.
  rescue StaleIdsException => e
    stale_retries_left -= 1

    stale_ids |= e.ids # For logging
    options[:without_ids] = Array(options[:without_ids]) | e.ids # Actual exclusion

    tries = stale_retries_left
    ::ActiveRecord::Base.logger.debug("Sphinx Stale Ids (%s %s left): %s" % [
      tries, (tries==1 ? 'try' : 'tries'), stale_ids.join(', ')
    ])

    retry
  end
end
320
+
321
# Runs the search and returns the :total_found figure from the result hash,
# or 0 when it is absent.
def count(*args)
  result_hash = search_results(*args.clone).first
  result_hash[:total_found] || 0
end
325
+
326
# Checks if a document with the given id exists within a specific index.
# Expected parameters:
#
# - ID of the document
# - Index to check within
# - Options hash (defaults to {})
#
# Example:
#
#   ThinkingSphinx::Search.search_for_id(10, "user_core", :class => User)
#
# Raises ThinkingSphinx::ConnectionError if the query is refused
# (Errno::ECONNREFUSED).
#
def search_for_id(*args)
  options = args.extract_options!
  client  = client_from_options options

  conditions = options[:conditions] || {}
  query, filters = search_conditions(options[:class], conditions)

  client.filters    += filters
  client.match_mode  = :extended unless query.empty?
  # Restrict the search to exactly the one document id.
  client.id_range    = args.first..args.first

  begin
    matches = client.query(query, args[1])[:matches]
    return matches.length > 0
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end
end
354
+
355
# Runs one grouped search per facet of options[:class] and gathers the
# results into a ThinkingSphinx::FacetCollection, which is returned.
def facets(*args)
  collection = ThinkingSphinx::FacetCollection.new args
  options    = args.extract_options!.clone.merge! :group_function => :attr

  options[:class].sphinx_facets.each do |facet|
    # The same options hash is reused for every facet; only the grouping
    # attribute changes between searches.
    options[:group_by] = facet.attribute_name

    collection.add_from_results facet, search(*(args + [options]))
  end

  collection
end
366
+
367
+ private
368
+
369
# Shared search implementation: builds the client, assembles the query
# string, runs the query, and returns both the result hash and the client
# (as a two-element array).
#
# Raises ThinkingSphinx::ConnectionError if the query is refused
# (Errno::ECONNREFUSED).
#
def search_results(*args)
  options = args.extract_options!
  query   = args.join(' ')
  client  = client_from_options options

  query = star_query(query, options[:star]) if options[:star]

  conditions = options[:conditions] || {}
  extra_query, filters = search_conditions(options[:class], conditions)

  client.filters    += filters
  client.match_mode  = :extended unless extra_query.empty?
  # Strip, because "" and " " are not equivalent queries.
  query = "#{query} #{extra_query}".strip

  set_sort_options! client, options

  client.limit  = options[:per_page].to_i if options[:per_page]
  page_number   = (options[:page] || 1).to_i
  client.offset = (page_number - 1) * client.limit

  begin
    ::ActiveRecord::Base.logger.debug "Sphinx: #{query}"
    results = client.query query
    internal_ids = results[:matches].collect { |match|
      match[:attributes]["sphinx_internal_id"]
    }
    ::ActiveRecord::Base.logger.debug "Sphinx Result: #{internal_ids.inspect}"
  rescue Errno::ECONNREFUSED
    raise ThinkingSphinx::ConnectionError, "Connection to Sphinx Daemon (searchd) failed."
  end

  [results, client]
end
404
+
405
# Builds a Riddle client and applies all the appropriate settings to it,
# using the provided options hash (falling back to the class's index options
# and then to the client's own defaults). Note that the options hash is
# modified along the way (:max_matches, :index_weights, :classes).
#
def client_from_options(options = {})
  config        = ThinkingSphinx::Configuration.instance
  client        = Riddle::Client.new config.address, config.port
  klass         = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}

  # The Riddle default is per-query max_matches=1000. If we set the
  # per-server max to a smaller value in sphinx.yml, we need to override
  # the Riddle default or else we get search errors like
  # "per-query max_matches=1000 out of bounds (per-server max_matches=200)"
  server_max = config.configuration.searchd.max_matches
  options[:max_matches] ||= server_max if server_max

  # Turn :index_weights => { "foo" => 2, User => 1 }
  # into :index_weights => { "foo" => 2, "user_core" => 1, "user_delta" => 1 }
  if (weights = options[:index_weights])
    expanded = {}
    weights.each do |index, weight|
      if index.is_a?(Class)
        name = ThinkingSphinx::Index.name(index)
        expanded["#{name}_core"]  = weight
        expanded["#{name}_delta"] = weight
      else
        expanded[index] = weight
      end
    end
    options[:index_weights] = expanded
  end

  # For each setting: explicit option first, then the index-level option,
  # then whatever the client already has.
  [
    :max_matches, :match_mode, :sort_mode, :sort_by, :id_range,
    :group_by, :group_function, :group_clause, :group_distinct, :cut_off,
    :retry_count, :retry_delay, :index_weights, :rank_mode,
    :max_query_time, :field_weights, :filters, :anchor, :limit
  ].each do |key|
    value = options[key] || index_options[key] || client.send(key)
    client.send("#{key}=", value)
  end

  options[:classes] = [klass] if klass

  if client.anchor.empty?
    client.anchor = anchor_conditions(klass, options) || {}
  end

  client.filters << Riddle::Client::Filter.new("sphinx_deleted", [0])

  # class filters
  if options[:classes]
    class_crcs = options[:classes].collect { |k| k.to_crc32s }.flatten
    client.filters << Riddle::Client::Filter.new("class_crc", class_crcs)
  end

  # normal attribute filters
  if options[:with]
    client.filters += options[:with].collect { |attr, val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val)
    }
  end

  # exclusive attribute filters
  if options[:without]
    client.filters += options[:without].collect { |attr, val|
      Riddle::Client::Filter.new attr.to_s, filter_value(val), true
    }
  end

  # every-match attribute filters: one filter per value
  if options[:with_all]
    client.filters += options[:with_all].collect { |attr, vals|
      Array(vals).collect { |val|
        Riddle::Client::Filter.new attr.to_s, filter_value(val)
      }
    }.flatten
  end

  # exclusive attribute filter on primary key
  if options[:without_ids]
    client.filters += Array(options[:without_ids]).collect { |id|
      Riddle::Client::Filter.new 'sphinx_internal_id', filter_value(id), true
    }
  end

  client
end
486
+
487
# Wraps every token of the query in asterisks, leaving alone operator
# arguments, quoted phrases, and tokens that already touch an asterisk.
# A token is anything matching custom_token when that is a Regexp, and
# /\w+/u otherwise.
def star_query(query, custom_token = nil)
  token = custom_token.is_a?(Regexp) ? custom_token : /\w+/u

  query.gsub(/("#{token}(.*?#{token})?"|(?![!-])#{token})/u) do
    # Capture the match pieces up front: the match call below replaces the
    # last-match data.
    current = Regexp.last_match
    pre     = current.pre_match
    proper  = current[0]
    post    = current.post_match

    # E.g. "@foo", "/2", "~3", but not as part of a token
    next proper if pre.match(%r{(\W|^)[@~/]\Z})
    # E.g. "foo bar", with quotes
    next proper if proper.starts_with?('"') && proper.ends_with?('"')
    # Already starred on either side
    next proper if pre.ends_with?("*") || post.starts_with?("*")

    "*#{proper}*"
  end
end
502
+
503
# Normalises a filter value: Ranges whose first element is a Time become
# ranges of integer timestamps (other Ranges are returned as they are),
# Arrays have any Time elements converted to timestamps, and everything
# else is wrapped with Array().
def filter_value(value)
  if value.is_a?(Range)
    return value unless value.first.is_a?(Time)
    return timestamp(value.first)..timestamp(value.last)
  end

  if value.is_a?(Array)
    return value.collect { |item| item.is_a?(Time) ? timestamp(item) : item }
  end

  Array(value)
end
513
+
514
# Returns the integer timestamp for a Time object.
#
# Values that respond to in_time_zone (a method added with the timezone
# support in Rails 2.1, used here purely as a feature test) are switched to
# UTC before conversion; anything else is converted directly.
#
def timestamp(value)
  return value.to_i unless value.respond_to?(:in_time_zone)

  value.utc.to_i
end
526
+
527
# Translate field and attribute conditions to the relevant search string
# and filters. Keys naming one of the class's attributes become filters;
# every other key becomes an "@field value" term. Returns the search string
# followed by the array of filters.
#
def search_conditions(klass, conditions={})
  attributes = []
  if klass
    attributes = klass.sphinx_indexes.collect { |index|
      index.attributes.collect { |attrib| attrib.unique_name }
    }.flatten
  end

  attribute_pairs, field_pairs = conditions.partition { |key, _val|
    attributes.include?(key.to_sym)
  }

  filters = attribute_pairs.collect { |key, val|
    Riddle::Client::Filter.new(key.to_s, filter_value(val))
  }
  search_string = field_pairs.collect { |key, val| "@#{key} #{val}" }.join(' ')

  [search_string, filters]
end
550
+
551
# Return the appropriate latitude and longitude values, depending on
# whether the relevant attributes have been defined, and also whether
# there's actually any values. Returns nil unless both a latitude and a
# longitude were supplied (via :geo, or :lat and :lon).
#
def anchor_conditions(klass, options)
  indexes    = klass ? klass.sphinx_indexes : []
  attributes = indexes.collect { |index|
    index.attributes.collect { |attrib| attrib.unique_name }
  }.flatten

  # Attribute names: search option first, then the first index that sets
  # one, then the first conventional name found amongst the attributes.
  lat_attr   = options[:latitude_attr]
  lat_attr ||= indexes.collect { |index| index.options[:latitude_attr] }.compact.first
  lat_attr ||= [:lat, :latitude].detect { |name| attributes.include?(name) }

  lon_attr   = options[:longitude_attr]
  lon_attr ||= indexes.collect { |index| index.options[:longitude_attr] }.compact.first
  lon_attr ||= [:lng, :lon, :long, :longitude].detect { |name| attributes.include?(name) }

  # :geo, when present, takes precedence over :lat / :lon.
  if options[:geo]
    lat, lon = options[:geo].first, options[:geo].last
  else
    lat, lon = options[:lat], options[:lon]
  end

  return nil unless lat && lon

  {
    :latitude_attribute  => lat_attr.to_s,
    :latitude            => lat,
    :longitude_attribute => lon_attr.to_s,
    :longitude           => lon
  }
end
593
+
594
# Set the sort options using the :order key (or the class's index-level
# :order) as well as the appropriate Riddle settings.
#
# A Symbol order sorts by that attribute (or by "<field>_sort" when it names
# one of the class's fields); a String order switches to extended sorting.
# The shorthand modes :asc and :desc are finally mapped to :attr_asc and
# :attr_desc.
#
def set_sort_options!(client, options)
  klass         = options[:class]
  index_options = klass ? klass.sphinx_index_options : {}
  fields        = []
  if klass
    fields = klass.sphinx_indexes.collect { |index|
      index.fields.collect { |field| field.unique_name }
    }.flatten
  end

  order = options[:order] || index_options[:order]
  if order.is_a?(Symbol)
    # Only override the mode when nothing more specific has been chosen.
    if client.sort_mode == :relevance || client.sort_mode.nil?
      client.sort_mode = :attr_asc
    end
    client.sort_by = fields.include?(order) ? "#{order}_sort" : order.to_s
  elsif order.is_a?(String)
    client.sort_mode = :extended
    client.sort_by   = sorted_fields_to_attributes(order, fields)
  end

  case client.sort_mode
  when :asc
    client.sort_mode = :attr_asc
  when :desc
    client.sort_mode = :attr_desc
  end
end
623
+
624
# Search through a collection of fields and translate any appearances
# of them in a string to their attribute equivalent for sorting (the field
# name with "_sort" appended). A field only counts when it stands alone:
# preceded by the start of a line or whitespace, and followed by an optional
# comma plus whitespace, or the end of a line.
#
# Returns a new string; the string passed in is left untouched, so frozen
# strings and option hashes reused across searches are safe.
#
def sorted_fields_to_attributes(string, fields)
  fields.inject(string.dup) do |clause, field|
    name = field.to_s
    # Escape the name so it is always matched literally.
    clause.gsub(/(^|\s)#{Regexp.escape(name)}(,?\s|$)/) { |match|
      match.gsub name, "#{name}_sort"
    }
  end
end
636
+ end
637
+ end
638
+ end