nixme-thinking-sphinx 0.9.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/LICENCE +20 -0
  2. data/README +52 -0
  3. data/lib/riddle.rb +22 -0
  4. data/lib/riddle/client.rb +593 -0
  5. data/lib/riddle/client/filter.rb +44 -0
  6. data/lib/riddle/client/message.rb +65 -0
  7. data/lib/riddle/client/response.rb +84 -0
  8. data/lib/test.rb +46 -0
  9. data/lib/thinking_sphinx.rb +82 -0
  10. data/lib/thinking_sphinx/active_record.rb +138 -0
  11. data/lib/thinking_sphinx/active_record/delta.rb +90 -0
  12. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  13. data/lib/thinking_sphinx/active_record/search.rb +43 -0
  14. data/lib/thinking_sphinx/association.rb +140 -0
  15. data/lib/thinking_sphinx/attribute.rb +282 -0
  16. data/lib/thinking_sphinx/configuration.rb +277 -0
  17. data/lib/thinking_sphinx/field.rb +198 -0
  18. data/lib/thinking_sphinx/index.rb +334 -0
  19. data/lib/thinking_sphinx/index/builder.rb +212 -0
  20. data/lib/thinking_sphinx/index/faux_column.rb +97 -0
  21. data/lib/thinking_sphinx/rails_additions.rb +56 -0
  22. data/lib/thinking_sphinx/search.rb +455 -0
  23. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +185 -0
  24. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  25. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +81 -0
  26. data/spec/unit/thinking_sphinx/active_record_spec.rb +201 -0
  27. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  28. data/spec/unit/thinking_sphinx/attribute_spec.rb +356 -0
  29. data/spec/unit/thinking_sphinx/configuration_spec.rb +476 -0
  30. data/spec/unit/thinking_sphinx/field_spec.rb +215 -0
  31. data/spec/unit/thinking_sphinx/index/builder_spec.rb +33 -0
  32. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +41 -0
  33. data/spec/unit/thinking_sphinx/index_spec.rb +230 -0
  34. data/spec/unit/thinking_sphinx/search_spec.rb +163 -0
  35. data/spec/unit/thinking_sphinx_spec.rb +107 -0
  36. data/tasks/thinking_sphinx_tasks.rake +1 -0
  37. data/tasks/thinking_sphinx_tasks.rb +86 -0
  38. metadata +90 -0
@@ -0,0 +1,334 @@
1
+ require 'thinking_sphinx/index/builder'
2
+ require 'thinking_sphinx/index/faux_column'
3
+
4
+ module ThinkingSphinx
5
+ # The Index class is a ruby representation of a Sphinx source (not a Sphinx
6
+ # index - yes, I know it's a little confusing. You'll manage). This is
7
+ # another 'internal' Thinking Sphinx class - if you're using it directly,
8
+ # you either know what you're doing, or messing with things beyond your ken.
9
+ # Enjoy.
10
+ #
11
+ class Index
12
+ attr_accessor :model, :fields, :attributes, :conditions, :delta, :options
13
+
14
# Builds an index bound to the given model. Every collection starts out
# empty and the delta flag starts out false; when a block is supplied it is
# handed to initialize_from_builder, which replaces those defaults with the
# settings gathered from the block. The Builder class documents the syntax
# available inside the block.
#
# Quick Example:
#
#   Index.new(User) do
#     indexes login, email
#
#     has created_at
#
#     set_property :delta => true
#   end
#
def initialize(model, &block)
  @model        = model
  @delta        = false
  @options      = {}
  @associations = {}
  @fields, @attributes, @conditions = [], [], []

  return unless block_given?

  initialize_from_builder(&block)
end
39
+
40
# Identifier for this index: the model's class name, underscored, with any
# colon, slash or backslash swapped for an underscore.
#
def name
  underscored = model.name.underscore
  underscored.tr(':/\\', '_')
end
43
+
44
# Renders the Sphinx configuration text for this index's core source, and
# for its delta source as well when the index has a delta.
#
# index         - value appended (after the name of the model's first
#                 index) to build the source names.
# database_conf - hash read for :host, :username, :password and :database;
#                 the host falls back to "localhost".
# charset_type  - charset name; "utf-8" on a MySQL connection adds a
#                 "SET NAMES utf8" pre-query.
#
# Returns the configuration as a String. #adapter raises when the model's
# connection is neither MySQL nor PostgreSQL.
#
def to_config(index, database_conf, charset_type)
  # Set up associations and joins before any SQL is generated.
  link!

  attr_sources = attributes.collect { |attrib|
    attrib.to_sphinx_clause
  }.join("\n ")

  db_adapter = case adapter
  when :postgres
    "pgsql"
  when :mysql
    "mysql"
  else
    raise "Unsupported Database Adapter: Sphinx only supports MySQL and PostgreSQL"
  end

  # Pieces shared by the core and the delta source definitions.
  source_name  = "#{model.indexes.first.name}_#{index}"
  charset_pre  = charset_type == "utf-8" && adapter == :mysql ? "SET NAMES utf8" : ""
  max_len      = @options[:group_concat_max_len]
  group_concat = max_len ? "sql_query_pre = SET SESSION group_concat_max_len = #{max_len}" : ""

  config = <<-SOURCE

source #{source_name}_core
{
type = #{db_adapter}
sql_host = #{database_conf[:host] || "localhost"}
sql_user = #{database_conf[:username]}
sql_pass = #{database_conf[:password]}
sql_db = #{database_conf[:database]}

sql_query_pre = #{charset_pre}
#{group_concat}
sql_query_pre = #{to_sql_query_pre}
sql_query = #{to_sql.gsub(/\n/, ' ')}
sql_query_range = #{to_sql_query_range}
sql_query_info = #{to_sql_query_info}
#{attr_sources}
}
  SOURCE

  if delta?
    # The delta source is declared as a child of the core source
    # ("delta : core") and restates its own pre-queries and queries.
    config += <<-SOURCE

source #{source_name}_delta : #{source_name}_core
{
sql_query_pre =
sql_query_pre = #{charset_pre}
#{group_concat}
sql_query = #{to_sql(:delta => true).gsub(/\n/, ' ')}
sql_query_range = #{to_sql_query_range :delta => true}
}
    SOURCE
  end

  config
end
97
+
98
# Connects every field and attribute to the associations that its columns
# pass through, joining each association onto one shared JoinDependency.
# This _must_ run before the index's fields or attributes are asked for
# anything that depends on their SQL structure.
#
def link!
  join_base = ::ActiveRecord::Associations::ClassMethods::JoinDependency.new(
    @model, [], nil
  )

  # The same wiring applies to a field and to an attribute.
  connect = lambda { |item|
    item.model ||= @model

    item.columns.each do |column|
      item.associations[column] = associations(column.__stack.clone)
      item.associations[column].each { |assoc| assoc.join_to(join_base) }
    end
  }

  # Fields are linked first, then attributes.
  @fields.each { |field| connect.call(field) }
  @attributes.each { |attribute| connect.call(attribute) }
end
124
+
125
# Generates the big SQL statement that fetches the data for all the fields
# and attributes, using all the relevant association joins.
#
# The statement selects the primary key plus each field's and attribute's
# select SQL, restricts rows to the $start..$end primary key range, appends
# the delta filter (only when the index has a delta) and any manual
# conditions, and groups by the primary key plus every non-nil group SQL
# fragment. " ORDER BY NULL" is appended when the connection class is the
# MySQL adapter.
#
# options - pass :delta => true for the version filtered to delta rows.
#           Has no effect if the index isn't set up with a delta.
#
# Examples:
#
#   index.to_sql
#   index.to_sql(:delta => true)
#
# Returns the SQL as a String.
#
def to_sql(options={})
  assocs     = all_associations
  table      = @model.quoted_table_name
  primary_id = "#{table}.#{quote_column(@model.primary_key)}"

  # Every extra filter is appended to the fixed primary key range check.
  filters = []
  if delta?
    flag = options[:delta] ? db_boolean(true) : db_boolean(false)
    filters << "#{table}.#{quote_column('delta')} = #{flag}"
  end
  filters.concat(@conditions)
  where_clause = filters.collect { |filter| " AND #{filter}" }.join

  select_list = [primary_id] +
    @fields.collect { |field| field.to_select_sql } +
    @attributes.collect { |attribute| attribute.to_select_sql }
  group_list = [primary_id] +
    @fields.collect { |field| field.to_group_sql }.compact +
    @attributes.collect { |attribute| attribute.to_group_sql }.compact

  # FROM uses the quoted table name, like every other table reference in
  # this statement, so table names that need quoting keep working.
  sql = <<-SQL
SELECT #{ select_list.join(", ") }
FROM #{ table }
#{ assocs.collect { |assoc| assoc.to_sql }.join(' ') }
WHERE #{ primary_id } >= $start
AND #{ primary_id } <= $end
#{ where_clause }
GROUP BY #{ group_list.join(", ") }
  SQL

  if @model.connection.class.name == "ActiveRecord::ConnectionAdapters::MysqlAdapter"
    sql += " ORDER BY NULL"
  end

  sql
end
171
+
172
# Builds the query info SQL: a statement selecting the single row of the
# model's table whose primary key equals $id.
#
def to_sql_query_info
  table  = @model.quoted_table_name
  column = quote_column(@model.primary_key)

  # The two spaces after WHERE are part of the generated text.
  "SELECT * FROM #{table} WHERE  #{column} = $id"
end
179
+
180
# Builds the query range SQL: a statement returning the minimum and maximum
# primary key values of the model's table. When the index has a delta the
# rows are filtered on the delta column - pass :delta => true for the delta
# version of the SQL.
#
def to_sql_query_range(options={})
  bounds = %w[MIN MAX].collect { |function|
    "#{function}(#{quote_column(@model.primary_key)})"
  }

  # Fix to handle Sphinx PostgreSQL bug (it doesn't like NULLs or 0's)
  bounds = bounds.collect { |bound| "COALESCE(#{bound}, 1)" } if adapter == :postgres

  range_sql = "SELECT #{bounds.join(', ')} FROM #{@model.quoted_table_name} "
  return range_sql unless delta?

  flag = options[:delta] ? db_boolean(true) : db_boolean(false)
  range_sql << "WHERE #{@model.quoted_table_name}.#{quote_column('delta')} = #{flag}"
  range_sql
end
200
+
201
# SQL to run before a full index. An empty string unless the index has a
# delta, in which case it is an UPDATE statement setting the delta column
# of the model's table back to the database's false value.
#
def to_sql_query_pre
  return "" unless delta?

  "UPDATE #{@model.quoted_table_name} SET #{quote_column('delta')} = #{db_boolean(false)}"
end
208
+
209
# Whether a delta index accompanies this index. The stored delta setting
# is handed back untouched, so callers should rely on its truthiness
# rather than expect a strict true/false.
#
def delta?
  @delta
end
214
+
215
# Symbol naming the database behind the model's connection: :mysql or
# :postgres, decided from the connection's class name. The answer is
# remembered after the first successful lookup. Raises a RuntimeError for
# any other connection class.
#
def adapter
  return @adapter if @adapter

  connection_class = @model.connection.class.name
  @adapter =
    if connection_class == "ActiveRecord::ConnectionAdapters::MysqlAdapter"
      :mysql
    elsif connection_class == "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
      :postgres
    else
      raise "Invalid Database Adapter: Sphinx only supports MySQL and PostgreSQL"
    end
end
225
+
226
# The fields of this index whose prefixes setting is truthy.
#
def prefix_fields
  @fields.select(&:prefixes)
end
229
+
230
# The fields of this index whose infixes setting is truthy.
#
def infix_fields
  @fields.select(&:infixes)
end
233
+
234
+ private
235
+
236
# Quotes a column name using the model's database connection.
#
def quote_column(column)
  connection = @model.connection
  connection.quote_column_name(column)
end
239
+
240
# Does all the magic with the block provided to the base #initialize.
# Creates a new class subclassed from Builder, evaluates the block on it,
# then copies the collected settings - fields, attributes, conditions and
# properties - into this index. The :delta property becomes the delta
# flag; every other property ends up in the options hash.
#
# When the model does not descend directly from ActiveRecord (per
# descends_from_active_record?), a condition on the inheritance column is
# added so only rows stored for this class are indexed.
#
# Also appends two integer attributes: class_crc (the model's CRC32 value)
# and sphinx_deleted (the constant 0).
#
def initialize_from_builder(&block)
  builder = Class.new(Builder)
  builder.setup

  builder.instance_eval(&block)

  unless @model.descends_from_active_record?
    stored_class = @model.store_full_sti_class ? @model.name : @model.name.demodulize
    # Qualify and quote the inheritance column so the condition stays
    # unambiguous when a joined table carries a column of the same name.
    builder.where(
      "#{@model.quoted_table_name}.#{quote_column(@model.inheritance_column)} = '#{stored_class}'"
    )
  end

  @fields     = builder.fields
  @attributes = builder.attributes
  @conditions = builder.conditions
  @delta      = builder.properties[:delta]
  @options    = builder.properties.except(:delta)

  @attributes << Attribute.new(
    FauxColumn.new(@model.to_crc32.to_s),
    :type => :integer,
    :as   => :class_crc
  )
  @attributes << Attribute.new(
    FauxColumn.new("0"),
    :type => :integer,
    :as   => :sphinx_deleted
  )
end
275
+
276
# Every association used by any field or attribute of this index, together
# with the ancestors of each one, without duplicates. Fields come first,
# then attributes. The result is remembered once it is non-empty-or-not:
# any truthy result is stored and returned on later calls.
#
def all_associations
  @all_associations ||= begin
    # associations attached directly to the field and attribute columns
    direct = (@fields + @attributes).collect { |item|
      item.associations.values
    }.flatten.uniq

    # get ancestors as well as column-level associations
    direct.collect { |assoc| assoc.ancestors }.flatten.uniq
  end
end
295
+
296
# Walks a path of association keys and returns the associations found at
# its end. The first key is looked up on this index (via #association)
# when no parent is given, otherwise among the parent's children; every
# further key is resolved among the children of the associations found so
# far. The path array is consumed (emptied) in the process.
#
def associations(path, parent = nil)
  first_step = path.shift
  stack = parent.nil? ? association(first_step) : parent.children(first_step)

  until path.empty?
    step  = path.shift
    stack = stack.collect { |assoc| assoc.children(step) }.flatten
  end

  stack
end
316
+
317
# Looks up the association stack for a key, asking Association.children
# for it the first time and reusing the stored answer afterwards.
#
def association(key)
  known = @associations[key]
  return known if known

  @associations[key] = Association.children(@model, key)
end
322
+
323
# String literal for a boolean in the current database adapter's SQL:
# 'TRUE' / 'FALSE' on PostgreSQL, '1' / '0' otherwise. Any truthy value
# counts as true.
#
def db_boolean(val)
  truthy, falsy = adapter == :postgres ? %w[TRUE FALSE] : %w[1 0]
  val ? truthy : falsy
end
333
+ end
334
+ end
@@ -0,0 +1,212 @@
1
+ module ThinkingSphinx
2
+ class Index
3
+ # The Builder class is the core for the index definition block processing.
4
+ # There are four methods you really need to pay attention to:
5
+ # - indexes (aliased to field and includes)
6
+ # - has (aliased to attribute)
7
+ # - where
8
+ # - set_property (aliased to set_properties)
9
+ #
10
+ # The first two of these methods allow you to define what data makes up
11
+ # your indexes. #where provides a method to add manual SQL conditions, and
12
+ # set_property allows you to set some settings on a per-index basis. Check
13
+ # out each method's documentation for better ideas of usage.
14
+ #
15
+ class Builder
16
+ class << self
17
+ # No idea where this is coming from - haven't found it in any ruby or
18
+ # rails documentation. It's not needed though, so it gets undef'd.
19
+ # Hopefully the list of methods that get in the way doesn't get too
20
+ # long.
21
+ undef_method :parent
22
+
23
+ attr_accessor :fields, :attributes, :properties, :conditions
24
+
25
# Resets every collection the builder gathers settings into: fields,
# attributes and conditions become empty arrays, properties an empty hash.
# Think of it as the counterpart of an instance's initialize method.
#
def setup
  @properties = {}
  @attributes = []
  @fields     = []
  @conditions = []
end
34
+
35
# This is how you add fields - the strings Sphinx looks at - to your
# index. Technically, to use this method, you need to pass in some
# columns and options - but there's some neat method_missing stuff
# happening, so let's stick to the expected syntax within a define_index
# block.
#
# Expected options are :as, which points to a column alias in symbol
# form, and :sortable, which indicates whether you want to sort by this
# field. A sortable field also gets a string attribute named after the
# field's unique name with "_sort" appended.
#
# Adding Single-Column Fields:
#
# You can use symbols or methods - and can chain methods together to
# get access down the associations tree.
#
#   indexes :id, :as => :my_id
#   indexes :name, :sortable => true
#   indexes first_name, last_name, :sortable => true
#   indexes users.posts.content, :as => :post_content
#   indexes users(:id), :as => :user_ids
#
# Keep in mind that if any keywords for Ruby methods - such as id or
# name - clash with your column names, you need to use the symbol
# version (see the first, second and last examples above).
#
# If you specify multiple columns (example #3), a field will be created
# for each. Don't use the :as option in this case. If you want to merge
# those columns together, continue reading.
#
# Adding Multi-Column Fields:
#
#   indexes [first_name, last_name], :as => :name
#   indexes [location, parent.location], :as => :location
#
# To combine multiple columns into a single field, you need to wrap
# them in an Array, as shown by the above examples. There's no
# limitations on whether they're symbols or methods or what level of
# associations they come from.
#
# Adding SQL Fragment Fields
#
# You can also define a field using an SQL fragment, useful for when
# you would like to index a calculated value.
#
#   indexes "age < 18", :as => :minor
#
def indexes(*args)
  options = args.extract_options!
  args.each do |columns|
    # Symbols and Strings - alone or inside an Array - are wrapped in a
    # FauxColumn, the same coercion #has applies to its arguments.
    columns = case columns
    when Symbol, String
      FauxColumn.new(columns)
    when Array
      columns.collect { |col|
        case col
        when Symbol, String
          FauxColumn.new(col)
        else
          col
        end
      }
    else
      columns
    end

    field = Field.new(columns, options)
    fields << field

    next unless field.sortable

    # Interpolate rather than concat, so a String alias is never
    # modified in place.
    attributes << Attribute.new(
      field.columns.collect { |col| col.clone },
      options.merge(
        :type => :string,
        :as   => "#{field.unique_name}_sort".to_sym
      )
    )
  end
end
98
+ alias_method :field, :indexes
99
+ alias_method :includes, :indexes
100
+
101
# This is the method to add attributes to your index (hence why it is
# aliased as 'attribute'). The syntax is the same as #indexes, so use
# that as a starting point, but keep in mind the following points.
#
# An attribute can have an alias (the :as option), but it is always
# sortable - so you don't need to explicitly request that. You _can_
# specify the data type of the attribute (the :type option), but the
# code's pretty good at figuring that out itself from peering into the
# database.
#
# Attributes are limited to the following types: integers, floats,
# datetimes (converted to timestamps), booleans and strings. Don't
# forget that Sphinx converts string attributes to integers, which are
# useful for sorting, but that's about it.
#
# You can also have a collection of integers for multi-value attributes
# (MVAs). Generally these would be through a has_many relationship,
# like in this example:
#
#   has posts(:id), :as => :post_ids
#
# This allows you to filter on any of the values tied to a specific
# record. Might be best to read through the Sphinx documentation to get
# a better idea of that though.
#
# Adding SQL Fragment Attributes
#
# You can also define an attribute using an SQL fragment, useful for
# when you would like to index a calculated value. Don't forget to set
# the type of the attribute though:
#
#   has "age < 18", :as => :minor, :type => :boolean
#
# If you're creating attributes for latitude and longitude, don't
# forget that Sphinx expects these values to be in radians.
#
def has(*args)
  options = args.extract_options!

  # Symbols and Strings become FauxColumns; anything else is kept as given.
  coerce = lambda { |col|
    case col
    when Symbol, String then FauxColumn.new(col)
    else col
    end
  }

  args.each do |columns|
    columns = if Array === columns
      columns.collect { |col| coerce.call(col) }
    else
      coerce.call(columns)
    end

    attributes << Attribute.new(columns, options)
  end
end
159
+ alias_method :attribute, :has
160
+
161
# Adds manual SQL conditions to your index request. Pass in as many
# strings as you like - each one is appended to the builder's list of
# conditions, which get joined together with ANDs later on.
#
#   where "user_id = 10"
#   where "parent_type = 'Article'", "created_at < NOW()"
#
def where(*args)
  @conditions = @conditions + args
end
171
+
172
# This is what to use to set properties on the index. Chief amongst
# those is the delta property - to allow automatic updates to your
# indexes as new models are added and edited - but you can also define
# search-related properties which will be the defaults for all searches
# on the model.
#
#   set_property :delta => true
#   set_property :field_weights => {"name" => 100}
#
# Properties can be given as a hash (merged into the existing
# properties) or as a single name followed by its value.
#
# Also, the following two properties are particularly relevant for
# geo-location searching - latitude_attr and longitude_attr. If your
# attributes for these two values are named something other than
# lat/latitude or lon/long/longitude, you can dictate what they are
# when defining the index, so you don't need to specify them for every
# geo-related search.
#
#   set_property :latitude_attr => "lt", :longitude_attr => "lg"
#
# Please don't forget to add a boolean field named 'delta' to your
# model's database table if enabling the delta index for it.
#
def set_property(*args)
  options = args.extract_options!
  return @properties.merge!(options) unless options.empty?

  # No hash given: treat the arguments as a single name/value pair.
  key, value = args
  @properties[key] = value
end
201
+ alias_method :set_properties, :set_property
202
+
203
# Turns any otherwise-unknown method call into a new column for the field
# and attribute definitions: the method name, followed by any arguments
# given, is passed straight to FauxColumn.new.
#
def method_missing(method, *args)
  column_stack = [method] + args
  FauxColumn.new(*column_stack)
end
209
+ end
210
+ end
211
+ end
212
+ end