ryanb-thinking_sphinx 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/LICENCE +20 -0
  2. data/README +60 -0
  3. data/lib/riddle.rb +26 -0
  4. data/lib/riddle/client.rb +639 -0
  5. data/lib/riddle/client/filter.rb +44 -0
  6. data/lib/riddle/client/message.rb +65 -0
  7. data/lib/riddle/client/response.rb +84 -0
  8. data/lib/test.rb +46 -0
  9. data/lib/thinking_sphinx.rb +102 -0
  10. data/lib/thinking_sphinx/active_record.rb +141 -0
  11. data/lib/thinking_sphinx/active_record/delta.rb +97 -0
  12. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  13. data/lib/thinking_sphinx/active_record/search.rb +50 -0
  14. data/lib/thinking_sphinx/association.rb +144 -0
  15. data/lib/thinking_sphinx/attribute.rb +284 -0
  16. data/lib/thinking_sphinx/configuration.rb +283 -0
  17. data/lib/thinking_sphinx/field.rb +200 -0
  18. data/lib/thinking_sphinx/index.rb +340 -0
  19. data/lib/thinking_sphinx/index/builder.rb +195 -0
  20. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  21. data/lib/thinking_sphinx/rails_additions.rb +56 -0
  22. data/lib/thinking_sphinx/search.rb +482 -0
  23. data/lib/thinking_sphinx/tasks.rb +86 -0
  24. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +207 -0
  25. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  26. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  27. data/spec/unit/thinking_sphinx/active_record_spec.rb +236 -0
  28. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  29. data/spec/unit/thinking_sphinx/attribute_spec.rb +360 -0
  30. data/spec/unit/thinking_sphinx/configuration_spec.rb +493 -0
  31. data/spec/unit/thinking_sphinx/field_spec.rb +219 -0
  32. data/spec/unit/thinking_sphinx/index/builder_spec.rb +33 -0
  33. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +68 -0
  34. data/spec/unit/thinking_sphinx/index_spec.rb +277 -0
  35. data/spec/unit/thinking_sphinx/search_spec.rb +190 -0
  36. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  37. data/tasks/thinking_sphinx_tasks.rake +1 -0
  38. metadata +103 -0
@@ -0,0 +1,340 @@
1
+ require 'thinking_sphinx/index/builder'
2
+ require 'thinking_sphinx/index/faux_column'
3
+
4
module ThinkingSphinx
  # The Index class is a ruby representation of a Sphinx source (not a Sphinx
  # index - yes, I know it's a little confusing. You'll manage). This is
  # another 'internal' Thinking Sphinx class - if you're using it directly,
  # you either know what you're doing, or messing with things beyond your ken.
  # Enjoy.
  #
  class Index
    attr_accessor :model, :fields, :attributes, :conditions, :delta, :options

    # Create a new index instance by passing in the model it is tied to, and
    # a block to build it with (optional but recommended). For documentation
    # on the syntax for inside the block, the Builder class is what you want.
    #
    # Quick Example:
    #
    #   Index.new(User) do
    #     indexes login, email
    #
    #     has created_at
    #
    #     set_property :delta => true
    #   end
    #
    def initialize(model, &block)
      @model        = model
      @associations = {}
      @fields       = []
      @attributes   = []
      @conditions   = []
      @options      = {}
      @delta        = false

      initialize_from_builder(&block) if block_given?
    end

    # The underscored model name, with any ':', '/' or '\' characters
    # replaced by underscores.
    #
    def name
      model.name.underscore.tr(':/\\', '_')
    end

    # Returns true when the "<name>_<part>.spa" file under the configured
    # searchd file path is missing or has zero size. The part defaults to
    # :core.
    #
    def empty?(part = :core)
      config = ThinkingSphinx::Configuration.new
      File.size?("#{config.searchd_file_path}/#{self.name}_#{part}.spa").nil?
    end

    # Builds the Sphinx source definition text for this index: always the
    # core source, followed by a delta source inheriting from it when the
    # index has deltas enabled.
    #
    # index         - value interpolated into the source names.
    # database_conf - hash read for :host, :username, :password, :database
    #                 and :socket.
    # charset_type  - when "utf-8" on MySQL, a "SET NAMES utf8" pre-query is
    #                 emitted.
    #
    # Raises a RuntimeError (via #adapter) for unsupported database adapters.
    #
    def to_config(index, database_conf, charset_type)
      # Set up associations and joins
      link!

      attr_sources = attributes.collect { |attrib|
        attrib.to_sphinx_clause
      }.join("\n ")

      db_adapter = case adapter
      when :postgres
        "pgsql"
      when :mysql
        "mysql"
      else
        # Defensive only: #adapter already raises for anything else.
        raise "Unsupported Database Adapter: Sphinx only supports MySQL and PostgreSQL"
      end

      source_name      = "#{model.indexes.first.name}_#{index}"
      charset_pre      = charset_type == "utf-8" && adapter == :mysql ? "SET NAMES utf8" : ""
      # nil interpolates as an empty string, so the line is simply blank when
      # the option is not set.
      group_concat_pre = if @options[:group_concat_max_len]
        "sql_query_pre = SET SESSION group_concat_max_len = #{@options[:group_concat_max_len]}"
      end

      config = <<-SOURCE

source #{source_name}_core
{
type = #{db_adapter}
sql_host = #{database_conf[:host] || "localhost"}
sql_user = #{database_conf[:username]}
sql_pass = #{database_conf[:password]}
sql_db = #{database_conf[:database]}
#{"sql_sock = #{database_conf[:socket]}" unless database_conf[:socket].blank? }

sql_query_pre = #{charset_pre}
#{group_concat_pre}
sql_query_pre = #{to_sql_query_pre}
sql_query = #{to_sql.gsub(/\n/, ' ')}
sql_query_range = #{to_sql_query_range}
sql_query_info = #{to_sql_query_info}
#{attr_sources}
}
      SOURCE

      if delta?
        # The leading empty sql_query_pre is emitted before the delta source's
        # own pre-queries (the core source's delta-resetting UPDATE is not
        # repeated here).
        config += <<-SOURCE

source #{source_name}_delta : #{source_name}_core
{
sql_query_pre =
sql_query_pre = #{charset_pre}
#{group_concat_pre}
sql_query = #{to_sql(:delta => true).gsub(/\n/, ' ')}
sql_query_range = #{to_sql_query_range :delta => true}
}
        SOURCE
      end

      config
    end

    # Link all the fields and associations to their corresponding
    # associations and joins. This _must_ be called before interrogating
    # the index's fields and associations for anything that may reference
    # their SQL structure.
    #
    def link!
      base = ::ActiveRecord::Associations::ClassMethods::JoinDependency.new(
        @model, [], nil
      )

      # Fields first, then attributes - both are linked in exactly the same
      # way.
      (@fields + @attributes).each { |property|
        property.model ||= @model
        property.columns.each { |col|
          property.associations[col] = associations(col.__stack.clone)
          property.associations[col].each { |assoc| assoc.join_to(base) }
        }
      }

      # The per-column association lists have just been reassigned, so any
      # previously memoized union (see #all_associations) may be stale.
      @all_associations = nil
    end

    # Generates the big SQL statement to get the data back for all the fields
    # and attributes, using all the relevant association joins. If you want
    # the version filtered for delta values, send through :delta => true in the
    # options. Won't do much though if the index isn't set up to support a
    # delta sibling.
    #
    # Examples:
    #
    #   index.to_sql
    #   index.to_sql(:delta => true)
    #
    def to_sql(options={})
      assocs = all_associations

      where_clause = ""
      if self.delta?
        where_clause << " AND #{@model.quoted_table_name}.#{quote_column('delta')}" +
          " = #{options[:delta] ? db_boolean(true) : db_boolean(false)}"
      end
      unless @conditions.empty?
        where_clause << " AND " << @conditions.join(" AND ")
      end

      primary_key = "#{@model.quoted_table_name}.#{quote_column(@model.primary_key)}"

      select_clause = (
        [primary_key] +
        @fields.collect { |field| field.to_select_sql } +
        @attributes.collect { |attribute| attribute.to_select_sql }
      ).join(", ")

      join_clause = assocs.collect { |assoc| assoc.to_sql }.join(' ')

      group_clause = (
        [primary_key] +
        @fields.collect { |field| field.to_group_sql }.compact +
        @attributes.collect { |attribute| attribute.to_group_sql }.compact
      ).join(", ")

      # The table name is quoted here, matching every other reference to the
      # table in this statement.
      sql = <<-SQL
SELECT #{select_clause}
FROM #{@model.quoted_table_name}
#{join_clause}
WHERE #{primary_key} >= $start
AND #{primary_key} <= $end
#{where_clause}
GROUP BY #{group_clause}
      SQL

      if @model.connection.class.name == "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        sql += " ORDER BY NULL"
      end

      sql
    end

    # Simple helper method for the query info SQL - which is a statement that
    # returns the single row for a corresponding id.
    #
    def to_sql_query_info
      "SELECT * FROM #{@model.quoted_table_name} WHERE " +
      " #{quote_column(@model.primary_key)} = $id"
    end

    # Simple helper method for the query range SQL - which is a statement that
    # returns minimum and maximum id values. These can be filtered by delta -
    # so pass in :delta => true to get the delta version of the SQL.
    #
    def to_sql_query_range(options={})
      min_statement = "MIN(#{quote_column(@model.primary_key)})"
      max_statement = "MAX(#{quote_column(@model.primary_key)})"

      # Fix to handle Sphinx PostgreSQL bug (it doesn't like NULLs or 0's)
      if adapter == :postgres
        min_statement = "COALESCE(#{min_statement}, 1)"
        max_statement = "COALESCE(#{max_statement}, 1)"
      end

      sql = "SELECT #{min_statement}, #{max_statement} " +
            "FROM #{@model.quoted_table_name} "
      sql << "WHERE #{@model.quoted_table_name}.#{quote_column('delta')} " +
            "= #{options[:delta] ? db_boolean(true) : db_boolean(false)}" if self.delta?
      sql
    end

    # Returns the SQL query to run before a full index - ie: nothing unless the
    # index has a delta, and then it's an update statement to set delta values
    # back to the adapter's false literal.
    #
    def to_sql_query_pre
      self.delta? ? "UPDATE #{@model.quoted_table_name} SET #{quote_column('delta')} = #{db_boolean(false)}" : ""
    end

    # Flag to indicate whether this index has a corresponding delta index.
    #
    def delta?
      @delta
    end

    # Returns :mysql or :postgres depending on the class name of the model's
    # connection (memoized). Raises a RuntimeError for any other adapter.
    #
    def adapter
      @adapter ||= case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        :mysql
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        :postgres
      else
        raise "Invalid Database Adapter: Sphinx only supports MySQL and PostgreSQL"
      end
    end

    # The fields whose #prefixes value is truthy.
    #
    def prefix_fields
      @fields.select { |field| field.prefixes }
    end

    # The fields whose #infixes value is truthy.
    #
    def infix_fields
      @fields.select { |field| field.infixes }
    end

    private

    # Quotes a column name using the model's database connection.
    #
    def quote_column(column)
      @model.connection.quote_column_name(column)
    end

    # Does all the magic with the block provided to the base #initialize.
    # Creates a new class subclassed from Builder, and evaluates the block
    # on it, then pulls all relevant settings - fields, attributes, conditions,
    # properties - into the new index.
    #
    # Also creates a CRC attribute for the model, and a sphinx_deleted
    # attribute with a constant value of 0.
    #
    def initialize_from_builder(&block)
      builder = Class.new(Builder)
      builder.setup

      builder.instance_eval(&block)

      # For STI subclasses, restrict the source to rows of this class.
      unless @model.descends_from_active_record?
        stored_class = @model.store_full_sti_class ? @model.name : @model.name.demodulize
        builder.where("#{@model.quoted_table_name}.#{quote_column(@model.inheritance_column)} = '#{stored_class}'")
      end

      @fields     = builder.fields
      @attributes = builder.attributes
      @conditions = builder.conditions
      @delta      = builder.properties[:delta]
      @options    = builder.properties.except(:delta)

      @attributes << Attribute.new(
        FauxColumn.new(@model.to_crc32.to_s),
        :type => :integer,
        :as   => :class_crc
      )
      @attributes << Attribute.new(
        FauxColumn.new("0"),
        :type => :integer,
        :as   => :sphinx_deleted
      )
    end

    # Returns all associations used amongst all the fields and attributes.
    # This includes all associations between the model and what the actual
    # columns are from. The result is memoized until the next #link! call.
    #
    def all_associations
      @all_associations ||= (
        # field associations
        @fields.collect { |field|
          field.associations.values
        }.flatten +
        # attribute associations
        @attributes.collect { |attrib|
          attrib.associations.values
        }.flatten
      ).uniq.collect { |assoc|
        # get ancestors as well as column-level associations
        assoc.ancestors
      }.flatten.uniq
    end

    # Gets a stack of associations for a specific path. Note that the given
    # path array is consumed (shifted) in the process.
    #
    def associations(path, parent = nil)
      assocs = if parent.nil?
        association(path.shift)
      else
        parent.children(path.shift)
      end

      until path.empty?
        point  = path.shift
        assocs = assocs.collect { |assoc|
          assoc.children(point)
        }.flatten
      end

      assocs
    end

    # Gets the association stack for a specific key.
    #
    def association(key)
      @associations[key] ||= Association.children(@model, key)
    end

    # Returns the proper boolean value string literal for the
    # current database adapter.
    #
    def db_boolean(val)
      if adapter == :postgres
        val ? 'TRUE' : 'FALSE'
      else
        val ? '1' : '0'
      end
    end
  end
end
@@ -0,0 +1,195 @@
1
module ThinkingSphinx
  class Index
    # The Builder class is the core for the index definition block processing.
    # There are four methods you really need to pay attention to:
    # - indexes (aliased to field and includes)
    # - has (aliased to attribute)
    # - where
    # - set_property (aliased to set_properties)
    #
    # The first two of these methods allow you to define what data makes up
    # your indexes. #where provides a method to add manual SQL conditions, and
    # set_property allows you to set some settings on a per-index basis. Check
    # out each method's documentation for better ideas of usage.
    #
    class Builder
      class << self
        # No idea where this is coming from - haven't found it in any ruby or
        # rails documentation. It's not needed though, so it gets undef'd.
        # Hopefully the list of methods that get in the way doesn't get too
        # long.
        #
        # Guarded, because undef_method raises NameError when the method does
        # not exist in the running environment.
        undef_method :parent if method_defined?(:parent)

        attr_accessor :fields, :attributes, :properties, :conditions

        # Set up all the collections. Consider this the equivalent of an
        # instance's initialize method.
        #
        def setup
          @fields     = []
          @attributes = []
          @properties = {}
          @conditions = []
        end

        # This is how you add fields - the strings Sphinx looks at - to your
        # index. Technically, to use this method, you need to pass in some
        # columns and options - but there's some neat method_missing stuff
        # happening, so lets stick to the expected syntax within a define_index
        # block.
        #
        # Expected options are :as, which points to a column alias in symbol
        # form, and :sortable, which indicates whether you want to sort by this
        # field.
        #
        # Adding Single-Column Fields:
        #
        # You can use symbols or methods - and can chain methods together to
        # get access down the associations tree.
        #
        #   indexes :id, :as => :my_id
        #   indexes :name, :sortable => true
        #   indexes first_name, last_name, :sortable => true
        #   indexes users.posts.content, :as => :post_content
        #   indexes users(:id), :as => :user_ids
        #
        # Keep in mind that if any keywords for Ruby methods - such as id or
        # name - clash with your column names, you need to use the symbol
        # version (see the first, second and last examples above).
        #
        # If you specify multiple columns (example #3), a field will be created
        # for each. Don't use the :as option in this case. If you want to merge
        # those columns together, continue reading.
        #
        # Adding Multi-Column Fields:
        #
        #   indexes [first_name, last_name], :as => :name
        #   indexes [location, parent.location], :as => :location
        #
        # To combine multiple columns into a single field, you need to wrap
        # them in an Array, as shown by the above examples. There's no
        # limitations on whether they're symbols or methods or what level of
        # associations they come from.
        #
        # Adding SQL Fragment Fields
        #
        # You can also define a field using an SQL fragment, useful for when
        # you would like to index a calculated value.
        #
        #   indexes "age < 18", :as => :minor
        #
        def indexes(*args)
          options = args.extract_options!
          args.each do |columns|
            new_field = Field.new(FauxColumn.coerce(columns), options)
            fields << new_field

            next unless new_field.sortable

            # Sortable fields get a companion string attribute named
            # "<field>_sort". The name is built by interpolation so that a
            # String alias supplied by the caller is never mutated in place.
            attributes << Attribute.new(
              new_field.columns.collect { |col| col.clone },
              options.merge(
                :type => :string,
                :as   => "#{new_field.unique_name}_sort".to_sym
              )
            )
          end
        end
        alias_method :field,    :indexes
        alias_method :includes, :indexes

        # This is the method to add attributes to your index (hence why it is
        # aliased as 'attribute'). The syntax is the same as #indexes, so use
        # that as starting point, but keep in mind the following points.
        #
        # An attribute can have an alias (the :as option), but it is always
        # sortable - so you don't need to explicitly request that. You _can_
        # specify the data type of the attribute (the :type option), but the
        # code's pretty good at figuring that out itself from peering into the
        # database.
        #
        # Attributes are limited to the following types: integers, floats,
        # datetimes (converted to timestamps), booleans and strings. Don't
        # forget that Sphinx converts string attributes to integers, which are
        # useful for sorting, but that's about it.
        #
        # You can also have a collection of integers for multi-value attributes
        # (MVAs). Generally these would be through a has_many relationship,
        # like in this example:
        #
        #   has posts(:id), :as => :post_ids
        #
        # This allows you to filter on any of the values tied to a specific
        # record. Might be best to read through the Sphinx documentation to get
        # a better idea of that though.
        #
        # Adding SQL Fragment Attributes
        #
        # You can also define an attribute using an SQL fragment, useful for
        # when you would like to index a calculated value. Don't forget to set
        # the type of the attribute though:
        #
        #   has "age < 18", :as => :minor, :type => :boolean
        #
        # If you're creating attributes for latitude and longitude, don't
        # forget that Sphinx expects these values to be in radians.
        #
        def has(*args)
          options = args.extract_options!
          args.each do |columns|
            attributes << Attribute.new(FauxColumn.coerce(columns), options)
          end
        end
        alias_method :attribute, :has

        # Use this method to add some manual SQL conditions for your index
        # request. You can pass in as many strings as you like, they'll get
        # joined together with ANDs later on.
        #
        #   where "user_id = 10"
        #   where "parent_type = 'Article'", "created_at < NOW()"
        #
        def where(*args)
          @conditions += args
        end

        # This is what to use to set properties on the index. Chief amongst
        # those is the delta property - to allow automatic updates to your
        # indexes as new models are added and edited - but also you can
        # define search-related properties which will be the defaults for all
        # searches on the model.
        #
        #   set_property :delta => true
        #   set_property :field_weights => {"name" => 100}
        #
        # A single property can also be given as a key followed by its value:
        #
        #   set_property :delta, true
        #
        # Also, the following two properties are particularly relevant for
        # geo-location searching - latitude_attr and longitude_attr. If your
        # attributes for these two values are named something other than
        # lat/latitude or lon/long/longitude, you can dictate what they are
        # when defining the index, so you don't need to specify them for every
        # geo-related search.
        #
        #   set_property :latitude_attr => "lt", :longitude_attr => "lg"
        #
        # Please don't forget to add a boolean field named 'delta' to your
        # model's database table if enabling the delta index for it.
        #
        def set_property(*args)
          options = args.extract_options!
          if options.empty?
            @properties[args[0]] = args[1]
          else
            @properties.merge!(options)
          end
        end
        alias_method :set_properties, :set_property

        # Handles the generation of new columns for the field and attribute
        # definitions: any otherwise-undefined method called on the builder
        # becomes a FauxColumn built from the method name and its arguments.
        #
        def method_missing(method, *args)
          FauxColumn.new(method, *args)
        end
      end
    end
  end
end