nixme-thinking-sphinx 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/LICENCE +20 -0
  2. data/README +52 -0
  3. data/lib/riddle.rb +22 -0
  4. data/lib/riddle/client.rb +593 -0
  5. data/lib/riddle/client/filter.rb +44 -0
  6. data/lib/riddle/client/message.rb +65 -0
  7. data/lib/riddle/client/response.rb +84 -0
  8. data/lib/test.rb +46 -0
  9. data/lib/thinking_sphinx.rb +82 -0
  10. data/lib/thinking_sphinx/active_record.rb +138 -0
  11. data/lib/thinking_sphinx/active_record/delta.rb +90 -0
  12. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  13. data/lib/thinking_sphinx/active_record/search.rb +43 -0
  14. data/lib/thinking_sphinx/association.rb +140 -0
  15. data/lib/thinking_sphinx/attribute.rb +282 -0
  16. data/lib/thinking_sphinx/configuration.rb +277 -0
  17. data/lib/thinking_sphinx/field.rb +198 -0
  18. data/lib/thinking_sphinx/index.rb +334 -0
  19. data/lib/thinking_sphinx/index/builder.rb +212 -0
  20. data/lib/thinking_sphinx/index/faux_column.rb +97 -0
  21. data/lib/thinking_sphinx/rails_additions.rb +56 -0
  22. data/lib/thinking_sphinx/search.rb +455 -0
  23. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +185 -0
  24. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  25. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +81 -0
  26. data/spec/unit/thinking_sphinx/active_record_spec.rb +201 -0
  27. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  28. data/spec/unit/thinking_sphinx/attribute_spec.rb +356 -0
  29. data/spec/unit/thinking_sphinx/configuration_spec.rb +476 -0
  30. data/spec/unit/thinking_sphinx/field_spec.rb +215 -0
  31. data/spec/unit/thinking_sphinx/index/builder_spec.rb +33 -0
  32. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +41 -0
  33. data/spec/unit/thinking_sphinx/index_spec.rb +230 -0
  34. data/spec/unit/thinking_sphinx/search_spec.rb +163 -0
  35. data/spec/unit/thinking_sphinx_spec.rb +107 -0
  36. data/tasks/thinking_sphinx_tasks.rake +1 -0
  37. data/tasks/thinking_sphinx_tasks.rb +86 -0
  38. metadata +90 -0
@@ -0,0 +1,334 @@
1
+ require 'thinking_sphinx/index/builder'
2
+ require 'thinking_sphinx/index/faux_column'
3
+
4
+ module ThinkingSphinx
5
+ # The Index class is a ruby representation of a Sphinx source (not a Sphinx
6
+ # index - yes, I know it's a little confusing. You'll manage). This is
7
+ # another 'internal' Thinking Sphinx class - if you're using it directly,
8
+ # you either know what you're doing, or messing with things beyond your ken.
9
+ # Enjoy.
10
+ #
11
+ class Index
12
+ attr_accessor :model, :fields, :attributes, :conditions, :delta, :options
13
+
14
+ # Create a new index instance by passing in the model it is tied to, and
15
+ # a block to build it with (optional but recommended). For documentation
16
+ # on the syntax for inside the block, the Builder class is what you want.
17
+ #
18
+ # Quick Example:
19
+ #
20
+ # Index.new(User) do
21
+ # indexes login, email
22
+ #
23
+ # has created_at
24
+ #
25
+ # set_property :delta => true
26
+ # end
27
+ #
28
def initialize(model, &block)
  # Start from an empty definition: no cached associations, no fields,
  # attributes, conditions or options, and delta indexing switched off.
  @model        = model
  @associations = {}
  @fields, @attributes, @conditions = [], [], []
  @options      = {}
  @delta        = false

  # The builder DSL is only evaluated when a definition block was supplied.
  return unless block_given?

  initialize_from_builder(&block)
end
39
+
40
# Name derived from the model: the model's name, underscored, with every
# ':', '/' and '\' character replaced by an underscore.
def name
  underscored = model.name.underscore
  underscored.gsub(/[:\/\\]/, '_')
end
43
+
44
# Builds the Sphinx source configuration text for this index.
#
# index         - value appended to the source name, after the name of the
#                 model's first index.
# database_conf - Hash read for :host (falls back to "localhost"),
#                 :username, :password and :database.
# charset_type  - when this is "utf-8" and the adapter is MySQL, a
#                 "SET NAMES utf8" pre-query is emitted.
#
# Returns a String containing the "<name>_<index>_core" source and, when the
# index has a delta, a "<name>_<index>_delta" source inheriting from it.
# Raises RuntimeError when the adapter is neither :mysql nor :postgres.
def to_config(index, database_conf, charset_type)
  # Set up associations and joins
  link!

  attr_sources = attributes.collect { |attrib|
    attrib.to_sphinx_clause
  }.join("\n ")

  db_adapter = case adapter
  when :postgres
    "pgsql"
  when :mysql
    "mysql"
  else
    raise "Unsupported Database Adapter: Sphinx only supports MySQL and PostgreSQL"
  end

  # Every source name shares this prefix, so work it out once.
  source_name = "#{model.indexes.first.name}_#{index}"

  config = <<-SOURCE

source #{source_name}_core
{
type = #{db_adapter}
sql_host = #{database_conf[:host] || "localhost"}
sql_user = #{database_conf[:username]}
sql_pass = #{database_conf[:password]}
sql_db = #{database_conf[:database]}

sql_query_pre = #{charset_type == "utf-8" && adapter == :mysql ? "SET NAMES utf8" : ""}
#{"sql_query_pre = SET SESSION group_concat_max_len = #{@options[:group_concat_max_len]}" if @options[:group_concat_max_len]}
sql_query_pre = #{to_sql_query_pre}
sql_query = #{to_sql.gsub(/\n/, ' ')}
sql_query_range = #{to_sql_query_range}
sql_query_info = #{to_sql_query_info}
#{attr_sources}
}
  SOURCE

  if delta?
    # The delta source inherits from the core source; it begins with an
    # empty sql_query_pre line before declaring its own pre-queries.
    config += <<-SOURCE

source #{source_name}_delta : #{source_name}_core
{
sql_query_pre =
sql_query_pre = #{charset_type == "utf-8" && adapter == :mysql ? "SET NAMES utf8" : ""}
#{"sql_query_pre = SET SESSION group_concat_max_len = #{@options[:group_concat_max_len]}" if @options[:group_concat_max_len]}
sql_query = #{to_sql(:delta => true).gsub(/\n/, ' ')}
sql_query_range = #{to_sql_query_range :delta => true}
}
    SOURCE
  end

  config
end
97
+
98
+ # Link all the fields and associations to their corresponding
99
+ # associations and joins. This _must_ be called before interrogating
100
+ # the index's fields and associations for anything that may reference
101
+ # their SQL structure.
102
+ #
103
def link!
  # Join dependency built for the model; every resolved association is
  # joined to it.
  base = ::ActiveRecord::Associations::ClassMethods::JoinDependency.new(
    @model, [], nil
  )

  # Fields and attributes are linked the same way: default the model, then
  # resolve and join the association stack behind each column.
  link_item = lambda do |item|
    item.model ||= @model
    item.columns.each do |col|
      item.associations[col] = associations(col.__stack.clone)
      item.associations[col].each { |assoc| assoc.join_to(base) }
    end
  end

  @fields.each(&link_item)
  @attributes.each(&link_item)
end
124
+
125
+ # Generates the big SQL statement to get the data back for all the fields
126
+ # and attributes, using all the relevant association joins. If you want
127
+ # the version filtered for delta values, send through :delta => true in the
128
+ # options. Won't do much though if the index isn't set up to support a
129
+ # delta sibling.
130
+ #
131
+ # Examples:
132
+ #
133
+ # index.to_sql
134
+ # index.to_sql(:delta => true)
135
+ #
136
def to_sql(options={})
  # options[:delta] - truthy to select rows whose delta flag is set; only
  #                   consulted when this index has a delta.
  # Returns the SELECT statement as a multi-line String containing the
  # $start and $end placeholders.
  assocs = all_associations

  table       = @model.quoted_table_name
  primary_key = "#{table}.#{quote_column(@model.primary_key)}"

  # Extra filters, each rendered as " AND <condition>" after the id range.
  filters = []
  if self.delta?
    filters << "#{table}.#{quote_column('delta')}" +
      " = #{options[:delta] ? db_boolean(true) : db_boolean(false)}"
  end
  filters.concat(@conditions)
  where_clause = filters.collect { |filter| " AND #{filter}" }.join

  select_list = [primary_key] +
    @fields.collect { |field| field.to_select_sql } +
    @attributes.collect { |attribute| attribute.to_select_sql }

  group_list = [primary_key] +
    @fields.collect { |field| field.to_group_sql }.compact +
    @attributes.collect { |attribute| attribute.to_group_sql }.compact

  # FROM uses the quoted table name, like every other reference to the
  # table in this statement.
  sql = <<-SQL
SELECT #{ select_list.join(", ") }
FROM #{ table }
#{ assocs.collect { |assoc| assoc.to_sql }.join(' ') }
WHERE #{primary_key} >= $start
AND #{primary_key} <= $end
#{ where_clause }
GROUP BY #{ group_list.join(", ") }
  SQL

  if @model.connection.class.name == "ActiveRecord::ConnectionAdapters::MysqlAdapter"
    sql += " ORDER BY NULL"
  end

  sql
end
171
+
172
+ # Simple helper method for the query info SQL - which is a statement that
173
+ # returns the single row for a corresponding id.
174
+ #
175
def to_sql_query_info
  # One interpolated string; the doubled space after WHERE is kept as is.
  table  = @model.quoted_table_name
  column = quote_column(@model.primary_key)
  "SELECT * FROM #{table} WHERE  #{column} = $id"
end
179
+
180
+ # Simple helper method for the query range SQL - which is a statement that
181
+ # returns minimum and maximum id values. These can be filtered by delta -
182
+ # so pass in :delta => true to get the delta version of the SQL.
183
+ #
184
def to_sql_query_range(options={})
  bounds = [
    "MIN(#{quote_column(@model.primary_key)})",
    "MAX(#{quote_column(@model.primary_key)})"
  ]

  # Fix to handle Sphinx PostgreSQL bug (it doesn't like NULLs or 0's)
  if adapter == :postgres
    bounds = bounds.collect { |bound| "COALESCE(#{bound}, 1)" }
  end

  table = @model.quoted_table_name
  query = "SELECT #{bounds.join(', ')} FROM #{table} "

  # Only indexes with a delta filter the range on the delta flag.
  if self.delta?
    flag = options[:delta] ? db_boolean(true) : db_boolean(false)
    query << "WHERE #{table}.#{quote_column('delta')} = #{flag}"
  end

  query
end
200
+
201
+ # Returns the SQL query to run before a full index - ie: nothing unless the
202
+ # index has a delta, and then it's an update statement to set delta values
203
+ # back to 0.
204
+ #
205
def to_sql_query_pre
  # Without a delta there is nothing to reset.
  return "" unless self.delta?

  "UPDATE #{@model.quoted_table_name} SET #{quote_column('delta')} = #{db_boolean(false)}"
end
208
+
209
+ # Flag to indicate whether this index has a corresponding delta index.
210
+ #
211
def delta?
  # Reads the flag through the :delta accessor declared on the class.
  delta
end
214
+
215
# Identifies the database adapter from the class name of the model's
# connection. Returns :mysql or :postgres and memoizes the answer; raises
# RuntimeError for any other connection class.
def adapter
  return @adapter if @adapter

  connection_class = @model.connection.class.name
  @adapter =
    if connection_class == "ActiveRecord::ConnectionAdapters::MysqlAdapter"
      :mysql
    elsif connection_class == "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
      :postgres
    else
      raise "Invalid Database Adapter: Sphinx only supports MySQL and PostgreSQL"
    end
end
225
+
226
# Fields whose #prefixes value is truthy.
def prefix_fields
  @fields.select(&:prefixes)
end
229
+
230
# Fields whose #infixes value is truthy.
def infix_fields
  @fields.select(&:infixes)
end
233
+
234
+ private
235
+
236
# Quotes a column name using the model's database connection.
def quote_column(column)
  connection = @model.connection
  connection.quote_column_name(column)
end
239
+
240
+ # Does all the magic with the block provided to the base #initialize.
241
+ # Creates a new class subclassed from Builder, and evaluates the block
242
+ # on it, then pulls all relevant settings - fields, attributes, conditions,
243
+ # properties - into the new index.
244
+ #
245
+ # Also creates a CRC attribute for the model.
246
+ #
247
def initialize_from_builder(&block)
  # A fresh Builder subclass per index keeps each definition's collected
  # fields, attributes, conditions and properties separate.
  builder = Class.new(Builder)
  builder.setup

  builder.instance_eval(&block)

  # For models that do not descend directly from ActiveRecord, restrict the
  # source to rows stored for this class. The inheritance column is quoted
  # and qualified with the table name so the condition stays unambiguous
  # when a joined table has a column of the same name.
  unless @model.descends_from_active_record?
    stored_class = @model.store_full_sti_class ? @model.name : @model.name.demodulize
    builder.where(
      "#{@model.quoted_table_name}.#{quote_column(@model.inheritance_column)} = '#{stored_class}'"
    )
  end

  @fields     = builder.fields
  @attributes = builder.attributes
  @conditions = builder.conditions
  @delta      = builder.properties[:delta]
  @options    = builder.properties.except(:delta)

  # Internal attributes appended to every index: the model's CRC32 value as
  # class_crc, and a constant 0 as sphinx_deleted.
  @attributes << Attribute.new(
    FauxColumn.new(@model.to_crc32.to_s),
    :type => :integer,
    :as   => :class_crc
  )
  @attributes << Attribute.new(
    FauxColumn.new("0"),
    :type => :integer,
    :as   => :sphinx_deleted
  )
end
275
+
276
+ # Returns all associations used amongst all the fields and attributes.
277
+ # This includes all associations between the model and what the actual
278
+ # columns are from.
279
+ #
280
def all_associations
  @all_associations ||= begin
    # Column-level associations, fields first and then attributes.
    direct = (@fields + @attributes).collect { |item|
      item.associations.values
    }.flatten.uniq

    # get ancestors as well as column-level associations
    direct.collect { |assoc| assoc.ancestors }.flatten.uniq
  end
end
295
+
296
+ # Gets a stack of associations for a specific path.
297
+ #
298
def associations(path, parent = nil)
  # NOTE: path is consumed (shifted) as it is walked.
  first_step = path.shift
  assocs = parent.nil? ? association(first_step) : parent.children(first_step)

  # Each remaining step replaces the current associations with all of
  # their children for that step.
  until path.empty?
    step = path.shift
    assocs = assocs.collect { |assoc| assoc.children(step) }.flatten
  end

  assocs
end
316
+
317
+ # Gets the association stack for a specific key.
318
+ #
319
def association(key)
  # Serve the cached stack when one has already been built for this key.
  cached = @associations[key]
  return cached if cached

  @associations[key] = Association.children(@model, key)
end
322
+
323
+ # Returns the proper boolean value string literal for the
324
+ # current database adapter.
325
+ #
326
def db_boolean(val)
  # PostgreSQL gets TRUE/FALSE; any other adapter gets 1/0.
  truthy, falsy = adapter == :postgres ? ['TRUE', 'FALSE'] : ['1', '0']
  val ? truthy : falsy
end
333
+ end
334
+ end
@@ -0,0 +1,212 @@
1
+ module ThinkingSphinx
2
+ class Index
3
+ # The Builder class is the core for the index definition block processing.
4
+ # There are four methods you really need to pay attention to:
5
+ # - indexes (aliased to field and includes)
6
+ # - has (aliased to attribute)
7
+ # - where
8
+ # - set_property (aliased to set_properties)
9
+ #
10
+ # The first two of these methods allow you to define what data makes up
11
+ # your indexes. #where provides a method to add manual SQL conditions, and
12
+ # set_property allows you to set some settings on a per-index basis. Check
13
+ # out each method's documentation for better ideas of usage.
14
+ #
15
+ class Builder
16
+ class << self
17
+ # No idea where this is coming from - haven't found it in any ruby or
18
+ # rails documentation. It's not needed though, so it gets undef'd.
19
+ # Hopefully the list of methods that get in the way doesn't get too
20
+ # long.
21
+ undef_method :parent
22
+
23
+ attr_accessor :fields, :attributes, :properties, :conditions
24
+
25
+ # Set up all the collections. Consider this the equivalent of an
26
+ # instance's initialize method.
27
+ #
28
def setup
  # Properties live in a hash; fields, attributes and conditions each get
  # their own empty array.
  @properties = {}
  @fields, @attributes = [], []
  @conditions = []
end
34
+
35
+ # This is how you add fields - the strings Sphinx looks at - to your
36
+ # index. Technically, to use this method, you need to pass in some
37
+ # columns and options - but there's some neat method_missing stuff
38
+ # happening, so lets stick to the expected syntax within a define_index
39
+ # block.
40
+ #
41
+ # Expected options are :as, which points to a column alias in symbol
42
+ # form, and :sortable, which indicates whether you want to sort by this
43
+ # field.
44
+ #
45
+ # Adding Single-Column Fields:
46
+ #
47
+ # You can use symbols or methods - and can chain methods together to
48
+ # get access down the associations tree.
49
+ #
50
+ # indexes :id, :as => :my_id
51
+ # indexes :name, :sortable => true
52
+ # indexes first_name, last_name, :sortable => true
53
+ # indexes users.posts.content, :as => :post_content
54
+ # indexes users(:id), :as => :user_ids
55
+ #
56
+ # Keep in mind that if any keywords for Ruby methods - such as id or
57
+ # name - clash with your column names, you need to use the symbol
58
+ # version (see the first, second and last examples above).
59
+ #
60
+ # If you specify multiple columns (example #3), a field will be created
61
+ # for each. Don't use the :as option in this case. If you want to merge
62
+ # those columns together, continue reading.
63
+ #
64
+ # Adding Multi-Column Fields:
65
+ #
66
+ # indexes [first_name, last_name], :as => :name
67
+ # indexes [location, parent.location], :as => :location
68
+ #
69
+ # To combine multiple columns into a single field, you need to wrap
70
+ # them in an Array, as shown by the above examples. There's no
71
+ # limitations on whether they're symbols or methods or what level of
72
+ # associations they come from.
73
+ #
74
+ # Adding SQL Fragment Fields
75
+ #
76
+ # You can also define a field using an SQL fragment, useful for when
77
+ # you would like to index a calculated value.
78
+ #
79
+ # indexes "age < 18", :as => :minor
80
+ #
81
def indexes(*args)
  # Each argument defines one field; an Array argument defines a single
  # field spanning several columns. A trailing options hash (:as,
  # :sortable, ...) is applied to every field defined by the call.
  options = args.extract_options!

  # Symbols and SQL-fragment strings are wrapped in a FauxColumn, whether
  # given on their own or inside an Array - the same handling #has applies.
  wrap = lambda do |col|
    (col.is_a?(Symbol) || col.is_a?(String)) ? FauxColumn.new(col) : col
  end

  args.each do |columns|
    columns = columns.is_a?(Array) ? columns.collect(&wrap) : wrap.call(columns)

    field = Field.new(columns, options)
    fields << field

    next unless field.sortable

    # A sortable field gets a companion string attribute named
    # "<unique_name>_sort". The name is built by interpolation so that a
    # String alias is never modified in place.
    attributes << Attribute.new(
      field.columns.collect { |col| col.clone },
      options.merge(
        :type => :string,
        :as   => "#{field.unique_name}_sort".to_sym
      )
    )
  end
end
98
+ alias_method :field, :indexes
99
+ alias_method :includes, :indexes
100
+
101
+ # This is the method to add attributes to your index (hence why it is
102
+ # aliased as 'attribute'). The syntax is the same as #indexes, so use
103
+ # that as starting point, but keep in mind the following points.
104
+ #
105
+ # An attribute can have an alias (the :as option), but it is always
106
+ # sortable - so you don't need to explicitly request that. You _can_
107
+ # specify the data type of the attribute (the :type option), but the
108
+ # code's pretty good at figuring that out itself from peering into the
109
+ # database.
110
+ #
111
+ # Attributes are limited to the following types: integers, floats,
112
+ # datetimes (converted to timestamps), booleans and strings. Don't
113
+ # forget that Sphinx converts string attributes to integers, which are
114
+ # useful for sorting, but that's about it.
115
+ #
116
+ # You can also have a collection of integers for multi-value attributes
117
+ # (MVAs). Generally these would be through a has_many relationship,
118
+ # like in this example:
119
+ #
120
+ # has posts(:id), :as => :post_ids
121
+ #
122
+ # This allows you to filter on any of the values tied to a specific
123
+ # record. Might be best to read through the Sphinx documentation to get
124
+ # a better idea of that though.
125
+ #
126
+ # Adding SQL Fragment Attributes
127
+ #
128
+ # You can also define an attribute using an SQL fragment, useful for
129
+ # when you would like to index a calculated value. Don't forget to set
130
+ # the type of the attribute though:
131
+ #
132
+ # has "age < 18", :as => :minor, :type => :boolean
133
+ #
134
+ # If you're creating attributes for latitude and longitude, don't
135
+ # forget that Sphinx expects these values to be in radians.
136
+ #
137
def has(*args)
  options = args.extract_options!

  # Symbols and Strings become FauxColumns; anything else passes through.
  wrap = lambda do |col|
    (col.is_a?(Symbol) || col.is_a?(String)) ? FauxColumn.new(col) : col
  end

  args.each do |columns|
    # An Array defines one attribute across several columns, so its
    # elements are wrapped individually.
    columns = columns.is_a?(Array) ? columns.collect(&wrap) : wrap.call(columns)

    attributes << Attribute.new(columns, options)
  end
end
159
+ alias_method :attribute, :has
160
+
161
+ # Use this method to add some manual SQL conditions for your index
162
+ # request. You can pass in as many strings as you like, they'll get
163
+ # joined together with ANDs later on.
164
+ #
165
+ # where "user_id = 10"
166
+ # where "parent_type = 'Article'", "created_at < NOW()"
167
+ #
168
def where(*args)
  # Builds a new array rather than appending in place.
  @conditions = @conditions + args
end
171
+
172
+ # This is what to use to set properties on the index. Chief amongst
173
+ # those is the delta property - to allow automatic updates to your
174
+ # indexes as new models are added and edited - but also you can
175
+ # define search-related properties which will be the defaults for all
176
+ # searches on the model.
177
+ #
178
+ # set_property :delta => true
179
+ # set_property :field_weights => {"name" => 100}
180
+ #
181
+ # Also, the following two properties are particularly relevant for
182
+ # geo-location searching - latitude_attr and longitude_attr. If your
183
+ # attributes for these two values are named something other than
184
+ # lat/latitude or lon/long/longitude, you can dictate what they are
185
+ # when defining the index, so you don't need to specify them for every
186
+ # geo-related search.
187
+ #
188
+ # set_property :latitude_attr => "lt", :longitude_attr => "lg"
189
+ #
190
+ # Please don't forget to add a boolean field named 'delta' to your
191
+ # model's database table if enabling the delta index for it.
192
+ #
193
def set_property(*args)
  options = args.extract_options!

  # Hash form: merge every key/value pair into the properties.
  return @properties.merge!(options) unless options.empty?

  # Positional form: set_property key, value
  key, value = args
  @properties[key] = value
end
201
+ alias_method :set_properties, :set_property
202
+
203
+ # Handles the generation of new columns for the field and attribute
204
+ # definitions.
205
+ #
206
def method_missing(method, *args)
  # The missing method's name leads the FauxColumn's constructor arguments,
  # followed by whatever arguments the call carried.
  column_args = [method] + args
  FauxColumn.new(*column_args)
end
209
+ end
210
+ end
211
+ end
212
+ end