jaikoo-thinking-sphinx 0.9.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/LICENCE +20 -0
  2. data/README +76 -0
  3. data/lib/thinking_sphinx.rb +112 -0
  4. data/lib/thinking_sphinx/active_record.rb +153 -0
  5. data/lib/thinking_sphinx/active_record/delta.rb +80 -0
  6. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  7. data/lib/thinking_sphinx/active_record/search.rb +50 -0
  8. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +27 -0
  9. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +9 -0
  10. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +84 -0
  11. data/lib/thinking_sphinx/association.rb +144 -0
  12. data/lib/thinking_sphinx/attribute.rb +284 -0
  13. data/lib/thinking_sphinx/collection.rb +105 -0
  14. data/lib/thinking_sphinx/configuration.rb +314 -0
  15. data/lib/thinking_sphinx/field.rb +206 -0
  16. data/lib/thinking_sphinx/index.rb +432 -0
  17. data/lib/thinking_sphinx/index/builder.rb +220 -0
  18. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  19. data/lib/thinking_sphinx/rails_additions.rb +68 -0
  20. data/lib/thinking_sphinx/search.rb +436 -0
  21. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +132 -0
  22. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  23. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  24. data/spec/unit/thinking_sphinx/active_record_spec.rb +295 -0
  25. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  26. data/spec/unit/thinking_sphinx/attribute_spec.rb +360 -0
  27. data/spec/unit/thinking_sphinx/collection_spec.rb +71 -0
  28. data/spec/unit/thinking_sphinx/configuration_spec.rb +512 -0
  29. data/spec/unit/thinking_sphinx/field_spec.rb +224 -0
  30. data/spec/unit/thinking_sphinx/index/builder_spec.rb +34 -0
  31. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +68 -0
  32. data/spec/unit/thinking_sphinx/index_spec.rb +317 -0
  33. data/spec/unit/thinking_sphinx/search_spec.rb +203 -0
  34. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  35. data/tasks/thinking_sphinx_tasks.rake +1 -0
  36. data/tasks/thinking_sphinx_tasks.rb +100 -0
  37. metadata +103 -0
@@ -0,0 +1,50 @@
module ThinkingSphinx
  module ActiveRecord
    # This module covers the model-specific searches - the syntax is exactly
    # the same as the core Search class, so use that as your reference point.
    #
    # Including it adds four class methods to the including model: search,
    # search_for_ids, search_for_id and search_count. Each one sets the :class
    # option to the model and forwards everything to the matching
    # ThinkingSphinx::Search class method.
    #
    module Search
      # Returns the argument list to forward to ThinkingSphinx::Search for the
      # given model: the positional arguments followed by the trailing options
      # hash with :class set to the model.
      #
      # The options are merged into a new hash, so a hash object passed in by
      # the caller is never modified. Relies on Array#extract_options! being
      # available (it is not defined in this file).
      #
      def self.args_for(model, args) # :nodoc:
        options = args.extract_options!
        args << options.merge(:class => model)
      end

      def self.included(base)
        base.class_eval do
          class << self
            # Searches for results that match the parameters provided. Will only
            # return the ids for the matching objects. See
            # ThinkingSphinx::Search#search for syntax examples.
            #
            def search_for_ids(*args)
              ThinkingSphinx::Search.search_for_ids(
                *ThinkingSphinx::ActiveRecord::Search.args_for(self, args)
              )
            end

            # Searches for results limited to a single model. See
            # ThinkingSphinx::Search#search for syntax examples.
            #
            def search(*args)
              ThinkingSphinx::Search.search(
                *ThinkingSphinx::ActiveRecord::Search.args_for(self, args)
              )
            end

            # Forwards to ThinkingSphinx::Search.count, limited to this model.
            #
            def search_count(*args)
              ThinkingSphinx::Search.count(
                *ThinkingSphinx::ActiveRecord::Search.args_for(self, args)
              )
            end

            # Forwards to ThinkingSphinx::Search.search_for_id, limited to this
            # model.
            #
            def search_for_id(*args)
              ThinkingSphinx::Search.search_for_id(
                *ThinkingSphinx::ActiveRecord::Search.args_for(self, args)
              )
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
module ThinkingSphinx
  # Base class for the database-specific adapters. It picks the adapter class
  # for a model and gives subclasses access to a database connection.
  #
  class AbstractAdapter
    # Hook for any database preparation an adapter needs. Does nothing here;
    # subclasses override it when they have work to do.
    #
    def self.setup
    end

    # Returns the adapter class matching the class name of the model's
    # connection. Raises a RuntimeError for any connection class other than
    # the MySQL and PostgreSQL ones named below.
    #
    def self.detect(model)
      adapter_name = model.connection.class.name

      if adapter_name == "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        return ThinkingSphinx::MysqlAdapter
      end

      if adapter_name == "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        return ThinkingSphinx::PostgreSQLAdapter
      end

      raise "Invalid Database Adapter: Sphinx only supports MySQL and PostgreSQL"
    end

    # The connection of ::ActiveRecord::Base, fetched on first use and then
    # kept in a class-level instance variable.
    #
    def self.connection
      @connection = ::ActiveRecord::Base.connection unless @connection
      @connection
    end

    class << self
      protected :connection
    end
  end
end
@@ -0,0 +1,9 @@
module ThinkingSphinx
  # Adapter used for MySQL connections.
  #
  class MysqlAdapter < AbstractAdapter
    # Intentionally a no-op: nothing is created or changed in the database
    # for MySQL.
    #
    def self.setup
    end
  end
end
@@ -0,0 +1,84 @@
module ThinkingSphinx
  # Adapter used for PostgreSQL connections. Its setup creates the
  # array_accum aggregate and the crc32 function in the database.
  #
  class PostgreSQLAdapter < AbstractAdapter
    class << self
      # Creates the database objects this adapter defines. Every statement is
      # best-effort: a failure (for example because the object already exists)
      # is rolled back and ignored.
      #
      def setup
        create_array_accum_function
        create_crc32_function
      end

      private

      def create_array_accum_function
        execute_in_savepoint { array_accum_sql }
      end

      # The plpgsql language and the crc32 function are created in separate
      # savepoints. If they shared one, a failing CREATE LANGUAGE (which is
      # what happens when the language is already installed) would skip the
      # CREATE FUNCTION statement entirely.
      #
      def create_crc32_function
        execute_in_savepoint { "CREATE LANGUAGE 'plpgsql';" }
        execute_in_savepoint { crc32_sql }
      end

      # Runs the SQL returned by the block inside its own transaction and
      # savepoint. Any StandardError raised while building or executing the
      # SQL rolls back to the savepoint and is swallowed.
      #
      def execute_in_savepoint
        connection.execute "begin"
        connection.execute "savepoint ts"
        begin
          connection.execute yield
        rescue
          connection.execute "rollback to savepoint ts"
        end
        connection.execute "release savepoint ts"
        connection.execute "commit"
      end

      # SQL for the array_accum aggregate. The syntax depends on the server
      # version reported by the raw connection.
      # See http://www.postgresql.org/docs/8.2/interactive/sql-createaggregate.html
      #
      def array_accum_sql
        if connection.raw_connection.server_version > 80200
          <<-SQL
            CREATE AGGREGATE array_accum (anyelement)
            (
                sfunc = array_append,
                stype = anyarray,
                initcond = '{}'
            );
          SQL
        else
          <<-SQL
            CREATE AGGREGATE array_accum
            (
                basetype = anyelement,
                sfunc = array_append,
                stype = anyarray,
                initcond = '{}'
            );
          SQL
        end
      end

      # SQL for the crc32(text) function. The outer loop walks the bytes of
      # the value, so it is bounded by the byte length (octet_length) rather
      # than the character count; with char_length, text containing multibyte
      # characters was only partially hashed. Using WHILE also means an empty
      # string never reaches get_byte, and yields 0.
      #
      def crc32_sql
        <<-SQL
          CREATE OR REPLACE FUNCTION crc32(word text)
          RETURNS bigint AS $$
            DECLARE tmp bigint;
            DECLARE i int;
            DECLARE j int;
            DECLARE len int;
            BEGIN
              i = 0;
              tmp = 4294967295;
              len = octet_length(word::bytea);
              WHILE i < len LOOP
                tmp = (tmp # get_byte(word::bytea, i))::bigint;
                i = i + 1;
                j = 0;
                LOOP
                  tmp = ((tmp >> 1) # (3988292384 * (tmp & 1)))::bigint;
                  j = j + 1;
                  IF j >= 8 THEN
                    EXIT;
                  END IF;
                END LOOP;
              END LOOP;
              return (tmp # 4294967295);
            END
          $$ IMMUTABLE STRICT LANGUAGE plpgsql;
        SQL
      end
    end
  end
end
@@ -0,0 +1,144 @@
module ThinkingSphinx
  # Association tracks a specific reflection and join to reference data that
  # isn't in the base model. Very much an internal class for Thinking Sphinx -
  # perhaps because I feel it's not as strong (or simple) as most of the rest.
  #
  class Association
    attr_accessor :parent, :reflection, :join

    # Create a new association by passing in the parent association, and the
    # corresponding reflection instance. If there is no parent, pass in nil.
    #
    #   top   = Association.new nil, top_reflection
    #   child = Association.new top, child_reflection
    #
    def initialize(parent, reflection)
      @parent, @reflection = parent, reflection
      @children = {}
    end

    # Get the children associations for a given association name. The only time
    # that there'll actually be more than one association is when the
    # relationship is polymorphic. To keep things simple though, it will always
    # be an Array that gets returned (an empty one if no matches). Results are
    # cached per association name.
    #
    #   # where pages is an association on the class tied to the reflection.
    #   association.children(:pages)
    #
    def children(assoc)
      @children[assoc] ||= Association.children(@reflection.klass, assoc, self)
    end

    # Get the children associations for a given class, association name and
    # parent association. Much like the instance method of the same name, it
    # will return an empty array if no associations have the name, and only
    # have multiple association instances if the underlying relationship is
    # polymorphic.
    #
    #   Association.children(User, :pages, user_association)
    #
    def self.children(klass, assoc, parent=nil)
      ref = klass.reflect_on_association(assoc)

      return [] if ref.nil?
      return [Association.new(parent, ref)] unless ref.options[:polymorphic]

      # association is polymorphic - create associations for each
      # non-polymorphic reflection. The block parameter has its own name so it
      # cannot clash with the klass argument of this method.
      polymorphic_classes(ref).collect { |poly_class|
        Association.new parent, ::ActiveRecord::Reflection::AssociationReflection.new(
          ref.macro,
          "#{ref.name}_#{poly_class.name}".to_sym,
          casted_options(poly_class, ref),
          ref.active_record
        )
      }
    end

    # Link up the join for this model from a base join - and set parent
    # associations' joins recursively.
    #
    def join_to(base_join)
      parent.join_to(base_join) if parent && parent.join.nil?

      @join ||= ::ActiveRecord::Associations::ClassMethods::JoinDependency::JoinAssociation.new(
        @reflection, base_join, parent ? parent.join : base_join.joins.first
      )
    end

    # Returns the association's join SQL statements - and it replaces
    # ::ts_join_alias:: with the aliased table name so the generated reflection
    # join conditions avoid column name collisions.
    #
    def to_sql
      @join.association_join.gsub(/::ts_join_alias::/,
        "#{@reflection.klass.connection.quote_table_name(@join.parent.aliased_table_name)}"
      )
    end

    # Returns true if the association - or a parent - is a has_many or
    # has_and_belongs_to_many.
    #
    def is_many?
      case @reflection.macro
      when :has_many, :has_and_belongs_to_many
        true
      else
        @parent ? @parent.is_many? : false
      end
    end

    # Returns an array of all the associations that lead to this one - starting
    # with the top level all the way to the current association object.
    #
    def ancestors
      (parent ? parent.ancestors : []) << self
    end

    # Returns true if the class tied to the reflection has a column with the
    # given name.
    #
    def has_column?(column)
      @reflection.klass.column_names.include?(column.to_s)
    end

    # Note: a bare `private` does not apply to methods defined with
    # `def self.`, so the two class methods below remain publicly callable.
    private

    # Returns all the classes that could be currently instantiated from a
    # polymorphic association, by selecting the distinct non-null values of
    # the foreign type column and constantizing each. This is pretty damn fast
    # if there's an index on the foreign type column - but if there isn't, it
    # can take a while if you have a lot of data.
    #
    def self.polymorphic_classes(ref)
      ref.active_record.connection.select_all(
        "SELECT DISTINCT #{ref.options[:foreign_type]} " +
        "FROM #{ref.active_record.table_name} " +
        "WHERE #{ref.options[:foreign_type]} IS NOT NULL"
      ).collect { |row|
        row[ref.options[:foreign_type]].constantize
      }
    end

    # Returns a new set of options for an association that mimics an existing
    # polymorphic relationship for a specific class. It adds a condition to
    # filter by the appropriate object.
    #
    # The reflection's own options - including its :conditions value - are
    # left untouched: the new conditions are always built as a fresh object.
    # Modifying them in place would make the type filters of every
    # polymorphic class pile up on the shared reflection.
    #
    def self.casted_options(klass, ref)
      options = ref.options.clone
      options[:polymorphic] = nil
      options[:class_name]  = klass.name
      options[:foreign_key] ||= "#{ref.name}_id"

      quoted_foreign_type = klass.connection.quote_column_name ref.options[:foreign_type]
      type_condition = "::ts_join_alias::.#{quoted_foreign_type} = '#{klass.name}'"
      existing = ref.options[:conditions]

      options[:conditions] = case existing
      when nil
        type_condition
      when Array
        # NOTE(review): the filter is appended as an extra array element, as
        # before. If array conditions are [sql, *bind_values] this may not add
        # a SQL condition at all - confirm the intended behaviour.
        existing + [type_condition]
      when Hash
        existing.merge(ref.options[:foreign_type] => klass.name)
      else
        "#{existing} AND #{type_condition}"
      end

      options
    end
  end
end
@@ -0,0 +1,284 @@
module ThinkingSphinx
  # Attributes - eternally useful when it comes to filtering, sorting or
  # grouping. This class isn't really useful to you unless you're hacking
  # around with the internals of Thinking Sphinx - but hey, don't let that
  # stop you.
  #
  # One key thing to remember - if you're using the attribute manually to
  # generate SQL statements, you'll need to set the base model, and all the
  # associations. Which can get messy. Use Index.link!, it really helps.
  #
  class Attribute
    attr_accessor :alias, :columns, :associations, :model

    # To create a new attribute, you'll need to pass in either a single Column
    # or an array of them, and some (optional) options.
    #
    # Valid options are:
    # - :as   => :alias_name
    # - :type => :attribute_type
    #
    # Alias is only required in three circumstances: when there's
    # another attribute or field with the same name, when the column name is
    # 'id', or when there's more than one column.
    #
    # Type is not required, unless you want to force a column to be a certain
    # type (but keep in mind the value will not be CASTed in the SQL
    # statements). The only time you really need to use this is when the type
    # can't be figured out by the column - ie: when not actually using a
    # database column as your source.
    #
    # Example usage:
    #
    #   Attribute.new(
    #     Column.new(:created_at)
    #   )
    #
    #   Attribute.new(
    #     Column.new(:posts, :id),
    #     :as => :post_ids
    #   )
    #
    #   Attribute.new(
    #     [Column.new(:pages, :id), Column.new(:articles, :id)],
    #     :as => :content_ids
    #   )
    #
    #   Attribute.new(
    #     Column.new("NOW()"),
    #     :as   => :indexed_at,
    #     :type => :datetime
    #   )
    #
    # If you're creating attributes for latitude and longitude, don't forget
    # that Sphinx expects these values to be in radians.
    #
    # Raises a RuntimeError if no columns are given, or if any of them does
    # not respond to __stack.
    #
    def initialize(columns, options = {})
      @columns      = Array(columns)
      @associations = {}

      raise "Cannot define a field with no columns. Maybe you are trying to index a field with a reserved name (id, name). You can fix this error by using a symbol rather than a bare name (:id instead of id)." if @columns.empty? || @columns.any? { |column| !column.respond_to?(:__stack) }

      @alias        = options[:as]
      @type         = options[:type]
    end

    # Get the part of the SELECT clause related to this attribute. Don't forget
    # to set your model and associations first though.
    #
    # This will concatenate strings and arrays of integers, and convert
    # datetimes to timestamps, as needed.
    #
    def to_select_sql
      clause = @columns.collect { |column|
        column_with_prefix(column)
      }.join(', ')

      # Integer values are joined with commas, everything else with spaces.
      separator = all_ints? ? ',' : ' '

      clause = concatenate(clause, separator)       if concat_ws?
      clause = group_concatenate(clause, separator) if is_many?
      clause = cast_to_datetime(clause)             if type == :datetime
      clause = convert_nulls(clause)                if type == :string

      "#{clause} AS #{quote_column(unique_name)}"
    end

    # Get the part of the GROUP BY clause related to this attribute - if one is
    # needed. If not, all you'll get back is nil. The latter will happen if
    # there isn't actually a real column to get data from, or if there's
    # multiple data values (read: a has_many or has_and_belongs_to_many
    # association), or if ThinkingSphinx.use_group_by_shortcut? is true.
    #
    def to_group_sql
      case
      when is_many?, is_string?, ThinkingSphinx.use_group_by_shortcut?
        nil
      else
        @columns.collect { |column|
          column_with_prefix(column)
        }
      end
    end

    # Generates the appropriate attribute statement for a Sphinx configuration
    # file, depending on the attribute's type. Any type not listed below -
    # including an undetermined (nil) type - is treated as an unsigned integer.
    #
    def to_sphinx_clause
      case type
      when :multi
        "sql_attr_multi       = uint #{unique_name} from field"
      when :datetime
        "sql_attr_timestamp   = #{unique_name}"
      when :string
        "sql_attr_str2ordinal = #{unique_name}"
      when :float
        "sql_attr_float       = #{unique_name}"
      when :boolean
        "sql_attr_bool        = #{unique_name}"
      else
        "sql_attr_uint        = #{unique_name}"
      end
    end

    # Returns the unique name of the attribute - which is either the alias of
    # the attribute, or the name of the only column - if there is only one. If
    # there isn't, there should be an alias. Else things probably won't work.
    # Consider yourself warned.
    #
    def unique_name
      if @columns.length == 1
        @alias || @columns.first.__name
      else
        @alias
      end
    end

    private

    # Joins the comma-separated column references in the clause into a single
    # SQL expression, using the syntax of the model's database adapter.
    #
    def concatenate(clause, separator = ' ')
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        "CONCAT_WS('#{separator}', #{clause})"
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        clause.split(', ').join(" || '#{separator}' || ")
      else
        clause
      end
    end

    # Wraps the clause in the adapter's aggregate that collapses the values of
    # many rows into one separated string.
    #
    def group_concatenate(clause, separator = ' ')
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        "GROUP_CONCAT(#{clause} SEPARATOR '#{separator}')"
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        "array_to_string(array_accum(#{clause}), '#{separator}')"
      else
        clause
      end
    end

    # Casts the clause to a character type on MySQL; every other adapter gets
    # the clause back unchanged.
    #
    def cast_to_string(clause)
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        "CAST(#{clause} AS CHAR)"
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        clause
      else
        clause
      end
    end

    # Converts a datetime clause into an integer UNIX timestamp, which is what
    # the sql_attr_timestamp declaration from to_sphinx_clause expects. On
    # PostgreSQL the raw timestamp column is not such an integer, so the epoch
    # seconds are extracted explicitly.
    #
    def cast_to_datetime(clause)
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        "UNIX_TIMESTAMP(#{clause})"
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        "cast(extract(epoch from #{clause}) as int)"
      else
        clause
      end
    end

    # Replaces NULL values with an empty string.
    #
    def convert_nulls(clause)
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        "IFNULL(#{clause}, '')"
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        "COALESCE(#{clause}, '')"
      else
        clause
      end
    end

    def quote_column(column)
      @model.connection.quote_column_name(column)
    end

    # Indication of whether the columns should be concatenated with a
    # separator between each value. True if there's either multiple columns or
    # multiple associations for a column.
    #
    def concat_ws?
      multiple_associations? || @columns.length > 1
    end

    # Checks the association tree for each column - if they're all the same,
    # returns false.
    #
    def multiple_sources?
      first = associations[@columns.first]

      !@columns.all? { |col| associations[col] == first }
    end

    # Checks whether any column requires multiple associations (which only
    # happens for polymorphic situations).
    #
    def multiple_associations?
      associations.any? { |col,assocs| assocs.length > 1 }
    end

    # Builds a column reference tied to the appropriate associations. This
    # dives into the associations hash and their corresponding joins to
    # figure out how to correctly reference a column in SQL.
    #
    def column_with_prefix(column)
      if column.is_string?
        column.__name
      elsif associations[column].empty?
        "#{@model.quoted_table_name}.#{quote_column(column.__name)}"
      else
        associations[column].collect { |assoc|
          assoc.has_column?(column.__name) ?
          "#{@model.connection.quote_table_name(assoc.join.aliased_table_name)}" +
          ".#{quote_column(column.__name)}" :
          nil
        }.compact.join(', ')
      end
    end

    # Could there be more than one value related to the parent record? If so,
    # then this will return true. If not, false. It's that simple.
    #
    def is_many?
      associations.values.flatten.any? { |assoc| assoc.is_many? }
    end

    # Returns true if all of the columns are string values, instead of
    # database column references.
    def is_string?
      columns.all? { |col| col.is_string? }
    end

    # Returns the type of the column. If that's not already set, it returns
    # :multi if there's the possibility of more than one value, :string if
    # there's more than one association, otherwise it figures out what the
    # actual column's datatype is and returns that.
    #
    # When no database column matches any of the attribute's columns the
    # result is nil (instead of calling a method on nil), which callers treat
    # like any other unlisted type.
    def type
      @type ||= case
      when is_many?
        :multi
      when @associations.values.flatten.length > 1
        :string
      else
        first_assoc = @associations.values.flatten.first
        klass       = first_assoc ? first_assoc.reflection.klass : @model
        names       = @columns.collect { |c| c.__name.to_s }
        db_column   = klass.columns.detect { |col| names.include? col.name }
        db_column && db_column.type
      end
    end

    # Returns true if every column maps, in every class it can come from, to
    # a database column of type :integer.
    #
    def all_ints?
      @columns.all? { |col|
        klasses = @associations[col].empty? ? [@model] :
          @associations[col].collect { |assoc| assoc.reflection.klass }
        klasses.all? { |klass|
          db_column = klass.columns.detect { |candidate| candidate.name == col.__name.to_s }
          !db_column.nil? && db_column.type == :integer
        }
      }
    end
  end
end