jaikoo-thinking-sphinx 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/LICENCE +20 -0
  2. data/README +76 -0
  3. data/lib/thinking_sphinx.rb +112 -0
  4. data/lib/thinking_sphinx/active_record.rb +153 -0
  5. data/lib/thinking_sphinx/active_record/delta.rb +80 -0
  6. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  7. data/lib/thinking_sphinx/active_record/search.rb +50 -0
  8. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +27 -0
  9. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +9 -0
  10. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +84 -0
  11. data/lib/thinking_sphinx/association.rb +144 -0
  12. data/lib/thinking_sphinx/attribute.rb +284 -0
  13. data/lib/thinking_sphinx/collection.rb +105 -0
  14. data/lib/thinking_sphinx/configuration.rb +314 -0
  15. data/lib/thinking_sphinx/field.rb +206 -0
  16. data/lib/thinking_sphinx/index.rb +432 -0
  17. data/lib/thinking_sphinx/index/builder.rb +220 -0
  18. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  19. data/lib/thinking_sphinx/rails_additions.rb +68 -0
  20. data/lib/thinking_sphinx/search.rb +436 -0
  21. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +132 -0
  22. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  23. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  24. data/spec/unit/thinking_sphinx/active_record_spec.rb +295 -0
  25. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  26. data/spec/unit/thinking_sphinx/attribute_spec.rb +360 -0
  27. data/spec/unit/thinking_sphinx/collection_spec.rb +71 -0
  28. data/spec/unit/thinking_sphinx/configuration_spec.rb +512 -0
  29. data/spec/unit/thinking_sphinx/field_spec.rb +224 -0
  30. data/spec/unit/thinking_sphinx/index/builder_spec.rb +34 -0
  31. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +68 -0
  32. data/spec/unit/thinking_sphinx/index_spec.rb +317 -0
  33. data/spec/unit/thinking_sphinx/search_spec.rb +203 -0
  34. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  35. data/tasks/thinking_sphinx_tasks.rake +1 -0
  36. data/tasks/thinking_sphinx_tasks.rb +100 -0
  37. metadata +103 -0
@@ -0,0 +1,50 @@
1
module ThinkingSphinx
  module ActiveRecord
    # This module covers the model-specific searches. When it is included in
    # a class, four class-level methods are added to that class; each one
    # forwards its arguments to the matching ThinkingSphinx::Search method
    # with the :class option set to the including class. The syntax is
    # exactly the same as the core Search class - so use that as your
    # reference point.
    #
    module Search
      # Hook run when the module is included: defines the search methods on
      # the singleton class of +base+ so they are callable as class methods.
      #
      def self.included(base)
        base.class_eval do
          class << self
            # Searches for results that match the parameters provided. Will only
            # return the ids for the matching objects. See
            # ThinkingSphinx::Search#search for syntax examples.
            #
            # The trailing options hash (if any) is merged into a copy, so the
            # hash object supplied by the caller is never modified. Any :class
            # value passed in is replaced by this model.
            #
            def search_for_ids(*args)
              options = args.extract_options!
              args << options.merge(:class => self)
              ThinkingSphinx::Search.search_for_ids(*args)
            end

            # Searches for results limited to a single model. See
            # ThinkingSphinx::Search#search for syntax examples.
            #
            # The caller's options hash is left untouched (see search_for_ids).
            #
            def search(*args)
              options = args.extract_options!
              args << options.merge(:class => self)
              ThinkingSphinx::Search.search(*args)
            end

            # Forwards to ThinkingSphinx::Search.count with the :class option
            # set to this model. The caller's options hash is left untouched.
            #
            def search_count(*args)
              options = args.extract_options!
              args << options.merge(:class => self)
              ThinkingSphinx::Search.count(*args)
            end

            # Forwards to ThinkingSphinx::Search.search_for_id with the :class
            # option set to this model. The caller's options hash is left
            # untouched.
            #
            def search_for_id(*args)
              options = args.extract_options!
              args << options.merge(:class => self)
              ThinkingSphinx::Search.search_for_id(*args)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module ThinkingSphinx
  # Common base for the database-specific adapters. Everything lives at the
  # class level: adapters are used as classes, never instantiated here.
  #
  class AbstractAdapter
    class << self
      # Hook for database-specific preparation. Does nothing here on purpose;
      # subclasses override it when they have work to do.
      #
      def setup
      end

      # Picks the adapter class matching the class name of the model's
      # database connection. Raises a RuntimeError for any connection other
      # than the MySQL and PostgreSQL ones.
      #
      def detect(model)
        adapter_name = model.connection.class.name

        if adapter_name == "ActiveRecord::ConnectionAdapters::MysqlAdapter"
          ThinkingSphinx::MysqlAdapter
        elsif adapter_name == "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
          ThinkingSphinx::PostgreSQLAdapter
        else
          raise "Invalid Database Adapter: Sphinx only supports MySQL and PostgreSQL"
        end
      end

      protected

      # The ActiveRecord::Base connection, looked up once and then cached in
      # a class-level instance variable.
      #
      def connection
        return @connection if @connection

        @connection = ::ActiveRecord::Base.connection
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module ThinkingSphinx
  # MySQL variant of the adapter. It overrides the setup hook with another
  # empty implementation.
  #
  class MysqlAdapter < AbstractAdapter
    # Intentionally empty.
    #
    def self.setup
      # Does MySQL actually need to do anything?
    end
  end
end
@@ -0,0 +1,84 @@
1
module ThinkingSphinx
  # PostgreSQL variant of the adapter. Its setup hook installs two database
  # objects: an array_accum aggregate and a crc32 function.
  #
  class PostgreSQLAdapter < AbstractAdapter
    class << self
      # Creates the array_accum aggregate and the crc32 function. Each step is
      # best-effort: a failure is rolled back and otherwise ignored.
      #
      def setup
        create_array_accum_function
        create_crc32_function
      end

      private

      # Runs the given block inside its own transaction and savepoint. Any
      # StandardError raised by the block is swallowed after rolling back to
      # the savepoint, so a failing statement (for instance creating
      # something that already exists) does not abort the caller.
      #
      def within_savepoint
        connection.execute "begin"
        connection.execute "savepoint ts"
        begin
          yield
        rescue
          connection.execute "rollback to savepoint ts"
        end
        connection.execute "release savepoint ts"
        connection.execute "commit"
      end

      # Creates the array_accum aggregate, choosing the CREATE AGGREGATE
      # syntax by server version. The version check sits inside the guarded
      # block, so a failure to read the version is ignored like any other
      # error.
      #
      def create_array_accum_function
        within_savepoint do
          # See http://www.postgresql.org/docs/8.2/interactive/sql-createaggregate.html
          if connection.raw_connection.server_version > 80200
            connection.execute <<-SQL
              CREATE AGGREGATE array_accum (anyelement)
              (
                  sfunc = array_append,
                  stype = anyarray,
                  initcond = '{}'
              );
            SQL
          else
            connection.execute <<-SQL
              CREATE AGGREGATE array_accum
              (
                  basetype = anyelement,
                  sfunc = array_append,
                  stype = anyarray,
                  initcond = '{}'
              );
            SQL
          end
        end
      end

      # Installs the plpgsql language and then (re)creates the crc32 function.
      #
      # The two statements are guarded separately. CREATE LANGUAGE fails when
      # the language is already installed; if both statements shared a single
      # rescue, that failure would skip the function definition altogether.
      #
      def create_crc32_function
        within_savepoint do
          connection.execute "CREATE LANGUAGE 'plpgsql';"
        end

        within_savepoint do
          connection.execute <<-SQL
            CREATE OR REPLACE FUNCTION crc32(word text)
            RETURNS bigint AS $$
              DECLARE tmp bigint;
              DECLARE i int;
              DECLARE j int;
              BEGIN
                i = 0;
                tmp = 4294967295;
                LOOP
                  tmp = (tmp # get_byte(word::bytea, i))::bigint;
                  i = i + 1;
                  j = 0;
                  LOOP
                    tmp = ((tmp >> 1) # (3988292384 * (tmp & 1)))::bigint;
                    j = j + 1;
                    IF j >= 8 THEN
                      EXIT;
                    END IF;
                  END LOOP;
                  IF i >= char_length(word) THEN
                    EXIT;
                  END IF;
                END LOOP;
                return (tmp # 4294967295);
              END
            $$ IMMUTABLE STRICT LANGUAGE plpgsql;
          SQL
        end
      end
    end
  end
end
@@ -0,0 +1,144 @@
1
module ThinkingSphinx
  # Association tracks a specific reflection and join to reference data that
  # isn't in the base model. Very much an internal class for Thinking Sphinx -
  # perhaps because I feel it's not as strong (or simple) as most of the rest.
  #
  class Association
    attr_accessor :parent, :reflection, :join

    # Create a new association by passing in the parent association, and the
    # corresponding reflection instance. If there is no parent, pass in nil.
    #
    #   top   = Association.new nil, top_reflection
    #   child = Association.new top, child_reflection
    #
    def initialize(parent, reflection)
      @parent, @reflection = parent, reflection
      @children = {}
    end

    # Get the children associations for a given association name. The only time
    # that there'll actually be more than one association is when the
    # relationship is polymorphic. To keep things simple though, it will always
    # be an Array that gets returned (an empty one if no matches). Results are
    # cached per association name.
    #
    #   # where pages is an association on the class tied to the reflection.
    #   association.children(:pages)
    #
    def children(assoc)
      @children[assoc] ||= Association.children(@reflection.klass, assoc, self)
    end

    # Get the children associations for a given class, association name and
    # parent association. Much like the instance method of the same name, it
    # will return an empty array if no associations have the name, and only
    # have multiple association instances if the underlying relationship is
    # polymorphic.
    #
    #   Association.children(User, :pages, user_association)
    #
    def self.children(klass, assoc, parent=nil)
      ref = klass.reflect_on_association(assoc)

      return [] if ref.nil?
      return [Association.new(parent, ref)] unless ref.options[:polymorphic]

      # association is polymorphic - create associations for each
      # non-polymorphic reflection.
      polymorphic_classes(ref).collect { |poly_class|
        Association.new parent, ::ActiveRecord::Reflection::AssociationReflection.new(
          ref.macro,
          "#{ref.name}_#{poly_class.name}".to_sym,
          casted_options(poly_class, ref),
          ref.active_record
        )
      }
    end

    # Link up the join for this model from a base join - and set parent
    # associations' joins recursively. Returns the (memoized) join.
    #
    def join_to(base_join)
      parent.join_to(base_join) if parent && parent.join.nil?

      @join ||= ::ActiveRecord::Associations::ClassMethods::JoinDependency::JoinAssociation.new(
        @reflection, base_join, parent ? parent.join : base_join.joins.first
      )
    end

    # Returns the association's join SQL statements - and it replaces
    # ::ts_join_alias:: with the aliased table name so the generated reflection
    # join conditions avoid column name collisions. Requires the join to have
    # been set (see join_to).
    #
    def to_sql
      quoted_alias = @reflection.klass.connection.quote_table_name(
        @join.parent.aliased_table_name
      )

      @join.association_join.gsub(/::ts_join_alias::/, "#{quoted_alias}")
    end

    # Returns true if the association - or a parent - is a has_many or
    # has_and_belongs_to_many.
    #
    def is_many?
      case @reflection.macro
      when :has_many, :has_and_belongs_to_many
        true
      else
        @parent ? @parent.is_many? : false
      end
    end

    # Returns an array of all the associations that lead to this one - starting
    # with the top level all the way to the current association object.
    #
    def ancestors
      (parent ? parent.ancestors : []) << self
    end

    # Returns true if the class behind this association's reflection has a
    # column with the given name (compared as a string).
    #
    def has_column?(column)
      @reflection.klass.column_names.include?(column.to_s)
    end

    # NOTE: a bare `private` does not apply to methods defined with
    # `def self.`, so the two helpers below have always been publicly
    # callable. They are deliberately left that way so existing callers keep
    # working; treat them as internal.

    # Returns all the objects that could be currently instantiated from a
    # polymorphic association. This is pretty damn fast if there's an index on
    # the foreign type column - but if there isn't, it can take a while if you
    # have a lot of data.
    #
    def self.polymorphic_classes(ref)
      ref.active_record.connection.select_all(
        "SELECT DISTINCT #{ref.options[:foreign_type]} " +
        "FROM #{ref.active_record.table_name} " +
        "WHERE #{ref.options[:foreign_type]} IS NOT NULL"
      ).collect { |row|
        row[ref.options[:foreign_type]].constantize
      }
    end

    # Returns a new set of options for an association that mimics an existing
    # polymorphic relationship for a specific class. It adds a condition to
    # filter by the appropriate object.
    #
    # The options hash is only a shallow copy of the reflection's options, so
    # the :conditions value is always replaced with a newly built object
    # rather than appended to in place. Mutating it would leak the type
    # filter for one class into the source reflection and into the options
    # generated for every other class.
    #
    def self.casted_options(klass, ref)
      options = ref.options.clone
      options[:polymorphic]   = nil
      options[:class_name]    = klass.name
      options[:foreign_key] ||= "#{ref.name}_id"

      quoted_foreign_type = klass.connection.quote_column_name ref.options[:foreign_type]
      type_condition = "::ts_join_alias::.#{quoted_foreign_type} = '#{klass.name}'"

      existing = options[:conditions]
      options[:conditions] = case existing
      when nil
        type_condition
      when Array
        existing + [type_condition]
      when Hash
        existing.merge(ref.options[:foreign_type] => klass.name)
      else
        "#{existing} AND #{type_condition}"
      end

      options
    end
  end
end
@@ -0,0 +1,284 @@
1
module ThinkingSphinx
  # Attributes - eternally useful when it comes to filtering, sorting or
  # grouping. This class isn't really useful to you unless you're hacking
  # around with the internals of Thinking Sphinx - but hey, don't let that
  # stop you.
  #
  # One key thing to remember - if you're using the attribute manually to
  # generate SQL statements, you'll need to set the base model, and all the
  # associations. Which can get messy. Use Index.link!, it really helps.
  #
  class Attribute
    attr_accessor :alias, :columns, :associations, :model

    # To create a new attribute, you'll need to pass in either a single Column
    # or an array of them, and some (optional) options.
    #
    # Valid options are:
    # - :as   => :alias_name
    # - :type => :attribute_type
    #
    # Alias is only required in three circumstances: when there's
    # another attribute or field with the same name, when the column name is
    # 'id', or when there's more than one column.
    #
    # Type is not required, unless you want to force a column to be a certain
    # type (but keep in mind the value will not be CASTed in the SQL
    # statements). The only time you really need to use this is when the type
    # can't be figured out by the column - ie: when not actually using a
    # database column as your source.
    #
    # Raises a RuntimeError when no columns are given, or when any of them
    # does not respond to __stack.
    #
    # Example usage:
    #
    #   Attribute.new(
    #     Column.new(:created_at)
    #   )
    #
    #   Attribute.new(
    #     Column.new(:posts, :id),
    #     :as => :post_ids
    #   )
    #
    #   Attribute.new(
    #     [Column.new(:pages, :id), Column.new(:articles, :id)],
    #     :as => :content_ids
    #   )
    #
    #   Attribute.new(
    #     Column.new("NOW()"),
    #     :as   => :indexed_at,
    #     :type => :datetime
    #   )
    #
    # If you're creating attributes for latitude and longitude, don't forget
    # that Sphinx expects these values to be in radians.
    #
    def initialize(columns, options = {})
      @columns      = Array(columns)
      @associations = {}

      raise "Cannot define an attribute with no columns. Maybe you are trying to index an attribute with a reserved name (id, name). You can fix this error by using a symbol rather than a bare name (:id instead of id)." if @columns.empty? || @columns.any? { |column| !column.respond_to?(:__stack) }

      @alias = options[:as]
      @type  = options[:type]
    end

    # Get the part of the SELECT clause related to this attribute. Don't forget
    # to set your model and associations first though.
    #
    # This will concatenate strings and arrays of integers, and convert
    # datetimes to timestamps, as needed.
    #
    def to_select_sql
      clause = @columns.collect { |column|
        column_with_prefix(column)
      }.join(', ')

      # integer-only sources are joined with commas, everything else with
      # spaces
      separator = all_ints? ? ',' : ' '

      clause = concatenate(clause, separator)       if concat_ws?
      clause = group_concatenate(clause, separator) if is_many?
      clause = cast_to_datetime(clause)             if type == :datetime
      clause = convert_nulls(clause)                if type == :string

      "#{clause} AS #{quote_column(unique_name)}"
    end

    # Get the part of the GROUP BY clause related to this attribute - if one is
    # needed. If not, all you'll get back is nil. The latter will happen if
    # there isn't actually a real column to get data from, or if there's
    # multiple data values (read: a has_many or has_and_belongs_to_many
    # association), or if ThinkingSphinx.use_group_by_shortcut? is true.
    # Otherwise an array of column references is returned.
    #
    def to_group_sql
      return nil if is_many? || is_string? || ThinkingSphinx.use_group_by_shortcut?

      @columns.collect { |column| column_with_prefix(column) }
    end

    # Generates the appropriate attribute statement for a Sphinx configuration
    # file, depending on the attribute's type. Any type not listed here
    # (including an undetermined one) is declared as an unsigned integer.
    #
    def to_sphinx_clause
      case type
      when :multi
        "sql_attr_multi = uint #{unique_name} from field"
      when :datetime
        "sql_attr_timestamp = #{unique_name}"
      when :string
        "sql_attr_str2ordinal = #{unique_name}"
      when :float
        "sql_attr_float = #{unique_name}"
      when :boolean
        "sql_attr_bool = #{unique_name}"
      else
        "sql_attr_uint = #{unique_name}"
      end
    end

    # Returns the unique name of the attribute - which is either the alias of
    # the attribute, or the name of the only column - if there is only one. If
    # there isn't, there should be an alias. Else things probably won't work.
    # Consider yourself warned.
    #
    def unique_name
      if @columns.length == 1
        @alias || @columns.first.__name
      else
        @alias
      end
    end

    private

    # Maps the class name of the model's connection to :mysql or :postgresql.
    # Returns nil for any other connection, in which case the SQL helpers
    # below hand their clause back unchanged.
    #
    def sql_dialect
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        :mysql
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        :postgresql
      end
    end

    # Joins the comma-separated column references in +clause+ into a single
    # SQL expression, with +separator+ between the values.
    #
    def concatenate(clause, separator = ' ')
      case sql_dialect
      when :mysql
        "CONCAT_WS('#{separator}', #{clause})"
      when :postgresql
        clause.split(', ').join(" || '#{separator}' || ")
      else
        clause
      end
    end

    # Wraps +clause+ in the aggregate that collapses many rows into one
    # +separator+-delimited value.
    #
    def group_concatenate(clause, separator = ' ')
      case sql_dialect
      when :mysql
        "GROUP_CONCAT(#{clause} SEPARATOR '#{separator}')"
      when :postgresql
        "array_to_string(array_accum(#{clause}), '#{separator}')"
      else
        clause
      end
    end

    # Casts +clause+ to a character type on MySQL; unchanged elsewhere.
    #
    def cast_to_string(clause)
      case sql_dialect
      when :mysql
        "CAST(#{clause} AS CHAR)"
      else
        clause
      end
    end

    # Converts +clause+ to a UNIX timestamp on MySQL; unchanged elsewhere.
    #
    # NOTE(review): the PostgreSQL branch relies on "Rails' datetimes are
    # timestamps in PostgreSQL" - confirm that the value Sphinx receives is
    # usable for a sql_attr_timestamp attribute without an explicit epoch
    # conversion.
    #
    def cast_to_datetime(clause)
      case sql_dialect
      when :mysql
        "UNIX_TIMESTAMP(#{clause})"
      else
        clause
      end
    end

    # Replaces NULL with an empty string on MySQL and PostgreSQL; unchanged
    # elsewhere.
    #
    def convert_nulls(clause)
      case sql_dialect
      when :mysql
        "IFNULL(#{clause}, '')"
      when :postgresql
        "COALESCE(#{clause}, '')"
      else
        clause
      end
    end

    # Quotes a column name using the model's connection.
    #
    def quote_column(column)
      @model.connection.quote_column_name(column)
    end

    # Indication of whether the columns should be concatenated with a
    # separator between each value. True if any column maps to more than one
    # association, or if there is more than one column.
    #
    def concat_ws?
      multiple_associations? || @columns.length > 1
    end

    # Checks the association tree for each column - if they're all the same,
    # returns false.
    #
    def multiple_sources?
      first = associations[@columns.first]

      !@columns.all? { |col| associations[col] == first }
    end

    # Checks whether any column requires multiple associations (which only
    # happens for polymorphic situations).
    #
    def multiple_associations?
      associations.any? { |col,assocs| assocs.length > 1 }
    end

    # Builds a column reference tied to the appropriate associations. This
    # dives into the associations hash and their corresponding joins to
    # figure out how to correctly reference a column in SQL.
    #
    # String columns are returned verbatim. Columns without associations are
    # prefixed with the model's table. Otherwise one reference is produced
    # per association that actually has the column, joined with ', '.
    #
    def column_with_prefix(column)
      if column.is_string?
        column.__name
      elsif associations[column].empty?
        "#{@model.quoted_table_name}.#{quote_column(column.__name)}"
      else
        associations[column].collect { |assoc|
          assoc.has_column?(column.__name) ?
          "#{@model.connection.quote_table_name(assoc.join.aliased_table_name)}" +
          ".#{quote_column(column.__name)}" :
          nil
        }.compact.join(', ')
      end
    end

    # Could there be more than one value related to the parent record? If so,
    # then this will return true. If not, false. It's that simple.
    #
    def is_many?
      associations.values.flatten.any? { |assoc| assoc.is_many? }
    end

    # Returns true if all of the columns are string values, instead of
    # database column references.
    #
    def is_string?
      columns.all? { |col| col.is_string? }
    end

    # Returns the type of the column. If that's not already set, it returns
    # :multi if there's the possibility of more than one value, :string if
    # there's more than one association, otherwise it figures out what the
    # actual column's datatype is and returns that. The result is nil when no
    # database column matches (see type_from_database).
    #
    def type
      @type ||= case
      when is_many?
        :multi
      when @associations.values.flatten.length > 1
        :string
      else
        type_from_database
      end
    end

    # Looks up the type of the first database column whose name matches one
    # of this attribute's columns. The lookup uses the class of the first
    # association when there is one, and the base model otherwise.
    #
    # Returns nil when no database column matches - which is the case for
    # SQL-string columns defined without an explicit :type - instead of
    # failing on a nil lookup result.
    #
    def type_from_database
      first_assoc = @associations.values.flatten.first
      klass       = first_assoc ? first_assoc.reflection.klass : @model
      names       = @columns.collect { |c| c.__name.to_s }

      db_column = klass.columns.detect { |col| names.include?(col.name) }
      db_column && db_column.type
    end

    # Returns true only if every column resolves to an :integer database
    # column in every class it is read from (the association classes, or the
    # base model when the column has no associations).
    #
    def all_ints?
      @columns.all? { |col|
        klasses = @associations[col].empty? ? [@model] :
          @associations[col].collect { |assoc| assoc.reflection.klass }
        klasses.all? { |klass|
          db_column = klass.columns.detect { |candidate|
            candidate.name == col.__name.to_s
          }
          !db_column.nil? && db_column.type == :integer
        }
      }
    end
  end
end