ryanb-thinking_sphinx 0.9.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/LICENCE +20 -0
  2. data/README +60 -0
  3. data/lib/riddle.rb +26 -0
  4. data/lib/riddle/client.rb +639 -0
  5. data/lib/riddle/client/filter.rb +44 -0
  6. data/lib/riddle/client/message.rb +65 -0
  7. data/lib/riddle/client/response.rb +84 -0
  8. data/lib/test.rb +46 -0
  9. data/lib/thinking_sphinx.rb +102 -0
  10. data/lib/thinking_sphinx/active_record.rb +141 -0
  11. data/lib/thinking_sphinx/active_record/delta.rb +97 -0
  12. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  13. data/lib/thinking_sphinx/active_record/search.rb +50 -0
  14. data/lib/thinking_sphinx/association.rb +144 -0
  15. data/lib/thinking_sphinx/attribute.rb +284 -0
  16. data/lib/thinking_sphinx/configuration.rb +283 -0
  17. data/lib/thinking_sphinx/field.rb +200 -0
  18. data/lib/thinking_sphinx/index.rb +340 -0
  19. data/lib/thinking_sphinx/index/builder.rb +195 -0
  20. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  21. data/lib/thinking_sphinx/rails_additions.rb +56 -0
  22. data/lib/thinking_sphinx/search.rb +482 -0
  23. data/lib/thinking_sphinx/tasks.rb +86 -0
  24. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +207 -0
  25. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  26. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  27. data/spec/unit/thinking_sphinx/active_record_spec.rb +236 -0
  28. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  29. data/spec/unit/thinking_sphinx/attribute_spec.rb +360 -0
  30. data/spec/unit/thinking_sphinx/configuration_spec.rb +493 -0
  31. data/spec/unit/thinking_sphinx/field_spec.rb +219 -0
  32. data/spec/unit/thinking_sphinx/index/builder_spec.rb +33 -0
  33. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +68 -0
  34. data/spec/unit/thinking_sphinx/index_spec.rb +277 -0
  35. data/spec/unit/thinking_sphinx/search_spec.rb +190 -0
  36. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  37. data/tasks/thinking_sphinx_tasks.rake +1 -0
  38. metadata +103 -0
@@ -0,0 +1,283 @@
1
+ require 'erb'
2
+
3
+ module ThinkingSphinx
4
+ # This class both keeps track of the configuration settings for Sphinx and
5
+ # also generates the resulting file for Sphinx to use.
6
+ #
7
+ # Here are the default settings, relative to RAILS_ROOT where relevant:
8
+ #
9
+ # config file:: config/#{environment}.sphinx.conf
10
+ # searchd log file:: log/searchd.log
11
+ # query log file:: log/searchd.query.log
12
+ # pid file:: log/searchd.#{environment}.pid
13
+ # searchd files:: db/sphinx/#{environment}/
14
+ # address:: 127.0.0.1
15
+ # port:: 3312
16
+ # allow star:: false
17
+ # min prefix length:: nil (not written to the config unless set)
18
+ # min infix length:: nil (not written to the config unless set)
19
+ # mem limit:: 64M
20
+ # max matches:: 1000
21
+ # morphology:: stem_en
22
+ # charset type:: utf-8
23
+ # charset table:: nil
24
+ # ignore chars:: nil
25
+ # html strip:: false
26
+ # html remove elements:: ''
27
+ #
28
+ # If you want to change these settings, create a YAML file at
29
+ # config/sphinx.yml with settings for each environment, in a similar
30
+ # fashion to database.yml - using the following keys: config_file,
31
+ # searchd_log_file, query_log_file, pid_file, searchd_file_path, address, port,
32
+ # allow_star, enable_star, min_prefix_len, min_infix_len, mem_limit,
33
+ # max_matches, morphology, charset_type, charset_table, ignore_chars,
34
+ # html_strip, html_remove_elements. I think you've got the idea.
35
+ #
36
+ # Each setting in the YAML file is optional - so only put in the ones you
37
+ # want to change.
38
+ #
39
+ # Keep in mind, if for some particular reason you're using a version of
40
+ # Sphinx older than 0.9.8 r871 (that's prior to the proper 0.9.8 release),
41
+ # don't set allow_star to true.
42
+ #
43
+ class Configuration
44
+ attr_accessor :config_file, :searchd_log_file, :query_log_file,
45
+ :pid_file, :searchd_file_path, :address, :port, :enable_star,
46
+ :allow_star, :min_prefix_len, :min_infix_len, :mem_limit, :max_matches,
47
+ :morphology, :charset_type, :charset_table, :ignore_chars, :html_strip,
48
+ :html_remove_elements, :app_root
49
+
50
+ attr_reader :environment
51
+
52
# Builds a configuration holding the stock defaults, then overlays whatever
# config/sphinx.yml provides for the current environment (via parse_config).
#
# The application root is RAILS_ROOT when that constant is defined, Merb.root
# when Merb is defined (Merb wins if both are present), and otherwise the
# app_root argument, which defaults to the current working directory.
#
def initialize(app_root = Dir.pwd)
  framework_root = nil
  framework_root = RAILS_ROOT if defined?(RAILS_ROOT)
  framework_root = Merb.root  if defined?(Merb)
  self.app_root  = framework_root || app_root

  root = self.app_root
  env  = environment

  # An array of pairs (rather than a hash) keeps the assignment order stable.
  defaults = [
    [:config_file,          "#{root}/config/#{env}.sphinx.conf"],
    [:searchd_log_file,     "#{root}/log/searchd.log"],
    [:query_log_file,       "#{root}/log/searchd.query.log"],
    [:pid_file,             "#{root}/log/searchd.#{env}.pid"],
    [:searchd_file_path,    "#{root}/db/sphinx/#{env}"],
    [:address,              "127.0.0.1"],
    [:port,                 3312],
    [:allow_star,           false],
    [:enable_star,          false],
    [:min_prefix_len,       nil],
    [:min_infix_len,        nil],
    [:mem_limit,            "64M"],
    [:max_matches,          1000],
    [:morphology,           "stem_en"],
    [:charset_type,         "utf-8"],
    [:charset_table,        nil],
    [:ignore_chars,         nil],
    [:html_strip,           false],
    [:html_remove_elements, ""]
  ]
  defaults.each { |setting, value| send("#{setting}=", value) }

  parse_config
end
82
+
83
# Name of the environment the application is running in. Reads MERB_ENV when
# Merb is defined and RAILS_ENV otherwise, falling back to "development".
# The answer is memoised in the @@environment class variable.
#
def self.environment
  @@environment ||= begin
    env_key = defined?(Merb) ? 'MERB_ENV' : 'RAILS_ENV'
    ENV[env_key] || "development"
  end
end
88
+
89
# Instance-level shortcut to the class-level environment name.
#
def environment
  self.class.environment
end
92
+
93
# Generate the config file for Sphinx by using all the settings defined and
# looping through all the models with indexes to build the relevant
# indexer and searchd configuration, and sources and indexes details.
#
# file_path:: where to write the configuration; defaults to config_file.
#
# Raises a RuntimeError when config/database.yml has no entry for the current
# environment.
#
def build(file_path=nil)
  load_models
  file_path ||= "#{self.config_file}"

  database_yml   = "#{app_root}/config/database.yml"
  database_confs = YAML::load(ERB.new(IO.read(database_yml)).result)
  database_confs.symbolize_keys!
  database_conf = database_confs[environment.to_sym]
  if database_conf.nil?
    raise "No database settings found for the '#{environment}' environment in #{database_yml}"
  end
  database_conf.symbolize_keys!

  # File.open rather than Kernel#open: the latter treats a path starting with
  # a pipe character as a command to run.
  File.open(file_path, "w") do |file|
    file.write <<-CONFIG
indexer
{
mem_limit = #{self.mem_limit}
}

searchd
{
address = #{self.address}
port = #{self.port}
log = #{self.searchd_log_file}
query_log = #{self.query_log_file}
read_timeout = 5
max_children = 30
pid_file = #{self.pid_file}
max_matches = #{self.max_matches}
}
    CONFIG

    ThinkingSphinx.indexed_models.each do |model|
      model         = model.constantize
      base_name     = model.indexes.first.name
      sources       = []
      delta_sources = []

      model.indexes.each_with_index do |index, i|
        file.write index.to_config(i, database_conf, charset_type)

        create_array_accum if index.adapter == :postgres
        sources       << "#{base_name}_#{i}_core"
        delta_sources << "#{base_name}_#{i}_delta" if index.delta?
      end

      source_list = sources.collect { |s| "source = #{s}" }.join("\n")
      delta_list  = delta_sources.collect { |s| "source = #{s}" }.join("\n")

      file.write core_index_for_model(model, source_list)
      unless delta_list.blank?
        file.write delta_index_for_model(model, delta_list)
      end

      file.write distributed_index_for_model(model)
    end
  end
end
152
+
153
# Make sure all models are loaded - without reloading any that
# ActiveRecord::Base is already aware of (otherwise we start to hit some
# messy dependencies issues).
#
# Every .rb file below app/models is mapped to a constant name and
# constantized. When that raises LoadError, the directory part of the name is
# dropped and the lookup is tried once more; files that still cannot be
# resolved are skipped.
#
def load_models
  base = "#{app_root}/app/models/"
  # Escape the base path so regexp metacharacters in it are matched literally.
  path_pattern = /\A#{Regexp.escape(base)}([\w_\/\\]+)\.rb/

  Dir["#{base}**/*.rb"].each do |file|
    model_name = file[path_pattern, 1]
    next if model_name.nil?

    # Compare against the class name, not the underscored path.
    class_name = model_name.camelize
    next if ::ActiveRecord::Base.send(:subclasses).detect { |model|
      model.name == class_name
    }

    begin
      model_name.camelize.constantize
    rescue LoadError
      # gsub! returns nil once there is no directory prefix left to strip;
      # retrying then would loop forever, so give up on this file instead.
      next if model_name.gsub!(/.*[\/\\]/, '').nil?
      retry
    rescue NameError
      next
    end
  end
end
177
+
178
+ private
179
+
180
# Parse the config/sphinx.yml file - if it exists - then use the attribute
# accessors to set the appropriate values. The file is run through ERB first,
# and only the section for the current environment is applied. Keys without a
# matching public writer are ignored.
#
def parse_config
  path = "#{app_root}/config/sphinx.yml"
  return unless File.exist?(path)

  settings = YAML::load(ERB.new(IO.read(path)).result)
  # An empty file yields nil/false, and an environment may have no section.
  conf = settings.is_a?(Hash) ? settings[environment] : nil
  return unless conf.is_a?(Hash)

  conf.each do |key, value|
    writer = "#{key}="
    # respond_to? works whether #methods returns strings (1.8) or symbols.
    send(writer, value) if respond_to?(writer)
  end
end
193
+
194
# Builds the Sphinx "index <name>_core" section for a model.
#
# model::   the indexed model; its first index supplies the index name, and
#           all of its indexes are consulted for morphology and for
#           prefix/infix fields.
# sources:: the already-formatted "source = ..." lines for the section.
#
# Returns the section as a string.
#
def core_index_for_model(model, sources)
  index_name = model.indexes.first.name

  output = <<-INDEX

index #{index_name}_core
{
#{sources}
path = #{self.searchd_file_path}/#{index_name}_core
charset_type = #{self.charset_type}
  INDEX

  # The last index that specifies a morphology overrides the global setting.
  morphology = model.indexes.inject(self.morphology) { |morph, index|
    index.options[:morphology] || morph
  }
  output << " morphology = #{morphology}\n" unless morphology.blank?
  output << " charset_table = #{self.charset_table}\n" unless self.charset_table.nil?
  output << " ignore_chars = #{self.ignore_chars}\n" unless self.ignore_chars.nil?

  if self.allow_star
    # Ye Olde way of turning on enable_star. min_prefix_len defaults to nil,
    # so fall back to 1 rather than emitting an empty (invalid) setting.
    output << " enable_star = 1\n"
    output << " min_prefix_len = #{self.min_prefix_len || 1}\n"
  else
    # New, better way of turning on enable_star - thanks to James Healy
    output << " enable_star = 1\n" if self.enable_star
    output << " min_prefix_len = #{self.min_prefix_len}\n" unless self.min_prefix_len.nil?
    output << " min_infix_len = #{self.min_infix_len}\n" unless self.min_infix_len.nil?
  end

  output << " html_strip = 1\n" if self.html_strip
  output << " html_remove_elements = #{self.html_remove_elements}\n" unless self.html_remove_elements.blank?

  prefix_fields = model.indexes.collect(&:prefix_fields).flatten
  unless prefix_fields.empty?
    output << " prefix_fields = #{prefix_fields.map(&:unique_name).join(', ')}\n"
  end

  infix_fields = model.indexes.collect(&:infix_fields).flatten
  unless infix_fields.empty?
    output << " infix_fields = #{infix_fields.map(&:unique_name).join(', ')}\n"
  end

  output << "}\n"
end
236
+
237
# Builds the "index <name>_delta : <name>_core" section for a model, using the
# given pre-formatted source lines. The name comes from the model's first
# index. Returns the section as a string.
#
def delta_index_for_model(model, sources)
  base_name = model.indexes.first.name

  section = [
    "index #{base_name}_delta : #{base_name}_core",
    "{",
    "#{sources}",
    "path = #{self.searchd_file_path}/#{base_name}_delta",
    "}"
  ]
  section.join("\n") + "\n"
end
246
+
247
# Builds the distributed index section that fronts a model's core index and,
# when any of the model's indexes is a delta index, its delta index as well.
# Returns the section as a string.
#
def distributed_index_for_model(model)
  base_name = model.indexes.first.name
  has_delta = model.indexes.any? { |index| index.delta? }

  suffixes    = has_delta ? %w(core delta) : %w(core)
  local_lines = suffixes.map { |suffix| "local = #{base_name}_#{suffix}" }.join("\n ")

  "index #{base_name}\n" +
    "{\n" +
    "type = distributed\n" +
    "#{local_lines}\n" +
    "charset_type = #{self.charset_type}\n" +
    "}\n"
end
262
+
263
# Creates the array_accum aggregate on the current ActiveRecord connection
# (called from build for indexes whose adapter is :postgres).
#
# The statement runs inside its own transaction and savepoint. An "already
# exists with same argument types" error is treated as success and rolled
# back to the savepoint; any other error rolls the transaction back and is
# re-raised.
#
def create_array_accum
  connection = ::ActiveRecord::Base.connection

  connection.execute "begin"
  connection.execute "savepoint ts"
  begin
    connection.execute <<-SQL
CREATE AGGREGATE array_accum (anyelement)
(
sfunc = array_append,
stype = anyarray,
initcond = '{}'
);
    SQL
  rescue StandardError => error
    unless error.to_s =~ /already exists with same argument types/
      # Don't leave the transaction dangling; the original error is the one
      # worth reporting, so a failing rollback is deliberately ignored.
      begin
        connection.execute "rollback"
      rescue StandardError
      end
      raise error
    end
    connection.execute "rollback to savepoint ts"
  end
  connection.execute "release savepoint ts"
  connection.execute "commit"
end
282
+ end
283
+ end
@@ -0,0 +1,200 @@
1
+ module ThinkingSphinx
2
+ # Fields - holding the string data which Sphinx indexes for your searches.
3
+ # This class isn't really useful to you unless you're hacking around with the
4
+ # internals of Thinking Sphinx - but hey, don't let that stop you.
5
+ #
6
+ # One key thing to remember - if you're using the field manually to
7
+ # generate SQL statements, you'll need to set the base model, and all the
8
+ # associations. Which can get messy. Use Index.link!, it really helps.
9
+ #
10
+ class Field
11
+ attr_accessor :alias, :columns, :sortable, :associations, :model, :infixes, :prefixes
12
+
13
+ # To create a new field, you'll need to pass in either a single Column
14
+ # or an array of them, and some (optional) options. The columns are
15
+ # references to the data that will make up the field.
16
+ #
17
+ # Valid options are:
18
+ # - :as => :alias_name
19
+ # - :sortable => true
20
+ # - :infixes => true
21
+ # - :prefixes => true
22
+ #
23
+ # Alias is only required in three circumstances: when there's
24
+ # another attribute or field with the same name, when the column name is
25
+ # 'id', or when there's more than one column.
26
+ #
27
+ # Sortable defaults to false - but is quite useful when set to true, as
28
+ # it creates an attribute with the same string value (which Sphinx converts
29
+ # to an integer value), which can be sorted by. Thinking Sphinx is smart
30
+ # enough to realise that when you specify fields in sort statements, you
31
+ # mean their respective attributes.
32
+ #
33
+ # If you have partial matching enabled (ie: enable_star), then you can
34
+ # specify certain fields to have their prefixes and infixes indexed. Keep
35
+ # in mind, though, that Sphinx's default is _all_ fields - so once you
36
+ # highlight a particular field, no other fields in the index will have
37
+ # these partial indexes.
38
+ #
39
+ # Here are some examples:
40
+ #
41
+ # Field.new(
42
+ # Column.new(:name)
43
+ # )
44
+ #
45
+ # Field.new(
46
+ # [Column.new(:first_name), Column.new(:last_name)],
47
+ # :as => :name, :sortable => true
48
+ # )
49
+ #
50
+ # Field.new(
51
+ # [Column.new(:posts, :subject), Column.new(:posts, :content)],
52
+ # :as => :posts, :prefixes => true
53
+ # )
54
+ #
55
def initialize(columns, options = {})
  @columns      = Array(columns)
  @associations = {}

  # Every entry must answer __stack; anything else (or no columns at all) is
  # rejected up front.
  all_valid = @columns.all? { |column| column.respond_to?(:__stack) }
  if @columns.empty? || !all_valid
    raise "Cannot define a field with no columns. Maybe you are trying to index a field with a reserved name (id, name). You can fix this error by using a symbol rather than a bare name (:id instead of id)."
  end

  @alias = options[:as]
  # The three flags default to false when absent.
  @sortable, @infixes, @prefixes = [:sortable, :infixes, :prefixes].map { |flag|
    options[flag] || false
  }
end
66
+
67
# Builds this field's fragment of the SELECT clause, in the form
# "<expression> AS <quoted unique name>". The model and associations must be
# set beforehand.
#
# The column references are joined with commas, passed through concatenate
# when concat_ws? is true, through group_concatenate when is_many? is true,
# and finally through cast_to_string.
#
def to_select_sql
  sql = @columns.map { |column| column_with_prefix(column) }.join(', ')

  sql = concatenate(sql)       if concat_ws?
  sql = group_concatenate(sql) if is_many?

  [cast_to_string(sql), quote_column(unique_name)].join(' AS ')
end
83
+
84
# Builds this field's contribution to the GROUP BY clause. Returns nil when
# is_many? is true or when ThinkingSphinx.use_group_by_shortcut? is true;
# otherwise returns an array with one column reference per column.
#
def to_group_sql
  return nil if is_many? || ThinkingSphinx.use_group_by_shortcut?

  @columns.map { |column| column_with_prefix(column) }
end
99
+
100
# The name this field is known by: the alias when one was given, otherwise
# the name of the only column. With any other number of columns there is no
# fallback, so the result is whatever the alias is (possibly nil).
#
def unique_name
  return @alias unless @columns.length == 1

  @alias || @columns.first.__name
end
112
+
113
+ private
114
+
115
# Wraps a comma-separated list of column references in the adapter-specific
# SQL that joins their values with single spaces. The adapter is identified
# by the class name of the model's connection; unknown adapters get the
# clause back untouched.
#
# On PostgreSQL the || operator yields NULL as soon as one operand is NULL,
# which would blank out the entire field. Each part is therefore cast to
# varchar and COALESCEd to an empty string.
#
def concatenate(clause)
  case @model.connection.class.name
  when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
    "CONCAT_WS(' ', #{clause})"
  when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
    clause.split(', ').collect { |part|
      "COALESCE(CAST(#{part} AS varchar), '')"
    }.join(" || ' ' || ")
  else
    clause
  end
end
125
+
126
# Wraps a clause in the adapter-specific aggregate that collapses the values
# of a group into one space-separated string. The adapter is identified by
# the class name of the model's connection; unknown adapters get the clause
# back untouched.
#
def group_concatenate(clause)
  adapter = @model.connection.class.name

  if adapter == "ActiveRecord::ConnectionAdapters::MysqlAdapter"
    "GROUP_CONCAT(#{clause} SEPARATOR ' ')"
  elsif adapter == "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
    "array_to_string(array_accum(#{clause}), ' ')"
  else
    clause
  end
end
136
+
137
# Casts the clause to a character type where the adapter calls for it: on
# the MySQL adapter it is wrapped in CAST(... AS CHAR); every other adapter
# gets the clause back unchanged.
#
def cast_to_string(clause)
  mysql = @model.connection.class.name ==
    "ActiveRecord::ConnectionAdapters::MysqlAdapter"

  mysql ? "CAST(#{clause} AS CHAR)" : clause
end
147
+
148
# Quotes a column name using the model's database connection.
#
def quote_column(column)
  connection = @model.connection
  connection.quote_column_name(column)
end
151
+
152
# Whether the column values need joining with a space between them: true
# when the field has more than one column, or when multiple_associations?
# reports that some column is reached through more than one association.
#
def concat_ws?
  return true if @columns.length > 1

  multiple_associations?
end
159
+
160
# Compares the associations recorded for each column with those of the first
# column. Returns true as soon as one column differs, and false when they
# are all the same.
#
def multiple_sources?
  reference = associations[@columns.first]

  @columns.any? { |col| associations[col] != reference }
end
168
+
169
# True when at least one column has more than one association recorded
# against it (which only happens for polymorphic situations).
#
def multiple_associations?
  associations.values.any? { |assocs| assocs.length > 1 }
end
175
+
176
# Builds the SQL reference(s) for a column. A column with no associations is
# qualified with the model's own quoted table name. Otherwise one reference
# is produced per association that has the column, qualified with that
# association's aliased join table, and the references are joined with
# commas.
#
def column_with_prefix(column)
  linked = associations[column]

  if linked.empty?
    return "#{@model.quoted_table_name}.#{quote_column(column.__name)}"
  end

  references = []
  linked.each do |assoc|
    # Associations lacking the column contribute nothing.
    next unless assoc.has_column?(column.__name)

    table = "#{@model.connection.quote_table_name(assoc.join.aliased_table_name)}"
    references << table + ".#{quote_column(column.__name)}"
  end
  references.join(', ')
end
192
+
193
# Whether any association attached to this field reports is_many? - in
# which case a parent record can have more than one value for the field.
#
def is_many?
  associations.values.flatten.each do |assoc|
    return true if assoc.is_many?
  end
  false
end
199
+ end
200
+ end