freelancing-god-thinking-sphinx 0.9.8 → 0.9.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. data/README +20 -1
  2. data/lib/thinking_sphinx.rb +30 -2
  3. data/lib/thinking_sphinx/active_record.rb +25 -11
  4. data/lib/thinking_sphinx/active_record/delta.rb +46 -53
  5. data/lib/thinking_sphinx/active_record/has_many_association.rb +1 -1
  6. data/lib/thinking_sphinx/active_record/search.rb +8 -1
  7. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +27 -0
  8. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +9 -0
  9. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +84 -0
  10. data/lib/thinking_sphinx/association.rb +4 -0
  11. data/lib/thinking_sphinx/attribute.rb +4 -2
  12. data/lib/thinking_sphinx/collection.rb +105 -0
  13. data/lib/thinking_sphinx/configuration.rb +112 -75
  14. data/lib/thinking_sphinx/field.rb +11 -3
  15. data/lib/thinking_sphinx/index.rb +119 -26
  16. data/lib/thinking_sphinx/index/builder.rb +30 -22
  17. data/lib/thinking_sphinx/index/faux_column.rb +13 -0
  18. data/lib/thinking_sphinx/rails_additions.rb +13 -1
  19. data/lib/thinking_sphinx/search.rb +40 -81
  20. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +73 -127
  21. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +2 -2
  22. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +26 -0
  23. data/spec/unit/thinking_sphinx/active_record_spec.rb +94 -22
  24. data/spec/unit/thinking_sphinx/attribute_spec.rb +8 -4
  25. data/spec/unit/thinking_sphinx/collection_spec.rb +71 -0
  26. data/spec/unit/thinking_sphinx/configuration_spec.rb +149 -113
  27. data/spec/unit/thinking_sphinx/field_spec.rb +13 -4
  28. data/spec/unit/thinking_sphinx/index/builder_spec.rb +1 -0
  29. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +27 -0
  30. data/spec/unit/thinking_sphinx/index_spec.rb +79 -29
  31. data/spec/unit/thinking_sphinx/search_spec.rb +114 -74
  32. data/spec/unit/thinking_sphinx_spec.rb +21 -0
  33. data/tasks/thinking_sphinx_tasks.rb +24 -10
  34. metadata +21 -8
  35. data/lib/riddle.rb +0 -26
  36. data/lib/riddle/client.rb +0 -639
  37. data/lib/riddle/client/filter.rb +0 -44
  38. data/lib/riddle/client/message.rb +0 -65
  39. data/lib/riddle/client/response.rb +0 -84
  40. data/lib/test.rb +0 -46
@@ -230,9 +230,11 @@ module ThinkingSphinx
230
230
  "#{@model.quoted_table_name}.#{quote_column(column.__name)}"
231
231
  else
232
232
  associations[column].collect { |assoc|
233
+ assoc.has_column?(column.__name) ?
233
234
  "#{@model.connection.quote_table_name(assoc.join.aliased_table_name)}" +
234
- ".#{quote_column(column.__name)}"
235
- }.join(', ')
235
+ ".#{quote_column(column.__name)}" :
236
+ nil
237
+ }.compact.join(', ')
236
238
  end
237
239
  end
238
240
 
@@ -0,0 +1,105 @@
1
module ThinkingSphinx
  # An Array of search results - either record ids or model instances - that
  # also remembers the pagination details it was built with and the raw
  # Sphinx response hash (available through #results).
  #
  # The response hash is expected to provide :matches (an array of hashes
  # with :weight and :attributes keys) plus, optionally, :total and
  # :total_found.
  class Collection < ::Array
    attr_reader :total_entries, :total_pages, :current_page, :per_page
    attr_accessor :results

    # page          - number of the page this collection represents.
    # per_page      - how many entries make up one page.
    # entries       - entry count used to work out total_pages.
    # total_entries - entry count reported through #total_entries.
    #
    def initialize(page, per_page, entries, total_entries)
      super()
      @current_page, @per_page, @total_entries = page, per_page, total_entries

      # A nil or zero page size would turn the division into Infinity/NaN and
      # make Float#ceil raise FloatDomainError - report zero pages instead.
      page_size    = @per_page.to_f
      @total_pages = page_size > 0 ? (entries / page_size).ceil : 0
    end

    # Builds a collection holding the "sphinx_internal_id" attribute of every
    # match, in the order the matches were returned. The options argument is
    # accepted for symmetry with create_from_results but is not used.
    #
    def self.ids_from_results(results, page, limit, options)
      collection = build_for_results(results, page, limit)
      collection.replace(results[:matches].collect { |match|
        match[:attributes]["sphinx_internal_id"]
      })
      collection
    end

    # Builds a collection holding one model instance per match (see
    # instances_from_matches for how the instances are loaded).
    #
    def self.create_from_results(results, page, limit, options)
      collection = build_for_results(results, page, limit)
      collection.replace instances_from_matches(results[:matches], options)
      collection
    end

    # Turns matches into model instances.
    #
    # Without options[:class] every match is loaded individually through
    # instance_from_match. With options[:class] all ids are fetched in a
    # single find call on that class, and the result is re-ordered to follow
    # the order of the matches; a match whose record was not returned by the
    # find yields nil in that position.
    #
    def self.instances_from_matches(matches, options = {})
      klass = options[:class]
      unless klass
        return matches.collect { |match| instance_from_match(match, options) }
      end

      ids = matches.collect { |match| match[:attributes]["sphinx_internal_id"] }
      instances = ids.empty? ? [] : klass.find(
        :all,
        :conditions => {klass.primary_key.to_sym => ids},
        :include    => options[:include],
        :select     => options[:select]
      )

      ids.collect { |obj_id|
        instances.detect { |obj| obj.id == obj_id }
      }
    end

    # Loads the record for a single match, using the match's "class_crc"
    # attribute to pick the model and "sphinx_internal_id" as the id handed
    # to find. :include and :select are passed through from options.
    #
    def self.instance_from_match(match, options)
      class_from_crc(match[:attributes]["class_crc"]).find(
        match[:attributes]["sphinx_internal_id"],
        :include => options[:include],
        :select  => options[:select]
      )
    end

    # Maps a CRC value back to a model class. The lookup table is built once,
    # from ThinkingSphinx.indexed_models and their subclasses, and cached in a
    # class variable for later calls.
    #
    def self.class_from_crc(crc)
      @@models_by_crc ||= ThinkingSphinx.indexed_models.inject({}) do |hash, model|
        hash[model.constantize.to_crc32] = model
        model.constantize.subclasses.each { |subclass|
          hash[subclass.to_crc32] = subclass.name
        }
        hash
      end
      @@models_by_crc[crc].constantize
    end

    # Creates an empty collection sized from the response: :total feeds the
    # page count, :total_found becomes total_entries (both default to 0).
    #
    def self.build_for_results(results, page, limit)
      collection = new(page, limit,
        results[:total] || 0, results[:total_found] || 0
      )
      collection.results = results
      collection
    end
    private_class_method :build_for_results

    # The page before the current one, or nil on the first page.
    def previous_page
      current_page > 1 ? (current_page - 1) : nil
    end

    # The page after the current one, or nil on the last page.
    def next_page
      current_page < total_pages ? (current_page + 1) : nil
    end

    # Number of entries that precede the current page.
    def offset
      (current_page - 1) * @per_page
    end

    # Routes any each_with_<name> call that is not defined explicitly to
    # each_with_attribute(<name>). Everything else goes to super.
    #
    def method_missing(method, *args, &block)
      name = method.to_s
      # Return here so an unrelated method can never fall through to the
      # attribute iteration below.
      return super unless name =~ /\Aeach_with_/

      each_with_attribute name.sub(/\Aeach_with_/, ''), &block
    end

    # Keeps respond_to? truthful about the each_with_<name> methods that
    # method_missing handles.
    #
    def respond_to?(method, include_private = false)
      method.to_s =~ /\Aeach_with_/ ? true : super
    end

    # Yields each entry together with the "@group" and "@count" attributes of
    # the match found at the same position.
    #
    def each_with_group_and_count(&block)
      results[:matches].each_with_index do |match, index|
        yield self[index], match[:attributes]["@group"], match[:attributes]["@count"]
      end
    end

    # Yields each entry together with the named attribute of the match found
    # at the same position, falling back to the "@"-prefixed attribute of the
    # same name.
    #
    def each_with_attribute(attribute, &block)
      results[:matches].each_with_index do |match, index|
        yield self[index], (match[:attributes][attribute] || match[:attributes]["@#{attribute}"])
      end
    end

    # Yields each entry together with the :weight of the match found at the
    # same position.
    #
    def each_with_weighting(&block)
      results[:matches].each_with_index do |match, index|
        yield self[index], match[:weight]
      end
    end
  end
end
@@ -1,4 +1,5 @@
1
1
  require 'erb'
2
+ require 'singleton'
2
3
 
3
4
  module ThinkingSphinx
4
5
  # This class both keeps track of the configuration settings for Sphinx and
@@ -29,10 +30,9 @@ module ThinkingSphinx
29
30
  # config/sphinx.yml with settings for each environment, in a similar
30
31
  # fashion to database.yml - using the following keys: config_file,
31
32
  # searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
32
- # allow_star, min_prefix_len, min_infix_len, mem_limit, max_matches,
33
- # morphology, charset_type, charset_table, ignore_chars, html_strip,
34
- # html_remove_elements. I think you've got
35
- # the idea.
33
+ # allow_star, enable_star, min_prefix_len, min_infix_len, mem_limit,
34
+ # max_matches, morphology, charset_type, charset_table, ignore_chars,
35
+ # html_strip, html_remove_elements. I think you've got the idea.
36
36
  #
37
37
  # Each setting in the YAML file is optional - so only put in the ones you
38
38
  # want to change.
@@ -42,11 +42,28 @@ module ThinkingSphinx
42
42
  # don't set allow_star to true.
43
43
  #
44
44
  class Configuration
45
+ include Singleton
46
+
47
+ SourceOptions = %w( mysql_connect_flags sql_range_step sql_query_pre
48
+ sql_query_post sql_ranged_throttle sql_query_post_index )
49
+
50
+ IndexOptions = %w( charset_table charset_type docinfo enable_star
51
+ exceptions html_index_attrs html_remove_elements html_strip ignore_chars
52
+ min_infix_len min_prefix_len min_word_len mlock morphology ngram_chars
53
+ ngram_len phrase_boundary phrase_boundary_step preopen stopwords
54
+ wordforms )
55
+
56
+ IndexerOptions = %w( max_iops max_iosize mem_limit )
57
+
58
+ SearchdOptions = %w( read_timeout max_children max_matches seamless_rotate
59
+ preopen_indexes unlink_old )
60
+
45
61
  attr_accessor :config_file, :searchd_log_file, :query_log_file,
46
62
  :pid_file, :searchd_file_path, :address, :port, :allow_star,
47
- :min_prefix_len, :min_infix_len, :mem_limit, :max_matches, :morphology,
48
- :charset_type, :charset_table, :ignore_chars, :html_strip,
49
- :html_remove_elements, :app_root
63
+ :database_yml_file, :app_root, :bin_path
64
+
65
+ attr_accessor :source_options, :index_options, :indexer_options,
66
+ :searchd_options
50
67
 
51
68
  attr_reader :environment
52
69
 
@@ -54,10 +71,15 @@ module ThinkingSphinx
54
71
  # and parse it according to the current environment.
55
72
  #
56
73
  def initialize(app_root = Dir.pwd)
74
+ self.reset
75
+ end
76
+
77
+ def reset
57
78
  self.app_root = RAILS_ROOT if defined?(RAILS_ROOT)
58
79
  self.app_root = Merb.root if defined?(Merb)
59
80
  self.app_root ||= app_root
60
81
 
82
+ self.database_yml_file = "#{self.app_root}/config/database.yml"
61
83
  self.config_file = "#{self.app_root}/config/#{environment}.sphinx.conf"
62
84
  self.searchd_log_file = "#{self.app_root}/log/searchd.log"
63
85
  self.query_log_file = "#{self.app_root}/log/searchd.query.log"
@@ -66,23 +88,24 @@ module ThinkingSphinx
66
88
  self.address = "127.0.0.1"
67
89
  self.port = 3312
68
90
  self.allow_star = false
69
- self.min_prefix_len = 1
70
- self.min_infix_len = 1
71
- self.mem_limit = "64M"
72
- self.max_matches = 1000
73
- self.morphology = "stem_en"
74
- self.charset_type = "utf-8"
75
- self.charset_table = nil
76
- self.ignore_chars = nil
77
- self.html_strip = false
78
- self.html_remove_elements = ""
91
+ self.bin_path = ""
92
+
93
+ self.source_options = {}
94
+ self.index_options = {
95
+ :charset_type => "utf-8",
96
+ :morphology => "stem_en"
97
+ }
98
+ self.indexer_options = {}
99
+ self.searchd_options = {}
79
100
 
80
101
  parse_config
102
+
103
+ self
81
104
  end
82
105
 
83
106
  def self.environment
84
107
  @@environment ||= (
85
- defined?(Merb) ? ENV['MERB_ENV'] : ENV['RAILS_ENV']
108
+ defined?(Merb) ? Merb.environment : ENV['RAILS_ENV']
86
109
  ) || "development"
87
110
  end
88
111
 
@@ -97,7 +120,7 @@ module ThinkingSphinx
97
120
  def build(file_path=nil)
98
121
  load_models
99
122
  file_path ||= "#{self.config_file}"
100
- database_confs = YAML::load(ERB.new(IO.read("#{app_root}/config/database.yml")).result)
123
+ database_confs = YAML::load(ERB.new(IO.read("#{self.database_yml_file}")).result)
101
124
  database_confs.symbolize_keys!
102
125
  database_conf = database_confs[environment.to_sym]
103
126
  database_conf.symbolize_keys!
@@ -106,7 +129,7 @@ module ThinkingSphinx
106
129
  file.write <<-CONFIG
107
130
  indexer
108
131
  {
109
- mem_limit = #{self.mem_limit}
132
+ #{hash_to_config(self.indexer_options)}
110
133
  }
111
134
 
112
135
  searchd
@@ -115,29 +138,30 @@ searchd
115
138
  port = #{self.port}
116
139
  log = #{self.searchd_log_file}
117
140
  query_log = #{self.query_log_file}
118
- read_timeout = 5
119
- max_children = 30
120
141
  pid_file = #{self.pid_file}
121
- max_matches = #{self.max_matches}
142
+ #{hash_to_config(self.searchd_options)}
122
143
  }
123
144
  CONFIG
124
145
 
125
- ThinkingSphinx.indexed_models.each do |model|
146
+ ThinkingSphinx.indexed_models.each_with_index do |model, model_index|
126
147
  model = model.constantize
127
148
  sources = []
128
149
  delta_sources = []
129
150
  prefixed_fields = []
130
151
  infixed_fields = []
131
152
 
132
- model.indexes.each_with_index do |index, i|
133
- file.write index.to_config(i, database_conf, charset_type)
153
+ model.sphinx_indexes.select { |index| index.model == model }.each_with_index do |index, i|
154
+ file.write index.to_config(model, i, database_conf, model_index)
155
+
156
+ index.adapter_object.setup
134
157
 
135
- create_array_accum if index.adapter == :postgres
136
- sources << "#{model.indexes.first.name}_#{i}_core"
137
- delta_sources << "#{model.indexes.first.name}_#{i}_delta" if index.delta?
158
+ sources << "#{ThinkingSphinx::Index.name(model)}_#{i}_core"
159
+ delta_sources << "#{ThinkingSphinx::Index.name(model)}_#{i}_delta" if index.delta?
138
160
  end
139
161
 
140
- source_list = sources.collect { |s| "source = #{s}" }.join("\n")
162
+ next if sources.empty?
163
+
164
+ source_list = sources.collect { |s| "source = #{s}" }.join("\n")
141
165
  delta_list = delta_sources.collect { |s| "source = #{s}" }.join("\n")
142
166
 
143
167
  file.write core_index_for_model(model, source_list)
@@ -167,14 +191,38 @@ searchd
167
191
  begin
168
192
  model_name.camelize.constantize
169
193
  rescue LoadError
170
- model_name.gsub!(/.*[\/\\]/, '')
171
- retry
194
+ model_name.gsub!(/.*[\/\\]/, '').nil? ? next : retry
172
195
  rescue NameError
173
196
  next
174
197
  end
175
198
  end
176
199
  end
177
200
 
201
# Renders a hash of settings as "key = value" lines, joined with newlines,
# for inclusion in the generated configuration file.
#
# true is written as "1" and false as "0"; any other value is interpolated
# as-is. Settings whose value is nil or an empty string are left out
# entirely. Returns an empty string when nothing is left to write.
#
def hash_to_config(hash)
  lines = hash.collect do |key, value|
    translated_value = case value
    when TrueClass
      "1"
    when FalseClass
      "0"
    when NilClass, ""
      next
    else
      value
    end
    " #{key} = #{translated_value}"
  end

  # Skipped settings come back from the block as nil; drop them so they do
  # not turn into blank lines in the output.
  lines.compact.join("\n")
end
216
+
217
# Returns a copy of base with the settings from extra layered on top.
# Entries in extra whose value is nil or an empty string are ignored, so
# they never overwrite a value that base already has. Neither argument is
# modified; a nil extra is treated as having no settings.
#
def self.options_merge(base, extra)
  # dup rather than clone: clone would carry over a frozen state and make
  # the assignments below raise for a frozen base hash.
  merged = base.dup
  (extra || {}).each do |key, value|
    next if value.nil? || value == ""
    merged[key] = value
  end
  merged
end
225
+
178
226
  private
179
227
 
180
228
  # Parse the config/sphinx.yml file - if it exists - then use the attribute
@@ -188,90 +236,79 @@ searchd
188
236
 
189
237
  conf.each do |key,value|
190
238
  self.send("#{key}=", value) if self.methods.include?("#{key}=")
239
+
240
+ self.source_options[key.to_sym] = value if SourceOptions.include?(key.to_s)
241
+ self.index_options[key.to_sym] = value if IndexOptions.include?(key.to_s)
242
+ self.indexer_options[key.to_sym] = value if IndexerOptions.include?(key.to_s)
243
+ self.searchd_options[key.to_sym] = value if SearchdOptions.include?(key.to_s)
191
244
  end unless conf.nil?
245
+
246
+ self.bin_path += '/' unless self.bin_path.blank?
192
247
  end
193
248
 
194
249
  def core_index_for_model(model, sources)
195
250
  output = <<-INDEX
196
251
 
197
- index #{model.indexes.first.name}_core
252
+ index #{ThinkingSphinx::Index.name(model)}_core
198
253
  {
199
254
  #{sources}
200
- path = #{self.searchd_file_path}/#{model.indexes.first.name}_core
201
- charset_type = #{self.charset_type}
255
+ path = #{self.searchd_file_path}/#{ThinkingSphinx::Index.name(model)}_core
202
256
  INDEX
203
257
 
204
- morphology = model.indexes.inject(self.morphology) { |morph, index|
205
- index.options[:morphology] || morph
206
- }
207
- output += " morphology = #{morphology}\n" unless morphology.blank?
208
- output += " charset_table = #{self.charset_table}\n" unless self.charset_table.nil?
209
- output += " ignore_chars = #{self.ignore_chars}\n" unless self.ignore_chars.nil?
258
+ unless combined_index_options(model).empty?
259
+ output += hash_to_config(combined_index_options(model))
260
+ end
210
261
 
211
262
  if self.allow_star
263
+ # Ye Olde way of turning on enable_star
212
264
  output += " enable_star = 1\n"
213
- output += " min_prefix_len = #{self.min_prefix_len}\n"
214
- output += " min_infix_len = #{self.min_infix_len}\n"
265
+ output += " min_prefix_len = #{self.combined_index_options[:min_prefix_len]}\n"
215
266
  end
216
267
 
217
- output += " html_strip = 1\n" if self.html_strip
218
- output += " html_remove_elements = #{self.html_remove_elements}\n" unless self.html_remove_elements.blank?
219
-
220
- unless model.indexes.collect(&:prefix_fields).flatten.empty?
221
- output += " prefix_fields = #{model.indexes.collect(&:prefix_fields).flatten.join(', ')}\n"
268
+ unless model.sphinx_indexes.collect(&:prefix_fields).flatten.empty?
269
+ output += " prefix_fields = #{model.sphinx_indexes.collect(&:prefix_fields).flatten.map(&:unique_name).join(', ')}\n"
270
+ else
271
+ output += " prefix_fields = _\n" unless model.sphinx_indexes.collect(&:infix_fields).flatten.empty?
222
272
  end
223
273
 
224
- unless model.indexes.collect(&:infix_fields).flatten.empty?
225
- output += " infix_fields = #{model.indexes.collect(&:infix_fields).flatten.join(', ')}\n"
274
+ unless model.sphinx_indexes.collect(&:infix_fields).flatten.empty?
275
+ output += " infix_fields = #{model.sphinx_indexes.collect(&:infix_fields).flatten.map(&:unique_name).join(', ')}\n"
276
+ else
277
+ output += " infix_fields = -\n" unless model.sphinx_indexes.collect(&:prefix_fields).flatten.empty?
226
278
  end
227
279
 
228
- output + "}\n"
280
+ output + "\n}\n"
229
281
  end
230
282
 
231
283
  def delta_index_for_model(model, sources)
232
284
  <<-INDEX
233
- index #{model.indexes.first.name}_delta : #{model.indexes.first.name}_core
285
+ index #{ThinkingSphinx::Index.name(model)}_delta : #{ThinkingSphinx::Index.name(model)}_core
234
286
  {
235
287
  #{sources}
236
- path = #{self.searchd_file_path}/#{model.indexes.first.name}_delta
288
+ path = #{self.searchd_file_path}/#{ThinkingSphinx::Index.name(model)}_delta
237
289
  }
238
290
  INDEX
239
291
  end
240
292
 
241
293
  def distributed_index_for_model(model)
242
- sources = ["local = #{model.indexes.first.name}_core"]
243
- if model.indexes.any? { |index| index.delta? }
244
- sources << "local = #{model.indexes.first.name}_delta"
294
+ sources = ["local = #{ThinkingSphinx::Index.name(model)}_core"]
295
+ if model.sphinx_indexes.any? { |index| index.delta? }
296
+ sources << "local = #{ThinkingSphinx::Index.name(model)}_delta"
245
297
  end
246
298
 
247
299
  <<-INDEX
248
- index #{model.indexes.first.name}
300
+ index #{ThinkingSphinx::Index.name(model)}
249
301
  {
250
302
  type = distributed
251
303
  #{ sources.join("\n ") }
252
- charset_type = #{self.charset_type}
253
304
  }
254
305
  INDEX
255
306
  end
256
307
 
257
- def create_array_accum
258
- ::ActiveRecord::Base.connection.execute "begin"
259
- ::ActiveRecord::Base.connection.execute "savepoint ts"
260
- begin
261
- ::ActiveRecord::Base.connection.execute <<-SQL
262
- CREATE AGGREGATE array_accum (anyelement)
263
- (
264
- sfunc = array_append,
265
- stype = anyarray,
266
- initcond = '{}'
267
- );
268
- SQL
269
- rescue
270
- raise unless $!.to_s =~ /already exists with same argument types/
271
- ::ActiveRecord::Base.connection.execute "rollback to savepoint ts"
308
+ def combined_index_options(model)
309
+ model.sphinx_indexes.inject(self.index_options) do |options, index|
310
+ self.class.options_merge(options, index.local_index_options)
272
311
  end
273
- ::ActiveRecord::Base.connection.execute "release savepoint ts"
274
- ::ActiveRecord::Base.connection.execute "commit"
275
312
  end
276
313
  end
277
314
  end
@@ -178,13 +178,17 @@ module ThinkingSphinx
178
178
  # figure out how to correctly reference a column in SQL.
179
179
  #
180
180
  def column_with_prefix(column)
181
- if associations[column].empty?
181
+ if column.is_string?
182
+ column.__name
183
+ elsif associations[column].empty?
182
184
  "#{@model.quoted_table_name}.#{quote_column(column.__name)}"
183
185
  else
184
186
  associations[column].collect { |assoc|
187
+ assoc.has_column?(column.__name) ?
185
188
  "#{@model.connection.quote_table_name(assoc.join.aliased_table_name)}" +
186
- ".#{quote_column(column.__name)}"
187
- }.join(', ')
189
+ ".#{quote_column(column.__name)}" :
190
+ nil
191
+ }.compact.join(', ')
188
192
  end
189
193
  end
190
194
 
@@ -194,5 +198,9 @@ module ThinkingSphinx
194
198
  def is_many?
195
199
  associations.values.flatten.any? { |assoc| assoc.is_many? }
196
200
  end
201
+
202
# True when every entry in columns reports is_string? (which is also the
# case when there are no columns at all).
def is_string?
  columns.all?(&:is_string?)
end
197
205
  end
198
206
  end