nixme-thinking-sphinx 0.9.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/LICENCE +20 -0
  2. data/README +52 -0
  3. data/lib/riddle.rb +22 -0
  4. data/lib/riddle/client.rb +593 -0
  5. data/lib/riddle/client/filter.rb +44 -0
  6. data/lib/riddle/client/message.rb +65 -0
  7. data/lib/riddle/client/response.rb +84 -0
  8. data/lib/test.rb +46 -0
  9. data/lib/thinking_sphinx.rb +82 -0
  10. data/lib/thinking_sphinx/active_record.rb +138 -0
  11. data/lib/thinking_sphinx/active_record/delta.rb +90 -0
  12. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  13. data/lib/thinking_sphinx/active_record/search.rb +43 -0
  14. data/lib/thinking_sphinx/association.rb +140 -0
  15. data/lib/thinking_sphinx/attribute.rb +282 -0
  16. data/lib/thinking_sphinx/configuration.rb +277 -0
  17. data/lib/thinking_sphinx/field.rb +198 -0
  18. data/lib/thinking_sphinx/index.rb +334 -0
  19. data/lib/thinking_sphinx/index/builder.rb +212 -0
  20. data/lib/thinking_sphinx/index/faux_column.rb +97 -0
  21. data/lib/thinking_sphinx/rails_additions.rb +56 -0
  22. data/lib/thinking_sphinx/search.rb +455 -0
  23. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +185 -0
  24. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  25. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +81 -0
  26. data/spec/unit/thinking_sphinx/active_record_spec.rb +201 -0
  27. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  28. data/spec/unit/thinking_sphinx/attribute_spec.rb +356 -0
  29. data/spec/unit/thinking_sphinx/configuration_spec.rb +476 -0
  30. data/spec/unit/thinking_sphinx/field_spec.rb +215 -0
  31. data/spec/unit/thinking_sphinx/index/builder_spec.rb +33 -0
  32. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +41 -0
  33. data/spec/unit/thinking_sphinx/index_spec.rb +230 -0
  34. data/spec/unit/thinking_sphinx/search_spec.rb +163 -0
  35. data/spec/unit/thinking_sphinx_spec.rb +107 -0
  36. data/tasks/thinking_sphinx_tasks.rake +1 -0
  37. data/tasks/thinking_sphinx_tasks.rb +86 -0
  38. metadata +90 -0
@@ -0,0 +1,277 @@
1
+ require 'erb'
2
+
3
module ThinkingSphinx
  # This class both keeps track of the configuration settings for Sphinx and
  # also generates the resulting file for Sphinx to use.
  #
  # Here are the default settings, relative to the application root (RAILS_ROOT
  # or Merb.root when defined) where relevant:
  #
  # config file::          config/#{environment}.sphinx.conf
  # searchd log file::     log/searchd.log
  # query log file::       log/searchd.query.log
  # pid file::             log/searchd.#{environment}.pid
  # searchd files::        db/sphinx/#{environment}/
  # address::              0.0.0.0 (all)
  # port::                 3312
  # allow star::           false
  # min prefix length::    1
  # min infix length::     1
  # mem limit::            64M
  # max matches::          1000
  # morphology::           stem_en
  # charset type::         utf-8
  # charset table::        nil
  # ignore chars::         nil
  # html strip::           false
  # html remove elements:: ''
  #
  # If you want to change these settings, create a YAML file at
  # config/sphinx.yml with settings for each environment, in a similar
  # fashion to database.yml - using the following keys: config_file,
  # searchd_log_file, query_log_file, pid_file, searchd_file_path, address,
  # port, allow_star, min_prefix_len, min_infix_len, mem_limit, max_matches,
  # morphology, charset_type, charset_table, ignore_chars, html_strip,
  # html_remove_elements.
  #
  # Each setting in the YAML file is optional - so only put in the ones you
  # want to change. Keys that do not match a public writer on this class are
  # ignored.
  #
  # Keep in mind, if for some particular reason you're using a version of
  # Sphinx older than 0.9.8 r871 (that's prior to the proper 0.9.8 release),
  # don't set allow_star to true.
  #
  class Configuration
    attr_accessor :config_file, :searchd_log_file, :query_log_file,
      :pid_file, :searchd_file_path, :address, :port, :allow_star,
      :min_prefix_len, :min_infix_len, :mem_limit, :max_matches, :morphology,
      :charset_type, :charset_table, :ignore_chars, :html_strip,
      :html_remove_elements, :app_root

    # Load in the configuration settings - this sets every default and then
    # looks for config/sphinx.yml, applying the section for the current
    # environment on top of the defaults.
    #
    # app_root:: fallback application root, used only when neither RAILS_ROOT
    #            nor Merb is defined. Defaults to the working directory.
    #
    def initialize(app_root = Dir.pwd)
      self.app_root   = RAILS_ROOT if defined?(RAILS_ROOT)
      self.app_root   = Merb.root  if defined?(Merb)
      self.app_root ||= app_root

      self.config_file          = "#{self.app_root}/config/#{environment}.sphinx.conf"
      self.searchd_log_file     = "#{self.app_root}/log/searchd.log"
      self.query_log_file       = "#{self.app_root}/log/searchd.query.log"
      self.pid_file             = "#{self.app_root}/log/searchd.#{environment}.pid"
      self.searchd_file_path    = "#{self.app_root}/db/sphinx/#{environment}"
      self.address              = "0.0.0.0"
      self.port                 = 3312
      self.allow_star           = false
      self.min_prefix_len       = 1
      self.min_infix_len        = 1
      self.mem_limit            = "64M"
      self.max_matches          = 1000
      self.morphology           = "stem_en"
      self.charset_type         = "utf-8"
      self.charset_table        = nil
      self.ignore_chars         = nil
      self.html_strip           = false
      self.html_remove_elements = ""

      parse_config
    end

    # The name of the current environment: MERB_ENV when Merb is defined,
    # otherwise RAILS_ENV, falling back to "development". The value is
    # memoised in a class variable on first use.
    #
    def self.environment
      @@environment ||= (
        defined?(Merb) ? ENV['MERB_ENV'] : ENV['RAILS_ENV']
      ) || "development"
    end

    # Instance-level shortcut to Configuration.environment.
    #
    def environment
      self.class.environment
    end

    # Generate the config file for Sphinx by using all the settings defined and
    # looping through all the models with indexes to build the relevant
    # indexer and searchd configuration, and sources and indexes details.
    #
    # file_path:: where to write the configuration; defaults to config_file.
    #
    # Raises a RuntimeError if config/database.yml has no section for the
    # current environment.
    #
    def build(file_path=nil)
      load_models
      file_path ||= "#{self.config_file}"

      database_confs = YAML.load(ERB.new(IO.read("#{app_root}/config/database.yml")).result)
      database_confs.symbolize_keys!
      database_conf  = database_confs[environment.to_sym]
      if database_conf.nil?
        raise "No database settings found for the #{environment} environment in #{app_root}/config/database.yml"
      end
      database_conf.symbolize_keys!

      # File.open rather than Kernel#open, so a path starting with "|" can
      # never be interpreted as a command to run.
      File.open(file_path, "w") do |file|
        file.write <<-CONFIG
indexer
{
  mem_limit = #{self.mem_limit}
}

searchd
{
  address = #{self.address}
  port = #{self.port}
  log = #{self.searchd_log_file}
  query_log = #{self.query_log_file}
  read_timeout = 5
  max_children = 30
  pid_file = #{self.pid_file}
  max_matches = #{self.max_matches}
}
        CONFIG

        ThinkingSphinx.indexed_models.each do |model|
          model         = model.constantize
          sources       = []
          delta_sources = []

          model.indexes.each_with_index do |index, i|
            file.write index.to_config(i, database_conf, charset_type)

            create_array_accum if index.adapter == :postgres
            sources << "#{model.indexes.first.name}_#{i}_core"
            delta_sources << "#{model.indexes.first.name}_#{i}_delta" if index.delta?
          end

          source_list = sources.collect       { |s| "source = #{s}" }.join("\n")
          delta_list  = delta_sources.collect { |s| "source = #{s}" }.join("\n")

          file.write core_index_for_model(model, source_list)
          unless delta_list.blank?
            file.write delta_index_for_model(model, delta_list)
          end

          file.write distributed_index_for_model(model)
        end
      end
    end

    # Make sure all models are loaded - without reloading any that
    # ActiveRecord::Base is already aware of (otherwise we start to hit some
    # messy dependencies issues).
    #
    # Every Ruby file under app/models is mapped to a constant name and
    # resolved. If resolving raises a LoadError, the directory part of the name
    # is stripped and the lookup is retried once; files that still cannot be
    # resolved (LoadError or NameError) are skipped.
    #
    def load_models
      base = "#{app_root}/app/models/"
      Dir["#{base}**/*.rb"].each do |file|
        # The root path is escaped so characters such as "." or "+" in it are
        # matched literally.
        model_name = file.gsub(/^#{Regexp.escape(base)}([\w_\/\\]+)\.rb/, '\1')

        next if ::ActiveRecord::Base.send(:subclasses).detect { |model|
          model.name == model_name
        }

        begin
          model_name.classify.constantize
        rescue LoadError
          # gsub! returns nil when there was no directory part left to strip,
          # so the retry happens at most once per file instead of looping
          # forever on a model that simply cannot be loaded.
          retry if model_name.gsub!(/.*[\/\\]/, '')
          next
        rescue NameError
          next
        end
      end
    end

    private

    # Parse the config/sphinx.yml file - if it exists - then use the attribute
    # accessors to set the appropriate values. The file is run through ERB
    # before being parsed as YAML. Nothing happens when the file is missing,
    # is empty, or has no section for the current environment.
    #
    def parse_config
      path = "#{app_root}/config/sphinx.yml"
      return unless File.exist?(path)

      settings = YAML.load(ERB.new(IO.read(path)).result)
      return unless settings.is_a?(Hash)

      conf = settings[environment]
      return unless conf.is_a?(Hash)

      conf.each do |key, value|
        setter = "#{key}="
        # respond_to? works whether #methods returns Strings or Symbols, and
        # only lets public writers through.
        send(setter, value) if respond_to?(setter)
      end
    end

    # Builds the "<name>_core" index section for a model.
    #
    # model::   a model class responding to #indexes.
    # sources:: pre-rendered "source = ..." lines for this model.
    #
    # Returns the configuration text as a String.
    #
    def core_index_for_model(model, sources)
      output = <<-INDEX

index #{model.indexes.first.name}_core
{
#{sources}
path = #{self.searchd_file_path}/#{model.indexes.first.name}_core
charset_type = #{self.charset_type}
      INDEX

      # The last index that specifies a morphology wins; otherwise the global
      # setting is used.
      morphology = model.indexes.inject(self.morphology) { |morph, index|
        index.options[:morphology] || morph
      }
      output += " morphology = #{morphology}\n" unless morphology.blank?
      output += " charset_table = #{self.charset_table}\n" unless self.charset_table.nil?
      output += " ignore_chars = #{self.ignore_chars}\n" unless self.ignore_chars.nil?

      if self.allow_star
        output += " enable_star = 1\n"
        output += " min_prefix_len = #{self.min_prefix_len}\n"
        output += " min_infix_len = #{self.min_infix_len}\n"
      end

      output += " html_strip = 1\n" if self.html_strip
      output += " html_remove_elements = #{self.html_remove_elements}\n" unless self.html_remove_elements.blank?

      prefix_fields = model.indexes.collect(&:prefix_fields).flatten
      unless prefix_fields.empty?
        output += " prefix_fields = #{prefix_fields.join(', ')}\n"
      end

      infix_fields = model.indexes.collect(&:infix_fields).flatten
      unless infix_fields.empty?
        output += " infix_fields = #{infix_fields.join(', ')}\n"
      end

      output + "}\n"
    end

    # Builds the "<name>_delta" index section, which inherits from the model's
    # core index and only overrides the sources and the path.
    #
    def delta_index_for_model(model, sources)
      <<-INDEX
index #{model.indexes.first.name}_delta : #{model.indexes.first.name}_core
{
#{sources}
path = #{self.searchd_file_path}/#{model.indexes.first.name}_delta
}
      INDEX
    end

    # Builds the distributed index section that fronts the core index and,
    # when any of the model's indexes uses deltas, the delta index as well.
    #
    def distributed_index_for_model(model)
      sources = ["local = #{model.indexes.first.name}_core"]
      if model.indexes.any? { |index| index.delta? }
        sources << "local = #{model.indexes.first.name}_delta"
      end

      <<-INDEX
index #{model.indexes.first.name}
{
type = distributed
#{ sources.join("\n ") }
charset_type = #{self.charset_type}
}
      INDEX
    end

    # Creates the array_accum aggregate on the ActiveRecord connection (called
    # from build for indexes whose adapter is :postgres). An "already exists"
    # error is treated as success; any other error rolls the transaction back
    # and is re-raised.
    #
    def create_array_accum
      connection = ::ActiveRecord::Base.connection

      connection.execute "begin"
      connection.execute "savepoint ts"
      begin
        connection.execute <<-SQL
          CREATE AGGREGATE array_accum (anyelement)
          (
            sfunc = array_append,
            stype = anyarray,
            initcond = '{}'
          );
        SQL
      rescue
        unless $!.to_s =~ /already exists with same argument types/
          # Do not leave the transaction opened above dangling on the
          # connection when the failure is a genuine one.
          connection.execute "rollback"
          raise
        end
        connection.execute "rollback to savepoint ts"
      end
      connection.execute "release savepoint ts"
      connection.execute "commit"
    end
  end
end
@@ -0,0 +1,198 @@
1
module ThinkingSphinx
  # Fields - holding the string data which Sphinx indexes for your searches.
  # This class isn't really useful to you unless you're hacking around with the
  # internals of Thinking Sphinx - but hey, don't let that stop you.
  #
  # One key thing to remember - if you're using the field manually to
  # generate SQL statements, you'll need to set the base model, and all the
  # associations. Which can get messy. Use Index.link!, it really helps.
  #
  class Field
    attr_accessor :alias, :columns, :sortable, :associations, :model, :infixes, :prefixes

    # To create a new field, you'll need to pass in either a single Column
    # or an array of them, and some (optional) options. The columns are
    # references to the data that will make up the field.
    #
    # Valid options are:
    # - :as       => :alias_name
    # - :sortable => true
    # - :infixes  => true
    # - :prefixes => true
    #
    # Alias is only required in three circumstances: when there's
    # another attribute or field with the same name, when the column name is
    # 'id', or when there's more than one column.
    #
    # Sortable defaults to false - but is quite useful when set to true, as
    # it creates an attribute with the same string value (which Sphinx converts
    # to an integer value), which can be sorted by. Thinking Sphinx is smart
    # enough to realise that when you specify fields in sort statements, you
    # mean their respective attributes.
    #
    # If you have partial matching enabled (ie: enable_star), then you can
    # specify certain fields to have their prefixes and infixes indexed. Keep
    # in mind, though, that Sphinx's default is _all_ fields - so once you
    # highlight a particular field, no other fields in the index will have
    # these partial indexes.
    #
    # Raises a RuntimeError when no columns are given, or when any of them
    # does not respond to __stack (i.e. is not a column reference).
    #
    # Here's some examples:
    #
    #   Field.new(
    #     Column.new(:name)
    #   )
    #
    #   Field.new(
    #     [Column.new(:first_name), Column.new(:last_name)],
    #     :as => :name, :sortable => true
    #   )
    #
    #   Field.new(
    #     [Column.new(:posts, :subject), Column.new(:posts, :content)],
    #     :as => :posts, :prefixes => true
    #   )
    #
    def initialize(columns, options = {})
      @columns      = Array(columns)
      @associations = {}

      raise "Cannot define a field with no columns. Maybe you are trying to index a field with a reserved name (id, name). You can fix this error by using a symbol rather than a bare name (:id instead of id)." if @columns.empty? || @columns.any? { |column| !column.respond_to?(:__stack) }

      @alias    = options[:as]
      @sortable = options[:sortable] || false
      @infixes  = options[:infixes]  || false
      @prefixes = options[:prefixes] || false
    end

    # Get the part of the SELECT clause related to this field. Don't forget
    # to set your model and associations first though.
    #
    # This will concatenate strings if there's more than one data source or
    # multiple data values (has_many or has_and_belongs_to_many associations).
    #
    def to_select_sql
      clause = @columns.collect { |column|
        column_with_prefix(column)
      }.join(', ')

      clause = concatenate(clause)       if concat_ws?
      clause = group_concatenate(clause) if is_many?

      "#{cast_to_string clause } AS #{quote_column(unique_name)}"
    end

    # Get the part of the GROUP BY clause related to this field - if one is
    # needed. If not, all you'll get back is nil. The latter will happen if
    # there's multiple data values (read: a has_many or has_and_belongs_to_many
    # association), or if ThinkingSphinx.use_group_by_shortcut? is true.
    # Otherwise an array of column references is returned.
    #
    def to_group_sql
      return nil if is_many? || ThinkingSphinx.use_group_by_shortcut?

      @columns.collect { |column| column_with_prefix(column) }
    end

    # Returns the unique name of the field - which is either the alias of
    # the field, or the name of the only column - if there is only one. If
    # there isn't, there should be an alias. Else things probably won't work.
    # Consider yourself warned.
    #
    def unique_name
      if @columns.length == 1
        @alias || @columns.first.__name
      else
        @alias
      end
    end

    private

    # Joins a comma-separated list of column references into one
    # space-separated string expression, using the syntax of the model's
    # database adapter. Unknown adapters get the clause back untouched.
    #
    def concatenate(clause)
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        "CONCAT_WS(' ', #{clause})"
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        # In PostgreSQL, concatenating anything with NULL yields NULL, so a
        # single empty column would blank the whole field. COALESCE keeps the
        # other columns' text, in line with CONCAT_WS skipping NULLs on MySQL.
        clause.split(', ').collect { |part|
          "COALESCE(#{part}, '')"
        }.join(" || ' ' || ")
      else
        clause
      end
    end

    # Wraps the clause in the adapter's aggregate for collapsing many rows
    # into one space-separated string. Unknown adapters get the clause back
    # untouched.
    #
    def group_concatenate(clause)
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        "GROUP_CONCAT(#{clause} SEPARATOR ' ')"
      when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
        "array_to_string(array_accum(#{clause}), ' ')"
      else
        clause
      end
    end

    # Casts the clause to a character type on MySQL; every other adapter gets
    # the clause back unchanged.
    #
    def cast_to_string(clause)
      case @model.connection.class.name
      when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
        "CAST(#{clause} AS CHAR)"
      else
        clause
      end
    end

    # Quotes a column name using the model's connection.
    #
    def quote_column(column)
      @model.connection.quote_column_name(column)
    end

    # Indication of whether the columns should be concatenated with a space
    # between each value. True if there's either more than one column or a
    # column with multiple associations.
    #
    def concat_ws?
      @columns.length > 1 || multiple_associations?
    end

    # Checks the association tree for each column - if they're all the same,
    # returns false.
    #
    def multiple_sources?
      first = associations[@columns.first]

      !@columns.all? { |col| associations[col] == first }
    end

    # Checks whether any column requires multiple associations (which only
    # happens for polymorphic situations).
    #
    def multiple_associations?
      associations.any? { |col,assocs| assocs.length > 1 }
    end

    # Builds a column reference tied to the appropriate associations. This
    # dives into the associations hash and their corresponding joins to
    # figure out how to correctly reference a column in SQL.
    #
    # A column with no entry in the associations hash is treated the same as
    # one with an empty list: it is referenced on the model's own table.
    #
    def column_with_prefix(column)
      assocs = associations[column] || []

      if assocs.empty?
        "#{@model.quoted_table_name}.#{quote_column(column.__name)}"
      else
        assocs.collect { |assoc|
          "#{@model.connection.quote_table_name(assoc.join.aliased_table_name)}" +
          ".#{quote_column(column.__name)}"
        }.join(', ')
      end
    end

    # Could there be more than one value related to the parent record? If so,
    # then this will return true. If not, false. It's that simple.
    #
    def is_many?
      associations.values.flatten.any? { |assoc| assoc.is_many? }
    end
  end
end