freelancing-god-thinking-sphinx 0.9.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/LICENCE +20 -0
  2. data/README +25 -0
  3. data/lib/riddle.rb +22 -0
  4. data/lib/riddle/client.rb +593 -0
  5. data/lib/riddle/client/filter.rb +44 -0
  6. data/lib/riddle/client/message.rb +65 -0
  7. data/lib/riddle/client/response.rb +84 -0
  8. data/lib/test.rb +46 -0
  9. data/lib/thinking_sphinx.rb +79 -0
  10. data/lib/thinking_sphinx/active_record.rb +115 -0
  11. data/lib/thinking_sphinx/active_record/delta.rb +86 -0
  12. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  13. data/lib/thinking_sphinx/active_record/search.rb +36 -0
  14. data/lib/thinking_sphinx/association.rb +140 -0
  15. data/lib/thinking_sphinx/attribute.rb +279 -0
  16. data/lib/thinking_sphinx/configuration.rb +275 -0
  17. data/lib/thinking_sphinx/field.rb +186 -0
  18. data/lib/thinking_sphinx/index.rb +234 -0
  19. data/lib/thinking_sphinx/index/builder.rb +197 -0
  20. data/lib/thinking_sphinx/index/faux_column.rb +97 -0
  21. data/lib/thinking_sphinx/rails_additions.rb +56 -0
  22. data/lib/thinking_sphinx/search.rb +413 -0
  23. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +184 -0
  24. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  25. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +0 -0
  26. data/spec/unit/thinking_sphinx/active_record_spec.rb +85 -0
  27. data/spec/unit/thinking_sphinx/association_spec.rb +0 -0
  28. data/spec/unit/thinking_sphinx/attribute_spec.rb +73 -0
  29. data/spec/unit/thinking_sphinx/configuration_spec.rb +7 -0
  30. data/spec/unit/thinking_sphinx/field_spec.rb +51 -0
  31. data/spec/unit/thinking_sphinx/index/builder_spec.rb +33 -0
  32. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +41 -0
  33. data/spec/unit/thinking_sphinx/index_spec.rb +5 -0
  34. data/spec/unit/thinking_sphinx/search_spec.rb +121 -0
  35. data/spec/unit/thinking_sphinx_spec.rb +82 -0
  36. data/tasks/thinking_sphinx_tasks.rake +1 -0
  37. data/tasks/thinking_sphinx_tasks.rb +86 -0
  38. metadata +90 -0
@@ -0,0 +1,275 @@
1
+ module ThinkingSphinx
2
+ # This class both keeps track of the configuration settings for Sphinx and
3
+ # also generates the resulting file for Sphinx to use.
4
+ #
5
+ # Here are the default settings, relative to RAILS_ROOT where relevant:
6
+ #
7
+ # config file:: config/#{environment}.sphinx.conf
8
+ # searchd log file:: log/searchd.log
9
+ # query log file:: log/searchd.query.log
10
+ # pid file:: log/searchd.#{environment}.pid
11
+ # searchd files:: db/sphinx/#{environment}/
12
+ # address:: 0.0.0.0 (all)
13
+ # port:: 3312
14
+ # allow star:: false
15
+ # mem limit:: 64M
16
+ # max matches:: 1000
17
+ # morphology:: stem_en
18
+ # charset type:: utf-8
19
+ # charset table:: nil
20
+ # ignore chars:: nil
21
+ #
22
+ # If you want to change these settings, create a YAML file at
23
+ # config/sphinx.yml with settings for each environment, in a similar
24
+ # fashion to database.yml - using the following keys: config_file,
25
+ # searchd_log_file, query_log_file, pid_file, searchd_file_path, address, port,
26
+ # allow_star, mem_limit, max_matches, morphology, charset_type,
27
+ # charset_table, ignore_chars. I think you've got the idea.
28
+ #
29
+ # Each setting in the YAML file is optional - so only put in the ones you
30
+ # want to change.
31
+ #
32
+ # Keep in mind, if for some particular reason you're using a version of
33
+ # Sphinx older than 0.9.8 r871 (that's prior to the proper 0.9.8 release),
34
+ # don't set allow_star to true.
35
+ #
36
+ class Configuration
37
+ attr_accessor :config_file, :searchd_log_file, :query_log_file,
38
+ :pid_file, :searchd_file_path, :address, :port, :allow_star, :mem_limit,
39
+ :max_matches, :morphology, :charset_type, :charset_table, :ignore_chars,
40
+ :app_root
41
+
42
+ attr_reader :environment
43
+
44
# Set up the default settings, then apply any overrides found in
# config/sphinx.yml for the current environment (see parse_config).
#
# The application root is RAILS_ROOT when that constant is defined, Merb.root
# when Merb is defined (Merb wins if both are), and otherwise the app_root
# argument, which defaults to the current working directory.
#
def initialize(app_root = Dir.pwd)
  self.app_root = RAILS_ROOT if defined?(RAILS_ROOT)
  self.app_root = Merb.root  if defined?(Merb)
  self.app_root ||= app_root

  # Read the root back through the accessor: the app_root parameter shadows
  # it, so a bare app_root here would ignore RAILS_ROOT / Merb.root.
  root = self.app_root

  self.config_file       = "#{root}/config/#{environment}.sphinx.conf"
  self.searchd_log_file  = "#{root}/log/searchd.log"
  self.query_log_file    = "#{root}/log/searchd.query.log"
  self.pid_file          = "#{root}/log/searchd.#{environment}.pid"
  self.searchd_file_path = "#{root}/db/sphinx/#{environment}"
  self.port              = 3312
  self.allow_star        = false
  self.mem_limit         = "64M"
  self.max_matches       = 1000
  self.morphology        = "stem_en"
  self.charset_type      = "utf-8"
  self.charset_table     = nil
  self.ignore_chars      = nil

  parse_config
end
68
+
69
# Name of the environment the configuration applies to. Read from MERB_ENV
# when Merb is defined and from RAILS_ENV otherwise, falling back to
# "development"; the answer is cached in a class variable after the first
# call.
def self.environment
  @@environment ||= begin
    env_key = defined?(Merb) ? 'MERB_ENV' : 'RAILS_ENV'
    ENV[env_key] || "development"
  end
end
74
+
75
# Instance-level shortcut for the class-wide environment name.
def environment
  owner = self.class
  owner.environment
end
78
+
79
# Generate the config file for Sphinx from the current settings: an indexer
# and a searchd section, then, for every indexed model, one source per index
# (plus a delta source where the index has deltas) followed by the core,
# delta and distributed index definitions.
#
# file_path:: where to write the file; defaults to config_file.
#
# Raises if config/database.yml has no entry for the current environment, or
# if an index reports an adapter other than :mysql or :postgres.
#
def build(file_path=nil)
  load_models
  file_path ||= "#{self.config_file}"

  # The block form closes database.yml as soon as it has been parsed.
  database_confs = File.open("#{app_root}/config/database.yml") { |yml|
    YAML.load(yml)
  }
  database_confs.symbolize_keys!
  database_conf = database_confs[environment.to_sym]
  if database_conf.nil?
    raise "No database settings found for the #{environment} environment"
  end
  database_conf.symbolize_keys!

  # File.open rather than Kernel#open, which can treat a path starting with
  # "|" as a command to run.
  File.open(file_path, "w") do |file|
    file.write <<-CONFIG
indexer
{
mem_limit = #{self.mem_limit}
}

searchd
{
port = #{self.port}
log = #{self.searchd_log_file}
query_log = #{self.query_log_file}
read_timeout = 5
max_children = 30
pid_file = #{self.pid_file}
max_matches = #{self.max_matches}
}
    CONFIG

    ThinkingSphinx.indexed_models.each do |model|
      model   = model.constantize
      sources = []

      model.indexes.each_with_index do |index, i|
        # Set up associations and joins
        index.link!

        attr_sources = index.attributes.collect { |attrib|
          attrib.to_sphinx_clause
        }.join("\n ")

        adapter = case index.adapter
        when :postgres
          # The generated SQL relies on the array_accum aggregate.
          create_array_accum
          "pgsql"
        when :mysql
          "mysql"
        else
          raise "Unsupported Database Adapter: Sphinx only supports MySQL and PostgreSQL"
        end

        file.write <<-SOURCE

source #{model.name.downcase}_#{i}_core
{
type = #{adapter}
sql_host = #{database_conf[:host] || "localhost"}
sql_user = #{database_conf[:username]}
sql_pass = #{database_conf[:password]}
sql_db = #{database_conf[:database]}

sql_query_pre = #{charset_type == "utf-8" && adapter == "mysql" ? "SET NAMES utf8" : ""}
sql_query_pre = #{index.to_sql_query_pre}
sql_query = #{index.to_sql.gsub(/\n/, ' ')}
sql_query_range = #{index.to_sql_query_range}
sql_query_info = #{index.to_sql_query_info}
#{attr_sources}
}
        SOURCE

        if index.delta?
          file.write <<-SOURCE

source #{model.name.downcase}_#{i}_delta : #{model.name.downcase}_#{i}_core
{
sql_query_pre = #{charset_type == "utf-8" && adapter == "mysql" ? "SET NAMES utf8" : ""}
sql_query = #{index.to_sql(:delta => true).gsub(/\n/, ' ')}
sql_query_range = #{index.to_sql_query_range :delta => true}
}
          SOURCE
        end
        sources << "#{model.name.downcase}_#{i}_core"
      end

      source_list = sources.collect { |s| "source = #{s}" }.join("\n")
      # $ matches at every line end, so each listed source is renamed.
      delta_list = source_list.gsub(/_core$/, "_delta")
      file.write <<-INDEX

index #{model.name.downcase}_core
{
#{source_list}
path = #{self.searchd_file_path}/#{model.name.downcase}_core
charset_type = #{self.charset_type}
      INDEX
      file.puts " morphology = #{self.morphology}" unless self.morphology.blank?
      file.puts " charset_table = #{self.charset_table}" unless self.charset_table.nil?
      file.puts " ignore_chars = #{self.ignore_chars}" unless self.ignore_chars.nil?

      if self.allow_star
        file.puts " enable_star = 1"
        file.puts " min_prefix_len = 1"
      end

      file.write("}\n")

      if model.indexes.any? { |index| index.delta? }
        file.write <<-INDEX

index #{model.name.downcase}_delta : #{model.name.downcase}_core
{
#{delta_list}
path = #{self.searchd_file_path}/#{model.name.downcase}_delta
}

index #{model.name.downcase}
{
type = distributed
local = #{model.name.downcase}_core
local = #{model.name.downcase}_delta
charset_type = #{self.charset_type}
}
        INDEX
      else
        file.write <<-INDEX
index #{model.name.downcase}
{
type = distributed
local = #{model.name.downcase}_core
}
        INDEX
      end
    end
  end
end
217
+
218
# Make sure every model file under app/models has been loaded, skipping any
# whose class ActiveRecord::Base already lists among its subclasses
# (otherwise we start to hit some messy dependencies issues).
#
# The class name is derived from the file name alone, so files whose
# constant cannot be resolved that way are silently skipped.
#
def load_models
  Dir["#{app_root}/app/models/**/*.rb"].each do |file|
    # Compare like with like: subclasses report camelized class names
    # ("UserGroup"), not underscored file names ("user_group").
    class_name = File.basename(file, ".rb").camelize

    already_loaded = ::ActiveRecord::Base.send(:subclasses).any? { |model|
      model.name == class_name
    }
    next if already_loaded

    begin
      class_name.constantize
    rescue NameError, LoadError
      next
    end
  end
end
238
+
239
+ private
240
+
241
# Parse the config/sphinx.yml file - if it exists - and apply the settings
# listed under the current environment through the matching attribute
# writers. Keys without a writer are ignored, as is a file with no entry for
# the environment.
#
def parse_config
  path = "#{app_root}/config/sphinx.yml"
  return unless File.exist?(path)

  # Block form closes the file; an empty YAML document parses to a non-hash,
  # so fall back to an empty hash.
  settings = File.open(path) { |yml| YAML.load(yml) } || {}
  conf = settings[environment]
  return if conf.nil?

  conf.each do |key, value|
    writer = "#{key}="
    # respond_to? works whether method names are reported as strings or
    # symbols, unlike methods.include? with a string.
    self.send(writer, value) if respond_to?(writer)
  end
end
254
+
255
# Create the array_accum aggregate inside a transaction, guarded by a
# savepoint so that an "already exists" failure can be rolled back and
# ignored. Any other error is re-raised.
#
# NOTE(review): execute is not defined in this class as shown - confirm
# where it is expected to come from.
def create_array_accum
  execute "begin"
  execute "savepoint ts"
  begin
    execute <<-SQL
CREATE AGGREGATE array_accum (anyelement)
(
sfunc = array_append,
stype = anyarray,
initcond = '{}'
);
    SQL
  rescue
    raise unless $!.to_s =~ /already exists with same argument types/
    execute "rollback to savepoint ts"
  end
  # Release the savepoint that was actually created above ("ts").
  execute "release savepoint ts"
  execute "commit"
end
274
+ end
275
+ end
@@ -0,0 +1,186 @@
1
+ module ThinkingSphinx
2
+ # Fields - holding the string data which Sphinx indexes for your searches.
3
+ # This class isn't really useful to you unless you're hacking around with the
4
+ # internals of Thinking Sphinx - but hey, don't let that stop you.
5
+ #
6
+ # One key thing to remember - if you're using the field manually to
7
+ # generate SQL statements, you'll need to set the base model, and all the
8
+ # associations. Which can get messy. Use Index.link!, it really helps.
9
+ #
10
+ class Field
11
+ attr_accessor :alias, :columns, :sortable, :associations, :model
12
+
13
# Build a field from one Column, or an array of them, plus optional
# settings. The columns are references to the data that makes up the field.
#
# Recognised options:
# - :as => :alias_name
# - :sortable => true
#
# An alias is only needed in three situations: another attribute or field
# already uses the name, the column is called 'id', or the field spans more
# than one column.
#
# Sortable is false unless given. When true, an attribute holding the same
# string value (which Sphinx converts to an integer value) is created so
# results can be sorted by it. Thinking Sphinx is smart enough to realise
# that fields named in sort statements mean their respective attributes.
#
# Some examples:
#
#   Field.new(
#     Column.new(:name)
#   )
#
#   Field.new(
#     [Column.new(:first_name), Column.new(:last_name)],
#     :as => :name, :sortable => true
#   )
#
#   Field.new(
#     [Column.new(:posts, :subject), Column.new(:posts, :content)],
#     :as => :posts
#   )
#
def initialize(columns, options = {})
  @associations = {}
  @columns      = Array(columns)

  @alias, sortable = options[:as], options[:sortable]
  @sortable        = sortable ? sortable : false
end
54
+
55
# Build this field's entry for the SELECT clause. The model and associations
# need to be set beforehand.
#
# Column references are comma-joined, concatenated when concat_ws? says so,
# group-concatenated when the field can hold many values (is_many?), cast
# to a string and finally aliased with the field's unique name.
#
def to_select_sql
  sql = @columns.map { |col| column_with_prefix(col) }.join(', ')
  sql = concatenate(sql)       if concat_ws?
  sql = group_concatenate(sql) if is_many?
  sql = cast_to_string(sql)

  "#{sql} AS #{quote_column(unique_name)}"
end
71
+
72
# Build this field's entries for the GROUP BY clause, as an array of column
# references. Returns nil when no entry is needed: either the field can hold
# many values (is_many?) or ThinkingSphinx.use_group_by_shortcut? is on.
#
def to_group_sql
  return nil if is_many? || ThinkingSphinx.use_group_by_shortcut?

  @columns.map { |col| column_with_prefix(col) }
end
87
+
88
# The name this field is known by: its alias when one was given, otherwise
# the name of its column when there is exactly one. With several columns and
# no alias there is nothing to fall back on and the result is the (unset)
# alias - so give multi-column fields an alias.
#
def unique_name
  return @alias unless @columns.length == 1

  @alias || @columns.first.__name
end
100
+
101
+ private
102
+
103
# Combine the comma-separated column expressions in clause into a single
# space-separated string expression, using the syntax of the model's
# connection adapter (CONCAT_WS for MySQL, the || operator for PostgreSQL).
# For any other adapter the clause is returned untouched.
def concatenate(clause)
  case @model.connection.class.name
  when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
    "CONCAT_WS(' ', #{clause})"
  when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
    # A quoted single space, matching the MySQL branch; this class has no
    # separator method to interpolate.
    clause.split(', ').join(" || ' ' || ")
  else
    clause
  end
end
113
+
114
# Wrap clause in an aggregate that joins the values of all grouped rows
# into one space-separated string, using the syntax of the model's
# connection adapter (GROUP_CONCAT for MySQL, array_accum plus
# array_to_string for PostgreSQL). For any other adapter the clause is
# returned untouched.
def group_concatenate(clause)
  case @model.connection.class.name
  when "ActiveRecord::ConnectionAdapters::MysqlAdapter"
    "GROUP_CONCAT(#{clause} SEPARATOR ' ')"
  when "ActiveRecord::ConnectionAdapters::PostgreSQLAdapter"
    # A single space, matching the MySQL branch; this class has no separator
    # method to interpolate.
    "array_to_string(array_accum(#{clause}), ' ')"
  else
    clause
  end
end
124
+
125
# Wrap clause in a CAST to CHAR when the model's connection is the MySQL
# adapter; every other adapter gets the clause back unchanged.
def cast_to_string(clause)
  adapter_name = @model.connection.class.name
  return clause unless "ActiveRecord::ConnectionAdapters::MysqlAdapter" == adapter_name

  "CAST(#{clause} AS CHAR)"
end
135
+
136
# Quote a column name using the model's database connection.
def quote_column(column)
  connection = @model.connection
  connection.quote_column_name(column)
end
139
+
140
# Should the column values be concatenated with a space between each one?
# True when the field has more than one column; otherwise the answer is
# whatever multiple_associations? reports.
#
def concat_ws?
  return true if @columns.length > 1

  multiple_associations?
end
147
+
148
# Compares every column's association list with the first column's: false
# when they are all equal, true as soon as one differs.
#
def multiple_sources?
  baseline = associations[@columns.first]

  @columns.any? { |col| associations[col] != baseline }
end
156
+
157
# True when at least one column is tied to more than one association (which
# only happens for polymorphic situations).
#
def multiple_associations?
  associations.any? { |_column, linked| linked.length >= 2 }
end
163
+
164
# Build the SQL reference(s) for a column. A column with no associations is
# qualified with the model's own quoted table name; otherwise one reference
# is produced per association, each qualified with the aliased table name of
# that association's join, and the references are joined with ', '.
#
def column_with_prefix(column)
  joins = associations[column]
  return "#{@model.quoted_table_name}.#{quote_column(column.__name)}" if joins.empty?

  joins.map { |assoc|
    table = @model.connection.quote_table_name(assoc.join.aliased_table_name)
    "#{table}.#{quote_column(column.__name)}"
  }.join(', ')
end
178
+
179
# Could there be more than one value related to the parent record? True if
# any association attached to any of the columns reports is_many?.
#
def is_many?
  every_association = associations.values.flatten
  every_association.any?(&:is_many?)
end
185
+ end
186
+ end