ebeigarts-thinking-sphinx 1.1.21

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +143 -0
  3. data/lib/thinking_sphinx.rb +217 -0
  4. data/lib/thinking_sphinx/active_record.rb +278 -0
  5. data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
  6. data/lib/thinking_sphinx/active_record/delta.rb +87 -0
  7. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  8. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  9. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +53 -0
  10. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  11. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +135 -0
  12. data/lib/thinking_sphinx/association.rb +164 -0
  13. data/lib/thinking_sphinx/attribute.rb +269 -0
  14. data/lib/thinking_sphinx/class_facet.rb +15 -0
  15. data/lib/thinking_sphinx/collection.rb +148 -0
  16. data/lib/thinking_sphinx/configuration.rb +275 -0
  17. data/lib/thinking_sphinx/core/string.rb +15 -0
  18. data/lib/thinking_sphinx/deltas.rb +30 -0
  19. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  20. data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta.rb +27 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  23. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  24. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  25. data/lib/thinking_sphinx/deploy/capistrano.rb +82 -0
  26. data/lib/thinking_sphinx/facet.rb +108 -0
  27. data/lib/thinking_sphinx/facet_collection.rb +59 -0
  28. data/lib/thinking_sphinx/field.rb +82 -0
  29. data/lib/thinking_sphinx/index.rb +99 -0
  30. data/lib/thinking_sphinx/index/builder.rb +287 -0
  31. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  32. data/lib/thinking_sphinx/property.rb +160 -0
  33. data/lib/thinking_sphinx/rails_additions.rb +136 -0
  34. data/lib/thinking_sphinx/search.rb +727 -0
  35. data/lib/thinking_sphinx/search/facets.rb +104 -0
  36. data/lib/thinking_sphinx/source.rb +175 -0
  37. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  38. data/lib/thinking_sphinx/source/sql.rb +126 -0
  39. data/lib/thinking_sphinx/tasks.rb +245 -0
  40. data/rails/init.rb +14 -0
  41. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  42. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  43. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  44. data/spec/unit/thinking_sphinx/active_record_spec.rb +329 -0
  45. data/spec/unit/thinking_sphinx/association_spec.rb +246 -0
  46. data/spec/unit/thinking_sphinx/attribute_spec.rb +338 -0
  47. data/spec/unit/thinking_sphinx/collection_spec.rb +15 -0
  48. data/spec/unit/thinking_sphinx/configuration_spec.rb +222 -0
  49. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  50. data/spec/unit/thinking_sphinx/facet_collection_spec.rb +64 -0
  51. data/spec/unit/thinking_sphinx/facet_spec.rb +302 -0
  52. data/spec/unit/thinking_sphinx/field_spec.rb +154 -0
  53. data/spec/unit/thinking_sphinx/index/builder_spec.rb +355 -0
  54. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  55. data/spec/unit/thinking_sphinx/index_spec.rb +45 -0
  56. data/spec/unit/thinking_sphinx/rails_additions_spec.rb +191 -0
  57. data/spec/unit/thinking_sphinx/search_spec.rb +228 -0
  58. data/spec/unit/thinking_sphinx/source_spec.rb +217 -0
  59. data/spec/unit/thinking_sphinx_spec.rb +151 -0
  60. data/tasks/distribution.rb +67 -0
  61. data/tasks/rails.rake +1 -0
  62. data/tasks/testing.rb +100 -0
  63. data/vendor/after_commit/LICENSE +20 -0
  64. data/vendor/after_commit/README +16 -0
  65. data/vendor/after_commit/Rakefile +22 -0
  66. data/vendor/after_commit/init.rb +8 -0
  67. data/vendor/after_commit/lib/after_commit.rb +45 -0
  68. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  69. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  70. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  71. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  72. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  73. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  74. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  75. data/vendor/riddle/lib/riddle.rb +30 -0
  76. data/vendor/riddle/lib/riddle/client.rb +619 -0
  77. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  78. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  79. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  80. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  81. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  82. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  83. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  84. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  85. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  86. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  87. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  88. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  89. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  90. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  91. metadata +191 -0
@@ -0,0 +1,148 @@
1
module ThinkingSphinx
  # An Array of search results that also carries pagination details and the
  # raw results hash (expected to hold :matches, :total and :total_found) it
  # was built from.
  class Collection < ::Array
    attr_reader :total_entries, :total_pages, :current_page, :per_page
    attr_accessor :results

    # Compatibility with older versions of will_paginate
    alias_method :page_count, :total_pages

    # page::          the current page number
    # per_page::      the number of entries per page
    # entries::       the number of entries used to compute total_pages
    # total_entries:: the total reported through #total_entries
    #
    # A per_page of zero (or nil) yields zero pages instead of raising a
    # FloatDomainError from the division.
    def initialize(page, per_page, entries, total_entries)
      @current_page, @per_page, @total_entries = page, per_page, total_entries

      page_size    = @per_page.to_f
      @total_pages = page_size.zero? ? 0 : (entries / page_size).ceil
    end

    # Build a collection holding only the "sphinx_internal_id" attribute of
    # each match. The options argument is accepted for symmetry with
    # create_from_results and is not used.
    def self.ids_from_results(results, page, limit, options)
      collection = self.new(page, limit,
        results[:total] || 0, results[:total_found] || 0
      )
      collection.results = results
      collection.replace(
        results[:matches].collect { |match|
          match[:attributes]["sphinx_internal_id"]
        }
      )
      collection
    end

    # Build a collection holding the model instances for each match.
    def self.create_from_results(results, page, limit, options)
      collection = self.new(page, limit,
        results[:total] || 0, results[:total_found] || 0
      )
      collection.results = results
      collection.replace instances_from_matches(results[:matches], options)
      collection
    end

    # Single-model lookup when options[:class] is given, otherwise a
    # multi-model lookup keyed on each match's "class_crc" attribute.
    def self.instances_from_matches(matches, options = {})
      if klass = options[:class]
        instances_from_class klass, matches, options
      else
        instances_from_classes matches, options
      end
    end

    # Load the records of klass referenced by matches.
    #
    # Returns the records in match order, with nil in place of any record the
    # finder did not return - unless options[:sql_order] is set, in which case
    # the finder's own ordering is returned untouched.
    #
    # Raises StaleIdsException (carrying the missing ids) when
    # options[:raise_on_stale] is set and fewer records than ids came back.
    def self.instances_from_class(klass, matches, options = {})
      index_options = klass.sphinx_index_options

      ids = matches.collect { |match| match[:attributes]["sphinx_internal_id"] }
      instances = ids.length > 0 ? klass.find(
        :all,
        :joins      => options[:joins],
        :conditions => {klass.primary_key.to_sym => ids},
        :include    => (options[:include] || index_options[:include]),
        :select     => (options[:select]  || index_options[:select]),
        :order      => (options[:sql_order] || index_options[:sql_order])
      ) : []

      # Raise an exception if we find records in Sphinx but not in the DB, so
      # the search method can retry without them. See
      # ThinkingSphinx::Search.retry_search_on_stale_index.
      if options[:raise_on_stale] && instances.length < ids.length
        stale_ids = ids - instances.map { |i| i.id }
        raise StaleIdsException, stale_ids
      end

      # if the user has specified an SQL order, return the collection
      # without rearranging it into the Sphinx order
      return instances if options[:sql_order]

      # Index once by id (first occurrence wins, as a linear detect would)
      # rather than scanning the whole list for every id.
      by_id = index_by_id(instances)
      ids.collect { |obj_id| by_id[obj_id] }
    end

    # Group results by class and call #find(:all) once for each group to reduce
    # the number of #find's in multi-model searches.
    #
    # Returns one entry per match, in match order; an entry is nil when no
    # loaded record corresponds to the match.
    def self.instances_from_classes(matches, options = {})
      groups = matches.group_by { |match| match[:attributes]["class_crc"] }

      lookup = {}
      groups.each do |crc, group|
        lookup[crc] = index_by_id(
          instances_from_class(class_from_crc(crc), group, options)
        )
      end

      matches.collect do |match|
        attributes = match[:attributes]
        lookup[attributes["class_crc"]][attributes["sphinx_internal_id"]]
      end
    end

    # Resolve a class from its CRC. The CRC-to-name map is built once from
    # ThinkingSphinx.indexed_models (and their subclasses) and then cached.
    def self.class_from_crc(crc)
      @@models_by_crc ||= ThinkingSphinx.indexed_models.inject({}) do |hash, model|
        hash[model.constantize.to_crc32] = model
        model.constantize.subclasses.each { |subclass|
          hash[subclass.to_crc32] = subclass.name
        }
        hash
      end
      @@models_by_crc[crc].constantize
    end

    # Map each record's id to the record. nil entries (placeholders for
    # records that could not be loaded) are skipped, and the first record
    # seen for an id is the one kept.
    def self.index_by_id(instances)
      instances.inject({}) do |hash, obj|
        hash[obj.id] = obj unless obj.nil? || hash.key?(obj.id)
        hash
      end
    end
    private_class_method :index_by_id

    # The page before the current one, or nil on the first page.
    def previous_page
      current_page > 1 ? (current_page - 1) : nil
    end

    # The page after the current one, or nil on the last page.
    def next_page
      current_page < total_pages ? (current_page + 1) : nil
    end

    # Number of entries that precede the current page.
    def offset
      (current_page - 1) * @per_page
    end

    # Treat each_with_<attribute> as each_with_attribute('<attribute>').
    def method_missing(method, *args, &block)
      name = method.to_s
      return super unless name =~ /\Aeach_with_/

      each_with_attribute name.sub(/\Aeach_with_/, ''), &block
    end

    # Keep respond_to? in step with the dynamic each_with_* methods.
    def respond_to_missing?(method, include_private = false)
      method.to_s =~ /\Aeach_with_/ ? true : super
    end

    # Yield each entry with its match's "@groupby" and "@count" attributes.
    def each_with_groupby_and_count(&block)
      results[:matches].each_with_index do |match, index|
        yield self[index], match[:attributes]["@groupby"], match[:attributes]["@count"]
      end
    end

    # Yield each entry with the named match attribute, falling back to the
    # "@"-prefixed attribute of the same name.
    def each_with_attribute(attribute, &block)
      results[:matches].each_with_index do |match, index|
        yield self[index], (match[:attributes][attribute] || match[:attributes]["@#{attribute}"])
      end
    end

    # Yield each entry with its match's :weight.
    def each_with_weighting(&block)
      results[:matches].each_with_index do |match, index|
        yield self[index], match[:weight]
      end
    end

    # Like inject, but the block also receives the entry and its match's
    # "@groupby" and "@count" attributes.
    def inject_with_groupby_and_count(initial = nil, &block)
      index = -1
      results[:matches].inject(initial) do |memo, match|
        index += 1
        yield memo, self[index], match[:attributes]["@groupby"], match[:attributes]["@count"]
      end
    end
  end
end
@@ -0,0 +1,275 @@
1
+ require 'erb'
2
+ require 'singleton'
3
+
4
module ThinkingSphinx
  # This class both keeps track of the configuration settings for Sphinx and
  # also generates the resulting file for Sphinx to use.
  #
  # Here are the default settings, relative to RAILS_ROOT where relevant:
  #
  # config file::           config/#{environment}.sphinx.conf
  # searchd log file::      log/searchd.log
  # query log file::        log/searchd.query.log
  # pid file::              log/searchd.#{environment}.pid
  # searchd files::         db/sphinx/#{environment}/
  # address::               127.0.0.1
  # port::                  3312
  # allow star::            false
  # min prefix length::     1
  # min infix length::      1
  # mem limit::             64M
  # max matches::           1000
  # morphology::            nil
  # charset type::          utf-8
  # charset table::         nil
  # ignore chars::          nil
  # html strip::            false
  # html remove elements::  ''
  # searchd_binary_name::   searchd
  # indexer_binary_name::   indexer
  #
  # If you want to change these settings, create a YAML file at
  # config/sphinx.yml with settings for each environment, in a similar
  # fashion to database.yml - using the following keys: config_file,
  # searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
  # allow_star, enable_star, min_prefix_len, min_infix_len, mem_limit,
  # max_matches, morphology, charset_type, charset_table, ignore_chars,
  # html_strip, html_remove_elements, delayed_job_priority,
  # searchd_binary_name, indexer_binary_name.
  #
  # I think you've got the idea.
  #
  # Each setting in the YAML file is optional - so only put in the ones you
  # want to change.
  #
  # Keep in mind, if for some particular reason you're using a version of
  # Sphinx older than 0.9.8 r871 (that's prior to the proper 0.9.8 release),
  # don't set allow_star to true.
  #
  class Configuration
    include Singleton

    SourceOptions = %w( mysql_connect_flags sql_range_step sql_query_pre
      sql_query_post sql_ranged_throttle sql_query_post_index )

    IndexOptions  = %w( charset_table charset_type docinfo enable_star
      exceptions html_index_attrs html_remove_elements html_strip ignore_chars
      min_infix_len min_prefix_len min_word_len mlock morphology ngram_chars
      ngram_len phrase_boundary phrase_boundary_step preopen stopwords
      wordforms )

    CustomOptions = %w( disable_range )

    # address, port, pid_file, searchd_log_file and query_log_file are
    # defined explicitly further down (they delegate to the searchd section
    # of the Riddle configuration), so they are not declared here.
    attr_accessor :config_file, :searchd_file_path, :allow_star,
      :database_yml_file, :app_root, :bin_path, :model_directories,
      :delayed_job_priority, :searchd_binary_name, :indexer_binary_name

    attr_accessor :source_options, :index_options

    attr_writer :type

    attr_reader :configuration

    # Load in the configuration settings - this will look for config/sphinx.yml
    # and parse it according to the current environment.
    #
    # app_root is the fallback application root, used when neither RAILS_ROOT
    # nor Merb is defined.
    def initialize(app_root = Dir.pwd)
      @app_root = app_root
      reset
    end

    # Yield the singleton for customisation, then rebuild its settings around
    # whatever app_root it ends up with.
    def self.configure(&block)
      yield instance
      instance.reset(instance.app_root)
    end

    # Restore every setting to its default and re-read config/sphinx.yml.
    #
    # custom_app_root, when given, becomes the application root. Otherwise
    # RAILS_ROOT or Merb.root is used if defined, then the current app_root,
    # then the working directory. Returns self.
    def reset(custom_app_root = nil)
      if custom_app_root
        self.app_root = custom_app_root
      else
        self.app_root   = RAILS_ROOT if defined?(RAILS_ROOT)
        self.app_root   = Merb.root  if defined?(Merb)
        self.app_root ||= Dir.pwd
      end

      @configuration = Riddle::Configuration.new
      # The memoised controller wraps the previous configuration object.
      @controller    = nil

      @configuration.searchd.address   = "127.0.0.1"
      @configuration.searchd.port      = 3312
      @configuration.searchd.pid_file  = "#{self.app_root}/log/searchd.#{environment}.pid"
      @configuration.searchd.log       = "#{self.app_root}/log/searchd.log"
      @configuration.searchd.query_log = "#{self.app_root}/log/searchd.query.log"

      self.database_yml_file    = "#{self.app_root}/config/database.yml"
      self.config_file          = "#{self.app_root}/config/#{environment}.sphinx.conf"
      self.searchd_file_path    = "#{self.app_root}/db/sphinx/#{environment}"
      self.allow_star           = false
      self.bin_path             = ""
      self.model_directories    = ["#{app_root}/app/models/"] +
        Dir.glob("#{app_root}/vendor/plugins/*/app/models/")
      self.delayed_job_priority = 0

      self.source_options = {}
      self.index_options  = {
        :charset_type => "utf-8"
      }

      self.searchd_binary_name = "searchd"
      self.indexer_binary_name = "indexer"

      parse_config

      self
    end

    # The current environment name: Merb.environment when Merb is defined,
    # otherwise ENV['RAILS_ENV'], defaulting to "development". Cached after
    # the first call.
    def self.environment
      @@environment ||= (
        defined?(Merb) ? Merb.environment : ENV['RAILS_ENV']
      ) || "development"
    end

    def environment
      self.class.environment
    end

    # A Riddle::Controller for the current configuration and config file.
    def controller
      @controller ||= Riddle::Controller.new(@configuration, self.config_file)
    end

    # Generate the config file for Sphinx by using all the settings defined and
    # looping through all the models with indexes to build the relevant
    # indexer and searchd configuration, and sources and indexes details.
    #
    # file_path defaults to config_file.
    def build(file_path = nil)
      load_models
      file_path ||= config_file.to_s

      @configuration.indexes.clear

      ThinkingSphinx.indexed_models.each_with_index do |model, model_index|
        @configuration.indexes.concat model.constantize.to_riddle(model_index)
      end

      # File.open rather than Kernel#open: the path is always a plain file.
      File.open(file_path, "w") do |file|
        file.write @configuration.render
      end
    end

    # Make sure all models are loaded - without reloading any that
    # ActiveRecord::Base is already aware of (otherwise we start to hit some
    # messy dependencies issues).
    #
    def load_models
      return if defined?(Rails) && Rails.configuration.cache_classes

      self.model_directories.each do |base|
        # Escape the directory so path characters are matched literally.
        pattern = /^#{Regexp.escape(base)}([\w_\/\\]+)\.rb/

        Dir["#{base}**/*.rb"].each do |file|
          model_name = file.gsub(pattern, '\1')

          next if ::ActiveRecord::Base.send(:subclasses).detect { |model|
            model.name == model_name
          }

          begin
            model_name.camelize.constantize
          rescue LoadError
            # Drop the leading directories and try again; gsub! returns nil
            # once there is nothing left to strip, which ends the retries.
            model_name.gsub!(/.*[\/\\]/, '').nil? ? next : retry
          rescue NameError
            next
          rescue StandardError
            puts "Warning: Error loading #{file}"
          end
        end
      end
    end

    def address
      @configuration.searchd.address
    end

    def address=(address)
      @configuration.searchd.address = address
    end

    def port
      @configuration.searchd.port
    end

    def port=(port)
      @configuration.searchd.port = port
    end

    def pid_file
      @configuration.searchd.pid_file
    end

    def pid_file=(pid_file)
      @configuration.searchd.pid_file = pid_file
    end

    def searchd_log_file
      @configuration.searchd.log
    end

    def searchd_log_file=(file)
      @configuration.searchd.log = file
    end

    def query_log_file
      @configuration.searchd.query_log
    end

    def query_log_file=(file)
      @configuration.searchd.query_log = file
    end

    # The source type: an explicitly assigned value if there is one,
    # otherwise "xml" or "sql" depending on the database adapter.
    def type
      return @type if defined?(@type)
      case ::ActiveRecord::Base.sphinx_database_adapter
      when SQLite3Adapter, OracleAdapter
        @type = "xml"
      else
        @type = "sql"
      end
    end

    private

    # Parse the config/sphinx.yml file - if it exists - then use the attribute
    # accessors to set the appropriate values. Nothing too clever.
    #
    def parse_config
      path = "#{app_root}/config/sphinx.yml"
      return unless File.exist?(path)

      # An empty file parses to a non-Hash value; treat it as "no settings".
      settings = YAML.load(ERB.new(IO.read(path)).result)
      conf     = settings.is_a?(Hash) ? settings[environment] : nil

      (conf || {}).each do |key, value|
        self.send("#{key}=", value) if self.respond_to?("#{key}=")
        next if key == "type"

        set_sphinx_setting self.source_options, key, value, SourceOptions
        set_sphinx_setting self.index_options,  key, value, IndexOptions
        set_sphinx_setting self.index_options,  key, value, CustomOptions
        set_sphinx_setting @configuration.searchd, key, value
        set_sphinx_setting @configuration.indexer, key, value
      end

      # Same test as blank?, without depending on ActiveSupport.
      self.bin_path += '/' unless self.bin_path.to_s.strip.empty?

      if self.allow_star
        self.index_options[:enable_star]    = true
        self.index_options[:min_prefix_len] = 1
      end
    end

    # Store value under key on object. Hashes only take keys listed in
    # allowed; other objects (and this configuration) receive the value
    # through their "#{key}=" writer when they have one.
    def set_sphinx_setting(object, key, value, allowed = {})
      if object.is_a?(Hash)
        object[key.to_sym] = value if allowed.include?(key.to_s)
      else
        # Check for the writer itself: a reader without a writer (such as
        # environment) must be skipped rather than raise NoMethodError.
        writer = "#{key}="
        object.send(writer, value) if object.respond_to?(writer)
        send(writer, value)        if respond_to?(writer)
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'zlib'
2
+
3
module ThinkingSphinx
  module Core
    # Checksum helper that is mixed into ::String below.
    module String
      # The CRC32 checksum of the receiver, as computed by Zlib.
      def to_crc32
        Zlib.crc32(self)
      end
    end
  end
end

# Make to_crc32 available on every string.
String.send(:include, ThinkingSphinx::Core::String)
@@ -0,0 +1,30 @@
1
+ require 'thinking_sphinx/deltas/default_delta'
2
+ require 'thinking_sphinx/deltas/delayed_delta'
3
+ require 'thinking_sphinx/deltas/datetime_delta'
4
+
5
module ThinkingSphinx
  module Deltas
    # Build the delta handler requested by the index's :delta option, which
    # is removed from index.local_options in the process.
    #
    # true / :default:: a DefaultDelta
    # :delayed::        a DelayedDelta
    # :datetime::       a DatetimeDelta
    # false / nil::     no delta handling - returns nil
    # a class, or the name of one, descending from DefaultDelta::
    #                   an instance of that class
    #
    # Raises "Unknown delta type" for anything else, and NameError when a
    # class name cannot be resolved.
    def self.parse(index)
      delta_option = index.local_options.delete(:delta)
      case delta_option
      when TrueClass, :default
        DefaultDelta.new index, index.local_options
      when :delayed
        DelayedDelta.new index, index.local_options
      when :datetime
        DatetimeDelta.new index, index.local_options
      when FalseClass, nil
        nil
      else
        if delta_option.is_a?(String)
          # Walk the name one segment at a time so that namespaced names
          # ("Foo::BarDelta") resolve as well as top-level ones.
          delta_option = delta_option.split('::').inject(Object) do |scope, name|
            name.empty? ? scope : scope.const_get(name)
          end
        end

        # Anything without ancestors (an unknown symbol, a number, ...) is
        # reported as an unknown type rather than failing on the lookup.
        if delta_option.respond_to?(:ancestors) &&
           delta_option.ancestors.include?(ThinkingSphinx::Deltas::DefaultDelta)
          delta_option.new index, index.local_options
        else
          raise "Unknown delta type"
        end
      end
    end
  end
end