initforthe-thinking-sphinx 1.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +141 -0
  3. data/lib/thinking_sphinx.rb +215 -0
  4. data/lib/thinking_sphinx/active_record.rb +278 -0
  5. data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
  6. data/lib/thinking_sphinx/active_record/delta.rb +87 -0
  7. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  8. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  9. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +42 -0
  10. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  11. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +135 -0
  12. data/lib/thinking_sphinx/association.rb +164 -0
  13. data/lib/thinking_sphinx/attribute.rb +268 -0
  14. data/lib/thinking_sphinx/class_facet.rb +15 -0
  15. data/lib/thinking_sphinx/collection.rb +148 -0
  16. data/lib/thinking_sphinx/configuration.rb +262 -0
  17. data/lib/thinking_sphinx/core/string.rb +15 -0
  18. data/lib/thinking_sphinx/deltas.rb +30 -0
  19. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  20. data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta.rb +27 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  23. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  24. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  25. data/lib/thinking_sphinx/deploy/capistrano.rb +82 -0
  26. data/lib/thinking_sphinx/facet.rb +108 -0
  27. data/lib/thinking_sphinx/facet_collection.rb +59 -0
  28. data/lib/thinking_sphinx/field.rb +82 -0
  29. data/lib/thinking_sphinx/index.rb +99 -0
  30. data/lib/thinking_sphinx/index/builder.rb +287 -0
  31. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  32. data/lib/thinking_sphinx/property.rb +160 -0
  33. data/lib/thinking_sphinx/rails_additions.rb +136 -0
  34. data/lib/thinking_sphinx/search.rb +727 -0
  35. data/lib/thinking_sphinx/search/facets.rb +104 -0
  36. data/lib/thinking_sphinx/source.rb +150 -0
  37. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  38. data/lib/thinking_sphinx/source/sql.rb +126 -0
  39. data/lib/thinking_sphinx/tasks.rb +162 -0
  40. data/rails/init.rb +14 -0
  41. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  42. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  43. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  44. data/spec/unit/thinking_sphinx/active_record_spec.rb +329 -0
  45. data/spec/unit/thinking_sphinx/association_spec.rb +246 -0
  46. data/spec/unit/thinking_sphinx/attribute_spec.rb +338 -0
  47. data/spec/unit/thinking_sphinx/collection_spec.rb +15 -0
  48. data/spec/unit/thinking_sphinx/configuration_spec.rb +222 -0
  49. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  50. data/spec/unit/thinking_sphinx/facet_collection_spec.rb +64 -0
  51. data/spec/unit/thinking_sphinx/facet_spec.rb +302 -0
  52. data/spec/unit/thinking_sphinx/field_spec.rb +154 -0
  53. data/spec/unit/thinking_sphinx/index/builder_spec.rb +355 -0
  54. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  55. data/spec/unit/thinking_sphinx/index_spec.rb +45 -0
  56. data/spec/unit/thinking_sphinx/rails_additions_spec.rb +191 -0
  57. data/spec/unit/thinking_sphinx/search_spec.rb +228 -0
  58. data/spec/unit/thinking_sphinx/source_spec.rb +217 -0
  59. data/spec/unit/thinking_sphinx_spec.rb +151 -0
  60. data/tasks/distribution.rb +67 -0
  61. data/tasks/rails.rake +1 -0
  62. data/tasks/testing.rb +78 -0
  63. data/vendor/after_commit/LICENSE +20 -0
  64. data/vendor/after_commit/README +16 -0
  65. data/vendor/after_commit/Rakefile +22 -0
  66. data/vendor/after_commit/init.rb +8 -0
  67. data/vendor/after_commit/lib/after_commit.rb +45 -0
  68. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  69. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  70. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  71. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  72. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  73. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  74. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  75. data/vendor/riddle/lib/riddle.rb +30 -0
  76. data/vendor/riddle/lib/riddle/client.rb +619 -0
  77. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  78. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  79. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  80. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  81. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  82. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  83. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  84. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  85. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  86. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  87. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  88. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  89. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  90. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  91. metadata +190 -0
@@ -0,0 +1,262 @@
1
+ require 'erb'
2
+ require 'singleton'
3
+
module ThinkingSphinx
  # This class both keeps track of the configuration settings for Sphinx and
  # also generates the resulting file for Sphinx to use.
  #
  # Here are the default settings, relative to the application root where
  # relevant:
  #
  # config file::           config/#{environment}.sphinx.conf
  # searchd log file::      log/searchd.log
  # query log file::        log/searchd.query.log
  # pid file::              log/searchd.#{environment}.pid
  # searchd files::         db/sphinx/#{environment}/
  # address::               127.0.0.1
  # port::                  3312
  # allow star::            false
  # min prefix length::     1
  # min infix length::      1
  # mem limit::             64M
  # max matches::           1000
  # morphology::            nil
  # charset type::          utf-8
  # charset table::         nil
  # ignore chars::          nil
  # html strip::            false
  # html remove elements::  ''
  # searchd_binary_name::   searchd
  # indexer_binary_name::   indexer
  #
  # If you want to change these settings, create a YAML file at
  # config/sphinx.yml with settings for each environment, in a similar
  # fashion to database.yml - using the following keys: config_file,
  # searchd_log_file, query_log_file, pid_file, searchd_file_path, port,
  # allow_star, enable_star, min_prefix_len, min_infix_len, mem_limit,
  # max_matches, morphology, charset_type, charset_table, ignore_chars,
  # html_strip, html_remove_elements, delayed_job_priority,
  # searchd_binary_name, indexer_binary_name.
  #
  # Each setting in the YAML file is optional - so only put in the ones you
  # want to change.
  #
  # Keep in mind, if for some particular reason you're using a version of
  # Sphinx older than 0.9.8 r871 (that's prior to the proper 0.9.8 release),
  # don't set allow_star to true.
  #
  class Configuration
    include Singleton

    # Keys from sphinx.yml that are copied into source_options.
    SourceOptions = %w( mysql_connect_flags sql_range_step sql_query_pre
      sql_query_post sql_ranged_throttle sql_query_post_index )

    # Keys from sphinx.yml that are copied into index_options.
    IndexOptions  = %w( charset_table charset_type docinfo enable_star
      exceptions html_index_attrs html_remove_elements html_strip ignore_chars
      min_infix_len min_prefix_len min_word_len mlock morphology ngram_chars
      ngram_len phrase_boundary phrase_boundary_step preopen stopwords
      wordforms )

    # Non-Sphinx keys that are also stored in index_options.
    CustomOptions = %w( disable_range )

    # address, port, pid_file, searchd_log_file and query_log_file are not
    # listed here: they are explicit delegators to the searchd section of the
    # Riddle configuration, defined further down.
    attr_accessor :config_file, :searchd_file_path, :allow_star,
      :database_yml_file, :app_root, :bin_path, :model_directories,
      :delayed_job_priority, :searchd_binary_name, :indexer_binary_name

    attr_accessor :source_options, :index_options

    attr_reader :configuration

    # Load in the configuration settings - this will look for config/sphinx.yml
    # and parse it according to the current environment.
    #
    # app_root is the fallback application root, used when neither RAILS_ROOT
    # nor Merb is defined.
    #
    def initialize(app_root = Dir.pwd)
      # Remember the fallback before resetting; reset only overrides it when
      # a framework root is available.
      @app_root = app_root
      reset
    end

    # Yields the singleton instance to the block, then rebuilds every setting
    # from the (possibly changed) app_root of that instance.
    #
    def self.configure(&block)
      yield instance
      instance.reset(instance.app_root)
    end

    # Restores every setting to its default and re-reads config/sphinx.yml.
    #
    # custom_app_root, when given, becomes the application root. Otherwise
    # RAILS_ROOT, then Merb.root, take precedence over the current value, and
    # the working directory is the last resort.
    #
    # Returns self.
    #
    def reset(custom_app_root=nil)
      if custom_app_root
        self.app_root = custom_app_root
      else
        self.app_root   = RAILS_ROOT if defined?(RAILS_ROOT)
        self.app_root   = Merb.root  if defined?(Merb)
        self.app_root ||= Dir.pwd
      end

      @configuration = Riddle::Configuration.new
      @configuration.searchd.address   = "127.0.0.1"
      @configuration.searchd.port      = 3312
      @configuration.searchd.pid_file  = "#{self.app_root}/log/searchd.#{environment}.pid"
      @configuration.searchd.log       = "#{self.app_root}/log/searchd.log"
      @configuration.searchd.query_log = "#{self.app_root}/log/searchd.query.log"

      self.database_yml_file    = "#{self.app_root}/config/database.yml"
      self.config_file          = "#{self.app_root}/config/#{environment}.sphinx.conf"
      self.searchd_file_path    = "#{self.app_root}/db/sphinx/#{environment}"
      self.allow_star           = false
      self.bin_path             = ""
      self.model_directories    = ["#{app_root}/app/models/"] +
        Dir.glob("#{app_root}/vendor/plugins/*/app/models/")
      self.delayed_job_priority = 0

      self.source_options  = {}
      self.index_options   = {
        :charset_type => "utf-8"
      }

      self.searchd_binary_name = "searchd"
      self.indexer_binary_name = "indexer"

      parse_config

      self
    end

    # The environment name: Merb.environment when Merb is defined, otherwise
    # ENV['RAILS_ENV'], falling back to "development". The value is cached in
    # a class variable after the first call.
    #
    def self.environment
      @@environment ||= (
        defined?(Merb) ? Merb.environment : ENV['RAILS_ENV']
      ) || "development"
    end

    # Instance-level shortcut for Configuration.environment.
    #
    def environment
      self.class.environment
    end

    # A memoised Riddle::Controller built from the Riddle configuration and
    # the config file path that were current on first call.
    #
    def controller
      @controller ||= Riddle::Controller.new(@configuration, self.config_file)
    end

    # Generate the config file for Sphinx by using all the settings defined and
    # looping through all the models with indexes to build the relevant
    # indexer and searchd configuration, and sources and indexes details.
    #
    # file_path defaults to config_file.
    #
    def build(file_path=nil)
      load_models
      file_path ||= "#{self.config_file}"

      @configuration.indexes.clear

      ThinkingSphinx.indexed_models.each_with_index do |model, model_index|
        @configuration.indexes.concat model.constantize.to_riddle(model_index)
      end

      # File.open rather than Kernel#open, which would treat a path starting
      # with "|" as a command to run.
      File.open(file_path, "w") do |file|
        file.write @configuration.render
      end
    end

    # Make sure all models are loaded - without reloading any that
    # ActiveRecord::Base is already aware of (otherwise we start to hit some
    # messy dependencies issues).
    #
    def load_models
      return if defined?(Rails) && Rails.configuration.cache_classes

      self.model_directories.each do |base|
        # Escape the directory so that regexp metacharacters in the path
        # (such as "+" or "(") are matched literally.
        pattern = /^#{Regexp.escape(base)}([\w_\/\\]+)\.rb/

        Dir["#{base}**/*.rb"].each do |file|
          model_name = file.gsub(pattern, '\1')

          next if ::ActiveRecord::Base.send(:subclasses).detect { |model|
            model.name == model_name
          }

          begin
            model_name.camelize.constantize
          rescue LoadError
            # Drop the leading directory part and try again; give up on this
            # file once there is nothing left to strip.
            model_name.gsub!(/.*[\/\\]/, '').nil? ? next : retry
          rescue NameError
            next
          rescue StandardError
            puts "Warning: Error loading #{file}"
          end
        end
      end
    end

    # The following accessors delegate to the searchd section of the Riddle
    # configuration instead of storing the value on this object.

    def address
      @configuration.searchd.address
    end

    def address=(address)
      @configuration.searchd.address = address
    end

    def port
      @configuration.searchd.port
    end

    def port=(port)
      @configuration.searchd.port = port
    end

    def pid_file
      @configuration.searchd.pid_file
    end

    def pid_file=(pid_file)
      @configuration.searchd.pid_file = pid_file
    end

    def searchd_log_file
      @configuration.searchd.log
    end

    def searchd_log_file=(file)
      @configuration.searchd.log = file
    end

    def query_log_file
      @configuration.searchd.query_log
    end

    def query_log_file=(file)
      @configuration.searchd.query_log = file
    end

    private

    # Parse the config/sphinx.yml file - if it exists - then use the attribute
    # accessors to set the appropriate values. Nothing too clever.
    #
    # The file is run through ERB before being parsed as YAML, and only the
    # section for the current environment is applied.
    #
    def parse_config
      path = "#{app_root}/config/sphinx.yml"
      return unless File.exist?(path)

      # An empty file parses to a non-Hash value; treat that as "no settings"
      # rather than failing on the environment lookup.
      settings = YAML::load(ERB.new(IO.read(path)).result)
      conf     = settings.is_a?(Hash) ? settings[environment] : nil

      conf.each do |key,value|
        self.send("#{key}=", value) if self.respond_to?("#{key}=")

        set_sphinx_setting self.source_options, key, value, SourceOptions
        set_sphinx_setting self.index_options,  key, value, IndexOptions
        set_sphinx_setting self.index_options,  key, value, CustomOptions
        set_sphinx_setting @configuration.searchd, key, value
        set_sphinx_setting @configuration.indexer, key, value
      end unless conf.nil?

      # bin_path is prepended directly to the binary name, so make sure it
      # ends with exactly one separator when it is set.
      prefix = self.bin_path.to_s
      unless prefix.strip.empty? || prefix.end_with?('/')
        self.bin_path = "#{prefix}/"
      end

      if self.allow_star
        self.index_options[:enable_star]    = true
        self.index_options[:min_prefix_len] = 1
      end
    end

    # Applies a single key/value pair to object.
    #
    # For a Hash, the value is stored under the symbolised key, but only when
    # the key is listed in allowed. For any other object, the matching setter
    # is called on the object and on this configuration - in each case only
    # if that setter exists, so unknown or read-only names are ignored.
    #
    def set_sphinx_setting(object, key, value, allowed = {})
      if object.is_a?(Hash)
        object[key.to_sym] = value if allowed.include?(key.to_s)
      else
        setter = "#{key}="
        object.send(setter, value) if object.respond_to?(setter)
        send(setter, value)        if self.respond_to?(setter)
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ require 'zlib'
2
+
module ThinkingSphinx
  module Core
    # Mixin that adds a CRC32 checksum helper to string-like objects.
    module String
      # Returns the CRC32 checksum of the receiver, as computed by Zlib.
      def to_crc32
        Zlib.crc32(self)
      end
    end
  end
end

# Make the helper available on every String.
String.send(:include, ThinkingSphinx::Core::String)
@@ -0,0 +1,30 @@
1
+ require 'thinking_sphinx/deltas/default_delta'
2
+ require 'thinking_sphinx/deltas/delayed_delta'
3
+ require 'thinking_sphinx/deltas/datetime_delta'
4
+
module ThinkingSphinx
  module Deltas
    # Builds the delta strategy object for an index from its :delta option.
    #
    # The :delta entry is removed from index.local_options; the remaining
    # options are handed to the strategy's constructor.
    #
    # Accepted values:
    # true, :default:: DefaultDelta
    # :delayed::       DelayedDelta
    # :datetime::      DatetimeDelta
    # false, nil::     no delta (returns nil)
    # Class or String:: a DefaultDelta subclass, or the name of one
    #
    # Raises RuntimeError ("Unknown delta type") for anything else, including
    # values that are not classes at all.
    #
    def self.parse(index)
      delta_option = index.local_options.delete(:delta)
      case delta_option
      when TrueClass, :default
        DefaultDelta.new index, index.local_options
      when :delayed
        DelayedDelta.new index, index.local_options
      when :datetime
        DatetimeDelta.new index, index.local_options
      when FalseClass, nil
        nil
      else
        if delta_option.is_a?(String)
          delta_option = Kernel.const_get(delta_option)
        end

        # Only classes have ancestors; checking first keeps unknown symbols
        # and other stray values on the "Unknown delta type" path.
        unless delta_option.is_a?(Class) &&
          delta_option.ancestors.include?(ThinkingSphinx::Deltas::DefaultDelta)
          raise "Unknown delta type"
        end

        delta_option.new index, index.local_options
      end
    end
  end
end
@@ -0,0 +1,50 @@
module ThinkingSphinx
  module Deltas
    # Delta strategy driven by a timestamp column instead of a boolean flag:
    # a record counts as changed while its timestamp is newer than
    # threshold ago.
    #
    # Options (both removed from the options hash):
    # :delta_column:: timestamp column to inspect (default :updated_at)
    # :threshold::    how far back changes count (default 1.day)
    #
    class DatetimeDelta < ThinkingSphinx::Deltas::DefaultDelta
      attr_accessor :column, :threshold

      def initialize(index, options)
        @index     = index
        @column    = options.delete(:delta_column) || :updated_at
        @threshold = options.delete(:threshold)    || 1.day
      end

      # Nothing is indexed at save time with this strategy; see delayed_index.
      # Always returns true.
      def index(model, instance = nil)
        # do nothing
        true
      end

      # Rebuilds the delta index for model and then merges it into the core
      # index, shelling out to the configured indexer binary. The combined
      # indexer output is printed unless delta output is suppressed.
      # Always returns true.
      def delayed_index(model)
        config  = ThinkingSphinx::Configuration.instance
        rotate  = ThinkingSphinx.sphinx_running? ? "--rotate" : ""
        # Honour the configured binary name, and quote the config path so a
        # path containing spaces stays a single shell argument.
        indexer = "#{config.bin_path}#{config.indexer_binary_name}"

        output  = `#{indexer} --config "#{config.config_file}" #{rotate} #{delta_index_name model}`
        output += `#{indexer} --config "#{config.config_file}" #{rotate} --merge #{core_index_name model} #{delta_index_name model} --merge-dst-range sphinx_deleted 0 0`
        puts output unless ThinkingSphinx.suppress_delta_output?

        true
      end

      # There is no flag to set: the timestamp column itself marks the change.
      def toggle(instance)
        # do nothing
      end

      # True when the instance's timestamp is more recent than threshold ago.
      # A record without a timestamp is treated as not toggled.
      def toggled(instance)
        stamp = instance.send(@column)
        !stamp.nil? && stamp > @threshold.ago
      end

      # No reset statement is needed, as no flag column is maintained.
      def reset_query(model)
        nil
      end

      # SQL condition selecting recently changed rows when toggled is truthy;
      # nil (no condition) otherwise.
      def clause(model, toggled)
        if toggled
          "#{model.quoted_table_name}.#{model.connection.quote_column_name(@column.to_s)}" +
          " > #{adapter.time_difference(@threshold)}"
        else
          nil
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
module ThinkingSphinx
  module Deltas
    # Default delta strategy: a boolean column on the model flags records
    # that have changed since the core index was last built.
    #
    # Options:
    # :delta_column:: name of the flag column (default :delta); removed from
    #                 the options hash.
    #
    class DefaultDelta
      attr_accessor :column

      def initialize(index, options)
        @index  = index
        @column = options.delete(:delta_column) || :delta
      end

      # Re-runs the indexer for the model's delta index and, when an instance
      # is given, marks its document as deleted in the core index.
      #
      # Does nothing when updates or deltas are disabled, or when the given
      # instance is not flagged. Always returns true.
      #
      def index(model, instance = nil)
        return true unless ThinkingSphinx.updates_enabled? &&
          ThinkingSphinx.deltas_enabled?
        return true if instance && !toggled(instance)

        config = ThinkingSphinx::Configuration.instance
        client = Riddle::Client.new config.address, config.port
        rotate = ThinkingSphinx.sphinx_running? ? "--rotate" : ""

        # Honour the configured binary name, and quote the config path so a
        # path containing spaces stays a single shell argument.
        indexer = "#{config.bin_path}#{config.indexer_binary_name}"
        output  = `#{indexer} --config "#{config.config_file}" #{rotate} #{delta_index_name model}`
        puts(output) unless ThinkingSphinx.suppress_delta_output?

        client.update(
          core_index_name(model),
          ['sphinx_deleted'],
          {instance.sphinx_document_id => [1]}
        ) if instance && ThinkingSphinx.sphinx_running? && instance.in_both_indexes?

        true
      end

      # Flags the instance as changed by setting the configured delta column.
      def toggle(instance)
        instance.send("#{@column}=", true)
      end

      # Current value of the configured delta column on the instance.
      def toggled(instance)
        instance.send(@column)
      end

      # SQL statement that clears the flag on every flagged row.
      def reset_query(model)
        "UPDATE #{model.quoted_table_name} SET " +
        "#{model.connection.quote_column_name(@column.to_s)} = #{adapter.boolean(false)} " +
        "WHERE #{model.connection.quote_column_name(@column.to_s)} = #{adapter.boolean(true)}"
      end

      # SQL condition matching rows whose flag equals toggled.
      def clause(model, toggled)
        "#{model.quoted_table_name}.#{model.connection.quote_column_name(@column.to_s)}" +
        " = #{adapter.boolean(toggled)}"
      end

      protected

      # Name of the core index, derived from the model that owns the index.
      def core_index_name(model)
        "#{model.source_of_sphinx_index.name.underscore.tr(':/\\', '_')}_core"
      end

      # Name of the delta index, derived from the model that owns the index.
      def delta_index_name(model)
        "#{model.source_of_sphinx_index.name.underscore.tr(':/\\', '_')}_delta"
      end

      private

      # Database adapter of the indexed model, looked up on every call.
      def adapter
        @adapter = @index.model.sphinx_database_adapter
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'delayed/job'
2
+
3
+ require 'thinking_sphinx/deltas/delayed_delta/delta_job'
4
+ require 'thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job'
5
+ require 'thinking_sphinx/deltas/delayed_delta/job'
6
+
module ThinkingSphinx
  module Deltas
    # Delta strategy that queues the indexing work as delayed jobs instead of
    # running the indexer inline.
    class DelayedDelta < ThinkingSphinx::Deltas::DefaultDelta
      # Queues a job that processes the model's delta index and, when an
      # instance is given, a second job that flags its document as deleted in
      # the core index. Both use the configured delayed_job_priority.
      #
      # As with DefaultDelta#index, nothing is queued when updates or deltas
      # are disabled, or when the given instance is not flagged.
      # Always returns true.
      #
      def index(model, instance = nil)
        return true unless ThinkingSphinx.updates_enabled? &&
          ThinkingSphinx.deltas_enabled?
        return true if instance && !toggled(instance)

        priority = ThinkingSphinx::Configuration.instance.delayed_job_priority

        ThinkingSphinx::Deltas::Job.enqueue(
          ThinkingSphinx::Deltas::DeltaJob.new(delta_index_name(model)),
          priority
        )

        Delayed::Job.enqueue(
          ThinkingSphinx::Deltas::FlagAsDeletedJob.new(
            core_index_name(model), instance.sphinx_document_id
          ),
          priority
        ) if instance

        true
      end
    end
  end
end