ebeigarts-thinking-sphinx 1.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. data/LICENCE +20 -0
  2. data/README.textile +143 -0
  3. data/lib/thinking_sphinx.rb +217 -0
  4. data/lib/thinking_sphinx/active_record.rb +278 -0
  5. data/lib/thinking_sphinx/active_record/attribute_updates.rb +48 -0
  6. data/lib/thinking_sphinx/active_record/delta.rb +87 -0
  7. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  8. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  9. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +53 -0
  10. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +54 -0
  11. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +135 -0
  12. data/lib/thinking_sphinx/association.rb +164 -0
  13. data/lib/thinking_sphinx/attribute.rb +269 -0
  14. data/lib/thinking_sphinx/class_facet.rb +15 -0
  15. data/lib/thinking_sphinx/collection.rb +148 -0
  16. data/lib/thinking_sphinx/configuration.rb +275 -0
  17. data/lib/thinking_sphinx/core/string.rb +15 -0
  18. data/lib/thinking_sphinx/deltas.rb +30 -0
  19. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  20. data/lib/thinking_sphinx/deltas/default_delta.rb +68 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta.rb +27 -0
  22. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  23. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  24. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  25. data/lib/thinking_sphinx/deploy/capistrano.rb +82 -0
  26. data/lib/thinking_sphinx/facet.rb +108 -0
  27. data/lib/thinking_sphinx/facet_collection.rb +59 -0
  28. data/lib/thinking_sphinx/field.rb +82 -0
  29. data/lib/thinking_sphinx/index.rb +99 -0
  30. data/lib/thinking_sphinx/index/builder.rb +287 -0
  31. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  32. data/lib/thinking_sphinx/property.rb +160 -0
  33. data/lib/thinking_sphinx/rails_additions.rb +136 -0
  34. data/lib/thinking_sphinx/search.rb +727 -0
  35. data/lib/thinking_sphinx/search/facets.rb +104 -0
  36. data/lib/thinking_sphinx/source.rb +175 -0
  37. data/lib/thinking_sphinx/source/internal_properties.rb +46 -0
  38. data/lib/thinking_sphinx/source/sql.rb +126 -0
  39. data/lib/thinking_sphinx/tasks.rb +245 -0
  40. data/rails/init.rb +14 -0
  41. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  42. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  43. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  44. data/spec/unit/thinking_sphinx/active_record_spec.rb +329 -0
  45. data/spec/unit/thinking_sphinx/association_spec.rb +246 -0
  46. data/spec/unit/thinking_sphinx/attribute_spec.rb +338 -0
  47. data/spec/unit/thinking_sphinx/collection_spec.rb +15 -0
  48. data/spec/unit/thinking_sphinx/configuration_spec.rb +222 -0
  49. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  50. data/spec/unit/thinking_sphinx/facet_collection_spec.rb +64 -0
  51. data/spec/unit/thinking_sphinx/facet_spec.rb +302 -0
  52. data/spec/unit/thinking_sphinx/field_spec.rb +154 -0
  53. data/spec/unit/thinking_sphinx/index/builder_spec.rb +355 -0
  54. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  55. data/spec/unit/thinking_sphinx/index_spec.rb +45 -0
  56. data/spec/unit/thinking_sphinx/rails_additions_spec.rb +191 -0
  57. data/spec/unit/thinking_sphinx/search_spec.rb +228 -0
  58. data/spec/unit/thinking_sphinx/source_spec.rb +217 -0
  59. data/spec/unit/thinking_sphinx_spec.rb +151 -0
  60. data/tasks/distribution.rb +67 -0
  61. data/tasks/rails.rake +1 -0
  62. data/tasks/testing.rb +100 -0
  63. data/vendor/after_commit/LICENSE +20 -0
  64. data/vendor/after_commit/README +16 -0
  65. data/vendor/after_commit/Rakefile +22 -0
  66. data/vendor/after_commit/init.rb +8 -0
  67. data/vendor/after_commit/lib/after_commit.rb +45 -0
  68. data/vendor/after_commit/lib/after_commit/active_record.rb +114 -0
  69. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  70. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  71. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  72. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  73. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  74. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  75. data/vendor/riddle/lib/riddle.rb +30 -0
  76. data/vendor/riddle/lib/riddle/client.rb +619 -0
  77. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  78. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  79. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  80. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  81. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  82. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  83. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  84. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  85. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  86. data/vendor/riddle/lib/riddle/configuration/section.rb +43 -0
  87. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  88. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  89. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  90. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  91. metadata +191 -0
@@ -0,0 +1,53 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')
2
+ require 'test/unit'
3
+ require 'rubygems'
4
+ require 'activerecord'
5
+ require 'after_commit'
6
+ require 'after_commit/active_record'
7
+ require 'after_commit/connection_adapters'
8
+
9
# Connect to a scratch SQLite database and rebuild the one table the tests use.
ActiveRecord::Base.establish_connection("adapter" => "sqlite3", "database" => 'test.sqlite3')

# IF EXISTS makes the drop a no-op on a fresh database without hiding real
# failures (bad connection, locked file), which the previous bare
# begin/rescue/end silently swallowed.
ActiveRecord::Base.connection.execute("drop table if exists mock_records")
ActiveRecord::Base.connection.execute("create table mock_records(id int)")
15
+
16
+ require File.dirname(__FILE__) + '/../init.rb'
17
+
18
# Minimal model whose callbacks set a flag, so the tests can observe which
# after_commit hook fired on a given instance.
class MockRecord < ActiveRecord::Base
  attr_accessor :after_commit_on_create_called
  attr_accessor :after_commit_on_update_called
  attr_accessor :after_commit_on_destroy_called

  after_commit_on_create :do_create
  def do_create
    self.after_commit_on_create_called = true
  end

  after_commit_on_update :do_update
  def do_update
    self.after_commit_on_update_called = true
  end

  # Must hang off the destroy hook: registering it on create (as before) set
  # the destroy flag as soon as the record was created, so the destroy test
  # could never fail.
  # NOTE(review): assumes the plugin defines after_commit_on_destroy next to
  # the create/update variants used above - confirm in after_commit/active_record.
  after_commit_on_destroy :do_destroy
  def do_destroy
    self.after_commit_on_destroy_called = true
  end
end
38
+
39
# Checks that each after_commit hook declared on MockRecord sets its flag.
class AfterCommitTest < Test::Unit::TestCase
  # Creating a record should run the on-create hook.
  def test_after_commit_on_create_is_called
    created = MockRecord.create!
    assert_equal true, created.after_commit_on_create_called
  end

  # Saving an already-created record should run the on-update hook.
  def test_after_commit_on_update_is_called
    persisted = MockRecord.create!
    persisted.save
    assert_equal true, persisted.after_commit_on_update_called
  end

  # Destroying a record should run the on-destroy hook.
  def test_after_commit_on_destroy_is_called
    destroyed = MockRecord.create!.destroy
    assert_equal true, destroyed.after_commit_on_destroy_called
  end
end
@@ -0,0 +1,251 @@
1
+ module Delayed
2
+
3
# Raised when a job's stored handler cannot be turned back into an object
# that responds to +perform+.
class DeserializationError < StandardError; end
5
+
6
# A unit of background work stored in the +delayed_jobs+ table.
#
# The payload is serialized as YAML into the +handler+ column. Job.reserve
# locks a row, runs its payload, and destroys the row on success or calls
# #reschedule on failure.
class Job < ActiveRecord::Base
  MAX_ATTEMPTS = 25
  MAX_RUN_TIME = 4.hours
  set_table_name :delayed_jobs

  # By default failed jobs are destroyed after too many attempts.
  # If you want to keep them around (perhaps to inspect the reason
  # for the failure), set this to false.
  cattr_accessor :destroy_failed_jobs
  self.destroy_failed_jobs = true

  # Every worker has a unique name which by default is built from the host name and the pid of the process.
  # There are some advantages to overriding this with something which survives worker restarts:
  # Workers can safely resume working on tasks which are locked by themselves. The worker will assume that it crashed before.
  cattr_accessor :worker_name
  # Falls back to a pid-only name if building the host-qualified name raises.
  self.worker_name = "host:#{Socket.gethostname} pid:#{Process.pid}" rescue "pid:#{Process.pid}"

  NextTaskSQL = '(run_at <= ? AND (locked_at IS NULL OR locked_at < ?) OR (locked_by = ?)) AND failed_at IS NULL'
  NextTaskOrder = 'priority DESC, run_at ASC'

  # Captures the class name from a YAML type tag such as "!ruby/object:Foo".
  ParseObjectFromYaml = /\!ruby\/\w+\:([^\s]+)/

  # Optional priority bounds applied by find_available; nil means unbounded.
  cattr_accessor :min_priority, :max_priority
  self.min_priority = nil
  self.max_priority = nil

  # Raised by #lock_exclusively! when the row could not be locked.
  class LockError < StandardError
  end

  # Clears locked_by/locked_at on every row locked under the current worker_name.
  def self.clear_locks!
    update_all("locked_by = null, locked_at = null", ["locked_by = ?", worker_name])
  end

  # Truthy (the failure timestamp) once the job has been marked as failed.
  def failed?
    failed_at
  end
  alias_method :failed, :failed?

  # The deserialized handler object, memoized after the first call.
  def payload_object
    @payload_object ||= deserialize(self['handler'])
  end

  # Human-readable job name: the payload's display_name when it offers one,
  # otherwise the payload's class name.
  def name
    @name ||= begin
      payload = payload_object
      if payload.respond_to?(:display_name)
        payload.display_name
      else
        payload.class.name
      end
    end
  end

  # Stores +object+ as YAML in the handler column.
  def payload_object=(object)
    self['handler'] = object.to_yaml
  end

  # Records a failure. While attempts remain below MAX_ATTEMPTS the job is
  # unlocked and saved with a later run_at (default: now + attempts**4 + 5
  # seconds). Otherwise it is destroyed, or stamped with failed_at when
  # destroy_failed_jobs is false.
  def reschedule(message, backtrace = [], time = nil)
    if self.attempts < MAX_ATTEMPTS
      time ||= Job.db_time_now + (attempts ** 4) + 5

      self.attempts += 1
      self.run_at = time
      self.last_error = message + "\n" + backtrace.join("\n")
      self.unlock
      save!
    else
      logger.info "* [JOB] PERMANENTLY removing #{self.name} because of #{attempts} consequetive failures."
      # Use db_time_now (not Time.now) so failed_at is written in the same
      # zone as run_at and locked_at.
      destroy_failed_jobs ? destroy : update_attribute(:failed_at, Job.db_time_now)
    end
  end

  # Creates a job row. Accepts either a block (wrapped in an EvaledJob) or an
  # object responding to +perform+, followed by an optional priority
  # (default 0) and an optional run_at time.
  #
  # Raises ArgumentError when no block is given and the object does not
  # respond to +perform+.
  def self.enqueue(*args, &block)
    object = block_given? ? EvaledJob.new(&block) : args.shift

    unless object.respond_to?(:perform) || block_given?
      raise ArgumentError, 'Cannot enqueue items which do not respond to perform'
    end

    priority = args[0] || 0
    run_at = args[1]

    Job.create(:payload_object => object, :priority => priority.to_i, :run_at => run_at)
  end

  # Returns up to +limit+ runnable jobs in random order: due and not failed,
  # and either unlocked, locked longer ago than +max_run_time+, or locked by
  # this worker. min_priority/max_priority narrow the set when present.
  def self.find_available(limit = 5, max_run_time = MAX_RUN_TIME)

    time_now = db_time_now

    sql = NextTaskSQL.dup

    conditions = [time_now, time_now - max_run_time, worker_name]

    if self.min_priority
      sql << ' AND (priority >= ?)'
      conditions << min_priority
    end

    if self.max_priority
      sql << ' AND (priority <= ?)'
      conditions << max_priority
    end

    conditions.unshift(sql)

    records = ActiveRecord::Base.silence do
      find(:all, :conditions => conditions, :order => NextTaskOrder, :limit => limit)
    end

    # Shuffle so concurrent workers do not all try the same job first.
    records.sort_by { rand() }
  end

  # Get the payload of the next job we can get an exclusive lock on.
  # If no jobs are left we return nil
  def self.reserve(max_run_time = MAX_RUN_TIME, &block)

    # We get up to 5 jobs from the db. In case we cannot get exclusive access to a job we try the next.
    # this leads to a more even distribution of jobs across the worker processes
    find_available(5, max_run_time).each do |job|
      begin
        logger.info "* [JOB] aquiring lock on #{job.name}"
        job.lock_exclusively!(max_run_time, worker_name)
        runtime = Benchmark.realtime do
          invoke_job(job.payload_object, &block)
          job.destroy
        end
        logger.info "* [JOB] #{job.name} completed after %.4f" % runtime

        return job
      rescue LockError
        # We did not get the lock, some other worker process must have
        logger.warn "* [JOB] failed to aquire exclusive lock for #{job.name}"
      rescue StandardError => e
        job.reschedule e.message, e.backtrace
        log_exception(job, e)
        return job
      end
    end

    nil
  end

  # This method is used internally by reserve method to ensure exclusive access
  # to the given job. It will raise a LockError if it cannot get this lock.
  def lock_exclusively!(max_run_time, worker = worker_name)
    now = self.class.db_time_now
    affected_rows = if locked_by != worker
      # We don't own this job so we will update the locked_by name and the locked_at
      self.class.update_all(["locked_at = ?, locked_by = ?", now, worker], ["id = ? and (locked_at is null or locked_at < ?)", id, (now - max_run_time.to_i)])
    else
      # We already own this job, this may happen if the job queue crashes.
      # Simply resume and update the locked_at
      self.class.update_all(["locked_at = ?", now], ["id = ? and locked_by = ?", id, worker])
    end
    raise LockError.new("Attempted to aquire exclusive lock failed") unless affected_rows == 1

    self.locked_at = now
    self.locked_by = worker
  end

  # Clears the lock attributes in memory; the caller is responsible for saving.
  def unlock
    self.locked_at = nil
    self.locked_by = nil
  end

  # This is a good hook if you need to report job processing errors in additional or different ways
  def self.log_exception(job, error)
    logger.error "* [JOB] #{job.name} failed with #{error.class.name}: #{error.message} - #{job.attempts} failed attempts"
    logger.error(error)
  end

  # Runs up to +num+ jobs, stopping early when reserve finds none.
  # Returns [success_count, failure_count].
  def self.work_off(num = 100)
    success, failure = 0, 0

    num.times do
      job = self.reserve do |j|
        begin
          j.perform
          success += 1
        rescue
          failure += 1
          # Re-raise so reserve reschedules and logs the job.
          raise
        end
      end

      break if job.nil?
    end

    return [success, failure]
  end

  # Moved into its own method so that new_relic can trace it.
  def self.invoke_job(job, &block)
    block.call(job)
  end

  # Current time, in UTC when ActiveRecord's default_timezone is :utc and in
  # local time otherwise. Public: it is called through Job.db_time_now and
  # self.class.db_time_now, and a bare `private` never applied to it anyway.
  def self.db_time_now
    (ActiveRecord::Base.default_timezone == :utc) ? Time.now.utc : Time.now
  end

  private

  # Turns the YAML in +source+ back into a handler that responds to +perform+.
  # If the first load does not yield one, the handler's class is referenced
  # (so autoloading can define it) and the YAML is loaded again.
  #
  # Raises DeserializationError when no usable handler results.
  #
  # NOTE(security): YAML.load instantiates arbitrary objects; the handler
  # column must only ever hold data written by trusted code.
  def deserialize(source)
    handler = YAML.load(source) rescue nil

    unless handler.respond_to?(:perform)
      if handler.nil? && source =~ ParseObjectFromYaml
        handler_class = $1
      end
      attempt_to_load(handler_class || handler.class)
      handler = YAML.load(source)
    end

    return handler if handler.respond_to?(:perform)

    raise DeserializationError,
      'Job failed to load: Unknown handler. Try to manually require the appropiate file.'
  rescue TypeError, LoadError, NameError => e
    raise DeserializationError,
      "Job failed to load: #{e.message}. Try to manually require the required file."
  end

  # Constantize the object so that ActiveSupport can attempt
  # its auto loading magic. Will raise LoadError if not successful.
  #
  # +klass+ may be a class name or a Class (deserialize passes handler.class).
  # to_s normalizes both, so a Class no longer fails with NoMethodError on
  # constantize and mask the "Unknown handler" error.
  def attempt_to_load(klass)
    klass.to_s.constantize
  end

  protected

  # Defaults run_at to the current time so a new job is immediately eligible.
  def before_save
    self.run_at ||= self.class.db_time_now
  end

end
241
+
242
# Job payload built from a block: the block's return value is kept as Ruby
# source text and evaluated when the job is performed.
class EvaledJob
  # Runs the given block once and stores whatever it returns.
  def initialize
    source = yield
    @job = source
  end

  # Evaluates the stored source and returns the result.
  # NOTE(security): this is a plain eval; the block handed to the constructor
  # must never return text derived from untrusted input.
  def perform
    eval(@job)
  end
end
251
+ end
@@ -0,0 +1,7 @@
1
module Delayed
  # Mixin that lets an object defer one of its own method calls to a job.
  module MessageSending
    # Wraps the receiver, +method+ (converted to a symbol) and +args+ in a
    # PerformableMethod and enqueues it. Returns the result of
    # Delayed::Job.enqueue.
    def send_later(method, *args)
      deferred_call = Delayed::PerformableMethod.new(self, method.to_sym, args)
      Delayed::Job.enqueue(deferred_call)
    end
  end
end
@@ -0,0 +1,55 @@
1
module Delayed
  # Serializable description of "call +method+ on +object+ with +args+".
  #
  # Classes and ActiveRecord instances (as receiver or argument) are stored
  # as marker strings ("CLASS:Name", "AR:Name:id") and resolved again when
  # the call is performed; every other value is stored as-is.
  class PerformableMethod < Struct.new(:object, :method, :args)
    # Anchored with \A and \z so only a string that is entirely a marker is
    # treated as one. With ^ and $ a multi-line string argument containing a
    # line such as "CLASS:Foo" was wrongly replaced by the class.
    CLASS_STRING_FORMAT = /\ACLASS\:([A-Z][\w\:]+)\z/
    AR_STRING_FORMAT    = /\AAR\:([A-Z][\w\:]+)\:(\d+)\z/

    # Raises NoMethodError when +object+ does not respond to +method+.
    def initialize(object, method, args)
      # Report the intended receiver; self is still an empty struct here.
      raise NoMethodError, "undefined method `#{method}' for #{object.inspect}" unless object.respond_to?(method)

      self.object = dump(object)
      self.args   = args.map { |a| dump(a) }
      self.method = method.to_sym
    end

    # "Klass.method" for a class receiver, "Klass#method" for a record
    # receiver, "Unknown#method" for anything else.
    def display_name
      case self.object
      when CLASS_STRING_FORMAT then "#{Regexp.last_match(1)}.#{method}"
      when AR_STRING_FORMAT    then "#{Regexp.last_match(1)}##{method}"
      else "Unknown##{method}"
      end
    end

    # Resolves the receiver and arguments, then sends the method.
    # Returns true instead when a referenced record no longer exists.
    def perform
      load(object).send(method, *args.map{|a| load(a)})
    rescue ActiveRecord::RecordNotFound
      # We cannot do anything about objects which were deleted in the meantime
      true
    end

    private

    # Turns a marker string back into a class or a record; other values pass through.
    def load(arg)
      case arg
      when CLASS_STRING_FORMAT then Regexp.last_match(1).constantize
      when AR_STRING_FORMAT    then Regexp.last_match(1).constantize.find(Regexp.last_match(2))
      else arg
      end
    end

    # Replaces a class or a record with its marker string; other values pass through.
    def dump(arg)
      case arg
      when Class              then class_to_string(arg)
      when ActiveRecord::Base then ar_to_string(arg)
      else arg
      end
    end

    def ar_to_string(obj)
      "AR:#{obj.class}:#{obj.id}"
    end

    def class_to_string(obj)
      "CLASS:#{obj.name}"
    end
  end
end
@@ -0,0 +1,54 @@
1
module Delayed
  # Loop that keeps working off queued jobs until a TERM or INT signal sets
  # the global $exit flag.
  class Worker
    # Seconds to pause after a pass that processed no jobs.
    SLEEP = 5

    # Merb's logger when Merb::Logger is defined, else RAILS_DEFAULT_LOGGER
    # when that is defined, else nil.
    cattr_accessor :logger
    self.logger =
      if defined?(Merb::Logger)
        Merb.logger
      elsif defined?(RAILS_DEFAULT_LOGGER)
        RAILS_DEFAULT_LOGGER
      end

    # Options read: :quiet (suppress console output) and :min_priority /
    # :max_priority, which are copied onto Delayed::Job only when the key
    # is present.
    def initialize(options={})
      @quiet = options[:quiet]
      [:min_priority, :max_priority].each do |bound|
        Delayed::Job.send("#{bound}=", options[bound]) if options.has_key?(bound)
      end
    end

    # Runs the work loop; locks held under this worker's name are cleared on
    # the way out, however the loop ends.
    def start
      say "*** Starting job worker #{Delayed::Job.worker_name}"

      %w(TERM INT).each do |signal|
        trap(signal) { say 'Exiting...'; $exit = true }
      end

      loop do
        outcome = nil
        elapsed = Benchmark.realtime { outcome = Delayed::Job.work_off }

        # outcome is [successes, failures]; the sum is the number of jobs run.
        processed = outcome.sum

        break if $exit

        if processed.zero?
          sleep(SLEEP)
        else
          say "#{processed} jobs processed at %.4f j/s, %d failed ..." % [processed / elapsed, outcome.last]
        end

        break if $exit
      end

    ensure
      Delayed::Job.clear_locks!
    end

    # Prints +text+ unless quiet, and logs it at info level when a logger is set.
    def say(text)
      puts(text) unless @quiet
      logger.info(text) if logger
    end

  end
end
@@ -0,0 +1,30 @@
1
+ require 'socket'
2
+ require 'timeout'
3
+
4
+ require 'riddle/client'
5
+ require 'riddle/configuration'
6
+ require 'riddle/controller'
7
+
8
module Riddle #:nodoc:
  class ConnectionError < StandardError #:nodoc:
  end

  module Version #:nodoc:
    Major   = 0
    Minor   = 9
    Tiny    = 8
    # Revision number for RubyForge's sake, taken from what Sphinx
    # outputs to the command line.
    Rev     = 1533
    # Release number to mark my own fixes, beyond feature parity with
    # Sphinx itself.
    Release = 5

    # Dotted version strings: "Major.Minor.Tiny", and the same followed by
    # Rev and Release for the gem.
    String     = "#{Major}.#{Minor}.#{Tiny}"
    GemVersion = [Major, Minor, Tiny, Rev, Release].map { |part| part.to_s }.join('.')
  end

  # Returns a copy of +string+ with a backslash inserted before each of the
  # characters ( ) | - ! @ ~ " & and /.
  def self.escape(string)
    string.gsub(%r{[()|\-!@~"&/]}) { |special| "\\" + special }
  end
end
+ end