dpickett-thinking-sphinx 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. data/LICENCE +20 -0
  2. data/README +107 -0
  3. data/lib/thinking_sphinx/active_record/delta.rb +74 -0
  4. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  5. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  6. data/lib/thinking_sphinx/active_record.rb +245 -0
  7. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +34 -0
  8. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +53 -0
  9. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +129 -0
  10. data/lib/thinking_sphinx/association.rb +144 -0
  11. data/lib/thinking_sphinx/attribute.rb +254 -0
  12. data/lib/thinking_sphinx/class_facet.rb +20 -0
  13. data/lib/thinking_sphinx/collection.rb +142 -0
  14. data/lib/thinking_sphinx/configuration.rb +236 -0
  15. data/lib/thinking_sphinx/core/string.rb +22 -0
  16. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  17. data/lib/thinking_sphinx/deltas/default_delta.rb +65 -0
  18. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  19. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  20. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
  22. data/lib/thinking_sphinx/deltas.rb +22 -0
  23. data/lib/thinking_sphinx/facet.rb +58 -0
  24. data/lib/thinking_sphinx/facet_collection.rb +45 -0
  25. data/lib/thinking_sphinx/field.rb +172 -0
  26. data/lib/thinking_sphinx/index/builder.rb +233 -0
  27. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  28. data/lib/thinking_sphinx/index.rb +432 -0
  29. data/lib/thinking_sphinx/rails_additions.rb +133 -0
  30. data/lib/thinking_sphinx/search.rb +654 -0
  31. data/lib/thinking_sphinx/tasks.rb +128 -0
  32. data/lib/thinking_sphinx.rb +145 -0
  33. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  34. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  35. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  36. data/spec/unit/thinking_sphinx/active_record_spec.rb +256 -0
  37. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  38. data/spec/unit/thinking_sphinx/attribute_spec.rb +212 -0
  39. data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
  40. data/spec/unit/thinking_sphinx/configuration_spec.rb +136 -0
  41. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  42. data/spec/unit/thinking_sphinx/field_spec.rb +145 -0
  43. data/spec/unit/thinking_sphinx/index/builder_spec.rb +5 -0
  44. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  45. data/spec/unit/thinking_sphinx/index_spec.rb +54 -0
  46. data/spec/unit/thinking_sphinx/search_spec.rb +59 -0
  47. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  48. data/tasks/distribution.rb +48 -0
  49. data/tasks/rails.rake +1 -0
  50. data/tasks/testing.rb +86 -0
  51. data/vendor/after_commit/LICENSE +20 -0
  52. data/vendor/after_commit/README +16 -0
  53. data/vendor/after_commit/Rakefile +22 -0
  54. data/vendor/after_commit/init.rb +5 -0
  55. data/vendor/after_commit/lib/after_commit/active_record.rb +91 -0
  56. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  57. data/vendor/after_commit/lib/after_commit.rb +42 -0
  58. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  59. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  60. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  61. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  62. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  63. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  64. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  65. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  66. data/vendor/riddle/lib/riddle/client.rb +619 -0
  67. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  68. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  69. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  70. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  71. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  72. data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
  73. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  74. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  75. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  76. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  77. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  78. data/vendor/riddle/lib/riddle.rb +30 -0
  79. metadata +158 -0
@@ -0,0 +1,251 @@
1
+ module Delayed
2
+
3
+ class DeserializationError < StandardError
4
+ end
5
+
6
+ class Job < ActiveRecord::Base
7
+ MAX_ATTEMPTS = 25
8
+ MAX_RUN_TIME = 4.hours
9
+ set_table_name :delayed_jobs
10
+
11
+ # By default failed jobs are destroyed after too many attempts.
12
+ # If you want to keep them around (perhaps to inspect the reason
13
+ # for the failure), set this to false.
14
+ cattr_accessor :destroy_failed_jobs
15
+ self.destroy_failed_jobs = true
16
+
17
+ # Every worker has a unique name which by default is the pid of the process.
18
+ # There are some advantages to overriding this with something which survives worker retarts:
19
+ # Workers can safely resume working on tasks which are locked by themselves. The worker will assume that it crashed before.
20
+ cattr_accessor :worker_name
21
+ self.worker_name = "host:#{Socket.gethostname} pid:#{Process.pid}" rescue "pid:#{Process.pid}"
22
+
23
+ NextTaskSQL = '(run_at <= ? AND (locked_at IS NULL OR locked_at < ?) OR (locked_by = ?)) AND failed_at IS NULL'
24
+ NextTaskOrder = 'priority DESC, run_at ASC'
25
+
26
+ ParseObjectFromYaml = /\!ruby\/\w+\:([^\s]+)/
27
+
28
+ cattr_accessor :min_priority, :max_priority
29
+ self.min_priority = nil
30
+ self.max_priority = nil
31
+
32
+ class LockError < StandardError
33
+ end
34
+
35
+ def self.clear_locks!
36
+ update_all("locked_by = null, locked_at = null", ["locked_by = ?", worker_name])
37
+ end
38
+
39
+ def failed?
40
+ failed_at
41
+ end
42
+ alias_method :failed, :failed?
43
+
44
+ def payload_object
45
+ @payload_object ||= deserialize(self['handler'])
46
+ end
47
+
48
+ def name
49
+ @name ||= begin
50
+ payload = payload_object
51
+ if payload.respond_to?(:display_name)
52
+ payload.display_name
53
+ else
54
+ payload.class.name
55
+ end
56
+ end
57
+ end
58
+
59
+ def payload_object=(object)
60
+ self['handler'] = object.to_yaml
61
+ end
62
+
63
+ def reschedule(message, backtrace = [], time = nil)
64
+ if self.attempts < MAX_ATTEMPTS
65
+ time ||= Job.db_time_now + (attempts ** 4) + 5
66
+
67
+ self.attempts += 1
68
+ self.run_at = time
69
+ self.last_error = message + "\n" + backtrace.join("\n")
70
+ self.unlock
71
+ save!
72
+ else
73
+ logger.info "* [JOB] PERMANENTLY removing #{self.name} because of #{attempts} consequetive failures."
74
+ destroy_failed_jobs ? destroy : update_attribute(:failed_at, Time.now)
75
+ end
76
+ end
77
+
78
+ def self.enqueue(*args, &block)
79
+ object = block_given? ? EvaledJob.new(&block) : args.shift
80
+
81
+ unless object.respond_to?(:perform) || block_given?
82
+ raise ArgumentError, 'Cannot enqueue items which do not respond to perform'
83
+ end
84
+
85
+ priority = args[0] || 0
86
+ run_at = args[1]
87
+
88
+ Job.create(:payload_object => object, :priority => priority.to_i, :run_at => run_at)
89
+ end
90
+
91
+ def self.find_available(limit = 5, max_run_time = MAX_RUN_TIME)
92
+
93
+ time_now = db_time_now
94
+
95
+ sql = NextTaskSQL.dup
96
+
97
+ conditions = [time_now, time_now - max_run_time, worker_name]
98
+
99
+ if self.min_priority
100
+ sql << ' AND (priority >= ?)'
101
+ conditions << min_priority
102
+ end
103
+
104
+ if self.max_priority
105
+ sql << ' AND (priority <= ?)'
106
+ conditions << max_priority
107
+ end
108
+
109
+ conditions.unshift(sql)
110
+
111
+ records = ActiveRecord::Base.silence do
112
+ find(:all, :conditions => conditions, :order => NextTaskOrder, :limit => limit)
113
+ end
114
+
115
+ records.sort_by { rand() }
116
+ end
117
+
118
+ # Get the payload of the next job we can get an exclusive lock on.
119
+ # If no jobs are left we return nil
120
+ def self.reserve(max_run_time = MAX_RUN_TIME, &block)
121
+
122
+ # We get up to 5 jobs from the db. In face we cannot get exclusive access to a job we try the next.
123
+ # this leads to a more even distribution of jobs across the worker processes
124
+ find_available(5, max_run_time).each do |job|
125
+ begin
126
+ logger.info "* [JOB] aquiring lock on #{job.name}"
127
+ job.lock_exclusively!(max_run_time, worker_name)
128
+ runtime = Benchmark.realtime do
129
+ invoke_job(job.payload_object, &block)
130
+ job.destroy
131
+ end
132
+ logger.info "* [JOB] #{job.name} completed after %.4f" % runtime
133
+
134
+ return job
135
+ rescue LockError
136
+ # We did not get the lock, some other worker process must have
137
+ logger.warn "* [JOB] failed to aquire exclusive lock for #{job.name}"
138
+ rescue StandardError => e
139
+ job.reschedule e.message, e.backtrace
140
+ log_exception(job, e)
141
+ return job
142
+ end
143
+ end
144
+
145
+ nil
146
+ end
147
+
148
+ # This method is used internally by reserve method to ensure exclusive access
149
+ # to the given job. It will rise a LockError if it cannot get this lock.
150
+ def lock_exclusively!(max_run_time, worker = worker_name)
151
+ now = self.class.db_time_now
152
+ affected_rows = if locked_by != worker
153
+ # We don't own this job so we will update the locked_by name and the locked_at
154
+ self.class.update_all(["locked_at = ?, locked_by = ?", now, worker], ["id = ? and (locked_at is null or locked_at < ?)", id, (now - max_run_time.to_i)])
155
+ else
156
+ # We already own this job, this may happen if the job queue crashes.
157
+ # Simply resume and update the locked_at
158
+ self.class.update_all(["locked_at = ?", now], ["id = ? and locked_by = ?", id, worker])
159
+ end
160
+ raise LockError.new("Attempted to aquire exclusive lock failed") unless affected_rows == 1
161
+
162
+ self.locked_at = now
163
+ self.locked_by = worker
164
+ end
165
+
166
+ def unlock
167
+ self.locked_at = nil
168
+ self.locked_by = nil
169
+ end
170
+
171
+ # This is a good hook if you need to report job processing errors in additional or different ways
172
+ def self.log_exception(job, error)
173
+ logger.error "* [JOB] #{job.name} failed with #{error.class.name}: #{error.message} - #{job.attempts} failed attempts"
174
+ logger.error(error)
175
+ end
176
+
177
+ def self.work_off(num = 100)
178
+ success, failure = 0, 0
179
+
180
+ num.times do
181
+ job = self.reserve do |j|
182
+ begin
183
+ j.perform
184
+ success += 1
185
+ rescue
186
+ failure += 1
187
+ raise
188
+ end
189
+ end
190
+
191
+ break if job.nil?
192
+ end
193
+
194
+ return [success, failure]
195
+ end
196
+
197
+ # Moved into its own method so that new_relic can trace it.
198
+ def self.invoke_job(job, &block)
199
+ block.call(job)
200
+ end
201
+
202
+ private
203
+
204
+ def deserialize(source)
205
+ handler = YAML.load(source) rescue nil
206
+
207
+ unless handler.respond_to?(:perform)
208
+ if handler.nil? && source =~ ParseObjectFromYaml
209
+ handler_class = $1
210
+ end
211
+ attempt_to_load(handler_class || handler.class)
212
+ handler = YAML.load(source)
213
+ end
214
+
215
+ return handler if handler.respond_to?(:perform)
216
+
217
+ raise DeserializationError,
218
+ 'Job failed to load: Unknown handler. Try to manually require the appropiate file.'
219
+ rescue TypeError, LoadError, NameError => e
220
+ raise DeserializationError,
221
+ "Job failed to load: #{e.message}. Try to manually require the required file."
222
+ end
223
+
224
+ # Constantize the object so that ActiveSupport can attempt
225
+ # its auto loading magic. Will raise LoadError if not successful.
226
+ def attempt_to_load(klass)
227
+ klass.constantize
228
+ end
229
+
230
+ def self.db_time_now
231
+ (ActiveRecord::Base.default_timezone == :utc) ? Time.now.utc : Time.now
232
+ end
233
+
234
+ protected
235
+
236
+ def before_save
237
+ self.run_at ||= self.class.db_time_now
238
+ end
239
+
240
+ end
241
+
242
+ class EvaledJob
243
+ def initialize
244
+ @job = yield
245
+ end
246
+
247
+ def perform
248
+ eval(@job)
249
+ end
250
+ end
251
+ end
@@ -0,0 +1,7 @@
1
+ module Delayed
2
+ module MessageSending
3
+ def send_later(method, *args)
4
+ Delayed::Job.enqueue Delayed::PerformableMethod.new(self, method.to_sym, args)
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,55 @@
1
+ module Delayed
2
+ class PerformableMethod < Struct.new(:object, :method, :args)
3
+ CLASS_STRING_FORMAT = /^CLASS\:([A-Z][\w\:]+)$/
4
+ AR_STRING_FORMAT = /^AR\:([A-Z][\w\:]+)\:(\d+)$/
5
+
6
+ def initialize(object, method, args)
7
+ raise NoMethodError, "undefined method `#{method}' for #{self.inspect}" unless object.respond_to?(method)
8
+
9
+ self.object = dump(object)
10
+ self.args = args.map { |a| dump(a) }
11
+ self.method = method.to_sym
12
+ end
13
+
14
+ def display_name
15
+ case self.object
16
+ when CLASS_STRING_FORMAT then "#{$1}.#{method}"
17
+ when AR_STRING_FORMAT then "#{$1}##{method}"
18
+ else "Unknown##{method}"
19
+ end
20
+ end
21
+
22
+ def perform
23
+ load(object).send(method, *args.map{|a| load(a)})
24
+ rescue ActiveRecord::RecordNotFound
25
+ # We cannot do anything about objects which were deleted in the meantime
26
+ true
27
+ end
28
+
29
+ private
30
+
31
+ def load(arg)
32
+ case arg
33
+ when CLASS_STRING_FORMAT then $1.constantize
34
+ when AR_STRING_FORMAT then $1.constantize.find($2)
35
+ else arg
36
+ end
37
+ end
38
+
39
+ def dump(arg)
40
+ case arg
41
+ when Class then class_to_string(arg)
42
+ when ActiveRecord::Base then ar_to_string(arg)
43
+ else arg
44
+ end
45
+ end
46
+
47
+ def ar_to_string(obj)
48
+ "AR:#{obj.class}:#{obj.id}"
49
+ end
50
+
51
+ def class_to_string(obj)
52
+ "CLASS:#{obj.name}"
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,54 @@
1
+ module Delayed
2
+ class Worker
3
+ SLEEP = 5
4
+
5
+ cattr_accessor :logger
6
+ self.logger = if defined?(Merb::Logger)
7
+ Merb.logger
8
+ elsif defined?(RAILS_DEFAULT_LOGGER)
9
+ RAILS_DEFAULT_LOGGER
10
+ end
11
+
12
+ def initialize(options={})
13
+ @quiet = options[:quiet]
14
+ Delayed::Job.min_priority = options[:min_priority] if options.has_key?(:min_priority)
15
+ Delayed::Job.max_priority = options[:max_priority] if options.has_key?(:max_priority)
16
+ end
17
+
18
+ def start
19
+ say "*** Starting job worker #{Delayed::Job.worker_name}"
20
+
21
+ trap('TERM') { say 'Exiting...'; $exit = true }
22
+ trap('INT') { say 'Exiting...'; $exit = true }
23
+
24
+ loop do
25
+ result = nil
26
+
27
+ realtime = Benchmark.realtime do
28
+ result = Delayed::Job.work_off
29
+ end
30
+
31
+ count = result.sum
32
+
33
+ break if $exit
34
+
35
+ if count.zero?
36
+ sleep(SLEEP)
37
+ else
38
+ say "#{count} jobs processed at %.4f j/s, %d failed ..." % [count / realtime, result.last]
39
+ end
40
+
41
+ break if $exit
42
+ end
43
+
44
+ ensure
45
+ Delayed::Job.clear_locks!
46
+ end
47
+
48
+ def say(text)
49
+ puts text unless @quiet
50
+ logger.info text if logger
51
+ end
52
+
53
+ end
54
+ end
@@ -0,0 +1,53 @@
1
+ module Riddle
2
+ class Client
3
+ # Used for querying Sphinx.
4
+ class Filter
5
+ attr_accessor :attribute, :values, :exclude
6
+
7
+ # Attribute name, values (which can be an array or a range), and whether
8
+ # the filter should be exclusive.
9
+ def initialize(attribute, values, exclude=false)
10
+ @attribute, @values, @exclude = attribute, values, exclude
11
+ end
12
+
13
+ def exclude?
14
+ self.exclude
15
+ end
16
+
17
+ # Returns the message for this filter to send to the Sphinx service
18
+ def query_message
19
+ message = Message.new
20
+
21
+ message.append_string self.attribute.to_s
22
+ case self.values
23
+ when Range
24
+ if self.values.first.is_a?(Float) && self.values.last.is_a?(Float)
25
+ message.append_int FilterTypes[:float_range]
26
+ message.append_floats self.values.first, self.values.last
27
+ else
28
+ message.append_int FilterTypes[:range]
29
+ message.append_ints self.values.first, self.values.last
30
+ end
31
+ when Array
32
+ message.append_int FilterTypes[:values]
33
+ message.append_int self.values.length
34
+ # using to_f is a hack from the php client - to workaround 32bit
35
+ # signed ints on x32 platforms
36
+ message.append_ints *self.values.collect { |val|
37
+ case val
38
+ when TrueClass
39
+ 1.0
40
+ when FalseClass
41
+ 0.0
42
+ else
43
+ val.to_f
44
+ end
45
+ }
46
+ end
47
+ message.append_int self.exclude? ? 1 : 0
48
+
49
+ message.to_s
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,65 @@
1
+ module Riddle
2
+ class Client
3
+ # This class takes care of the translation of ints, strings and arrays to
4
+ # the format required by the Sphinx service.
5
+ class Message
6
+ def initialize
7
+ @message = ""
8
+ @size_method = @message.respond_to?(:bytesize) ? :bytesize : :length
9
+ end
10
+
11
+ # Append raw data (only use if you know what you're doing)
12
+ def append(*args)
13
+ return if args.length == 0
14
+
15
+ args.each { |arg| @message << arg }
16
+ end
17
+
18
+ # Append a string's length, then the string itself
19
+ def append_string(str)
20
+ @message << [str.send(@size_method)].pack('N') + str
21
+ end
22
+
23
+ # Append an integer
24
+ def append_int(int)
25
+ @message << [int].pack('N')
26
+ end
27
+
28
+ def append_64bit_int(int)
29
+ @message << [int >> 32, int & 0xFFFFFFFF].pack('NN')
30
+ end
31
+
32
+ # Append a float
33
+ def append_float(float)
34
+ @message << [float].pack('f').unpack('L*').pack("N")
35
+ end
36
+
37
+ # Append multiple integers
38
+ def append_ints(*ints)
39
+ ints.each { |int| append_int(int) }
40
+ end
41
+
42
+ def append_64bit_ints(*ints)
43
+ ints.each { |int| append_64bit_int(int) }
44
+ end
45
+
46
+ # Append multiple floats
47
+ def append_floats(*floats)
48
+ floats.each { |float| append_float(float) }
49
+ end
50
+
51
+ # Append an array of strings - first appends the length of the array,
52
+ # then each item's length and value.
53
+ def append_array(array)
54
+ append_int(array.length)
55
+
56
+ array.each { |item| append_string(item) }
57
+ end
58
+
59
+ # Returns the entire message
60
+ def to_s
61
+ @message
62
+ end
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,84 @@
1
+ module Riddle
2
+ class Client
3
+ # Used to interrogate responses from the Sphinx daemon. Keep in mind none
4
+ # of the methods here check whether the data they're grabbing are what the
5
+ # user expects - it just assumes the user knows what the data stream is
6
+ # made up of.
7
+ class Response
8
+ # Create with the data to interpret
9
+ def initialize(str)
10
+ @str = str
11
+ @marker = 0
12
+ end
13
+
14
+ # Return the next string value in the stream
15
+ def next
16
+ len = next_int
17
+ result = @str[@marker, len]
18
+ @marker += len
19
+
20
+ return result
21
+ end
22
+
23
+ # Return the next integer value from the stream
24
+ def next_int
25
+ int = @str[@marker, 4].unpack('N*').first
26
+ @marker += 4
27
+
28
+ return int
29
+ end
30
+
31
+ def next_64bit_int
32
+ high, low = @str[@marker, 8].unpack('N*N*')[0..1]
33
+ @marker += 8
34
+
35
+ return (high << 32) + low
36
+ end
37
+
38
+ # Return the next float value from the stream
39
+ def next_float
40
+ float = @str[@marker, 4].unpack('N*').pack('L').unpack('f*').first
41
+ @marker += 4
42
+
43
+ return float
44
+ end
45
+
46
+ # Returns an array of string items
47
+ def next_array
48
+ count = next_int
49
+ items = []
50
+ for i in 0...count
51
+ items << self.next
52
+ end
53
+
54
+ return items
55
+ end
56
+
57
+ # Returns an array of int items
58
+ def next_int_array
59
+ count = next_int
60
+ items = []
61
+ for i in 0...count
62
+ items << self.next_int
63
+ end
64
+
65
+ return items
66
+ end
67
+
68
+ def next_float_array
69
+ count = next_int
70
+ items = []
71
+ for i in 0...count
72
+ items << self.next_float
73
+ end
74
+
75
+ return items
76
+ end
77
+
78
+ # Returns the length of the streamed data
79
+ def length
80
+ @str.length
81
+ end
82
+ end
83
+ end
84
+ end