dpickett-thinking-sphinx 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. data/LICENCE +20 -0
  2. data/README +107 -0
  3. data/lib/thinking_sphinx/active_record/delta.rb +74 -0
  4. data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
  5. data/lib/thinking_sphinx/active_record/search.rb +57 -0
  6. data/lib/thinking_sphinx/active_record.rb +245 -0
  7. data/lib/thinking_sphinx/adapters/abstract_adapter.rb +34 -0
  8. data/lib/thinking_sphinx/adapters/mysql_adapter.rb +53 -0
  9. data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +129 -0
  10. data/lib/thinking_sphinx/association.rb +144 -0
  11. data/lib/thinking_sphinx/attribute.rb +254 -0
  12. data/lib/thinking_sphinx/class_facet.rb +20 -0
  13. data/lib/thinking_sphinx/collection.rb +142 -0
  14. data/lib/thinking_sphinx/configuration.rb +236 -0
  15. data/lib/thinking_sphinx/core/string.rb +22 -0
  16. data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
  17. data/lib/thinking_sphinx/deltas/default_delta.rb +65 -0
  18. data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
  19. data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
  20. data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
  21. data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
  22. data/lib/thinking_sphinx/deltas.rb +22 -0
  23. data/lib/thinking_sphinx/facet.rb +58 -0
  24. data/lib/thinking_sphinx/facet_collection.rb +45 -0
  25. data/lib/thinking_sphinx/field.rb +172 -0
  26. data/lib/thinking_sphinx/index/builder.rb +233 -0
  27. data/lib/thinking_sphinx/index/faux_column.rb +110 -0
  28. data/lib/thinking_sphinx/index.rb +432 -0
  29. data/lib/thinking_sphinx/rails_additions.rb +133 -0
  30. data/lib/thinking_sphinx/search.rb +654 -0
  31. data/lib/thinking_sphinx/tasks.rb +128 -0
  32. data/lib/thinking_sphinx.rb +145 -0
  33. data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
  34. data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
  35. data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
  36. data/spec/unit/thinking_sphinx/active_record_spec.rb +256 -0
  37. data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
  38. data/spec/unit/thinking_sphinx/attribute_spec.rb +212 -0
  39. data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
  40. data/spec/unit/thinking_sphinx/configuration_spec.rb +136 -0
  41. data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
  42. data/spec/unit/thinking_sphinx/field_spec.rb +145 -0
  43. data/spec/unit/thinking_sphinx/index/builder_spec.rb +5 -0
  44. data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
  45. data/spec/unit/thinking_sphinx/index_spec.rb +54 -0
  46. data/spec/unit/thinking_sphinx/search_spec.rb +59 -0
  47. data/spec/unit/thinking_sphinx_spec.rb +129 -0
  48. data/tasks/distribution.rb +48 -0
  49. data/tasks/rails.rake +1 -0
  50. data/tasks/testing.rb +86 -0
  51. data/vendor/after_commit/LICENSE +20 -0
  52. data/vendor/after_commit/README +16 -0
  53. data/vendor/after_commit/Rakefile +22 -0
  54. data/vendor/after_commit/init.rb +5 -0
  55. data/vendor/after_commit/lib/after_commit/active_record.rb +91 -0
  56. data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
  57. data/vendor/after_commit/lib/after_commit.rb +42 -0
  58. data/vendor/after_commit/test/after_commit_test.rb +53 -0
  59. data/vendor/delayed_job/lib/delayed/job.rb +251 -0
  60. data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
  61. data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
  62. data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
  63. data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
  64. data/vendor/riddle/lib/riddle/client/message.rb +65 -0
  65. data/vendor/riddle/lib/riddle/client/response.rb +84 -0
  66. data/vendor/riddle/lib/riddle/client.rb +619 -0
  67. data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
  68. data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
  69. data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
  70. data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
  71. data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
  72. data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
  73. data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
  74. data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
  75. data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
  76. data/vendor/riddle/lib/riddle/configuration.rb +33 -0
  77. data/vendor/riddle/lib/riddle/controller.rb +44 -0
  78. data/vendor/riddle/lib/riddle.rb +30 -0
  79. metadata +158 -0
data/vendor/delayed_job/lib/delayed/job.rb
@@ -0,0 +1,251 @@
+ module Delayed
+
+   class DeserializationError < StandardError
+   end
+
+   class Job < ActiveRecord::Base
+     MAX_ATTEMPTS = 25
+     MAX_RUN_TIME = 4.hours
+     set_table_name :delayed_jobs
+
+     # By default failed jobs are destroyed after too many attempts.
+     # If you want to keep them around (perhaps to inspect the reason
+     # for the failure), set this to false.
+     cattr_accessor :destroy_failed_jobs
+     self.destroy_failed_jobs = true
+
+     # Every worker has a unique name which by default is the pid of the process.
+     # There are some advantages to overriding this with something which survives worker retarts:
+     # Workers can safely resume working on tasks which are locked by themselves. The worker will assume that it crashed before.
+     cattr_accessor :worker_name
+     self.worker_name = "host:#{Socket.gethostname} pid:#{Process.pid}" rescue "pid:#{Process.pid}"
+
+     NextTaskSQL = '(run_at <= ? AND (locked_at IS NULL OR locked_at < ?) OR (locked_by = ?)) AND failed_at IS NULL'
+     NextTaskOrder = 'priority DESC, run_at ASC'
+
+     ParseObjectFromYaml = /\!ruby\/\w+\:([^\s]+)/
+
+     cattr_accessor :min_priority, :max_priority
+     self.min_priority = nil
+     self.max_priority = nil
+
+     class LockError < StandardError
+     end
+
+     def self.clear_locks!
+       update_all("locked_by = null, locked_at = null", ["locked_by = ?", worker_name])
+     end
+
+     def failed?
+       failed_at
+     end
+     alias_method :failed, :failed?
+
+     def payload_object
+       @payload_object ||= deserialize(self['handler'])
+     end
+
+     def name
+       @name ||= begin
+         payload = payload_object
+         if payload.respond_to?(:display_name)
+           payload.display_name
+         else
+           payload.class.name
+         end
+       end
+     end
+
+     def payload_object=(object)
+       self['handler'] = object.to_yaml
+     end
+
+     def reschedule(message, backtrace = [], time = nil)
+       if self.attempts < MAX_ATTEMPTS
+         time ||= Job.db_time_now + (attempts ** 4) + 5
+
+         self.attempts += 1
+         self.run_at = time
+         self.last_error = message + "\n" + backtrace.join("\n")
+         self.unlock
+         save!
+       else
+         logger.info "* [JOB] PERMANENTLY removing #{self.name} because of #{attempts} consequetive failures."
+         destroy_failed_jobs ? destroy : update_attribute(:failed_at, Time.now)
+       end
+     end
+
+     def self.enqueue(*args, &block)
+       object = block_given? ? EvaledJob.new(&block) : args.shift
+
+       unless object.respond_to?(:perform) || block_given?
+         raise ArgumentError, 'Cannot enqueue items which do not respond to perform'
+       end
+
+       priority = args[0] || 0
+       run_at = args[1]
+
+       Job.create(:payload_object => object, :priority => priority.to_i, :run_at => run_at)
+     end
+
+     def self.find_available(limit = 5, max_run_time = MAX_RUN_TIME)
+
+       time_now = db_time_now
+
+       sql = NextTaskSQL.dup
+
+       conditions = [time_now, time_now - max_run_time, worker_name]
+
+       if self.min_priority
+         sql << ' AND (priority >= ?)'
+         conditions << min_priority
+       end
+
+       if self.max_priority
+         sql << ' AND (priority <= ?)'
+         conditions << max_priority
+       end
+
+       conditions.unshift(sql)
+
+       records = ActiveRecord::Base.silence do
+         find(:all, :conditions => conditions, :order => NextTaskOrder, :limit => limit)
+       end
+
+       records.sort_by { rand() }
+     end
+
+     # Get the payload of the next job we can get an exclusive lock on.
+     # If no jobs are left we return nil
+     def self.reserve(max_run_time = MAX_RUN_TIME, &block)
+
+       # We get up to 5 jobs from the db. In face we cannot get exclusive access to a job we try the next.
+       # this leads to a more even distribution of jobs across the worker processes
+       find_available(5, max_run_time).each do |job|
+         begin
+           logger.info "* [JOB] aquiring lock on #{job.name}"
+           job.lock_exclusively!(max_run_time, worker_name)
+           runtime = Benchmark.realtime do
+             invoke_job(job.payload_object, &block)
+             job.destroy
+           end
+           logger.info "* [JOB] #{job.name} completed after %.4f" % runtime
+
+           return job
+         rescue LockError
+           # We did not get the lock, some other worker process must have
+           logger.warn "* [JOB] failed to aquire exclusive lock for #{job.name}"
+         rescue StandardError => e
+           job.reschedule e.message, e.backtrace
+           log_exception(job, e)
+           return job
+         end
+       end
+
+       nil
+     end
+
+     # This method is used internally by reserve method to ensure exclusive access
+     # to the given job. It will rise a LockError if it cannot get this lock.
+     def lock_exclusively!(max_run_time, worker = worker_name)
+       now = self.class.db_time_now
+       affected_rows = if locked_by != worker
+         # We don't own this job so we will update the locked_by name and the locked_at
+         self.class.update_all(["locked_at = ?, locked_by = ?", now, worker], ["id = ? and (locked_at is null or locked_at < ?)", id, (now - max_run_time.to_i)])
+       else
+         # We already own this job, this may happen if the job queue crashes.
+         # Simply resume and update the locked_at
+         self.class.update_all(["locked_at = ?", now], ["id = ? and locked_by = ?", id, worker])
+       end
+       raise LockError.new("Attempted to aquire exclusive lock failed") unless affected_rows == 1
+
+       self.locked_at = now
+       self.locked_by = worker
+     end
+
+     def unlock
+       self.locked_at = nil
+       self.locked_by = nil
+     end
+
+     # This is a good hook if you need to report job processing errors in additional or different ways
+     def self.log_exception(job, error)
+       logger.error "* [JOB] #{job.name} failed with #{error.class.name}: #{error.message} - #{job.attempts} failed attempts"
+       logger.error(error)
+     end
+
+     def self.work_off(num = 100)
+       success, failure = 0, 0
+
+       num.times do
+         job = self.reserve do |j|
+           begin
+             j.perform
+             success += 1
+           rescue
+             failure += 1
+             raise
+           end
+         end
+
+         break if job.nil?
+       end
+
+       return [success, failure]
+     end
+
+     # Moved into its own method so that new_relic can trace it.
+     def self.invoke_job(job, &block)
+       block.call(job)
+     end
+
+     private
+
+     def deserialize(source)
+       handler = YAML.load(source) rescue nil
+
+       unless handler.respond_to?(:perform)
+         if handler.nil? && source =~ ParseObjectFromYaml
+           handler_class = $1
+         end
+         attempt_to_load(handler_class || handler.class)
+         handler = YAML.load(source)
+       end
+
+       return handler if handler.respond_to?(:perform)
+
+       raise DeserializationError,
+         'Job failed to load: Unknown handler. Try to manually require the appropiate file.'
+     rescue TypeError, LoadError, NameError => e
+       raise DeserializationError,
+         "Job failed to load: #{e.message}. Try to manually require the required file."
+     end
+
+     # Constantize the object so that ActiveSupport can attempt
+     # its auto loading magic. Will raise LoadError if not successful.
+     def attempt_to_load(klass)
+       klass.constantize
+     end
+
+     def self.db_time_now
+       (ActiveRecord::Base.default_timezone == :utc) ? Time.now.utc : Time.now
+     end
+
+     protected
+
+     def before_save
+       self.run_at ||= self.class.db_time_now
+     end
+
+   end
+
+   class EvaledJob
+     def initialize
+       @job = yield
+     end
+
+     def perform
+       eval(@job)
+     end
+   end
+ end
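
The vendored Delayed::Job class above is driven through enqueue and work_off. A minimal usage sketch, assuming a hypothetical ImportTask payload class (any object that responds to perform can be enqueued):

  # Hypothetical payload class - any object responding to #perform works.
  class ImportTask < Struct.new(:record_id)
    def perform
      puts "importing record #{record_id}"
    end
  end

  # Queue a job at priority 1 (stored as a YAML-serialised handler row),
  # then process up to 100 queued jobs in the current process.
  Delayed::Job.enqueue ImportTask.new(42), 1
  successes, failures = Delayed::Job.work_off
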
data/vendor/delayed_job/lib/delayed/message_sending.rb
@@ -0,0 +1,7 @@
+ module Delayed
+   module MessageSending
+     def send_later(method, *args)
+       Delayed::Job.enqueue Delayed::PerformableMethod.new(self, method.to_sym, args)
+     end
+   end
+ end
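
MessageSending is the mixin behind delayed_job's send_later convenience call. A short sketch, assuming the module has been mixed into the receiver's class and that the model and method names are hypothetical:

  # Instead of running the method inline...
  user.deliver_welcome_email

  # ...send_later wraps the receiver, method name and arguments in a
  # PerformableMethod and enqueues it for a background worker to run.
  user.send_later(:deliver_welcome_email)
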
data/vendor/delayed_job/lib/delayed/performable_method.rb
@@ -0,0 +1,55 @@
+ module Delayed
+   class PerformableMethod < Struct.new(:object, :method, :args)
+     CLASS_STRING_FORMAT = /^CLASS\:([A-Z][\w\:]+)$/
+     AR_STRING_FORMAT = /^AR\:([A-Z][\w\:]+)\:(\d+)$/
+
+     def initialize(object, method, args)
+       raise NoMethodError, "undefined method `#{method}' for #{self.inspect}" unless object.respond_to?(method)
+
+       self.object = dump(object)
+       self.args = args.map { |a| dump(a) }
+       self.method = method.to_sym
+     end
+
+     def display_name
+       case self.object
+       when CLASS_STRING_FORMAT then "#{$1}.#{method}"
+       when AR_STRING_FORMAT then "#{$1}##{method}"
+       else "Unknown##{method}"
+       end
+     end
+
+     def perform
+       load(object).send(method, *args.map{|a| load(a)})
+     rescue ActiveRecord::RecordNotFound
+       # We cannot do anything about objects which were deleted in the meantime
+       true
+     end
+
+     private
+
+     def load(arg)
+       case arg
+       when CLASS_STRING_FORMAT then $1.constantize
+       when AR_STRING_FORMAT then $1.constantize.find($2)
+       else arg
+       end
+     end
+
+     def dump(arg)
+       case arg
+       when Class then class_to_string(arg)
+       when ActiveRecord::Base then ar_to_string(arg)
+       else arg
+       end
+     end
+
+     def ar_to_string(obj)
+       "AR:#{obj.class}:#{obj.id}"
+     end
+
+     def class_to_string(obj)
+       "CLASS:#{obj.name}"
+     end
+   end
+ end
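
PerformableMethod serialises its receiver and arguments via dump: classes become "CLASS:Name" strings and ActiveRecord instances become "AR:Model:id" strings, which load resolves back through constantize and find when the job runs. A rough illustration, assuming a hypothetical User model:

  pm = Delayed::PerformableMethod.new(User.find(1), :activate!, [])
  pm.object        # => "AR:User:1"
  pm.display_name  # => "User#activate!"
  # At run time, #perform re-fetches the record and invokes the method,
  # roughly: User.find("1").activate!
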
data/vendor/delayed_job/lib/delayed/worker.rb
@@ -0,0 +1,54 @@
+ module Delayed
+   class Worker
+     SLEEP = 5
+
+     cattr_accessor :logger
+     self.logger = if defined?(Merb::Logger)
+       Merb.logger
+     elsif defined?(RAILS_DEFAULT_LOGGER)
+       RAILS_DEFAULT_LOGGER
+     end
+
+     def initialize(options={})
+       @quiet = options[:quiet]
+       Delayed::Job.min_priority = options[:min_priority] if options.has_key?(:min_priority)
+       Delayed::Job.max_priority = options[:max_priority] if options.has_key?(:max_priority)
+     end
+
+     def start
+       say "*** Starting job worker #{Delayed::Job.worker_name}"
+
+       trap('TERM') { say 'Exiting...'; $exit = true }
+       trap('INT') { say 'Exiting...'; $exit = true }
+
+       loop do
+         result = nil
+
+         realtime = Benchmark.realtime do
+           result = Delayed::Job.work_off
+         end
+
+         count = result.sum
+
+         break if $exit
+
+         if count.zero?
+           sleep(SLEEP)
+         else
+           say "#{count} jobs processed at %.4f j/s, %d failed ..." % [count / realtime, result.last]
+         end
+
+         break if $exit
+       end
+
+     ensure
+       Delayed::Job.clear_locks!
+     end
+
+     def say(text)
+       puts text unless @quiet
+       logger.info text if logger
+     end
+
+   end
+ end
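
The worker loop above is what the bundled rake task ultimately drives. A minimal sketch of starting one by hand, using only the option keys read in initialize:

  # Polls every Delayed::Worker::SLEEP seconds when the queue is empty,
  # and releases its locks on exit via Delayed::Job.clear_locks!.
  Delayed::Worker.new(:quiet => false, :min_priority => 0, :max_priority => 10).start
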
data/vendor/riddle/lib/riddle/client/filter.rb
@@ -0,0 +1,53 @@
+ module Riddle
+   class Client
+     # Used for querying Sphinx.
+     class Filter
+       attr_accessor :attribute, :values, :exclude
+
+       # Attribute name, values (which can be an array or a range), and whether
+       # the filter should be exclusive.
+       def initialize(attribute, values, exclude=false)
+         @attribute, @values, @exclude = attribute, values, exclude
+       end
+
+       def exclude?
+         self.exclude
+       end
+
+       # Returns the message for this filter to send to the Sphinx service
+       def query_message
+         message = Message.new
+
+         message.append_string self.attribute.to_s
+         case self.values
+         when Range
+           if self.values.first.is_a?(Float) && self.values.last.is_a?(Float)
+             message.append_int FilterTypes[:float_range]
+             message.append_floats self.values.first, self.values.last
+           else
+             message.append_int FilterTypes[:range]
+             message.append_ints self.values.first, self.values.last
+           end
+         when Array
+           message.append_int FilterTypes[:values]
+           message.append_int self.values.length
+           # using to_f is a hack from the php client - to workaround 32bit
+           # signed ints on x32 platforms
+           message.append_ints *self.values.collect { |val|
+             case val
+             when TrueClass
+               1.0
+             when FalseClass
+               0.0
+             else
+               val.to_f
+             end
+           }
+         end
+         message.append_int self.exclude? ? 1 : 0
+
+         message.to_s
+       end
+     end
+   end
+ end
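
A filter serialises differently depending on its values: an Array becomes a list of (float-cast) values, while a Range becomes an integer or float range filter. A brief construction sketch using hypothetical attribute names:

  # Match documents whose deleted attribute equals 0 (array of values).
  Riddle::Client::Filter.new(:deleted, [0])

  # Exclude documents whose id falls in 100..200 (range filter, exclude = true).
  Riddle::Client::Filter.new(:sphinx_internal_id, 100..200, true)
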
data/vendor/riddle/lib/riddle/client/message.rb
@@ -0,0 +1,65 @@
+ module Riddle
+   class Client
+     # This class takes care of the translation of ints, strings and arrays to
+     # the format required by the Sphinx service.
+     class Message
+       def initialize
+         @message = ""
+         @size_method = @message.respond_to?(:bytesize) ? :bytesize : :length
+       end
+
+       # Append raw data (only use if you know what you're doing)
+       def append(*args)
+         return if args.length == 0
+
+         args.each { |arg| @message << arg }
+       end
+
+       # Append a string's length, then the string itself
+       def append_string(str)
+         @message << [str.send(@size_method)].pack('N') + str
+       end
+
+       # Append an integer
+       def append_int(int)
+         @message << [int].pack('N')
+       end
+
+       def append_64bit_int(int)
+         @message << [int >> 32, int & 0xFFFFFFFF].pack('NN')
+       end
+
+       # Append a float
+       def append_float(float)
+         @message << [float].pack('f').unpack('L*').pack("N")
+       end
+
+       # Append multiple integers
+       def append_ints(*ints)
+         ints.each { |int| append_int(int) }
+       end
+
+       def append_64bit_ints(*ints)
+         ints.each { |int| append_64bit_int(int) }
+       end
+
+       # Append multiple floats
+       def append_floats(*floats)
+         floats.each { |float| append_float(float) }
+       end
+
+       # Append an array of strings - first appends the length of the array,
+       # then each item's length and value.
+       def append_array(array)
+         append_int(array.length)
+
+         array.each { |item| append_string(item) }
+       end
+
+       # Returns the entire message
+       def to_s
+         @message
+       end
+     end
+   end
+ end
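
Everything in a Message is written in big-endian network order: integers are packed with 'N' and strings are length-prefixed. A rough sketch of the resulting bytes (the byte values shown are just standard pack('N') output, not taken from the gem):

  message = Riddle::Client::Message.new
  message.append_int 3
  message.append_string "cat"
  message.to_s  # => "\000\000\000\003\000\000\000\003cat"
                #    a 4-byte int 3, then the length 3 and the raw bytes of "cat"
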
data/vendor/riddle/lib/riddle/client/response.rb
@@ -0,0 +1,84 @@
+ module Riddle
+   class Client
+     # Used to interrogate responses from the Sphinx daemon. Keep in mind none
+     # of the methods here check whether the data they're grabbing are what the
+     # user expects - it just assumes the user knows what the data stream is
+     # made up of.
+     class Response
+       # Create with the data to interpret
+       def initialize(str)
+         @str = str
+         @marker = 0
+       end
+
+       # Return the next string value in the stream
+       def next
+         len = next_int
+         result = @str[@marker, len]
+         @marker += len
+
+         return result
+       end
+
+       # Return the next integer value from the stream
+       def next_int
+         int = @str[@marker, 4].unpack('N*').first
+         @marker += 4
+
+         return int
+       end
+
+       def next_64bit_int
+         high, low = @str[@marker, 8].unpack('N*N*')[0..1]
+         @marker += 8
+
+         return (high << 32) + low
+       end
+
+       # Return the next float value from the stream
+       def next_float
+         float = @str[@marker, 4].unpack('N*').pack('L').unpack('f*').first
+         @marker += 4
+
+         return float
+       end
+
+       # Returns an array of string items
+       def next_array
+         count = next_int
+         items = []
+         for i in 0...count
+           items << self.next
+         end
+
+         return items
+       end
+
+       # Returns an array of int items
+       def next_int_array
+         count = next_int
+         items = []
+         for i in 0...count
+           items << self.next_int
+         end
+
+         return items
+       end
+
+       def next_float_array
+         count = next_int
+         items = []
+         for i in 0...count
+           items << self.next_float
+         end
+
+         return items
+       end
+
+       # Returns the length of the streamed data
+       def length
+         @str.length
+       end
+     end
+   end
+   end
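
Response is the mirror image of Message: it walks the same length-prefixed, network-order byte stream with an internal marker. A small round-trip sketch built only from the two classes shown above:

  message = Riddle::Client::Message.new
  message.append_int 42
  message.append_string "sphinx"
  message.append_array ["a", "b"]

  response = Riddle::Client::Response.new(message.to_s)
  response.next_int    # => 42
  response.next        # => "sphinx"
  response.next_array  # => ["a", "b"]
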