RubyGems - dpickett-thinking-sphinx - Versions diffs - 1.1.4 - Mend

dpickett-thinking-sphinx 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (79) hide show

data/LICENCE +20 -0
data/README +107 -0
data/lib/thinking_sphinx/active_record/delta.rb +74 -0
data/lib/thinking_sphinx/active_record/has_many_association.rb +29 -0
data/lib/thinking_sphinx/active_record/search.rb +57 -0
data/lib/thinking_sphinx/active_record.rb +245 -0
data/lib/thinking_sphinx/adapters/abstract_adapter.rb +34 -0
data/lib/thinking_sphinx/adapters/mysql_adapter.rb +53 -0
data/lib/thinking_sphinx/adapters/postgresql_adapter.rb +129 -0
data/lib/thinking_sphinx/association.rb +144 -0
data/lib/thinking_sphinx/attribute.rb +254 -0
data/lib/thinking_sphinx/class_facet.rb +20 -0
data/lib/thinking_sphinx/collection.rb +142 -0
data/lib/thinking_sphinx/configuration.rb +236 -0
data/lib/thinking_sphinx/core/string.rb +22 -0
data/lib/thinking_sphinx/deltas/datetime_delta.rb +50 -0
data/lib/thinking_sphinx/deltas/default_delta.rb +65 -0
data/lib/thinking_sphinx/deltas/delayed_delta/delta_job.rb +24 -0
data/lib/thinking_sphinx/deltas/delayed_delta/flag_as_deleted_job.rb +27 -0
data/lib/thinking_sphinx/deltas/delayed_delta/job.rb +26 -0
data/lib/thinking_sphinx/deltas/delayed_delta.rb +25 -0
data/lib/thinking_sphinx/deltas.rb +22 -0
data/lib/thinking_sphinx/facet.rb +58 -0
data/lib/thinking_sphinx/facet_collection.rb +45 -0
data/lib/thinking_sphinx/field.rb +172 -0
data/lib/thinking_sphinx/index/builder.rb +233 -0
data/lib/thinking_sphinx/index/faux_column.rb +110 -0
data/lib/thinking_sphinx/index.rb +432 -0
data/lib/thinking_sphinx/rails_additions.rb +133 -0
data/lib/thinking_sphinx/search.rb +654 -0
data/lib/thinking_sphinx/tasks.rb +128 -0
data/lib/thinking_sphinx.rb +145 -0
data/spec/unit/thinking_sphinx/active_record/delta_spec.rb +136 -0
data/spec/unit/thinking_sphinx/active_record/has_many_association_spec.rb +53 -0
data/spec/unit/thinking_sphinx/active_record/search_spec.rb +107 -0
data/spec/unit/thinking_sphinx/active_record_spec.rb +256 -0
data/spec/unit/thinking_sphinx/association_spec.rb +247 -0
data/spec/unit/thinking_sphinx/attribute_spec.rb +212 -0
data/spec/unit/thinking_sphinx/collection_spec.rb +14 -0
data/spec/unit/thinking_sphinx/configuration_spec.rb +136 -0
data/spec/unit/thinking_sphinx/core/string_spec.rb +9 -0
data/spec/unit/thinking_sphinx/field_spec.rb +145 -0
data/spec/unit/thinking_sphinx/index/builder_spec.rb +5 -0
data/spec/unit/thinking_sphinx/index/faux_column_spec.rb +30 -0
data/spec/unit/thinking_sphinx/index_spec.rb +54 -0
data/spec/unit/thinking_sphinx/search_spec.rb +59 -0
data/spec/unit/thinking_sphinx_spec.rb +129 -0
data/tasks/distribution.rb +48 -0
data/tasks/rails.rake +1 -0
data/tasks/testing.rb +86 -0
data/vendor/after_commit/LICENSE +20 -0
data/vendor/after_commit/README +16 -0
data/vendor/after_commit/Rakefile +22 -0
data/vendor/after_commit/init.rb +5 -0
data/vendor/after_commit/lib/after_commit/active_record.rb +91 -0
data/vendor/after_commit/lib/after_commit/connection_adapters.rb +103 -0
data/vendor/after_commit/lib/after_commit.rb +42 -0
data/vendor/after_commit/test/after_commit_test.rb +53 -0
data/vendor/delayed_job/lib/delayed/job.rb +251 -0
data/vendor/delayed_job/lib/delayed/message_sending.rb +7 -0
data/vendor/delayed_job/lib/delayed/performable_method.rb +55 -0
data/vendor/delayed_job/lib/delayed/worker.rb +54 -0
data/vendor/riddle/lib/riddle/client/filter.rb +53 -0
data/vendor/riddle/lib/riddle/client/message.rb +65 -0
data/vendor/riddle/lib/riddle/client/response.rb +84 -0
data/vendor/riddle/lib/riddle/client.rb +619 -0
data/vendor/riddle/lib/riddle/configuration/distributed_index.rb +48 -0
data/vendor/riddle/lib/riddle/configuration/index.rb +142 -0
data/vendor/riddle/lib/riddle/configuration/indexer.rb +19 -0
data/vendor/riddle/lib/riddle/configuration/remote_index.rb +17 -0
data/vendor/riddle/lib/riddle/configuration/searchd.rb +25 -0
data/vendor/riddle/lib/riddle/configuration/section.rb +37 -0
data/vendor/riddle/lib/riddle/configuration/source.rb +23 -0
data/vendor/riddle/lib/riddle/configuration/sql_source.rb +34 -0
data/vendor/riddle/lib/riddle/configuration/xml_source.rb +28 -0
data/vendor/riddle/lib/riddle/configuration.rb +33 -0
data/vendor/riddle/lib/riddle/controller.rb +44 -0
data/vendor/riddle/lib/riddle.rb +30 -0
metadata +158 -0

data/vendor/delayed_job/lib/delayed/job.rb ADDED Viewed

@@ -0,0 +1,251 @@
+module Delayed
+  class DeserializationError < StandardError
+  end
+  class Job < ActiveRecord::Base
+    MAX_ATTEMPTS = 25
+    MAX_RUN_TIME = 4.hours
+    set_table_name :delayed_jobs
+    # By default failed jobs are destroyed after too many attempts.
+    # If you want to keep them around (perhaps to inspect the reason
+    # for the failure), set this to false.
+    cattr_accessor :destroy_failed_jobs
+    self.destroy_failed_jobs = true
+    # Every worker has a unique name which by default is the pid of the process.
+    # There are some advantages to overriding this with something which survives worker retarts:
+    # Workers can safely resume working on tasks which are locked by themselves. The worker will assume that it crashed before.
+    cattr_accessor :worker_name
+    self.worker_name = "host:#{Socket.gethostname} pid:#{Process.pid}" rescue "pid:#{Process.pid}"
+    NextTaskSQL         = '(run_at <= ? AND (locked_at IS NULL OR locked_at < ?) OR (locked_by = ?)) AND failed_at IS NULL'
+    NextTaskOrder       = 'priority DESC, run_at ASC'
+    ParseObjectFromYaml = /\!ruby\/\w+\:([^\s]+)/
+    cattr_accessor :min_priority, :max_priority
+    self.min_priority = nil
+    self.max_priority = nil
+    class LockError < StandardError
+    end
+    def self.clear_locks!
+      update_all("locked_by = null, locked_at = null", ["locked_by = ?", worker_name])
+    end
+    def failed?
+      failed_at
+    end
+    alias_method :failed, :failed?
+    def payload_object
+      @payload_object ||= deserialize(self['handler'])
+    end
+    def name
+      @name ||= begin
+        payload = payload_object
+        if payload.respond_to?(:display_name)
+          payload.display_name
+        else
+          payload.class.name
+        end
+      end
+    end
+    def payload_object=(object)
+      self['handler'] = object.to_yaml
+    end
+    def reschedule(message, backtrace = [], time = nil)
+      if self.attempts < MAX_ATTEMPTS
+        time ||= Job.db_time_now + (attempts ** 4) + 5
+        self.attempts    += 1
+        self.run_at       = time
+        self.last_error   = message + "\n" + backtrace.join("\n")
+        self.unlock
+        save!
+      else
+        logger.info "* [JOB] PERMANENTLY removing #{self.name} because of #{attempts} consequetive failures."
+        destroy_failed_jobs ? destroy : update_attribute(:failed_at, Time.now)
+      end
+    end
+    def self.enqueue(*args, &block)
+      object = block_given? ? EvaledJob.new(&block) : args.shift
+      unless object.respond_to?(:perform) || block_given?
+        raise ArgumentError, 'Cannot enqueue items which do not respond to perform'
+      end
+      priority = args[0] || 0
+      run_at   = args[1]
+      Job.create(:payload_object => object, :priority => priority.to_i, :run_at => run_at)
+    end
+    def self.find_available(limit = 5, max_run_time = MAX_RUN_TIME)
+      time_now = db_time_now
+      sql = NextTaskSQL.dup
+      conditions = [time_now, time_now - max_run_time, worker_name]
+      if self.min_priority
+        sql << ' AND (priority >= ?)'
+        conditions << min_priority
+      end
+      if self.max_priority
+        sql << ' AND (priority <= ?)'
+        conditions << max_priority
+      end
+      conditions.unshift(sql)
+      records = ActiveRecord::Base.silence do
+        find(:all, :conditions => conditions, :order => NextTaskOrder, :limit => limit)
+      end
+      records.sort_by { rand() }
+    end
+    # Get the payload of the next job we can get an exclusive lock on.
+    # If no jobs are left we return nil
+    def self.reserve(max_run_time = MAX_RUN_TIME, &block)
+      # We get up to 5 jobs from the db. In face we cannot get exclusive access to a job we try the next.
+      # this leads to a more even distribution of jobs across the worker processes
+      find_available(5, max_run_time).each do |job|
+        begin
+          logger.info "* [JOB] aquiring lock on #{job.name}"
+          job.lock_exclusively!(max_run_time, worker_name)
+          runtime =  Benchmark.realtime do
+            invoke_job(job.payload_object, &block)
+            job.destroy
+          end
+          logger.info "* [JOB] #{job.name} completed after %.4f" % runtime
+          return job
+        rescue LockError
+          # We did not get the lock, some other worker process must have
+          logger.warn "* [JOB] failed to aquire exclusive lock for #{job.name}"
+        rescue StandardError => e
+          job.reschedule e.message, e.backtrace
+          log_exception(job, e)
+          return job
+        end
+      end
+      nil
+    end
+    # This method is used internally by reserve method to ensure exclusive access
+    # to the given job. It will rise a LockError if it cannot get this lock.
+    def lock_exclusively!(max_run_time, worker = worker_name)
+      now = self.class.db_time_now
+      affected_rows = if locked_by != worker
+        # We don't own this job so we will update the locked_by name and the locked_at
+        self.class.update_all(["locked_at = ?, locked_by = ?", now, worker], ["id = ? and (locked_at is null or locked_at < ?)", id, (now - max_run_time.to_i)])
+      else
+        # We already own this job, this may happen if the job queue crashes.
+        # Simply resume and update the locked_at
+        self.class.update_all(["locked_at = ?", now], ["id = ? and locked_by = ?", id, worker])
+      end
+      raise LockError.new("Attempted to aquire exclusive lock failed") unless affected_rows == 1
+      self.locked_at    = now
+      self.locked_by    = worker
+    end
+    def unlock
+      self.locked_at    = nil
+      self.locked_by    = nil
+    end
+    # This is a good hook if you need to report job processing errors in additional or different ways
+    def self.log_exception(job, error)
+      logger.error "* [JOB] #{job.name} failed with #{error.class.name}: #{error.message} - #{job.attempts} failed attempts"
+      logger.error(error)
+    end
+    def self.work_off(num = 100)
+      success, failure = 0, 0
+      num.times do
+        job = self.reserve do |j|
+          begin
+            j.perform
+            success += 1
+          rescue
+            failure += 1
+            raise
+          end
+        end
+        break if job.nil?
+      end
+      return [success, failure]
+    end
+    # Moved into its own method so that new_relic can trace it.
+    def self.invoke_job(job, &block)
+      block.call(job)
+    end
+  private
+    def deserialize(source)
+      handler = YAML.load(source) rescue nil
+      unless handler.respond_to?(:perform)
+        if handler.nil? && source =~ ParseObjectFromYaml
+          handler_class = $1
+        end
+        attempt_to_load(handler_class || handler.class)
+        handler = YAML.load(source)
+      end
+      return handler if handler.respond_to?(:perform)
+      raise DeserializationError,
+        'Job failed to load: Unknown handler. Try to manually require the appropiate file.'
+    rescue TypeError, LoadError, NameError => e
+      raise DeserializationError,
+        "Job failed to load: #{e.message}. Try to manually require the required file."
+    end
+    # Constantize the object so that ActiveSupport can attempt
+    # its auto loading magic. Will raise LoadError if not successful.
+    def attempt_to_load(klass)
+       klass.constantize
+    end
+    def self.db_time_now
+      (ActiveRecord::Base.default_timezone == :utc) ? Time.now.utc : Time.now
+    end
+  protected
+    def before_save
+      self.run_at ||= self.class.db_time_now
+    end
+  end
+  class EvaledJob
+    def initialize
+      @job = yield
+    end
+    def perform
+      eval(@job)
+    end
+  end
+end

data/vendor/delayed_job/lib/delayed/message_sending.rb ADDED Viewed

@@ -0,0 +1,7 @@
+module Delayed
+  module MessageSending
+    def send_later(method, *args)
+      Delayed::Job.enqueue Delayed::PerformableMethod.new(self, method.to_sym, args)
+    end
+  end
+end

data/vendor/delayed_job/lib/delayed/performable_method.rb ADDED Viewed

@@ -0,0 +1,55 @@
+module Delayed
+  class PerformableMethod < Struct.new(:object, :method, :args)
+    CLASS_STRING_FORMAT = /^CLASS\:([A-Z][\w\:]+)$/
+    AR_STRING_FORMAT    = /^AR\:([A-Z][\w\:]+)\:(\d+)$/
+    def initialize(object, method, args)
+      raise NoMethodError, "undefined method `#{method}' for #{self.inspect}" unless object.respond_to?(method)
+      self.object = dump(object)
+      self.args   = args.map { |a| dump(a) }
+      self.method = method.to_sym
+    end
+    def display_name
+      case self.object
+      when CLASS_STRING_FORMAT then "#{$1}.#{method}"
+      when AR_STRING_FORMAT    then "#{$1}##{method}"
+      else "Unknown##{method}"
+      end
+    end
+    def perform
+      load(object).send(method, *args.map{|a| load(a)})
+    rescue ActiveRecord::RecordNotFound
+      # We cannot do anything about objects which were deleted in the meantime
+      true
+    end
+    private
+    def load(arg)
+      case arg
+      when CLASS_STRING_FORMAT then $1.constantize
+      when AR_STRING_FORMAT    then $1.constantize.find($2)
+      else arg
+      end
+    end
+    def dump(arg)
+      case arg
+      when Class              then class_to_string(arg)
+      when ActiveRecord::Base then ar_to_string(arg)
+      else arg
+      end
+    end
+    def ar_to_string(obj)
+      "AR:#{obj.class}:#{obj.id}"
+    end
+    def class_to_string(obj)
+      "CLASS:#{obj.name}"
+    end
+  end
+end

data/vendor/delayed_job/lib/delayed/worker.rb ADDED Viewed

@@ -0,0 +1,54 @@
+module Delayed
+  class Worker
+    SLEEP = 5
+    cattr_accessor :logger
+    self.logger = if defined?(Merb::Logger)
+                    Merb.logger
+                  elsif defined?(RAILS_DEFAULT_LOGGER)
+                    RAILS_DEFAULT_LOGGER
+                  end
+    def initialize(options={})
+      @quiet = options[:quiet]
+      Delayed::Job.min_priority = options[:min_priority] if options.has_key?(:min_priority)
+      Delayed::Job.max_priority = options[:max_priority] if options.has_key?(:max_priority)
+    end
+    def start
+      say "*** Starting job worker #{Delayed::Job.worker_name}"
+      trap('TERM') { say 'Exiting...'; $exit = true }
+      trap('INT')  { say 'Exiting...'; $exit = true }
+      loop do
+        result = nil
+        realtime = Benchmark.realtime do
+          result = Delayed::Job.work_off
+        end
+        count = result.sum
+        break if $exit
+        if count.zero?
+          sleep(SLEEP)
+        else
+          say "#{count} jobs processed at %.4f j/s, %d failed ..." % [count / realtime, result.last]
+        end
+        break if $exit
+      end
+    ensure
+      Delayed::Job.clear_locks!
+    end
+    def say(text)
+      puts text unless @quiet
+      logger.info text if logger
+    end
+  end
+end

data/vendor/riddle/lib/riddle/client/filter.rb ADDED Viewed

@@ -0,0 +1,53 @@
+module Riddle
+  class Client
+    # Used for querying Sphinx.
+    class Filter
+      attr_accessor :attribute, :values, :exclude
+      # Attribute name, values (which can be an array or a range), and whether
+      # the filter should be exclusive.
+      def initialize(attribute, values, exclude=false)
+        @attribute, @values, @exclude = attribute, values, exclude
+      end
+      def exclude?
+        self.exclude
+      end
+      # Returns the message for this filter to send to the Sphinx service
+      def query_message
+        message = Message.new
+        message.append_string self.attribute.to_s
+        case self.values
+        when Range
+          if self.values.first.is_a?(Float) && self.values.last.is_a?(Float)
+            message.append_int FilterTypes[:float_range]
+            message.append_floats self.values.first, self.values.last
+          else
+            message.append_int FilterTypes[:range]
+            message.append_ints self.values.first, self.values.last
+          end
+        when Array
+          message.append_int FilterTypes[:values]
+          message.append_int self.values.length
+          # using to_f is a hack from the php client - to workaround 32bit
+          # signed ints on x32 platforms
+          message.append_ints *self.values.collect { |val|
+            case val
+            when TrueClass
+              1.0
+            when FalseClass
+              0.0
+            else
+              val.to_f
+            end
+          }
+        end
+        message.append_int self.exclude? ? 1 : 0
+        message.to_s
+      end
+    end
+  end
+end

data/vendor/riddle/lib/riddle/client/message.rb ADDED Viewed

@@ -0,0 +1,65 @@
+module Riddle
+  class Client
+    # This class takes care of the translation of ints, strings and arrays to
+    # the format required by the Sphinx service.
+    class Message
+      def initialize
+        @message = ""
+        @size_method = @message.respond_to?(:bytesize) ? :bytesize : :length
+      end
+      # Append raw data (only use if you know what you're doing)
+      def append(*args)
+        return if args.length == 0
+        args.each { |arg| @message << arg }
+      end
+      # Append a string's length, then the string itself
+      def append_string(str)
+        @message << [str.send(@size_method)].pack('N') + str
+      end
+      # Append an integer
+      def append_int(int)
+        @message << [int].pack('N')
+      end
+      def append_64bit_int(int)
+        @message << [int >> 32, int & 0xFFFFFFFF].pack('NN')
+      end
+      # Append a float
+      def append_float(float)
+        @message << [float].pack('f').unpack('L*').pack("N")
+      end
+      # Append multiple integers
+      def append_ints(*ints)
+        ints.each { |int| append_int(int) }
+      end
+      def append_64bit_ints(*ints)
+        ints.each { |int| append_64bit_int(int) }
+      end
+      # Append multiple floats
+      def append_floats(*floats)
+        floats.each { |float| append_float(float) }
+      end
+      # Append an array of strings - first appends the length of the array,
+      # then each item's length and value.
+      def append_array(array)
+        append_int(array.length)
+        array.each { |item| append_string(item) }
+      end
+      # Returns the entire message
+      def to_s
+        @message
+      end
+    end
+  end
+end

data/vendor/riddle/lib/riddle/client/response.rb ADDED Viewed

@@ -0,0 +1,84 @@
+module Riddle
+  class Client
+    # Used to interrogate responses from the Sphinx daemon. Keep in mind none
+    # of the methods here check whether the data they're grabbing are what the
+    # user expects - it just assumes the user knows what the data stream is
+    # made up of.
+    class Response
+      # Create with the data to interpret
+      def initialize(str)
+        @str = str
+        @marker = 0
+      end
+      # Return the next string value in the stream
+      def next
+        len = next_int
+        result = @str[@marker, len]
+        @marker += len
+        return result
+      end
+      # Return the next integer value from the stream
+      def next_int
+        int = @str[@marker, 4].unpack('N*').first
+        @marker += 4
+        return int
+      end
+      def next_64bit_int
+        high, low = @str[@marker, 8].unpack('N*N*')[0..1]
+        @marker += 8
+        return (high << 32) + low
+      end
+      # Return the next float value from the stream
+      def next_float
+        float = @str[@marker, 4].unpack('N*').pack('L').unpack('f*').first
+        @marker += 4
+        return float
+      end
+      # Returns an array of string items
+      def next_array
+        count = next_int
+        items = []
+        for i in 0...count
+          items << self.next
+        end
+        return items
+      end
+      # Returns an array of int items
+      def next_int_array
+        count = next_int
+        items = []
+        for i in 0...count
+          items << self.next_int
+        end
+        return items
+      end
+      def next_float_array
+        count = next_int
+        items = []
+        for i in 0...count
+          items << self.next_float
+        end
+        return items
+      end
+      # Returns the length of the streamed data
+      def length
+        @str.length
+      end
+    end
+  end
+end