RubyGems - dharmarth-starling - Versions diffs - 0.9.9 - Mend

dharmarth-starling 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

data/CHANGELOG +56 -0
data/LICENSE +20 -0
data/README.rdoc +106 -0
data/Rakefile +56 -0
data/bin/starling +6 -0
data/bin/starling_top +57 -0
data/etc/sample-config.yml +9 -0
data/etc/starling.redhat +66 -0
data/etc/starling.ubuntu +71 -0
data/lib/starling.rb +181 -0
data/lib/starling/handler.rb +237 -0
data/lib/starling/persistent_queue.rb +156 -0
data/lib/starling/queue_collection.rb +147 -0
data/lib/starling/server.rb +125 -0
data/lib/starling/server_runner.rb +317 -0
data/spec/starling_server_spec.rb +216 -0
metadata +107 -0

data/lib/starling.rb ADDED

@@ -0,0 +1,181 @@
require 'memcache'
require 'yaml'
##
# Client for a cluster of Starling queue servers, built on top of the
# MemCache client. A "key" is a queue name: #set enqueues a value, #get
# blocks until a value can be dequeued and #fetch dequeues without blocking.
class Starling < MemCache
  # Seconds to sleep between polls in #get and between retries in #set.
  WAIT_TIME = 0.25

  # Keep the inherited implementations reachable under private-looking names
  # so the overrides below can delegate to them.
  alias_method :_original_get, :get
  alias_method :_original_delete, :delete

  ##
  # Accepts the same arguments as MemCache#initialize and forwards them
  # untouched. Afterwards makes sure @buckets exists, because
  # #get_server_for_key below relies on it.
  def initialize(*args)
    super
    # @buckets is no longer used in newer version of Memcache-client(1.6.2 onwards)
    unless instance_variable_defined?(:@buckets)
      # Create an array of server buckets for weight selection of servers:
      # each server appears +weight+ times.
      @buckets = []
      @servers.each do |server|
        server.weight.times { @buckets.push(server) }
      end
    end
  end

  ##
  # fetch an item from a queue. Polls every WAIT_TIME seconds and only
  # returns once a non-nil response has been received, so this call blocks
  # for as long as the queue stays empty.
  def get(*args)
    loop do
      response = _original_get(*args)
      return response unless response.nil?
      sleep WAIT_TIME
    end
  end

  ##
  # will return the next item or nil (single attempt, never sleeps).
  def fetch(*args)
    _original_get(*args)
  end

  ##
  # Delete the key (queue) from all Starling servers. This is necessary
  # because the random way a server is chosen in #get_server_for_key
  # implies that the queue could easily be spread across the entire
  # Starling cluster.
  def delete(key, expiry = 0)
    with_servers do
      _original_delete(key, expiry)
    end
  end

  ##
  # Provides a way to work with a specific list of servers by
  # forcing all calls to #get_server_for_key to use a specific
  # server, and changing that server each time that the call
  # yields to the block provided.  This helps work around the
  # normally random nature of the #get_server_for_key method.
  #
  # Acquires the mutex for the entire duration of the call
  # since unrelated calls to #get_server_for_key might be
  # adversely affected by the non_random result.
  #
  # Does nothing (and returns nil) when no block is given.
  def with_servers(my_servers = @servers.dup)
    return unless block_given?
    with_lock do
      begin
        my_servers.each do |server|
          @force_server = server
          yield
        end
      ensure
        # Always un-pin the server, even when the block raised; otherwise
        # every later call would keep hitting the last forced server.
        @force_server = nil
      end
    end
  end

  ##
  # insert +value+ into +queue+.
  #
  # +expiry+ is expressed as a UNIX timestamp
  #
  # If +raw+ is true, +value+ will not be Marshalled. If +raw+ = :yaml, +value+
  # will be serialized with YAML, instead.
  #
  # A MemCache::MemCacheError triggers a sleep of WAIT_TIME seconds followed
  # by a retry; once more than three retries have failed the error is
  # re-raised to the caller.
  def set(queue, value, expiry = 0, raw = false)
    retries = 0
    begin
      if raw == :yaml
        value = YAML.dump(value)
        # Flip to plain raw mode so a retry does not dump the value twice.
        raw = true
      end
      super(queue, value, expiry, raw)
    rescue MemCache::MemCacheError => e
      retries += 1
      sleep WAIT_TIME
      retry unless retries > 3
      raise e
    end
  end

  ##
  # returns the number of items in +queue+. If +queue+ is +:all+, a hash of all
  # queue sizes will be returned.
  #
  # +statistics+ may be a previously fetched #stats result (a mapping of
  # server to a hash of stat names); when omitted, #stats is called. The
  # per-server "queue_<name>_items" values are summed.
  def sizeof(queue, statistics = nil)
    statistics ||= stats
    if queue == :all
      queue_sizes = {}
      # The block parameter is deliberately not called +queue+, so the
      # method argument is never shadowed or overwritten.
      available_queues(statistics).each do |name|
        queue_sizes[name] = sizeof(name, statistics)
      end
      return queue_sizes
    end
    statistics.inject(0) { |sum, (_server, values)| sum + values["queue_#{queue}_items"].to_i }
  end

  ##
  # returns a list of available (currently allocated) queues.
  #
  # Queue names are derived from the "queue_<name>_items" stat keys; the
  # "<name>_total" and "<name>_expired" entries produced by the
  # total_items / expired_items counters are filtered out.
  def available_queues(statistics = nil)
    statistics ||= stats
    statistics.map { |_server, values|
      values.keys
    }.flatten.uniq.grep(/^queue_(.*)_items/).map { |stat_name|
      stat_name.gsub(/^queue_/, '').gsub(/_items$/, '')
    }.reject { |name|
      name =~ /_total$/ || name =~ /_expired$/
    }
  end

  ##
  # iterator to flush +queue+. Each element will be passed to the provided
  # +block+
  #
  # The number of items is sampled once via #sizeof and each item is then
  # read with the blocking #get.
  def flush(queue)
    sizeof(queue).times do
      v = get(queue)
      yield v if block_given?
    end
  end

  private

  ##
  # Picks the server to talk to for +key+. Returns the server pinned by
  # #with_servers when there is one; otherwise draws random candidates from
  # a copy of the weighted bucket list until a live one is found.
  #
  # Raises ArgumentError for keys containing whitespace or longer than 250
  # characters, and MemCacheError when there is no server or none is alive.
  def get_server_for_key(key)
    raise ArgumentError, "illegal character in key #{key.inspect}" if key =~ /\s/
    raise ArgumentError, "key too long #{key.inspect}" if key.length > 250
    raise MemCacheError, "No servers available" if @servers.empty?
    return @force_server if @force_server
    bukkits = @buckets.dup
    # One attempt per bucket entry; dead candidates are removed so they are
    # not drawn again.
    bukkits.size.times do
      n = rand(bukkits.size)
      server = bukkits[n]
      return server if server.alive?
      bukkits.delete_at(n)
    end
    raise MemCacheError, "No servers available (all dead)"
  end
end
class MemCache
  protected

  ##
  # Ensure that everything within the given block is executed
  # within the locked mutex if this client is multithreaded.
  # If the client isn't multithreaded, the block is simply executed.
  #
  # Returns the block's value, or nil when no block is given.
  def with_lock
    return unless block_given?
    # Remember the decision so lock and unlock always match.
    locking = @multithread
    # Acquire the lock *outside* the begin/ensure: if locking itself fails
    # (for example a ThreadError on recursive locking) we must not run the
    # unlock, which would release a lock this call never obtained.
    @mutex.lock if locking
    begin
      yield
    ensure
      @mutex.unlock if locking
    end
  end
end

data/lib/starling/handler.rb ADDED

@@ -0,0 +1,237 @@
module StarlingServer
  ##
  # This is an internal class that's used by Starling::Server to handle the
  # MemCache protocol and act as an interface between the Server and the
  # QueueCollection.
  class Handler < EventMachine::Connection
    # pack/unpack layout of a stored item: a native unsigned int carrying the
    # expiry timestamp, followed by the payload bytes.
    DATA_PACK_FMT = "Ia*".freeze
    # ERROR responses
    ERR_UNKNOWN_COMMAND = "CLIENT_ERROR bad command line format\r\n".freeze
    # GET Responses
    GET_COMMAND = /\Aget (.{1,250})\s*\r\n/m
    GET_RESPONSE       = "VALUE %s %s %s\r\n%s\r\nEND\r\n".freeze
    GET_RESPONSE_EMPTY = "END\r\n".freeze
    # SET Responses
    SET_COMMAND = /\Aset (.{1,250}) ([0-9]+) ([0-9]+) ([0-9]+)\r\n/m
    SET_RESPONSE_SUCCESS  = "STORED\r\n".freeze
    SET_RESPONSE_FAILURE  = "NOT STORED\r\n".freeze
    SET_CLIENT_DATA_ERROR = "CLIENT_ERROR bad data chunk\r\nERROR\r\n".freeze
    # DELETE Responses
    DELETE_COMMAND = /\Adelete (.{1,250}) ([0-9]+)\r\n/m
    DELETE_RESPONSE = "END\r\n".freeze
    # STAT Response
    STATS_COMMAND = /\Astats\r\n/m
    STATS_RESPONSE = "STAT pid %d\r
STAT uptime %d\r
STAT time %d\r
STAT version %s\r
STAT rusage_user %0.6f\r
STAT rusage_system %0.6f\r
STAT curr_items %d\r
STAT total_items %d\r
STAT bytes %d\r
STAT curr_connections %d\r
STAT total_connections %d\r
STAT cmd_get %d\r
STAT cmd_set %d\r
STAT get_hits %d\r
STAT get_misses %d\r
STAT bytes_read %d\r
STAT bytes_written %d\r
STAT limit_maxbytes %d\r
%sEND\r\n".freeze
    # Per-queue section that is interpolated into the %s slot of STATS_RESPONSE.
    QUEUE_STATS_RESPONSE = "STAT queue_%s_items %d\r
STAT queue_%s_total_items %d\r
STAT queue_%s_logsize %d\r
STAT queue_%s_expired_items %d\r
STAT queue_%s_age %d\r\n".freeze
    SHUTDOWN_COMMAND = /\Ashutdown\r\n/m
    QUIT_COMMAND = /\Aquit\r\n/m

    # Counter used to hand every connection its own session id.
    @@next_session_id = 1

    ##
    # Creates a new handler for the MemCache protocol that communicates with a
    # given client.
    #
    # +options+ is read later in #post_init for the keys :server, :timeout
    # and :queue.
    def initialize(options = {})
      @opts = options
    end

    ##
    # Sets up the per-connection state: input buffers, the pending-SET stash,
    # the per-queue expiry counters and the session id. Also bumps the
    # server's total connection counter and applies the inactivity timeout.
    def post_init
      @stash = []
      @data = ""
      @data_buf = ""
      @server = @opts[:server]
      @logger = StarlingServer::Base.logger
      @expiry_stats = Hash.new(0)
      @expected_length = nil
      @server.stats[:total_connections] += 1
      set_comm_inactivity_timeout @opts[:timeout]
      @queue_collection = @opts[:queue]
      @session_id = @@next_session_id
      @@next_session_id += 1
      # The peer address used to be looked up here, but only a disabled debug
      # log line ever read it, so the lookup (which could fail on its own)
      # has been dropped.
    end

    ##
    # Process incoming commands from the attached client.
    #
    # Appends +incoming+ to the connection buffer, then consumes it one
    # "\r\n"-terminated chunk at a time. Every chunk that yields a response
    # has that response sent immediately, so several commands arriving in a
    # single packet each get their own answer.
    def receive_data(incoming)
      @server.stats[:bytes_read] += incoming.size
      @data << incoming
      while data = @data.slice!(/.*?\r\n/m)
        response = process(data)
        send_data response if response
      end
    end

    ##
    # Handles one "\r\n"-terminated chunk and returns the response string, or
    # nil when nothing should be sent yet.
    #
    # While a SET is pending (@expected_length is set) chunks are treated as
    # payload and accumulated in @data_buf until the declared length is
    # reached. Otherwise the chunk is matched against the command patterns.
    # Any StandardError raised while handling is logged and answered with
    # GET_RESPONSE_EMPTY.
    def process(data)
      data = @data_buf + data if @data_buf.size > 0
      # our only non-normal state is consuming an object's data
      # when @expected_length is present
      if @expected_length
        if data.size == @expected_length
          @data_buf = ""
          return set_data(data)
        elsif data.size > @expected_length
          # The payload ran past the length announced in the SET line.
          # Buffering further could never reach an exact match, so drop the
          # pending SET and report the bad chunk instead of stalling.
          @data_buf = ""
          @stash = []
          @expected_length = nil
          return respond(SET_CLIENT_DATA_ERROR)
        else
          # Payload contained "\r\n" itself; keep collecting.
          @data_buf = data
          return
        end
      end
      case data
      when SET_COMMAND
        @server.stats[:set_requests] += 1
        set($1, $2, $3, $4.to_i)
      when GET_COMMAND
        @server.stats[:get_requests] += 1
        get($1)
      when STATS_COMMAND
        stats
      when SHUTDOWN_COMMAND
        # no point in responding, they'll never get it.
        Runner::shutdown
      when DELETE_COMMAND
        delete $1
      when QUIT_COMMAND
        # ignore the command, client is closing connection.
        return nil
      else
        logger.warn "Unknown command: #{data}."
        respond ERR_UNKNOWN_COMMAND
      end
    rescue => e
      logger.error "Error handling request: #{e}."
      logger.debug e.backtrace.join("\n")
      respond GET_RESPONSE_EMPTY
    end

    ##
    # Connection-closed callback; intentionally does nothing.
    def unbind
      #@logger.debug "(#{@session_id}) connection ends"
    end

    private

    ##
    # Deletes +queue+ from the queue collection and acknowledges.
    def delete(queue)
      @queue_collection.delete(queue)
      respond DELETE_RESPONSE
    end

    ##
    # Formats +str+ with +args+ (sprintf), adds the result's length to the
    # server's bytes_written counter and returns the formatted string. The
    # caller is responsible for actually sending it.
    def respond(str, *args)
      response = sprintf(str, *args)
      @server.stats[:bytes_written] += response.length
      response
    end

    ##
    # First half of a SET: remembers key, flags and expiry and switches the
    # handler into payload mode. +len+ is the declared payload length; two
    # bytes are added for the trailing "\r\n". Returns nil (no response yet).
    def set(key, flags, expiry, len)
      @expected_length = len + 2
      @stash = [ key, flags, expiry ]
      nil
    end

    ##
    # Second half of a SET: strips the trailing "\r\n" from +incoming+,
    # leaves payload mode, packs expiry and payload with DATA_PACK_FMT and
    # puts the result on the queue named by the stashed key.
    def set_data(incoming)
      key, flags, expiry = @stash
      data = incoming.slice(0...@expected_length-2)
      @stash = []
      @expected_length = nil
      internal_data = [expiry.to_i, data].pack(DATA_PACK_FMT)
      if @queue_collection.put(key, internal_data)
        respond SET_RESPONSE_SUCCESS
      else
        respond SET_RESPONSE_FAILURE
      end
    end

    ##
    # Takes items from queue +key+ until one is found that has no expiry (0)
    # or has not expired yet; expired items are discarded and counted in
    # @expiry_stats. Responds with the item, or with the empty response when
    # the queue ran out.
    def get(key)
      now = Time.now.to_i
      while response = @queue_collection.take(key)
        expiry, data = response.unpack(DATA_PACK_FMT)
        break if expiry == 0 || expiry >= now
        @expiry_stats[key] += 1
        # Forget the expired item so it is not returned if the queue is
        # exhausted on the next iteration.
        expiry, data = nil
      end
      if data
        respond GET_RESPONSE, key, 0, data.size, data
      else
        respond GET_RESPONSE_EMPTY
      end
    end

    ##
    # Builds the STATS response from process, server and queue-collection
    # counters, followed by the per-queue section from #queue_stats.
    def stats
      respond STATS_RESPONSE,
        Process.pid, # pid
        Time.now - @server.stats(:start_time), # uptime
        Time.now.to_i, # time
        StarlingServer::VERSION, # version
        Process.times.utime, # rusage_user
        Process.times.stime, # rusage_system
        @queue_collection.stats(:current_size), # curr_items
        @queue_collection.stats(:total_items), # total_items
        @queue_collection.stats(:current_bytes), # bytes
        @server.stats(:connections), # curr_connections
        @server.stats(:total_connections), # total_connections
        @server.stats(:get_requests), # get count
        @server.stats(:set_requests), # set count
        @queue_collection.stats(:get_hits),
        @queue_collection.stats(:get_misses),
        @server.stats(:bytes_read), # total bytes read
        @server.stats(:bytes_written), # total bytes written
        0, # limit_maxbytes
        queue_stats
    end

    ##
    # Concatenates one QUEUE_STATS_RESPONSE section per known queue. The
    # expired-items figure comes from this connection's own @expiry_stats.
    def queue_stats
      @queue_collection.queues.inject("") do |m,(k,v)|
        m + sprintf(QUEUE_STATS_RESPONSE,
                      k, v.length,
                      k, v.total_items,
                      k, v.logsize,
                      k, @expiry_stats[k],
                      k, v.current_age)
      end
    end

    ##
    # Logger captured in #post_init.
    def logger
      @logger
    end
  end
end

data/lib/starling/persistent_queue.rb ADDED

@@ -0,0 +1,156 @@
module StarlingServer
  ##
  # PersistentQueue is a subclass of Ruby's thread-safe Queue class. It adds a
  # transactional log to the in-memory Queue, which enables quickly rebuilding
  # the Queue in the event of a server outage.
  #
  # Log format: a push is recorded as the byte "\000", a 4-byte native
  # unsigned int holding the payload's byte length, then the payload bytes;
  # a pop is recorded as the single byte "\001".
  class PersistentQueue < Queue
    ##
    # When a log reaches the SOFT_LOG_MAX_SIZE, the Queue will wait until
    # it is empty, and will then rotate the log file.
    SOFT_LOG_MAX_SIZE = 16 * (1024**2) # 16 MB
    TRX_CMD_PUSH = "\000".freeze
    TRX_CMD_POP = "\001".freeze
    # Legacy sprintf template for a push record. No longer used by #push
    # (records are built with Array#pack); kept so existing references to the
    # constant keep working.
    TRX_PUSH = "\000%s%s".freeze
    TRX_POP = "\001".freeze

    # Net payload bytes restored from the log when the queue was opened.
    attr_reader :initial_bytes
    # Number of items pushed so far, including those replayed from the log.
    attr_reader :total_items
    # Size of the transaction log in bytes, as tracked by this object.
    attr_reader :logsize
    # Time the most recently popped item spent in the queue, in milliseconds.
    attr_reader :current_age

    ##
    # Create a new PersistentQueue at +persistence_path+/+queue_name+.
    # If a queue log exists at that path, the Queue will be loaded from
    # disk before being available for use.
    #
    # With +debug+ set, replay progress is printed to standard output.
    def initialize(persistence_path, queue_name, debug = false)
      @persistence_path = persistence_path
      @queue_name = queue_name
      @total_items = 0
      super()
      @initial_bytes = replay_transaction_log(debug)
      # Replaying pops updates @current_age; start from a clean value.
      @current_age = 0
    end

    ##
    # Pushes +value+ to the queue. By default, +push+ will write to the
    # transactional log. Set +log_trx=false+ to override this behaviour.
    #
    # Raises NoTransactionLog when logging is requested but the log is closed.
    def push(value, log_trx = true)
      if log_trx
        raise NoTransactionLog unless @trx
        # The length prefix must count bytes, not characters, because replay
        # reads exactly that many bytes back. Array#pack also yields a binary
        # string, so payloads in any encoding can be logged.
        transaction TRX_CMD_PUSH + [value.bytesize, value].pack("Ia*")
      end
      @total_items += 1
      # Items are stored together with their enqueue time (microseconds).
      super([now_usec, value])
    end

    ##
    # Retrieves data from the queue.
    #
    # With +log_trx+ true (the default) the pop is recorded in the log and
    # the underlying Queue#pop is called in blocking mode; with +log_trx+
    # false it is called in non-blocking mode. An empty queue in non-blocking
    # mode prints a warning and yields an empty string.
    #
    # Raises NoTransactionLog when logging is requested but the log is closed.
    def pop(log_trx = true)
      raise NoTransactionLog if log_trx && !@trx
      begin
        rv = super(!log_trx)
      rescue ThreadError
        puts "WARNING: The queue was empty when trying to pop(). Technically this shouldn't ever happen. Probably a bug in the transactional underpinnings. Or maybe shutdown didn't happen cleanly at some point. Ignoring."
        rv = [now_usec, '']
      end
      transaction TRX_POP if log_trx
      @current_age = (now_usec - rv[0]) / 1000
      rv[1]
    end

    ##
    # Safely closes the transactional queue. Calling it again once the log
    # is already closed is a no-op.
    def close
      # Ok, yeah, this is lame, and is *technically* a race condition. HOWEVER,
      # the QueueCollection *should* have stopped processing requests, and I don't
      # want to add yet another Mutex around all the push and pop methods. So we
      # do the next simplest thing, and minimize the time we'll stick around before
      # @trx is nil.
      @not_trx = @trx
      @trx = nil
      @not_trx.close if @not_trx
    end

    ##
    # Closes the queue and deletes its log file from disk.
    def purge
      close
      File.delete(log_path)
    end

    private

    # Full path of this queue's transaction log.
    def log_path #:nodoc:
      File.join(@persistence_path, @queue_name)
    end

    # Opens (creating if necessary) the log for reading and writing and
    # resets @logsize to the file's current size.
    def reopen_log #:nodoc:
      @trx = File.new(log_path, File::CREAT|File::RDWR)
      @logsize = File.size(log_path)
    end

    # Starts a fresh log: the current one is closed and renamed out of the
    # way, a new one is opened, and the renamed file is removed.
    def rotate_log #:nodoc:
      @trx.close
      backup_logfile = "#{log_path}.#{Time.now.to_i}"
      File.rename(log_path, backup_logfile)
      reopen_log
      File.unlink(backup_logfile)
    end

    # Rebuilds the in-memory queue by re-applying every push and pop found in
    # the log (without logging them again). Returns the net number of payload
    # bytes left in the queue afterwards. Unknown command bytes are skipped.
    def replay_transaction_log(debug) #:nodoc:
      reopen_log
      bytes_read = 0
      print "Reading back transaction log for #{@queue_name} " if debug
      while !@trx.eof?
        cmd = @trx.read(1)
        case cmd
        when TRX_CMD_PUSH
          print ">" if debug
          raw_size = @trx.read(4)
          next unless raw_size
          size = raw_size.unpack("I").first
          data = @trx.read(size)
          next unless data
          push(data, false)
          bytes_read += data.bytesize
        when TRX_CMD_POP
          print "<" if debug
          bytes_read -= pop(false).bytesize
        else
          puts "Error reading transaction log: " +
               "I don't understand '#{cmd}' (skipping)." if debug
        end
      end
      print " done.\n" if debug
      return bytes_read
    end

    # Appends +data+ to the log, updates @logsize and rotates the log once it
    # exceeds SOFT_LOG_MAX_SIZE while the queue is empty.
    def transaction(data) #:nodoc:
      raise "no transaction log handle. that totally sucks." unless @trx
      @trx.write_nonblock data
      @logsize += data.bytesize
      rotate_log if @logsize > SOFT_LOG_MAX_SIZE && self.length == 0
    end

    # Current wall-clock time in microseconds since the epoch.
    def now_usec
      now = Time.now
      now.to_i * 1000000 + now.usec
    end
  end
end