sorceror_poseidon_cluster 0.4.0
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +8 -0
- data/.travis.yml +7 -0
- data/Gemfile +10 -0
- data/Gemfile.lock +85 -0
- data/README.md +95 -0
- data/Rakefile +21 -0
- data/examples/consumer_group.rb +33 -0
- data/lib/poseidon/cluster.rb +28 -0
- data/lib/poseidon/consumer_group.rb +467 -0
- data/lib/poseidon_cluster.rb +1 -0
- data/scenario/.gitignore +1 -0
- data/scenario/consumer.rb +17 -0
- data/scenario/producer.rb +23 -0
- data/scenario/run.rb +35 -0
- data/scenario/scenario.rb +134 -0
- data/sorceror_poseidon_cluster.gemspec +27 -0
- data/spec/lib/poseidon/cluster_spec.rb +19 -0
- data/spec/lib/poseidon/consumer_group_spec.rb +313 -0
- data/spec/spec_helper.rb +14 -0
- metadata +184 -0
data/lib/poseidon/consumer_group.rb
ADDED
@@ -0,0 +1,467 @@
# A ConsumerGroup operates on all partitions of a single topic. The goal is to ensure
# each topic message is consumed only once, regardless of the number of consumer instances within
# a cluster, as described in: http://kafka.apache.org/documentation.html#distributionimpl.
#
# The ConsumerGroup internally creates multiple PartitionConsumer instances. It uses Zookeeper
# and follows a simple consumer rebalancing algorithm which allows all the consumers
# in a group to come to a consensus on which consumer is consuming which partitions. Each
# ConsumerGroup can 'claim' 0-n partitions and will consume their messages until another
# ConsumerGroup instance joins or leaves the cluster.
#
# Please note: ConsumerGroups themselves don't implement any threading or concurrency.
# When consuming messages, they simply round-robin across the claimed partitions. If you wish
# to parallelize consumption, simply create multiple ConsumerGroup instances. The built-in
# consensus algorithm will automatically rebalance the available partitions between them, and
# you can then decide for yourself whether to run them in multiple threads or processes,
# ideally on multiple boxes.
#
# Contrary to the Kafka documentation, consumer rebalancing is *only* triggered by the
# addition or removal of consumers within the same group; the addition of broker nodes
# and/or partitions *does not currently trigger* a rebalancing cycle.
#
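# @example Parallel consumption (illustrative sketch; the group name, broker list,
#   ZooKeeper list and topic below are placeholder values, not part of this gem)
#
#   groups = 2.times.map do
#     Poseidon::ConsumerGroup.new "my-group", ["localhost:9092"], ["localhost:2181"], "my-topic"
#   end
#   threads = groups.map do |group|
#     Thread.new { group.fetch_loop {|n, messages| puts "partition #{n}: #{messages.size}" } }
#   end
#   threads.each(&:join)
#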
# @api public
class Poseidon::ConsumerGroup
  DEFAULT_CLAIM_TIMEOUT = 30
  DEFAULT_LOOP_DELAY = 1

  # Poseidon::ConsumerGroup::Consumer is internally used by Poseidon::ConsumerGroup.
  # Don't invoke it directly.
  #
  # @api private
  class Consumer < ::Poseidon::PartitionConsumer

    # @attr_reader [Integer] partition consumer partition
    attr_reader :partition

    # @api private
    def initialize(group, partition, options = {})
      broker = group.leader(partition)
      offset = group.offset(partition)
      offset = (options[:trail] ? :latest_offset : :earliest_offset) if offset == 0
      options.delete(:trail)
      super group.id, broker.host, broker.port, group.topic, partition, offset, options
    end

  end

  # @param [Integer] pnum number of partitions
  # @param [Array<String>] cids consumer IDs
  # @param [String] id consumer ID
  # @return [Range, NilClass] selectable range, if any
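  # @example Illustrative values (not taken from the gem itself): 8 partitions shared
  #   by three consumers; the middle consumer is assigned partitions 3..5
  #
  #   Poseidon::ConsumerGroup.pick(8, ["a", "b", "c"], "b") # => 3..5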
  def self.pick(pnum, cids, id)
    cids = cids.sort
    pos = cids.index(id)
    return unless pos && pos < cids.size

    step = pnum.fdiv(cids.size).ceil
    frst = pos*step
    last = (pos+1)*step-1
    last = pnum-1 if last > pnum-1
    return if last < 0 || last < frst

    (frst..last)
  end

  # @attr_reader [String] name Group name
  attr_reader :name

  # @attr_reader [String] topic Topic name
  attr_reader :topic

  # @attr_reader [Poseidon::BrokerPool] pool Broker pool
  attr_reader :pool

  # @attr_reader [ZK::Client] zk Zookeeper client
  attr_reader :zk

  # @attr_reader [Hash] options Consumer options
  attr_reader :options

  # Create a new consumer group, which processes all partitions of the specified topic.
  #
  # @param [String] name Group name
  # @param [Array<String>] brokers A list of known brokers, e.g. ["localhost:9092"]
  # @param [Array<String>] zookeepers A list of known zookeepers, e.g. ["localhost:2181"]
  # @param [String] topic Topic to operate on
  # @param [Hash] options Consumer options
  # @option options [Integer] :max_bytes Maximum number of bytes to fetch. Default: 1048576 (1MB)
  # @option options [Integer] :max_wait_ms How long to block until the server sends us data. Default: 100 (100ms)
  # @option options [Integer] :min_bytes Smallest amount of data the server should send us. Default: 0 (send us data as soon as it is ready)
  # @option options [Integer] :claim_timeout Maximum number of seconds to wait for a partition claim. Default: 30 (DEFAULT_CLAIM_TIMEOUT)
  # @option options [Integer] :loop_delay Number of seconds to delay the next fetch (in #fetch_loop) if nothing was returned. Default: 1
  # @option options [Integer] :socket_timeout_ms Broker connection wait timeout in ms. Default: 10000
  # @option options [Boolean] :register Automatically register the instance and start consuming. Default: true
  # @option options [Boolean] :trail Start reading messages from the latest partition offsets and skip 'old' messages. Default: false
  #
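  # @example Typical construction (host/port values are placeholders)
  #
  #   group = Poseidon::ConsumerGroup.new "my-group",
  #                                       ["localhost:9092"],
  #                                       ["localhost:2181"],
  #                                       "my-topic",
  #                                       max_bytes: 256*1024
  #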
  # @api public
  def initialize(name, brokers, zookeepers, topic, options = {})
    @name = name
    @topic = topic
    @zk = ::ZK.new(zookeepers.join(","))
    # Poseidon::BrokerPool doesn't provide a default value for this option;
    # configuring the default here isn't beautiful, though. (by kssminus)
    options[:socket_timeout_ms] ||= 10000
    @options = options
    @consumers = []
    @pool = ::Poseidon::BrokerPool.new(id, brokers, options[:socket_timeout_ms])
    @mutex = Mutex.new

    register! unless options[:register] == false
  end

  # @return [String] a globally unique identifier
  def id
    @id ||= [name, Poseidon::Cluster.guid].join("-")
  end

  # @return [Hash<Symbol,String>] registry paths
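  # @example Resulting paths for a group named "my-group" consuming "my-topic"
  #   (names are illustrative)
  #
  #   group.registries
  #   # => { consumer: "/consumers/my-group/ids",
  #   #      owner:    "/consumers/my-group/owners/my-topic",
  #   #      offset:   "/consumers/my-group/offsets/my-topic" }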
  def registries
    @registries ||= {
      consumer: "/consumers/#{name}/ids",
      owner: "/consumers/#{name}/owners/#{topic}",
      offset: "/consumers/#{name}/offsets/#{topic}",
    }
  end

  # @return [Poseidon::ClusterMetadata] cluster metadata
  def metadata
    @metadata ||= Poseidon::ClusterMetadata.new.tap {|m| m.update pool.fetch_metadata([topic]) }
  end

  # @return [Poseidon::TopicMetadata] topic metadata
  def topic_metadata
    @topic_metadata ||= metadata.metadata_for_topics([topic])[topic]
  end

  # @return [Boolean] true if registered
  def registered?
    !!zk.children(consumer_path, ignore: :no_node)
  end

  # @return [Boolean] true if registration was successful, false if already registered
  def register!
    return false if registered?

    # Register instance
    registries.each do |_, path|
      zk.mkdir_p(path)
    end
    zk.create(consumer_path, "{}", ephemeral: true)
    zk.register(registries[:consumer]) {|_| rebalance! }

    # Rebalance
    rebalance!
  end

  # Reloads metadata/broker/partition information
  def reload
    @metadata = @topic_metadata = nil
    metadata
    self
  end

  # Closes the consumer group gracefully, only really useful in tests
  # @api private
  def close
    synchronize { release_all! }
    zk.close
  end

  # @param [Integer] partition
  # @return [Poseidon::Protocol::Broker] the leader for the given partition
  def leader(partition)
    metadata.lead_broker_for_partition(topic, partition)
  end

  # @param [Integer] partition
  # @return [Integer] the latest stored offset for the given partition
  def offset(partition)
    data, _ = zk.get offset_path(partition), ignore: :no_node
    data.to_i
  end

  # Commits the latest offset for a partition
  # @param [Integer] partition
  # @param [Integer] offset
  def commit(partition, offset)
    zk.set offset_path(partition), offset.to_s
    unlock(offset)
  rescue ZK::Exceptions::NoNode
    zk.create offset_path(partition), offset.to_s, ignore: :node_exists
    unlock(offset)
  end

  # Partitions, sorted by broker address (so partitions on the same broker are clustered together)
  # @return [Array<Poseidon::Protocol::PartitionMetadata>] sorted partitions
  def partitions
    return [] unless topic_metadata

    topic_metadata.available_partitions.sort_by do |part|
      broker = metadata.brokers[part.leader]
      [broker.host, broker.port].join(":")
    end
  end

  # Partitions currently claimed and consumed by this group instance
  # @return [Array<Integer>] partition IDs
  def claimed
    @consumers.map(&:partition).sort
  end

  # Checks out a single partition consumer. Round-robins between claimed partitions.
  #
  # @yield [consumer] The processing block
  # @yieldparam [Consumer] consumer The consumer instance
  # @yieldreturn [Boolean] return false to stop auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit the consumer offset (default: true)
  # @return [Boolean] true if a consumer was checked out, false if none could be claimed
  #
  # @example
  #
  #   ok = group.checkout do |consumer|
  #     puts "Checked out consumer for partition #{consumer.partition}"
  #   end
  #   ok # => true if the block was run, false otherwise
  #
  # @api public
  def checkout(opts = {})
    register!

    lock

    @current_consumer = @consumers.shift

    if @current_consumer.nil?
      unlock
      return false
    end

    @consumers.push @current_consumer
    commit = yield @current_consumer

    unless opts[:commit] == false || commit == false
      commit @current_consumer.partition, @current_consumer.offset
    end
    true
  rescue StandardError => e
    unlock
    raise e
  end

  # Convenience method to fetch messages from the broker.
  # Round-robins between claimed partitions.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to prevent auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit the consumed offset (default: true)
  # @return [Boolean] true if messages were fetched, false if none could be claimed
  #
  # @example
  #
  #   ok = group.fetch do |n, messages|
  #     puts "Fetched #{messages.size} messages for partition #{n}"
  #   end
  #   ok # => true if the block was run, false otherwise
  #
  # @api public
  def fetch(opts = {})
    checkout(opts) do |consumer|
      payloads = consumer.fetch
      if payloads.empty?
        commit consumer.partition, consumer.offset if opts[:commit] == false
        true
      else
        yield consumer.partition, payloads
      end
    end
  end

  # Initializes an infinite fetch loop. This method blocks!
  #
  # Will wait for `loop_delay` seconds after each failed fetch. This may happen when there is
  # no new data or when the consumer hasn't claimed any partitions.
  #
  # SPECIAL ATTENTION:
  # When 'breaking out' of the loop, you must do it before processing the messages, as
  # the last offset will not be committed. Please see the examples below.
  #
  # @yield [partition, messages] The processing block
  # @yieldparam [Integer] partition The source partition, may be -1 if no partitions are claimed
  # @yieldparam [Array<Message>] messages The fetched messages
  # @yieldreturn [Boolean] return false to prevent auto-commit
  #
  # @param [Hash] opts
  # @option opts [Boolean] :commit Automatically commit the consumed offset (default: true)
  # @option opts [Boolean] :loop_delay Delay override in seconds after an unsuccessful fetch.
  #
  # @example
  #
  #   group.fetch_loop do |n, messages|
  #     puts "Fetched #{messages.size} messages for partition #{n}"
  #   end
  #   puts "Done" # => this code is never reached
  #
  # @example Stopping the loop (wrong)
  #
  #   counts = Hash.new(0)
  #   group.fetch_loop do |n, messages|
  #     counts[n] += messages.size
  #     puts "Status: #{counts.inspect}"
  #     break if counts[0] > 100
  #   end
  #   puts "Result: #{counts.inspect}"
  #   puts "Offset: #{group.offset(0)}"
  #
  #   # Output:
  #   # Status: {0=>30}
  #   # Status: {0=>60}
  #   # Status: {0=>90}
  #   # Status: {0=>120}
  #   # Result: {0=>120}
  #   # Offset: 90 # => Last offset was not committed!
  #
  # @example Stopping the loop (correct)
  #
  #   counts = Hash.new(0)
  #   group.fetch_loop do |n, messages|
  #     break if counts[0] > 100
  #     counts[n] += messages.size
  #     puts "Status: #{counts.inspect}"
  #   end
  #   puts "Result: #{counts.inspect}"
  #   puts "Offset: #{group.offset(0)}"
  #
  #   # Output:
  #   # Status: {0=>30}
  #   # Status: {0=>60}
  #   # Status: {0=>90}
  #   # Status: {0=>120}
  #   # Result: {0=>120}
  #   # Offset: 120
  #
  # @api public
  def fetch_loop(opts = {})
    delay = opts[:loop_delay] || options[:loop_delay] || DEFAULT_LOOP_DELAY

    loop do
      mp = false
      ok = fetch(opts) do |n, messages|
        mp = !messages.empty?
        yield n, messages
      end

      # Yield over an empty array if nothing was claimed,
      # to allow the user to e.g. break out of the loop
      unless ok
        yield -1, []
      end

      # Sleep if either nothing was claimed or nothing was returned
      unless ok && mp
        sleep delay
      end
    end
  end

  protected

  # Rebalance algorithm:
  #
  # * let CG be all consumers in the same group that consume topic T
  # * let PT be all partitions producing topic T
  # * sort CG
  # * sort PT (so partitions on the same broker are clustered together)
  # * let POS be our index position in CG and let N = ceil(size(PT) / size(CG))
  # * assign partitions from POS*N to (POS+1)*N-1
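  #
  # For example (illustrative values, not from the gem): with 8 partitions and three
  # consumers ["a", "b", "c"], N = ceil(8/3) = 3, so "a" claims partitions 0..2,
  # "b" claims 3..5 and "c" claims 6..7 (see .pick above).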
  def rebalance!
    return if @pending

    @pending = true
    synchronize do
      @pending = nil
      release_all!
      reload

      ids = zk.children(registries[:consumer], watch: true)
      pms = partitions
      rng = self.class.pick(pms.size, ids, id)

      pms[rng].each do |pm|
        if @pending
          release_all!
          break
        end

        consumer = claim!(pm.id)
        @consumers.push(consumer) if consumer
      end if rng
    end
  end

  # Release all consumer claims
  def release_all!
    @consumers.each {|c| release!(c.partition) }
    @consumers.clear
  end

  private

  def lock
    @mutex.lock
  end

  def unlock(offset = nil)
    raise "Mutex should be locked, possibly committing out of order" unless @mutex.locked?

    if offset
      @mutex.unlock if @current_consumer.offset == offset
    else
      @mutex.unlock
    end
  end

  def synchronize
    @mutex.synchronize { yield }
  end

  # Claim the ownership of the partition for this consumer
  # @raise [Timeout::Error]
  def claim!(partition)
    path = claim_path(partition)
    Timeout.timeout options[:claim_timeout] || DEFAULT_CLAIM_TIMEOUT do
      while zk.create(path, id, ephemeral: true, ignore: :node_exists).nil?
        return if @pending
        sleep(0.1)
      end
    end
    Consumer.new self, partition, options.dup
  end

  # Release ownership of the partition
  def release!(partition)
    zk.delete claim_path(partition), ignore: :no_node
  end

  # @return [String] zookeeper ownership claim path
  def claim_path(partition)
    "#{registries[:owner]}/#{partition}"
  end

  # @return [String] zookeeper offset storage path
  def offset_path(partition)
    "#{registries[:offset]}/#{partition}"
  end

  # @return [String] zookeeper consumer registration path
  def consumer_path
    "#{registries[:consumer]}/#{id}"
  end
end
data/lib/poseidon_cluster.rb
ADDED
@@ -0,0 +1 @@
require 'poseidon/cluster'
data/scenario/.gitignore
ADDED
@@ -0,0 +1 @@
output.txt
data/scenario/consumer.rb
ADDED
@@ -0,0 +1,17 @@
#!/usr/bin/env ruby
require 'bundler/setup'
require 'poseidon_cluster'

# Scenario consumer: appends every consumed message to the shared output file
# as "name,partition,value" lines.
name = ARGV[0].to_s
output = File.open(ARGV[1], "a")
output.sync = true

total = 0
consumer = Poseidon::ConsumerGroup.new "my-group", ["localhost:29092"], ["localhost:22181"], "my-topic", max_bytes: 256*1024
consumer.fetch_loop do |n, messages|
  # Consumers named after 'Q' (e.g. "X", "Y") exit once they have consumed something,
  # which triggers a rebalance among the remaining consumers.
  break if name[0] > 'Q' && total > 0
  messages.each do |m|
    output.write "#{name},#{n},#{m.value}\n"
  end
  total += messages.size
end
data/scenario/producer.rb
ADDED
@@ -0,0 +1,23 @@
#!/usr/bin/env ruby
require 'bundler/setup'
require 'poseidon'

# Scenario producer: writes `limit` sequentially numbered messages to "my-topic",
# starting at `offset`, in batches of at most 10,000.
limit, offset = ARGV[0].to_i, ARGV[1].to_i
producer = Poseidon::Producer.new ["localhost:29092"], "poseidon-producer"

while limit > 0
  batch = limit > 10000 ? 10000 : limit
  limit -= batch

  messages = (0...batch).map do
    num = offset.to_s.rjust(8, "0")
    offset += 1
    Poseidon::MessageToSend.new "my-topic", num, Time.now.to_s + num
  end

  # Retry a failed delivery up to 10 times
  10.times do
    ok = producer.send_messages messages
    break if ok
    sleep(1)
  end
end
data/scenario/run.rb
ADDED
@@ -0,0 +1,35 @@
#!/usr/bin/env ruby

require 'bundler/setup'
require 'timeout'
require File.expand_path("../scenario", __FILE__)

# Start Zookeeper & Kafka, then interleave producers and consumers,
# verifying the total message count at each checkpoint.
Scenario.run do
  5.times do
    produce 1000
  end
  consume "A"
  consume "B"
  consume "C"
  checkpoint!

  15.times { produce 1000 }
  consume "D"
  10.times { produce 1000 }
  consume "X"
  10.times { produce 1000 }
  checkpoint!

  20.times { produce 1000 }
  consume "E"
  consume "F"
  15.times { produce 1000 }
  consume "Y"
  50.times { produce 100 }
  20.times { produce 1000 }

  checkpoint!
end
data/scenario/scenario.rb
ADDED
@@ -0,0 +1,134 @@
require 'fileutils'
require 'pathname'

# Test-scenario harness: downloads and configures a local Kafka/ZooKeeper pair,
# spawns the producer/consumer scripts and verifies that every produced message
# ends up in the output file exactly once.
module Scenario
  extend self

  ROOT = Pathname.new(File.expand_path("../", __FILE__))
  VERSION = "0.8.1.1"
  SERVER = ROOT.join "kafka_2.10-#{VERSION}"

  TOPIC_NAME = "my-topic"
  KAFKA_BIN = SERVER.join("bin", "kafka-server-start.sh")
  KAFKA_CFG = SERVER.join("config", "server-poseidon.properties")
  KAFKA_TMP = "/tmp/kafka-logs-poseidon"
  ZOOKP_BIN = SERVER.join("bin", "zookeeper-server-start.sh")
  ZOOKP_CFG = SERVER.join("config", "zookeeper-poseidon.properties")
  ZOOKP_TMP = "/tmp/zookeeper-poseidon"
  LOG4J_CFG = SERVER.join("config", "log4j.properties")
  OUTPUT = Scenario::ROOT.join("output.txt")

  @@pids = {}
  @@total = 0

  def run(&block)
    setup
    instance_eval(&block)
  rescue => e
    abort [e, *e.backtrace[0,20]].join("\n")
  ensure
    teardown
  end

  def setup
    FileUtils.rm_rf OUTPUT.to_s
    configure

    # Ensure all required files are present
    [KAFKA_BIN, ZOOKP_BIN, KAFKA_CFG, ZOOKP_CFG].each do |path|
      abort "Unable to locate #{path}. File does not exist!" unless path.file?
    end

    Signal.trap("INT") { teardown }

    spawn KAFKA_BIN, KAFKA_CFG
    spawn ZOOKP_BIN, ZOOKP_CFG
    sleep(2)
  end

  def teardown
    @@pids.each do |_, pid|
      Process.kill :TERM, pid
    end
    sleep(1)
    FileUtils.rm_rf KAFKA_TMP.to_s
    FileUtils.rm_rf ZOOKP_TMP.to_s

    fail! unless numlines == @@total
  end

  def configure
    download

    KAFKA_CFG.open("w") do |f|
      f.write SERVER.join("config", "server.properties").read.
        sub("=9092", "=29092").
        sub(":2181", ":22181").
        sub("num.partitions=2", "num.partitions=12").
        sub("log.flush.interval.ms=1000", "log.flush.interval.ms=10").
        sub("/tmp/kafka-logs", KAFKA_TMP)
    end
    ZOOKP_CFG.open("w") do |f|
      f.write SERVER.join("config", "zookeeper.properties").read.
        sub("/tmp/zookeeper", ZOOKP_TMP).
        sub("=2181", "=22181")
    end
    content = LOG4J_CFG.read
    LOG4J_CFG.open("w") do |f|
      f.write content.gsub("INFO", "FATAL")
    end if content.include?("INFO")
  end

  def download
    return if SERVER.directory?
    sh "cd #{ROOT} && curl http://www.mirrorservice.org/sites/ftp.apache.org/kafka/#{VERSION}/kafka_2.10-#{VERSION}.tgz | tar xz"
  end

  # Wait (up to `timeout` seconds) for the output file to catch up with the
  # number of messages produced so far; abort if it overshoots or times out.
  def checkpoint!(timeout = 100)
    puts "--> Verifying #{@@total}"
    timeout.times do
      if numlines > @@total
        break
      elsif numlines < @@total
        sleep(1)
      else
        return
      end
    end
    fail!
  end

  def consume(name)
    puts "--> Launching consumer #{name}"
    spawn ROOT.join("consumer.rb"), name, OUTPUT
  end

  def produce(count)
    puts "--> Producing messages #{@@total}-#{@@total+count-1}"
    sh ROOT.join("producer.rb"), count, @@total
    @@total += count
  end

  def numlines
    `wc -l #{OUTPUT} 2> /dev/null`.to_i
  end

  def abort(message)
    Kernel.abort "ERROR: #{message}"
  end

  def fail!
    Kernel.abort "FAILED: expected #{@@total} but was #{numlines}"
  end

  def sh(*bits)
    cmd = bits.join(" ")
    system(cmd) || abort(cmd)
  end

  def spawn(*args)
    cmd = args.join(" ")
    @@pids[cmd] = Process.spawn(cmd)
  end

end