xcflushd 1.0.0.rc2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,83 @@
+ require 'uri'
+ require 'xcflushd/runner'
+
+ module Xcflushd
+   module GLIHelpers
+     POSITIVE_N_RE = /\A[1-9]\d*\z/.freeze
+
+     class PositiveMinMaxInt
+       # this allows 0 or more as MIN, 1 or more as MAX
+       POSITIVE_MIN_MAX_RE = /\A(?<min>\d+):(?<max>[1-9]\d*)\z/.freeze
+       private_constant :POSITIVE_MIN_MAX_RE
+
+       def self.match(str)
+         md = POSITIVE_MIN_MAX_RE.match str
+         return false if md.nil?
+         min, max = [md[:min].to_i, md[:max].to_i]
+         return false if max < min
+         new min, max
+       end
+
+       attr_reader :min, :max
+
+       def initialize(min, max)
+         @min, @max = min, max
+       end
+
+       def to_a
+         [self]
+       end
+     end
+
+     # URI parsing for GLI
+     class GenericURI
+       # https://tools.ietf.org/html/rfc3986#appendix-A
+       SCHEME_RE = /[[:alpha:]][[[:alpha:]][[:digit:]]\+-\.]*:\/\//
+       private_constant :SCHEME_RE
+
+       def self.new(s, default_port = nil)
+         # URI.parse won't correctly parse a URI without a scheme
+         unless SCHEME_RE.match s
+           s = "generic://#{s}"
+         end
+         uri = URI.parse(s)
+         # exit with an error if no host parsed
+         return false unless uri.host
+         if !uri.port && default_port
+           uri.port = default_port
+         end
+         uri.define_singleton_method :to_a do
+           [self]
+         end
+         uri
+       end
+     end
+
+     class RedisURI
+       DEFAULT_PORT = 6379
+       private_constant :DEFAULT_PORT
+
+       def self.match(s)
+         GenericURI.new(s, DEFAULT_PORT)
+       end
+     end
+
+     class BackendURI
+       def self.match(s)
+         GenericURI.new(s)
+       end
+     end
+
+     def start_xcflusher(options)
+       Xcflushd::Runner.run(Hash[options.map { |k, v| [k.to_s.tr('-', '_').to_sym, v] }])
+     end
+
+     def set_title(title)
+       if Process.respond_to? :setproctitle
+         Process.setproctitle title
+       else
+         $0 = title
+       end
+     end
+   end
+ end
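
Note: the matchers in GLIHelpers above all follow the same convention: match returns a parsed object on success and false when the input does not qualify. The following standalone sketch is not part of the gem; it only exercises the classes above, and the require path is assumed from the module name:

    # Illustrative only; assumes the file above is loadable as 'xcflushd/gli_helpers'.
    require 'xcflushd/gli_helpers'

    helpers = Xcflushd::GLIHelpers

    range = helpers::PositiveMinMaxInt.match('2:10')
    [range.min, range.max]                     # => [2, 10]
    helpers::PositiveMinMaxInt.match('10:2')   # => false (max < min is rejected)

    redis = helpers::RedisURI.match('localhost')
    [redis.host, redis.port]                   # => ["localhost", 6379] (default port applied)
    backend = helpers::BackendURI.match('su1.3scale.net:443')
    [backend.host, backend.port]               # => ["su1.3scale.net", 443]
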
@@ -0,0 +1,9 @@
+ require 'logger'
+
+ module Xcflushd
+   class Logger
+     def self.new(*args)
+       ::Logger.new(*args)
+     end
+   end
+ end
@@ -0,0 +1,253 @@
+ require 'xcflushd/threading'
+
+ module Xcflushd
+   # Apart from flushing all the cached reports and renewing the authorizations
+   # periodically, we need to provide a mechanism to renew a specific auth at
+   # any time. The information needed is the combination of service, application
+   # credentials and metric.
+   #
+   # When the client looks for the auth of a combination in the cache, it might
+   # not be there. It could be an authorization that has never been cached or one
+   # that has expired. In that case, we need to provide a way to check a
+   # specific authorization without waiting for the next flush cycle.
+   #
+   # We use Redis publish/subscribe to solve this problem. We use two different
+   # types of channels:
+   #   1) Auth requests channel. It's the channel where the client specifies the
+   #      combinations that need to be checked. xcflushd is subscribed to the
+   #      channel. There is only one channel of this type.
+   #   2) Responses channel. Every time there's a request for a specific
+   #      combination, a channel of this type is created. The client is
+   #      subscribed to this channel, and xcflushd will publish the authorization
+   #      status once it gets it from 3scale.
+   class PriorityAuthRenewer
+
+     # Number of times that a response is published
+     TIMES_TO_PUBLISH = 5
+     private_constant :TIMES_TO_PUBLISH
+
+     # We need two separate Redis clients: one for subscribing to a channel and
+     # the other one to publish to different channels. It is specified in the
+     # Redis website: http://redis.io/topics/pubsub
+     def initialize(authorizer, storage, redis_pub, redis_sub,
+                    auth_ttl, logger, threads)
+       @authorizer = authorizer
+       @storage = storage
+       @redis_pub = redis_pub
+       @redis_sub = redis_sub
+       @auth_ttl = auth_ttl
+       @logger = logger
+
+       # We can receive several requests to renew the authorization of a
+       # combination while we are already renewing it. We want to avoid
+       # performing several calls to 3scale asking for the same thing. For that
+       # reason, we use a map to keep track of the combinations that we are
+       # renewing.
+       # This map is updated from different threads. We use Concurrent::Map to
+       # ensure thread-safety.
+       @current_auths = Concurrent::Map.new
+
+       min_threads, max_threads = if threads
+                                    [threads.min, threads.max]
+                                  else
+                                    Threading.default_threads_value
+                                  end
+
+       @thread_pool = Concurrent::ThreadPoolExecutor.new(
+         min_threads: min_threads,
+         max_threads: max_threads)
+     end
+
+     def shutdown
+       @thread_pool.shutdown
+     end
+
+     def wait_for_termination(secs = nil)
+       @thread_pool.wait_for_termination(secs)
+     end
+
+     def terminate
+       @thread_pool.kill
+     end
+
+     def start
+       begin
+         subscribe_to_requests_channel
+       rescue StandardError => e
+         logger.error("PriorityAuthRenewer can't subscribe to the requests "\
+                      "channel - #{e.class} #{e.message} #{e.cause}")
+         raise e
+       end
+     end
+
+     private
+
+     attr_reader :authorizer, :storage, :redis_pub, :redis_sub, :auth_ttl,
+                 :logger, :current_auths, :thread_pool
+
+     def subscribe_to_requests_channel
+       redis_sub.subscribe(StorageKeys::AUTH_REQUESTS_CHANNEL) do |on|
+         on.subscribe do |channel, _subscriptions|
+           logger.info("PriorityAuthRenewer correctly subscribed to #{channel}")
+         end
+
+         on.message do |_channel, msg|
+           begin
+             # The renew and publish operations need to be done asynchronously.
+             # Renewing the authorizations involves getting them from 3scale,
+             # making network requests, and also updating Redis. We cannot block
+             # until we get all that done. That is why we need to treat the
+             # messages received in the channel concurrently.
+             unless currently_authorizing?(msg)
+               async_renew_and_publish_task(msg).execute
+             end
+           rescue Concurrent::RejectedExecutionError => e
+             # This error is raised when we try to submit a task to the thread
+             # pool and it is rejected.
+             # After we call shutdown() on the thread pool, this error will be
+             # raised. We do not want to log errors in this case.
+             unless thread_pool.shuttingdown?
+               logger.error('Error while treating a message received in the '\
+                            "requests channel: #{e.message}")
+             end
+           rescue StandardError => e
+             # If we do not rescue from an exception raised while treating a
+             # message, the redis client instance used stops receiving messages.
+             # We need to make sure that we'll rescue in all cases.
+             # Keep in mind that this will not rescue from exceptions raised in
+             # async tasks because they are executed in different threads.
+             logger.error('Error while treating a message received in the '\
+                          "requests channel: #{e.message}")
+           end
+         end
+       end
+     end
+
+     # Apart from renewing the auth of the combination received, we also renew
+     # all the metrics of the associated application. The reason is that to renew
+     # a single metric we need to perform one call to 3scale, and to renew all
+     # the limited metrics of an application we also need one. If the metric
+     # received does not have limits defined, we need to perform two calls, but
+     # it is still worth renewing all of them for that price.
+     #
+     # Note: Some exceptions can be raised inside the futures that are executed
+     # by the thread pool. For example, when 3scale is not accessible, when
+     # renewing the cached authorizations fails, or when publishing to the
+     # response channels fails. Trying to recover from all those cases does not
+     # seem to be worth it. The request that published the message will wait for
+     # a response that will not arrive and eventually it will time out. However,
+     # if the request retries, it is likely to succeed, as the kinds of errors
+     # listed above are (hopefully) temporary.
+     def async_renew_and_publish_task(channel_msg)
+       Concurrent::Future.new(executor: thread_pool) do
+         success = true
+         begin
+           combination = auth_channel_msg_2_combination(channel_msg)
+           app_auths = app_authorizations(combination)
+           renew(combination[:service_id], combination[:credentials], app_auths)
+           metric_auth = app_auths[combination[:metric]]
+         rescue StandardError
+           # If we did not rescue here, we would not be able to process the same
+           # message again.
+           success = false
+         ensure
+           mark_auth_task_as_finished(channel_msg)
+         end
+
+         # We only publish a message when there aren't any errors. When
+         # success is false, we could have renewed some auths, so this could
+         # be more fine-grained and ping the subscribers that are not interested
+         # in the auths that failed. Also, as we do not publish anything when
+         # there is an error, the subscriber waits until it times out.
+         # This is good enough for now, but there is room for improvement.
+         publish_auth_repeatedly(combination, metric_auth) if success
+       end
+     end
+
+     def auth_channel_msg_2_combination(msg)
+       StorageKeys.pubsub_auth_msg_2_auth_info(msg)
+     end
+
+     def app_authorizations(combination)
+       authorizer.authorizations(combination[:service_id],
+                                 combination[:credentials],
+                                 [combination[:metric]])
+     end
+
+     def renew(service_id, credentials, auths)
+       storage.renew_auths(service_id, credentials, auths, auth_ttl)
+     end
+
+     def channel_for_combination(combination)
+       StorageKeys.pubsub_auths_resp_channel(combination[:service_id],
+                                             combination[:credentials],
+                                             combination[:metric])
+     end
+
+     def publish_auth_repeatedly(combination, authorization)
+       # There is a race condition here. A renew and publish task is only run
+       # when there is not another one renewing the same combination. When there
+       # is another, the incoming request does not trigger a new task, but waits
+       # for the publish below. The request could miss the published message
+       # if events happened in this order:
+       #   1) The request publishes the combination it needs in the requests
+       #      channel.
+       #   2) A new task is not executed, because there is another renewing
+       #      the same combination.
+       #   3) That task publishes the result.
+       #   4) The request subscribes to receive the result, but now it is
+       #      too late.
+       # I cannot think of an easy way to solve this. There is some time
+       # between the moment the request performs the publish and the
+       # subscribe actions. To mitigate the problem we can publish several
+       # times over a short period. We will see if this is good enough.
+       # Trade-off: publishing too many times increases the Redis load. Waiting
+       # too long makes the incoming request slow.
+       publish_failures = 0
+       TIMES_TO_PUBLISH.times do |t|
+         begin
+           publish_auth(combination, authorization)
+         rescue
+           publish_failures += 1
+         end
+         sleep((1.0/50)*((t+1)**2))
+       end
+
+       if publish_failures > 0
+         logger.warn('There was an error while publishing a response in the '\
+                     "priority channel. Combination: #{combination}".freeze)
+       end
+     end
+
+     def publish_auth(combination, authorization)
+       msg = if authorization.authorized?
+               '1'.freeze
+             else
+               authorization.reason ? "0:#{authorization.reason}" : '0'.freeze
+             end
+
+       redis_pub.publish(channel_for_combination(combination), msg)
+     end
+
+     def currently_authorizing?(channel_msg)
+       # A simple solution would be something like:
+       #   if !current_auths[channel_msg]
+       #     current_auths[channel_msg] = true;
+       #     perform_work
+       #     current_auths.delete(channel_msg)
+       #   end
+       # The problem is that the read/write is not atomic. Therefore, several
+       # threads could enter the if at the same time, repeating work. That is
+       # why we use concurrent-ruby's Map#put_if_absent, which is atomic.
+
+       # The value we set in the map is not relevant. #put_if_absent returns
+       # nil when the key is not in the map, which means that we are not
+       # currently authorizing it. That is all we care about.
+       current_auths.put_if_absent(channel_msg, true) != nil
+     end
+
+     def mark_auth_task_as_finished(channel_msg)
+       current_auths.delete(channel_msg)
+     end
+   end
+ end
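
Note: the deduplication in #currently_authorizing? above hinges on Concurrent::Map#put_if_absent being atomic, so only one thread ends up renewing a given combination. A standalone sketch of that pattern (the key string and the counter are made up for illustration, not taken from the gem):

    # Illustration only: several threads race to claim the same combination,
    # but the atomic put_if_absent lets exactly one of them do the work.
    require 'concurrent'

    current_auths = Concurrent::Map.new
    work_done = Concurrent::AtomicFixnum.new(0)

    4.times.map do
      Thread.new do
        # nil means the key was absent, i.e. nobody else is renewing it.
        if current_auths.put_if_absent('service:creds:metric', true).nil?
          work_done.increment  # here xcflushd would submit the renew-and-publish task
        end
      end
    end.each(&:join)

    work_done.value  # => 1
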
@@ -0,0 +1,70 @@
+ require '3scale_client'
+
+ module Xcflushd
+   class Reporter
+
+     class ReporterError < Flusher::XcflushdError
+       def initialize(service_id, transaction, specific_msg)
+         super("Error reporting this transaction: #{transaction} "\
+               "for service with id #{service_id}. "\
+               "#{specific_msg}")
+       end
+     end
+
+     # Exception raised when the 3scale client is not called with the right
+     # params. This happens when there are programming errors.
+     class ThreeScaleBadParams < ReporterError
+       def initialize(service_id, transaction)
+         super(service_id, transaction,
+               'There might be a bug in the program.'.freeze)
+       end
+     end
+
+     # Exception raised when the 3scale client is called with the right params
+     # but it returns a ServerError. Most of the time this means that 3scale is
+     # unreachable, although it could also be caused by a bug in the 3scale
+     # service management API.
+     class ThreeScaleInternalError < ReporterError
+       def initialize(service_id, transaction)
+         super(service_id, transaction, '3scale seems to be unreachable.'.freeze)
+       end
+     end
+
+     # Exception raised when the 3scale client made the call, but did not
+     # succeed. This happens when the credentials are invalid. For example, when
+     # an invalid provider key is used.
+     class ThreeScaleAuthError < ReporterError
+       def initialize(service_id, transaction)
+         super(service_id, transaction,
+               'Invalid credentials. Check the provider key'.freeze)
+       end
+     end
+
+     def initialize(threescale_client)
+       @threescale_client = threescale_client
+     end
+
+     def report(service_id, credentials, usage)
+       transaction = credentials.creds.merge(usage: usage)
+
+       begin
+         resp = threescale_client.report(transactions: [transaction],
+                                         service_id: service_id)
+       # TODO: get rid of the coupling with ThreeScale::ServerError
+       rescue ThreeScale::ServerError, SocketError
+         # We'll get a SocketError if there's a timeout when contacting 3scale.
+         raise ThreeScaleInternalError.new(service_id, transaction)
+       rescue ArgumentError
+         raise ThreeScaleBadParams.new(service_id, transaction)
+       end
+
+       raise ThreeScaleAuthError.new(service_id, transaction) unless resp.success?
+       true
+     end
+
+     private
+
+     attr_reader :threescale_client
+
+   end
+ end
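
Note: the Reporter above maps 3scale client outcomes onto an exception hierarchy rooted at ReporterError. A hypothetical caller (this is not the gem's actual Flusher; the require paths, host, keys, metric name and the Struct stand-in for the credentials object are all placeholders) could use it to tell retryable failures from permanent ones:

    # Sketch only; with placeholder keys the call will most likely end in the
    # ThreeScaleAuthError branch.
    require 'xcflushd'
    require '3scale_client'

    client      = ThreeScale::Client.new(provider_key: 'A_PROVIDER_KEY',
                                         host: 'su1.3scale.net')
    reporter    = Xcflushd::Reporter.new(client)
    # Reporter#report only needs an object that responds to #creds with a Hash.
    credentials = Struct.new(:creds).new({ user_key: 'A_USER_KEY' })

    begin
      reporter.report('a_service_id', credentials, { 'hits' => 1 })
    rescue Xcflushd::Reporter::ThreeScaleInternalError => e
      # 3scale unreachable: safe to retry on the next flush cycle.
      warn "temporary failure: #{e.message}"
    rescue Xcflushd::Reporter::ReporterError => e
      # Auth error or bad params: retrying will not help.
      warn "permanent failure: #{e.message}"
    end
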
@@ -0,0 +1,165 @@
+ require 'xcflushd'
+ require 'redis'
+ require '3scale_client'
+ require 'xcflushd/3scale_client_ext'
+
+ module Xcflushd
+   class Runner
+     class << self
+       # Amount of time to wait before retrying the subscription to the
+       # priority auth renewal pubsub channel.
+       PRIORITY_SUBSCRIPTION_RETRY_WAIT = 5
+       private_constant :PRIORITY_SUBSCRIPTION_RETRY_WAIT
+       # Maximum time to wait for a graceful shutdown before becoming more
+       # aggressive at killing thread pools.
+       DEFAULT_MAX_TERM_WAIT = 30
+       private_constant :DEFAULT_MAX_TERM_WAIT
+       # Because Ruby does not give us a way to be woken up from sleep, we
+       # sleep in small intervals and check whether we have been signalled.
+       MAX_IDLING_SIGNAL_LATENCY = 5
+       private_constant :MAX_IDLING_SIGNAL_LATENCY
+
+       def run(opts = {})
+         setup_sighandlers
+
+         @max_term_wait = opts[:max_term_wait] || DEFAULT_MAX_TERM_WAIT
+         @logger = Logger.new(STDOUT)
+
+         redis_host = opts[:redis].host
+         redis_port = opts[:redis].port
+         redis = Redis.new(host: redis_host, port: redis_port, driver: :hiredis)
+         storage = Storage.new(redis, @logger, StorageKeys)
+
+         threescale = ThreeScale::Client.new(provider_key: opts[:provider_key],
+                                             host: opts[:backend].host,
+                                             port: opts[:backend].port ||
+                                                   (opts[:secure] ? 443 : 80),
+                                             secure: opts[:secure],
+                                             persistent: true)
+         reporter = Reporter.new(threescale)
+         authorizer = Authorizer.new(threescale)
+
+         redis_pub = Redis.new(host: redis_host, port: redis_port, driver: :hiredis)
+         redis_sub = Redis.new(host: redis_host, port: redis_port, driver: :hiredis)
+
+         auth_ttl = opts[:auth_ttl]
+
+         error_handler = FlusherErrorHandler.new(@logger, storage)
+         @flusher = Flusher.new(reporter, authorizer, storage,
+                                auth_ttl, error_handler, opts[:threads])
+
+         @prio_auth_renewer = PriorityAuthRenewer.new(authorizer, storage,
+                                                      redis_pub, redis_sub,
+                                                      auth_ttl, @logger,
+                                                      opts[:prio_threads])
+
+         @prio_auth_renewer_thread = start_priority_auth_renewer
+
+         flush_periodically(opts[:frequency])
+       end
+
+       private
+
+       def start_priority_auth_renewer
+         Thread.new do
+           loop do
+             break if @exit
+             begin
+               @prio_auth_renewer.start
+             rescue StandardError
+               sleep PRIORITY_SUBSCRIPTION_RETRY_WAIT
+             end
+           end
+         end
+       end
+
+       def flush_periodically(flush_freq)
+         loop do
+           break if @exit
+           begin
+             @logger.info('Flushing...')
+             flusher_start = Time.now
+             next_flush = flusher_start + flush_freq
+             @flusher.flush
+             flusher_runtime = Time.now - flusher_start
+             @logger.info("Flush completed in #{flusher_runtime} seconds")
+           rescue StandardError => e
+             # Make sure we handle all standard errors so that the flusher
+             # keeps running.
+             @logger.error(e)
+           end
+           loop do
+             # sleep in small intervals to check if signalled
+             break if @exit
+             time_remaining = next_flush - Time.now
+             break if time_remaining <= 0
+             sleep([MAX_IDLING_SIGNAL_LATENCY, time_remaining].min)
+           end
+         end
+         @logger.info('Exiting')
+       rescue Exception => e
+         @logger.fatal("Unhandled exception #{e.class}, shutting down: #{e.cause} - #{e}")
+       ensure
+         shutdown
+       end
+
+       # Shutting down xcflushd
+       #
+       # We issue shutdown commands to the thread pools in the auth renewer and
+       # the flusher, wait a bit for a graceful termination and then proceed
+       # with more drastic measures.
+       #
+       # Note that there is no @prio_auth_renewer_thread.join(timeout).
+       #
+       # This is because that thread is blocked in the Redis pubsub mechanism.
+       # Since that is handled by the Redis gem and there is no way to exit it
+       # unless an unhandled exception is raised or an explicit unsubscribe
+       # command is issued from within one of the pubsub message handlers, we
+       # can't do much to issue an unsubscribe command (it would be issued from
+       # an external place and would block on the Redis gem's internal
+       # synchronization primitives).
+       #
+       # Therefore if we did the join we would be wasting that time once the
+       # thread pool is terminated, so we just go ahead and kill the thread right
+       # away (in terminate).
+       #
+       def shutdown
+         shutdown_deadline = Time.now + @max_term_wait
+         tasks = [@prio_auth_renewer, @flusher]
+         tasks.each do |task|
+           with_logged_shutdown { task.shutdown }
+         end
+         tasks.each do |task|
+           with_logged_shutdown do
+             task.wait_for_termination(shutdown_deadline - Time.now)
+           end
+         end
+       ensure
+         terminate
+       end
+
+       def terminate
+         [@prio_auth_renewer, @flusher, @prio_auth_renewer_thread].each do |task|
+           with_logged_shutdown { task.terminate }
+         end
+       end
+
+       def with_logged_shutdown
+         yield
+       rescue Exception => e
+         begin
+           @logger.error("while shutting down: #{e.class}, cause #{e.cause} - #{e}")
+         rescue Exception
+           # we want to avoid barfing if logger also breaks so that further
+           # processing can continue.
+         end
+       end
+
+       def setup_sighandlers
+         @exit = false
+         Signal.trap('EXIT') { @exit = true }
+         Signal.trap('INT') { @exit = true }
+       end
+     end
+   end
+ end
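
Note: Runner.run above expects the already-parsed option objects that the CLI builds through GLIHelpers#start_xcflusher (in particular, :redis and :backend are URI-like objects responding to #host and #port, not strings). A hypothetical direct invocation, with placeholder values and require paths assumed from the module names:

    # Sketch only; the values are placeholders and the call blocks running the
    # flush loop until the process is interrupted.
    require 'xcflushd'
    require 'xcflushd/gli_helpers'

    Xcflushd::Runner.run(
      redis:        Xcflushd::GLIHelpers::RedisURI.match('localhost:6379'),
      backend:      Xcflushd::GLIHelpers::BackendURI.match('su1.3scale.net'),
      provider_key: 'A_PROVIDER_KEY',
      secure:       true,           # with no backend port given, 443 is used
      auth_ttl:     900,
      frequency:    300,            # seconds between flush cycles
      threads:      nil,
      prio_threads: nil)            # nil falls back to Threading.default_threads_value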