xcflushd 1.0.0.rc2

data/lib/xcflushd/storage.rb ADDED
@@ -0,0 +1,263 @@
+ module Xcflushd
+   # The error handling could be improved to try to avoid losing reports.
+   # However, there are trade-offs to be made. Complex error handling can
+   # complicate the code a lot. Also, there are no guarantees that the code in
+   # the rescue clauses will be executed correctly. For example, if an smembers
+   # operation fails because Redis is not accessible, and the error handling
+   # consists of performing other operations on Redis, the error handling could
+   # fail too.
+   # Some characteristics of Redis, like the absence of rollbacks, limit the
+   # kinds of things we can do in case of error.
+   # In the future, we might explore other options like Lua scripts or keeping
+   # a journal (in Redis or on disk).
+   class Storage
+
+     # Some Redis operations might block the server for a long time if they
+     # need to operate on big collections of keys or values.
+     # For that reason, when using pipelines, instead of sending all the keys
+     # in a single pipeline, we send them in batches.
+     # If the batch is too big, we might block the server for a long time. If
+     # it is too small, we will waste time in network round-trips to the
+     # server.
+     REDIS_BATCH_KEYS = 500
+     private_constant :REDIS_BATCH_KEYS
+
+     RETRIEVING_REPORTS_ERROR = 'Reports cannot be retrieved.'.freeze
+     private_constant :RETRIEVING_REPORTS_ERROR
+
+     SOME_REPORTS_MISSING_ERROR = 'Some reports could not be retrieved'.freeze
+     private_constant :SOME_REPORTS_MISSING_ERROR
+
+     CLEANUP_ERROR = 'Failed to delete some keys that are no longer needed.'.freeze
+     private_constant :CLEANUP_ERROR
+
+     class RenewAuthError < Flusher::XcflushdError
+       def initialize(service_id, credentials)
+         super("Error while renewing the auth for service ID: #{service_id} "\
+               "and credentials: #{credentials}")
+       end
+     end
+
+     def initialize(storage, logger, storage_keys)
+       @storage = storage
+       @logger = logger
+       @storage_keys = storage_keys
+     end
+
+     # This performs a cleanup of the reports to be flushed.
+     # We can decide later whether it is better to leave this responsibility
+     # to the caller of the method.
+     #
+     # Returns an array of hashes where each of them has a service_id,
+     # credentials, and a usage. The usage is another hash where the keys are
+     # the metrics and the values are guaranteed to respond to to_i and to_s.
+     def reports_to_flush
+       # The Redis rename command overwrites the key with the new name if it
+       # exists. This means that if the rename operation fails in a flush
+       # cycle, and succeeds in a later one, the data that the key had in the
+       # first flush cycle will be lost.
+       # For that reason, every time we need to rename a key, we will use a
+       # unique suffix. This way, when the rename operation fails, the key
+       # will not be overwritten later, and we will be able to recover its
+       # content.
+       suffix = suffix_for_unique_naming
+
+       report_keys = report_keys_to_flush(suffix)
+       if report_keys.empty?
+         logger.warn "No reports available to flush"
+         report_keys
+       else
+         reports(report_keys, suffix)
+       end
+     end
+
+     def renew_auths(service_id, credentials, authorizations, auth_ttl)
+       hash_key = hash_key(:auth, service_id, credentials)
+
+       authorizations.each_slice(REDIS_BATCH_KEYS) do |authorizations_slice|
+         authorizations_slice.each do |metric, auth|
+           storage.hset(hash_key, metric, auth_value(auth))
+         end
+       end
+
+       set_auth_validity(service_id, credentials, auth_ttl)
+
+     rescue Redis::BaseError
+       raise RenewAuthError.new(service_id, credentials)
+     end
+
+     def report(reports)
+       reports.each do |report|
+         increase_usage(report)
+         add_to_set_keys_cached_reports(report)
+       end
+     end
+
+     private
+
+     attr_reader :storage, :logger, :storage_keys
+
+     def report_keys_to_flush(suffix)
+       begin
+         return [] if storage.scard(set_keys_cached_reports) == 0
+         storage.rename(set_keys_cached_reports,
+                        set_keys_flushing_reports(suffix))
+       rescue Redis::BaseError
+         # We could not even start the process of getting the reports, so just
+         # log an error and return [].
+         logger.error(RETRIEVING_REPORTS_ERROR)
+         return []
+       end
+
+       flushing_reports = flushing_report_keys(suffix)
+
+       keys_with_flushing_prefix = flushing_reports.map do |key|
+         storage_keys.name_key_to_flush(key, suffix)
+       end
+
+       # Hash with old names as keys and new ones as values
+       key_names = Hash[flushing_reports.zip(keys_with_flushing_prefix)]
+       rename(key_names)
+
+       key_names.values
+     end
+
+     def flushing_report_keys(suffix)
+       res = storage.smembers(set_keys_flushing_reports(suffix))
+     rescue Redis::BaseError
+       logger.error(RETRIEVING_REPORTS_ERROR)
+       []
+     else
+       # We only delete the set if there is no error. If there is an error,
+       # the set is kept so that its contents can be recovered later.
+       delete([set_keys_flushing_reports(suffix)])
+       res
+     end
+
+     # Returns a report (hash with service_id, credentials, and usage) for
+     # each of the keys received.
+     def reports(keys_to_flush, suffix)
+       result = []
+
+       keys_to_flush.each_slice(REDIS_BATCH_KEYS) do |keys|
+         begin
+           usages = storage.pipelined { keys.each { |k| storage.hgetall(k) } }
+         rescue Redis::BaseError
+           # The reports in a batch where hgetall failed will not be reported
+           # now, but they will not be lost. Their keys will not be deleted,
+           # so we will be able to retrieve them later and retry.
+           # We cannot know which ones failed because we are using a pipeline.
+           logger.error(SOME_REPORTS_MISSING_ERROR)
+         else
+           keys.each_with_index do |key, i|
+             # The usage could be empty if we failed to rename the key in the
+             # previous step. hgetall returns {} for keys that do not exist.
+             unless usages[i].empty?
+               service_id, creds = storage_keys.service_and_creds(key, suffix)
+               result << { service_id: service_id,
+                           credentials: creds,
+                           usage: usages[i] }
+             end
+           end
+           delete(keys)
+         end
+       end
+
+       result
+     end
+
+     def rename(keys)
+       keys.each_slice(REDIS_BATCH_KEYS) do |keys_slice|
+         begin
+           storage.pipelined do
+             keys_slice.each do |old_name, new_name|
+               storage.rename(old_name, new_name)
+             end
+           end
+         rescue Redis::BaseError
+           # The cached reports will not be reported now, but they will not be
+           # lost. They will be reported next time there are hits for that
+           # specific metric.
+           # We cannot know which ones failed because we are using a pipeline.
+           logger.warn(SOME_REPORTS_MISSING_ERROR)
+         end
+       end
+     end
+
+     def delete(keys)
+       tries ||= 3
+       storage.del(keys)
+     rescue Redis::BaseError
+       # Failing to delete certain keys could be problematic. That's why we
+       # retry in case the error is temporary, like a network hiccup.
+       #
+       # When we rename keys, we give them a unique suffix so they are not
+       # overwritten in the next cycle and we can retrieve their content
+       # later. On the other hand, when we can retrieve their content
+       # successfully, we delete them. The problem is that the delete
+       # operation can fail. When trying to recover the contents of keys that
+       # failed to be renamed, we will not be able to distinguish these two
+       # cases:
+       # 1) The key is there because we decided not to delete it to retrieve
+       #    its content later.
+       # 2) The key is there because the delete operation failed.
+       # We could take a look at the logs to figure out what happened, but of
+       # course that is not an ideal solution.
+       if tries > 0
+         tries -= 1
+         sleep(0.1)
+         retry
+       else
+         logger.error("#{CLEANUP_ERROR} Keys: #{keys}")
+       end
+     end
+
+     def set_auth_validity(service_id, credentials, auth_ttl)
+       # Redis does not allow us to set a TTL for hash key fields. TTLs can
+       # only be applied to the key containing the hash. This is not a problem
+       # because we always renew all the metrics of an application at the same
+       # time.
+       storage.expire(hash_key(:auth, service_id, credentials), auth_ttl)
+     end
+
+     def increase_usage(report)
+       hash_key = hash_key(:report, report[:service_id], report[:credentials])
+
+       report[:usage].each_slice(REDIS_BATCH_KEYS) do |usages|
+         usages.each do |usage|
+           metric, value = usage
+           storage.hincrby(hash_key, metric, value)
+         end
+       end
+     end
+
+     def add_to_set_keys_cached_reports(report)
+       hash_key = hash_key(:report, report[:service_id], report[:credentials])
+       storage.sadd(set_keys_cached_reports, hash_key)
+     end
+
+     def auth_value(auth)
+       if auth.authorized?
+         '1'.freeze
+       else
+         auth.reason ? "0:#{auth.reason}" : '0'.freeze
+       end
+     end
+
+     def suffix_for_unique_naming
+       "_#{Time.now.utc.strftime('%Y%m%d%H%M%S'.freeze)}"
+     end
+
+     def set_keys_flushing_reports(suffix)
+       "#{storage_keys::SET_KEYS_FLUSHING_REPORTS}#{suffix}"
+     end
+
+     def hash_key(type, service_id, credentials)
+       storage_keys.send("#{type}_hash_key", service_id, credentials)
+     end
+
+     def set_keys_cached_reports
+       storage_keys::SET_KEYS_CACHED_REPORTS
+     end
+
+   end
+
+ end
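
For context, here is a minimal usage sketch (editorial illustration, not part of the gem) of how a caller could drive Storage. It assumes a Redis client from the redis gem, a standard Logger, and the StorageKeys class shown in the next file; the actual reporting step is left as a placeholder:

    require 'redis'
    require 'logger'

    storage = Xcflushd::Storage.new(Redis.new(url: 'redis://localhost:6379'),
                                    Logger.new(STDOUT),
                                    Xcflushd::StorageKeys)

    # Each returned report is a hash such as:
    # { service_id: '42', credentials: creds, usage: { 'hits' => '3' } }
    storage.reports_to_flush.each do |report|
      # send the report to 3scale here (handled elsewhere by the flusher)
    end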
data/lib/xcflushd/storage_keys.rb ADDED
@@ -0,0 +1,113 @@
+ module Xcflushd
+
+   # This class defines the interface of the flusher with Redis. It defines
+   # how to build all the keys that contain cached reports and authorizations,
+   # and also all the keys used by the pubsub mechanism.
+   class StorageKeys
+
+     # Note: Some of the keys and messages in this class contain the
+     # credentials needed to authenticate an application. Credentials always
+     # appear sorted in alphabetical order. They need to be; otherwise, we
+     # could have several keys or messages that refer to the same credentials.
+
+     # Pubsub channel in which a client publishes to ask about the
+     # authorization status of an application.
+     AUTH_REQUESTS_CHANNEL = 'xc_channel_auth_requests'.freeze
+
+     # Set that contains the keys of the cached reports.
+     SET_KEYS_CACHED_REPORTS = 'report_keys'.freeze
+
+     # Set that contains the keys of the cached reports to be flushed.
+     SET_KEYS_FLUSHING_REPORTS = 'flushing_report_keys'.freeze
+
+     # Prefix of pubsub channels where the authorization statuses are
+     # published.
+     AUTH_RESPONSES_CHANNEL_PREFIX = 'xc_channel_auth_response:'.freeze
+     private_constant :AUTH_RESPONSES_CHANNEL_PREFIX
+
+     # Prefix to identify cached reports.
+     REPORT_KEY_PREFIX = 'report,'.freeze
+     private_constant :REPORT_KEY_PREFIX
+
+     # Prefix to identify cached reports that are ready to be flushed.
+     KEY_TO_FLUSH_PREFIX = 'to_flush:'.freeze
+     private_constant :KEY_TO_FLUSH_PREFIX
+
+     class << self
+
+       # Returns the storage key that contains the cached authorizations for
+       # the given { service_id, credentials } pair.
+       def auth_hash_key(service_id, credentials)
+         hash_key(:auth, service_id, credentials)
+       end
+
+       # Returns the storage key that contains the cached reports for the
+       # given { service_id, credentials } pair.
+       def report_hash_key(service_id, credentials)
+         hash_key(:report, service_id, credentials)
+       end
+
+       # Pubsub channel to which the client subscribes to receive a response
+       # after asking for an authorization.
+       def pubsub_auths_resp_channel(service_id, credentials, metric)
+         AUTH_RESPONSES_CHANNEL_PREFIX +
+           "service_id:#{service_id}," +
+           "#{credentials.to_sorted_escaped_s}," +
+           "metric:#{metric}"
+       end
+
+       # Returns a hash that contains service_id, credentials, and metric from
+       # a message published in the pubsub channel for auth requests.
+       # Expected format of the message:
+       # service_id:<service_id>,<credentials>,metric:<metric>.
+       # With all the ',' and ':' in the values escaped.
+       # <credentials> contains the credentials needed for authentication
+       # separated by ','. For example: app_id:my_app_id,user_key:my_user_key.
+       def pubsub_auth_msg_2_auth_info(msg)
+         msg_split = msg.split(/(?<!\\),/)
+         service_id = msg_split.first.sub('service_id:'.freeze, ''.freeze)
+         creds = Credentials.from(
+           msg_split[1..-2].join(',').sub('credentials:'.freeze, ''.freeze))
+         metric = msg_split.last.sub('metric:'.freeze, ''.freeze)
+
+         res = { service_id: service_id, credentials: creds, metric: metric }
+         res.map do |k, v|
+           # Credentials are already unescaped
+           [k, v.is_a?(Credentials) ? v : v.gsub("\\,", ','.freeze)
+                                           .gsub("\\:", ':'.freeze)]
+         end.to_h
+       end
+
+       # Returns an array of size 2 with the service and the credentials
+       # encoded in a key marked as 'to be flushed', given the key and its
+       # suffix.
+       def service_and_creds(key_to_flush, suffix)
+         escaped_service, escaped_creds =
+           key_to_flush.sub("#{KEY_TO_FLUSH_PREFIX}#{REPORT_KEY_PREFIX}", '')
+                       .sub(suffix, '')
+                       .split(/(?<!\\),/)
+
+         # escaped_service is a string with 'service_id:' followed by the
+         # escaped service ID. escaped_creds starts with 'credentials:' and is
+         # followed by the escaped credentials.
+         service = escaped_service
+                     .sub('service_id:'.freeze, ''.freeze)
+                     .gsub("\\,", ','.freeze).gsub("\\:", ':'.freeze)
+
+         creds = Credentials.from(escaped_creds.sub(
+           'credentials:'.freeze, ''.freeze))
+
+         [service, creds]
+       end
+
+       def name_key_to_flush(report_key, suffix)
+         "#{KEY_TO_FLUSH_PREFIX}#{report_key}#{suffix}"
+       end
+
+       private
+
+       def hash_key(type, service_id, creds)
+         "#{type},service_id:#{service_id},#{creds.to_sorted_escaped_s}"
+       end
+
+     end
+   end
+ end
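
As an editorial illustration (not part of the gem) of the key naming above, assuming credentials that serialize to app_id:my_app, a cached report key and its 'to be flushed' variant would look like this:

    key    = 'report,service_id:42,app_id:my_app' # as built by report_hash_key
    suffix = '_20170102030405' # as built by Storage#suffix_for_unique_naming
    Xcflushd::StorageKeys.name_key_to_flush(key, suffix)
    # => "to_flush:report,service_id:42,app_id:my_app_20170102030405"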
data/lib/xcflushd/threading.rb ADDED
@@ -0,0 +1,12 @@
+ # Helper for default threading values.
+ require 'concurrent'
+
+ module Xcflushd
+   module Threading
+     def self.default_threads_value
+       cpus = Concurrent.processor_count
+       # default thread pool minimum is zero
+       return 0, cpus * 4
+     end
+   end
+ end
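
A minimal sketch (editorial, not part of the gem) of how these defaults could feed a concurrent-ruby thread pool: the pool can shrink to the minimum of zero threads and grow up to four threads per CPU:

    require 'concurrent'
    require 'xcflushd/threading'

    min, max = Xcflushd::Threading.default_threads_value
    pool = Concurrent::ThreadPoolExecutor.new(min_threads: min,
                                              max_threads: max)
    pool.post { puts 'hello from the pool' }
    pool.shutdown
    pool.wait_for_termination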
data/lib/xcflushd/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Xcflushd
+   VERSION = "1.0.0.rc2"
+ end
data/lib/xcflushd.rb ADDED
@@ -0,0 +1,11 @@
+ require 'xcflushd/logger'
+ require 'xcflushd/flusher'
+ require 'xcflushd/authorization'
+ require 'xcflushd/storage_keys'
+ require 'xcflushd/credentials'
+ require 'xcflushd/authorizer'
+ require 'xcflushd/reporter'
+ require 'xcflushd/storage'
+ require 'xcflushd/flusher_error_handler'
+ require 'xcflushd/priority_auth_renewer'
+ require 'xcflushd/version'
data/script/test ADDED
@@ -0,0 +1,10 @@
+ #!/bin/bash
+
+ SCRIPT_DIR=$(dirname "$(readlink -f "$0")")
+
+ pushd "${SCRIPT_DIR}" > /dev/null
+ export TEST_COVERAGE=1
+ bundle exec rake spec
+ STATUS=$?
+ popd > /dev/null
+ exit ${STATUS}
data/xcflushd.gemspec ADDED
@@ -0,0 +1,39 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+ require 'xcflushd/version'
+
+ Gem::Specification.new do |spec|
+   spec.name = "xcflushd"
+   spec.version = Xcflushd::VERSION
+   spec.authors = ["Alejandro Martinez Ruiz", "David Ortiz Lopez"]
+   spec.email = ["support@3scale.net"]
+
+   spec.summary = %q{Daemon for flushing XC reports to 3scale.}
+   spec.description = %q{Daemon for flushing XC reports to 3scale.}
+   spec.homepage = "https://github.com/3scale/xcflushd"
+
+   spec.license = "Apache-2.0"
+
+   spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
+   spec.bindir = "exe"
+   spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
+   spec.require_paths = ["lib"]
+
+   spec.required_ruby_version = '>= 2.1.0'
+
+   spec.add_runtime_dependency "3scale_client", "~> 2.10.0"
+   spec.add_runtime_dependency "gli", "= 2.14.0"
+   spec.add_runtime_dependency "redis", "= 3.3.1"
+   spec.add_runtime_dependency "hiredis", "= 0.6.1"
+   spec.add_runtime_dependency "concurrent-ruby", "1.0.2"
+   spec.add_runtime_dependency "net-http-persistent", "2.9.4"
+   spec.add_runtime_dependency "daemons", "= 1.2.4"
+
+   spec.add_development_dependency "bundler", "~> 1.12"
+   spec.add_development_dependency "rake", "~> 11.0"
+   spec.add_development_dependency "rspec", "~> 3.0"
+   spec.add_development_dependency "fakeredis", "~> 0.6.0"
+   spec.add_development_dependency "simplecov", "~> 0.12.0"
+   spec.add_development_dependency "rubocop", "~> 0.46.0"
+ end
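
For reference, building and installing this gem locally uses the standard RubyGems commands; the .gem filename follows from the VERSION constant above:

    $ gem build xcflushd.gemspec
    $ gem install ./xcflushd-1.0.0.rc2.gem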