RubyGems - prorate - Versions diffs - 0.1.0 → 0.3.0 - Mend

prorate 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml +4 -4
data/.travis.yml +14 -4
data/README.md +2 -2
data/lib/prorate/rate_limit.lua +50 -0
data/lib/prorate/throttle.rb +46 -9
data/lib/prorate/version.rb +1 -1
data/scripts/bm.rb +43 -0
data/scripts/bm_latency_lb_vs_mget.rb +61 -0
data/scripts/reload_lua.rb +6 -0
metadata +6 -5
data/lib/prorate/block_for.rb +0 -13
data/lib/prorate/counter.rb +0 -53
data/lib/prorate/throttled.rb +0 -9

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d846e16d888b11395f59a44e8d487e9c3cccf336
-  data.tar.gz: 16051a7c67452164d40c3d8c785a26b973eaf54e
+  metadata.gz: db453351faca0b61a4517795368fe719fe5c07bb
+  data.tar.gz: 165c6088be69a4a3b291059aa4e872061e5f999f
 SHA512:
-  metadata.gz: b241b77bf6ec18bd394a0360bebd6b9e10b3b0cde21f1642860b0425d33b8b3a50dbaba9a5fe4fd7b5595dd95a566dfe122939a94711b651afec125e3ecfcc94
-  data.tar.gz: 1f110ef412d5c231f1eed8550f3957c4fc9797eef42f3a92de802b2be1d15b06c450b1913706c036dd30924925acfce410ec29adf495539ea0dd08763f904d50
+  metadata.gz: b9dd96de6c8915e8ef39f7737e930976d4f83909b8eb861456966ede6a2d62cd82f0b40b634af8da824bbd303d84e83205924d6ceb51369a17e82e6eec01523f
+  data.tar.gz: 5e349bc7288a6da431d9ef7177fc77f2041638ace47fe319af1e533846472fe234e1760ccd3d26bb208ad3e649eb99ecbed7733dc14871f53f63e19dd4b512f7

data/.travis.yml CHANGED

@@ -1,5 +1,15 @@
-sudo: false
-language: ruby
 rvm:
-  - 2.2.5
-before_install: gem install bundler -v 1.12.5
+- 2.2.5
+- 2.3.3
+- 2.4.1
+services:
+- redis
+dist: trusty # https://docs.travis-ci.com/user/trusty-ci-environment/
+sudo: false
+cache: bundler
+# Travis permits the following phases: before_install, install, after_install, before_script, script, after_script
+script:
+  - bundle exec rspec

data/README.md CHANGED

@@ -1,7 +1,8 @@
 # Prorate
 Provides a low-level time-based throttle. Is mainly meant for situations where using something like Rack::Attack is not very
-useful since you need access to more variables.
+useful since you need access to more variables. Under the hood, this uses a Lua script that implements the
+[Leaky Bucket](https://en.wikipedia.org/wiki/Leaky_bucket) algorithm in a single threaded and race condition safe way.
 ## Installation
@@ -23,7 +24,6 @@ Or install it yourself as:
 Within your Rails controller:
-    throttle_args[:block_for] ||= throttle_args.fetch(:period)
     t = Prorate::Throttle.new(redis: Redis.new, logger: Rails.logger,
         name: "throttle-login-email", limit: 20, period: 5.seconds)
     # Add all the parameters that function as a discriminator

data/lib/prorate/rate_limit.lua ADDED

@@ -0,0 +1,50 @@
+-- Single threaded Leaky Bucket implementation.
+-- args: key_base, max_bucket_capacity, leak_rate, block_duration
+-- returns: an array of two integers, the first of which indicates the remaining block time.
+-- if the block time is nonzero, the second integer is always zero. If the block time is zero,
+-- the second integer indicates the level of the bucket
+-- this is required to be able to use TIME and writes; basically it lifts the script into IO
+redis.replicate_commands()
+-- make some nicer looking variable names:
+local retval = nil
+local bucket_level_key = ARGV[1] .. ".bucket_level"
+local last_updated_key = ARGV[1] .. ".last_updated"
+local block_key = ARGV[1] .. ".block"
+local max_bucket_capacity = tonumber(ARGV[2])
+local leak_rate = tonumber(ARGV[3])
+local block_duration = tonumber(ARGV[4])
+local now = tonumber(redis.call("TIME")[1]) --unix timestamp, will be required in all paths
+local key_lifetime = math.ceil(max_bucket_capacity / leak_rate)
+local blocked_until = redis.call("GET", block_key)
+if blocked_until then
+  return {(tonumber(blocked_until) - now), 0}
+end
+-- get current bucket level
+local bucket_level = tonumber(redis.call("GET", bucket_level_key))
+if not bucket_level then
+  -- this throttle/identifier combo does not exist yet, so much calculation can be skipped
+  redis.call("SETEX", bucket_level_key, key_lifetime, 1) -- set bucket with initial value
+  retval =  {0, 1}
+else
+  -- if it already exists, do the leaky bucket thing
+  local last_updated = tonumber(redis.call("GET", last_updated_key)) or now -- use sensible default of 'now' if the key does not exist
+  local new_bucket_level = math.max(0, bucket_level - (leak_rate * (now - last_updated)))
+  if (new_bucket_level + 1) <= max_bucket_capacity then
+    new_bucket_level = new_bucket_level + 1
+    retval = {0, math.ceil(new_bucket_level)}
+  else
+    redis.call("SETEX", block_key, block_duration, now + block_duration)
+    retval = {block_duration, 0}
+  end
+  redis.call("SETEX", bucket_level_key, key_lifetime, new_bucket_level) --still needs to be saved
+end
+-- update last_updated for this bucket, required in all branches
+redis.call("SETEX", last_updated_key, key_lifetime, now)
+return retval

data/lib/prorate/throttle.rb CHANGED

@@ -1,11 +1,36 @@
 require 'digest'
 module Prorate
+  class Throttled < StandardError
+    attr_reader :retry_in_seconds
+    def initialize(try_again_in)
+      @retry_in_seconds = try_again_in
+      super("Throttled, please lower your temper and try again in #{retry_in_seconds} seconds")
+    end
+  end
+  class ScriptHashMismatch < StandardError
+  end
+  class MisconfiguredThrottle < StandardError
+  end
   class Throttle < Ks.strict(:name, :limit, :period, :block_for, :redis, :logger)
+    def self.get_script_hash
+      script_filepath = File.join(__dir__,"rate_limit.lua")
+      script = File.read(script_filepath)
+      Digest::SHA1.hexdigest(script)
+    end
+    CURRENT_SCRIPT_HASH = get_script_hash
     def initialize(*)
       super
       @discriminators = [name.to_s]
       self.redis = NullPool.new(redis) unless redis.respond_to?(:with)
+      raise MisconfiguredThrottle if ((period <= 0) || (limit <= 0))
+      @leak_rate = limit.to_f / period # tokens per second;
     end
     def <<(discriminator)
@@ -17,17 +42,29 @@ module Prorate
       identifier = [name, discriminator].join(':')
       redis.with do |r|
-        logger.info { "Checking throttle block %s" % name }
-        raise Throttled.new(block_for) if Prorate::BlockFor.blocked?(id: identifier, redis: r)
         logger.info { "Applying throttle counter %s" % name }
-        c = Prorate::Counter.new(redis: r, id: identifier, logger: logger, window_size: period)
-        after_increment = c.incr
-        if after_increment > limit
-          logger.warn { "Throttle %s exceeded limit of %d at %d" % [name, limit, after_increment] }
-          Prorate::BlockFor.block!(redis: r, id: identifier, duration: block_for)
-          raise Throttled.new(period)
+        remaining_block_time, bucket_level = run_lua_throttler(redis: r, identifier: identifier, bucket_capacity: limit, leak_rate: @leak_rate, block_for: block_for)
+        if remaining_block_time > 0
+          logger.warn { "Throttle %s exceeded limit of %d in %d seconds and is blocked for the next %d seconds" % [name, limit, period, remaining_block_time] }
+          raise Throttled.new(remaining_block_time)
         end
+        available_calls = limit - bucket_level
+      end
+    end
+    def run_lua_throttler(redis: , identifier: , bucket_capacity: , leak_rate: , block_for: )
+      redis.evalsha(CURRENT_SCRIPT_HASH, [], [identifier, bucket_capacity, leak_rate, block_for])
+    rescue Redis::CommandError => e
+      if e.message.include? "NOSCRIPT"
+        # The Redis server has never seen this script before. Needs to run only once in the entire lifetime of the Redis server (unless the script changes)
+        script_filepath = File.join(__dir__,"rate_limit.lua")
+        script = File.read(script_filepath)
+        raise ScriptHashMismatch if Digest::SHA1.hexdigest(script) != CURRENT_SCRIPT_HASH
+        redis.script(:load, script)
+        redis.evalsha(CURRENT_SCRIPT_HASH, [], [identifier, bucket_capacity, leak_rate, block_for])
+      else
+        raise e
       end
     end
   end

data/lib/prorate/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Prorate
-  VERSION = "0.1.0"
+  VERSION = "0.3.0"
 end

data/scripts/bm.rb ADDED

@@ -0,0 +1,43 @@
+# Runs a mild benchmark and prints out the average time a call to 'throttle!' takes.
+require 'prorate'
+require 'benchmark'
+require 'redis'
+require 'securerandom'
+def average_ms(ary)
+  ary.map{|x| x*1000}.inject(0,&:+) / ary.length
+end
+r = Redis.new
+logz = Logger.new(STDERR)
+logz.level = Logger::FATAL # block out most stuff
+times = []
+50.times do
+  times << Benchmark.realtime {
+    t = Prorate::Throttle.new(redis: r, logger: logz, name: "throttle-login-email", limit: 60, period: 30, block_for: 5)
+    # Add all the parameters that function as a discriminator
+    t << '127.0.2.1'
+    t << 'no_person@nowhere.com'
+    t.throttle!
+  }
+end
+puts average_ms times
+times = []
+50.times do
+  email = SecureRandom.hex(20)
+  ip = SecureRandom.hex(10)
+  times << Benchmark.realtime {
+    t = Prorate::Throttle.new(redis: r, logger: logz, name: "throttle-login-email", limit: 30, period: 30, block_for: 5)
+    # Add all the parameters that function as a discriminator
+    t << ip
+    t << email
+    t.throttle!
+  }
+end
+puts average_ms times

data/scripts/bm_latency_lb_vs_mget.rb ADDED

@@ -0,0 +1,61 @@
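+# Runs a mild benchmark and prints out the average latency of the Lua leaky bucket script (called via EVALSHA) followed by the average latency of the MULTI/INCR/EXPIRE/MGET rolling window approach.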
+require 'prorate'
+require 'benchmark'
+require 'redis'
+require 'securerandom'
+def average_ms(ary)
+  ary.map{|x| x*1000}.inject(0,&:+) / ary.length
+end
+r = Redis.new
+# 4000000.times do
+#   random1 = SecureRandom.hex(10)
+#   random2 = SecureRandom.hex(10)
+#   r.set(random1,random2)
+# end
+logz = Logger.new(STDERR)
+logz.level = Logger::FATAL # block out most stuff
+times = []
+15.times do
+  id = SecureRandom.hex(10)
+  times << Benchmark.realtime {
+    r.evalsha('c95c5f1197cef04ec4afd7d64760f9175933e55a', [], [id, 120, 50, 10]) # values beyond 120 chosen more or less at random
+  }
+end
+puts average_ms times
+def key_for_ts(ts)
+  "th:%s:%d" % [@id, ts]
+end
+times = []
+15.times do
+  id = SecureRandom.hex(10)
+  sec, _ = r.time # Use Redis time instead of the system timestamp, so that all the nodes are consistent
+  ts = sec.to_i # All Redis results are strings
+  k = key_for_ts(ts)
+  times << Benchmark.realtime {
+    r.multi do |txn|
+      # Increment the counter
+      txn.incr(k)
+      txn.expire(k, 120)
+      span_start = ts - 120
+      span_end = ts + 1
+      possible_keys = (span_start..span_end).map{|prev_time| key_for_ts(prev_time) }
+      # Fetch all the counter values within the time window. Despite the fact that this
+      # will return thousands of elements for large sliding window sizes, the values are
+      # small and an MGET in Redis is pretty cheap, so perf should stay well within limits.
+      txn.mget(*possible_keys)
+    end
+  }
+end
+puts average_ms times

data/scripts/reload_lua.rb ADDED

@@ -0,0 +1,6 @@
+# Reloads the script into redis and prints out the SHA it can be called with
+require 'redis'
+r = Redis.new
+script = File.read('../lib/prorate/rate_limit.lua')
+sha = r.script(:load,script)
+puts sha

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: prorate
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Julik Tarkhanov
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-02-14 00:00:00.000000000 Z
+date: 2017-07-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ks
@@ -111,14 +111,15 @@ files:
 - bin/console
 - bin/setup
 - lib/prorate.rb
-- lib/prorate/block_for.rb
-- lib/prorate/counter.rb
 - lib/prorate/null_logger.rb
 - lib/prorate/null_pool.rb
+- lib/prorate/rate_limit.lua
 - lib/prorate/throttle.rb
-- lib/prorate/throttled.rb
 - lib/prorate/version.rb
 - prorate.gemspec
+- scripts/bm.rb
+- scripts/bm_latency_lb_vs_mget.rb
+- scripts/reload_lua.rb
 homepage: https://github.com/WeTransfer/prorate
 licenses:
 - MIT

data/lib/prorate/block_for.rb DELETED

@@ -1,13 +0,0 @@
-module Prorate
-  module BlockFor
-    def self.block!(redis:, id:, duration:)
-      k = "bl:%s" % id
-      redis.setex(k, duration.to_i, 1)
-    end
-    def self.blocked?(redis:, id:)
-      k = "bl:%s" % id
-      !!redis.get(k)
-    end
-  end
-end

data/lib/prorate/counter.rb DELETED

@@ -1,53 +0,0 @@
-module Prorate
-  # The counter implements a rolling window throttling mechanism. At each call to incr(), the Redis time
-  # is obtained. A counter then gets set at the key corresponding to the timestamp of the request, with a
-  # granularity of a second. If requests are done continuously and in large volume, the counter will therefore
-  # create one key for each second of the given rolling window size. The counters per second are set to auto-expire
-  # after the window lapses. When incr() is performed, there is
-  class Counter
-    def initialize(redis:, logger: NullLogger, id:, window_size:)
-      @redis = redis
-      @logger = logger
-      @id = id
-      @in_span_of_seconds = window_size.to_i.abs
-    end
-    # Increments the throttle counter for this identifier, and returns the total number of requests
-    # performed so far within the given time span. The caller can then determine whether the request has
-    # to be throttled or can be let through.
-    def incr
-      sec, _ = @redis.time # Use Redis time instead of the system timestamp, so that all the nodes are consistent
-      ts = sec.to_i # All Redis results are strings
-      k = key_for_ts(ts)
-      # Do the Redis stuff in a transaction, and capture only the necessary values
-      # (the result of MULTI is all the return values of each call in sequence)
-      *_, done_last_second, _, counter_values = @redis.multi do |txn|
-        # Increment the counter
-        txn.incr(k)
-        txn.expire(k, @in_span_of_seconds)
-        span_start = ts - @in_span_of_seconds
-        span_end = ts + 1
-        possible_keys = (span_start..span_end).map{|prev_time| key_for_ts(prev_time) }
-        @logger.debug { "%s: Scanning %d possible keys" % [@id, possible_keys.length] }
-        # Fetch all the counter values within the time window. Despite the fact that this
-        # will return thousands of elements for large sliding window sizes, the values are
-        # small and an MGET in Redis is pretty cheap, so perf should stay well within limits.
-        txn.mget(*possible_keys)
-      end
-      # Sum all the values. The empty keys return nils from MGET, which become 0 on to_i casts.
-      total_requests_during_period = counter_values.map(&:to_i).inject(&:+)
-      @logger.debug { "%s: %d reqs total during the last %d seconds" % [@id, total_requests_during_period, @in_span_of_seconds] }
-      total_requests_during_period
-    end
-    private
-    def key_for_ts(ts)
-      "th:%s:%d" % [@id, ts]
-    end
-  end
-end

data/lib/prorate/throttled.rb DELETED

@@ -1,9 +0,0 @@
-module Prorate
-  class Throttled < StandardError
-    attr_reader :retry_in_seconds
-    def initialize(try_again_in)
-      @retry_in_seconds = try_again_in
-      super("Throttled, please lower your temper and try again in %d seconds" % try_again_in)
-    end
-  end
-end