RubyGems - prorate - Versions diffs - 0.1.0 → 0.3.0 - Mend

prorate 0.1.0 → 0.3.0

Files changed (13) hide show

checksums.yaml +4 -4
data/.travis.yml +14 -4
data/README.md +2 -2
data/lib/prorate/rate_limit.lua +50 -0
data/lib/prorate/throttle.rb +46 -9
data/lib/prorate/version.rb +1 -1
data/scripts/bm.rb +43 -0
data/scripts/bm_latency_lb_vs_mget.rb +61 -0
data/scripts/reload_lua.rb +6 -0
metadata +6 -5
data/lib/prorate/block_for.rb +0 -13
data/lib/prorate/counter.rb +0 -53
data/lib/prorate/throttled.rb +0 -9

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d846e16d888b11395f59a44e8d487e9c3cccf336
-  data.tar.gz: 16051a7c67452164d40c3d8c785a26b973eaf54e
+  metadata.gz: db453351faca0b61a4517795368fe719fe5c07bb
+  data.tar.gz: 165c6088be69a4a3b291059aa4e872061e5f999f
 SHA512:
-  metadata.gz: b241b77bf6ec18bd394a0360bebd6b9e10b3b0cde21f1642860b0425d33b8b3a50dbaba9a5fe4fd7b5595dd95a566dfe122939a94711b651afec125e3ecfcc94
-  data.tar.gz: 1f110ef412d5c231f1eed8550f3957c4fc9797eef42f3a92de802b2be1d15b06c450b1913706c036dd30924925acfce410ec29adf495539ea0dd08763f904d50
+  metadata.gz: b9dd96de6c8915e8ef39f7737e930976d4f83909b8eb861456966ede6a2d62cd82f0b40b634af8da824bbd303d84e83205924d6ceb51369a17e82e6eec01523f
+  data.tar.gz: 5e349bc7288a6da431d9ef7177fc77f2041638ace47fe319af1e533846472fe234e1760ccd3d26bb208ad3e649eb99ecbed7733dc14871f53f63e19dd4b512f7

data/.travis.yml CHANGED

@@ -1,5 +1,15 @@
-sudo: false
-language: ruby
 rvm:
-  - 2.2.5
-before_install: gem install bundler -v 1.12.5
+- 2.2.5
+- 2.3.3
+- 2.4.1
+services:
+- redis
+dist: trusty # https://docs.travis-ci.com/user/trusty-ci-environment/
+sudo: false
+cache: bundler
+# Travis permits the following phases: before_install, install, after_install, before_script, script, after_script
+script:
+  - bundle exec rspec

data/README.md CHANGED

@@ -1,7 +1,8 @@
 # Prorate
 Provides a low-level time-based throttle. Is mainly meant for situations where using something like Rack::Attack is not very
-useful since you need access to more variables.
+useful since you need access to more variables. Under the hood, this uses a Lua script that implements the
+[Leaky Bucket](https://en.wikipedia.org/wiki/Leaky_bucket) algorithm in a single threaded and race condition safe way.
 ## Installation
@@ -23,7 +24,6 @@ Or install it yourself as:
 Within your Rails controller:
-    throttle_args[:block_for] ||= throttle_args.fetch(:period)
     t = Prorate::Throttle.new(redis: Redis.new, logger: Rails.logger,
         name: "throttle-login-email", limit: 20, period: 5.seconds)
     # Add all the parameters that function as a discriminator

data/lib/prorate/rate_limit.lua ADDED

@@ -0,0 +1,50 @@
+-- Single threaded Leaky Bucket implementation.
+-- args: key_base, max_bucket_capacity, leak_rate, block_duration
+-- returns: an array of two integers, the first of which indicates the remaining block time.
+-- if the block time is nonzero, the second integer is always zero. If the block time is zero,
+-- the second integer indicates the level of the bucket
+-- replicate_commands() switches the script to effects (command) replication, which Redis requires before a script may call the non-deterministic TIME command and then perform writes
+redis.replicate_commands()
+-- make some nicer looking variable names:
+local retval = nil
+local bucket_level_key = ARGV[1] .. ".bucket_level"
+local last_updated_key = ARGV[1] .. ".last_updated"
+local block_key = ARGV[1] .. ".block"
+local max_bucket_capacity = tonumber(ARGV[2])
+local leak_rate = tonumber(ARGV[3])
+local block_duration = tonumber(ARGV[4])
+local now = tonumber(redis.call("TIME")[1]) --unix timestamp, will be required in all paths
+local key_lifetime = math.ceil(max_bucket_capacity / leak_rate)
+local blocked_until = redis.call("GET", block_key)
+if blocked_until then
+  return {(tonumber(blocked_until) - now), 0}
+end
+-- get current bucket level
+local bucket_level = tonumber(redis.call("GET", bucket_level_key))
+if not bucket_level then
+  -- this throttle/identifier combo does not exist yet, so much calculation can be skipped
+  redis.call("SETEX", bucket_level_key, key_lifetime, 1) -- set bucket with initial value
+  retval =  {0, 1}
+else
+  -- if it already exists, do the leaky bucket thing
+  local last_updated = tonumber(redis.call("GET", last_updated_key)) or now -- use sensible default of 'now' if the key does not exist
+  local new_bucket_level = math.max(0, bucket_level - (leak_rate * (now - last_updated)))
+  if (new_bucket_level + 1) <= max_bucket_capacity then
+    new_bucket_level = new_bucket_level + 1
+    retval = {0, math.ceil(new_bucket_level)}
+  else
+    redis.call("SETEX", block_key, block_duration, now + block_duration)
+    retval = {block_duration, 0}
+  end
+  redis.call("SETEX", bucket_level_key, key_lifetime, new_bucket_level) --still needs to be saved
+end
+-- update last_updated for this bucket, required in all branches
+redis.call("SETEX", last_updated_key, key_lifetime, now)
+return retval

data/lib/prorate/throttle.rb CHANGED

@@ -1,11 +1,36 @@
 require 'digest'
 module Prorate
+  class Throttled < StandardError
+    attr_reader :retry_in_seconds
+    def initialize(try_again_in)
+      @retry_in_seconds = try_again_in
+      super("Throttled, please lower your temper and try again in #{retry_in_seconds} seconds")
+    end
+  end
+  class ScriptHashMismatch < StandardError
+  end
+  class MisconfiguredThrottle < StandardError
+  end
   class Throttle < Ks.strict(:name, :limit, :period, :block_for, :redis, :logger)
+    def self.get_script_hash
+      script_filepath = File.join(__dir__,"rate_limit.lua")
+      script = File.read(script_filepath)
+      Digest::SHA1.hexdigest(script)
+    end
+    CURRENT_SCRIPT_HASH = get_script_hash
     def initialize(*)
       super
       @discriminators = [name.to_s]
       self.redis = NullPool.new(redis) unless redis.respond_to?(:with)
+      raise MisconfiguredThrottle if ((period <= 0) || (limit <= 0))
+      @leak_rate = limit.to_f / period # tokens per second;
     end
     def <<(discriminator)
@@ -17,17 +42,29 @@ module Prorate
       identifier = [name, discriminator].join(':')
       redis.with do |r|
-        logger.info { "Checking throttle block %s" % name }
-        raise Throttled.new(block_for) if Prorate::BlockFor.blocked?(id: identifier, redis: r)
         logger.info { "Applying throttle counter %s" % name }
-        c = Prorate::Counter.new(redis: r, id: identifier, logger: logger, window_size: period)
-        after_increment = c.incr
-        if after_increment > limit
-          logger.warn { "Throttle %s exceeded limit of %d at %d" % [name, limit, after_increment] }
-          Prorate::BlockFor.block!(redis: r, id: identifier, duration: block_for)
-          raise Throttled.new(period)
+        remaining_block_time, bucket_level = run_lua_throttler(redis: r, identifier: identifier, bucket_capacity: limit, leak_rate: @leak_rate, block_for: block_for)
+        if remaining_block_time > 0
+          logger.warn { "Throttle %s exceeded limit of %d in %d seconds and is blocked for the next %d seconds" % [name, limit, period, remaining_block_time] }
+          raise Throttled.new(remaining_block_time)
         end
+        available_calls = limit - bucket_level
+      end
+    end
+    def run_lua_throttler(redis: , identifier: , bucket_capacity: , leak_rate: , block_for: )
+      redis.evalsha(CURRENT_SCRIPT_HASH, [], [identifier, bucket_capacity, leak_rate, block_for])
+    rescue Redis::CommandError => e
+      if e.message.include? "NOSCRIPT"
+        # The Redis server has never seen this script before. Needs to run only once in the entire lifetime of the Redis server (unless the script changes)
+        script_filepath = File.join(__dir__,"rate_limit.lua")
+        script = File.read(script_filepath)
+        raise ScriptHashMismatch if Digest::SHA1.hexdigest(script) != CURRENT_SCRIPT_HASH
+        redis.script(:load, script)
+        redis.evalsha(CURRENT_SCRIPT_HASH, [], [identifier, bucket_capacity, leak_rate, block_for])
+      else
+        raise e
       end
     end
   end

data/lib/prorate/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Prorate
-  VERSION = "0.1.0"
+  VERSION = "0.3.0"
 end

data/scripts/bm.rb ADDED

@@ -0,0 +1,43 @@
+# Runs a mild benchmark and prints out the average time a call to 'throttle!' takes.
+require 'prorate'
+require 'benchmark'
+require 'redis'
+require 'securerandom'
+def average_ms(ary)
+  ary.map{|x| x*1000}.inject(0,&:+) / ary.length
+end
+r = Redis.new
+logz = Logger.new(STDERR)
+logz.level = Logger::FATAL # block out most stuff
+times = []
+50.times do
+  times << Benchmark.realtime {
+    t = Prorate::Throttle.new(redis: r, logger: logz, name: "throttle-login-email", limit: 60, period: 30, block_for: 5)
+    # Add all the parameters that function as a discriminator
+    t << '127.0.2.1'
+    t << 'no_person@nowhere.com'
+    t.throttle!
+  }
+end
+puts average_ms times
+times = []
+50.times do
+  email = SecureRandom.hex(20)
+  ip = SecureRandom.hex(10)
+  times << Benchmark.realtime {
+    t = Prorate::Throttle.new(redis: r, logger: logz, name: "throttle-login-email", limit: 30, period: 30, block_for: 5)
+    # Add all the parameters that function as a discriminator
+    t << ip
+    t << email
+    t.throttle!
+  }
+end
+puts average_ms times

data/scripts/bm_latency_lb_vs_mget.rb ADDED

@@ -0,0 +1,61 @@
+# Runs a mild benchmark and prints out the average latency of the Lua leaky bucket script (called via EVALSHA) versus a MULTI/INCR/EXPIRE/MGET rolling-window counter transaction.
+require 'prorate'
+require 'benchmark'
+require 'redis'
+require 'securerandom'
+def average_ms(ary)
+  ary.map{|x| x*1000}.inject(0,&:+) / ary.length
+end
+r = Redis.new
+# 4000000.times do
+#   random1 = SecureRandom.hex(10)
+#   random2 = SecureRandom.hex(10)
+#   r.set(random1,random2)
+# end
+logz = Logger.new(STDERR)
+logz.level = Logger::FATAL # block out most stuff
+times = []
+15.times do
+  id = SecureRandom.hex(10)
+  times << Benchmark.realtime {
+    r.evalsha('c95c5f1197cef04ec4afd7d64760f9175933e55a', [], [id, 120, 50, 10]) # values beyond 120 chosen more or less at random
+  }
+end
+puts average_ms times
+def key_for_ts(ts)
+  "th:%s:%d" % [@id, ts]
+end
+times = []
+15.times do
+  id = SecureRandom.hex(10)
+  sec, _ = r.time # Use Redis time instead of the system timestamp, so that all the nodes are consistent
+  ts = sec.to_i # All Redis results are strings
+  k = key_for_ts(ts)
+  times << Benchmark.realtime {
+    r.multi do |txn|
+      # Increment the counter
+      txn.incr(k)
+      txn.expire(k, 120)
+      span_start = ts - 120
+      span_end = ts + 1
+      possible_keys = (span_start..span_end).map{|prev_time| key_for_ts(prev_time) }
+      # Fetch all the counter values within the time window. Despite the fact that this
+      # will return thousands of elements for large sliding window sizes, the values are
+      # small and an MGET in Redis is pretty cheap, so perf should stay well within limits.
+      txn.mget(*possible_keys)
+    end
+  }
+end
+puts average_ms times

data/scripts/reload_lua.rb ADDED

@@ -0,0 +1,6 @@
+# Reloads the script into redis and prints out the SHA it can be called with
+require 'redis'
+r = Redis.new
+script = File.read('../lib/prorate/rate_limit.lua')
+sha = r.script(:load,script)
+puts sha

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: prorate
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Julik Tarkhanov
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2017-02-14 00:00:00.000000000 Z
+date: 2017-07-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ks
@@ -111,14 +111,15 @@ files:
 - bin/console
 - bin/setup
 - lib/prorate.rb
-- lib/prorate/block_for.rb
-- lib/prorate/counter.rb
 - lib/prorate/null_logger.rb
 - lib/prorate/null_pool.rb
+- lib/prorate/rate_limit.lua
 - lib/prorate/throttle.rb
-- lib/prorate/throttled.rb
 - lib/prorate/version.rb
 - prorate.gemspec
+- scripts/bm.rb
+- scripts/bm_latency_lb_vs_mget.rb
+- scripts/reload_lua.rb
 homepage: https://github.com/WeTransfer/prorate
 licenses:
 - MIT

data/lib/prorate/block_for.rb DELETED

@@ -1,13 +0,0 @@
-module Prorate
-  module BlockFor
-    def self.block!(redis:, id:, duration:)
-      k = "bl:%s" % id
-      redis.setex(k, duration.to_i, 1)
-    end
-    def self.blocked?(redis:, id:)
-      k = "bl:%s" % id
-      !!redis.get(k)
-    end
-  end
-end

data/lib/prorate/counter.rb DELETED

@@ -1,53 +0,0 @@
-module Prorate
-  # The counter implements a rolling window throttling mechanism. At each call to incr(), the Redis time
-  # is obtained. A counter then gets set at the key corresponding to the timestamp of the request, with a
-  # granularity of a second. If requests are done continuously and in large volume, the counter will therefore
-  # create one key for each second of the given rolling window size. The counters per second are set to auto-expire
-  # after the window lapses. When incr() is performed, the counters within the window are fetched and summed.
-  class Counter
-    def initialize(redis:, logger: NullLogger, id:, window_size:)
-      @redis = redis
-      @logger = logger
-      @id = id
-      @in_span_of_seconds = window_size.to_i.abs
-    end
-    # Increments the throttle counter for this identifier, and returns the total number of requests
-    # performed so far within the given time span. The caller can then determine whether the request has
-    # to be throttled or can be let through.
-    def incr
-      sec, _ = @redis.time # Use Redis time instead of the system timestamp, so that all the nodes are consistent
-      ts = sec.to_i # All Redis results are strings
-      k = key_for_ts(ts)
-      # Do the Redis stuff in a transaction, and capture only the necessary values
-      # (the result of MULTI is all the return values of each call in sequence)
-      *_, done_last_second, _, counter_values = @redis.multi do |txn|
-        # Increment the counter
-        txn.incr(k)
-        txn.expire(k, @in_span_of_seconds)
-        span_start = ts - @in_span_of_seconds
-        span_end = ts + 1
-        possible_keys = (span_start..span_end).map{|prev_time| key_for_ts(prev_time) }
-        @logger.debug { "%s: Scanning %d possible keys" % [@id, possible_keys.length] }
-        # Fetch all the counter values within the time window. Despite the fact that this
-        # will return thousands of elements for large sliding window sizes, the values are
-        # small and an MGET in Redis is pretty cheap, so perf should stay well within limits.
-        txn.mget(*possible_keys)
-      end
-      # Sum all the values. The empty keys return nils from MGET, which become 0 on to_i casts.
-      total_requests_during_period = counter_values.map(&:to_i).inject(&:+)
-      @logger.debug { "%s: %d reqs total during the last %d seconds" % [@id, total_requests_during_period, @in_span_of_seconds] }
-      total_requests_during_period
-    end
-    private
-    def key_for_ts(ts)
-      "th:%s:%d" % [@id, ts]
-    end
-  end
-end

data/lib/prorate/throttled.rb DELETED

@@ -1,9 +0,0 @@
-module Prorate
-  class Throttled < StandardError
-    attr_reader :retry_in_seconds
-    def initialize(try_again_in)
-      @retry_in_seconds = try_again_in
-      super("Throttled, please lower your temper and try again in %d seconds" % try_again_in)
-    end
-  end
-end