pecorino 0.1.1 → 0.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: a58274f909ba72f93ce478129cd8cfb08893be9a6691a7a892b5e3be455fdd59
- data.tar.gz: 9bd363bc28d2095a3394abc36f0291c21b7b10e8652b8238243167a262f44740
+ metadata.gz: 2f94aa734cb0bb50657f5484bbdd8bfcaabc7c2d6b7d9329361d41456ba49db6
+ data.tar.gz: 97e01c53e828092ce60be1412a288a70446ec9cc5ab783a6fa6e3ba147de1ee5
  SHA512:
- metadata.gz: b9fbe0ec9ca780eb2e546b0f47bfcb0b4c579ac2f688dc59b7ebbcad39ebba875759fe62982c80b8f04e39553df6d1a64f7e8e9300d8f32b6d5f57c7c8a68405
- data.tar.gz: c909b8bb23045ef42eca9346f8956744aeb2f97d5c5446f62f5b16c808b56d99abbc291e5a40d2de31fa2027b4ed789567d51af98e44a23b4c0c2062cf7a71cd
+ metadata.gz: 4d83ebb84009492403ca8950d181f4689b42782ab3f65f7fe5091cab92fc4f739ecd64625449ab784309a122f09e62525c262c71f3c602d3d538f4ac511a78e3
+ data.tar.gz: 93d3a2845713c6dc71ff1f35e5433fcbca6837fdb336db00a7e403e5719f1e65eadac7dc819301c8886d40dc3f99c59b196d64004145de786d0875c42b98e635
data/.github/workflows/ci.yml ADDED
@@ -0,0 +1,76 @@
+ name: CI
+
+ on:
+ - push
+ - pull_request
+
+ env:
+   BUNDLE_PATH: vendor/bundle
+
+ jobs:
+   # lint:
+   #   name: Code Style
+   #   runs-on: ubuntu-22.04
+   #   if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
+   #   strategy:
+   #     matrix:
+   #       ruby:
+   #         - '2.7'
+   #   steps:
+   #     - name: Checkout
+   #       uses: actions/checkout@v4
+   #     - name: Setup Ruby
+   #       uses: ruby/setup-ruby@v1
+   #       with:
+   #         ruby-version: ${{ matrix.ruby }}
+   #         bundler-cache: true
+   #     - name: Rubocop Cache
+   #       uses: actions/cache@v3
+   #       with:
+   #         path: ~/.cache/rubocop_cache
+   #         key: ${{ runner.os }}-rubocop-${{ hashFiles('.rubocop.yml') }}
+   #         restore-keys: |
+   #           ${{ runner.os }}-rubocop-
+   #     - name: Rubocop
+   #       run: bundle exec rubocop
+   test:
+     name: Tests
+     runs-on: ubuntu-22.04
+     if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
+     strategy:
+       matrix:
+         ruby:
+           # - '2.6'
+           - '3.2'
+     services:
+       # mysql:
+       #   image: mysql:5.7
+       #   env:
+       #     MYSQL_ALLOW_EMPTY_PASSWORD: yes
+       #   ports:
+       #     - 3306:3306
+       #   options: --health-cmd="mysqladmin ping" --health-interval=10s --health-timeout=5s --health-retries=3
+       postgres:
+         image: postgres
+         env:
+           POSTGRES_PASSWORD: postgres
+         options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
+         ports:
+           - 5432:5432
+     steps:
+       - name: Checkout
+         uses: actions/checkout@v4
+       - name: Setup Ruby
+         uses: ruby/setup-ruby@v1
+         with:
+           ruby-version: ${{ matrix.ruby }}
+           bundler-cache: true
+       - name: "Tests and Lint"
+         run: bundle exec rake
+         env:
+           PGHOST: localhost
+           PGUSER: postgres
+           PGPASSWORD: postgres
+           TESTOPTS: "--fail-fast"
+           # MYSQL_HOST: 127.0.0.1
+           # MYSQL_PORT: 3306
data/CHANGELOG.md CHANGED
@@ -1,4 +1,13 @@
- ## [Unreleased]
+ ## [0.3.0] - 2024-01-18
+
+ - Allow `over_time` in addition to `leak_rate`, which is a more intuitive parameter to tweak
+ - Set default `block_for` to the time it takes the bucket to leak out completely instead of 30 seconds
+
+ ## [0.2.0] - 2024-01-09
+
+ - [Add support for SQLite](https://github.com/cheddar-me/pecorino/pull/9)
+ - [Use comparisons in SQL to determine whether the leaky bucket did overflow](https://github.com/cheddar-me/pecorino/pull/8)
+ - [Change the way Structs are defined to appease Tapioca/Sorbet](https://github.com/cheddar-me/pecorino/pull/6)
 
  ## [0.1.0] - 2023-10-30
 
data/Gemfile CHANGED
@@ -4,7 +4,3 @@ source "https://rubygems.org"
 
  # Specify your gem's dependencies in pecorino.gemspec
  gemspec
-
- gem "rake", "~> 13.0"
-
- gem "minitest", "~> 5.0"
data/README.md CHANGED
@@ -2,9 +2,9 @@
 
  Pecorino is a rate limiter based on the concept of leaky buckets. It uses your DB as the storage backend for the throttles. It is compact, easy to install, and does not require additional infrastructure. The approach used by Pecorino has been previously used by [prorate](https://github.com/WeTransfer/prorate) with Redis, and that approach has proven itself.
 
- Pecorino is designed to integrate seamlessly into any Rails application using a Postgres database (at the moment there is no MySQL support, we would be delighted if you could add it).
+ Pecorino is designed to integrate seamlessly into any Rails application using a PostgreSQL or SQLite database (at the moment there is no MySQL support, we would be delighted if you could add it).
 
- If you would like to know more about the leaky bucket algorithm: the [Wikipedia article](https://en.wikipedia.org/wiki/Leaky_bucket) is a great starting point.
+ If you would like to know more about the leaky bucket algorithm: [this article](http://live.julik.nl/2022/08/the-unreasonable-effectiveness-of-leaky-buckets) or the [Wikipedia article](https://en.wikipedia.org/wiki/Leaky_bucket) are both good starting points.
 
  ## Installation
 
@@ -17,23 +17,34 @@ gem 'pecorino'
  And then execute:
 
      $ bundle install
-
- Or install it yourself as:
-
-     $ gem install pecorino
+     $ bin/rails g pecorino:install
+     $ bin/rails db:migrate
 
  ## Usage
 
- First, add and run the migration to create the pecorino tables:
+ Once the installation is done you can use Pecorino to start defining your throttles. Imagine you have a resource called `vault` and you want to limit the number of updates to it to 5 per second. To achieve that, instantiate a new `Throttle` in your controller or job code, and then trigger it using `Throttle#request!`. A call to `request!` registers 1 token getting added to the bucket. If the bucket is full, or the throttle is currently in "block" mode (has recently been triggered), a `Pecorino::Throttle::Throttled` exception will be raised.
 
-     $ bin/rails g pecorino:install
-     $ bin/rails db:migrate
+ ```ruby
+ throttle = Pecorino::Throttle.new(key: "vault", over_time: 1.second, capacity: 5)
+ throttle.request!
+ ```
+ In a Rails controller you can then rescue from this exception to render the appropriate response:
+
+ ```ruby
+ rescue_from Pecorino::Throttle::Throttled do |e|
+   response.set_header('Retry-After', e.retry_after.to_s)
+   render nothing: true, status: 429
+ end
+ ```
 
- Once that is done, you can use Pecorino to start defining your throttles. Imagine you have a resource called `vault` and you want to limit the number of updates to it to 5 per second. To achieve that, instantiate a new `Throttle` in your controller or job code, and then trigger it using `Throttle#request!`. A call to `request!` registers 1 token getting added to the bucket. If the bucket is full, or the throttle is currently in "block" mode (has recently been triggered), a `Pecorino::Throttle::Throttled` exception will be raised.
+ and in a Rack application you can rescue inline:
 
  ```ruby
- throttle = Pecorino::Throttle.new(key: "vault", leak_rate: 5, capacity: 5)
- throttle.request!
+ def call(env)
+   # ...your code
+ rescue Pecorino::Throttle::Throttled => e
+   [429, {"Retry-After" => e.retry_after.to_s}, []]
+ end
  ```
 
  The exception has an attribute called `retry_after` which you can use to render the appropriate 429 response.
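
The `over_time:` keyword used above is new in 0.3.0 and is a friendlier spelling of the leak rate: for a bucket of capacity `c` that should drain empty in `t` seconds, the leak rate is `c / t` tokens per second. A minimal sketch of the equivalence (the key name is illustrative):

```ruby
# Both definitions leak at 5 / 1.0 = 5.0 tokens per second,
# so they throttle identically.
Pecorino::Throttle.new(key: "vault", capacity: 5, over_time: 1.second)
Pecorino::Throttle.new(key: "vault", capacity: 5, leak_rate: 5.0)
```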
@@ -47,7 +58,7 @@ return render :capacity_exceeded unless throttle.able_to_accept?
  If you are dealing with a metered resource (like throughput, money, amount of storage...) you can supply the number of tokens to either `request!` or `able_to_accept?` to indicate the desired top-up of the leaky bucket. For example, if you are maintaining user wallets and want to ensure no more than 100 dollars may be taken from the wallet within a certain amount of time, you can do it like so:
 
  ```ruby
- throttle = Pecorino::Throttle.new(key: "wallet_t_#{current_user.id}", leak_rate: 100 / 60.0 / 60.0, capacity: 100, block_for: 60*60*3)
+ throttle = Pecorino::Throttle.new(key: "wallet_t_#{current_user.id}", over_time: 1.hour, capacity: 100, block_for: 60*60*3)
  throttle.request!(20) # Attempt to withdraw 20 dollars
  throttle.request!(20) # Attempt to withdraw 20 dollars more
  throttle.request!(20) # Attempt to withdraw 20 dollars more
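
The same arithmetic applies here: `over_time: 1.hour` with `capacity: 100` yields a leak rate of 100 / 3600 ≈ 0.028 tokens per second, which is exactly what the previous `leak_rate: 100 / 60.0 / 60.0` computed by hand. After the three `request!(20)` calls above the bucket sits at roughly 60 tokens, minus whatever leaked out between the calls.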
@@ -72,9 +83,11 @@ Check out the inline YARD documentation for more options.
 
  We recommend running the following bit of code every couple of hours (via cron or similar) to delete the stale blocks and leaky buckets from the system:
 
-     Pecorino.prune!
+ ```ruby
+ Pecorino.prune!
+ ```
 
- ## Using unlogged tables for reduced replication load
+ ## Using unlogged tables for reduced replication load (PostgreSQL)
 
  Throttles and leaky buckets are transient resources. If you are using Postgres replication, it might be prudent to set the Pecorino tables to `UNLOGGED` which will exclude them from replication - and save you bandwidth and storage on your RR. To do so, add the following statements to your migration:
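
The statements themselves are cut off in this diff; a hypothetical migration, assuming only the two Pecorino tables need switching (the class name is made up for the example):

```ruby
# Hypothetical example - the exact statements are truncated from this diff.
# ALTER TABLE ... SET UNLOGGED is PostgreSQL-specific: unlogged tables skip
# the WAL and are therefore excluded from streaming replication.
class MakePecorinoTablesUnlogged < ActiveRecord::Migration[7.0]
  def up
    execute "ALTER TABLE pecorino_leaky_buckets SET UNLOGGED"
    execute "ALTER TABLE pecorino_blocks SET UNLOGGED"
  end

  def down
    execute "ALTER TABLE pecorino_leaky_buckets SET LOGGED"
    execute "ALTER TABLE pecorino_blocks SET LOGGED"
  end
end
```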
 
data/Rakefile CHANGED
@@ -2,6 +2,7 @@
 
  require "bundler/gem_tasks"
  require "rake/testtask"
+ require "standard/rake"
 
  Rake::TestTask.new(:test) do |t|
    t.libs << "test"
@@ -9,4 +10,9 @@ Rake::TestTask.new(:test) do |t|
    t.test_files = FileList["test/**/*_test.rb"]
  end
 
- task default: :test
+ task :format do
+   `bundle exec standardrb --fix`
+   `bundle exec magic_frozen_string_literal .`
+ end
+
+ task default: [:test, :standard]
data/lib/pecorino/install_generator.rb CHANGED
@@ -1,6 +1,7 @@
  # frozen_string_literal: true
- require 'rails/generators'
- require 'rails/generators/active_record'
+
+ require "rails/generators"
+ require "rails/generators/active_record"
 
  module Pecorino
    #
@@ -13,11 +14,11 @@ module Pecorino
      TEMPLATES = File.join(File.dirname(__FILE__))
      source_paths << TEMPLATES
 
-     class_option :database, type: :string, aliases: %i(--db), desc: "The database for your migration. By default, the current environment's primary database is used."
+     class_option :database, type: :string, aliases: %i[--db], desc: "The database for your migration. By default, the current environment's primary database is used."
 
      # Generates monolithic migration file that contains all database changes.
      def create_migration_file
-       migration_template 'migrations/create_pecorino_tables.rb.erb', File.join(db_migrate_path, "create_pecorino_tables.rb")
+       migration_template "migrations/create_pecorino_tables.rb.erb", File.join(db_migrate_path, "create_pecorino_tables.rb")
      end
 
      private
data/lib/pecorino/leaky_bucket.rb CHANGED
@@ -1,7 +1,7 @@
  # frozen_string_literal: true
 
  # This offers just the leaky bucket implementation with fill control, but without the timed lock.
- # It does not raise any exceptions, it just tracks the state of a leaky bucket in Postgres.
+ # It does not raise any exceptions, it just tracks the state of a leaky bucket in the database.
  #
  # Leak rate is specified directly in tokens per second, instead of specifying the block period.
  # The bucket level is stored and returned as a Float which allows for finer-grained measurement,
@@ -25,7 +25,7 @@
  # The storage use is one DB row per leaky bucket you need to manage (likely - one throttled entity such
  # as a combination of an IP address + the URL you need to protect). The `key` is an arbitrary string you provide.
  class Pecorino::LeakyBucket
-   class State < Struct.new(:level, :full)
+   State = Struct.new(:level, :full) do
      # Returns the level of the bucket after the operation on the LeakyBucket
      # object has taken place. There is a guarantee that no tokens have leaked
     # from the bucket between the operation and the freezing of the State
@@ -59,21 +59,40 @@ class Pecorino::LeakyBucket
      end
    end
 
-   # Creates a new LeakyBucket. The object controls 1 row in Postgres which is
+   # The key (name) of the leaky bucket
+   # @return [String]
+   attr_reader :key
+
+   # The leak rate (tokens per second) of the bucket
+   # @return [Float]
+   attr_reader :leak_rate
+
+   # The capacity of the bucket in tokens
+   # @return [Float]
+   attr_reader :capacity
+
+   # Creates a new LeakyBucket. The object controls 1 row in the database which is
    # specific to the bucket key.
    #
    # @param key[String] the key for the bucket. The key also gets used
    #   to derive locking keys, so that operations on a particular bucket
    #   are always serialized.
-   # @param leak_rate[Float] the leak rate of the bucket, in tokens per second
+   # @param leak_rate[Float] the leak rate of the bucket, in tokens per second.
+   #   Either `leak_rate` or `over_time` can be used, but not both.
+   # @param over_time[#to_f] over how many seconds the bucket will leak out to 0 tokens.
+   #   The value is assumed to be the number of seconds
+   #   - or a duration which returns the number of seconds from `to_f`.
+   #   Either `leak_rate` or `over_time` can be used, but not both.
    # @param capacity[Numeric] how many tokens is the bucket capped at.
    #   Filling up the bucket using `fillup()` will add to that number, but
    #   the bucket contents will then be capped at this value. So with
    #   bucket_capacity set to 12 and a `fillup(14)` the bucket will reach the level
    #   of 12, and will then immediately start leaking again.
-   def initialize(key:, leak_rate:, capacity:)
+   def initialize(key:, capacity:, leak_rate: nil, over_time: nil)
+     raise ArgumentError, "Either leak_rate: or over_time: must be specified" if leak_rate.nil? && over_time.nil?
+     raise ArgumentError, "Either leak_rate: or over_time: may be specified, but not both" if leak_rate && over_time
+     @leak_rate = leak_rate || (capacity / over_time.to_f)
      @key = key
-     @leak_rate = leak_rate.to_f
      @capacity = capacity.to_f
    end
@@ -86,7 +105,8 @@
    # @param n_tokens[Float]
    # @return [State] the state of the bucket after the operation
    def fillup(n_tokens)
-     add_tokens(n_tokens.to_f)
+     capped_level_after_fillup, did_overflow = Pecorino.adapter.add_tokens(capacity: @capacity, key: @key, leak_rate: @leak_rate, n_tokens: n_tokens)
+     State.new(capped_level_after_fillup, did_overflow)
    end
 
    # Returns the current state of the bucket, containing the level and whether the bucket is full.
@@ -94,34 +114,8 @@ class Pecorino::LeakyBucket
    #
    # @return [State] the snapshotted state of the bucket at time of query
    def state
-     conn = ActiveRecord::Base.connection
-     query_params = {
-       key: @key,
-       capa: @capacity.to_f,
-       leak_rate: @leak_rate.to_f
-     }
-     # The `level` of the bucket is what got stored at `last_touched_at` time, and we can
-     # extrapolate from it to see how many tokens have leaked out since `last_touched_at` -
-     # we don't need to UPDATE the value in the bucket here
-     sql = ActiveRecord::Base.sanitize_sql_array([<<~SQL, query_params])
-       SELECT
-         GREATEST(
-           0.0, LEAST(
-             :capa,
-             t.level - (EXTRACT(EPOCH FROM (clock_timestamp() - t.last_touched_at)) * :leak_rate)
-           )
-         )
-       FROM
-         pecorino_leaky_buckets AS t
-       WHERE
-         key = :key
-     SQL
-
-     # If the return value of the query is a NULL it means no such bucket exists,
-     # so we assume the bucket is empty
-     current_level = conn.uncached { conn.select_value(sql) } || 0.0
-
-     State.new(current_level, (@capacity - current_level).abs < 0.01)
+     current_level, is_full = Pecorino.adapter.state(key: @key, capacity: @capacity, leak_rate: @leak_rate)
+     State.new(current_level, is_full)
    end
 
    # Tells whether the bucket can accept the amount of tokens without overflowing.
@@ -135,58 +129,4 @@ class Pecorino::LeakyBucket
    def able_to_accept?(n_tokens)
      (state.level + n_tokens) < @capacity
    end
-
-   private
-
-   def add_tokens(n_tokens)
-     conn = ActiveRecord::Base.connection
-
-     # Take double the time it takes the bucket to empty under normal circumstances
-     # until the bucket may be deleted.
-     may_be_deleted_after_seconds = (@capacity.to_f / @leak_rate.to_f) * 2.0
-
-     # Create the leaky bucket if it does not exist, and update
-     # to the new level, taking the leak rate into account - if the bucket exists.
-     query_params = {
-       key: @key,
-       capa: @capacity.to_f,
-       delete_after_s: may_be_deleted_after_seconds,
-       leak_rate: @leak_rate.to_f,
-       fillup: n_tokens.to_f
-     }
-     sql = ActiveRecord::Base.sanitize_sql_array([<<~SQL, query_params])
-       INSERT INTO pecorino_leaky_buckets AS t
-         (key, last_touched_at, may_be_deleted_after, level)
-       VALUES
-         (
-           :key,
-           clock_timestamp(),
-           clock_timestamp() + ':delete_after_s second'::interval,
-           GREATEST(0.0,
-             LEAST(
-               :capa,
-               :fillup
-             )
-           )
-         )
-       ON CONFLICT (key) DO UPDATE SET
-         last_touched_at = EXCLUDED.last_touched_at,
-         may_be_deleted_after = EXCLUDED.may_be_deleted_after,
-         level = GREATEST(0.0,
-           LEAST(
-             :capa,
-             t.level + :fillup - (EXTRACT(EPOCH FROM (EXCLUDED.last_touched_at - t.last_touched_at)) * :leak_rate)
-           )
-         )
-       RETURNING level
-     SQL
-
-     # Note the use of .uncached here. The AR query cache will actually see our
-     # query as a repeat (since we use "select_value" for the RETURNING bit) and will not call into Postgres
-     # correctly, thus the clock_timestamp() value would be frozen between calls. We don't want that here.
-     # See https://stackoverflow.com/questions/73184531/why-would-postgres-clock-timestamp-freeze-inside-a-rails-unit-test
-     level_after_fillup = conn.uncached { conn.select_value(sql) }
-
-     State.new(level_after_fillup, (@capacity - level_after_fillup).abs < 0.01)
-   end
  end
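
With the storage moved behind `Pecorino.adapter`, the public `LeakyBucket` surface stays small; a usage sketch with illustrative numbers (the key is arbitrary):

```ruby
# A bucket holding 10 tokens that drains to empty over 60 seconds,
# i.e. leak_rate = 10 / 60.0 ≈ 0.167 tokens per second.
bucket = Pecorino::LeakyBucket.new(key: "uploads-user-123", capacity: 10, over_time: 60)

state = bucket.fillup(2)   # upserts the row and returns a State struct
state.level                # => current level as a Float (about 2.0)
state.full?                # => false until the level reaches capacity
bucket.able_to_accept?(8)  # => false: 2 + 8 would hit the 10-token cap
```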
data/lib/pecorino/postgres.rb ADDED
@@ -0,0 +1,107 @@
+ # frozen_string_literal: true
+
+ Pecorino::Postgres = Struct.new(:model_class) do
+   def state(key:, capacity:, leak_rate:)
+     query_params = {
+       key: key.to_s,
+       capacity: capacity.to_f,
+       leak_rate: leak_rate.to_f
+     }
+     # The `level` of the bucket is what got stored at `last_touched_at` time, and we can
+     # extrapolate from it to see how many tokens have leaked out since `last_touched_at` -
+     # we don't need to UPDATE the value in the bucket here
+     sql = model_class.sanitize_sql_array([<<~SQL, query_params])
+       SELECT
+         GREATEST(
+           0.0, LEAST(
+             :capacity,
+             t.level - (EXTRACT(EPOCH FROM (clock_timestamp() - t.last_touched_at)) * :leak_rate)
+           )
+         )
+       FROM
+         pecorino_leaky_buckets AS t
+       WHERE
+         key = :key
+     SQL
+
+     # If the return value of the query is a NULL it means no such bucket exists,
+     # so we assume the bucket is empty
+     current_level = model_class.connection.uncached { model_class.connection.select_value(sql) } || 0.0
+     [current_level, (capacity - current_level).abs < 0.01]
+   end
+
+   def add_tokens(key:, capacity:, leak_rate:, n_tokens:)
+     # Take double the time it takes the bucket to empty under normal circumstances
+     # until the bucket may be deleted.
+     may_be_deleted_after_seconds = (capacity.to_f / leak_rate.to_f) * 2.0
+
+     # Create the leaky bucket if it does not exist, and update
+     # to the new level, taking the leak rate into account - if the bucket exists.
+     query_params = {
+       key: key.to_s,
+       capacity: capacity.to_f,
+       delete_after_s: may_be_deleted_after_seconds,
+       leak_rate: leak_rate.to_f,
+       fillup: n_tokens.to_f
+     }
+
+     sql = model_class.sanitize_sql_array([<<~SQL, query_params])
+       INSERT INTO pecorino_leaky_buckets AS t
+         (key, last_touched_at, may_be_deleted_after, level)
+       VALUES
+         (
+           :key,
+           clock_timestamp(),
+           clock_timestamp() + ':delete_after_s second'::interval,
+           GREATEST(0.0,
+             LEAST(
+               :capacity,
+               :fillup
+             )
+           )
+         )
+       ON CONFLICT (key) DO UPDATE SET
+         last_touched_at = EXCLUDED.last_touched_at,
+         may_be_deleted_after = EXCLUDED.may_be_deleted_after,
+         level = GREATEST(0.0,
+           LEAST(
+             :capacity,
+             t.level + :fillup - (EXTRACT(EPOCH FROM (EXCLUDED.last_touched_at - t.last_touched_at)) * :leak_rate)
+           )
+         )
+       RETURNING
+         level,
+         -- Compare level to the capacity inside the DB so that we won't have rounding issues
+         level >= :capacity AS did_overflow
+     SQL
+
+     # Note the use of .uncached here. The AR query cache will actually see our
+     # query as a repeat (since we use "select_one" for the RETURNING bit) and will not call into Postgres
+     # correctly, thus the clock_timestamp() value would be frozen between calls. We don't want that here.
+     # See https://stackoverflow.com/questions/73184531/why-would-postgres-clock-timestamp-freeze-inside-a-rails-unit-test
+     upserted = model_class.connection.uncached { model_class.connection.select_one(sql) }
+     capped_level_after_fillup, did_overflow = upserted.fetch("level"), upserted.fetch("did_overflow")
+     [capped_level_after_fillup, did_overflow]
+   end
+
+   def set_block(key:, block_for:)
+     query_params = {key: key.to_s, block_for: block_for.to_f}
+     block_set_query = model_class.sanitize_sql_array([<<~SQL, query_params])
+       INSERT INTO pecorino_blocks AS t
+         (key, blocked_until)
+       VALUES
+         (:key, NOW() + ':block_for seconds'::interval)
+       ON CONFLICT (key) DO UPDATE SET
+         blocked_until = GREATEST(EXCLUDED.blocked_until, t.blocked_until)
+       RETURNING blocked_until;
+     SQL
+     model_class.connection.uncached { model_class.connection.select_value(block_set_query) }
+   end
+
+   def blocked_until(key:)
+     block_check_query = model_class.sanitize_sql_array([<<~SQL, key])
+       SELECT blocked_until FROM pecorino_blocks WHERE key = ? AND blocked_until >= NOW() LIMIT 1
+     SQL
+     model_class.connection.uncached { model_class.connection.select_value(block_check_query) }
+   end
+ end
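
Both adapters compute the same extrapolation: the stored level decays linearly since `last_touched_at` and is clamped to `[0, capacity]`. Restated in plain Ruby for clarity (illustrative only - the real calculation deliberately runs inside the database so concurrent callers agree on one clock):

```ruby
# What the SELECT above computes, expressed as a pure function.
def extrapolated_level(stored_level, last_touched_at, leak_rate, capacity, now: Time.now)
  leaked_since_last_touch = (now.to_f - last_touched_at.to_f) * leak_rate
  [[stored_level - leaked_since_last_touch, 0.0].max, capacity].min
end
```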
data/lib/pecorino/railtie.rb CHANGED
@@ -1,7 +1,9 @@
+ # frozen_string_literal: true
+
  module Pecorino
    class Railtie < Rails::Railtie
      generators do
        require_relative "install_generator"
      end
    end
- end
+ end
data/lib/pecorino/sqlite.rb ADDED
@@ -0,0 +1,125 @@
+ # frozen_string_literal: true
+
+ Pecorino::Sqlite = Struct.new(:model_class) do
+   def state(key:, capacity:, leak_rate:)
+     # With a server database, it is really important to use the clock of the database itself so
+     # that concurrent requests will see consistent bucket level calculations. Since SQLite is
+     # actually in-process, there is no point using DB functions - and besides, SQLite reduces
+     # the time precision to the nearest millisecond - and the calculations with timestamps are
+     # obtuse. Therefore we can use the current time inside the Ruby VM - it doesn't matter all that
+     # much but saves us on writing some gnarly SQL to have SQLite produce consistent precise timestamps.
+     query_params = {
+       key: key.to_s,
+       capacity: capacity.to_f,
+       leak_rate: leak_rate.to_f,
+       now_s: Time.now.to_f
+     }
+     # The `level` of the bucket is what got stored at `last_touched_at` time, and we can
+     # extrapolate from it to see how many tokens have leaked out since `last_touched_at` -
+     # we don't need to UPDATE the value in the bucket here
+     sql = model_class.sanitize_sql_array([<<~SQL, query_params])
+       SELECT
+         MAX(
+           0.0, MIN(
+             :capacity,
+             t.level - ((:now_s - t.last_touched_at) * :leak_rate)
+           )
+         )
+       FROM
+         pecorino_leaky_buckets AS t
+       WHERE
+         key = :key
+     SQL
+
+     # If the return value of the query is a NULL it means no such bucket exists,
+     # so we assume the bucket is empty
+     current_level = model_class.connection.uncached { model_class.connection.select_value(sql) } || 0.0
+     [current_level, (capacity - current_level).abs < 0.01]
+   end
+
+   def add_tokens(key:, capacity:, leak_rate:, n_tokens:)
+     # Take double the time it takes the bucket to empty under normal circumstances
+     # until the bucket may be deleted.
+     may_be_deleted_after_seconds = (capacity.to_f / leak_rate.to_f) * 2.0
+
+     # Create the leaky bucket if it does not exist, and update
+     # to the new level, taking the leak rate into account - if the bucket exists.
+     query_params = {
+       key: key.to_s,
+       capacity: capacity.to_f,
+       delete_after_s: may_be_deleted_after_seconds,
+       leak_rate: leak_rate.to_f,
+       now_s: Time.now.to_f, # See above as to why we are using a time value passed in
+       fillup: n_tokens.to_f,
+       id: SecureRandom.uuid # SQLite3 does not autogenerate UUIDs
+     }
+
+     sql = model_class.sanitize_sql_array([<<~SQL, query_params])
+       INSERT INTO pecorino_leaky_buckets AS t
+         (id, key, last_touched_at, may_be_deleted_after, level)
+       VALUES
+         (
+           :id,
+           :key,
+           :now_s, -- Precision loss must be avoided here as it is used for calculations
+           DATETIME('now', '+:delete_after_s seconds'), -- Precision loss is acceptable here
+           MAX(0.0,
+             MIN(
+               :capacity,
+               :fillup
+             )
+           )
+         )
+       ON CONFLICT (key) DO UPDATE SET
+         last_touched_at = EXCLUDED.last_touched_at,
+         may_be_deleted_after = EXCLUDED.may_be_deleted_after,
+         level = MAX(0.0,
+           MIN(
+             :capacity,
+             t.level + :fillup - ((:now_s - t.last_touched_at) * :leak_rate)
+           )
+         )
+       RETURNING
+         level,
+         -- Compare level to the capacity inside the DB so that we won't have rounding issues
+         level >= :capacity AS did_overflow
+     SQL
+
+     # Note the use of .uncached here. The AR query cache will actually see our
+     # query as a repeat (since we use "select_one" for the RETURNING bit) and will not call into Postgres
+     # correctly, thus the clock_timestamp() value would be frozen between calls. We don't want that here.
+     # See https://stackoverflow.com/questions/73184531/why-would-postgres-clock-timestamp-freeze-inside-a-rails-unit-test
+     upserted = model_class.connection.uncached { model_class.connection.select_one(sql) }
+     capped_level_after_fillup, one_if_did_overflow = upserted.fetch("level"), upserted.fetch("did_overflow")
+     [capped_level_after_fillup, one_if_did_overflow == 1]
+   end
+
+   def set_block(key:, block_for:)
+     query_params = {id: SecureRandom.uuid, key: key.to_s, block_for: block_for.to_f, now_s: Time.now.to_f}
+     block_set_query = model_class.sanitize_sql_array([<<~SQL, query_params])
+       INSERT INTO pecorino_blocks AS t
+         (id, key, blocked_until)
+       VALUES
+         (:id, :key, :now_s + :block_for)
+       ON CONFLICT (key) DO UPDATE SET
+         blocked_until = MAX(EXCLUDED.blocked_until, t.blocked_until)
+       RETURNING blocked_until;
+     SQL
+     blocked_until_s = model_class.connection.uncached { model_class.connection.select_value(block_set_query) }
+     Time.at(blocked_until_s)
+   end
+
+   def blocked_until(key:)
+     now_s = Time.now.to_f
+     block_check_query = model_class.sanitize_sql_array([<<~SQL, {now_s: now_s, key: key}])
+       SELECT
+         blocked_until
+       FROM
+         pecorino_blocks
+       WHERE
+         key = :key AND blocked_until >= :now_s LIMIT 1
+     SQL
+     blocked_until_s = model_class.connection.uncached { model_class.connection.select_value(block_check_query) }
+     blocked_until_s && Time.at(blocked_until_s)
+   end
+ end
data/lib/pecorino/throttle.rb CHANGED
@@ -6,7 +6,7 @@
  # the block is lifted. The block time can be arbitrarily higher or lower than the amount
  # of time it takes for the leaky bucket to leak out
  class Pecorino::Throttle
-   class State < Struct.new(:blocked_until)
+   State = Struct.new(:blocked_until) do
      # Tells whether this throttle is blocked, either due to the leaky bucket having filled up
      # or due to there being a timed block set because of an earlier event of the bucket having
      # filled up
@@ -43,13 +43,14 @@ class Pecorino::Throttle
    end
 
    # @param key[String] the key for both the block record and the leaky bucket
-   # @param block_for[Numeric] the number of seconds to block any further requests for
+   # @param block_for[Numeric] the number of seconds to block any further requests for. Defaults to the time it takes
+   #   the bucket to leak out to the level of 0
    # @param leaky_bucket_options Options for `Pecorino::LeakyBucket.new`
    # @see Pecorino::LeakyBucket.new
-   def initialize(key:, block_for: 30, **leaky_bucket_options)
+   def initialize(key:, block_for: nil, **)
+     @bucket = Pecorino::LeakyBucket.new(key:, **)
      @key = key.to_s
-     @block_for = block_for.to_f
-     @bucket = Pecorino::LeakyBucket.new(key:, **leaky_bucket_options)
+     @block_for = block_for ? block_for.to_f : (@bucket.capacity / @bucket.leak_rate)
    end
 
    # Tells whether the throttle will let this number of requests pass without raising
@@ -60,8 +61,7 @@ class Pecorino::Throttle
    # @param n_tokens[Float]
    # @return [boolean]
    def able_to_accept?(n_tokens = 1)
-     conn = ActiveRecord::Base.connection
-     !blocked_until(conn) && @bucket.able_to_accept?(n_tokens)
+     Pecorino.adapter.blocked_until(key: @key).nil? && @bucket.able_to_accept?(n_tokens)
    end
 
    # Register that a request is being performed. Will raise Throttled
@@ -98,35 +98,14 @@ class Pecorino::Throttle
    #
    # @return [State] the state of the throttle after filling up the leaky bucket / trying to pass the block
    def request(n = 1)
-     conn = ActiveRecord::Base.connection
-     existing_blocked_until = blocked_until(conn)
+     existing_blocked_until = Pecorino.adapter.blocked_until(key: @key)
      return State.new(existing_blocked_until.utc) if existing_blocked_until
 
      # Topup the leaky bucket
      return State.new(nil) unless @bucket.fillup(n.to_f).full?
 
      # and set the block if we reached it
-     query_params = {key: @key, block_for: @block_for}
-     block_set_query = ActiveRecord::Base.sanitize_sql_array([<<~SQL, query_params])
-       INSERT INTO pecorino_blocks AS t
-         (key, blocked_until)
-       VALUES
-         (:key, NOW() + ':block_for seconds'::interval)
-       ON CONFLICT (key) DO UPDATE SET
-         blocked_until = GREATEST(EXCLUDED.blocked_until, t.blocked_until)
-       RETURNING blocked_until;
-     SQL
-
-     fresh_blocked_until = conn.uncached { conn.select_value(block_set_query) }
+     fresh_blocked_until = Pecorino.adapter.set_block(key: @key, block_for: @block_for)
      State.new(fresh_blocked_until.utc)
    end
-
-   private
-
-   def blocked_until(via_connection)
-     block_check_query = ActiveRecord::Base.sanitize_sql_array([<<~SQL, @key])
-       SELECT blocked_until FROM pecorino_blocks WHERE key = ? AND blocked_until >= NOW() LIMIT 1
-     SQL
-     via_connection.uncached { via_connection.select_value(block_check_query) }
-   end
  end
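
The new `block_for` default falls out of the bucket parameters: `capacity / leak_rate` is precisely how many seconds a full bucket needs to drain to zero. For example:

```ruby
# capacity 5, over_time 1 second => leak_rate 5.0 tokens/s,
# so the default block duration is 5 / 5.0 = 1.0 second.
throttle = Pecorino::Throttle.new(key: "vault", capacity: 5, over_time: 1.second)
```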
data/lib/pecorino/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Pecorino
-   VERSION = "0.1.1"
+   VERSION = "0.3.0"
  end
data/lib/pecorino.rb CHANGED
@@ -1,11 +1,17 @@
  # frozen_string_literal: true
 
+ require "active_support/concern"
+ require "active_record/sanitization"
+
  require_relative "pecorino/version"
  require_relative "pecorino/leaky_bucket"
  require_relative "pecorino/throttle"
  require_relative "pecorino/railtie" if defined?(Rails::Railtie)
 
  module Pecorino
+   autoload :Postgres, "pecorino/postgres"
+   autoload :Sqlite, "pecorino/sqlite"
+
    # Deletes stale leaky buckets and blocks which have expired. Run this method regularly to
    # avoid accumulating too many unused rows in your tables.
    #
@@ -19,12 +25,12 @@ module Pecorino
      ActiveRecord::Base.connection.execute("DELETE FROM pecorino_leaky_buckets WHERE may_be_deleted_after < NOW()")
    end
 
-
    # Creates the tables and indexes needed for Pecorino. Call this from your migrations like so:
-   #   class CreatePecorinoTables < ActiveRecord::Migration<%= migration_version %>
    #
-   #   def change
-   #     Pecorino.create_tables(self)
+   #   class CreatePecorinoTables < ActiveRecord::Migration[7.0]
+   #     def change
+   #       Pecorino.create_tables(self)
+   #     end
    #   end
    #
    # @param active_record_schema[ActiveRecord::SchemaMigration] the migration through which we will create the tables
@@ -46,4 +52,20 @@ module Pecorino
      active_record_schema.add_index :pecorino_blocks, [:key], unique: true
      active_record_schema.add_index :pecorino_blocks, [:blocked_until]
    end
+
+   # Returns the database implementation for setting the values atomically. Since the implementation
+   # differs per database, this method will return a different adapter depending on which database is
+   # being used
+   def self.adapter
+     model_class = ActiveRecord::Base
+     adapter_name = model_class.connection.adapter_name
+     case adapter_name
+     when /postgres/i
+       Pecorino::Postgres.new(model_class)
+     when /sqlite/i
+       Pecorino::Sqlite.new(model_class)
+     else
+       raise "Pecorino does not support #{adapter_name} just yet"
+     end
+   end
  end
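
Since `Pecorino.adapter` keys off `ActiveRecord::Base.connection.adapter_name`, the throttle and bucket code above runs unchanged on either backend; a quick console check (assuming an established connection):

```ruby
Pecorino.adapter.class # => Pecorino::Postgres on a PostgreSQL connection,
                       #    Pecorino::Sqlite on a SQLite connection
```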
data/pecorino.gemspec CHANGED
@@ -3,15 +3,15 @@
  require_relative "lib/pecorino/version"
 
  Gem::Specification.new do |spec|
-   spec.name          = "pecorino"
-   spec.version       = Pecorino::VERSION
-   spec.authors       = ["Julik Tarkhanov"]
-   spec.email         = ["me@julik.nl"]
-
-   spec.summary       = "Database-based rate limiter using leaky buckets"
-   spec.description   = "Pecorino allows you to define throttles and rate meters for your metered resources, all through your standard DB"
-   spec.homepage      = "https://github.com/cheddar-me/pecorino"
-   spec.license       = "MIT"
+   spec.name = "pecorino"
+   spec.version = Pecorino::VERSION
+   spec.authors = ["Julik Tarkhanov"]
+   spec.email = ["me@julik.nl"]
+
+   spec.summary = "Database-based rate limiter using leaky buckets"
+   spec.description = "Pecorino allows you to define throttles and rate meters for your metered resources, all through your standard DB"
+   spec.homepage = "https://github.com/cheddar-me/pecorino"
+   spec.license = "MIT"
    spec.required_ruby_version = ">= 2.4.0"
 
  # spec.metadata["allowed_push_host"] = "TODO: Set to 'https://mygemserver.com'"
@@ -25,15 +25,20 @@ Gem::Specification.new do |spec|
    spec.files = Dir.chdir(File.expand_path(__dir__)) do
      `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
    end
-   spec.bindir        = "exe"
-   spec.executables   = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
+   spec.bindir = "exe"
+   spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
    spec.require_paths = ["lib"]
 
    # Uncomment to register a new dependency of your gem
    spec.add_dependency "activerecord", "~> 7"
-   spec.add_dependency "pg"
-   spec.add_development_dependency "activesupport", "~> 7"
-   spec.add_development_dependency "rails", "~> 7"
+   spec.add_development_dependency "pg"
+   spec.add_development_dependency "sqlite3"
+   spec.add_development_dependency "activesupport", "~> 7.0"
+   spec.add_development_dependency "rake", "~> 13.0"
+   spec.add_development_dependency "minitest", "~> 5.0"
+   spec.add_development_dependency "standard"
+   spec.add_development_dependency "magic_frozen_string_literal"
+   spec.add_development_dependency "minitest-fail-fast"
 
  # For more information and examples about making a new gem, checkout our
  # guide at: https://bundler.io/guides/creating_gem.html
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: pecorino
  version: !ruby/object:Gem::Version
-   version: 0.1.1
+   version: 0.3.0
  platform: ruby
  authors:
  - Julik Tarkhanov
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-11-02 00:00:00.000000000 Z
+ date: 2024-01-18 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: activerecord
@@ -31,7 +31,21 @@ dependencies:
      - - ">="
        - !ruby/object:Gem::Version
          version: '0'
-   type: :runtime
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: sqlite3
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
@@ -44,28 +58,84 @@ dependencies:
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '7'
+         version: '7.0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '7'
+         version: '7.0'
  - !ruby/object:Gem::Dependency
-   name: rails
+   name: rake
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '7'
+         version: '13.0'
    type: :development
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '7'
+         version: '13.0'
+ - !ruby/object:Gem::Dependency
+   name: minitest
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '5.0'
+ - !ruby/object:Gem::Dependency
+   name: standard
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: magic_frozen_string_literal
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+ - !ruby/object:Gem::Dependency
+   name: minitest-fail-fast
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
  description: Pecorino allows you to define throttles and rate meters for your metered
    resources, all through your standard DB
  email:
@@ -74,7 +144,7 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
- - ".github/workflows/main.yml"
+ - ".github/workflows/ci.yml"
  - ".gitignore"
  - ".ruby-version"
  - CHANGELOG.md
@@ -87,7 +157,9 @@ files:
  - lib/pecorino/install_generator.rb
  - lib/pecorino/leaky_bucket.rb
  - lib/pecorino/migrations/create_pecorino_tables.rb.erb
+ - lib/pecorino/postgres.rb
  - lib/pecorino/railtie.rb
+ - lib/pecorino/sqlite.rb
  - lib/pecorino/throttle.rb
  - lib/pecorino/version.rb
  - pecorino.gemspec
data/.github/workflows/main.yml DELETED
@@ -1,16 +0,0 @@
- name: Ruby
-
- on: [push,pull_request]
-
- jobs:
-   build:
-     runs-on: ubuntu-latest
-     steps:
-     - uses: actions/checkout@v2
-     - name: Set up Ruby
-       uses: ruby/setup-ruby@v1
-       with:
-         ruby-version: 2.6.3
-         bundler-cache: true
-     - name: Run the default task
-       run: bundle exec rake